[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/ -> Lucene.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  18   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  19   */
  20  
  21  
  22  /** Zend_Search_Lucene_Exception */
  23  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
  24  
  25  /** Zend_Search_Lucene_Document */
  26  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php';
  27  
  28  /** Zend_Search_Lucene_Document_Html */
  29  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';
  30  
  31  /** Zend_Search_Lucene_Storage_Directory */
  32  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php';
  33  
  34  /** Zend_Search_Lucene_Storage_File_Memory */
  35  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Memory.php';
  36  
  37  /** Zend_Search_Lucene_Index_Term */
  38  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
  39  
  40  /** Zend_Search_Lucene_Index_TermInfo */
  41  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/TermInfo.php';
  42  
  43  /** Zend_Search_Lucene_Index_SegmentInfo */
  44  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
  45  
  46  /** Zend_Search_Lucene_Index_FieldInfo */
  47  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/FieldInfo.php';
  48  
  49  /** Zend_Search_Lucene_Index_Writer */
  50  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Writer.php';
  51  
  52  /** Zend_Search_Lucene_Search_QueryParser */
  53  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';
  54  
  55  /** Zend_Search_Lucene_Search_QueryHit */
  56  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryHit.php';
  57  
  58  /** Zend_Search_Lucene_Search_Similarity */
  59  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';
  60  
  61  /** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
  62  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
  63  
  64  
  65  /** Zend_Search_Lucene_Interface */
  66  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php';
  67  
  68  /** Zend_Search_Lucene_Proxy */
  69  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Proxy.php';
  70  
  71  
  72  /**
  73   * @category   Zend
  74   * @package    Zend_Search_Lucene
  75   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  76   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  77   */
  78  class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
  79  {
  80      /**
  81       * Default field name for search
  82       *
  83       * Null means search through all fields
  84       *
  85       * @var string
  86       */
  87      private static $_defaultSearchField = null;
  88  
  89      /**
  90       * File system adapter.
  91       *
  92       * @var Zend_Search_Lucene_Storage_Directory
  93       */
  94      private $_directory = null;
  95  
  96      /**
  97       * File system adapter closing option
  98       *
  99       * @var boolean
 100       */
 101      private $_closeDirOnExit = true;
 102  
 103      /**
 104       * Writer for this index, not instantiated unless required.
 105       *
 106       * @var Zend_Search_Lucene_Index_Writer
 107       */
 108      private $_writer = null;
 109  
 110      /**
 111       * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
 112       *
 113       * @var array Zend_Search_Lucene_Index_SegmentInfo
 114       */
 115      private $_segmentInfos = array();
 116  
 117      /**
 118       * Number of documents in this index.
 119       *
 120       * @var integer
 121       */
 122      private $_docCount = 0;
 123  
 124      /**
 125       * Flag for index changes
 126       *
 127       * @var boolean
 128       */
 129      private $_hasChanges = false;
 130  
 131  
 132      /**
 133       * Index lock object
 134       *
 135       * @var Zend_Search_Lucene_Storage_File
 136       */
 137      private $_lock;
 138  
 139      /**
 140       * Signals that the index is already closed: changes are committed and resources are cleaned up
 141       *
 142       * @var boolean
 143       */
 144      private $_closed = false;
 145  
 146      /**
 147       * Number of references to the index object
 148       *
 149       * @var integer
 150       */
 151      private $_refCount = 0;
 152  
 153  
 154      /**
 155       * Create index
 156       *
 157       * @param mixed $directory
 158       * @return Zend_Search_Lucene_Interface
 159       */
 160      public static function create($directory)
 161      {
 162          return new Zend_Search_Lucene_Proxy(new self($directory, true)); // true: create the index
 163      }
 164  
 165      /**
 166       * Open index
 167       *
 168       * @param mixed $directory
 169       * @return Zend_Search_Lucene_Interface
 170       */
 171      public static function open($directory)
 172      {
 173          return new Zend_Search_Lucene_Proxy(new self($directory, false)); // false: open an existing index
 174      }
 175  
 176      /**
 177       * Opens the index.
 178       *
 179       * The constructor needs the index directory: either a string with a path to the index
 180       * folder or a Directory object. If $create is true, a new index is created there first.
 181       *
 182       * @param mixed $directory
 183       * @throws Zend_Search_Lucene_Exception
 184       */
 185      public function __construct($directory = null, $create = false)
 186      {
 187          if ($directory === null) {
 188              throw new Zend_Search_Lucene_Exception('No index directory specified'); // exception type declared in @throws
 189          }
 190  
 191          if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
 192              $this->_directory      = $directory;
 193              $this->_closeDirOnExit = false; // caller supplied the directory object; _close() leaves it open
 194          } else {
 195              $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
 196              $this->_closeDirOnExit = true;  // directory object created here; _close() closes it
 197          }
 198  
 199  
 200          // Lock file for the index; the lock itself is acquired below
 201          $this->_lock = $this->_directory->createFile('index.lock');
 202  
 203          $this->_segmentInfos = array();
 204  
 205          if ($create) {
 206              // Throw an exception if index is under processing now
 207              if (!$this->_lock->lock(LOCK_EX, true)) {
 208                  throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
 209              }
 210  
 211              // Writer will create segments file for empty segments list
 212              $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);
 213  
 214              if (!$this->_lock->lock(LOCK_SH)) { // switch from the exclusive lock to a shared one
 215                  throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
 216              }
 217          } else {
 218              // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
 219              if (!$this->_lock->lock(LOCK_SH)) {
 220                  throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
 221              }
 222              $this->_writer = null; // writer is instantiated on demand by getIndexWriter()
 223          }
 224  
 225  
 226          $segmentsFile = $this->_directory->getFileObject('segments');
 227  
 228          $format = $segmentsFile->readInt();
 229  
 230          if ($format != (int)0xFFFFFFFF) { // only this format marker is accepted
 231              throw new Zend_Search_Lucene_Exception('Wrong segments file format');
 232          }
 233  
 234          // read version; the value is not used here
 235          // (two readInt() calls stand in for a single readLong())
 236          $segmentsFile->readInt(); $segmentsFile->readInt();
 237  
 238          // read segment name counter (value is discarded)
 239          $segmentsFile->readInt();
 240  
 241          $segments = $segmentsFile->readInt(); // number of segment entries that follow
 242  
 243          $this->_docCount = 0;
 244  
 245          // read segmentInfos: each entry is a segment name followed by its document count
 246          for ($count = 0; $count < $segments; $count++) {
 247              $segName = $segmentsFile->readString();
 248              $segSize = $segmentsFile->readInt();
 249              $this->_docCount += $segSize; // index-wide total, including deleted documents
 250  
 251              $this->_segmentInfos[] =
 252                                  new Zend_Search_Lucene_Index_SegmentInfo($segName,
 253                                                                           $segSize,
 254                                                                           $this->_directory);
 255          }
 256      }
 257  
 258      /**
 259       * Close current index and free resources
 260       */
 261      private function _close()
 262      {
 263          if ($this->_closed) {
 264              // index is already closed and resources are cleaned up
 265              return;
 266          }
 267  
 268          $this->commit(); // presumably flushes pending changes; commit() is defined outside this excerpt
 269  
 270          // Free shared lock
 271          $this->_lock->unlock();
 272  
 273          if ($this->_closeDirOnExit) { // only true when the constructor created the directory object itself
 274              $this->_directory->close();
 275          }
 276  
 277          $this->_directory    = null; // drop references held by this object
 278          $this->_writer       = null;
 279          $this->_segmentInfos = null;
 280  
 281          $this->_closed = true; // later _close() calls (e.g. from __destruct()) return immediately
 282      }
 283  
 284      /**
 285       * Add reference to the index object
 286       *
 287       * @internal
 288       */
 289      public function addReference()
 290      {
 291          ++$this->_refCount; // one more holder of this index object
 292      }
 293  
 294      /**
 295       * Remove reference from the index object
 296       *
 297       * When reference count becomes zero, index is closed and resources are cleaned up
 298       *
 299       * @internal
 300       */
 301      public function removeReference()
 302      {
 303          --$this->_refCount;
 304  
 305          if (0 == $this->_refCount) { // last reference released
 306              $this->_close();
 307          }
 308      }
 309  
 310      /**
 311       * Object destructor
 312       */
 313      public function __destruct()
 314      {
 315          $this->_close(); // returns immediately if the index was already closed via removeReference()
 316      }
 317  
 318      /**
 319       * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
 320       *
 321       * @internal
 322       * @return Zend_Search_Lucene_Index_Writer
 323       */
 324      public function getIndexWriter()
 325      {
 326          if ($this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
 327              return $this->_writer; // already instantiated
 328          }
 329  
 330          return $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos);
 331      }
 332  
 333  
 334      /**
 335       * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 336       *
 337       * @return Zend_Search_Lucene_Storage_Directory
 338       */
 339      public function getDirectory()
 340      {
 341          return $this->_directory; // null once _close() has run
 342      }
 343  
 344  
 345      /**
 346       * Returns the total number of documents in this index (including deleted documents).
 347       *
 348       * @return integer
 349       */
 350      public function count()
 351      {
 352          return $this->_docCount; // sum of the per-segment sizes read by the constructor
 353      }
 354  
 355      /**
 356       * Returns one greater than the largest possible document number.
 357       * This may be used to, e.g., determine how big to allocate a structure which will have
 358       * an element for every document number in an index.
 359       *
 360       * @return integer
 361       */
 362      public function maxDoc()
 363      {
 364          return $this->count(); // ids at or above count() are rejected by the id range checks in this class
 365      }
 366  
 367      /**
 368       * Returns the total number of non-deleted documents in this index.
 369       *
 370       * @return integer
 371       */
 372      public function numDocs()
 373      {
 374          $liveDocs = 0;
 375  
 376          foreach ($this->_segmentInfos as $segment) {
 377              $liveDocs = $liveDocs + $segment->numDocs(); // add this segment's count
 378          }
 379  
 380          return $liveDocs;
 381      }
 382  
 383      /**
 384       * Checks whether the document is deleted
 385       *
 386       * @param integer $id
 387       * @return boolean
 388       * @throws Zend_Search_Lucene_Exception    Exception is thrown if $id is out of the range
 389       */
 390      public function isDeleted($id)
 391      {
 392          if ($id < 0 || $id >= $this->_docCount) { // valid ids are 0 .. _docCount - 1
 393              throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
 394          }
 395  
 396          $segmentStartId = 0; // index-wide id of the first document in the current segment
 397          foreach ($this->_segmentInfos as $segmentInfo) {
 398              if ($segmentStartId + $segmentInfo->count() > $id) {
 399                  break; // $id falls inside this segment
 400              }
 401  
 402              $segmentStartId += $segmentInfo->count();
 403          }
 404  
 405          return $segmentInfo->isDeleted($id - $segmentStartId); // pass the segment-local id
 406      }
 407  
 408      /**
 409       * Set default search field.
 410       *
 411       * Null means, that search is performed through all fields by default
 412       *
 413       * Default value is null
 414       *
 415       * @param string $fieldName
 416       */
 417      public static function setDefaultSearchField($fieldName)
 418      {
 419          self::$_defaultSearchField = $fieldName; // static property: shared by all index objects
 420      }
 421  
 422      /**
 423       * Get default search field.
 424       *
 425       * Null means, that search is performed through all fields by default
 426       *
 427       * @return string
 428       */
 429      public static function getDefaultSearchField()
 430      {
 431          return self::$_defaultSearchField; // null unless setDefaultSearchField() stored another value
 432      }
 433  
 434      /**
 435       * Retrieve index maxBufferedDocs option
 436       *
 437       * maxBufferedDocs is a minimal number of documents required before
 438       * the buffered in-memory documents are written into a new Segment
 439       *
 440       * Default value is 10
 441       *
 442       * @return integer
 443       */
 444      public function getMaxBufferedDocs()
 445      {
 446          return $this->getIndexWriter()->maxBufferedDocs; // option lives on the writer, which is created on demand
 447      }
 448  
 449      /**
 450       * Set index maxBufferedDocs option
 451       *
 452       * maxBufferedDocs is a minimal number of documents required before
 453       * the buffered in-memory documents are written into a new Segment
 454       *
 455       * Default value is 10
 456       *
 457       * @param integer $maxBufferedDocs
 458       */
 459      public function setMaxBufferedDocs($maxBufferedDocs)
 460      {
 461          $this->getIndexWriter()->maxBufferedDocs = $maxBufferedDocs; // stored on the writer, which is created on demand
 462      }
 463  
 464      /**
 465       * Retrieve index maxMergeDocs option
 466       *
 467       * maxMergeDocs is a largest number of documents ever merged by addDocument().
 468       * Small values (e.g., less than 10,000) are best for interactive indexing,
 469       * as this limits the length of pauses while indexing to a few seconds.
 470       * Larger values are best for batched indexing and speedier searches.
 471       *
 472       * Default value is PHP_INT_MAX
 473       *
 474       * @return integer
 475       */
 476      public function getMaxMergeDocs()
 477      {
 478          return $this->getIndexWriter()->maxMergeDocs; // option lives on the writer, which is created on demand
 479      }
 480  
 481      /**
 482       * Set index maxMergeDocs option
 483       *
 484       * maxMergeDocs is a largest number of documents ever merged by addDocument().
 485       * Small values (e.g., less than 10,000) are best for interactive indexing,
 486       * as this limits the length of pauses while indexing to a few seconds.
 487       * Larger values are best for batched indexing and speedier searches.
 488       *
 489       * Default value is PHP_INT_MAX
 490       *
 491       * @param integer $maxMergeDocs
 492       */
 493      public function setMaxMergeDocs($maxMergeDocs)
 494      {
 495          $this->getIndexWriter()->maxMergeDocs = $maxMergeDocs; // stored on the writer, which is created on demand
 496      }
 497  
 498      /**
 499       * Retrieve index mergeFactor option
 500       *
 501       * mergeFactor determines how often segment indices are merged by addDocument().
 502       * With smaller values, less RAM is used while indexing,
 503       * and searches on unoptimized indices are faster,
 504       * but indexing speed is slower.
 505       * With larger values, more RAM is used during indexing,
 506       * and while searches on unoptimized indices are slower,
 507       * indexing is faster.
 508       * Thus larger values (> 10) are best for batch index creation,
 509       * and smaller values (< 10) for indices that are interactively maintained.
 510       *
 511       * Default value is 10
 512       *
 513       * @return integer
 514       */
 515      public function getMergeFactor()
 516      {
 517          return $this->getIndexWriter()->mergeFactor; // option lives on the writer, which is created on demand
 518      }
 519  
 520      /**
 521       * Set index mergeFactor option
 522       *
 523       * mergeFactor determines how often segment indices are merged by addDocument().
 524       * With smaller values, less RAM is used while indexing,
 525       * and searches on unoptimized indices are faster,
 526       * but indexing speed is slower.
 527       * With larger values, more RAM is used during indexing,
 528       * and while searches on unoptimized indices are slower,
 529       * indexing is faster.
 530       * Thus larger values (> 10) are best for batch index creation,
 531       * and smaller values (< 10) for indices that are interactively maintained.
 532       *
 533       * Default value is 10
 534       *
 535       * @param integer $maxMergeDocs
 536       */
 537      public function setMergeFactor($mergeFactor)
 538      {
 539          $this->getIndexWriter()->mergeFactor = $mergeFactor; // stored on the writer, which is created on demand
 540      }
 541  
 542      /**
 543       * Performs a query against the index and returns an array
 544       * of Zend_Search_Lucene_Search_QueryHit objects.
 545       * Input is a string or Zend_Search_Lucene_Search_Query.
 546       *
 547       * @param mixed $query
 548       * @return array Zend_Search_Lucene_Search_QueryHit
 549       * @throws Zend_Search_Lucene_Exception
 550       */
 551      public function find($query)
 552      {
 553          if (is_string($query)) { // string queries are parsed into a query object first
 554              $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
 555          }
 556  
 557          if (!$query instanceof Zend_Search_Lucene_Search_Query) {
 558              throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
 559          }
 560  
 561          $this->commit(); // presumably makes pending changes searchable; commit() is defined outside this excerpt
 562  
 563          $hits   = array(); // QueryHit objects, the array that is finally sorted and returned
 564          $scores = array(); // raw scores, parallel to $hits
 565          $ids    = array(); // document ids, parallel to $hits
 566  
 567          $query = $query->rewrite($this)->optimize($this);
 568  
 569          $query->execute($this);
 570  
 571          $topScore = 0;
 572  
 573          foreach ($query->matchedDocs() as $id => $num) {
 574              $docScore = $query->score($id, $this);
 575              if( $docScore != 0 ) { // documents scoring zero are not reported as hits
 576                  $hit = new Zend_Search_Lucene_Search_QueryHit($this);
 577                  $hit->id = $id;
 578                  $hit->score = $docScore;
 579  
 580                  $hits[]   = $hit;
 581                  $ids[]    = $id;
 582                  $scores[] = $docScore;
 583  
 584                  if ($docScore > $topScore) {
 585                      $topScore = $docScore;
 586                  }
 587              }
 588          }
 589  
 590          if (count($hits) == 0) {
 591              // skip sorting, which may cause an error on empty index
 592              return array();
 593          }
 594  
 595          if ($topScore > 1) { // scale hit scores so the best one is 1; $scores keeps the raw values
 596              foreach ($hits as $hit) {
 597                  $hit->score /= $topScore;
 598              }
 599          }
 600  
 601          if (func_num_args() == 1) { // only the query was passed: use the default ordering
 602              // sort by scores (descending), then by document id (ascending)
 603              array_multisort($scores, SORT_DESC, SORT_NUMERIC,
 604                              $ids,    SORT_ASC,  SORT_NUMERIC,
 605                              $hits);
 606          } else {
 607              // sort by given field names
 608  
 609              $argList    = func_get_args(); // [0] is the query, the rest are sort arguments
 610              $fieldNames = $this->getFieldNames();
 611              $sortArgs   = array(); // argument list built up for array_multisort()
 612  
 613              for ($count = 1; $count < count($argList); $count++) {
 614                  $fieldName = $argList[$count];
 615  
 616                  if (!is_string($fieldName)) {
 617                      throw new Zend_Search_Lucene_Exception('Field name must be a string.');
 618                  }
 619  
 620                  if (!in_array($fieldName, $fieldNames)) {
 621                      throw new Zend_Search_Lucene_Exception('Wrong field name.');
 622                  }
 623  
 624                  $valuesArray = array(); // value of $fieldName for every hit, parallel to $hits
 625                  foreach ($hits as $hit) {
 626                      try {
 627                          $value = $hit->getDocument()->getFieldValue($fieldName);
 628                      } catch (Zend_Search_Lucene_Exception $e) {
 629                          if (strpos($e->getMessage(), 'not found') === false) {
 630                              throw $e;
 631                          } else {
 632                              $value = null; // a 'not found' error just sorts this hit as null
 633                          }
 634                      }
 635  
 636                      $valuesArray[] = $value;
 637                  }
 638  
 639                  $sortArgs[] = $valuesArray;
 640  
 641                  if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) { // up to two integer flags may follow a field name
 642                      $count++;
 643                      $sortArgs[] = $argList[$count];
 644  
 645                      if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
 646                          $count++;
 647                          $sortArgs[] = $argList[$count];
 648                      } else {
 649                          if ($argList[$count] == SORT_ASC  || $argList[$count] == SORT_DESC) {
 650                              $sortArgs[] = SORT_REGULAR; // the single flag was a sort order: default the sort type
 651                          } else {
 652                              $sortArgs[] = SORT_ASC;     // the single flag was not a sort order: default the order
 653                          }
 654                      }
 655                  } else {
 656                      $sortArgs[] = SORT_ASC;     // no flags given for this field: use both defaults
 657                      $sortArgs[] = SORT_REGULAR;
 658                  }
 659              }
 660  
 661              // Sort by id's if values are equal
 662              $sortArgs[] = $ids;
 663              $sortArgs[] = SORT_ASC;
 664              $sortArgs[] = SORT_NUMERIC;
 665  
 666              // Array to be sorted (by reference, so the reordering is visible in $hits)
 667              $sortArgs[] = &$hits;
 668  
 669              // Do sort
 670              call_user_func_array('array_multisort', $sortArgs);
 671          }
 672  
 673          return $hits;
 674      }
 675  
 676  
 677      /**
 678       * Returns a list of all unique field names that exist in this index.
 679       *
 680       * @param boolean $indexed
 681       * @return array
 682       */
 683      public function getFieldNames($indexed = false)
 684      {
 685          $fieldNames = array();
 686          foreach ($this->_segmentInfos as $segment) {
 687              $fieldNames = array_merge($fieldNames, $segment->getFields($indexed)); // fold in this segment's fields
 688          }
 689          return $fieldNames;
 690      }
 691  
 692  
 693      /**
 694       * Returns a Zend_Search_Lucene_Document object for the document
 695       * number $id in this index.
 696       *
 697       * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 698       * @return Zend_Search_Lucene_Document
 699       */
 700      public function getDocument($id)
 701      {
 702          if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
 703              /* @var $id Zend_Search_Lucene_Search_QueryHit */
 704              $id = $id->id; // a hit object is reduced to its document id
 705          }
 706  
 707          if ($id < 0 || $id >= $this->_docCount) { // a negative id would otherwise produce a negative seek offset below
 708              throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
 709          }
 710  
 711          $segmentStartId = 0; // index-wide id of the first document in the current segment
 712          foreach ($this->_segmentInfos as $segmentInfo) {
 713              if ($segmentStartId + $segmentInfo->count() > $id) {
 714                  break; // $id falls inside this segment
 715              }
 716  
 717              $segmentStartId += $segmentInfo->count();
 718          }
 719  
 720          $fdxFile = $segmentInfo->openCompoundFile('.fdx');
 721          $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR ); // 8 bytes per document entry
 722          $fieldValuesPosition = $fdxFile->readLong();         // used as the offset into .fdt below
 723  
 724          $fdtFile = $segmentInfo->openCompoundFile('.fdt');
 725          $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
 726          $fieldCount = $fdtFile->readVInt(); // number of stored fields for this document
 727  
 728          $doc = new Zend_Search_Lucene_Document();
 729          for ($count = 0; $count < $fieldCount; $count++) {
 730              $fieldNum = $fdtFile->readVInt(); // field number, resolved through the segment's field info
 731              $bits = $fdtFile->readByte();     // flag byte: bit 2 selects binary data, bit 1 is passed on to the field
 732  
 733              $fieldInfo = $segmentInfo->getField($fieldNum);
 734  
 735              if (!($bits & 2)) { // Text data
 736                  $field = new Zend_Search_Lucene_Field($fieldInfo->name,
 737                                                        $fdtFile->readString(),
 738                                                        'UTF-8',
 739                                                        true,
 740                                                        $fieldInfo->isIndexed,
 741                                                        $bits & 1 );
 742              } else {            // Binary data
 743                  $field = new Zend_Search_Lucene_Field($fieldInfo->name,
 744                                                        $fdtFile->readBinary(),
 745                                                        '',
 746                                                        true,
 747                                                        $fieldInfo->isIndexed,
 748                                                        $bits & 1,
 749                                                        true );
 750              }
 751  
 752              $doc->addField($field);
 753          }
 754  
 755          return $doc;
 756      }
 757  
 758  
 759      /**
 760       * Returns true if the index contains documents with the specified term.
 761       *
 762       * Is used for query optimization.
 763       *
 764       * @param Zend_Search_Lucene_Index_Term $term
 765       * @return boolean
 766       */
 767      public function hasTerm(Zend_Search_Lucene_Index_Term $term)
 768      {
 769          foreach ($this->_segmentInfos as $segment) {
 770              if ($segment->getTermInfo($term) instanceof Zend_Search_Lucene_Index_TermInfo) {
 771                  return true; // found in at least one segment
 772              }
 773          }
 774  
 775          return false;
 776      }
 777  
 778      /**
 779       * Returns IDs of all the documents containing term.
 780       *
 781       * @param Zend_Search_Lucene_Index_Term $term
 782       * @return array
 783       */
 784      public function termDocs(Zend_Search_Lucene_Index_Term $term)
 785      {
 786          $result = array();
 787          $segmentStartDocId = 0; // index-wide id of the first document in the current segment
 788  
 789          foreach ($this->_segmentInfos as $segInfo) {
 790              $termInfo = $segInfo->getTermInfo($term);
 791  
 792              if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
 793                  $segmentStartDocId += $segInfo->count(); // term absent here: only advance the id offset
 794                  continue;
 795              }
 796  
 797              $frqFile = $segInfo->openCompoundFile('.frq');
 798              $frqFile->seek($termInfo->freqPointer,SEEK_CUR); // jump to this term's entries
 799              $docId = 0; // segment-local id, accumulated from the stored deltas
 800              for( $count=0; $count < $termInfo->docFreq; $count++ ) {
 801                  $docDelta = $frqFile->readVInt();
 802                  if( $docDelta % 2 == 1 ) { // odd value: no separate frequency value follows
 803                      $docId += ($docDelta-1)/2;
 804                  } else {
 805                      $docId += $docDelta/2;
 806                      // read freq (the value is discarded; only the stream position matters here)
 807                      $frqFile->readVInt();
 808                  }
 809  
 810                  $result[] = $segmentStartDocId + $docId; // convert to an index-wide id
 811              }
 812  
 813              $segmentStartDocId += $segInfo->count();
 814          }
 815  
 816          return $result;
 817      }
 818  
 819  
 820      /**
 821       * Returns an array of all term freqs.
 822       * Result array structure: array(docId => freq, ...)
 823       *
 824       * @param Zend_Search_Lucene_Index_Term $term
 825       * @return array
 826       */
 827      public function termFreqs(Zend_Search_Lucene_Index_Term $term)
 828      {
 829          $freqs = array();
 830          $docIdBase = 0; // index-wide id of the current segment's first document
 831          foreach ($this->_segmentInfos as $segment) {
 832              $freqs = $freqs + $segment->termFreqs($term, $docIdBase);
 833  
 834              $docIdBase = $docIdBase + $segment->count();
 835          }
 836  
 837          return $freqs;
 838      }
 839  
 840      /**
 841       * Returns an array of all term positions in the documents.
 842       * Result array structure: array(docId => array(pos1, pos2, ...), ...)
 843       *
 844       * @param Zend_Search_Lucene_Index_Term $term
 845       * @return array
 846       */
 847      public function termPositions(Zend_Search_Lucene_Index_Term $term)
 848      {
 849          $positions = array();
 850          $docIdBase = 0; // index-wide id of the current segment's first document
 851          foreach ($this->_segmentInfos as $segment) {
 852              $positions = $positions + $segment->termPositions($term, $docIdBase);
 853  
 854              $docIdBase = $docIdBase + $segment->count();
 855          }
 856  
 857          return $positions;
 858      }
 859  
 860  
 861      /**
 862       * Returns the number of documents in this index containing the $term.
 863       *
 864       * @param Zend_Search_Lucene_Index_Term $term
 865       * @return integer
 866       */
 867      public function docFreq(Zend_Search_Lucene_Index_Term $term)
 868      {
 869          $total = 0;
 870          foreach ($this->_segmentInfos as $segment) {
 871              $info = $segment->getTermInfo($term);
 872              if (null !== $info) { // segments without the term contribute nothing
 873                  $total = $total + $info->docFreq;
 874              }
 875          }
 876  
 877          return $total;
 878      }
 879  
 880  
 881      /**
 882       * Retrieve similarity used by index reader
 883       *
 884       * @return Zend_Search_Lucene_Search_Similarity
 885       */
 886      public function getSimilarity()
 887      {
 888          return Zend_Search_Lucene_Search_Similarity::getDefault(); // uses no per-index state: always the default similarity
 889      }
 890  
 891  
 892      /**
 893       * Returns a normalization factor for "field, document" pair.
 894       *
 895       * @param integer $id
 896       * @param string $fieldName
 897       * @return float
 898       */
 899      public function norm($id, $fieldName)
 900      {
 901          if ($id < 0 || $id >= $this->_docCount) { // ids outside 0 .. _docCount - 1 have no norm
 902              return null;
 903          }
 904  
 905          $segmentStartId = 0; // index-wide id of the first document in the current segment
 906          foreach ($this->_segmentInfos as $segInfo) {
 907              if ($segmentStartId + $segInfo->count() > $id) {
 908                  break; // $id falls inside this segment
 909              }
 910  
 911              $segmentStartId += $segInfo->count();
 912          }
 913  
 914          if ($segInfo->isDeleted($id - $segmentStartId)) { // deleted documents get a norm of 0
 915              return 0;
 916          }
 917  
 918          return $segInfo->norm($id - $segmentStartId, $fieldName); // delegate using the segment-local id
 919      }
 920  
 921      /**
 922       * Returns true if any documents have been deleted from this index.
 923       *
 924       * @return boolean
 925       */
 926      public function hasDeletions()
 927      {
 928          foreach ($this->_segmentInfos as $segmentInfo) {
 929              if ($segmentInfo->hasDeletions()) {
 930                  return true;
 931              }
 932          }
 933  
 934          return false;
 935      }
 936  
 937  
 938      /**
 939       * Deletes a document from the index.
 940       * $id is an internal document id
 941       *
 942       * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 943       * @throws Zend_Search_Lucene_Exception
 944       */
 945      public function delete($id)
 946      {
 947          if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
 948              /* @var $id Zend_Search_Lucene_Search_QueryHit */
 949              $id = $id->id;
 950          }
 951  
 952          if ($id >= $this->_docCount) {
 953              throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
 954          }
 955  
 956          $segmentStartId = 0;
 957          foreach ($this->_segmentInfos as $segmentInfo) {
 958              if ($segmentStartId + $segmentInfo->count() > $id) {
 959                  break;
 960              }
 961  
 962              $segmentStartId += $segmentInfo->count();
 963          }
 964          $segmentInfo->delete($id - $segmentStartId);
 965  
 966          $this->_hasChanges = true;
 967      }
 968  
 969  
 970  
 971      /**
 972       * Adds a document to this index.
 973       *
 974       * @param Zend_Search_Lucene_Document $document
 975       */
 976      public function addDocument(Zend_Search_Lucene_Document $document)
 977      {
 978          $this->getIndexWriter()->addDocument($document);
 979          $this->_docCount++;
 980      }
 981  
 982  
 983      /**
 984       * Update document counter
 985       */
 986      private function _updateDocCount()
 987      {
 988          $this->_docCount = 0;
 989          foreach ($this->_segmentInfos as $segInfo) {
 990              $this->_docCount += $segInfo->count();
 991          }
 992      }
 993  
 994      /**
 995       * Commit changes resulting from delete() or undeleteAll() operations.
 996       *
 997       * @todo undeleteAll processing.
 998       */
 999      public function commit()
1000      {
1001          if ($this->_hasChanges) {
1002              foreach ($this->_segmentInfos as $segInfo) {
1003                  $segInfo->writeChanges();
1004              }
1005  
1006              $this->_hasChanges = false;
1007          }
1008  
1009          if ($this->_writer !== null) {
1010              $this->_writer->commit();
1011  
1012              $this->_updateDocCount();
1013          }
1014      }
1015  
1016  
1017      /**
1018       * Optimize index.
1019       *
1020       * Merges all segments into one
1021       */
1022      public function optimize()
1023      {
1024          // Commit changes if any changes have been made
1025          $this->commit();
1026  
1027          if (count($this->_segmentInfos) > 1 || $this->hasDeletions()) {
1028              $this->getIndexWriter()->optimize();
1029              $this->_updateDocCount();
1030          }
1031      }
1032  
1033  
1034      /**
1035       * Returns an array of all terms in this index.
1036       *
1037       * @return array
1038       */
1039      public function terms()
1040      {
1041          $result = array();
1042  
1043          $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
1044  
1045          foreach ($this->_segmentInfos as $segmentInfo) {
1046              $segmentInfo->reset();
1047  
1048              // Skip "empty" segments
1049              if ($segmentInfo->currentTerm() !== null) {
1050                  $segmentInfoQueue->put($segmentInfo);
1051              }
1052          }
1053  
1054          while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
1055              if ($segmentInfoQueue->top() === null ||
1056                  $segmentInfoQueue->top()->currentTerm()->key() !=
1057                              $segmentInfo->currentTerm()->key()) {
1058                  // We got new term
1059                  $result[] = $segmentInfo->currentTerm();
1060              }
1061  
1062              $segmentInfo->nextTerm();
1063              // check, if segment dictionary is finished
1064              if ($segmentInfo->currentTerm() !== null) {
1065                  // Put segment back into the priority queue
1066                  $segmentInfoQueue->put($segmentInfo);
1067              }
1068          }
1069  
1070          return $result;
1071      }
1072  
1073  
1074      /*************************************************************************
1075      @todo UNIMPLEMENTED
1076      *************************************************************************/
1077      /**
1078       * Undeletes all documents currently marked as deleted in this index.
1079       *
1080       * @todo Implementation
1081       */
1082      public function undeleteAll()
1083      {}
1084  }


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7