[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Index/ -> Writer.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Index
  18   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  
  23  /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
  24  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
  25  
  26  /** Zend_Search_Lucene_Index_SegmentInfo */
  27  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
  28  
  29  /** Zend_Search_Lucene_Index_SegmentMerger */
  30  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentMerger.php';
  31  
  32  
  33  
  34  /**
  35   * @category   Zend
  36   * @package    Zend_Search_Lucene
  37   * @subpackage Index
  38   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  39   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  40   */
  41  class Zend_Search_Lucene_Index_Writer
  42  {
  43      /**
  44       * @todo Implement Analyzer substitution
  45       * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
  46       *       temporary index files
  47       * @todo Directory lock processing
  48       */
  49  
  50      /**
  51       * Number of documents required before the buffered in-memory
  52       * documents are written into a new Segment
  53       *
  54       * Default value is 10
  55       *
  56       * @var integer
  57       */
  58      public $maxBufferedDocs = 10;
  59  
  60      /**
  61       * Largest number of documents ever merged by addDocument().
  62       * Small values (e.g., less than 10,000) are best for interactive indexing,
  63       * as this limits the length of pauses while indexing to a few seconds.
  64       * Larger values are best for batched indexing and speedier searches.
  65       *
  66       * Default value is PHP_INT_MAX
  67       *
  68       * @var integer
  69       */
  70      public $maxMergeDocs = PHP_INT_MAX;
  71  
  72      /**
  73       * Determines how often segment indices are merged by addDocument().
  74       *
  75       * With smaller values, less RAM is used while indexing,
  76       * and searches on unoptimized indices are faster,
  77       * but indexing speed is slower.
  78       *
  79       * With larger values, more RAM is used during indexing,
  80       * and while searches on unoptimized indices are slower,
  81       * indexing is faster.
  82       *
  83       * Thus larger values (> 10) are best for batch index creation,
  84       * and smaller values (< 10) for indices that are interactively maintained.
  85       *
  86       * Default value is 10
  87       *
  88       * @var integer
  89       */
  90      public $mergeFactor = 10;
  91  
  92      /**
  93       * File system adapter.
  94       *
  95       * @var Zend_Search_Lucene_Storage_Directory
  96       */
  97      private $_directory = null;
  98  
  99  
 100      /**
 101       * Changes counter.
 102       *
 103       * @var integer
 104       */
 105      private $_versionUpdate = 0;
 106  
 107      /**
 108       * List of the segments, created by index writer
 109       * Array of Zend_Search_Lucene_Index_SegmentInfo objects
 110       *
 111       * @var array
 112       */
 113      private $_newSegments = array();
 114  
 115      /**
 116       * List of segments to be deleted on commit
 117       *
 118       * @var array
 119       */
 120      private $_segmentsToDelete = array();
 121  
 122      /**
 123       * Current segment to add documents
 124       *
 125       * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
 126       */
 127      private $_currentSegment = null;
 128  
 129      /**
 130       * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
 131       *
 132       * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
 133       *
 134       * @var array Zend_Search_Lucene_Index_SegmentInfo
 135       */
 136      private $_segmentInfos;
 137  
 138      /**
 139       * List of indexfiles extensions
 140       *
 141       * @var array
 142       */
 143      private static $_indexExtensions = array('.cfs' => '.cfs',
 144                                               '.fnm' => '.fnm',
 145                                               '.fdx' => '.fdx',
 146                                               '.fdt' => '.fdt',
 147                                               '.tis' => '.tis',
 148                                               '.tii' => '.tii',
 149                                               '.frq' => '.frq',
 150                                               '.prx' => '.prx',
 151                                               '.tvx' => '.tvx',
 152                                               '.tvd' => '.tvd',
 153                                               '.tvf' => '.tvf',
 154                                               '.del' => '.del',
 155                                               '.sti' => '.sti' );
 156  
 157      /**
 158       * Opens the index for writing
 159       *
 160       * IndexWriter constructor needs Directory as a parameter. It should be
 161       * a string with a path to the index folder or a Directory object.
 162       * Second constructor parameter create is optional - true to create the
 163       * index or overwrite the existing one.
 164       *
 165       * @param Zend_Search_Lucene_Storage_Directory $directory
 166       * @param array $segmentInfos
 167       * @param boolean $create
 168       */
 169      public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $create = false)
 170      {
 171          $this->_directory    = $directory;
 172          $this->_segmentInfos = &$segmentInfos;
 173  
 174          if ($create) {
 175              foreach ($this->_directory->fileList() as $file) {
 176                  if ($file == 'deletable' ||
 177                      $file == 'segments'  ||
 178                      isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
 179                      preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_nmber> file names */) {
 180                          $this->_directory->deleteFile($file);
 181                      }
 182              }
 183              $segmentsFile = $this->_directory->createFile('segments');
 184              $segmentsFile->writeInt((int)0xFFFFFFFF);
 185  
 186              // write version (is initialized by current time
 187              // $segmentsFile->writeLong((int)microtime(true));
 188              $version = microtime(true);
 189              $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
 190              $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
 191  
 192              // write name counter
 193              $segmentsFile->writeInt(0);
 194              // write segment counter
 195              $segmentsFile->writeInt(0);
 196  
 197              $deletableFile = $this->_directory->createFile('deletable');
 198              // write counter
 199              $deletableFile->writeInt(0);
 200          } else {
 201              $segmentsFile = $this->_directory->getFileObject('segments');
 202              $format = $segmentsFile->readInt();
 203              if ($format != (int)0xFFFFFFFF) {
 204                  throw new Zend_Search_Lucene_Exception('Wrong segments file format');
 205              }
 206          }
 207      }
 208  
 209      /**
 210       * Adds a document to this index.
 211       *
 212       * @param Zend_Search_Lucene_Document $document
 213       */
 214      public function addDocument(Zend_Search_Lucene_Document $document)
 215      {
 216          if ($this->_currentSegment === null) {
 217              $this->_currentSegment =
 218                  new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $this->_newSegmentName());
 219          }
 220          $this->_currentSegment->addDocument($document);
 221  
 222          if ($this->_currentSegment->count() >= $this->maxBufferedDocs) {
 223              $this->commit();
 224          }
 225  
 226          $this->_versionUpdate++;
 227  
 228          $this->_maybeMergeSegments();
 229      }
 230  
 231  
 232      /**
 233       * Merge segments if necessary
 234       */
 235      private function _maybeMergeSegments()
 236      {
 237          $segmentSizes = array();
 238          foreach ($this->_segmentInfos as $segId => $segmentInfo) {
 239              $segmentSizes[$segId] = $segmentInfo->count();
 240          }
 241  
 242          $mergePool   = array();
 243          $poolSize    = 0;
 244          $sizeToMerge = $this->maxBufferedDocs;
 245          asort($segmentSizes, SORT_NUMERIC);
 246          foreach ($segmentSizes as $segId => $size) {
 247              // Check, if segment comes into a new merging block
 248              while ($size >= $sizeToMerge) {
 249                  // Merge previous block if it's large enough
 250                  if ($poolSize >= $sizeToMerge) {
 251                      $this->_mergeSegments($mergePool);
 252                  }
 253                  $mergePool   = array();
 254                  $poolSize    = 0;
 255  
 256                  $sizeToMerge *= $this->mergeFactor;
 257  
 258                  if ($sizeToMerge > $this->maxMergeDocs) {
 259                      return;
 260                  }
 261              }
 262  
 263              $mergePool[] = $this->_segmentInfos[$segId];
 264              $poolSize += $size;
 265          }
 266  
 267          if ($poolSize >= $sizeToMerge) {
 268              $this->_mergeSegments($mergePool);
 269          }
 270      }
 271  
 272      /**
 273       * Merge specified segments
 274       *
 275       * $segments is an array of SegmentInfo objects
 276       *
 277       * @param array $segments
 278       */
 279      private function _mergeSegments($segments)
 280      {
 281          // Try to get exclusive non-blocking lock to the 'index.optimization.lock'
 282          // Skip optimization if it's performed by other process right now
 283          $optimizationLock = $this->_directory->createFile('index.optimization.lock');
 284          if (!$optimizationLock->lock(LOCK_EX,true)) {
 285              return;
 286          }
 287  
 288          $newName = $this->_newSegmentName();
 289          $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
 290                                                               $newName);
 291          foreach ($segments as $segmentInfo) {
 292              $merger->addSource($segmentInfo);
 293              $this->_segmentsToDelete[$segmentInfo->getName()] = $segmentInfo->getName();
 294          }
 295  
 296          $newSegment = $merger->merge();
 297          if ($newSegment !== null) {
 298              $this->_newSegments[$newSegment->getName()] = $newSegment;
 299          }
 300  
 301          $this->commit();
 302  
 303          // optimization is finished
 304          $optimizationLock->unlock();
 305      }
 306  
 307      /**
 308       * Update segments file by adding current segment to a list
 309       *
 310       * @throws Zend_Search_Lucene_Exception
 311       */
 312      private function _updateSegments()
 313      {
 314          // Get an exclusive index lock
 315          // Wait, until all parallel searchers or indexers won't stop
 316          // and stop all next searchers, while we are updating segments file
 317          $lock = $this->_directory->getFileObject('index.lock');
 318          if (!$lock->lock(LOCK_EX)) {
 319              throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
 320          }
 321  
 322  
 323          // Do not share file handlers to get file updates from other sessions.
 324          $segmentsFile   = $this->_directory->getFileObject('segments', false);
 325          $newSegmentFile = $this->_directory->createFile('segments.new', false);
 326  
 327          // Write format marker
 328          $newSegmentFile->writeInt((int)0xFFFFFFFF);
 329  
 330          // Write index version
 331          $segmentsFile->seek(4, SEEK_CUR);
 332          // $version = $segmentsFile->readLong() + $this->_versionUpdate;
 333          // Process version on 32-bit platforms
 334          $versionHigh = $segmentsFile->readInt();
 335          $versionLow  = $segmentsFile->readInt();
 336          $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
 337                     (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
 338          $version += $this->_versionUpdate;
 339          $this->_versionUpdate = 0;
 340          $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
 341          $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));
 342  
 343          // Write segment name counter
 344          $newSegmentFile->writeInt($segmentsFile->readInt());
 345  
 346          // Get number of segments offset
 347          $numOfSegmentsOffset = $newSegmentFile->tell();
 348          // Write number of segemnts
 349          $segmentsCount = $segmentsFile->readInt();
 350          $newSegmentFile->writeInt(0);  // Write dummy data (segment counter)
 351  
 352          $segments = array();
 353          for ($count = 0; $count < $segmentsCount; $count++) {
 354              $segName = $segmentsFile->readString();
 355              $segSize = $segmentsFile->readInt();
 356  
 357              if (!in_array($segName, $this->_segmentsToDelete)) {
 358                  $newSegmentFile->writeString($segName);
 359                  $newSegmentFile->writeInt($segSize);
 360  
 361                  $segments[$segName] = $segSize;
 362              }
 363          }
 364          $segmentsFile->close();
 365  
 366          $segmentsCount = count($segments) + count($this->_newSegments);
 367  
 368          // Remove segments, not listed in $segments (deleted)
 369          // Load segments, not listed in $this->_segmentInfos
 370          foreach ($this->_segmentInfos as $segId => $segInfo) {
 371              if (isset($segments[$segInfo->getName()])) {
 372                  // Segment is already included into $this->_segmentInfos
 373                  unset($segments[$segInfo->getName()]);
 374              } else {
 375                  // remove deleted segment from a list
 376                  unset($this->_segmentInfos[$segId]);
 377              }
 378          }
 379          // $segments contains a list of segments to load
 380          // do it later
 381  
 382          foreach ($this->_newSegments as $segName => $segmentInfo) {
 383              $newSegmentFile->writeString($segName);
 384              $newSegmentFile->writeInt($segmentInfo->count());
 385  
 386              $this->_segmentInfos[] = $segmentInfo;
 387          }
 388          $this->_newSegments = array();
 389  
 390          $newSegmentFile->seek($numOfSegmentsOffset);
 391          $newSegmentFile->writeInt($segmentsCount);  // Update segments count
 392          $newSegmentFile->close();
 393          $this->_directory->renameFile('segments.new', 'segments');
 394  
 395  
 396          // Segments file update is finished
 397          // Switch back to shared lock mode
 398          $lock->lock(LOCK_SH);
 399  
 400  
 401          $fileList = $this->_directory->fileList();
 402          foreach ($this->_segmentsToDelete as $nameToDelete) {
 403              foreach (self::$_indexExtensions as $ext) {
 404                  if ($this->_directory->fileExists($nameToDelete . $ext)) {
 405                      $this->_directory->deleteFile($nameToDelete . $ext);
 406                  }
 407              }
 408  
 409              foreach ($fileList as $file) {
 410                  if (substr($file, 0, strlen($nameToDelete) + 2) == ($nameToDelete . '.f') &&
 411                      ctype_digit( substr($file, strlen($nameToDelete) + 2) )) {
 412                          $this->_directory->deleteFile($file);
 413                      }
 414              }
 415          }
 416          $this->_segmentsToDelete = array();
 417  
 418          // Load segments, created by other process
 419          foreach ($segments as $segName => $segSize) {
 420              // Load new segments
 421              $this->_segmentInfos[] = new Zend_Search_Lucene_Index_SegmentInfo($segName,
 422                                                                                $segSize,
 423                                                                                $this->_directory);
 424          }
 425      }
 426  
 427  
 428      /**
 429       * Commit current changes
 430       */
 431      public function commit()
 432      {
 433          if ($this->_currentSegment !== null) {
 434              $newSegment = $this->_currentSegment->close();
 435              if ($newSegment !== null) {
 436                  $this->_newSegments[$newSegment->getName()] = $newSegment;
 437              }
 438              $this->_currentSegment = null;
 439          }
 440  
 441          if (count($this->_newSegments)      != 0 ||
 442              count($this->_segmentsToDelete) != 0) {
 443              $this->_updateSegments();
 444          }
 445      }
 446  
 447  
 448      /**
 449       * Merges the provided indexes into this index.
 450       *
 451       * @param array $readers
 452       * @return void
 453       */
 454      public function addIndexes($readers)
 455      {
 456          /**
 457           * @todo implementation
 458           */
 459      }
 460  
 461      /**
 462       * Merges all segments together into a single segment, optimizing
 463       * an index for search.
 464       * Input is an array of Zend_Search_Lucene_Index_SegmentInfo objects
 465       *
 466       * @throws Zend_Search_Lucene_Exception
 467       */
 468      public function optimize()
 469      {
 470          $this->_mergeSegments($this->_segmentInfos);
 471      }
 472  
 473      /**
 474       * Get name for new segment
 475       *
 476       * @return string
 477       */
 478      private function _newSegmentName()
 479      {
 480          // Do not share file handler to get file updates from other sessions.
 481          $segmentsFile = $this->_directory->getFileObject('segments', false);
 482  
 483          // Get exclusive segments file lock
 484          // We have guarantee, that we will not intersect with _updateSegments() call
 485          // of other process, because it needs exclusive index lock and waits
 486          // until all other searchers won't stop
 487          if (!$segmentsFile->lock(LOCK_EX)) {
 488              throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
 489          }
 490  
 491          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
 492          $segmentNameCounter = $segmentsFile->readInt();
 493  
 494          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
 495          $segmentsFile->writeInt($segmentNameCounter + 1);
 496  
 497          // Flash output to guarantee that wrong value will not be loaded between unlock and
 498          // return (which calls $segmentsFile destructor)
 499          $segmentsFile->flush();
 500  
 501          $segmentsFile->unlock();
 502  
 503          return '_' . base_convert($segmentNameCounter, 10, 36);
 504      }
 505  
 506  }


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7