| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';

/** Zend_Search_Lucene_Index_SegmentMerger */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentMerger.php';



/**
 * Index writer: buffers added documents into a segment, rewrites the
 * 'segments' file on commit and merges segments together.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_Writer
{
    /**
     * @todo Implement Analyzer substitution
     * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
     *       temporary index files
     * @todo Directory lock processing
     */

    /**
     * Number of documents required before the buffered in-memory
     * documents are written into a new Segment
     *
     * Default value is 10
     *
     * @var integer
     */
    public $maxBufferedDocs = 10;

    /**
     * Largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @var integer
     */
    public $maxMergeDocs = PHP_INT_MAX;

    /**
     * Determines how often segment indices are merged by addDocument().
     *
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     *
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     *
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @var integer
     */
    public $mergeFactor = 10;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;


    /**
     * Changes counter.
     *
     * Incremented by addDocument() and added to the stored index version
     * (then reset) the next time the segments file is rewritten.
     *
     * @var integer
     */
    private $_versionUpdate = 0;

    /**
     * List of the segments, created by index writer
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects, keyed by segment name
     *
     * @var array
     */
    private $_newSegments = array();

    /**
     * List of segments to be deleted on commit
     *
     * Segment names, keyed by segment name.
     *
     * @var array
     */
    private $_segmentsToDelete = array();

    /**
     * Current segment to add documents
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
     */
    private $_currentSegment = null;

    /**
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
     *
     * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos;

    /**
     * List of index file extensions
     *
     * Keyed by extension so that membership can be tested with isset().
     *
     * @var array
     */
    private static $_indexExtensions = array('.cfs' => '.cfs',
                                             '.fnm' => '.fnm',
                                             '.fdx' => '.fdx',
                                             '.fdt' => '.fdt',
                                             '.tis' => '.tis',
                                             '.tii' => '.tii',
                                             '.frq' => '.frq',
                                             '.prx' => '.prx',
                                             '.tvx' => '.tvx',
                                             '.tvd' => '.tvd',
                                             '.tvf' => '.tvf',
                                             '.del' => '.del',
                                             '.sti' => '.sti' );

    /**
     * Opens the index for writing
     *
     * The directory must be a Zend_Search_Lucene_Storage_Directory object.
     * $segmentInfos is taken by reference and is updated in place whenever
     * the segments file is rewritten.
     * The optional $create parameter, when true, removes the existing index
     * files from the directory and writes empty 'segments' and 'deletable' files.
     * When false, only the format marker of the existing 'segments' file is checked.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param array $segmentInfos
     * @param boolean $create
     * @throws Zend_Search_Lucene_Exception if the existing segments file has an unexpected format marker
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $create = false)
    {
        $this->_directory    = $directory;
        $this->_segmentInfos = &$segmentInfos;

        if ($create) {
            foreach ($this->_directory->fileList() as $file) {
                if ($file == 'deletable' ||
                    $file == 'segments'  ||
                    isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
                    preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_number> file names */) {
                        $this->_directory->deleteFile($file);
                    }
            }
            $segmentsFile = $this->_directory->createFile('segments');
            $segmentsFile->writeInt((int)0xFFFFFFFF);

            // Write version (initialized by current time) as two 32-bit halves.
            // The timestamp is floored first: applying '&' to a float that has a
            // fractional part is deprecated since PHP 8.1. The written value is
            // the same as before, because '&' truncated the fraction anyway.
            $version = floor(microtime(true));
            $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
            $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));

            // write name counter
            $segmentsFile->writeInt(0);
            // write segment counter
            $segmentsFile->writeInt(0);

            $deletableFile = $this->_directory->createFile('deletable');
            // write counter
            $deletableFile->writeInt(0);
        } else {
            $segmentsFile = $this->_directory->getFileObject('segments');
            $format = $segmentsFile->readInt();
            if ($format != (int)0xFFFFFFFF) {
                throw new Zend_Search_Lucene_Exception('Wrong segments file format');
            }
        }
    }

    /**
     * Adds a document to this index.
     *
     * Opens a new segment writer on demand, commits once the current segment
     * holds at least $maxBufferedDocs documents and then checks whether
     * existing segments should be merged.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        if ($this->_currentSegment === null) {
            $this->_currentSegment =
                new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $this->_newSegmentName());
        }
        $this->_currentSegment->addDocument($document);

        if ($this->_currentSegment->count() >= $this->maxBufferedDocs) {
            $this->commit();
        }

        $this->_versionUpdate++;

        $this->_maybeMergeSegments();
    }


    /**
     * Merge segments if necessary
     *
     * Segments are walked in ascending size order and pooled; a pool is merged
     * once its total size reaches the current threshold. The threshold starts
     * at $maxBufferedDocs and is multiplied by $mergeFactor each time a segment
     * is too large for the current level. Processing stops as soon as the
     * threshold exceeds $maxMergeDocs.
     */
    private function _maybeMergeSegments()
    {
        $segmentSizes = array();
        foreach ($this->_segmentInfos as $segId => $segmentInfo) {
            $segmentSizes[$segId] = $segmentInfo->count();
        }

        $mergePool   = array();
        $poolSize    = 0;
        $sizeToMerge = $this->maxBufferedDocs;
        asort($segmentSizes, SORT_NUMERIC);
        foreach ($segmentSizes as $segId => $size) {
            // Check, if segment comes into a new merging block
            while ($size >= $sizeToMerge) {
                // Merge previous block if it's large enough
                if ($poolSize >= $sizeToMerge) {
                    $this->_mergeSegments($mergePool);
                }
                $mergePool   = array();
                $poolSize    = 0;

                $previousSizeToMerge = $sizeToMerge;
                $sizeToMerge *= $this->mergeFactor;

                if ($sizeToMerge > $this->maxMergeDocs) {
                    return;
                }

                // With $mergeFactor <= 1 or $maxBufferedDocs <= 0 the threshold
                // never grows and this loop would spin forever. Stop instead.
                if ($sizeToMerge <= $previousSizeToMerge) {
                    return;
                }
            }

            // A merge performed above commits, and the commit may drop entries
            // from $this->_segmentInfos; skip ids that are no longer present.
            if (!isset($this->_segmentInfos[$segId])) {
                continue;
            }

            $mergePool[] = $this->_segmentInfos[$segId];
            $poolSize += $size;
        }

        if ($poolSize >= $sizeToMerge) {
            $this->_mergeSegments($mergePool);
        }
    }

    /**
     * Merge specified segments
     *
     * $segments is an array of SegmentInfo objects
     *
     * Does nothing if the optimization lock cannot be obtained. The source
     * segments are queued for deletion only after the merge itself has
     * completed, and the lock is released even when merging or committing
     * throws an exception.
     *
     * @param array $segments
     * @throws Exception whatever the merge or the commit throws is rethrown unchanged
     */
    private function _mergeSegments($segments)
    {
        // Try to get exclusive non-blocking lock to the 'index.optimization.lock'
        // Skip optimization if it's performed by other process right now
        $optimizationLock = $this->_directory->createFile('index.optimization.lock');
        if (!$optimizationLock->lock(LOCK_EX,true)) {
            return;
        }

        // catch/rethrow is used instead of 'finally' to stay compatible with
        // the PHP 5 versions this file targets.
        try {
            $newName = $this->_newSegmentName();
            $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
                                                                 $newName);

            $mergedNames = array();
            foreach ($segments as $segmentInfo) {
                $merger->addSource($segmentInfo);
                $mergedNames[] = $segmentInfo->getName();
            }

            $newSegment = $merger->merge();

            // Queue the sources for deletion only now: if merge() had thrown,
            // a later commit must not delete segments that were never merged.
            foreach ($mergedNames as $mergedName) {
                $this->_segmentsToDelete[$mergedName] = $mergedName;
            }

            if ($newSegment !== null) {
                $this->_newSegments[$newSegment->getName()] = $newSegment;
            }

            $this->commit();
        } catch (Exception $e) {
            $optimizationLock->unlock();
            throw $e;
        }

        // optimization is finished
        $optimizationLock->unlock();
    }

    /**
     * Update segments file by adding current segment to a list
     *
     * Rewrites the 'segments' file through 'segments.new': bumps the index
     * version by the pending change counter, drops the segments queued for
     * deletion, appends the newly created segments, then removes the files of
     * the deleted segments and loads segments that are listed in the file but
     * not yet known to this writer.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _updateSegments()
    {
        // Get an exclusive index lock
        // Wait until all parallel searchers or indexers have stopped
        // and block new searchers while we are updating the segments file
        $lock = $this->_directory->getFileObject('index.lock');
        if (!$lock->lock(LOCK_EX)) {
            throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
        }


        // Do not share file handlers to get file updates from other sessions.
        $segmentsFile   = $this->_directory->getFileObject('segments', false);
        $newSegmentFile = $this->_directory->createFile('segments.new', false);

        // Write format marker
        $newSegmentFile->writeInt((int)0xFFFFFFFF);

        // Write index version
        $segmentsFile->seek(4, SEEK_CUR);
        // $version = $segmentsFile->readLong() + $this->_versionUpdate;
        // Process version on 32-bit platforms
        $versionHigh = $segmentsFile->readInt();
        $versionLow  = $segmentsFile->readInt();
        $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
                   (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
        $version += $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
        $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write number of segments
        $segmentsCount = $segmentsFile->readInt();
        $newSegmentFile->writeInt(0);  // Write dummy data (segment counter)

        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            // $_segmentsToDelete is keyed by segment name, so a key lookup is
            // an exact match and avoids a loose linear in_array() search.
            if (!isset($this->_segmentsToDelete[$segName])) {
                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        // Remove segments, not listed in $segments (deleted)
        // Load segments, not listed in $this->_segmentInfos
        foreach ($this->_segmentInfos as $segId => $segInfo) {
            if (isset($segments[$segInfo->getName()])) {
                // Segment is already included into $this->_segmentInfos
                unset($segments[$segInfo->getName()]);
            } else {
                // remove deleted segment from a list
                unset($this->_segmentInfos[$segId]);
            }
        }
        // $segments contains a list of segments to load
        // do it later

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            $this->_segmentInfos[] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount);  // Update segments count
        $newSegmentFile->close();
        $this->_directory->renameFile('segments.new', 'segments');


        // Segments file update is finished
        // Switch back to shared lock mode
        $lock->lock(LOCK_SH);


        $fileList = $this->_directory->fileList();
        foreach ($this->_segmentsToDelete as $nameToDelete) {
            foreach (self::$_indexExtensions as $ext) {
                if ($this->_directory->fileExists($nameToDelete . $ext)) {
                    $this->_directory->deleteFile($nameToDelete . $ext);
                }
            }

            // Also remove the <segment_name>.f<decimal_number> files
            foreach ($fileList as $file) {
                if (substr($file, 0, strlen($nameToDelete) + 2) == ($nameToDelete . '.f') &&
                    ctype_digit( substr($file, strlen($nameToDelete) + 2) )) {
                        $this->_directory->deleteFile($file);
                    }
            }
        }
        $this->_segmentsToDelete = array();

        // Load segments, created by other process
        foreach ($segments as $segName => $segSize) {
            // Load new segments
            $this->_segmentInfos[] = new Zend_Search_Lucene_Index_SegmentInfo($segName,
                                                                              $segSize,
                                                                              $this->_directory);
        }
    }


    /**
     * Commit current changes
     *
     * Closes the segment currently being written (if any) and rewrites the
     * segments file when there are new segments or segments to delete.
     */
    public function commit()
    {
        if ($this->_currentSegment !== null) {
            $newSegment = $this->_currentSegment->close();
            if ($newSegment !== null) {
                $this->_newSegments[$newSegment->getName()] = $newSegment;
            }
            $this->_currentSegment = null;
        }

        if (count($this->_newSegments)      != 0 ||
            count($this->_segmentsToDelete) != 0) {
            $this->_updateSegments();
        }
    }


    /**
     * Merges the provided indexes into this index.
     *
     * Not implemented yet: the method body is empty.
     *
     * @param array $readers
     * @return void
     */
    public function addIndexes($readers)
    {
        /**
         * @todo implementation
         */
    }

    /**
     * Merges all segments together into a single segment, optimizing
     * an index for search.
     * Passes every Zend_Search_Lucene_Index_SegmentInfo object of this index
     * to the segment merge routine.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    public function optimize()
    {
        $this->_mergeSegments($this->_segmentInfos);
    }

    /**
     * Get name for new segment
     *
     * Reads the segment name counter from the segments file, stores the
     * incremented value back and returns '_' followed by the previous counter
     * value in base 36.
     *
     * @return string
     * @throws Zend_Search_Lucene_Exception if the segments file cannot be locked
     */
    private function _newSegmentName()
    {
        // Do not share file handler to get file updates from other sessions.
        $segmentsFile = $this->_directory->getFileObject('segments', false);

        // Get exclusive segments file lock
        // We have a guarantee that we will not intersect with an _updateSegments() call
        // of another process, because it needs the exclusive index lock and waits
        // until all other searchers have stopped
        if (!$segmentsFile->lock(LOCK_EX)) {
            throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
        }

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentNameCounter = $segmentsFile->readInt();

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentsFile->writeInt($segmentNameCounter + 1);

        // Flush output to guarantee that wrong value will not be loaded between unlock and
        // return (which calls $segmentsFile destructor)
        $segmentsFile->flush();

        $segmentsFile->unlock();

        return '_' . base_convert($segmentNameCounter, 10, 36);
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |