| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Exception */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Document */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php';

/** Zend_Search_Lucene_Document_Html */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';

/** Zend_Search_Lucene_Storage_Directory */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php';

/** Zend_Search_Lucene_Storage_File_Memory */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Memory.php';

/** Zend_Search_Lucene_Index_Term */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';

/** Zend_Search_Lucene_Index_TermInfo */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/TermInfo.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';

/** Zend_Search_Lucene_Index_FieldInfo */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/FieldInfo.php';

/** Zend_Search_Lucene_Index_Writer */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Writer.php';

/** Zend_Search_Lucene_Search_QueryParser */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';

/** Zend_Search_Lucene_Search_QueryHit */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryHit.php';

/** Zend_Search_Lucene_Search_Similarity */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';

/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';


/** Zend_Search_Lucene_Interface */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php';

/** Zend_Search_Lucene_Proxy */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Proxy.php';


/**
 * Reader/writer facade for a single Lucene index stored in a directory.
 *
 * Instances are normally obtained through the create() / open() factories,
 * which wrap the index into a Zend_Search_Lucene_Proxy.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
{
    /**
     * Default field name for search
     *
     * Null means search through all fields
     *
     * @var string
     */
    private static $_defaultSearchField = null;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;

    /**
     * File system adapter closing option.
     *
     * True when the directory object was created by this index (and therefore
     * has to be closed by it), false when it was handed in by the caller.
     *
     * @var boolean
     */
    private $_closeDirOnExit = true;

    /**
     * Writer for this index, not instantiated unless required.
     *
     * @var Zend_Search_Lucene_Index_Writer
     */
    private $_writer = null;

    /**
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Number of documents in this index.
     *
     * @var integer
     */
    private $_docCount = 0;

    /**
     * Flag for index changes (set by delete(), cleared by commit())
     *
     * @var boolean
     */
    private $_hasChanges = false;


    /**
     * Index lock object
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_lock;

    /**
     * Signal, that index is already closed, changes are fixed and resources are cleaned up
     *
     * @var boolean
     */
    private $_closed = false;

    /**
     * Number of references to the index object
     *
     * @var integer
     */
    private $_refCount = 0;


    /**
     * Create index
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene_Interface
     */
    public static function create($directory)
    {
        return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, true));
    }

    /**
     * Open index
     *
     * @param mixed $directory
     * @return Zend_Search_Lucene_Interface
     */
    public static function open($directory)
    {
        return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
    }

    /**
     * Opens the index.
     *
     * IndexReader constructor needs Directory as a parameter. It should be
     * a string with a path to the index folder or a
     * Zend_Search_Lucene_Storage_Directory_Filesystem object.
     *
     * @param mixed   $directory Index folder path or filesystem directory object
     * @param boolean $create    True to create a new (empty) index, false to open an existing one
     * @throws Zend_Search_Exception        If no directory is specified
     * @throws Zend_Search_Lucene_Exception If the index lock can't be obtained or the segments file is malformed
     */
    public function __construct($directory = null, $create = false)
    {
        if ($directory === null) {
            // NOTE(review): this is the only place throwing the generic Zend_Search_Exception;
            // kept as is because the exception class hierarchy is not visible from this file.
            throw new Zend_Search_Exception('No index directory specified');
        }

        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
            // Directory object is owned by the caller, so it must not be closed by the index
            $this->_directory      = $directory;
            $this->_closeDirOnExit = false;
        } else {
            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
            $this->_closeDirOnExit = true;
        }


        // Get a shared lock to the index
        $this->_lock = $this->_directory->createFile('index.lock');

        $this->_segmentInfos = array();

        if ($create) {
            // Throw an exception if index is under processing now
            if (!$this->_lock->lock(LOCK_EX, true)) {
                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
            }

            // Writer will create segments file for empty segments list
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);

            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
            }
        } else {
            // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
            if (!$this->_lock->lock(LOCK_SH)) {
                throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
            }
            $this->_writer = null;
        }


        $segmentsFile = $this->_directory->getFileObject('segments');

        $format = $segmentsFile->readInt();

        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // read version (a long value, skipped as two ints)
        // $segmentsFile->readLong();
        $segmentsFile->readInt(); $segmentsFile->readInt();

        // read segment name counter
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        $this->_docCount = 0;

        // read segmentInfos
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();
            $this->_docCount += $segSize;

            $this->_segmentInfos[] =
                                new Zend_Search_Lucene_Index_SegmentInfo($segName,
                                                                         $segSize,
                                                                         $this->_directory);
        }
    }

    /**
     * Close current index and free resources
     *
     * Pending changes are committed first. The lock and the directory are
     * released even if the commit fails; the commit exception is re-thrown
     * afterwards, and the index is marked as closed so that the destructor
     * does not retry the failing commit.
     *
     * @throws Exception Whatever commit() has thrown
     */
    private function _close()
    {
        if ($this->_closed) {
            // index is already closed and resources are cleaned up
            return;
        }

        $commitError = null;
        try {
            $this->commit();
        } catch (Exception $e) {
            // Postpone the failure until resources are released
            $commitError = $e;
        }

        // Free shared lock
        $this->_lock->unlock();

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }

        $this->_directory    = null;
        $this->_writer       = null;
        $this->_segmentInfos = null;

        $this->_closed = true;

        if ($commitError !== null) {
            throw $commitError;
        }
    }

    /**
     * Add reference to the index object
     *
     * @internal
     */
    public function addReference()
    {
        $this->_refCount++;
    }

    /**
     * Remove reference from the index object
     *
     * When reference count becomes zero, index is closed and resources are cleaned up
     *
     * @internal
     */
    public function removeReference()
    {
        $this->_refCount--;

        if ($this->_refCount == 0) {
            $this->_close();
        }
    }

    /**
     * Object destructor
     */
    public function __destruct()
    {
        $this->_close();
    }

    /**
     * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
     *
     * The writer is created lazily on first use.
     *
     * @internal
     * @return Zend_Search_Lucene_Index_Writer
     */
    public function getIndexWriter()
    {
        if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos);
        }

        return $this->_writer;
    }


    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     */
    public function getDirectory()
    {
        return $this->_directory;
    }


    /**
     * Returns the total number of documents in this index (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns one greater than the largest possible document number.
     * This may be used to, e.g., determine how big to allocate a structure which will have
     * an element for every document number in an index.
     *
     * @return integer
     */
    public function maxDoc()
    {
        return $this->count();
    }

    /**
     * Returns the total number of non-deleted documents in this index.
     *
     * @return integer
     */
    public function numDocs()
    {
        $numDocs = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $numDocs += $segmentInfo->numDocs();
        }

        return $numDocs;
    }

    /**
     * Finds the segment which holds the document with the given index-wide id.
     *
     * Segments are numbered consecutively: the first segment holds ids
     * 0..count()-1, the next one continues from there, and so on.
     *
     * @param integer $id Index-wide document id
     * @return array|null array(Zend_Search_Lucene_Index_SegmentInfo, segment-local id),
     *                    or null if the id is negative or not covered by any loaded segment
     */
    private function _findSegment($id)
    {
        if ($id < 0) {
            return null;
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentSize = $segmentInfo->count();

            if ($segmentStartId + $segmentSize > $id) {
                return array($segmentInfo, $id - $segmentStartId);
            }

            $segmentStartId += $segmentSize;
        }

        // Not covered by loaded segments (e.g. document is added, but not committed yet)
        return null;
    }

    /**
     * Same as _findSegment(), but treats an unresolvable id as an error.
     *
     * @param integer $id Index-wide document id
     * @return array array(Zend_Search_Lucene_Index_SegmentInfo, segment-local id)
     * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
     */
    private function _requireSegment($id)
    {
        if ($id < 0 || $id >= $this->_docCount) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        $location = $this->_findSegment($id);
        if ($location === null) {
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        return $location;
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer $id
     * @return boolean
     * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
     */
    public function isDeleted($id)
    {
        list($segmentInfo, $localId) = $this->_requireSegment($id);

        return $segmentInfo->isDeleted($localId);
    }

    /**
     * Set default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * Default value is null
     *
     * @param string $fieldName
     */
    public static function setDefaultSearchField($fieldName)
    {
        self::$_defaultSearchField = $fieldName;
    }

    /**
     * Get default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * @return string
     */
    public static function getDefaultSearchField()
    {
        return self::$_defaultSearchField;
    }

    /**
     * Retrieve index maxBufferedDocs option
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMaxBufferedDocs()
    {
        return $this->getIndexWriter()->maxBufferedDocs;
    }

    /**
     * Set index maxBufferedDocs option
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @param integer $maxBufferedDocs
     */
    public function setMaxBufferedDocs($maxBufferedDocs)
    {
        $this->getIndexWriter()->maxBufferedDocs = $maxBufferedDocs;
    }

    /**
     * Retrieve index maxMergeDocs option
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @return integer
     */
    public function getMaxMergeDocs()
    {
        return $this->getIndexWriter()->maxMergeDocs;
    }

    /**
     * Set index maxMergeDocs option
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @param integer $maxMergeDocs
     */
    public function setMaxMergeDocs($maxMergeDocs)
    {
        $this->getIndexWriter()->maxMergeDocs = $maxMergeDocs;
    }

    /**
     * Retrieve index mergeFactor option
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @return integer
     */
    public function getMergeFactor()
    {
        return $this->getIndexWriter()->mergeFactor;
    }

    /**
     * Set index mergeFactor option
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @param integer $mergeFactor
     */
    public function setMergeFactor($mergeFactor)
    {
        $this->getIndexWriter()->mergeFactor = $mergeFactor;
    }

    /**
     * Performs a query against the index and returns an array
     * of Zend_Search_Lucene_Search_QueryHit objects.
     * Input is a string or Zend_Search_Lucene_Search_Query.
     *
     * Any additional arguments describe the result ordering in an
     * array_multisort()-like manner: a field name, optionally followed by
     * up to two integer sort flags (order and/or type), repeated as needed.
     * Without additional arguments hits are ordered by descending score.
     *
     * @param mixed $query
     * @return array Zend_Search_Lucene_Search_QueryHit
     * @throws Zend_Search_Lucene_Exception
     */
    public function find($query)
    {
        if (is_string($query)) {
            $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
        }

        if (!$query instanceof Zend_Search_Lucene_Search_Query) {
            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
        }

        // Make pending changes visible to the search
        $this->commit();

        $hits   = array();
        $scores = array();
        $ids    = array();

        $query = $query->rewrite($this)->optimize($this);

        $query->execute($this);

        $topScore = 0;

        foreach ($query->matchedDocs() as $id => $num) {
            $docScore = $query->score($id, $this);
            if ($docScore != 0) {
                $hit = new Zend_Search_Lucene_Search_QueryHit($this);
                $hit->id = $id;
                $hit->score = $docScore;

                $hits[]   = $hit;
                $ids[]    = $id;
                $scores[] = $docScore;

                if ($docScore > $topScore) {
                    $topScore = $docScore;
                }
            }
        }

        if (count($hits) == 0) {
            // skip sorting, which may cause a error on empty index
            return array();
        }

        if ($topScore > 1) {
            // Normalize hit scores so that the best one is 1.
            // $scores is left untouched: scaling doesn't change the ordering.
            foreach ($hits as $hit) {
                $hit->score /= $topScore;
            }
        }

        if (func_num_args() == 1) {
            // sort by scores
            array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                            $ids,    SORT_ASC,  SORT_NUMERIC,
                            $hits);
        } else {
            // sort by given field names

            $argList    = func_get_args();
            $fieldNames = $this->getFieldNames();
            $sortArgs   = array();

            // Argument 0 is the query itself, sort description starts at 1
            for ($count = 1; $count < count($argList); $count++) {
                $fieldName = $argList[$count];

                if (!is_string($fieldName)) {
                    throw new Zend_Search_Lucene_Exception('Field name must be a string.');
                }

                if (!in_array($fieldName, $fieldNames)) {
                    throw new Zend_Search_Lucene_Exception('Wrong field name.');
                }

                // Collect the field value of each hit; hits without the field sort as null
                $valuesArray = array();
                foreach ($hits as $hit) {
                    try {
                        $value = $hit->getDocument()->getFieldValue($fieldName);
                    } catch (Zend_Search_Lucene_Exception $e) {
                        if (strpos($e->getMessage(), 'not found') === false) {
                            throw $e;
                        } else {
                            $value = null;
                        }
                    }

                    $valuesArray[] = $value;
                }

                $sortArgs[] = $valuesArray;

                // Up to two integer flags may follow the field name
                if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
                    $count++;
                    $sortArgs[] = $argList[$count];

                    if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
                        $count++;
                        $sortArgs[] = $argList[$count];
                    } else {
                        // Only one flag is given, complete it with the default for the other one
                        if ($argList[$count] == SORT_ASC  || $argList[$count] == SORT_DESC) {
                            $sortArgs[] = SORT_REGULAR;
                        } else {
                            $sortArgs[] = SORT_ASC;
                        }
                    }
                } else {
                    $sortArgs[] = SORT_ASC;
                    $sortArgs[] = SORT_REGULAR;
                }
            }

            // Sort by id's if values are equal
            $sortArgs[] = $ids;
            $sortArgs[] = SORT_ASC;
            $sortArgs[] = SORT_NUMERIC;

            // Array to be sorted
            $sortArgs[] = &$hits;

            // Do sort
            call_user_func_array('array_multisort', $sortArgs);
        }

        return $hits;
    }


    /**
     * Returns a list of all unique field names that exist in this index.
     *
     * The result is the array_merge() of the per-segment field lists.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFieldNames($indexed = false)
    {
        $result = array();
        foreach ($this->_segmentInfos as $segmentInfo) {
            $result = array_merge($result, $segmentInfo->getFields($indexed));
        }
        return $result;
    }


    /**
     * Returns a Zend_Search_Lucene_Document object for the document
     * number $id in this index.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @return Zend_Search_Lucene_Document
     * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
     */
    public function getDocument($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        list($segmentInfo, $localId) = $this->_requireSegment($id);

        // Field index (.fdx) has one 8-byte pointer into the field data (.fdt) per document
        $fdxFile = $segmentInfo->openCompoundFile('.fdx');
        $fdxFile->seek($localId*8, SEEK_CUR);
        $fieldValuesPosition = $fdxFile->readLong();

        $fdtFile = $segmentInfo->openCompoundFile('.fdt');
        $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
        $fieldCount = $fdtFile->readVInt();

        $doc = new Zend_Search_Lucene_Document();
        for ($count = 0; $count < $fieldCount; $count++) {
            $fieldNum = $fdtFile->readVInt();
            $bits = $fdtFile->readByte();

            $fieldInfo = $segmentInfo->getField($fieldNum);

            // Bit 2 of the flags byte selects binary data; bit 1 is passed on to the field as is
            if (!($bits & 2)) { // Text data
                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                      $fdtFile->readString(),
                                                      'UTF-8',
                                                      true,
                                                      $fieldInfo->isIndexed,
                                                      $bits & 1 );
            } else {            // Binary data
                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                      $fdtFile->readBinary(),
                                                      '',
                                                      true,
                                                      $fieldInfo->isIndexed,
                                                      $bits & 1,
                                                      true );
            }

            $doc->addField($field);
        }

        return $doc;
    }


    /**
     * Returns true if index contains documents with specified term.
     *
     * Is used for query optimization.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return boolean
     */
    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
    {
        foreach ($this->_segmentInfos as $segInfo) {
            if ($segInfo->getTermInfo($term) instanceof Zend_Search_Lucene_Index_TermInfo) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns IDs of all the documents containing term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;

        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);

            if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                // Term is absent in this segment, just skip its id range
                $segmentStartDocId += $segInfo->count();
                continue;
            }

            $frqFile = $segInfo->openCompoundFile('.frq');
            $frqFile->seek($termInfo->freqPointer, SEEK_CUR);
            $docId = 0;
            for ($count = 0; $count < $termInfo->docFreq; $count++) {
                // Doc ids are stored as deltas shifted left by one bit;
                // an even value is followed by an explicit frequency entry
                $docDelta = $frqFile->readVInt();
                if ($docDelta % 2 == 1) {
                    $docId += ($docDelta-1)/2;
                } else {
                    $docId += $docDelta/2;
                    // read freq
                    $frqFile->readVInt();
                }

                $result[] = $segmentStartDocId + $docId;
            }

            $segmentStartDocId += $segInfo->count();
        }

        return $result;
    }


    /**
     * Returns an array of all term freqs.
     * Result array structure: array(docId => freq, ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termFreqs(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $result += $segmentInfo->termFreqs($term, $segmentStartDocId);

            $segmentStartDocId += $segmentInfo->count();
        }

        return $result;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $result += $segmentInfo->termPositions($term, $segmentStartDocId);

            $segmentStartDocId += $segmentInfo->count();
        }

        return $result;
    }


    /**
     * Returns the number of documents in this index containing the $term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $result = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);
            if ($termInfo !== null) {
                $result += $termInfo->docFreq;
            }
        }

        return $result;
    }


    /**
     * Retrieve similarity used by index reader
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public function getSimilarity()
    {
        return Zend_Search_Lucene_Search_Similarity::getDefault();
    }


    /**
     * Returns a normalization factor for "field, document" pair.
     *
     * Unlike the other id-based accessors this method doesn't throw:
     * null is returned for an id which is out of the range,
     * 0 is returned for a deleted document.
     *
     * @param integer $id
     * @param string $fieldName
     * @return float
     */
    public function norm($id, $fieldName)
    {
        if ($id < 0 || $id >= $this->_docCount) {
            return null;
        }

        $location = $this->_findSegment($id);
        if ($location === null) {
            return null;
        }

        list($segInfo, $localId) = $location;

        if ($segInfo->isDeleted($localId)) {
            return 0;
        }

        return $segInfo->norm($localId, $fieldName);
    }

    /**
     * Returns true if any documents have been deleted from this index.
     *
     * @return boolean
     */
    public function hasDeletions()
    {
        foreach ($this->_segmentInfos as $segmentInfo) {
            if ($segmentInfo->hasDeletions()) {
                return true;
            }
        }

        return false;
    }


    /**
     * Deletes a document from the index.
     * $id is an internal document id
     *
     * The deletion is only recorded here; it is written out by commit().
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @throws Zend_Search_Lucene_Exception
     */
    public function delete($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        list($segmentInfo, $localId) = $this->_requireSegment($id);

        $segmentInfo->delete($localId);

        $this->_hasChanges = true;
    }



    /**
     * Adds a document to this index.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        $this->getIndexWriter()->addDocument($document);
        $this->_docCount++;
    }


    /**
     * Update document counter
     *
     * Recalculates the counter as a sum of the segment sizes.
     */
    private function _updateDocCount()
    {
        $this->_docCount = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $this->_docCount += $segInfo->count();
        }
    }

    /**
     * Commit changes resulting from delete() or undeleteAll() operations.
     *
     * Also commits the index writer, if it has been instantiated.
     *
     * @todo undeleteAll processing.
     */
    public function commit()
    {
        if ($this->_hasChanges) {
            foreach ($this->_segmentInfos as $segInfo) {
                $segInfo->writeChanges();
            }

            $this->_hasChanges = false;
        }

        if ($this->_writer !== null) {
            $this->_writer->commit();

            $this->_updateDocCount();
        }
    }


    /**
     * Optimize index.
     *
     * Merges all segments into one
     */
    public function optimize()
    {
        // Commit changes if any changes have been made
        $this->commit();

        // A single segment without deletions is already optimal
        if (count($this->_segmentInfos) > 1 || $this->hasDeletions()) {
            $this->getIndexWriter()->optimize();
            $this->_updateDocCount();
        }
    }


    /**
     * Returns an array of all terms in this index.
     *
     * Per-segment term streams are merged through a priority queue;
     * a term present in several segments is reported once.
     *
     * @return array
     */
    public function terms()
    {
        $result = array();

        $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentInfo->reset();

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            // Emit the term only if the next queued segment doesn't point to the same one
            if ($segmentInfoQueue->top() === null ||
                $segmentInfoQueue->top()->currentTerm()->key() !=
                            $segmentInfo->currentTerm()->key()) {
                // We got new term
                $result[] = $segmentInfo->currentTerm();
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        return $result;
    }


    /*************************************************************************
    @todo UNIMPLEMENTED
    *************************************************************************/
    /**
     * Undeletes all documents currently marked as deleted in this index.
     *
     * @todo Implementation
     */
    public function undeleteAll()
    {}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |