| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Exception */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';

/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';

/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';


/**
 * Merges a set of source segments into one new segment.
 *
 * Usage: construct with the target directory and new segment name, register
 * every source segment with addSource(), then call merge() exactly once.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentMerger
{
    /**
     * Target segment writer
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
     */
    private $_writer;

    /**
     * Number of docs in a new segment.
     *
     * Only set once _mergeStoredFields() has run; counts non-deleted documents.
     *
     * @var integer
     */
    private $_docCount;

    /**
     * A set of segments to be merged, keyed by segment name
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Flag to signal, that merge is already done
     *
     * @var boolean
     */
    private $_mergeDone = false;

    /**
     * Field map
     * [<segment_name>][<field_number>] => <target_field_number>
     *
     * @var array
     */
    private $_fieldsMap = array();



    /**
     * Object constructor.
     *
     * Creates new segment merger with $directory as target to merge segments into
     * and $name as a name of new segment
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct($directory, $name)
    {
        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
    }


    /**
     * Add segment to a collection of segments to be merged.
     *
     * Segments are stored by name, so adding a segment whose name is already
     * registered replaces the earlier entry.
     *
     * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
     */
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
    {
        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
    }


    /**
     * Do merge.
     *
     * Merges field infos, norms, stored fields and terms (in that order) of all
     * registered source segments into the target writer, then closes the writer
     * and returns whatever its close() call returns.
     *
     * May be called only once per merger instance, and only after at least one
     * source segment has been added.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     * @throws Zend_Search_Lucene_Exception  if the merge was already done or
     *                                       no source segments were added
     */
    public function merge()
    {
        if ($this->_mergeDone) {
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
        }

        if (count($this->_segmentInfos) < 1) {
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                 . count($this->_segmentInfos)
                                                 . ').');
        }

        $this->_mergeFields();
        $this->_mergeNorms();
        $this->_mergeStoredFields();
        $this->_mergeTerms();

        $this->_mergeDone = true;

        return $this->_writer->close();
    }


    /**
     * Merge fields information.
     *
     * Registers every field of every source segment with the target writer and
     * records, per segment, the mapping from source field number to the value
     * returned by the writer's addFieldInfo().
     */
    private function _mergeFields()
    {
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
                $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
            }
        }
    }

    /**
     * Merge field's normalization factors.
     *
     * For each indexed field known to the target writer, appends the norm
     * vector of every source segment. Entries belonging to deleted documents
     * are dropped.
     */
    private function _mergeNorms()
    {
        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
            if (!$fieldInfo->isIndexed) {
                continue;
            }

            foreach ($this->_segmentInfos as $segmentInfo) {
                if ($segmentInfo->hasDeletions()) {
                    // Rebuild the vector entry by entry, keeping only live documents
                    $srcNorm = $segmentInfo->normVector($fieldInfo->name);
                    $norm    = '';
                    $docs    = $segmentInfo->count();
                    for ($count = 0; $count < $docs; $count++) {
                        if (!$segmentInfo->isDeleted($count)) {
                            $norm .= $srcNorm[$count];
                        }
                    }
                    $this->_writer->addNorm($fieldInfo->name, $norm);
                } else {
                    // No deletions: the source vector can be passed through unchanged
                    $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
                }
            }
        }
    }

    /**
     * Merge stored fields.
     *
     * Reads the stored-field records of every document from each source
     * segment's '.fdt' file and passes those of non-deleted documents to the
     * target writer. Also (re)computes $_docCount as the number of documents
     * written.
     */
    private function _mergeStoredFields()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $fdtFile = $segmentInfo->openCompoundFile('.fdt');

            // Hoisted out of the loop condition, as _mergeNorms() already does
            $docs = $segmentInfo->count();

            for ($count = 0; $count < $docs; $count++) {
                $fieldCount   = $fdtFile->readVInt();
                $storedFields = array();

                for ($count2 = 0; $count2 < $fieldCount; $count2++) {
                    $fieldNum  = $fdtFile->readVInt();
                    $bits      = $fdtFile->readByte();
                    $fieldInfo = $segmentInfo->getField($fieldNum);

                    // Bit 2 selects binary vs. text payload; bit 1 is forwarded to the
                    // field constructor (presumably the "tokenized" flag - TODO confirm
                    // against Zend_Search_Lucene_Field).
                    if (!($bits & 2)) { // Text data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readString(),
                                                              'UTF-8',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1 );
                    } else {            // Binary data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readBinary(),
                                                              '',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1,
                                                              true);
                    }
                }

                // The record is consumed from the file before this check, so the
                // data of a deleted document is read and then discarded.
                if (!$segmentInfo->isDeleted($count)) {
                    $this->_docCount++;
                    $this->_writer->addStoredFields($storedFields);
                }
            }
        }
    }


    /**
     * Merge terms dictionaries.
     *
     * Walks the term streams of all source segments through a priority queue.
     * Positions of the same term coming from different segments are collected
     * together and written to the target writer as a single term.
     */
    private function _mergeTerms()
    {
        $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        // The value returned by reset() is fed into the next segment's reset()
        // call as its start id.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentStartId = $segmentInfo->reset($segmentStartId, true);

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->initializeDictionaryFiles();

        $termDocs = array();
        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            // Merge positions array
            $termDocs += $segmentInfo->currentTermPositions();

            // Strict comparison: loose != would treat two different
            // numeric-looking key strings as equal.
            $nextSegment = $segmentInfoQueue->top();
            if ($nextSegment === null ||
                $nextSegment->currentTerm()->key() !== $segmentInfo->currentTerm()->key()) {
                // We got new term
                ksort($termDocs, SORT_NUMERIC);

                // Add term if it's contained in any document
                if (count($termDocs) > 0) {
                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
                }
                $termDocs = array();
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->closeDictionaryFiles();
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |