| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Exception */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Analysis_Analyzer */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';

/** Zend_Search_Lucene_Index_SegmentWriter */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter.php';


/**
 * Segment writer that builds a segment in memory, one document at a time.
 *
 * Terms and their per-document positions are accumulated in
 * {@link $_termDictionary} and {@link $_termDocs} by addDocument() and are
 * written out when close() is called.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Term Dictionary
     * Array of the Zend_Search_Lucene_Index_Term objects, keyed by Zend_Search_Lucene_Index_Term::key()
     *
     * @var array
     */
    protected $_termDictionary;

    /**
     * Documents, which contain the term
     *
     * Layout: term key => array(document number => array(position, position, ...))
     *
     * @var array
     */
    protected $_termDocs;

    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);

        $this->_termDocs       = array();
        $this->_termDictionary = array();
    }


    /**
     * Adds a document to this segment.
     *
     * All fields are validated before any writer state is modified, so a
     * document that is rejected leaves no partial field or term data behind.
     *
     * @param Zend_Search_Lucene_Document $document
     * @throws Zend_Search_Lucene_Exception if any field requests term vector storage
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        // Pass 1: collect and validate the fields without touching writer state.
        $fields = array();
        foreach ($document->getFieldNames() as $fieldName) {
            $field = $document->getField($fieldName);

            if ($field->storeTermVector) {
                /**
                 * @todo term vector storing support
                 */
                throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
            }

            $fields[] = $field;
        }

        $storedFields = array();
        $docNorms     = array();
        $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();

        // Pass 2: register the fields and index their terms.
        foreach ($fields as $field) {
            $this->addField($field);

            if ($field->isIndexed) {
                if ($field->isTokenized) {
                    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
                    $analyzer->setInput($field->value, $field->encoding);

                    $position = 0;
                    $numTerms = 0;
                    while (($token = $analyzer->nextToken()) !== null) {
                        $numTerms++;

                        // Positions are cumulative sums of the token position increments.
                        $position += $token->getPositionIncrement();
                        $this->_addTermPosition(
                            new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name),
                            $position
                        );
                    }
                } else {
                    // Untokenized field: the whole value is a single term at position 0.
                    $numTerms = 1;
                    $this->_addTermPosition(
                        new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name),
                        0
                    );
                }

                $docNorms[$field->name] = chr($similarity->encodeNorm(
                    $similarity->lengthNorm($field->name, $numTerms)
                    * $document->boost
                    * $field->boost
                ));
            }

            if ($field->isStored) {
                $storedFields[] = $field;
            }
        }

        // Append one norm byte per indexed field known to the segment, so every
        // norms string grows by exactly one byte for this document.
        foreach ($this->_fields as $fieldName => $fieldInfo) {
            if (!$fieldInfo->isIndexed) {
                continue;
            }

            if (!isset($this->_norms[$fieldName])) {
                // Field first seen now: back-fill a zero-length norm for each
                // document added before it.
                $this->_norms[$fieldName] = str_repeat(
                    chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0))),
                    $this->_docCount
                );
            }

            if (isset($docNorms[$fieldName])) {
                $this->_norms[$fieldName] .= $docNorms[$fieldName];
            } else {
                // This document does not carry the field: use the zero-length norm.
                $this->_norms[$fieldName] .= chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
            }
        }

        // NOTE(review): addStoredFields() is defined in the parent class and
        // presumably advances $_docCount - confirm in SegmentWriter.
        $this->addStoredFields($storedFields);
    }


    /**
     * Records one occurrence of a term in the document currently being added.
     *
     * The term is registered in the dictionary on first sight, and the position
     * is appended to the term's position list for the current document number.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $position
     */
    private function _addTermPosition(Zend_Search_Lucene_Index_Term $term, $position)
    {
        $termKey = $term->key();

        if (!isset($this->_termDictionary[$termKey])) {
            // New term
            $this->_termDictionary[$termKey] = $term;
            $this->_termDocs[$termKey]       = array();
        }

        if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
            // First occurrence of the term in the current document
            $this->_termDocs[$termKey][$this->_docCount] = array();
        }

        $this->_termDocs[$termKey][$this->_docCount][] = $position;
    }


    /**
     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
     *
     * Terms are passed to addTerm() in ascending string order of their keys.
     */
    protected function _dumpDictionary()
    {
        ksort($this->_termDictionary, SORT_STRING);

        $this->initializeDictionaryFiles();

        foreach ($this->_termDictionary as $termId => $term) {
            $this->addTerm($term, $this->_termDocs[$termId]);
        }

        $this->closeDictionaryFiles();
    }


    /**
     * Close segment, write it to disk and return segment info
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo|null  null if no documents were added
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_dumpDictionary();

        $this->_generateCFS();

        return new Zend_Search_Lucene_Index_SegmentInfo($this->_name,
                                                        $this->_docCount,
                                                        $this->_directory);
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |