[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Index/SegmentWriter/ -> DocumentWriter.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Index
  18   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  
  23  /** Zend_Search_Lucene_Exception */
  24  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
  25  
  26  /** Zend_Search_Lucene_Analysis_Analyzer */
  27  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
  28  
  29  /** Zend_Search_Lucene_Index_SegmentWriter */
  30  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter.php';
  31  
  32  
  33  /**
  34   * @category   Zend
  35   * @package    Zend_Search_Lucene
  36   * @subpackage Index
  37   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  38   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  39   */
  40  class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
  41  {
  42      /**
  43       * Term Dictionary
  44       * Array of the Zend_Search_Lucene_Index_Term objects
  45       * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
  46       *
  47       * @var array
  48       */
  49      protected $_termDictionary;
  50  
  51      /**
  52       * Documents, which contain the term
  53       *
  54       * @var array
  55       */
  56      protected $_termDocs;
  57  
  58      /**
  59       * Object constructor.
  60       *
  61       * @param Zend_Search_Lucene_Storage_Directory $directory
  62       * @param string $name
  63       */
  64      public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
  65      {
  66          parent::__construct($directory, $name);
  67  
  68          $this->_termDocs       = array();
  69          $this->_termDictionary = array();
  70      }
  71  
  72  
  73      /**
  74       * Adds a document to this segment.
  75       *
  76       * @param Zend_Search_Lucene_Document $document
  77       * @throws Zend_Search_Lucene_Exception
  78       */
  79      public function addDocument(Zend_Search_Lucene_Document $document)
  80      {
  81          $storedFields = array();
  82          $docNorms     = array();
  83          $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();
  84  
  85          foreach ($document->getFieldNames() as $fieldName) {
  86              $field = $document->getField($fieldName);
  87              $this->addField($field);
  88  
  89              if ($field->storeTermVector) {
  90                  /**
  91                   * @todo term vector storing support
  92                   */
  93                  throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
  94              }
  95  
  96              if ($field->isIndexed) {
  97                  if ($field->isTokenized) {
  98                      $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
  99                      $analyzer->setInput($field->value, $field->encoding);
 100  
 101                      $position     = 0;
 102                      $tokenCounter = 0;
 103                      while (($token = $analyzer->nextToken()) !== null) {
 104                          $tokenCounter++;
 105  
 106                          $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
 107                          $termKey = $term->key();
 108  
 109                          if (!isset($this->_termDictionary[$termKey])) {
 110                              // New term
 111                              $this->_termDictionary[$termKey] = $term;
 112                              $this->_termDocs[$termKey] = array();
 113                              $this->_termDocs[$termKey][$this->_docCount] = array();
 114                          } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
 115                              // Existing term, but new term entry
 116                              $this->_termDocs[$termKey][$this->_docCount] = array();
 117                          }
 118                          $position += $token->getPositionIncrement();
 119                          $this->_termDocs[$termKey][$this->_docCount][] = $position;
 120                      }
 121  
 122                      $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
 123                                                                                                     $tokenCounter)*
 124                                                                             $document->boost*
 125                                                                             $field->boost ));
 126                  } else {
 127                      $term = new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name);
 128                      $termKey = $term->key();
 129  
 130                      if (!isset($this->_termDictionary[$termKey])) {
 131                          // New term
 132                          $this->_termDictionary[$termKey] = $term;
 133                          $this->_termDocs[$termKey] = array();
 134                          $this->_termDocs[$termKey][$this->_docCount] = array();
 135                      } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
 136                          // Existing term, but new term entry
 137                          $this->_termDocs[$termKey][$this->_docCount] = array();
 138                      }
 139                      $this->_termDocs[$termKey][$this->_docCount][] = 0; // position
 140  
 141                      $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)*
 142                                                                             $document->boost*
 143                                                                             $field->boost ));
 144                  }
 145              }
 146  
 147              if ($field->isStored) {
 148                  $storedFields[] = $field;
 149              }
 150          }
 151  
 152  
 153          foreach ($this->_fields as $fieldName => $field) {
 154              if (!$field->isIndexed) {
 155                  continue;
 156              }
 157  
 158              if (!isset($this->_norms[$fieldName])) {
 159                  $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
 160                                                         $this->_docCount);
 161              }
 162  
 163              if (isset($docNorms[$fieldName])){
 164                  $this->_norms[$fieldName] .= $docNorms[$fieldName];
 165              } else {
 166                  $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
 167              }
 168          }
 169  
 170          $this->addStoredFields($storedFields);
 171      }
 172  
 173  
 174      /**
 175       * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
 176       */
 177      protected function _dumpDictionary()
 178      {
 179          ksort($this->_termDictionary, SORT_STRING);
 180  
 181          $this->initializeDictionaryFiles();
 182  
 183          foreach ($this->_termDictionary as $termId => $term) {
 184              $this->addTerm($term, $this->_termDocs[$termId]);
 185          }
 186  
 187          $this->closeDictionaryFiles();
 188      }
 189  
 190  
 191      /**
 192       * Close segment, write it to disk and return segment info
 193       *
 194       * @return Zend_Search_Lucene_Index_SegmentInfo
 195       */
 196      public function close()
 197      {
 198          if ($this->_docCount == 0) {
 199              return null;
 200          }
 201  
 202          $this->_dumpFNM();
 203          $this->_dumpDictionary();
 204  
 205          $this->_generateCFS();
 206  
 207          return new Zend_Search_Lucene_Index_SegmentInfo($this->_name,
 208                                                          $this->_docCount,
 209                                                          $this->_directory);
 210      }
 211  
 212  }
 213  


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7