[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Index/ -> SegmentMerger.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Index
  18   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  
  23  /** Zend_Search_Lucene_Exception */
  24  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
  25  
  26  /** Zend_Search_Lucene_Index_SegmentInfo */
  27  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
  28  
  29  /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
  30  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
  31  
  32  /** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
  33  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
  34  
  35  
  36  /**
  37   * @category   Zend
  38   * @package    Zend_Search_Lucene
  39   * @subpackage Index
  40   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  41   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  42   */
  43  class Zend_Search_Lucene_Index_SegmentMerger
  44  {
  45      /**
  46       * Target segment writer
  47       *
  48       * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
  49       */
  50      private $_writer;
  51  
  52      /**
  53       * Number of docs in a new segment
  54       *
  55       * @var integer
  56       */
  57      private $_docCount;
  58  
  59      /**
  60       * A set of segments to be merged
  61       *
  62       * @var array Zend_Search_Lucene_Index_SegmentInfo
  63       */
  64      private $_segmentInfos = array();
  65  
  66      /**
  67       * Flag to signal, that merge is already done
  68       *
  69       * @var boolean
  70       */
  71      private $_mergeDone = false;
  72  
  73      /**
  74       * Field map
  75       * [<segment_name>][<field_number>] => <target_field_number>
  76       *
  77       * @var array
  78       */
  79      private $_fieldsMap = array();
  80  
  81  
  82  
  83      /**
  84       * Object constructor.
  85       *
  86       * Creates new segment merger with $directory as target to merge segments into
  87       * and $name as a name of new segment
  88       *
  89       * @param Zend_Search_Lucene_Storage_Directory $directory
  90       * @param string $name
  91       */
  92      public function __construct($directory, $name)
  93      {
  94          $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
  95      }
  96  
  97  
  98      /**
  99       * Add segmnet to a collection of segments to be merged
 100       *
 101       * @param Zend_Search_Lucene_Index_SegmentInfo $segment
 102       */
 103      public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
 104      {
 105          $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
 106      }
 107  
 108  
 109      /**
 110       * Do merge.
 111       *
 112       * Returns number of documents in newly created segment
 113       *
 114       * @return Zend_Search_Lucene_Index_SegmentInfo
 115       * @throws Zend_Search_Lucene_Exception
 116       */
 117      public function merge()
 118      {
 119          if ($this->_mergeDone) {
 120              throw new Zend_Search_Lucene_Exception('Merge is already done.');
 121          }
 122  
 123          if (count($this->_segmentInfos) < 1) {
 124              throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
 125                                                   . count($this->_segmentInfos)
 126                                                   . ').');
 127          }
 128  
 129          $this->_mergeFields();
 130          $this->_mergeNorms();
 131          $this->_mergeStoredFields();
 132          $this->_mergeTerms();
 133  
 134          $this->_mergeDone = true;
 135  
 136          return $this->_writer->close();
 137      }
 138  
 139  
 140      /**
 141       * Merge fields information
 142       */
 143      private function _mergeFields()
 144      {
 145          foreach ($this->_segmentInfos as $segName => $segmentInfo) {
 146              foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
 147                  $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
 148              }
 149          }
 150      }
 151  
 152      /**
 153       * Merge field's normalization factors
 154       */
 155      private function _mergeNorms()
 156      {
 157          foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
 158              if ($fieldInfo->isIndexed) {
 159                  foreach ($this->_segmentInfos as $segName => $segmentInfo) {
 160                      if ($segmentInfo->hasDeletions()) {
 161                          $srcNorm = $segmentInfo->normVector($fieldInfo->name);
 162                          $norm    = '';
 163                          $docs    = $segmentInfo->count();
 164                          for ($count = 0; $count < $docs; $count++) {
 165                              if (!$segmentInfo->isDeleted($count)) {
 166                                  $norm .= $srcNorm[$count];
 167                              }
 168                          }
 169                          $this->_writer->addNorm($fieldInfo->name, $norm);
 170                      } else {
 171                          $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
 172                      }
 173                  }
 174              }
 175          }
 176      }
 177  
 178      /**
 179       * Merge fields information
 180       */
 181      private function _mergeStoredFields()
 182      {
 183          $this->_docCount = 0;
 184  
 185          foreach ($this->_segmentInfos as $segName => $segmentInfo) {
 186              $fdtFile = $segmentInfo->openCompoundFile('.fdt');
 187  
 188              for ($count = 0; $count < $segmentInfo->count(); $count++) {
 189                  $fieldCount = $fdtFile->readVInt();
 190                  $storedFields = array();
 191  
 192                  for ($count2 = 0; $count2 < $fieldCount; $count2++) {
 193                      $fieldNum = $fdtFile->readVInt();
 194                      $bits = $fdtFile->readByte();
 195                      $fieldInfo = $segmentInfo->getField($fieldNum);
 196  
 197                      if (!($bits & 2)) { // Text data
 198                          $storedFields[] =
 199                                   new Zend_Search_Lucene_Field($fieldInfo->name,
 200                                                                $fdtFile->readString(),
 201                                                                'UTF-8',
 202                                                                true,
 203                                                                $fieldInfo->isIndexed,
 204                                                                $bits & 1 );
 205                      } else {            // Binary data
 206                          $storedFields[] =
 207                                   new Zend_Search_Lucene_Field($fieldInfo->name,
 208                                                                $fdtFile->readBinary(),
 209                                                                '',
 210                                                                true,
 211                                                                $fieldInfo->isIndexed,
 212                                                                $bits & 1,
 213                                                                true);
 214                      }
 215                  }
 216  
 217                  if (!$segmentInfo->isDeleted($count)) {
 218                      $this->_docCount++;
 219                      $this->_writer->addStoredFields($storedFields);
 220                  }
 221              }
 222          }
 223      }
 224  
 225  
 226      /**
 227       * Merge fields information
 228       */
 229      private function _mergeTerms()
 230      {
 231          $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
 232  
 233          $segmentStartId = 0;
 234          foreach ($this->_segmentInfos as $segName => $segmentInfo) {
 235              $segmentStartId = $segmentInfo->reset($segmentStartId, true);
 236  
 237              // Skip "empty" segments
 238              if ($segmentInfo->currentTerm() !== null) {
 239                  $segmentInfoQueue->put($segmentInfo);
 240              }
 241          }
 242  
 243          $this->_writer->initializeDictionaryFiles();
 244  
 245          $termDocs = array();
 246          while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
 247              // Merge positions array
 248              $termDocs += $segmentInfo->currentTermPositions();
 249  
 250              if ($segmentInfoQueue->top() === null ||
 251                  $segmentInfoQueue->top()->currentTerm()->key() !=
 252                              $segmentInfo->currentTerm()->key()) {
 253                  // We got new term
 254                  ksort($termDocs, SORT_NUMERIC);
 255  
 256                  // Add term if it's contained in any document
 257                  if (count($termDocs) > 0) {
 258                      $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
 259                  }
 260                  $termDocs = array();
 261              }
 262  
 263              $segmentInfo->nextTerm();
 264              // check, if segment dictionary is finished
 265              if ($segmentInfo->currentTerm() !== null) {
 266                  // Put segment back into the priority queue
 267                  $segmentInfoQueue->put($segmentInfo);
 268              }
 269          }
 270  
 271          $this->_writer->closeDictionaryFiles();
 272      }
 273  }


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7