[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Search/Query/ -> MultiTerm.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Search
  18   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  
  23  /** Zend_Search_Lucene_Search_Query */
  24  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
  25  
  26  /** Zend_Search_Lucene_Search_Weight_MultiTerm */
  27  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/MultiTerm.php';
  28  
  29  
  30  /**
  31   * @category   Zend
  32   * @package    Zend_Search_Lucene
  33   * @subpackage Search
  34   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  35   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  36   */
  37  class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
  38  {
  39  
  40      /**
  41       * Terms to find.
  42       * Array of Zend_Search_Lucene_Index_Term
  43       *
  44       * @var array
  45       */
  46      private $_terms = array();
  47  
  48      /**
  49       * Term signs.
  50       * If true then term is required.
  51       * If false then term is prohibited.
  52       * If null then term is neither prohibited, nor required
  53       *
  54       * If array is null then all terms are required
  55       *
  56       * @var array
  57       */
  58      private $_signs;
  59  
  60      /**
  61       * Result vector.
  62       *
  63       * @var array
  64       */
  65      private $_resVector = null;
  66  
  67      /**
  68       * Terms positions vectors.
  69       * Array of Arrays:
  70       * term1Id => (docId => freq, ...)
  71       * term2Id => (docId => freq, ...)
  72       *
  73       * @var array
  74       */
  75      private $_termsFreqs = array();
  76  
  77  
  78      /**
  79       * A score factor based on the fraction of all query terms
  80       * that a document contains.
  81       * float for conjunction queries
  82       * array of float for non conjunction queries
  83       *
  84       * @var mixed
  85       */
  86      private $_coord = null;
  87  
  88  
  89      /**
  90       * Terms weights
  91       * array of Zend_Search_Lucene_Search_Weight
  92       *
  93       * @var array
  94       */
  95      private $_weights = array();
  96  
  97  
  98      /**
  99       * Class constructor.  Create a new multi-term query object.
 100       *
 101       * if $signs array is omitted then all terms are required
 102       * it differs from addTerm() behavior, but should never be used
 103       *
 104       * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
 105       * @param array $signs    Array of signs.  Sign is boolean|null.
 106       * @return void
 107       */
 108      public function __construct($terms = null, $signs = null)
 109      {
 110          if (is_array($terms)) {
 111              $this->_terms = $terms;
 112  
 113              $this->_signs = null;
 114              // Check if all terms are required
 115              if (is_array($signs)) {
 116                  foreach ($signs as $sign ) {
 117                      if ($sign !== true) {
 118                          $this->_signs = $signs;
 119                          break;
 120                      }
 121                  }
 122              }
 123          }
 124      }
 125  
 126  
 127      /**
 128       * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
 129       *
 130       * The sign is specified as:
 131       *     TRUE  - term is required
 132       *     FALSE - term is prohibited
 133       *     NULL  - term is neither prohibited, nor required
 134       *
 135       * @param  Zend_Search_Lucene_Index_Term $term
 136       * @param  boolean|null $sign
 137       * @return void
 138       */
 139      public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
 140          if ($sign !== true || $this->_signs !== null) {       // Skip, if all terms are required
 141              if ($this->_signs === null) {                     // Check, If all previous terms are required
 142                  foreach ($this->_terms as $prevTerm) {
 143                      $this->_signs[] = true;
 144                  }
 145              }
 146              $this->_signs[] = $sign;
 147          }
 148  
 149          $this->_terms[] = $term;
 150      }
 151  
 152  
 153      /**
 154       * Re-write query into primitive queries in the context of specified index
 155       *
 156       * @param Zend_Search_Lucene_Interface $index
 157       * @return Zend_Search_Lucene_Search_Query
 158       */
 159      public function rewrite(Zend_Search_Lucene_Interface $index)
 160      {
 161          if (count($this->_terms) == 0) {
 162              return new Zend_Search_Lucene_Search_Query_Empty();
 163          }
 164  
 165          // Check, that all fields are qualified
 166          $allQualified = true;
 167          foreach ($this->_terms as $term) {
 168              if ($term->field === null) {
 169                  $allQualified = false;
 170                  break;
 171              }
 172          }
 173  
 174          if ($allQualified) {
 175              return $this;
 176          } else {
 177              /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
 178              $query = new Zend_Search_Lucene_Search_Query_Boolean();
 179              $query->setBoost($this->getBoost());
 180  
 181              foreach ($this->_terms as $termId => $term) {
 182                  $subquery = new Zend_Search_Lucene_Search_Query_Term($term);
 183  
 184                  $query->addSubquery($subquery->rewrite($index),
 185                                      ($this->_signs === null)?  true : $this->_signs[$termId]);
 186              }
 187  
 188              return $query;
 189          }
 190      }
 191  
 192      /**
 193       * Optimize query in the context of specified index
 194       *
 195       * @param Zend_Search_Lucene_Interface $index
 196       * @return Zend_Search_Lucene_Search_Query
 197       */
 198      public function optimize(Zend_Search_Lucene_Interface $index)
 199      {
 200          $terms = $this->_terms;
 201          $signs = $this->_signs;
 202  
 203          foreach ($terms as $id => $term) {
 204              if (!$index->hasTerm($term)) {
 205                  if ($signs === null  ||  $signs[$id] === true) {
 206                      // Term is required
 207                      return new Zend_Search_Lucene_Search_Query_Empty();
 208                  } else {
 209                      // Term is optional or prohibited
 210                      // Remove it from terms and signs list
 211                      unset($terms[$id]);
 212                      unset($signs[$id]);
 213                  }
 214              }
 215          }
 216  
 217          // Check if all presented terms are prohibited
 218          $allProhibited = true;
 219          if ($signs === null) {
 220              $allProhibited = false;
 221          } else {
 222              foreach ($signs as $sign) {
 223                  if ($sign !== false) {
 224                      $allProhibited = false;
 225                      break;
 226                  }
 227              }
 228          }
 229          if ($allProhibited) {
 230              return new Zend_Search_Lucene_Search_Query_Empty();
 231          }
 232  
 233          /**
 234           * @todo make an optimization for repeated terms
 235           * (they may have different signs)
 236           */
 237  
 238          if (count($terms) == 1) {
 239              // It's already checked, that it's not a prohibited term
 240  
 241              // It's one term query with one required or optional element
 242              $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
 243              $optimizedQuery->setBoost($this->getBoost());
 244  
 245              return $optimizedQuery;
 246          }
 247  
 248          if (count($terms) == 0) {
 249              return new Zend_Search_Lucene_Search_Query_Empty();
 250          }
 251  
 252          $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
 253          $optimizedQuery->setBoost($this->getBoost());
 254          return $optimizedQuery;
 255      }
 256  
 257  
 258      /**
 259       * Returns query term
 260       *
 261       * @return array
 262       */
 263      public function getTerms()
 264      {
 265          return $this->_terms;
 266      }
 267  
 268  
 269      /**
 270       * Return terms signs
 271       *
 272       * @return array
 273       */
 274      public function getSigns()
 275      {
 276          return $this->_signs;
 277      }
 278  
 279  
 280      /**
 281       * Set weight for specified term
 282       *
 283       * @param integer $num
 284       * @param Zend_Search_Lucene_Search_Weight_Term $weight
 285       */
 286      public function setWeight($num, $weight)
 287      {
 288          $this->_weights[$num] = $weight;
 289      }
 290  
 291  
 292      /**
 293       * Constructs an appropriate Weight implementation for this query.
 294       *
 295       * @param Zend_Search_Lucene_Interface $reader
 296       * @return Zend_Search_Lucene_Search_Weight
 297       */
 298      public function createWeight(Zend_Search_Lucene_Interface $reader)
 299      {
 300          $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
 301          return $this->_weight;
 302      }
 303  
 304  
 305      /**
 306       * Calculate result vector for Conjunction query
 307       * (like '+something +another')
 308       *
 309       * @param Zend_Search_Lucene_Interface $reader
 310       */
 311      private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
 312      {
 313          $this->_resVector = null;
 314  
 315          if (count($this->_terms) == 0) {
 316              $this->_resVector = array();
 317          }
 318  
 319          foreach( $this->_terms as $termId=>$term ) {
 320              if($this->_resVector === null) {
 321                  $this->_resVector = array_flip($reader->termDocs($term));
 322              } else {
 323                  $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
 324              }
 325  
 326              if (count($this->_resVector) == 0) {
 327                  // Empty result set, we don't need to check other terms
 328                  break;
 329              }
 330  
 331              $this->_termsFreqs[$termId] = $reader->termFreqs($term);
 332          }
 333  
 334          ksort($this->_resVector, SORT_NUMERIC);
 335      }
 336  
 337  
 338      /**
 339       * Calculate result vector for non Conjunction query
 340       * (like '+something -another')
 341       *
 342       * @param Zend_Search_Lucene_Interface $reader
 343       */
 344      private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
 345      {
 346          $required   = null;
 347          $optional   = array();
 348          $prohibited = array();
 349  
 350          foreach ($this->_terms as $termId => $term) {
 351              $termDocs = array_flip($reader->termDocs($term));
 352  
 353              if ($this->_signs[$termId] === true) {
 354                  // required
 355                  if ($required !== null) {
 356                      // array intersection
 357                      $required = array_intersect_key($required, $termDocs);
 358                  } else {
 359                      $required = $termDocs;
 360                  }
 361              } elseif ($this->_signs[$termId] === false) {
 362                  // prohibited
 363                  // array union
 364                  $prohibited += $termDocs;
 365              } else {
 366                  // neither required, nor prohibited
 367                  // array union
 368                  $optional += $termDocs;
 369              }
 370  
 371              $this->_termsFreqs[$termId] = $reader->termFreqs($term);
 372          }
 373  
 374          if ($required !== null) {
 375              $this->_resVector = (count($prohibited) > 0) ?
 376                                             array_diff_key($required, $prohibited) :
 377                                             $required;
 378          } else {
 379              $this->_resVector = (count($prohibited) > 0) ?
 380                                             array_diff_key($optional, $prohibited) :
 381                                             $optional;
 382          }
 383  
 384          ksort($this->_resVector, SORT_NUMERIC);
 385      }
 386  
 387  
 388      /**
 389       * Score calculator for conjunction queries (all terms are required)
 390       *
 391       * @param integer $docId
 392       * @param Zend_Search_Lucene_Interface $reader
 393       * @return float
 394       */
 395      public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
 396      {
 397          if ($this->_coord === null) {
 398              $this->_coord = $reader->getSimilarity()->coord(count($this->_terms),
 399                                                              count($this->_terms) );
 400          }
 401  
 402          $score = 0.0;
 403  
 404          foreach ($this->_terms as $termId=>$term) {
 405              /**
 406               * We don't need to check that term freq is not 0
 407               * Score calculation is performed only for matched docs
 408               */
 409              $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
 410                        $this->_weights[$termId]->getValue() *
 411                        $reader->norm($docId, $term->field);
 412          }
 413  
 414          return $score * $this->_coord * $this->getBoost();
 415      }
 416  
 417  
 418      /**
 419       * Score calculator for non conjunction queries (not all terms are required)
 420       *
 421       * @param integer $docId
 422       * @param Zend_Search_Lucene_Interface $reader
 423       * @return float
 424       */
 425      public function _nonConjunctionScore($docId, $reader)
 426      {
 427          if ($this->_coord === null) {
 428              $this->_coord = array();
 429  
 430              $maxCoord = 0;
 431              foreach ($this->_signs as $sign) {
 432                  if ($sign !== false /* not prohibited */) {
 433                      $maxCoord++;
 434                  }
 435              }
 436  
 437              for ($count = 0; $count <= $maxCoord; $count++) {
 438                  $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
 439              }
 440          }
 441  
 442          $score = 0.0;
 443          $matchedTerms = 0;
 444          foreach ($this->_terms as $termId=>$term) {
 445              // Check if term is
 446              if ($this->_signs[$termId] !== false &&        // not prohibited
 447                  isset($this->_termsFreqs[$termId][$docId]) // matched
 448                 ) {
 449                  $matchedTerms++;
 450  
 451                  /**
 452                   * We don't need to check that term freq is not 0
 453                   * Score calculation is performed only for matched docs
 454                   */
 455                  $score +=
 456                        $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
 457                        $this->_weights[$termId]->getValue() *
 458                        $reader->norm($docId, $term->field);
 459              }
 460          }
 461  
 462          return $score * $this->_coord[$matchedTerms] * $this->getBoost();
 463      }
 464  
 465      /**
 466       * Execute query in context of index reader
 467       * It also initializes necessary internal structures
 468       *
 469       * @param Zend_Search_Lucene_Interface $reader
 470       */
 471      public function execute(Zend_Search_Lucene_Interface $reader)
 472      {
 473          if ($this->_signs === null) {
 474              $this->_calculateConjunctionResult($reader);
 475          } else {
 476              $this->_calculateNonConjunctionResult($reader);
 477          }
 478  
 479          // Initialize weight if it's not done yet
 480          $this->_initWeight($reader);
 481      }
 482  
 483      /**
 484       * Get document ids likely matching the query
 485       *
 486       * It's an array with document ids as keys (performance considerations)
 487       *
 488       * @return array
 489       */
 490      public function matchedDocs()
 491      {
 492          return $this->_resVector;
 493      }
 494  
 495      /**
 496       * Score specified document
 497       *
 498       * @param integer $docId
 499       * @param Zend_Search_Lucene_Interface $reader
 500       * @return float
 501       */
 502      public function score($docId, Zend_Search_Lucene_Interface $reader)
 503      {
 504          if (isset($this->_resVector[$docId])) {
 505              if ($this->_signs === null) {
 506                  return $this->_conjunctionScore($docId, $reader);
 507              } else {
 508                  return $this->_nonConjunctionScore($docId, $reader);
 509              }
 510          } else {
 511              return 0;
 512          }
 513      }
 514  
 515      /**
 516       * Return query terms
 517       *
 518       * @return array
 519       */
 520      public function getQueryTerms()
 521      {
 522          if ($this->_signs === null) {
 523              return $this->_terms;
 524          }
 525  
 526          $terms = array();
 527  
 528          foreach ($this->_signs as $id => $sign) {
 529              if ($sign !== false) {
 530                  $terms[] = $this->_terms[$id];
 531              }
 532          }
 533  
 534          return $terms;
 535      }
 536  
 537      /**
 538       * Highlight query terms
 539       *
 540       * @param integer &$colorIndex
 541       * @param Zend_Search_Lucene_Document_Html $doc
 542       */
 543      public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
 544      {
 545          $words = array();
 546  
 547          if ($this->_signs === null) {
 548              foreach ($this->_terms as $term) {
 549                  $words[] = $term->text;
 550              }
 551          } else {
 552              foreach ($this->_signs as $id => $sign) {
 553                  if ($sign !== false) {
 554                      $words[] = $this->_terms[$id]->text;
 555                  }
 556              }
 557          }
 558  
 559          $doc->highlight($words, $this->_getHighlightColor($colorIndex));
 560      }
 561  
 562      /**
 563       * Print a query
 564       *
 565       * @return string
 566       */
 567      public function __toString()
 568      {
 569          // It's used only for query visualisation, so we don't care about characters escaping
 570  
 571          $query = '';
 572  
 573          foreach ($this->_terms as $id => $term) {
 574              if ($id != 0) {
 575                  $query .= ' ';
 576              }
 577  
 578              if ($this->_signs === null || $this->_signs[$id] === true) {
 579                  $query .= '+';
 580              } else if ($this->_signs[$id] === false) {
 581                  $query .= '-';
 582              }
 583  
 584              if ($term->field !== null) {
 585                  $query .= $term->field . ':';
 586              }
 587              $query .= $term->text;
 588          }
 589  
 590          if ($this->getBoost() != 1) {
 591              $query = '(' . $query . ')^' . $this->getBoost();
 592          }
 593  
 594          return $query;
 595      }
 596  }
 597  


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7