[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Search/Query/ -> Boolean.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Search
  18   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  
  23  /** Zend_Search_Lucene_Search_Query */
  24  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
  25  
  26  /** Zend_Search_Lucene_Search_Weight_Boolean */
  27  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Boolean.php';
  28  
  29  
  30  /**
  31   * @category   Zend
  32   * @package    Zend_Search_Lucene
  33   * @subpackage Search
  34   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  35   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  36   */
  37  class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
  38  {
  39  
  40      /**
  41       * Subqueries
  42       * Array of Zend_Search_Lucene_Query
  43       *
  44       * @var array
  45       */
  46      private $_subqueries = array();
  47  
  48      /**
  49       * Subqueries signs.
  50       * If true then subquery is required.
  51       * If false then subquery is prohibited.
  52       * If null then subquery is neither prohibited, nor required
  53       *
  54       * If array is null then all subqueries are required
  55       *
  56       * @var array
  57       */
  58      private $_signs = array();
  59  
  60      /**
  61       * Result vector.
  62       *
  63       * @var array
  64       */
  65      private $_resVector = null;
  66  
  67      /**
  68       * A score factor based on the fraction of all query subqueries
  69       * that a document contains.
  70       * float for conjunction queries
  71       * array of float for non conjunction queries
  72       *
  73       * @var mixed
  74       */
  75      private $_coord = null;
  76  
  77  
  78      /**
  79       * Class constructor.  Create a new Boolean query object.
  80       *
  81       * if $signs array is omitted then all subqueries are required
  82       * it differs from addSubquery() behavior, but should never be used
  83       *
  84       * @param array $subqueries    Array of Zend_Search_Search_Query objects
  85       * @param array $signs    Array of signs.  Sign is boolean|null.
  86       * @return void
  87       */
  88      public function __construct($subqueries = null, $signs = null)
  89      {
  90          if (is_array($subqueries)) {
  91              $this->_subqueries = $subqueries;
  92  
  93              $this->_signs = null;
  94              // Check if all subqueries are required
  95              if (is_array($signs)) {
  96                  foreach ($signs as $sign ) {
  97                      if ($sign !== true) {
  98                          $this->_signs = $signs;
  99                          break;
 100                      }
 101                  }
 102              }
 103          }
 104      }
 105  
 106  
 107      /**
 108       * Add a $subquery (Zend_Search_Lucene_Query) to this query.
 109       *
 110       * The sign is specified as:
 111       *     TRUE  - subquery is required
 112       *     FALSE - subquery is prohibited
 113       *     NULL  - subquery is neither prohibited, nor required
 114       *
 115       * @param  Zend_Search_Lucene_Search_Query $subquery
 116       * @param  boolean|null $sign
 117       * @return void
 118       */
 119      public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
 120          if ($sign !== true || $this->_signs !== null) {       // Skip, if all subqueries are required
 121              if ($this->_signs === null) {                     // Check, If all previous subqueries are required
 122                  foreach ($this->_subqueries as $prevSubquery) {
 123                      $this->_signs[] = true;
 124                  }
 125              }
 126              $this->_signs[] = $sign;
 127          }
 128  
 129          $this->_subqueries[] = $subquery;
 130      }
 131  
 132      /**
 133       * Re-write queries into primitive queries
 134       *
 135       * @param Zend_Search_Lucene_Interface $index
 136       * @return Zend_Search_Lucene_Search_Query
 137       */
 138      public function rewrite(Zend_Search_Lucene_Interface $index)
 139      {
 140          $query = new Zend_Search_Lucene_Search_Query_Boolean();
 141          $query->setBoost($this->getBoost());
 142  
 143          foreach ($this->_subqueries as $subqueryId => $subquery) {
 144              $query->addSubquery($subquery->rewrite($index),
 145                                  ($this->_signs === null)?  true : $this->_signs[$subqueryId]);
 146          }
 147  
 148          return $query;
 149      }
 150  
 151      /**
 152       * Optimize query in the context of specified index
 153       *
 154       * @param Zend_Search_Lucene_Interface $index
 155       * @return Zend_Search_Lucene_Search_Query
 156       */
 157      public function optimize(Zend_Search_Lucene_Interface $index)
 158      {
 159          $subqueries = array();
 160          $signs      = array();
 161  
 162          // Optimize all subqueries
 163          foreach ($this->_subqueries as $id => $subquery) {
 164              $subqueries[] = $subquery->optimize($index);
 165              $signs[]      = ($this->_signs === null)? true : $this->_signs[$id];
 166          }
 167  
 168          // Check for empty subqueries
 169          foreach ($subqueries as $id => $subquery) {
 170              if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
 171                  if ($signs[$id] === true) {
 172                      // Matching is required, but is actually empty
 173                      return new Zend_Search_Lucene_Search_Query_Empty();
 174                  } else {
 175                      // Matching is optional or prohibited, but is empty
 176                      // Remove it from subqueries and signs list
 177                      unset($subqueries[$id]);
 178                      unset($signs[$id]);
 179                  }
 180              }
 181          }
 182  
 183  
 184          // Check if all non-empty subqueries are prohibited
 185          $allProhibited = true;
 186          foreach ($signs as $sign) {
 187              if ($sign !== false) {
 188                  $allProhibited = false;
 189                  break;
 190              }
 191          }
 192          if ($allProhibited) {
 193              return new Zend_Search_Lucene_Search_Query_Empty();
 194          }
 195  
 196  
 197          // Check, if reduced subqueries list has only one entry
 198          if (count($subqueries) == 1) {
 199              // It's a query with only one required or optional clause
 200              // (it's already checked, that it's not a prohibited clause)
 201  
 202              if ($this->getBoost() == 1) {
 203                  return reset($subqueries);
 204              }
 205  
 206              $optimizedQuery = clone reset($subqueries);
 207              $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
 208  
 209              return $optimizedQuery;
 210          }
 211  
 212  
 213          // Check, if reduced subqueries list is empty
 214          if (count($subqueries) == 0) {
 215              return new Zend_Search_Lucene_Search_Query_Empty();
 216          }
 217  
 218  
 219          // Prepare first candidate for optimized query
 220          $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
 221          $optimizedQuery->setBoost($this->getBoost());
 222  
 223  
 224          $terms        = array();
 225          $tsigns       = array();
 226          $boostFactors = array();
 227  
 228          // Try to decompose term and multi-term subqueries
 229          foreach ($subqueries as $id => $subquery) {
 230              if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
 231                  $terms[]        = $subquery->getTerm();
 232                  $tsigns[]       = $signs[$id];
 233                  $boostFactors[] = $subquery->getBoost();
 234  
 235                  // remove subquery from a subqueries list
 236                  unset($subqueries[$id]);
 237                  unset($signs[$id]);
 238             } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
 239                  $subTerms = $subquery->getTerms();
 240                  $subSigns = $subquery->getSigns();
 241  
 242                  if ($signs[$id] === true) {
 243                      // It's a required multi-term subquery.
 244                      // Something like '... +(+term1 -term2 term3 ...) ...'
 245  
 246                      // Multi-term required subquery can be decomposed only if it contains
 247                      // required terms and doesn't contain prohibited terms:
 248                      // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
 249                      //
 250                      // Check this
 251                      $hasRequired   = false;
 252                      $hasProhibited = false;
 253                      if ($subSigns === null) {
 254                          // All subterms are required
 255                          $hasRequired = true;
 256                      } else {
 257                          foreach ($subSigns as $sign) {
 258                              if ($sign === true) {
 259                                  $hasRequired   = true;
 260                              } else if ($sign === false) {
 261                                  $hasProhibited = true;
 262                                  break;
 263                              }
 264                          }
 265                      }
 266                      // Continue if subquery has prohibited terms or doesn't have required terms
 267                      if ($hasProhibited  ||  !$hasRequired) {
 268                          continue;
 269                      }
 270  
 271                      foreach ($subTerms as $termId => $term) {
 272                          $terms[]        = $term;
 273                          $tsigns[]       = ($subSigns === null)? true : $subSigns[$termId];
 274                          $boostFactors[] = $subquery->getBoost();
 275                      }
 276  
 277                      // remove subquery from a subqueries list
 278                      unset($subqueries[$id]);
 279                      unset($signs[$id]);
 280  
 281                  } else { // $signs[$id] === null  ||  $signs[$id] === false
 282                      // It's an optional or prohibited multi-term subquery.
 283                      // Something like '... (+term1 -term2 term3 ...) ...'
 284                      // or
 285                      // something like '... -(+term1 -term2 term3 ...) ...'
 286  
 287                      // Multi-term optional and required subqueries can be decomposed
 288                      // only if all terms are optional.
 289                      //
 290                      // Check if all terms are optional.
 291                      $onlyOptional = true;
 292                      if ($subSigns === null) {
 293                          // All subterms are required
 294                          $onlyOptional = false;
 295                      } else {
 296                          foreach ($subSigns as $sign) {
 297                              if ($sign !== null) {
 298                                  $onlyOptional = false;
 299                                  break;
 300                              }
 301                          }
 302                      }
 303  
 304                      // Continue if non-optional terms are presented in this multi-term subquery
 305                      if (!$onlyOptional) {
 306                          continue;
 307                      }
 308  
 309                      foreach ($subTerms as $termId => $term) {
 310                          $terms[]  = $term;
 311                          $tsigns[] = ($signs[$id] === null)? null  /* optional */ :
 312                                                              false /* prohibited */;
 313                          $boostFactors[] = $subquery->getBoost();
 314                      }
 315  
 316                      // remove subquery from a subqueries list
 317                      unset($subqueries[$id]);
 318                      unset($signs[$id]);
 319                  }
 320              }
 321          }
 322  
 323  
 324          // Check, if there are no decomposed subqueries
 325          if (count($terms) == 0 ) {
 326              // return prepared candidate
 327              return $optimizedQuery;
 328          }
 329  
 330  
 331          // Check, if all subqueries have been decomposed and all terms has the same boost factor
 332          if (count($subqueries) == 0  &&  count(array_unique($boostFactors)) == 1) {
 333              $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
 334              $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
 335  
 336              return $optimizedQuery;
 337          }
 338  
 339  
 340          // This boolean query can't be transformed to Term/MultiTerm query and still contains
 341          // several subqueries
 342  
 343          // Separate prohibited terms
 344          $prohibitedTerms        = array();
 345          foreach ($terms as $id => $term) {
 346              if ($tsigns[$id] === false) {
 347                  $prohibitedTerms[]        = $term;
 348  
 349                  unset($terms[$id]);
 350                  unset($tsigns[$id]);
 351                  unset($boostFactors[$id]);
 352              }
 353          }
 354  
 355          if (count($terms) == 1) {
 356              $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
 357              $clause->setBoost(reset($boostFactors));
 358  
 359              $subqueries[] = $clause;
 360              $signs[]      = reset($tsigns);
 361  
 362              // Clear terms list
 363              $terms = array();
 364          } else if (count($terms) > 1  &&  count(array_unique($boostFactors)) == 1) {
 365              $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
 366              $clause->setBoost(reset($boostFactors));
 367  
 368              $subqueries[] = $clause;
 369              // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
 370              $signs[]      = (in_array(true, $tsigns))? true : null;
 371  
 372              // Clear terms list
 373              $terms = array();
 374          }
 375  
 376          if (count($prohibitedTerms) == 1) {
 377              // (boost factors are not significant for prohibited clauses)
 378              $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
 379              $signs[]      = false;
 380  
 381              // Clear prohibited terms list
 382              $prohibitedTerms = array();
 383          } else if (count($prohibitedTerms) > 1) {
 384              // prepare signs array
 385              $prohibitedSigns = array();
 386              foreach ($prohibitedTerms as $id => $term) {
 387                  // all prohibited term are grouped as optional into multi-term query
 388                  $prohibitedSigns[$id] = null;
 389              }
 390  
 391              // (boost factors are not significant for prohibited clauses)
 392              $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
 393              // Clause sign is 'prohibited'
 394              $signs[]      = false;
 395  
 396              // Clear terms list
 397              $prohibitedTerms = array();
 398          }
 399  
 400          /** @todo Group terms with the same boost factors together */
 401  
 402          // Check, that all terms are processed
 403          // Replace candidate for optimized query
 404          if (count($terms) == 0  &&  count($prohibitedTerms) == 0) {
 405              $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
 406              $optimizedQuery->setBoost($this->getBoost());
 407          }
 408  
 409          return $optimizedQuery;
 410      }
 411  
 412      /**
 413       * Returns subqueries
 414       *
 415       * @return array
 416       */
 417      public function getSubqueries()
 418      {
 419          return $this->_subqueries;
 420      }
 421  
 422  
 423      /**
 424       * Return subqueries signs
 425       *
 426       * @return array
 427       */
 428      public function getSigns()
 429      {
 430          return $this->_signs;
 431      }
 432  
 433  
 434      /**
 435       * Constructs an appropriate Weight implementation for this query.
 436       *
 437       * @param Zend_Search_Lucene_Interface $reader
 438       * @return Zend_Search_Lucene_Search_Weight
 439       */
 440      public function createWeight(Zend_Search_Lucene_Interface $reader)
 441      {
 442          $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
 443          return $this->_weight;
 444      }
 445  
 446  
 447      /**
 448       * Calculate result vector for Conjunction query
 449       * (like '<subquery1> AND <subquery2> AND <subquery3>')
 450       */
 451      private function _calculateConjunctionResult()
 452      {
 453          $this->_resVector = null;
 454  
 455          if (count($this->_subqueries) == 0) {
 456              $this->_resVector = array();
 457          }
 458  
 459          foreach ($this->_subqueries as $subquery) {
 460              if($this->_resVector === null) {
 461                  $this->_resVector = $subquery->matchedDocs();
 462              } else {
 463                  $this->_resVector = array_intersect_key($this->_resVector, $subquery->matchedDocs());
 464              }
 465  
 466              if (count($this->_resVector) == 0) {
 467                  // Empty result set, we don't need to check other terms
 468                  break;
 469              }
 470          }
 471  
 472          ksort($this->_resVector, SORT_NUMERIC);
 473      }
 474  
 475  
 476      /**
 477       * Calculate result vector for non Conjunction query
 478       * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
 479       */
 480      private function _calculateNonConjunctionResult()
 481      {
 482          $required   = null;
 483          $optional   = array();
 484  
 485          foreach ($this->_subqueries as $subqueryId => $subquery) {
 486              $docs = $subquery->matchedDocs();
 487  
 488              if ($this->_signs[$subqueryId] === true) {
 489                  // required
 490                  if ($required !== null) {
 491                      // array intersection
 492                      $required = array_intersect_key($required, $docs);
 493                  } else {
 494                      $required = $docs;
 495                  }
 496              } elseif ($this->_signs[$subqueryId] === false) {
 497                  // prohibited
 498                  // Do nothing. matchedDocs() may include non-matching id's
 499              } else {
 500                  // neither required, nor prohibited
 501                  // array union
 502                  $optional += $docs;
 503              }
 504          }
 505  
 506          if ($required !== null) {
 507              $this->_resVector = &$required;
 508          } else {
 509              $this->_resVector = &$optional;
 510          }
 511  
 512          ksort($this->_resVector, SORT_NUMERIC);
 513      }
 514  
 515  
 516      /**
 517       * Score calculator for conjunction queries (all subqueries are required)
 518       *
 519       * @param integer $docId
 520       * @param Zend_Search_Lucene_Interface $reader
 521       * @return float
 522       */
 523      public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
 524      {
 525          if ($this->_coord === null) {
 526              $this->_coord = $reader->getSimilarity()->coord(count($this->_subqueries),
 527                                                              count($this->_subqueries) );
 528          }
 529  
 530          $score = 0;
 531  
 532          foreach ($this->_subqueries as $subquery) {
 533              $subscore = $subquery->score($docId, $reader);
 534  
 535              if ($subscore == 0) {
 536                  return 0;
 537              }
 538  
 539              $score += $subquery->score($docId, $reader) * $this->_coord;
 540          }
 541  
 542          return $score * $this->_coord * $this->getBoost();
 543      }
 544  
 545  
 546      /**
 547       * Score calculator for non conjunction queries (not all subqueries are required)
 548       *
 549       * @param integer $docId
 550       * @param Zend_Search_Lucene_Interface $reader
 551       * @return float
 552       */
 553      public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
 554      {
 555          if ($this->_coord === null) {
 556              $this->_coord = array();
 557  
 558              $maxCoord = 0;
 559              foreach ($this->_signs as $sign) {
 560                  if ($sign !== false /* not prohibited */) {
 561                      $maxCoord++;
 562                  }
 563              }
 564  
 565              for ($count = 0; $count <= $maxCoord; $count++) {
 566                  $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
 567              }
 568          }
 569  
 570          $score = 0;
 571          $matchedSubqueries = 0;
 572          foreach ($this->_subqueries as $subqueryId => $subquery) {
 573              $subscore = $subquery->score($docId, $reader);
 574  
 575              // Prohibited
 576              if ($this->_signs[$subqueryId] === false && $subscore != 0) {
 577                  return 0;
 578              }
 579  
 580              // is required, but doen't match
 581              if ($this->_signs[$subqueryId] === true &&  $subscore == 0) {
 582                  return 0;
 583              }
 584  
 585              if ($subscore != 0) {
 586                  $matchedSubqueries++;
 587                  $score += $subscore;
 588              }
 589          }
 590  
 591          return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
 592      }
 593  
 594      /**
 595       * Execute query in context of index reader
 596       * It also initializes necessary internal structures
 597       *
 598       * @param Zend_Search_Lucene_Interface $reader
 599       */
 600      public function execute(Zend_Search_Lucene_Interface $reader)
 601      {
 602          // Initialize weight if it's not done yet
 603          $this->_initWeight($reader);
 604  
 605          foreach ($this->_subqueries as $subquery) {
 606              $subquery->execute($reader);
 607          }
 608  
 609          if ($this->_signs === null) {
 610              $this->_calculateConjunctionResult();
 611          } else {
 612              $this->_calculateNonConjunctionResult();
 613          }
 614      }
 615  
 616  
 617  
 618      /**
 619       * Get document ids likely matching the query
 620       *
 621       * It's an array with document ids as keys (performance considerations)
 622       *
 623       * @return array
 624       */
 625      public function matchedDocs()
 626      {
 627          return $this->_resVector;
 628      }
 629  
 630      /**
 631       * Score specified document
 632       *
 633       * @param integer $docId
 634       * @param Zend_Search_Lucene_Interface $reader
 635       * @return float
 636       */
 637      public function score($docId, Zend_Search_Lucene_Interface $reader)
 638      {
 639          if (isset($this->_resVector[$docId])) {
 640              if ($this->_signs === null) {
 641                  return $this->_conjunctionScore($docId, $reader);
 642              } else {
 643                  return $this->_nonConjunctionScore($docId, $reader);
 644              }
 645          } else {
 646              return 0;
 647          }
 648      }
 649  
 650      /**
 651       * Return query terms
 652       *
 653       * @return array
 654       */
 655      public function getQueryTerms()
 656      {
 657          $terms = array();
 658  
 659          foreach ($this->_subqueries as $id => $subquery) {
 660              if ($this->_signs === null  ||  $this->_signs[$id] !== false) {
 661                  $terms = array_merge($terms, $subquery->getQueryTerms());
 662              }
 663          }
 664  
 665          return $terms;
 666      }
 667  
 668      /**
 669       * Highlight query terms
 670       *
 671       * @param integer &$colorIndex
 672       * @param Zend_Search_Lucene_Document_Html $doc
 673       */
 674      public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
 675      {
 676          foreach ($this->_subqueries as $id => $subquery) {
 677              if ($this->_signs === null  ||  $this->_signs[$id] !== false) {
 678                  $subquery->highlightMatchesDOM($doc, $colorIndex);
 679              }
 680          }
 681      }
 682  
 683      /**
 684       * Print a query
 685       *
 686       * @return string
 687       */
 688      public function __toString()
 689      {
 690          // It's used only for query visualisation, so we don't care about characters escaping
 691  
 692          $query = '';
 693  
 694          foreach ($this->_subqueries as $id => $subquery) {
 695              if ($id != 0) {
 696                  $query .= ' ';
 697              }
 698  
 699              if ($this->_signs === null || $this->_signs[$id] === true) {
 700                  $query .= '+';
 701              } else if ($this->_signs[$id] === false) {
 702                  $query .= '-';
 703              }
 704  
 705              $query .= '(' . $subquery->__toString() . ')';
 706  
 707              if ($subquery->getBoost() != 1) {
 708                  $query .= '^' . $subquery->getBoost();
 709              }
 710          }
 711  
 712          return $query;
 713      }
 714  }
 715  


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7