[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Search/ -> QueryParserContext.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Search
  18   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  /** Zend_Search_Lucene_FSM */
  23  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
  24  
  25  
  26  /** Zend_Search_Lucene_Index_Term */
  27  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
  28  
  29  /** Zend_Search_Lucene_Search_QueryToken */
  30  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
  31  
  32  /** Zend_Search_Lucene_Search_Query_Term */
  33  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
  34  
  35  /** Zend_Search_Lucene_Search_Query_MultiTerm */
  36  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
  37  
  38  /** Zend_Search_Lucene_Search_Query_Boolean */
  39  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
  40  
  41  /** Zend_Search_Lucene_Search_Query_Phrase */
  42  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
  43  
  44  /** Zend_Search_Lucene_Exception */
  45  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
  46  
  47  /** Zend_Search_Lucene_Search_QueryParserException */
  48  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
  49  
  50  /** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
  51  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
  52  
  53  /** Zend_Search_Lucene_Search_QueryEntry */
  54  require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
  55  
  56  
  57  /**
  58   * @category   Zend
  59   * @package    Zend_Search_Lucene
  60   * @subpackage Search
  61   * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  62   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  63   */
  64  class Zend_Search_Lucene_Search_QueryParserContext
  65  {
  66      /**
  67       * Default field for the context.
  68       *
  69       * null means, that term should be searched through all fields
  70       * Zend_Search_Lucene_Search_Query::rewriteQuery($index) transletes such queries to several
  71       *
  72       * @var string|null
  73       */
  74      private $_defaultField;
  75  
  76      /**
  77       * Field specified for next entry
  78       *
  79       * @var string
  80       */
  81      private $_nextEntryField = null;
  82  
  83      /**
  84       * True means, that term is required.
  85       * False means, that term is prohibited.
  86       * null means, that term is neither prohibited, nor required
  87       *
  88       * @var boolean
  89       */
  90      private $_nextEntrySign = null;
  91  
  92  
  93      /**
  94       * Entries grouping mode
  95       */
  96      const GM_SIGNS   = 0;  // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
  97      const GM_BOOLEAN = 1;  // Boolean operators mode: 'term1 and term2  or  (subquery1) and not (subquery2)'
  98  
  99      /**
 100       * Grouping mode
 101       *
 102       * @var integer
 103       */
 104      private $_mode = null;
 105  
 106      /**
 107       * Entries signs.
 108       * Used in GM_SIGNS grouping mode
 109       *
 110       * @var arrays
 111       */
 112      private $_signs = array();
 113  
 114      /**
 115       * Query entries
 116       * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
 117       * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
 118       *
 119       * @var array
 120       */
 121      private $_entries = array();
 122  
 123      /**
 124       * Query string encoding
 125       *
 126       * @var string
 127       */
 128      private $_encoding;
 129  
 130  
 131      /**
 132       * Context object constructor
 133       *
 134       * @param string $encoding
 135       * @param string|null $defaultField
 136       */
 137      public function __construct($encoding, $defaultField = null)
 138      {
 139          $this->_encoding     = $encoding;
 140          $this->_defaultField = $defaultField;
 141      }
 142  
 143  
 144      /**
 145       * Get context default field
 146       *
 147       * @return string|null
 148       */
 149      public function getField()
 150      {
 151          return ($this->_nextEntryField !== null)  ?  $this->_nextEntryField : $this->_defaultField;
 152      }
 153  
 154      /**
 155       * Set field for next entry
 156       *
 157       * @param string $field
 158       */
 159      public function setNextEntryField($field)
 160      {
 161          $this->_nextEntryField = $field;
 162      }
 163  
 164  
 165      /**
 166       * Set sign for next entry
 167       *
 168       * @param integer $sign
 169       * @throws Zend_Search_Lucene_Exception
 170       */
 171      public function setNextEntrySign($sign)
 172      {
 173          if ($this->_mode === self::GM_BOOLEAN) {
 174              throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
 175          }
 176  
 177          $this->_mode = self::GM_SIGNS;
 178  
 179          if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
 180              $this->_nextEntrySign = true;
 181          } else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
 182              $this->_nextEntrySign = false;
 183          } else {
 184              throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
 185          }
 186      }
 187  
 188  
 189      /**
 190       * Add entry to a query
 191       *
 192       * @param Zend_Search_Lucene_Search_QueryEntry $entry
 193       */
 194      public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
 195      {
 196          if ($this->_mode !== self::GM_BOOLEAN) {
 197              $this->_signs[] = $this->_nextEntrySign;
 198          }
 199  
 200          $this->_entries[] = $entry;
 201  
 202          $this->_nextEntryField = null;
 203          $this->_nextEntrySign  = null;
 204      }
 205  
 206  
 207      /**
 208       * Process fuzzy search or proximity search modifier
 209       *
 210       * @throws Zend_Search_Lucene_Search_QueryParserException
 211       */
 212      public function processFuzzyProximityModifier($parameter = null)
 213      {
 214          // Check, that modifier has came just after word or phrase
 215          if ($this->_nextEntryField !== null  ||  $this->_nextEntrySign !== null) {
 216              throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
 217          }
 218  
 219          $lastEntry = array_pop($this->_entries);
 220  
 221          if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
 222              // there are no entries or last entry is boolean operator
 223              throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
 224          }
 225  
 226          $lastEntry->processFuzzyProximityModifier($parameter);
 227  
 228          $this->_entries[] = $lastEntry;
 229      }
 230  
 231      /**
 232       * Set boost factor to the entry
 233       *
 234       * @param float $boostFactor
 235       */
 236      public function boost($boostFactor)
 237      {
 238          // Check, that modifier has came just after word or phrase
 239          if ($this->_nextEntryField !== null  ||  $this->_nextEntrySign !== null) {
 240              throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
 241          }
 242  
 243          $lastEntry = array_pop($this->_entries);
 244  
 245          if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
 246              // there are no entries or last entry is boolean operator
 247              throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
 248          }
 249  
 250          $lastEntry->boost($boostFactor);
 251  
 252          $this->_entries[] = $lastEntry;
 253      }
 254  
 255      /**
 256       * Process logical operator
 257       *
 258       * @param integer $operator
 259       */
 260      public function addLogicalOperator($operator)
 261      {
 262          if ($this->_mode === self::GM_SIGNS) {
 263              throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
 264          }
 265  
 266          $this->_mode = self::GM_BOOLEAN;
 267  
 268          $this->_entries[] = $operator;
 269      }
 270  
 271  
 272      /**
 273       * Generate 'signs style' query from the context
 274       * '+term1 term2 -term3 +(<subquery1>) ...'
 275       *
 276       * @return Zend_Search_Lucene_Search_Query
 277       */
 278      public function _signStyleExpressionQuery()
 279      {
 280          $query = new Zend_Search_Lucene_Search_Query_Boolean();
 281  
 282          if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND) {
 283              $defaultSign = true; // required
 284          } else {
 285              // Zend_Search_Lucene_Search_QueryParser::B_OR
 286              $defaultSign = null; // optional
 287          }
 288  
 289          foreach ($this->_entries as $entryId => $entry) {
 290              $sign = ($this->_signs[$entryId] !== null) ?  $this->_signs[$entryId] : $defaultSign;
 291              $query->addSubquery($entry->getQuery($this->_encoding), $sign);
 292          }
 293  
 294          return $query;
 295      }
 296  
 297  
 298      /**
 299       * Generate 'boolean style' query from the context
 300       * 'term1 and term2   or   term3 and (<subquery1>) and not (<subquery2>)'
 301       *
 302       * @return Zend_Search_Lucene_Search_Query
 303       * @throws Zend_Search_Lucene
 304       */
 305      private function _booleanExpressionQuery()
 306      {
 307          /**
 308           * We treat each level of an expression as a boolean expression in
 309           * a Disjunctive Normal Form
 310           *
 311           * AND operator has higher precedence than OR
 312           *
 313           * Thus logical query is a disjunction of one or more conjunctions of
 314           * one or more query entries
 315           */
 316  
 317          $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
 318  
 319          try {
 320              foreach ($this->_entries as $entry) {
 321                  if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
 322                      $expressionRecognizer->processLiteral($entry);
 323                  } else {
 324                      switch ($entry) {
 325                          case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
 326                              $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
 327                              break;
 328  
 329                          case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
 330                              $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
 331                              break;
 332  
 333                          case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
 334                              $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
 335                              break;
 336  
 337                          default:
 338                              throw new Zend_Search_Lucene('Boolean expression error. Unknown operator type.');
 339                      }
 340                  }
 341              }
 342  
 343              $conjuctions = $expressionRecognizer->finishExpression();
 344          } catch (Zend_Search_Exception $e) {
 345              // throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
 346              //                                                          $e->getMessage() . '\'.' );
 347              // It's query syntax error message and it should be user friendly. So FSM message is omitted
 348              throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
 349          }
 350  
 351          // Remove 'only negative' conjunctions
 352          foreach ($conjuctions as $conjuctionId => $conjuction) {
 353              $nonNegativeEntryFound = false;
 354  
 355              foreach ($conjuction as $conjuctionEntry) {
 356                  if ($conjuctionEntry[1]) {
 357                      $nonNegativeEntryFound = true;
 358                      break;
 359                  }
 360              }
 361  
 362              if (!$nonNegativeEntryFound) {
 363                  unset($conjuctions[$conjuctionId]);
 364              }
 365          }
 366  
 367  
 368          $subqueries = array();
 369          foreach ($conjuctions as  $conjuction) {
 370              // Check, if it's a one term conjuction
 371              if (count($conjuction) == 1) {
 372                  $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
 373              } else {
 374                  $subquery = new Zend_Search_Lucene_Search_Query_Boolean();
 375  
 376                  foreach ($conjuction as $conjuctionEntry) {
 377                      $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
 378                  }
 379  
 380                  $subqueries[] = $subquery;
 381              }
 382          }
 383  
 384          if (count($subqueries) == 0) {
 385              return new Zend_Search_Lucene_Search_Query_Empty();
 386          }
 387  
 388          if (count($subqueries) == 1) {
 389              return $subqueries[0];
 390          }
 391  
 392  
 393          $query = new Zend_Search_Lucene_Search_Query_Boolean();
 394  
 395          foreach ($subqueries as $subquery) {
 396              // Non-requirered entry/subquery
 397              $query->addSubquery($subquery);
 398          }
 399  
 400          return $query;
 401      }
 402  
 403      /**
 404       * Generate query from current context
 405       *
 406       * @return Zend_Search_Lucene_Search_Query
 407       */
 408      public function getQuery()
 409      {
 410          if ($this->_mode === self::GM_BOOLEAN) {
 411              return $this->_booleanExpressionQuery();
 412          } else {
 413              return $this->_signStyleExpressionQuery();
 414          }
 415      }
 416  }


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7