[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/Zend/Search/Lucene/Search/Query/ -> Wildcard.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to the new BSD license that is bundled
   8   * with this package in the file LICENSE.txt.
   9   * It is also available through the world-wide-web at this URL:
  10   * http://framework.zend.com/license/new-bsd
  11   * If you did not receive a copy of the license and are unable to
  12   * obtain it through the world-wide-web, please send an email
  13   * to license@zend.com so we can send you a copy immediately.
  14   *
  15   * @category   Zend
  16   * @package    Zend_Search_Lucene
  17   * @subpackage Search
  18   * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20   */
  21  
  22  
  23  /** Zend_Search_Lucene_Search_Query */
  24  require_once 'Zend/Search/Lucene/Search/Query.php';
  25  
  26  /** Zend_Search_Lucene_Search_Query_MultiTerm */
  27  require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
  28  
  29  
  30  /**
  31   * @category   Zend
  32   * @package    Zend_Search_Lucene
  33   * @subpackage Search
  34   * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  35   * @license    http://framework.zend.com/license/new-bsd     New BSD License
  36   */
  37  class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search_Query
  38  {
  39      /**
  40       * Search pattern.
  41       *
  42       * Field has to be fully specified or has to be null
  43       * Text may contain '*' or '?' symbols
  44       *
  45       * @var Zend_Search_Lucene_Index_Term
  46       */
  47      private $_pattern;
  48  
  49      /**
  50       * Matched terms.
  51       *
  52       * Matched terms list.
  53       * It's filled during the search (rewrite operation) and may be used for search result
  54       * post-processing
  55       *
  56       * Array of Zend_Search_Lucene_Index_Term objects
  57       *
  58       * @var array
  59       */
  60      private $_matches = null;
  61  
  62      /**
  63       * Zend_Search_Lucene_Search_Query_Wildcard constructor.
  64       *
  65       * @param Zend_Search_Lucene_Index_Term $pattern
  66       */
  67      public function __construct(Zend_Search_Lucene_Index_Term $pattern)
  68      {
  69          $this->_pattern = $pattern;
  70      }
  71  
  72      /**
  73       * Get terms prefix
  74       *
  75       * @param string $word
  76       * @return string
  77       */
  78      private static function _getPrefix($word)
  79      {
  80          $questionMarkPosition = strpos($word, '?');
  81          $astrericPosition     = strpos($word, '*');
  82  
  83          if ($questionMarkPosition !== false) {
  84              if ($astrericPosition !== false) {
  85                  return substr($word, 0, min($questionMarkPosition, $astrericPosition));
  86              }
  87  
  88              return substr($word, 0, $questionMarkPosition);
  89          } else if ($astrericPosition !== false) {
  90              return substr($word, 0, $astrericPosition);
  91          }
  92  
  93          return $word;
  94      }
  95  
  96      /**
  97       * Re-write query into primitive queries in the context of specified index
  98       *
  99       * @param Zend_Search_Lucene_Interface $index
 100       * @return Zend_Search_Lucene_Search_Query
 101       */
 102      public function rewrite(Zend_Search_Lucene_Interface $index)
 103      {
 104          $this->_matches = array();
 105  
 106          if ($this->_pattern->field === null) {
 107              // Search through all fields
 108              $fields = $index->getFieldNames(true /* indexed fields list */);
 109          } else {
 110              $fields = array($this->_pattern->field);
 111          }
 112  
 113          $prefix          = self::_getPrefix($this->_pattern->text);
 114          $prefixLength    = strlen($prefix);
 115          $matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*') , preg_quote($this->_pattern->text, '/')) . '$/';
 116  
 117          /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
 118          if (@preg_match('/\pL/u', 'a') == 1) {
 119              // PCRE unicode support is turned on
 120              // add Unicode modifier to the match expression
 121              $matchExpression .= 'u';
 122          }
 123  
 124  
 125          foreach ($fields as $field) {
 126              $index->resetTermsStream();
 127  
 128              if ($prefix != '') {
 129                  $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
 130  
 131                  while ($index->currentTerm() !== null          &&
 132                         $index->currentTerm()->field == $field  &&
 133                         substr($index->currentTerm()->text, 0, $prefixLength) == $prefix) {
 134                      if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
 135                          $this->_matches[] = $index->currentTerm();
 136                      }
 137  
 138                      $index->nextTerm();
 139                  }
 140              } else {
 141                  $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
 142  
 143                  while ($index->currentTerm() !== null  &&  $index->currentTerm()->field == $field) {
 144                      if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
 145                          $this->_matches[] = $index->currentTerm();
 146                      }
 147  
 148                      $index->nextTerm();
 149                  }
 150              }
 151  
 152              $index->closeTermsStream();
 153          }
 154  
 155          if (count($this->_matches) == 0) {
 156              return new Zend_Search_Lucene_Search_Query_Empty();
 157          } else if (count($this->_matches) == 1) {
 158              return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
 159          } else {
 160              $rewrittenQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();
 161  
 162              foreach ($this->_matches as $matchedTerm) {
 163                  $rewrittenQuery->addTerm($matchedTerm);
 164              }
 165  
 166              return $rewrittenQuery;
 167          }
 168      }
 169  
 170      /**
 171       * Optimize query in the context of specified index
 172       *
 173       * @param Zend_Search_Lucene_Interface $index
 174       * @return Zend_Search_Lucene_Search_Query
 175       */
 176      public function optimize(Zend_Search_Lucene_Interface $index)
 177      {
 178          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
 179      }
 180  
 181  
 182      /**
 183       * Returns query pattern
 184       *
 185       * @return Zend_Search_Lucene_Index_Term
 186       */
 187      public function getPattern()
 188      {
 189          return $this->_pattern;
 190      }
 191  
 192  
 193      /**
 194       * Return query terms
 195       *
 196       * @return array
 197       * @throws Zend_Search_Lucene_Exception
 198       */
 199      public function getQueryTerms()
 200      {
 201          if ($this->_matches === null) {
 202              throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
 203          }
 204  
 205          return $this->_matches;
 206      }
 207  
 208      /**
 209       * Constructs an appropriate Weight implementation for this query.
 210       *
 211       * @param Zend_Search_Lucene_Interface $reader
 212       * @return Zend_Search_Lucene_Search_Weight
 213       * @throws Zend_Search_Lucene_Exception
 214       */
 215      public function createWeight(Zend_Search_Lucene_Interface $reader)
 216      {
 217          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
 218      }
 219  
 220  
 221      /**
 222       * Execute query in context of index reader
 223       * It also initializes necessary internal structures
 224       *
 225       * @param Zend_Search_Lucene_Interface $reader
 226       * @throws Zend_Search_Lucene_Exception
 227       */
 228      public function execute(Zend_Search_Lucene_Interface $reader)
 229      {
 230          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
 231      }
 232  
 233      /**
 234       * Get document ids likely matching the query
 235       *
 236       * It's an array with document ids as keys (performance considerations)
 237       *
 238       * @return array
 239       * @throws Zend_Search_Lucene_Exception
 240       */
 241      public function matchedDocs()
 242      {
 243          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
 244      }
 245  
 246      /**
 247       * Score specified document
 248       *
 249       * @param integer $docId
 250       * @param Zend_Search_Lucene_Interface $reader
 251       * @return float
 252       * @throws Zend_Search_Lucene_Exception
 253       */
 254      public function score($docId, Zend_Search_Lucene_Interface $reader)
 255      {
 256          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
 257      }
 258  
 259      /**
 260       * Highlight query terms
 261       *
 262       * @param integer &$colorIndex
 263       * @param Zend_Search_Lucene_Document_Html $doc
 264       */
 265      public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
 266      {
 267          $words = array();
 268  
 269          $matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*') , preg_quote($this->_pattern->text, '/')) . '$/';
 270          if (@preg_match('/\pL/u', 'a') == 1) {
 271              // PCRE unicode support is turned on
 272              // add Unicode modifier to the match expression
 273              $matchExpression .= 'u';
 274          }
 275  
 276          $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($doc->getFieldUtf8Value('body'), 'UTF-8');
 277          foreach ($tokens as $token) {
 278              if (preg_match($matchExpression, $token->getTermText()) === 1) {
 279                  $words[] = $token->getTermText();
 280              }
 281          }
 282  
 283          $doc->highlight($words, $this->_getHighlightColor($colorIndex));
 284      }
 285  
 286      /**
 287       * Print a query
 288       *
 289       * @return string
 290       */
 291      public function __toString()
 292      {
 293          // It's used only for query visualisation, so we don't care about characters escaping
 294          return (($this->_pattern->field === null)? '' : $this->_pattern->field . ':') . $this->_pattern->text;
 295      }
 296  }
 297  


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7