| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/**
 * Zend_Search_Lucene_Search_Query
 */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';

/**
 * Zend_Search_Lucene_Search_Weight_Phrase
 */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Phrase.php';


/**
 * A Query that matches documents containing a particular sequence of terms.
 *
 * Terms and their relative offsets are kept in two arrays that share the same
 * keys. When terms are supplied through the constructor the caller's array keys
 * are preserved, so no code in this class may assume that key 0 exists; the
 * first term is always obtained with reset().
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
{
    /**
     * Terms to find.
     * Array of Zend_Search_Lucene_Index_Term objects.
     *
     * @var array
     */
    private $_terms;

    /**
     * Term positions (relative positions of terms within the phrase).
     * Array of integers, keyed like $_terms.
     *
     * @var array
     */
    private $_offsets;

    /**
     * Sets the number of other words permitted between words in query phrase.
     * If zero, then this is an exact phrase search.  For larger values this works
     * like a WITHIN or NEAR operator.
     *
     * The slop is in fact an edit-distance, where the units correspond to
     * moves of terms in the query phrase out of position.  For example, to switch
     * the order of two words requires two moves (the first move places the words
     * atop one another), so to permit re-orderings of phrases, the slop must be
     * at least two.
     * More exact matches are scored higher than sloppier matches, thus search
     * results are sorted by exactness.
     *
     * The slop is zero by default, requiring exact matches.
     *
     * @var integer
     */
    private $_slop;

    /**
     * Result vector.
     * Document ids are stored as array keys (see execute()).
     *
     * @var array
     */
    private $_resVector = null;

    /**
     * Terms positions vectors.
     * Array of Arrays:
     * term1Id => (docId => array( pos1, pos2, ... ), ...)
     * term2Id => (docId => array( pos1, pos2, ... ), ...)
     *
     * @var array
     */
    private $_termsPositions = array();

    /**
     * Class constructor.  Create a new phrase query.
     *
     * The keys of $terms are preserved as term ids. When $offsets is omitted,
     * each term gets its ordinal position (0, 1, 2, ...) as offset, stored under
     * the same key as the term.
     *
     * @param array  $terms    Terms to search. Array of strings.
     * @param array  $offsets  Relative term positions. Array of integers.
     * @param string $field    Field to search.
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct($terms = null, $offsets = null, $field = null)
    {
        $this->_slop = 0;

        if (is_array($terms)) {
            $this->_terms = array();
            foreach ($terms as $termId => $termText) {
                $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field):
                                                            new Zend_Search_Lucene_Index_Term($termText);
            }
        } else if ($terms === null) {
            $this->_terms = array();
        } else {
            throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
        }

        if (is_array($offsets)) {
            if (count($this->_terms) != count($offsets)) {
                throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
            }
            $this->_offsets = $offsets;
        } else if ($offsets === null) {
            $this->_offsets = array();
            foreach ($this->_terms as $termId => $term) {
                // The offset is the ordinal of the term, not its array key
                $position = count($this->_offsets);
                $this->_offsets[$termId] = $position;
            }
        } else {
            throw new Zend_Search_Lucene_Exception('offsets argument must be array of integers or null');
        }
    }

    /**
     * Set slop
     *
     * @param integer $slop
     */
    public function setSlop($slop)
    {
        $this->_slop = $slop;
    }


    /**
     * Get slop
     *
     * @return integer
     */
    public function getSlop()
    {
        return $this->_slop;
    }


    /**
     * Adds a term to the end of the query phrase.
     * The relative position of the term is specified explicitly or the one immediately
     * after the last term added.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $position
     * @throws Zend_Search_Lucene_Exception  If the term's field differs from the field of the last term added
     */
    public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
        if ((count($this->_terms) != 0)&&(end($this->_terms)->field != $term->field)) {
            throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
                                                   $term->field . ':' . $term->text);
        }

        $this->_terms[] = $term;
        if ($position !== null) {
            $this->_offsets[] = $position;
        } else if (count($this->_offsets) != 0) {
            $this->_offsets[] = end($this->_offsets) + 1;
        } else {
            $this->_offsets[] = 0;
        }
    }


    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * An empty phrase becomes an Empty query. A phrase whose first term already
     * has a field is returned unchanged. Otherwise the phrase is expanded into a
     * Boolean query holding one phrase subquery per field name returned by
     * $index->getFieldNames(true).
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if (count($this->_terms) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // Term ids are the caller's array keys, so key 0 is not guaranteed to
        // exist; reset() yields the first term whatever the keys are.
        if (reset($this->_terms)->field !== null) {
            return $this;
        }

        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        foreach ($index->getFieldNames(true) as $fieldName) {
            $subquery = new Zend_Search_Lucene_Search_Query_Phrase();
            $subquery->setSlop($this->getSlop());

            foreach ($this->_terms as $termId => $term) {
                $qualifiedTerm = new Zend_Search_Lucene_Index_Term($term->text, $fieldName);

                $subquery->addTerm($qualifiedTerm, $this->_offsets[$termId]);
            }

            $query->addSubquery($subquery);
        }

        return $query;
    }

    /**
     * Optimize query in the context of specified index
     *
     * Returns an Empty query when any phrase term is missing from the index or
     * when the phrase has no terms, a Term query (carrying this query's boost)
     * when the phrase has exactly one term, and this query otherwise.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        // Check, that index contains all phrase terms
        foreach ($this->_terms as $term) {
            if (!$index->hasTerm($term)) {
                return new Zend_Search_Lucene_Search_Query_Empty();
            }
        }

        if (count($this->_terms) == 1) {
            // It's one term query
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
            $optimizedQuery->setBoost($this->getBoost());

            return $optimizedQuery;
        }

        if (count($this->_terms) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }


        return $this;
    }

    /**
     * Returns query term
     *
     * @return array  Array of Zend_Search_Lucene_Index_Term objects
     */
    public function getTerms()
    {
        return $this->_terms;
    }


    /**
     * Set weight for specified term
     *
     * NOTE(review): $_weights is not declared in this class. Unless the parent
     * class declares it, this assignment creates a dynamic property - confirm
     * against Zend_Search_Lucene_Search_Query before declaring it here.
     *
     * @param integer $num
     * @param Zend_Search_Lucene_Search_Weight_Term $weight
     */
    public function setWeight($num, $weight)
    {
        $this->_weights[$num] = $weight;
    }


    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * The created weight is also stored in $this->_weight, which score() reads.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_weight = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
        return $this->_weight;
    }


    /**
     * Score calculator for exact phrase queries (terms sequence is fixed)
     *
     * Counts the positions of the rarest term (within the document) at which
     * every other term is found exactly at its expected relative offset.
     * Relies on $_termsPositions having been filled by execute().
     *
     * @param integer $docId
     * @return float
     */
    public function _exactPhraseFreq($docId)
    {
        $freq = 0;

        // Term Id with lowest cardinality
        $lowCardTermId = null;

        // Calculate $lowCardTermId
        foreach ($this->_terms as $termId => $term) {
            if ($lowCardTermId === null ||
                count($this->_termsPositions[$termId][$docId]) <
                count($this->_termsPositions[$lowCardTermId][$docId]) ) {
                $lowCardTermId = $termId;
            }
        }

        // Walk through positions of the term with lowest cardinality
        foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) {
            // We expect phrase to be found
            $freq++;

            // Walk through other terms
            foreach ($this->_terms as $termId => $term) {
                if ($termId != $lowCardTermId) {
                    $expectedPosition = $lowCardPos +
                                            ($this->_offsets[$termId] -
                                             $this->_offsets[$lowCardTermId]);

                    if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
                        $freq--;  // Phrase wasn't found.
                        break;
                    }
                }
            }
        }

        return $freq;
    }

    /**
     * Score calculator for sloppy phrase queries (slop is non-zero)
     *
     * Builds candidate phrases (one position per term) from the term positions
     * recorded by execute(), then for each candidate computes the smallest
     * total displacement from the query offsets over all start shifts in
     * [-slop, slop]. Candidates whose smallest displacement is within the slop
     * contribute sloppyFreq(distance) of the reader's similarity to the result.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
    {
        $freq = 0;

        $phraseQueue = array();
        $phraseQueue[0] = array(); // empty phrase
        $lastTerm = null;

        // Walk through the terms to create phrases.
        foreach ($this->_terms as $termId => $term) {
            // Only phrases that existed before this term are extended
            $queueSize = count($phraseQueue);
            $firstPass = true;

            // Walk through the term positions.
            // Each term position produces a set of phrases.
            foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) {
                if ($firstPass) {
                    // First position of the term: extend existing phrases in place
                    for ($count = 0; $count < $queueSize; $count++) {
                        $phraseQueue[$count][$termId] = $termPosition;
                    }
                } else {
                    // Further positions: fork each existing phrase, skipping forks
                    // that are already farther than slop from the previous term
                    for ($count = 0; $count < $queueSize; $count++) {
                        if ($lastTerm !== null &&
                            abs( $termPosition - $phraseQueue[$count][$lastTerm] -
                                 ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
                            continue;
                        }

                        $newPhraseId = count($phraseQueue);
                        $phraseQueue[$newPhraseId]          = $phraseQueue[$count];
                        $phraseQueue[$newPhraseId][$termId] = $termPosition;
                    }

                }

                $firstPass = false;
            }
            $lastTerm = $termId;
        }


        foreach ($phraseQueue as $phrasePos) {
            $minDistance = null;

            for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
                $distance = 0;
                $start = reset($phrasePos) - reset($this->_offsets) + $shift;

                foreach ($this->_terms as $termId => $term) {
                    $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);

                    if($distance > $this->_slop) {
                        break;
                    }
                }

                if ($minDistance === null || $distance < $minDistance) {
                    $minDistance = $distance;
                }
            }

            if ($minDistance <= $this->_slop) {
                $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
            }
        }

        return $freq;
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Fills $_resVector with the ids of documents containing every phrase term
     * (ids are array keys, sorted numerically) and $_termsPositions with the
     * positions of each processed term.
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        $this->_resVector = null;

        if (count($this->_terms) == 0) {
            $this->_resVector = array();
        }

        foreach( $this->_terms as $termId=>$term ) {
            // Document ids are flipped into keys so that sets can be
            // intersected by key
            if($this->_resVector === null) {
                $this->_resVector = array_flip($reader->termDocs($term));
            } else {
                $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
            }

            if (count($this->_resVector) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }

            $this->_termsPositions[$termId] = $reader->termPositions($term);
        }

        ksort($this->_resVector, SORT_NUMERIC);

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * Returns 0 for documents that are not in the result vector or whose
     * phrase frequency is zero; otherwise tf(freq) * weight * norm * boost.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_slop == 0) {
                $freq = $this->_exactPhraseFreq($docId);
            } else {
                $freq = $this->_sloppyPhraseFreq($docId, $reader);
            }

            if ($freq != 0) {
                $tf = $reader->getSimilarity()->tf($freq);
                $weight = $this->_weight->getValue();
                $norm = $reader->norm($docId, reset($this->_terms)->field);

                return $tf * $weight * $norm * $this->getBoost();
            }

            // Included in result, but calculated freq is zero
            return 0;
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * @return array  Array of Zend_Search_Lucene_Index_Term objects
     */
    public function getQueryTerms()
    {
        return $this->_terms;
    }

    /**
     * Highlight query terms
     *
     * Passes the text of every phrase term to $doc->highlight() together with
     * the colour obtained from _getHighlightColor($colorIndex).
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        $words = array();
        foreach ($this->_terms as $term) {
            $words[] = $term->text;
        }

        $doc->highlight($words, $this->_getHighlightColor($colorIndex));
    }

    /**
     * Print a query
     *
     * Format: [field:]"term1 term2 ..."[~slop]
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping

        $query = '';

        // Term ids are the caller's array keys, so the first term is taken
        // with reset() (false on an empty phrase) instead of key 0.
        $firstTerm = reset($this->_terms);
        if ($firstTerm !== false && $firstTerm->field !== null) {
            $query .= $firstTerm->field . ':';
        }

        // Join on the term order rather than on the key value, so that
        // separators are right for any key set.
        $texts = array();
        foreach ($this->_terms as $term) {
            $texts[] = $term->text;
        }

        $query .= '"' . implode(' ', $texts) . '"';

        if ($this->_slop != 0) {
            $query .= '~' . $this->_slop;
        }

        return $query;
    }
}

title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |