| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Search_Query */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';

/** Zend_Search_Lucene_Search_Weight_MultiTerm */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/MultiTerm.php';


/**
 * Query matching documents against a list of terms, each of which may be
 * required, prohibited or optional.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
{

    /**
     * Terms to find.
     * Array of Zend_Search_Lucene_Index_Term
     *
     * @var array
     */
    private $_terms = array();

    /**
     * Term signs, keyed like $_terms.
     * If true then term is required.
     * If false then term is prohibited.
     * If null then term is neither prohibited, nor required
     *
     * If array is null then all terms are required
     *
     * @var array
     */
    private $_signs;

    /**
     * Result vector (matched document ids are the array keys).
     *
     * @var array
     */
    private $_resVector = null;

    /**
     * Term frequency vectors, as returned by the reader's termFreqs().
     * Array of Arrays:
     * term1Id => (docId => freq, ...)
     * term2Id => (docId => freq, ...)
     *
     * @var array
     */
    private $_termsFreqs = array();


    /**
     * A score factor based on the fraction of all query terms
     * that a document contains.
     * float for conjunction queries
     * array of float for non conjunction queries
     *
     * @var mixed
     */
    private $_coord = null;


    /**
     * Terms weights
     * array of Zend_Search_Lucene_Search_Weight
     *
     * @var array
     */
    private $_weights = array();


    /**
     * Class constructor.  Create a new multi-term query object.
     *
     * if $signs array is omitted then all terms are required
     * it differs from addTerm() behavior, but should never be used
     *
     * A $signs array consisting only of TRUE values is stored as null,
     * which is the internal marker for "every term is required".
     * Both arguments are ignored unless $terms is an array.
     *
     * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
     * @param array $signs    Array of signs.  Sign is boolean|null.
     * @return void
     */
    public function __construct($terms = null, $signs = null)
    {
        if (is_array($terms)) {
            $this->_terms = $terms;

            $this->_signs = null;
            // Keep the signs only if at least one term is not required
            if (is_array($signs)) {
                foreach ($signs as $sign ) {
                    if ($sign !== true) {
                        $this->_signs = $signs;
                        break;
                    }
                }
            }
        }
    }


    /**
     * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
     *
     * The sign is specified as:
     *     TRUE  - term is required
     *     FALSE - term is prohibited
     *     NULL  - term is neither prohibited, nor required
     *
     * @param  Zend_Search_Lucene_Index_Term $term
     * @param  boolean|null $sign
     * @return void
     */
    public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
        if ($sign !== true || $this->_signs !== null) {       // Skip, if all terms are required
            if ($this->_signs === null) {                     // Check, If all previous terms are required
                // Leaving the "all required" shortcut: materialise an
                // explicit TRUE sign for every term added so far.
                foreach ($this->_terms as $prevTerm) {
                    $this->_signs[] = true;
                }
            }
            $this->_signs[] = $sign;
        }

        $this->_terms[] = $term;
    }


    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Returns an empty query when there are no terms, this query itself when
     * every term has a field, and otherwise a boolean query built from
     * rewritten single-term subqueries carrying the same signs and boost.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if (count($this->_terms) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // Check, that all fields are qualified
        $allQualified = true;
        foreach ($this->_terms as $term) {
            if ($term->field === null) {
                $allQualified = false;
                break;
            }
        }

        if ($allQualified) {
            return $this;
        } else {
            /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
            $query = new Zend_Search_Lucene_Search_Query_Boolean();
            $query->setBoost($this->getBoost());

            foreach ($this->_terms as $termId => $term) {
                $subquery = new Zend_Search_Lucene_Search_Query_Term($term);

                $query->addSubquery($subquery->rewrite($index),
                                    ($this->_signs === null)?  true : $this->_signs[$termId]);
            }

            return $query;
        }
    }

    /**
     * Optimize query in the context of specified index
     *
     * Terms the index does not contain are dropped when optional or
     * prohibited; a missing required term makes the whole query empty.
     * The surviving terms and signs are re-indexed from zero before the
     * optimized query is built, so that it never carries gaps in its keys.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        $terms = $this->_terms;
        $signs = $this->_signs;

        foreach ($terms as $id => $term) {
            if (!$index->hasTerm($term)) {
                if ($signs === null  ||  $signs[$id] === true) {
                    // Term is required
                    return new Zend_Search_Lucene_Search_Query_Empty();
                } else {
                    // Term is optional or prohibited
                    // Remove it from terms and signs list
                    unset($terms[$id]);
                    unset($signs[$id]);
                }
            }
        }

        // Check if all presented terms are prohibited
        $allProhibited = true;
        if ($signs === null) {
            $allProhibited = false;
        } else {
            foreach ($signs as $sign) {
                if ($sign !== false) {
                    $allProhibited = false;
                    break;
                }
            }
        }
        if ($allProhibited) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        /**
         * @todo make an optimization for repeated terms
         * (they may have different signs)
         */

        if (count($terms) == 1) {
            // It's already checked, that it's not a prohibited term

            // It's one term query with one required or optional element
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
            $optimizedQuery->setBoost($this->getBoost());

            return $optimizedQuery;
        }

        if (count($terms) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // unset() above leaves holes in the keys.  addTerm() appends to the
        // terms and signs arrays independently, so a gap-keyed query would
        // pair signs with the wrong terms on the next addTerm() call.
        // Re-index both lists together, following the order of the terms.
        $packedTerms = array();
        $packedSigns = ($signs === null) ? null : array();
        foreach ($terms as $id => $term) {
            $packedTerms[] = $term;
            if ($signs !== null) {
                // A term without a sign entry is treated as optional (null)
                $packedSigns[] = isset($signs[$id]) ? $signs[$id] : null;
            }
        }

        $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($packedTerms, $packedSigns);
        $optimizedQuery->setBoost($this->getBoost());
        return $optimizedQuery;
    }


    /**
     * Returns query term
     *
     * @return array
     */
    public function getTerms()
    {
        return $this->_terms;
    }


    /**
     * Return terms signs
     *
     * @return array|null  null means that all terms are required
     */
    public function getSigns()
    {
        return $this->_signs;
    }


    /**
     * Set weight for specified term
     *
     * @param integer $num  Key of the term in the terms array
     * @param Zend_Search_Lucene_Search_Weight_Term $weight
     */
    public function setWeight($num, $weight)
    {
        $this->_weights[$num] = $weight;
    }


    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * The created weight is also stored in $this->_weight.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
        return $this->_weight;
    }


    /**
     * Calculate result vector for Conjunction query
     * (like '+something +another')
     *
     * The result is the intersection of the documents of every term.
     * Term frequencies are collected only while the intersection is
     * non-empty.
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
    {
        $this->_resVector = null;

        if (count($this->_terms) == 0) {
            $this->_resVector = array();
        }

        foreach( $this->_terms as $termId=>$term ) {
            // array_flip() turns the list of document ids into keys
            if($this->_resVector === null) {
                $this->_resVector = array_flip($reader->termDocs($term));
            } else {
                $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
            }

            if (count($this->_resVector) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }

            $this->_termsFreqs[$termId] = $reader->termFreqs($term);
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }


    /**
     * Calculate result vector for non Conjunction query
     * (like '+something -another')
     *
     * When at least one term is required the result is the intersection of
     * the required terms' documents; otherwise it is the union of the
     * optional terms' documents.  Documents of prohibited terms are removed
     * in both cases.
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
    {
        $required   = null;
        $optional   = array();
        $prohibited = array();

        foreach ($this->_terms as $termId => $term) {
            $termDocs = array_flip($reader->termDocs($term));

            if ($this->_signs[$termId] === true) {
                // required
                if ($required !== null) {
                    // array intersection
                    $required = array_intersect_key($required, $termDocs);
                } else {
                    $required = $termDocs;
                }
            } elseif ($this->_signs[$termId] === false) {
                // prohibited
                // array union
                $prohibited += $termDocs;
            } else {
                // neither required, nor prohibited
                // array union
                $optional += $termDocs;
            }

            $this->_termsFreqs[$termId] = $reader->termFreqs($term);
        }

        if ($required !== null) {
            $this->_resVector = (count($prohibited) > 0) ?
                                    array_diff_key($required, $prohibited) :
                                    $required;
        } else {
            $this->_resVector = (count($prohibited) > 0) ?
                                    array_diff_key($optional, $prohibited) :
                                    $optional;
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }


    /**
     * Score calculator for conjunction queries (all terms are required)
     *
     * The coord factor is computed on the first call and cached.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = $reader->getSimilarity()->coord(count($this->_terms),
                                                            count($this->_terms) );
        }

        $score = 0.0;

        foreach ($this->_terms as $termId=>$term) {
            /**
             * We don't need to check that term freq is not 0
             * Score calculation is performed only for matched docs
             */
            $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
                      $this->_weights[$termId]->getValue() *
                      $reader->norm($docId, $term->field);
        }

        return $score * $this->_coord * $this->getBoost();
    }


    /**
     * Score calculator for non conjunction queries (not all terms are required)
     *
     * On the first call a table of coord factors is built, one entry per
     * possible number of matched non-prohibited terms.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = array();

            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false /* not prohibited */) {
                    $maxCoord++;
                }
            }

            for ($count = 0; $count <= $maxCoord; $count++) {
                $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
            }
        }

        $score = 0.0;
        $matchedTerms = 0;
        foreach ($this->_terms as $termId=>$term) {
            // Check if term is
            if ($this->_signs[$termId] !== false &&            // not prohibited
                isset($this->_termsFreqs[$termId][$docId])     // matched
               ) {
                $matchedTerms++;

                /**
                 * We don't need to check that term freq is not 0
                 * Score calculation is performed only for matched docs
                 */
                $score +=
                      $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
                      $this->_weights[$termId]->getValue() *
                      $reader->norm($docId, $term->field);
            }
        }

        return $score * $this->_coord[$matchedTerms] * $this->getBoost();
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        // null signs mean that every term is required
        if ($this->_signs === null) {
            $this->_calculateConjunctionResult($reader);
        } else {
            $this->_calculateNonConjunctionResult($reader);
        }

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * Documents that are not in the result vector score 0.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * Prohibited terms are left out.
     *
     * @return array
     */
    public function getQueryTerms()
    {
        if ($this->_signs === null) {
            return $this->_terms;
        }

        $terms = array();

        foreach ($this->_signs as $id => $sign) {
            if ($sign !== false) {
                $terms[] = $this->_terms[$id];
            }
        }

        return $terms;
    }

    /**
     * Highlight query terms
     *
     * The text of every non-prohibited term is passed to the document's
     * highlight() method.
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        $words = array();

        if ($this->_signs === null) {
            foreach ($this->_terms as $term) {
                $words[] = $term->text;
            }
        } else {
            foreach ($this->_signs as $id => $sign) {
                if ($sign !== false) {
                    $words[] = $this->_terms[$id]->text;
                }
            }
        }

        $doc->highlight($words, $this->_getHighlightColor($colorIndex));
    }

    /**
     * Print a query
     *
     * Terms are separated by single spaces; required terms are prefixed
     * with '+', prohibited ones with '-'.  A boost different from 1 wraps
     * the whole query as "(...)^boost".
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping

        // Collect one chunk per term and join them afterwards, so that the
        // separator depends on the position of the term and not on the
        // value of its array key.
        $chunks = array();

        foreach ($this->_terms as $id => $term) {
            $chunk = '';

            if ($this->_signs === null || $this->_signs[$id] === true) {
                $chunk .= '+';
            } else if ($this->_signs[$id] === false) {
                $chunk .= '-';
            }

            if ($term->field !== null) {
                $chunk .= $term->field . ':';
            }
            $chunk .= $term->text;

            $chunks[] = $chunk;
        }

        $query = implode(' ', $chunks);

        if ($this->getBoost() != 1) {
            $query = '(' . $query . ')^' . $this->getBoost();
        }

        return $query;
    }
}

title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |