| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Search_Query */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';

/** Zend_Search_Lucene_Search_Weight_Boolean */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Boolean.php';


/**
 * Boolean query: a list of subqueries, each of which is marked as required,
 * prohibited or optional.
 *
 * When every subquery is required the signs list is stored as null and the
 * query is evaluated as a conjunction; otherwise the per-subquery signs are
 * kept and the non-conjunction code paths are used.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
{

    /**
     * Subqueries
     * Array of Zend_Search_Lucene_Search_Query
     *
     * @var array
     */
    private $_subqueries = array();

    /**
     * Subqueries signs.
     * If true then subquery is required.
     * If false then subquery is prohibited.
     * If null then subquery is neither prohibited, nor required
     *
     * If array is null then all subqueries are required
     *
     * Keys are aligned with the keys of $_subqueries.
     *
     * @var array
     */
    private $_signs = array();

    /**
     * Result vector (document ids as keys). Filled in by execute().
     *
     * @var array
     */
    private $_resVector = null;

    /**
     * A score factor based on the fraction of all query subqueries
     * that a document contains.
     * float for conjunction queries
     * array of float for non conjunction queries
     *
     * Computed lazily on the first scoring call.
     *
     * @var mixed
     */
    private $_coord = null;


    /**
     * Class constructor.  Create a new Boolean query object.
     *
     * if $signs array is omitted then all subqueries are required
     * it differs from addSubquery() behavior, but should never be used
     *
     * The arrays are stored as given (keys are preserved), so $signs must use
     * the same keys as $subqueries. If $subqueries is not an array, both
     * arguments are ignored and the query starts empty.
     *
     * @param array $subqueries    Array of Zend_Search_Lucene_Search_Query objects
     * @param array $signs         Array of signs.  Sign is boolean|null.
     * @return void
     */
    public function __construct($subqueries = null, $signs = null)
    {
        if (is_array($subqueries)) {
            $this->_subqueries = $subqueries;

            // null means "all subqueries are required" (conjunction query)
            $this->_signs = null;
            // Keep the signs array only if at least one subquery is not required
            if (is_array($signs)) {
                foreach ($signs as $sign ) {
                    if ($sign !== true) {
                        $this->_signs = $signs;
                        break;
                    }
                }
            }
        }
    }


    /**
     * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
     *
     * The sign is specified as:
     *     TRUE  - subquery is required
     *     FALSE - subquery is prohibited
     *     NULL  - subquery is neither prohibited, nor required
     *
     * @param  Zend_Search_Lucene_Search_Query $subquery
     * @param  boolean|null $sign
     * @return void
     */
    public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
        if ($sign !== true || $this->_signs !== null) {       // Skip, if all subqueries are required
            if ($this->_signs === null) {                     // Check, If all previous subqueries are required
                // Leaving "all required" mode: materialize an explicit
                // 'required' sign for every subquery added so far
                foreach ($this->_subqueries as $prevSubquery) {
                    $this->_signs[] = true;
                }
            }
            $this->_signs[] = $sign;
        }

        $this->_subqueries[] = $subquery;
    }

    /**
     * Re-write queries into primitive queries
     *
     * Builds a new Boolean query with the same boost whose subqueries are the
     * rewritten versions of this query's subqueries, keeping their signs.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $query->addSubquery($subquery->rewrite($index),
                                ($this->_signs === null)?  true : $this->_signs[$subqueryId]);
        }

        return $query;
    }

    /**
     * Optimize query in the context of specified index
     *
     * Steps, in order:
     *  - optimize every subquery;
     *  - return an Empty query if a required subquery is empty, drop empty
     *    optional/prohibited ones;
     *  - return an Empty query if nothing but prohibited subqueries is left;
     *  - unwrap a single remaining subquery (folding this query's boost in);
     *  - try to merge Term and MultiTerm subqueries into MultiTerm clauses.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        $subqueries = array();
        $signs      = array();

        // Optimize all subqueries
        foreach ($this->_subqueries as $id => $subquery) {
            $subqueries[] = $subquery->optimize($index);
            $signs[]      = ($this->_signs === null)? true : $this->_signs[$id];
        }

        // Check for empty subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
                if ($signs[$id] === true) {
                    // Matching is required, but is actually empty
                    return new Zend_Search_Lucene_Search_Query_Empty();
                } else {
                    // Matching is optional or prohibited, but is empty
                    // Remove it from subqueries and signs list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }


        // Check if all non-empty subqueries are prohibited
        // (this is also true when no subqueries are left at all)
        $allProhibited = true;
        foreach ($signs as $sign) {
            if ($sign !== false) {
                $allProhibited = false;
                break;
            }
        }
        if ($allProhibited) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }


        // Check, if reduced subqueries list has only one entry
        if (count($subqueries) == 1) {
            // It's a query with only one required or optional clause
            // (it's already checked, that it's not a prohibited clause)

            if ($this->getBoost() == 1) {
                return reset($subqueries);
            }

            // Clone so that the subquery object itself keeps its own boost
            $optimizedQuery = clone reset($subqueries);
            $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());

            return $optimizedQuery;
        }


        // Check, if reduced subqueries list is empty
        if (count($subqueries) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }


        // Prepare first candidate for optimized query
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
        $optimizedQuery->setBoost($this->getBoost());


        // Parallel lists (same keys) describing the decomposed terms
        $terms        = array();
        $tsigns       = array();
        $boostFactors = array();

        // Try to decompose term and multi-term subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
                $terms[]        = $subquery->getTerm();
                $tsigns[]       = $signs[$id];
                $boostFactors[] = $subquery->getBoost();

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);
            } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
                $subTerms = $subquery->getTerms();
                $subSigns = $subquery->getSigns();

                if ($signs[$id] === true) {
                    // It's a required multi-term subquery.
                    // Something like '... +(+term1 -term2 term3 ...) ...'

                    // Multi-term required subquery can be decomposed only if it contains
                    // required terms and doesn't contain prohibited terms:
                    // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                    //
                    // Check this
                    $hasRequired   = false;
                    $hasProhibited = false;
                    if ($subSigns === null) {
                        // All subterms are required
                        $hasRequired = true;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign === true) {
                                $hasRequired   = true;
                            } else if ($sign === false) {
                                $hasProhibited = true;
                                break;
                            }
                        }
                    }
                    // Continue if subquery has prohibited terms or doesn't have required terms
                    if ($hasProhibited  ||  !$hasRequired) {
                        continue;
                    }

                    foreach ($subTerms as $termId => $term) {
                        $terms[]        = $term;
                        $tsigns[]       = ($subSigns === null)? true : $subSigns[$termId];
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);

                } else { // $signs[$id] === null  ||  $signs[$id] === false
                    // It's an optional or prohibited multi-term subquery.
                    // Something like '... (+term1 -term2 term3 ...) ...'
                    // or
                    // something like '... -(+term1 -term2 term3 ...) ...'

                    // Multi-term optional and prohibited subqueries can be decomposed
                    // only if all terms are optional.
                    //
                    // Check if all terms are optional.
                    $onlyOptional = true;
                    if ($subSigns === null) {
                        // All subterms are required
                        $onlyOptional = false;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign !== null) {
                                $onlyOptional = false;
                                break;
                            }
                        }
                    }

                    // Continue if non-optional terms are presented in this multi-term subquery
                    if (!$onlyOptional) {
                        continue;
                    }

                    // Each term inherits the sign of the enclosing clause
                    foreach ($subTerms as $termId => $term) {
                        $terms[]        = $term;
                        $tsigns[]       = ($signs[$id] === null)? null  /* optional */ :
                                                                  false /* prohibited */;
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }


        // Check, if there are no decomposed subqueries
        if (count($terms) == 0 ) {
            // return prepared candidate
            return $optimizedQuery;
        }


        // Check, if all subqueries have been decomposed and all terms have the same boost factor
        if (count($subqueries) == 0  &&  count(array_unique($boostFactors)) == 1) {
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
            $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());

            return $optimizedQuery;
        }


        // This boolean query can't be transformed to Term/MultiTerm query and still contains
        // several subqueries

        // Separate prohibited terms
        $prohibitedTerms        = array();
        foreach ($terms as $id => $term) {
            if ($tsigns[$id] === false) {
                $prohibitedTerms[]        = $term;

                unset($terms[$id]);
                unset($tsigns[$id]);
                unset($boostFactors[$id]);
            }
        }

        if (count($terms) == 1) {
            $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            $signs[]      = reset($tsigns);

            // Clear terms list
            $terms = array();
        } else if (count($terms) > 1  &&  count(array_unique($boostFactors)) == 1) {
            $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
            // (prohibited terms were separated above, so $tsigns holds only true/null here)
            $signs[]      = (in_array(true, $tsigns, true))? true : null;

            // Clear terms list
            $terms = array();
        }

        if (count($prohibitedTerms) == 1) {
            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
            $signs[]      = false;

            // Clear prohibited terms list
            $prohibitedTerms = array();
        } else if (count($prohibitedTerms) > 1) {
            // prepare signs array
            $prohibitedSigns = array();
            foreach ($prohibitedTerms as $id => $term) {
                // all prohibited term are grouped as optional into multi-term query
                $prohibitedSigns[$id] = null;
            }

            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
            // Clause sign is 'prohibited'
            $signs[]      = false;

            // Clear terms list
            $prohibitedTerms = array();
        }

        /** @todo Group terms with the same boost factors together */

        // Check, that all terms are processed
        // Replace candidate for optimized query
        // (otherwise the first candidate prepared above is returned unchanged)
        if (count($terms) == 0  &&  count($prohibitedTerms) == 0) {
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
            $optimizedQuery->setBoost($this->getBoost());
        }

        return $optimizedQuery;
    }

    /**
     * Returns subqueries
     *
     * @return array
     */
    public function getSubqueries()
    {
        return $this->_subqueries;
    }


    /**
     * Return subqueries signs
     *
     * @return array|null  null when all subqueries are required
     */
    public function getSigns()
    {
        return $this->_signs;
    }


    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * The created weight is also stored in $this->_weight.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
        return $this->_weight;
    }


    /**
     * Calculate result vector for Conjunction query
     * (like '<subquery1> AND <subquery2> AND <subquery3>')
     *
     * The result is the key intersection of all subqueries' matchedDocs(),
     * sorted by document id.
     */
    private function _calculateConjunctionResult()
    {
        $this->_resVector = null;

        if (count($this->_subqueries) == 0) {
            $this->_resVector = array();
        }

        foreach ($this->_subqueries as $subquery) {
            if($this->_resVector === null) {
                // First subquery seeds the result vector
                $this->_resVector = $subquery->matchedDocs();
            } else {
                $this->_resVector = array_intersect_key($this->_resVector, $subquery->matchedDocs());
            }

            if (count($this->_resVector) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }


    /**
     * Calculate result vector for non Conjunction query
     * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
     *
     * If at least one subquery is required, the result is the key
     * intersection of the required subqueries' matches; otherwise it is the
     * union of the optional subqueries' matches. Prohibited subqueries are
     * not applied here; they are handled at scoring time.
     */
    private function _calculateNonConjunctionResult()
    {
        $required = null;
        $optional = array();

        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $docs = $subquery->matchedDocs();

            if ($this->_signs[$subqueryId] === true) {
                // required
                if ($required !== null) {
                    // array intersection
                    $required = array_intersect_key($required, $docs);
                } else {
                    $required = $docs;
                }
            } elseif ($this->_signs[$subqueryId] === false) {
                // prohibited
                // Do nothing. matchedDocs() may include non-matching id's
            } else {
                // neither required, nor prohibited
                // array union
                $optional += $docs;
            }
        }

        // Required matches take precedence over optional ones
        $this->_resVector = ($required !== null) ? $required : $optional;

        ksort($this->_resVector, SORT_NUMERIC);
    }


    /**
     * Score calculator for conjunction queries (all subqueries are required)
     *
     * Returns 0 as soon as any subquery scores 0 for the document.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = $reader->getSimilarity()->coord(count($this->_subqueries),
                                                            count($this->_subqueries) );
        }

        $score = 0;

        foreach ($this->_subqueries as $subquery) {
            $subscore = $subquery->score($docId, $reader);

            if ($subscore == 0) {
                return 0;
            }

            // Reuse the value computed above instead of scoring the subquery twice
            $score += $subscore * $this->_coord;
        }

        // NOTE(review): coord is applied both per subquery (above) and to the
        // sum (here). Kept as is to preserve existing scores - confirm intent.
        return $score * $this->_coord * $this->getBoost();
    }


    /**
     * Score calculator for non conjunction queries (not all subqueries are required)
     *
     * Returns 0 if a prohibited subquery matches or a required one does not.
     * Otherwise the sum of matching subscores is scaled by the coord factor
     * for the number of matched subqueries and by this query's boost.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = array();

            // Number of subqueries that are allowed to match
            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false /* not prohibited */) {
                    $maxCoord++;
                }
            }

            // Precompute coord factor for every possible number of matches
            for ($count = 0; $count <= $maxCoord; $count++) {
                $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
            }
        }

        $score = 0;
        $matchedSubqueries = 0;
        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $subscore = $subquery->score($docId, $reader);

            // Prohibited
            if ($this->_signs[$subqueryId] === false && $subscore != 0) {
                return 0;
            }

            // is required, but doesn't match
            if ($this->_signs[$subqueryId] === true &&  $subscore == 0) {
                return 0;
            }

            if ($subscore != 0) {
                $matchedSubqueries++;
                $score += $subscore;
            }
        }

        return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Executes every subquery, then builds the result vector.
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        // Initialize weight if it's not done yet
        $this->_initWeight($reader);

        foreach ($this->_subqueries as $subquery) {
            $subquery->execute($reader);
        }

        if ($this->_signs === null) {
            $this->_calculateConjunctionResult();
        } else {
            $this->_calculateNonConjunctionResult();
        }
    }



    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * Documents that are not in the result vector score 0.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * Terms of prohibited subqueries are not included.
     *
     * @return array
     */
    public function getQueryTerms()
    {
        $terms = array();

        foreach ($this->_subqueries as $id => $subquery) {
            if ($this->_signs === null  ||  $this->_signs[$id] !== false) {
                $terms = array_merge($terms, $subquery->getQueryTerms());
            }
        }

        return $terms;
    }

    /**
     * Highlight query terms
     *
     * Delegates to every subquery that is not prohibited.
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        foreach ($this->_subqueries as $id => $subquery) {
            if ($this->_signs === null  ||  $this->_signs[$id] !== false) {
                $subquery->highlightMatchesDOM($doc, $colorIndex);
            }
        }
    }

    /**
     * Print a query
     *
     * Each subquery is rendered as '(<subquery>)', prefixed with '+' when
     * required or '-' when prohibited, and suffixed with '^<boost>' when its
     * boost differs from 1. Clauses are separated by a single space.
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping

        $query = '';

        // Track the first clause explicitly: subquery keys are not guaranteed
        // to start at 0 (optimize() passes arrays with removed entries to the
        // constructor), so the key can't be used to decide on the separator.
        $isFirst = true;

        foreach ($this->_subqueries as $id => $subquery) {
            if (!$isFirst) {
                $query .= ' ';
            }
            $isFirst = false;

            if ($this->_signs === null || $this->_signs[$id] === true) {
                $query .= '+';
            } else if ($this->_signs[$id] === false) {
                $query .= '-';
            }

            $query .= '(' . $subquery->__toString() . ')';

            if ($subquery->getBoost() != 1) {
                $query .= '^' . $subquery->getBoost();
            }
        }

        return $query;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |