| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Search_Similarity_Default */
require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity/Default.php';


/**
 * Abstract scoring API for Zend_Search_Lucene.
 *
 * Holds the process-wide default Similarity implementation, provides the
 * one-byte encoding/decoding of normalization factors (backed by a 256-entry
 * lookup table), and declares the scoring hooks that concrete
 * implementations must supply.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Search_Similarity
{
    /**
     * The Similarity implementation used by default.
     * Lazily created by getDefault() when nothing has been set.
     *
     * @var Zend_Search_Lucene_Search_Similarity
     */
    private static $_defaultImpl;

    /**
     * Cache of decoded bytes.
     * Array of floats, indexed by the encoded byte value (0..255) and sorted
     * in strictly ascending order; _floatToByte() relies on that ordering
     * for its binary search.
     *
     * @var array
     */
    private static $_normTable = array( 0   => 0.0,
                                        1   => 5.820766E-10,
                                        2   => 6.9849193E-10,
                                        3   => 8.1490725E-10,
                                        4   => 9.313226E-10,
                                        5   => 1.1641532E-9,
                                        6   => 1.3969839E-9,
                                        7   => 1.6298145E-9,
                                        8   => 1.8626451E-9,
                                        9   => 2.3283064E-9,
                                        10  => 2.7939677E-9,
                                        11  => 3.259629E-9,
                                        12  => 3.7252903E-9,
                                        13  => 4.656613E-9,
                                        14  => 5.5879354E-9,
                                        15  => 6.519258E-9,
                                        16  => 7.4505806E-9,
                                        17  => 9.313226E-9,
                                        18  => 1.1175871E-8,
                                        19  => 1.3038516E-8,
                                        20  => 1.4901161E-8,
                                        21  => 1.8626451E-8,
                                        22  => 2.2351742E-8,
                                        23  => 2.6077032E-8,
                                        24  => 2.9802322E-8,
                                        25  => 3.7252903E-8,
                                        26  => 4.4703484E-8,
                                        27  => 5.2154064E-8,
                                        28  => 5.9604645E-8,
                                        29  => 7.4505806E-8,
                                        30  => 8.940697E-8,
                                        31  => 1.0430813E-7,
                                        32  => 1.1920929E-7,
                                        33  => 1.4901161E-7,
                                        34  => 1.7881393E-7,
                                        35  => 2.0861626E-7,
                                        36  => 2.3841858E-7,
                                        37  => 2.9802322E-7,
                                        38  => 3.5762787E-7,
                                        39  => 4.172325E-7,
                                        40  => 4.7683716E-7,
                                        41  => 5.9604645E-7,
                                        42  => 7.1525574E-7,
                                        43  => 8.34465E-7,
                                        44  => 9.536743E-7,
                                        45  => 1.1920929E-6,
                                        46  => 1.4305115E-6,
                                        47  => 1.66893E-6,
                                        48  => 1.9073486E-6,
                                        49  => 2.3841858E-6,
                                        50  => 2.861023E-6,
                                        51  => 3.33786E-6,
                                        52  => 3.8146973E-6,
                                        53  => 4.7683716E-6,
                                        54  => 5.722046E-6,
                                        55  => 6.67572E-6,
                                        56  => 7.6293945E-6,
                                        57  => 9.536743E-6,
                                        58  => 1.1444092E-5,
                                        59  => 1.335144E-5,
                                        60  => 1.5258789E-5,
                                        61  => 1.9073486E-5,
                                        62  => 2.2888184E-5,
                                        63  => 2.670288E-5,
                                        64  => 3.0517578E-5,
                                        65  => 3.8146973E-5,
                                        66  => 4.5776367E-5,
                                        67  => 5.340576E-5,
                                        68  => 6.1035156E-5,
                                        69  => 7.6293945E-5,
                                        70  => 9.1552734E-5,
                                        71  => 1.0681152E-4,
                                        72  => 1.2207031E-4,
                                        73  => 1.5258789E-4,
                                        74  => 1.8310547E-4,
                                        75  => 2.1362305E-4,
                                        76  => 2.4414062E-4,
                                        77  => 3.0517578E-4,
                                        78  => 3.6621094E-4,
                                        79  => 4.272461E-4,
                                        80  => 4.8828125E-4,
                                        81  => 6.1035156E-4,
                                        82  => 7.324219E-4,
                                        83  => 8.544922E-4,
                                        84  => 9.765625E-4,
                                        85  => 0.0012207031,
                                        86  => 0.0014648438,
                                        87  => 0.0017089844,
                                        88  => 0.001953125,
                                        89  => 0.0024414062,
                                        90  => 0.0029296875,
                                        91  => 0.0034179688,
                                        92  => 0.00390625,
                                        93  => 0.0048828125,
                                        94  => 0.005859375,
                                        95  => 0.0068359375,
                                        96  => 0.0078125,
                                        97  => 0.009765625,
                                        98  => 0.01171875,
                                        99  => 0.013671875,
                                        100 => 0.015625,
                                        101 => 0.01953125,
                                        102 => 0.0234375,
                                        103 => 0.02734375,
                                        104 => 0.03125,
                                        105 => 0.0390625,
                                        106 => 0.046875,
                                        107 => 0.0546875,
                                        108 => 0.0625,
                                        109 => 0.078125,
                                        110 => 0.09375,
                                        111 => 0.109375,
                                        112 => 0.125,
                                        113 => 0.15625,
                                        114 => 0.1875,
                                        115 => 0.21875,
                                        116 => 0.25,
                                        117 => 0.3125,
                                        118 => 0.375,
                                        119 => 0.4375,
                                        120 => 0.5,
                                        121 => 0.625,
                                        122 => 0.75,
                                        123 => 0.875,
                                        124 => 1.0,
                                        125 => 1.25,
                                        126 => 1.5,
                                        127 => 1.75,
                                        128 => 2.0,
                                        129 => 2.5,
                                        130 => 3.0,
                                        131 => 3.5,
                                        132 => 4.0,
                                        133 => 5.0,
                                        134 => 6.0,
                                        135 => 7.0,
                                        136 => 8.0,
                                        137 => 10.0,
                                        138 => 12.0,
                                        139 => 14.0,
                                        140 => 16.0,
                                        141 => 20.0,
                                        142 => 24.0,
                                        143 => 28.0,
                                        144 => 32.0,
                                        145 => 40.0,
                                        146 => 48.0,
                                        147 => 56.0,
                                        148 => 64.0,
                                        149 => 80.0,
                                        150 => 96.0,
                                        151 => 112.0,
                                        152 => 128.0,
                                        153 => 160.0,
                                        154 => 192.0,
                                        155 => 224.0,
                                        156 => 256.0,
                                        157 => 320.0,
                                        158 => 384.0,
                                        159 => 448.0,
                                        160 => 512.0,
                                        161 => 640.0,
                                        162 => 768.0,
                                        163 => 896.0,
                                        164 => 1024.0,
                                        165 => 1280.0,
                                        166 => 1536.0,
                                        167 => 1792.0,
                                        168 => 2048.0,
                                        169 => 2560.0,
                                        170 => 3072.0,
                                        171 => 3584.0,
                                        172 => 4096.0,
                                        173 => 5120.0,
                                        174 => 6144.0,
                                        175 => 7168.0,
                                        176 => 8192.0,
                                        177 => 10240.0,
                                        178 => 12288.0,
                                        179 => 14336.0,
                                        180 => 16384.0,
                                        181 => 20480.0,
                                        182 => 24576.0,
                                        183 => 28672.0,
                                        184 => 32768.0,
                                        185 => 40960.0,
                                        186 => 49152.0,
                                        187 => 57344.0,
                                        188 => 65536.0,
                                        189 => 81920.0,
                                        190 => 98304.0,
                                        191 => 114688.0,
                                        192 => 131072.0,
                                        193 => 163840.0,
                                        194 => 196608.0,
                                        195 => 229376.0,
                                        196 => 262144.0,
                                        197 => 327680.0,
                                        198 => 393216.0,
                                        199 => 458752.0,
                                        200 => 524288.0,
                                        201 => 655360.0,
                                        202 => 786432.0,
                                        203 => 917504.0,
                                        204 => 1048576.0,
                                        205 => 1310720.0,
                                        206 => 1572864.0,
                                        207 => 1835008.0,
                                        208 => 2097152.0,
                                        209 => 2621440.0,
                                        210 => 3145728.0,
                                        211 => 3670016.0,
                                        212 => 4194304.0,
                                        213 => 5242880.0,
                                        214 => 6291456.0,
                                        215 => 7340032.0,
                                        216 => 8388608.0,
                                        217 => 1.048576E7,
                                        218 => 1.2582912E7,
                                        219 => 1.4680064E7,
                                        220 => 1.6777216E7,
                                        221 => 2.097152E7,
                                        222 => 2.5165824E7,
                                        223 => 2.9360128E7,
                                        224 => 3.3554432E7,
                                        225 => 4.194304E7,
                                        226 => 5.0331648E7,
                                        227 => 5.8720256E7,
                                        228 => 6.7108864E7,
                                        229 => 8.388608E7,
                                        230 => 1.00663296E8,
                                        231 => 1.17440512E8,
                                        232 => 1.34217728E8,
                                        233 => 1.6777216E8,
                                        234 => 2.01326592E8,
                                        235 => 2.34881024E8,
                                        236 => 2.68435456E8,
                                        237 => 3.3554432E8,
                                        238 => 4.02653184E8,
                                        239 => 4.69762048E8,
                                        240 => 5.3687091E8,
                                        241 => 6.7108864E8,
                                        242 => 8.0530637E8,
                                        243 => 9.395241E8,
                                        244 => 1.07374182E9,
                                        245 => 1.34217728E9,
                                        246 => 1.61061274E9,
                                        247 => 1.87904819E9,
                                        248 => 2.14748365E9,
                                        249 => 2.68435456E9,
                                        250 => 3.22122547E9,
                                        251 => 3.75809638E9,
                                        252 => 4.2949673E9,
                                        253 => 5.3687091E9,
                                        254 => 6.4424509E9,
                                        255 => 7.5161928E9  );


    /**
     * Set the default Similarity implementation used by indexing and search
     * code.
     *
     * @param Zend_Search_Lucene_Search_Similarity $similarity
     */
    public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
    {
        self::$_defaultImpl = $similarity;
    }


    /**
     * Return the default Similarity implementation used by indexing and search
     * code.
     *
     * If no implementation has been set yet, a
     * Zend_Search_Lucene_Search_Similarity_Default instance is created,
     * remembered and returned.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public static function getDefault()
    {
        if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
            self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
        }

        return self::$_defaultImpl;
    }


    /**
     * Computes the normalization value for a field given the total number of
     * terms contained in a field.  These values, together with field boosts, are
     * stored in an index and multiplied into scores for hits on each field by the
     * search code.
     *
     * Matches in longer fields are less precise, so implementations of this
     * method usually return smaller values when 'numTokens' is large,
     * and larger values when 'numTokens' is small.
     *
     * Note that these values are computed under
     * IndexWriter::addDocument(Document) and stored then using
     * encodeNorm(float).  Thus they have limited precision, and documents
     * must be re-indexed if this method is altered.
     *
     * fieldName - name of field
     * numTokens - the total number of tokens contained in fields named
     *             'fieldName' of 'doc'.
     * Returns a normalization factor for hits on this field of this document
     *
     * @param string $fieldName
     * @param integer $numTokens
     * @return float
     */
    abstract public function lengthNorm($fieldName, $numTokens);

    /**
     * Computes the normalization value for a query given the sum of the squared
     * weights of each of the query terms.  This value is then multiplied into the
     * weight of each query term.
     *
     * This does not affect ranking, but rather just attempts to make scores
     * from different queries comparable.
     *
     * sumOfSquaredWeights - the sum of the squares of query term weights
     * Returns a normalization factor for query weights
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    abstract public function queryNorm($sumOfSquaredWeights);


    /**
     * Decodes a normalization factor stored in an index.
     *
     * Only the low 8 bits of $byte are used as the lookup index.
     *
     * @param integer $byte
     * @return float
     */
    public static function decodeNorm($byte)
    {
        return self::$_normTable[$byte & 0xFF];
    }


    /**
     * Encodes a normalization factor for storage in an index.
     *
     * The encoding uses a five-bit exponent and three-bit mantissa, thus
     * representing values from around 7.5x10^9 down to around 5.8x10^-10
     * with about one significant decimal digit of accuracy.  Zero is also
     * represented.
     * Negative numbers are rounded up to zero.  Values too large to represent
     * are rounded down to the largest representable value.  Positive values too
     * small to represent are rounded up to the smallest positive representable
     * value.
     *
     * @param float $f
     * @return integer
     */
    public static function encodeNorm($f)
    {
        return self::_floatToByte($f);
    }

    /**
     * Float to byte conversion
     *
     * Finds the index (0..255) of the norm table entry closest to $f.
     * Zero and negative input map to 0; positive input never maps to 0, so a
     * non-zero norm cannot be silently encoded as "zero".
     *
     * @param float $f
     * @return integer
     */
    private static function _floatToByte($f)
    {
        // round negatives up to zero
        if ($f <= 0.0) {
            return 0;
        }

        // binary search for an exact match in the ascending norm table
        $lowIndex  = 0;
        $highIndex = 255;
        while ($highIndex >= $lowIndex) {
            $mid   = ($highIndex + $lowIndex) >> 1;
            $delta = $f - self::$_normTable[$mid];

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                return $mid; // We got it!
            }
        }

        // No exact match: $highIndex is now the largest index whose table
        // value is below $f. It cannot be negative here because $f > 0 and
        // table entry 0 is 0.0.

        // Positive underflow: $f lies between 0 and the smallest positive
        // table value. Round up to that smallest positive value rather than
        // down to zero.
        if ($highIndex == 0) {
            return 1;
        }

        // round to closest value (index 255 has no upper neighbour, so values
        // above the largest entry are clamped to it)
        if ($highIndex != 255 &&
            $f - self::$_normTable[$highIndex] > self::$_normTable[$highIndex+1] - $f ) {
            return $highIndex + 1;
        } else {
            return $highIndex;
        }
    }


    /**
     * Computes a score factor based on a term or phrase's frequency in a
     * document.  This value is multiplied by the idf(Term, Searcher)
     * factor for each term in the query and these products are then summed to
     * form the initial score for a document.
     *
     * Terms and phrases repeated in a document indicate the topic of the
     * document, so implementations of this method usually return larger values
     * when 'freq' is large, and smaller values when 'freq'
     * is small.
     *
     * freq - the frequency of a term within a document
     * Returns a score factor based on a term's within-document frequency
     *
     * @param float $freq
     * @return float
     */
    abstract public function tf($freq);

    /**
     * Computes the amount of a sloppy phrase match, based on an edit distance.
     * This value is summed for each sloppy phrase match in a document to form
     * the frequency that is passed to tf(float).
     *
     * A phrase match with a small edit distance to a document passage more
     * closely matches the document, so implementations of this method usually
     * return larger values when the edit distance is small and smaller values
     * when it is large.
     *
     * distance - the edit distance of this sloppy phrase match
     * Returns the frequency increment for this match
     *
     * @param integer $distance
     * @return float
     */
    abstract public function sloppyFreq($distance);


    /**
     * Computes a score factor for a simple term or a phrase.
     *
     * For a single term this is
     *   idfFreq($reader->docFreq($input), $reader->count());
     * for an array of terms it is the sum of that value over every term
     * (0.0 for an empty array).
     *
     * input - the term in question or array of terms
     * reader - reader the document collection being searched
     * Returns a score factor for the term
     *
     * @param mixed $input
     * @param Zend_Search_Lucene_Interface $reader
     * @return float a score factor for the term
     */
    public function idf($input, Zend_Search_Lucene_Interface $reader)
    {
        if (!is_array($input)) {
            return $this->idfFreq($reader->docFreq($input), $reader->count());
        } else {
            $idf = 0.0;
            foreach ($input as $term) {
                $idf += $this->idfFreq($reader->docFreq($term), $reader->count());
            }
            return $idf;
        }
    }

    /**
     * Computes a score factor based on a term's document frequency (the number
     * of documents which contain the term).  This value is multiplied by the
     * tf(int) factor for each term in the query and these products are
     * then summed to form the initial score for a document.
     *
     * Terms that occur in fewer documents are better indicators of topic, so
     * implementations of this method usually return larger values for rare terms,
     * and smaller values for common terms.
     *
     * docFreq - the number of documents which contain the term
     * numDocs - the total number of documents in the collection
     * Returns a score factor based on the term's document frequency
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    abstract public function idfFreq($docFreq, $numDocs);

    /**
     * Computes a score factor based on the fraction of all query terms that a
     * document contains.  This value is multiplied into scores.
     *
     * The presence of a large portion of the query terms indicates a better
     * match with the query, so implementations of this method usually return
     * larger values when the ratio between these parameters is large and smaller
     * values when the ratio between them is small.
     *
     * overlap - the number of query terms matched in the document
     * maxOverlap - the total number of terms in the query
     * Returns a score factor based on term overlap with the query
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    abstract public function coord($overlap, $maxOverlap);
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |