[ Index ]

PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008]

title

Body

[close]

/search/documents/ -> physical_htm.php (source)

   1  <?php
   2  /**
   3  * Global Search Engine for Moodle
   4  *
   5  * @package search
   6  * @category core
   7  * @subpackage document_wrappers
   8  * @author Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
   9  * @date 2008/03/31
  10  * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
  11  *
  12  * this is a format handler for getting the text out of HTML (.htm) files
  13  * so it can be indexed by the Lucene search engine
  14  */
  15  
  /**
  * Extracts the indexable plain text from an HTML resource file.
  *
  * The file is read from "{$CFG->dataroot}/{$resource->course}/{$resource->reference}",
  * converted to UTF-8 and then reduced to plain text:
  * - HTML comments and <script>/<style> blocks are dropped together with their content;
  * - every <meta> tag named "keywords" or "description" is replaced by the value of its
  *   content attribute, so that this information is indexed as real content;
  * - all remaining tags are removed and HTML entities are decoded.
  *
  * @param object $resource resource record; its course and reference fields locate the file
  * @uses CFG, USER
  * @return string|null the extracted text (passed through shorten() when
  *         $CFG->block_search_limit_index_body is non empty), an empty string when the
  *         file cannot be read, or nothing (null) when the current user is not an admin
  */
  function get_text_for_indexing_htm(&$resource){
      global $CFG, $USER;

      // SECURITY : do not allow non admin execute anything on system !!
      if (!isadmin($USER->id)) return;

      // read the whole file at once; a missing or unreadable file simply yields no text
      $path = "{$CFG->dataroot}/{$resource->course}/{$resource->reference}";
      if (!is_file($path) || !is_readable($path)) return '';
      $text = file_get_contents($path);
      if ($text === false) return '';

      // normalise to UTF-8 BEFORE decoding entities: decoding first would inject UTF-8
      // sequences into a text that may still be in another charset and mix both encodings
      $converted = mb_convert_encoding($text, 'UTF-8', 'AUTO');
      if (is_string($converted)) $text = $converted;

      // drop constructs whose content is never visible text. Comments must go before the
      // generic tag filter, otherwise a comment containing ">" leaves its tail in the index
      $invisible = array(
          '/<!--.*?-->/s',
          '/<(script|style)\b[^>]*>.*?<\/\1\s*>/is'
      );
      foreach ($invisible as $pattern){
          $stripped = preg_replace($pattern, '', $text);
          // preg_replace() gives null on a PCRE failure (e.g. backtrack limit) : keep the text then
          if ($stripped !== null) $text = $stripped;
      }

      // extract keywords and other interesting meta information and put it back as real
      // content for indexing. All meta tags are inspected, not only the last one.
      if (preg_match_all('/<meta\s+([^>]*)>/i', $text, $metas, PREG_SET_ORDER)){
          foreach ($metas as $meta){
              if (!preg_match('/\bname\s*=\s*["\']?(keywords|description)\b/i', $meta[1])) continue;
              // skip tags without a quoted content attribute instead of reading a missing match
              if (!preg_match('/\bcontent\s*=\s*(["\'])(.*?)\1/is', $meta[1], $content)) continue;
              $text = str_replace($meta[0], ' '.$content[2].' ', $text);
          }
      }

      // brutally filters all remaining html tags
      $stripped = preg_replace('/<[^>]*>/', '', $text);
      if ($stripped !== null) $text = $stripped;

      $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');

      if (!empty($CFG->block_search_limit_index_body)){
          $text = shorten($text, $CFG->block_search_limit_index_body);
      }
      return $text;
  }
  59  ?>


Generated: Wed Jan 14 11:33:29 2009 Cross-referenced by PHPXref 0.7