| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php

/***************************************************************
 * Library to convert HTML into an approximate text equivalent *
 ***************************************************************

 Version: 1.0.3 (with modifications)
 Copyright 2003 Mark Wilton-Jones
 License: HowToCreate script license with written permission
 URL: http://www.howtocreate.co.uk/php/

 For full details about the script and to get the latest version,
 please see the HowToCreate web site above.

 This version contains modifications for Moodle. In each case the
 lines are marked with "Moodle", so you can see what has changed.

********************************************************************/

/**
 * Convert HTML markup into an approximate plain-text equivalent.
 *
 * Processing order:
 *  1. remove HTML comments and the contents of style/script elements;
 *  2. escape angle brackets and quotes that are not part of tag syntax so
 *     that strip_tags() sees well-formed markup;
 *  3. strip every tag except the ones that have a text equivalent;
 *  4. collapse runs of spaces outside pre/textarea sections;
 *  5. replace the remaining tags with text (rules, line breaks, bullets,
 *     "text [url]" for links, "[alt]" for images, "[INPUT]" for controls);
 *  6. decode named and numeric HTML entities (output is UTF-8);
 *  7. wrap lines at 78 bytes and tidy up line breaks.
 *
 * @param string $badStr HTML to convert; other scalar values are cast to string
 * @return string the plain-text rendering, without leading/trailing blank lines
 */
function html2text( $badStr ) {

    $badStr = (string) $badStr;

    $is_open_tb = false; // inside a tag (between "<" and ">")
    $is_open_dq = false; // inside a double-quoted attribute value
    $is_open_sq = false; // inside a single-quoted attribute value

    //remove comments

    while (($start = strpos($badStr, '<!--')) !== false
            && ($end = strpos($badStr, '-->', $start)) !== false) {
        $badStr = substr($badStr, 0, $start) . substr($badStr, $end + 3);
    }

    //now make sure all HTML tags are correctly written (> not in between quotes)

    $len = strlen($badStr); // Moodle
    $goodStr = '';          // Moodle

    for ($x = 0; $x < $len; $x++) { // Moodle
        $chr = $badStr[$x]; //take each letter in turn and check if that character is permitted there
        switch ($chr) {
            case '<':
                if (!$is_open_tb && strtolower(substr($badStr, $x + 1, 5)) === 'style') {
                    // Skip the whole element. When the closing tag is missing,
                    // drop the rest of the input instead of jumping backwards
                    // (which used to loop forever).
                    $end = stripos($badStr, '</style>', $x);
                    $x = ($end === false) ? $len : $end + 7; // Moodle
                    $chr = '';
                } else if (!$is_open_tb && strtolower(substr($badStr, $x + 1, 6)) === 'script') {
                    $end = stripos($badStr, '</script>', $x);
                    $x = ($end === false) ? $len : $end + 8; // Moodle
                    $chr = '';
                } else if (!$is_open_tb) {
                    $is_open_tb = true;
                } else {
                    // "<" inside an already open tag cannot start another tag.
                    $chr = '&lt;';
                }
                break;

            case '>':
                if (!$is_open_tb || $is_open_dq || $is_open_sq) {
                    // Not a tag terminator: plain text or part of a quoted value.
                    $chr = '&gt;';
                } else {
                    $is_open_tb = false;
                }
                break;

            case '"':
                if ($is_open_tb && !$is_open_dq && !$is_open_sq) {
                    $is_open_dq = true;
                } else if ($is_open_tb && $is_open_dq && !$is_open_sq) {
                    $is_open_dq = false;
                } else {
                    // Outside a tag, or inside a single-quoted value.
                    $chr = '&quot;';
                }
                break;

            case "'":
                if ($is_open_tb && !$is_open_dq && !$is_open_sq) {
                    $is_open_sq = true;
                } else if ($is_open_tb && !$is_open_dq && $is_open_sq) {
                    $is_open_sq = false;
                }
                break;
        }
        $goodStr .= $chr;
    }

    //now that the page is valid (I hope) for strip_tags, strip all unwanted tags

    $goodStr = strip_tags( $goodStr, '<title><hr><h1><h2><h3><h4><h5><h6><div><p><pre><sup><ul><ol><br><dl><dt><table><caption><tr><li><dd><th><td><a><area><img><form><input><textarea><button><select><option>' );

    //strip extra whitespace except between <pre> and <textarea> tags

    // preg_split() leaves the text *between* the matched tags at the odd
    // indexes, so odd entries are element contents and are kept verbatim.
    $segments = preg_split( "/<\/?pre[^>]*>/i", $goodStr );

    foreach ($segments as $i => $segment) {
        if ($i % 2) {
            $segments[$i] = '<pre>'.$segment.'</pre>';
            continue;
        }
        $parts = preg_split( "/<\/?textarea[^>]*>/i", $segment );
        foreach ($parts as $j => $part) {
            if ($j % 2) {
                $parts[$j] = '<textarea>'.$part.'</textarea>';
            } else {
                // Collapse every run of spaces to a single space.
                $parts[$j] = preg_replace( '/ {2,}/', ' ', $part );
            }
        }
        $segments[$i] = implode('', $parts);
    }

    $goodStr = implode('', $segments);

    //remove all options from select inputs

    $goodStr = preg_replace( "/<option[^>]*>[^<]*/i", '', $goodStr );

    //replace all tags with their text equivalents

    $goodStr = preg_replace( "/<(\/title|hr)[^>]*>/i", "\n          --------------------\n", $goodStr );

    $goodStr = preg_replace( "/<(h|div|p)[^>]*>/i", "\n\n", $goodStr );

    $goodStr = preg_replace( "/<sup[^>]*>/i", '^', $goodStr );

    $goodStr = preg_replace( "/<(ul|ol|br|dl|dt|table|caption|\/textarea|tr[^>]*>\s*<(td|th))[^>]*>/i", "\n", $goodStr );

    // List items start on a new line with a bullet (U+2022 encoded as UTF-8,
    // matching the UTF-8 output of the entity decoding step below).
    $goodStr = preg_replace( "/<li[^>]*>/i", "\n\xE2\x80\xA2 ", $goodStr );

    $goodStr = preg_replace( "/<dd[^>]*>/i", "\n\t", $goodStr );

    $goodStr = preg_replace( "/<(th|td)[^>]*>/i", "\t", $goodStr );

    // Moodle: links are rendered as "text [url]" instead of the upstream
    // "[LINK: url] " prefix; fragment-only and javascript: URLs are skipped.
    $goodStr = preg_replace( "/<a\s[^>]*href=(\"((?!\"|#|javascript:)[^\"#]*)(\"|#)|'((?!'|#|javascript:)[^'#]*)('|#)|((?!'|\"|>|#|javascript:)[^#\"'> ]*))[^>]*>([^<]*)<\/a>/i", "$7 [$2$4$6]", $goodStr );

    // Moodle: images are rendered as "[alt] " instead of "[IMAGE: alt] ".
    $goodStr = preg_replace( "/<img[^>]* alt=(\"([^\"]+)\"|'([^']+)'|([^\"'> ]+))[^>]*>/i", "[$2$3$4] ", $goodStr );

    $goodStr = preg_replace( "/<form[^>]* action=(\"([^\"]+)\"|'([^']+)'|([^\"'> ]+))[^>]*>/i", "\n[FORM: $2$3$4] ", $goodStr );

    $goodStr = preg_replace( "/<(input|textarea|button|select)[^>]*>/i", "[INPUT] ", $goodStr );

    //strip all remaining tags (mostly closing tags)

    $goodStr = strip_tags( $goodStr );

    //convert HTML entities

    // One pass over named and numeric entities. This replaces the translation
    // table lookup plus a "/e" regex whose result was never assigned, and
    // avoids decoding "&amp;#65;" twice.
    $goodStr = html_entity_decode( $goodStr, ENT_QUOTES, 'UTF-8' );

    //wordwrap

    // Moodle: wrap at 78 instead of the wordwrap() default width.
    // NOTE(review): wordwrap() counts bytes, so lines containing multi-byte
    // characters may wrap earlier than 78 visible characters.
    $goodStr = wordwrap( $goodStr, 78 );

    //make sure there are no more than 3 linebreaks in a row and trim whitespace
    $goodStr = preg_replace("/\r\n?|\f/", "\n", $goodStr);
    $goodStr = preg_replace("/\n(\s*\n){2}/", "\n\n\n", $goodStr);
    $goodStr = preg_replace("/[ \t]+(\n|$)/", "$1", $goodStr);
    $goodStr = preg_replace("/^\n*|\n*$/", '', $goodStr);

    return $goodStr;

}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |