| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
<?php

# kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
# Copyright (C) 2002, 2003, 2005  Ulf Harnhammar
#
# This program is free software and open source software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
# http://www.gnu.org/licenses/gpl.html
#
# *** CONTACT INFORMATION ***
#
# E-mail:      metaur at users dot sourceforge dot net
# Web page:    http://sourceforge.net/projects/kses
# Paper mail:  Ulf Harnhammar
#              Ymergatan 17 C
#              753 25  Uppsala
#              SWEDEN
#
# [kses strips evil scripts!]
#
# Maintenance note: the preg_replace() calls that relied on the /e modifier
# (removed in PHP 7) use preg_replace_callback() with closures instead, so
# this file requires PHP 5.3 or later.


function kses($string, $allowed_html, $allowed_protocols =
               array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
                     'gopher', 'mailto'))
###############################################################################
# This function makes sure that only the allowed HTML element names, attribute
# names and attribute values plus only sane HTML entities will occur in
# $string. You have to remove any slashes from PHP's magic quotes before you
# call this function.
#
# $string            - the text to filter
# $allowed_html      - array of element name => array of attribute name =>
#                      either a non-array value (no checks) or an array of
#                      check name => check value (see kses_check_attr_val())
# $allowed_protocols - list of URL protocols that may start attribute values
#
# Returns the filtered text.
###############################################################################
{
  $string = kses_no_null($string);
  $string = kses_js_entities($string);
  $string = kses_normalize_entities($string);
  $string = kses_hook($string);
  $allowed_html_fixed = kses_array_lc($allowed_html);
  return kses_split($string, $allowed_html_fixed, $allowed_protocols);
} # function kses


function kses_hook($string)
###############################################################################
# You add any kses hooks here. By default $string is returned unchanged.
###############################################################################
{
  return $string;
} # function kses_hook


function kses_version()
###############################################################################
# This function returns kses' version number.
###############################################################################
{
  return '0.2.2';
} # function kses_version


function kses_split($string, $allowed_html, $allowed_protocols)
###############################################################################
# This function searches for HTML tags, no matter how malformed. It also
# matches stray ">" characters. Every match is handed to kses_split2() and
# replaced by whatever that function returns.
#
# $allowed_html is expected to have lower case keys already (see
# kses_array_lc()).
###############################################################################
{
  return preg_replace_callback(
           '%(<'.     # EITHER: <
           '[^>]*'.   # things that aren't >
           '(>|$)'.   # > or end of string
           '|>)%',    # OR: just a >
           function ($match) use ($allowed_html, $allowed_protocols)
           {
             return kses_split2($match[1], $allowed_html,
                                $allowed_protocols);
           },
           $string);
} # function kses_split


function kses_split2($string, $allowed_html, $allowed_protocols)
###############################################################################
# This function does a lot of work. It rejects some very malformed things
# like <:::>. It returns an empty string, if the element isn't allowed (look
# ma, no strip_tags()!). Otherwise it splits the tag into an element and an
# attribute list.
#
# $string is one match from kses_split(): either a (possibly unterminated)
# tag or a single ">" character. It is received exactly as it appeared in the
# input, so no slash unquoting is done here.
###############################################################################
{
  if (substr($string, 0, 1) != '<')
    return '&gt;';
    # It matched a ">" character

  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
    return '';
    # It's seriously malformed

  $slash = trim($matches[1]);
  $elem = $matches[2];
  $attrlist = $matches[3];

  if (!isset($allowed_html[strtolower($elem)]))
    return '';
    # They are using a not allowed HTML element

  if ($slash != '')
    return "<$slash$elem>";
    # No attributes are allowed for closing elements

  return kses_attr("$slash$elem", $attrlist, $allowed_html,
                   $allowed_protocols);
} # function kses_split2


function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
###############################################################################
# This function removes all attributes, if none are allowed for this element.
# If some are allowed it calls kses_hair() to split them further, and then it
# builds up new HTML code from the data that kses_hair() returns. It also
# removes "<" and ">" characters, if there are any left. One more thing it
# does is to check if the tag has a closing XHTML slash, and if it does,
# it puts one in the returned code as well.
#
# $element - element name as written in the input
# $attr    - the raw attribute list following the element name
#
# Returns the rebuilt opening tag.
###############################################################################
{
# Is there a closing XHTML slash at the end of the attributes?

  $xhtml_slash = '';
  if (preg_match('%\s/\s*$%', $attr))
    $xhtml_slash = ' /';

# Are any attributes allowed at all for this element?

  $elem_key = strtolower($element);
  $allowed_attrs = array();
  if (isset($allowed_html[$elem_key]) && is_array($allowed_html[$elem_key]))
    $allowed_attrs = $allowed_html[$elem_key];

  if (count($allowed_attrs) == 0)
    return "<$element$xhtml_slash>";

# Split the list and keep the allowed attributes for this element in $attr2

  $attr2 = '';

  foreach (kses_hair($attr, $allowed_protocols) as $arreach)
  {
    $attr_key = strtolower($arreach['name']);

    if (!isset($allowed_attrs[$attr_key]))
      continue; # the attribute is not allowed

    $current = $allowed_attrs[$attr_key];

    if (is_array($current))
    {
      # there are some checks, and every one of them has to pass
      $ok = true;
      foreach ($current as $currkey => $currval)
        if (!kses_check_attr_val($arreach['value'], $arreach['vless'],
                                 $currkey, $currval))
        { $ok = false; break; }

      if (!$ok)
        continue;
    }

    $attr2 .= ' '.$arreach['whole'];
  } # foreach

# Remove any "<" or ">" characters

  $attr2 = preg_replace('/[<>]/', '', $attr2);

  return "<$element$attr2$xhtml_slash>";
} # function kses_attr


function kses_hair($attr, $allowed_protocols)
###############################################################################
# This function does a lot of work. It parses an attribute list into an array
# with attribute data, and tries to do the right thing even if it gets weird
# input. It will add quotes around attribute values that don't have any quotes
# or apostrophes around them, to make it easier to produce HTML code that will
# conform to W3C's HTML specification. It will also remove bad URL protocols
# from attribute values.
#
# Returns a list of arrays with the keys 'name', 'value', 'whole' (the
# attribute as it should be written out) and 'vless' ('y' for a valueless
# attribute, 'n' otherwise).
###############################################################################
{
  # The three accepted value forms, each mapped to the quote character used
  # when the attribute is written out again. Bare values get double quotes
  # to conform to W3C's HTML spec.
  $value_patterns = array(
    '/^"([^"]*)"(\s+|$)/'     => '"',   # "value"
    '/^\'([^\']*)\'(\s+|$)/'  => "'",   # 'value'
    '%^([^\s"\']+)(\s+|$)%'   => '"');  # value

  $attrarr = array();
  $mode = 0;
  $attrname = '';

# Loop through the whole attribute list

  while (strlen($attr) != 0)
  {
    $working = 0; # Was the last operation successful?

    switch ($mode)
    {
      case 0: # attribute name, href for instance

        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
        {
          $attrname = $match[1];
          $working = $mode = 1;
          $attr = (string) substr($attr, strlen($match[0]));
        }

        break;

      case 1: # equals sign or valueless ("selected")

        if (preg_match('/^\s*=\s*/', $attr, $match)) # equals sign
        {
          $working = 1; $mode = 2;
          $attr = (string) substr($attr, strlen($match[0]));
          break;
        }

        if (preg_match('/^\s+/', $attr, $match)) # valueless
        {
          $working = 1; $mode = 0;
          $attrarr[] = array
                        ('name'  => $attrname,
                         'value' => '',
                         'whole' => $attrname,
                         'vless' => 'y');
          $attr = (string) substr($attr, strlen($match[0]));
        }

        break;

      case 2: # attribute value, a URL after href= for instance

        foreach ($value_patterns as $pattern => $quote)
          if (preg_match($pattern, $attr, $match))
          {
            $thisval = kses_bad_protocol($match[1], $allowed_protocols);

            $attrarr[] = array
                          ('name'  => $attrname,
                           'value' => $thisval,
                           'whole' => $attrname.'='.$quote.$thisval.$quote,
                           'vless' => 'n');
            $working = 1; $mode = 0;
            $attr = (string) substr($attr, strlen($match[0]));
            break; # leaves the foreach only
          }

        break;
    } # switch

    if ($working == 0) # not well formed, remove and try again
    {
      $attr = kses_html_error($attr);
      $mode = 0;
    }
  } # while

  if ($mode == 1)
    # special case, for when the attribute list ends with a valueless
    # attribute like "selected"
    $attrarr[] = array
                  ('name'  => $attrname,
                   'value' => '',
                   'whole' => $attrname,
                   'vless' => 'y');

  return $attrarr;
} # function kses_hair


function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
###############################################################################
# This function performs different checks for attribute values. The currently
# implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless"
# with even more checks to come soon.
#
# $value      - the attribute value
# $vless      - 'y' if the attribute was written without a value, else 'n'
# $checkname  - name of the check (case insensitive); unknown names pass
# $checkvalue - the limit or flag belonging to the check
#
# Returns true if the check passes and false if it doesn't.
###############################################################################
{
  $ok = true;

  switch (strtolower($checkname))
  {
    case 'maxlen':
      # The maxlen check makes sure that the attribute value has a length not
      # greater than the given value. This can be used to avoid Buffer Overflows
      # in WWW clients and various Internet servers.

      if (strlen($value) > $checkvalue)
        $ok = false;
      break;

    case 'minlen':
      # The minlen check makes sure that the attribute value has a length not
      # smaller than the given value.

      if (strlen($value) < $checkvalue)
        $ok = false;
      break;

    case 'maxval':
      # The maxval check does two things: it checks that the attribute value is
      # an integer from 0 and up, without an excessive amount of zeroes or
      # whitespace (to avoid Buffer Overflows). It also checks that the attribute
      # value is not greater than the given value.
      # This check can be used to avoid Denial of Service attacks.

      if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
        $ok = false;
      if ($value > $checkvalue)
        $ok = false;
      break;

    case 'minval':
      # The minval check checks that the attribute value is a positive integer,
      # and that it is not smaller than the given value.

      if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
        $ok = false;
      if ($value < $checkvalue)
        $ok = false;
      break;

    case 'valueless':
      # The valueless check checks if the attribute has a value
      # (like <a href="blah">) or not (<option selected>). If the given value
      # is a "y" or a "Y", the attribute must not have a value.
      # If the given value is an "n" or an "N", the attribute must have one.

      if (strtolower($checkvalue) != $vless)
        $ok = false;
      break;
  } # switch

  return $ok;
} # function kses_check_attr_val


function kses_bad_protocol($string, $allowed_protocols)
###############################################################################
# This function removes all non-allowed protocols from the beginning of
# $string. It ignores whitespace and the case of the letters, and it does
# understand HTML entities. It does its work in a loop, so it won't be
# fooled by a string like "javascript:javascript:alert(57)".
###############################################################################
{
  $string = kses_no_null($string);
  $string = preg_replace('/([^\xc3-\xcf])\xad+/', '\\1', $string);
    # deals with Opera "feature" -- moodle utf8 fix

  # Keep stripping until a pass leaves the string unchanged
  do
  {
    $before = $string;
    $string = kses_bad_protocol_once($string, $allowed_protocols);
  } while ($string != $before);

  return $string;
} # function kses_bad_protocol


function kses_no_null($string)
###############################################################################
# This function removes any NULL characters in $string, both real NUL bytes
# and the two character sequence backslash-zero.
###############################################################################
{
  $string = preg_replace('/\0+/', '', $string);
  $string = preg_replace('/(\\\\0)+/', '', $string);

  return $string;
} # function kses_no_null


function kses_stripslashes($string)
###############################################################################
# This function changes the character sequence  \"  to just  "
# It leaves all other slashes alone. It was needed to undo the quoting that
# preg_replace(//e) applied; kses itself no longer calls it, but it is kept
# for code that uses it directly.
###############################################################################
{
  return preg_replace('%\\\\"%', '"', $string);
} # function kses_stripslashes


function kses_array_lc($inarray)
###############################################################################
# This function goes through an array, and changes the keys to all lower case.
# Both levels (element names and attribute names) are converted. An element
# whose value is not an array ends up with an empty attribute list.
###############################################################################
{
  $outarray = array();

  foreach ($inarray as $inkey => $inval)
  {
    $outkey = strtolower($inkey);
    $outarray[$outkey] = array();

    if (!is_array($inval))
      continue;

    foreach ($inval as $inkey2 => $inval2)
      $outarray[$outkey][strtolower($inkey2)] = $inval2;
  } # foreach $inarray

  return $outarray;
} # function kses_array_lc


function kses_js_entities($string)
###############################################################################
# This function removes the HTML JavaScript entities found in early versions of
# Netscape 4.
###############################################################################
{
  return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
} # function kses_js_entities


function kses_html_error($string)
###############################################################################
# This function deals with parsing errors in kses_hair(). The general plan is
# to remove everything to and including some whitespace, but it deals with
# quotes and apostrophes as well.
###############################################################################
{
  return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
} # function kses_html_error


function kses_bad_protocol_once($string, $allowed_protocols)
###############################################################################
# This function searches for URL protocols at the beginning of $string, while
# handling whitespace and HTML entities. The text before the first colon
# (written as ":", "&#58;" or "&#x3a;") is treated as a protocol, unless it
# contains "/?".
###############################################################################
{
  $parts = preg_split('/:|&#58;|&#x3a;/i', $string, 2);

  if (isset($parts[1]) && !preg_match('%/\?%', $parts[0]))
    $string = kses_bad_protocol_once2($parts[0], $allowed_protocols).
              trim($parts[1]);

  return $string;
} # function kses_bad_protocol_once


function kses_bad_protocol_once2($string, $allowed_protocols)
###############################################################################
# This function processes URL protocols, checks to see if they're in the white-
# list or not, and returns different data depending on the answer: the
# normalized protocol followed by a colon if it is allowed, and an empty
# string otherwise.
###############################################################################
{
  $string2 = kses_decode_entities($string);
  $string2 = preg_replace('/\s/', '', $string2);
  $string2 = kses_no_null($string2);
  $string2 = preg_replace('/\xad+/', '', $string2);
   # deals with Opera "feature"
  $string2 = strtolower($string2);

  foreach ($allowed_protocols as $one_protocol)
    if (strtolower($one_protocol) == $string2)
      return "$string2:";

  return '';
} # function kses_bad_protocol_once2


function kses_normalize_entities($string)
###############################################################################
# This function normalizes HTML entities. It will convert "AT&T" to the correct
# "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
###############################################################################
{
# Disarm all entities by converting & to &amp;

  $string = str_replace('&', '&amp;', $string);

# Change back the allowed entities in our entity whitelist

  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
                         '&\\1;', $string);
  $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/',
                                  function ($match)
                                  {
                                    return kses_normalize_entities2($match[1]);
                                  },
                                  $string);
  $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',
                         '&#\\1\\2;', $string);

  return $string;
} # function kses_normalize_entities


function kses_normalize_entities2($i)
###############################################################################
# This function helps kses_normalize_entities() to only accept 16 bit values
# and nothing more for &#number; entities. Larger numbers stay disarmed.
###############################################################################
{
  return (($i > 65535) ? "&amp;#$i;" : "&#$i;");
} # function kses_normalize_entities2


function kses_decode_entities($string)
###############################################################################
# This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't
# do anything with other entities like &auml;, but we don't need them in the
# URL protocol whitelisting system anyway.
#
# Each entity becomes a single byte; only the low 8 bits of its number are
# used, which is what chr() does with larger values.
###############################################################################
{
  $string = preg_replace_callback('/&#([0-9]+);/',
                                  function ($match)
                                  {
                                    return chr(((int) $match[1]) & 0xFF);
                                  },
                                  $string);
  $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/',
                                  function ($match)
                                  {
                                    # the last two hex digits are the low byte
                                    return chr(hexdec(substr($match[1], -2)));
                                  },
                                  $string);

  return $string;
} # function kses_decode_entities

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Jan 14 11:33:29 2009 | Cross-referenced by PHPXref 0.7 |