| [ Index ] |
PHP Cross Reference of Moodle 1.9.3 [Build 15-Oct-2008] |
[Summary view] [Print] [Text view]
1 <?php 2 # 3 # Markdown Extra - A text-to-HTML conversion tool for web writers 4 # 5 # PHP Markdown & Extra 6 # Copyright (c) 2004-2007 Michel Fortin 7 # <http://www.michelf.com/projects/php-markdown/> 8 # 9 # Original Markdown 10 # Copyright (c) 2004-2006 John Gruber 11 # <http://daringfireball.net/projects/markdown/> 12 # 13 14 15 define( 'MARKDOWN_VERSION', "1.0.1j" ); # Tue 4 Sep 2007 16 define( 'MARKDOWNEXTRA_VERSION', "1.1.6" ); # Tue 4 Sep 2007 17 18 19 # 20 # Global default settings: 21 # 22 23 # Change to ">" for HTML output 24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />"); 25 26 # Define the width of a tab for code blocks. 27 @define( 'MARKDOWN_TAB_WIDTH', 4 ); 28 29 # Optional title attribute for footnote links and backlinks. 30 @define( 'MARKDOWN_FN_LINK_TITLE', "" ); 31 @define( 'MARKDOWN_FN_BACKLINK_TITLE', "" ); 32 33 # Optional class attribute for footnote links and backlinks. 34 @define( 'MARKDOWN_FN_LINK_CLASS', "" ); 35 @define( 'MARKDOWN_FN_BACKLINK_CLASS', "" ); 36 37 38 # 39 # WordPress settings: 40 # 41 42 # Change to false to remove Markdown from posts and/or comments. 43 @define( 'MARKDOWN_WP_POSTS', true ); 44 @define( 'MARKDOWN_WP_COMMENTS', true ); 45 46 47 48 ### Standard Function Interface ### 49 50 @define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' ); 51 52 function Markdown($text) { 53 # 54 # Initialize the parser and return the result of its transform method. 55 # 56 # Setup static parser variable. 57 static $parser; 58 if (!isset($parser)) { 59 $parser_class = MARKDOWN_PARSER_CLASS; 60 $parser = new $parser_class; 61 } 62 63 # Transform text using parser. 64 return $parser->transform($text); 65 } 66 67 68 ### WordPress Plugin Interface ### 69 70 /* 71 Plugin Name: Markdown Extra 72 Plugin URI: http://www.michelf.com/projects/php-markdown/ 73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. 
Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a> 74 Version: 1.1.6 75 Author: Michel Fortin 76 Author URI: http://www.michelf.com/ 77 */ 78 79 if (isset($wp_version)) { 80 # More details about how it works here: 81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/> 82 83 # Post content and excerpts 84 # - Remove WordPress paragraph generator. 85 # - Run Markdown on excerpt, then remove all tags. 86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss. 87 if (MARKDOWN_WP_POSTS) { 88 remove_filter('the_content', 'wpautop'); 89 remove_filter('the_content_rss', 'wpautop'); 90 remove_filter('the_excerpt', 'wpautop'); 91 add_filter('the_content', 'Markdown', 6); 92 add_filter('the_content_rss', 'Markdown', 6); 93 add_filter('get_the_excerpt', 'Markdown', 6); 94 add_filter('get_the_excerpt', 'trim', 7); 95 add_filter('the_excerpt', 'mdwp_add_p'); 96 add_filter('the_excerpt_rss', 'mdwp_strip_p'); 97 98 remove_filter('content_save_pre', 'balanceTags', 50); 99 remove_filter('excerpt_save_pre', 'balanceTags', 50); 100 add_filter('the_content', 'balanceTags', 50); 101 add_filter('get_the_excerpt', 'balanceTags', 9); 102 } 103 104 # Comments 105 # - Remove WordPress paragraph generator. 106 # - Remove WordPress auto-link generator. 107 # - Scramble important tags before passing them to the kses filter. 108 # - Run Markdown on excerpt then remove paragraph tags. 
109 if (MARKDOWN_WP_COMMENTS) { 110 remove_filter('comment_text', 'wpautop', 30); 111 remove_filter('comment_text', 'make_clickable'); 112 add_filter('pre_comment_content', 'Markdown', 6); 113 add_filter('pre_comment_content', 'mdwp_hide_tags', 8); 114 add_filter('pre_comment_content', 'mdwp_show_tags', 12); 115 add_filter('get_comment_text', 'Markdown', 6); 116 add_filter('get_comment_excerpt', 'Markdown', 6); 117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); 118 119 global $mdwp_hidden_tags, $mdwp_placeholders; 120 $mdwp_hidden_tags = explode(' ', 121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'); 122 $mdwp_placeholders = explode(' ', str_rot13( 123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. 124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); 125 } 126 127 function mdwp_add_p($text) { 128 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) { 129 $text = '<p>'.$text.'</p>'; 130 $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text); 131 } 132 return $text; 133 } 134 135 function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); } 136 137 function mdwp_hide_tags($text) { 138 global $mdwp_hidden_tags, $mdwp_placeholders; 139 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 140 } 141 function mdwp_show_tags($text) { 142 global $mdwp_hidden_tags, $mdwp_placeholders; 143 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 144 } 145 } 146 147 148 ### bBlog Plugin Info ### 149 150 function identify_modifier_markdown() { 151 return array( 152 'name' => 'markdown', 153 'type' => 'modifier', 154 'nicename' => 'PHP Markdown Extra', 155 'description' => 'A text-to-HTML conversion tool for web writers', 156 'authors' => 'Michel Fortin and John Gruber', 157 'licence' => 'GPL', 158 'version' => MARKDOWNEXTRA_VERSION, 159 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text 
format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>', 160 ); 161 } 162 163 164 ### Smarty Modifier Interface ### 165 166 function smarty_modifier_markdown($text) { 167 return Markdown($text); 168 } 169 170 171 ### Textile Compatibility Mode ### 172 173 # Rename this file to "classTextile.php" and it can replace Textile everywhere. 174 175 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { 176 # Try to include PHP SmartyPants. Should be in the same directory. 177 @include_once 'smartypants.php'; 178 # Fake Textile class. It calls Markdown instead. 179 class Textile { 180 function TextileThis($text, $lite='', $encode='') { 181 if ($lite == '' && $encode == '') $text = Markdown($text); 182 if (function_exists('SmartyPants')) $text = SmartyPants($text); 183 return $text; 184 } 185 # Fake restricted version: restrictions are not supported for now. 186 function TextileRestricted($text, $lite='', $noimage='') { 187 return $this->TextileThis($text, $lite); 188 } 189 # Workaround to ensure compatibility with TextPattern 4.0.3. 190 function blockLite($text) { return $text; } 191 } 192 } 193 194 195 196 # 197 # Markdown Parser Class 198 # 199 200 class Markdown_Parser { 201 202 # Regex to match balanced [brackets]. 203 # Needed to insert a maximum bracked depth while converting to PHP. 204 var $nested_brackets_depth = 6; 205 var $nested_brackets; 206 207 var $nested_url_parenthesis_depth = 4; 208 var $nested_url_parenthesis; 209 210 # Table of hash values for escaped characters: 211 var $escape_chars = '\`*_{}[]()>#+-.!'; 212 213 # Change to ">" for HTML output. 214 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 215 var $tab_width = MARKDOWN_TAB_WIDTH; 216 217 # Change to `true` to disallow markup or entities. 
218 var $no_markup = false; 219 var $no_entities = false; 220 221 222 function Markdown_Parser() { 223 # 224 # Constructor function. Initialize appropriate member variables. 225 # 226 $this->_initDetab(); 227 228 $this->nested_brackets = 229 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 230 str_repeat('\])*', $this->nested_brackets_depth); 231 232 $this->nested_url_parenthesis = 233 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 234 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 235 236 # Sort document, block, and span gamut in ascendent priority order. 237 asort($this->document_gamut); 238 asort($this->block_gamut); 239 asort($this->span_gamut); 240 } 241 242 243 # Internal hashes used during transformation. 244 var $urls = array(); 245 var $titles = array(); 246 var $html_hashes = array(); 247 248 # Status flag to avoid invalid nesting. 249 var $in_anchor = false; 250 251 252 function transform($text) { 253 # 254 # Main function. The order in which other subs are called here is 255 # essential. Link and image substitutions need to happen before 256 # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a> 257 # and <img> tags get encoded. 258 # 259 # Clear the global hashes. If we don't clear these, you get conflicts 260 # from other articles when generating a page which contains more than 261 # one article (e.g. an index page that shows the N most recent 262 # articles): 263 $this->urls = array(); 264 $this->titles = array(); 265 $this->html_hashes = array(); 266 267 # Standardize line endings: 268 # DOS to Unix and Mac to Unix 269 $text = preg_replace('{\r\n?}', "\n", $text); 270 271 # Make sure $text ends with a couple of newlines: 272 $text .= "\n\n"; 273 274 # Convert all tabs to spaces. 275 $text = $this->detab($text); 276 277 # Turn block-level HTML blocks into hash entries 278 $text = $this->hashHTMLBlocks($text); 279 280 # Strip any lines consisting only of spaces and tabs. 
# This makes subsequent regexen easier to write, because we can
        # match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ ]*\n+/ .
        $text = preg_replace('/^[ ]+$/m', '', $text);

        # Run document gamut methods.
        foreach ($this->document_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        return $text . "\n";
    }

    var $document_gamut = array(
        # Strip link definitions, store in hashes.
        "stripLinkDefinitions" => 20,

        "runBasicBlockGamut"   => 30,
        );


    function stripLinkDefinitions($text) {
    #
    # Strips link definitions from text, stores the URLs and titles in
    # hash references.
    #
        $less_than_tab = $this->tab_width - 1;

        # Link defs are in the form: ^[id]: url "optional title"
        $text = preg_replace_callback('{
                            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                              [ ]*
                              \n?               # maybe *one* newline
                              [ ]*
                            <?(\S+?)>?          # url = $2
                              [ ]*
                              \n?               # maybe one newline
                              [ ]*
                            (?:
                                (?<=\s)         # lookbehind for whitespace
                                ["(]
                                (.*?)           # title = $3
                                [")]
                                [ ]*
                            )?  # title is optional
                            (?:\n+|\Z)
            }xm',
            array(&$this, '_stripLinkDefinitions_callback'),
            $text);
        return $text;
    }
    function _stripLinkDefinitions_callback($matches) {
    #
    # Callback for stripLinkDefinitions: records the URL ($2) and the
    # optional title ($3) of one link definition under its lower-cased
    # id ($1), and removes the definition from the text.
    #
        $link_id = strtolower($matches[1]);
        $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
        if (isset($matches[3])) {
            # The title ends up inside a double-quoted HTML attribute, so
            # literal double quotes must be stored as the &quot; entity;
            # otherwise a quote in the title terminates the attribute.
            $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
        }
        return ''; # String that will replace the block
    }


    function hashHTMLBlocks($text) {
        if ($this->no_markup)  return $text;

        $less_than_tab = $this->tab_width - 1;

        # Hashify HTML blocks:
        # We only want to do this for block-level HTML tags, such as headers,
        # lists, and tables.
That's because we still want to wrap <p>s around 349 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 350 # phrase emphasis, and spans. The list of tags we're looking for is 351 # hard-coded: 352 # 353 # * List "a" is made of tags which can be both inline or block-level. 354 # These will be treated block-level when the start tag is alone on 355 # its line, otherwise they're not matched here and will be taken as 356 # inline later. 357 # * List "b" is made of tags which are always block-level; 358 # 359 $block_tags_a = 'ins|del'; 360 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 361 'script|noscript|form|fieldset|iframe|math'; 362 363 # Regular expression for the content of a block tag. 364 $nested_tags_level = 4; 365 $attr = ' 366 (?> # optional tag attributes 367 \s # starts with whitespace 368 (?> 369 [^>"/]+ # text outside quotes 370 | 371 /+(?!>) # slash not followed by ">" 372 | 373 "[^"]*" # text inside double quotes (tolerate ">") 374 | 375 \'[^\']*\' # text inside single quotes (tolerate ">") 376 )* 377 )? 378 '; 379 $content = 380 str_repeat(' 381 (?> 382 [^<]+ # content without tag 383 | 384 <\2 # nested opening tag 385 '.$attr.' # attributes 386 (?> 387 /> 388 | 389 >', $nested_tags_level). # end of opening tag 390 '.*?'. # last level nested tag content 391 str_repeat(' 392 </\2\s*> # closing nested tag 393 ) 394 | 395 <(?!/\2\s*> # other tags with a different name 396 ) 397 )*', 398 $nested_tags_level); 399 $content2 = str_replace('\2', '\3', $content); 400 401 # First, look for nested blocks, e.g.: 402 # <div> 403 # <div> 404 # tags for inner block must be indented. 405 # </div> 406 # </div> 407 # 408 # The outermost tags must start at the left margin for this to match, and 409 # the inner nested divs must be indented. 410 # We need to do this before the next, more liberal match, because the next 411 # match will start at the first `<div>` and stop at the first `</div>`. 
412 $text = preg_replace_callback('{(?> 413 (?> 414 (?<=\n\n) # Starting after a blank line 415 | # or 416 \A\n? # the beginning of the doc 417 ) 418 ( # save in $1 419 420 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 421 # in between. 422 423 [ ]{0,'.$less_than_tab.'} 424 <('.$block_tags_b.')# start tag = $2 425 '.$attr.'> # attributes followed by > and \n 426 '.$content.' # content, support nesting 427 </\2> # the matching end tag 428 [ ]* # trailing spaces/tabs 429 (?=\n+|\Z) # followed by a newline or end of document 430 431 | # Special version for tags of group a. 432 433 [ ]{0,'.$less_than_tab.'} 434 <('.$block_tags_a.')# start tag = $3 435 '.$attr.'>[ ]*\n # attributes followed by > 436 '.$content2.' # content, support nesting 437 </\3> # the matching end tag 438 [ ]* # trailing spaces/tabs 439 (?=\n+|\Z) # followed by a newline or end of document 440 441 | # Special case just for <hr />. It was easier to make a special 442 # case than to make the other regex more complicated. 443 444 [ ]{0,'.$less_than_tab.'} 445 <(hr) # start tag = $2 446 \b # word break 447 ([^<>])*? # 448 /?> # the matching end tag 449 [ ]* 450 (?=\n{2,}|\Z) # followed by a blank line or end of document 451 452 | # Special case for standalone HTML comments: 453 454 [ ]{0,'.$less_than_tab.'} 455 (?s: 456 <!-- .*? --> 457 ) 458 [ ]* 459 (?=\n{2,}|\Z) # followed by a blank line or end of document 460 461 | # PHP and ASP-style processor instructions (<? and <%) 462 463 [ ]{0,'.$less_than_tab.'} 464 (?s: 465 <([?%]) # $2 466 .*? 
467 \2> 468 ) 469 [ ]* 470 (?=\n{2,}|\Z) # followed by a blank line or end of document 471 472 ) 473 )}Sxmi', 474 array(&$this, '_hashHTMLBlocks_callback'), 475 $text); 476 477 return $text; 478 } 479 function _hashHTMLBlocks_callback($matches) { 480 $text = $matches[1]; 481 $key = $this->hashBlock($text); 482 return "\n\n$key\n\n"; 483 } 484 485 486 function hashPart($text, $boundary = 'X') { 487 # 488 # Called whenever a tag must be hashed when a function insert an atomic 489 # element in the text stream. Passing $text to through this function gives 490 # a unique text-token which will be reverted back when calling unhash. 491 # 492 # The $boundary argument specify what character should be used to surround 493 # the token. By convension, "B" is used for block elements that needs not 494 # to be wrapped into paragraph tags at the end, ":" is used for elements 495 # that are word separators and "S" is used for general span-level elements. 496 # 497 # Swap back any tag hash found in $text so we do not have to `unhash` 498 # multiple times at the end. 499 $text = $this->unhash($text); 500 501 # Then hash the block. 502 static $i = 0; 503 $key = "$boundary\x1A" . ++$i . $boundary; 504 $this->html_hashes[$key] = $text; 505 return $key; # String that will replace the tag. 506 } 507 508 509 function hashBlock($text) { 510 # 511 # Shortcut function for hashPart with block-level boundaries. 512 # 513 return $this->hashPart($text, 'B'); 514 } 515 516 517 var $block_gamut = array( 518 # 519 # These are all the transformations that form block-level 520 # tags like paragraphs, headers, and list items. 521 # 522 "doHeaders" => 10, 523 "doHorizontalRules" => 20, 524 525 "doLists" => 40, 526 "doCodeBlocks" => 50, 527 "doBlockQuotes" => 60, 528 ); 529 530 function runBlockGamut($text) { 531 # 532 # Run block gamut tranformations. 533 # 534 # We need to escape raw HTML in Markdown source before doing anything 535 # else. 
# This needs to be done for each block, and not only at the
        # beginning in the Markdown function since hashed blocks can be part of
        # list items and could have been indented. Indented blocks would have
        # been seen as a code block in a previous pass of hashHTMLBlocks.
        $text = $this->hashHTMLBlocks($text);

        return $this->runBasicBlockGamut($text);
    }

    function runBasicBlockGamut($text) {
    #
    # Run block gamut transformations, without hashing HTML blocks. This is
    # useful when HTML blocks are known to be already hashed, like in the first
    # whole-document pass.
    #
        foreach ($this->block_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        # Finally form paragraph and restore hashed blocks.
        $text = $this->formParagraphs($text);

        return $text;
    }


    function doHorizontalRules($text) {
    #
    # Replace horizontal-rule lines (three or more `*`, `-` or `_` markers
    # of the same kind, optionally separated by up to two spaces) with a
    # hashed <hr> block.
    #
    # The hyphen in the marker class is escaped on purpose: an unescaped
    # `*-_` is a character *range* from "*" to "_", which also covers
    # digits, uppercase letters and most punctuation, so a line such as
    # "111" or "AAA" would wrongly be turned into a rule.
    #
        return preg_replace(
            '{
                ^[ ]{0,3}   # Leading space
                ([*\-_])    # $1: First marker
                (?>         # Repeated marker group
                    [ ]{0,2}    # Zero, one, or two spaces.
                    \1          # Marker character
                ){2,}       # Group repeated at least twice
                [ ]*        # Tailing spaces
                $           # End of line.
            }mx',
            "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
            $text);
    }


    var $span_gamut = array(
    #
    # These are all the transformations that occur *within* block-level
    # tags like paragraphs, headers, and list items.
    #
        # Process character escapes, code spans, and inline HTML
        # in one shot.
        "parseSpan"           => -30,

        # Process anchor and image tags. Images must come first,
        # because ![foo][f] looks like an anchor.
        "doImages"            =>  10,
        "doAnchors"           =>  20,

        # Make links out of things like `<http://example.com/>`
        # Must come after doAnchors, because you can use < and >
        # delimiters in inline links like [this](<url>).
596 "doAutoLinks" => 30, 597 "encodeAmpsAndAngles" => 40, 598 599 "doItalicsAndBold" => 50, 600 "doHardBreaks" => 60, 601 ); 602 603 function runSpanGamut($text) { 604 # 605 # Run span gamut tranformations. 606 # 607 foreach ($this->span_gamut as $method => $priority) { 608 $text = $this->$method($text); 609 } 610 611 return $text; 612 } 613 614 615 function doHardBreaks($text) { 616 # Do hard breaks: 617 return preg_replace_callback('/ {2,}\n/', 618 array(&$this, '_doHardBreaks_callback'), $text); 619 } 620 function _doHardBreaks_callback($matches) { 621 return $this->hashPart("<br$this->empty_element_suffix\n"); 622 } 623 624 625 function doAnchors($text) { 626 # 627 # Turn Markdown link shortcuts into XHTML <a> tags. 628 # 629 if ($this->in_anchor) return $text; 630 $this->in_anchor = true; 631 632 # 633 # First, handle reference-style links: [link text] [id] 634 # 635 $text = preg_replace_callback('{ 636 ( # wrap whole match in $1 637 \[ 638 ('.$this->nested_brackets.') # link text = $2 639 \] 640 641 [ ]? # one optional space 642 (?:\n[ ]*)? # one optional newline followed by spaces 643 644 \[ 645 (.*?) # id = $3 646 \] 647 ) 648 }xs', 649 array(&$this, '_doAnchors_reference_callback'), $text); 650 651 # 652 # Next, inline-style links: [link text](url "optional title") 653 # 654 $text = preg_replace_callback('{ 655 ( # wrap whole match in $1 656 \[ 657 ('.$this->nested_brackets.') # link text = $2 658 \] 659 \( # literal paren 660 [ ]* 661 (?: 662 <(\S*)> # href = $3 663 | 664 ('.$this->nested_url_parenthesis.') # href = $4 665 ) 666 [ ]* 667 ( # $5 668 ([\'"]) # quote char = $6 669 (.*?) # Title = $7 670 \6 # matching quote 671 [ ]* # ignore any spaces/tabs between closing quote and ) 672 )? 
# title is optional 673 \) 674 ) 675 }xs', 676 array(&$this, '_DoAnchors_inline_callback'), $text); 677 678 # 679 # Last, handle reference-style shortcuts: [link text] 680 # These must come last in case you've also got [link test][1] 681 # or [link test](/foo) 682 # 683 // $text = preg_replace_callback('{ 684 // ( # wrap whole match in $1 685 // \[ 686 // ([^\[\]]+) # link text = $2; can\'t contain [ or ] 687 // \] 688 // ) 689 // }xs', 690 // array(&$this, '_doAnchors_reference_callback'), $text); 691 692 $this->in_anchor = false; 693 return $text; 694 } 695 function _doAnchors_reference_callback($matches) { 696 $whole_match = $matches[1]; 697 $link_text = $matches[2]; 698 $link_id =& $matches[3]; 699 700 if ($link_id == "") { 701 # for shortcut links like [this][] or [this]. 702 $link_id = $link_text; 703 } 704 705 # lower-case and turn embedded newlines into spaces 706 $link_id = strtolower($link_id); 707 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 708 709 if (isset($this->urls[$link_id])) { 710 $url = $this->urls[$link_id]; 711 $url = $this->encodeAmpsAndAngles($url); 712 713 $result = "<a href=\"$url\""; 714 if ( isset( $this->titles[$link_id] ) ) { 715 $title = $this->titles[$link_id]; 716 $title = $this->encodeAmpsAndAngles($title); 717 $result .= " title=\"$title\""; 718 } 719 720 $link_text = $this->runSpanGamut($link_text); 721 $result .= ">$link_text</a>"; 722 $result = $this->hashPart($result); 723 } 724 else { 725 $result = $whole_match; 726 } 727 return $result; 728 } 729 function _doAnchors_inline_callback($matches) { 730 $whole_match = $matches[1]; 731 $link_text = $this->runSpanGamut($matches[2]); 732 $url = $matches[3] == '' ? 
$matches[4] : $matches[3];
        $title          =& $matches[7];

        $url = $this->encodeAmpsAndAngles($url);

        $result = "<a href=\"$url\"";
        if (isset($title)) {
            # The title is emitted inside a double-quoted attribute, so
            # literal double quotes must become the &quot; entity.
            $title = str_replace('"', '&quot;', $title);
            $title = $this->encodeAmpsAndAngles($title);
            $result .=  " title=\"$title\"";
        }

        $link_text = $this->runSpanGamut($link_text);
        $result .= ">$link_text</a>";

        # Hash the finished tag so later span transformations leave it alone.
        return $this->hashPart($result);
    }


    function doImages($text) {
    #
    # Turn Markdown image shortcuts into <img> tags.
    #
        #
        # First, handle reference-style labeled images: ![alt text][id]
        #
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets.')        # alt text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]

            )
            }xs',
            array(&$this, '_doImages_reference_callback'), $text);

        #
        # Next, handle inline images:  ![alt text](url "optional title")
        # Don't forget: encode * and _
        #
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets.')        # alt text = $2
              \]
              \s?           # One optional whitespace character
              \(            # literal paren
                [ ]*
                (?:
                    <(\S*)> # src url = $3
                |
                    ('.$this->nested_url_parenthesis.') # src url = $4
                )
                [ ]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # title = $7
                  \6        # matching quote
                  [ ]*
                )?          # title is optional
              \)
            )
            }xs',
            array(&$this, '_doImages_inline_callback'), $text);

        return $text;
    }
    function _doImages_reference_callback($matches) {
        $whole_match = $matches[1];
        $alt_text    = $matches[2];
        $link_id     = strtolower($matches[3]);

        if ($link_id == "") {
            $link_id = strtolower($alt_text); # for shortcut links like ![this][].
}

        # The alt text is emitted inside a double-quoted attribute, so
        # literal double quotes must become the &quot; entity.
        $alt_text = str_replace('"', '&quot;', $alt_text);
        if (isset($this->urls[$link_id])) {
            $url = $this->urls[$link_id];
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (isset($this->titles[$link_id])) {
                $title = $this->titles[$link_id];
                $result .=  " title=\"$title\"";
            }
            $result .= $this->empty_element_suffix;
            $result = $this->hashPart($result);
        }
        else {
            # If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        return $result;
    }
    function _doImages_inline_callback($matches) {
    #
    # Callback for inline images: builds the <img> tag from the alt text
    # ($2), the URL ($3 or $4) and the optional title ($7), and returns
    # it as a hashed span.
    #
        $whole_match    = $matches[1];
        $alt_text       = $matches[2];
        $url            = $matches[3] == '' ? $matches[4] : $matches[3];
        $title          =& $matches[7];

        # Both values go into double-quoted attributes: escape literal
        # double quotes as &quot; so they cannot close the attribute.
        $alt_text = str_replace('"', '&quot;', $alt_text);
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        if (isset($title)) {
            $title = str_replace('"', '&quot;', $title);
            $result .=  " title=\"$title\""; # $title already quoted
        }
        $result .= $this->empty_element_suffix;

        return $this->hashPart($result);
    }


    function doHeaders($text) {
        # Setext-style headers:
        #     Header 1
        #     ========
        #
        #     Header 2
        #     --------
        #
        $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
            array(&$this, '_doHeaders_callback_setext'), $text);

        # atx-style headers:
        #   # Header 1
        #   ## Header 2
        #   ## Header 2 with closing hashes ##
        #   ...
        #   ###### Header 6
        #
        $text = preg_replace_callback('{
                ^(\#{1,6})  # $1 = string of #\'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #\'s (not counted)
                \n+
            }xm',
            array(&$this, '_doHeaders_callback_atx'), $text);

        return $text;
    }
    function _doHeaders_callback_setext($matches) {
    #
    # Callback for setext-style headers: an underline of "=" gives <h1>,
    # an underline of "-" gives <h2>.
    #
        # Square-bracket offset: the curly-brace form ($str{0}) is
        # deprecated since PHP 7.4 and a fatal error in PHP 8.
        $level = $matches[2][0] == '=' ? 1 : 2;
        $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
        return "\n" . $this->hashBlock($block) .
"\n\n"; 885 } 886 function _doHeaders_callback_atx($matches) { 887 $level = strlen($matches[1]); 888 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 889 return "\n" . $this->hashBlock($block) . "\n\n"; 890 } 891 892 893 function doLists($text) { 894 # 895 # Form HTML ordered (numbered) and unordered (bulleted) lists. 896 # 897 $less_than_tab = $this->tab_width - 1; 898 899 # Re-usable patterns to match list item bullets and number markers: 900 $marker_ul = '[*+-]'; 901 $marker_ol = '\d+[.]'; 902 $marker_any = "(?:$marker_ul|$marker_ol)"; 903 904 $markers = array($marker_ul, $marker_ol); 905 906 foreach ($markers as $marker) { 907 # Re-usable pattern to match any entirel ul or ol list: 908 $whole_list = ' 909 ( # $1 = whole list 910 ( # $2 911 [ ]{0,'.$less_than_tab.'} 912 ('.$marker.') # $3 = first list item marker 913 [ ]+ 914 ) 915 (?s:.+?) 916 ( # $4 917 \z 918 | 919 \n{2,} 920 (?=\S) 921 (?! # Negative lookahead for another list item marker 922 [ ]* 923 '.$marker.'[ ]+ 924 ) 925 ) 926 ) 927 '; // mx 928 929 # We use a different prefix before nested lists than top-level lists. 930 # See extended comment in _ProcessListItems(). 931 932 if ($this->list_level) { 933 $text = preg_replace_callback('{ 934 ^ 935 '.$whole_list.' 936 }mx', 937 array(&$this, '_doLists_callback'), $text); 938 } 939 else { 940 $text = preg_replace_callback('{ 941 (?:(?<=\n)\n|\A\n?) # Must eat the newline 942 '.$whole_list.' 943 }mx', 944 array(&$this, '_doLists_callback'), $text); 945 } 946 } 947 948 return $text; 949 } 950 function _doLists_callback($matches) { 951 # Re-usable patterns to match list item bullets and number markers: 952 $marker_ul = '[*+-]'; 953 $marker_ol = '\d+[.]'; 954 $marker_any = "(?:$marker_ul|$marker_ol)"; 955 956 $list = $matches[1]; 957 $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol"; 958 959 $marker_any = ( $list_type == "ul" ? 
$marker_ul : $marker_ol ); 960 961 $list .= "\n"; 962 $result = $this->processListItems($list, $marker_any); 963 964 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>"); 965 return "\n". $result ."\n\n"; 966 } 967 968 var $list_level = 0; 969 970 function processListItems($list_str, $marker_any) { 971 # 972 # Process the contents of a single ordered or unordered list, splitting it 973 # into individual list items. 974 # 975 # The $this->list_level global keeps track of when we're inside a list. 976 # Each time we enter a list, we increment it; when we leave a list, 977 # we decrement. If it's zero, we're not in a list anymore. 978 # 979 # We do this because when we're not inside a list, we want to treat 980 # something like this: 981 # 982 # I recommend upgrading to version 983 # 8. Oops, now this line is treated 984 # as a sub-list. 985 # 986 # As a single paragraph, despite the fact that the second line starts 987 # with a digit-period-space sequence. 988 # 989 # Whereas when we're inside a list (or sub-list), that line will be 990 # treated as the start of a sub-list. What a kludge, huh? This is 991 # an aspect of Markdown's syntax that's hard to parse perfectly 992 # without resorting to mind-reading. Perhaps the solution is to 993 # change the syntax rules such that sub-lists must start with a 994 # starting cardinal number; e.g. "1." or "a.". 995 996 $this->list_level++; 997 998 # trim trailing blank lines: 999 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 1000 1001 $list_str = preg_replace_callback('{ 1002 (\n)? 
# leading line = $1 1003 (^[ ]*) # leading whitespace = $2 1004 ('.$marker_any.') [ ]+ # list marker = $3 1005 ((?s:.+?)) # list item text = $4 1006 (?:(\n+(?=\n))|\n) # tailing blank line = $5 1007 (?= \n* (\z | \2 ('.$marker_any.') [ ]+)) 1008 }xm', 1009 array(&$this, '_processListItems_callback'), $list_str); 1010 1011 $this->list_level--; 1012 return $list_str; 1013 } 1014 function _processListItems_callback($matches) { 1015 $item = $matches[4]; 1016 $leading_line =& $matches[1]; 1017 $leading_space =& $matches[2]; 1018 $tailing_blank_line =& $matches[5]; 1019 1020 if ($leading_line || $tailing_blank_line || 1021 preg_match('/\n{2,}/', $item)) 1022 { 1023 $item = $this->runBlockGamut($this->outdent($item)."\n"); 1024 } 1025 else { 1026 # Recursion for sub-lists: 1027 $item = $this->doLists($this->outdent($item)); 1028 $item = preg_replace('/\n+$/', '', $item); 1029 $item = $this->runSpanGamut($item); 1030 } 1031 1032 return "<li>" . $item . "</li>\n"; 1033 } 1034 1035 1036 function doCodeBlocks($text) { 1037 # 1038 # Process Markdown `<pre><code>` blocks. 
1039 # 1040 $text = preg_replace_callback('{ 1041 (?:\n\n|\A) 1042 ( # $1 = the code block -- one or more lines, starting with a space/tab 1043 (?> 1044 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 1045 .*\n+ 1046 )+ 1047 ) 1048 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc 1049 }xm', 1050 array(&$this, '_doCodeBlocks_callback'), $text); 1051 1052 return $text; 1053 } 1054 function _doCodeBlocks_callback($matches) { 1055 $codeblock = $matches[1]; 1056 1057 $codeblock = $this->outdent($codeblock); 1058 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1059 1060 # trim leading newlines and trailing newlines 1061 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1062 1063 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1064 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1065 } 1066 1067 1068 function makeCodeSpan($code) { 1069 # 1070 # Create a code span markup for $code. Called from handleSpanToken. 1071 # 1072 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1073 return $this->hashPart("<code>$code</code>"); 1074 } 1075 1076 1077 function doItalicsAndBold($text) { 1078 # <strong> must go first: 1079 $text = preg_replace_callback('{ 1080 ( # $1: Marker 1081 (?<!\*\*) \* | # (not preceded by two chars of 1082 (?<!__) _ # the same marker) 1083 ) 1084 \1 1085 (?=\S) # Not followed by whitespace 1086 (?!\1\1) # or two others marker chars. 1087 ( # $2: Content 1088 (?> 1089 [^*_]+? # Anthing not em markers. 1090 | 1091 # Balence any regular emphasis inside. 1092 \1 (?=\S) .+? (?<=\S) \1 1093 | 1094 . # Allow unbalenced * and _. 1095 )+? 1096 ) 1097 (?<=\S) \1\1 # End mark not preceded by whitespace. 1098 }sx', 1099 array(&$this, '_doItalicAndBold_strong_callback'), $text); 1100 # Then <em>: 1101 $text = preg_replace_callback( 1102 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) 
\1 }sx', 1103 array(&$this, '_doItalicAndBold_em_callback'), $text); 1104 1105 return $text; 1106 } 1107 function _doItalicAndBold_em_callback($matches) { 1108 $text = $matches[2]; 1109 $text = $this->runSpanGamut($text); 1110 return $this->hashPart("<em>$text</em>"); 1111 } 1112 function _doItalicAndBold_strong_callback($matches) { 1113 $text = $matches[2]; 1114 $text = $this->runSpanGamut($text); 1115 return $this->hashPart("<strong>$text</strong>"); 1116 } 1117 1118 1119 function doBlockQuotes($text) { 1120 $text = preg_replace_callback('/ 1121 ( # Wrap whole match in $1 1122 (?> 1123 ^[ ]*>[ ]? # ">" at the start of a line 1124 .+\n # rest of the first line 1125 (.+\n)* # subsequent consecutive lines 1126 \n* # blanks 1127 )+ 1128 ) 1129 /xm', 1130 array(&$this, '_doBlockQuotes_callback'), $text); 1131 1132 return $text; 1133 } 1134 function _doBlockQuotes_callback($matches) { 1135 $bq = $matches[1]; 1136 # trim one level of quoting - trim whitespace-only lines 1137 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1138 $bq = $this->runBlockGamut($bq); # recurse 1139 1140 $bq = preg_replace('/^/m', " ", $bq); 1141 # These leading spaces cause problem with <pre> content, 1142 # so we need to fix that: 1143 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 1144 array(&$this, '_DoBlockQuotes_callback2'), $bq); 1145 1146 return "\n". 
$this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 1147 } 1148 function _doBlockQuotes_callback2($matches) { 1149 $pre = $matches[1]; 1150 $pre = preg_replace('/^ /m', '', $pre); 1151 return $pre; 1152 } 1153 1154 1155 function formParagraphs($text) { 1156 # 1157 # Params: 1158 # $text - string to process with html <p> tags 1159 # 1160 # Strip leading and trailing lines: 1161 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1162 1163 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1164 1165 # 1166 # Wrap <p> tags and unhashify HTML blocks 1167 # 1168 foreach ($grafs as $key => $value) { 1169 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1170 # Is a paragraph. 1171 $value = $this->runSpanGamut($value); 1172 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1173 $value .= "</p>"; 1174 $grafs[$key] = $this->unhash($value); 1175 } 1176 else { 1177 # Is a block. 1178 # Modify elements of @grafs in-place... 1179 $graf = $value; 1180 $block = $this->html_hashes[$graf]; 1181 $graf = $block; 1182 // if (preg_match('{ 1183 // \A 1184 // ( # $1 = <div> tag 1185 // <div \s+ 1186 // [^>]* 1187 // \b 1188 // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1189 // 1 1190 // \2 1191 // [^>]* 1192 // > 1193 // ) 1194 // ( # $3 = contents 1195 // .* 1196 // ) 1197 // (</div>) # $4 = closing tag 1198 // \z 1199 // }xs', $block, $matches)) 1200 // { 1201 // list(, $div_open, , $div_content, $div_close) = $matches; 1202 // 1203 // # We can't call Markdown(), because that resets the hash; 1204 // # that initialization code should be pulled into its own sub, though. 1205 // $div_content = $this->hashHTMLBlocks($div_content); 1206 // 1207 // # Run document gamut methods on the content. 1208 // foreach ($this->document_gamut as $method => $priority) { 1209 // $div_content = $this->$method($div_content); 1210 // } 1211 // 1212 // $div_open = preg_replace( 1213 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1214 // 1215 // $graf = $div_open . "\n" . 
$div_content . "\n" . $div_close; 1216 // } 1217 $grafs[$key] = $graf; 1218 } 1219 } 1220 1221 return implode("\n\n", $grafs); 1222 } 1223 1224 1225 function encodeAmpsAndAngles($text) { 1226 # Smart processing for ampersands and angle brackets that need to be encoded. 1227 if ($this->no_entities) { 1228 $text = str_replace('&', '&', $text); 1229 $text = str_replace('<', '<', $text); 1230 return $text; 1231 } 1232 1233 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: 1234 # http://bumppo.net/projects/amputator/ 1235 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1236 '&', $text);; 1237 1238 # Encode naked <'s 1239 $text = preg_replace('{<(?![a-z/?\$!%])}i', '<', $text); 1240 1241 return $text; 1242 } 1243 1244 1245 function doAutoLinks($text) { 1246 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}', 1247 array(&$this, '_doAutoLinks_url_callback'), $text); 1248 1249 # Email addresses: <address@domain.foo> 1250 $text = preg_replace_callback('{ 1251 < 1252 (?:mailto:)? 1253 ( 1254 [-.\w\x80-\xFF]+ 1255 \@ 1256 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1257 ) 1258 > 1259 }xi', 1260 array(&$this, '_doAutoLinks_email_callback'), $text); 1261 1262 return $text; 1263 } 1264 function _doAutoLinks_url_callback($matches) { 1265 $url = $this->encodeAmpsAndAngles($matches[1]); 1266 $link = "<a href=\"$url\">$url</a>"; 1267 return $this->hashPart($link); 1268 } 1269 function _doAutoLinks_email_callback($matches) { 1270 $address = $matches[1]; 1271 $link = $this->encodeEmailAddress($address); 1272 return $this->hashPart($link); 1273 } 1274 1275 1276 function encodeEmailAddress($addr) { 1277 # 1278 # Input: an email address, e.g. "foo@example.com" 1279 # 1280 # Output: the email address as a mailto link, with each character 1281 # of the address encoded as either a decimal or hex entity, in 1282 # the hopes of foiling most address harvesting spam bots. 
E.g.: 1283 # 1284 # <p><a href="mailto:foo 1285 # @example.co 1286 # m">foo@exampl 1287 # e.com</a></p> 1288 # 1289 # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1290 # With some optimizations by Milian Wolff. 1291 # 1292 $addr = "mailto:" . $addr; 1293 $chars = preg_split('/(?<!^)(?!$)/', $addr); 1294 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed. 1295 1296 foreach ($chars as $key => $char) { 1297 $ord = ord($char); 1298 # Ignore non-ascii chars. 1299 if ($ord < 128) { 1300 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 1301 # roughly 10% raw, 45% hex, 45% dec 1302 # '@' *must* be encoded. I insist. 1303 if ($r > 90 && $char != '@') /* do nothing */; 1304 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 1305 else $chars[$key] = '&#'.$ord.';'; 1306 } 1307 } 1308 1309 $addr = implode('', $chars); 1310 $text = implode('', array_slice($chars, 7)); # text without `mailto:` 1311 $addr = "<a href=\"$addr\">$text</a>"; 1312 1313 return $addr; 1314 } 1315 1316 1317 function parseSpan($str) { 1318 # 1319 # Take the string $str and parse it into tokens, hashing embeded HTML, 1320 # escaped characters and handling code spans. 1321 # 1322 $output = ''; 1323 1324 $regex = '{ 1325 ( 1326 \\\\['.preg_quote($this->escape_chars).'] 1327 | 1328 (?<![`\\\\]) 1329 `+ # code span marker 1330 '.( $this->no_markup ? '' : ' 1331 | 1332 <!-- .*? --> # comment 1333 | 1334 <\?.*?\?> | <%.*?%> # processing instruction 1335 | 1336 <[/!$]?[-a-zA-Z0-9:]+ # regular tags 1337 (?> 1338 \s 1339 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1340 )? 1341 > 1342 ').' 1343 ) 1344 }xs'; 1345 1346 while (1) { 1347 # 1348 # Each loop iteration seach for either the next tag, the next 1349 # openning code span marker, or the next escaped character. 1350 # Each token is then passed to handleSpanToken. 1351 # 1352 $parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1353 1354 # Create token from text preceding tag. 
1355 if ($parts[0] != "") { 1356 $output .= $parts[0]; 1357 } 1358 1359 # Check if we reach the end. 1360 if (isset($parts[1])) { 1361 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1362 $str = $parts[2]; 1363 } 1364 else { 1365 break; 1366 } 1367 } 1368 1369 return $output; 1370 } 1371 1372 1373 function handleSpanToken($token, &$str) { 1374 # 1375 # Handle $token provided by parseSpan by determining its nature and 1376 # returning the corresponding value that should replace it. 1377 # 1378 switch ($token{0}) { 1379 case "\\": 1380 return $this->hashPart("&#". ord($token{1}). ";"); 1381 case "`": 1382 # Search for end marker in remaining text. 1383 if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm', 1384 $str, $matches)) 1385 { 1386 $str = $matches[2]; 1387 $codespan = $this->makeCodeSpan($matches[1]); 1388 return $this->hashPart($codespan); 1389 } 1390 return $token; // return as text since no ending marker found. 1391 default: 1392 return $this->hashPart($token); 1393 } 1394 } 1395 1396 1397 function outdent($text) { 1398 # 1399 # Remove one level of line-leading tabs or spaces 1400 # 1401 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1402 } 1403 1404 1405 # String length function for detab. `_initDetab` will create a function to 1406 # hanlde UTF-8 if the default function does not exist. 1407 var $utf8_strlen = 'mb_strlen'; 1408 1409 function detab($text) { 1410 # 1411 # Replace tabs with the appropriate amount of space. 1412 # 1413 # For each line we separate the line in blocks delemited by 1414 # tab characters. Then we reconstruct every line by adding the 1415 # appropriate number of space between each blocks. 1416 1417 $text = preg_replace_callback('/^.*\t.*$/m', 1418 array(&$this, '_detab_callback'), $text); 1419 1420 return $text; 1421 } 1422 function _detab_callback($matches) { 1423 $line = $matches[0]; 1424 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1425 1426 # Split in blocks. 
1427 $blocks = explode("\t", $line); 1428 # Add each blocks to the line. 1429 $line = $blocks[0]; 1430 unset($blocks[0]); # Do not add first block twice. 1431 foreach ($blocks as $block) { 1432 # Calculate amount of space, insert spaces, insert block. 1433 $amount = $this->tab_width - 1434 $strlen($line, 'UTF-8') % $this->tab_width; 1435 $line .= str_repeat(" ", $amount) . $block; 1436 } 1437 return $line; 1438 } 1439 function _initDetab() { 1440 # 1441 # Check for the availability of the function in the `utf8_strlen` property 1442 # (initially `mb_strlen`). If the function is not available, create a 1443 # function that will loosely count the number of UTF-8 characters with a 1444 # regular expression. 1445 # 1446 if (function_exists($this->utf8_strlen)) return; 1447 $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1448 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1449 $text, $m);'); 1450 } 1451 1452 1453 function unhash($text) { 1454 # 1455 # Swap back in all the tags hashed by _HashHTMLBlocks. 1456 # 1457 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1458 array(&$this, '_unhash_callback'), $text); 1459 } 1460 function _unhash_callback($matches) { 1461 return $this->html_hashes[$matches[0]]; 1462 } 1463 1464 } 1465 1466 1467 # 1468 # Markdown Extra Parser Class 1469 # 1470 1471 class MarkdownExtra_Parser extends Markdown_Parser { 1472 1473 # Prefix for footnote ids. 1474 var $fn_id_prefix = ""; 1475 1476 # Optional title attribute for footnote links and backlinks. 1477 var $fn_link_title = MARKDOWN_FN_LINK_TITLE; 1478 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 1479 1480 # Optional class attribute for footnote links and backlinks. 1481 var $fn_link_class = MARKDOWN_FN_LINK_CLASS; 1482 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 1483 1484 1485 function MarkdownExtra_Parser() { 1486 # 1487 # Constructor function. Initialize the parser object. 
1488 # 1489 # Add extra escapable characters before parent constructor 1490 # initialize the table. 1491 $this->escape_chars .= ':|'; 1492 1493 # Insert extra document, block, and span transformations. 1494 # Parent constructor will do the sorting. 1495 $this->document_gamut += array( 1496 "stripFootnotes" => 15, 1497 "stripAbbreviations" => 25, 1498 "appendFootnotes" => 50, 1499 ); 1500 $this->block_gamut += array( 1501 "doTables" => 15, 1502 "doDefLists" => 45, 1503 ); 1504 $this->span_gamut += array( 1505 "doFootnotes" => 5, 1506 "doAbbreviations" => 70, 1507 ); 1508 1509 parent::Markdown_Parser(); 1510 } 1511 1512 1513 # Extra hashes used during extra transformations. 1514 var $footnotes = array(); 1515 var $footnotes_ordered = array(); 1516 var $abbr_desciptions = array(); 1517 var $abbr_matches = array(); 1518 1519 # Status flag to avoid invalid nesting. 1520 var $in_footnote = false; 1521 1522 1523 function transform($text) { 1524 # 1525 # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before 1526 # blank line stripping and added extra parameter to `runBlockGamut`. 1527 # 1528 # Clear the global hashes. If we don't clear these, you get conflicts 1529 # from other articles when generating a page which contains more than 1530 # one article (e.g. 
an index page that shows the N most recent 1531 # articles): 1532 $this->footnotes = array(); 1533 $this->footnotes_ordered = array(); 1534 $this->abbr_desciptions = array(); 1535 $this->abbr_matches = array(); 1536 1537 return parent::transform($text); 1538 } 1539 1540 1541 ### HTML Block Parser ### 1542 1543 # Tags that are always treated as block tags: 1544 var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; 1545 1546 # Tags treated as block tags only if the opening tag is alone on it's line: 1547 var $context_block_tags = 'script|noscript|math|ins|del'; 1548 1549 # Tags where markdown="1" default to span mode: 1550 var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1551 1552 # Tags which must not have their contents modified, no matter where 1553 # they appear: 1554 var $clean_tags = 'script|math'; 1555 1556 # Tags that do not need to be closed. 1557 var $auto_close_tags = 'hr|img'; 1558 1559 1560 function hashHTMLBlocks($text) { 1561 # 1562 # Hashify HTML Blocks and "clean tags". 1563 # 1564 # We only want to do this for block-level HTML tags, such as headers, 1565 # lists, and tables. That's because we still want to wrap <p>s around 1566 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1567 # phrase emphasis, and spans. The list of tags we're looking for is 1568 # hard-coded. 1569 # 1570 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 1571 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 1572 # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back 1573 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 1574 # These two functions are calling each other. It's recursive! 1575 # 1576 # 1577 # Call the HTML-in-Markdown hasher. 
1578 # 1579 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 1580 1581 return $text; 1582 } 1583 function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 1584 $enclosing_tag = '', $span = false) 1585 { 1586 # 1587 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 1588 # 1589 # * $indent is the number of space to be ignored when checking for code 1590 # blocks. This is important because if we don't take the indent into 1591 # account, something like this (which looks right) won't work as expected: 1592 # 1593 # <div> 1594 # <div markdown="1"> 1595 # Hello World. <-- Is this a Markdown code block or text? 1596 # </div> <-- Is this a Markdown code block or a real tag? 1597 # <div> 1598 # 1599 # If you don't like this, just don't indent the tag on which 1600 # you apply the markdown="1" attribute. 1601 # 1602 # * If $enclosing_tag is not empty, stops at the first unmatched closing 1603 # tag with that name. Nested tags supported. 1604 # 1605 # * If $span is true, text inside must treated as span. So any double 1606 # newline will be replaced by a single newline so that it does not create 1607 # paragraphs. 1608 # 1609 # Returns an array of that form: ( processed text , remaining text ) 1610 # 1611 if ($text === '') return array('', ''); 1612 1613 # Regex to check for the presense of newlines around a block tag. 1614 $newline_match_before = '/(?:^\n?|\n\n)*$/'; 1615 $newline_match_after = 1616 '{ 1617 ^ # Start of text following the tag. 1618 (?:[ ]*<!--.*?-->)? # Optional comment. 1619 [ ]*\n # Must be followed by newline. 1620 }xs'; 1621 1622 # Regex to match any tag. 1623 $block_tag_match = 1624 '{ 1625 ( # $2: Capture hole tag. 1626 </? # Any opening or closing tag. 1627 (?: # Tag name. 1628 '.$this->block_tags.' | 1629 '.$this->context_block_tags.' | 1630 '.$this->clean_tags.' | 1631 (?!\s)'.$enclosing_tag.' 1632 ) 1633 \s* # Whitespace. 1634 (?> 1635 ".*?" 
| # Double quotes (can contain `>`) 1636 \'.*?\' | # Single quotes (can contain `>`) 1637 .+? # Anything but quotes and `>`. 1638 )*? 1639 > # End of tag. 1640 | 1641 <!-- .*? --> # HTML Comment 1642 | 1643 <\?.*?\?> | <%.*?%> # Processing instruction 1644 | 1645 <!\[CDATA\[.*?\]\]> # CData Block 1646 ) 1647 }xs'; 1648 1649 1650 $depth = 0; # Current depth inside the tag tree. 1651 $parsed = ""; # Parsed text that will be returned. 1652 1653 # 1654 # Loop through every tag until we find the closing tag of the parent 1655 # or loop until reaching the end of text if no parent tag specified. 1656 # 1657 do { 1658 # 1659 # Split the text using the first $tag_match pattern found. 1660 # Text before pattern will be first in the array, text after 1661 # pattern will be at the end, and between will be any catches made 1662 # by the pattern. 1663 # 1664 $parts = preg_split($block_tag_match, $text, 2, 1665 PREG_SPLIT_DELIM_CAPTURE); 1666 1667 # If in Markdown span mode, add a empty-string span-level hash 1668 # after each newline to prevent triggering any block element. 1669 if ($span) { 1670 $void = $this->hashPart("", ':'); 1671 $newline = "$void\n"; 1672 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 1673 } 1674 1675 $parsed .= $parts[0]; # Text before current tag. 1676 1677 # If end of $text has been reached. Stop loop. 1678 if (count($parts) < 3) { 1679 $text = ""; 1680 break; 1681 } 1682 1683 $tag = $parts[1]; # Tag to handle. 1684 $text = $parts[2]; # Remaining text after current tag. 1685 1686 # 1687 # Check for: Tag inside code block or span 1688 # 1689 if (# Find current paragraph 1690 preg_match('/(?>^\n?|\n\n)((?>.+\n?)*?)$/', $parsed, $matches) && 1691 ( 1692 # Then match in it either a code block... 1693 preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'. 1694 '(?!\n)$/', $matches[1], $x) || 1695 # ...or unbalenced code span markers. 
(the regex matches balenced) 1696 !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s', 1697 $matches[1]) 1698 )) 1699 { 1700 # Tag is in code block or span and may not be a tag at all. So we 1701 # simply skip the first char (should be a `<`). 1702 $parsed .= $tag{0}; 1703 $text = substr($tag, 1) . $text; # Put back $tag minus first char. 1704 } 1705 # 1706 # Check for: Opening Block level tag or 1707 # Opening Content Block tag (like ins and del) 1708 # used as a block tag (tag is alone on it's line). 1709 # 1710 else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) || 1711 ( preg_match("{^<(?:$this->context_block_tags)\b}", $tag) && 1712 preg_match($newline_match_before, $parsed) && 1713 preg_match($newline_match_after, $text) ) 1714 ) 1715 { 1716 # Need to parse tag and following text using the HTML parser. 1717 list($block_text, $text) = 1718 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 1719 1720 # Make sure it stays outside of any paragraph by adding newlines. 1721 $parsed .= "\n\n$block_text\n\n"; 1722 } 1723 # 1724 # Check for: Clean tag (like script, math) 1725 # HTML Comments, processing instructions. 1726 # 1727 else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) || 1728 $tag{1} == '!' || $tag{1} == '?') 1729 { 1730 # Need to parse tag and following text using the HTML parser. 1731 # (don't check for markdown attribute) 1732 list($block_text, $text) = 1733 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 1734 1735 $parsed .= $block_text; 1736 } 1737 # 1738 # Check for: Tag with same name as enclosing tag. 1739 # 1740 else if ($enclosing_tag !== '' && 1741 # Same name as enclosing tag. 1742 preg_match("{^</?(?:$enclosing_tag)\b}", $tag)) 1743 { 1744 # 1745 # Increase/decrease nested tag count. 1746 # 1747 if ($tag{1} == '/') $depth--; 1748 else if ($tag{strlen($tag)-2} != '/') $depth++; 1749 1750 if ($depth < 0) { 1751 # 1752 # Going out of parent element. 
Clean up and break so we 1753 # return to the calling function. 1754 # 1755 $text = $tag . $text; 1756 break; 1757 } 1758 1759 $parsed .= $tag; 1760 } 1761 else { 1762 $parsed .= $tag; 1763 } 1764 } while ($depth >= 0); 1765 1766 return array($parsed, $text); 1767 } 1768 function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 1769 # 1770 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 1771 # 1772 # * Calls $hash_method to convert any blocks. 1773 # * Stops when the first opening tag closes. 1774 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 1775 # (it is not inside clean tags) 1776 # 1777 # Returns an array of that form: ( processed text , remaining text ) 1778 # 1779 if ($text === '') return array('', ''); 1780 1781 # Regex to match `markdown` attribute inside of a tag. 1782 $markdown_attr_match = ' 1783 { 1784 \s* # Eat whitespace before the `markdown` attribute 1785 markdown 1786 \s*=\s* 1787 (?: 1788 (["\']) # $1: quote delimiter 1789 (.*?) # $2: attribute value 1790 \1 # matching delimiter 1791 | 1792 ([^\s>]*) # $3: unquoted attribute value 1793 ) 1794 () # $4: make $3 always defined (avoid warnings) 1795 }xs'; 1796 1797 # Regex to match any tag. 1798 $tag_match = '{ 1799 ( # $2: Capture hole tag. 1800 </? # Any opening or closing tag. 1801 [\w:$]+ # Tag name. 1802 \s* # Whitespace. 1803 (?> 1804 ".*?" | # Double quotes (can contain `>`) 1805 \'.*?\' | # Single quotes (can contain `>`) 1806 .+? # Anything but quotes and `>`. 1807 )*? 1808 > # End of tag. 1809 | 1810 <!-- .*? --> # HTML Comment 1811 | 1812 <\?.*?\?> | <%.*?%> # Processing instruction 1813 | 1814 <!\[CDATA\[.*?\]\]> # CData Block 1815 ) 1816 }xs'; 1817 1818 $original_text = $text; # Save original text in case of faliure. 1819 1820 $depth = 0; # Current depth inside the tag tree. 1821 $block_text = ""; # Temporary text holder for current text. 1822 $parsed = ""; # Parsed text that will be returned. 
1823 1824 # 1825 # Get the name of the starting tag. 1826 # 1827 if (preg_match("/^<([\w:$]*)\b/", $text, $matches)) 1828 $base_tag_name = $matches[1]; 1829 1830 # 1831 # Loop through every tag until we find the corresponding closing tag. 1832 # 1833 do { 1834 # 1835 # Split the text using the first $tag_match pattern found. 1836 # Text before pattern will be first in the array, text after 1837 # pattern will be at the end, and between will be any catches made 1838 # by the pattern. 1839 # 1840 $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1841 1842 if (count($parts) < 3) { 1843 # 1844 # End of $text reached with unbalenced tag(s). 1845 # In that case, we return original text unchanged and pass the 1846 # first character as filtered to prevent an infinite loop in the 1847 # parent function. 1848 # 1849 return array($original_text{0}, substr($original_text, 1)); 1850 } 1851 1852 $block_text .= $parts[0]; # Text before current tag. 1853 $tag = $parts[1]; # Tag to handle. 1854 $text = $parts[2]; # Remaining text after current tag. 1855 1856 # 1857 # Check for: Auto-close tag (like <hr/>) 1858 # Comments and Processing Instructions. 1859 # 1860 if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) || 1861 $tag{1} == '!' || $tag{1} == '?') 1862 { 1863 # Just add the tag to the block as if it was text. 1864 $block_text .= $tag; 1865 } 1866 else { 1867 # 1868 # Increase/decrease nested tag count. Only do so if 1869 # the tag's name match base tag's. 1870 # 1871 if (preg_match("{^</?$base_tag_name\b}", $tag)) { 1872 if ($tag{1} == '/') $depth--; 1873 else if ($tag{strlen($tag)-2} != '/') $depth++; 1874 } 1875 1876 # 1877 # Check for `markdown="1"` attribute and handle it. 1878 # 1879 if ($md_attr && 1880 preg_match($markdown_attr_match, $tag, $attr_m) && 1881 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 1882 { 1883 # Remove `markdown` attribute from opening tag. 
1884 $tag = preg_replace($markdown_attr_match, '', $tag); 1885 1886 # Check if text inside this tag must be parsed in span mode. 1887 $this->mode = $attr_m[2] . $attr_m[3]; 1888 $span_mode = $this->mode == 'span' || $this->mode != 'block' && 1889 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag); 1890 1891 # Calculate indent before tag. 1892 preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches); 1893 $indent = strlen($matches[1]); 1894 1895 # End preceding block with this tag. 1896 $block_text .= $tag; 1897 $parsed .= $this->$hash_method($block_text); 1898 1899 # Get enclosing tag name for the ParseMarkdown function. 1900 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 1901 $tag_name = $matches[1]; 1902 1903 # Parse the content using the HTML-in-Markdown parser. 1904 list ($block_text, $text) 1905 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 1906 $tag_name, $span_mode); 1907 1908 # Outdent markdown text. 1909 if ($indent > 0) { 1910 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 1911 $block_text); 1912 } 1913 1914 # Append tag content to parsed text. 1915 if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 1916 else $parsed .= "$block_text"; 1917 1918 # Start over a new block. 1919 $block_text = ""; 1920 } 1921 else $block_text .= $tag; 1922 } 1923 1924 } while ($depth > 0); 1925 1926 # 1927 # Hash last block text that wasn't processed inside the loop. 1928 # 1929 $parsed .= $this->$hash_method($block_text); 1930 1931 return array($parsed, $text); 1932 } 1933 1934 1935 function hashClean($text) { 1936 # 1937 # Called whenever a tag must be hashed when a function insert a "clean" tag 1938 # in $text, it pass through this function and is automaticaly escaped, 1939 # blocking invalid nested overlap. 1940 # 1941 return $this->hashPart($text, 'C'); 1942 } 1943 1944 1945 function doHeaders($text) { 1946 # 1947 # Redefined to add id attribute support. 
1948 # 1949 # Setext-style headers: 1950 # Header 1 {#header1} 1951 # ======== 1952 # 1953 # Header 2 {#header2} 1954 # -------- 1955 # 1956 $text = preg_replace_callback( 1957 '{ 1958 (^.+?) # $1: Header text 1959 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute 1960 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 1961 }mx', 1962 array(&$this, '_doHeaders_callback_setext'), $text); 1963 1964 # atx-style headers: 1965 # # Header 1 {#header1} 1966 # ## Header 2 {#header2} 1967 # ## Header 2 with closing hashes ## {#header3} 1968 # ... 1969 # ###### Header 6 {#header2} 1970 # 1971 $text = preg_replace_callback('{ 1972 ^(\#{1,6}) # $1 = string of #\'s 1973 [ ]* 1974 (.+?) # $2 = Header text 1975 [ ]* 1976 \#* # optional closing #\'s (not counted) 1977 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute 1978 [ ]* 1979 \n+ 1980 }xm', 1981 array(&$this, '_doHeaders_callback_atx'), $text); 1982 1983 return $text; 1984 } 1985 function _doHeaders_attr($attr) { 1986 if (empty($attr)) return ""; 1987 return " id=\"$attr\""; 1988 } 1989 function _doHeaders_callback_setext($matches) { 1990 $level = $matches[3]{0} == '=' ? 1 : 2; 1991 $attr = $this->_doHeaders_attr($id =& $matches[2]); 1992 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 1993 return "\n" . $this->hashBlock($block) . "\n\n"; 1994 } 1995 function _doHeaders_callback_atx($matches) { 1996 $level = strlen($matches[1]); 1997 $attr = $this->_doHeaders_attr($id =& $matches[3]); 1998 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>"; 1999 return "\n" . $this->hashBlock($block) . "\n\n"; 2000 } 2001 2002 2003 function doTables($text) { 2004 # 2005 # Form HTML tables. 2006 # 2007 $less_than_tab = $this->tab_width - 1; 2008 # 2009 # Find tables with leading pipe. 
2010 # 2011 # | Header 1 | Header 2 2012 # | -------- | -------- 2013 # | Cell 1 | Cell 2 2014 # | Cell 3 | Cell 4 2015 # 2016 $text = preg_replace_callback(' 2017 { 2018 ^ # Start of a line 2019 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2020 [|] # Optional leading pipe (present) 2021 (.+) \n # $1: Header row (at least one pipe) 2022 2023 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2024 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2025 2026 ( # $3: Cells 2027 (?> 2028 [ ]* # Allowed whitespace. 2029 [|] .* \n # Row content. 2030 )* 2031 ) 2032 (?=\n|\Z) # Stop at final double newline. 2033 }xm', 2034 array(&$this, '_doTable_leadingPipe_callback'), $text); 2035 2036 # 2037 # Find tables without leading pipe. 2038 # 2039 # Header 1 | Header 2 2040 # -------- | -------- 2041 # Cell 1 | Cell 2 2042 # Cell 3 | Cell 4 2043 # 2044 $text = preg_replace_callback(' 2045 { 2046 ^ # Start of a line 2047 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2048 (\S.*[|].*) \n # $1: Header row (at least one pipe) 2049 2050 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2051 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2052 2053 ( # $3: Cells 2054 (?> 2055 .* [|] .* \n # Row content 2056 )* 2057 ) 2058 (?=\n|\Z) # Stop at final double newline. 2059 }xm', 2060 array(&$this, '_DoTable_callback'), $text); 2061 2062 return $text; 2063 } 2064 function _doTable_leadingPipe_callback($matches) { 2065 $head = $matches[1]; 2066 $underline = $matches[2]; 2067 $content = $matches[3]; 2068 2069 # Remove leading pipe for each row. 2070 $content = preg_replace('/^ *[|]/m', '', $content); 2071 2072 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2073 } 2074 function _doTable_callback($matches) { 2075 $head = $matches[1]; 2076 $underline = $matches[2]; 2077 $content = $matches[3]; 2078 2079 # Remove any tailing pipes for each line. 
# Strip the trailing pipe (and any spaces after it) from every line.
	$head = preg_replace('/[|] *$/m', '', $head);
	$underline = preg_replace('/[|] *$/m', '', $underline);
	$content = preg_replace('/[|] *$/m', '', $content);

	# Reading alignment from header underline.
	$separators = preg_split('/ *[|] */', $underline);
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
		else $attr[$n] = '';
	}

	# Parsing span elements, including code spans, character escapes,
	# and inline HTML tags, so that pipes inside those gets ignored.
	$head = $this->parseSpan($head);
	$headers = preg_split('/ *[|] */', $head);
	$col_count = count($headers);

	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header)
		$text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
	$text .= "</tr>\n";
	$text .= "</thead>\n";

	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));

	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Parsing span elements, including code spans, character escapes,
		# and inline HTML tags, so that pipes inside those gets ignored.
		$row = $this->parseSpan($row);

		# Split row by cell. The limit keeps a row from having more cells
		# than the header; the padding fills in rows that have fewer.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');

		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell)
			$text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";

	return $this->hashBlock($text) . "\n";
}


function doDefLists($text) {
	#
	# Form HTML definition lists.
	#
	# Params:
	#   $text - text in which definition lists are searched for.
	#
	# Returns $text where every definition list found has been replaced
	# by the value returned from _doDefLists_callback.
	#
	$less_than_tab = $this->tab_width - 1;

	# Re-usable pattern to match any entire dl list:
	$whole_list = '(?>
		(								# $1 = whole list
		  (								# $2
			[ ]{0,'.$less_than_tab.'}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,'.$less_than_tab.'}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
		  )
		)
	)'; // mx

	# A list is only recognised right after a blank line or at the very
	# start of the text.
	$text = preg_replace_callback('{
			(?:(?<=\n\n)|\A\n?)
			'.$whole_list.'
		}mx',
		array(&$this, '_doDefLists_callback'), $text);

	return $text;
}
function _doDefLists_callback($matches) {
	#
	# Callback for doDefLists: $matches[1] holds one whole definition list.
	# Returns the hashed <dl> block followed by a blank line.
	#
	$list = $matches[1];

	# Convert the terms and definitions, then wrap them in a <dl> element.
	$result = trim($this->processDefListItems($list));
	$result = "<dl>\n" . $result . "\n</dl>";
	return $this->hashBlock($result) . "\n\n";
}


function processDefListItems($list_str) {
	#
	# Process the contents of a single definition list, splitting it
	# into individual term and definition list items.
	#
	# Params:
	#   $list_str - source text of one definition list.
	#
	# Returns the text with terms turned into <dt> elements and
	# definitions turned into <dd> elements.
	#
	$less_than_tab = $this->tab_width - 1;

	# trim trailing blank lines:
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	# Process definition terms. The lookahead for the definition mark uses
	# the same indentation limit as the other patterns of this method and
	# of doDefLists, so that they agree whatever the tab width is.
	$list_str = preg_replace_callback('{
		(?:\n\n+|\A\n?)						# leading line
		(									# definition terms = $1
			[ ]{0,'.$less_than_tab.'}		# leading whitespace
			(?![:][ ]|[ ])					# negative lookahead for a definition
											#   mark (colon) or more whitespace.
			(?: \S.* \n)+?					# actual term (not whitespace).
		)
		(?=\n?[ ]{0,'.$less_than_tab.'}:[ ])	# lookahead for following line feed
											#   with a definition mark.
		}xm',
		array(&$this, '_processDefListItems_callback_dt'), $list_str);

	# Process actual definitions. This runs after the terms have been
	# converted, which is why a literal <dt> can end a definition.
	$list_str = preg_replace_callback('{
		\n(\n+)?								# leading line = $1
		[ ]{0,'.$less_than_tab.'}				# whitespace before colon
		[:][ ]+									# definition mark (colon)
		((?s:.+?))								# definition text = $2
		(?= \n+ 								# stop at next definition mark,
			(?:									# next term or end of text
				[ ]{0,'.$less_than_tab.'} [:][ ]	|
				<dt> | \z
			)
		)
		}xm',
		array(&$this, '_processDefListItems_callback_dd'), $list_str);

	return $list_str;
}
function _processDefListItems_callback_dt($matches) {
	#
	# Callback for the term pattern of processDefListItems: $matches[1]
	# holds one or more terms, one per line. Each line becomes its own
	# <dt> element.
	#
	$terms = explode("\n", trim($matches[1]));
	$text = '';
	foreach ($terms as $term) {
		$term = $this->runSpanGamut(trim($term));
		$text .= "\n<dt>" . $term . "</dt>";
	}
	return $text . "\n";
}
function _processDefListItems_callback_dd($matches) {
	#
	# Callback for the definition pattern of processDefListItems:
	# $matches[1] holds the blank line(s) found before the definition mark,
	# if any, and $matches[2] the definition text.
	#
	$leading_line = $matches[1];
	$def = $matches[2];

	if ($leading_line || preg_match('/\n{2,}/', $def)) {
		# Preceded by a blank line, or containing one: the definition is
		# processed as block-level content.
		$def = $this->runBlockGamut($this->outdent($def . "\n\n"));
		$def = "\n". $def ."\n";
	}
	else {
		# Otherwise the definition only gets span-level processing.
		$def = rtrim($def);
		$def = $this->runSpanGamut($this->outdent($def));
	}

	return "\n<dd>" . $def . "</dd>\n";
}


function doItalicsAndBold($text) {
	#
	# Redefined to change emphasis by underscore behaviour so that it does not
	# work in the middle of a word.
	#
	# Params:
	#   $text - text in which emphasis markers are searched for.
	#
	# Returns $text after the strong and then the em callbacks have been
	# applied to every match. Only the underscore patterns carry the
	# alphanumeric lookarounds; the asterisk patterns have none.
	#
	# <strong> must go first:
	$text = preg_replace_callback(array(
		'{
			(						# $1: Marker
				(?<![a-zA-Z0-9])	# Not preceded by alphanum
				(?<!__)				#	or by two marker chars.
				__
			)
			(?=\S) 					# Not followed by whitespace
			(?!__)					#   or two others marker chars.
			(						# $2: Content
				(?>
					[^_]+?			# Anthing not em markers.
				|
									# Balence any regular _ emphasis inside.
					(?<![a-zA-Z0-9]) _ (?=\S) (.+?)
					(?<=\S) _ (?![a-zA-Z0-9])
				|
					_+				# Allow unbalenced as last resort.
				)+?
			)
			(?<=\S) __				# End mark not preceded by whitespace.
			(?![a-zA-Z0-9])			# Not followed by alphanum
			(?!__)					#   or two others marker chars.
		}sx',
		'{
			( (?<!\*\*) \*\* )		# $1: Marker (not preceded by two *)
			(?=\S) 					# Not followed by whitespace
			(?!\1)					#   or two others marker chars.
			(						# $2: Content
				(?>
					[^*]+?			# Anthing not em markers.
				|
									# Balence any regular * emphasis inside.
					\* (?=\S) (.+?) (?<=\S) \*
				|
					\*				# Allow unbalenced as last resort.
				)+?
			)
			(?<=\S) \*\*			# End mark not preceded by whitespace.
		}sx',
		),
		array(&$this, '_doItalicAndBold_strong_callback'), $text);
	# Then <em>:
	$text = preg_replace_callback(array(
		'{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
		'{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
		),
		array(&$this, '_doItalicAndBold_em_callback'), $text);

	return $text;
}


function formParagraphs($text) {
	#
	# Params:
	#   $text - string to process with html <p> tags
	#
	# Returns the text with its paragraphs wrapped in <p> tags and with
	# the hashed parts put back by unhash().
	#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	# Paragraphs are separated by one or more blank lines.
	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		$value = trim($this->runSpanGamut($value));

		# Check if this should be enclosed in a paragraph.
		# Clean tag hashes & block tag hashes are left alone: no <p> is
		# added when the value starts with a block hash (B...B) or is made
		# of a single clean tag hash (C...C).
		$is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

		if ($is_p) {
			$value = "<p>$value</p>";
		}
		$grafs[$key] = $value;
	}

	# Join grafs in one text, then unhash HTML tags.
	$text = implode("\n\n", $grafs);

	# Finish by removing any tag hashes still present in $text.
	$text = $this->unhash($text);

	return $text;
}


### Footnotes

function stripFootnotes($text) {
	#
	# Strips footnote definitions from text, stores their content in
	# $this->footnotes, indexed by note id (see _stripFootnotes_callback).
	#
	# Params:
	#   $text - text in which footnote definitions are searched for.
	#
	# Returns $text without the footnote definitions.
	#
	$less_than_tab = $this->tab_width - 1;

	# Footnote definitions are in the form: [^id]: footnote text
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
				(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
								# by non-indented content
			)*
		)
		}xm',
		array(&$this, '_stripFootnotes_callback'),
		$text);
	return $text;
}
function _stripFootnotes_callback($matches) {
	#
	# Callback for stripFootnotes: $matches[1] holds the note id and
	# $matches[2] the footnote text. The outdented text is stored under
	# the prefixed id.
	#
	$note_id = $this->fn_id_prefix . $matches[1];
	$this->footnotes[$note_id] = $this->outdent($matches[2]);
	return ''; # String that will replace the block
}


function doFootnotes($text) {
	#
	# Replace footnote references in $text [^id] with a special text-token
	# which appendFootnotes later hands over to _appendFootnotes_callback.
	#
	# Nothing is replaced while a footnote or an anchor is being processed
	# (in_footnote / in_anchor flags).
	#
	if (!$this->in_footnote && !$this->in_anchor) {
		$text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
	}
	return $text;
}


function appendFootnotes($text) {
	#
	# Append footnote list to text.
	#
	# Params:
	#   $text - text containing the tokens inserted by doFootnotes.
	#
	# Returns the text with every token processed by
	# _appendFootnotes_callback, followed by a <div class="footnotes">
	# block when at least one footnote ended up in footnotes_ordered.
	#

	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array(&$this, '_appendFootnotes_callback'), $text);

	if (!empty($this->footnotes_ordered)) {
		$text .= "\n\n";
		$text .= "<div class=\"footnotes\">\n";
		$text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
		$text .= "<ol>\n\n";

		# Attributes shared by all backlinks. Double quotes have to be
		# encoded as &quot; because the values are written inside
		# double-quoted HTML attributes.
		$attr = " rev=\"footnote\"";
		if ($this->fn_backlink_class != "") {
			$class = $this->fn_backlink_class;
			$class = $this->encodeAmpsAndAngles($class);
			$class = str_replace('"', '&quot;', $class);
			$attr .= " class=\"$class\"";
		}
		if ($this->fn_backlink_title != "") {
			$title = $this->fn_backlink_title;
			$title = $this->encodeAmpsAndAngles($title);
			$title = str_replace('"', '&quot;', $title);
			$attr .= " title=\"$title\"";
		}
		$num = 0;

		# Flag read by doFootnotes: no footnote token is created while
		# the footnotes themselves are being processed.
		$this->in_footnote = true;

		foreach ($this->footnotes_ordered as $note_id => $footnote) {
			$footnote .= "\n"; # Need to append newline before parsing.
			$footnote = $this->runBlockGamut("$footnote\n");

			# Any "%%" in the class or title is replaced by the footnote
			# number.
			$attr2 = str_replace("%%", ++$num, $attr);

			# Add backlink to last paragraph; create new paragraph if needed.
			# Numeric entities are used for the arrow and the non-breaking
			# space so that the output does not depend on the encoding of
			# this file.
			$backlink = "<a href=\"#fnref:$note_id\"$attr2>&#8617;</a>";
			if (preg_match('{</p>$}', $footnote)) {
				$footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
			} else {
				$footnote .= "\n\n<p>$backlink</p>";
			}

			$text .= "<li id=\"fn:$note_id\">\n";
			$text .= $footnote . "\n";
			$text .= "</li>\n\n";
		}

		$this->in_footnote = false;

		$text .= "</ol>\n";
		$text .= "</div>";
	}
	return $text;
}
function _appendFootnotes_callback($matches) {
	$node_id = $this->fn_id_prefix . $matches[1];

	# Create footnote marker only if it has a corresponding footnote *and*
	# the footnote hasn't been used by another marker.
	if (isset($this->footnotes[$node_id])) {
		# Transfer footnote content to the ordered list.
		$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
		unset($this->footnotes[$node_id]);

		$num = count($this->footnotes_ordered);
		$attr = " rel=\"footnote\"";
		if ($this->fn_link_class != "") {
			$class = $this->fn_link_class;
			$class = $this->encodeAmpsAndAngles($class);
			# Encode double quotes: the value goes inside a double-quoted
			# HTML attribute.
			$class = str_replace('"', '&quot;', $class);
			$attr .= " class=\"$class\"";
		}
		if (