[ Index ] |
PHP Cross Reference of vtigercrm-6.1.0 |
[Summary view] [Print] [Text view]
1 <?php 2 # 3 # Markdown - A text-to-HTML conversion tool for web writers 4 # 5 # PHP Markdown 6 # Copyright (c) 2004-2014 Michel Fortin 7 # <http://michelf.com/projects/php-markdown/> 8 # 9 # Original Markdown 10 # Copyright (c) 2004-2006 John Gruber 11 # <http://daringfireball.net/projects/markdown/> 12 # 13 namespace Michelf; 14 15 16 # 17 # Markdown Parser Class 18 # 19 class Markdown implements MarkdownInterface { 20 21 ### Version ### 22 23 const MARKDOWNLIB_VERSION = "1.4.1"; 24 25 ### Simple Function Interface ### 26 27 public static function defaultTransform($text) { 28 # 29 # Initialize the parser and return the result of its transform method. 30 # This will work fine for derived classes too. 31 # 32 # Take parser class on which this function was called. 33 $parser_class = \get_called_class(); 34 35 # try to take parser from the static parser list 36 static $parser_list; 37 $parser =& $parser_list[$parser_class]; 38 39 # create the parser it not already set 40 if (!$parser) 41 $parser = new $parser_class; 42 43 # Transform text using parser. 44 return $parser->transform($text); 45 } 46 47 ### Configuration Variables ### 48 49 # Change to ">" for HTML output. 50 public $empty_element_suffix = " />"; 51 public $tab_width = 4; 52 53 # Change to `true` to disallow markup or entities. 54 public $no_markup = false; 55 public $no_entities = false; 56 57 # Predefined urls and titles for reference links and images. 58 public $predef_urls = array(); 59 public $predef_titles = array(); 60 61 62 ### Parser Implementation ### 63 64 # Regex to match balanced [brackets]. 65 # Needed to insert a maximum bracked depth while converting to PHP. 
66 protected $nested_brackets_depth = 6; 67 protected $nested_brackets_re; 68 69 protected $nested_url_parenthesis_depth = 4; 70 protected $nested_url_parenthesis_re; 71 72 # Table of hash values for escaped characters: 73 protected $escape_chars = '\`*_{}[]()>#+-.!'; 74 protected $escape_chars_re; 75 76 77 public function __construct() { 78 # 79 # Constructor function. Initialize appropriate member variables. 80 # 81 $this->_initDetab(); 82 $this->prepareItalicsAndBold(); 83 84 $this->nested_brackets_re = 85 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 86 str_repeat('\])*', $this->nested_brackets_depth); 87 88 $this->nested_url_parenthesis_re = 89 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 90 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 91 92 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 93 94 # Sort document, block, and span gamut in ascendent priority order. 95 asort($this->document_gamut); 96 asort($this->block_gamut); 97 asort($this->span_gamut); 98 } 99 100 101 # Internal hashes used during transformation. 102 protected $urls = array(); 103 protected $titles = array(); 104 protected $html_hashes = array(); 105 106 # Status flag to avoid invalid nesting. 107 protected $in_anchor = false; 108 109 110 protected function setup() { 111 # 112 # Called before the transformation process starts to setup parser 113 # states. 114 # 115 # Clear global hashes. 116 $this->urls = $this->predef_urls; 117 $this->titles = $this->predef_titles; 118 $this->html_hashes = array(); 119 120 $this->in_anchor = false; 121 } 122 123 protected function teardown() { 124 # 125 # Called after the transformation process to clear any variable 126 # which may be taking up memory unnecessarly. 127 # 128 $this->urls = array(); 129 $this->titles = array(); 130 $this->html_hashes = array(); 131 } 132 133 134 public function transform($text) { 135 # 136 # Main function. 
Performs some preprocessing on the input text 137 # and pass it through the document gamut. 138 # 139 $this->setup(); 140 141 # Remove UTF-8 BOM and marker character in input, if present. 142 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 143 144 # Standardize line endings: 145 # DOS to Unix and Mac to Unix 146 $text = preg_replace('{\r\n?}', "\n", $text); 147 148 # Make sure $text ends with a couple of newlines: 149 $text .= "\n\n"; 150 151 # Convert all tabs to spaces. 152 $text = $this->detab($text); 153 154 # Turn block-level HTML blocks into hash entries 155 $text = $this->hashHTMLBlocks($text); 156 157 # Strip any lines consisting only of spaces and tabs. 158 # This makes subsequent regexen easier to write, because we can 159 # match consecutive blank lines with /\n+/ instead of something 160 # contorted like /[ ]*\n+/ . 161 $text = preg_replace('/^[ ]+$/m', '', $text); 162 163 # Run document gamut methods. 164 foreach ($this->document_gamut as $method => $priority) { 165 $text = $this->$method($text); 166 } 167 168 $this->teardown(); 169 170 return $text . "\n"; 171 } 172 173 protected $document_gamut = array( 174 # Strip link definitions, store in hashes. 175 "stripLinkDefinitions" => 20, 176 177 "runBasicBlockGamut" => 30, 178 ); 179 180 181 protected function stripLinkDefinitions($text) { 182 # 183 # Strips link definitions from text, stores the URLs and titles in 184 # hash references. 185 # 186 $less_than_tab = $this->tab_width - 1; 187 188 # Link defs are in the form: ^[id]: url "optional title" 189 $text = preg_replace_callback('{ 190 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 191 [ ]* 192 \n? # maybe *one* newline 193 [ ]* 194 (?: 195 <(.+?)> # url = $2 196 | 197 (\S+?) # url = $3 198 ) 199 [ ]* 200 \n? # maybe one newline 201 [ ]* 202 (?: 203 (?<=\s) # lookbehind for whitespace 204 ["(] 205 (.*?) # title = $4 206 [")] 207 [ ]* 208 )? 
# title is optional 209 (?:\n+|\Z) 210 }xm', 211 array($this, '_stripLinkDefinitions_callback'), 212 $text); 213 return $text; 214 } 215 protected function _stripLinkDefinitions_callback($matches) { 216 $link_id = strtolower($matches[1]); 217 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 218 $this->urls[$link_id] = $url; 219 $this->titles[$link_id] =& $matches[4]; 220 return ''; # String that will replace the block 221 } 222 223 224 protected function hashHTMLBlocks($text) { 225 if ($this->no_markup) return $text; 226 227 $less_than_tab = $this->tab_width - 1; 228 229 # Hashify HTML blocks: 230 # We only want to do this for block-level HTML tags, such as headers, 231 # lists, and tables. That's because we still want to wrap <p>s around 232 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 233 # phrase emphasis, and spans. The list of tags we're looking for is 234 # hard-coded: 235 # 236 # * List "a" is made of tags which can be both inline or block-level. 237 # These will be treated block-level when the start tag is alone on 238 # its line, otherwise they're not matched here and will be taken as 239 # inline later. 240 # * List "b" is made of tags which are always block-level; 241 # 242 $block_tags_a_re = 'ins|del'; 243 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 244 'script|noscript|style|form|fieldset|iframe|math|svg|'. 245 'article|section|nav|aside|hgroup|header|footer|'. 246 'figure'; 247 248 # Regular expression for the content of a block tag. 249 $nested_tags_level = 4; 250 $attr = ' 251 (?> # optional tag attributes 252 \s # starts with whitespace 253 (?> 254 [^>"/]+ # text outside quotes 255 | 256 /+(?!>) # slash not followed by ">" 257 | 258 "[^"]*" # text inside double quotes (tolerate ">") 259 | 260 \'[^\']*\' # text inside single quotes (tolerate ">") 261 )* 262 )? 263 '; 264 $content = 265 str_repeat(' 266 (?> 267 [^<]+ # content without tag 268 | 269 <\2 # nested opening tag 270 '.$attr.' 
# attributes 271 (?> 272 /> 273 | 274 >', $nested_tags_level). # end of opening tag 275 '.*?'. # last level nested tag content 276 str_repeat(' 277 </\2\s*> # closing nested tag 278 ) 279 | 280 <(?!/\2\s*> # other tags with a different name 281 ) 282 )*', 283 $nested_tags_level); 284 $content2 = str_replace('\2', '\3', $content); 285 286 # First, look for nested blocks, e.g.: 287 # <div> 288 # <div> 289 # tags for inner block must be indented. 290 # </div> 291 # </div> 292 # 293 # The outermost tags must start at the left margin for this to match, and 294 # the inner nested divs must be indented. 295 # We need to do this before the next, more liberal match, because the next 296 # match will start at the first `<div>` and stop at the first `</div>`. 297 $text = preg_replace_callback('{(?> 298 (?> 299 (?<=\n) # Starting on its own line 300 | # or 301 \A\n? # the at beginning of the doc 302 ) 303 ( # save in $1 304 305 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 306 # in between. 307 308 [ ]{0,'.$less_than_tab.'} 309 <('.$block_tags_b_re.')# start tag = $2 310 '.$attr.'> # attributes followed by > and \n 311 '.$content.' # content, support nesting 312 </\2> # the matching end tag 313 [ ]* # trailing spaces/tabs 314 (?=\n+|\Z) # followed by a newline or end of document 315 316 | # Special version for tags of group a. 317 318 [ ]{0,'.$less_than_tab.'} 319 <('.$block_tags_a_re.')# start tag = $3 320 '.$attr.'>[ ]*\n # attributes followed by > 321 '.$content2.' # content, support nesting 322 </\3> # the matching end tag 323 [ ]* # trailing spaces/tabs 324 (?=\n+|\Z) # followed by a newline or end of document 325 326 | # Special case just for <hr />. It was easier to make a special 327 # case than to make the other regex more complicated. 328 329 [ ]{0,'.$less_than_tab.'} 330 <(hr) # start tag = $2 331 '.$attr.' 
# attributes 332 /?> # the matching end tag 333 [ ]* 334 (?=\n{2,}|\Z) # followed by a blank line or end of document 335 336 | # Special case for standalone HTML comments: 337 338 [ ]{0,'.$less_than_tab.'} 339 (?s: 340 <!-- .*? --> 341 ) 342 [ ]* 343 (?=\n{2,}|\Z) # followed by a blank line or end of document 344 345 | # PHP and ASP-style processor instructions (<? and <%) 346 347 [ ]{0,'.$less_than_tab.'} 348 (?s: 349 <([?%]) # $2 350 .*? 351 \2> 352 ) 353 [ ]* 354 (?=\n{2,}|\Z) # followed by a blank line or end of document 355 356 ) 357 )}Sxmi', 358 array($this, '_hashHTMLBlocks_callback'), 359 $text); 360 361 return $text; 362 } 363 protected function _hashHTMLBlocks_callback($matches) { 364 $text = $matches[1]; 365 $key = $this->hashBlock($text); 366 return "\n\n$key\n\n"; 367 } 368 369 370 protected function hashPart($text, $boundary = 'X') { 371 # 372 # Called whenever a tag must be hashed when a function insert an atomic 373 # element in the text stream. Passing $text to through this function gives 374 # a unique text-token which will be reverted back when calling unhash. 375 # 376 # The $boundary argument specify what character should be used to surround 377 # the token. By convension, "B" is used for block elements that needs not 378 # to be wrapped into paragraph tags at the end, ":" is used for elements 379 # that are word separators and "X" is used in the general case. 380 # 381 # Swap back any tag hash found in $text so we do not have to `unhash` 382 # multiple times at the end. 383 $text = $this->unhash($text); 384 385 # Then hash the block. 386 static $i = 0; 387 $key = "$boundary\x1A" . ++$i . $boundary; 388 $this->html_hashes[$key] = $text; 389 return $key; # String that will replace the tag. 390 } 391 392 393 protected function hashBlock($text) { 394 # 395 # Shortcut function for hashPart with block-level boundaries. 
396 # 397 return $this->hashPart($text, 'B'); 398 } 399 400 401 protected $block_gamut = array( 402 # 403 # These are all the transformations that form block-level 404 # tags like paragraphs, headers, and list items. 405 # 406 "doHeaders" => 10, 407 "doHorizontalRules" => 20, 408 409 "doLists" => 40, 410 "doCodeBlocks" => 50, 411 "doBlockQuotes" => 60, 412 ); 413 414 protected function runBlockGamut($text) { 415 # 416 # Run block gamut tranformations. 417 # 418 # We need to escape raw HTML in Markdown source before doing anything 419 # else. This need to be done for each block, and not only at the 420 # begining in the Markdown function since hashed blocks can be part of 421 # list items and could have been indented. Indented blocks would have 422 # been seen as a code block in a previous pass of hashHTMLBlocks. 423 $text = $this->hashHTMLBlocks($text); 424 425 return $this->runBasicBlockGamut($text); 426 } 427 428 protected function runBasicBlockGamut($text) { 429 # 430 # Run block gamut tranformations, without hashing HTML blocks. This is 431 # useful when HTML blocks are known to be already hashed, like in the first 432 # whole-document pass. 433 # 434 foreach ($this->block_gamut as $method => $priority) { 435 $text = $this->$method($text); 436 } 437 438 # Finally form paragraph and restore hashed blocks. 439 $text = $this->formParagraphs($text); 440 441 return $text; 442 } 443 444 445 protected function doHorizontalRules($text) { 446 # Do Horizontal Rules: 447 return preg_replace( 448 '{ 449 ^[ ]{0,3} # Leading space 450 ([-*_]) # $1: First marker 451 (?> # Repeated marker group 452 [ ]{0,2} # Zero, one, or two spaces. 453 \1 # Marker character 454 ){2,} # Group repeated at least twice 455 [ ]* # Tailing spaces 456 $ # End of line. 
457 }mx', 458 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 459 $text); 460 } 461 462 463 protected $span_gamut = array( 464 # 465 # These are all the transformations that occur *within* block-level 466 # tags like paragraphs, headers, and list items. 467 # 468 # Process character escapes, code spans, and inline HTML 469 # in one shot. 470 "parseSpan" => -30, 471 472 # Process anchor and image tags. Images must come first, 473 # because ![foo][f] looks like an anchor. 474 "doImages" => 10, 475 "doAnchors" => 20, 476 477 # Make links out of things like `<http://example.com/>` 478 # Must come after doAnchors, because you can use < and > 479 # delimiters in inline links like [this](<url>). 480 "doAutoLinks" => 30, 481 "encodeAmpsAndAngles" => 40, 482 483 "doItalicsAndBold" => 50, 484 "doHardBreaks" => 60, 485 ); 486 487 protected function runSpanGamut($text) { 488 # 489 # Run span gamut tranformations. 490 # 491 foreach ($this->span_gamut as $method => $priority) { 492 $text = $this->$method($text); 493 } 494 495 return $text; 496 } 497 498 499 protected function doHardBreaks($text) { 500 # Do hard breaks: 501 return preg_replace_callback('/ {2,}\n/', 502 array($this, '_doHardBreaks_callback'), $text); 503 } 504 protected function _doHardBreaks_callback($matches) { 505 return $this->hashPart("<br$this->empty_element_suffix\n"); 506 } 507 508 509 protected function doAnchors($text) { 510 # 511 # Turn Markdown link shortcuts into XHTML <a> tags. 512 # 513 if ($this->in_anchor) return $text; 514 $this->in_anchor = true; 515 516 # 517 # First, handle reference-style links: [link text] [id] 518 # 519 $text = preg_replace_callback('{ 520 ( # wrap whole match in $1 521 \[ 522 ('.$this->nested_brackets_re.') # link text = $2 523 \] 524 525 [ ]? # one optional space 526 (?:\n[ ]*)? # one optional newline followed by spaces 527 528 \[ 529 (.*?) 
# id = $3 530 \] 531 ) 532 }xs', 533 array($this, '_doAnchors_reference_callback'), $text); 534 535 # 536 # Next, inline-style links: [link text](url "optional title") 537 # 538 $text = preg_replace_callback('{ 539 ( # wrap whole match in $1 540 \[ 541 ('.$this->nested_brackets_re.') # link text = $2 542 \] 543 \( # literal paren 544 [ \n]* 545 (?: 546 <(.+?)> # href = $3 547 | 548 ('.$this->nested_url_parenthesis_re.') # href = $4 549 ) 550 [ \n]* 551 ( # $5 552 ([\'"]) # quote char = $6 553 (.*?) # Title = $7 554 \6 # matching quote 555 [ \n]* # ignore any spaces/tabs between closing quote and ) 556 )? # title is optional 557 \) 558 ) 559 }xs', 560 array($this, '_doAnchors_inline_callback'), $text); 561 562 # 563 # Last, handle reference-style shortcuts: [link text] 564 # These must come last in case you've also got [link text][1] 565 # or [link text](/foo) 566 # 567 $text = preg_replace_callback('{ 568 ( # wrap whole match in $1 569 \[ 570 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 571 \] 572 ) 573 }xs', 574 array($this, '_doAnchors_reference_callback'), $text); 575 576 $this->in_anchor = false; 577 return $text; 578 } 579 protected function _doAnchors_reference_callback($matches) { 580 $whole_match = $matches[1]; 581 $link_text = $matches[2]; 582 $link_id =& $matches[3]; 583 584 if ($link_id == "") { 585 # for shortcut links like [this][] or [this]. 
586 $link_id = $link_text; 587 } 588 589 # lower-case and turn embedded newlines into spaces 590 $link_id = strtolower($link_id); 591 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 592 593 if (isset($this->urls[$link_id])) { 594 $url = $this->urls[$link_id]; 595 $url = $this->encodeAttribute($url); 596 597 $result = "<a href=\"$url\""; 598 if ( isset( $this->titles[$link_id] ) ) { 599 $title = $this->titles[$link_id]; 600 $title = $this->encodeAttribute($title); 601 $result .= " title=\"$title\""; 602 } 603 604 $link_text = $this->runSpanGamut($link_text); 605 $result .= ">$link_text</a>"; 606 $result = $this->hashPart($result); 607 } 608 else { 609 $result = $whole_match; 610 } 611 return $result; 612 } 613 protected function _doAnchors_inline_callback($matches) { 614 $whole_match = $matches[1]; 615 $link_text = $this->runSpanGamut($matches[2]); 616 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 617 $title =& $matches[7]; 618 619 // if the URL was of the form <s p a c e s> it got caught by the HTML 620 // tag parser and hashed. Need to reverse the process before using the URL. 621 $unhashed = $this->unhash($url); 622 if ($unhashed != $url) 623 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 624 625 $url = $this->encodeAttribute($url); 626 627 $result = "<a href=\"$url\""; 628 if (isset($title)) { 629 $title = $this->encodeAttribute($title); 630 $result .= " title=\"$title\""; 631 } 632 633 $link_text = $this->runSpanGamut($link_text); 634 $result .= ">$link_text</a>"; 635 636 return $this->hashPart($result); 637 } 638 639 640 protected function doImages($text) { 641 # 642 # Turn Markdown image shortcuts into <img> tags. 643 # 644 # 645 # First, handle reference-style labeled images: ![alt text][id] 646 # 647 $text = preg_replace_callback('{ 648 ( # wrap whole match in $1 649 !\[ 650 ('.$this->nested_brackets_re.') # alt text = $2 651 \] 652 653 [ ]? # one optional space 654 (?:\n[ ]*)? # one optional newline followed by spaces 655 656 \[ 657 (.*?) 
# id = $3 658 \] 659 660 ) 661 }xs', 662 array($this, '_doImages_reference_callback'), $text); 663 664 # 665 # Next, handle inline images:  666 # Don't forget: encode * and _ 667 # 668 $text = preg_replace_callback('{ 669 ( # wrap whole match in $1 670 !\[ 671 ('.$this->nested_brackets_re.') # alt text = $2 672 \] 673 \s? # One optional whitespace character 674 \( # literal paren 675 [ \n]* 676 (?: 677 <(\S*)> # src url = $3 678 | 679 ('.$this->nested_url_parenthesis_re.') # src url = $4 680 ) 681 [ \n]* 682 ( # $5 683 ([\'"]) # quote char = $6 684 (.*?) # title = $7 685 \6 # matching quote 686 [ \n]* 687 )? # title is optional 688 \) 689 ) 690 }xs', 691 array($this, '_doImages_inline_callback'), $text); 692 693 return $text; 694 } 695 protected function _doImages_reference_callback($matches) { 696 $whole_match = $matches[1]; 697 $alt_text = $matches[2]; 698 $link_id = strtolower($matches[3]); 699 700 if ($link_id == "") { 701 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 702 } 703 704 $alt_text = $this->encodeAttribute($alt_text); 705 if (isset($this->urls[$link_id])) { 706 $url = $this->encodeAttribute($this->urls[$link_id]); 707 $result = "<img src=\"$url\" alt=\"$alt_text\""; 708 if (isset($this->titles[$link_id])) { 709 $title = $this->titles[$link_id]; 710 $title = $this->encodeAttribute($title); 711 $result .= " title=\"$title\""; 712 } 713 $result .= $this->empty_element_suffix; 714 $result = $this->hashPart($result); 715 } 716 else { 717 # If there's no such link ID, leave intact: 718 $result = $whole_match; 719 } 720 721 return $result; 722 } 723 protected function _doImages_inline_callback($matches) { 724 $whole_match = $matches[1]; 725 $alt_text = $matches[2]; 726 $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; 727 $title =& $matches[7]; 728 729 $alt_text = $this->encodeAttribute($alt_text); 730 $url = $this->encodeAttribute($url); 731 $result = "<img src=\"$url\" alt=\"$alt_text\""; 732 if (isset($title)) { 733 $title = $this->encodeAttribute($title); 734 $result .= " title=\"$title\""; # $title already quoted 735 } 736 $result .= $this->empty_element_suffix; 737 738 return $this->hashPart($result); 739 } 740 741 742 protected function doHeaders($text) { 743 # Setext-style headers: 744 # Header 1 745 # ======== 746 # 747 # Header 2 748 # -------- 749 # 750 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 751 array($this, '_doHeaders_callback_setext'), $text); 752 753 # atx-style headers: 754 # # Header 1 755 # ## Header 2 756 # ## Header 2 with closing hashes ## 757 # ... 758 # ###### Header 6 759 # 760 $text = preg_replace_callback('{ 761 ^(\#{1,6}) # $1 = string of #\'s 762 [ ]* 763 (.+?) # $2 = Header text 764 [ ]* 765 \#* # optional closing #\'s (not counted) 766 \n+ 767 }xm', 768 array($this, '_doHeaders_callback_atx'), $text); 769 770 return $text; 771 } 772 protected function _doHeaders_callback_setext($matches) { 773 # Terrible hack to check we haven't found an empty list item. 774 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) 775 return $matches[0]; 776 777 $level = $matches[2]{0} == '=' ? 1 : 2; 778 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 779 return "\n" . $this->hashBlock($block) . "\n\n"; 780 } 781 protected function _doHeaders_callback_atx($matches) { 782 $level = strlen($matches[1]); 783 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 784 return "\n" . $this->hashBlock($block) . "\n\n"; 785 } 786 787 788 protected function doLists($text) { 789 # 790 # Form HTML ordered (numbered) and unordered (bulleted) lists. 
791 # 792 $less_than_tab = $this->tab_width - 1; 793 794 # Re-usable patterns to match list item bullets and number markers: 795 $marker_ul_re = '[*+-]'; 796 $marker_ol_re = '\d+[\.]'; 797 798 $markers_relist = array( 799 $marker_ul_re => $marker_ol_re, 800 $marker_ol_re => $marker_ul_re, 801 ); 802 803 foreach ($markers_relist as $marker_re => $other_marker_re) { 804 # Re-usable pattern to match any entirel ul or ol list: 805 $whole_list_re = ' 806 ( # $1 = whole list 807 ( # $2 808 ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 809 ('.$marker_re.') # $4 = first list item marker 810 [ ]+ 811 ) 812 (?s:.+?) 813 ( # $5 814 \z 815 | 816 \n{2,} 817 (?=\S) 818 (?! # Negative lookahead for another list item marker 819 [ ]* 820 '.$marker_re.'[ ]+ 821 ) 822 | 823 (?= # Lookahead for another kind of list 824 \n 825 \3 # Must have the same indentation 826 '.$other_marker_re.'[ ]+ 827 ) 828 ) 829 ) 830 '; // mx 831 832 # We use a different prefix before nested lists than top-level lists. 833 # See extended comment in _ProcessListItems(). 834 835 if ($this->list_level) { 836 $text = preg_replace_callback('{ 837 ^ 838 '.$whole_list_re.' 839 }mx', 840 array($this, '_doLists_callback'), $text); 841 } 842 else { 843 $text = preg_replace_callback('{ 844 (?:(?<=\n)\n|\A\n?) # Must eat the newline 845 '.$whole_list_re.' 846 }mx', 847 array($this, '_doLists_callback'), $text); 848 } 849 } 850 851 return $text; 852 } 853 protected function _doLists_callback($matches) { 854 # Re-usable patterns to match list item bullets and number markers: 855 $marker_ul_re = '[*+-]'; 856 $marker_ol_re = '\d+[\.]'; 857 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 858 859 $list = $matches[1]; 860 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol"; 861 862 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 863 864 $list .= "\n"; 865 $result = $this->processListItems($list, $marker_any_re); 866 867 $result = $this->hashBlock("<$list_type>\n" . 
$result . "</$list_type>"); 868 return "\n". $result ."\n\n"; 869 } 870 871 protected $list_level = 0; 872 873 protected function processListItems($list_str, $marker_any_re) { 874 # 875 # Process the contents of a single ordered or unordered list, splitting it 876 # into individual list items. 877 # 878 # The $this->list_level global keeps track of when we're inside a list. 879 # Each time we enter a list, we increment it; when we leave a list, 880 # we decrement. If it's zero, we're not in a list anymore. 881 # 882 # We do this because when we're not inside a list, we want to treat 883 # something like this: 884 # 885 # I recommend upgrading to version 886 # 8. Oops, now this line is treated 887 # as a sub-list. 888 # 889 # As a single paragraph, despite the fact that the second line starts 890 # with a digit-period-space sequence. 891 # 892 # Whereas when we're inside a list (or sub-list), that line will be 893 # treated as the start of a sub-list. What a kludge, huh? This is 894 # an aspect of Markdown's syntax that's hard to parse perfectly 895 # without resorting to mind-reading. Perhaps the solution is to 896 # change the syntax rules such that sub-lists must start with a 897 # starting cardinal number; e.g. "1." or "a.". 898 899 $this->list_level++; 900 901 # trim trailing blank lines: 902 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 903 904 $list_str = preg_replace_callback('{ 905 (\n)? # leading line = $1 906 (^[ ]*) # leading whitespace = $2 907 ('.$marker_any_re.' 
# list marker and space = $3 908 (?:[ ]+|(?=\n)) # space only required if item is not empty 909 ) 910 ((?s:.*?)) # list item text = $4 911 (?:(\n+(?=\n))|\n) # tailing blank line = $5 912 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 913 }xm', 914 array($this, '_processListItems_callback'), $list_str); 915 916 $this->list_level--; 917 return $list_str; 918 } 919 protected function _processListItems_callback($matches) { 920 $item = $matches[4]; 921 $leading_line =& $matches[1]; 922 $leading_space =& $matches[2]; 923 $marker_space = $matches[3]; 924 $tailing_blank_line =& $matches[5]; 925 926 if ($leading_line || $tailing_blank_line || 927 preg_match('/\n{2,}/', $item)) 928 { 929 # Replace marker with the appropriate whitespace indentation 930 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 931 $item = $this->runBlockGamut($this->outdent($item)."\n"); 932 } 933 else { 934 # Recursion for sub-lists: 935 $item = $this->doLists($this->outdent($item)); 936 $item = preg_replace('/\n+$/', '', $item); 937 $item = $this->runSpanGamut($item); 938 } 939 940 return "<li>" . $item . "</li>\n"; 941 } 942 943 944 protected function doCodeBlocks($text) { 945 # 946 # Process Markdown `<pre><code>` blocks. 947 # 948 $text = preg_replace_callback('{ 949 (?:\n\n|\A\n?) 
950 ( # $1 = the code block -- one or more lines, starting with a space/tab 951 (?> 952 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 953 .*\n+ 954 )+ 955 ) 956 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc 957 }xm', 958 array($this, '_doCodeBlocks_callback'), $text); 959 960 return $text; 961 } 962 protected function _doCodeBlocks_callback($matches) { 963 $codeblock = $matches[1]; 964 965 $codeblock = $this->outdent($codeblock); 966 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 967 968 # trim leading newlines and trailing newlines 969 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 970 971 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 972 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 973 } 974 975 976 protected function makeCodeSpan($code) { 977 # 978 # Create a code span markup for $code. Called from handleSpanToken. 979 # 980 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 981 return $this->hashPart("<code>$code</code>"); 982 } 983 984 985 protected $em_relist = array( 986 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)', 987 '*' => '(?<![\s*])\*(?!\*)', 988 '_' => '(?<![\s_])_(?!_)', 989 ); 990 protected $strong_relist = array( 991 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)', 992 '**' => '(?<![\s*])\*\*(?!\*)', 993 '__' => '(?<![\s_])__(?!_)', 994 ); 995 protected $em_strong_relist = array( 996 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)', 997 '***' => '(?<![\s*])\*\*\*(?!\*)', 998 '___' => '(?<![\s_])___(?!_)', 999 ); 1000 protected $em_strong_prepared_relist; 1001 1002 protected function prepareItalicsAndBold() { 1003 # 1004 # Prepare regular expressions for searching emphasis tokens in any 1005 # context. 1006 # 1007 foreach ($this->em_relist as $em => $em_re) { 1008 foreach ($this->strong_relist as $strong => $strong_re) { 1009 # Construct list of allowed token expressions. 
1010 $token_relist = array(); 1011 if (isset($this->em_strong_relist["$em$strong"])) { 1012 $token_relist[] = $this->em_strong_relist["$em$strong"]; 1013 } 1014 $token_relist[] = $em_re; 1015 $token_relist[] = $strong_re; 1016 1017 # Construct master expression from list. 1018 $token_re = '{('. implode('|', $token_relist) .')}'; 1019 $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1020 } 1021 } 1022 } 1023 1024 protected function doItalicsAndBold($text) { 1025 $token_stack = array(''); 1026 $text_stack = array(''); 1027 $em = ''; 1028 $strong = ''; 1029 $tree_char_em = false; 1030 1031 while (1) { 1032 # 1033 # Get prepared regular expression for seraching emphasis tokens 1034 # in current context. 1035 # 1036 $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1037 1038 # 1039 # Each loop iteration search for the next emphasis token. 1040 # Each token is then passed to handleSpanToken. 1041 # 1042 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1043 $text_stack[0] .= $parts[0]; 1044 $token =& $parts[1]; 1045 $text =& $parts[2]; 1046 1047 if (empty($token)) { 1048 # Reached end of text span: empty stack without emitting. 1049 # any more emphasis. 1050 while ($token_stack[0]) { 1051 $text_stack[1] .= array_shift($token_stack); 1052 $text_stack[0] .= array_shift($text_stack); 1053 } 1054 break; 1055 } 1056 1057 $token_len = strlen($token); 1058 if ($tree_char_em) { 1059 # Reached closing marker while inside a three-char emphasis. 1060 if ($token_len == 3) { 1061 # Three-char closing marker, close em and strong. 
1062 array_shift($token_stack); 1063 $span = array_shift($text_stack); 1064 $span = $this->runSpanGamut($span); 1065 $span = "<strong><em>$span</em></strong>"; 1066 $text_stack[0] .= $this->hashPart($span); 1067 $em = ''; 1068 $strong = ''; 1069 } else { 1070 # Other closing marker: close one em or strong and 1071 # change current token state to match the other 1072 $token_stack[0] = str_repeat($token{0}, 3-$token_len); 1073 $tag = $token_len == 2 ? "strong" : "em"; 1074 $span = $text_stack[0]; 1075 $span = $this->runSpanGamut($span); 1076 $span = "<$tag>$span</$tag>"; 1077 $text_stack[0] = $this->hashPart($span); 1078 $$tag = ''; # $$tag stands for $em or $strong 1079 } 1080 $tree_char_em = false; 1081 } else if ($token_len == 3) { 1082 if ($em) { 1083 # Reached closing marker for both em and strong. 1084 # Closing strong marker: 1085 for ($i = 0; $i < 2; ++$i) { 1086 $shifted_token = array_shift($token_stack); 1087 $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1088 $span = array_shift($text_stack); 1089 $span = $this->runSpanGamut($span); 1090 $span = "<$tag>$span</$tag>"; 1091 $text_stack[0] .= $this->hashPart($span); 1092 $$tag = ''; # $$tag stands for $em or $strong 1093 } 1094 } else { 1095 # Reached opening three-char emphasis marker. Push on token 1096 # stack; will be handled by the special condition above. 
1097 $em = $token{0}; 1098 $strong = "$em$em"; 1099 array_unshift($token_stack, $token); 1100 array_unshift($text_stack, ''); 1101 $tree_char_em = true; 1102 } 1103 } else if ($token_len == 2) { 1104 if ($strong) { 1105 # Unwind any dangling emphasis marker: 1106 if (strlen($token_stack[0]) == 1) { 1107 $text_stack[1] .= array_shift($token_stack); 1108 $text_stack[0] .= array_shift($text_stack); 1109 } 1110 # Closing strong marker: 1111 array_shift($token_stack); 1112 $span = array_shift($text_stack); 1113 $span = $this->runSpanGamut($span); 1114 $span = "<strong>$span</strong>"; 1115 $text_stack[0] .= $this->hashPart($span); 1116 $strong = ''; 1117 } else { 1118 array_unshift($token_stack, $token); 1119 array_unshift($text_stack, ''); 1120 $strong = $token; 1121 } 1122 } else { 1123 # Here $token_len == 1 1124 if ($em) { 1125 if (strlen($token_stack[0]) == 1) { 1126 # Closing emphasis marker: 1127 array_shift($token_stack); 1128 $span = array_shift($text_stack); 1129 $span = $this->runSpanGamut($span); 1130 $span = "<em>$span</em>"; 1131 $text_stack[0] .= $this->hashPart($span); 1132 $em = ''; 1133 } else { 1134 $text_stack[0] .= $token; 1135 } 1136 } else { 1137 array_unshift($token_stack, $token); 1138 array_unshift($text_stack, ''); 1139 $em = $token; 1140 } 1141 } 1142 } 1143 return $text_stack[0]; 1144 } 1145 1146 1147 protected function doBlockQuotes($text) { 1148 $text = preg_replace_callback('/ 1149 ( # Wrap whole match in $1 1150 (?> 1151 ^[ ]*>[ ]? 
# ">" at the start of a line 1152 .+\n # rest of the first line 1153 (.+\n)* # subsequent consecutive lines 1154 \n* # blanks 1155 )+ 1156 ) 1157 /xm', 1158 array($this, '_doBlockQuotes_callback'), $text); 1159 1160 return $text; 1161 } 1162 protected function _doBlockQuotes_callback($matches) { 1163 $bq = $matches[1]; 1164 # trim one level of quoting - trim whitespace-only lines 1165 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1166 $bq = $this->runBlockGamut($bq); # recurse 1167 1168 $bq = preg_replace('/^/m', " ", $bq); 1169 # These leading spaces cause problem with <pre> content, 1170 # so we need to fix that: 1171 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 1172 array($this, '_doBlockQuotes_callback2'), $bq); 1173 1174 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 1175 } 1176 protected function _doBlockQuotes_callback2($matches) { 1177 $pre = $matches[1]; 1178 $pre = preg_replace('/^ /m', '', $pre); 1179 return $pre; 1180 } 1181 1182 1183 protected function formParagraphs($text) { 1184 # 1185 # Params: 1186 # $text - string to process with html <p> tags 1187 # 1188 # Strip leading and trailing lines: 1189 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1190 1191 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1192 1193 # 1194 # Wrap <p> tags and unhashify HTML blocks 1195 # 1196 foreach ($grafs as $key => $value) { 1197 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1198 # Is a paragraph. 1199 $value = $this->runSpanGamut($value); 1200 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1201 $value .= "</p>"; 1202 $grafs[$key] = $this->unhash($value); 1203 } 1204 else { 1205 # Is a block. 1206 # Modify elements of @grafs in-place... 
1207 $graf = $value; 1208 $block = $this->html_hashes[$graf]; 1209 $graf = $block; 1210 // if (preg_match('{ 1211 // \A 1212 // ( # $1 = <div> tag 1213 // <div \s+ 1214 // [^>]* 1215 // \b 1216 // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1217 // 1 1218 // \2 1219 // [^>]* 1220 // > 1221 // ) 1222 // ( # $3 = contents 1223 // .* 1224 // ) 1225 // (</div>) # $4 = closing tag 1226 // \z 1227 // }xs', $block, $matches)) 1228 // { 1229 // list(, $div_open, , $div_content, $div_close) = $matches; 1230 // 1231 // # We can't call Markdown(), because that resets the hash; 1232 // # that initialization code should be pulled into its own sub, though. 1233 // $div_content = $this->hashHTMLBlocks($div_content); 1234 // 1235 // # Run document gamut methods on the content. 1236 // foreach ($this->document_gamut as $method => $priority) { 1237 // $div_content = $this->$method($div_content); 1238 // } 1239 // 1240 // $div_open = preg_replace( 1241 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1242 // 1243 // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1244 // } 1245 $grafs[$key] = $graf; 1246 } 1247 } 1248 1249 return implode("\n\n", $grafs); 1250 } 1251 1252 1253 protected function encodeAttribute($text) { 1254 # 1255 # Encode text for a double-quoted HTML attribute. This function 1256 # is *not* suitable for attributes enclosed in single quotes. 1257 # 1258 $text = $this->encodeAmpsAndAngles($text); 1259 $text = str_replace('"', '"', $text); 1260 return $text; 1261 } 1262 1263 1264 protected function encodeAmpsAndAngles($text) { 1265 # 1266 # Smart processing for ampersands and angle brackets that need to 1267 # be encoded. Valid character entities are left alone unless the 1268 # no-entities mode is set. 
1269 # 1270 if ($this->no_entities) { 1271 $text = str_replace('&', '&', $text); 1272 } else { 1273 # Ampersand-encoding based entirely on Nat Irons's Amputator 1274 # MT plugin: <http://bumppo.net/projects/amputator/> 1275 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1276 '&', $text); 1277 } 1278 # Encode remaining <'s 1279 $text = str_replace('<', '<', $text); 1280 1281 return $text; 1282 } 1283 1284 1285 protected function doAutoLinks($text) { 1286 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 1287 array($this, '_doAutoLinks_url_callback'), $text); 1288 1289 # Email addresses: <[email protected]> 1290 $text = preg_replace_callback('{ 1291 < 1292 (?:mailto:)? 1293 ( 1294 (?: 1295 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1296 | 1297 ".*?" 1298 ) 1299 \@ 1300 (?: 1301 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1302 | 1303 \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1304 ) 1305 ) 1306 > 1307 }xi', 1308 array($this, '_doAutoLinks_email_callback'), $text); 1309 $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text); 1310 1311 return $text; 1312 } 1313 protected function _doAutoLinks_tel_callback($matches) { 1314 $url = $this->encodeAttribute($matches[1]); 1315 $tel = $this->encodeAttribute($matches[2]); 1316 $link = "<a href=\"$url\">$tel</a>"; 1317 return $this->hashPart($link); 1318 } 1319 protected function _doAutoLinks_url_callback($matches) { 1320 $url = $this->encodeAttribute($matches[1]); 1321 $link = "<a href=\"$url\">$url</a>"; 1322 return $this->hashPart($link); 1323 } 1324 protected function _doAutoLinks_email_callback($matches) { 1325 $address = $matches[1]; 1326 $link = $this->encodeEmailAddress($address); 1327 return $this->hashPart($link); 1328 } 1329 1330 1331 protected function encodeEmailAddress($addr) { 1332 # 1333 # Input: an email address, e.g. 
"[email protected]" 1334 # 1335 # Output: the email address as a mailto link, with each character 1336 # of the address encoded as either a decimal or hex entity, in 1337 # the hopes of foiling most address harvesting spam bots. E.g.: 1338 # 1339 # <p><a href="mailto:foo 1340 # @example.co 1341 # m">foo@exampl 1342 # e.com</a></p> 1343 # 1344 # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1345 # With some optimizations by Milian Wolff. 1346 # 1347 $addr = "mailto:" . $addr; 1348 $chars = preg_split('/(?<!^)(?!$)/', $addr); 1349 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed. 1350 1351 foreach ($chars as $key => $char) { 1352 $ord = ord($char); 1353 # Ignore non-ascii chars. 1354 if ($ord < 128) { 1355 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 1356 # roughly 10% raw, 45% hex, 45% dec 1357 # '@' *must* be encoded. I insist. 1358 # '"' has to be encoded inside the attribute 1359 if ($r > 90 && $char != '@' && $char != '"') /* do nothing */; 1360 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 1361 else $chars[$key] = '&#'.$ord.';'; 1362 } 1363 } 1364 1365 $addr = implode('', $chars); 1366 $text = implode('', array_slice($chars, 7)); # text without `mailto:` 1367 $addr = "<a href=\"$addr\">$text</a>"; 1368 1369 return $addr; 1370 } 1371 1372 1373 protected function parseSpan($str) { 1374 # 1375 # Take the string $str and parse it into tokens, hashing embeded HTML, 1376 # escaped characters and handling code spans. 1377 # 1378 $output = ''; 1379 1380 $span_re = '{ 1381 ( 1382 \\\\'.$this->escape_chars_re.' 1383 | 1384 (?<![`\\\\]) 1385 `+ # code span marker 1386 '.( $this->no_markup ? '' : ' 1387 | 1388 <!-- .*? --> # comment 1389 | 1390 <\?.*?\?> | <%.*?%> # processing instruction 1391 | 1392 <[!$]?[-a-zA-Z0-9:_]+ # regular tags 1393 (?> 1394 \s 1395 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1396 )? 
1397 > 1398 | 1399 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag 1400 | 1401 </[-a-zA-Z0-9:_]+\s*> # closing tag 1402 ').' 1403 ) 1404 }xs'; 1405 1406 while (1) { 1407 # 1408 # Each loop iteration seach for either the next tag, the next 1409 # openning code span marker, or the next escaped character. 1410 # Each token is then passed to handleSpanToken. 1411 # 1412 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1413 1414 # Create token from text preceding tag. 1415 if ($parts[0] != "") { 1416 $output .= $parts[0]; 1417 } 1418 1419 # Check if we reach the end. 1420 if (isset($parts[1])) { 1421 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1422 $str = $parts[2]; 1423 } 1424 else { 1425 break; 1426 } 1427 } 1428 1429 return $output; 1430 } 1431 1432 1433 protected function handleSpanToken($token, &$str) { 1434 # 1435 # Handle $token provided by parseSpan by determining its nature and 1436 # returning the corresponding value that should replace it. 1437 # 1438 switch ($token{0}) { 1439 case "\\": 1440 return $this->hashPart("&#". ord($token{1}). ";"); 1441 case "`": 1442 # Search for end marker in remaining text. 1443 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1444 $str, $matches)) 1445 { 1446 $str = $matches[2]; 1447 $codespan = $this->makeCodeSpan($matches[1]); 1448 return $this->hashPart($codespan); 1449 } 1450 return $token; // return as text since no ending marker found. 1451 default: 1452 return $this->hashPart($token); 1453 } 1454 } 1455 1456 1457 protected function outdent($text) { 1458 # 1459 # Remove one level of line-leading tabs or spaces 1460 # 1461 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1462 } 1463 1464 1465 # String length function for detab. `_initDetab` will create a function to 1466 # hanlde UTF-8 if the default function does not exist. 
protected $utf8_strlen = 'mb_strlen';

	protected function detab($text) {
	#
	# Replace tabs with the appropriate amount of space.
	#
	# Params:
	#   $text - text in which tabs must be expanded
	#
	# Returns $text where every line containing at least one tab has been
	# rewritten by `_detab_callback`; other lines are left untouched.
	#
		# For each line we separate the line in blocks delimited by
		# tab characters. Then we reconstruct every line by adding the
		# appropriate number of space between each blocks.

		$text = preg_replace_callback('/^.*\t.*$/m',
			array($this, '_detab_callback'), $text);

		return $text;
	}
	protected function _detab_callback($matches) {
	#
	# Callback for `detab`: $matches[0] is one line containing tabs.
	# Returns that line with each tab replaced by enough spaces to reach
	# the next multiple of `$this->tab_width`.
	#
		$line = $matches[0];
		$strlen = $this->utf8_strlen; # strlen function for UTF-8.

		# Split in blocks.
		$blocks = explode("\t", $line);
		# Add each blocks to the line.
		$line = $blocks[0];
		unset($blocks[0]); # Do not add first block twice.
		foreach ($blocks as $block) {
			# Calculate amount of space, insert spaces, insert block.
			# The length is measured on the line rebuilt so far, so the
			# padding accounts for tabs that were already expanded.
			$amount = $this->tab_width -
				$strlen($line, 'UTF-8') % $this->tab_width;
			$line .= str_repeat(" ", $amount) . $block;
		}
		return $line;
	}
	protected function _initDetab() {
	#
	# Check for the availability of the function in the `utf8_strlen` property
	# (initially `mb_strlen`). If the function is not available, install a
	# function that will loosely count the number of UTF-8 characters with a
	# regular expression.
	#
	# The fallback is an anonymous function rather than the result of
	# `create_function()`, which is deprecated as of PHP 7.2 and removed in
	# PHP 8.0.
	#
		$strlen = $this->utf8_strlen;

		# A non-string value means a callable has already been installed
		# (for instance by a previous call); a string is kept when it names
		# an existing function.
		if (!is_string($strlen) || function_exists($strlen)) return;

		# `_detab_callback` also passes an encoding as second argument; it
		# is simply ignored by this fallback.
		$this->utf8_strlen = function ($text) {
			return preg_match_all(
				'/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
				$text, $m);
		};
	}


	protected function unhash($text) {
	#
	# Swap back in all the tags hashed by _HashHTMLBlocks.
	#
	# Every hash token found in $text (a character, \x1A, digits, then the
	# same character again) is replaced by `_unhash_callback`.
	#
		return preg_replace_callback('/(.)\x1A[0-9]+\1/',
			array($this, '_unhash_callback'), $text);
	}
	protected function _unhash_callback($matches) {
	#
	# Callback for `unhash`: returns the entry of `$this->html_hashes`
	# stored under the matched token.
	#
		return $this->html_hashes[$matches[0]];
	}

}


#
# Temporary Markdown Extra Parser Implementation Class
#
# NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
# This makes it easier to propagate the changes between the three different
# packaging styles of PHP Markdown. When this issue is resolved, this
# MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
# will contain the code. So please use \Michelf\MarkdownExtra and ignore this
# one.
#

abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {

	### Configuration Variables ###

	# Prefix for footnote ids.
	public $fn_id_prefix = "";

	# Optional title attribute for footnote links and backlinks.
	public $fn_link_title = "";
	public $fn_backlink_title = "";

	# Optional class attribute for footnote links and backlinks.
	public $fn_link_class = "footnote-ref";
	public $fn_backlink_class = "footnote-backref";

	# Class name for table cell alignment (%% replaced left/center/right)
	# For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
	# If empty, the align attribute is used instead of a class name.
	public $table_align_class_tmpl = '';

	# Optional class prefix for fenced code block.
	public $code_class_prefix = "";
	# Class attribute for code blocks goes on the `code` tag;
	# setting this to true will put attributes on the `pre` tag instead.
	public $code_attr_on_pre = false;

	# Predefined abbreviations.
1566 public $predef_abbr = array(); 1567 1568 1569 ### Parser Implementation ### 1570 1571 public function __construct() { 1572 # 1573 # Constructor function. Initialize the parser object. 1574 # 1575 # Add extra escapable characters before parent constructor 1576 # initialize the table. 1577 $this->escape_chars .= ':|'; 1578 1579 # Insert extra document, block, and span transformations. 1580 # Parent constructor will do the sorting. 1581 $this->document_gamut += array( 1582 "doFencedCodeBlocks" => 5, 1583 "stripFootnotes" => 15, 1584 "stripAbbreviations" => 25, 1585 "appendFootnotes" => 50, 1586 ); 1587 $this->block_gamut += array( 1588 "doFencedCodeBlocks" => 5, 1589 "doTables" => 15, 1590 "doDefLists" => 45, 1591 ); 1592 $this->span_gamut += array( 1593 "doFootnotes" => 5, 1594 "doAbbreviations" => 70, 1595 ); 1596 1597 parent::__construct(); 1598 } 1599 1600 1601 # Extra variables used during extra transformations. 1602 protected $footnotes = array(); 1603 protected $footnotes_ordered = array(); 1604 protected $footnotes_ref_count = array(); 1605 protected $footnotes_numbers = array(); 1606 protected $abbr_desciptions = array(); 1607 protected $abbr_word_re = ''; 1608 1609 # Give the current footnote number. 1610 protected $footnote_counter = 1; 1611 1612 1613 protected function setup() { 1614 # 1615 # Setting up Extra-specific variables. 
1616 # 1617 parent::setup(); 1618 1619 $this->footnotes = array(); 1620 $this->footnotes_ordered = array(); 1621 $this->footnotes_ref_count = array(); 1622 $this->footnotes_numbers = array(); 1623 $this->abbr_desciptions = array(); 1624 $this->abbr_word_re = ''; 1625 $this->footnote_counter = 1; 1626 1627 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 1628 if ($this->abbr_word_re) 1629 $this->abbr_word_re .= '|'; 1630 $this->abbr_word_re .= preg_quote($abbr_word); 1631 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 1632 } 1633 } 1634 1635 protected function teardown() { 1636 # 1637 # Clearing Extra-specific variables. 1638 # 1639 $this->footnotes = array(); 1640 $this->footnotes_ordered = array(); 1641 $this->footnotes_ref_count = array(); 1642 $this->footnotes_numbers = array(); 1643 $this->abbr_desciptions = array(); 1644 $this->abbr_word_re = ''; 1645 1646 parent::teardown(); 1647 } 1648 1649 1650 ### Extra Attribute Parser ### 1651 1652 # Expression to use to catch attributes (includes the braces) 1653 protected $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}'; 1654 # Expression to use when parsing in a context when no capture is desired 1655 protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}'; 1656 1657 protected function doExtraAttributes($tag_name, $attr) { 1658 # 1659 # Parse attributes caught by the $this->id_class_attr_catch_re expression 1660 # and return the HTML-formatted list of attributes. 1661 # 1662 # Currently supported attributes are .class and #id. 
1663 # 1664 if (empty($attr)) return ""; 1665 1666 # Split on components 1667 preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches); 1668 $elements = $matches[0]; 1669 1670 # handle classes and ids (only first id taken into account) 1671 $classes = array(); 1672 $id = false; 1673 foreach ($elements as $element) { 1674 if ($element{0} == '.') { 1675 $classes[] = substr($element, 1); 1676 } else if ($element{0} == '#') { 1677 if ($id === false) $id = substr($element, 1); 1678 } 1679 } 1680 1681 # compose attributes as string 1682 $attr_str = ""; 1683 if (!empty($id)) { 1684 $attr_str .= ' id="'.$id.'"'; 1685 } 1686 if (!empty($classes)) { 1687 $attr_str .= ' class="'.implode(" ", $classes).'"'; 1688 } 1689 return $attr_str; 1690 } 1691 1692 1693 protected function stripLinkDefinitions($text) { 1694 # 1695 # Strips link definitions from text, stores the URLs and titles in 1696 # hash references. 1697 # 1698 $less_than_tab = $this->tab_width - 1; 1699 1700 # Link defs are in the form: ^[id]: url "optional title" 1701 $text = preg_replace_callback('{ 1702 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 1703 [ ]* 1704 \n? # maybe *one* newline 1705 [ ]* 1706 (?: 1707 <(.+?)> # url = $2 1708 | 1709 (\S+?) # url = $3 1710 ) 1711 [ ]* 1712 \n? # maybe one newline 1713 [ ]* 1714 (?: 1715 (?<=\s) # lookbehind for whitespace 1716 ["(] 1717 (.*?) # title = $4 1718 [")] 1719 [ ]* 1720 )? # title is optional 1721 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 1722 (?:\n+|\Z) 1723 }xm', 1724 array($this, '_stripLinkDefinitions_callback'), 1725 $text); 1726 return $text; 1727 } 1728 protected function _stripLinkDefinitions_callback($matches) { 1729 $link_id = strtolower($matches[1]); 1730 $url = $matches[2] == '' ? 
$matches[3] : $matches[2]; 1731 $this->urls[$link_id] = $url; 1732 $this->titles[$link_id] =& $matches[4]; 1733 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 1734 return ''; # String that will replace the block 1735 } 1736 1737 1738 ### HTML Block Parser ### 1739 1740 # Tags that are always treated as block tags: 1741 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure'; 1742 1743 # Tags treated as block tags only if the opening tag is alone on its line: 1744 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 1745 1746 # Tags where markdown="1" default to span mode: 1747 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1748 1749 # Tags which must not have their contents modified, no matter where 1750 # they appear: 1751 protected $clean_tags_re = 'script|style|math|svg'; 1752 1753 # Tags that do not need to be closed. 1754 protected $auto_close_tags_re = 'hr|img|param|source|track'; 1755 1756 1757 protected function hashHTMLBlocks($text) { 1758 # 1759 # Hashify HTML Blocks and "clean tags". 1760 # 1761 # We only want to do this for block-level HTML tags, such as headers, 1762 # lists, and tables. That's because we still want to wrap <p>s around 1763 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1764 # phrase emphasis, and spans. The list of tags we're looking for is 1765 # hard-coded. 1766 # 1767 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 1768 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 1769 # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 1770 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 1771 # These two functions are calling each other. It's recursive! 
1772 # 1773 if ($this->no_markup) return $text; 1774 1775 # 1776 # Call the HTML-in-Markdown hasher. 1777 # 1778 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 1779 1780 return $text; 1781 } 1782 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 1783 $enclosing_tag_re = '', $span = false) 1784 { 1785 # 1786 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 1787 # 1788 # * $indent is the number of space to be ignored when checking for code 1789 # blocks. This is important because if we don't take the indent into 1790 # account, something like this (which looks right) won't work as expected: 1791 # 1792 # <div> 1793 # <div markdown="1"> 1794 # Hello World. <-- Is this a Markdown code block or text? 1795 # </div> <-- Is this a Markdown code block or a real tag? 1796 # <div> 1797 # 1798 # If you don't like this, just don't indent the tag on which 1799 # you apply the markdown="1" attribute. 1800 # 1801 # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 1802 # tag with that name. Nested tags supported. 1803 # 1804 # * If $span is true, text inside must treated as span. So any double 1805 # newline will be replaced by a single newline so that it does not create 1806 # paragraphs. 1807 # 1808 # Returns an array of that form: ( processed text , remaining text ) 1809 # 1810 if ($text === '') return array('', ''); 1811 1812 # Regex to check for the presense of newlines around a block tag. 1813 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 1814 $newline_after_re = 1815 '{ 1816 ^ # Start of text following the tag. 1817 (?>[ ]*<!--.*?-->)? # Optional comment. 1818 [ ]*\n # Must be followed by newline. 1819 }xs'; 1820 1821 # Regex to match any tag. 1822 $block_tag_re = 1823 '{ 1824 ( # $2: Capture whole tag. 1825 </? # Any opening or closing tag. 1826 (?> # Tag name. 1827 '.$this->block_tags_re.' | 1828 '.$this->context_block_tags_re.' | 1829 '.$this->clean_tags_re.' | 1830 (?!\s)'.$enclosing_tag_re.' 
1831 ) 1832 (?: 1833 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 1834 (?> 1835 ".*?" | # Double quotes (can contain `>`) 1836 \'.*?\' | # Single quotes (can contain `>`) 1837 .+? # Anything but quotes and `>`. 1838 )*? 1839 )? 1840 > # End of tag. 1841 | 1842 <!-- .*? --> # HTML Comment 1843 | 1844 <\?.*?\?> | <%.*?%> # Processing instruction 1845 | 1846 <!\[CDATA\[.*?\]\]> # CData Block 1847 '. ( !$span ? ' # If not in span. 1848 | 1849 # Indented code block 1850 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 1851 [ ]{'.($indent+4).'}[^\n]* \n 1852 (?> 1853 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 1854 )* 1855 | 1856 # Fenced code block marker 1857 (?<= ^ | \n ) 1858 [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,}) 1859 [ ]* 1860 (?: 1861 \.?[-_:a-zA-Z0-9]+ # standalone class name 1862 | 1863 '.$this->id_class_attr_nocatch_re.' # extra attributes 1864 )? 1865 [ ]* 1866 (?= \n ) 1867 ' : '' ). ' # End (if not is span). 1868 | 1869 # Code span marker 1870 # Note, this regex needs to go after backtick fenced 1871 # code blocks but it should also be kept outside of the 1872 # "if not in span" condition adding backticks to the parser 1873 `+ 1874 ) 1875 }xs'; 1876 1877 1878 $depth = 0; # Current depth inside the tag tree. 1879 $parsed = ""; # Parsed text that will be returned. 1880 1881 # 1882 # Loop through every tag until we find the closing tag of the parent 1883 # or loop until reaching the end of text if no parent tag specified. 1884 # 1885 do { 1886 # 1887 # Split the text using the first $tag_match pattern found. 1888 # Text before pattern will be first in the array, text after 1889 # pattern will be at the end, and between will be any catches made 1890 # by the pattern. 1891 # 1892 $parts = preg_split($block_tag_re, $text, 2, 1893 PREG_SPLIT_DELIM_CAPTURE); 1894 1895 # If in Markdown span mode, add a empty-string span-level hash 1896 # after each newline to prevent triggering any block element. 
1897 if ($span) { 1898 $void = $this->hashPart("", ':'); 1899 $newline = "$void\n"; 1900 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 1901 } 1902 1903 $parsed .= $parts[0]; # Text before current tag. 1904 1905 # If end of $text has been reached. Stop loop. 1906 if (count($parts) < 3) { 1907 $text = ""; 1908 break; 1909 } 1910 1911 $tag = $parts[1]; # Tag to handle. 1912 $text = $parts[2]; # Remaining text after current tag. 1913 $tag_re = preg_quote($tag); # For use in a regular expression. 1914 1915 # 1916 # Check for: Fenced code block marker. 1917 # Note: need to recheck the whole tag to disambiguate backtick 1918 # fences from code spans 1919 # 1920 if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) { 1921 # Fenced code block marker: find matching end marker. 1922 $fence_indent = strlen($capture[1]); # use captured indent in re 1923 $fence_re = $capture[2]; # use captured fence in re 1924 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text, 1925 $matches)) 1926 { 1927 # End marker found: pass text unchanged until marker. 1928 $parsed .= $tag . $matches[0]; 1929 $text = substr($text, strlen($matches[0])); 1930 } 1931 else { 1932 # No end marker: just skip it. 1933 $parsed .= $tag; 1934 } 1935 } 1936 # 1937 # Check for: Indented code block. 1938 # 1939 else if ($tag{0} == "\n" || $tag{0} == " ") { 1940 # Indented code block: pass it unchanged, will be handled 1941 # later. 1942 $parsed .= $tag; 1943 } 1944 # 1945 # Check for: Code span marker 1946 # Note: need to check this after backtick fenced code blocks 1947 # 1948 else if ($tag{0} == "`") { 1949 # Find corresponding end marker. 1950 $tag_re = preg_quote($tag); 1951 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}', 1952 $text, $matches)) 1953 { 1954 # End marker found: pass text unchanged until marker. 1955 $parsed .= $tag . 
$matches[0]; 1956 $text = substr($text, strlen($matches[0])); 1957 } 1958 else { 1959 # Unmatched marker: just skip it. 1960 $parsed .= $tag; 1961 } 1962 } 1963 # 1964 # Check for: Opening Block level tag or 1965 # Opening Context Block tag (like ins and del) 1966 # used as a block tag (tag is alone on it's line). 1967 # 1968 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 1969 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 1970 preg_match($newline_before_re, $parsed) && 1971 preg_match($newline_after_re, $text) ) 1972 ) 1973 { 1974 # Need to parse tag and following text using the HTML parser. 1975 list($block_text, $text) = 1976 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 1977 1978 # Make sure it stays outside of any paragraph by adding newlines. 1979 $parsed .= "\n\n$block_text\n\n"; 1980 } 1981 # 1982 # Check for: Clean tag (like script, math) 1983 # HTML Comments, processing instructions. 1984 # 1985 else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || 1986 $tag{1} == '!' || $tag{1} == '?') 1987 { 1988 # Need to parse tag and following text using the HTML parser. 1989 # (don't check for markdown attribute) 1990 list($block_text, $text) = 1991 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 1992 1993 $parsed .= $block_text; 1994 } 1995 # 1996 # Check for: Tag with same name as enclosing tag. 1997 # 1998 else if ($enclosing_tag_re !== '' && 1999 # Same name as enclosing tag. 2000 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag)) 2001 { 2002 # 2003 # Increase/decrease nested tag count. 2004 # 2005 if ($tag{1} == '/') $depth--; 2006 else if ($tag{strlen($tag)-2} != '/') $depth++; 2007 2008 if ($depth < 0) { 2009 # 2010 # Going out of parent element. Clean up and break so we 2011 # return to the calling function. 2012 # 2013 $text = $tag . 
$text; 2014 break; 2015 } 2016 2017 $parsed .= $tag; 2018 } 2019 else { 2020 $parsed .= $tag; 2021 } 2022 } while ($depth >= 0); 2023 2024 return array($parsed, $text); 2025 } 2026 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 2027 # 2028 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 2029 # 2030 # * Calls $hash_method to convert any blocks. 2031 # * Stops when the first opening tag closes. 2032 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 2033 # (it is not inside clean tags) 2034 # 2035 # Returns an array of that form: ( processed text , remaining text ) 2036 # 2037 if ($text === '') return array('', ''); 2038 2039 # Regex to match `markdown` attribute inside of a tag. 2040 $markdown_attr_re = ' 2041 { 2042 \s* # Eat whitespace before the `markdown` attribute 2043 markdown 2044 \s*=\s* 2045 (?> 2046 (["\']) # $1: quote delimiter 2047 (.*?) # $2: attribute value 2048 \1 # matching delimiter 2049 | 2050 ([^\s>]*) # $3: unquoted attribute value 2051 ) 2052 () # $4: make $3 always defined (avoid warnings) 2053 }xs'; 2054 2055 # Regex to match any tag. 2056 $tag_re = '{ 2057 ( # $2: Capture whole tag. 2058 </? # Any opening or closing tag. 2059 [\w:$]+ # Tag name. 2060 (?: 2061 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 2062 (?> 2063 ".*?" | # Double quotes (can contain `>`) 2064 \'.*?\' | # Single quotes (can contain `>`) 2065 .+? # Anything but quotes and `>`. 2066 )*? 2067 )? 2068 > # End of tag. 2069 | 2070 <!-- .*? --> # HTML Comment 2071 | 2072 <\?.*?\?> | <%.*?%> # Processing instruction 2073 | 2074 <!\[CDATA\[.*?\]\]> # CData Block 2075 ) 2076 }xs'; 2077 2078 $original_text = $text; # Save original text in case of faliure. 2079 2080 $depth = 0; # Current depth inside the tag tree. 2081 $block_text = ""; # Temporary text holder for current text. 2082 $parsed = ""; # Parsed text that will be returned. 2083 2084 # 2085 # Get the name of the starting tag. 
2086 # (This pattern makes $base_tag_name_re safe without quoting.) 2087 # 2088 if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 2089 $base_tag_name_re = $matches[1]; 2090 2091 # 2092 # Loop through every tag until we find the corresponding closing tag. 2093 # 2094 do { 2095 # 2096 # Split the text using the first $tag_match pattern found. 2097 # Text before pattern will be first in the array, text after 2098 # pattern will be at the end, and between will be any catches made 2099 # by the pattern. 2100 # 2101 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 2102 2103 if (count($parts) < 3) { 2104 # 2105 # End of $text reached with unbalenced tag(s). 2106 # In that case, we return original text unchanged and pass the 2107 # first character as filtered to prevent an infinite loop in the 2108 # parent function. 2109 # 2110 return array($original_text{0}, substr($original_text, 1)); 2111 } 2112 2113 $block_text .= $parts[0]; # Text before current tag. 2114 $tag = $parts[1]; # Tag to handle. 2115 $text = $parts[2]; # Remaining text after current tag. 2116 2117 # 2118 # Check for: Auto-close tag (like <hr/>) 2119 # Comments and Processing Instructions. 2120 # 2121 if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) || 2122 $tag{1} == '!' || $tag{1} == '?') 2123 { 2124 # Just add the tag to the block as if it was text. 2125 $block_text .= $tag; 2126 } 2127 else { 2128 # 2129 # Increase/decrease nested tag count. Only do so if 2130 # the tag's name match base tag's. 2131 # 2132 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) { 2133 if ($tag{1} == '/') $depth--; 2134 else if ($tag{strlen($tag)-2} != '/') $depth++; 2135 } 2136 2137 # 2138 # Check for `markdown="1"` attribute and handle it. 2139 # 2140 if ($md_attr && 2141 preg_match($markdown_attr_re, $tag, $attr_m) && 2142 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 2143 { 2144 # Remove `markdown` attribute from opening tag. 
2145 $tag = preg_replace($markdown_attr_re, '', $tag); 2146 2147 # Check if text inside this tag must be parsed in span mode. 2148 $this->mode = $attr_m[2] . $attr_m[3]; 2149 $span_mode = $this->mode == 'span' || $this->mode != 'block' && 2150 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); 2151 2152 # Calculate indent before tag. 2153 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 2154 $strlen = $this->utf8_strlen; 2155 $indent = $strlen($matches[1], 'UTF-8'); 2156 } else { 2157 $indent = 0; 2158 } 2159 2160 # End preceding block with this tag. 2161 $block_text .= $tag; 2162 $parsed .= $this->$hash_method($block_text); 2163 2164 # Get enclosing tag name for the ParseMarkdown function. 2165 # (This pattern makes $tag_name_re safe without quoting.) 2166 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 2167 $tag_name_re = $matches[1]; 2168 2169 # Parse the content using the HTML-in-Markdown parser. 2170 list ($block_text, $text) 2171 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 2172 $tag_name_re, $span_mode); 2173 2174 # Outdent markdown text. 2175 if ($indent > 0) { 2176 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 2177 $block_text); 2178 } 2179 2180 # Append tag content to parsed text. 2181 if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 2182 else $parsed .= "$block_text"; 2183 2184 # Start over with a new block. 2185 $block_text = ""; 2186 } 2187 else $block_text .= $tag; 2188 } 2189 2190 } while ($depth > 0); 2191 2192 # 2193 # Hash last block text that wasn't processed inside the loop. 2194 # 2195 $parsed .= $this->$hash_method($block_text); 2196 2197 return array($parsed, $text); 2198 } 2199 2200 2201 protected function hashClean($text) { 2202 # 2203 # Called whenever a tag must be hashed when a function inserts a "clean" tag 2204 # in $text, it passes through this function and is automaticaly escaped, 2205 # blocking invalid nested overlap. 
2206 # 2207 return $this->hashPart($text, 'C'); 2208 } 2209 2210 2211 protected function doAnchors($text) { 2212 # 2213 # Turn Markdown link shortcuts into XHTML <a> tags. 2214 # 2215 if ($this->in_anchor) return $text; 2216 $this->in_anchor = true; 2217 2218 # 2219 # First, handle reference-style links: [link text] [id] 2220 # 2221 $text = preg_replace_callback('{ 2222 ( # wrap whole match in $1 2223 \[ 2224 ('.$this->nested_brackets_re.') # link text = $2 2225 \] 2226 2227 [ ]? # one optional space 2228 (?:\n[ ]*)? # one optional newline followed by spaces 2229 2230 \[ 2231 (.*?) # id = $3 2232 \] 2233 ) 2234 }xs', 2235 array($this, '_doAnchors_reference_callback'), $text); 2236 2237 # 2238 # Next, inline-style links: [link text](url "optional title") 2239 # 2240 $text = preg_replace_callback('{ 2241 ( # wrap whole match in $1 2242 \[ 2243 ('.$this->nested_brackets_re.') # link text = $2 2244 \] 2245 \( # literal paren 2246 [ \n]* 2247 (?: 2248 <(.+?)> # href = $3 2249 | 2250 ('.$this->nested_url_parenthesis_re.') # href = $4 2251 ) 2252 [ \n]* 2253 ( # $5 2254 ([\'"]) # quote char = $6 2255 (.*?) # Title = $7 2256 \6 # matching quote 2257 [ \n]* # ignore any spaces/tabs between closing quote and ) 2258 )? # title is optional 2259 \) 2260 (?:[ ]? '.$this->id_class_attr_catch_re.' )? 
# $8 = id/class attributes 2261 ) 2262 }xs', 2263 array($this, '_doAnchors_inline_callback'), $text); 2264 2265 # 2266 # Last, handle reference-style shortcuts: [link text] 2267 # These must come last in case you've also got [link text][1] 2268 # or [link text](/foo) 2269 # 2270 $text = preg_replace_callback('{ 2271 ( # wrap whole match in $1 2272 \[ 2273 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 2274 \] 2275 ) 2276 }xs', 2277 array($this, '_doAnchors_reference_callback'), $text); 2278 2279 $this->in_anchor = false; 2280 return $text; 2281 } 2282 protected function _doAnchors_reference_callback($matches) { 2283 $whole_match = $matches[1]; 2284 $link_text = $matches[2]; 2285 $link_id =& $matches[3]; 2286 2287 if ($link_id == "") { 2288 # for shortcut links like [this][] or [this]. 2289 $link_id = $link_text; 2290 } 2291 2292 # lower-case and turn embedded newlines into spaces 2293 $link_id = strtolower($link_id); 2294 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 2295 2296 if (isset($this->urls[$link_id])) { 2297 $url = $this->urls[$link_id]; 2298 $url = $this->encodeAttribute($url); 2299 2300 $result = "<a href=\"$url\""; 2301 if ( isset( $this->titles[$link_id] ) ) { 2302 $title = $this->titles[$link_id]; 2303 $title = $this->encodeAttribute($title); 2304 $result .= " title=\"$title\""; 2305 } 2306 if (isset($this->ref_attr[$link_id])) 2307 $result .= $this->ref_attr[$link_id]; 2308 2309 $link_text = $this->runSpanGamut($link_text); 2310 $result .= ">$link_text</a>"; 2311 $result = $this->hashPart($result); 2312 } 2313 else { 2314 $result = $whole_match; 2315 } 2316 return $result; 2317 } 2318 protected function _doAnchors_inline_callback($matches) { 2319 $whole_match = $matches[1]; 2320 $link_text = $this->runSpanGamut($matches[2]); 2321 $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; 2322 $title =& $matches[7]; 2323 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 2324 2325 // if the URL was of the form <s p a c e s> it got caught by the HTML 2326 // tag parser and hashed. Need to reverse the process before using the URL. 2327 $unhashed = $this->unhash($url); 2328 if ($unhashed != $url) 2329 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 2330 2331 $url = $this->encodeAttribute($url); 2332 2333 $result = "<a href=\"$url\""; 2334 if (isset($title)) { 2335 $title = $this->encodeAttribute($title); 2336 $result .= " title=\"$title\""; 2337 } 2338 $result .= $attr; 2339 2340 $link_text = $this->runSpanGamut($link_text); 2341 $result .= ">$link_text</a>"; 2342 2343 return $this->hashPart($result); 2344 } 2345 2346 2347 protected function doImages($text) { 2348 # 2349 # Turn Markdown image shortcuts into <img> tags. 2350 # 2351 # 2352 # First, handle reference-style labeled images: ![alt text][id] 2353 # 2354 $text = preg_replace_callback('{ 2355 ( # wrap whole match in $1 2356 !\[ 2357 ('.$this->nested_brackets_re.') # alt text = $2 2358 \] 2359 2360 [ ]? # one optional space 2361 (?:\n[ ]*)? # one optional newline followed by spaces 2362 2363 \[ 2364 (.*?) # id = $3 2365 \] 2366 2367 ) 2368 }xs', 2369 array($this, '_doImages_reference_callback'), $text); 2370 2371 # 2372 # Next, handle inline images:  2373 # Don't forget: encode * and _ 2374 # 2375 $text = preg_replace_callback('{ 2376 ( # wrap whole match in $1 2377 !\[ 2378 ('.$this->nested_brackets_re.') # alt text = $2 2379 \] 2380 \s? # One optional whitespace character 2381 \( # literal paren 2382 [ \n]* 2383 (?: 2384 <(\S*)> # src url = $3 2385 | 2386 ('.$this->nested_url_parenthesis_re.') # src url = $4 2387 ) 2388 [ \n]* 2389 ( # $5 2390 ([\'"]) # quote char = $6 2391 (.*?) # title = $7 2392 \6 # matching quote 2393 [ \n]* 2394 )? # title is optional 2395 \) 2396 (?:[ ]? '.$this->id_class_attr_catch_re.' )? 
# $8 = id/class attributes 2397 ) 2398 }xs', 2399 array($this, '_doImages_inline_callback'), $text); 2400 2401 return $text; 2402 } 2403 protected function _doImages_reference_callback($matches) { 2404 $whole_match = $matches[1]; 2405 $alt_text = $matches[2]; 2406 $link_id = strtolower($matches[3]); 2407 2408 if ($link_id == "") { 2409 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 2410 } 2411 2412 $alt_text = $this->encodeAttribute($alt_text); 2413 if (isset($this->urls[$link_id])) { 2414 $url = $this->encodeAttribute($this->urls[$link_id]); 2415 $result = "<img src=\"$url\" alt=\"$alt_text\""; 2416 if (isset($this->titles[$link_id])) { 2417 $title = $this->titles[$link_id]; 2418 $title = $this->encodeAttribute($title); 2419 $result .= " title=\"$title\""; 2420 } 2421 if (isset($this->ref_attr[$link_id])) 2422 $result .= $this->ref_attr[$link_id]; 2423 $result .= $this->empty_element_suffix; 2424 $result = $this->hashPart($result); 2425 } 2426 else { 2427 # If there's no such link ID, leave intact: 2428 $result = $whole_match; 2429 } 2430 2431 return $result; 2432 } 2433 protected function _doImages_inline_callback($matches) { 2434 $whole_match = $matches[1]; 2435 $alt_text = $matches[2]; 2436 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 2437 $title =& $matches[7]; 2438 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 2439 2440 $alt_text = $this->encodeAttribute($alt_text); 2441 $url = $this->encodeAttribute($url); 2442 $result = "<img src=\"$url\" alt=\"$alt_text\""; 2443 if (isset($title)) { 2444 $title = $this->encodeAttribute($title); 2445 $result .= " title=\"$title\""; # $title already quoted 2446 } 2447 $result .= $attr; 2448 $result .= $this->empty_element_suffix; 2449 2450 return $this->hashPart($result); 2451 } 2452 2453 2454 protected function doHeaders($text) { 2455 # 2456 # Redefined to add id and class attribute support. 
2457 # 2458 # Setext-style headers: 2459 # Header 1 {#header1} 2460 # ======== 2461 # 2462 # Header 2 {#header2 .class1 .class2} 2463 # -------- 2464 # 2465 $text = preg_replace_callback( 2466 '{ 2467 (^.+?) # $1: Header text 2468 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes 2469 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 2470 }mx', 2471 array($this, '_doHeaders_callback_setext'), $text); 2472 2473 # atx-style headers: 2474 # # Header 1 {#header1} 2475 # ## Header 2 {#header2} 2476 # ## Header 2 with closing hashes ## {#header3.class1.class2} 2477 # ... 2478 # ###### Header 6 {.class2} 2479 # 2480 $text = preg_replace_callback('{ 2481 ^(\#{1,6}) # $1 = string of #\'s 2482 [ ]* 2483 (.+?) # $2 = Header text 2484 [ ]* 2485 \#* # optional closing #\'s (not counted) 2486 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes 2487 [ ]* 2488 \n+ 2489 }xm', 2490 array($this, '_doHeaders_callback_atx'), $text); 2491 2492 return $text; 2493 } 2494 protected function _doHeaders_callback_setext($matches) { 2495 if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) 2496 return $matches[0]; 2497 $level = $matches[3]{0} == '=' ? 1 : 2; 2498 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2]); 2499 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 2500 return "\n" . $this->hashBlock($block) . "\n\n"; 2501 } 2502 protected function _doHeaders_callback_atx($matches) { 2503 $level = strlen($matches[1]); 2504 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3]); 2505 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>"; 2506 return "\n" . $this->hashBlock($block) . "\n\n"; 2507 } 2508 2509 2510 protected function doTables($text) { 2511 # 2512 # Form HTML tables. 2513 # 2514 $less_than_tab = $this->tab_width - 1; 2515 # 2516 # Find tables with leading pipe. 
2517 # 2518 # | Header 1 | Header 2 2519 # | -------- | -------- 2520 # | Cell 1 | Cell 2 2521 # | Cell 3 | Cell 4 2522 # 2523 $text = preg_replace_callback(' 2524 { 2525 ^ # Start of a line 2526 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2527 [|] # Optional leading pipe (present) 2528 (.+) \n # $1: Header row (at least one pipe) 2529 2530 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2531 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2532 2533 ( # $3: Cells 2534 (?> 2535 [ ]* # Allowed whitespace. 2536 [|] .* \n # Row content. 2537 )* 2538 ) 2539 (?=\n|\Z) # Stop at final double newline. 2540 }xm', 2541 array($this, '_doTable_leadingPipe_callback'), $text); 2542 2543 # 2544 # Find tables without leading pipe. 2545 # 2546 # Header 1 | Header 2 2547 # -------- | -------- 2548 # Cell 1 | Cell 2 2549 # Cell 3 | Cell 4 2550 # 2551 $text = preg_replace_callback(' 2552 { 2553 ^ # Start of a line 2554 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2555 (\S.*[|].*) \n # $1: Header row (at least one pipe) 2556 2557 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2558 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2559 2560 ( # $3: Cells 2561 (?> 2562 .* [|] .* \n # Row content 2563 )* 2564 ) 2565 (?=\n|\Z) # Stop at final double newline. 2566 }xm', 2567 array($this, '_DoTable_callback'), $text); 2568 2569 return $text; 2570 } 2571 protected function _doTable_leadingPipe_callback($matches) { 2572 $head = $matches[1]; 2573 $underline = $matches[2]; 2574 $content = $matches[3]; 2575 2576 # Remove leading pipe for each row. 
2577 $content = preg_replace('/^ *[|]/m', '', $content); 2578 2579 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2580 } 2581 protected function _doTable_makeAlignAttr($alignname) 2582 { 2583 if (empty($this->table_align_class_tmpl)) 2584 return " align=\"$alignname\""; 2585 2586 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl); 2587 return " class=\"$classname\""; 2588 } 2589 protected function _doTable_callback($matches) { 2590 $head = $matches[1]; 2591 $underline = $matches[2]; 2592 $content = $matches[3]; 2593 2594 # Remove any tailing pipes for each line. 2595 $head = preg_replace('/[|] *$/m', '', $head); 2596 $underline = preg_replace('/[|] *$/m', '', $underline); 2597 $content = preg_replace('/[|] *$/m', '', $content); 2598 2599 # Reading alignement from header underline. 2600 $separators = preg_split('/ *[|] */', $underline); 2601 foreach ($separators as $n => $s) { 2602 if (preg_match('/^ *-+: *$/', $s)) 2603 $attr[$n] = $this->_doTable_makeAlignAttr('right'); 2604 else if (preg_match('/^ *:-+: *$/', $s)) 2605 $attr[$n] = $this->_doTable_makeAlignAttr('center'); 2606 else if (preg_match('/^ *:-+ *$/', $s)) 2607 $attr[$n] = $this->_doTable_makeAlignAttr('left'); 2608 else 2609 $attr[$n] = ''; 2610 } 2611 2612 # Parsing span elements, including code spans, character escapes, 2613 # and inline HTML tags, so that pipes inside those gets ignored. 2614 $head = $this->parseSpan($head); 2615 $headers = preg_split('/ *[|] */', $head); 2616 $col_count = count($headers); 2617 $attr = array_pad($attr, $col_count, ''); 2618 2619 # Write column headers. 2620 $text = "<table>\n"; 2621 $text .= "<thead>\n"; 2622 $text .= "<tr>\n"; 2623 foreach ($headers as $n => $header) 2624 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n"; 2625 $text .= "</tr>\n"; 2626 $text .= "</thead>\n"; 2627 2628 # Split content by row. 
2629 $rows = explode("\n", trim($content, "\n")); 2630 2631 $text .= "<tbody>\n"; 2632 foreach ($rows as $row) { 2633 # Parsing span elements, including code spans, character escapes, 2634 # and inline HTML tags, so that pipes inside those gets ignored. 2635 $row = $this->parseSpan($row); 2636 2637 # Split row by cell. 2638 $row_cells = preg_split('/ *[|] */', $row, $col_count); 2639 $row_cells = array_pad($row_cells, $col_count, ''); 2640 2641 $text .= "<tr>\n"; 2642 foreach ($row_cells as $n => $cell) 2643 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n"; 2644 $text .= "</tr>\n"; 2645 } 2646 $text .= "</tbody>\n"; 2647 $text .= "</table>"; 2648 2649 return $this->hashBlock($text) . "\n"; 2650 } 2651 2652 2653 protected function doDefLists($text) { 2654 # 2655 # Form HTML definition lists. 2656 # 2657 $less_than_tab = $this->tab_width - 1; 2658 2659 # Re-usable pattern to match any entire dl list: 2660 $whole_list_re = '(?> 2661 ( # $1 = whole list 2662 ( # $2 2663 [ ]{0,'.$less_than_tab.'} 2664 ((?>.*\S.*\n)+) # $3 = defined term 2665 \n? 2666 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2667 ) 2668 (?s:.+?) 2669 ( # $4 2670 \z 2671 | 2672 \n{2,} 2673 (?=\S) 2674 (?! # Negative lookahead for another term 2675 [ ]{0,'.$less_than_tab.'} 2676 (?: \S.*\n )+? # defined term 2677 \n? 2678 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2679 ) 2680 (?! # Negative lookahead for another definition 2681 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2682 ) 2683 ) 2684 ) 2685 )'; // mx 2686 2687 $text = preg_replace_callback('{ 2688 (?>\A\n?|(?<=\n\n)) 2689 '.$whole_list_re.' 
2690 }mx', 2691 array($this, '_doDefLists_callback'), $text); 2692 2693 return $text; 2694 } 2695 protected function _doDefLists_callback($matches) { 2696 # Re-usable patterns to match list item bullets and number markers: 2697 $list = $matches[1]; 2698 2699 # Turn double returns into triple returns, so that we can make a 2700 # paragraph for the last item in a list, if necessary: 2701 $result = trim($this->processDefListItems($list)); 2702 $result = "<dl>\n" . $result . "\n</dl>"; 2703 return $this->hashBlock($result) . "\n\n"; 2704 } 2705 2706 2707 protected function processDefListItems($list_str) { 2708 # 2709 # Process the contents of a single definition list, splitting it 2710 # into individual term and definition list items. 2711 # 2712 $less_than_tab = $this->tab_width - 1; 2713 2714 # trim trailing blank lines: 2715 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 2716 2717 # Process definition terms. 2718 $list_str = preg_replace_callback('{ 2719 (?>\A\n?|\n\n+) # leading line 2720 ( # definition terms = $1 2721 [ ]{0,'.$less_than_tab.'} # leading whitespace 2722 (?!\:[ ]|[ ]) # negative lookahead for a definition 2723 # mark (colon) or more whitespace. 2724 (?> \S.* \n)+? # actual term (not whitespace). 2725 ) 2726 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 2727 # with a definition mark. 2728 }xm', 2729 array($this, '_processDefListItems_callback_dt'), $list_str); 2730 2731 # Process actual definitions. 2732 $list_str = preg_replace_callback('{ 2733 \n(\n+)? 
# leading line = $1 2734 ( # marker space = $2 2735 [ ]{0,'.$less_than_tab.'} # whitespace before colon 2736 \:[ ]+ # definition mark (colon) 2737 ) 2738 ((?s:.+?)) # definition text = $3 2739 (?= \n+ # stop at next definition mark, 2740 (?: # next term or end of text 2741 [ ]{0,'.$less_than_tab.'} \:[ ] | 2742 <dt> | \z 2743 ) 2744 ) 2745 }xm', 2746 array($this, '_processDefListItems_callback_dd'), $list_str); 2747 2748 return $list_str; 2749 } 2750 protected function _processDefListItems_callback_dt($matches) { 2751 $terms = explode("\n", trim($matches[1])); 2752 $text = ''; 2753 foreach ($terms as $term) { 2754 $term = $this->runSpanGamut(trim($term)); 2755 $text .= "\n<dt>" . $term . "</dt>"; 2756 } 2757 return $text . "\n"; 2758 } 2759 protected function _processDefListItems_callback_dd($matches) { 2760 $leading_line = $matches[1]; 2761 $marker_space = $matches[2]; 2762 $def = $matches[3]; 2763 2764 if ($leading_line || preg_match('/\n{2,}/', $def)) { 2765 # Replace marker with the appropriate whitespace indentation 2766 $def = str_repeat(' ', strlen($marker_space)) . $def; 2767 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 2768 $def = "\n". $def ."\n"; 2769 } 2770 else { 2771 $def = rtrim($def); 2772 $def = $this->runSpanGamut($this->outdent($def)); 2773 } 2774 2775 return "\n<dd>" . $def . "</dd>\n"; 2776 } 2777 2778 2779 protected function doFencedCodeBlocks($text) { 2780 # 2781 # Adding the fenced code block syntax to regular Markdown: 2782 # 2783 # ~~~ 2784 # Code block 2785 # ~~~ 2786 # 2787 $less_than_tab = $this->tab_width; 2788 2789 $text = preg_replace_callback('{ 2790 (?:\n|\A) 2791 # 1: Opening marker 2792 ( 2793 (?:~{3,}|`{3,}) # 3 or more tildes/backticks. 2794 ) 2795 [ ]* 2796 (?: 2797 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 2798 | 2799 '.$this->id_class_attr_catch_re.' # 3: Extra attributes 2800 )? 2801 [ ]* \n # Whitespace and newline following marker. 
2802 2803 # 4: Content 2804 ( 2805 (?> 2806 (?!\1 [ ]* \n) # Not a closing marker. 2807 .*\n+ 2808 )+ 2809 ) 2810 2811 # Closing marker. 2812 \1 [ ]* (?= \n ) 2813 }xm', 2814 array($this, '_doFencedCodeBlocks_callback'), $text); 2815 2816 return $text; 2817 } 2818 protected function _doFencedCodeBlocks_callback($matches) { 2819 $classname =& $matches[2]; 2820 $attrs =& $matches[3]; 2821 $codeblock = $matches[4]; 2822 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 2823 $codeblock = preg_replace_callback('/^\n+/', 2824 array($this, '_doFencedCodeBlocks_newlines'), $codeblock); 2825 2826 if ($classname != "") { 2827 if ($classname{0} == '.') 2828 $classname = substr($classname, 1); 2829 $attr_str = ' class="'.$this->code_class_prefix.$classname.'"'; 2830 } else { 2831 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs); 2832 } 2833 $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; 2834 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; 2835 $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>"; 2836 2837 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 2838 } 2839 protected function _doFencedCodeBlocks_newlines($matches) { 2840 return str_repeat("<br$this->empty_element_suffix", 2841 strlen($matches[0])); 2842 } 2843 2844 2845 # 2846 # Redefining emphasis markers so that emphasis by underscore does not 2847 # work in the middle of a word. 
#

#
# Emphasis delimiter patterns, redefined so that emphasis by underscore does
# not work in the middle of a word.
#
# Each table is keyed by a delimiter token. Reading the patterns themselves:
#  - the '' entry matches a run of `*` that is not adjacent to another `*`,
#    or a run of `_` that is not preceded by a letter, digit or underscore;
#    in both cases the delimiter must not be followed by whitespace
#    (optionally after one of `.,:;`).
#  - the `*`-keyed entry matches a run of `*` that is not preceded by
#    whitespace or `*` and not followed by `*`.
#  - the `_`-keyed entry matches a run of `_` that is not preceded by
#    whitespace or `_` and not followed by a letter, digit or underscore.
#
# NOTE(review): presumably the '' entry is the opening pattern and the other
# entries are the closing pattern for that token, consumed by
# prepareItalicsAndBold() -- confirm against the base class.
#
protected $em_relist = array(
	''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
	'*' => '(?<![\s*])\*(?!\*)',
	'_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
	);
protected $strong_relist = array(
	''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
	'**' => '(?<![\s*])\*\*(?!\*)',
	'__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
	);
protected $em_strong_relist = array(
	''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
	'***' => '(?<![\s*])\*\*\*(?!\*)',
	'___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
	);


protected function formParagraphs($text) {
#
# Split $text into paragraphs on runs of two or more newlines, run the span
# gamut on each one, wrap it in <p> tags when appropriate, then expand every
# remaining hash token with unhash().
#
#	Params:
#		$text - string to process with html <p> tags
#
#	Returns the processed string.
#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		$value = trim($this->runSpanGamut($value));

		# Check if this should be enclosed in a paragraph.
		# Clean tag hashes & block tag hashes are left alone: a value that
		# starts with a `B\x1A<digits>B` token, or that consists solely of a
		# `C\x1A<digits>C` token, is not wrapped.
		$is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

		if ($is_p) {
			$value = "<p>$value</p>";
		}
		$grafs[$key] = $value;
	}

	# Join grafs in one text, then unhash HTML tags.
	$text = implode("\n\n", $grafs);

	# Finish by removing any tag hashes still present in $text.
	$text = $this->unhash($text);

	return $text;
}


### Footnotes

protected function stripFootnotes($text) {
#
# Strips footnote definitions from text and stores their content in
# $this->footnotes (see _stripFootnotes_callback). Returns the text with
# the definitions removed.
#
	$less_than_tab = $this->tab_width - 1;

	# Footnote defs are in the form: [^id]: footnote text
	# The text may start on the following line and continues until a blank
	# line followed by non-indented content, or until another
	# `[...]:` definition marker.
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
				(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
								# by non-indented content
			)*
		)
		}xm',
		array($this, '_stripFootnotes_callback'),
		$text);
	return $text;
}
protected function _stripFootnotes_callback($matches) {
#
# Store one footnote definition. The key is the configured id prefix
# followed by the captured id ($1); the value is the outdented text ($2).
# The definition itself is replaced by an empty string.
#
	$note_id = $this->fn_id_prefix . $matches[1];
	$this->footnotes[$note_id] = $this->outdent($matches[2]);
	return ''; # String that will replace the block
}


protected function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token
# which will be replaced by the actual footnote marker in appendFootnotes.
# Nothing is replaced while the parser is inside an anchor.
#
	if (!$this->in_anchor) {
		$text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
	}
	return $text;
}


protected function appendFootnotes($text) {
#
# Turn the footnote tokens left by doFootnotes into markers, then append
# the footnote list (<div class="footnotes">) to text when at least one
# footnote was referenced. Returns the resulting text.
#
	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array($this, '_appendFootnotes_callback'), $text);

	if (!empty($this->footnotes_ordered)) {
		$text .= "\n\n";
		$text .= "<div class=\"footnotes\">\n";
		$text .= "<hr". $this->empty_element_suffix ."\n";
		$text .= "<ol>\n\n";

		# Build the backlink attribute *template* once. It may contain the
		# `%%` placeholder, which stands for the footnote number.
		$attr_tmpl = "";
		if ($this->fn_backlink_class != "") {
			$class = $this->fn_backlink_class;
			$class = $this->encodeAttribute($class);
			$attr_tmpl .= " class=\"$class\"";
		}
		if ($this->fn_backlink_title != "") {
			$title = $this->fn_backlink_title;
			$title = $this->encodeAttribute($title);
			$attr_tmpl .= " title=\"$title\"";
		}
		$num = 0;

		# The list is consumed from the front rather than iterated, because
		# parsing a footnote body below can append further footnotes to
		# $this->footnotes_ordered through _appendFootnotes_callback.
		while (!empty($this->footnotes_ordered)) {
			$footnote = reset($this->footnotes_ordered);
			$note_id = key($this->footnotes_ordered);
			unset($this->footnotes_ordered[$note_id]);
			$ref_count = $this->footnotes_ref_count[$note_id];
			unset($this->footnotes_ref_count[$note_id]);
			unset($this->footnotes[$note_id]);

			$footnote .= "\n"; # Need to append newline before parsing.
			$footnote = $this->runBlockGamut("$footnote\n");
			$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
				array($this, '_appendFootnotes_callback'), $footnote);

			# Substitute the footnote number into a fresh copy of the
			# template. Overwriting the template itself would consume the
			# `%%` placeholder on the first footnote and leave every later
			# backlink labelled with number 1.
			$attr = str_replace("%%", ++$num, $attr_tmpl);
			$note_id = $this->encodeAttribute($note_id);

			# Prepare backlink, multiple backlinks if multiple references
			$backlink = "<a href=\"#fnref:$note_id\"$attr>↩</a>";
			for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
				$backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>↩</a>";
			}
			# Add backlink to last paragraph; create new paragraph if needed.
			if (preg_match('{</p>$}', $footnote)) {
				$footnote = substr($footnote, 0, -4) . " $backlink</p>";
			} else {
				$footnote .= "\n\n<p>$backlink</p>";
			}

			$text .= "<li id=\"fn:$note_id\">\n";
			$text .= $footnote . "\n";
			$text .= "</li>\n\n";
		}

		$text .= "</ol>\n";
		$text .= "</div>";
	}
	return $text;
}
protected function _appendFootnotes_callback($matches) {
#
# Replace one footnote token with its <sup> marker. $matches[1] is the
# footnote id as written in the text. When no footnote with that id is
# currently stored, the original `[^id]` text is restored instead.
#
	$node_id = $this->fn_id_prefix . $matches[1];

	# Create footnote marker only if it has a corresponding footnote *and*
	# the footnote hasn't been used by another marker.
	if (isset($this->footnotes[$node_id])) {
		$num =& $this->footnotes_numbers[$node_id];
		if (!isset($num)) {
			# Transfer footnote content to the ordered list and give it its
			# number
			$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
			$this->footnotes_ref_count[$node_id] = 1;
			$num = $this->footnote_counter++;
			# First reference: the marker id is plain "fnref:<id>".
			$ref_count_mark = '';
		} else {
			# Repeated reference: the marker id becomes "fnref<N>:<id>",
			# where N is the updated reference count.
			$ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
		}

		$attr = "";
		if ($this->fn_link_class != "") {
			$class = $this->fn_link_class;
			$class = $this->encodeAttribute($class);
			$attr .= " class=\"$class\"";
		}
		if ($this->fn_link_title != "") {
			$title = $this->fn_link_title;
			$title = $this->encodeAttribute($title);
			$attr .= " title=\"$title\"";
		}

		# `%%` in the configured class/title stands for the footnote number.
		$attr = str_replace("%%", $num, $attr);
		$node_id = $this->encodeAttribute($node_id);

		return
			"<sup id=\"fnref$ref_count_mark:$node_id\">".
			"<a href=\"#fn:$node_id\"$attr>$num</a>".
			"</sup>";
	}

	return "[^".$matches[1]."]";
}


### Abbreviations ###

protected function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text and stores their descriptions
# (see _stripAbbreviations_callback). Returns the text with the
# definitions removed.
#
	$less_than_tab = $this->tab_width - 1;

	# Abbreviation defs are in the form: *[abbr]: description
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:	# abbr_id = $1
		(.*)					# text = $2 (no blank lines allowed)
		}xm',
		array($this, '_stripAbbreviations_callback'),
		$text);
	return $text;
}
protected function _stripAbbreviations_callback($matches) {
#
# Register one abbreviation: append the quoted word ($1) to the alternation
# in $this->abbr_word_re and store its trimmed description ($2). The
# definition itself is replaced by an empty string.
#
	$abbr_word = $matches[1];
	$abbr_desc = $matches[2];
	# Compare against "" rather than testing truthiness: a pattern that is
	# exactly "0" is falsy in PHP and would lose its `|` separator.
	if ($this->abbr_word_re != "")
		$this->abbr_word_re .= '|';
	$this->abbr_word_re .= preg_quote($abbr_word);
	$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	return ''; # String that will replace the block
}


protected function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
# A word only matches when it is not adjacent to a word character or to
# the \x1A byte used in hash tokens.
#
	# Compare against "" so that a lone abbreviation "0" is still processed.
	if ($this->abbr_word_re != "") {
		// cannot use the /x modifier because abbr_word_re may
		// contain significant spaces:
		$text = preg_replace_callback('{'.
			'(?<![\w\x1A])'.
			'(?:'.$this->abbr_word_re.')'.
			'(?![\w\x1A])'.
			'}',
			array($this, '_doAbbreviations_callback'), $text);
	}
	return $text;
}
protected function _doAbbreviations_callback($matches) {
#
# Wrap one matched abbreviation in a hashed <abbr> element, with a title
# attribute when a non-empty description is stored for it. A match with no
# stored description is returned unchanged.
#
	$abbr = $matches[0];
	if (isset($this->abbr_desciptions[$abbr])) {
		$desc = $this->abbr_desciptions[$abbr];
		# Only a really empty description drops the title; empty() would
		# also discard a description of "0".
		if ($desc == "") {
			return $this->hashPart("<abbr>$abbr</abbr>");
		} else {
			$desc = $this->encodeAttribute($desc);
			return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
		}
	} else {
		return $matches[0];
	}
}

} # end of the parser class (its opening line is before this chunk)
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 20:08:37 2014 | Cross-referenced by PHPXref 0.7.1 |