PHPXRef 0.7.1 : moodle-2.8 : /lib/markdown/Markdown.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  #
   3  # Markdown  -  A text-to-HTML conversion tool for web writers
   4  #
   5  # PHP Markdown  
   6  # Copyright (c) 2004-2014 Michel Fortin  
   7  # <http://michelf.com/projects/php-markdown/>
   8  #
   9  # Original Markdown  
  10  # Copyright (c) 2004-2006 John Gruber  
  11  # <http://daringfireball.net/projects/markdown/>
  12  #
  13  namespace Michelf;
  14  
  15  
  16  #
  17  # Markdown Parser Class
  18  #
  19  
  20  class Markdown implements MarkdownInterface {
  21  
  22      ### Version ###
  23  
  24      const  MARKDOWNLIB_VERSION  =  "1.4.1";
  25  
  26      ### Simple Function Interface ###
  27  
  28  	public static function defaultTransform($text) {
  29      #
  30      # Initialize the parser and return the result of its transform method.
  31      # This will work fine for derived classes too.
  32      #
  33          # Take parser class on which this function was called.
  34          $parser_class = \get_called_class();
  35  
  36          # try to take parser from the static parser list
  37          static $parser_list;
  38          $parser =& $parser_list[$parser_class];
  39  
  40          # create the parser it not already set
  41          if (!$parser)
  42              $parser = new $parser_class;
  43  
  44          # Transform text using parser.
  45          return $parser->transform($text);
  46      }
  47  
  48      ### Configuration Variables ###
  49  
  50      # Change to ">" for HTML output.
  51      public $empty_element_suffix = " />";
  52      public $tab_width = 4;
  53      
  54      # Change to `true` to disallow markup or entities.
  55      public $no_markup = false;
  56      public $no_entities = false;
  57      
  58      # Predefined urls and titles for reference links and images.
  59      public $predef_urls = array();
  60      public $predef_titles = array();
  61  
  62  
  63      ### Parser Implementation ###
  64  
  65      # Regex to match balanced [brackets].
  66      # Needed to insert a maximum bracket depth while converting to PHP.
  67      protected $nested_brackets_depth = 6;
  68      protected $nested_brackets_re;
  69      
  70      protected $nested_url_parenthesis_depth = 4;
  71      protected $nested_url_parenthesis_re;
  72  
  73      # Table of hash values for escaped characters:
  74      protected $escape_chars = '\`*_{}[]()>#+-.!';
  75      protected $escape_chars_re;
  76  
  77  
  78  	public function __construct() {
  79      #
  80      # Constructor function. Initialize appropriate member variables.
  81      #
  82          $this->_initDetab();
  83          $this->prepareItalicsAndBold();
  84      
  85          $this->nested_brackets_re = 
  86              str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  87              str_repeat('\])*', $this->nested_brackets_depth);
  88      
  89          $this->nested_url_parenthesis_re = 
  90              str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  91              str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  92          
  93          $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
  94          
  95          # Sort document, block, and span gamut in ascendent priority order.
  96          asort($this->document_gamut);
  97          asort($this->block_gamut);
  98          asort($this->span_gamut);
  99      }
 100  
 101  
 102      # Internal hashes used during transformation.
 103      protected $urls = array();
 104      protected $titles = array();
 105      protected $html_hashes = array();
 106      
 107      # Status flag to avoid invalid nesting.
 108      protected $in_anchor = false;
 109      
 110      
 111  	protected function setup() {
 112      #
 113      # Called before the transformation process starts to setup parser 
 114      # states.
 115      #
 116          # Clear global hashes.
 117          $this->urls = $this->predef_urls;
 118          $this->titles = $this->predef_titles;
 119          $this->html_hashes = array();
 120          
 121          $this->in_anchor = false;
 122      }
 123      
 124  	protected function teardown() {
 125      #
 126      # Called after the transformation process to clear any variable 
 127      # which may be taking up memory unnecessarly.
 128      #
 129          $this->urls = array();
 130          $this->titles = array();
 131          $this->html_hashes = array();
 132      }
 133  
 134  
 135  	public function transform($text) {
 136      #
 137      # Main function. Performs some preprocessing on the input text
 138      # and pass it through the document gamut.
 139      #
 140          $this->setup();
 141      
 142          # Remove UTF-8 BOM and marker character in input, if present.
 143          $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 144  
 145          # Standardize line endings:
 146          #   DOS to Unix and Mac to Unix
 147          $text = preg_replace('{\r\n?}', "\n", $text);
 148  
 149          # Make sure $text ends with a couple of newlines:
 150          $text .= "\n\n";
 151  
 152          # Convert all tabs to spaces.
 153          $text = $this->detab($text);
 154  
 155          # Turn block-level HTML blocks into hash entries
 156          $text = $this->hashHTMLBlocks($text);
 157  
 158          # Strip any lines consisting only of spaces and tabs.
 159          # This makes subsequent regexen easier to write, because we can
 160          # match consecutive blank lines with /\n+/ instead of something
 161          # contorted like /[ ]*\n+/ .
 162          $text = preg_replace('/^[ ]+$/m', '', $text);
 163  
 164          # Run document gamut methods.
 165          foreach ($this->document_gamut as $method => $priority) {
 166              $text = $this->$method($text);
 167          }
 168          
 169          $this->teardown();
 170  
 171          return $text . "\n";
 172      }
 173      
 174      protected $document_gamut = array(
 175          # Strip link definitions, store in hashes.
 176          "stripLinkDefinitions" => 20,
 177          
 178          "runBasicBlockGamut"   => 30,
 179          );
 180  
 181  
 182  	protected function stripLinkDefinitions($text) {
 183      #
 184      # Strips link definitions from text, stores the URLs and titles in
 185      # hash references.
 186      #
 187          $less_than_tab = $this->tab_width - 1;
 188  
 189          # Link defs are in the form: ^[id]: url "optional title"
 190          $text = preg_replace_callback('{
 191                              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:    # id = $1
 192                                [ ]*
 193                                \n?                # maybe *one* newline
 194                                [ ]*
 195                              (?:
 196                                <(.+?)>            # url = $2
 197                              |
 198                                (\S+?)            # url = $3
 199                              )
 200                                [ ]*
 201                                \n?                # maybe one newline
 202                                [ ]*
 203                              (?:
 204                                  (?<=\s)            # lookbehind for whitespace
 205                                  ["(]
 206                                  (.*?)            # title = $4
 207                                  [")]
 208                                  [ ]*
 209                              )?    # title is optional
 210                              (?:\n+|\Z)
 211              }xm',
 212              array($this, '_stripLinkDefinitions_callback'),
 213              $text);
 214          return $text;
 215      }
 216  	protected function _stripLinkDefinitions_callback($matches) {
 217          $link_id = strtolower($matches[1]);
 218          $url = $matches[2] == '' ? $matches[3] : $matches[2];
 219          $this->urls[$link_id] = $url;
 220          $this->titles[$link_id] =& $matches[4];
 221          return ''; # String that will replace the block
 222      }
 223  
 224  
 225  	protected function hashHTMLBlocks($text) {
 226          if ($this->no_markup)  return $text;
 227  
 228          $less_than_tab = $this->tab_width - 1;
 229  
 230          # Hashify HTML blocks:
 231          # We only want to do this for block-level HTML tags, such as headers,
 232          # lists, and tables. That's because we still want to wrap <p>s around
 233          # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 234          # phrase emphasis, and spans. The list of tags we're looking for is
 235          # hard-coded:
 236          #
 237          # *  List "a" is made of tags which can be both inline or block-level.
 238          #    These will be treated block-level when the start tag is alone on 
 239          #    its line, otherwise they're not matched here and will be taken as 
 240          #    inline later.
 241          # *  List "b" is made of tags which are always block-level;
 242          #
 243          $block_tags_a_re = 'ins|del';
 244          $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 245                             'script|noscript|style|form|fieldset|iframe|math|svg|'.
 246                             'article|section|nav|aside|hgroup|header|footer|'.
 247                             'figure';
 248  
 249          # Regular expression for the content of a block tag.
 250          $nested_tags_level = 4;
 251          $attr = '
 252              (?>                # optional tag attributes
 253                \s            # starts with whitespace
 254                (?>
 255                  [^>"/]+        # text outside quotes
 256                |
 257                  /+(?!>)        # slash not followed by ">"
 258                |
 259                  "[^"]*"        # text inside double quotes (tolerate ">")
 260                |
 261                  \'[^\']*\'    # text inside single quotes (tolerate ">")
 262                )*
 263              )?    
 264              ';
 265          $content =
 266              str_repeat('
 267                  (?>
 268                    [^<]+            # content without tag
 269                  |
 270                    <\2            # nested opening tag
 271                      '.$attr.'    # attributes
 272                      (?>
 273                        />
 274                      |
 275                        >', $nested_tags_level).    # end of opening tag
 276                        '.*?'.                    # last level nested tag content
 277              str_repeat('
 278                        </\2\s*>    # closing nested tag
 279                      )
 280                    |                
 281                      <(?!/\2\s*>    # other tags with a different name
 282                    )
 283                  )*',
 284                  $nested_tags_level);
 285          $content2 = str_replace('\2', '\3', $content);
 286  
 287          # First, look for nested blocks, e.g.:
 288          #     <div>
 289          #         <div>
 290          #         tags for inner block must be indented.
 291          #         </div>
 292          #     </div>
 293          #
 294          # The outermost tags must start at the left margin for this to match, and
 295          # the inner nested divs must be indented.
 296          # We need to do this before the next, more liberal match, because the next
 297          # match will start at the first `<div>` and stop at the first `</div>`.
 298          $text = preg_replace_callback('{(?>
 299              (?>
 300                  (?<=\n)            # Starting on its own line
 301                  |                # or
 302                  \A\n?            # the at beginning of the doc
 303              )
 304              (                        # save in $1
 305  
 306                # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
 307                # in between.
 308                      
 309                          [ ]{0,'.$less_than_tab.'}
 310                          <('.$block_tags_b_re.')# start tag = $2
 311                          '.$attr.'>            # attributes followed by > and \n
 312                          '.$content.'        # content, support nesting
 313                          </\2>                # the matching end tag
 314                          [ ]*                # trailing spaces/tabs
 315                          (?=\n+|\Z)    # followed by a newline or end of document
 316  
 317              | # Special version for tags of group a.
 318  
 319                          [ ]{0,'.$less_than_tab.'}
 320                          <('.$block_tags_a_re.')# start tag = $3
 321                          '.$attr.'>[ ]*\n    # attributes followed by >
 322                          '.$content2.'        # content, support nesting
 323                          </\3>                # the matching end tag
 324                          [ ]*                # trailing spaces/tabs
 325                          (?=\n+|\Z)    # followed by a newline or end of document
 326                      
 327              | # Special case just for <hr />. It was easier to make a special 
 328                # case than to make the other regex more complicated.
 329              
 330                          [ ]{0,'.$less_than_tab.'}
 331                          <(hr)                # start tag = $2
 332                          '.$attr.'            # attributes
 333                          /?>                    # the matching end tag
 334                          [ ]*
 335                          (?=\n{2,}|\Z)        # followed by a blank line or end of document
 336              
 337              | # Special case for standalone HTML comments:
 338              
 339                      [ ]{0,'.$less_than_tab.'}
 340                      (?s:
 341                          <!-- .*? -->
 342                      )
 343                      [ ]*
 344                      (?=\n{2,}|\Z)        # followed by a blank line or end of document
 345              
 346              | # PHP and ASP-style processor instructions (<? and <%)
 347              
 348                      [ ]{0,'.$less_than_tab.'}
 349                      (?s:
 350                          <([?%])            # $2
 351                          .*?
 352                          \2>
 353                      )
 354                      [ ]*
 355                      (?=\n{2,}|\Z)        # followed by a blank line or end of document
 356                      
 357              )
 358              )}Sxmi',
 359              array($this, '_hashHTMLBlocks_callback'),
 360              $text);
 361  
 362          return $text;
 363      }
 364  	protected function _hashHTMLBlocks_callback($matches) {
 365          $text = $matches[1];
 366          $key  = $this->hashBlock($text);
 367          return "\n\n$key\n\n";
 368      }
 369      
 370      
 371  	protected function hashPart($text, $boundary = 'X') {
 372      #
 373      # Called whenever a tag must be hashed when a function insert an atomic 
 374      # element in the text stream. Passing $text to through this function gives
 375      # a unique text-token which will be reverted back when calling unhash.
 376      #
 377      # The $boundary argument specify what character should be used to surround
 378      # the token. By convension, "B" is used for block elements that needs not
 379      # to be wrapped into paragraph tags at the end, ":" is used for elements
 380      # that are word separators and "X" is used in the general case.
 381      #
 382          # Swap back any tag hash found in $text so we do not have to `unhash`
 383          # multiple times at the end.
 384          $text = $this->unhash($text);
 385          
 386          # Then hash the block.
 387          static $i = 0;
 388          $key = "$boundary\x1A" . ++$i . $boundary;
 389          $this->html_hashes[$key] = $text;
 390          return $key; # String that will replace the tag.
 391      }
 392  
 393  
 394  	protected function hashBlock($text) {
 395      #
 396      # Shortcut function for hashPart with block-level boundaries.
 397      #
 398          return $this->hashPart($text, 'B');
 399      }
 400  
 401  
 402      protected $block_gamut = array(
 403      #
 404      # These are all the transformations that form block-level
 405      # tags like paragraphs, headers, and list items.
 406      #
 407          "doHeaders"         => 10,
 408          "doHorizontalRules" => 20,
 409          
 410          "doLists"           => 40,
 411          "doCodeBlocks"      => 50,
 412          "doBlockQuotes"     => 60,
 413          );
 414  
 415  	protected function runBlockGamut($text) {
 416      #
 417      # Run block gamut tranformations.
 418      #
 419          # We need to escape raw HTML in Markdown source before doing anything 
 420          # else. This need to be done for each block, and not only at the 
 421          # begining in the Markdown function since hashed blocks can be part of
 422          # list items and could have been indented. Indented blocks would have 
 423          # been seen as a code block in a previous pass of hashHTMLBlocks.
 424          $text = $this->hashHTMLBlocks($text);
 425          
 426          return $this->runBasicBlockGamut($text);
 427      }
 428      
 429  	protected function runBasicBlockGamut($text) {
 430      #
 431      # Run block gamut tranformations, without hashing HTML blocks. This is 
 432      # useful when HTML blocks are known to be already hashed, like in the first
 433      # whole-document pass.
 434      #
 435          foreach ($this->block_gamut as $method => $priority) {
 436              $text = $this->$method($text);
 437          }
 438          
 439          # Finally form paragraph and restore hashed blocks.
 440          $text = $this->formParagraphs($text);
 441  
 442          return $text;
 443      }
 444      
 445      
 446  	protected function doHorizontalRules($text) {
 447          # Do Horizontal Rules:
 448          return preg_replace(
 449              '{
 450                  ^[ ]{0,3}    # Leading space
 451                  ([-*_])        # $1: First marker
 452                  (?>            # Repeated marker group
 453                      [ ]{0,2}    # Zero, one, or two spaces.
 454                      \1            # Marker character
 455                  ){2,}        # Group repeated at least twice
 456                  [ ]*        # Tailing spaces
 457                  $            # End of line.
 458              }mx',
 459              "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
 460              $text);
 461      }
 462  
 463  
 464      protected $span_gamut = array(
 465      #
 466      # These are all the transformations that occur *within* block-level
 467      # tags like paragraphs, headers, and list items.
 468      #
 469          # Process character escapes, code spans, and inline HTML
 470          # in one shot.
 471          "parseSpan"           => -30,
 472  
 473          # Process anchor and image tags. Images must come first,
 474          # because ![foo][f] looks like an anchor.
 475          "doImages"            =>  10,
 476          "doAnchors"           =>  20,
 477          
 478          # Make links out of things like `<http://example.com/>`
 479          # Must come after doAnchors, because you can use < and >
 480          # delimiters in inline links like [this](<url>).
 481          "doAutoLinks"         =>  30,
 482          "encodeAmpsAndAngles" =>  40,
 483  
 484          "doItalicsAndBold"    =>  50,
 485          "doHardBreaks"        =>  60,
 486          );
 487  
 488  	protected function runSpanGamut($text) {
 489      #
 490      # Run span gamut tranformations.
 491      #
 492          foreach ($this->span_gamut as $method => $priority) {
 493              $text = $this->$method($text);
 494          }
 495  
 496          return $text;
 497      }
 498      
 499      
 500  	protected function doHardBreaks($text) {
 501          # Do hard breaks:
 502          return preg_replace_callback('/ {2,}\n/', 
 503              array($this, '_doHardBreaks_callback'), $text);
 504      }
 505  	protected function _doHardBreaks_callback($matches) {
 506          return $this->hashPart("<br$this->empty_element_suffix\n");
 507      }
 508  
 509  
 510  	protected function doAnchors($text) {
 511      #
 512      # Turn Markdown link shortcuts into XHTML <a> tags.
 513      #
 514          if ($this->in_anchor) return $text;
 515          $this->in_anchor = true;
 516          
 517          #
 518          # First, handle reference-style links: [link text] [id]
 519          #
 520          $text = preg_replace_callback('{
 521              (                    # wrap whole match in $1
 522                \[
 523                  ('.$this->nested_brackets_re.')    # link text = $2
 524                \]
 525  
 526                [ ]?                # one optional space
 527                (?:\n[ ]*)?        # one optional newline followed by spaces
 528  
 529                \[
 530                  (.*?)        # id = $3
 531                \]
 532              )
 533              }xs',
 534              array($this, '_doAnchors_reference_callback'), $text);
 535  
 536          #
 537          # Next, inline-style links: [link text](url "optional title")
 538          #
 539          $text = preg_replace_callback('{
 540              (                # wrap whole match in $1
 541                \[
 542                  ('.$this->nested_brackets_re.')    # link text = $2
 543                \]
 544                \(            # literal paren
 545                  [ \n]*
 546                  (?:
 547                      <(.+?)>    # href = $3
 548                  |
 549                      ('.$this->nested_url_parenthesis_re.')    # href = $4
 550                  )
 551                  [ \n]*
 552                  (            # $5
 553                    ([\'"])    # quote char = $6
 554                    (.*?)        # Title = $7
 555                    \6        # matching quote
 556                    [ \n]*    # ignore any spaces/tabs between closing quote and )
 557                  )?            # title is optional
 558                \)
 559              )
 560              }xs',
 561              array($this, '_doAnchors_inline_callback'), $text);
 562  
 563          #
 564          # Last, handle reference-style shortcuts: [link text]
 565          # These must come last in case you've also got [link text][1]
 566          # or [link text](/foo)
 567          #
 568          $text = preg_replace_callback('{
 569              (                    # wrap whole match in $1
 570                \[
 571                  ([^\[\]]+)        # link text = $2; can\'t contain [ or ]
 572                \]
 573              )
 574              }xs',
 575              array($this, '_doAnchors_reference_callback'), $text);
 576  
 577          $this->in_anchor = false;
 578          return $text;
 579      }
 580  	protected function _doAnchors_reference_callback($matches) {
 581          $whole_match =  $matches[1];
 582          $link_text   =  $matches[2];
 583          $link_id     =& $matches[3];
 584  
 585          if ($link_id == "") {
 586              # for shortcut links like [this][] or [this].
 587              $link_id = $link_text;
 588          }
 589          
 590          # lower-case and turn embedded newlines into spaces
 591          $link_id = strtolower($link_id);
 592          $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 593  
 594          if (isset($this->urls[$link_id])) {
 595              $url = $this->urls[$link_id];
 596              $url = $this->encodeAttribute($url);
 597              
 598              $result = "<a href=\"$url\"";
 599              if ( isset( $this->titles[$link_id] ) ) {
 600                  $title = $this->titles[$link_id];
 601                  $title = $this->encodeAttribute($title);
 602                  $result .=  " title=\"$title\"";
 603              }
 604          
 605              $link_text = $this->runSpanGamut($link_text);
 606              $result .= ">$link_text</a>";
 607              $result = $this->hashPart($result);
 608          }
 609          else {
 610              $result = $whole_match;
 611          }
 612          return $result;
 613      }
 614  	protected function _doAnchors_inline_callback($matches) {
 615          $whole_match    =  $matches[1];
 616          $link_text        =  $this->runSpanGamut($matches[2]);
 617          $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
 618          $title            =& $matches[7];
 619  
 620          // if the URL was of the form <s p a c e s> it got caught by the HTML
 621          // tag parser and hashed. Need to reverse the process before using the URL.
 622          $unhashed = $this->unhash($url);
 623          if ($unhashed != $url)
 624              $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 625  
 626          $url = $this->encodeAttribute($url);
 627  
 628          $result = "<a href=\"$url\"";
 629          if (isset($title)) {
 630              $title = $this->encodeAttribute($title);
 631              $result .=  " title=\"$title\"";
 632          }
 633          
 634          $link_text = $this->runSpanGamut($link_text);
 635          $result .= ">$link_text</a>";
 636  
 637          return $this->hashPart($result);
 638      }
 639  
 640  
 641  	protected function doImages($text) {
 642      #
 643      # Turn Markdown image shortcuts into <img> tags.
 644      #
 645          #
 646          # First, handle reference-style labeled images: ![alt text][id]
 647          #
 648          $text = preg_replace_callback('{
 649              (                # wrap whole match in $1
 650                !\[
 651                  ('.$this->nested_brackets_re.')        # alt text = $2
 652                \]
 653  
 654                [ ]?                # one optional space
 655                (?:\n[ ]*)?        # one optional newline followed by spaces
 656  
 657                \[
 658                  (.*?)        # id = $3
 659                \]
 660  
 661              )
 662              }xs', 
 663              array($this, '_doImages_reference_callback'), $text);
 664  
 665          #
 666          # Next, handle inline images:  ![alt text](url "optional title")
 667          # Don't forget: encode * and _
 668          #
 669          $text = preg_replace_callback('{
 670              (                # wrap whole match in $1
 671                !\[
 672                  ('.$this->nested_brackets_re.')        # alt text = $2
 673                \]
 674                \s?            # One optional whitespace character
 675                \(            # literal paren
 676                  [ \n]*
 677                  (?:
 678                      <(\S*)>    # src url = $3
 679                  |
 680                      ('.$this->nested_url_parenthesis_re.')    # src url = $4
 681                  )
 682                  [ \n]*
 683                  (            # $5
 684                    ([\'"])    # quote char = $6
 685                    (.*?)        # title = $7
 686                    \6        # matching quote
 687                    [ \n]*
 688                  )?            # title is optional
 689                \)
 690              )
 691              }xs',
 692              array($this, '_doImages_inline_callback'), $text);
 693  
 694          return $text;
 695      }
 696  	protected function _doImages_reference_callback($matches) {
 697          $whole_match = $matches[1];
 698          $alt_text    = $matches[2];
 699          $link_id     = strtolower($matches[3]);
 700  
 701          if ($link_id == "") {
 702              $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 703          }
 704  
 705          $alt_text = $this->encodeAttribute($alt_text);
 706          if (isset($this->urls[$link_id])) {
 707              $url = $this->encodeAttribute($this->urls[$link_id]);
 708              $result = "<img src=\"$url\" alt=\"$alt_text\"";
 709              if (isset($this->titles[$link_id])) {
 710                  $title = $this->titles[$link_id];
 711                  $title = $this->encodeAttribute($title);
 712                  $result .=  " title=\"$title\"";
 713              }
 714              $result .= $this->empty_element_suffix;
 715              $result = $this->hashPart($result);
 716          }
 717          else {
 718              # If there's no such link ID, leave intact:
 719              $result = $whole_match;
 720          }
 721  
 722          return $result;
 723      }
 724  	protected function _doImages_inline_callback($matches) {
 725          $whole_match    = $matches[1];
 726          $alt_text        = $matches[2];
 727          $url            = $matches[3] == '' ? $matches[4] : $matches[3];
 728          $title            =& $matches[7];
 729  
 730          $alt_text = $this->encodeAttribute($alt_text);
 731          $url = $this->encodeAttribute($url);
 732          $result = "<img src=\"$url\" alt=\"$alt_text\"";
 733          if (isset($title)) {
 734              $title = $this->encodeAttribute($title);
 735              $result .=  " title=\"$title\""; # $title already quoted
 736          }
 737          $result .= $this->empty_element_suffix;
 738  
 739          return $this->hashPart($result);
 740      }
 741  
 742  
 743  	protected function doHeaders($text) {
 744          # Setext-style headers:
 745          #      Header 1
 746          #      ========
 747          #  
 748          #      Header 2
 749          #      --------
 750          #
 751          $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 752              array($this, '_doHeaders_callback_setext'), $text);
 753  
 754          # atx-style headers:
 755          #    # Header 1
 756          #    ## Header 2
 757          #    ## Header 2 with closing hashes ##
 758          #    ...
 759          #    ###### Header 6
 760          #
 761          $text = preg_replace_callback('{
 762                  ^(\#{1,6})    # $1 = string of #\'s
 763                  [ ]*
 764                  (.+?)        # $2 = Header text
 765                  [ ]*
 766                  \#*            # optional closing #\'s (not counted)
 767                  \n+
 768              }xm',
 769              array($this, '_doHeaders_callback_atx'), $text);
 770  
 771          return $text;
 772      }
 773  	protected function _doHeaders_callback_setext($matches) {
 774          # Terrible hack to check we haven't found an empty list item.
 775          if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 776              return $matches[0];
 777          
 778          $level = $matches[2]{0} == '=' ? 1 : 2;
 779          $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 780          return "\n" . $this->hashBlock($block) . "\n\n";
 781      }
 782  	protected function _doHeaders_callback_atx($matches) {
 783          $level = strlen($matches[1]);
 784          $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 785          return "\n" . $this->hashBlock($block) . "\n\n";
 786      }
 787  
 788  
 789  	protected function doLists($text) {
 790      #
 791      # Form HTML ordered (numbered) and unordered (bulleted) lists.
 792      #
 793          $less_than_tab = $this->tab_width - 1;
 794  
 795          # Re-usable patterns to match list item bullets and number markers:
 796          $marker_ul_re  = '[*+-]';
 797          $marker_ol_re  = '\d+[\.]';
 798  
 799          $markers_relist = array(
 800              $marker_ul_re => $marker_ol_re,
 801              $marker_ol_re => $marker_ul_re,
 802              );
 803  
 804          foreach ($markers_relist as $marker_re => $other_marker_re) {
 805              # Re-usable pattern to match any entirel ul or ol list:
 806              $whole_list_re = '
 807                  (                                # $1 = whole list
 808                    (                                # $2
 809                      ([ ]{0,'.$less_than_tab.'})    # $3 = number of spaces
 810                      ('.$marker_re.')            # $4 = first list item marker
 811                      [ ]+
 812                    )
 813                    (?s:.+?)
 814                    (                                # $5
 815                        \z
 816                      |
 817                        \n{2,}
 818                        (?=\S)
 819                        (?!                        # Negative lookahead for another list item marker
 820                          [ ]*
 821                          '.$marker_re.'[ ]+
 822                        )
 823                      |
 824                        (?=                        # Lookahead for another kind of list
 825                          \n
 826                          \3                        # Must have the same indentation
 827                          '.$other_marker_re.'[ ]+
 828                        )
 829                    )
 830                  )
 831              '; // mx
 832              
 833              # We use a different prefix before nested lists than top-level lists.
 834              # See extended comment in _ProcessListItems().
 835          
 836              if ($this->list_level) {
 837                  $text = preg_replace_callback('{
 838                          ^
 839                          '.$whole_list_re.'
 840                      }mx',
 841                      array($this, '_doLists_callback'), $text);
 842              }
 843              else {
 844                  $text = preg_replace_callback('{
 845                          (?:(?<=\n)\n|\A\n?) # Must eat the newline
 846                          '.$whole_list_re.'
 847                      }mx',
 848                      array($this, '_doLists_callback'), $text);
 849              }
 850          }
 851  
 852          return $text;
 853      }
 854  	protected function _doLists_callback($matches) {
 855          # Re-usable patterns to match list item bullets and number markers:
 856          $marker_ul_re  = '[*+-]';
 857          $marker_ol_re  = '\d+[\.]';
 858          $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 859          
 860          $list = $matches[1];
 861          $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 862          
 863          $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 864          
 865          $list .= "\n";
 866          $result = $this->processListItems($list, $marker_any_re);
 867          
 868          $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 869          return "\n". $result ."\n\n";
 870      }
 871  
 872      protected $list_level = 0;
 873  
 874  	protected function processListItems($list_str, $marker_any_re) {
 875      #
 876      #    Process the contents of a single ordered or unordered list, splitting it
 877      #    into individual list items.
 878      #
 879          # The $this->list_level global keeps track of when we're inside a list.
 880          # Each time we enter a list, we increment it; when we leave a list,
 881          # we decrement. If it's zero, we're not in a list anymore.
 882          #
 883          # We do this because when we're not inside a list, we want to treat
 884          # something like this:
 885          #
 886          #        I recommend upgrading to version
 887          #        8. Oops, now this line is treated
 888          #        as a sub-list.
 889          #
 890          # As a single paragraph, despite the fact that the second line starts
 891          # with a digit-period-space sequence.
 892          #
 893          # Whereas when we're inside a list (or sub-list), that line will be
 894          # treated as the start of a sub-list. What a kludge, huh? This is
 895          # an aspect of Markdown's syntax that's hard to parse perfectly
 896          # without resorting to mind-reading. Perhaps the solution is to
 897          # change the syntax rules such that sub-lists must start with a
 898          # starting cardinal number; e.g. "1." or "a.".
 899          
 900          $this->list_level++;
 901  
 902          # trim trailing blank lines:
 903          $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 904  
 905          $list_str = preg_replace_callback('{
 906              (\n)?                            # leading line = $1
 907              (^[ ]*)                            # leading whitespace = $2
 908              ('.$marker_any_re.'                # list marker and space = $3
 909                  (?:[ ]+|(?=\n))    # space only required if item is not empty
 910              )
 911              ((?s:.*?))                        # list item text   = $4
 912              (?:(\n+(?=\n))|\n)                # tailing blank line = $5
 913              (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
 914              }xm',
 915              array($this, '_processListItems_callback'), $list_str);
 916  
 917          $this->list_level--;
 918          return $list_str;
 919      }
 920  	protected function _processListItems_callback($matches) {
 921          $item = $matches[4];
 922          $leading_line =& $matches[1];
 923          $leading_space =& $matches[2];
 924          $marker_space = $matches[3];
 925          $tailing_blank_line =& $matches[5];
 926  
 927          if ($leading_line || $tailing_blank_line || 
 928              preg_match('/\n{2,}/', $item))
 929          {
 930              # Replace marker with the appropriate whitespace indentation
 931              $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
 932              $item = $this->runBlockGamut($this->outdent($item)."\n");
 933          }
 934          else {
 935              # Recursion for sub-lists:
 936              $item = $this->doLists($this->outdent($item));
 937              $item = preg_replace('/\n+$/', '', $item);
 938              $item = $this->runSpanGamut($item);
 939          }
 940  
 941          return "<li>" . $item . "</li>\n";
 942      }
 943  
 944  
 945  	protected function doCodeBlocks($text) {
 946      #
 947      #    Process Markdown `<pre><code>` blocks.
 948      #
 949          $text = preg_replace_callback('{
 950                  (?:\n\n|\A\n?)
 951                  (                # $1 = the code block -- one or more lines, starting with a space/tab
 952                    (?>
 953                      [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
 954                      .*\n+
 955                    )+
 956                  )
 957                  ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)    # Lookahead for non-space at line-start, or end of doc
 958              }xm',
 959              array($this, '_doCodeBlocks_callback'), $text);
 960  
 961          return $text;
 962      }
 963  	protected function _doCodeBlocks_callback($matches) {
 964          $codeblock = $matches[1];
 965  
 966          $codeblock = $this->outdent($codeblock);
 967          $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
 968  
 969          # trim leading newlines and trailing newlines
 970          $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
 971  
 972          $codeblock = "<pre><code>$codeblock\n</code></pre>";
 973          return "\n\n".$this->hashBlock($codeblock)."\n\n";
 974      }
 975  
 976  
 977  	protected function makeCodeSpan($code) {
 978      #
 979      # Create a code span markup for $code. Called from handleSpanToken.
 980      #
 981          $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
 982          return $this->hashPart("<code>$code</code>");
 983      }
 984  
 985  
 986      protected $em_relist = array(
 987          ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
 988          '*' => '(?<![\s*])\*(?!\*)',
 989          '_' => '(?<![\s_])_(?!_)',
 990          );
 991      protected $strong_relist = array(
 992          ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
 993          '**' => '(?<![\s*])\*\*(?!\*)',
 994          '__' => '(?<![\s_])__(?!_)',
 995          );
 996      protected $em_strong_relist = array(
 997          ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
 998          '***' => '(?<![\s*])\*\*\*(?!\*)',
 999          '___' => '(?<![\s_])___(?!_)',
1000          );
1001      protected $em_strong_prepared_relist;
1002      
1003  	protected function prepareItalicsAndBold() {
1004      #
1005      # Prepare regular expressions for searching emphasis tokens in any
1006      # context.
1007      #
1008          foreach ($this->em_relist as $em => $em_re) {
1009              foreach ($this->strong_relist as $strong => $strong_re) {
1010                  # Construct list of allowed token expressions.
1011                  $token_relist = array();
1012                  if (isset($this->em_strong_relist["$em$strong"])) {
1013                      $token_relist[] = $this->em_strong_relist["$em$strong"];
1014                  }
1015                  $token_relist[] = $em_re;
1016                  $token_relist[] = $strong_re;
1017                  
1018                  # Construct master expression from list.
1019                  $token_re = '{('. implode('|', $token_relist) .')}';
1020                  $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1021              }
1022          }
1023      }
1024      
1025  	protected function doItalicsAndBold($text) {
1026          $token_stack = array('');
1027          $text_stack = array('');
1028          $em = '';
1029          $strong = '';
1030          $tree_char_em = false;
1031          
1032          while (1) {
1033              #
1034              # Get prepared regular expression for seraching emphasis tokens
1035              # in current context.
1036              #
1037              $token_re = $this->em_strong_prepared_relist["$em$strong"];
1038              
1039              #
1040              # Each loop iteration search for the next emphasis token. 
1041              # Each token is then passed to handleSpanToken.
1042              #
1043              $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1044              $text_stack[0] .= $parts[0];
1045              $token =& $parts[1];
1046              $text =& $parts[2];
1047              
1048              if (empty($token)) {
1049                  # Reached end of text span: empty stack without emitting.
1050                  # any more emphasis.
1051                  while ($token_stack[0]) {
1052                      $text_stack[1] .= array_shift($token_stack);
1053                      $text_stack[0] .= array_shift($text_stack);
1054                  }
1055                  break;
1056              }
1057              
1058              $token_len = strlen($token);
1059              if ($tree_char_em) {
1060                  # Reached closing marker while inside a three-char emphasis.
1061                  if ($token_len == 3) {
1062                      # Three-char closing marker, close em and strong.
1063                      array_shift($token_stack);
1064                      $span = array_shift($text_stack);
1065                      $span = $this->runSpanGamut($span);
1066                      $span = "<strong><em>$span</em></strong>";
1067                      $text_stack[0] .= $this->hashPart($span);
1068                      $em = '';
1069                      $strong = '';
1070                  } else {
1071                      # Other closing marker: close one em or strong and
1072                      # change current token state to match the other
1073                      $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1074                      $tag = $token_len == 2 ? "strong" : "em";
1075                      $span = $text_stack[0];
1076                      $span = $this->runSpanGamut($span);
1077                      $span = "<$tag>$span</$tag>";
1078                      $text_stack[0] = $this->hashPart($span);
1079                      $$tag = ''; # $$tag stands for $em or $strong
1080                  }
1081                  $tree_char_em = false;
1082              } else if ($token_len == 3) {
1083                  if ($em) {
1084                      # Reached closing marker for both em and strong.
1085                      # Closing strong marker:
1086                      for ($i = 0; $i < 2; ++$i) {
1087                          $shifted_token = array_shift($token_stack);
1088                          $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1089                          $span = array_shift($text_stack);
1090                          $span = $this->runSpanGamut($span);
1091                          $span = "<$tag>$span</$tag>";
1092                          $text_stack[0] .= $this->hashPart($span);
1093                          $$tag = ''; # $$tag stands for $em or $strong
1094                      }
1095                  } else {
1096                      # Reached opening three-char emphasis marker. Push on token 
1097                      # stack; will be handled by the special condition above.
1098                      $em = $token{0};
1099                      $strong = "$em$em";
1100                      array_unshift($token_stack, $token);
1101                      array_unshift($text_stack, '');
1102                      $tree_char_em = true;
1103                  }
1104              } else if ($token_len == 2) {
1105                  if ($strong) {
1106                      # Unwind any dangling emphasis marker:
1107                      if (strlen($token_stack[0]) == 1) {
1108                          $text_stack[1] .= array_shift($token_stack);
1109                          $text_stack[0] .= array_shift($text_stack);
1110                      }
1111                      # Closing strong marker:
1112                      array_shift($token_stack);
1113                      $span = array_shift($text_stack);
1114                      $span = $this->runSpanGamut($span);
1115                      $span = "<strong>$span</strong>";
1116                      $text_stack[0] .= $this->hashPart($span);
1117                      $strong = '';
1118                  } else {
1119                      array_unshift($token_stack, $token);
1120                      array_unshift($text_stack, '');
1121                      $strong = $token;
1122                  }
1123              } else {
1124                  # Here $token_len == 1
1125                  if ($em) {
1126                      if (strlen($token_stack[0]) == 1) {
1127                          # Closing emphasis marker:
1128                          array_shift($token_stack);
1129                          $span = array_shift($text_stack);
1130                          $span = $this->runSpanGamut($span);
1131                          $span = "<em>$span</em>";
1132                          $text_stack[0] .= $this->hashPart($span);
1133                          $em = '';
1134                      } else {
1135                          $text_stack[0] .= $token;
1136                      }
1137                  } else {
1138                      array_unshift($token_stack, $token);
1139                      array_unshift($text_stack, '');
1140                      $em = $token;
1141                  }
1142              }
1143          }
1144          return $text_stack[0];
1145      }
1146  
1147  
1148  	protected function doBlockQuotes($text) {
1149          $text = preg_replace_callback('/
1150                (                                # Wrap whole match in $1
1151                  (?>
1152                    ^[ ]*>[ ]?            # ">" at the start of a line
1153                      .+\n                    # rest of the first line
1154                    (.+\n)*                    # subsequent consecutive lines
1155                    \n*                        # blanks
1156                  )+
1157                )
1158              /xm',
1159              array($this, '_doBlockQuotes_callback'), $text);
1160  
1161          return $text;
1162      }
1163  	protected function _doBlockQuotes_callback($matches) {
1164          $bq = $matches[1];
1165          # trim one level of quoting - trim whitespace-only lines
1166          $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1167          $bq = $this->runBlockGamut($bq);        # recurse
1168  
1169          $bq = preg_replace('/^/m', "  ", $bq);
1170          # These leading spaces cause problem with <pre> content, 
1171          # so we need to fix that:
1172          $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1173              array($this, '_doBlockQuotes_callback2'), $bq);
1174  
1175          return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1176      }
1177  	protected function _doBlockQuotes_callback2($matches) {
1178          $pre = $matches[1];
1179          $pre = preg_replace('/^  /m', '', $pre);
1180          return $pre;
1181      }
1182  
1183  
1184  	protected function formParagraphs($text) {
1185      #
1186      #    Params:
1187      #        $text - string to process with html <p> tags
1188      #
1189          # Strip leading and trailing lines:
1190          $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1191  
1192          $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1193  
1194          #
1195          # Wrap <p> tags and unhashify HTML blocks
1196          #
1197          foreach ($grafs as $key => $value) {
1198              if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1199                  # Is a paragraph.
1200                  $value = $this->runSpanGamut($value);
1201                  $value = preg_replace('/^([ ]*)/', "<p>", $value);
1202                  $value .= "</p>";
1203                  $grafs[$key] = $this->unhash($value);
1204              }
1205              else {
1206                  # Is a block.
1207                  # Modify elements of @grafs in-place...
1208                  $graf = $value;
1209                  $block = $this->html_hashes[$graf];
1210                  $graf = $block;
1211  //                if (preg_match('{
1212  //                    \A
1213  //                    (                            # $1 = <div> tag
1214  //                      <div  \s+
1215  //                      [^>]*
1216  //                      \b
1217  //                      markdown\s*=\s*  ([\'"])    #    $2 = attr quote char
1218  //                      1
1219  //                      \2
1220  //                      [^>]*
1221  //                      >
1222  //                    )
1223  //                    (                            # $3 = contents
1224  //                    .*
1225  //                    )
1226  //                    (</div>)                    # $4 = closing tag
1227  //                    \z
1228  //                    }xs', $block, $matches))
1229  //                {
1230  //                    list(, $div_open, , $div_content, $div_close) = $matches;
1231  //
1232  //                    # We can't call Markdown(), because that resets the hash;
1233  //                    # that initialization code should be pulled into its own sub, though.
1234  //                    $div_content = $this->hashHTMLBlocks($div_content);
1235  //                    
1236  //                    # Run document gamut methods on the content.
1237  //                    foreach ($this->document_gamut as $method => $priority) {
1238  //                        $div_content = $this->$method($div_content);
1239  //                    }
1240  //
1241  //                    $div_open = preg_replace(
1242  //                        '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1243  //
1244  //                    $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1245  //                }
1246                  $grafs[$key] = $graf;
1247              }
1248          }
1249  
1250          return implode("\n\n", $grafs);
1251      }
1252  
1253  
1254  	protected function encodeAttribute($text) {
1255      #
1256      # Encode text for a double-quoted HTML attribute. This function
1257      # is *not* suitable for attributes enclosed in single quotes.
1258      #
1259          $text = $this->encodeAmpsAndAngles($text);
1260          $text = str_replace('"', '&quot;', $text);
1261          return $text;
1262      }
1263      
1264      
1265  	protected function encodeAmpsAndAngles($text) {
1266      #
1267      # Smart processing for ampersands and angle brackets that need to 
1268      # be encoded. Valid character entities are left alone unless the
1269      # no-entities mode is set.
1270      #
1271          if ($this->no_entities) {
1272              $text = str_replace('&', '&amp;', $text);
1273          } else {
1274              # Ampersand-encoding based entirely on Nat Irons's Amputator
1275              # MT plugin: <http://bumppo.net/projects/amputator/>
1276              $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1277                                  '&amp;', $text);
1278          }
1279          # Encode remaining <'s
1280          $text = str_replace('<', '&lt;', $text);
1281  
1282          return $text;
1283      }
1284  
1285  
1286  	protected function doAutoLinks($text) {
1287          $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
1288              array($this, '_doAutoLinks_url_callback'), $text);
1289  
1290          # Email addresses: <[email protected]>
1291          $text = preg_replace_callback('{
1292              <
1293              (?:mailto:)?
1294              (
1295                  (?:
1296                      [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1297                  |
1298                      ".*?"
1299                  )
1300                  \@
1301                  (?:
1302                      [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1303                  |
1304                      \[[\d.a-fA-F:]+\]    # IPv4 & IPv6
1305                  )
1306              )
1307              >
1308              }xi',
1309              array($this, '_doAutoLinks_email_callback'), $text);
1310          $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text);
1311  
1312          return $text;
1313      }
1314  	protected function _doAutoLinks_tel_callback($matches) {
1315          $url = $this->encodeAttribute($matches[1]);
1316          $tel = $this->encodeAttribute($matches[2]);
1317          $link = "<a href=\"$url\">$tel</a>";
1318          return $this->hashPart($link);
1319      }
1320  	protected function _doAutoLinks_url_callback($matches) {
1321          $url = $this->encodeAttribute($matches[1]);
1322          $link = "<a href=\"$url\">$url</a>";
1323          return $this->hashPart($link);
1324      }
1325  	protected function _doAutoLinks_email_callback($matches) {
1326          $address = $matches[1];
1327          $link = $this->encodeEmailAddress($address);
1328          return $this->hashPart($link);
1329      }
1330  
1331  
1332  	protected function encodeEmailAddress($addr) {
1333      #
1334      #    Input: an email address, e.g. "[email protected]"
1335      #
1336      #    Output: the email address as a mailto link, with each character
1337      #        of the address encoded as either a decimal or hex entity, in
1338      #        the hopes of foiling most address harvesting spam bots. E.g.:
1339      #
1340      #      <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1341      #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1342      #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1343      #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1344      #
1345      #    Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1346      #   With some optimizations by Milian Wolff.
1347      #
1348          $addr = "mailto:" . $addr;
1349          $chars = preg_split('/(?<!^)(?!$)/', $addr);
1350          $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1351          
1352          foreach ($chars as $key => $char) {
1353              $ord = ord($char);
1354              # Ignore non-ascii chars.
1355              if ($ord < 128) {
1356                  $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1357                  # roughly 10% raw, 45% hex, 45% dec
1358                  # '@' *must* be encoded. I insist.
1359                  # '"' has to be encoded inside the attribute
1360                  if ($r > 90 && $char != '@' && $char != '"') /* do nothing */;
1361                  else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1362                  else              $chars[$key] = '&#'.$ord.';';
1363              }
1364          }
1365          
1366          $addr = implode('', $chars);
1367          $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1368          $addr = "<a href=\"$addr\">$text</a>";
1369  
1370          return $addr;
1371      }
1372  
1373  
1374  	protected function parseSpan($str) {
1375      #
1376      # Take the string $str and parse it into tokens, hashing embeded HTML,
1377      # escaped characters and handling code spans.
1378      #
1379          $output = '';
1380          
1381          $span_re = '{
1382                  (
1383                      \\\\'.$this->escape_chars_re.'
1384                  |
1385                      (?<![`\\\\])
1386                      `+                        # code span marker
1387              '.( $this->no_markup ? '' : '
1388                  |
1389                      <!--    .*?     -->        # comment
1390                  |
1391                      <\?.*?\?> | <%.*?%>        # processing instruction
1392                  |
1393                      <[!$]?[-a-zA-Z0-9:_]+    # regular tags
1394                      (?>
1395                          \s
1396                          (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1397                      )?
1398                      >
1399                  |
1400                      <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1401                  |
1402                      </[-a-zA-Z0-9:_]+\s*> # closing tag
1403              ').'
1404                  )
1405                  }xs';
1406  
1407          while (1) {
1408              #
1409              # Each loop iteration seach for either the next tag, the next 
1410              # openning code span marker, or the next escaped character. 
1411              # Each token is then passed to handleSpanToken.
1412              #
1413              $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1414              
1415              # Create token from text preceding tag.
1416              if ($parts[0] != "") {
1417                  $output .= $parts[0];
1418              }
1419              
1420              # Check if we reach the end.
1421              if (isset($parts[1])) {
1422                  $output .= $this->handleSpanToken($parts[1], $parts[2]);
1423                  $str = $parts[2];
1424              }
1425              else {
1426                  break;
1427              }
1428          }
1429          
1430          return $output;
1431      }
1432      
1433      
1434  	protected function handleSpanToken($token, &$str) {
1435      #
1436      # Handle $token provided by parseSpan by determining its nature and 
1437      # returning the corresponding value that should replace it.
1438      #
1439          switch ($token{0}) {
1440              case "\\":
1441                  return $this->hashPart("&#". ord($token{1}). ";");
1442              case "`":
1443                  # Search for end marker in remaining text.
1444                  if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1445                      $str, $matches))
1446                  {
1447                      $str = $matches[2];
1448                      $codespan = $this->makeCodeSpan($matches[1]);
1449                      return $this->hashPart($codespan);
1450                  }
1451                  return $token; // return as text since no ending marker found.
1452              default:
1453                  return $this->hashPart($token);
1454          }
1455      }
1456  
1457  
1458  	protected function outdent($text) {
1459      #
1460      # Remove one level of line-leading tabs or spaces
1461      #
1462          return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1463      }
1464  
1465  
1466      # String length function for detab. `_initDetab` will create a function to 
1467      # hanlde UTF-8 if the default function does not exist.
1468      protected $utf8_strlen = 'mb_strlen';
1469      
1470  	protected function detab($text) {
1471      #
1472      # Replace tabs with the appropriate amount of space.
1473      #
1474          # For each line we separate the line in blocks delemited by
1475          # tab characters. Then we reconstruct every line by adding the 
1476          # appropriate number of space between each blocks.
1477          
1478          $text = preg_replace_callback('/^.*\t.*$/m',
1479              array($this, '_detab_callback'), $text);
1480  
1481          return $text;
1482      }
1483  	protected function _detab_callback($matches) {
1484          $line = $matches[0];
1485          $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1486          
1487          # Split in blocks.
1488          $blocks = explode("\t", $line);
1489          # Add each blocks to the line.
1490          $line = $blocks[0];
1491          unset($blocks[0]); # Do not add first block twice.
1492          foreach ($blocks as $block) {
1493              # Calculate amount of space, insert spaces, insert block.
1494              $amount = $this->tab_width - 
1495                  $strlen($line, 'UTF-8') % $this->tab_width;
1496              $line .= str_repeat(" ", $amount) . $block;
1497          }
1498          return $line;
1499      }
1500  	protected function _initDetab() {
1501      #
1502      # Check for the availability of the function in the `utf8_strlen` property
1503      # (initially `mb_strlen`). If the function is not available, create a 
1504      # function that will loosely count the number of UTF-8 characters with a
1505      # regular expression.
1506      #
1507          if (function_exists($this->utf8_strlen)) return;
1508          $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1509              "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1510              $text, $m);');
1511      }
1512  
1513  
1514  	protected function unhash($text) {
1515      #
1516      # Swap back in all the tags hashed by _HashHTMLBlocks.
1517      #
1518          return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1519              array($this, '_unhash_callback'), $text);
1520      }
1521  	protected function _unhash_callback($matches) {
1522          return $this->html_hashes[$matches[0]];
1523      }
1524  
1525  }
1526  
1527  
1528  #
1529  # Temporary Markdown Extra Parser Implementation Class
1530  #
1531  # NOTE: DON'T USE THIS CLASS
1532  # Currently the implementation of Extra resides here in this temporary class.
1533  # This makes it easier to propagate the changes between the three different
1534  # packaging styles of PHP Markdown. When this issue is resolved, this
1535  # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
1536  # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
1537  # one.
1538  #
1539  
1540  abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
1541  
1542      ### Configuration Variables ###
1543  
    # Prefix for footnote ids.
    public $fn_id_prefix = "";
    
    # Optional title attribute for footnote links and backlinks.
    public $fn_link_title = "";
    public $fn_backlink_title = "";
    
    # Optional class attribute for footnote links and backlinks.
    public $fn_link_class = "footnote-ref";
    public $fn_backlink_class = "footnote-backref";

    # Class name for table cell alignment (%% replaced left/center/right)
    # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
    # If empty, the align attribute is used instead of a class name.
    public $table_align_class_tmpl = '';

    # Optional class prefix for fenced code block.
    public $code_class_prefix = "";
    # Class attribute for code blocks goes on the `code` tag;
    # setting this to true will put attributes on the `pre` tag instead.
    public $code_attr_on_pre = false;
    
    # Predefined abbreviations, as an array of word => description.
    # setup() reads this array at the start of each transform: every key is
    # added to the abbreviation regular expression and every description
    # is stored after being trimmed.
    public $predef_abbr = array();
1568  
1569  
1570      ### Parser Implementation ###
1571  
1572  	public function __construct() {
1573      #
1574      # Constructor function. Initialize the parser object.
1575      #
1576          # Add extra escapable characters before parent constructor 
1577          # initialize the table.
1578          $this->escape_chars .= ':|';
1579          
1580          # Insert extra document, block, and span transformations. 
1581          # Parent constructor will do the sorting.
1582          $this->document_gamut += array(
1583              "doFencedCodeBlocks" => 5,
1584              "stripFootnotes"     => 15,
1585              "stripAbbreviations" => 25,
1586              "appendFootnotes"    => 50,
1587              );
1588          $this->block_gamut += array(
1589              "doFencedCodeBlocks" => 5,
1590              "doTables"           => 15,
1591              "doDefLists"         => 45,
1592              );
1593          $this->span_gamut += array(
1594              "doFootnotes"        => 5,
1595              "doAbbreviations"    => 70,
1596              );
1597          
1598          parent::__construct();
1599      }
1600      
1601      
    # Extra variables used during extra transformations.
    # setup() re-initializes every one of them; teardown() clears all of
    # them except $footnote_counter.
    protected $footnotes = array();
    protected $footnotes_ordered = array();
    protected $footnotes_ref_count = array();
    protected $footnotes_numbers = array();
    # Abbreviation word => description. setup() fills it from $predef_abbr.
    # (The property name is misspelled; it is kept as is because this is
    # the name the rest of the class refers to.)
    protected $abbr_desciptions = array();
    # Alternation ("a|b|c") of the preg_quote()d abbreviation words, built
    # by setup() from the keys of $predef_abbr.
    protected $abbr_word_re = '';
    
    # Give the current footnote number.
    protected $footnote_counter = 1;
1612      
1613      
1614  	protected function setup() {
1615      #
1616      # Setting up Extra-specific variables.
1617      #
1618          parent::setup();
1619          
1620          $this->footnotes = array();
1621          $this->footnotes_ordered = array();
1622          $this->footnotes_ref_count = array();
1623          $this->footnotes_numbers = array();
1624          $this->abbr_desciptions = array();
1625          $this->abbr_word_re = '';
1626          $this->footnote_counter = 1;
1627          
1628          foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1629              if ($this->abbr_word_re)
1630                  $this->abbr_word_re .= '|';
1631              $this->abbr_word_re .= preg_quote($abbr_word);
1632              $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1633          }
1634      }
1635      
1636  	protected function teardown() {
1637      #
1638      # Clearing Extra-specific variables.
1639      #
1640          $this->footnotes = array();
1641          $this->footnotes_ordered = array();
1642          $this->footnotes_ref_count = array();
1643          $this->footnotes_numbers = array();
1644          $this->abbr_desciptions = array();
1645          $this->abbr_word_re = '';
1646          
1647          parent::teardown();
1648      }
1649      
1650      
1651      ### Extra Attribute Parser ###
1652  
    # Expression to use to catch attributes (includes the braces).
    # Matches `{`, one or more `#name` or `.name` tokens (each optionally 
    # preceded by spaces, names made of letters, digits, `-`, `_` and `:`),
    # optional trailing spaces, then `}`. The single capture group holds
    # the token list without the braces.
    protected $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}';
    # Expression to use when parsing in a context when no capture is desired.
    # Same syntax as above, without the capture group.
    protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}';
1657  
1658  	protected function doExtraAttributes($tag_name, $attr) {
1659      #
1660      # Parse attributes caught by the $this->id_class_attr_catch_re expression
1661      # and return the HTML-formatted list of attributes.
1662      #
1663      # Currently supported attributes are .class and #id.
1664      #
1665          if (empty($attr)) return "";
1666          
1667          # Split on components
1668          preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches);
1669          $elements = $matches[0];
1670  
1671          # handle classes and ids (only first id taken into account)
1672          $classes = array();
1673          $id = false;
1674          foreach ($elements as $element) {
1675              if ($element{0} == '.') {
1676                  $classes[] = substr($element, 1);
1677              } else if ($element{0} == '#') {
1678                  if ($id === false) $id = substr($element, 1);
1679              }
1680          }
1681  
1682          # compose attributes as string
1683          $attr_str = "";
1684          if (!empty($id)) {
1685              $attr_str .= ' id="'.$id.'"';
1686          }
1687          if (!empty($classes)) {
1688              $attr_str .= ' class="'.implode(" ", $classes).'"';
1689          }
1690          return $attr_str;
1691      }
1692  
1693  
1694  	protected function stripLinkDefinitions($text) {
1695      #
1696      # Strips link definitions from text, stores the URLs and titles in
1697      # hash references.
1698      #
1699          $less_than_tab = $this->tab_width - 1;
1700  
1701          # Link defs are in the form: ^[id]: url "optional title"
1702          $text = preg_replace_callback('{
1703                              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:    # id = $1
1704                                [ ]*
1705                                \n?                # maybe *one* newline
1706                                [ ]*
1707                              (?:
1708                                <(.+?)>            # url = $2
1709                              |
1710                                (\S+?)            # url = $3
1711                              )
1712                                [ ]*
1713                                \n?                # maybe one newline
1714                                [ ]*
1715                              (?:
1716                                  (?<=\s)            # lookbehind for whitespace
1717                                  ["(]
1718                                  (.*?)            # title = $4
1719                                  [")]
1720                                  [ ]*
1721                              )?    # title is optional
1722                      (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
1723                              (?:\n+|\Z)
1724              }xm',
1725              array($this, '_stripLinkDefinitions_callback'),
1726              $text);
1727          return $text;
1728      }
1729  	protected function _stripLinkDefinitions_callback($matches) {
1730          $link_id = strtolower($matches[1]);
1731          $url = $matches[2] == '' ? $matches[3] : $matches[2];
1732          $this->urls[$link_id] = $url;
1733          $this->titles[$link_id] =& $matches[4];
1734          $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
1735          return ''; # String that will replace the block
1736      }
1737  
1738  
1739      ### HTML Block Parser ###
1740      
    # Each of the following is an alternation of tag names meant to be 
    # inserted as is into the regular expressions of the HTML block parser.

    # Tags that are always treated as block tags:
    protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
                           
    # Tags treated as block tags only if the opening tag is alone on its line:
    # NOTE(review): `iframe` is also listed in $block_tags_re above, and 
    # _hashHTMLBlocks_inMarkdown tests that list first, so the entry here 
    # looks redundant -- confirm before removing.
    protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
    
    # Tags where markdown="1" default to span mode:
    protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
    
    # Tags which must not have their contents modified, no matter where 
    # they appear:
    protected $clean_tags_re = 'script|style|math|svg';
    
    # Tags that do not need to be closed.
    protected $auto_close_tags_re = 'hr|img|param|source|track';
1756      
1757  
1758  	protected function hashHTMLBlocks($text) {
1759      #
1760      # Hashify HTML Blocks and "clean tags".
1761      #
1762      # We only want to do this for block-level HTML tags, such as headers,
1763      # lists, and tables. That's because we still want to wrap <p>s around
1764      # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1765      # phrase emphasis, and spans. The list of tags we're looking for is
1766      # hard-coded.
1767      #
1768      # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1769      # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
1770      # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
1771      #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1772      # These two functions are calling each other. It's recursive!
1773      #
1774          if ($this->no_markup)  return $text;
1775  
1776          #
1777          # Call the HTML-in-Markdown hasher.
1778          #
1779          list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1780          
1781          return $text;
1782      }
1783  	protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1784                                          $enclosing_tag_re = '', $span = false)
1785      {
1786      #
1787      # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1788      #
1789      # *   $indent is the number of space to be ignored when checking for code 
1790      #     blocks. This is important because if we don't take the indent into 
1791      #     account, something like this (which looks right) won't work as expected:
1792      #
1793      #     <div>
1794      #         <div markdown="1">
1795      #         Hello World.  <-- Is this a Markdown code block or text?
1796      #         </div>  <-- Is this a Markdown code block or a real tag?
1797      #     <div>
1798      #
1799      #     If you don't like this, just don't indent the tag on which
1800      #     you apply the markdown="1" attribute.
1801      #
1802      # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
1803      #     tag with that name. Nested tags supported.
1804      #
1805      # *   If $span is true, text inside must treated as span. So any double 
1806      #     newline will be replaced by a single newline so that it does not create 
1807      #     paragraphs.
1808      #
1809      # Returns an array of that form: ( processed text , remaining text )
1810      #
1811          if ($text === '') return array('', '');
1812  
1813          # Regex to check for the presense of newlines around a block tag.
1814          $newline_before_re = '/(?:^\n?|\n\n)*$/';
1815          $newline_after_re = 
1816              '{
1817                  ^                        # Start of text following the tag.
1818                  (?>[ ]*<!--.*?-->)?        # Optional comment.
1819                  [ ]*\n                    # Must be followed by newline.
1820              }xs';
1821          
1822          # Regex to match any tag.
1823          $block_tag_re =
1824              '{
1825                  (                    # $2: Capture whole tag.
1826                      </?                    # Any opening or closing tag.
1827                          (?>                # Tag name.
1828                              '.$this->block_tags_re.'            |
1829                              '.$this->context_block_tags_re.'    |
1830                              '.$this->clean_tags_re.'            |
1831                              (?!\s)'.$enclosing_tag_re.'
1832                          )
1833                          (?:
1834                              (?=[\s"\'/a-zA-Z0-9])    # Allowed characters after tag name.
1835                              (?>
1836                                  ".*?"        |    # Double quotes (can contain `>`)
1837                                  \'.*?\'       |    # Single quotes (can contain `>`)
1838                                  .+?                # Anything but quotes and `>`.
1839                              )*?
1840                          )?
1841                      >                    # End of tag.
1842                  |
1843                      <!--    .*?     -->    # HTML Comment
1844                  |
1845                      <\?.*?\?> | <%.*?%>    # Processing instruction
1846                  |
1847                      <!\[CDATA\[.*?\]\]>    # CData Block
1848                  '. ( !$span ? ' # If not in span.
1849                  |
1850                      # Indented code block
1851                      (?: ^[ ]*\n | ^ | \n[ ]*\n )
1852                      [ ]{'.($indent+4).'}[^\n]* \n
1853                      (?>
1854                          (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1855                      )*
1856                  |
1857                      # Fenced code block marker
1858                      (?<= ^ | \n )
1859                      [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1860                                      [ ]*
1861                      (?:
1862                      \.?[-_:a-zA-Z0-9]+ # standalone class name
1863                      |
1864                          '.$this->id_class_attr_nocatch_re.' # extra attributes
1865                      )?
1866                      [ ]*
1867                      (?= \n )
1868                  ' : '' ). ' # End (if not is span).
1869                  |
1870                      # Code span marker
1871                      # Note, this regex needs to go after backtick fenced
1872                      # code blocks but it should also be kept outside of the
1873                      # "if not in span" condition adding backticks to the parser
1874                      `+
1875                  )
1876              }xs';
1877  
1878          
1879          $depth = 0;        # Current depth inside the tag tree.
1880          $parsed = "";    # Parsed text that will be returned.
1881  
1882          #
1883          # Loop through every tag until we find the closing tag of the parent
1884          # or loop until reaching the end of text if no parent tag specified.
1885          #
1886          do {
1887              #
1888              # Split the text using the first $tag_match pattern found.
1889              # Text before  pattern will be first in the array, text after
1890              # pattern will be at the end, and between will be any catches made 
1891              # by the pattern.
1892              #
1893              $parts = preg_split($block_tag_re, $text, 2, 
1894                                  PREG_SPLIT_DELIM_CAPTURE);
1895              
1896              # If in Markdown span mode, add a empty-string span-level hash 
1897              # after each newline to prevent triggering any block element.
1898              if ($span) {
1899                  $void = $this->hashPart("", ':');
1900                  $newline = "$void\n";
1901                  $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1902              }
1903              
1904              $parsed .= $parts[0]; # Text before current tag.
1905              
1906              # If end of $text has been reached. Stop loop.
1907              if (count($parts) < 3) {
1908                  $text = "";
1909                  break;
1910              }
1911              
1912              $tag  = $parts[1]; # Tag to handle.
1913              $text = $parts[2]; # Remaining text after current tag.
1914              $tag_re = preg_quote($tag); # For use in a regular expression.
1915              
1916              #
1917              # Check for: Fenced code block marker.
1918              # Note: need to recheck the whole tag to disambiguate backtick
1919              # fences from code spans
1920              #
1921              if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1922                  # Fenced code block marker: find matching end marker.
1923                  $fence_indent = strlen($capture[1]); # use captured indent in re
1924                  $fence_re = $capture[2]; # use captured fence in re
1925                  if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1926                      $matches)) 
1927                  {
1928                      # End marker found: pass text unchanged until marker.
1929                      $parsed .= $tag . $matches[0];
1930                      $text = substr($text, strlen($matches[0]));
1931                  }
1932                  else {
1933                      # No end marker: just skip it.
1934                      $parsed .= $tag;
1935                  }
1936              }
1937              #
1938              # Check for: Indented code block.
1939              #
1940              else if ($tag{0} == "\n" || $tag{0} == " ") {
1941                  # Indented code block: pass it unchanged, will be handled 
1942                  # later.
1943                  $parsed .= $tag;
1944              }
1945              #
1946              # Check for: Code span marker
1947              # Note: need to check this after backtick fenced code blocks
1948              #
1949              else if ($tag{0} == "`") {
1950                  # Find corresponding end marker.
1951                  $tag_re = preg_quote($tag);
1952                  if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1953                      $text, $matches))
1954                  {
1955                      # End marker found: pass text unchanged until marker.
1956                      $parsed .= $tag . $matches[0];
1957                      $text = substr($text, strlen($matches[0]));
1958                  }
1959                  else {
1960                      # Unmatched marker: just skip it.
1961                      $parsed .= $tag;
1962                  }
1963              }
1964              #
1965              # Check for: Opening Block level tag or
1966              #            Opening Context Block tag (like ins and del) 
1967              #               used as a block tag (tag is alone on it's line).
1968              #
1969              else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1970                  (    preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1971                      preg_match($newline_before_re, $parsed) &&
1972                      preg_match($newline_after_re, $text)    )
1973                  )
1974              {
1975                  # Need to parse tag and following text using the HTML parser.
1976                  list($block_text, $text) = 
1977                      $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1978                  
1979                  # Make sure it stays outside of any paragraph by adding newlines.
1980                  $parsed .= "\n\n$block_text\n\n";
1981              }
1982              #
1983              # Check for: Clean tag (like script, math)
1984              #            HTML Comments, processing instructions.
1985              #
1986              else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1987                  $tag{1} == '!' || $tag{1} == '?')
1988              {
1989                  # Need to parse tag and following text using the HTML parser.
1990                  # (don't check for markdown attribute)
1991                  list($block_text, $text) = 
1992                      $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1993                  
1994                  $parsed .= $block_text;
1995              }
1996              #
1997              # Check for: Tag with same name as enclosing tag.
1998              #
1999              else if ($enclosing_tag_re !== '' &&
2000                  # Same name as enclosing tag.
2001                  preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2002              {
2003                  #
2004                  # Increase/decrease nested tag count.
2005                  #
2006                  if ($tag{1} == '/')                        $depth--;
2007                  else if ($tag{strlen($tag)-2} != '/')    $depth++;
2008  
2009                  if ($depth < 0) {
2010                      #
2011                      # Going out of parent element. Clean up and break so we
2012                      # return to the calling function.
2013                      #
2014                      $text = $tag . $text;
2015                      break;
2016                  }
2017                  
2018                  $parsed .= $tag;
2019              }
2020              else {
2021                  $parsed .= $tag;
2022              }
2023          } while ($depth >= 0);
2024          
2025          return array($parsed, $text);
2026      }
2027  	protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2028      #
2029      # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2030      #
2031      # *   Calls $hash_method to convert any blocks.
2032      # *   Stops when the first opening tag closes.
2033      # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2034      #     (it is not inside clean tags)
2035      #
2036      # Returns an array of that form: ( processed text , remaining text )
2037      #
2038          if ($text === '') return array('', '');
2039          
2040          # Regex to match `markdown` attribute inside of a tag.
2041          $markdown_attr_re = '
2042              {
2043                  \s*            # Eat whitespace before the `markdown` attribute
2044                  markdown
2045                  \s*=\s*
2046                  (?>
2047                      (["\'])        # $1: quote delimiter        
2048                      (.*?)        # $2: attribute value
2049                      \1            # matching delimiter    
2050                  |
2051                      ([^\s>]*)    # $3: unquoted attribute value
2052                  )
2053                  ()                # $4: make $3 always defined (avoid warnings)
2054              }xs';
2055          
2056          # Regex to match any tag.
2057          $tag_re = '{
2058                  (                    # $2: Capture whole tag.
2059                      </?                    # Any opening or closing tag.
2060                          [\w:$]+            # Tag name.
2061                          (?:
2062                              (?=[\s"\'/a-zA-Z0-9])    # Allowed characters after tag name.
2063                              (?>
2064                                  ".*?"        |    # Double quotes (can contain `>`)
2065                                  \'.*?\'       |    # Single quotes (can contain `>`)
2066                                  .+?                # Anything but quotes and `>`.
2067                              )*?
2068                          )?
2069                      >                    # End of tag.
2070                  |
2071                      <!--    .*?     -->    # HTML Comment
2072                  |
2073                      <\?.*?\?> | <%.*?%>    # Processing instruction
2074                  |
2075                      <!\[CDATA\[.*?\]\]>    # CData Block
2076                  )
2077              }xs';
2078          
2079          $original_text = $text;        # Save original text in case of faliure.
2080          
2081          $depth        = 0;    # Current depth inside the tag tree.
2082          $block_text    = "";    # Temporary text holder for current text.
2083          $parsed        = "";    # Parsed text that will be returned.
2084  
2085          #
2086          # Get the name of the starting tag.
2087          # (This pattern makes $base_tag_name_re safe without quoting.)
2088          #
2089          if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2090              $base_tag_name_re = $matches[1];
2091  
2092          #
2093          # Loop through every tag until we find the corresponding closing tag.
2094          #
2095          do {
2096              #
2097              # Split the text using the first $tag_match pattern found.
2098              # Text before  pattern will be first in the array, text after
2099              # pattern will be at the end, and between will be any catches made 
2100              # by the pattern.
2101              #
2102              $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2103              
2104              if (count($parts) < 3) {
2105                  #
2106                  # End of $text reached with unbalenced tag(s).
2107                  # In that case, we return original text unchanged and pass the
2108                  # first character as filtered to prevent an infinite loop in the 
2109                  # parent function.
2110                  #
2111                  return array($original_text{0}, substr($original_text, 1));
2112              }
2113              
2114              $block_text .= $parts[0]; # Text before current tag.
2115              $tag         = $parts[1]; # Tag to handle.
2116              $text        = $parts[2]; # Remaining text after current tag.
2117              
2118              #
2119              # Check for: Auto-close tag (like <hr/>)
2120              #             Comments and Processing Instructions.
2121              #
2122              if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2123                  $tag{1} == '!' || $tag{1} == '?')
2124              {
2125                  # Just add the tag to the block as if it was text.
2126                  $block_text .= $tag;
2127              }
2128              else {
2129                  #
2130                  # Increase/decrease nested tag count. Only do so if
2131                  # the tag's name match base tag's.
2132                  #
2133                  if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2134                      if ($tag{1} == '/')                        $depth--;
2135                      else if ($tag{strlen($tag)-2} != '/')    $depth++;
2136                  }
2137                  
2138                  #
2139                  # Check for `markdown="1"` attribute and handle it.
2140                  #
2141                  if ($md_attr && 
2142                      preg_match($markdown_attr_re, $tag, $attr_m) &&
2143                      preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2144                  {
2145                      # Remove `markdown` attribute from opening tag.
2146                      $tag = preg_replace($markdown_attr_re, '', $tag);
2147                      
2148                      # Check if text inside this tag must be parsed in span mode.
2149                      $this->mode = $attr_m[2] . $attr_m[3];
2150                      $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2151                          preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2152                      
2153                      # Calculate indent before tag.
2154                      if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2155                          $strlen = $this->utf8_strlen;
2156                          $indent = $strlen($matches[1], 'UTF-8');
2157                      } else {
2158                          $indent = 0;
2159                      }
2160                      
2161                      # End preceding block with this tag.
2162                      $block_text .= $tag;
2163                      $parsed .= $this->$hash_method($block_text);
2164                      
2165                      # Get enclosing tag name for the ParseMarkdown function.
2166                      # (This pattern makes $tag_name_re safe without quoting.)
2167                      preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2168                      $tag_name_re = $matches[1];
2169                      
2170                      # Parse the content using the HTML-in-Markdown parser.
2171                      list ($block_text, $text)
2172                          = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
2173                              $tag_name_re, $span_mode);
2174                      
2175                      # Outdent markdown text.
2176                      if ($indent > 0) {
2177                          $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
2178                                                      $block_text);
2179                      }
2180                      
2181                      # Append tag content to parsed text.
2182                      if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
2183                      else                $parsed .= "$block_text";
2184                      
2185                      # Start over with a new block.
2186                      $block_text = "";
2187                  }
2188                  else $block_text .= $tag;
2189              }
2190              
2191          } while ($depth > 0);
2192          
2193          #
2194          # Hash last block text that wasn't processed inside the loop.
2195          #
2196          $parsed .= $this->$hash_method($block_text);
2197          
2198          return array($parsed, $text);
2199      }
2200  
2201  
2202  	protected function hashClean($text) {
2203      #
2204      # Called whenever a tag must be hashed when a function inserts a "clean" tag
2205      # in $text, it passes through this function and is automaticaly escaped, 
2206      # blocking invalid nested overlap.
2207      #
2208          return $this->hashPart($text, 'C');
2209      }
2210  
2211  
2212  	protected function doAnchors($text) {
2213      #
2214      # Turn Markdown link shortcuts into XHTML <a> tags.
2215      #
2216          if ($this->in_anchor) return $text;
2217          $this->in_anchor = true;
2218          
2219          #
2220          # First, handle reference-style links: [link text] [id]
2221          #
2222          $text = preg_replace_callback('{
2223              (                    # wrap whole match in $1
2224                \[
2225                  ('.$this->nested_brackets_re.')    # link text = $2
2226                \]
2227  
2228                [ ]?                # one optional space
2229                (?:\n[ ]*)?        # one optional newline followed by spaces
2230  
2231                \[
2232                  (.*?)        # id = $3
2233                \]
2234              )
2235              }xs',
2236              array($this, '_doAnchors_reference_callback'), $text);
2237  
2238          #
2239          # Next, inline-style links: [link text](url "optional title")
2240          #
2241          $text = preg_replace_callback('{
2242              (                # wrap whole match in $1
2243                \[
2244                  ('.$this->nested_brackets_re.')    # link text = $2
2245                \]
2246                \(            # literal paren
2247                  [ \n]*
2248                  (?:
2249                      <(.+?)>    # href = $3
2250                  |
2251                      ('.$this->nested_url_parenthesis_re.')    # href = $4
2252                  )
2253                  [ \n]*
2254                  (            # $5
2255                    ([\'"])    # quote char = $6
2256                    (.*?)        # Title = $7
2257                    \6        # matching quote
2258                    [ \n]*    # ignore any spaces/tabs between closing quote and )
2259                  )?            # title is optional
2260                \)
2261                (?:[ ]? '.$this->id_class_attr_catch_re.' )?     # $8 = id/class attributes
2262              )
2263              }xs',
2264              array($this, '_doAnchors_inline_callback'), $text);
2265  
2266          #
2267          # Last, handle reference-style shortcuts: [link text]
2268          # These must come last in case you've also got [link text][1]
2269          # or [link text](/foo)
2270          #
2271          $text = preg_replace_callback('{
2272              (                    # wrap whole match in $1
2273                \[
2274                  ([^\[\]]+)        # link text = $2; can\'t contain [ or ]
2275                \]
2276              )
2277              }xs',
2278              array($this, '_doAnchors_reference_callback'), $text);
2279  
2280          $this->in_anchor = false;
2281          return $text;
2282      }
2283  	protected function _doAnchors_reference_callback($matches) {
2284          $whole_match =  $matches[1];
2285          $link_text   =  $matches[2];
2286          $link_id     =& $matches[3];
2287  
2288          if ($link_id == "") {
2289              # for shortcut links like [this][] or [this].
2290              $link_id = $link_text;
2291          }
2292          
2293          # lower-case and turn embedded newlines into spaces
2294          $link_id = strtolower($link_id);
2295          $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
2296  
2297          if (isset($this->urls[$link_id])) {
2298              $url = $this->urls[$link_id];
2299              $url = $this->encodeAttribute($url);
2300              
2301              $result = "<a href=\"$url\"";
2302              if ( isset( $this->titles[$link_id] ) ) {
2303                  $title = $this->titles[$link_id];
2304                  $title = $this->encodeAttribute($title);
2305                  $result .=  " title=\"$title\"";
2306              }
2307              if (isset($this->ref_attr[$link_id]))
2308                  $result .= $this->ref_attr[$link_id];
2309          
2310              $link_text = $this->runSpanGamut($link_text);
2311              $result .= ">$link_text</a>";
2312              $result = $this->hashPart($result);
2313          }
2314          else {
2315              $result = $whole_match;
2316          }
2317          return $result;
2318      }
2319  	protected function _doAnchors_inline_callback($matches) {
2320          $whole_match    =  $matches[1];
2321          $link_text        =  $this->runSpanGamut($matches[2]);
2322          $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
2323          $title            =& $matches[7];
2324          $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
2325  
2326          // if the URL was of the form <s p a c e s> it got caught by the HTML
2327          // tag parser and hashed. Need to reverse the process before using the URL.
2328          $unhashed = $this->unhash($url);
2329          if ($unhashed != $url)
2330              $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
2331  
2332          $url = $this->encodeAttribute($url);
2333  
2334          $result = "<a href=\"$url\"";
2335          if (isset($title)) {
2336              $title = $this->encodeAttribute($title);
2337              $result .=  " title=\"$title\"";
2338          }
2339          $result .= $attr;
2340          
2341          $link_text = $this->runSpanGamut($link_text);
2342          $result .= ">$link_text</a>";
2343  
2344          return $this->hashPart($result);
2345      }
2346  
2347  
2348  	protected function doImages($text) {
2349      #
2350      # Turn Markdown image shortcuts into <img> tags.
2351      #
2352          #
2353          # First, handle reference-style labeled images: ![alt text][id]
2354          #
2355          $text = preg_replace_callback('{
2356              (                # wrap whole match in $1
2357                !\[
2358                  ('.$this->nested_brackets_re.')        # alt text = $2
2359                \]
2360  
2361                [ ]?                # one optional space
2362                (?:\n[ ]*)?        # one optional newline followed by spaces
2363  
2364                \[
2365                  (.*?)        # id = $3
2366                \]
2367  
2368              )
2369              }xs', 
2370              array($this, '_doImages_reference_callback'), $text);
2371  
2372          #
2373          # Next, handle inline images:  ![alt text](url "optional title")
2374          # Don't forget: encode * and _
2375          #
2376          $text = preg_replace_callback('{
2377              (                # wrap whole match in $1
2378                !\[
2379                  ('.$this->nested_brackets_re.')        # alt text = $2
2380                \]
2381                \s?            # One optional whitespace character
2382                \(            # literal paren
2383                  [ \n]*
2384                  (?:
2385                      <(\S*)>    # src url = $3
2386                  |
2387                      ('.$this->nested_url_parenthesis_re.')    # src url = $4
2388                  )
2389                  [ \n]*
2390                  (            # $5
2391                    ([\'"])    # quote char = $6
2392                    (.*?)        # title = $7
2393                    \6        # matching quote
2394                    [ \n]*
2395                  )?            # title is optional
2396                \)
2397                (?:[ ]? '.$this->id_class_attr_catch_re.' )?     # $8 = id/class attributes
2398              )
2399              }xs',
2400              array($this, '_doImages_inline_callback'), $text);
2401  
2402          return $text;
2403      }
2404  	protected function _doImages_reference_callback($matches) {
2405          $whole_match = $matches[1];
2406          $alt_text    = $matches[2];
2407          $link_id     = strtolower($matches[3]);
2408  
2409          if ($link_id == "") {
2410              $link_id = strtolower($alt_text); # for shortcut links like ![this][].
2411          }
2412  
2413          $alt_text = $this->encodeAttribute($alt_text);
2414          if (isset($this->urls[$link_id])) {
2415              $url = $this->encodeAttribute($this->urls[$link_id]);
2416              $result = "<img src=\"$url\" alt=\"$alt_text\"";
2417              if (isset($this->titles[$link_id])) {
2418                  $title = $this->titles[$link_id];
2419                  $title = $this->encodeAttribute($title);
2420                  $result .=  " title=\"$title\"";
2421              }
2422              if (isset($this->ref_attr[$link_id]))
2423                  $result .= $this->ref_attr[$link_id];
2424              $result .= $this->empty_element_suffix;
2425              $result = $this->hashPart($result);
2426          }
2427          else {
2428              # If there's no such link ID, leave intact:
2429              $result = $whole_match;
2430          }
2431  
2432          return $result;
2433      }
2434  	protected function _doImages_inline_callback($matches) {
2435          $whole_match    = $matches[1];
2436          $alt_text        = $matches[2];
2437          $url            = $matches[3] == '' ? $matches[4] : $matches[3];
2438          $title            =& $matches[7];
2439          $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
2440  
2441          $alt_text = $this->encodeAttribute($alt_text);
2442          $url = $this->encodeAttribute($url);
2443          $result = "<img src=\"$url\" alt=\"$alt_text\"";
2444          if (isset($title)) {
2445              $title = $this->encodeAttribute($title);
2446              $result .=  " title=\"$title\""; # $title already quoted
2447          }
2448          $result .= $attr;
2449          $result .= $this->empty_element_suffix;
2450  
2451          return $this->hashPart($result);
2452      }
2453  
2454  
2455  	protected function doHeaders($text) {
2456      #
2457      # Redefined to add id and class attribute support.
2458      #
2459          # Setext-style headers:
2460          #      Header 1  {#header1}
2461          #      ========
2462          #  
2463          #      Header 2  {#header2 .class1 .class2}
2464          #      --------
2465          #
2466          $text = preg_replace_callback(
2467              '{
2468                  (^.+?)                                # $1: Header text
2469                  (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
2470                  [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
2471              }mx',
2472              array($this, '_doHeaders_callback_setext'), $text);
2473  
2474          # atx-style headers:
2475          #    # Header 1        {#header1}
2476          #    ## Header 2       {#header2}
2477          #    ## Header 2 with closing hashes ##  {#header3.class1.class2}
2478          #    ...
2479          #    ###### Header 6   {.class2}
2480          #
2481          $text = preg_replace_callback('{
2482                  ^(\#{1,6})    # $1 = string of #\'s
2483                  [ ]*
2484                  (.+?)        # $2 = Header text
2485                  [ ]*
2486                  \#*            # optional closing #\'s (not counted)
2487                  (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
2488                  [ ]*
2489                  \n+
2490              }xm',
2491              array($this, '_doHeaders_callback_atx'), $text);
2492  
2493          return $text;
2494      }
2495  	protected function _doHeaders_callback_setext($matches) {
2496          if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2497              return $matches[0];
2498          $level = $matches[3]{0} == '=' ? 1 : 2;
2499          $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
2500          $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2501          return "\n" . $this->hashBlock($block) . "\n\n";
2502      }
2503  	protected function _doHeaders_callback_atx($matches) {
2504          $level = strlen($matches[1]);
2505          $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3]);
2506          $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2507          return "\n" . $this->hashBlock($block) . "\n\n";
2508      }
2509  
2510  
2511  	protected function doTables($text) {
2512      #
2513      # Form HTML tables.
2514      #
2515          $less_than_tab = $this->tab_width - 1;
2516          #
2517          # Find tables with leading pipe.
2518          #
2519          #    | Header 1 | Header 2
2520          #    | -------- | --------
2521          #    | Cell 1   | Cell 2
2522          #    | Cell 3   | Cell 4
2523          #
2524          $text = preg_replace_callback('
2525              {
2526                  ^                            # Start of a line
2527                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2528                  [|]                            # Optional leading pipe (present)
2529                  (.+) \n                        # $1: Header row (at least one pipe)
2530                  
2531                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2532                  [|] ([ ]*[-:]+[-| :]*) \n    # $2: Header underline
2533                  
2534                  (                            # $3: Cells
2535                      (?>
2536                          [ ]*                # Allowed whitespace.
2537                          [|] .* \n            # Row content.
2538                      )*
2539                  )
2540                  (?=\n|\Z)                    # Stop at final double newline.
2541              }xm',
2542              array($this, '_doTable_leadingPipe_callback'), $text);
2543          
2544          #
2545          # Find tables without leading pipe.
2546          #
2547          #    Header 1 | Header 2
2548          #    -------- | --------
2549          #    Cell 1   | Cell 2
2550          #    Cell 3   | Cell 4
2551          #
2552          $text = preg_replace_callback('
2553              {
2554                  ^                            # Start of a line
2555                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2556                  (\S.*[|].*) \n                # $1: Header row (at least one pipe)
2557                  
2558                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2559                  ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
2560                  
2561                  (                            # $3: Cells
2562                      (?>
2563                          .* [|] .* \n        # Row content
2564                      )*
2565                  )
2566                  (?=\n|\Z)                    # Stop at final double newline.
2567              }xm',
2568              array($this, '_DoTable_callback'), $text);
2569  
2570          return $text;
2571      }
2572  	protected function _doTable_leadingPipe_callback($matches) {
2573          $head        = $matches[1];
2574          $underline    = $matches[2];
2575          $content    = $matches[3];
2576          
2577          # Remove leading pipe for each row.
2578          $content    = preg_replace('/^ *[|]/m', '', $content);
2579          
2580          return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2581      }
2582  	protected function _doTable_makeAlignAttr($alignname)
2583      {
2584          if (empty($this->table_align_class_tmpl))
2585              return " align=\"$alignname\"";
2586  
2587          $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
2588          return " class=\"$classname\"";
2589      }
2590  	protected function _doTable_callback($matches) {
2591          $head        = $matches[1];
2592          $underline    = $matches[2];
2593          $content    = $matches[3];
2594  
2595          # Remove any tailing pipes for each line.
2596          $head        = preg_replace('/[|] *$/m', '', $head);
2597          $underline    = preg_replace('/[|] *$/m', '', $underline);
2598          $content    = preg_replace('/[|] *$/m', '', $content);
2599          
2600          # Reading alignement from header underline.
2601          $separators    = preg_split('/ *[|] */', $underline);
2602          foreach ($separators as $n => $s) {
2603              if (preg_match('/^ *-+: *$/', $s))
2604                  $attr[$n] = $this->_doTable_makeAlignAttr('right');
2605              else if (preg_match('/^ *:-+: *$/', $s))
2606                  $attr[$n] = $this->_doTable_makeAlignAttr('center');
2607              else if (preg_match('/^ *:-+ *$/', $s))
2608                  $attr[$n] = $this->_doTable_makeAlignAttr('left');
2609              else
2610                  $attr[$n] = '';
2611          }
2612          
2613          # Parsing span elements, including code spans, character escapes, 
2614          # and inline HTML tags, so that pipes inside those gets ignored.
2615          $head        = $this->parseSpan($head);
2616          $headers    = preg_split('/ *[|] */', $head);
2617          $col_count    = count($headers);
2618          $attr       = array_pad($attr, $col_count, '');
2619          
2620          # Write column headers.
2621          $text = "<table>\n";
2622          $text .= "<thead>\n";
2623          $text .= "<tr>\n";
2624          foreach ($headers as $n => $header)
2625              $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2626          $text .= "</tr>\n";
2627          $text .= "</thead>\n";
2628          
2629          # Split content by row.
2630          $rows = explode("\n", trim($content, "\n"));
2631          
2632          $text .= "<tbody>\n";
2633          foreach ($rows as $row) {
2634              # Parsing span elements, including code spans, character escapes, 
2635              # and inline HTML tags, so that pipes inside those gets ignored.
2636              $row = $this->parseSpan($row);
2637              
2638              # Split row by cell.
2639              $row_cells = preg_split('/ *[|] */', $row, $col_count);
2640              $row_cells = array_pad($row_cells, $col_count, '');
2641              
2642              $text .= "<tr>\n";
2643              foreach ($row_cells as $n => $cell)
2644                  $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2645              $text .= "</tr>\n";
2646          }
2647          $text .= "</tbody>\n";
2648          $text .= "</table>";
2649          
2650          return $this->hashBlock($text) . "\n";
2651      }
2652  
2653      
2654  	protected function doDefLists($text) {
2655      #
2656      # Form HTML definition lists.
2657      #
2658          $less_than_tab = $this->tab_width - 1;
2659  
2660          # Re-usable pattern to match any entire dl list:
2661          $whole_list_re = '(?>
2662              (                                # $1 = whole list
2663                (                                # $2
2664                  [ ]{0,'.$less_than_tab.'}
2665                  ((?>.*\S.*\n)+)                # $3 = defined term
2666                  \n?
2667                  [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2668                )
2669                (?s:.+?)
2670                (                                # $4
2671                    \z
2672                  |
2673                    \n{2,}
2674                    (?=\S)
2675                    (?!                        # Negative lookahead for another term
2676                      [ ]{0,'.$less_than_tab.'}
2677                      (?: \S.*\n )+?            # defined term
2678                      \n?
2679                      [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2680                    )
2681                    (?!                        # Negative lookahead for another definition
2682                      [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2683                    )
2684                )
2685              )
2686          )'; // mx
2687  
2688          $text = preg_replace_callback('{
2689                  (?>\A\n?|(?<=\n\n))
2690                  '.$whole_list_re.'
2691              }mx',
2692              array($this, '_doDefLists_callback'), $text);
2693  
2694          return $text;
2695      }
2696  	protected function _doDefLists_callback($matches) {
2697          # Re-usable patterns to match list item bullets and number markers:
2698          $list = $matches[1];
2699          
2700          # Turn double returns into triple returns, so that we can make a
2701          # paragraph for the last item in a list, if necessary:
2702          $result = trim($this->processDefListItems($list));
2703          $result = "<dl>\n" . $result . "\n</dl>";
2704          return $this->hashBlock($result) . "\n\n";
2705      }
2706  
2707  
2708  	protected function processDefListItems($list_str) {
2709      #
2710      #    Process the contents of a single definition list, splitting it
2711      #    into individual term and definition list items.
2712      #
2713          $less_than_tab = $this->tab_width - 1;
2714          
2715          # trim trailing blank lines:
2716          $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2717  
2718          # Process definition terms.
2719          $list_str = preg_replace_callback('{
2720              (?>\A\n?|\n\n+)                    # leading line
2721              (                                # definition terms = $1
2722                  [ ]{0,'.$less_than_tab.'}    # leading whitespace
2723                  (?!\:[ ]|[ ])                # negative lookahead for a definition
2724                                              #   mark (colon) or more whitespace.
2725                  (?> \S.* \n)+?                # actual term (not whitespace).    
2726              )            
2727              (?=\n?[ ]{0,3}:[ ])                # lookahead for following line feed 
2728                                              #   with a definition mark.
2729              }xm',
2730              array($this, '_processDefListItems_callback_dt'), $list_str);
2731  
2732          # Process actual definitions.
2733          $list_str = preg_replace_callback('{
2734              \n(\n+)?                        # leading line = $1
2735              (                                # marker space = $2
2736                  [ ]{0,'.$less_than_tab.'}    # whitespace before colon
2737                  \:[ ]+                        # definition mark (colon)
2738              )
2739              ((?s:.+?))                        # definition text = $3
2740              (?= \n+                         # stop at next definition mark,
2741                  (?:                            # next term or end of text
2742                      [ ]{0,'.$less_than_tab.'} \:[ ]    |
2743                      <dt> | \z
2744                  )                        
2745              )                    
2746              }xm',
2747              array($this, '_processDefListItems_callback_dd'), $list_str);
2748  
2749          return $list_str;
2750      }
2751  	protected function _processDefListItems_callback_dt($matches) {
2752          $terms = explode("\n", trim($matches[1]));
2753          $text = '';
2754          foreach ($terms as $term) {
2755              $term = $this->runSpanGamut(trim($term));
2756              $text .= "\n<dt>" . $term . "</dt>";
2757          }
2758          return $text . "\n";
2759      }
2760  	protected function _processDefListItems_callback_dd($matches) {
2761          $leading_line    = $matches[1];
2762          $marker_space    = $matches[2];
2763          $def            = $matches[3];
2764  
2765          if ($leading_line || preg_match('/\n{2,}/', $def)) {
2766              # Replace marker with the appropriate whitespace indentation
2767              $def = str_repeat(' ', strlen($marker_space)) . $def;
2768              $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2769              $def = "\n". $def ."\n";
2770          }
2771          else {
2772              $def = rtrim($def);
2773              $def = $this->runSpanGamut($this->outdent($def));
2774          }
2775  
2776          return "\n<dd>" . $def . "</dd>\n";
2777      }
2778  
2779  
2780  	protected function doFencedCodeBlocks($text) {
2781      #
2782      # Adding the fenced code block syntax to regular Markdown:
2783      #
2784      # ~~~
2785      # Code block
2786      # ~~~
2787      #
2788          $less_than_tab = $this->tab_width;
2789          
2790          $text = preg_replace_callback('{
2791                  (?:\n|\A)
2792                  # 1: Opening marker
2793                  (
2794                      (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
2795                  )
2796                  [ ]*
2797                  (?:
2798                      \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
2799                  |
2800                      '.$this->id_class_attr_catch_re.' # 3: Extra attributes
2801                  )?
2802                  [ ]* \n # Whitespace and newline following marker.
2803                  
2804                  # 4: Content
2805                  (
2806                      (?>
2807                          (?!\1 [ ]* \n)    # Not a closing marker.
2808                          .*\n+
2809                      )+
2810                  )
2811                  
2812                  # Closing marker.
2813                  \1 [ ]* (?= \n )
2814              }xm',
2815              array($this, '_doFencedCodeBlocks_callback'), $text);
2816  
2817          return $text;
2818      }
2819  	protected function _doFencedCodeBlocks_callback($matches) {
2820          $classname =& $matches[2];
2821          $attrs     =& $matches[3];
2822          $codeblock = $matches[4];
2823          $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2824          $codeblock = preg_replace_callback('/^\n+/',
2825              array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
2826  
2827          if ($classname != "") {
2828              if ($classname{0} == '.')
2829                  $classname = substr($classname, 1);
2830              $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
2831          } else {
2832              $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
2833          }
2834          $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
2835          $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
2836          $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
2837          
2838          return "\n\n".$this->hashBlock($codeblock)."\n\n";
2839      }
2840  	protected function _doFencedCodeBlocks_newlines($matches) {
2841          return str_repeat("<br$this->empty_element_suffix", 
2842              strlen($matches[0]));
2843      }
2844  
2845  
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # In each list the '' pattern rejects an underscore marker that is
    # preceded by [a-zA-Z0-9_], and the underscore-keyed pattern rejects one
    # that is followed by [a-zA-Z0-9_]. The asterisk patterns carry no such
    # word-character restriction.
    #
    # NOTE(review): the '' entry looks like the pattern that opens a span
    # and the token-keyed entries the patterns that close a span opened
    # with that token -- confirm against the code consuming these lists,
    # which is not visible here.
    #
    # Single marker (* or _).
    protected $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
        '*' => '(?<![\s*])\*(?!\*)',
        '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
        );
    # Double marker (** or __).
    protected $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
        '**' => '(?<![\s*])\*\*(?!\*)',
        '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
        );
    # Triple marker (*** or ___).
    protected $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
        '***' => '(?<![\s*])\*\*\*(?!\*)',
        '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
        );
2865  
2866  
2867  	protected function formParagraphs($text) {
2868      #
2869      #    Params:
2870      #        $text - string to process with html <p> tags
2871      #
2872          # Strip leading and trailing lines:
2873          $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2874          
2875          $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2876  
2877          #
2878          # Wrap <p> tags and unhashify HTML blocks
2879          #
2880          foreach ($grafs as $key => $value) {
2881              $value = trim($this->runSpanGamut($value));
2882              
2883              # Check if this should be enclosed in a paragraph.
2884              # Clean tag hashes & block tag hashes are left alone.
2885              $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2886              
2887              if ($is_p) {
2888                  $value = "<p>$value</p>";
2889              }
2890              $grafs[$key] = $value;
2891          }
2892          
2893          # Join grafs in one text, then unhash HTML tags. 
2894          $text = implode("\n\n", $grafs);
2895          
2896          # Finish by removing any tag hashes still present in $text.
2897          $text = $this->unhash($text);
2898          
2899          return $text;
2900      }
2901      
2902      
2903      ### Footnotes
2904      
2905  	protected function stripFootnotes($text) {
2906      #
2907      # Strips link definitions from text, stores the URLs and titles in
2908      # hash references.
2909      #
2910          $less_than_tab = $this->tab_width - 1;
2911  
2912          # Link defs are in the form: [^id]: url "optional title"
2913          $text = preg_replace_callback('{
2914              ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:    # note_id = $1
2915                [ ]*
2916                \n?                    # maybe *one* newline
2917              (                        # text = $2 (no blank lines allowed)
2918                  (?:                    
2919                      .+                # actual text
2920                  |
2921                      \n                # newlines but 
2922                      (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
2923                      (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
2924                                      # by non-indented content
2925                  )*
2926              )        
2927              }xm',
2928              array($this, '_stripFootnotes_callback'),
2929              $text);
2930          return $text;
2931      }
2932  	protected function _stripFootnotes_callback($matches) {
2933          $note_id = $this->fn_id_prefix . $matches[1];
2934          $this->footnotes[$note_id] = $this->outdent($matches[2]);
2935          return ''; # String that will replace the block
2936      }
2937  
2938  
2939  	protected function doFootnotes($text) {
2940      #
2941      # Replace footnote references in $text [^id] with a special text-token 
2942      # which will be replaced by the actual footnote marker in appendFootnotes.
2943      #
2944          if (!$this->in_anchor) {
2945              $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2946          }
2947          return $text;
2948      }
2949  
2950      
protected function appendFootnotes($text) {
#
# Append footnote list to text.
#
# Every footnote token in $text is first passed to
# _appendFootnotes_callback, which produces the marker and queues the
# referenced footnote in $this->footnotes_ordered. When at least one
# footnote was queued, a <div class="footnotes"> holding an ordered list
# of the footnote bodies is appended; each body ends with one backlink
# per reference made to it. Returns the resulting text.
#
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
        array($this, '_appendFootnotes_callback'), $text);

    if (empty($this->footnotes_ordered)) {
        return $text;
    }

    $text .= "\n\n";
    $text .= "<div class=\"footnotes\">\n";
    $text .= "<hr". $this->empty_element_suffix ."\n";
    $text .= "<ol>\n\n";

    # Attribute template shared by all backlinks. It may contain the
    # "%%" placeholder, which stands for the footnote number and is
    # filled in separately for each footnote below.
    $attr_template = "";
    if ($this->fn_backlink_class != "") {
        $class = $this->fn_backlink_class;
        $class = $this->encodeAttribute($class);
        $attr_template .= " class=\"$class\"";
    }
    if ($this->fn_backlink_title != "") {
        $title = $this->fn_backlink_title;
        $title = $this->encodeAttribute($title);
        $attr_template .= " title=\"$title\"";
    }
    $num = 0;

    # A while loop rather than foreach: parsing a footnote body can run
    # _appendFootnotes_callback again and queue further footnotes.
    while (!empty($this->footnotes_ordered)) {
        $footnote = reset($this->footnotes_ordered);
        $note_id = key($this->footnotes_ordered);
        unset($this->footnotes_ordered[$note_id]);
        $ref_count = $this->footnotes_ref_count[$note_id];
        unset($this->footnotes_ref_count[$note_id]);
        unset($this->footnotes[$note_id]);

        $footnote .= "\n"; # Need to append newline before parsing.
        $footnote = $this->runBlockGamut("$footnote\n");
        $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
            array($this, '_appendFootnotes_callback'), $footnote);

        # Substitute the footnote number into a copy of the template.
        # Writing the result back into the template would consume the
        # "%%" placeholder on the first footnote and make every later
        # backlink repeat the first footnote's number.
        $attr = str_replace("%%", ++$num, $attr_template);
        $note_id = $this->encodeAttribute($note_id);

        # Prepare backlink, multiple backlinks if multiple references
        $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
        for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
            $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
        }
        # Add backlink to last paragraph; create new paragraph if needed.
        if (preg_match('{</p>$}', $footnote)) {
            # Drop the 4-character "</p>" and re-close after the backlink.
            $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
        } else {
            $footnote .= "\n\n<p>$backlink</p>";
        }

        $text .= "<li id=\"fn:$note_id\">\n";
        $text .= $footnote . "\n";
        $text .= "</li>\n\n";
    }

    $text .= "</ol>\n";
    $text .= "</div>";
    return $text;
}
protected function _appendFootnotes_callback($matches) {
#
# preg_replace_callback handler that turns one footnote token into its
# <sup> marker. $matches[1] is the footnote identifier without
# $this->fn_id_prefix. When no footnote is stored under the prefixed
# identifier, the literal "[^id]" text is returned instead.
#
    $label = $matches[1];
    $id = $this->fn_id_prefix . $label;

    # No footnote content available for this id: restore the literal text.
    if (!isset($this->footnotes[$id])) {
        return "[^".$label."]";
    }

    if (isset($this->footnotes_numbers[$id])) {
        # Repeated reference: reuse the number, bump the reference count
        # and use the new count as suffix of the marker's id.
        $number = $this->footnotes_numbers[$id];
        $ref_suffix = $this->footnotes_ref_count[$id] += 1;
    } else {
        # First reference: queue the footnote content for output and
        # assign the next footnote number to it.
        $this->footnotes_ordered[$id] = $this->footnotes[$id];
        $this->footnotes_ref_count[$id] = 1;
        $number = $this->footnotes_numbers[$id] = $this->footnote_counter++;
        $ref_suffix = '';
    }

    # Optional class and title attributes of the link.
    $link_attr = "";
    if ($this->fn_link_class != "") {
        $link_attr .= ' class="' . $this->encodeAttribute($this->fn_link_class) . '"';
    }
    if ($this->fn_link_title != "") {
        $link_attr .= ' title="' . $this->encodeAttribute($this->fn_link_title) . '"';
    }

    # "%%" in the attributes stands for the footnote number.
    $link_attr = str_replace("%%", $number, $link_attr);
    $safe_id = $this->encodeAttribute($id);

    return '<sup id="fnref' . $ref_suffix . ':' . $safe_id . '">'
        . '<a href="#fn:' . $safe_id . '"' . $link_attr . '>' . $number . '</a>'
        . '</sup>';
}
3056          
3057      
3058      ### Abbreviations ###
3059      
protected function stripAbbreviations($text) {
#
# Find abbreviation definitions in $text -- lines of the form
#     *[abbr]: description
# preceded by fewer than tab_width spaces -- and hand each of them to
# _stripAbbreviations_callback. The text with every definition replaced
# by the callback's return value is returned.
#
    $max_indent = $this->tab_width - 1;

    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:    # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm';
    $handler = array($this, '_stripAbbreviations_callback');

    return preg_replace_callback($definition_re, $handler, $text);
}
protected function _stripAbbreviations_callback($matches) {
#
# preg_replace_callback handler registered by stripAbbreviations.
# $matches[1] is the abbreviated word, $matches[2] its description.
# The regex-quoted word is added to the alternation kept in
# $this->abbr_word_re and the trimmed description is stored in
# $this->abbr_desciptions under the word. Returns the replacement for
# the definition.
#
    $abbr_word = $matches[1];
    $abbr_desc = $matches[2];

    # Test against the empty string instead of relying on truthiness:
    # an alternation consisting only of "0" is falsy in PHP, which would
    # skip the "|" separator and fuse two words into a single one.
    if ((string) $this->abbr_word_re !== '')
        $this->abbr_word_re .= '|';
    $this->abbr_word_re .= preg_quote($abbr_word);
    $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    return ''; # String that will replace the block
}
3084      
3085      
protected function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
#
# Each occurrence of a word from $this->abbr_word_re that is neither
# preceded nor followed by a word character or \x1A is passed to
# _doAbbreviations_callback. $text is returned unchanged when no
# abbreviation word is known.
#
    # Test against the empty string instead of relying on truthiness, so
    # that an alternation consisting only of "0" is not taken for "none".
    if ((string) $this->abbr_word_re !== '') {
        // cannot use the /x modifier because abbr_word_re may 
        // contain significant spaces:
        $text = preg_replace_callback('{'.
            '(?<![\w\x1A])'.
            '(?:'.$this->abbr_word_re.')'.
            '(?![\w\x1A])'.
            '}', 
            array($this, '_doAbbreviations_callback'), $text);
    }
    return $text;
}
protected function _doAbbreviations_callback($matches) {
#
# preg_replace_callback handler registered by doAbbreviations.
# $matches[0] is the matched word. When a description is stored for it
# in $this->abbr_desciptions, the word is wrapped in an <abbr> element
# (with a title attribute unless the description is empty) and the
# element is passed through hashPart. Otherwise the word is returned
# as is.
#
    $abbr = $matches[0];
    if (!isset($this->abbr_desciptions[$abbr])) {
        return $abbr;
    }

    $desc = $this->abbr_desciptions[$abbr];

    # Only a really empty description omits the title attribute; empty()
    # would also discard the legitimate description "0".
    if ((string) $desc === '') {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    $desc = $this->encodeAttribute($desc);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
3116  
3117  }
PHP Cross Reference of moodle-2.8

/lib/markdown/ -> Markdown.php (source)