[ Index ]

PHP Cross Reference of vtigercrm-6.1.0

title

Body

[close]

/libraries/PHPMarkdown/Michelf/ -> Markdown.php (source)

   1  <?php
   2  #
   3  # Markdown  -  A text-to-HTML conversion tool for web writers
   4  #
   5  # PHP Markdown  
   6  # Copyright (c) 2004-2014 Michel Fortin  
   7  # <http://michelf.com/projects/php-markdown/>
   8  #
   9  # Original Markdown  
  10  # Copyright (c) 2004-2006 John Gruber  
  11  # <http://daringfireball.net/projects/markdown/>
  12  #
  13  namespace Michelf;
  14  
  15  
  16  #
  17  # Markdown Parser Class
  18  #
  19  class Markdown implements MarkdownInterface {
  20  
  21      ### Version ###
  22  
  23      const  MARKDOWNLIB_VERSION  =  "1.4.1";
  24  
  25      ### Simple Function Interface ###
  26  
  27  	public static function defaultTransform($text) {
  28      #
  29      # Initialize the parser and return the result of its transform method.
  30      # This will work fine for derived classes too.
  31      #
  32          # Take parser class on which this function was called.
  33          $parser_class = \get_called_class();
  34  
  35          # try to take parser from the static parser list
  36          static $parser_list;
  37          $parser =& $parser_list[$parser_class];
  38  
  39          # create the parser it not already set
  40          if (!$parser)
  41              $parser = new $parser_class;
  42  
  43          # Transform text using parser.
  44          return $parser->transform($text);
  45      }
  46  
  47      ### Configuration Variables ###
  48  
    # Suffix appended to the empty elements generated by the parser
    # (<hr, <br and <img are closed with it).
    # Change to ">" for HTML output.
    public $empty_element_suffix = " />";
    # Block constructs may be indented by at most tab_width - 1 spaces.
    public $tab_width = 4;
    
    # Change to `true` to disallow markup or entities.
    # With $no_markup set, hashHTMLBlocks() returns its input untouched.
    public $no_markup = false;
    public $no_entities = false;
    
    # Predefined urls and titles for reference links and images.
    # setup() copies them into the working tables before each transformation.
    public $predef_urls = array();
    public $predef_titles = array();
  60  
  61  
  62      ### Parser Implementation ###
  63  
    # Regex to match balanced [brackets].
    # Needed to insert a maximum bracket depth while converting to PHP.
    # The *_re members are built by the constructor from the matching depth.
    protected $nested_brackets_depth = 6;
    protected $nested_brackets_re;
    
    # Same idea for balanced (parentheses) inside inline urls.
    protected $nested_url_parenthesis_depth = 4;
    protected $nested_url_parenthesis_re;

    # Table of hash values for escaped characters:
    # $escape_chars_re is the character class the constructor derives from
    # $escape_chars with preg_quote().
    protected $escape_chars = '\`*_{}[]()>#+-.!';
    protected $escape_chars_re;
  75  
  76  
  77  	public function __construct() {
  78      #
  79      # Constructor function. Initialize appropriate member variables.
  80      #
  81          $this->_initDetab();
  82          $this->prepareItalicsAndBold();
  83      
  84          $this->nested_brackets_re = 
  85              str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  86              str_repeat('\])*', $this->nested_brackets_depth);
  87      
  88          $this->nested_url_parenthesis_re = 
  89              str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  90              str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  91          
  92          $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
  93          
  94          # Sort document, block, and span gamut in ascendent priority order.
  95          asort($this->document_gamut);
  96          asort($this->block_gamut);
  97          asort($this->span_gamut);
  98      }
  99  
 100  
    # Internal hashes used during transformation.
    # $urls and $titles are indexed by lower-cased link id; $html_hashes maps
    # each token produced by hashPart() to the text it stands for.
    protected $urls = array();
    protected $titles = array();
    protected $html_hashes = array();
    
    # Status flag to avoid invalid nesting.
    # Set by doAnchors() while it runs; doAnchors() returns its input
    # unchanged when called with the flag already set.
    protected $in_anchor = false;
 108      
 109      
	protected function setup() {
    #
    # Puts the parser in its initial state; transform() calls this before
    # it starts working on a document.
    #
        # A document never starts inside a link.
        $this->in_anchor = false;

        # Nothing has been hashed yet.
        $this->html_hashes = array();

        # Reference tables start from the predefined urls and titles.
        $this->titles = $this->predef_titles;
        $this->urls   = $this->predef_urls;
    }
 122      
	protected function teardown() {
    #
    # Called after the transformation process to empty the working tables,
    # which may otherwise be taking up memory unnecessarily.
    #
        $this->urls = $this->titles = $this->html_hashes = array();
    }
 132  
 133  
	public function transform($text) {
    #
    # Main function. Normalizes the input text, runs it through every
    # method of the document gamut and returns the result followed by a
    # newline.
    #
        $this->setup();

        # Remove UTF-8 BOM and marker character in input, if present.
        # (\x1A is the character hashPart() puts inside its tokens.)
        $doc = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

        # Standardize line endings (DOS to Unix and Mac to Unix), and make
        # sure the text ends with a couple of newlines.
        $doc = preg_replace('{\r\n?}', "\n", $doc) . "\n\n";

        # Convert all tabs to spaces, then turn block-level HTML blocks
        # into hash entries.
        $doc = $this->hashHTMLBlocks($this->detab($doc));

        # Empty any line made only of spaces, so that later patterns can
        # match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ ]*\n+/ .
        $doc = preg_replace('/^[ ]+$/m', '', $doc);

        # Run document gamut methods in their stored order.
        foreach (array_keys($this->document_gamut) as $step) {
            $doc = $this->$step($doc);
        }

        $this->teardown();

        return $doc . "\n";
    }
 172      
    # Methods transform() applies to the whole document. Keys are method
    # names, values are priorities; the constructor sorts the list so that
    # lower values run first.
    protected $document_gamut = array(
        # Strip link definitions, store in hashes.
        "stripLinkDefinitions" => 20,
        
        "runBasicBlockGamut"   => 30,
        );
 179  
 180  
	protected function stripLinkDefinitions($text) {
    #
    # Strips link definitions from text, stores the URLs and titles in
    # hash references.
    #
    # Every definition found is handed to _stripLinkDefinitions_callback(),
    # whose return value replaces it. Returns the remaining text.
    #
        # A definition may be indented by at most tab_width - 1 spaces.
        $less_than_tab = $this->tab_width - 1;

        # Link defs are in the form: ^[id]: url "optional title"
        $text = preg_replace_callback('{
                            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:    # id = $1
                              [ ]*
                              \n?                # maybe *one* newline
                              [ ]*
                            (?:
                              <(.+?)>            # url = $2
                            |
                              (\S+?)            # url = $3
                            )
                              [ ]*
                              \n?                # maybe one newline
                              [ ]*
                            (?:
                                (?<=\s)            # lookbehind for whitespace
                                ["(]
                                (.*?)            # title = $4
                                [")]
                                [ ]*
                            )?    # title is optional
                            (?:\n+|\Z)
            }xm',
            array($this, '_stripLinkDefinitions_callback'),
            $text);
        return $text;
    }
	protected function _stripLinkDefinitions_callback($matches) {
    #
    # Records one link definition matched by stripLinkDefinitions() in the
    # url and title tables, under its lower-cased id.
    #
        $id = strtolower($matches[1]);

        # The url is captured in $2 when written <url>, in $3 otherwise.
        if ($matches[2] == '') {
            $this->urls[$id] = $matches[3];
        } else {
            $this->urls[$id] = $matches[2];
        }

        # Bound by reference: the title group is optional, so $4 may be
        # missing from $matches.
        $this->titles[$id] =& $matches[4];

        # The definition itself is replaced by nothing.
        return '';
    }
 222  
 223  
	protected function hashHTMLBlocks($text) {
    #
    # Replaces the block-level HTML found in $text (block tags, <hr>,
    # standalone comments and <? ?> / <% %> processing instructions) with
    # hash tokens. Every match goes through _hashHTMLBlocks_callback().
    # Returns the text with the blocks replaced, or $text unchanged when
    # $no_markup is set.
    #
        if ($this->no_markup)  return $text;

        # A block may be indented by at most tab_width - 1 spaces.
        $less_than_tab = $this->tab_width - 1;

        # Hashify HTML blocks:
        # We only want to do this for block-level HTML tags, such as headers,
        # lists, and tables. That's because we still want to wrap <p>s around
        # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
        # phrase emphasis, and spans. The list of tags we're looking for is
        # hard-coded:
        #
        # *  List "a" is made of tags which can be both inline or block-level.
        #    These will be treated block-level when the start tag is alone on 
        #    its line, otherwise they're not matched here and will be taken as 
        #    inline later.
        # *  List "b" is made of tags which are always block-level;
        #
        $block_tags_a_re = 'ins|del';
        $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                           'script|noscript|style|form|fieldset|iframe|math|svg|'.
                           'article|section|nav|aside|hgroup|header|footer|'.
                           'figure';

        # Regular expression for the content of a block tag.
        $nested_tags_level = 4;
        $attr = '
            (?>                # optional tag attributes
              \s            # starts with whitespace
              (?>
                [^>"/]+        # text outside quotes
              |
                /+(?!>)        # slash not followed by ">"
              |
                "[^"]*"        # text inside double quotes (tolerate ">")
              |
                \'[^\']*\'    # text inside single quotes (tolerate ">")
              )*
            )?    
            ';
        $content =
            str_repeat('
                (?>
                  [^<]+            # content without tag
                |
                  <\2            # nested opening tag
                    '.$attr.'    # attributes
                    (?>
                      />
                    |
                      >', $nested_tags_level).    # end of opening tag
                      '.*?'.                    # last level nested tag content
            str_repeat('
                      </\2\s*>    # closing nested tag
                    )
                  |                
                    <(?!/\2\s*>    # other tags with a different name
                  )
                )*',
                $nested_tags_level);
        # Same content pattern for the "a" list, whose tag name is group 3.
        $content2 = str_replace('\2', '\3', $content);

        # First, look for nested blocks, e.g.:
        #     <div>
        #         <div>
        #         tags for inner block must be indented.
        #         </div>
        #     </div>
        #
        # The outermost tags must start at the left margin for this to match, and
        # the inner nested divs must be indented.
        # We need to do this before the next, more liberal match, because the next
        # match will start at the first `<div>` and stop at the first `</div>`.
        #
        # Capture groups are numbered over the whole pattern: $2 and $3 are
        # the tag names of the first two branches, $4 is "hr" and $5 is the
        # processing-instruction marker. The closing marker is therefore
        # matched with \5; a back reference to a group that took no part in
        # the match (as \2 would be in that branch) always fails, which
        # kept that branch from ever matching.
        $text = preg_replace_callback('{(?>
            (?>
                (?<=\n)            # Starting on its own line
                |                # or
                \A\n?            # the at beginning of the doc
            )
            (                        # save in $1

              # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
              # in between.
                    
                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_b_re.')# start tag = $2
                        '.$attr.'>            # attributes followed by > and \n
                        '.$content.'        # content, support nesting
                        </\2>                # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)    # followed by a newline or end of document

            | # Special version for tags of group a.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_a_re.')# start tag = $3
                        '.$attr.'>[ ]*\n    # attributes followed by >
                        '.$content2.'        # content, support nesting
                        </\3>                # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)    # followed by a newline or end of document
                    
            | # Special case just for <hr />. It was easier to make a special 
              # case than to make the other regex more complicated.
            
                        [ ]{0,'.$less_than_tab.'}
                        <(hr)                # start tag = $4
                        '.$attr.'            # attributes
                        /?>                    # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)        # followed by a blank line or end of document
            
            | # Special case for standalone HTML comments:
            
                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <!-- .*? -->
                    )
                    [ ]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
            
            | # PHP and ASP-style processor instructions (<? and <%)
            
                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <([?%])            # $5
                        .*?
                        \5>
                    )
                    [ ]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                    
            )
            )}Sxmi',
            array($this, '_hashHTMLBlocks_callback'),
            $text);

        return $text;
    }
	protected function _hashHTMLBlocks_callback($matches) {
    #
    # Replaces one HTML block matched by hashHTMLBlocks() (capture $1) with
    # its block token, surrounded by blank lines.
    #
        return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
    }
 368      
 369      
	protected function hashPart($text, $boundary = 'X') {
    #
    # Called whenever a function inserts an atomic element in the text
    # stream. Stores $text in the hash table and returns the unique token
    # that stands for it until unhash is called.
    #
    # The $boundary argument specifies which character surrounds the token.
    # By convention, "B" is used for block elements that need not be
    # wrapped into paragraph tags at the end, ":" is used for elements that
    # are word separators and "X" is used in the general case.
    #
        # Counter kept between calls; it makes every token distinct.
        static $i = 0;

        # Swap back any token already present in $text so we do not have to
        # `unhash` multiple times at the end.
        $plain = $this->unhash($text);

        # Then hash the block.
        $i++;
        $token = $boundary . "\x1A" . $i . $boundary;
        $this->html_hashes[$token] = $plain;

        # String that will replace the tag.
        return $token;
    }
 391  
 392  
	protected function hashBlock($text) {
    #
    # Hashes $text as a block-level element: same as hashPart() with the
    # 'B' boundary character.
    #
        $block_token = $this->hashPart($text, 'B');
        return $block_token;
    }
 399  
 400  
    # Keys are method names, values are priorities; the constructor sorts
    # the list so that lower values run first in runBasicBlockGamut().
    protected $block_gamut = array(
    #
    # These are all the transformations that form block-level
    # tags like paragraphs, headers, and list items.
    #
        "doHeaders"         => 10,
        "doHorizontalRules" => 20,
        
        "doLists"           => 40,
        "doCodeBlocks"      => 50,
        "doBlockQuotes"     => 60,
        );
 413  
	protected function runBlockGamut($text) {
    #
    # Run block gamut transformations on $text, hashing its HTML blocks
    # first.
    #
        # Raw HTML in the Markdown source must be hashed before anything
        # else. This has to be done for each block, and not only once at
        # the beginning of the transformation, since hashed blocks can be
        # part of list items and could have been indented. Indented blocks
        # would have been seen as a code block in a previous pass of
        # hashHTMLBlocks.
        return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
    }
 427      
	protected function runBasicBlockGamut($text) {
    #
    # Run block gamut transformations, without hashing HTML blocks. This is
    # useful when HTML blocks are known to be already hashed, like in the
    # first whole-document pass.
    #
        foreach (array_keys($this->block_gamut) as $step) {
            $text = $this->$step($text);
        }

        # Finally form paragraphs and restore hashed blocks.
        return $this->formParagraphs($text);
    }
 443      
 444      
	protected function doHorizontalRules($text) {
    #
    # Replaces every horizontal rule line (three or more -, * or _ markers,
    # optionally separated by spaces) with a hashed <hr> element.
    #
        # The same token is used for every rule found in $text.
        $hr_token = $this->hashBlock("<hr" . $this->empty_element_suffix);

        $rule_re =
            '{
                ^[ ]{0,3}    # Leading space
                ([-*_])        # $1: First marker
                (?>            # Repeated marker group
                    [ ]{0,2}    # Zero, one, or two spaces.
                    \1            # Marker character
                ){2,}        # Group repeated at least twice
                [ ]*        # Tailing spaces
                $            # End of line.
            }mx';

        return preg_replace($rule_re, "\n" . $hr_token . "\n", $text);
    }
 461  
 462  
    # Keys are method names, values are priorities; the constructor sorts
    # the list so that lower values run first in runSpanGamut().
    protected $span_gamut = array(
    #
    # These are all the transformations that occur *within* block-level
    # tags like paragraphs, headers, and list items.
    #
        # Process character escapes, code spans, and inline HTML
        # in one shot.
        "parseSpan"           => -30,

        # Process anchor and image tags. Images must come first,
        # because ![foo][f] looks like an anchor.
        "doImages"            =>  10,
        "doAnchors"           =>  20,
        
        # Make links out of things like `<http://example.com/>`
        # Must come after doAnchors, because you can use < and >
        # delimiters in inline links like [this](<url>).
        "doAutoLinks"         =>  30,
        "encodeAmpsAndAngles" =>  40,

        "doItalicsAndBold"    =>  50,
        "doHardBreaks"        =>  60,
        );
 486  
	protected function runSpanGamut($text) {
    #
    # Run span gamut transformations: $text goes through every method of
    # the span gamut, in the order the list is stored, and is returned.
    #
        foreach (array_keys($this->span_gamut) as $step) {
            $text = $this->$step($text);
        }
        return $text;
    }
 497      
 498      
	protected function doHardBreaks($text) {
    #
    # Do hard breaks: two or more spaces followed by a newline are replaced
    # by whatever _doHardBreaks_callback() returns.
    #
        $on_break = array($this, '_doHardBreaks_callback');
        return preg_replace_callback('/ {2,}\n/', $on_break, $text);
    }
	protected function _doHardBreaks_callback($matches) {
    #
    # Returns the token of a hashed <br> element followed by a newline.
    #
        $break = "<br" . $this->empty_element_suffix . "\n";
        return $this->hashPart($break);
    }
 507  
 508  
	protected function doAnchors($text) {
    #
    # Turn Markdown link shortcuts into XHTML <a> tags.
    #
    # Three passes are made over $text: reference-style links, inline
    # links, then reference-style shortcuts. Returns the resulting text.
    #
        # Do nothing when called while a link is already being processed;
        # the flag is raised for the duration of this call.
        if ($this->in_anchor) return $text;
        $this->in_anchor = true;
        
        #
        # First, handle reference-style links: [link text] [id]
        #
        $text = preg_replace_callback('{
            (                    # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.')    # link text = $2
              \]

              [ ]?                # one optional space
              (?:\n[ ]*)?        # one optional newline followed by spaces

              \[
                (.*?)        # id = $3
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        #
        # Next, inline-style links: [link text](url "optional title")
        #
        $text = preg_replace_callback('{
            (                # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.')    # link text = $2
              \]
              \(            # literal paren
                [ \n]*
                (?:
                    <(.+?)>    # href = $3
                |
                    ('.$this->nested_url_parenthesis_re.')    # href = $4
                )
                [ \n]*
                (            # $5
                  ([\'"])    # quote char = $6
                  (.*?)        # Title = $7
                  \6        # matching quote
                  [ \n]*    # ignore any spaces/tabs between closing quote and )
                )?            # title is optional
              \)
            )
            }xs',
            array($this, '_doAnchors_inline_callback'), $text);

        #
        # Last, handle reference-style shortcuts: [link text]
        # These must come last in case you've also got [link text][1]
        # or [link text](/foo)
        #
        # This pattern has no id group, so the callback falls back on the
        # link text as id.
        $text = preg_replace_callback('{
            (                    # wrap whole match in $1
              \[
                ([^\[\]]+)        # link text = $2; can\'t contain [ or ]
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        $this->in_anchor = false;
        return $text;
    }
	protected function _doAnchors_reference_callback($matches) {
    #
    # Builds the <a> tag for a reference-style link matched by doAnchors()
    # and returns its token. When the id is not a known reference, the
    # matched text is returned as is.
    #
        $link_text = $matches[2];

        # Bound by reference: the shortcut pattern has no id group, so $3
        # may be missing from $matches.
        $link_id =& $matches[3];
        if ($link_id == "") {
            # for shortcut links like [this][] or [this].
            $link_id = $link_text;
        }

        # lower-case and turn embedded newlines into spaces
        $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

        # Unknown reference: leave the source text untouched.
        if (!isset($this->urls[$link_id])) {
            return $matches[1];
        }

        $href = $this->encodeAttribute($this->urls[$link_id]);
        $tag  = "<a href=\"$href\"";

        if (isset($this->titles[$link_id])) {
            $title_attr = $this->encodeAttribute($this->titles[$link_id]);
            $tag .= " title=\"$title_attr\"";
        }

        $tag .= ">" . $this->runSpanGamut($link_text) . "</a>";
        return $this->hashPart($tag);
    }
	protected function _doAnchors_inline_callback($matches) {
    #
    # Builds the <a> tag for an inline link matched by doAnchors(),
    # [link text](url "optional title"), and returns its token.
    #
        $link_text = $this->runSpanGamut($matches[2]);

        # The url is captured in $3 when written <url>, in $4 otherwise.
        if ($matches[3] == '') {
            $url = $matches[4];
        } else {
            $url = $matches[3];
        }

        # Bound by reference: the title group is optional, so $7 may be
        # missing from $matches.
        $title =& $matches[7];

        // if the URL was of the form <s p a c e s> it got caught by the HTML
        // tag parser and hashed. Need to reverse the process before using the URL.
        $restored = $this->unhash($url);
        if ($restored != $url) {
            $url = preg_replace('/^<(.*)>$/', '\1', $restored);
        }

        $href = $this->encodeAttribute($url);
        $tag  = "<a href=\"$href\"";

        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $tag .= " title=\"$title\"";
        }

        # The link text is run through the span gamut a second time here.
        $link_text = $this->runSpanGamut($link_text);
        $tag .= ">$link_text</a>";

        return $this->hashPart($tag);
    }
 638  
 639  
	protected function doImages($text) {
    #
    # Turn Markdown image shortcuts into <img> tags.
    #
    # Two passes are made over $text: reference-style images, then inline
    # images. Returns the resulting text.
    #
        #
        # First, handle reference-style labeled images: ![alt text][id]
        #
        $text = preg_replace_callback('{
            (                # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')        # alt text = $2
              \]

              [ ]?                # one optional space
              (?:\n[ ]*)?        # one optional newline followed by spaces

              \[
                (.*?)        # id = $3
              \]

            )
            }xs', 
            array($this, '_doImages_reference_callback'), $text);

        #
        # Next, handle inline images:  ![alt text](url "optional title")
        # Don't forget: encode * and _
        #
        $text = preg_replace_callback('{
            (                # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')        # alt text = $2
              \]
              \s?            # One optional whitespace character
              \(            # literal paren
                [ \n]*
                (?:
                    <(\S*)>    # src url = $3
                |
                    ('.$this->nested_url_parenthesis_re.')    # src url = $4
                )
                [ \n]*
                (            # $5
                  ([\'"])    # quote char = $6
                  (.*?)        # title = $7
                  \6        # matching quote
                  [ \n]*
                )?            # title is optional
              \)
            )
            }xs',
            array($this, '_doImages_inline_callback'), $text);

        return $text;
    }
	protected function _doImages_reference_callback($matches) {
    #
    # Builds the <img> tag for a reference-style image matched by
    # doImages(), ![alt text][id], and returns its token. When the id is
    # not a known reference, the matched text is returned as is.
    #
        $alt = $matches[2];

        $id = strtolower($matches[3]);
        if ($id == "") {
            # for shortcut links like ![this][].
            $id = strtolower($alt);
        }

        $alt = $this->encodeAttribute($alt);

        # If there's no such link ID, leave intact:
        if (!isset($this->urls[$id])) {
            return $matches[1];
        }

        $src = $this->encodeAttribute($this->urls[$id]);
        $tag = "<img src=\"$src\" alt=\"$alt\"";

        if (isset($this->titles[$id])) {
            $title_attr = $this->encodeAttribute($this->titles[$id]);
            $tag .= " title=\"$title_attr\"";
        }

        $tag .= $this->empty_element_suffix;
        return $this->hashPart($tag);
    }
	protected function _doImages_inline_callback($matches) {
    #
    # Builds the <img> tag for an inline image matched by doImages(),
    # ![alt text](url "optional title"), and returns its token.
    #
        # The url is captured in $3 when written <url>, in $4 otherwise.
        if ($matches[3] == '') {
            $src = $matches[4];
        } else {
            $src = $matches[3];
        }

        # Bound by reference: the title group is optional, so $7 may be
        # missing from $matches.
        $title =& $matches[7];

        $alt = $this->encodeAttribute($matches[2]);
        $src = $this->encodeAttribute($src);

        $tag = "<img src=\"$src\" alt=\"$alt\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $tag .= " title=\"$title\"";
        }
        $tag .= $this->empty_element_suffix;

        return $this->hashPart($tag);
    }
 740  
 741  
	protected function doHeaders($text) {
    #
    # Turns setext-style and atx-style headers into header elements; the
    # two callbacks build the <h1>..<h6> blocks. Returns the resulting text.
    #
        # Setext-style headers:
        #      Header 1
        #      ========
        #  
        #      Header 2
        #      --------
        #
        # $1 is the header text, $2 the underline.
        $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
            array($this, '_doHeaders_callback_setext'), $text);

        # atx-style headers:
        #    # Header 1
        #    ## Header 2
        #    ## Header 2 with closing hashes ##
        #    ...
        #    ###### Header 6
        #
        $text = preg_replace_callback('{
                ^(\#{1,6})    # $1 = string of #\'s
                [ ]*
                (.+?)        # $2 = Header text
                [ ]*
                \#*            # optional closing #\'s (not counted)
                \n+
            }xm',
            array($this, '_doHeaders_callback_atx'), $text);

        return $text;
    }
	protected function _doHeaders_callback_setext($matches) {
    #
    # Builds the header block for a setext-style header matched by
    # doHeaders(): $1 is the header text, $2 the underline. An underline
    # starting with "=" gives <h1>, anything else <h2>. Returns the hashed
    # block surrounded by newlines, or the match untouched when it is in
    # fact an empty list item.
    #
        # Terrible hack to check we haven't found an empty list item.
        if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
            return $matches[0];

        # First character of the underline. Square brackets are used since
        # the `$str{0}` offset syntax is deprecated as of PHP 7.4 and is a
        # parse error in PHP 8.
        $level = $matches[2][0] == '=' ? 1 : 2;
        $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }
	protected function _doHeaders_callback_atx($matches) {
    #
    # Builds the header block for an atx-style header matched by
    # doHeaders(): the number of leading # characters ($1) gives the
    # level, $2 is the header text. Returns the hashed block surrounded by
    # newlines.
    #
        $depth   = strlen($matches[1]);
        $content = $this->runSpanGamut($matches[2]);
        $header  = "<h" . $depth . ">" . $content . "</h" . $depth . ">";
        return "\n" . $this->hashBlock($header) . "\n\n";
    }
 786  
 787  
 788  	protected function doLists($text) {
 789      #
 790      # Form HTML ordered (numbered) and unordered (bulleted) lists.
 791      #
 792          $less_than_tab = $this->tab_width - 1;
 793  
 794          # Re-usable patterns to match list item bullets and number markers:
 795          $marker_ul_re  = '[*+-]';
 796          $marker_ol_re  = '\d+[\.]';
 797  
 798          $markers_relist = array(
 799              $marker_ul_re => $marker_ol_re,
 800              $marker_ol_re => $marker_ul_re,
 801              );
 802  
 803          foreach ($markers_relist as $marker_re => $other_marker_re) {
 804              # Re-usable pattern to match any entirel ul or ol list:
 805              $whole_list_re = '
 806                  (                                # $1 = whole list
 807                    (                                # $2
 808                      ([ ]{0,'.$less_than_tab.'})    # $3 = number of spaces
 809                      ('.$marker_re.')            # $4 = first list item marker
 810                      [ ]+
 811                    )
 812                    (?s:.+?)
 813                    (                                # $5
 814                        \z
 815                      |
 816                        \n{2,}
 817                        (?=\S)
 818                        (?!                        # Negative lookahead for another list item marker
 819                          [ ]*
 820                          '.$marker_re.'[ ]+
 821                        )
 822                      |
 823                        (?=                        # Lookahead for another kind of list
 824                          \n
 825                          \3                        # Must have the same indentation
 826                          '.$other_marker_re.'[ ]+
 827                        )
 828                    )
 829                  )
 830              '; // mx
 831              
 832              # We use a different prefix before nested lists than top-level lists.
 833              # See extended comment in _ProcessListItems().
 834          
 835              if ($this->list_level) {
 836                  $text = preg_replace_callback('{
 837                          ^
 838                          '.$whole_list_re.'
 839                      }mx',
 840                      array($this, '_doLists_callback'), $text);
 841              }
 842              else {
 843                  $text = preg_replace_callback('{
 844                          (?:(?<=\n)\n|\A\n?) # Must eat the newline
 845                          '.$whole_list_re.'
 846                      }mx',
 847                      array($this, '_doLists_callback'), $text);
 848              }
 849          }
 850  
 851          return $text;
 852      }
 853  	protected function _doLists_callback($matches) {
 854          # Re-usable patterns to match list item bullets and number markers:
 855          $marker_ul_re  = '[*+-]';
 856          $marker_ol_re  = '\d+[\.]';
 857          $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 858          
 859          $list = $matches[1];
 860          $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 861          
 862          $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 863          
 864          $list .= "\n";
 865          $result = $this->processListItems($list, $marker_any_re);
 866          
 867          $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 868          return "\n". $result ."\n\n";
 869      }
 870  
 871      protected $list_level = 0;
 872  
 873  	protected function processListItems($list_str, $marker_any_re) {
 874      #
 875      #    Process the contents of a single ordered or unordered list, splitting it
 876      #    into individual list items.
 877      #
 878          # The $this->list_level global keeps track of when we're inside a list.
 879          # Each time we enter a list, we increment it; when we leave a list,
 880          # we decrement. If it's zero, we're not in a list anymore.
 881          #
 882          # We do this because when we're not inside a list, we want to treat
 883          # something like this:
 884          #
 885          #        I recommend upgrading to version
 886          #        8. Oops, now this line is treated
 887          #        as a sub-list.
 888          #
 889          # As a single paragraph, despite the fact that the second line starts
 890          # with a digit-period-space sequence.
 891          #
 892          # Whereas when we're inside a list (or sub-list), that line will be
 893          # treated as the start of a sub-list. What a kludge, huh? This is
 894          # an aspect of Markdown's syntax that's hard to parse perfectly
 895          # without resorting to mind-reading. Perhaps the solution is to
 896          # change the syntax rules such that sub-lists must start with a
 897          # starting cardinal number; e.g. "1." or "a.".
 898          
 899          $this->list_level++;
 900  
 901          # trim trailing blank lines:
 902          $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 903  
 904          $list_str = preg_replace_callback('{
 905              (\n)?                            # leading line = $1
 906              (^[ ]*)                            # leading whitespace = $2
 907              ('.$marker_any_re.'                # list marker and space = $3
 908                  (?:[ ]+|(?=\n))    # space only required if item is not empty
 909              )
 910              ((?s:.*?))                        # list item text   = $4
 911              (?:(\n+(?=\n))|\n)                # tailing blank line = $5
 912              (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
 913              }xm',
 914              array($this, '_processListItems_callback'), $list_str);
 915  
 916          $this->list_level--;
 917          return $list_str;
 918      }
 919  	protected function _processListItems_callback($matches) {
 920          $item = $matches[4];
 921          $leading_line =& $matches[1];
 922          $leading_space =& $matches[2];
 923          $marker_space = $matches[3];
 924          $tailing_blank_line =& $matches[5];
 925  
 926          if ($leading_line || $tailing_blank_line || 
 927              preg_match('/\n{2,}/', $item))
 928          {
 929              # Replace marker with the appropriate whitespace indentation
 930              $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
 931              $item = $this->runBlockGamut($this->outdent($item)."\n");
 932          }
 933          else {
 934              # Recursion for sub-lists:
 935              $item = $this->doLists($this->outdent($item));
 936              $item = preg_replace('/\n+$/', '', $item);
 937              $item = $this->runSpanGamut($item);
 938          }
 939  
 940          return "<li>" . $item . "</li>\n";
 941      }
 942  
 943  
 944  	protected function doCodeBlocks($text) {
 945      #
 946      #    Process Markdown `<pre><code>` blocks.
 947      #
 948          $text = preg_replace_callback('{
 949                  (?:\n\n|\A\n?)
 950                  (                # $1 = the code block -- one or more lines, starting with a space/tab
 951                    (?>
 952                      [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
 953                      .*\n+
 954                    )+
 955                  )
 956                  ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)    # Lookahead for non-space at line-start, or end of doc
 957              }xm',
 958              array($this, '_doCodeBlocks_callback'), $text);
 959  
 960          return $text;
 961      }
 962  	protected function _doCodeBlocks_callback($matches) {
 963          $codeblock = $matches[1];
 964  
 965          $codeblock = $this->outdent($codeblock);
 966          $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
 967  
 968          # trim leading newlines and trailing newlines
 969          $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
 970  
 971          $codeblock = "<pre><code>$codeblock\n</code></pre>";
 972          return "\n\n".$this->hashBlock($codeblock)."\n\n";
 973      }
 974  
 975  
 976  	protected function makeCodeSpan($code) {
 977      #
 978      # Create a code span markup for $code. Called from handleSpanToken.
 979      #
 980          $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
 981          return $this->hashPart("<code>$code</code>");
 982      }
 983  
 984  
 985      protected $em_relist = array(
 986          ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
 987          '*' => '(?<![\s*])\*(?!\*)',
 988          '_' => '(?<![\s_])_(?!_)',
 989          );
 990      protected $strong_relist = array(
 991          ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
 992          '**' => '(?<![\s*])\*\*(?!\*)',
 993          '__' => '(?<![\s_])__(?!_)',
 994          );
 995      protected $em_strong_relist = array(
 996          ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
 997          '***' => '(?<![\s*])\*\*\*(?!\*)',
 998          '___' => '(?<![\s_])___(?!_)',
 999          );
1000      protected $em_strong_prepared_relist;
1001      
1002  	protected function prepareItalicsAndBold() {
1003      #
1004      # Prepare regular expressions for searching emphasis tokens in any
1005      # context.
1006      #
1007          foreach ($this->em_relist as $em => $em_re) {
1008              foreach ($this->strong_relist as $strong => $strong_re) {
1009                  # Construct list of allowed token expressions.
1010                  $token_relist = array();
1011                  if (isset($this->em_strong_relist["$em$strong"])) {
1012                      $token_relist[] = $this->em_strong_relist["$em$strong"];
1013                  }
1014                  $token_relist[] = $em_re;
1015                  $token_relist[] = $strong_re;
1016                  
1017                  # Construct master expression from list.
1018                  $token_re = '{('. implode('|', $token_relist) .')}';
1019                  $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1020              }
1021          }
1022      }
1023      
1024  	protected function doItalicsAndBold($text) {
1025          $token_stack = array('');
1026          $text_stack = array('');
1027          $em = '';
1028          $strong = '';
1029          $tree_char_em = false;
1030          
1031          while (1) {
1032              #
1033              # Get prepared regular expression for seraching emphasis tokens
1034              # in current context.
1035              #
1036              $token_re = $this->em_strong_prepared_relist["$em$strong"];
1037              
1038              #
1039              # Each loop iteration search for the next emphasis token. 
1040              # Each token is then passed to handleSpanToken.
1041              #
1042              $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1043              $text_stack[0] .= $parts[0];
1044              $token =& $parts[1];
1045              $text =& $parts[2];
1046              
1047              if (empty($token)) {
1048                  # Reached end of text span: empty stack without emitting.
1049                  # any more emphasis.
1050                  while ($token_stack[0]) {
1051                      $text_stack[1] .= array_shift($token_stack);
1052                      $text_stack[0] .= array_shift($text_stack);
1053                  }
1054                  break;
1055              }
1056              
1057              $token_len = strlen($token);
1058              if ($tree_char_em) {
1059                  # Reached closing marker while inside a three-char emphasis.
1060                  if ($token_len == 3) {
1061                      # Three-char closing marker, close em and strong.
1062                      array_shift($token_stack);
1063                      $span = array_shift($text_stack);
1064                      $span = $this->runSpanGamut($span);
1065                      $span = "<strong><em>$span</em></strong>";
1066                      $text_stack[0] .= $this->hashPart($span);
1067                      $em = '';
1068                      $strong = '';
1069                  } else {
1070                      # Other closing marker: close one em or strong and
1071                      # change current token state to match the other
1072                      $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1073                      $tag = $token_len == 2 ? "strong" : "em";
1074                      $span = $text_stack[0];
1075                      $span = $this->runSpanGamut($span);
1076                      $span = "<$tag>$span</$tag>";
1077                      $text_stack[0] = $this->hashPart($span);
1078                      $$tag = ''; # $$tag stands for $em or $strong
1079                  }
1080                  $tree_char_em = false;
1081              } else if ($token_len == 3) {
1082                  if ($em) {
1083                      # Reached closing marker for both em and strong.
1084                      # Closing strong marker:
1085                      for ($i = 0; $i < 2; ++$i) {
1086                          $shifted_token = array_shift($token_stack);
1087                          $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1088                          $span = array_shift($text_stack);
1089                          $span = $this->runSpanGamut($span);
1090                          $span = "<$tag>$span</$tag>";
1091                          $text_stack[0] .= $this->hashPart($span);
1092                          $$tag = ''; # $$tag stands for $em or $strong
1093                      }
1094                  } else {
1095                      # Reached opening three-char emphasis marker. Push on token 
1096                      # stack; will be handled by the special condition above.
1097                      $em = $token{0};
1098                      $strong = "$em$em";
1099                      array_unshift($token_stack, $token);
1100                      array_unshift($text_stack, '');
1101                      $tree_char_em = true;
1102                  }
1103              } else if ($token_len == 2) {
1104                  if ($strong) {
1105                      # Unwind any dangling emphasis marker:
1106                      if (strlen($token_stack[0]) == 1) {
1107                          $text_stack[1] .= array_shift($token_stack);
1108                          $text_stack[0] .= array_shift($text_stack);
1109                      }
1110                      # Closing strong marker:
1111                      array_shift($token_stack);
1112                      $span = array_shift($text_stack);
1113                      $span = $this->runSpanGamut($span);
1114                      $span = "<strong>$span</strong>";
1115                      $text_stack[0] .= $this->hashPart($span);
1116                      $strong = '';
1117                  } else {
1118                      array_unshift($token_stack, $token);
1119                      array_unshift($text_stack, '');
1120                      $strong = $token;
1121                  }
1122              } else {
1123                  # Here $token_len == 1
1124                  if ($em) {
1125                      if (strlen($token_stack[0]) == 1) {
1126                          # Closing emphasis marker:
1127                          array_shift($token_stack);
1128                          $span = array_shift($text_stack);
1129                          $span = $this->runSpanGamut($span);
1130                          $span = "<em>$span</em>";
1131                          $text_stack[0] .= $this->hashPart($span);
1132                          $em = '';
1133                      } else {
1134                          $text_stack[0] .= $token;
1135                      }
1136                  } else {
1137                      array_unshift($token_stack, $token);
1138                      array_unshift($text_stack, '');
1139                      $em = $token;
1140                  }
1141              }
1142          }
1143          return $text_stack[0];
1144      }
1145  
1146  
1147  	protected function doBlockQuotes($text) {
1148          $text = preg_replace_callback('/
1149                (                                # Wrap whole match in $1
1150                  (?>
1151                    ^[ ]*>[ ]?            # ">" at the start of a line
1152                      .+\n                    # rest of the first line
1153                    (.+\n)*                    # subsequent consecutive lines
1154                    \n*                        # blanks
1155                  )+
1156                )
1157              /xm',
1158              array($this, '_doBlockQuotes_callback'), $text);
1159  
1160          return $text;
1161      }
1162  	protected function _doBlockQuotes_callback($matches) {
1163          $bq = $matches[1];
1164          # trim one level of quoting - trim whitespace-only lines
1165          $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1166          $bq = $this->runBlockGamut($bq);        # recurse
1167  
1168          $bq = preg_replace('/^/m', "  ", $bq);
1169          # These leading spaces cause problem with <pre> content, 
1170          # so we need to fix that:
1171          $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1172              array($this, '_doBlockQuotes_callback2'), $bq);
1173  
1174          return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1175      }
1176  	protected function _doBlockQuotes_callback2($matches) {
1177          $pre = $matches[1];
1178          $pre = preg_replace('/^  /m', '', $pre);
1179          return $pre;
1180      }
1181  
1182  
1183  	protected function formParagraphs($text) {
1184      #
1185      #    Params:
1186      #        $text - string to process with html <p> tags
1187      #
1188          # Strip leading and trailing lines:
1189          $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1190  
1191          $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1192  
1193          #
1194          # Wrap <p> tags and unhashify HTML blocks
1195          #
1196          foreach ($grafs as $key => $value) {
1197              if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1198                  # Is a paragraph.
1199                  $value = $this->runSpanGamut($value);
1200                  $value = preg_replace('/^([ ]*)/', "<p>", $value);
1201                  $value .= "</p>";
1202                  $grafs[$key] = $this->unhash($value);
1203              }
1204              else {
1205                  # Is a block.
1206                  # Modify elements of @grafs in-place...
1207                  $graf = $value;
1208                  $block = $this->html_hashes[$graf];
1209                  $graf = $block;
1210  //                if (preg_match('{
1211  //                    \A
1212  //                    (                            # $1 = <div> tag
1213  //                      <div  \s+
1214  //                      [^>]*
1215  //                      \b
1216  //                      markdown\s*=\s*  ([\'"])    #    $2 = attr quote char
1217  //                      1
1218  //                      \2
1219  //                      [^>]*
1220  //                      >
1221  //                    )
1222  //                    (                            # $3 = contents
1223  //                    .*
1224  //                    )
1225  //                    (</div>)                    # $4 = closing tag
1226  //                    \z
1227  //                    }xs', $block, $matches))
1228  //                {
1229  //                    list(, $div_open, , $div_content, $div_close) = $matches;
1230  //
1231  //                    # We can't call Markdown(), because that resets the hash;
1232  //                    # that initialization code should be pulled into its own sub, though.
1233  //                    $div_content = $this->hashHTMLBlocks($div_content);
1234  //                    
1235  //                    # Run document gamut methods on the content.
1236  //                    foreach ($this->document_gamut as $method => $priority) {
1237  //                        $div_content = $this->$method($div_content);
1238  //                    }
1239  //
1240  //                    $div_open = preg_replace(
1241  //                        '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1242  //
1243  //                    $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1244  //                }
1245                  $grafs[$key] = $graf;
1246              }
1247          }
1248  
1249          return implode("\n\n", $grafs);
1250      }
1251  
1252  
1253  	protected function encodeAttribute($text) {
1254      #
1255      # Encode text for a double-quoted HTML attribute. This function
1256      # is *not* suitable for attributes enclosed in single quotes.
1257      #
1258          $text = $this->encodeAmpsAndAngles($text);
1259          $text = str_replace('"', '&quot;', $text);
1260          return $text;
1261      }
1262      
1263      
1264  	protected function encodeAmpsAndAngles($text) {
1265      #
1266      # Smart processing for ampersands and angle brackets that need to 
1267      # be encoded. Valid character entities are left alone unless the
1268      # no-entities mode is set.
1269      #
1270          if ($this->no_entities) {
1271              $text = str_replace('&', '&amp;', $text);
1272          } else {
1273              # Ampersand-encoding based entirely on Nat Irons's Amputator
1274              # MT plugin: <http://bumppo.net/projects/amputator/>
1275              $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1276                                  '&amp;', $text);
1277          }
1278          # Encode remaining <'s
1279          $text = str_replace('<', '&lt;', $text);
1280  
1281          return $text;
1282      }
1283  
1284  
1285  	protected function doAutoLinks($text) {
1286          $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
1287              array($this, '_doAutoLinks_url_callback'), $text);
1288  
1289          # Email addresses: <[email protected]>
1290          $text = preg_replace_callback('{
1291              <
1292              (?:mailto:)?
1293              (
1294                  (?:
1295                      [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1296                  |
1297                      ".*?"
1298                  )
1299                  \@
1300                  (?:
1301                      [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1302                  |
1303                      \[[\d.a-fA-F:]+\]    # IPv4 & IPv6
1304                  )
1305              )
1306              >
1307              }xi',
1308              array($this, '_doAutoLinks_email_callback'), $text);
1309          $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text);
1310  
1311          return $text;
1312      }
1313  	protected function _doAutoLinks_tel_callback($matches) {
1314          $url = $this->encodeAttribute($matches[1]);
1315          $tel = $this->encodeAttribute($matches[2]);
1316          $link = "<a href=\"$url\">$tel</a>";
1317          return $this->hashPart($link);
1318      }
1319  	protected function _doAutoLinks_url_callback($matches) {
1320          $url = $this->encodeAttribute($matches[1]);
1321          $link = "<a href=\"$url\">$url</a>";
1322          return $this->hashPart($link);
1323      }
1324  	protected function _doAutoLinks_email_callback($matches) {
1325          $address = $matches[1];
1326          $link = $this->encodeEmailAddress($address);
1327          return $this->hashPart($link);
1328      }
1329  
1330  
1331  	protected function encodeEmailAddress($addr) {
1332      #
1333      #    Input: an email address, e.g. "[email protected]"
1334      #
1335      #    Output: the email address as a mailto link, with each character
1336      #        of the address encoded as either a decimal or hex entity, in
1337      #        the hopes of foiling most address harvesting spam bots. E.g.:
1338      #
1339      #      <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1340      #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1341      #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1342      #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1343      #
1344      #    Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1345      #   With some optimizations by Milian Wolff.
1346      #
1347          $addr = "mailto:" . $addr;
1348          $chars = preg_split('/(?<!^)(?!$)/', $addr);
1349          $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1350          
1351          foreach ($chars as $key => $char) {
1352              $ord = ord($char);
1353              # Ignore non-ascii chars.
1354              if ($ord < 128) {
1355                  $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1356                  # roughly 10% raw, 45% hex, 45% dec
1357                  # '@' *must* be encoded. I insist.
1358                  # '"' has to be encoded inside the attribute
1359                  if ($r > 90 && $char != '@' && $char != '"') /* do nothing */;
1360                  else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1361                  else              $chars[$key] = '&#'.$ord.';';
1362              }
1363          }
1364          
1365          $addr = implode('', $chars);
1366          $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1367          $addr = "<a href=\"$addr\">$text</a>";
1368  
1369          return $addr;
1370      }
1371  
1372  
1373  	protected function parseSpan($str) {
1374      #
1375      # Take the string $str and parse it into tokens, hashing embeded HTML,
1376      # escaped characters and handling code spans.
1377      #
1378          $output = '';
1379          
1380          $span_re = '{
1381                  (
1382                      \\\\'.$this->escape_chars_re.'
1383                  |
1384                      (?<![`\\\\])
1385                      `+                        # code span marker
1386              '.( $this->no_markup ? '' : '
1387                  |
1388                      <!--    .*?     -->        # comment
1389                  |
1390                      <\?.*?\?> | <%.*?%>        # processing instruction
1391                  |
1392                      <[!$]?[-a-zA-Z0-9:_]+    # regular tags
1393                      (?>
1394                          \s
1395                          (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1396                      )?
1397                      >
1398                  |
1399                      <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1400                  |
1401                      </[-a-zA-Z0-9:_]+\s*> # closing tag
1402              ').'
1403                  )
1404                  }xs';
1405  
1406          while (1) {
1407              #
1408              # Each loop iteration seach for either the next tag, the next 
1409              # openning code span marker, or the next escaped character. 
1410              # Each token is then passed to handleSpanToken.
1411              #
1412              $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1413              
1414              # Create token from text preceding tag.
1415              if ($parts[0] != "") {
1416                  $output .= $parts[0];
1417              }
1418              
1419              # Check if we reach the end.
1420              if (isset($parts[1])) {
1421                  $output .= $this->handleSpanToken($parts[1], $parts[2]);
1422                  $str = $parts[2];
1423              }
1424              else {
1425                  break;
1426              }
1427          }
1428          
1429          return $output;
1430      }
1431      
1432      
1433  	protected function handleSpanToken($token, &$str) {
1434      #
1435      # Handle $token provided by parseSpan by determining its nature and 
1436      # returning the corresponding value that should replace it.
1437      #
1438          switch ($token{0}) {
1439              case "\\":
1440                  return $this->hashPart("&#". ord($token{1}). ";");
1441              case "`":
1442                  # Search for end marker in remaining text.
1443                  if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1444                      $str, $matches))
1445                  {
1446                      $str = $matches[2];
1447                      $codespan = $this->makeCodeSpan($matches[1]);
1448                      return $this->hashPart($codespan);
1449                  }
1450                  return $token; // return as text since no ending marker found.
1451              default:
1452                  return $this->hashPart($token);
1453          }
1454      }
1455  
1456  
1457  	protected function outdent($text) {
1458      #
1459      # Remove one level of line-leading tabs or spaces
1460      #
1461          return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1462      }
1463  
1464  
1465      # String length function for detab. `_initDetab` will create a function to 
1466      # handle UTF-8 if the default function does not exist.
1467      protected $utf8_strlen = 'mb_strlen';
1468      
1469  	protected function detab($text) {
1470      #
1471      # Replace tabs with the appropriate amount of space.
1472      #
1473          # For each line we separate the line in blocks delemited by
1474          # tab characters. Then we reconstruct every line by adding the 
1475          # appropriate number of space between each blocks.
1476          
1477          $text = preg_replace_callback('/^.*\t.*$/m',
1478              array($this, '_detab_callback'), $text);
1479  
1480          return $text;
1481      }
1482  	protected function _detab_callback($matches) {
1483          $line = $matches[0];
1484          $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1485          
1486          # Split in blocks.
1487          $blocks = explode("\t", $line);
1488          # Add each blocks to the line.
1489          $line = $blocks[0];
1490          unset($blocks[0]); # Do not add first block twice.
1491          foreach ($blocks as $block) {
1492              # Calculate amount of space, insert spaces, insert block.
1493              $amount = $this->tab_width - 
1494                  $strlen($line, 'UTF-8') % $this->tab_width;
1495              $line .= str_repeat(" ", $amount) . $block;
1496          }
1497          return $line;
1498      }
1499  	protected function _initDetab() {
1500      #
1501      # Check for the availability of the function in the `utf8_strlen` property
1502      # (initially `mb_strlen`). If the function is not available, create a 
1503      # function that will loosely count the number of UTF-8 characters with a
1504      # regular expression.
1505      #
1506          if (function_exists($this->utf8_strlen)) return;
1507          $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1508              "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1509              $text, $m);');
1510      }
1511  
1512  
1513  	protected function unhash($text) {
1514      #
1515      # Swap back in all the tags hashed by _HashHTMLBlocks.
1516      #
1517          return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1518              array($this, '_unhash_callback'), $text);
1519      }
1520  	protected function _unhash_callback($matches) {
1521          return $this->html_hashes[$matches[0]];
1522      }
1523  
1524  }
1525  
1526  
1527  #
1528  # Temporary Markdown Extra Parser Implementation Class
1529  #
1530  # NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
1532  # This makes it easier to propagate the changes between the three different
1533  # packaging styles of PHP Markdown. When this issue is resolved, this
1534  # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
1535  # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
1536  # one.
1537  #
1538  
1539  abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
1540  
1541      ### Configuration Variables ###
1542  
    # Prefix for footnote ids.
    public $fn_id_prefix = "";
    
    # Optional title attribute for footnote links and backlinks.
    public $fn_link_title = "";
    public $fn_backlink_title = "";
    
    # Optional class attribute for footnote links and backlinks.
    public $fn_link_class = "footnote-ref";
    public $fn_backlink_class = "footnote-backref";

    # Class name for table cell alignment (%% replaced left/center/right)
    # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
    # If empty, the align attribute is used instead of a class name.
    public $table_align_class_tmpl = '';

    # Optional class prefix for fenced code block.
    public $code_class_prefix = "";
    # Class attribute for code blocks goes on the `code` tag;
    # setting this to true will put attributes on the `pre` tag instead.
    public $code_attr_on_pre = false;
    
    # Predefined abbreviations.
    # Array of abbreviation word => description; setup() copies each pair
    # into the abbreviation table (description trimmed) and adds the word
    # to the abbreviation matching expression.
    public $predef_abbr = array();
1567  
1568  
1569      ### Parser Implementation ###
1570  
1571  	public function __construct() {
1572      #
1573      # Constructor function. Initialize the parser object.
1574      #
1575          # Add extra escapable characters before parent constructor 
1576          # initialize the table.
1577          $this->escape_chars .= ':|';
1578          
1579          # Insert extra document, block, and span transformations. 
1580          # Parent constructor will do the sorting.
1581          $this->document_gamut += array(
1582              "doFencedCodeBlocks" => 5,
1583              "stripFootnotes"     => 15,
1584              "stripAbbreviations" => 25,
1585              "appendFootnotes"    => 50,
1586              );
1587          $this->block_gamut += array(
1588              "doFencedCodeBlocks" => 5,
1589              "doTables"           => 15,
1590              "doDefLists"         => 45,
1591              );
1592          $this->span_gamut += array(
1593              "doFootnotes"        => 5,
1594              "doAbbreviations"    => 70,
1595              );
1596          
1597          parent::__construct();
1598      }
1599      
1600      
    # Extra variables used during extra transformations.
    # setup() resets every one of them; teardown() empties all of them
    # again except the footnote counter.
    protected $footnotes = array();
    protected $footnotes_ordered = array();
    protected $footnotes_ref_count = array();
    protected $footnotes_numbers = array();
    # Abbreviation word => trimmed description, seeded by setup() from
    # $predef_abbr. (The property name is misspelled; it is kept as is.)
    protected $abbr_desciptions = array();
    # `|`-separated list of preg_quote()d abbreviation words, built by
    # setup() from $predef_abbr.
    protected $abbr_word_re = '';
    
    # Give the current footnote number.
    protected $footnote_counter = 1;
1611      
1612      
1613  	protected function setup() {
1614      #
1615      # Setting up Extra-specific variables.
1616      #
1617          parent::setup();
1618          
1619          $this->footnotes = array();
1620          $this->footnotes_ordered = array();
1621          $this->footnotes_ref_count = array();
1622          $this->footnotes_numbers = array();
1623          $this->abbr_desciptions = array();
1624          $this->abbr_word_re = '';
1625          $this->footnote_counter = 1;
1626          
1627          foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1628              if ($this->abbr_word_re)
1629                  $this->abbr_word_re .= '|';
1630              $this->abbr_word_re .= preg_quote($abbr_word);
1631              $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1632          }
1633      }
1634      
1635  	protected function teardown() {
1636      #
1637      # Clearing Extra-specific variables.
1638      #
1639          $this->footnotes = array();
1640          $this->footnotes_ordered = array();
1641          $this->footnotes_ref_count = array();
1642          $this->footnotes_numbers = array();
1643          $this->abbr_desciptions = array();
1644          $this->abbr_word_re = '';
1645          
1646          parent::teardown();
1647      }
1648      
1649      
1650      ### Extra Attribute Parser ###
1651  
    # Expression to use to catch attributes (includes the braces)
    # Matches `{`, one or more `#id` / `.class` tokens (made of letters,
    # digits, `-`, `_` and `:`), each optionally preceded by spaces, then
    # optional spaces and `}`. The token list is captured as a single group.
    protected $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}';
    # Expression to use when parsing in a context when no capture is desired
    # (same pattern, without the capturing group)
    protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}';
1656  
1657  	protected function doExtraAttributes($tag_name, $attr) {
1658      #
1659      # Parse attributes caught by the $this->id_class_attr_catch_re expression
1660      # and return the HTML-formatted list of attributes.
1661      #
1662      # Currently supported attributes are .class and #id.
1663      #
1664          if (empty($attr)) return "";
1665          
1666          # Split on components
1667          preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches);
1668          $elements = $matches[0];
1669  
1670          # handle classes and ids (only first id taken into account)
1671          $classes = array();
1672          $id = false;
1673          foreach ($elements as $element) {
1674              if ($element{0} == '.') {
1675                  $classes[] = substr($element, 1);
1676              } else if ($element{0} == '#') {
1677                  if ($id === false) $id = substr($element, 1);
1678              }
1679          }
1680  
1681          # compose attributes as string
1682          $attr_str = "";
1683          if (!empty($id)) {
1684              $attr_str .= ' id="'.$id.'"';
1685          }
1686          if (!empty($classes)) {
1687              $attr_str .= ' class="'.implode(" ", $classes).'"';
1688          }
1689          return $attr_str;
1690      }
1691  
1692  
1693  	protected function stripLinkDefinitions($text) {
1694      #
1695      # Strips link definitions from text, stores the URLs and titles in
1696      # hash references.
1697      #
1698          $less_than_tab = $this->tab_width - 1;
1699  
1700          # Link defs are in the form: ^[id]: url "optional title"
1701          $text = preg_replace_callback('{
1702                              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:    # id = $1
1703                                [ ]*
1704                                \n?                # maybe *one* newline
1705                                [ ]*
1706                              (?:
1707                                <(.+?)>            # url = $2
1708                              |
1709                                (\S+?)            # url = $3
1710                              )
1711                                [ ]*
1712                                \n?                # maybe one newline
1713                                [ ]*
1714                              (?:
1715                                  (?<=\s)            # lookbehind for whitespace
1716                                  ["(]
1717                                  (.*?)            # title = $4
1718                                  [")]
1719                                  [ ]*
1720                              )?    # title is optional
1721                      (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
1722                              (?:\n+|\Z)
1723              }xm',
1724              array($this, '_stripLinkDefinitions_callback'),
1725              $text);
1726          return $text;
1727      }
1728  	protected function _stripLinkDefinitions_callback($matches) {
1729          $link_id = strtolower($matches[1]);
1730          $url = $matches[2] == '' ? $matches[3] : $matches[2];
1731          $this->urls[$link_id] = $url;
1732          $this->titles[$link_id] =& $matches[4];
1733          $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
1734          return ''; # String that will replace the block
1735      }
1736  
1737  
1738      ### HTML Block Parser ###
1739      
    # Each list below is a `|`-separated regex alternation of tag names; the
    # HTML block parser interpolates them into its patterns as
    # `<(?:' . list . ')\b`.

    # Tags that are always treated as block tags:
    protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
                           
    # Tags treated as block tags only if the opening tag is alone on its line:
    protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
    
    # Tags where markdown="1" default to span mode:
    protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
    
    # Tags which must not have their contents modified, no matter where 
    # they appear:
    protected $clean_tags_re = 'script|style|math|svg';
    
    # Tags that do not need to be closed.
    protected $auto_close_tags_re = 'hr|img|param|source|track';
1755      
1756  
1757  	protected function hashHTMLBlocks($text) {
1758      #
1759      # Hashify HTML Blocks and "clean tags".
1760      #
1761      # We only want to do this for block-level HTML tags, such as headers,
1762      # lists, and tables. That's because we still want to wrap <p>s around
1763      # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1764      # phrase emphasis, and spans. The list of tags we're looking for is
1765      # hard-coded.
1766      #
1767      # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1768      # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
1769      # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
1770      #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1771      # These two functions are calling each other. It's recursive!
1772      #
1773          if ($this->no_markup)  return $text;
1774  
1775          #
1776          # Call the HTML-in-Markdown hasher.
1777          #
1778          list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1779          
1780          return $text;
1781      }
1782  	protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1783                                          $enclosing_tag_re = '', $span = false)
1784      {
1785      #
1786      # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1787      #
1788      # *   $indent is the number of space to be ignored when checking for code 
1789      #     blocks. This is important because if we don't take the indent into 
1790      #     account, something like this (which looks right) won't work as expected:
1791      #
1792      #     <div>
1793      #         <div markdown="1">
1794      #         Hello World.  <-- Is this a Markdown code block or text?
1795      #         </div>  <-- Is this a Markdown code block or a real tag?
1796      #     <div>
1797      #
1798      #     If you don't like this, just don't indent the tag on which
1799      #     you apply the markdown="1" attribute.
1800      #
1801      # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
1802      #     tag with that name. Nested tags supported.
1803      #
1804      # *   If $span is true, text inside must treated as span. So any double 
1805      #     newline will be replaced by a single newline so that it does not create 
1806      #     paragraphs.
1807      #
1808      # Returns an array of that form: ( processed text , remaining text )
1809      #
1810          if ($text === '') return array('', '');
1811  
1812          # Regex to check for the presense of newlines around a block tag.
1813          $newline_before_re = '/(?:^\n?|\n\n)*$/';
1814          $newline_after_re = 
1815              '{
1816                  ^                        # Start of text following the tag.
1817                  (?>[ ]*<!--.*?-->)?        # Optional comment.
1818                  [ ]*\n                    # Must be followed by newline.
1819              }xs';
1820          
1821          # Regex to match any tag.
1822          $block_tag_re =
1823              '{
1824                  (                    # $2: Capture whole tag.
1825                      </?                    # Any opening or closing tag.
1826                          (?>                # Tag name.
1827                              '.$this->block_tags_re.'            |
1828                              '.$this->context_block_tags_re.'    |
1829                              '.$this->clean_tags_re.'            |
1830                              (?!\s)'.$enclosing_tag_re.'
1831                          )
1832                          (?:
1833                              (?=[\s"\'/a-zA-Z0-9])    # Allowed characters after tag name.
1834                              (?>
1835                                  ".*?"        |    # Double quotes (can contain `>`)
1836                                  \'.*?\'       |    # Single quotes (can contain `>`)
1837                                  .+?                # Anything but quotes and `>`.
1838                              )*?
1839                          )?
1840                      >                    # End of tag.
1841                  |
1842                      <!--    .*?     -->    # HTML Comment
1843                  |
1844                      <\?.*?\?> | <%.*?%>    # Processing instruction
1845                  |
1846                      <!\[CDATA\[.*?\]\]>    # CData Block
1847                  '. ( !$span ? ' # If not in span.
1848                  |
1849                      # Indented code block
1850                      (?: ^[ ]*\n | ^ | \n[ ]*\n )
1851                      [ ]{'.($indent+4).'}[^\n]* \n
1852                      (?>
1853                          (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1854                      )*
1855                  |
1856                      # Fenced code block marker
1857                      (?<= ^ | \n )
1858                      [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1859                                      [ ]*
1860                      (?:
1861                      \.?[-_:a-zA-Z0-9]+ # standalone class name
1862                      |
1863                          '.$this->id_class_attr_nocatch_re.' # extra attributes
1864                      )?
1865                      [ ]*
1866                      (?= \n )
1867                  ' : '' ). ' # End (if not is span).
1868                  |
1869                      # Code span marker
1870                      # Note, this regex needs to go after backtick fenced
1871                      # code blocks but it should also be kept outside of the
1872                      # "if not in span" condition adding backticks to the parser
1873                      `+
1874                  )
1875              }xs';
1876  
1877          
1878          $depth = 0;        # Current depth inside the tag tree.
1879          $parsed = "";    # Parsed text that will be returned.
1880  
1881          #
1882          # Loop through every tag until we find the closing tag of the parent
1883          # or loop until reaching the end of text if no parent tag specified.
1884          #
1885          do {
1886              #
1887              # Split the text using the first $tag_match pattern found.
1888              # Text before  pattern will be first in the array, text after
1889              # pattern will be at the end, and between will be any catches made 
1890              # by the pattern.
1891              #
1892              $parts = preg_split($block_tag_re, $text, 2, 
1893                                  PREG_SPLIT_DELIM_CAPTURE);
1894              
1895              # If in Markdown span mode, add a empty-string span-level hash 
1896              # after each newline to prevent triggering any block element.
1897              if ($span) {
1898                  $void = $this->hashPart("", ':');
1899                  $newline = "$void\n";
1900                  $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1901              }
1902              
1903              $parsed .= $parts[0]; # Text before current tag.
1904              
1905              # If end of $text has been reached. Stop loop.
1906              if (count($parts) < 3) {
1907                  $text = "";
1908                  break;
1909              }
1910              
1911              $tag  = $parts[1]; # Tag to handle.
1912              $text = $parts[2]; # Remaining text after current tag.
1913              $tag_re = preg_quote($tag); # For use in a regular expression.
1914              
1915              #
1916              # Check for: Fenced code block marker.
1917              # Note: need to recheck the whole tag to disambiguate backtick
1918              # fences from code spans
1919              #
1920              if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1921                  # Fenced code block marker: find matching end marker.
1922                  $fence_indent = strlen($capture[1]); # use captured indent in re
1923                  $fence_re = $capture[2]; # use captured fence in re
1924                  if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1925                      $matches)) 
1926                  {
1927                      # End marker found: pass text unchanged until marker.
1928                      $parsed .= $tag . $matches[0];
1929                      $text = substr($text, strlen($matches[0]));
1930                  }
1931                  else {
1932                      # No end marker: just skip it.
1933                      $parsed .= $tag;
1934                  }
1935              }
1936              #
1937              # Check for: Indented code block.
1938              #
1939              else if ($tag{0} == "\n" || $tag{0} == " ") {
1940                  # Indented code block: pass it unchanged, will be handled 
1941                  # later.
1942                  $parsed .= $tag;
1943              }
1944              #
1945              # Check for: Code span marker
1946              # Note: need to check this after backtick fenced code blocks
1947              #
1948              else if ($tag{0} == "`") {
1949                  # Find corresponding end marker.
1950                  $tag_re = preg_quote($tag);
1951                  if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1952                      $text, $matches))
1953                  {
1954                      # End marker found: pass text unchanged until marker.
1955                      $parsed .= $tag . $matches[0];
1956                      $text = substr($text, strlen($matches[0]));
1957                  }
1958                  else {
1959                      # Unmatched marker: just skip it.
1960                      $parsed .= $tag;
1961                  }
1962              }
1963              #
1964              # Check for: Opening Block level tag or
1965              #            Opening Context Block tag (like ins and del) 
1966              #               used as a block tag (tag is alone on it's line).
1967              #
1968              else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1969                  (    preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1970                      preg_match($newline_before_re, $parsed) &&
1971                      preg_match($newline_after_re, $text)    )
1972                  )
1973              {
1974                  # Need to parse tag and following text using the HTML parser.
1975                  list($block_text, $text) = 
1976                      $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1977                  
1978                  # Make sure it stays outside of any paragraph by adding newlines.
1979                  $parsed .= "\n\n$block_text\n\n";
1980              }
1981              #
1982              # Check for: Clean tag (like script, math)
1983              #            HTML Comments, processing instructions.
1984              #
1985              else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1986                  $tag{1} == '!' || $tag{1} == '?')
1987              {
1988                  # Need to parse tag and following text using the HTML parser.
1989                  # (don't check for markdown attribute)
1990                  list($block_text, $text) = 
1991                      $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1992                  
1993                  $parsed .= $block_text;
1994              }
1995              #
1996              # Check for: Tag with same name as enclosing tag.
1997              #
1998              else if ($enclosing_tag_re !== '' &&
1999                  # Same name as enclosing tag.
2000                  preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2001              {
2002                  #
2003                  # Increase/decrease nested tag count.
2004                  #
2005                  if ($tag{1} == '/')                        $depth--;
2006                  else if ($tag{strlen($tag)-2} != '/')    $depth++;
2007  
2008                  if ($depth < 0) {
2009                      #
2010                      # Going out of parent element. Clean up and break so we
2011                      # return to the calling function.
2012                      #
2013                      $text = $tag . $text;
2014                      break;
2015                  }
2016                  
2017                  $parsed .= $tag;
2018              }
2019              else {
2020                  $parsed .= $tag;
2021              }
2022          } while ($depth >= 0);
2023          
2024          return array($parsed, $text);
2025      }
2026  	protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2027      #
2028      # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2029      #
2030      # *   Calls $hash_method to convert any blocks.
2031      # *   Stops when the first opening tag closes.
2032      # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2033      #     (it is not inside clean tags)
2034      #
2035      # Returns an array of that form: ( processed text , remaining text )
2036      #
2037          if ($text === '') return array('', '');
2038          
2039          # Regex to match `markdown` attribute inside of a tag.
2040          $markdown_attr_re = '
2041              {
2042                  \s*            # Eat whitespace before the `markdown` attribute
2043                  markdown
2044                  \s*=\s*
2045                  (?>
2046                      (["\'])        # $1: quote delimiter        
2047                      (.*?)        # $2: attribute value
2048                      \1            # matching delimiter    
2049                  |
2050                      ([^\s>]*)    # $3: unquoted attribute value
2051                  )
2052                  ()                # $4: make $3 always defined (avoid warnings)
2053              }xs';
2054          
2055          # Regex to match any tag.
2056          $tag_re = '{
2057                  (                    # $2: Capture whole tag.
2058                      </?                    # Any opening or closing tag.
2059                          [\w:$]+            # Tag name.
2060                          (?:
2061                              (?=[\s"\'/a-zA-Z0-9])    # Allowed characters after tag name.
2062                              (?>
2063                                  ".*?"        |    # Double quotes (can contain `>`)
2064                                  \'.*?\'       |    # Single quotes (can contain `>`)
2065                                  .+?                # Anything but quotes and `>`.
2066                              )*?
2067                          )?
2068                      >                    # End of tag.
2069                  |
2070                      <!--    .*?     -->    # HTML Comment
2071                  |
2072                      <\?.*?\?> | <%.*?%>    # Processing instruction
2073                  |
2074                      <!\[CDATA\[.*?\]\]>    # CData Block
2075                  )
2076              }xs';
2077          
2078          $original_text = $text;        # Save original text in case of faliure.
2079          
2080          $depth        = 0;    # Current depth inside the tag tree.
2081          $block_text    = "";    # Temporary text holder for current text.
2082          $parsed        = "";    # Parsed text that will be returned.
2083  
2084          #
2085          # Get the name of the starting tag.
2086          # (This pattern makes $base_tag_name_re safe without quoting.)
2087          #
2088          if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2089              $base_tag_name_re = $matches[1];
2090  
2091          #
2092          # Loop through every tag until we find the corresponding closing tag.
2093          #
2094          do {
2095              #
2096              # Split the text using the first $tag_match pattern found.
2097              # Text before  pattern will be first in the array, text after
2098              # pattern will be at the end, and between will be any catches made 
2099              # by the pattern.
2100              #
2101              $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2102              
2103              if (count($parts) < 3) {
2104                  #
2105                  # End of $text reached with unbalenced tag(s).
2106                  # In that case, we return original text unchanged and pass the
2107                  # first character as filtered to prevent an infinite loop in the 
2108                  # parent function.
2109                  #
2110                  return array($original_text{0}, substr($original_text, 1));
2111              }
2112              
2113              $block_text .= $parts[0]; # Text before current tag.
2114              $tag         = $parts[1]; # Tag to handle.
2115              $text        = $parts[2]; # Remaining text after current tag.
2116              
2117              #
2118              # Check for: Auto-close tag (like <hr/>)
2119              #             Comments and Processing Instructions.
2120              #
2121              if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2122                  $tag{1} == '!' || $tag{1} == '?')
2123              {
2124                  # Just add the tag to the block as if it was text.
2125                  $block_text .= $tag;
2126              }
2127              else {
2128                  #
2129                  # Increase/decrease nested tag count. Only do so if
2130                  # the tag's name match base tag's.
2131                  #
2132                  if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2133                      if ($tag{1} == '/')                        $depth--;
2134                      else if ($tag{strlen($tag)-2} != '/')    $depth++;
2135                  }
2136                  
2137                  #
2138                  # Check for `markdown="1"` attribute and handle it.
2139                  #
2140                  if ($md_attr && 
2141                      preg_match($markdown_attr_re, $tag, $attr_m) &&
2142                      preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2143                  {
2144                      # Remove `markdown` attribute from opening tag.
2145                      $tag = preg_replace($markdown_attr_re, '', $tag);
2146                      
2147                      # Check if text inside this tag must be parsed in span mode.
2148                      $this->mode = $attr_m[2] . $attr_m[3];
2149                      $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2150                          preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2151                      
2152                      # Calculate indent before tag.
2153                      if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2154                          $strlen = $this->utf8_strlen;
2155                          $indent = $strlen($matches[1], 'UTF-8');
2156                      } else {
2157                          $indent = 0;
2158                      }
2159                      
2160                      # End preceding block with this tag.
2161                      $block_text .= $tag;
2162                      $parsed .= $this->$hash_method($block_text);
2163                      
2164                      # Get enclosing tag name for the ParseMarkdown function.
2165                      # (This pattern makes $tag_name_re safe without quoting.)
2166                      preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2167                      $tag_name_re = $matches[1];
2168                      
2169                      # Parse the content using the HTML-in-Markdown parser.
2170                      list ($block_text, $text)
2171                          = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
2172                              $tag_name_re, $span_mode);
2173                      
2174                      # Outdent markdown text.
2175                      if ($indent > 0) {
2176                          $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
2177                                                      $block_text);
2178                      }
2179                      
2180                      # Append tag content to parsed text.
2181                      if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
2182                      else                $parsed .= "$block_text";
2183                      
2184                      # Start over with a new block.
2185                      $block_text = "";
2186                  }
2187                  else $block_text .= $tag;
2188              }
2189              
2190          } while ($depth > 0);
2191          
2192          #
2193          # Hash last block text that wasn't processed inside the loop.
2194          #
2195          $parsed .= $this->$hash_method($block_text);
2196          
2197          return array($parsed, $text);
2198      }
2199  
2200  
2201  	protected function hashClean($text) {
2202      #
2203      # Called whenever a tag must be hashed when a function inserts a "clean" tag
2204      # in $text, it passes through this function and is automaticaly escaped, 
2205      # blocking invalid nested overlap.
2206      #
2207          return $this->hashPart($text, 'C');
2208      }
2209  
2210  
2211  	protected function doAnchors($text) {
2212      #
2213      # Turn Markdown link shortcuts into XHTML <a> tags.
2214      #
2215          if ($this->in_anchor) return $text;
2216          $this->in_anchor = true;
2217          
2218          #
2219          # First, handle reference-style links: [link text] [id]
2220          #
2221          $text = preg_replace_callback('{
2222              (                    # wrap whole match in $1
2223                \[
2224                  ('.$this->nested_brackets_re.')    # link text = $2
2225                \]
2226  
2227                [ ]?                # one optional space
2228                (?:\n[ ]*)?        # one optional newline followed by spaces
2229  
2230                \[
2231                  (.*?)        # id = $3
2232                \]
2233              )
2234              }xs',
2235              array($this, '_doAnchors_reference_callback'), $text);
2236  
2237          #
2238          # Next, inline-style links: [link text](url "optional title")
2239          #
2240          $text = preg_replace_callback('{
2241              (                # wrap whole match in $1
2242                \[
2243                  ('.$this->nested_brackets_re.')    # link text = $2
2244                \]
2245                \(            # literal paren
2246                  [ \n]*
2247                  (?:
2248                      <(.+?)>    # href = $3
2249                  |
2250                      ('.$this->nested_url_parenthesis_re.')    # href = $4
2251                  )
2252                  [ \n]*
2253                  (            # $5
2254                    ([\'"])    # quote char = $6
2255                    (.*?)        # Title = $7
2256                    \6        # matching quote
2257                    [ \n]*    # ignore any spaces/tabs between closing quote and )
2258                  )?            # title is optional
2259                \)
2260                (?:[ ]? '.$this->id_class_attr_catch_re.' )?     # $8 = id/class attributes
2261              )
2262              }xs',
2263              array($this, '_doAnchors_inline_callback'), $text);
2264  
2265          #
2266          # Last, handle reference-style shortcuts: [link text]
2267          # These must come last in case you've also got [link text][1]
2268          # or [link text](/foo)
2269          #
2270          $text = preg_replace_callback('{
2271              (                    # wrap whole match in $1
2272                \[
2273                  ([^\[\]]+)        # link text = $2; can\'t contain [ or ]
2274                \]
2275              )
2276              }xs',
2277              array($this, '_doAnchors_reference_callback'), $text);
2278  
2279          $this->in_anchor = false;
2280          return $text;
2281      }
2282  	protected function _doAnchors_reference_callback($matches) {
2283          $whole_match =  $matches[1];
2284          $link_text   =  $matches[2];
2285          $link_id     =& $matches[3];
2286  
2287          if ($link_id == "") {
2288              # for shortcut links like [this][] or [this].
2289              $link_id = $link_text;
2290          }
2291          
2292          # lower-case and turn embedded newlines into spaces
2293          $link_id = strtolower($link_id);
2294          $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
2295  
2296          if (isset($this->urls[$link_id])) {
2297              $url = $this->urls[$link_id];
2298              $url = $this->encodeAttribute($url);
2299              
2300              $result = "<a href=\"$url\"";
2301              if ( isset( $this->titles[$link_id] ) ) {
2302                  $title = $this->titles[$link_id];
2303                  $title = $this->encodeAttribute($title);
2304                  $result .=  " title=\"$title\"";
2305              }
2306              if (isset($this->ref_attr[$link_id]))
2307                  $result .= $this->ref_attr[$link_id];
2308          
2309              $link_text = $this->runSpanGamut($link_text);
2310              $result .= ">$link_text</a>";
2311              $result = $this->hashPart($result);
2312          }
2313          else {
2314              $result = $whole_match;
2315          }
2316          return $result;
2317      }
2318  	protected function _doAnchors_inline_callback($matches) {
2319          $whole_match    =  $matches[1];
2320          $link_text        =  $this->runSpanGamut($matches[2]);
2321          $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
2322          $title            =& $matches[7];
2323          $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
2324  
2325          // if the URL was of the form <s p a c e s> it got caught by the HTML
2326          // tag parser and hashed. Need to reverse the process before using the URL.
2327          $unhashed = $this->unhash($url);
2328          if ($unhashed != $url)
2329              $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
2330  
2331          $url = $this->encodeAttribute($url);
2332  
2333          $result = "<a href=\"$url\"";
2334          if (isset($title)) {
2335              $title = $this->encodeAttribute($title);
2336              $result .=  " title=\"$title\"";
2337          }
2338          $result .= $attr;
2339          
2340          $link_text = $this->runSpanGamut($link_text);
2341          $result .= ">$link_text</a>";
2342  
2343          return $this->hashPart($result);
2344      }
2345  
2346  
2347  	protected function doImages($text) {
2348      #
2349      # Turn Markdown image shortcuts into <img> tags.
2350      #
2351          #
2352          # First, handle reference-style labeled images: ![alt text][id]
2353          #
2354          $text = preg_replace_callback('{
2355              (                # wrap whole match in $1
2356                !\[
2357                  ('.$this->nested_brackets_re.')        # alt text = $2
2358                \]
2359  
2360                [ ]?                # one optional space
2361                (?:\n[ ]*)?        # one optional newline followed by spaces
2362  
2363                \[
2364                  (.*?)        # id = $3
2365                \]
2366  
2367              )
2368              }xs', 
2369              array($this, '_doImages_reference_callback'), $text);
2370  
2371          #
2372          # Next, handle inline images:  ![alt text](url "optional title")
2373          # Don't forget: encode * and _
2374          #
2375          $text = preg_replace_callback('{
2376              (                # wrap whole match in $1
2377                !\[
2378                  ('.$this->nested_brackets_re.')        # alt text = $2
2379                \]
2380                \s?            # One optional whitespace character
2381                \(            # literal paren
2382                  [ \n]*
2383                  (?:
2384                      <(\S*)>    # src url = $3
2385                  |
2386                      ('.$this->nested_url_parenthesis_re.')    # src url = $4
2387                  )
2388                  [ \n]*
2389                  (            # $5
2390                    ([\'"])    # quote char = $6
2391                    (.*?)        # title = $7
2392                    \6        # matching quote
2393                    [ \n]*
2394                  )?            # title is optional
2395                \)
2396                (?:[ ]? '.$this->id_class_attr_catch_re.' )?     # $8 = id/class attributes
2397              )
2398              }xs',
2399              array($this, '_doImages_inline_callback'), $text);
2400  
2401          return $text;
2402      }
2403  	protected function _doImages_reference_callback($matches) {
2404          $whole_match = $matches[1];
2405          $alt_text    = $matches[2];
2406          $link_id     = strtolower($matches[3]);
2407  
2408          if ($link_id == "") {
2409              $link_id = strtolower($alt_text); # for shortcut links like ![this][].
2410          }
2411  
2412          $alt_text = $this->encodeAttribute($alt_text);
2413          if (isset($this->urls[$link_id])) {
2414              $url = $this->encodeAttribute($this->urls[$link_id]);
2415              $result = "<img src=\"$url\" alt=\"$alt_text\"";
2416              if (isset($this->titles[$link_id])) {
2417                  $title = $this->titles[$link_id];
2418                  $title = $this->encodeAttribute($title);
2419                  $result .=  " title=\"$title\"";
2420              }
2421              if (isset($this->ref_attr[$link_id]))
2422                  $result .= $this->ref_attr[$link_id];
2423              $result .= $this->empty_element_suffix;
2424              $result = $this->hashPart($result);
2425          }
2426          else {
2427              # If there's no such link ID, leave intact:
2428              $result = $whole_match;
2429          }
2430  
2431          return $result;
2432      }
2433  	protected function _doImages_inline_callback($matches) {
2434          $whole_match    = $matches[1];
2435          $alt_text        = $matches[2];
2436          $url            = $matches[3] == '' ? $matches[4] : $matches[3];
2437          $title            =& $matches[7];
2438          $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
2439  
2440          $alt_text = $this->encodeAttribute($alt_text);
2441          $url = $this->encodeAttribute($url);
2442          $result = "<img src=\"$url\" alt=\"$alt_text\"";
2443          if (isset($title)) {
2444              $title = $this->encodeAttribute($title);
2445              $result .=  " title=\"$title\""; # $title already quoted
2446          }
2447          $result .= $attr;
2448          $result .= $this->empty_element_suffix;
2449  
2450          return $this->hashPart($result);
2451      }
2452  
2453  
2454  	protected function doHeaders($text) {
2455      #
2456      # Redefined to add id and class attribute support.
2457      #
2458          # Setext-style headers:
2459          #      Header 1  {#header1}
2460          #      ========
2461          #  
2462          #      Header 2  {#header2 .class1 .class2}
2463          #      --------
2464          #
2465          $text = preg_replace_callback(
2466              '{
2467                  (^.+?)                                # $1: Header text
2468                  (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
2469                  [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
2470              }mx',
2471              array($this, '_doHeaders_callback_setext'), $text);
2472  
2473          # atx-style headers:
2474          #    # Header 1        {#header1}
2475          #    ## Header 2       {#header2}
2476          #    ## Header 2 with closing hashes ##  {#header3.class1.class2}
2477          #    ...
2478          #    ###### Header 6   {.class2}
2479          #
2480          $text = preg_replace_callback('{
2481                  ^(\#{1,6})    # $1 = string of #\'s
2482                  [ ]*
2483                  (.+?)        # $2 = Header text
2484                  [ ]*
2485                  \#*            # optional closing #\'s (not counted)
2486                  (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
2487                  [ ]*
2488                  \n+
2489              }xm',
2490              array($this, '_doHeaders_callback_atx'), $text);
2491  
2492          return $text;
2493      }
2494  	protected function _doHeaders_callback_setext($matches) {
2495          if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2496              return $matches[0];
2497          $level = $matches[3]{0} == '=' ? 1 : 2;
2498          $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
2499          $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2500          return "\n" . $this->hashBlock($block) . "\n\n";
2501      }
2502  	protected function _doHeaders_callback_atx($matches) {
2503          $level = strlen($matches[1]);
2504          $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3]);
2505          $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2506          return "\n" . $this->hashBlock($block) . "\n\n";
2507      }
2508  
2509  
2510  	protected function doTables($text) {
2511      #
2512      # Form HTML tables.
2513      #
2514          $less_than_tab = $this->tab_width - 1;
2515          #
2516          # Find tables with leading pipe.
2517          #
2518          #    | Header 1 | Header 2
2519          #    | -------- | --------
2520          #    | Cell 1   | Cell 2
2521          #    | Cell 3   | Cell 4
2522          #
2523          $text = preg_replace_callback('
2524              {
2525                  ^                            # Start of a line
2526                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2527                  [|]                            # Optional leading pipe (present)
2528                  (.+) \n                        # $1: Header row (at least one pipe)
2529                  
2530                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2531                  [|] ([ ]*[-:]+[-| :]*) \n    # $2: Header underline
2532                  
2533                  (                            # $3: Cells
2534                      (?>
2535                          [ ]*                # Allowed whitespace.
2536                          [|] .* \n            # Row content.
2537                      )*
2538                  )
2539                  (?=\n|\Z)                    # Stop at final double newline.
2540              }xm',
2541              array($this, '_doTable_leadingPipe_callback'), $text);
2542          
2543          #
2544          # Find tables without leading pipe.
2545          #
2546          #    Header 1 | Header 2
2547          #    -------- | --------
2548          #    Cell 1   | Cell 2
2549          #    Cell 3   | Cell 4
2550          #
2551          $text = preg_replace_callback('
2552              {
2553                  ^                            # Start of a line
2554                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2555                  (\S.*[|].*) \n                # $1: Header row (at least one pipe)
2556                  
2557                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
2558                  ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
2559                  
2560                  (                            # $3: Cells
2561                      (?>
2562                          .* [|] .* \n        # Row content
2563                      )*
2564                  )
2565                  (?=\n|\Z)                    # Stop at final double newline.
2566              }xm',
2567              array($this, '_DoTable_callback'), $text);
2568  
2569          return $text;
2570      }
2571  	protected function _doTable_leadingPipe_callback($matches) {
2572          $head        = $matches[1];
2573          $underline    = $matches[2];
2574          $content    = $matches[3];
2575          
2576          # Remove leading pipe for each row.
2577          $content    = preg_replace('/^ *[|]/m', '', $content);
2578          
2579          return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2580      }
2581  	protected function _doTable_makeAlignAttr($alignname)
2582      {
2583          if (empty($this->table_align_class_tmpl))
2584              return " align=\"$alignname\"";
2585  
2586          $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
2587          return " class=\"$classname\"";
2588      }
2589  	protected function _doTable_callback($matches) {
2590          $head        = $matches[1];
2591          $underline    = $matches[2];
2592          $content    = $matches[3];
2593  
2594          # Remove any tailing pipes for each line.
2595          $head        = preg_replace('/[|] *$/m', '', $head);
2596          $underline    = preg_replace('/[|] *$/m', '', $underline);
2597          $content    = preg_replace('/[|] *$/m', '', $content);
2598          
2599          # Reading alignement from header underline.
2600          $separators    = preg_split('/ *[|] */', $underline);
2601          foreach ($separators as $n => $s) {
2602              if (preg_match('/^ *-+: *$/', $s))
2603                  $attr[$n] = $this->_doTable_makeAlignAttr('right');
2604              else if (preg_match('/^ *:-+: *$/', $s))
2605                  $attr[$n] = $this->_doTable_makeAlignAttr('center');
2606              else if (preg_match('/^ *:-+ *$/', $s))
2607                  $attr[$n] = $this->_doTable_makeAlignAttr('left');
2608              else
2609                  $attr[$n] = '';
2610          }
2611          
2612          # Parsing span elements, including code spans, character escapes, 
2613          # and inline HTML tags, so that pipes inside those gets ignored.
2614          $head        = $this->parseSpan($head);
2615          $headers    = preg_split('/ *[|] */', $head);
2616          $col_count    = count($headers);
2617          $attr       = array_pad($attr, $col_count, '');
2618          
2619          # Write column headers.
2620          $text = "<table>\n";
2621          $text .= "<thead>\n";
2622          $text .= "<tr>\n";
2623          foreach ($headers as $n => $header)
2624              $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2625          $text .= "</tr>\n";
2626          $text .= "</thead>\n";
2627          
2628          # Split content by row.
2629          $rows = explode("\n", trim($content, "\n"));
2630          
2631          $text .= "<tbody>\n";
2632          foreach ($rows as $row) {
2633              # Parsing span elements, including code spans, character escapes, 
2634              # and inline HTML tags, so that pipes inside those gets ignored.
2635              $row = $this->parseSpan($row);
2636              
2637              # Split row by cell.
2638              $row_cells = preg_split('/ *[|] */', $row, $col_count);
2639              $row_cells = array_pad($row_cells, $col_count, '');
2640              
2641              $text .= "<tr>\n";
2642              foreach ($row_cells as $n => $cell)
2643                  $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2644              $text .= "</tr>\n";
2645          }
2646          $text .= "</tbody>\n";
2647          $text .= "</table>";
2648          
2649          return $this->hashBlock($text) . "\n";
2650      }
2651  
2652      
2653  	protected function doDefLists($text) {
2654      #
2655      # Form HTML definition lists.
2656      #
2657          $less_than_tab = $this->tab_width - 1;
2658  
2659          # Re-usable pattern to match any entire dl list:
2660          $whole_list_re = '(?>
2661              (                                # $1 = whole list
2662                (                                # $2
2663                  [ ]{0,'.$less_than_tab.'}
2664                  ((?>.*\S.*\n)+)                # $3 = defined term
2665                  \n?
2666                  [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2667                )
2668                (?s:.+?)
2669                (                                # $4
2670                    \z
2671                  |
2672                    \n{2,}
2673                    (?=\S)
2674                    (?!                        # Negative lookahead for another term
2675                      [ ]{0,'.$less_than_tab.'}
2676                      (?: \S.*\n )+?            # defined term
2677                      \n?
2678                      [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2679                    )
2680                    (?!                        # Negative lookahead for another definition
2681                      [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2682                    )
2683                )
2684              )
2685          )'; // mx
2686  
2687          $text = preg_replace_callback('{
2688                  (?>\A\n?|(?<=\n\n))
2689                  '.$whole_list_re.'
2690              }mx',
2691              array($this, '_doDefLists_callback'), $text);
2692  
2693          return $text;
2694      }
2695  	protected function _doDefLists_callback($matches) {
2696          # Re-usable patterns to match list item bullets and number markers:
2697          $list = $matches[1];
2698          
2699          # Turn double returns into triple returns, so that we can make a
2700          # paragraph for the last item in a list, if necessary:
2701          $result = trim($this->processDefListItems($list));
2702          $result = "<dl>\n" . $result . "\n</dl>";
2703          return $this->hashBlock($result) . "\n\n";
2704      }
2705  
2706  
2707  	protected function processDefListItems($list_str) {
2708      #
2709      #    Process the contents of a single definition list, splitting it
2710      #    into individual term and definition list items.
2711      #
2712          $less_than_tab = $this->tab_width - 1;
2713          
2714          # trim trailing blank lines:
2715          $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2716  
2717          # Process definition terms.
2718          $list_str = preg_replace_callback('{
2719              (?>\A\n?|\n\n+)                    # leading line
2720              (                                # definition terms = $1
2721                  [ ]{0,'.$less_than_tab.'}    # leading whitespace
2722                  (?!\:[ ]|[ ])                # negative lookahead for a definition
2723                                              #   mark (colon) or more whitespace.
2724                  (?> \S.* \n)+?                # actual term (not whitespace).    
2725              )            
2726              (?=\n?[ ]{0,3}:[ ])                # lookahead for following line feed 
2727                                              #   with a definition mark.
2728              }xm',
2729              array($this, '_processDefListItems_callback_dt'), $list_str);
2730  
2731          # Process actual definitions.
2732          $list_str = preg_replace_callback('{
2733              \n(\n+)?                        # leading line = $1
2734              (                                # marker space = $2
2735                  [ ]{0,'.$less_than_tab.'}    # whitespace before colon
2736                  \:[ ]+                        # definition mark (colon)
2737              )
2738              ((?s:.+?))                        # definition text = $3
2739              (?= \n+                         # stop at next definition mark,
2740                  (?:                            # next term or end of text
2741                      [ ]{0,'.$less_than_tab.'} \:[ ]    |
2742                      <dt> | \z
2743                  )                        
2744              )                    
2745              }xm',
2746              array($this, '_processDefListItems_callback_dd'), $list_str);
2747  
2748          return $list_str;
2749      }
2750  	protected function _processDefListItems_callback_dt($matches) {
2751          $terms = explode("\n", trim($matches[1]));
2752          $text = '';
2753          foreach ($terms as $term) {
2754              $term = $this->runSpanGamut(trim($term));
2755              $text .= "\n<dt>" . $term . "</dt>";
2756          }
2757          return $text . "\n";
2758      }
2759  	protected function _processDefListItems_callback_dd($matches) {
2760          $leading_line    = $matches[1];
2761          $marker_space    = $matches[2];
2762          $def            = $matches[3];
2763  
2764          if ($leading_line || preg_match('/\n{2,}/', $def)) {
2765              # Replace marker with the appropriate whitespace indentation
2766              $def = str_repeat(' ', strlen($marker_space)) . $def;
2767              $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2768              $def = "\n". $def ."\n";
2769          }
2770          else {
2771              $def = rtrim($def);
2772              $def = $this->runSpanGamut($this->outdent($def));
2773          }
2774  
2775          return "\n<dd>" . $def . "</dd>\n";
2776      }
2777  
2778  
2779  	protected function doFencedCodeBlocks($text) {
2780      #
2781      # Adding the fenced code block syntax to regular Markdown:
2782      #
2783      # ~~~
2784      # Code block
2785      # ~~~
2786      #
2787          $less_than_tab = $this->tab_width;
2788          
2789          $text = preg_replace_callback('{
2790                  (?:\n|\A)
2791                  # 1: Opening marker
2792                  (
2793                      (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
2794                  )
2795                  [ ]*
2796                  (?:
2797                      \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
2798                  |
2799                      '.$this->id_class_attr_catch_re.' # 3: Extra attributes
2800                  )?
2801                  [ ]* \n # Whitespace and newline following marker.
2802                  
2803                  # 4: Content
2804                  (
2805                      (?>
2806                          (?!\1 [ ]* \n)    # Not a closing marker.
2807                          .*\n+
2808                      )+
2809                  )
2810                  
2811                  # Closing marker.
2812                  \1 [ ]* (?= \n )
2813              }xm',
2814              array($this, '_doFencedCodeBlocks_callback'), $text);
2815  
2816          return $text;
2817      }
2818  	protected function _doFencedCodeBlocks_callback($matches) {
2819          $classname =& $matches[2];
2820          $attrs     =& $matches[3];
2821          $codeblock = $matches[4];
2822          $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2823          $codeblock = preg_replace_callback('/^\n+/',
2824              array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
2825  
2826          if ($classname != "") {
2827              if ($classname{0} == '.')
2828                  $classname = substr($classname, 1);
2829              $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
2830          } else {
2831              $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
2832          }
2833          $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
2834          $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
2835          $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
2836          
2837          return "\n\n".$this->hashBlock($codeblock)."\n\n";
2838      }
2839  	protected function _doFencedCodeBlocks_newlines($matches) {
2840          return str_repeat("<br$this->empty_element_suffix", 
2841              strlen($matches[0]));
2842      }
2843  
2844  
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # Each list maps a marker string to a regular-expression fragment, for
    # one, two and three marker characters respectively.
    #
    # - The '' entry matches an asterisk marker not adjacent to another
    #   asterisk, or an underscore marker not preceded by a letter, digit or
    #   underscore; in both cases the marker must not be followed by
    #   whitespace (optionally preceded by one of . , : ;).
    # - The asterisk entry matches the marker when it is not preceded by
    #   whitespace or an asterisk and not followed by an asterisk.
    # - The underscore entry matches the marker when it is not preceded by
    #   whitespace or an underscore and not followed by a letter, digit or
    #   underscore.
    #
    # NOTE(review): presumably '' is the pattern used while no emphasis is
    # open and the other keys are used to find the closing marker -- confirm
    # against the code that consumes these lists.
    #
    protected $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
        '*' => '(?<![\s*])\*(?!\*)',
        '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
        );
    protected $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
        '**' => '(?<![\s*])\*\*(?!\*)',
        '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
        );
    protected $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
        '***' => '(?<![\s*])\*\*\*(?!\*)',
        '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
        );
2864  
2865  
2866  	protected function formParagraphs($text) {
2867      #
2868      #    Params:
2869      #        $text - string to process with html <p> tags
2870      #
2871          # Strip leading and trailing lines:
2872          $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2873          
2874          $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2875  
2876          #
2877          # Wrap <p> tags and unhashify HTML blocks
2878          #
2879          foreach ($grafs as $key => $value) {
2880              $value = trim($this->runSpanGamut($value));
2881              
2882              # Check if this should be enclosed in a paragraph.
2883              # Clean tag hashes & block tag hashes are left alone.
2884              $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2885              
2886              if ($is_p) {
2887                  $value = "<p>$value</p>";
2888              }
2889              $grafs[$key] = $value;
2890          }
2891          
2892          # Join grafs in one text, then unhash HTML tags. 
2893          $text = implode("\n\n", $grafs);
2894          
2895          # Finish by removing any tag hashes still present in $text.
2896          $text = $this->unhash($text);
2897          
2898          return $text;
2899      }
2900      
2901      
2902      ### Footnotes
2903      
2904  	protected function stripFootnotes($text) {
2905      #
2906      # Strips link definitions from text, stores the URLs and titles in
2907      # hash references.
2908      #
2909          $less_than_tab = $this->tab_width - 1;
2910  
2911          # Link defs are in the form: [^id]: url "optional title"
2912          $text = preg_replace_callback('{
2913              ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:    # note_id = $1
2914                [ ]*
2915                \n?                    # maybe *one* newline
2916              (                        # text = $2 (no blank lines allowed)
2917                  (?:                    
2918                      .+                # actual text
2919                  |
2920                      \n                # newlines but 
2921                      (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
2922                      (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
2923                                      # by non-indented content
2924                  )*
2925              )        
2926              }xm',
2927              array($this, '_stripFootnotes_callback'),
2928              $text);
2929          return $text;
2930      }
2931  	protected function _stripFootnotes_callback($matches) {
2932          $note_id = $this->fn_id_prefix . $matches[1];
2933          $this->footnotes[$note_id] = $this->outdent($matches[2]);
2934          return ''; # String that will replace the block
2935      }
2936  
2937  
2938  	protected function doFootnotes($text) {
2939      #
2940      # Replace footnote references in $text [^id] with a special text-token 
2941      # which will be replaced by the actual footnote marker in appendFootnotes.
2942      #
2943          if (!$this->in_anchor) {
2944              $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2945          }
2946          return $text;
2947      }
2948  
2949      
2950  	protected function appendFootnotes($text) {
2951      #
2952      # Append footnote list to text.
2953      #
2954          $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
2955              array($this, '_appendFootnotes_callback'), $text);
2956      
2957          if (!empty($this->footnotes_ordered)) {
2958              $text .= "\n\n";
2959              $text .= "<div class=\"footnotes\">\n";
2960              $text .= "<hr". $this->empty_element_suffix ."\n";
2961              $text .= "<ol>\n\n";
2962  
2963              $attr = "";
2964              if ($this->fn_backlink_class != "") {
2965                  $class = $this->fn_backlink_class;
2966                  $class = $this->encodeAttribute($class);
2967                  $attr .= " class=\"$class\"";
2968              }
2969              if ($this->fn_backlink_title != "") {
2970                  $title = $this->fn_backlink_title;
2971                  $title = $this->encodeAttribute($title);
2972                  $attr .= " title=\"$title\"";
2973              }
2974              $num = 0;
2975              
2976              while (!empty($this->footnotes_ordered)) {
2977                  $footnote = reset($this->footnotes_ordered);
2978                  $note_id = key($this->footnotes_ordered);
2979                  unset($this->footnotes_ordered[$note_id]);
2980                  $ref_count = $this->footnotes_ref_count[$note_id];
2981                  unset($this->footnotes_ref_count[$note_id]);
2982                  unset($this->footnotes[$note_id]);
2983                  
2984                  $footnote .= "\n"; # Need to append newline before parsing.
2985                  $footnote = $this->runBlockGamut("$footnote\n");                
2986                  $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
2987                      array($this, '_appendFootnotes_callback'), $footnote);
2988                  
2989                  $attr = str_replace("%%", ++$num, $attr);
2990                  $note_id = $this->encodeAttribute($note_id);
2991  
2992                  # Prepare backlink, multiple backlinks if multiple references
2993                  $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
2994                  for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
2995                      $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
2996                  }
2997                  # Add backlink to last paragraph; create new paragraph if needed.
2998                  if (preg_match('{</p>$}', $footnote)) {
2999                      $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
3000                  } else {
3001                      $footnote .= "\n\n<p>$backlink</p>";
3002                  }
3003                  
3004                  $text .= "<li id=\"fn:$note_id\">\n";
3005                  $text .= $footnote . "\n";
3006                  $text .= "</li>\n\n";
3007              }
3008              
3009              $text .= "</ol>\n";
3010              $text .= "</div>";
3011          }
3012          return $text;
3013      }
3014  	protected function _appendFootnotes_callback($matches) {
3015          $node_id = $this->fn_id_prefix . $matches[1];
3016          
3017          # Create footnote marker only if it has a corresponding footnote *and*
3018          # the footnote hasn't been used by another marker.
3019          if (isset($this->footnotes[$node_id])) {
3020              $num =& $this->footnotes_numbers[$node_id];
3021              if (!isset($num)) {
3022                  # Transfer footnote content to the ordered list and give it its
3023                  # number
3024                  $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
3025                  $this->footnotes_ref_count[$node_id] = 1;
3026                  $num = $this->footnote_counter++;
3027                  $ref_count_mark = '';
3028              } else {
3029                  $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
3030              }
3031  
3032              $attr = "";
3033              if ($this->fn_link_class != "") {
3034                  $class = $this->fn_link_class;
3035                  $class = $this->encodeAttribute($class);
3036                  $attr .= " class=\"$class\"";
3037              }
3038              if ($this->fn_link_title != "") {
3039                  $title = $this->fn_link_title;
3040                  $title = $this->encodeAttribute($title);
3041                  $attr .= " title=\"$title\"";
3042              }
3043              
3044              $attr = str_replace("%%", $num, $attr);
3045              $node_id = $this->encodeAttribute($node_id);
3046              
3047              return
3048                  "<sup id=\"fnref$ref_count_mark:$node_id\">".
3049                  "<a href=\"#fn:$node_id\"$attr>$num</a>".
3050                  "</sup>";
3051          }
3052          
3053          return "[^".$matches[1]."]";
3054      }
3055          
3056      
3057      ### Abbreviations ###
3058      
3059  	protected function stripAbbreviations($text) {
3060      #
3061      # Strips abbreviations from text, stores titles in hash references.
3062      #
3063          $less_than_tab = $this->tab_width - 1;
3064  
3065          # Link defs are in the form: [id]*: url "optional title"
3066          $text = preg_replace_callback('{
3067              ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:    # abbr_id = $1
3068              (.*)                    # text = $2 (no blank lines allowed)    
3069              }xm',
3070              array($this, '_stripAbbreviations_callback'),
3071              $text);
3072          return $text;
3073      }
3074  	protected function _stripAbbreviations_callback($matches) {
3075          $abbr_word = $matches[1];
3076          $abbr_desc = $matches[2];
3077          if ($this->abbr_word_re)
3078              $this->abbr_word_re .= '|';
3079          $this->abbr_word_re .= preg_quote($abbr_word);
3080          $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
3081          return ''; # String that will replace the block
3082      }
3083      
3084      
3085  	protected function doAbbreviations($text) {
3086      #
3087      # Find defined abbreviations in text and wrap them in <abbr> elements.
3088      #
3089          if ($this->abbr_word_re) {
3090              // cannot use the /x modifier because abbr_word_re may 
3091              // contain significant spaces:
3092              $text = preg_replace_callback('{'.
3093                  '(?<![\w\x1A])'.
3094                  '(?:'.$this->abbr_word_re.')'.
3095                  '(?![\w\x1A])'.
3096                  '}', 
3097                  array($this, '_doAbbreviations_callback'), $text);
3098          }
3099          return $text;
3100      }
3101  	protected function _doAbbreviations_callback($matches) {
3102          $abbr = $matches[0];
3103          if (isset($this->abbr_desciptions[$abbr])) {
3104              $desc = $this->abbr_desciptions[$abbr];
3105              if (empty($desc)) {
3106                  return $this->hashPart("<abbr>$abbr</abbr>");
3107              } else {
3108                  $desc = $this->encodeAttribute($desc);
3109                  return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
3110              }
3111          } else {
3112              return $matches[0];
3113          }
3114      }
3115  
3116  }


Generated: Fri Nov 28 20:08:37 2014 Cross-referenced by PHPXref 0.7.1