[ Index ]

PHP Cross Reference of vtigercrm-6.1.0

title

Body

[close]

/libraries/htmlpurifier/library/HTMLPurifier/Lexer/ -> PH5P.php (source)

   1  <?php
   2  
   3  /**
   4   * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
   5   * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.
   6   * 
   7   * @note
   8   *    Recent changes to PHP's DOM extension have resulted in some fatal
   9   *    error conditions with the original version of PH5P. Pending changes,
  10   *    this lexer will punt to DirectLex if DOM throws an exception.
  11   */
  12  
  class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {

      /**
       * Tokenizes HTML by parsing it with the bundled HTML5 (PH5P) parser.
       *
       * The markup is passed through normalize() and wrapHTML(), parsed into
       * a document, and the first <div> of the first <body> of the first
       * <html> element is handed to tokenizeDOM(). When the parser throws a
       * DOMException, the exception is registered in $context under the key
       * 'PH5PError' and the original, unmodified $html is tokenized by
       * HTMLPurifier_Lexer_DirectLex instead.
       *
       * @param string $html    Markup to tokenize.
       * @param mixed  $config  Passed through unchanged to the helper calls.
       * @param mixed  $context Passed through; receives 'PH5PError' on fallback.
       * @return mixed The $tokens array given to tokenizeDOM(), or whatever
       *               the DirectLex fallback returns.
       */
      public function tokenizeHTML($html, $config, $context) {
          $prepared = $this->wrapHTML(
              $this->normalize($html, $config, $context),
              $config,
              $context
          );

          try {
              $engine   = new HTML5($prepared);
              $document = $engine->save();
          } catch (DOMException $failure) {
              // The HTML5 parser gave up: fall back to DirectLex and keep the
              // exception around so callers can detect that this happened.
              $fallback = new HTMLPurifier_Lexer_DirectLex();
              $context->register('PH5PError', $failure);
              return $fallback->tokenizeHTML($html, $config, $context);
          }

          // Walk down to the <div> nested inside <html><body>.
          $html_node = $document->getElementsByTagName('html')->item(0);
          $body_node = $html_node->getElementsByTagName('body')->item(0);
          $div_node  = $body_node->getElementsByTagName('div')->item(0);

          $tokens = array();
          $this->tokenizeDOM($div_node, $tokens);
          return $tokens;
      }

  }
  37  
  38  /*
  39  
  40  Copyright 2007 Jeroen van der Meer <http://jero.net/> 
  41  
  42  Permission is hereby granted, free of charge, to any person obtaining a 
  43  copy of this software and associated documentation files (the 
  44  "Software"), to deal in the Software without restriction, including 
  45  without limitation the rights to use, copy, modify, merge, publish, 
  46  distribute, sublicense, and/or sell copies of the Software, and to 
  47  permit persons to whom the Software is furnished to do so, subject to 
  48  the following conditions: 
  49  
  50  The above copyright notice and this permission notice shall be included 
  51  in all copies or substantial portions of the Software. 
  52  
  53  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
  54  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  55  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  56  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  57  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  58  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  59  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  60  
  61  */
  62  
  63  class HTML5 {
  64      private $data;
  65      private $char;
  66      private $EOF;
  67      private $state;
  68      private $tree;
  69      private $token;
  70      private $content_model;
  71      private $escape = false;
  72      private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
  73      'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
  74      'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
  75      'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
  76      'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
  77      'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
  78      'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
  79      'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
  80      'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
  81      'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
  82      'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
  83      'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
  84      'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
  85      'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
  86      'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
  87      'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
  88      'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
  89      'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
  90      'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
  91      'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
  92      'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
  93      'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
  94      'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
  95      'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
  96      'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
  97      'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
  98      'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
  99      'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
 100      'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
 101      'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
 102      'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
 103      'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
 104      'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
 105      'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
 106      'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
 107      'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
 108      'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
 109      'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
 110      'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
 111      'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
 112      'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
 113      'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;');
 114  
 115      const PCDATA    = 0;
 116      const RCDATA    = 1;
 117      const CDATA     = 2;
 118      const PLAINTEXT = 3;
 119  
 120      const DOCTYPE  = 0;
 121      const STARTTAG = 1;
 122      const ENDTAG   = 2;
 123      const COMMENT  = 3;
 124      const CHARACTR = 4;
 125      const EOF      = 5;
 126  
 /**
  * Stores the input, resets the tokenizer and immediately runs the state
  * machine until a state handler sets the current state to null.
  *
  * Each iteration calls the method named "<state>State" on this object.
  *
  * @param string $data Markup to tokenize.
  */
 public function __construct($data) {
     // Collapse CRLF to LF, then drop any remaining lone carriage returns.
     // The replacement is an empty string rather than null: passing null to
     // str_replace() is deprecated as of PHP 8.1 and yields the same result.
     $data = str_replace("\r\n", "\n", $data);
     $data = str_replace("\r", '', $data);

     $this->data = $data;
     $this->char = -1;              // cursor; state handlers advance it before reading
     $this->EOF  = strlen($data);   // position one past the last byte
     $this->tree = new HTML5TreeConstructer;
     $this->content_model = self::PCDATA;

     $this->state = 'data';

     // Dispatch to "<state>State" until a handler clears the state.
     while($this->state !== null) {
         $this->{$this->state.'State'}();
     }
 }
 143  
 /**
  * Returns whatever the tree constructor's save() method produces.
  *
  * @return mixed Result of $this->tree->save().
  */
 public function save() {
     $result = $this->tree->save();
     return $result;
 }
 147  
 /**
  * Returns the input byte under the cursor.
  *
  * @return string|false The byte at $this->char, or false once the cursor
  *                      is at or beyond the end of the input.
  */
 private function char() {
     if($this->char < $this->EOF) {
         return $this->data[$this->char];
     }

     return false;
 }
 153  
 /**
  * Reads from the input at an absolute position.
  *
  * @param int $s Start offset into the input.
  * @param int $l Number of bytes to read; 0 means "the single byte at $s".
  * @return string|null The requested byte or substring, or null when
  *                     $s + $l is not strictly below the input length.
  */
 private function character($s, $l = 0) {
     // Out of range: nothing to return.
     if($s + $l >= $this->EOF) {
         return null;
     }

     return ($l === 0)
         ? $this->data[$s]
         : substr($this->data, $s, $l);
 }
 163  
 /**
  * Returns the longest run of characters from a class that starts at $start.
  *
  * The previous preg_replace()-based version returned the entire remaining
  * input when the very first character was not in the class (the pattern
  * did not match, so nothing was replaced). An empty string is returned in
  * that case now.
  *
  * @param string $char_class Body of a PCRE character class, e.g. 'A-Za-z'.
  * @param int    $start      Offset into the input where the run must begin.
  * @return string The leading run, or '' when there is none.
  */
 private function characters($char_class, $start) {
     // substr() may return false on older PHP versions when $start is past
     // the end of the input; the cast turns that into an empty subject.
     $subject = (string) substr($this->data, $start);

     if(preg_match('#^['.$char_class.']+#', $subject, $match)) {
         return $match[0];
     }

     return '';
 }
 167  
 /**
  * Data state of the tokenizer.
  *
  * Advances the cursor by one and, depending on the character found and the
  * current content model, either switches to another state (entity data,
  * tag open), emits character tokens, or signals end of file via EOF().
  *
  * Fixes relative to the previous version:
  *  - The "anything else" branch always consumes at least one character.
  *    Previously a '&' outside PCDATA/RCDATA (or a '<' while the escape flag
  *    is set) produced a zero-length run, rewound the cursor and was then
  *    processed again, endlessly.
  *  - The "<!--" / "-->" checks now look at the characters ending at the
  *    current position, as the rule in the comments describes.
  */
 private function dataState() {
     // Consume the next input character
     $this->char++;
     $char = $this->char();

     if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
         /* U+0026 AMPERSAND (&)
         When the content model flag is set to one of the PCDATA or RCDATA
         states: switch to the entity data state. Otherwise: treat it as per
         the "anything else" entry below. */
         $this->state = 'entityData';

     } elseif($char === '-') {
         /* If the content model flag is set to either the RCDATA state or
         the CDATA state, and the escape flag is false, and there are at
         least three characters before this one in the input stream, and the
         last four characters in the input stream, including this one, are
         "<!--", then set the escape flag to true. */
         if(($this->content_model === self::RCDATA || $this->content_model ===
         self::CDATA) && $this->escape === false &&
         $this->char >= 3 && substr($this->data, $this->char - 3, 4) === '<!--') {
             $this->escape = true;
         }

         /* In any case, emit the input character as a character token. Stay
         in the data state. */
         $this->emitToken(array(
             'type' => self::CHARACTR,
             'data' => $char
         ));

     /* U+003C LESS-THAN SIGN (<) */
     } elseif($char === '<' && ($this->content_model === self::PCDATA ||
     (($this->content_model === self::RCDATA ||
     $this->content_model === self::CDATA) && $this->escape === false))) {
         /* PCDATA, or RCDATA/CDATA with the escape flag unset: switch to
         the tag open state. Otherwise the character falls through to the
         "anything else" entry below. */
         $this->state = 'tagOpen';

     /* U+003E GREATER-THAN SIGN (>) */
     } elseif($char === '>') {
         /* If the content model flag is set to either the RCDATA state or
         the CDATA state, and the escape flag is true, and the last three
         characters in the input stream including this one are "-->",
         set the escape flag to false. */
         if(($this->content_model === self::RCDATA ||
         $this->content_model === self::CDATA) && $this->escape === true &&
         $this->char >= 2 && substr($this->data, $this->char - 2, 3) === '-->') {
             $this->escape = false;
         }

         /* In any case, emit the input character as a character token.
         Stay in the data state. */
         $this->emitToken(array(
             'type' => self::CHARACTR,
             'data' => $char
         ));

     } elseif($this->char === $this->EOF) {
         /* EOF
         Emit an end-of-file token. */
         $this->EOF();

     } elseif($this->content_model === self::PLAINTEXT) {
         /* When the content model flag is set to the PLAINTEXT state
         THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
         the text and emit it as a character token. */
         $this->emitToken(array(
             'type' => self::CHARACTR,
             'data' => substr($this->data, $this->char)
         ));

         $this->EOF();

     } else {
         /* Anything else
         THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters that
         otherwise would also be treated as a character token and emit them
         as a single character token. Stay in the data state. */

         // The run stops before the next '<' or '&'. When the current
         // character is itself one of those (it reached this branch because
         // the content model did not allow special handling), the run would
         // be empty; take that single character so the cursor always moves
         // forward.
         $len  = max(1, strcspn($this->data, '<&', $this->char));
         $char = substr($this->data, $this->char, $len);
         $this->char += $len - 1;

         $this->emitToken(array(
             'type' => self::CHARACTR,
             'data' => $char
         ));

         $this->state = 'data';
     }
 }
 267  
 /**
  * Entity data state: tries to consume an entity after an ampersand and
  * emits either the decoded text or a literal '&', then returns to the
  * data state.
  *
  * The result of entity() is compared explicitly instead of by truthiness,
  * so that an entity decoding to the string '0' (for example "&#48;") is
  * emitted as '0' rather than being mistaken for "nothing returned".
  * NOTE(review): entity() is defined outside this view; it is assumed to
  * signal failure with false, null or an empty string -- confirm.
  */
 private function entityDataState() {
     // Attempt to consume an entity.
     $entity = $this->entity();

     // If nothing is returned, emit a U+0026 AMPERSAND character token.
     // Otherwise, emit the character token that was returned.
     $nothing_returned = ($entity === false || $entity === null || $entity === '');
     $char = $nothing_returned ? '&' : $entity;

     $this->emitToken(array(
         'type' => self::CHARACTR,
         'data' => $char
     ));

     // Finally, switch to the data state.
     $this->state = 'data';
 }
 283  
 /**
  * Tag open state: decides what a '<' introduces.
  *
  * In the RCDATA and CDATA content models only "</" is recognised; any
  * other '<' is emitted as text. In the PCDATA content model the next
  * character selects between a markup declaration, a close tag, a start
  * tag, a bogus comment, or literal text.
  */
 private function tagOpenState() {
     switch($this->content_model) {
         case self::RCDATA:
         case self::CDATA:
             // Peek at the following character without consuming it.
             $slash_follows = ($this->character($this->char + 1) === '/');

             if(!$slash_follows) {
                 // Not a close tag: the '<' is plain text.
                 $this->emitToken(array(
                     'type' => self::CHARACTR,
                     'data' => '<'
                 ));
                 $this->state = 'data';
                 return;
             }

             // Consume the '/' and continue in the close tag open state.
             $this->char++;
             $this->state = 'closeTagOpen';
             return;

         case self::PCDATA:
             // Consume the next input character.
             $this->char++;
             $current = $this->char();

             if($current === '!') {
                 // "<!" starts a markup declaration.
                 $this->state = 'markupDeclarationOpen';
                 return;
             }

             if($current === '/') {
                 // "</" starts a close tag.
                 $this->state = 'closeTagOpen';
                 return;
             }

             if(preg_match('/^[A-Za-z]$/', $current)) {
                 // A letter starts a new start tag token. Its name begins
                 // with the lowercased letter; the token is emitted later,
                 // once the remaining details have been collected.
                 $this->token = array(
                     'name'  => strtolower($current),
                     'type'  => self::STARTTAG,
                     'attr'  => array()
                 );
                 $this->state = 'tagName';
                 return;
             }

             if($current === '>') {
                 // "<>" is a parse error: emit both characters as text.
                 $this->emitToken(array(
                     'type' => self::CHARACTR,
                     'data' => '<>'
                 ));
                 $this->state = 'data';
                 return;
             }

             if($current === '?') {
                 // "<?" is a parse error handled as a bogus comment.
                 $this->state = 'bogusComment';
                 return;
             }

             // Anything else is a parse error: emit the '<' as text and
             // let the data state reconsume the current character.
             $this->emitToken(array(
                 'type' => self::CHARACTR,
                 'data' => '<'
             ));
             $this->char--;
             $this->state = 'data';
             return;
     }
 }
 368  
 /**
  * Close tag open state: entered after "</".
  *
  * In the RCDATA and CDATA content models the "</" is emitted as literal
  * text unless the letters that follow equal the nodeName of the last
  * element on the tree constructor's stack and are followed by a tab,
  * line feed, vertical tab, form feed, space, '>' or '/'. Otherwise the
  * next character decides between starting an end tag token, ignoring
  * "</>", emitting "</" at end of input, or a bogus comment.
  */
 private function closeTagOpenState() {
     $candidate = strtolower($this->characters('A-Za-z', $this->char + 1));
     $matches_open = count($this->tree->stack) > 0 && $candidate === end($this->tree->stack)->nodeName;

     $raw_text = ($this->content_model === self::RCDATA || $this->content_model === self::CDATA);

     // Work out whether "</" has to be treated as plain text.
     $literal = false;
     if($raw_text) {
         if(!$matches_open) {
             $literal = true;
         } else {
             // The name matches; it must also be properly terminated.
             $after_name = $this->character($this->char + 1 + strlen($candidate));
             $literal = !preg_match('/[\t\n\x0b\x0c >\/]/', $after_name)
                 || $this->EOF === $this->char;
         }
     }

     if($literal) {
         // Parse error: emit "</" as text and go back to the data state.
         $this->emitToken(array(
             'type' => self::CHARACTR,
             'data' => '</'
         ));
         $this->state = 'data';
         return;
     }

     // PCDATA, or a matching tag name: consume the next input character.
     $this->char++;
     $current = $this->char();

     if(preg_match('/^[A-Za-z]$/', $current)) {
         // A letter starts a new end tag token named after the lowercased
         // letter; it is emitted later by one of the following states.
         $this->token = array(
             'name'  => strtolower($current),
             'type'  => self::ENDTAG
         );
         $this->state = 'tagName';
         return;
     }

     if($current === '>') {
         // "</>" is a parse error; nothing is emitted.
         $this->state = 'data';
         return;
     }

     if($this->char === $this->EOF) {
         // End of input right after "</": emit it as text and let the
         // data state reconsume the EOF position.
         $this->emitToken(array(
             'type' => self::CHARACTR,
             'data' => '</'
         ));
         $this->char--;
         $this->state = 'data';
         return;
     }

     // Anything else is a parse error handled as a bogus comment.
     $this->state = 'bogusComment';
 }
 441  
 /**
  * Tag name state: accumulates the name of the current tag token.
  *
  * Whitespace or '/' ends the name and moves on to attribute parsing, '>'
  * emits the token, end of input emits the token and hands the position
  * back to the data state; every other character is lowercased and
  * appended to the name.
  */
 private function tagNameState() {
     // Consume the next input character.
     $this->char++;
     $current = $this->character($this->char);

     if(preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
         // Tab, LF, VT, FF or space: attributes may follow.
         $this->state = 'beforeAttributeName';
         return;
     }

     if($current === '>') {
         // End of tag: emit it and return to the data state.
         $this->emitToken($this->token);
         $this->state = 'data';
         return;
     }

     if($this->char === $this->EOF) {
         // Parse error: emit what we have and let the data state
         // reconsume the EOF position.
         $this->emitToken($this->token);
         $this->char--;
         $this->state = 'data';
         return;
     }

     if($current === '/') {
         // Parse error unless this is a permitted slash.
         $this->state = 'beforeAttributeName';
         return;
     }

     // Anything else extends the tag name; stay in this state.
     $this->token['name'] .= strtolower($current);
     $this->state = 'tagName';
 }
 485  
 /**
  * Before attribute name state: skips whitespace and slashes inside a tag
  * until an attribute name, the end of the tag, or the end of input.
  */
 private function beforeAttributeNameState() {
     // Consume the next input character.
     $this->char++;
     $current = $this->character($this->char);

     // Whitespace (tab, LF, VT, FF, space) and '/' are both skipped; the
     // slash is a parse error unless it is a permitted slash.
     if(preg_match('/^[\t\n\x0b\x0c ]$/', $current) || $current === '/') {
         $this->state = 'beforeAttributeName';
         return;
     }

     if($current === '>') {
         // End of tag: emit it and return to the data state.
         $this->emitToken($this->token);
         $this->state = 'data';
         return;
     }

     if($this->char === $this->EOF) {
         // Parse error: emit the tag and let the data state reconsume
         // the EOF position.
         $this->emitToken($this->token);
         $this->char--;
         $this->state = 'data';
         return;
     }

     // Anything else starts a new attribute whose name begins with the
     // lowercased character and whose value is still unset.
     $this->token['attr'][] = array(
         'name'  => strtolower($current),
         'value' => null
     );
     $this->state = 'attributeName';
 }
 534  
 /**
  * Attribute name state: accumulates the name of the most recently started
  * attribute of the current tag token.
  *
  * Whitespace switches to the after attribute name state, '=' to the before
  * attribute value state, '>' emits the tag. A '/' that is not directly
  * followed by '>' switches back to the before attribute name state. Every
  * other character (including a '/' that is followed by '>') is lowercased
  * and appended to the attribute name.
  */
 private function attributeNameState() {
     // Consume the next input character.
     $this->char++;
     $current = $this->character($this->char);

     if(preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
         // Tab, LF, VT, FF or space: the name is complete.
         $this->state = 'afterAttributeName';
         return;
     }

     if($current === '=') {
         // A value follows.
         $this->state = 'beforeAttributeValue';
         return;
     }

     if($current === '>') {
         // End of tag: emit it and return to the data state.
         $this->emitToken($this->token);
         $this->state = 'data';
         return;
     }

     if($current === '/' && $this->character($this->char + 1) !== '>') {
         // Parse error unless this is a permitted slash.
         $this->state = 'beforeAttributeName';
         return;
     }

     if($this->char === $this->EOF) {
         // Parse error: emit the tag and let the data state reconsume
         // the EOF position.
         $this->emitToken($this->token);
         $this->char--;
         $this->state = 'data';
         return;
     }

     // Anything else extends the name of the last attribute.
     $index = count($this->token['attr']) - 1;
     $this->token['attr'][$index]['name'] .= strtolower($current);
     $this->state = 'attributeName';
 }
 585  
 /**
  * After attribute name state: entered once whitespace has ended an
  * attribute name. Looks for '=', the end of the tag, or the start of
  * another attribute.
  */
 private function afterAttributeNameState() {
     // Consume the next input character.
     $this->char++;
     $current = $this->character($this->char);

     if(preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
         // More whitespace (tab, LF, VT, FF, space): keep skipping.
         $this->state = 'afterAttributeName';
         return;
     }

     if($current === '=') {
         // A value follows.
         $this->state = 'beforeAttributeValue';
         return;
     }

     if($current === '>') {
         // End of tag: emit it and return to the data state.
         $this->emitToken($this->token);
         $this->state = 'data';
         return;
     }

     if($current === '/' && $this->character($this->char + 1) !== '>') {
         // Parse error unless this is a permitted slash.
         $this->state = 'beforeAttributeName';
         return;
     }

     if($this->char === $this->EOF) {
         // Parse error: emit the tag and let the data state reconsume
         // the EOF position.
         $this->emitToken($this->token);
         $this->char--;
         $this->state = 'data';
         return;
     }

     // Anything else starts a new attribute whose name begins with the
     // lowercased character and whose value is still unset.
     $this->token['attr'][] = array(
         'name'  => strtolower($current),
         'value' => null
     );
     $this->state = 'attributeName';
 }
 639  
 /**
  * Before attribute value state: entered after '='. Skips whitespace and
  * selects the double-quoted, single-quoted or unquoted value state.
  */
 private function beforeAttributeValueState() {
     // Consume the next input character.
     $this->char++;
     $current = $this->character($this->char);

     if(preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
         // Tab, LF, VT, FF or space: keep skipping.
         $this->state = 'beforeAttributeValue';
         return;
     }

     if($current === '"') {
         $this->state = 'attributeValueDoubleQuoted';
         return;
     }

     if($current === '&') {
         // Let the unquoted value state reconsume the ampersand.
         $this->char--;
         $this->state = 'attributeValueUnquoted';
         return;
     }

     if($current === '\'') {
         $this->state = 'attributeValueSingleQuoted';
         return;
     }

     if($current === '>') {
         // End of tag: emit it and return to the data state.
         $this->emitToken($this->token);
         $this->state = 'data';
         return;
     }

     // Anything else is the first character of an unquoted value.
     $index = count($this->token['attr']) - 1;
     $this->token['attr'][$index]['value'] .= $current;
     $this->state = 'attributeValueUnquoted';
 }
 687  
 /**
  * Attribute value (double-quoted) state: collects characters into the
  * value of the last attribute until the closing double quote.
  */
 private function attributeValueDoubleQuotedState() {
     // Consume the next input character.
     $this->char++;
     $current = $this->character($this->char);

     if($current === '"') {
         // Closing quote: further attributes may follow.
         $this->state = 'beforeAttributeName';
         return;
     }

     if($current === '&') {
         // Entity handling is delegated with a direct call; the current
         // state is not changed here.
         $this->entityInAttributeValueState('double');
         return;
     }

     if($this->char === $this->EOF) {
         // Parse error: emit the tag and let the data state reconsume
         // the EOF position.
         $this->emitToken($this->token);
         $this->char--;
         $this->state = 'data';
         return;
     }

     // Anything else is appended to the value of the last attribute.
     $index = count($this->token['attr']) - 1;
     $this->token['attr'][$index]['value'] .= $current;
     $this->state = 'attributeValueDoubleQuoted';
 }
 722  
 723      private function attributeValueSingleQuotedState() {
 724          // Consume the next input character:
 725          $this->char++;
 726          $char = $this->character($this->char);
 727  
 728          if($char === '\'') {
 729              /* U+0022 QUOTATION MARK (')
 730              Switch to the before attribute name state. */
 731              $this->state = 'beforeAttributeName';
 732  
 733          } elseif($char === '&') {
 734              /* U+0026 AMPERSAND (&)
 735              Switch to the entity in attribute value state. */
 736              $this->entityInAttributeValueState('single');
 737  
 738          } elseif($this->char === $this->EOF) {
 739              /* EOF
 740              Parse error. Emit the current tag token. Reconsume the character
 741              in the data state. */
 742              $this->emitToken($this->token);
 743  
 744              $this->char--;
 745              $this->state = 'data';
 746  
 747          } else {
 748              /* Anything else
 749              Append the current input character to the current attribute's value.
 750              Stay in the attribute value (single-quoted) state. */
 751              $last = count($this->token['attr']) - 1;
 752              $this->token['attr'][$last]['value'] .= $char;
 753  
 754              $this->state = 'attributeValueSingleQuoted';
 755          }
 756      }
 757  
 758      private function attributeValueUnquotedState() {
 759          // Consume the next input character:
 760          $this->char++;
 761          $char = $this->character($this->char);
 762  
 763          if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 764              /* U+0009 CHARACTER TABULATION
 765              U+000A LINE FEED (LF)
 766              U+000B LINE TABULATION
 767              U+000C FORM FEED (FF)
 768              U+0020 SPACE
 769              Switch to the before attribute name state. */
 770              $this->state = 'beforeAttributeName';
 771  
 772          } elseif($char === '&') {
 773              /* U+0026 AMPERSAND (&)
 774              Switch to the entity in attribute value state. */
 775              $this->entityInAttributeValueState();
 776  
 777          } elseif($char === '>') {
 778              /* U+003E GREATER-THAN SIGN (>)
 779              Emit the current tag token. Switch to the data state. */
 780              $this->emitToken($this->token);
 781              $this->state = 'data';
 782  
 783          } else {
 784              /* Anything else
 785              Append the current input character to the current attribute's value.
 786              Stay in the attribute value (unquoted) state. */
 787              $last = count($this->token['attr']) - 1;
 788              $this->token['attr'][$last]['value'] .= $char;
 789  
 790              $this->state = 'attributeValueUnquoted';
 791          }
 792      }
 793  
 794      private function entityInAttributeValueState() {
 795          // Attempt to consume an entity.
 796          $entity = $this->entity();
 797  
 798          // If nothing is returned, append a U+0026 AMPERSAND character to the
 799          // current attribute's value. Otherwise, emit the character token that
 800          // was returned.
 801          $char = (!$entity)
 802              ? '&'
 803              : $entity;
 804  
 805          $last = count($this->token['attr']) - 1;
 806          $this->token['attr'][$last]['value'] .= $char;
 807      }
 808  
 809      private function bogusCommentState() {
 810          /* Consume every character up to the first U+003E GREATER-THAN SIGN
 811          character (>) or the end of the file (EOF), whichever comes first. Emit
 812          a comment token whose data is the concatenation of all the characters
 813          starting from and including the character that caused the state machine
 814          to switch into the bogus comment state, up to and including the last
 815          consumed character before the U+003E character, if any, or up to the
 816          end of the file otherwise. (If the comment was started by the end of
 817          the file (EOF), the token is empty.) */
 818          $data = $this->characters('^>', $this->char);
 819          $this->emitToken(array(
 820              'data' => $data,
 821              'type' => self::COMMENT
 822          ));
 823  
 824          $this->char += strlen($data);
 825  
 826          /* Switch to the data state. */
 827          $this->state = 'data';
 828  
 829          /* If the end of the file was reached, reconsume the EOF character. */
 830          if($this->char === $this->EOF) {
 831              $this->char = $this->EOF - 1;
 832          }
 833      }
 834  
 835      private function markupDeclarationOpenState() {
 836          /* If the next two characters are both U+002D HYPHEN-MINUS (-)
 837          characters, consume those two characters, create a comment token whose
 838          data is the empty string, and switch to the comment state. */
 839          if($this->character($this->char + 1, 2) === '--') {
 840              $this->char += 2;
 841              $this->state = 'comment';
 842              $this->token = array(
 843                  'data' => null,
 844                  'type' => self::COMMENT
 845              );
 846  
 847          /* Otherwise if the next seven chacacters are a case-insensitive match
 848          for the word "DOCTYPE", then consume those characters and switch to the
 849          DOCTYPE state. */
 850          } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
 851              $this->char += 7;
 852              $this->state = 'doctype';
 853  
 854          /* Otherwise, is is a parse error. Switch to the bogus comment state.
 855          The next character that is consumed, if any, is the first character
 856          that will be in the comment. */
 857          } else {
 858              $this->char++;
 859              $this->state = 'bogusComment';
 860          }
 861      }
 862  
 863      private function commentState() {
 864          /* Consume the next input character: */
 865          $this->char++;
 866          $char = $this->char();
 867  
 868          /* U+002D HYPHEN-MINUS (-) */
 869          if($char === '-') {
 870              /* Switch to the comment dash state  */
 871              $this->state = 'commentDash';
 872  
 873          /* EOF */
 874          } elseif($this->char === $this->EOF) {
 875              /* Parse error. Emit the comment token. Reconsume the EOF character
 876              in the data state. */
 877              $this->emitToken($this->token);
 878              $this->char--;
 879              $this->state = 'data';
 880  
 881          /* Anything else */
 882          } else {
 883              /* Append the input character to the comment token's data. Stay in
 884              the comment state. */
 885              $this->token['data'] .= $char;
 886          }
 887      }
 888  
 889      private function commentDashState() {
 890          /* Consume the next input character: */
 891          $this->char++;
 892          $char = $this->char();
 893  
 894          /* U+002D HYPHEN-MINUS (-) */
 895          if($char === '-') {
 896              /* Switch to the comment end state  */
 897              $this->state = 'commentEnd';
 898  
 899          /* EOF */
 900          } elseif($this->char === $this->EOF) {
 901              /* Parse error. Emit the comment token. Reconsume the EOF character
 902              in the data state. */
 903              $this->emitToken($this->token);
 904              $this->char--;
 905              $this->state = 'data';
 906  
 907          /* Anything else */
 908          } else {
 909              /* Append a U+002D HYPHEN-MINUS (-) character and the input
 910              character to the comment token's data. Switch to the comment state. */
 911              $this->token['data'] .= '-'.$char;
 912              $this->state = 'comment';
 913          }
 914      }
 915  
 916      private function commentEndState() {
 917          /* Consume the next input character: */
 918          $this->char++;
 919          $char = $this->char();
 920  
 921          if($char === '>') {
 922              $this->emitToken($this->token);
 923              $this->state = 'data';
 924  
 925          } elseif($char === '-') {
 926              $this->token['data'] .= '-';
 927  
 928          } elseif($this->char === $this->EOF) {
 929              $this->emitToken($this->token);
 930              $this->char--;
 931              $this->state = 'data';
 932  
 933          } else {
 934              $this->token['data'] .= '--'.$char;
 935              $this->state = 'comment';
 936          }
 937      }
 938  
 939      private function doctypeState() {
 940          /* Consume the next input character: */
 941          $this->char++;
 942          $char = $this->char();
 943  
 944          if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 945              $this->state = 'beforeDoctypeName';
 946  
 947          } else {
 948              $this->char--;
 949              $this->state = 'beforeDoctypeName';
 950          }
 951      }
 952  
 953      private function beforeDoctypeNameState() {
 954          /* Consume the next input character: */
 955          $this->char++;
 956          $char = $this->char();
 957  
 958          if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 959              // Stay in the before DOCTYPE name state.
 960  
 961          } elseif(preg_match('/^[a-z]$/', $char)) {
 962              $this->token = array(
 963                  'name' => strtoupper($char),
 964                  'type' => self::DOCTYPE,
 965                  'error' => true
 966              );
 967  
 968              $this->state = 'doctypeName';
 969  
 970          } elseif($char === '>') {
 971              $this->emitToken(array(
 972                  'name' => null,
 973                  'type' => self::DOCTYPE,
 974                  'error' => true
 975              ));
 976  
 977              $this->state = 'data';
 978  
 979          } elseif($this->char === $this->EOF) {
 980              $this->emitToken(array(
 981                  'name' => null,
 982                  'type' => self::DOCTYPE,
 983                  'error' => true
 984              ));
 985  
 986              $this->char--;
 987              $this->state = 'data';
 988  
 989          } else {
 990              $this->token = array(
 991                  'name' => $char,
 992                  'type' => self::DOCTYPE,
 993                  'error' => true
 994              );
 995  
 996              $this->state = 'doctypeName';
 997          }
 998      }
 999  
1000      private function doctypeNameState() {
1001          /* Consume the next input character: */
1002          $this->char++;
1003          $char = $this->char();
1004  
1005          if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1006              $this->state = 'AfterDoctypeName';
1007  
1008          } elseif($char === '>') {
1009              $this->emitToken($this->token);
1010              $this->state = 'data';
1011  
1012          } elseif(preg_match('/^[a-z]$/', $char)) {
1013              $this->token['name'] .= strtoupper($char);
1014  
1015          } elseif($this->char === $this->EOF) {
1016              $this->emitToken($this->token);
1017              $this->char--;
1018              $this->state = 'data';
1019  
1020          } else {
1021              $this->token['name'] .= $char;
1022          }
1023  
1024          $this->token['error'] = ($this->token['name'] === 'HTML')
1025              ? false
1026              : true;
1027      }
1028  
1029      private function afterDoctypeNameState() {
1030          /* Consume the next input character: */
1031          $this->char++;
1032          $char = $this->char();
1033  
1034          if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1035              // Stay in the DOCTYPE name state.
1036  
1037          } elseif($char === '>') {
1038              $this->emitToken($this->token);
1039              $this->state = 'data';
1040  
1041          } elseif($this->char === $this->EOF) {
1042              $this->emitToken($this->token);
1043              $this->char--;
1044              $this->state = 'data';
1045  
1046          } else {
1047              $this->token['error'] = true;
1048              $this->state = 'bogusDoctype';
1049          }
1050      }
1051  
1052      private function bogusDoctypeState() {
1053          /* Consume the next input character: */
1054          $this->char++;
1055          $char = $this->char();
1056  
1057          if($char === '>') {
1058              $this->emitToken($this->token);
1059              $this->state = 'data';
1060  
1061          } elseif($this->char === $this->EOF) {
1062              $this->emitToken($this->token);
1063              $this->char--;
1064              $this->state = 'data';
1065  
1066          } else {
1067              // Stay in the bogus DOCTYPE state.
1068          }
1069      }
1070  
1071      private function entity() {
1072          $start = $this->char;
1073  
1074          // This section defines how to consume an entity. This definition is
1075          // used when parsing entities in text and in attributes.
1076  
1077          // The behaviour depends on the identity of the next character (the
1078          // one immediately after the U+0026 AMPERSAND character): 
1079  
1080          switch($this->character($this->char + 1)) {
1081              // U+0023 NUMBER SIGN (#)
1082              case '#':
1083  
1084                  // The behaviour further depends on the character after the
1085                  // U+0023 NUMBER SIGN:
1086                  switch($this->character($this->char + 1)) {
1087                      // U+0078 LATIN SMALL LETTER X
1088                      // U+0058 LATIN CAPITAL LETTER X
1089                      case 'x':
1090                      case 'X':
1091                          // Follow the steps below, but using the range of
1092                          // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1093                          // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1094                          // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1095                          // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1096                          // words, 0-9, A-F, a-f).
1097                          $char = 1;
1098                          $char_class = '0-9A-Fa-f';
1099                      break;
1100  
1101                      // Anything else
1102                      default:
1103                          // Follow the steps below, but using the range of
1104                          // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1105                          // NINE (i.e. just 0-9).
1106                          $char = 0;
1107                          $char_class = '0-9';
1108                      break;
1109                  }
1110  
1111                  // Consume as many characters as match the range of characters
1112                  // given above.
1113                  $this->char++;
1114                  $e_name = $this->characters($char_class, $this->char + $char + 1);
1115                  $entity = $this->character($start, $this->char);
1116                  $cond = strlen($e_name) > 0;
1117  
1118                  // The rest of the parsing happens bellow.
1119              break;
1120  
1121              // Anything else
1122              default:
1123                  // Consume the maximum number of characters possible, with the
1124                  // consumed characters case-sensitively matching one of the
1125                  // identifiers in the first column of the entities table.
1126                  $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1127                  $len = strlen($e_name);
1128  
1129                  for($c = 1; $c <= $len; $c++) {
1130                      $id = substr($e_name, 0, $c);
1131                      $this->char++;
1132  
1133                      if(in_array($id, $this->entities)) {
1134                          if ($e_name[$c-1] !== ';') {
1135                              if ($c < $len && $e_name[$c] == ';') {
1136                                  $this->char++; // consume extra semicolon
1137                              }
1138                          }
1139                          $entity = $id;
1140                          break;
1141                      }
1142                  }
1143  
1144                  $cond = isset($entity);
1145                  // The rest of the parsing happens bellow.
1146              break;
1147          }
1148  
1149          if(!$cond) {
1150              // If no match can be made, then this is a parse error. No
1151              // characters are consumed, and nothing is returned.
1152              $this->char = $start;
1153              return false;
1154          }
1155  
1156          // Return a character token for the character corresponding to the
1157          // entity name (as given by the second column of the entities table).
1158          return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
1159      }
1160  
1161      private function emitToken($token) {
1162          $emit = $this->tree->emitToken($token);
1163  
1164          if(is_int($emit)) {
1165              $this->content_model = $emit;
1166  
1167          } elseif($token['type'] === self::ENDTAG) {
1168              $this->content_model = self::PCDATA;
1169          }
1170      }
1171  
1172      private function EOF() {
1173          $this->state = null;
1174          $this->tree->emitToken(array(
1175              'type' => self::EOF
1176          ));
1177      }
1178  }
1179  
1180  class HTML5TreeConstructer {
    // Stack of open elements. The root element phase pushes the <html>
    // element as its first entry.
    public $stack = array();

    // Current tree construction phase (one of the *_PHASE constants).
    private $phase;
    // Current insertion mode within the main phase (see the constants below).
    private $mode;
    // The DOMDocument that is being built.
    private $dom;
    // NOTE(review): not used in the part of the class reviewed here;
    // presumably supports foster parenting of misplaced table content.
    private $foster_parent = null;
    // NOTE(review): not used in the part of the class reviewed here;
    // presumably the list of active formatting elements.
    private $a_formatting  = array();

    // Set to the <head> element when the "before head" mode inserts it.
    private $head_pointer = null;
    // NOTE(review): presumably the current <form> element - confirm.
    private $form_pointer = null;

    // Tag names grouped by element category (see the SPECIAL, SCOPING and
    // FORMATTING constants below).
    private $scoping = array('button','caption','html','marquee','object','table','td','th');
    private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
    private $special = array('address','area','base','basefont','bgsound',
    'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
    'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
    'h6','head','hr','iframe','image','img','input','isindex','li','link',
    'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
    'option','p','param','plaintext','pre','script','select','spacer','style',
    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');

    // The different phases. Values stored in $phase.
    const INIT_PHASE = 0;
    const ROOT_PHASE = 1;
    const MAIN_PHASE = 2;
    const END_PHASE  = 3;

    // The different insertion modes for the main phase. Values stored in
    // $mode. Note that IN_BODY has the same numeric value as END_PHASE above.
    const BEFOR_HEAD = 0;
    const IN_HEAD    = 1;
    const AFTER_HEAD = 2;
    const IN_BODY    = 3;
    const IN_TABLE   = 4;
    const IN_CAPTION = 5;
    const IN_CGROUP  = 6;
    const IN_TBODY   = 7;
    const IN_ROW     = 8;
    const IN_CELL    = 9;
    const IN_SELECT  = 10;
    const AFTER_BODY = 11;
    const IN_FRAME   = 12;
    const AFTR_FRAME = 13;

    // The different types of elements.
    const SPECIAL    = 0;
    const SCOPING    = 1;
    const FORMATTING = 2;
    const PHRASING   = 3;

    // NOTE(review): presumably a sentinel entry for the active formatting
    // list - its use is outside the part of the class reviewed here.
    const MARKER     = 0;
1231  
1232      public function __construct() {
1233          $this->phase = self::INIT_PHASE;
1234          $this->mode = self::BEFOR_HEAD;
1235          $this->dom = new DOMDocument;
1236  
1237          $this->dom->encoding = 'UTF-8';
1238          $this->dom->preserveWhiteSpace = true;
1239          $this->dom->substituteEntities = true;
1240          $this->dom->strictErrorChecking = false;
1241      }
1242  
1243      // Process tag tokens
1244      public function emitToken($token) {
1245          switch($this->phase) {
1246              case self::INIT_PHASE: return $this->initPhase($token); break;
1247              case self::ROOT_PHASE: return $this->rootElementPhase($token); break;
1248              case self::MAIN_PHASE: return $this->mainPhase($token); break;
1249              case self::END_PHASE : return $this->trailingEndPhase($token); break;
1250          }
1251      }
1252  
1253      private function initPhase($token) {
1254          /* Initially, the tree construction stage must handle each token
1255          emitted from the tokenisation stage as follows: */
1256  
1257          /* A DOCTYPE token that is marked as being in error
1258          A comment token
1259          A start tag token
1260          An end tag token
1261          A character token that is not one of one of U+0009 CHARACTER TABULATION,
1262              U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1263              or U+0020 SPACE
1264          An end-of-file token */
1265          if((isset($token['error']) && $token['error']) ||
1266          $token['type'] === HTML5::COMMENT ||
1267          $token['type'] === HTML5::STARTTAG ||
1268          $token['type'] === HTML5::ENDTAG ||
1269          $token['type'] === HTML5::EOF ||
1270          ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
1271          !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
1272              /* This specification does not define how to handle this case. In
1273              particular, user agents may ignore the entirety of this specification
1274              altogether for such documents, and instead invoke special parse modes
1275              with a greater emphasis on backwards compatibility. */
1276  
1277              $this->phase = self::ROOT_PHASE;
1278              return $this->rootElementPhase($token);
1279  
1280          /* A DOCTYPE token marked as being correct */
1281          } elseif(isset($token['error']) && !$token['error']) {
1282              /* Append a DocumentType node to the Document  node, with the name
1283              attribute set to the name given in the DOCTYPE token (which will be
1284              "HTML"), and the other attributes specific to DocumentType objects
1285              set to null, empty lists, or the empty string as appropriate. */
1286              $doctype = new DOMDocumentType(null, null, 'HTML');
1287  
1288              /* Then, switch to the root element phase of the tree construction
1289              stage. */
1290              $this->phase = self::ROOT_PHASE;
1291  
1292          /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1293          U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1294          or U+0020 SPACE */
1295          } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
1296          $token['data'])) {
1297              /* Append that character  to the Document node. */
1298              $text = $this->dom->createTextNode($token['data']);
1299              $this->dom->appendChild($text);
1300          }
1301      }
1302  
1303      private function rootElementPhase($token) {
1304          /* After the initial phase, as each token is emitted from the tokenisation
1305          stage, it must be processed as described in this section. */
1306  
1307          /* A DOCTYPE token */
1308          if($token['type'] === HTML5::DOCTYPE) {
1309              // Parse error. Ignore the token.
1310  
1311          /* A comment token */
1312          } elseif($token['type'] === HTML5::COMMENT) {
1313              /* Append a Comment node to the Document object with the data
1314              attribute set to the data given in the comment token. */
1315              $comment = $this->dom->createComment($token['data']);
1316              $this->dom->appendChild($comment);
1317  
1318          /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1319          U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1320          or U+0020 SPACE */
1321          } elseif($token['type'] === HTML5::CHARACTR &&
1322          preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
1323              /* Append that character  to the Document node. */
1324              $text = $this->dom->createTextNode($token['data']);
1325              $this->dom->appendChild($text);
1326  
1327          /* A character token that is not one of U+0009 CHARACTER TABULATION,
1328              U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
1329              (FF), or U+0020 SPACE
1330          A start tag token
1331          An end tag token
1332          An end-of-file token */
1333          } elseif(($token['type'] === HTML5::CHARACTR &&
1334          !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
1335          $token['type'] === HTML5::STARTTAG ||
1336          $token['type'] === HTML5::ENDTAG ||
1337          $token['type'] === HTML5::EOF) {
1338              /* Create an HTMLElement node with the tag name html, in the HTML
1339              namespace. Append it to the Document object. Switch to the main
1340              phase and reprocess the current token. */
1341              $html = $this->dom->createElement('html');
1342              $this->dom->appendChild($html);
1343              $this->stack[] = $html;
1344  
1345              $this->phase = self::MAIN_PHASE;
1346              return $this->mainPhase($token);
1347          }
1348      }
1349  
1350      private function mainPhase($token) {
1351          /* Tokens in the main phase must be handled as follows: */
1352  
1353          /* A DOCTYPE token */
1354          if($token['type'] === HTML5::DOCTYPE) {
1355              // Parse error. Ignore the token.
1356  
1357          /* A start tag token with the tag name "html" */
1358          } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
1359              /* If this start tag token was not the first start tag token, then
1360              it is a parse error. */
1361  
1362              /* For each attribute on the token, check to see if the attribute
1363              is already present on the top element of the stack of open elements.
1364              If it is not, add the attribute and its corresponding value to that
1365              element. */
1366              foreach($token['attr'] as $attr) {
1367                  if(!$this->stack[0]->hasAttribute($attr['name'])) {
1368                      $this->stack[0]->setAttribute($attr['name'], $attr['value']);
1369                  }
1370              }
1371  
1372          /* An end-of-file token */
1373          } elseif($token['type'] === HTML5::EOF) {
1374              /* Generate implied end tags. */
1375              $this->generateImpliedEndTags();
1376  
1377          /* Anything else. */
1378          } else {
1379              /* Depends on the insertion mode: */
1380              switch($this->mode) {
1381                  case self::BEFOR_HEAD: return $this->beforeHead($token); break;
1382                  case self::IN_HEAD:    return $this->inHead($token); break;
1383                  case self::AFTER_HEAD: return $this->afterHead($token); break;
1384                  case self::IN_BODY:    return $this->inBody($token); break;
1385                  case self::IN_TABLE:   return $this->inTable($token); break;
1386                  case self::IN_CAPTION: return $this->inCaption($token); break;
1387                  case self::IN_CGROUP:  return $this->inColumnGroup($token); break;
1388                  case self::IN_TBODY:   return $this->inTableBody($token); break;
1389                  case self::IN_ROW:     return $this->inRow($token); break;
1390                  case self::IN_CELL:    return $this->inCell($token); break;
1391                  case self::IN_SELECT:  return $this->inSelect($token); break;
1392                  case self::AFTER_BODY: return $this->afterBody($token); break;
1393                  case self::IN_FRAME:   return $this->inFrameset($token); break;
1394                  case self::AFTR_FRAME: return $this->afterFrameset($token); break;
1395                  case self::END_PHASE:  return $this->trailingEndPhase($token); break;
1396              }
1397          }
1398      }
1399  
/**
 * Processes a token while the insertion mode is "before head".
 *
 * Whitespace-only character data and comments are appended to the
 * current node. A "head" start tag creates the head element, records it
 * in the head element pointer and switches to the "in head" mode. Any
 * other start tag, an "html" end tag, or non-whitespace character data
 * first synthesises a "head" start tag and is then reprocessed by
 * inHead(). Everything else is ignored.
 *
 * @param array $token Token array; always has 'type', and 'name',
 *                     'attr' or 'data' depending on that type.
 * @return mixed The value returned by inHead() when the token is
 *               reprocessed there, null otherwise.
 */
private function beforeHead($token) {
    $type = $token['type'];

    /* Character data made up solely of U+0009, U+000A, U+000B, U+000C
    or U+0020: append it to the current node. */
    if($type === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        $this->insertText($token['data']);
        return;
    }

    /* Comment: append a Comment node carrying the token's data. */
    if($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    /* Explicit <head>: insert it, remember it as the head element and
    move on to the "in head" insertion mode. */
    if($type === HTML5::STARTTAG && $token['name'] === 'head') {
        $this->head_pointer = $this->insertElement($token);
        $this->mode = self::IN_HEAD;
        return;
    }

    /* Any other start tag, </html>, or character data that is not plain
    whitespace implies a head element. */
    $impliesHead = $type === HTML5::STARTTAG
        || ($type === HTML5::ENDTAG && $token['name'] === 'html')
        || ($type === HTML5::CHARACTR
            && !preg_match('/^[\t\n\x0b\x0c ]$/', $token['data']));

    if($impliesHead) {
        /* Act as if <head> with no attributes had been seen... */
        $this->beforeHead(array(
            'name' => 'head',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        /* ...then reprocess the current token in the new mode. */
        return $this->inHead($token);
    }

    /* Any other end tag is a parse error and is ignored; no other token
    type is acted upon here either. */
}
1453  
/**
 * Processes a token while the insertion mode is "in head".
 *
 * THIS DIFFERS FROM THE SPEC in one deliberate way: while the current
 * node is a title, style or script element, character data is appended
 * to it regardless of its content.
 *
 * The head element pointer may be null (innerHTML case). Every branch
 * tolerates that by leaving the new element where insertElement() put
 * it, and an empty stack of open elements is treated as "the current
 * node is not the head element".
 *
 * @param array $token Token array; always has 'type', and 'name',
 *                     'attr' or 'data' depending on that type.
 * @return mixed HTML5::PCDATA, HTML5::RCDATA or HTML5::CDATA when the
 *               tokeniser's content model flag has to change, the value
 *               returned by afterHead() when the token is reprocessed
 *               there, and null otherwise.
 */
private function inHead($token) {
    $type = $token['type'];

    /* Elements that swallow all character data while they are open. */
    $rawTextTags = array('title', 'style', 'script');

    /* Current node; end() yields false when the stack is empty. */
    $current = end($this->stack);

    /* True only when a head element exists and is the current node. */
    $headIsCurrent = $this->head_pointer !== null && $current !== false
        && $this->head_pointer->isSameNode($current);

    /* A character token that is either pure whitespace (U+0009, U+000A,
    U+000B, U+000C, U+0020) or arrives while a title, style or script
    element is the current node. */
    if($type === HTML5::CHARACTR && (
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) ||
    ($current !== false && in_array($current->nodeName, $rawTextTags, true)))) {
        /* Append the character data to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "title", "style" or "script" */
    } elseif($type === HTML5::ENDTAG &&
    in_array($token['name'], $rawTextTags, true)) {
        /* Close the element and put the tokeniser back into PCDATA. */
        array_pop($this->stack);
        return HTML5::PCDATA;

    /* A start tag with the tag name "title", "style" or "script" */
    } elseif($type === HTML5::STARTTAG &&
    in_array($token['name'], $rawTextTags, true)) {
        /* Create an element for the token and append it to the node
        pointed to by the head element pointer, or, if that is null
        (innerHTML case), leave it on the current node. */
        if($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* "title" switches the content model flag to RCDATA, "style"
        and "script" switch it to CDATA. */
        return $token['name'] === 'title' ? HTML5::RCDATA : HTML5::CDATA;

    /* A start tag with the tag name "base", "link", or "meta" */
    } elseif($type === HTML5::STARTTAG &&
    in_array($token['name'], array('base', 'link', 'meta'), true)) {
        /* Create an element for the token and append it to the node
        pointed to by the head element pointer, or, if that is null
        (innerHTML case), leave it on the current node. */
        if($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* These elements never have content, so the entry that
        insertElement() pushed is removed again in both cases. */
        array_pop($this->stack);

    /* An end tag with the tag name "head" */
    } elseif($type === HTML5::ENDTAG && $token['name'] === 'head') {
        /* If the current node is a head element, pop the current node
        off the stack of open elements. Otherwise this is a parse error
        and nothing is popped. */
        if($headIsCurrent) {
            array_pop($this->stack);
        }

        /* Change the insertion mode to "after head". */
        $this->mode = self::AFTER_HEAD;

    /* A start tag with the tag name "head" or an end tag except "html". */
    } elseif(($type === HTML5::STARTTAG && $token['name'] === 'head') ||
    ($type === HTML5::ENDTAG && $token['name'] !== 'html')) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* If the current node is a head element, act as if an end tag
        token with the tag name "head" had been seen. */
        if($headIsCurrent) {
            $this->inHead(array(
                'name' => 'head',
                'type' => HTML5::ENDTAG
            ));

        /* Otherwise, change the insertion mode to "after head". */
        } else {
            $this->mode = self::AFTER_HEAD;
        }

        /* Then, reprocess the current token. */
        return $this->afterHead($token);
    }
}
1577  
/**
 * Processes a token while the insertion mode is "after head".
 *
 * Whitespace-only character data and comments are appended to the
 * current node. A "body" or "frameset" start tag inserts that element
 * and selects the matching insertion mode. Head-only start tags switch
 * back to "in head" and are reprocessed by inHead(). Every other token
 * first synthesises a "body" start tag and is then handed to inBody().
 *
 * @param array $token Token array; always has 'type', and 'name',
 *                     'attr' or 'data' depending on that type.
 * @return mixed The value returned by inHead() or inBody() when the
 *               token is reprocessed there, null otherwise.
 */
private function afterHead($token) {
    $kind = $token['type'];

    /* Character data made up solely of U+0009, U+000A, U+000B, U+000C
    or U+0020: append it to the current node. */
    if($kind === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        $this->insertText($token['data']);
        return;
    }

    /* Comment: append a Comment node carrying the token's data. */
    if($kind === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    if($kind === HTML5::STARTTAG) {
        $tag = $token['name'];

        /* <body>: insert it and continue "in body". */
        if($tag === 'body') {
            $this->insertElement($token);
            $this->mode = self::IN_BODY;
            return;
        }

        /* <frameset>: insert it and continue "in frameset". */
        if($tag === 'frameset') {
            $this->insertElement($token);
            $this->mode = self::IN_FRAME;
            return;
        }

        /* Head-only elements showing up late are a parse error: go back
        to "in head" and reprocess the token there. */
        $headOnly = array('base', 'link', 'meta', 'script', 'style', 'title');
        if(in_array($tag, $headOnly)) {
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);
        }
    }

    /* Anything else: act as if <body> with no attributes had been seen,
    then reprocess the current token. */
    $this->afterHead(array(
        'name' => 'body',
        'type' => HTML5::STARTTAG,
        'attr' => array()
    ));

    return $this->inBody($token);
}
1633  
1634      private function inBody($token) {
1635          /* Handle the token as follows: */
1636  
1637          switch($token['type']) {
1638              /* A character token */
1639              case HTML5::CHARACTR:
1640                  /* Reconstruct the active formatting elements, if any. */
1641                  $this->reconstructActiveFormattingElements();
1642  
1643                  /* Append the token's character to the current node. */
1644                  $this->insertText($token['data']);
1645              break;
1646  
1647              /* A comment token */
1648              case HTML5::COMMENT:
1649                  /* Append a Comment node to the current node with the data
1650                  attribute set to the data given in the comment token. */
1651                  $this->insertComment($token['data']);
1652              break;
1653  
1654              case HTML5::STARTTAG:
1655              switch($token['name']) {
1656                  /* A start tag token whose tag name is one of: "script",
1657                  "style" */
1658                  case 'script': case 'style':
1659                      /* Process the token as if the insertion mode had been "in
1660                      head". */
1661                      return $this->inHead($token);
1662                  break;
1663  
1664                  /* A start tag token whose tag name is one of: "base", "link",
1665                  "meta", "title" */
1666                  case 'base': case 'link': case 'meta': case 'title':
1667                      /* Parse error. Process the token as if the insertion mode
1668                      had    been "in head". */
1669                      return $this->inHead($token);
1670                  break;
1671  
1672                  /* A start tag token with the tag name "body" */
1673                  case 'body':
1674                      /* Parse error. If the second element on the stack of open
1675                      elements is not a body element, or, if the stack of open
1676                      elements has only one node on it, then ignore the token.
1677                      (innerHTML case) */
1678                      if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
1679                          // Ignore
1680  
1681                      /* Otherwise, for each attribute on the token, check to see
1682                      if the attribute is already present on the body element (the
1683                      second element)    on the stack of open elements. If it is not,
1684                      add the attribute and its corresponding value to that
1685                      element. */
1686                      } else {
1687                          foreach($token['attr'] as $attr) {
1688                              if(!$this->stack[1]->hasAttribute($attr['name'])) {
1689                                  $this->stack[1]->setAttribute($attr['name'], $attr['value']);
1690                              }
1691                          }
1692                      }
1693                  break;
1694  
1695                  /* A start tag whose tag name is one of: "address",
1696                  "blockquote", "center", "dir", "div", "dl", "fieldset",
1697                  "listing", "menu", "ol", "p", "ul" */
1698                  case 'address': case 'blockquote': case 'center': case 'dir':
1699                  case 'div': case 'dl': case 'fieldset': case 'listing':
1700                  case 'menu': case 'ol': case 'p': case 'ul':
1701                      /* If the stack of open elements has a p element in scope,
1702                      then act as if an end tag with the tag name p had been
1703                      seen. */
1704                      if($this->elementInScope('p')) {
1705                          $this->emitToken(array(
1706                              'name' => 'p',
1707                              'type' => HTML5::ENDTAG
1708                          ));
1709                      }
1710  
1711                      /* Insert an HTML element for the token. */
1712                      $this->insertElement($token);
1713                  break;
1714  
1715                  /* A start tag whose tag name is "form" */
1716                  case 'form':
1717                      /* If the form element pointer is not null, ignore the
1718                      token with a parse error. */
1719                      if($this->form_pointer !== null) {
1720                          // Ignore.
1721  
1722                      /* Otherwise: */
1723                      } else {
1724                          /* If the stack of open elements has a p element in
1725                          scope, then act as if an end tag with the tag name p
1726                          had been seen. */
1727                          if($this->elementInScope('p')) {
1728                              $this->emitToken(array(
1729                                  'name' => 'p',
1730                                  'type' => HTML5::ENDTAG
1731                              ));
1732                          }
1733  
1734                          /* Insert an HTML element for the token, and set the
1735                          form element pointer to point to the element created. */
1736                          $element = $this->insertElement($token);
1737                          $this->form_pointer = $element;
1738                      }
1739                  break;
1740  
1741                  /* A start tag whose tag name is "li", "dd" or "dt" */
1742                  case 'li': case 'dd': case 'dt':
1743                      /* If the stack of open elements has a p  element in scope,
1744                      then act as if an end tag with the tag name p had been
1745                      seen. */
1746                      if($this->elementInScope('p')) {
1747                          $this->emitToken(array(
1748                              'name' => 'p',
1749                              'type' => HTML5::ENDTAG
1750                          ));
1751                      }
1752  
1753                      $stack_length = count($this->stack) - 1;
1754  
1755                      for($n = $stack_length; 0 <= $n; $n--) {
1756                          /* 1. Initialise node to be the current node (the
1757                          bottommost node of the stack). */
1758                          $stop = false;
1759                          $node = $this->stack[$n];
1760                          $cat  = $this->getElementCategory($node->tagName);
1761  
1762                          /* 2. If node is an li, dd or dt element, then pop all
1763                          the    nodes from the current node up to node, including
1764                          node, then stop this algorithm. */
1765                          if($token['name'] === $node->tagName ||    ($token['name'] !== 'li'
1766                          && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
1767                              for($x = $stack_length; $x >= $n ; $x--) {
1768                                  array_pop($this->stack);
1769                              }
1770  
1771                              break;
1772                          }
1773  
1774                          /* 3. If node is not in the formatting category, and is
1775                          not    in the phrasing category, and is not an address or
1776                          div element, then stop this algorithm. */
1777                          if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
1778                          $node->tagName !== 'address' && $node->tagName !== 'div') {
1779                              break;
1780                          }
1781                      }
1782  
1783                      /* Finally, insert an HTML element with the same tag
1784                      name as the    token's. */
1785                      $this->insertElement($token);
1786                  break;
1787  
1788                  /* A start tag token whose tag name is "plaintext" */
1789                  case 'plaintext':
1790                      /* If the stack of open elements has a p  element in scope,
1791                      then act as if an end tag with the tag name p had been
1792                      seen. */
1793                      if($this->elementInScope('p')) {
1794                          $this->emitToken(array(
1795                              'name' => 'p',
1796                              'type' => HTML5::ENDTAG
1797                          ));
1798                      }
1799  
1800                      /* Insert an HTML element for the token. */
1801                      $this->insertElement($token);
1802  
1803                      return HTML5::PLAINTEXT;
1804                  break;
1805  
1806                  /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
1807                  "h5", "h6" */
1808                  case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1809                      /* If the stack of open elements has a p  element in scope,
1810                      then act as if an end tag with the tag name p had been seen. */
1811                      if($this->elementInScope('p')) {
1812                          $this->emitToken(array(
1813                              'name' => 'p',
1814                              'type' => HTML5::ENDTAG
1815                          ));
1816                      }
1817  
1818                      /* If the stack of open elements has in scope an element whose
1819                      tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1820                      this is a parse error; pop elements from the stack until an
1821                      element with one of those tag names has been popped from the
1822                      stack. */
1823                      while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
1824                          array_pop($this->stack);
1825                      }
1826  
1827                      /* Insert an HTML element for the token. */
1828                      $this->insertElement($token);
1829                  break;
1830  
1831                  /* A start tag whose tag name is "a" */
1832                  case 'a':
1833                      /* If the list of active formatting elements contains
1834                      an element whose tag name is "a" between the end of the
1835                      list and the last marker on the list (or the start of
1836                      the list if there is no marker on the list), then this
1837                      is a parse error; act as if an end tag with the tag name
1838                      "a" had been seen, then remove that element from the list
1839                      of active formatting elements and the stack of open
1840                      elements if the end tag didn't already remove it (it
1841                      might not have if the element is not in table scope). */
1842                      $leng = count($this->a_formatting);
1843  
1844                      for($n = $leng - 1; $n >= 0; $n--) {
1845                          if($this->a_formatting[$n] === self::MARKER) {
1846                              break;
1847  
1848                          } elseif($this->a_formatting[$n]->nodeName === 'a') {
1849                              $this->emitToken(array(
1850                                  'name' => 'a',
1851                                  'type' => HTML5::ENDTAG
1852                              ));
1853                              break;
1854                          }
1855                      }
1856  
1857                      /* Reconstruct the active formatting elements, if any. */
1858                      $this->reconstructActiveFormattingElements();
1859  
1860                      /* Insert an HTML element for the token. */
1861                      $el = $this->insertElement($token);
1862  
1863                      /* Add that element to the list of active formatting
1864                      elements. */
1865                      $this->a_formatting[] = $el;
1866                  break;
1867  
1868                  /* A start tag whose tag name is one of: "b", "big", "em", "font",
1869                  "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1870                  case 'b': case 'big': case 'em': case 'font': case 'i':
1871                  case 'nobr': case 's': case 'small': case 'strike':
1872                  case 'strong': case 'tt': case 'u':
1873                      /* Reconstruct the active formatting elements, if any. */
1874                      $this->reconstructActiveFormattingElements();
1875  
1876                      /* Insert an HTML element for the token. */
1877                      $el = $this->insertElement($token);
1878  
1879                      /* Add that element to the list of active formatting
1880                      elements. */
1881                      $this->a_formatting[] = $el;
1882                  break;
1883  
1884                  /* A start tag token whose tag name is "button" */
1885                  case 'button':
1886                      /* If the stack of open elements has a button element in scope,
1887                      then this is a parse error; act as if an end tag with the tag
1888                      name "button" had been seen, then reprocess the token. (We don't
1889                      do that. Unnecessary.) */
1890                      if($this->elementInScope('button')) {
1891                          $this->inBody(array(
1892                              'name' => 'button',
1893                              'type' => HTML5::ENDTAG
1894                          ));
1895                      }
1896  
1897                      /* Reconstruct the active formatting elements, if any. */
1898                      $this->reconstructActiveFormattingElements();
1899  
1900                      /* Insert an HTML element for the token. */
1901                      $this->insertElement($token);
1902  
1903                      /* Insert a marker at the end of the list of active
1904                      formatting elements. */
1905                      $this->a_formatting[] = self::MARKER;
1906                  break;
1907  
1908                  /* A start tag token whose tag name is one of: "marquee", "object" */
1909                  case 'marquee': case 'object':
1910                      /* Reconstruct the active formatting elements, if any. */
1911                      $this->reconstructActiveFormattingElements();
1912  
1913                      /* Insert an HTML element for the token. */
1914                      $this->insertElement($token);
1915  
1916                      /* Insert a marker at the end of the list of active
1917                      formatting elements. */
1918                      $this->a_formatting[] = self::MARKER;
1919                  break;
1920  
1921                  /* A start tag token whose tag name is "xmp" */
1922                  case 'xmp':
1923                      /* Reconstruct the active formatting elements, if any. */
1924                      $this->reconstructActiveFormattingElements();
1925  
1926                      /* Insert an HTML element for the token. */
1927                      $this->insertElement($token);
1928  
1929                      /* Switch the content model flag to the CDATA state. */
1930                      return HTML5::CDATA;
1931                  break;
1932  
1933                  /* A start tag whose tag name is "table" */
1934                  case 'table':
1935                      /* If the stack of open elements has a p element in scope,
1936                      then act as if an end tag with the tag name p had been seen. */
1937                      if($this->elementInScope('p')) {
1938                          $this->emitToken(array(
1939                              'name' => 'p',
1940                              'type' => HTML5::ENDTAG
1941                          ));
1942                      }
1943  
1944                      /* Insert an HTML element for the token. */
1945                      $this->insertElement($token);
1946  
1947                      /* Change the insertion mode to "in table". */
1948                      $this->mode = self::IN_TABLE;
1949                  break;
1950  
1951                  /* A start tag whose tag name is one of: "area", "basefont",
1952                  "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
1953                  case 'area': case 'basefont': case 'bgsound': case 'br':
1954                  case 'embed': case 'img': case 'param': case 'spacer':
1955                  case 'wbr':
1956                      /* Reconstruct the active formatting elements, if any. */
1957                      $this->reconstructActiveFormattingElements();
1958  
1959                      /* Insert an HTML element for the token. */
1960                      $this->insertElement($token);
1961  
1962                      /* Immediately pop the current node off the stack of open elements. */
1963                      array_pop($this->stack);
1964                  break;
1965  
1966                  /* A start tag whose tag name is "hr" */
1967                  case 'hr':
1968                      /* If the stack of open elements has a p element in scope,
1969                      then act as if an end tag with the tag name p had been seen. */
1970                      if($this->elementInScope('p')) {
1971                          $this->emitToken(array(
1972                              'name' => 'p',
1973                              'type' => HTML5::ENDTAG
1974                          ));
1975                      }
1976  
1977                      /* Insert an HTML element for the token. */
1978                      $this->insertElement($token);
1979  
1980                      /* Immediately pop the current node off the stack of open elements. */
1981                      array_pop($this->stack);
1982                  break;
1983  
1984                  /* A start tag whose tag name is "image" */
1985                  case 'image':
1986                      /* Parse error. Change the token's tag name to "img" and
1987                      reprocess it. (Don't ask.) */
1988                      $token['name'] = 'img';
1989                      return $this->inBody($token);
1990                  break;
1991  
1992                  /* A start tag whose tag name is "input" */
1993                  case 'input':
1994                      /* Reconstruct the active formatting elements, if any. */
1995                      $this->reconstructActiveFormattingElements();
1996  
1997                      /* Insert an input element for the token. */
1998                      $element = $this->insertElement($token, false);
1999  
2000                      /* If the form element pointer is not null, then associate the
2001                      input element with the form element pointed to by the form
2002                      element pointer. */
2003                      $this->form_pointer !== null
2004                          ? $this->form_pointer->appendChild($element)
2005                          : end($this->stack)->appendChild($element);
2006  
2007                      /* Pop that input element off the stack of open elements. */
2008                      array_pop($this->stack);
2009                  break;
2010  
2011                  /* A start tag whose tag name is "isindex" */
2012                  case 'isindex':
2013                      /* Parse error. */
2014                      // w/e
2015  
2016                      /* If the form element pointer is not null,
2017                      then ignore the token. */
2018                      if($this->form_pointer === null) {
2019                          /* Act as if a start tag token with the tag name "form" had
2020                          been seen. */
2021                          $this->inBody(array(
2022                              'name' => 'body',
2023                              'type' => HTML5::STARTTAG,
2024                              'attr' => array()
2025                          ));
2026  
2027                          /* Act as if a start tag token with the tag name "hr" had
2028                          been seen. */
2029                          $this->inBody(array(
2030                              'name' => 'hr',
2031                              'type' => HTML5::STARTTAG,
2032                              'attr' => array()
2033                          ));
2034  
2035                          /* Act as if a start tag token with the tag name "p" had
2036                          been seen. */
2037                          $this->inBody(array(
2038                              'name' => 'p',
2039                              'type' => HTML5::STARTTAG,
2040                              'attr' => array()
2041                          ));
2042  
2043                          /* Act as if a start tag token with the tag name "label"
2044                          had been seen. */
2045                          $this->inBody(array(
2046                              'name' => 'label',
2047                              'type' => HTML5::STARTTAG,
2048                              'attr' => array()
2049                          ));
2050  
2051                          /* Act as if a stream of character tokens had been seen. */
2052                          $this->insertText('This is a searchable index. '.
2053                          'Insert your search keywords here: ');
2054  
2055                          /* Act as if a start tag token with the tag name "input"
2056                          had been seen, with all the attributes from the "isindex"
2057                          token, except with the "name" attribute set to the value
2058                          "isindex" (ignoring any explicit "name" attribute). */
2059                          $attr = $token['attr'];
2060                          $attr[] = array('name' => 'name', 'value' => 'isindex');
2061  
2062                          $this->inBody(array(
2063                              'name' => 'input',
2064                              'type' => HTML5::STARTTAG,
2065                              'attr' => $attr
2066                          ));
2067  
2068                          /* Act as if a stream of character tokens had been seen
2069                          (see below for what they should say). */
2070                          $this->insertText('This is a searchable index. '.
2071                          'Insert your search keywords here: ');
2072  
2073                          /* Act as if an end tag token with the tag name "label"
2074                          had been seen. */
2075                          $this->inBody(array(
2076                              'name' => 'label',
2077                              'type' => HTML5::ENDTAG
2078                          ));
2079  
2080                          /* Act as if an end tag token with the tag name "p" had
2081                          been seen. */
2082                          $this->inBody(array(
2083                              'name' => 'p',
2084                              'type' => HTML5::ENDTAG
2085                          ));
2086  
2087                          /* Act as if a start tag token with the tag name "hr" had
2088                          been seen. */
2089                          $this->inBody(array(
2090                              'name' => 'hr',
2091                              'type' => HTML5::ENDTAG
2092                          ));
2093  
2094                          /* Act as if an end tag token with the tag name "form" had
2095                          been seen. */
2096                          $this->inBody(array(
2097                              'name' => 'form',
2098                              'type' => HTML5::ENDTAG
2099                          ));
2100                      }
2101                  break;
2102  
2103                  /* A start tag whose tag name is "textarea" */
2104                  case 'textarea':
2105                      $this->insertElement($token);
2106  
2107                      /* Switch the tokeniser's content model flag to the
2108                      RCDATA state. */
2109                      return HTML5::RCDATA;
2110                  break;
2111  
2112                  /* A start tag whose tag name is one of: "iframe", "noembed",
2113                  "noframes" */
2114                  case 'iframe': case 'noembed': case 'noframes':
2115                      $this->insertElement($token);
2116  
2117                      /* Switch the tokeniser's content model flag to the CDATA state. */
2118                      return HTML5::CDATA;
2119                  break;
2120  
2121                  /* A start tag whose tag name is "select" */
2122                  case 'select':
2123                      /* Reconstruct the active formatting elements, if any. */
2124                      $this->reconstructActiveFormattingElements();
2125  
2126                      /* Insert an HTML element for the token. */
2127                      $this->insertElement($token);
2128  
2129                      /* Change the insertion mode to "in select". */
2130                      $this->mode = self::IN_SELECT;
2131                  break;
2132  
2133                  /* A start or end tag whose tag name is one of: "caption", "col",
2134                  "colgroup", "frame", "frameset", "head", "option", "optgroup",
2135                  "tbody", "td", "tfoot", "th", "thead", "tr". */
2136                  case 'caption': case 'col': case 'colgroup': case 'frame':
2137                  case 'frameset': case 'head': case 'option': case 'optgroup':
2138                  case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
2139                  case 'tr':
2140                      // Parse error. Ignore the token.
2141                  break;
2142  
2143                  /* A start or end tag whose tag name is one of: "event-source",
2144                  "section", "nav", "article", "aside", "header", "footer",
2145                  "datagrid", "command" */
2146                  case 'event-source': case 'section': case 'nav': case 'article':
2147                  case 'aside': case 'header': case 'footer': case 'datagrid':
2148                  case 'command':
2149                      // Work in progress!
2150                  break;
2151  
2152                  /* A start tag token not covered by the previous entries */
2153                  default:
2154                      /* Reconstruct the active formatting elements, if any. */
2155                      $this->reconstructActiveFormattingElements();
2156  
2157                      $this->insertElement($token, true, true);
2158                  break;
2159              }
2160              break;
2161  
2162              case HTML5::ENDTAG:
2163              switch($token['name']) {
2164                  /* An end tag with the tag name "body" */
2165                  case 'body':
2166                      /* If the second element in the stack of open elements is
2167                      not a body element, this is a parse error. Ignore the token.
2168                      (innerHTML case) */
2169                      if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
2170                          // Ignore.
2171  
2172                      /* If the current node is not the body element, then this
2173                      is a parse error. */
2174                      } elseif(end($this->stack)->nodeName !== 'body') {
2175                          // Parse error.
2176                      }
2177  
2178                      /* Change the insertion mode to "after body". */
2179                      $this->mode = self::AFTER_BODY;
2180                  break;
2181  
2182                  /* An end tag with the tag name "html" */
2183                  case 'html':
2184                      /* Act as if an end tag with tag name "body" had been seen,
2185                      then, if that token wasn't ignored, reprocess the current
2186                      token. */
2187                      $this->inBody(array(
2188                          'name' => 'body',
2189                          'type' => HTML5::ENDTAG
2190                      ));
2191  
2192                      return $this->afterBody($token);
2193                  break;
2194  
2195                  /* An end tag whose tag name is one of: "address", "blockquote",
2196                  "center", "dir", "div", "dl", "fieldset", "listing", "menu",
2197                  "ol", "pre", "ul" */
2198                  case 'address': case 'blockquote': case 'center': case 'dir':
2199                  case 'div': case 'dl': case 'fieldset': case 'listing':
2200                  case 'menu': case 'ol': case 'pre': case 'ul':
2201                      /* If the stack of open elements has an element in scope
2202                      with the same tag name as that of the token, then generate
2203                      implied end tags. */
2204                      if($this->elementInScope($token['name'])) {
2205                          $this->generateImpliedEndTags();
2206  
2207                          /* Now, if the current node is not an element with
2208                          the same tag name as that of the token, then this
2209                          is a parse error. */
2210                          // w/e
2211  
2212                          /* If the stack of open elements has an element in
2213                          scope with the same tag name as that of the token,
2214                          then pop elements from this stack until an element
2215                          with that tag name has been popped from the stack. */
2216                          for($n = count($this->stack) - 1; $n >= 0; $n--) {
2217                              if($this->stack[$n]->nodeName === $token['name']) {
2218                                  $n = -1;
2219                              }
2220  
2221                              array_pop($this->stack);
2222                          }
2223                      }
2224                  break;
2225  
2226                  /* An end tag whose tag name is "form" */
2227                  case 'form':
2228                      /* If the stack of open elements has an element in scope
2229                      with the same tag name as that of the token, then generate
2230                      implied end tags. */
2231                      if($this->elementInScope($token['name'])) {
2232                          $this->generateImpliedEndTags();
2233  
2234                      } 
2235  
2236                      if(end($this->stack)->nodeName !== $token['name']) {
2237                          /* Now, if the current node is not an element with the
2238                          same tag name as that of the token, then this is a parse
2239                          error. */
2240                          // w/e
2241  
2242                      } else {
2243                          /* Otherwise, if the current node is an element with
2244                          the same tag name as that of the token pop that element
2245                          from the stack. */
2246                          array_pop($this->stack);
2247                      }
2248  
2249                      /* In any case, set the form element pointer to null. */
2250                      $this->form_pointer = null;
2251                  break;
2252  
2253                  /* An end tag whose tag name is "p" */
2254                  case 'p':
2255                      /* If the stack of open elements has a p element in scope,
2256                      then generate implied end tags, except for p elements. */
2257                      if($this->elementInScope('p')) {
2258                          $this->generateImpliedEndTags(array('p'));
2259  
2260                          /* If the current node is not a p element, then this is
2261                          a parse error. */
2262                          // k
2263  
2264                          /* If the stack of open elements has a p element in
2265                          scope, then pop elements from this stack until the stack
2266                          no longer has a p element in scope. */
2267                          for($n = count($this->stack) - 1; $n >= 0; $n--) {
2268                              if($this->elementInScope('p')) {
2269                                  array_pop($this->stack);
2270  
2271                              } else {
2272                                  break;
2273                              }
2274                          }
2275                      }
2276                  break;
2277  
2278                  /* An end tag whose tag name is "dd", "dt", or "li" */
2279                  case 'dd': case 'dt': case 'li':
2280                      /* If the stack of open elements has an element in scope
2281                      whose tag name matches the tag name of the token, then
2282                      generate implied end tags, except for elements with the
2283                      same tag name as the token. */
2284                      if($this->elementInScope($token['name'])) {
2285                          $this->generateImpliedEndTags(array($token['name']));
2286  
2287                          /* If the current node is not an element with the same
2288                          tag name as the token, then this is a parse error. */
2289                          // w/e
2290  
2291                          /* If the stack of open elements has an element in scope
2292                          whose tag name matches the tag name of the token, then
2293                          pop elements from this stack until an element with that
2294                          tag name has been popped from the stack. */
2295                          for($n = count($this->stack) - 1; $n >= 0; $n--) {
2296                              if($this->stack[$n]->nodeName === $token['name']) {
2297                                  $n = -1;
2298                              }
2299  
2300                              array_pop($this->stack);
2301                          }
2302                      }
2303                  break;
2304  
2305                  /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
2306                  "h5", "h6" */
2307                  case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
2308                      $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
2309  
2310                      /* If the stack of open elements has in scope an element whose
2311                      tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
2312                      generate implied end tags. */
2313                      if($this->elementInScope($elements)) {
2314                          $this->generateImpliedEndTags();
2315  
2316                          /* Now, if the current node is not an element with the same
2317                          tag name as that of the token, then this is a parse error. */
2318                          // w/e
2319  
2320                          /* If the stack of open elements has in scope an element
2321                          whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
2322                          "h6", then pop elements from the stack until an element
2323                          with one of those tag names has been popped from the stack. */
2324                          while($this->elementInScope($elements)) {
2325                              array_pop($this->stack);
2326                          }
2327                      }
2328                  break;
2329  
2330                  /* An end tag whose tag name is one of: "a", "b", "big", "em",
2331                  "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
2332                  case 'a': case 'b': case 'big': case 'em': case 'font':
2333                  case 'i': case 'nobr': case 's': case 'small': case 'strike':
2334                  case 'strong': case 'tt': case 'u':
2335                      /* 1. Let the formatting element be the last element in
2336                      the list of active formatting elements that:
2337                          * is between the end of the list and the last scope
2338                          marker in the list, if any, or the start of the list
2339                          otherwise, and
2340                          * has the same tag name as the token.
2341                      */
2342                      while(true) {
2343                          for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
2344                              if($this->a_formatting[$a] === self::MARKER) {
2345                                  break;
2346  
2347                              } elseif($this->a_formatting[$a]->tagName === $token['name']) {
2348                                  $formatting_element = $this->a_formatting[$a];
2349                                  $in_stack = in_array($formatting_element, $this->stack, true);
2350                                  $fe_af_pos = $a;
2351                                  break;
2352                              }
2353                          }
2354  
2355                          /* If there is no such node, or, if that node is
2356                          also in the stack of open elements but the element
2357                          is not in scope, then this is a parse error. Abort
2358                          these steps. The token is ignored. */
2359                          if(!isset($formatting_element) || ($in_stack &&
2360                          !$this->elementInScope($token['name']))) {
2361                              break;
2362  
2363                          /* Otherwise, if there is such a node, but that node
2364                          is not in the stack of open elements, then this is a
2365                          parse error; remove the element from the list, and
2366                          abort these steps. */
2367                          } elseif(isset($formatting_element) && !$in_stack) {
2368                              unset($this->a_formatting[$fe_af_pos]);
2369                              $this->a_formatting = array_merge($this->a_formatting);
2370                              break;
2371                          }
2372  
2373                          /* 2. Let the furthest block be the topmost node in the
2374                          stack of open elements that is lower in the stack
2375                          than the formatting element, and is not an element in
2376                          the phrasing or formatting categories. There might
2377                          not be one. */
2378                          $fe_s_pos = array_search($formatting_element, $this->stack, true);
2379                          $length = count($this->stack);
2380  
2381                          for($s = $fe_s_pos + 1; $s < $length; $s++) {
2382                              $category = $this->getElementCategory($this->stack[$s]->nodeName);
2383  
2384                              if($category !== self::PHRASING && $category !== self::FORMATTING) {
2385                                  $furthest_block = $this->stack[$s];
2386                              }
2387                          }
2388  
2389                          /* 3. If there is no furthest block, then the UA must
2390                          skip the subsequent steps and instead just pop all
2391                          the nodes from the bottom of the stack of open
2392                          elements, from the current node up to the formatting
2393                          element, and remove the formatting element from the
2394                          list of active formatting elements. */
2395                          if(!isset($furthest_block)) {
2396                              for($n = $length - 1; $n >= $fe_s_pos; $n--) {
2397                                  array_pop($this->stack);
2398                              }
2399  
2400                              unset($this->a_formatting[$fe_af_pos]);
2401                              $this->a_formatting = array_merge($this->a_formatting);
2402                              break;
2403                          }
2404  
2405                          /* 4. Let the common ancestor be the element
2406                          immediately above the formatting element in the stack
2407                          of open elements. */
2408                          $common_ancestor = $this->stack[$fe_s_pos - 1];
2409  
2410                          /* 5. If the furthest block has a parent node, then
2411                          remove the furthest block from its parent node. */
2412                          if($furthest_block->parentNode !== null) {
2413                              $furthest_block->parentNode->removeChild($furthest_block);
2414                          }
2415  
2416                          /* 6. Let a bookmark note the position of the
2417                          formatting element in the list of active formatting
2418                          elements relative to the elements on either side
2419                          of it in the list. */
2420                          $bookmark = $fe_af_pos;
2421  
2422                          /* 7. Let node and last node  be the furthest block.
2423                          Follow these steps: */
2424                          $node = $furthest_block;
2425                          $last_node = $furthest_block;
2426  
2427                          while(true) {
2428                              for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
2429                                  /* 7.1 Let node be the element immediately
2430                                  prior to node in the stack of open elements. */
2431                                  $node = $this->stack[$n];
2432  
2433                                  /* 7.2 If node is not in the list of active
2434                                  formatting elements, then remove node from
2435                                  the stack of open elements and then go back
2436                                  to step 1. */
2437                                  if(!in_array($node, $this->a_formatting, true)) {
2438                                      unset($this->stack[$n]);
2439                                      $this->stack = array_merge($this->stack);
2440  
2441                                  } else {
2442                                      break;
2443                                  }
2444                              }
2445  
2446                              /* 7.3 Otherwise, if node is the formatting
2447                              element, then go to the next step in the overall
2448                              algorithm. */
2449                              if($node === $formatting_element) {
2450                                  break;
2451  
2452                              /* 7.4 Otherwise, if last node is the furthest
2453                              block, then move the aforementioned bookmark to
2454                              be immediately after the node in the list of
2455                              active formatting elements. */
2456                              } elseif($last_node === $furthest_block) {
2457                                  $bookmark = array_search($node, $this->a_formatting, true) + 1;
2458                              }
2459  
2460                              /* 7.5 If node has any children, perform a
2461                              shallow clone of node, replace the entry for
2462                              node in the list of active formatting elements
2463                              with an entry for the clone, replace the entry
2464                              for node in the stack of open elements with an
2465                              entry for the clone, and let node be the clone. */
2466                              if($node->hasChildNodes()) {
2467                                  $clone = $node->cloneNode();
2468                                  $s_pos = array_search($node, $this->stack, true);
2469                                  $a_pos = array_search($node, $this->a_formatting, true);
2470  
2471                                  $this->stack[$s_pos] = $clone;
2472                                  $this->a_formatting[$a_pos] = $clone;
2473                                  $node = $clone;
2474                              }
2475  
2476                              /* 7.6 Insert last node into node, first removing
2477                              it from its previous parent node if any. */
2478                              if($last_node->parentNode !== null) {
2479                                  $last_node->parentNode->removeChild($last_node);
2480                              }
2481  
2482                              $node->appendChild($last_node);
2483  
2484                              /* 7.7 Let last node be node. */
2485                              $last_node = $node;
2486                          }
2487  
2488                          /* 8. Insert whatever last node ended up being in
2489                          the previous step into the common ancestor node,
2490                          first removing it from its previous parent node if
2491                          any. */
2492                          if($last_node->parentNode !== null) {
2493                              $last_node->parentNode->removeChild($last_node);
2494                          }
2495  
2496                          $common_ancestor->appendChild($last_node);
2497  
2498                          /* 9. Perform a shallow clone of the formatting
2499                          element. */
2500                          $clone = $formatting_element->cloneNode();
2501  
2502                          /* 10. Take all of the child nodes of the furthest
2503                          block and append them to the clone created in the
2504                          last step. */
2505                          while($furthest_block->hasChildNodes()) {
2506                              $child = $furthest_block->firstChild;
2507                              $furthest_block->removeChild($child);
2508                              $clone->appendChild($child);
2509                          }
2510  
2511                          /* 11. Append that clone to the furthest block. */
2512                          $furthest_block->appendChild($clone);
2513  
2514                          /* 12. Remove the formatting element from the list
2515                          of active formatting elements, and insert the clone
2516                          into the list of active formatting elements at the
2517                          position of the aforementioned bookmark. */
2518                          $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
2519                          unset($this->a_formatting[$fe_af_pos]);
2520                          $this->a_formatting = array_merge($this->a_formatting);
2521  
2522                          $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
2523                          $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
2524                          $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
2525  
2526                          /* 13. Remove the formatting element from the stack
2527                          of open elements, and insert the clone into the stack
2528                          of open elements immediately after (i.e. in a more
2529                          deeply nested position than) the position of the
2530                          furthest block in that stack. */
2531                          $fe_s_pos = array_search($formatting_element, $this->stack, true);
2532                          $fb_s_pos = array_search($furthest_block, $this->stack, true);
2533                          unset($this->stack[$fe_s_pos]);
2534  
2535                          $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
2536                          $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
2537                          $this->stack = array_merge($s_part1, array($clone), $s_part2);
2538  
2539                          /* 14. Jump back to step 1 in this series of steps. */
2540                          unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
2541                      }
2542                  break;
2543  
2544                  /* An end tag token whose tag name is one of: "button",
2545                  "marquee", "object" */
2546                  case 'button': case 'marquee': case 'object':
2547                      /* If the stack of open elements has an element in scope whose
2548                      tag name matches the tag name of the token, then generate implied
2549                      end tags. */
2550                      if($this->elementInScope($token['name'])) {
2551                          $this->generateImpliedEndTags();
2552  
2553                          /* Now, if the current node is not an element with the same
2554                          tag name as the token, then this is a parse error. */
2555                          // k
2556  
2557                          /* Now, if the stack of open elements has an element in scope
2558                          whose tag name matches the tag name of the token, then pop
2559                          elements from the stack until that element has been popped from
2560                          the stack, and clear the list of active formatting elements up
2561                          to the last marker. */
2562                          for($n = count($this->stack) - 1; $n >= 0; $n--) {
2563                              if($this->stack[$n]->nodeName === $token['name']) {
2564                                  $n = -1;
2565                              }
2566  
2567                              array_pop($this->stack);
2568                          }
2569  
2570                          $marker = end(array_keys($this->a_formatting, self::MARKER, true));
2571  
2572                          for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
2573                              array_pop($this->a_formatting);
2574                          }
2575                      }
2576                  break;
2577  
2578                  /* Or an end tag whose tag name is one of: "area", "basefont",
2579                  "bgsound", "br", "embed", "hr", "iframe", "image", "img",
2580                  "input", "isindex", "noembed", "noframes", "param", "select",
2581                  "spacer", "table", "textarea", "wbr" */
2582                  case 'area': case 'basefont': case 'bgsound': case 'br':
2583                  case 'embed': case 'hr': case 'iframe': case 'image':
2584                  case 'img': case 'input': case 'isindex': case 'noembed':
2585                  case 'noframes': case 'param': case 'select': case 'spacer':
2586                  case 'table': case 'textarea': case 'wbr':
2587                      // Parse error. Ignore the token.
2588                  break;
2589  
2590                  /* An end tag token not covered by the previous entries */
2591                  default:
2592                      for($n = count($this->stack) - 1; $n >= 0; $n--) {
2593                          /* Initialise node to be the current node (the bottommost
2594                          node of the stack). */
2595                          $node = end($this->stack);
2596  
2597                          /* If node has the same tag name as the end tag token,
2598                          then: */
2599                          if($token['name'] === $node->nodeName) {
2600                              /* Generate implied end tags. */
2601                              $this->generateImpliedEndTags();
2602  
2603                              /* If the tag name of the end tag token does not
2604                              match the tag name of the current node, this is a
2605                              parse error. */
2606                              // k
2607  
2608                              /* Pop all the nodes from the current node up to
2609                              node, including node, then stop this algorithm. */
2610                              for($x = count($this->stack) - $n; $x >= $n; $x--) {
2611                                  array_pop($this->stack);
2612                              }
2613                                      
2614                          } else {
2615                              $category = $this->getElementCategory($node);
2616  
2617                              if($category !== self::SPECIAL && $category !== self::SCOPING) {
2618                                  /* Otherwise, if node is in neither the formatting
2619                                  category nor the phrasing category, then this is a
2620                                  parse error. Stop this algorithm. The end tag token
2621                                  is ignored. */
2622                                  return false;
2623                              }
2624                          }
2625                      }
2626                  break;
2627              }
2628              break;
2629          }
2630      }
2631  
2632      private function inTable($token) {
2633          $clear = array('html', 'table');
2634  
2635          /* A character token that is one of U+0009 CHARACTER TABULATION,
2636          U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2637          or U+0020 SPACE */
2638          if($token['type'] === HTML5::CHARACTR &&
2639          preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
2640              /* Append the character to the current node. */
2641              $text = $this->dom->createTextNode($token['data']);
2642              end($this->stack)->appendChild($text);
2643  
2644          /* A comment token */
2645          } elseif($token['type'] === HTML5::COMMENT) {
2646              /* Append a Comment node to the current node with the data
2647              attribute set to the data given in the comment token. */
2648              $comment = $this->dom->createComment($token['data']);
2649              end($this->stack)->appendChild($comment);
2650  
2651          /* A start tag whose tag name is "caption" */
2652          } elseif($token['type'] === HTML5::STARTTAG &&
2653          $token['name'] === 'caption') {
2654              /* Clear the stack back to a table context. */
2655              $this->clearStackToTableContext($clear);
2656  
2657              /* Insert a marker at the end of the list of active
2658              formatting elements. */
2659              $this->a_formatting[] = self::MARKER;
2660  
2661              /* Insert an HTML element for the token, then switch the
2662              insertion mode to "in caption". */
2663              $this->insertElement($token);
2664              $this->mode = self::IN_CAPTION;
2665  
2666          /* A start tag whose tag name is "colgroup" */
2667          } elseif($token['type'] === HTML5::STARTTAG &&
2668          $token['name'] === 'colgroup') {
2669              /* Clear the stack back to a table context. */
2670              $this->clearStackToTableContext($clear);
2671  
2672              /* Insert an HTML element for the token, then switch the
2673              insertion mode to "in column group". */
2674              $this->insertElement($token);
2675              $this->mode = self::IN_CGROUP;
2676  
2677          /* A start tag whose tag name is "col" */
2678          } elseif($token['type'] === HTML5::STARTTAG &&
2679          $token['name'] === 'col') {
2680              $this->inTable(array(
2681                  'name' => 'colgroup',
2682                  'type' => HTML5::STARTTAG,
2683                  'attr' => array()
2684              ));
2685  
2686              $this->inColumnGroup($token);
2687  
2688          /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2689          } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
2690          array('tbody', 'tfoot', 'thead'))) {
2691              /* Clear the stack back to a table context. */
2692              $this->clearStackToTableContext($clear);
2693  
2694              /* Insert an HTML element for the token, then switch the insertion
2695              mode to "in table body". */
2696              $this->insertElement($token);
2697              $this->mode = self::IN_TBODY;
2698  
2699          /* A start tag whose tag name is one of: "td", "th", "tr" */
2700          } elseif($token['type'] === HTML5::STARTTAG &&
2701          in_array($token['name'], array('td', 'th', 'tr'))) {
2702              /* Act as if a start tag token with the tag name "tbody" had been
2703              seen, then reprocess the current token. */
2704              $this->inTable(array(
2705                  'name' => 'tbody',
2706                  'type' => HTML5::STARTTAG,
2707                  'attr' => array()
2708              ));
2709  
2710              return $this->inTableBody($token);
2711  
2712          /* A start tag whose tag name is "table" */
2713          } elseif($token['type'] === HTML5::STARTTAG &&
2714          $token['name'] === 'table') {
2715              /* Parse error. Act as if an end tag token with the tag name "table"
2716              had been seen, then, if that token wasn't ignored, reprocess the
2717              current token. */
2718              $this->inTable(array(
2719                  'name' => 'table',
2720                  'type' => HTML5::ENDTAG
2721              ));
2722  
2723              return $this->mainPhase($token);
2724  
2725          /* An end tag whose tag name is "table" */
2726          } elseif($token['type'] === HTML5::ENDTAG &&
2727          $token['name'] === 'table') {
2728              /* If the stack of open elements does not have an element in table
2729              scope with the same tag name as the token, this is a parse error.
2730              Ignore the token. (innerHTML case) */
2731              if(!$this->elementInScope($token['name'], true)) {
2732                  return false;
2733  
2734              /* Otherwise: */
2735              } else {
2736                  /* Generate implied end tags. */
2737                  $this->generateImpliedEndTags();
2738  
2739                  /* Now, if the current node is not a table element, then this
2740                  is a parse error. */
2741                  // w/e
2742  
2743                  /* Pop elements from this stack until a table element has been
2744                  popped from the stack. */
2745                  while(true) {
2746                      $current = end($this->stack)->nodeName;
2747                      array_pop($this->stack);
2748  
2749                      if($current === 'table') {
2750                          break;
2751                      }
2752                  }
2753  
2754                  /* Reset the insertion mode appropriately. */
2755                  $this->resetInsertionMode();
2756              }
2757  
2758          /* An end tag whose tag name is one of: "body", "caption", "col",
2759          "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2760          } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
2761          array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2762          'tfoot', 'th', 'thead', 'tr'))) {
2763              // Parse error. Ignore the token.
2764  
2765          /* Anything else */
2766          } else {
2767              /* Parse error. Process the token as if the insertion mode was "in
2768              body", with the following exception: */
2769  
2770              /* If the current node is a table, tbody, tfoot, thead, or tr
2771              element, then, whenever a node would be inserted into the current
2772              node, it must instead be inserted into the foster parent element. */
2773              if(in_array(end($this->stack)->nodeName,
2774              array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2775                  /* The foster parent element is the parent element of the last
2776                  table element in the stack of open elements, if there is a
2777                  table element and it has such a parent element. If there is no
2778                  table element in the stack of open elements (innerHTML case),
2779                  then the foster parent element is the first element in the
2780                  stack of open elements (the html  element). Otherwise, if there
2781                  is a table element in the stack of open elements, but the last
2782                  table element in the stack of open elements has no parent, or
2783                  its parent node is not an element, then the foster parent
2784                  element is the element before the last table element in the
2785                  stack of open elements. */
2786                  for($n = count($this->stack) - 1; $n >= 0; $n--) {
2787                      if($this->stack[$n]->nodeName === 'table') {
2788                          $table = $this->stack[$n];
2789                          break;
2790                      }
2791                  }
2792  
2793                  if(isset($table) && $table->parentNode !== null) {
2794                      $this->foster_parent = $table->parentNode;
2795  
2796                  } elseif(!isset($table)) {
2797                      $this->foster_parent = $this->stack[0];
2798  
2799                  } elseif(isset($table) && ($table->parentNode === null ||
2800                  $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
2801                      $this->foster_parent = $this->stack[$n - 1];
2802                  }
2803              }
2804  
2805              $this->inBody($token);
2806          }
2807      }
2808  
/**
 * Processes one token while the insertion mode is "in caption".
 *
 * A "caption" end tag closes the caption and returns to "in table".
 * Table-structure start tags and a "table" end tag imply that end tag and
 * are then handed to inTable(). Stray table-related end tags are dropped;
 * everything else is treated as body content.
 *
 * @param array $token Token to process; 'type' holds one of the HTML5::*
 *                     token type constants and 'name' is read for tags.
 * @return mixed The result of inTable() for reprocessed tokens, null
 *               otherwise.
 */
private function inCaption($token) {
    /* An end tag whose tag name is "caption" */
    if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if(!$this->elementInScope($token['name'], true)) {
            // Ignore

        /* Otherwise: */
        } else {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* Now, if the current node is not a caption element, then this
            is a parse error. (Not reported; parsing simply continues.) */

            /* Pop elements from this stack until a caption element has
            been popped from the stack. */
            while(true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if($node === 'caption') {
                    break;
                }
            }

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in table". */
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    name is "table" */
    } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
    array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
    $token['name'] === 'table')) {
        /* Parse error. Act as if an end tag with the tag name "caption"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. */
        $this->inCaption(array(
            'name' => 'caption',
            'type' => HTML5::ENDTAG
        ));

        return $this->inTable($token);

    /* An end tag whose tag name is one of: "body", "col", "colgroup",
    "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
    array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'))) {
        /* Parse error. Ignore the token. "td" belongs in this list too;
        without it a stray </td> would be processed as body content. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
2876  
/**
 * Processes one token while the insertion mode is "in column group".
 *
 * Each recognised token kind is handled by its own guard clause; whatever
 * is left over implies a "colgroup" end tag and is then passed on to
 * inTable().
 *
 * @param array $token Token to process; 'type' holds one of the HTML5::*
 *                     token type constants, 'name' is read for tags and
 *                     'data' for character and comment tokens.
 * @return mixed The result of inTable() for the fall-through case, null
 *               otherwise.
 */
private function inColumnGroup($token) {
    $kind = $token['type'];

    /* Character data consisting solely of tab, line feed, line
    tabulation, form feed or space: keep it as a text node under the
    current node. */
    if($kind === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        $whitespace = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($whitespace);
        return;
    }

    /* Comment token: attach a comment node carrying the token's data to
    the current node. */
    if($kind === HTML5::COMMENT) {
        $remark = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($remark);
        return;
    }

    $opens = ($kind === HTML5::STARTTAG);
    $closes = ($kind === HTML5::ENDTAG);

    /* <col>: insert the element and take it off the stack of open
    elements again straight away. */
    if($opens && $token['name'] === 'col') {
        $this->insertElement($token);
        array_pop($this->stack);
        return;
    }

    /* </colgroup>: a parse error that is ignored when the current node is
    the root html element (innerHTML case); otherwise the current node is
    popped and the insertion mode goes back to "in table". */
    if($closes && $token['name'] === 'colgroup') {
        if(end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }
        return;
    }

    /* </col>: parse error, the token is ignored. */
    if($closes && $token['name'] === 'col') {
        return;
    }

    /* Anything else: behave as though </colgroup> had been seen, then let
    the "in table" handler deal with the token. */
    $this->inColumnGroup(array(
        'name' => 'colgroup',
        'type' => HTML5::ENDTAG
    ));

    return $this->inTable($token);
}
2933  
/**
 * Processes one token while the insertion mode is "in table body".
 *
 * Handles row and cell start tags, the tbody/tfoot/thead end tags, and
 * the tokens that implicitly close the current table section. Everything
 * else is forwarded to inTable().
 *
 * @param array $token Token to process; 'type' holds one of the HTML5::*
 *                     token type constants and 'name' is read for tags.
 * @return mixed The delegate's result where a token is reprocessed by
 *               inRow() or mainPhase(), null otherwise.
 */
private function inTableBody($token) {
    /* Names handed to clearStackToTableContext(); presumably the elements
    at which that helper stops popping (helper not visible here). */
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    /* A start tag whose tag name is "tr" */
    if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
        /* Clear the stack back to a table body context. */
        $this->clearStackToTableContext($clear);

        /* Insert a tr element for the token, then switch the insertion
        mode to "in row". */
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    /* A start tag whose tag name is one of: "th", "td" */
    } elseif($token['type'] === HTML5::STARTTAG &&
    ($token['name'] === 'th' || $token['name'] === 'td')) {
        /* Parse error. Act as if a start tag with the tag name "tr" had
        been seen, then reprocess the current token. */
        $this->inTableBody(array(
            'name' => 'tr',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        return $this->inRow($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif($token['type'] === HTML5::ENDTAG &&
    in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if(!$this->elementInScope($token['name'], true)) {
            // Ignore

        /* Otherwise: */
        } else {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node from the stack of open elements. Switch
            the insertion mode to "in table". */
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", or an end tag whose tag name is "table".
    (The name list must read "tfoot" and the table check must test for an
    END tag; a nested <table> start tag is left to inTable().) */
    } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
    array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
    ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
        /* If the stack of open elements does not have a tbody, thead, or
        tfoot element in table scope, this is a parse error. Ignore the
        token. (innerHTML case) */
        if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag with the same tag name as the current
            node ("tbody", "tfoot", or "thead") had been seen, then
            reprocess the current token. */
            $this->inTableBody(array(
                'name' => end($this->stack)->nodeName,
                'type' => HTML5::ENDTAG
            ));

            return $this->mainPhase($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th", "tr" */
    } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
    array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
3019  
/**
 * Processes one token while the insertion mode is "in row".
 *
 * Cell start tags open a cell, a "tr" end tag closes the row, and tokens
 * that implicitly end the row first imply a "tr" end tag and are then
 * reprocessed by the "in table body" handler. Everything else is
 * forwarded to inTable().
 *
 * @param array $token Token to process; 'type' holds one of the HTML5::*
 *                     token type constants and 'name' is read for tags.
 * @return mixed The result of inTableBody() for reprocessed tokens, null
 *               otherwise.
 */
private function inRow($token) {
    /* Names handed to clearStackToTableContext(); presumably the elements
    at which that helper stops popping (helper not visible here). */
    $clear = array('tr', 'html');

    /* A start tag whose tag name is one of: "th", "td" */
    if($token['type'] === HTML5::STARTTAG &&
    ($token['name'] === 'th' || $token['name'] === 'td')) {
        /* Clear the stack back to a table row context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in cell". */
        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        /* Insert a marker at the end of the list of active formatting
        elements. */
        $this->a_formatting[] = self::MARKER;

    /* An end tag whose tag name is "tr" */
    } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if(!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node (which will be a tr element) from the
            stack of open elements. Switch the insertion mode to "in table
            body". */
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
    } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
    array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
    ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
        /* Act as if an end tag with the tag name "tr" had been seen, then,
        if that token wasn't ignored, reprocess the current token. The
        implied end tag is ignored exactly when no tr is in table scope,
        so that condition is tested up front. */
        if($this->elementInScope('tr', true)) {
            $this->inRow(array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            ));

            /* Closing the row switched the mode to "in table body", so
            the token is reprocessed there. Sending it to inCell() would
            lose it: with the row closed no td/th is in table scope and
            inCell() ignores these start tags in that situation. */
            return $this->inTableBody($token);
        }

        // No open row to close: the token is ignored. (innerHTML case)

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif($token['type'] === HTML5::ENDTAG &&
    in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if(!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Otherwise, act as if an end tag with the tag name "tr" had
            been seen, then reprocess the current token. */
            $this->inRow(array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            ));

            /* Reprocess in "in table body", which closes the section.
            Going through inCell() hands the token straight back to
            inRow() and the two handlers can bounce it between each other
            without ever consuming it. */
            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th" */
    } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
    array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
3104  
/**
 * Processes one token while the insertion mode is "in cell".
 *
 * A matching "td"/"th" end tag closes the cell and returns to "in row".
 * Table-structure tags that implicitly end the cell close it through
 * closeCell() and are then reprocessed by inRow(). Everything else is
 * treated as body content.
 *
 * @param array $token Token to process; 'type' holds one of the HTML5::*
 *                     token type constants and 'name' is read for tags.
 * @return mixed The result of inRow() for reprocessed tokens, null
 *               otherwise.
 */
private function inCell($token) {
    /* An end tag whose tag name is one of: "td", "th" */
    if($token['type'] === HTML5::ENDTAG &&
    ($token['name'] === 'td' || $token['name'] === 'th')) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token, then this is a
        parse error and the token must be ignored. */
        if(!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Generate implied end tags, except for elements with the same
            tag name as the token. */
            $this->generateImpliedEndTags(array($token['name']));

            /* Now, if the current node is not an element with the same tag
            name as the token, then this is a parse error. (Not reported;
            parsing simply continues.) */

            /* Pop elements from this stack until an element with the same
            tag name as the token has been popped from the stack. */
            while(true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if($node === $token['name']) {
                    break;
                }
            }

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in row". (The current node
            will be a tr element at this point.) */
            $this->mode = self::IN_ROW;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
    array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'))) {
        /* If the stack of open elements does not have a td or th element
        in table scope, then this is a parse error; ignore the token.
        (innerHTML case) */
        if(!$this->elementInScope(array('td', 'th'), true)) {
            // Ignore.

        /* Otherwise, close the cell and reprocess the current token. */
        } else {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html" */
    } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
    array('body', 'caption', 'col', 'colgroup', 'html'))) {
        /* Parse error. Ignore the token. */

    /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
    "thead", "tr" */
    } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
    array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token (which can only
        happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
        then this is a parse error and the token must be ignored. */
        if(!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise, close the cell and reprocess the current token. */
        } else {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
3211  
3212      private function inSelect($token) {
3213          /* Handle the token as follows: */
3214  
3215          /* A character token */
3216          if($token['type'] === HTML5::CHARACTR) {
3217              /* Append the token's character to the current node. */
3218              $this->insertText($token['data']);
3219  
3220          /* A comment token */
3221          } elseif($token['type'] === HTML5::COMMENT) {
3222              /* Append a Comment node to the current node with the data
3223              attribute set to the data given in the comment token. */
3224              $this->insertComment($token['data']);
3225  
3226          /* A start tag token whose tag name is "option" */
3227          } elseif($token['type'] === HTML5::STARTTAG &&
3228          $token['name'] === 'option') {
3229              /* If the current node is an option element, act as if an end tag
3230              with the tag name "option" had been seen. */
3231              if(end($this->stack)->nodeName === 'option') {
3232                  $this->inSelect(array(
3233                      'name' => 'option',
3234                      'type' => HTML5::ENDTAG
3235                  ));
3236              }
3237  
3238              /* Insert an HTML element for the token. */
3239              $this->insertElement($token);
3240  
3241          /* A start tag token whose tag name is "optgroup" */
3242          } elseif($token['type'] === HTML5::STARTTAG &&
3243          $token['name'] === 'optgroup') {
3244              /* If the current node is an option element, act as if an end tag
3245              with the tag name "option" had been seen. */
3246              if(end($this->stack)->nodeName === 'option') {
3247                  $this->inSelect(array(
3248                      'name' => 'option',
3249                      'type' => HTML5::ENDTAG
3250                  ));
3251              }
3252  
3253              /* If the current node is an optgroup element, act as if an end tag
3254              with the tag name "optgroup" had been seen. */
3255              if(end($this->stack)->nodeName === 'optgroup') {
3256                  $this->inSelect(array(
3257                      'name' => 'optgroup',
3258                      'type' => HTML5::ENDTAG
3259                  ));
3260              }
3261  
3262              /* Insert an HTML element for the token. */
3263              $this->insertElement($token);
3264  
3265          /* An end tag token whose tag name is "optgroup" */
3266          } elseif($token['type'] === HTML5::ENDTAG &&
3267          $token['name'] === 'optgroup') {
3268              /* First, if the current node is an option element, and the node
3269              immediately before it in the stack of open elements is an optgroup
3270              element, then act as if an end tag with the tag name "option" had
3271              been seen. */
3272              $elements_in_stack = count($this->stack);
3273  
3274              if($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
3275              $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
3276                  $this->inSelect(array(
3277                      'name' => 'option',
3278                      'type' => HTML5::ENDTAG
3279                  ));
3280              }
3281  
3282              /* If the current node is an optgroup element, then pop that node
3283              from the stack of open elements. Otherwise, this is a parse error,
3284              ignore the token. */
3285              if($this->stack[$elements_in_stack - 1] === 'optgroup') {
3286                  array_pop($this->stack);
3287              }
3288  
3289          /* An end tag token whose tag name is "option" */
3290          } elseif($token['type'] === HTML5::ENDTAG &&
3291          $token['name'] === 'option') {
3292              /* If the current node is an option element, then pop that node
3293              from the stack of open elements. Otherwise, this is a parse error,
3294              ignore the token. */
3295              if(end($this->stack)->nodeName === 'option') {
3296                  array_pop($this->stack);
3297              }
3298  
3299          /* An end tag whose tag name is "select" */
3300          } elseif($token['type'] === HTML5::ENDTAG &&
3301          $token['name'] === 'select') {
3302              /* If the stack of open elements does not have an element in table
3303              scope with the same tag name as the token, this is a parse error.
3304              Ignore the token. (innerHTML case) */
3305              if(!$this->elementInScope($token['name'], true)) {
3306                  // w/e
3307  
3308              /* Otherwise: */
3309              } else {
3310                  /* Pop elements from the stack of open elements until a select
3311                  element has been popped from the stack. */
3312                  while(true) {
3313                      $current = end($this->stack)->nodeName;
3314                      array_pop($this->stack);
3315  
3316                      if($current === 'select') {
3317                          break;
3318                      }
3319                  }
3320  
3321                  /* Reset the insertion mode appropriately. */
3322                  $this->resetInsertionMode();
3323              }
3324  
3325          /* A start tag whose tag name is "select" */
3326          } elseif($token['name'] === 'select' &&
3327          $token['type'] === HTML5::STARTTAG) {
3328              /* Parse error. Act as if the token had been an end tag with the
3329              tag name "select" instead. */
3330              $this->inSelect(array(
3331                  'name' => 'select',
3332                  'type' => HTML5::ENDTAG
3333              ));
3334  
3335          /* An end tag whose tag name is one of: "caption", "table", "tbody",
3336          "tfoot", "thead", "tr", "td", "th" */
3337          } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
3338          'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
3339              /* Parse error. */
3340              // w/e
3341  
3342              /* If the stack of open elements has an element in table scope with
3343              the same tag name as that of the token, then act as if an end tag
3344              with the tag name "select" had been seen, and reprocess the token.
3345              Otherwise, ignore the token. */
3346              if($this->elementInScope($token['name'], true)) {
3347                  $this->inSelect(array(
3348                      'name' => 'select',
3349                      'type' => HTML5::ENDTAG
3350                  ));
3351  
3352                  $this->mainPhase($token);
3353              }
3354  
3355          /* Anything else */
3356          } else {
3357              /* Parse error. Ignore the token. */
3358          }
3359      }
3360  
/**
 * Handles a token while the insertion mode is "after body".
 *
 * Comments are attached to the first element on the stack of open
 * elements, an "html" end tag moves the parser to the trailing end
 * phase, and whitespace is processed by the "in body" handler. Any other
 * token is a parse error: the insertion mode is set back to "in body"
 * and the token is reprocessed there.
 *
 * @param array $token Token array; 'type' is always read, 'data' and
 *                     'name' only for the token types that use them.
 * @return mixed Result of inBody() for the "anything else" case,
 *               otherwise null.
 */
private function afterBody($token) {
    $type = $token['type'];

    /* A comment token: append a Comment node to the first element in the
    stack of open elements (the html element). */
    if($type === HTML5::COMMENT) {
        $this->stack[0]->appendChild(
            $this->dom->createComment($token['data'])
        );
        return;
    }

    /* An end tag with the tag name "html": switch to the trailing end
    phase. (The innerHTML case described by the spec is not handled.) */
    if($type === HTML5::ENDTAG && $token['name'] === 'html') {
        $this->phase = self::END_PHASE;
        return;
    }

    /* A character token made up only of U+0009, U+000A, U+000B, U+000C
    or U+0020 is processed as if the insertion mode was "in body". */
    $is_whitespace = $type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']);

    if(!$is_whitespace) {
        /* Anything else: parse error. Set the insertion mode to
        "in body" and reprocess the token. */
        $this->mode = self::IN_BODY;
        return $this->inBody($token);
    }

    $this->inBody($token);
}
3399  
/**
 * Handles a token while the insertion mode is "in frameset".
 *
 * The token type is tested before the tag name, so $token['name'] is
 * only read for start and end tag tokens.
 *
 * @param array $token Token array with a 'type' key; tag tokens also
 *                     carry 'name', character and comment tokens 'data'.
 * @return void
 */
private function inFrameset($token) {
    $type = $token['type'];

    /* A character token made up only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
    or U+0020 SPACE */
    if($type === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag with the tag name "frameset" */
    } elseif($type === HTML5::STARTTAG && $token['name'] === 'frameset') {
        $this->insertElement($token);

    /* An end tag with the tag name "frameset" */
    } elseif($type === HTML5::ENDTAG && $token['name'] === 'frameset') {
        /* If the current node is the root html element, then this is a
        parse error; ignore the token. (innerHTML case) */
        if(end($this->stack)->nodeName !== 'html') {
            /* Otherwise, pop the current node from the stack of open
            elements. */
            array_pop($this->stack);

            /* Only when the current node is no longer a frameset element
            does the insertion mode change to "after frameset"; closing
            a nested frameset must leave the parser "in frameset" so
            that following frame elements are still accepted. */
            if(end($this->stack)->nodeName !== 'frameset') {
                $this->mode = self::AFTR_FRAME;
            }
        }

    /* A start tag with the tag name "frame" */
    } elseif($type === HTML5::STARTTAG && $token['name'] === 'frame') {
        /* Insert an HTML element for the token. */
        $this->insertElement($token);

        /* Immediately pop the current node off the stack of open elements. */
        array_pop($this->stack);

    /* A start tag with the tag name "noframes" */
    } elseif($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);
    }

    /* Anything else: parse error. Ignore the token. */
}
3462  
/**
 * Handles a token while the insertion mode is "after frameset".
 *
 * The token type is tested before the tag name, so $token['name'] is
 * only read for start and end tag tokens.
 *
 * @param array $token Token array with a 'type' key; tag tokens also
 *                     carry 'name', character and comment tokens 'data'.
 * @return void
 */
private function afterFrameset($token) {
    $type = $token['type'];

    /* A character token made up only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
    or U+0020 SPACE */
    if($type === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "html" */
    } elseif($type === HTML5::ENDTAG && $token['name'] === 'html') {
        /* Switch to the trailing end phase. */
        $this->phase = self::END_PHASE;

    /* A start tag with the tag name "noframes" */
    } elseif($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);
    }

    /* Anything else: parse error. Ignore the token. */
}
3497  
/**
 * Processes a token emitted after the main phase has ended.
 *
 * DOCTYPE tokens are ignored, comments are appended to the document,
 * and whitespace is handed to the main phase. Non-whitespace character
 * tokens, start tags and end tags are parse errors that switch the
 * parser back to the main phase and reprocess the token there.
 *
 * @param array $token Token array with a 'type' key; character and
 *                     comment tokens also carry 'data'.
 * @return mixed Result of mainPhase() when the parser switches back to
 *               the main phase, otherwise null.
 */
private function trailingEndPhase($token) {
    /* After the main phase, as each token is emitted from the tokenisation
    stage, it must be processed as described in this section. */
    $type = $token['type'];

    /* True only for a character token consisting solely of U+0009,
    U+000A, U+000B, U+000C or U+0020. */
    $is_whitespace = $type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']);

    /* A DOCTYPE token */
    if($type === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

    /* A comment token */
    } elseif($type === HTML5::COMMENT) {
        /* Append a Comment node to the Document object with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);

    /* A whitespace-only character token */
    } elseif($is_whitespace) {
        /* Process the token as it would be processed in the main phase. */
        $this->mainPhase($token);

    /* A character token that is NOT whitespace-only (the branch above
    already took the whitespace case). Or a start tag token. Or an end
    tag token. */
    } elseif($type === HTML5::CHARACTR ||
    $type === HTML5::STARTTAG || $type === HTML5::ENDTAG) {
        /* Parse error. Switch back to the main phase and reprocess the
        token. */
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);
    }

    /* An end-of-file token (or anything else): nothing left to do. */
}
3537  
/**
 * Creates an element for a tag token, inserts it into the tree and
 * pushes it onto the stack of open elements.
 *
 * @param array $token  Tag token; 'name' and 'attr' (a list of arrays
 *                      with 'name' and 'value' keys) are read.
 * @param bool  $append Accepted for interface compatibility; this
 *                      method does not read it.
 * @param bool  $check  When true, the tag name is sanitised before the
 *                      element is created.
 * @return DOMElement The newly created element.
 */
private function insertElement($token, $append = true, $check = false) {
    $tag_name = $token['name'];

    // Proprietary workaround for libxml2's limitations with tag names
    if ($check) {
        // Keep only ASCII letters, digits and hyphens ...
        $tag_name = preg_replace('/[^a-z0-9-]/i', '', $tag_name);
        // ... and drop any leading hyphens and digits.
        $tag_name = ltrim($tag_name, '-0..9');
        // Fall back to an arbitrary generic element if nothing is left.
        if ($tag_name === '') {
            $tag_name = 'span';
        }
    }

    $element = $this->dom->createElement($tag_name);

    // The first occurrence of an attribute wins; later duplicates are
    // skipped.
    foreach($token['attr'] as $attribute) {
        if($element->hasAttribute($attribute['name'])) {
            continue;
        }

        $element->setAttribute($attribute['name'], $attribute['value']);
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
3563  
/**
 * Creates a text node holding $data and inserts it via
 * appendToRealParent().
 *
 * @param string $data Character data for the new text node.
 * @return void
 */
private function insertText($data) {
    $this->appendToRealParent($this->dom->createTextNode($data));
}
3568  
/**
 * Creates a comment node holding $data and inserts it via
 * appendToRealParent().
 *
 * @param string $data Text of the comment.
 * @return void
 */
private function insertComment($data) {
    $this->appendToRealParent($this->dom->createComment($data));
}
3573  
/**
 * Inserts a node either into the current node or, when a foster parent
 * is set, into that foster parent.
 *
 * With a foster parent set, the node goes immediately before the last
 * table element on the stack of open elements if the foster parent is
 * that table's parent; otherwise it is appended to the foster parent.
 * The foster parent is cleared afterwards.
 *
 * @param DOMNode $node Node to insert.
 * @return void
 */
private function appendToRealParent($node) {
    $foster = $this->foster_parent;

    // No foster parenting requested: plain append to the current node.
    if($foster === null) {
        end($this->stack)->appendChild($node);
        return;
    }

    // Walk the stack of open elements from the current node upwards
    // looking for the last table element that is attached to a parent.
    $last_table = null;
    $index = count($this->stack);

    while($index-- > 0) {
        $candidate = $this->stack[$index];

        if($candidate->nodeName === 'table' &&
        $candidate->parentNode !== null) {
            $last_table = $candidate;
            break;
        }
    }

    if($last_table !== null &&
    $foster->isSameNode($last_table->parentNode)) {
        $foster->insertBefore($node, $last_table);
    } else {
        $foster->appendChild($node);
    }

    // Foster parenting applies to a single insertion only.
    $this->foster_parent = null;
}
3601  
/**
 * Tells whether the stack of open elements has an element in scope
 * (default) or in table scope ($table === true).
 *
 * The stack is walked from the current node upwards. A table element or
 * the root element always ends the search unsuccessfully. In the plain
 * "in scope" variant the scoping elements caption, td, th, button,
 * marquee and object end it as well; in the "in table scope" variant
 * they are skipped over.
 *
 * @param string|array $el    Tag name to look for, or a list of tag
 *                            names of which any one may match.
 * @param bool         $table True for the "has an element in table
 *                            scope" variant.
 * @return bool True if a matching element is in scope, false otherwise
 *              (including when the stack is exhausted).
 */
private function elementInScope($el, $table = false) {
    if(is_array($el)) {
        foreach($el as $element) {
            if($this->elementInScope($element, $table)) {
                return true;
            }
        }

        return false;
    }

    /* 1. Initialise node to be the current node (the bottommost node of
    the stack). */
    for($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];

        if($node->tagName === $el) {
            /* 2. If node is the target node, terminate in a match state. */
            return true;

        } elseif($node->tagName === 'table') {
            /* 3. Otherwise, if node is a table element, terminate in a
            failure state. */
            return false;

        } elseif($table !== true && in_array($node->tagName, array('caption',
        'td', 'th', 'button', 'marquee', 'object'), true)) {
            /* 4. Otherwise, if the algorithm is the "has an element in scope"
            variant (rather than the "has an element in table scope" variant),
            and node is one of the following, terminate in a failure state. */
            return false;

        } elseif($node === $node->ownerDocument->documentElement) {
            /* 5. Otherwise, if node is an html element (root element),
            terminate in a failure state. */
            return false;
        }

        /* Otherwise, set node to the previous entry in the stack of open
        elements and return to step 2. */
    }

    /* The stack was exhausted without a match; report failure explicitly
    instead of falling through with null. */
    return false;
}
3650  
/**
 * Reconstructs the active formatting elements.
 *
 * Entries at the end of the list of active formatting elements that are
 * neither markers nor on the stack of open elements are shallow-cloned
 * in list order; each clone is appended to the current node, pushed onto
 * the stack of open elements and replaces its original in the list.
 *
 * Assumes the list of active formatting elements is indexed 0..n-1.
 *
 * @return false|null False when there is nothing to reconstruct,
 *                    otherwise null.
 */
private function reconstructActiveFormattingElements() {
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $formatting_elements = count($this->a_formatting);

    if($formatting_elements === 0) {
        return false;
    }

    /* 3. Let entry be the last (most recently added) element in the list
    of active formatting elements. */
    $entry = end($this->a_formatting);

    /* 2. If the last (most recently added) entry in the list of active
    formatting elements is a marker, or if it is an element that is in the
    stack of open elements, then there is nothing to reconstruct; stop this
    algorithm. */
    if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    /* Rewind through the list. By default the rewind ends on a marker or
    an already-open element, in which case step 7 must advance past that
    entry before anything is cloned. Only when the start of the list is
    reached (step 4) is step 7 skipped for the first pass. */
    $a = $formatting_elements - 1;
    $step_seven = true;

    while(true) {
        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        if($a === 0) {
            $step_seven = false;
            break;
        }

        /* 5. Let entry be the entry one earlier than entry in the list of
        active formatting elements. */
        $a--;
        $entry = $this->a_formatting[$a];

        /* 6. If entry is neither a marker nor an element that is also in
        the stack of open elements, go to step 4. */
        if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            break;
        }
    }

    while(true) {
        /* 7. Let entry be the element one later than entry in the list of
        active formatting elements. */
        if($step_seven) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $entry->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        end($this->stack)->appendChild($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;

        /* 11. If the entry for clone in the list of active formatting
        elements is not the last entry in the list, return to step 7. */
        if(end($this->a_formatting) === $clone) {
            break;
        }

        $step_seven = true;
    }
}
3721  
/**
 * Clears the list of active formatting elements up to and including the
 * last marker.
 *
 * Entries are removed from the end of the list until a marker has been
 * removed. If the list holds no marker at all, it is simply emptied
 * rather than being popped indefinitely.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    while(count($this->a_formatting) > 0) {
        /* 1. Let entry be the last (most recently added) entry in the list
        of active formatting elements.
        2. Remove entry from the list of active formatting elements. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if($entry === self::MARKER) {
            break;
        }
    }
}
3742  
/**
 * Generates implied end tags.
 *
 * While the current node is a dd, dt, li, p, td, th or tr element that
 * is not listed in $exclude, it is popped from the stack of open
 * elements. Popping stops as soon as the stack is empty.
 *
 * @param array $exclude Tag names for which no end tag may be implied.
 * @return void
 */
private function generateImpliedEndTags($exclude = array()) {
    $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    // end() yields false on an empty stack, which ends the loop before
    // nodeName is read.
    while(($node = end($this->stack)) !== false &&
    in_array($node->nodeName, $elements, true)) {
        array_pop($this->stack);
    }
}
3756  
/**
 * Classifies an element by its tag name.
 *
 * The tag name is looked up in the special, scoping and formatting
 * lists, in that order; an element found in none of them is phrasing.
 *
 * @param DOMElement $node Element to classify.
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING
 *               or self::PHRASING.
 */
private function getElementCategory($node) {
    $tag = $node->tagName;

    if(in_array($tag, $this->special)) {
        return self::SPECIAL;
    }

    if(in_array($tag, $this->scoping)) {
        return self::SCOPING;
    }

    return in_array($tag, $this->formatting)
        ? self::FORMATTING
        : self::PHRASING;
}
3771  
/**
 * Clears the stack of open elements back to a table context.
 *
 * Elements are popped from the stack of open elements until the current
 * node's name is one of $elements. The loop also ends when the stack
 * runs empty, so a stack without any of the requested elements cannot
 * make it spin forever.
 *
 * @param array $elements Node names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements) {
    /* When the steps above require the UA to clear the stack back to a
    table context, it means that the UA must, while the current node is not
    one of the given elements, pop elements from the stack of open
    elements. If this causes any elements to be popped from the stack, then
    this is a parse error. */
    while(($node = end($this->stack)) !== false) {
        if(in_array($node->nodeName, $elements)) {
            break;
        }

        array_pop($this->stack);
    }
}
3788  
/**
 * Resets the insertion mode from the stack of open elements.
 *
 * The stack is walked from the current node upwards and the first node
 * whose name selects an insertion mode decides the result. If the first
 * node of the stack is reached without such a match, the mode becomes
 * "in body". An empty stack leaves the mode untouched.
 *
 * @return void
 */
private function resetInsertionMode() {
    /* Node names that map directly to an insertion mode (spec steps
    4 to 13). Note that head deliberately maps to "in body", not
    "in head". */
    $mode_for = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TBODY,
        'thead'    => self::IN_TBODY,
        'tfoot'    => self::IN_TBODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_CGROUP,
        'table'    => self::IN_TABLE,
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAME,
    );

    for($index = count($this->stack) - 1; $index >= 0; $index--) {
        /* 2. Let node be the last node in the stack of open elements. */
        $current = $this->stack[$index];

        /* 3. If node is the first node in the stack of open elements,
        remember that this is the last node to look at. */
        $is_first = $this->stack[0]->isSameNode($current);
        $name = $current->nodeName;

        if(array_key_exists($name, $mode_for)) {
            $this->mode = $mode_for[$name];
            return;
        }

        /* 14. If node is an html element, then: if the head element
        pointer is null, switch the insertion mode to "before head",
        otherwise, switch the insertion mode to "after head". */
        if($name === 'html') {
            if($this->head_pointer === null) {
                $this->mode = self::BEFOR_HEAD;
            } else {
                $this->mode = self::AFTER_HEAD;
            }

            return;
        }

        /* 15. If this was the first node of the stack, set the insertion
        mode to "in body". */
        if($is_first) {
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
3886  
/**
 * Closes the currently open table cell, if any.
 *
 * If the stack of open elements has a td element in table scope, an end
 * tag token for td is passed to inCell(); failing that, the same is
 * tried for th. At most one end tag is generated.
 *
 * @return void
 */
private function closeCell() {
    if($this->elementInScope('td', true)) {
        $open_cell = 'td';
    } elseif($this->elementInScope('th', true)) {
        $open_cell = 'th';
    } else {
        // Neither kind of cell is open; nothing to close.
        return;
    }

    $this->inCell(array(
        'name' => $open_cell,
        'type' => HTML5::ENDTAG
    ));
}
3901  
/**
 * Returns the DOM document held in $this->dom.
 *
 * @return mixed Whatever $this->dom currently holds; the other methods
 *               here use it as a DOMDocument.
 */
public function save() {
    $document = $this->dom;

    return $document;
}
3905  }
3906  ?>


Generated: Fri Nov 28 20:08:37 2014 Cross-referenced by PHPXref 0.7.1