MediaWiki  master
Balancer.php
Go to the documentation of this file.
1 <?php
26 namespace MediaWiki\Tidy;
27 
28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
30 use \ExplodeIterator;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
33 use \Sanitizer;
34 
35 # A note for future librarization[1] -- this file is a good candidate
36 # for splitting into an independent library, except that it is currently
37 # highly optimized for MediaWiki use. It only implements the portions
38 # of the HTML5 tree builder used by tags supported by MediaWiki, and
39 # does not contain a true tokenizer pass, instead relying on
40 # comment stripping, attribute normalization, and escaping done by
41 # the MediaWiki Sanitizer. It also deliberately avoids building
42 # a true DOM in memory, instead serializing elements to an output string
43 # as soon as possible (usually as soon as the tag is closed) to reduce
44 # its memory footprint.
45 
46 # We've been gradually lifting some of these restrictions to handle
47 # non-sanitized output generated by extensions, but we shortcut the tokenizer
48 # for speed (primarily by splitting on `<`) and so rely on syntactic
49 # well-formedness.
50 
51 # On the other hand, I've been pretty careful to note with comments in the
52 # code the places where this implementation omits features of the spec or
53 # depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
54 # implement the missing pieces and make this a standalone PHP HTML5 parser.
55 # In order to do so, some sort of MediaWiki-specific API will need
56 # to be added to (a) allow the Balancer to bypass the tokenizer,
57 # and (b) support on-the-fly flattening instead of DOM node creation.
58 
59 # [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
60 
69 class BalanceSets {
70  const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
71  const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
72  const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
73 
74  public static $unsupportedSet = [
75  self::HTML_NAMESPACE => [
76  'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
77  'frame' => true,
78  'plaintext' => true, 'isindex' => true,
79  'xmp' => true, 'iframe' => true, 'noembed' => true,
80  'noscript' => true, 'script' => true,
81  'title' => true
82  ]
83  ];
84 
85  public static $emptyElementSet = [
86  self::HTML_NAMESPACE => [
87  'area' => true, 'base' => true, 'basefont' => true,
88  'bgsound' => true, 'br' => true, 'col' => true, 'command' => true,
89  'embed' => true, 'frame' => true, 'hr' => true, 'img' => true,
90  'input' => true, 'keygen' => true, 'link' => true, 'meta' => true,
91  'param' => true, 'source' => true, 'track' => true, 'wbr' => true
92  ]
93  ];
94 
95  public static $extraLinefeedSet = [
96  self::HTML_NAMESPACE => [
97  'pre' => true, 'textarea' => true, 'listing' => true,
98  ]
99  ];
100 
101  public static $headingSet = [
102  self::HTML_NAMESPACE => [
103  'h1' => true, 'h2' => true, 'h3' => true,
104  'h4' => true, 'h5' => true, 'h6' => true
105  ]
106  ];
107 
108  public static $specialSet = [
109  self::HTML_NAMESPACE => [
110  'address' => true, 'applet' => true, 'area' => true,
111  'article' => true, 'aside' => true, 'base' => true,
112  'basefont' => true, 'bgsound' => true, 'blockquote' => true,
113  'body' => true, 'br' => true, 'button' => true, 'caption' => true,
114  'center' => true, 'col' => true, 'colgroup' => true, 'dd' => true,
115  'details' => true, 'dir' => true, 'div' => true, 'dl' => true,
116  'dt' => true, 'embed' => true, 'fieldset' => true,
117  'figcaption' => true, 'figure' => true, 'footer' => true,
118  'form' => true, 'frame' => true, 'frameset' => true, 'h1' => true,
119  'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
120  'h6' => true, 'head' => true, 'header' => true, 'hgroup' => true,
121  'hr' => true, 'html' => true, 'iframe' => true, 'img' => true,
122  'input' => true, 'isindex' => true, 'li' => true, 'link' => true,
123  'listing' => true, 'main' => true, 'marquee' => true,
124  'menu' => true, 'menuitem' => true, 'meta' => true, 'nav' => true,
125  'noembed' => true, 'noframes' => true, 'noscript' => true,
126  'object' => true, 'ol' => true, 'p' => true, 'param' => true,
127  'plaintext' => true, 'pre' => true, 'script' => true,
128  'section' => true, 'select' => true, 'source' => true,
129  'style' => true, 'summary' => true, 'table' => true,
130  'tbody' => true, 'td' => true, 'template' => true,
131  'textarea' => true, 'tfoot' => true, 'th' => true, 'thead' => true,
132  'title' => true, 'tr' => true, 'track' => true, 'ul' => true,
133  'wbr' => true, 'xmp' => true
134  ],
135  self::SVG_NAMESPACE => [
136  'foreignobject' => true, 'desc' => true, 'title' => true
137  ],
138  self::MATHML_NAMESPACE => [
139  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
140  'mtext' => true, 'annotation-xml' => true
141  ]
142  ];
143 
144  public static $addressDivPSet = [
145  self::HTML_NAMESPACE => [
146  'address' => true, 'div' => true, 'p' => true
147  ]
148  ];
149 
150  public static $tableSectionRowSet = [
151  self::HTML_NAMESPACE => [
152  'table' => true, 'thead' => true, 'tbody' => true,
153  'tfoot' => true, 'tr' => true
154  ]
155  ];
156 
157  public static $impliedEndTagsSet = [
158  self::HTML_NAMESPACE => [
159  'dd' => true, 'dt' => true, 'li' => true, 'optgroup' => true,
160  'option' => true, 'p' => true, 'rb' => true, 'rp' => true,
161  'rt' => true, 'rtc' => true
162  ]
163  ];
164 
165  public static $thoroughImpliedEndTagsSet = [
166  self::HTML_NAMESPACE => [
167  'caption' => true, 'colgroup' => true, 'dd' => true, 'dt' => true,
168  'li' => true, 'optgroup' => true, 'option' => true, 'p' => true,
169  'rb' => true, 'rp' => true, 'rt' => true, 'rtc' => true,
170  'tbody' => true, 'td' => true, 'tfoot' => true, 'th' => true,
171  'thead' => true, 'tr' => true
172  ]
173  ];
174 
175  public static $tableCellSet = [
176  self::HTML_NAMESPACE => [
177  'td' => true, 'th' => true
178  ]
179  ];
180  public static $tableContextSet = [
181  self::HTML_NAMESPACE => [
182  'table' => true, 'template' => true, 'html' => true
183  ]
184  ];
185 
186  public static $tableBodyContextSet = [
187  self::HTML_NAMESPACE => [
188  'tbody' => true, 'tfoot' => true, 'thead' => true,
189  'template' => true, 'html' => true
190  ]
191  ];
192 
193  public static $tableRowContextSet = [
194  self::HTML_NAMESPACE => [
195  'tr' => true, 'template' => true, 'html' => true
196  ]
197  ];
198 
199  // See https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
200  public static $formAssociatedSet = [
201  self::HTML_NAMESPACE => [
202  'button' => true, 'fieldset' => true, 'input' => true,
203  'keygen' => true, 'object' => true, 'output' => true,
204  'select' => true, 'textarea' => true, 'img' => true
205  ]
206  ];
207 
208  public static $inScopeSet = [
209  self::HTML_NAMESPACE => [
210  'applet' => true, 'caption' => true, 'html' => true,
211  'marquee' => true, 'object' => true,
212  'table' => true, 'td' => true, 'template' => true,
213  'th' => true
214  ],
215  self::SVG_NAMESPACE => [
216  'foreignobject' => true, 'desc' => true, 'title' => true
217  ],
218  self::MATHML_NAMESPACE => [
219  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
220  'mtext' => true, 'annotation-xml' => true
221  ]
222  ];
223 
/**
 * Lazily-built cache backing inListItemScopeSet().
 * @var array|null
 */
private static $inListItemScopeSet = null;

/**
 * Return the element set used for "list item scope" checks: a copy of
 * $inScopeSet with the HTML <ol> and <ul> elements added. The set is
 * built on first call and cached in a private static afterwards.
 *
 * @return array Map of namespace URI => [ local name => true ]
 */
public static function inListItemScopeSet() {
	if ( self::$inListItemScopeSet !== null ) {
		return self::$inListItemScopeSet;
	}
	// PHP arrays are copied by value, so $inScopeSet itself is untouched.
	$set = self::$inScopeSet;
	$set[self::HTML_NAMESPACE]['ol'] = true;
	$set[self::HTML_NAMESPACE]['ul'] = true;
	self::$inListItemScopeSet = $set;
	return $set;
}
233 
/**
 * Lazily-built cache backing inButtonScopeSet().
 * @var array|null
 */
private static $inButtonScopeSet = null;

/**
 * Return the element set used for "button scope" checks: a copy of
 * $inScopeSet with the HTML <button> element added. The set is built on
 * first call and cached in a private static afterwards.
 *
 * @return array Map of namespace URI => [ local name => true ]
 */
public static function inButtonScopeSet() {
	if ( self::$inButtonScopeSet !== null ) {
		return self::$inButtonScopeSet;
	}
	// PHP arrays are copied by value, so $inScopeSet itself is untouched.
	$set = self::$inScopeSet;
	$set[self::HTML_NAMESPACE]['button'] = true;
	self::$inButtonScopeSet = $set;
	return $set;
}
242 
243  public static $inTableScopeSet = [
244  self::HTML_NAMESPACE => [
245  'html' => true, 'table' => true, 'template' => true
246  ]
247  ];
248 
249  public static $inInvertedSelectScopeSet = [
250  self::HTML_NAMESPACE => [
251  'option' => true, 'optgroup' => true
252  ]
253  ];
254 
/**
 * MathML elements treated as MathML text integration points.
 * NOTE(review): presumably consulted by
 * BalanceElement::isMathmlTextIntegrationPoint() -- confirm against callers.
 */
public static $mathmlTextIntegrationPointSet = [
	self::MATHML_NAMESPACE => [
		'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
		'mtext' => true
	]
];
261 
262  public static $htmlIntegrationPointSet = [
263  self::SVG_NAMESPACE => [
264  'foreignobject' => true,
265  'desc' => true,
266  'title' => true
267  ]
268  ];
269 
270  // For tidy compatibility.
271  public static $tidyPWrapSet = [
272  self::HTML_NAMESPACE => [
273  'body' => true, 'blockquote' => true,
274  // We parse with <body> as the fragment context, but the top-level
275  // element on the stack is actually <html>. We could use the
276  // "adjusted current node" everywhere to work around this, but it's
277  // easier just to add <html> to the p-wrap set.
278  'html' => true,
279  ],
280  ];
281  public static $tidyInlineSet = [
282  self::HTML_NAMESPACE => [
283  'a' => true, 'abbr' => true, 'acronym' => true, 'applet' => true,
284  'b' => true, 'basefont' => true, 'bdo' => true, 'big' => true,
285  'br' => true, 'button' => true, 'cite' => true, 'code' => true,
286  'dfn' => true, 'em' => true, 'font' => true, 'i' => true,
287  'iframe' => true, 'img' => true, 'input' => true, 'kbd' => true,
288  'label' => true, 'legend' => true, 'map' => true, 'object' => true,
289  'param' => true, 'q' => true, 'rb' => true, 'rbc' => true,
290  'rp' => true, 'rt' => true, 'rtc' => true, 'ruby' => true,
291  's' => true, 'samp' => true, 'select' => true, 'small' => true,
292  'span' => true, 'strike' => true, 'strong' => true, 'sub' => true,
293  'sup' => true, 'textarea' => true, 'tt' => true, 'u' => true,
294  'var' => true,
295  ],
296  ];
297 }
298 
321  public $localName;
326  public $attribs;
327 
333  public $parent;
334 
342  public $children;
343 
347  private $noahKey;
348 
353  public $nextAFE;
354 
359  public $prevAFE;
360 
364  public $nextNoah;
365 
375  $this->localName = $localName;
376  $this->namespaceURI = $namespaceURI;
377  $this->attribs = $attribs;
378  $this->contents = '';
379  $this->parent = null;
380  $this->children = [];
381  }
382 
/**
 * Detach a direct child element from this element.
 *
 * Asserts that this element has not been flattened, that $elt names
 * this element as its parent, and that $elt is actually present in the
 * child list. On success the child is spliced out and its parent
 * pointer is reset to null.
 *
 * @param BalanceElement $elt The child to detach.
 */
private function removeChild( BalanceElement $elt ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't removeChild after flattening $this"
	);
	$claimsThisParent = ( $elt->parent === $this );
	Assert::parameter(
		$claimsThisParent, 'elt', 'must have $this as a parent'
	);
	// Strict search: children may be strings as well as elements.
	$position = array_search( $elt, $this->children, true );
	Assert::parameter( $position !== false, '$elt', 'must be a child of $this' );
	array_splice( $this->children, $position, 1 );
	$elt->parent = null;
}
399 
/**
 * Insert $b into this element's child list immediately before $a.
 *
 * @param BalanceElement $a Existing child of this element; $b is placed
 *   directly in front of it.
 * @param BalanceElement|string $b Node to insert. A string is spliced in
 *   as-is. An element must not be flattened; it is first detached from
 *   its current parent (if any) and then re-parented to this element.
 */
public function insertBefore( BalanceElement $a, $b ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't insertBefore after flattening."
	);
	$idx = array_search( $a, $this->children, true );
	Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
	if ( !is_string( $b ) ) {
		Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
		if ( $b->parent !== null ) {
			$sharesParent = ( $b->parent === $this );
			$b->parent->removeChild( $b );
			if ( $sharesParent && $b !== $a ) {
				// $b was removed from this very child list, which may
				// have shifted $a; the index computed above could be
				// stale, so look $a up again.
				$idx = array_search( $a, $this->children, true );
			}
		}
		$b->parent = $this;
	}
	array_splice( $this->children, $idx, 0, [ $b ] );
}
422 
/**
 * Append a node to the end of this element's child list.
 *
 * @param BalanceElement|string $elt Node to append. A string is pushed
 *   as-is; an element is first detached from its current parent (if it
 *   has one) and then re-parented to this element.
 */
public function appendChild( $elt ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't appendChild after flattening."
	);
	if ( !is_string( $elt ) ) {
		// Remove $elt from parent, if it had one.
		if ( $elt->parent !== null ) {
			$elt->parent->removeChild( $elt );
		}
		$elt->parent = $this;
	}
	$this->children[] = $elt;
}
442 
/**
 * Move every child of $elt to the end of this element's child list,
 * leaving $elt with no children.
 *
 * @param BalanceElement $elt The element whose children are taken; it
 *   must not have been flattened.
 */
public function adoptChildren( BalanceElement $elt ) {
	Assert::precondition(
		$elt->parent !== 'flat', "Can't adoptChildren after flattening."
	);
	foreach ( $elt->children as $moved ) {
		if ( !is_string( $moved ) ) {
			// Clearing the parent up front makes appendChild() skip
			// its removeChild() call; this is an optimization which
			// avoids an O(n^2) set of array_splice operations.
			$moved->parent = null;
		}
		$this->appendChild( $moved );
	}
	// The old child list is dropped wholesale instead of per child.
	$elt->children = [];
}
461 
/**
 * Serialize this element to a string, store that string in place of the
 * element in its parent's child list, and mark the element as flat.
 *
 * In tidy-compatibility mode all element children are flattened first,
 * an 'mw:p-wrap' element is renamed to 'p' (and dropped entirely when it
 * holds nothing but whitespace), and a whitespace-only, attribute-less
 * 'tr' or 'li' receives the 'mw-empty-elt' class.
 *
 * @param bool $tidyCompat Whether to apply the tidy-compatibility rules.
 * @return string The string that replaced this element in its parent.
 */
public function flatten( $tidyCompat = false ) {
	Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
	Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
	$slot = array_search( $this, $this->parent->children, true );
	Assert::parameter(
		$slot !== false, '$this', 'must be a child of its parent'
	);

	// null means "serialize normally"; '' means "emit nothing".
	$serialized = null;
	if ( $tidyCompat ) {
		$hasContent = false;
		foreach ( $this->children as $child ) {
			$text = is_string( $child ) ?
				$child : $child->flatten( $tidyCompat );
			if ( !$hasContent && preg_match( '/[^\t\n\f\r ]/', $text ) ) {
				$hasContent = true;
			}
		}
		if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
			$this->localName = 'p';
			if ( !$hasContent ) {
				$serialized = '';
			}
		} elseif (
			!$hasContent &&
			!count( $this->attribs ) &&
			( $this->localName === 'tr' || $this->localName === 'li' )
		) {
			// Add 'mw-empty-elt' class so elements can be hidden via CSS
			// for compatibility with legacy tidy.
			$this->attribs = [ 'class' => "mw-empty-elt" ];
		}
	}
	if ( $serialized === null ) {
		$serialized = "{$this}";
	}

	$this->parent->children[$slot] = $serialized;
	$this->parent = 'flat'; # for assertion checking
	return $serialized;
}
506 
/**
 * Serialize this element, its attributes and its children to an HTML
 * string.
 *
 * Attribute values are encoded with Sanitizer::encodeAttribute().
 * Elements in BalanceSets::$emptyElementSet are emitted in the
 * self-closed "<name />" form and must have no children. For elements
 * in BalanceSets::$extraLinefeedSet (pre/listing/textarea) a linefeed
 * directly after the start tag is doubled.
 *
 * @return string
 */
public function __toString() {
	$encAttribs = '';
	foreach ( $this->attribs as $name => $value ) {
		$encValue = Sanitizer::encodeAttribute( $value );
		$encAttribs .= " $name=\"$encValue\"";
	}
	if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
		$out = "<{$this->localName}{$encAttribs}>";
		// Offset of the first byte after the start tag.
		$len = strlen( $out );
		// flatten children
		foreach ( $this->children as $elt ) {
			$out .= "{$elt}";
		}
		$out .= "</{$this->localName}>";
		if (
			$this->isA( BalanceSets::$extraLinefeedSet ) &&
			$out[$len] === "\n"
		) {
			// Double the linefeed after pre/listing/textarea
			// according to the HTML5 fragment serialization algorithm.
			$out = substr( $out, 0, $len + 1 ) .
				substr( $out, $len );
		}
	} else {
		$out = "<{$this->localName}{$encAttribs} />";
		Assert::invariant(
			count( $this->children ) === 0,
			"Empty elements shouldn't have children."
		);
	}
	return $out;
}
546 
547  # Utility functions on BalanceElements.
548 
/**
 * Test whether this element matches $set, which may take three forms:
 *  - a BalanceElement: matches only if it is this very object;
 *  - an array keyed by namespace URI, then local name: matches if this
 *    element's namespace and local name are present;
 *  - anything else: treated as an HTML tag name to compare against.
 *
 * @param BalanceElement|array|string $set
 * @return bool
 */
public function isA( $set ) {
	if ( $set instanceof BalanceElement ) {
		return $set === $this;
	}
	if ( !is_array( $set ) ) {
		# assume this is an HTML element name.
		return $this->isHtml() && $this->localName === $set;
	}
	return isset( $set[$this->namespaceURI][$this->localName] );
}
568 
/**
 * Test whether this is an element in the HTML namespace with the given
 * local name.
 *
 * @param string $tagName Local name to compare against.
 * @return bool
 */
public function isHtmlNamed( $tagName ) {
	if ( $this->namespaceURI !== BalanceSets::HTML_NAMESPACE ) {
		return false;
	}
	return $this->localName === $tagName;
}
578 
/**
 * Test whether this element is in the HTML namespace.
 *
 * @return bool
 */
public function isHtml() {
	$ns = $this->namespaceURI;
	return $ns === BalanceSets::HTML_NAMESPACE;
}
587 
/**
 * Test whether this element is a MathML text integration point, i.e. a
 * MathML-namespace mi, mo, mn, ms or mtext element.
 *
 * @return bool
 */
public function isMathmlTextIntegrationPoint() {
	// The set is spelled out here so the check is self-contained; it
	// lists the same element names as the MathML integration-point data
	// in BalanceSets.
	return $this->isA( [
		BalanceSets::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
			'mtext' => true
		]
	] );
}
598 
/**
 * Test whether this element is an HTML integration point: either a
 * member of BalanceSets::$htmlIntegrationPointSet, or a MathML
 * 'annotation-xml' element whose 'encoding' attribute is
 * (case-insensitively) 'text/html' or 'application/xhtml+xml'.
 *
 * @return bool
 */
public function isHtmlIntegrationPoint() {
	if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
		return true;
	}
	$isAnnotationXml =
		$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$this->localName === 'annotation-xml';
	if ( !$isAnnotationXml || !isset( $this->attribs['encoding'] ) ) {
		return false;
	}
	$encoding = $this->attribs['encoding'];
	return strcasecmp( $encoding, 'text/html' ) === 0 ||
		strcasecmp( $encoding, 'application/xhtml+xml' ) === 0;
}
621 
/**
 * Return a string key built from this element's namespace, local name
 * and attributes. Attributes are sorted by name first, so two elements
 * whose attributes differ only in order get the same key. The key is
 * computed once and cached in $this->noahKey.
 *
 * @return string
 */
public function getNoahKey() {
	if ( $this->noahKey === null ) {
		// Sort a copy so the element's own attribute order is preserved.
		$attribs = $this->attribs;
		ksort( $attribs );
		$this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
	}
	return $this->noahKey;
}
633 }
634 
650 class BalanceStack implements IteratorAggregate {
655  private $elements = [];
662  public $fosterParentMode = false;
666  public $tidyCompat = false;
670  public $currentNode;
671 
/**
 * Create a stack of open elements holding a single root HTML <html>
 * element, which also becomes the current node.
 */
public function __construct() {
	# always a root <html> element on the stack
	$root = new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] );
	$this->elements[] = $root;
	$this->currentNode = $root;
}
684 
/**
 * Return the serialized content collected so far: the concatenation of
 * the root element's children, flattening any child that is still an
 * element object (which mutates the tree).
 *
 * @return string
 */
public function getOutput() {
	// Don't include the outer '<html>....</html>'
	$pieces = [];
	foreach ( $this->elements[0]->children as $child ) {
		$pieces[] = is_string( $child ) ?
			$child : $child->flatten( $this->tidyCompat );
	}
	return implode( '', $pieces );
}
699 
/**
 * Insert a comment at the current insertion point.
 *
 * @param string $value Comment text, without the surrounding delimiters.
 * @return mixed Whatever insertText() returns.
 */
public function insertComment( $value ) {
	// Just another type of text node, except for tidy p-wrapping.
	$markup = '<!--' . $value . '-->';
	return $this->insertText( $markup, true );
}
709 
/**
 * Insert text at the current insertion point.
 *
 * - In foster-parent mode with a table/section/row as current node the
 *   text is foster parented.
 * - In tidy-compat mode, non-comment text arriving while the current
 *   node is in BalanceSets::$tidyPWrapSet first opens an 'mw:p-wrap'
 *   element and is then inserted into it.
 * - Otherwise the text is appended to the current node.
 *
 * @param string $value The text to insert.
 * @param bool $isComment True when $value is a serialized comment, which
 *   is exempt from p-wrapping.
 */
public function insertText( $value, $isComment = false ) {
	if (
		$this->fosterParentMode &&
		$this->currentNode->isA( BalanceSets::$tableSectionRowSet )
	) {
		$this->fosterParent( $value );
	} elseif (
		$this->tidyCompat && !$isComment &&
		$this->currentNode->isA( BalanceSets::$tidyPWrapSet )
	) {
		$this->insertHTMLElement( 'mw:p-wrap', [] );
		// The p-wrapper is now the current node, so this recursion
		// takes one of the other two branches.
		return $this->insertText( $value );
	} else {
		$this->currentNode->appendChild( $value );
	}
}
731 
/**
 * Create an element in the given namespace and insert it at the current
 * insertion point via insertElement().
 *
 * @param string $namespaceURI Namespace of the new element.
 * @param string $tag Local name of the new element.
 * @param array $attribs Attributes of the new element.
 * @return BalanceElement The value returned by insertElement().
 */
public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
	$created = new BalanceElement( $namespaceURI, $tag, $attribs );
	return $this->insertElement( $created );
}
746 
/**
 * Create an element in the HTML namespace and insert it at the current
 * insertion point.
 *
 * @param string $tag Local name of the new element.
 * @param array $attribs Attributes of the new element.
 * @return BalanceElement The value returned by insertForeignElement().
 */
public function insertHTMLElement( $tag, $attribs ) {
	return $this->insertForeignElement(
		BalanceSets::HTML_NAMESPACE, $tag, $attribs
	);
}
760 
/**
 * Insert an element at the current insertion point and push it onto the
 * stack of open elements, making it the current node.
 *
 * If the current node is an 'mw:p-wrap' element and $elt is not in
 * BalanceSets::$tidyInlineSet, the p-wrapper is popped first. In
 * foster-parent mode with a table/section/row as current node, the
 * element is foster parented instead of appended; fosterParent() may
 * hand back a different (re-used) element, which is then the one pushed
 * and returned.
 *
 * @param BalanceElement $elt The element to insert.
 * @return BalanceElement The element now at the top of the stack.
 */
public function insertElement( BalanceElement $elt ) {
	if (
		$this->currentNode->isHtmlNamed( 'mw:p-wrap' ) &&
		!$elt->isA( BalanceSets::$tidyInlineSet )
	) {
		// Tidy compatibility.
		$this->pop();
	}
	if (
		$this->fosterParentMode &&
		$this->currentNode->isA( BalanceSets::$tableSectionRowSet )
	) {
		$elt = $this->fosterParent( $elt );
	} else {
		$this->currentNode->appendChild( $elt );
	}
	Assert::invariant( $elt->parent !== null, "$elt must be in tree" );
	Assert::invariant( $elt->parent !== 'flat', "$elt must not have been previous flattened" );
	array_push( $this->elements, $elt );
	$this->currentNode = $elt;
	return $elt;
}
790 
/**
 * Test whether the stack has a matching element "in scope", using
 * BalanceSets::$inScopeSet as the set of scope-terminating elements.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inScope( $tag ) {
	return $this->inSpecificScope( $tag, BalanceSets::$inScopeSet );
}
800 
/**
 * Test whether the stack has a matching element "in button scope",
 * using BalanceSets::inButtonScopeSet() as the set of scope-terminating
 * elements.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inButtonScope( $tag ) {
	return $this->inSpecificScope( $tag, BalanceSets::inButtonScopeSet() );
}
810 
/**
 * Test whether the stack has a matching element "in list item scope",
 * using BalanceSets::inListItemScopeSet() as the set of
 * scope-terminating elements.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inListItemScope( $tag ) {
	return $this->inSpecificScope( $tag, BalanceSets::inListItemScopeSet() );
}
820 
/**
 * Test whether the stack has a matching element "in table scope", using
 * BalanceSets::$inTableScopeSet as the set of scope-terminating
 * elements.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inTableScope( $tag ) {
	return $this->inSpecificScope( $tag, BalanceSets::$inTableScopeSet );
}
830 
/**
 * Test whether the stack has a matching element "in select scope".
 *
 * Walks the stack from the current node upward. Unlike the other scope
 * checks, the search continues only while each non-matching element IS
 * a member of BalanceSets::$inInvertedSelectScopeSet; any other element
 * ends the search with a negative result.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inSelectScope( $tag ) {
	// Can't use inSpecificScope to implement this, since it involves
	// *inverting* a set of tags. Implement manually.
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		$transparent = $node->isA( BalanceSets::$inInvertedSelectScopeSet );
		if ( !$transparent ) {
			break;
		}
	}
	return false;
}
850 
/**
 * Test whether the stack has a matching element in a particular scope.
 *
 * Walks the stack from the current node upward. Returns true at the
 * first element matching $tag; returns false as soon as an element in
 * $set is met first, or when the stack is exhausted.
 *
 * @param BalanceElement|array|string $tag What to look for; anything
 *   accepted by BalanceElement::isA().
 * @param BalanceElement|array|string $set Elements that terminate the
 *   scope; anything accepted by BalanceElement::isA().
 * @return bool
 */
public function inSpecificScope( $tag, $set ) {
	foreach ( $this as $node ) {
		// The target is tested before the boundary, so an element that
		// is both counts as found.
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( $node->isA( $set ) ) {
			break;
		}
	}
	return false;
}
869 
/**
 * Pop elements off the stack for as long as the current node is in the
 * implied-end-tag set.
 *
 * @param string|null $butnot HTML tag name at which popping stops even
 *   though it is in the set; null for no exception.
 * @param bool $thorough Use BalanceSets::$thoroughImpliedEndTagsSet
 *   instead of BalanceSets::$impliedEndTagsSet.
 */
public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
	$endTagSet = $thorough ?
		BalanceSets::$thoroughImpliedEndTagsSet :
		BalanceSets::$impliedEndTagsSet;
	while ( $this->currentNode ) {
		if ( $butnot !== null && $this->currentNode->isHtmlNamed( $butnot ) ) {
			break;
		}
		if ( !$this->currentNode->isA( $endTagSet ) ) {
			break;
		}
		$this->pop();
	}
}
890 
/**
 * Return the "adjusted current node": the fragment context when one is
 * given and the stack holds exactly one element, otherwise the current
 * node.
 *
 * @param BalanceElement|null $fragmentContext
 * @return BalanceElement|null
 */
public function adjustedCurrentNode( $fragmentContext ) {
	if ( $fragmentContext && count( $this->elements ) === 1 ) {
		return $fragmentContext;
	}
	return $this->currentNode;
}
898 
/**
 * Return an iterator over the stack of open elements. It is a
 * ReverseArrayIterator, so `foreach ( $stack as $elt )` presumably
 * visits the current node first -- confirm against that class.
 *
 * @return ReverseArrayIterator
 */
public function getIterator() {
	$stack = $this->elements;
	return new ReverseArrayIterator( $stack );
}
907 
/**
 * Return the stack entry at the given position; index 0 is the root
 * <html> element pushed by the constructor.
 *
 * @param int $idx Position in the stack.
 * @return BalanceElement
 */
public function node( $idx ) {
	$entry = $this->elements[ $idx ];
	return $entry;
}
917 
/**
 * Replace the stack entry at position $idx with $elt. Neither the old
 * nor the new element may have been flattened. If $idx is the top of
 * the stack, $elt also becomes the current node.
 *
 * @param int $idx Position in the stack to overwrite.
 * @param BalanceElement $elt The replacement element.
 */
public function replaceAt( $idx, BalanceElement $elt ) {
	$outgoing = $this->elements[$idx];
	Assert::precondition(
		$outgoing->parent !== 'flat',
		'Replaced element should not have already been flattened.'
	);
	Assert::precondition(
		$elt->parent !== 'flat',
		'New element should not have already been flattened.'
	);
	$this->elements[$idx] = $elt;
	$topIdx = count( $this->elements ) - 1;
	if ( $idx === $topIdx ) {
		$this->currentNode = $elt;
	}
}
937 
/**
 * Find the position of the topmost stack entry matching $tag, searching
 * from the current node down to the root.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return int Stack index of the match, or -1 when nothing matches.
 */
public function indexOf( $tag ) {
	$i = count( $this->elements );
	while ( $i-- > 0 ) {
		if ( $this->elements[$i]->isA( $tag ) ) {
			return $i;
		}
	}
	return -1;
}
952 
/**
 * Return the number of elements currently on the stack.
 *
 * @return int
 */
public function length() {
	$depth = count( $this->elements );
	return $depth;
}
960 
/**
 * Remove the current node from the stack. The entry beneath it (or null
 * when the stack becomes empty) is made the current node, and the
 * removed element is flattened into its parent unless it is an
 * 'mw:p-wrap' element.
 */
public function pop() {
	$popped = array_pop( $this->elements );
	$remaining = count( $this->elements );
	$this->currentNode = $remaining ?
		$this->elements[ $remaining - 1 ] : null;
	if ( $popped->isHtmlNamed( 'mw:p-wrap' ) ) {
		// p-wrappers are left unflattened here.
		return;
	}
	$popped->flatten( $this->tidyCompat );
}
976 
/**
 * Pop elements until the stack holds at most $idx entries; that is, the
 * entry at position $idx and everything above it are removed.
 *
 * @param int $idx Target stack length.
 */
public function popTo( $idx ) {
	for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
		$this->pop();
	}
}
988 
/**
 * Pop elements off the stack up to and including the first one that
 * matches $tag. If nothing matches, the whole stack is popped.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 */
public function popTag( $tag ) {
	while ( $this->currentNode ) {
		// Decide before popping: pop() changes the current node.
		$matched = $this->currentNode->isA( $tag );
		$this->pop();
		if ( $matched ) {
			break;
		}
	}
}
1004 
/**
 * Pop elements off the stack until the current node is a member of
 * $set, or until only the bottom entry is left.
 *
 * @param BalanceElement|array|string $set Anything accepted by
 *   BalanceElement::isA().
 */
public function clearToContext( $set ) {
	// Note that we don't loop to 0. Never pop the <html> elt off.
	while (
		count( $this->elements ) > 1 &&
		!$this->currentNode->isA( $set )
	) {
		$this->pop();
	}
}
1019 
/**
 * Remove an arbitrary element from the stack of open elements.
 *
 * Neither $elt nor its parent may have been flattened, and $elt must be
 * on the stack. If $elt was the top entry, the entry beneath it becomes
 * the current node.
 *
 * @param BalanceElement $elt The element to remove.
 * @param bool $flatten Whether to flatten $elt into its parent right
 *   away.
 */
public function removeElement( BalanceElement $elt, $flatten = true ) {
	Assert::parameter(
		$elt->parent !== 'flat',
		'$elt',
		'$elt should not already have been flattened.'
	);
	Assert::parameter(
		$elt->parent->parent !== 'flat',
		'$elt',
		'The parent of $elt should not already have been flattened.'
	);
	$pos = array_search( $elt, $this->elements, true );
	Assert::parameter( $pos !== false, '$elt', 'must be in stack' );
	array_splice( $this->elements, $pos, 1 );
	// After the splice, an index equal to the new length means $elt was
	// the top of the stack.
	$wasTop = ( $pos === count( $this->elements ) );
	if ( $wasTop ) {
		$this->currentNode = $this->elements[$pos - 1];
	}
	if ( $flatten ) {
		// serialize $elt into its parent
		// otherwise, it will eventually serialize when the parent
		// is serialized, we just hold onto the memory for its
		// tree of objects a little longer.
		$elt->flatten( $this->tidyCompat );
	}
	$stillPresent = in_array( $elt, $this->elements, true );
	Assert::postcondition(
		!$stillPresent,
		'$elt should no longer be in open elements stack'
	);
}
1055 
/**
 * Insert $b into the stack of open elements immediately above $a. If $a
 * is the top of the stack, $b becomes the current node.
 *
 * @param BalanceElement $a Element already on the stack.
 * @param BalanceElement $b Element to insert above $a.
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	$idx = $this->indexOf( $a );
	// indexOf() signals "not found" with -1, never false.
	Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
	if ( $idx === count( $this->elements ) - 1 ) {
		array_push( $this->elements, $b );
		$this->currentNode = $b;
	} else {
		array_splice( $this->elements, $idx + 1, 0, [ $b ] );
	}
}
1071 
1072  # Fostering and adoption.
1073 
/**
 * Foster parent a node: place it relative to the nearest <table> or
 * <template> on the stack instead of inside the current node.
 *
 * Target selection:
 *  - if a <template> is on the stack above the last <table> (or there is
 *    no table), append to that template;
 *  - otherwise, if a <table> is on the stack, insert into the table's
 *    parent, directly before the table;
 *  - otherwise append to the bottom stack entry (the <html> element).
 *
 * In tidy-compat mode, text headed for a parent in
 * BalanceSets::$tidyPWrapSet is routed through a new 'mw:p-wrap'
 * element, and an 'mw:p-wrap' element being fostered directly after an
 * existing p-wrapper re-uses that one instead of being inserted.
 *
 * @param BalanceElement|string $elt The node to foster parent.
 * @return BalanceElement|string $elt, or the re-used p-wrapper.
 */
private function fosterParent( $elt ) {
	$tableIdx = $this->indexOf( 'table' );
	$templateIdx = $this->indexOf( 'template' );
	$before = null;

	if ( $templateIdx >= 0 && ( $tableIdx < 0 || $templateIdx > $tableIdx ) ) {
		$parent = $this->elements[$templateIdx];
	} elseif ( $tableIdx >= 0 ) {
		$before = $this->elements[$tableIdx];
		$parent = $before->parent;
		# Assume all tables have parents, since we're not running scripts!
		Assert::invariant(
			$parent !== null, "All tables should have parents"
		);
	} else {
		$parent = $this->elements[0]; // the `html` element.
	}

	if ( $this->tidyCompat ) {
		if ( is_string( $elt ) ) {
			// We're fostering text: do we need a p-wrapper?
			if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
				$this->insertHTMLElement( 'mw:p-wrap', [] );
				$this->insertText( $elt );
				return $elt;
			}
		} elseif ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
			// We're fostering an element; do we need to merge p-wrappers?
			$slot = ( $before !== null ) ?
				array_search( $before, $parent->children, true ) :
				count( $parent->children );
			$neighbour = $slot > 0 ? $parent->children[$slot - 1] : '';
			if (
				$neighbour instanceof BalanceElement &&
				$neighbour->isHtmlNamed( 'mw:p-wrap' )
			) {
				return $neighbour; // Re-use existing p-wrapper.
			}
		}
	}

	if ( $before !== null ) {
		$parent->insertBefore( $before, $elt );
	} else {
		$parent->appendChild( $elt );
	}
	return $elt;
}
1130 
 /**
  * Run the adoption agency algorithm for an end tag named $tag. The
  * inline comments below quote the individual steps being implemented.
  *
  * @param string $tag Tag name of the end tag being processed (the
  *   "subject" in the step descriptions).
  * @param object $afe The list of active formatting elements. This method
  *   calls isInList(), findElementByTag(), remove(), replace() and
  *   insertAfter() on it.
  * @return bool True when the end tag needs no further handling; false
  *   when the caller should fall back to its default ("any other end
  *   tag") handling.
  */
1141  public function adoptionAgency( $tag, $afe ) {
1142  // If the current node is an HTML element whose tag name is subject,
1143  // and the current node is not in the list of active formatting
1144  // elements, then pop the current node off the stack of open
1145  // elements and abort these steps.
1146  if (
1147  $this->currentNode->isHtmlNamed( $tag ) &&
1148  !$afe->isInList( $this->currentNode )
1149  ) {
1150  $this->pop();
1151  return true; // no more handling required
1152  }
1153 
1154  // Let outer loop counter be zero.
1155  $outer = 0;
1156 
1157  // Outer loop: If outer loop counter is greater than or
1158  // equal to eight, then abort these steps.
1159  while ( $outer < 8 ) {
1160  // Increment outer loop counter by one.
1161  $outer++;
1162 
1163  // Let the formatting element be the last element in the list
1164  // of active formatting elements that: is between the end of
1165  // the list and the last scope marker in the list, if any, or
1166  // the start of the list otherwise, and has the same tag name
1167  // as the token.
1168  $fmtelt = $afe->findElementByTag( $tag );
1169 
1170  // If there is no such node, then abort these steps and instead
1171  // act as described in the "any other end tag" entry below.
1172  if ( !$fmtelt ) {
1173  return false; // false means handle by the default case
1174  }
1175 
1176  // Otherwise, if there is such a node, but that node is not in
1177  // the stack of open elements, then this is a parse error;
1178  // remove the element from the list, and abort these steps.
1179  $index = $this->indexOf( $fmtelt );
1180  if ( $index < 0 ) {
1181  $afe->remove( $fmtelt );
1182  return true; // true means no more handling required
1183  }
1184 
1185  // Otherwise, if there is such a node, and that node is also in
1186  // the stack of open elements, but the element is not in scope,
1187  // then this is a parse error; ignore the token, and abort
1188  // these steps.
1189  if ( !$this->inScope( $fmtelt ) ) {
1190  return true;
1191  }
1192 
1193  // Let the furthest block be the topmost node in the stack of
1194  // open elements that is lower in the stack than the formatting
1195  // element, and is an element in the special category. There
1196  // might not be one.
1197  $furthestblock = null;
1198  $furthestblockindex = -1;
1199  $stacklen = $this->length();
  // Scan from just above the formatting element towards the current
  // node and stop at the first special element.
1200  for ( $i = $index+1; $i < $stacklen; $i++ ) {
1201  if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
1202  $furthestblock = $this->node( $i );
1203  $furthestblockindex = $i;
1204  break;
1205  }
1206  }
1207 
1208  // If there is no furthest block, then the UA must skip the
1209  // subsequent steps and instead just pop all the nodes from the
1210  // bottom of the stack of open elements, from the current node
1211  // up to and including the formatting element, and remove the
1212  // formatting element from the list of active formatting
1213  // elements.
1214  if ( !$furthestblock ) {
1215  $this->popTag( $fmtelt );
1216  $afe->remove( $fmtelt );
1217  return true;
1218  } else {
1219  // Let the common ancestor be the element immediately above
1220  // the formatting element in the stack of open elements.
1221  $ancestor = $this->node( $index-1 );
1222 
1223  // Let a bookmark note the position of the formatting
1224  // element in the list of active formatting elements
1225  // relative to the elements on either side of it in the
1226  // list.
  // The bookmark is a placeholder element object that is only ever
  // inserted into, moved within, and replaced in the $afe list.
1227  $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
1228  $afe->insertAfter( $fmtelt, $BOOKMARK );
1229 
1230  // Let node and last node be the furthest block.
1231  $node = $furthestblock;
1232  $lastnode = $furthestblock;
1233  $nodeindex = $furthestblockindex;
1234  $isAFE = false;
1235 
1236  // Let inner loop counter be zero.
1237  $inner = 0;
1238 
1239  while ( true ) {
1240 
1241  // Increment inner loop counter by one.
1242  $inner++;
1243 
1244  // Let node be the element immediately above node in
1245  // the stack of open elements, or if node is no longer
1246  // in the stack of open elements (e.g. because it got
1247  // removed by this algorithm), the element that was
1248  // immediately above node in the stack of open elements
1249  // before node was removed.
1250  $node = $this->node( --$nodeindex );
1251 
1252  // If node is the formatting element, then go
1253  // to the next step in the overall algorithm.
1254  if ( $node === $fmtelt ) break;
1255 
1256  // If the inner loop counter is greater than three and node
1257  // is in the list of active formatting elements, then remove
1258  // node from the list of active formatting elements.
1259  $isAFE = $afe->isInList( $node );
1260  if ( $inner > 3 && $isAFE ) {
1261  $afe->remove( $node );
1262  $isAFE = false;
1263  }
1264 
1265  // If node is not in the list of active formatting
1266  // elements, then remove node from the stack of open
1267  // elements and then go back to the step labeled inner
1268  // loop.
1269  if ( !$isAFE ) {
1270  // Don't flatten here, since we're about to relocate
1271  // parts of this $node.
1272  $this->removeElement( $node, false );
1273  continue;
1274  }
1275 
1276  // Create an element for the token for which the
1277  // element node was created with common ancestor as
1278  // the intended parent, replace the entry for node
1279  // in the list of active formatting elements with an
1280  // entry for the new element, replace the entry for
1281  // node in the stack of open elements with an entry for
1282  // the new element, and let node be the new element.
1283  $newelt = new BalanceElement(
1284  $node->namespaceURI, $node->localName, $node->attribs );
1285  $afe->replace( $node, $newelt );
1286  $this->replaceAt( $nodeindex, $newelt );
1287  $node = $newelt;
1288 
1289  // If last node is the furthest block, then move the
1290  // aforementioned bookmark to be immediately after the
1291  // new node in the list of active formatting elements.
1292  if ( $lastnode === $furthestblock ) {
1293  $afe->remove( $BOOKMARK );
1294  $afe->insertAfter( $newelt, $BOOKMARK );
1295  }
1296 
1297  // Insert last node into node, first removing it from
1298  // its previous parent node if any.
1299  $node->appendChild( $lastnode );
1300 
1301  // Let last node be node.
1302  $lastnode = $node;
1303  }
1304 
1305  // If the common ancestor node is a table, tbody, tfoot,
1306  // thead, or tr element, then, foster parent whatever last
1307  // node ended up being in the previous step, first removing
1308  // it from its previous parent node if any.
1309  if (
1310  $this->fosterParentMode &&
1311  $ancestor->isA( BalanceSets::$tableSectionRowSet )
1312  ) {
1313  $this->fosterParent( $lastnode );
1314  } else {
1315  // Otherwise, append whatever last node ended up being in
1316  // the previous step to the common ancestor node, first
1317  // removing it from its previous parent node if any.
1318  $ancestor->appendChild( $lastnode );
1319  }
1320 
1321  // Create an element for the token for which the
1322  // formatting element was created, with furthest block
1323  // as the intended parent.
1324  $newelt2 = new BalanceElement(
1325  $fmtelt->namespaceURI, $fmtelt->localName, $fmtelt->attribs );
1326 
1327  // Take all of the child nodes of the furthest block and
1328  // append them to the element created in the last step.
1329  $newelt2->adoptChildren( $furthestblock );
1330 
1331  // Append that new element to the furthest block.
1332  $furthestblock->appendChild( $newelt2 );
1333 
1334  // Remove the formatting element from the list of active
1335  // formatting elements, and insert the new element into the
1336  // list of active formatting elements at the position of
1337  // the aforementioned bookmark.
1338  $afe->remove( $fmtelt );
1339  $afe->replace( $BOOKMARK, $newelt2 );
1340 
1341  // Remove the formatting element from the stack of open
1342  // elements, and insert the new element into the stack of
1343  // open elements immediately below the position of the
1344  // furthest block in that stack.
1345  $this->removeElement( $fmtelt );
1346  $this->insertAfter( $furthestblock, $newelt2 );
1347  }
1348  }
1349 
  // Reached only after the outer loop has run eight times.
1350  return true;
1351  }
1352 
/**
 * Return a debugging representation of this stack: the local names of
 * the entries in $this->elements, in iteration order, separated by
 * single spaces.
 *
 * @return string
 */
public function __toString() {
	$names = [];
	foreach ( $this->elements as $elt ) {
		$names[] = $elt->localName;
	}
	// The separator must be the first argument: the legacy
	// implode( $pieces, $glue ) order was deprecated in PHP 7.4 and
	// removed in PHP 8.0.
	return implode( ' ', $names );
}
1365 }
1366 
1374  public $nextAFE;
1375  public $prevAFE;
1376 }
1377 
1389  private $tail;
1390 
1392  private $head;
1393 
1412  private $noahTableStack = [ [] ];
1413 
/**
 * Break every link between the list entries (and the Noah chains) and
 * drop this object's own references to them.
 */
public function __destruct() {
	$cursor = $this->head;
	while ( $cursor ) {
		// Remember the successor before its link is cleared.
		$following = $cursor->nextAFE;
		$cursor->nextNoah = null;
		$cursor->nextAFE = null;
		$cursor->prevAFE = null;
		$cursor = $following;
	}
	$this->noahTableStack = null;
	$this->tail = null;
	$this->head = null;
}
1421 
/**
 * Append a new BalanceMarker to the end of the list and open a fresh
 * (empty) Noah table for the entries that will follow it.
 */
public function insertMarker() {
	$marker = new BalanceMarker;
	if ( !$this->tail ) {
		// Empty list: the marker becomes the head as well.
		$this->head = $marker;
	} else {
		$marker->prevAFE = $this->tail;
		$this->tail->nextAFE = $marker;
	}
	$this->tail = $marker;
	$this->noahTableStack[] = [];
}
1433 
/**
 * Append an element to the end of the list, applying the "Noah's Ark
 * clause": when three entries with the same Noah key already exist in
 * the current (top-most) Noah table, the earliest of them is removed.
 *
 * @param BalanceElement $elt Element to append; must not be in the list
 * @throws ParameterAssertionException If $elt is already in the list
 */
public function push( BalanceElement $elt ) {
	// Refuse elements that are already linked into the list.
	$alreadyListed = ( $elt->prevAFE !== null ) || ( $this->head === $elt );
	if ( $alreadyListed ) {
		throw new ParameterAssertionException( '$elt',
			'Cannot insert a node into the AFE list twice' );
	}

	// Noah's Ark bookkeeping happens in the table on top of the stack.
	$key = $elt->getNoahKey();
	$topIndex = count( $this->noahTableStack ) - 1;
	$bucket =& $this->noahTableStack[$topIndex];
	if ( isset( $bucket[$key] ) ) {
		// Walk the chain of same-key entries, counting them.
		$first = $bucket[$key];
		$last = $first;
		for ( $copies = 1; $last->nextNoah; $copies++ ) {
			$last = $last->nextNoah;
		}
		if ( $copies >= 3 ) {
			// Already three copies: drop the first entry of the chain.
			$this->remove( $first );
		}
		$last->nextNoah = $elt;
	} else {
		$bucket[$key] = $elt;
	}

	// Link the element at the end of the main AFE list.
	if ( !$this->tail ) {
		$this->head = $elt;
	} else {
		$elt->prevAFE = $this->tail;
		$this->tail->nextAFE = $elt;
	}
	$this->tail = $elt;
}
1474 
/**
 * Unlink entries from the end of the list back to, and including, the
 * last marker. If the list holds no marker, every entry is unlinked.
 * The matching Noah table is discarded (or emptied when no marker was
 * found).
 */
public function clearToMarker() {
	$cursor = $this->tail;
	// Walk backwards from the tail, detaching every non-marker entry.
	while ( $cursor && !( $cursor instanceof BalanceMarker ) ) {
		$before = $cursor->prevAFE;
		$cursor->prevAFE = null;
		$cursor->nextNoah = null;
		if ( $before ) {
			$before->nextAFE = null;
		}
		$cursor = $before;
	}

	if ( !$cursor ) {
		// No marker: wipe the top-level Noah table (which is the only one)
		$this->noahTableStack[0] = [];
	} else {
		// Stopped on a marker: detach it and pop its Noah table.
		$before = $cursor->prevAFE;
		if ( $before ) {
			$before->nextAFE = null;
		}
		$cursor = $before;
		array_pop( $this->noahTableStack );
	}

	// Whatever is left (possibly nothing) is the new end of the list.
	$this->tail = $cursor;
	if ( !$cursor ) {
		$this->head = null;
	}
}
1510 
/**
 * Search backwards from the end of the list, stopping at the first
 * marker, for an entry whose local name equals $tag.
 *
 * @param string $tag Local name to look for
 * @return BalanceElement|null The closest match after the last marker,
 *   or null if there is none
 */
public function findElementByTag( $tag ) {
	for ( $node = $this->tail; $node; $node = $node->prevAFE ) {
		if ( $node instanceof BalanceMarker ) {
			// Never look past a marker.
			break;
		}
		if ( $node->localName === $tag ) {
			return $node;
		}
	}
	return null;
}
1526 
/**
 * Report whether $elt is currently linked into this list: it is either
 * the head, or it has a predecessor.
 *
 * @param BalanceElement $elt
 * @return bool
 */
public function isInList( BalanceElement $elt ) {
	if ( $this->head === $elt ) {
		return true;
	}
	return (bool)$elt->prevAFE;
}
1534 
/**
 * Unlink an element from the list and from its Noah chain.
 *
 * @param BalanceElement $elt Element to remove; must be in the list
 * @throws ParameterAssertionException If $elt is not in the list
 */
public function remove( BalanceElement $elt ) {
	$before = $elt->prevAFE;
	$after = $elt->nextAFE;
	if ( !$before && $this->head !== $elt ) {
		throw new ParameterAssertionException( '$elt',
			"Attempted to remove an element which is not in the AFE list" );
	}

	// Fix up the end pointers if the element sat at either end.
	if ( $this->head === $elt ) {
		$this->head = $after;
	}
	if ( $this->tail === $elt ) {
		$this->tail = $before;
	}

	// Splice the neighbours together.
	if ( $before ) {
		$before->nextAFE = $after;
	}
	if ( $after ) {
		$after->prevAFE = $before;
	}

	// Clear pointers so that isInList() etc. will work
	$elt->nextAFE = null;
	$elt->prevAFE = null;

	$this->removeFromNoahList( $elt );
}
1564 
/**
 * Register an element in the top-most Noah table, appending it to the
 * chain of entries that share its Noah key (or starting a new chain).
 *
 * @param BalanceElement $elt
 */
private function addToNoahList( BalanceElement $elt ) {
	$key = $elt->getNoahKey();
	$topIndex = count( $this->noahTableStack ) - 1;
	$bucket =& $this->noahTableStack[$topIndex];
	if ( isset( $bucket[$key] ) ) {
		// Find the end of the existing chain and hook the element on.
		$last = $bucket[$key];
		while ( $last->nextNoah ) {
			$last = $last->nextNoah;
		}
		$last->nextNoah = $elt;
		return;
	}
	$bucket[$key] = $elt;
}
1578 
/**
 * Remove an element from the chain of same-key entries in the top-most
 * Noah table. Does nothing if the table has no chain for the element's
 * key, or if the element is not part of that chain.
 *
 * @param BalanceElement $elt
 */
private function removeFromNoahList( BalanceElement $elt ) {
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	$key = $elt->getNoahKey();
	if ( !isset( $table[$key] ) ) {
		// No chain for this key in the current table. Previously this
		// read an undefined index and then a property of null, which
		// raised warnings without changing anything.
		return;
	}

	$noahElt = $table[$key];
	if ( $noahElt === $elt ) {
		// The element heads the chain: promote its successor, or drop
		// the key entirely if it was the only entry.
		if ( $elt->nextNoah ) {
			$table[$key] = $elt->nextNoah;
			$elt->nextNoah = null;
		} else {
			unset( $table[$key] );
		}
		return;
	}

	// Otherwise look for the entry whose successor is the element.
	while ( $noahElt->nextNoah ) {
		if ( $noahElt->nextNoah === $elt ) {
			// Found it, unlink
			$noahElt->nextNoah = $elt->nextNoah;
			$elt->nextNoah = null;
			return;
		}
		$noahElt = $noahElt->nextNoah;
	}
}
1603 
/**
 * Put $b into the list at the position currently occupied by $a, and
 * unlink $a. The Noah table is updated to match.
 *
 * @param BalanceElement $a Element to replace; must be in the list
 * @param BalanceElement $b Element taking its place
 * @throws ParameterAssertionException If $a is not in the list
 */
public function replace( BalanceElement $a, BalanceElement $b ) {
	$before = $a->prevAFE;
	$after = $a->nextAFE;
	if ( !$before && $this->head !== $a ) {
		throw new ParameterAssertionException( '$a',
			"Attempted to replace an element which is not in the AFE list" );
	}

	// Redirect the end pointers if $a sat at either end.
	if ( $this->head === $a ) {
		$this->head = $b;
	}
	if ( $this->tail === $a ) {
		$this->tail = $b;
	}

	// Point both neighbours at the replacement.
	if ( $before ) {
		$before->nextAFE = $b;
	}
	if ( $after ) {
		$after->prevAFE = $b;
	}

	// The replacement inherits $a's links; $a ends up detached.
	$b->prevAFE = $before;
	$b->nextAFE = $after;
	$a->prevAFE = null;
	$a->nextAFE = null;

	// Update Noah list
	$this->removeFromNoahList( $a );
	$this->addToNoahList( $b );
}
1634 
/**
 * Link $b into the list immediately after $a and register it in the
 * top-most Noah table.
 *
 * @param BalanceElement $a Existing entry; must be in the list
 * @param BalanceElement $b Element to insert
 * @throws ParameterAssertionException If $a is not in the list
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	$inList = ( $this->head === $a ) || $a->prevAFE;
	if ( !$inList ) {
		throw new ParameterAssertionException( '$a',
			"Attempted to insert after an element which is not in the AFE list" );
	}

	$after = $a->nextAFE;
	if ( $this->tail === $a ) {
		// Inserting after the last entry moves the tail.
		$this->tail = $b;
	}
	if ( $after ) {
		$after->prevAFE = $b;
	}
	$b->nextAFE = $after;
	$b->prevAFE = $a;
	$a->nextAFE = $b;

	$this->addToNoahList( $b );
}
1654 
1655  // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
1661  // @codingStandardsIgnoreEnd
/**
 * Reconstruct the active formatting elements: re-create on the stack of
 * open elements every trailing list entry that is neither a marker nor
 * currently open, replacing each list entry with its new element.
 *
 * @param BalanceStack $stack The stack of open elements; the code calls
 *   indexOf() and insertHTMLElement() on it
 */
public function reconstruct( $stack ) {
	$entry = $this->tail;
	// Nothing to do for an empty list, or when the last entry is a
	// marker or an element that is still open.
	if ( !$entry ) {
		return;
	}
	if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
		return;
	}

	// Rewind: walk backwards until a marker or an open element is met.
	// $start always tracks the earliest entry that still needs to be
	// re-created (the first entry of the list if nothing stops us).
	$start = $entry;
	for ( $probe = $entry->prevAFE; $probe; $probe = $probe->prevAFE ) {
		if ( $probe instanceof BalanceMarker || $stack->indexOf( $probe ) >= 0 ) {
			break;
		}
		$start = $probe;
	}

	// Advance: re-create each entry from $start to the end of the list
	// and swap the fresh element into the entry's position.
	while ( $start ) {
		$fresh = $stack->insertHTMLElement( $start->localName, $start->attribs );
		$this->replace( $start, $fresh );
		$start = $fresh->nextAFE;
	}
}
1704 
/**
 * Dump the list for debugging, one entry per line, and flag broken
 * back-links or a stale tail pointer.
 *
 * @return string
 */
public function __toString() {
	$s = '';
	$last = null;
	$node = $this->head;
	while ( $node ) {
		if ( $node instanceof BalanceMarker ) {
			$s .= "MARKER\n";
		} else {
			// Element name plus a short hash identifying the object.
			$s .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
			if ( $node->nextNoah ) {
				$s .= " (noah sibling: {$node->nextNoah->localName}#" .
					substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
					')';
			}
			// Sanity check of the doubly-linked structure.
			if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
				$s .= " (reverse link is wrong!)";
			}
			$s .= "\n";
		}
		$last = $node;
		$node = $node->nextAFE;
	}
	// The last entry visited must be what the tail pointer refers to.
	if ( $last !== $this->tail ) {
		$s .= "(tail pointer is wrong!)\n";
	}
	return $s;
}
1732 }
1733 
1785 class Balancer {
1786  private $parseMode;
1787  private $bitsIterator;
1789  private $afe;
1790  private $stack;
1791  private $strict;
1792  private $tidyCompat;
1794 
1801  private $inRCDATA;
1802  private $inRAWTEXT;
1803 
1808  const VALID_COMMENT_REGEX = "~ !--
1809  ( # 1. Comment match detector
1810  > | -> | # Invalid short close
1811  ( # 2. Comment contents
1812  (?:
1813  (?! --> )
1814  (?! --!> )
1815  (?! --! \z )
1816  (?! -- \z )
1817  (?! - \z )
1818  .
1819  )*+
1820  )
1821  ( # 3. Comment close
1822  --> | # Normal close
1823  --!> | # Comment end bang
1824  ( # 4. Indicate matches requiring EOF
1825  --! | # EOF in comment end bang state
1826  -- | # EOF in comment end state
1827  - | # EOF in comment end dash state
1828  # EOF in comment state
1829  )
1830  )
1831  )
1832  ([^<]*) \z # 5. Non-tag text after the comment
1833  ~xs";
1834 
/**
 * Create a new Balancer.
 *
 * @param array $config Options; missing keys take these defaults:
 *   - 'strict' (false)
 *   - 'allowedHtmlElements' (null): when not null, an array whose keys
 *     are checked against the unsupported-element set below
 *   - 'tidyCompat' (false)
 *   - 'allowComments' (true)
 * @throws ParameterAssertionException If 'allowedHtmlElements' names
 *   elements which the balancer does not support
 */
public function __construct( array $config = [] ) {
	$config = $config + [
		'strict' => false,
		'allowedHtmlElements' => null,
		'tidyCompat' => false,
		'allowComments' => true,
	];
	$this->allowedHtmlElements = $config['allowedHtmlElements'];
	$this->strict = $config['strict'];
	$this->tidyCompat = $config['tidyCompat'];
	$this->allowComments = $config['allowComments'];
	if ( $this->allowedHtmlElements !== null ) {
		# Sanity check!
		$bad = array_uintersect_assoc(
			$this->allowedHtmlElements,
			// NOTE(review): this second array was missing from the
			// listing under review (the call had a single array, which
			// is invalid); restored as the HTML unsupported set --
			// confirm against the canonical file.
			BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE],
			function( $a, $b ) {
				// Ignore the values (just intersect the keys) by saying
				// all values are equal to each other.
				return 0;
			}
		);
		if ( count( $bad ) > 0 ) {
			// Separator first: the legacy implode( $pieces, $glue )
			// order was removed in PHP 8.0.
			$badstr = implode( ',', array_keys( $bad ) );
			throw new ParameterAssertionException(
				'$config',
				'Balance attempted with sanitization including ' .
				"unsupported elements: {$badstr}"
			);
		}
	}
}
1891 
/**
 * Balance a fragment of HTML text and return the serialized result.
 *
 * The text is split on '<'; the first piece is plain text and every
 * following piece is handed to advance() as a potential tag or comment.
 *
 * @param string $text The markup to balance
 * @param callable|null $processingCallback Optional callback; advance()
 *   invokes it with the attribute string (by reference) and
 *   $processingArgs
 * @param array $processingArgs Extra argument passed to the callback
 * @return string The value of the open-element stack's getOutput()
 */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
	// Fresh parser state for this run.
	$this->parseMode = 'inBodyMode';
	$this->bitsIterator = new ExplodeIterator( '<', $text );
	$this->afe = new BalanceActiveFormattingElements();
	$this->stack = new BalanceStack();
	$this->stack->tidyCompat = $this->tidyCompat;
	$this->processingCallback = $processingCallback;
	$this->processingArgs = $processingArgs;

	$this->inRAWTEXT = false;
	$this->inRCDATA = false;
	$this->ignoreLinefeed = false;
	$this->textIntegrationMode = false;

	# The stack is constructed with an <html> element already on it.
	# Set this up as a fragment parsed with <body> as the context.
	$this->fragmentContext =
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
	$this->resetInsertionMode();

	// The form element pointer is the nearest <form> at or above the
	// fragment context, if any.
	$this->formElementPointer = null;
	$ctx = $this->fragmentContext;
	while ( $ctx != null ) {
		if ( $ctx->isHtmlNamed( 'form' ) ) {
			$this->formElementPointer = $ctx;
			break;
		}
		$ctx = $ctx->parent;
	}

	// The first piece precedes any '<', so it is text and not a tag.
	$firstPiece = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$this->insertToken( 'text', str_replace( '>', '&gt;', $firstPiece ) );

	// Every remaining piece started with '<' in the input.
	for ( ; $this->bitsIterator->valid(); ) {
		$this->advance();
	}
	$this->insertToken( 'eof', null );
	$result = $this->stack->getOutput();

	// Free memory before returning.
	$this->formElementPointer = null;
	$this->fragmentContext = null;
	$this->stack = null;
	$this->afe = null;
	$this->bitsIterator = null;
	return $result;
}
1949 
/**
 * Dispatch one token either to the foreign-content handler or to the
 * handler method named by the current parse mode.
 *
 * @param string $token Token type; this method tests for 'tag',
 *   'endtag', 'text' and 'eof'
 * @param string|null $value Tag name or text, depending on the token
 * @param array|null $attribs Attributes, passed through to the handler
 * @param bool $selfclose Passed through to the handler
 * @return bool|null False for a tag that is rejected as unsupported,
 *   true for text that needs no insertion, otherwise whatever the
 *   selected handler returns
 */
private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
	// validate tags against $unsupportedSet
	if ( $token === 'tag' || $token === 'endtag' ) {
		// NOTE(review): the condition on the next line was missing from
		// the listing under review (its closing brace was still there,
		// leaving the braces unbalanced); restored as a lookup in the
		// HTML unsupported set -- confirm against the canonical file.
		if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
			# As described in "simplifications" above, these tags are
			# not supported in the balancer.
			Assert::invariant(
				!$this->strict,
				"Unsupported $token <$value> found."
			);
			return false;
		}
	} elseif ( $token === 'text' && $value === '' ) {
		# Don't actually inject the empty string as a text token.
		return true;
	}

	// Support pre/listing/textarea by suppressing initial linefeed
	if ( $this->ignoreLinefeed ) {
		$this->ignoreLinefeed = false;
		// $value is non-empty here: empty text returned early above.
		if ( $token === 'text' && $value[0] === "\n" ) {
			if ( $value === "\n" ) {
				# Nothing would be left, don't inject the empty string.
				return true;
			}
			$value = substr( $value, 1 );
		}
	}

	// Some hoops we have to jump through
	$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );

	// Assume foreign content unless one of the conditions below applies.
	$isForeign = true;
	if (
		$this->stack->length() === 0 ||
		$adjusted->isHtml() ||
		$token === 'eof'
	) {
		$isForeign = false;
	} elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
		if ( $token === 'text' ) {
			$isForeign = false;
		} elseif (
			$token === 'tag' &&
			$value !== 'mglyph' && $value !== 'malignmark'
		) {
			$isForeign = false;
		}
	} elseif (
		$adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$adjusted->localName === 'annotation-xml' &&
		$token === 'tag' && $value === 'svg'
	) {
		$isForeign = false;
	} elseif (
		$adjusted->isHtmlIntegrationPoint() &&
		( $token === 'tag' || $token === 'text' )
	) {
		$isForeign = false;
	}

	if ( $isForeign ) {
		return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
	}
	// The parse mode is the name of the handler method to call.
	$func = $this->parseMode;
	return $this->$func( $token, $value, $attribs, $selfclose );
}
2021 
/**
 * Handle a token while in foreign (non-HTML) content.
 *
 * @param string $token Token type: 'text', 'comment', 'tag' or 'endtag'
 * @param string|null $value Text, comment contents or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Whether a start tag was self-closing
 * @return bool|null True when the token was handled here; for tokens
 *   re-dispatched as HTML, the result of that handler; null if nothing
 *   matched
 */
private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	} elseif ( $token === 'comment' ) {
		// Comments in foreign content are inserted like anywhere else;
		// previously they fell through this method and were dropped.
		$this->stack->insertComment( $value );
		return true;
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		case 'font':
			// A <font> start tag breaks out of foreign content only
			// when it has none of these attributes; otherwise it is
			// treated as "any other start tag".
			if ( isset( $attribs['color'] )
				|| isset( $attribs['face'] )
				|| isset( $attribs['size'] )
			) {
				break;
			}
			/* otherwise, fall through */
		case 'b':
		case 'big':
		case 'blockquote':
		case 'body':
		case 'br':
		case 'center':
		case 'code':
		case 'dd':
		case 'div':
		case 'dl':
		case 'dt':
		case 'em':
		case 'embed':
		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
		case 'head':
		case 'hr':
		case 'i':
		case 'img':
		case 'li':
		case 'listing':
		case 'menu':
		case 'meta':
		case 'nobr':
		case 'ol':
		case 'p':
		case 'pre':
		case 'ruby':
		case 's':
		case 'small':
		case 'span':
		case 'strong':
		case 'strike':
		case 'sub':
		case 'sup':
		case 'table':
		case 'tt':
		case 'u':
		case 'ul':
		case 'var':
			if ( $this->fragmentContext ) {
				// With a fragment context set, handle these as "any
				// other start tag" below.
				break;
			}
			// Pop until the current node is an integration point or
			// an HTML element, then reprocess the token.
			while ( true ) {
				$this->stack->pop();
				$node = $this->stack->currentNode;
				if (
					$node->isMathmlTextIntegrationPoint() ||
					$node->isHtmlIntegrationPoint() ||
					$node->isHtml()
				) {
					break;
				}
			}
			return $this->insertToken( $token, $value, $attribs, $selfclose );
		}
		// "Any other start tag"
		$adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
			$this->fragmentContext : $this->stack->currentNode;
		// The new element takes the namespace of the adjusted current node.
		$this->stack->insertForeignElement(
			$adjusted->namespaceURI, $value, $attribs
		);
		if ( $selfclose ) {
			$this->stack->pop();
		}
		return true;
	} elseif ( $token === 'endtag' ) {
		$first = true;
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isHtml() && !$first ) {
				// process the end tag as HTML
				$func = $this->parseMode;
				return $this->$func( $token, $value, $attribs, $selfclose );
			} elseif ( $i === 0 ) {
				// Index 0 reached without a match: ignore the end tag.
				return true;
			} elseif ( $node->localName === $value ) {
				$this->stack->popTag( $node );
				return true;
			}
			$first = false;
		}
	}
	// Any other token type (or an empty stack) is not handled; the
	// implicit null return is kept as before.
}
2123 
/**
 * Consume the next piece from the bits iterator (the text following one
 * '<' in the input) and insert the corresponding tokens: a comment, a
 * start or end tag followed by its trailing text, or -- when the piece
 * is not an acceptable tag -- the whole piece as text.
 */
private function advance() {
	$x = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$regs = [];
	# Handle comments. These won't be generated by mediawiki (they
	# are stripped in the Sanitizer) but may be generated by extensions.
	if (
		$this->allowComments &&
		!( $this->inRCDATA || $this->inRAWTEXT ) &&
		preg_match( Balancer::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
		/* verify EOF condition where necessary */
		( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
	) {
		$contents = $regs[2][0];
		$rest = $regs[5][0];
		$this->insertToken( 'comment', $contents );
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
		return;
	}
	# $slash: Does the current element start with a '/'?
	# $t: Current element name
	# $attribStr: String between element name and >
	# $brace: Ending '>' or '/>'
	# $rest: Everything until the next element from the $bitsIterator
	if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
		list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
		$t = strtolower( $t );
		if ( $this->strict ) {
			/* Verify that attributes are all properly double-quoted */
			Assert::invariant(
				preg_match(
					'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
				),
				"Bad attribute string found"
			);
		}
	} else {
		Assert::invariant(
			!$this->strict, "< found which does not start a valid tag"
		);
		$slash = $t = $attribStr = $brace = $rest = null;
	}
	$goodtag = $t;
	if ( $this->inRCDATA ) {
		if ( $slash && $t === $this->inRCDATA ) {
			$this->inRCDATA = false;
		} else {
			// No tags allowed; this emulates the "rcdata" tokenizer mode.
			$goodtag = false;
		}
	}
	if ( $this->inRAWTEXT ) {
		if ( $slash && $t === $this->inRAWTEXT ) {
			$this->inRAWTEXT = false;
		} else {
			// No tags allowed, no entity-escaping done.
			$goodtag = false;
		}
	}
	$sanitize = $this->allowedHtmlElements !== null;
	if ( $sanitize ) {
		// Narrow, never widen: a piece already rejected by the
		// RCDATA/RAWTEXT checks above must stay rejected even when its
		// name is an allowed element. (Previously this assignment
		// overwrote that decision.)
		$goodtag = $goodtag && isset( $this->allowedHtmlElements[$t] );
	}
	if ( $goodtag ) {
		if ( is_callable( $this->processingCallback ) ) {
			call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
		}
		if ( $sanitize ) {
			$goodtag = Sanitizer::validateTag( $attribStr, $t );
		}
	}
	if ( $goodtag ) {
		$attribs = Sanitizer::decodeTagAttributes( $attribStr );
		if ( $sanitize ) {
			// NOTE(review): in the listing under review the sanitize
			// branch was identical to the non-sanitize branch because a
			// line was missing; restored as attribute validation --
			// confirm against the canonical file.
			$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
		}
		$goodtag = $this->insertToken(
			$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
		);
	}
	if ( $goodtag ) {
		// Escape stray '>' in the trailing text (once is enough; the
		// replacement text contains no '>').
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
	} elseif ( $this->inRAWTEXT ) {
		// Raw text is passed through without escaping.
		$this->insertToken( 'text', "<$x" );
	} else {
		# bad tag; serialize entire thing as text.
		$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
	}
}
2220 
/**
 * Change the current parse mode.
 *
 * @param string $mode Name of the new mode; must end in "Mode"
 * @return string The mode that was active before the switch
 */
private function switchMode( $mode ) {
	$hasModeSuffix = ( substr( $mode, -4 ) === 'Mode' );
	Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );
	$previous = $this->parseMode;
	$this->parseMode = $mode;
	return $previous;
}
2229 
/**
 * Switch to another parse mode, then feed the given token through
 * insertToken() again so that the new mode handles it.
 *
 * @param string $mode Name of the new mode
 * @param string $token
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfclose
 * @return bool|null The result of insertToken()
 */
private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
	$this->switchMode( $mode );
	$handled = $this->insertToken( $token, $value, $attribs, $selfclose );
	return $handled;
}
2234 
/**
 * Reset the parse mode according to the elements on the stack.
 *
 * The stack is iterated until an element determines the mode; at index
 * 0 the fragment context (if set) stands in for the stack entry, and
 * 'inBodyMode' is the fallback.
 */
private function resetInsertionMode() {
	// HTML elements whose local name maps directly to a parse mode.
	$modeByName = [
		'tr' => 'inRowMode',
		'tbody' => 'inTableBodyMode',
		'tfoot' => 'inTableBodyMode',
		'thead' => 'inTableBodyMode',
		'caption' => 'inCaptionMode',
		'colgroup' => 'inColumnGroupMode',
		'table' => 'inTableMode',
		'body' => 'inBodyMode',
		# OMITTED: <frameset>
		# OMITTED: <html>
		# OMITTED: <head>
	];
	foreach ( $this->stack as $i => $node ) {
		// Index 0 is the last entry visited.
		$last = ( $i === 0 );
		if ( $last && $this->fragmentContext ) {
			$node = $this->fragmentContext;
		}
		if ( $node->isHtml() ) {
			$name = $node->localName;
			if ( $name === 'select' ) {
				// Scan stack entries for a <table>, stopping at a
				// <template>.
				$stacklen = $this->stack->length();
				for ( $j = $i + 1; $j < $stacklen-1; $j++ ) {
					$ancestor = $this->stack->node( $stacklen-$j-1 );
					if ( $ancestor->isHtmlNamed( 'template' ) ) {
						break;
					}
					if ( $ancestor->isHtmlNamed( 'table' ) ) {
						$this->switchMode( 'inSelectInTableMode' );
						return;
					}
				}
				$this->switchMode( 'inSelectMode' );
				return;
			}
			if ( $name === 'template' ) {
				// Use the most recently pushed template insertion mode.
				$this->switchMode(
					array_slice( $this->templateInsertionModes, -1 )[0]
				);
				return;
			}
			if ( isset( $modeByName[$name] ) ) {
				$this->switchMode( $modeByName[$name] );
				return;
			}
			# OMITTED: <head>
			if ( !$last && $node->isA( BalanceSets::$tableCellSet ) ) {
				$this->switchMode( 'inCellMode' );
				return;
			}
		}
		if ( $last ) {
			$this->switchMode( 'inBodyMode' );
			return;
		}
	}
}
2304 
/**
 * Finish parsing: release the AFE list and pop the stack of open
 * elements down to its first entry.
 */
private function stopParsing() {
	# Of the spec's "stop parsing" steps only step 2 applies here:
	# "pop all the nodes off the stack of open elements" -- except that
	# the top-most <html> element is kept on the stack.

	# The AFE list is released before popping; otherwise the element
	# objects would stay live during serialization, potentially using
	# O(N^2) memory. Popping the stack never results in reconstructing
	# the active formatting elements, so the list is no longer needed.
	$this->afe = null;

	$keep = 1;
	$this->stack->popTo( $keep );
}
2317 
/**
 * Insert an HTML element whose content is raw text, and switch to
 * 'inTextMode' until it is closed.
 *
 * @param string $value Tag name; also recorded in $this->inRAWTEXT
 * @param array|null $attribs
 * @return bool Always true
 */
private function parseRawText( $value, $attribs = null ) {
	$this->stack->insertHTMLElement( $value, $attribs );
	// advance() compares end tags against this name to leave raw text.
	$this->inRAWTEXT = $value;
	// Remember the mode to return to once the element ends.
	$previousMode = $this->switchMode( 'inTextMode' );
	$this->originalInsertionMode = $previousMode;
	return true;
}
2324 
/**
 * Token handler for 'inTextMode'.
 *
 * @param string $token
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfclose
 * @return bool|null True, except for 'eof' where the result of
 *   reprocessing the token in the original mode is returned
 */
private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		// Close the element, then let the original mode see the EOF.
		$this->stack->pop();
		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfclose
		);
	}
	if ( $token === 'endtag' ) {
		// Close the element and resume in the original mode.
		$this->stack->pop();
		$this->switchMode( $this->originalInsertionMode );
	}
	// Every other token is ignored.
	return true;
}
2341 
/**
 * Token handler implementing the subset of "in head" processing that
 * this class supports.
 *
 * @param string $token
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfclose
 * @return bool|null True when the token was handled or ignored here;
 *   otherwise the result of reprocessing it through insertToken()
 */
private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		// Leading whitespace is inserted here; anything after it is
		// handled at the bottom of the function.
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
			$this->stack->insertText( $matches[0] );
			$value = substr( $value, strlen( $matches[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
	} elseif ( $token === 'tag' ) {
		# OMITTED: <html>
		# OMITTED: <title>
		# OMITTED: <noscript>
		# OMITTED: <script>
		# OMITTED: <head>
		if (
			# OMITTED: in a full HTML parser, <meta> might change the encoding.
			$value === 'meta' ||
			$value === 'base' ||
			$value === 'basefont' ||
			$value === 'bgsound' ||
			$value === 'link'
		) {
			// These elements are inserted and immediately popped.
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;
		}
		if ( $value === 'noframes' || $value === 'style' ) {
			return $this->parseRawText( $value, $attribs );
		}
		if ( $value === 'template' ) {
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			# OMITTED: frameset_ok
			$this->switchMode( 'inTemplateMode' );
			$this->templateInsertionModes[] = $this->parseMode;
			return true;
		}
		// Any other start tag is handled at the bottom of the function.
	} elseif ( $token === 'endtag' ) {
		# OMITTED: <head>
		# OMITTED: <body>
		# OMITTED: <html>
		if ( $value === 'template' ) {
			if ( $this->stack->indexOf( $value ) < 0 ) {
				return true; // Ignore the token.
			}
			$this->stack->generateImpliedEndTags( null, true /* thorough */ );
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			return true;
		}
		if ( $value !== 'br' ) {
			// ignore any other end tag
			return true;
		}
		// </br> is handled at the bottom of the function.
	}

	// If not handled above: issue a synthetic </head> (which the
	// end-tag branch above simply ignores)...
	$this->inHeadMode( 'endtag', 'head' );
	// ...then redo this token.
	return $this->insertToken( $token, $value, $attribs, $selfclose );
}
2411 
2412  private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
2413  if ( $token === 'text' ) {
2414  $this->afe->reconstruct( $this->stack );
2415  $this->stack->insertText( $value );
2416  return true;
2417  } elseif ( $token === 'eof' ) {
2418  if ( !empty( $this->templateInsertionModes ) ) {
2419  return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
2420  }
2421  $this->stopParsing();
2422  return true;
2423  } elseif ( $token === 'tag' ) {
2424  switch ( $value ) {
2425  # OMITTED: <html>
2426  case 'base':
2427  case 'basefont':
2428  case 'bgsound':
2429  case 'link':
2430  case 'meta':
2431  case 'noframes':
2432  # OMITTED: <script>
2433  case 'style':
2434  case 'template':
2435  # OMITTED: <title>
2436  return $this->inHeadMode( $token, $value, $attribs, $selfclose );
2437  # OMITTED: <body>
2438  # OMITTED: <frameset>
2439 
2440  case 'address':
2441  case 'article':
2442  case 'aside':
2443  case 'blockquote':
2444  case 'center':
2445  case 'details':
2446  case 'dialog':
2447  case 'dir':
2448  case 'div':
2449  case 'dl':
2450  case 'fieldset':
2451  case 'figcaption':
2452  case 'figure':
2453  case 'footer':
2454  case 'header':
2455  case 'hgroup':
2456  case 'main':
2457  case 'menu':
2458  case 'nav':
2459  case 'ol':
2460  case 'p':
2461  case 'section':
2462  case 'summary':
2463  case 'ul':
2464  if ( $this->stack->inButtonScope( 'p' ) ) {
2465  $this->inBodyMode( 'endtag', 'p' );
2466  }
2467  $this->stack->insertHTMLElement( $value, $attribs );
2468  return true;
2469 
2470  case 'h1':
2471  case 'h2':
2472  case 'h3':
2473  case 'h4':
2474  case 'h5':
2475  case 'h6':
2476  if ( $this->stack->inButtonScope( 'p' ) ) {
2477  $this->inBodyMode( 'endtag', 'p' );
2478  }
2479  if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
2480  $this->stack->pop();
2481  }
2482  $this->stack->insertHTMLElement( $value, $attribs );
2483  return true;
2484 
2485  case 'pre':
2486  case 'listing':
2487  if ( $this->stack->inButtonScope( 'p' ) ) {
2488  $this->inBodyMode( 'endtag', 'p' );
2489  }
2490  $this->stack->insertHTMLElement( $value, $attribs );
2491  $this->ignoreLinefeed = true;
2492  # OMITTED: frameset_ok
2493  return true;
2494 
2495  case 'form':
2496  if (
2497  $this->formElementPointer &&
2498  $this->stack->indexOf( 'template' ) < 0
2499  ) {
2500  return true; // in a form, not in a template.
2501  }
2502  if ( $this->stack->inButtonScope( "p" ) ) {
2503  $this->inBodyMode( 'endtag', 'p' );
2504  }
2505  $elt = $this->stack->insertHTMLElement( $value, $attribs );
2506  if ( $this->stack->indexOf( 'template' ) < 0 ) {
2507  $this->formElementPointer = $elt;
2508  }
2509  return true;
2510 
2511  case 'li':
2512  # OMITTED: frameset_ok
2513  foreach ( $this->stack as $node ) {
2514  if ( $node->isHtmlNamed( 'li' ) ) {
2515  $this->inBodyMode( 'endtag', 'li' );
2516  break;
2517  }
2518  if (
2519  $node->isA( BalanceSets::$specialSet ) &&
2520  !$node->isA( BalanceSets::$addressDivPSet )
2521  ) {
2522  break;
2523  }
2524  }
2525  if ( $this->stack->inButtonScope( 'p' ) ) {
2526  $this->inBodyMode( 'endtag', 'p' );
2527  }
2528  $this->stack->insertHTMLElement( $value, $attribs );
2529  return true;
2530 
2531  case 'dd':
2532  case 'dt':
2533  # OMITTED: frameset_ok
2534  foreach ( $this->stack as $node ) {
2535  if ( $node->isHtmlNamed( 'dd' ) ) {
2536  $this->inBodyMode( 'endtag', 'dd' );
2537  break;
2538  }
2539  if ( $node->isHtmlNamed( 'dt' ) ) {
2540  $this->inBodyMode( 'endtag', 'dt' );
2541  break;
2542  }
2543  if (
2544  $node->isA( BalanceSets::$specialSet ) &&
2545  !$node->isA( BalanceSets::$addressDivPSet )
2546  ) {
2547  break;
2548  }
2549  }
2550  if ( $this->stack->inButtonScope( 'p' ) ) {
2551  $this->inBodyMode( 'endtag', 'p' );
2552  }
2553  $this->stack->insertHTMLElement( $value, $attribs );
2554  return true;
2555 
2556  # OMITTED: <plaintext>
2557 
2558  case 'button':
2559  if ( $this->stack->inScope( 'button' ) ) {
2560  $this->inBodyMode( 'endtag', 'button' );
2561  return $this->insertToken( $token, $value, $attribs, $selfclose );
2562  }
2563  $this->afe->reconstruct( $this->stack );
2564  $this->stack->insertHTMLElement( $value, $attribs );
2565  return true;
2566 
2567  case 'a':
2568  $activeElement = $this->afe->findElementByTag( 'a' );
2569  if ( $activeElement ) {
2570  $this->inBodyMode( 'endtag', 'a' );
2571  if ( $this->afe->isInList( $activeElement ) ) {
2572  $this->afe->remove( $activeElement );
2573  // Don't flatten here, since when we fall
2574  // through below we might foster parent
2575  // the new <a> tag inside this one.
2576  $this->stack->removeElement( $activeElement, false );
2577  }
2578  }
2579  /* Falls through */
2580  case 'b':
2581  case 'big':
2582  case 'code':
2583  case 'em':
2584  case 'font':
2585  case 'i':
2586  case 's':
2587  case 'small':
2588  case 'strike':
2589  case 'strong':
2590  case 'tt':
2591  case 'u':
2592  $this->afe->reconstruct( $this->stack );
2593  $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
2594  return true;
2595 
2596  case 'nobr':
2597  $this->afe->reconstruct( $this->stack );
2598  if ( $this->stack->inScope( 'nobr' ) ) {
2599  $this->inBodyMode( 'endtag', 'nobr' );
2600  $this->afe->reconstruct( $this->stack );
2601  }
2602  $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
2603  return true;
2604 
2605  case 'applet':
2606  case 'marquee':
2607  case 'object':
2608  $this->afe->reconstruct( $this->stack );
2609  $this->stack->insertHTMLElement( $value, $attribs );
2610  $this->afe->insertMarker();
2611  # OMITTED: frameset_ok
2612  return true;
2613 
2614  case 'table':
2615  # The document is never in "quirks mode"; see simplifications
2616  # above.
2617  if ( $this->stack->inButtonScope( 'p' ) ) {
2618  $this->inBodyMode( 'endtag', 'p' );
2619  }
2620  $this->stack->insertHTMLElement( $value, $attribs );
2621  # OMITTED: frameset_ok
2622  $this->switchMode( 'inTableMode' );
2623  return true;
2624 
2625  case 'area':
2626  case 'br':
2627  case 'embed':
2628  case 'img':
2629  case 'keygen':
2630  case 'wbr':
2631  $this->afe->reconstruct( $this->stack );
2632  $this->stack->insertHTMLElement( $value, $attribs );
2633  $this->stack->pop();
2634  # OMITTED: frameset_ok
2635  return true;
2636 
2637  case 'input':
2638  $this->afe->reconstruct( $this->stack );
2639  $this->stack->insertHTMLElement( $value, $attribs );
2640  $this->stack->pop();
2641  # OMITTED: frameset_ok
2642  # (hence we don't need to examine the tag's "type" attribute)
2643  return true;
2644 
2645  case 'menuitem':
2646  case 'param':
2647  case 'source':
2648  case 'track':
2649  $this->stack->insertHTMLElement( $value, $attribs );
2650  $this->stack->pop();
2651  return true;
2652 
2653  case 'hr':
2654  if ( $this->stack->inButtonScope( 'p' ) ) {
2655  $this->inBodyMode( 'endtag', 'p' );
2656  }
2657  $this->stack->insertHTMLElement( $value, $attribs );
2658  $this->stack->pop();
2659  return true;
2660 
2661  case 'image':
2662  # warts!
2663  return $this->inBodyMode( $token, 'img', $attribs, $selfclose );
2664 
2665  # OMITTED: <isindex>
2666 
2667  case 'textarea':
2668  $this->stack->insertHTMLElement( $value, $attribs );
2669  $this->ignoreLinefeed = true;
2670  $this->inRCDATA = $value; // emulate rcdata tokenizer mode
2671  # OMITTED: frameset_ok
2672  return true;
2673 
2674  # OMITTED: <xmp>
2675  # OMITTED: <iframe>
2676  # OMITTED: <noembed>
2677  # OMITTED: <noscript>
2678 
2679  case 'select':
2680  $this->afe->reconstruct( $this->stack );
2681  $this->stack->insertHTMLElement( $value, $attribs );
2682  switch ( $this->parseMode ) {
2683  case 'inTableMode':
2684  case 'inCaptionMode':
2685  case 'inTableBodyMode':
2686  case 'inRowMode':
2687  case 'inCellMode':
2688  $this->switchMode( 'inSelectInTableMode' );
2689  return true;
2690  default:
2691  $this->switchMode( 'inSelectMode' );
2692  return true;
2693  }
2694 
2695  case 'optgroup':
2696  case 'option':
2697  if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
2698  $this->inBodyMode( 'endtag', 'option' );
2699  }
2700  $this->afe->reconstruct( $this->stack );
2701  $this->stack->insertHTMLElement( $value, $attribs );
2702  return true;
2703 
2704  case 'rb':
2705  case 'rtc':
2706  if ( $this->stack->inScope( 'ruby' ) ) {
2707  $this->stack->generateImpliedEndTags();
2708  }
2709  $this->stack->insertHTMLElement( $value, $attribs );
2710  return true;
2711 
2712  case 'rp':
2713  case 'rt':
2714  if ( $this->stack->inScope( 'ruby' ) ) {
2715  $this->stack->generateImpliedEndTags( 'rtc' );
2716  }
2717  $this->stack->insertHTMLElement( $value, $attribs );
2718  return true;
2719 
2720  case 'math':
2721  $this->afe->reconstruct( $this->stack );
2722  # We skip the spec's "adjust MathML attributes" and
2723  # "adjust foreign attributes" steps, since the browser will
2724  # do this later when it parses the output and it doesn't affect
2725  # balancing.
2726  $this->stack->insertForeignElement(
2728  );
2729  if ( $selfclose ) {
2730  # emit explicit </math> tag.
2731  $this->stack->pop();
2732  }
2733  return true;
2734 
2735  case 'svg':
2736  $this->afe->reconstruct( $this->stack );
2737  # We skip the spec's "adjust SVG attributes" and
2738  # "adjust foreign attributes" steps, since the browser will
2739  # do this later when it parses the output and it doesn't affect
2740  # balancing.
2741  $this->stack->insertForeignElement(
2743  );
2744  if ( $selfclose ) {
2745  # emit explicit </svg> tag.
2746  $this->stack->pop();
2747  }
2748  return true;
2749 
2750  case 'caption':
2751  case 'col':
2752  case 'colgroup':
2753  # OMITTED: <frame>
2754  case 'head':
2755  case 'tbody':
2756  case 'td':
2757  case 'tfoot':
2758  case 'th':
2759  case 'thead':
2760  case 'tr':
2761  // Ignore table tags if we're not inTableMode
2762  return true;
2763  }
2764 
2765  // Handle any other start tag here
2766  $this->afe->reconstruct( $this->stack );
2767  $this->stack->insertHTMLElement( $value, $attribs );
2768  return true;
2769  } elseif ( $token === 'endtag' ) {
2770  switch ( $value ) {
2771  # </body>,</html> are unsupported.
2772 
2773  case 'template':
2774  return $this->inHeadMode( $token, $value, $attribs, $selfclose );
2775 
2776  case 'address':
2777  case 'article':
2778  case 'aside':
2779  case 'blockquote':
2780  case 'button':
2781  case 'center':
2782  case 'details':
2783  case 'dialog':
2784  case 'dir':
2785  case 'div':
2786  case 'dl':
2787  case 'fieldset':
2788  case 'figcaption':
2789  case 'figure':
2790  case 'footer':
2791  case 'header':
2792  case 'hgroup':
2793  case 'listing':
2794  case 'main':
2795  case 'menu':
2796  case 'nav':
2797  case 'ol':
2798  case 'pre':
2799  case 'section':
2800  case 'summary':
2801  case 'ul':
2802  // Ignore if there is not a matching open tag
2803  if ( !$this->stack->inScope( $value ) ) {
2804  return true;
2805  }
2806  $this->stack->generateImpliedEndTags();
2807  $this->stack->popTag( $value );
2808  return true;
2809 
2810  case 'form':
2811  if ( $this->stack->indexOf( 'template' ) < 0 ) {
2812  $openform = $this->formElementPointer;
2813  $this->formElementPointer = null;
2814  if ( !$openform || !$this->stack->inScope( $openform ) ) {
2815  return true;
2816  }
2817  $this->stack->generateImpliedEndTags();
2818  // Don't flatten yet if we're removing a <form> element
2819  // out-of-order. (eg. `<form><div></form>`)
2820  $flatten = ( $this->stack->currentNode === $openform );
2821  $this->stack->removeElement( $openform, $flatten );
2822  } else {
2823  if ( !$this->stack->inScope( 'form' ) ) {
2824  return true;
2825  }
2826  $this->stack->generateImpliedEndTags();
2827  $this->stack->popTag( 'form' );
2828  }
2829  return true;
2830 
2831  case 'p':
2832  if ( !$this->stack->inButtonScope( 'p' ) ) {
2833  $this->inBodyMode( 'tag', 'p', [] );
2834  return $this->insertToken( $token, $value, $attribs, $selfclose );
2835  }
2836  $this->stack->generateImpliedEndTags( $value );
2837  $this->stack->popTag( $value );
2838  return true;
2839 
2840  case 'li':
2841  if ( !$this->stack->inListItemScope( $value ) ) {
2842  return true; # ignore
2843  }
2844  $this->stack->generateImpliedEndTags( $value );
2845  $this->stack->popTag( $value );
2846  return true;
2847 
2848  case 'dd':
2849  case 'dt':
2850  if ( !$this->stack->inScope( $value ) ) {
2851  return true; # ignore
2852  }
2853  $this->stack->generateImpliedEndTags( $value );
2854  $this->stack->popTag( $value );
2855  return true;
2856 
2857  case 'h1':
2858  case 'h2':
2859  case 'h3':
2860  case 'h4':
2861  case 'h5':
2862  case 'h6':
2863  if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
2864  return true; # ignore
2865  }
2866  $this->stack->generateImpliedEndTags();
2867  $this->stack->popTag( BalanceSets::$headingSet );
2868  return true;
2869 
2870  case 'sarcasm':
2871  # Take a deep breath, then:
2872  break;
2873 
2874  case 'a':
2875  case 'b':
2876  case 'big':
2877  case 'code':
2878  case 'em':
2879  case 'font':
2880  case 'i':
2881  case 'nobr':
2882  case 's':
2883  case 'small':
2884  case 'strike':
2885  case 'strong':
2886  case 'tt':
2887  case 'u':
2888  if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
2889  return true; # If we did something, we're done.
2890  }
2891  break; # Go to the "any other end tag" case.
2892 
2893  case 'applet':
2894  case 'marquee':
2895  case 'object':
2896  if ( !$this->stack->inScope( $value ) ) {
2897  return true; # ignore
2898  }
2899  $this->stack->generateImpliedEndTags();
2900  $this->stack->popTag( $value );
2901  $this->afe->clearToMarker();
2902  return true;
2903 
2904  case 'br':
2905  # Turn </br> into <br>
2906  return $this->inBodyMode( 'tag', $value, [] );
2907  }
2908 
2909  // Any other end tag goes here
2910  foreach ( $this->stack as $i => $node ) {
2911  if ( $node->isHtmlNamed( $value ) ) {
2912  $this->stack->generateImpliedEndTags( $value );
2913  $this->stack->popTo( $i ); # including $i
2914  break;
2915  } elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2916  return true; // ignore this close token.
2917  }
2918  }
2919  return true;
2920  } elseif ( $token === 'comment' ) {
2921  $this->stack->insertComment( $value );
2922  return true;
2923  } else {
2924  Assert::invariant( false, "Bad token type: $token" );
2925  }
2926  }
2927 
/**
 * Token handler for the "in table" insertion mode.
 *
 * Table-structure start tags open the corresponding element (first
 * implying a <colgroup> or <tbody> when a <col> or a row/cell arrives
 * without one), a nested <table> start tag closes the open table before
 * being reprocessed, and stray table-part end tags are dropped. Every
 * token that is not handled explicitly is run through the in-body rules
 * with foster parenting switched on for the duration of that call.
 *
 * @param string $token Token type: 'text', 'eof', 'tag', 'endtag' or 'comment'
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded to other handlers
 * @return bool true when the token was dealt with here, otherwise the
 *   result of the handler the token was delegated to
 */
private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
    // The token types are mutually exclusive, so the trivial ones are
    // dispatched first.
    if ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }
    if ( $token === 'eof' ) {
        $this->stopParsing();
        return true;
    }

    if ( $token === 'text' ) {
        if ( $this->textIntegrationMode ) {
            return $this->inBodyMode( $token, $value, $attribs, $selfclose );
        }
        if ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
            // Start buffering table text, remembering which mode to
            // come back to once a non-text token shows up.
            $this->pendingTableText = '';
            $this->originalInsertionMode = $this->parseMode;
            return $this->switchModeAndReprocess(
                'inTableTextMode', $token, $value, $attribs, $selfclose
            );
        }
        // Any other text is handled by the "anything else" clause below.
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'style':
            # OMITTED: <script>
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );

            case 'caption':
                $this->afe->insertMarker();
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inCaptionMode' );
                return true;

            case 'colgroup':
                $this->stack->clearToContext( BalanceSets::$tableContextSet );
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inColumnGroupMode' );
                return true;

            case 'col':
                // Imply a <colgroup>, then hand the <col> back to the
                // tree builder.
                $this->inTableMode( 'tag', 'colgroup', [] );
                return $this->insertToken( $token, $value, $attribs, $selfclose );

            case 'tbody':
            case 'tfoot':
            case 'thead':
                $this->stack->clearToContext( BalanceSets::$tableContextSet );
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inTableBodyMode' );
                return true;

            case 'td':
            case 'th':
            case 'tr':
                // Imply a <tbody>, then hand the token back to the
                // tree builder.
                $this->inTableMode( 'tag', 'tbody', [] );
                return $this->insertToken( $token, $value, $attribs, $selfclose );

            case 'table':
                if ( $this->stack->inTableScope( $value ) ) {
                    // Act as if </table> had been seen, then reprocess.
                    $this->inTableMode( 'endtag', $value );
                    return $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true; // Ignore this tag.

            case 'input':
                $isHidden = isset( $attribs['type'] ) &&
                    strcasecmp( $attribs['type'], 'hidden' ) === 0;
                if ( !$isHidden ) {
                    break; // Handle this as "everything else"
                }
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->stack->pop();
                return true;

            case 'form':
                if (
                    !$this->formElementPointer &&
                    $this->stack->indexOf( 'template' ) < 0
                ) {
                    $form = $this->stack->insertHTMLElement( $value, $attribs );
                    $this->formElementPointer = $form;
                    $this->stack->popTag( $form );
                }
                // Otherwise the token is simply ignored.
                return true;
        }
        // Unlisted start tags use the "anything else" clause.
    } elseif ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );

            case 'table':
                if ( $this->stack->inTableScope( $value ) ) {
                    $this->stack->popTag( $value );
                    $this->resetInsertionMode();
                }
                return true; // Ignored when no table is in table scope.

            # OMITTED: <body>
            case 'caption':
            case 'col':
            case 'colgroup':
            # OMITTED: <html>
            case 'tbody':
            case 'td':
            case 'tfoot':
            case 'th':
            case 'thead':
            case 'tr':
                return true; // Ignore the token.
        }
        // Unlisted end tags use the "anything else" clause.
    }

    // This is the "anything else" case: in-body rules, foster parented.
    $this->stack->fosterParentMode = true;
    $this->inBodyMode( $token, $value, $attribs, $selfclose );
    $this->stack->fosterParentMode = false;
    return true;
}
3036 
/**
 * Token handler for the "in table text" insertion mode.
 *
 * Text tokens are appended to the pending-table-text buffer. The first
 * token of any other type flushes that buffer: whitespace-only text is
 * inserted as-is, anything else goes through the in-body rules with
 * foster parenting enabled. The token is then reprocessed in the
 * insertion mode that was saved in $this->originalInsertionMode.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true for buffered text, otherwise the result of
 *   reprocessing the token in the saved mode
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'text' ) {
        $this->pendingTableText .= $value;
        return true;
    }

    // Non-text token: take ownership of the buffered text and reset it.
    $buffered = $this->pendingTableText;
    $this->pendingTableText = '';

    if ( !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered ) ) {
        // Pending text is just whitespace.
        $this->stack->insertText( $buffered );
    } else {
        // This should match the "anything else" case inTableMode
        $this->stack->fosterParentMode = true;
        $this->inBodyMode( 'text', $buffered );
        $this->stack->fosterParentMode = false;
    }

    $resumeMode = $this->originalInsertionMode;
    return $this->switchModeAndReprocess( $resumeMode, $token, $value, $attribs, $selfclose );
}
3058 
/**
 * Helper for inCaptionMode: close the open <caption>, if there is one in
 * table scope, and switch back to the "in table" insertion mode.
 *
 * @return bool true if a caption was closed, false if none was in table
 *   scope (in which case nothing is changed)
 */
private function endCaption() {
    if ( $this->stack->inTableScope( 'caption' ) ) {
        $this->stack->generateImpliedEndTags();
        $this->stack->popTag( 'caption' );
        $this->afe->clearToMarker();
        $this->switchMode( 'inTableMode' );
        return true;
    }
    return false;
}
3070 
/**
 * Token handler for the "in caption" insertion mode.
 *
 * A table-structure start tag or a </table> end tag closes the caption
 * and is then handed back to the tree builder; </caption> just closes
 * it. End tags for other table parts are dropped, and everything else
 * is processed with the in-body rules.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when handled here, otherwise the in-body handler's result
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'caption':
                $this->endCaption();
                return true;

            case 'table':
                // Only reprocess if there really was a caption to close.
                if ( $this->endCaption() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;

            case 'body':
            case 'col':
            case 'colgroup':
            # OMITTED: <html>
            case 'tbody':
            case 'td':
            case 'tfoot':
            case 'th':
            case 'thead':
            case 'tr':
                // Ignore the token
                return true;
        }
        // Other end tags: "anything else" below.
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'caption':
            case 'col':
            case 'colgroup':
            case 'tbody':
            case 'td':
            case 'tfoot':
            case 'th':
            case 'thead':
            case 'tr':
                // Only reprocess if there really was a caption to close.
                if ( $this->endCaption() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;
        }
        // Other start tags: "anything else" below.
    }

    // The Anything Else case
    return $this->inBodyMode( $token, $value, $attribs, $selfclose );
}
3117 
/**
 * Token handler for the "in column group" insertion mode.
 *
 * Leading whitespace of a text token is inserted directly; <col> is
 * inserted and immediately popped; template tags go to the in-head
 * rules; </col> is dropped. Any remaining token closes the <colgroup>
 * (when it is the current node) and is handed back to the tree builder,
 * or is ignored when the current node is not a <colgroup>.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when handled here, otherwise the delegated handler's result
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }
    if ( $token === 'eof' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfclose );
    }

    if ( $token === 'text' ) {
        if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $m ) ) {
            // Emit the whitespace prefix and keep only the remainder.
            $leading = $m[0];
            $this->stack->insertText( $leading );
            $value = substr( $value, strlen( $leading ) );
        }
        if ( strlen( $value ) === 0 ) {
            return true; // All text handled.
        }
        // Non-whitespace remainder is handled by "anything else" below.
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );

            # OMITTED: <html>
            case 'col':
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->stack->pop();
                return true;
        }
        // Other start tags: "anything else".
    } elseif ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );

            case 'col':
                return true; // Ignore the token.

            case 'colgroup':
                if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
                    $this->stack->pop();
                    $this->switchMode( 'inTableMode' );
                }
                // Otherwise the token is ignored.
                return true;
        }
        // Other end tags: "anything else".
    }

    // Anything else
    if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
        // Act as if </colgroup> had been seen, then reprocess the token.
        $this->inColumnGroupMode( 'endtag', 'colgroup' );
        return $this->insertToken( $token, $value, $attribs, $selfclose );
    }
    return true; // Ignore the token.
}
3168 
/**
 * Helper for inTableBodyMode: close the open table section (<tbody>,
 * <thead> or <tfoot>) and switch back to the "in table" insertion mode.
 *
 * @return bool true if a section was closed, false if none of the three
 *   section elements is in table scope (in which case nothing is changed)
 */
private function endSection() {
    $sectionOpen = false;
    // Same probing order as a short-circuiting "or": stop at the first hit.
    foreach ( [ 'tbody', 'thead', 'tfoot' ] as $section ) {
        if ( $this->stack->inTableScope( $section ) ) {
            $sectionOpen = true;
            break;
        }
    }
    if ( !$sectionOpen ) {
        return false;
    }

    $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
    $this->stack->pop();
    $this->switchMode( 'inTableMode' );
    return true;
}
/**
 * Token handler for the "in table body" insertion mode.
 *
 * <tr> opens a row; a bare <td>/<th> implies a <tr> first. Start tags
 * that begin another table part, and </table>, close the current
 * section and are then handed back to the tree builder. End tags for
 * parts that cannot be open here are dropped; everything else is
 * processed with the in-table rules.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when handled here, otherwise the in-table handler's result
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'tbody':
            case 'tfoot':
            case 'thead':
                // Only close a section when the named one is really open.
                if ( $this->stack->inTableScope( $value ) ) {
                    $this->endSection();
                }
                return true;

            case 'table':
                if ( $this->endSection() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;

            # OMITTED: <body>
            case 'caption':
            case 'col':
            case 'colgroup':
            # OMITTED: <html>
            case 'td':
            case 'th':
            case 'tr':
                return true; // Ignore the token.
        }
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'tr':
                $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inRowMode' );
                return true;

            case 'th':
            case 'td':
                // Imply a <tr>, then hand the cell back to the tree builder.
                $this->inTableBodyMode( 'tag', 'tr', [] );
                $this->insertToken( $token, $value, $attribs, $selfclose );
                return true;

            case 'caption':
            case 'col':
            case 'colgroup':
            case 'tbody':
            case 'tfoot':
            case 'thead':
                if ( $this->endSection() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;
        }
    }

    // Anything else:
    return $this->inTableMode( $token, $value, $attribs, $selfclose );
}
3235 
/**
 * Helper for inRowMode: close the open <tr> and switch back to the
 * "in table body" insertion mode.
 *
 * @return bool true if a row was closed, false if no <tr> is in table
 *   scope (in which case nothing is changed)
 */
private function endRow() {
    if ( $this->stack->inTableScope( 'tr' ) ) {
        $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
        $this->stack->pop();
        $this->switchMode( 'inTableBodyMode' );
        return true;
    }
    return false;
}
/**
 * Token handler for the "in row" insertion mode.
 *
 * <td>/<th> open a cell (and put a marker on the active formatting
 * element list). Start tags for other table parts, </table> and the
 * section end tags close the row and are then handed back to the tree
 * builder; </tr> just closes the row. End tags for parts that cannot be
 * open here are dropped; everything else uses the in-table rules.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when handled here, otherwise the in-table handler's result
 */
private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'tr':
                $this->endRow();
                return true;

            case 'table':
                if ( $this->endRow() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;

            case 'tbody':
            case 'tfoot':
            case 'thead':
                // The row is closed only when the named section is open.
                if ( $this->stack->inTableScope( $value ) ) {
                    if ( $this->endRow() ) {
                        $this->insertToken( $token, $value, $attribs, $selfclose );
                    }
                }
                return true;

            # OMITTED: <body>
            case 'caption':
            case 'col':
            case 'colgroup':
            # OMITTED: <html>
            case 'td':
            case 'th':
                return true; // Ignore the token.
        }
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'th':
            case 'td':
                $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inCellMode' );
                $this->afe->insertMarker();
                return true;

            case 'caption':
            case 'col':
            case 'colgroup':
            case 'tbody':
            case 'tfoot':
            case 'thead':
            case 'tr':
                if ( $this->endRow() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;
        }
    }

    // Anything else:
    return $this->inTableMode( $token, $value, $attribs, $selfclose );
}
3301 
/**
 * Helper for inCellMode: close the open table cell by feeding the
 * matching end tag to inCellMode. <td> is tried before <th>.
 *
 * @return bool true if a cell end tag was issued, false if neither
 *   <td> nor <th> is in table scope
 */
private function endCell() {
    foreach ( [ 'td', 'th' ] as $cellTag ) {
        if ( $this->stack->inTableScope( $cellTag ) ) {
            $this->inCellMode( 'endtag', $cellTag );
            return true;
        }
    }
    return false;
}
/**
 * Token handler for the "in cell" insertion mode.
 *
 * Start tags for table parts close the current cell and are then handed
 * back to the tree builder. </td> and </th> close the named cell when it
 * is in table scope. End tags for enclosing table parts close the cell
 * and are reprocessed, provided the named element is in table scope.
 * </caption>, </col> and </colgroup> are dropped; everything else uses
 * the in-body rules.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when handled here, otherwise the in-body handler's result
 */
private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'td':
            case 'th':
                if ( !$this->stack->inTableScope( $value ) ) {
                    return true; // No such cell open: ignore.
                }
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( $value );
                $this->afe->clearToMarker();
                $this->switchMode( 'inRowMode' );
                return true;

            case 'table':
            case 'tbody':
            case 'tfoot':
            case 'thead':
            case 'tr':
                if ( !$this->stack->inTableScope( $value ) ) {
                    return true; // Named element not open: ignore.
                }
                // Close whichever cell is open, then reprocess the token.
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( BalanceSets::$tableCellSet );
                $this->afe->clearToMarker();
                $this->switchMode( 'inRowMode' );
                $this->insertToken( $token, $value, $attribs, $selfclose );
                return true;

            # OMITTED: <body>
            case 'caption':
            case 'col':
            case 'colgroup':
            # OMITTED: <html>
                return true;
        }
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'caption':
            case 'col':
            case 'colgroup':
            case 'tbody':
            case 'td':
            case 'tfoot':
            case 'th':
            case 'thead':
            case 'tr':
                if ( $this->endCell() ) {
                    $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true;
        }
    }

    // Anything else:
    return $this->inBodyMode( $token, $value, $attribs, $selfclose );
}
3367 
/**
 * Token handler for the "in select" insertion mode.
 *
 * Text and comments are inserted directly. <option> and <optgroup>
 * implicitly close an open option (and, for <optgroup>, an open
 * optgroup) before being inserted. A nested <select> start tag is
 * treated like </select>. <input>, <keygen> and <textarea> close the
 * select and are then handed back to the tree builder. Script and
 * template tags go to the in-head rules. All other tokens are ignored.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when handled or ignored here, otherwise the
 *   delegated handler's result
 */
private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'text' ) {
        $this->stack->insertText( $value );
        return true;
    }
    if ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }
    if ( $token === 'eof' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfclose );
    }

    if ( $token === 'tag' ) {
        switch ( $value ) {
            case 'script':
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );

            # OMITTED: <html>
            case 'option':
                if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                    $this->stack->pop();
                }
                $this->stack->insertHTMLElement( $value, $attribs );
                return true;

            case 'optgroup':
                // Close an open option first, then an open optgroup.
                if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                    $this->stack->pop();
                }
                if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
                    $this->stack->pop();
                }
                $this->stack->insertHTMLElement( $value, $attribs );
                return true;

            case 'select':
                $this->inSelectMode( 'endtag', $value ); // treat it like endtag
                return true;

            case 'input':
            case 'keygen':
            case 'textarea':
                if ( $this->stack->inSelectScope( 'select' ) ) {
                    // Close the select, then reprocess the token.
                    $this->inSelectMode( 'endtag', 'select' );
                    return $this->insertToken( $token, $value, $attribs, $selfclose );
                }
                return true; // ignore token (fragment case)
        }
    } elseif ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );

            case 'option':
                if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                    $this->stack->pop();
                }
                return true;

            case 'optgroup':
                // An option sitting directly inside an optgroup is closed
                // along with it.
                if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                    $depth = $this->stack->length();
                    if (
                        $depth >= 2 &&
                        $this->stack->node( $depth - 2 )->isHtmlNamed( 'optgroup' )
                    ) {
                        $this->stack->pop();
                    }
                }
                if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
                    $this->stack->pop();
                }
                return true;

            case 'select':
                if ( $this->stack->inSelectScope( $value ) ) {
                    $this->stack->popTag( $value );
                    $this->resetInsertionMode();
                }
                // Otherwise: fragment case, nothing to close.
                return true;
        }
    }

    // anything else: just ignore the token
    return true;
}
3443 
/**
 * Token handler for the "in select in table" insertion mode.
 *
 * A start tag for a table part closes the <select> and is then handed
 * back to the tree builder. An end tag for a table part does the same,
 * but only when the named element is in table scope; otherwise it is
 * ignored. All other tokens use the in-select rules.
 *
 * @param string $token Token type
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool true when the token is ignored here, otherwise the
 *   delegated handler's result
 */
private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
    // Only start and end tags get special treatment; any other token
    // whose value happens to equal a table tag name goes straight to the
    // in-select rules.
    if ( $token === 'tag' || $token === 'endtag' ) {
        switch ( $value ) {
            case 'caption':
            case 'table':
            case 'tbody':
            case 'tfoot':
            case 'thead':
            case 'tr':
            case 'td':
            case 'th':
                if ( $token === 'endtag' && !$this->stack->inTableScope( $value ) ) {
                    return true;
                }
                // Close the select, then reprocess the token.
                $this->inSelectInTableMode( 'endtag', 'select' );
                return $this->insertToken( $token, $value, $attribs, $selfclose );
        }
    }

    // anything else
    return $this->inSelectMode( $token, $value, $attribs, $selfclose );
}
3468 
/**
 * Token handler for the "in template" insertion mode.
 *
 * Text and comments use the in-body rules. Head-level start tags and
 * the </template> end tag use the in-head rules. Table-related start
 * tags switch to the matching table insertion mode and are reprocessed
 * there; any other start tag switches to the in-body mode. Other end
 * tags are ignored. At end of input an open <template> is popped and
 * the eof token is handed back to the tree builder; with no <template>
 * on the stack, parsing stops.
 *
 * @param string $token Token type: 'text', 'comment', 'eof', 'tag' or 'endtag'
 * @param string $value Text, tag name or comment body, depending on $token
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfclose Self-closing flag; only forwarded
 * @return bool|null true when handled here, otherwise the delegated
 *   handler's result; an unknown token type trips an invariant assertion
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
    if ( $token === 'text' || $token === 'comment' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfclose );
    }

    if ( $token === 'eof' ) {
        if ( $this->stack->indexOf( 'template' ) < 0 ) {
            $this->stopParsing();
            return true;
        }
        // Close the dangling template, then reprocess the eof token.
        $this->stack->popTag( 'template' );
        $this->afe->clearToMarker();
        array_pop( $this->templateInsertionModes );
        $this->resetInsertionMode();
        $this->insertToken( $token, $value, $attribs, $selfclose );
        return true;
    }

    if ( $token === 'tag' ) {
        switch ( $value ) {
            case 'base':
            case 'basefont':
            case 'bgsound':
            case 'link':
            case 'meta':
            case 'noframes':
            # OMITTED: <script>
            case 'style':
            case 'template':
            # OMITTED: <title>
                return $this->inHeadMode( $token, $value, $attribs, $selfclose );
        }

        // Start tags that select a table-related insertion mode.
        $tableModes = [
            'caption' => 'inTableMode',
            'colgroup' => 'inTableMode',
            'tbody' => 'inTableMode',
            'tfoot' => 'inTableMode',
            'thead' => 'inTableMode',
            'col' => 'inColumnGroupMode',
            'tr' => 'inTableBodyMode',
            'td' => 'inRowMode',
            'th' => 'inRowMode',
        ];
        $nextMode = isset( $tableModes[$value] ) ? $tableModes[$value] : 'inBodyMode';
        return $this->switchModeAndReprocess(
            $nextMode, $token, $value, $attribs, $selfclose
        );
    }

    if ( $token === 'endtag' ) {
        if ( $value === 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfclose );
        }
        return true; // Every other end tag is ignored.
    }

    Assert::invariant( false, "Bad token type: $token" );
}
3535 }
inCellMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3314
static static static static static $specialSet
Definition: Balancer.php:108
Config $config
Definition: MediaWiki.php:37
removeElement(BalanceElement $elt, $flatten=true)
Remove the given $elt from the BalanceStack, optionally flattening it in the process.
Definition: Balancer.php:1026
$children
An array of children of this element.
Definition: Balancer.php:342
static static $inInvertedSelectScopeSet
Definition: Balancer.php:249
inSpecificScope($tag, $set)
Determine if the stack has $tag in a specific scope, $set.
Definition: Balancer.php:858
null means default in associative array form
Definition: hooks.txt:1816
node($idx)
Return the BalanceElement at the given position $idx, where position 0 represents the root element...
Definition: Balancer.php:914
static static static $mathmlTextIntegrationPointSet
Definition: Balancer.php:255
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1286
insertToken($token, $value, $attribs=null, $selfclose=false)
Pass a token to the tree builder.
Definition: Balancer.php:1954
inRowMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3246
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:776
the array() calling protocol came about after MediaWiki 1.4rc1.
flatten($tidyCompat=false)
Flatten this node and all of its children into a string, as specified by the HTML serialization speci...
Definition: Balancer.php:469
static $impliedEndTagsSet
Definition: Balancer.php:157
$fosterParentMode
Foster parent mode determines how nodes are inserted into the stack.
Definition: Balancer.php:662
clearToContext($set)
Pop elements off the stack not including the first element in the specified set.
Definition: Balancer.php:1010
reconstruct($stack)
Reconstruct the active formatting elements.
Definition: Balancer.php:1662
$elements
Backing storage for the stack.
Definition: Balancer.php:655
insertAfter(BalanceElement $a, BalanceElement $b)
Find $a in the BalanceStack and insert $b after it.
Definition: Balancer.php:1061
removeChild(BalanceElement $elt)
Remove the given child from this element.
Definition: Balancer.php:387
__toString()
Get a string representation of the AFE list, for debugging.
Definition: Balancer.php:1708
getOutput()
Return a string representing the output of the tree builder: all the children of the root <html> node...
Definition: Balancer.php:690
static $htmlIntegrationPointSet
Definition: Balancer.php:262
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
pop()
Remove the current node from the BalanceStack, flattening it in the process.
Definition: Balancer.php:965
parseRawText($value, $attribs=null)
Definition: Balancer.php:2318
insertText($value, $isComment=false)
Insert text at the appropriate place for inserting a node.
Definition: Balancer.php:715
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1980
indexOf($tag)
Return the position of the given BalanceElement, set, or HTML tag name string in the BalanceStack...
Definition: Balancer.php:944
$parent
Parent of this element, or the string "flat" if this element has already been flattened into its pare...
Definition: Balancer.php:333
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their contents
Definition: database.txt:2
Convenience class for iterating over an array in reverse order.
$tidyCompat
Tidy compatibility mode, determines behavior of body/blockquote.
Definition: Balancer.php:666
insertBefore(BalanceElement $a, $b)
Find $a in the list of children and insert $b before it.
Definition: Balancer.php:405
static $tableContextSet
Definition: Balancer.php:180
$value
The list of active formatting elements, which is used to handle mis-nested formatting element tags in...
Definition: Balancer.php:1387
static $extraLinefeedSet
Definition: Balancer.php:95
The "stack of open elements" as defined in the HTML5 tree builder spec.
Definition: Balancer.php:650
$tail
The last (most recent) element in the list.
Definition: Balancer.php:1389
length()
Return the number of elements currently in the BalanceStack.
Definition: Balancer.php:957
getNoahKey()
Get a string key for the Noah's Ark algorithm.
Definition: Balancer.php:625
$noahTableStack
An array of arrays representing the population of elements in each bucket according to the Noah's Ark...
Definition: Balancer.php:1412
$namespaceURI
The namespace of the element.
Definition: Balancer.php:316
isHtmlIntegrationPoint()
Determine if $this represents an HTML integration point, as defined in the HTML5 specification.
Definition: Balancer.php:606
balance($text, $processingCallback=null, $processingArgs=[])
Return a balanced HTML string for the HTML fragment given by $text, subject to the caveats listed in ...
Definition: Balancer.php:1904
isInList(BalanceElement $elt)
Determine whether an element is in the list of formatting elements.
Definition: Balancer.php:1531
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:748
inCaptionMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3071
__construct()
Create a new BalanceStack with a single BalanceElement on it, representing the root <html> node...
Definition: Balancer.php:676
isHtml()
Determine if $this represents an element in the HTML namespace.
Definition: Balancer.php:584
insertComment($value)
Insert a comment at the appropriate place for inserting a node.
Definition: Balancer.php:705
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like select() and insert() are usually more convenient.They take care of things like table prefixes and escaping for you.If you really need to make your own SQL
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED!Use HtmlPageLinkRendererBegin instead.Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1814
appendChild($elt)
Append $elt to the end of the list of children.
Definition: Balancer.php:427
insertForeignToken($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:2022
$last
inBodyMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:2412
A BalanceElement is a simplified version of a DOM Node.
Definition: Balancer.php:311
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1816
adjustedCurrentNode($fragmentContext)
Return the adjusted current node.
Definition: Balancer.php:894
const ELEMENT_BITS_REGEX
Acceptable tag name charset from HTML5 parsing spec http://www.w3.org/TR/html5/syntax.html#tag-open-state.
Definition: Sanitizer.php:46
push(BalanceElement $elt)
Follow the steps required when the spec requires us to "push onto the list of active formatting eleme...
Definition: Balancer.php:1439
__toString()
Serialize this node and all of its children to a string, as specified by the HTML serialization speci...
Definition: Balancer.php:514
__construct($namespaceURI, $localName, array $attribs)
Make a new BalanceElement corresponding to the HTML DOM Element with the given localname, namespace, and attributes.
Definition: Balancer.php:374
static $inScopeSet
Definition: Balancer.php:208
advance()
Grab the next "token" from $bitsIterator.
Definition: Balancer.php:2128
replaceAt($idx, BalanceElement $elt)
Replace the element at position $idx in the BalanceStack with $elt.
Definition: Balancer.php:923
generateImpliedEndTags($butnot=null, $thorough=false)
Generate implied end tags.
Definition: Balancer.php:876
replace(BalanceElement $a, BalanceElement $b)
Find element $a in the list and replace it with element $b.
Definition: Balancer.php:1607
inTableMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:2928
static validateTag($params, $element)
Takes attribute names and values for a tag and the tag name and validates that the tag is allowed to ...
Definition: Sanitizer.php:712
fosterParent($elt)
Foster parent the given $elt in the stack of open elements.
Definition: Balancer.php:1079
static $tidyPWrapSet
Definition: Balancer.php:271
An iterator which works exactly like:
inTableTextMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3037
static $formAssociatedSet
Definition: Balancer.php:200
static $emptyElementSet
Definition: Balancer.php:85
$currentNode
Reference to the current element.
Definition: Balancer.php:670
inSelectScope($tag)
Determine if the stack has $tag in select scope.
Definition: Balancer.php:837
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
inListItemScope($tag)
Determine if the stack has $tag in list item scope.
Definition: Balancer.php:817
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:981
isHtmlNamed($tagName)
Determine if this element is an HTML element with the specified name.
Definition: Balancer.php:574
Utility constants and sets for the HTML5 tree building algorithm.
Definition: Balancer.php:69
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$localName
The lower-cased name of the element.
Definition: Balancer.php:321
static $tidyInlineSet
Definition: Balancer.php:281
inScope($tag)
Determine if the stack has $tag in scope.
Definition: Balancer.php:797
inTableBodyMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3183
const VALID_COMMENT_REGEX
Valid HTML5 comments.
Definition: Balancer.php:1808
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
$nextNoah
The next element in the Noah's Ark species bucket.
Definition: Balancer.php:364
isA($set)
Determine if $this represents a specific HTML tag, is a member of a tag set, or is equal to another B...
Definition: Balancer.php:557
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
static $addressDivPSet
Definition: Balancer.php:144
insertHTMLElement($tag, $attribs)
Insert an HTML element at the appropriate place, pushing it on to the open elements stack...
Definition: Balancer.php:755
An implementation of the tree building portion of the HTML5 parsing spec.
Definition: Balancer.php:1785
popTo($idx)
Remove all nodes up to and including position $idx from the BalanceStack, flattening them in the proc...
Definition: Balancer.php:982
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
static $thoroughImpliedEndTagsSet
Definition: Balancer.php:165
insertAfter(BalanceElement $a, BalanceElement $b)
Find $a in the list and insert $b after it.
Definition: Balancer.php:1638
findElementByTag($tag)
Find and return the last element with the specified tag between the end of the list and the last mark...
Definition: Balancer.php:1516
$nextAFE
The next active formatting element in the list, or null if this is the end of the AFE list or if the ...
Definition: Balancer.php:353
getIterator()
Return an iterator over this stack which visits the current node first, and the root node last...
Definition: Balancer.php:904
static $tableRowContextSet
Definition: Balancer.php:193
popTag($tag)
Pop elements off the stack up to and including the first element with the specified HTML tagname (or ...
Definition: Balancer.php:995
Bar style
inHeadMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:2342
static $tableSectionRowSet
Definition: Balancer.php:150
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
Definition: hooks.txt:2755
$noahKey
A unique string identifier for Noah's Ark purposes, lazy initialized.
Definition: Balancer.php:347
insertForeignElement($namespaceURI, $tag, $attribs)
Insert a BalanceElement at the appropriate place, pushing it on to the open elements stack...
Definition: Balancer.php:741
__construct(array $config=[])
Create a new Balancer.
Definition: Balancer.php:1859
inTextMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:2325
isMathmlTextIntegrationPoint()
Determine if $this represents a MathML text integration point, as defined in the HTML5 specification...
Definition: Balancer.php:595
clearToMarker()
Follow the steps required when the spec asks us to "clear the list of active formatting elements up t...
Definition: Balancer.php:1479
$count
adoptChildren(BalanceElement $elt)
Transfer all of the children of $elt to $this.
Definition: Balancer.php:447
A pseudo-element used as a marker in the list of active formatting elements.
Definition: Balancer.php:1373
__toString()
Return the contents of the open elements stack as a string for debugging.
Definition: Balancer.php:1358
inTemplateMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3469
inTableScope($tag)
Determine if the stack has $tag in table scope.
Definition: Balancer.php:827
static $headingSet
Definition: Balancer.php:101
serialize()
Definition: ApiMessage.php:94
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1816
$attribs
Attributes for the element, in array form.
Definition: Balancer.php:326
static $inListItemScopeSet
Definition: Balancer.php:224
switchModeAndReprocess($mode, $token, $value, $attribs, $selfclose)
Definition: Balancer.php:2230
insertElement(BalanceElement $elt)
Insert an element at the appropriate place and push it on to the open elements stack.
Definition: Balancer.php:768
inColumnGroupMode($token, $value, $attribs=null, $selfclose=false)
Definition: Balancer.php:3118
inButtonScope($tag)
Determine if the stack has $tag in button scope.
Definition: Balancer.php:807
adoptionAgency($tag, $afe)
Run the "adoption agency algorithm" (AAA) for the given subject tag name.
Definition: Balancer.php:1141
$prevAFE
The previous active formatting element in the list, or null if this is the start of the list or if th...
Definition: Balancer.php:359
$head
The first (least recent) element in the list.
Definition: Balancer.php:1392
static $tableBodyContextSet
Definition: Balancer.php:186
static $tableCellSet
Definition: Balancer.php:175
static encodeAttribute($text)
Encode an attribute value for HTML output.
Definition: Sanitizer.php:1090
$matches
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:310