26 namespace MediaWiki\Tidy;
28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
35 # A note for future librarization[1] -- this file is a good candidate
36 # for splitting into an independent library, except that it is currently
37 # highly optimized for MediaWiki use. It only implements the portions
38 # of the HTML5 tree builder used by tags supported by MediaWiki, and
39 # does not contain a true tokenizer pass, instead relying on
40 # comment stripping, attribute normalization, and escaping done by
41 # the MediaWiki Sanitizer. It also deliberately avoids building
42 # a true DOM in memory, instead serializing elements to an output string
43 # as soon as possible (usually as soon as the tag is closed) to reduce
44 # its memory footprint.
46 # We've been gradually lifting some of these restrictions to handle
47 # non-sanitized output generated by extensions, but we shortcut the tokenizer
48 # for speed (primarily by splitting on `<`) and so rely on syntactic
51 # On the other hand, I've been pretty careful to note with comments in the
52 # code the places where this implementation omits features of the spec or
53 # depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
54 # implement the missing pieces and make this a standalone PHP HTML5 parser.
55 # In order to do so, some sort of MediaWiki-specific API will need
56 # to be added to (a) allow the Balancer to bypass the tokenizer,
57 # and (b) support on-the-fly flattening instead of DOM node creation.
59 # [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
75 self::HTML_NAMESPACE => [
76 'html' =>
true,
'head' =>
true,
'body' =>
true,
'frameset' =>
true,
78 'plaintext' =>
true,
'isindex' =>
true,
79 'xmp' =>
true,
'iframe' =>
true,
'noembed' =>
true,
80 'noscript' =>
true,
'script' =>
true,
86 self::HTML_NAMESPACE => [
87 'area' =>
true,
'base' =>
true,
'basefont' =>
true,
88 'bgsound' =>
true,
'br' =>
true,
'col' =>
true,
'command' =>
true,
89 'embed' =>
true,
'frame' =>
true,
'hr' =>
true,
'img' =>
true,
90 'input' =>
true,
'keygen' =>
true,
'link' =>
true,
'meta' =>
true,
91 'param' =>
true,
'source' =>
true,
'track' =>
true,
'wbr' =>
true
96 self::HTML_NAMESPACE => [
97 'pre' =>
true,
'textarea' =>
true,
'listing' =>
true,
102 self::HTML_NAMESPACE => [
103 'h1' =>
true,
'h2' =>
true,
'h3' =>
true,
104 'h4' =>
true,
'h5' =>
true,
'h6' =>
true
109 self::HTML_NAMESPACE => [
110 'address' =>
true,
'applet' =>
true,
'area' =>
true,
111 'article' =>
true,
'aside' =>
true,
'base' =>
true,
112 'basefont' =>
true,
'bgsound' =>
true,
'blockquote' =>
true,
113 'body' =>
true,
'br' =>
true,
'button' =>
true,
'caption' =>
true,
114 'center' =>
true,
'col' =>
true,
'colgroup' =>
true,
'dd' =>
true,
115 'details' =>
true,
'dir' =>
true,
'div' =>
true,
'dl' =>
true,
116 'dt' =>
true,
'embed' =>
true,
'fieldset' =>
true,
117 'figcaption' =>
true,
'figure' =>
true,
'footer' =>
true,
118 'form' =>
true,
'frame' =>
true,
'frameset' =>
true,
'h1' =>
true,
119 'h2' =>
true,
'h3' =>
true,
'h4' =>
true,
'h5' =>
true,
120 'h6' =>
true,
'head' =>
true,
'header' =>
true,
'hgroup' =>
true,
121 'hr' =>
true,
'html' =>
true,
'iframe' =>
true,
'img' =>
true,
122 'input' =>
true,
'isindex' =>
true,
'li' =>
true,
'link' =>
true,
123 'listing' =>
true,
'main' =>
true,
'marquee' =>
true,
124 'menu' =>
true,
'menuitem' =>
true,
'meta' =>
true,
'nav' =>
true,
125 'noembed' =>
true,
'noframes' =>
true,
'noscript' =>
true,
126 'object' =>
true,
'ol' =>
true,
'p' =>
true,
'param' =>
true,
127 'plaintext' =>
true,
'pre' =>
true,
'script' =>
true,
128 'section' =>
true,
'select' =>
true,
'source' =>
true,
129 'style' =>
true,
'summary' =>
true,
'table' =>
true,
130 'tbody' =>
true,
'td' =>
true,
'template' =>
true,
131 'textarea' =>
true,
'tfoot' =>
true,
'th' =>
true,
'thead' =>
true,
132 'title' =>
true,
'tr' =>
true,
'track' =>
true,
'ul' =>
true,
133 'wbr' =>
true,
'xmp' =>
true
135 self::SVG_NAMESPACE => [
136 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
138 self::MATHML_NAMESPACE => [
139 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
140 'mtext' =>
true,
'annotation-xml' =>
true
145 self::HTML_NAMESPACE => [
146 'address' =>
true,
'div' =>
true,
'p' =>
true
151 self::HTML_NAMESPACE => [
152 'table' =>
true,
'thead' =>
true,
'tbody' =>
true,
153 'tfoot' =>
true,
'tr' =>
true
158 self::HTML_NAMESPACE => [
159 'dd' =>
true,
'dt' =>
true,
'li' =>
true,
'optgroup' =>
true,
160 'option' =>
true,
'p' =>
true,
'rb' =>
true,
'rp' =>
true,
161 'rt' =>
true,
'rtc' =>
true
166 self::HTML_NAMESPACE => [
167 'caption' =>
true,
'colgroup' =>
true,
'dd' =>
true,
'dt' =>
true,
168 'li' =>
true,
'optgroup' =>
true,
'option' =>
true,
'p' =>
true,
169 'rb' =>
true,
'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
170 'tbody' =>
true,
'td' =>
true,
'tfoot' =>
true,
'th' =>
true,
171 'thead' =>
true,
'tr' =>
true
176 self::HTML_NAMESPACE => [
177 'td' =>
true,
'th' =>
true
181 self::HTML_NAMESPACE => [
182 'table' =>
true,
'template' =>
true,
'html' =>
true
187 self::HTML_NAMESPACE => [
188 'tbody' =>
true,
'tfoot' =>
true,
'thead' =>
true,
189 'template' =>
true,
'html' =>
true
194 self::HTML_NAMESPACE => [
195 'tr' =>
true,
'template' =>
true,
'html' =>
true
201 self::HTML_NAMESPACE => [
202 'button' =>
true,
'fieldset' =>
true,
'input' =>
true,
203 'keygen' =>
true,
'object' =>
true,
'output' =>
true,
204 'select' =>
true,
'textarea' =>
true,
'img' =>
true
209 self::HTML_NAMESPACE => [
210 'applet' =>
true,
'caption' =>
true,
'html' =>
true,
211 'marquee' =>
true,
'object' =>
true,
212 'table' =>
true,
'td' =>
true,
'template' =>
true,
215 self::SVG_NAMESPACE => [
216 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
218 self::MATHML_NAMESPACE => [
219 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
220 'mtext' =>
true,
'annotation-xml' =>
true
226 if ( self::$inListItemScopeSet === null ) {
227 self::$inListItemScopeSet = self::$inScopeSet;
228 self::$inListItemScopeSet[self::HTML_NAMESPACE][
'ol'] =
true;
229 self::$inListItemScopeSet[self::HTML_NAMESPACE][
'ul'] =
true;
231 return self::$inListItemScopeSet;
236 if ( self::$inButtonScopeSet === null ) {
237 self::$inButtonScopeSet = self::$inScopeSet;
238 self::$inButtonScopeSet[self::HTML_NAMESPACE][
'button'] =
true;
240 return self::$inButtonScopeSet;
244 self::HTML_NAMESPACE => [
245 'html' =>
true,
'table' =>
true,
'template' =>
true
250 self::HTML_NAMESPACE => [
251 'option' =>
true,
'optgroup' =>
true
256 self::MATHML_NAMESPACE => [
257 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
263 self::SVG_NAMESPACE => [
264 'foreignobject' =>
true,
272 self::HTML_NAMESPACE => [
273 'body' =>
true,
'blockquote' =>
true,
282 self::HTML_NAMESPACE => [
283 'a' =>
true,
'abbr' =>
true,
'acronym' =>
true,
'applet' =>
true,
284 'b' =>
true,
'basefont' =>
true,
'bdo' =>
true,
'big' =>
true,
285 'br' =>
true,
'button' =>
true,
'cite' =>
true,
'code' =>
true,
286 'dfn' =>
true,
'em' =>
true,
'font' =>
true,
'i' =>
true,
287 'iframe' =>
true,
'img' =>
true,
'input' =>
true,
'kbd' =>
true,
288 'label' =>
true,
'legend' =>
true,
'map' =>
true,
'object' =>
true,
289 'param' =>
true,
'q' =>
true,
'rb' =>
true,
'rbc' =>
true,
290 'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
'ruby' =>
true,
291 's' =>
true,
'samp' =>
true,
'select' =>
true,
'small' =>
true,
292 'span' =>
true,
'strike' =>
true,
'strong' =>
true,
'sub' =>
true,
293 'sup' =>
true,
'textarea' =>
true,
'tt' =>
true,
'u' =>
true,
379 $this->parent = null;
380 $this->children = [];
388 Assert::precondition(
389 $this->parent !==
'flat',
"Can't removeChild after flattening $this"
392 $elt->parent === $this,
'elt',
'must have $this as a parent'
394 $idx = array_search( $elt, $this->children,
true );
395 Assert::parameter( $idx !==
false,
'$elt',
'must be a child of $this' );
397 array_splice( $this->children, $idx, 1 );
406 Assert::precondition(
407 $this->parent !==
'flat',
"Can't insertBefore after flattening."
409 $idx = array_search( $a, $this->children,
true );
410 Assert::parameter( $idx !==
false,
'$a',
'must be a child of $this' );
411 if ( is_string( $b ) ) {
412 array_splice( $this->children, $idx, 0, [ $b ] );
414 Assert::parameter( $b->parent !==
'flat',
'$b',
"Can't be flat" );
415 if ( $b->parent !== null ) {
416 $b->parent->removeChild( $b );
418 array_splice( $this->children, $idx, 0, [ $b ] );
428 Assert::precondition(
429 $this->parent !==
'flat',
"Can't appendChild after flattening."
431 if ( is_string( $elt ) ) {
432 array_push( $this->children, $elt );
436 if ( $elt->parent !== null ) {
437 $elt->parent->removeChild( $elt );
439 array_push( $this->children, $elt );
440 $elt->parent = $this;
448 Assert::precondition(
449 $elt->parent !==
'flat',
"Can't adoptChildren after flattening."
451 foreach ( $elt->children
as $child ) {
452 if ( !is_string( $child ) ) {
455 $child->parent = null;
469 public function flatten( $tidyCompat =
false ) {
470 Assert::parameter( $this->parent !== null,
'$this',
'must be a child' );
471 Assert::parameter( $this->parent !==
'flat',
'$this',
'already flat' );
472 $idx = array_search( $this, $this->parent->children,
true );
474 $idx !==
false,
'$this',
'must be a child of its parent'
478 foreach ( $this->children
as $elt ) {
479 if ( !is_string( $elt ) ) {
480 $elt = $elt->flatten( $tidyCompat );
482 if ( $blank && preg_match(
'/[^\t\n\f\r ]/', $elt ) ) {
487 $this->localName =
'p';
488 } elseif ( $blank ) {
491 if ( !count( $this->attribs ) &&
492 ( $this->localName ===
'tr' || $this->localName ===
'li' )
494 $this->attribs = [
'class' =>
"mw-empty-elt" ];
498 $flat = $blank ?
'' :
"{$this}";
502 $this->parent->children[$idx] = $flat;
503 $this->parent =
'flat'; # for assertion checking
518 $encAttribs .=
" $name=\"$encValue\"";
521 $out =
"<{$this->localName}{$encAttribs}>";
522 $len = strlen(
$out );
524 foreach ( $this->children
as $elt ) {
527 $out .=
"</{$this->localName}>";
534 $out = substr(
$out, 0, $len + 1 ) .
535 substr(
$out, $len );
538 $out =
"<{$this->localName}{$encAttribs} />";
540 count( $this->children ) === 0,
541 "Empty elements shouldn't have children."
547 # Utility functions on BalanceElements.
557 public function isA( $set ) {
559 return $this === $set;
560 } elseif ( is_array( $set ) ) {
561 return isset( $set[$this->namespaceURI] ) &&
562 isset( $set[$this->namespaceURI][$this->localName] );
564 # assume this is an HTML element name.
565 return $this->
isHtml() && $this->localName === $set;
576 && $this->localName === $tagName;
612 $this->localName ===
'annotation-xml' &&
613 isset( $this->attribs[
'encoding'] ) &&
614 ( strcasecmp( $this->attribs[
'encoding'],
'text/html' ) == 0 ||
615 strcasecmp( $this->attribs[
'encoding'],
'application/xhtml+xml' ) == 0 )
626 if ( $this->noahKey === null ) {
629 $this->noahKey =
serialize( [ $this->namespaceURI, $this->localName,
$attribs ] );
677 # always a root <html> element on the stack
682 $this->currentNode = $this->elements[0];
693 foreach ( $this->elements[0]->children
as $elt ) {
694 $out .= is_string( $elt ) ? $elt :
695 $elt->flatten( $this->tidyCompat );
717 $this->fosterParentMode &&
722 $this->tidyCompat && !$isComment &&
725 $this->insertHTMLELement(
'mw:p-wrap', [] );
728 $this->currentNode->appendChild(
$value );
770 $this->currentNode->isHtmlNamed(
'mw:p-wrap' ) &&
777 $this->fosterParentMode &&
782 $this->currentNode->appendChild( $elt );
784 Assert::invariant( $elt->parent !== null,
"$elt must be in tree" );
785 Assert::invariant( $elt->parent !==
'flat',
"$elt must not have been previous flattened" );
786 array_push( $this->elements, $elt );
787 $this->currentNode = $elt;
840 foreach ( $this
as $elt ) {
841 if ( $elt->isA(
$tag ) ) {
859 foreach ( $this
as $elt ) {
860 if ( $elt->isA(
$tag ) ) {
863 if ( $elt->isA( $set ) ) {
877 $endTagSet = $thorough ?
880 while ( $this->currentNode ) {
881 if ( $butnot !== null && $this->currentNode->isHtmlNamed( $butnot ) ) {
884 if ( !$this->currentNode->isA( $endTagSet ) ) {
895 return ( $fragmentContext && count( $this->elements ) === 1 ) ?
915 return $this->elements[ $idx ];
924 Assert::precondition(
925 $this->elements[$idx]->parent !==
'flat',
926 'Replaced element should not have already been flattened.'
928 Assert::precondition(
929 $elt->parent !==
'flat',
930 'New element should not have already been flattened.'
932 $this->elements[$idx] = $elt;
933 if ( $idx === count( $this->elements ) - 1 ) {
934 $this->currentNode = $elt;
945 for ( $i = count( $this->elements ) - 1; $i >= 0; $i-- ) {
946 if ( $this->elements[$i]->isA(
$tag ) ) {
958 return count( $this->elements );
966 $elt = array_pop( $this->elements );
967 if ( count( $this->elements ) ) {
968 $this->currentNode = $this->elements[ count( $this->elements ) - 1 ];
970 $this->currentNode = null;
972 if ( !$elt->isHtmlNamed(
'mw:p-wrap' ) ) {
973 $elt->flatten( $this->tidyCompat );
983 $length = count( $this->elements );
984 for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
996 while ( $this->currentNode ) {
997 if ( $this->currentNode->isA(
$tag ) ) {
1012 for ( $length = count( $this->elements ); $length > 1; $length-- ) {
1013 if ( $this->currentNode->isA( $set ) ) {
1028 $elt->parent !==
'flat',
1030 '$elt should not already have been flattened.'
1033 $elt->parent->parent !==
'flat',
1035 'The parent of $elt should not already have been flattened.'
1037 $idx = array_search( $elt, $this->elements,
true );
1038 Assert::parameter( $idx !==
false,
'$elt',
'must be in stack' );
1039 array_splice( $this->elements, $idx, 1 );
1040 if ( $idx === count( $this->elements ) ) {
1041 $this->currentNode = $this->elements[$idx - 1];
1048 $elt->
flatten( $this->tidyCompat );
1050 Assert::postcondition(
1051 array_search( $elt, $this->elements,
true ) ===
false,
1052 '$elt should no longer be in open elements stack'
1063 Assert::parameter( $idx !==
false,
'$a',
'must be in stack' );
1064 if ( $idx === count( $this->elements ) - 1 ) {
1065 array_push( $this->elements, $b );
1066 $this->currentNode = $b;
1068 array_splice( $this->elements, $idx + 1, 0, [ $b ] );
1072 # Fostering and adoption.
1080 $lastTable = $this->
indexOf(
'table' );
1081 $lastTemplate = $this->
indexOf(
'template' );
1085 if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1086 $parent = $this->elements[$lastTemplate];
1087 } elseif ( $lastTable >= 0 ) {
1088 $parent = $this->elements[$lastTable]->parent;
1089 # Assume all tables have parents, since we're not running scripts!
1091 $parent !== null,
"All tables should have parents"
1093 $before = $this->elements[$lastTable];
1095 $parent = $this->elements[0];
1098 if ( $this->tidyCompat ) {
1099 if ( is_string( $elt ) ) {
1108 if ( $elt->isHtmlNamed(
'mw:p-wrap' ) ) {
1110 array_search( $before, $parent->children,
true ) :
1111 count( $parent->children );
1112 $after = $idx > 0 ? $parent->children[$idx - 1] :
'';
1124 $parent->insertBefore( $before, $elt );
1126 $parent->appendChild( $elt );
1147 $this->currentNode->isHtmlNamed(
$tag ) &&
1148 !$afe->isInList( $this->currentNode )
1159 while ( $outer < 8 ) {
1168 $fmtelt = $afe->findElementByTag(
$tag );
1179 $index = $this->
indexOf( $fmtelt );
1181 $afe->remove( $fmtelt );
1189 if ( !$this->
inScope( $fmtelt ) ) {
1197 $furthestblock = null;
1198 $furthestblockindex = -1;
1199 $stacklen = $this->
length();
1200 for ( $i = $index+1; $i < $stacklen; $i++ ) {
1202 $furthestblock = $this->
node( $i );
1203 $furthestblockindex = $i;
1214 if ( !$furthestblock ) {
1215 $this->
popTag( $fmtelt );
1216 $afe->remove( $fmtelt );
1221 $ancestor = $this->
node( $index-1 );
1227 $BOOKMARK =
new BalanceElement(
'[bookmark]',
'[bookmark]', [] );
1228 $afe->insertAfter( $fmtelt, $BOOKMARK );
1231 $node = $furthestblock;
1232 $lastnode = $furthestblock;
1233 $nodeindex = $furthestblockindex;
1250 $node = $this->
node( --$nodeindex );
1254 if ( $node === $fmtelt )
break;
1259 $isAFE = $afe->isInList( $node );
1260 if ( $inner > 3 && $isAFE ) {
1261 $afe->remove( $node );
1284 $node->namespaceURI, $node->localName, $node->attribs );
1285 $afe->replace( $node, $newelt );
1286 $this->
replaceAt( $nodeindex, $newelt );
1292 if ( $lastnode === $furthestblock ) {
1293 $afe->remove( $BOOKMARK );
1294 $afe->insertAfter( $newelt, $BOOKMARK );
1299 $node->appendChild( $lastnode );
1310 $this->fosterParentMode &&
1318 $ancestor->appendChild( $lastnode );
1325 $fmtelt->namespaceURI, $fmtelt->localName, $fmtelt->attribs );
1329 $newelt2->adoptChildren( $furthestblock );
1332 $furthestblock->appendChild( $newelt2 );
1338 $afe->remove( $fmtelt );
1339 $afe->replace( $BOOKMARK, $newelt2 );
1360 foreach ( $this->elements
as $elt ) {
1361 array_push( $r, $elt->localName );
# Separator goes first: the legacy implode( array, string ) order was
# deprecated in PHP 7.4 and is rejected by PHP 8.0 and later.
1363 return implode( ' ', $r );
1415 for ( $node = $this->head; $node; $node = $next ) {
1416 $next = $node->nextAFE;
1417 $node->prevAFE = $node->nextAFE = $node->nextNoah = null;
1419 $this->head = $this->tail = $this->noahTableStack = null;
1424 if ( $this->tail ) {
1425 $this->tail->nextAFE = $elt;
1431 $this->noahTableStack[] = [];
1441 if ( $elt->prevAFE !== null || $this->head === $elt ) {
1442 throw new ParameterAssertionException(
'$elt',
1443 'Cannot insert a node into the AFE list twice' );
1450 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1451 if ( !isset( $table[$noahKey] ) ) {
1452 $table[$noahKey] = $elt;
1456 while (
$tail->nextNoah ) {
1461 $this->
remove(
$head );
1463 $tail->nextNoah = $elt;
1466 if ( $this->tail ) {
1467 $this->tail->nextAFE = $elt;
1484 $prev =
$tail->prevAFE;
1485 $tail->prevAFE = null;
1487 $prev->nextAFE = null;
1489 $tail->nextNoah = null;
1494 $prev =
$tail->prevAFE;
1496 $prev->nextAFE = null;
1499 array_pop( $this->noahTableStack );
1502 $this->noahTableStack[0] = [];
1508 $this->tail =
$tail;
1519 if ( $elt->localName ===
$tag ) {
1522 $elt = $elt->prevAFE;
1532 return $this->head === $elt || $elt->prevAFE;
1540 if ( $this->head !== $elt && !$elt->prevAFE ) {
1541 throw new ParameterAssertionException(
'$elt',
1542 "Attempted to remove an element which is not in the AFE list" );
1545 if ( $this->head === $elt ) {
1546 $this->head = $elt->nextAFE;
1548 if ( $this->tail === $elt ) {
1549 $this->tail = $elt->prevAFE;
1552 if ( $elt->prevAFE ) {
1553 $elt->prevAFE->nextAFE = $elt->nextAFE;
1556 if ( $elt->nextAFE ) {
1557 $elt->nextAFE->prevAFE = $elt->prevAFE;
1560 $elt->prevAFE = $elt->nextAFE = null;
1567 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1568 if ( !isset( $table[$noahKey] ) ) {
1569 $table[$noahKey] = $elt;
1571 $tail = $table[$noahKey];
1572 while (
$tail->nextNoah ) {
1575 $tail->nextNoah = $elt;
1580 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1582 $noahElt = $table[$key];
1583 if ( $noahElt === $elt ) {
1584 if ( $noahElt->nextNoah ) {
1585 $table[$key] = $noahElt->nextNoah;
1586 $noahElt->nextNoah = null;
1588 unset( $table[$key] );
1592 $prevNoahElt = $noahElt;
1593 $noahElt = $prevNoahElt->nextNoah;
1594 if ( $noahElt === $elt ) {
1596 $prevNoahElt->nextNoah = $elt->nextNoah;
1597 $elt->nextNoah = null;
1600 }
while ( $noahElt );
1608 if ( $this->head !== $a && !$a->prevAFE ) {
1609 throw new ParameterAssertionException(
'$a',
1610 "Attempted to replace an element which is not in the AFE list" );
1613 if ( $this->head === $a ) {
1616 if ( $this->tail === $a ) {
1620 if ( $a->prevAFE ) {
1621 $a->prevAFE->nextAFE = $b;
1624 if ( $a->nextAFE ) {
1625 $a->nextAFE->prevAFE = $b;
1627 $b->prevAFE = $a->prevAFE;
1628 $b->nextAFE = $a->nextAFE;
1629 $a->nextAFE = $a->prevAFE = null;
1639 if ( $this->head !== $a && !$a->prevAFE ) {
1640 throw new ParameterAssertionException(
'$a',
1641 "Attempted to insert after an element which is not in the AFE list" );
1643 if ( $this->tail === $a ) {
1646 if ( $a->nextAFE ) {
1647 $a->nextAFE->prevAFE = $b;
1649 $b->nextAFE = $a->nextAFE;
1674 if ( $stack->indexOf( $entry ) >= 0 ) {
1681 while ( $entry->prevAFE ) {
1682 $entry = $entry->prevAFE;
1683 if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
1694 $entry = $entry->nextAFE;
1697 $newElement = $stack->insertHTMLElement(
1700 $this->
replace( $entry, $newElement );
1701 $entry = $newElement->nextAFE;
1711 for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
1716 $s .= $node->localName .
'#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
1717 if ( $node->nextNoah ) {
1718 $s .=
" (noah sibling: {$node->nextNoah->localName}#" .
1719 substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
1722 if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
1723 $s .=
" (reverse link is wrong!)";
1727 if ( $prev !== $this->tail ) {
1728 $s .=
"(tail pointer is wrong!)\n";
1809 ( # 1. Comment match detector
1810 > | -> | # Invalid short close
1811 ( # 2. Comment contents
1821 ( # 3. Comment close
1822 --> | # Normal close
1823 --!> | # Comment end bang
1824 ( # 4. Indicate matches requiring EOF
1825 --! | # EOF in comment end bang state
1826 -- | # EOF in comment end state
1827 - | # EOF in comment end dash state
1828 # EOF in comment state
1832 ([^<]*) \z # 5. Non-tag text after the comment
1862 'allowedHtmlElements' => null,
1863 'tidyCompat' =>
false,
1864 'allowComments' =>
true,
1866 $this->allowedHtmlElements =
$config[
'allowedHtmlElements'];
1867 $this->strict =
$config[
'strict'];
1868 $this->tidyCompat =
$config[
'tidyCompat'];
1869 $this->allowComments =
$config[
'allowComments'];
1870 if ( $this->allowedHtmlElements !== null ) {
1872 $bad = array_uintersect_assoc(
1873 $this->allowedHtmlElements,
1875 function( $a, $b ) {
1881 if ( count( $bad ) > 0 ) {
# Separator goes first: the legacy implode( array, string ) order was
# deprecated in PHP 7.4 and is rejected by PHP 8.0 and later.
1882 $badstr = implode( ',', array_keys( $bad ) );
1883 throw new ParameterAssertionException(
1885 'Balance attempted with sanitization including ' .
1886 "unsupported elements: {$badstr}"
1904 public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
1905 $this->parseMode =
'inBodyMode';
1910 $this->processingCallback = $processingCallback;
1911 $this->processingArgs = $processingArgs;
1913 $this->textIntegrationMode =
1914 $this->ignoreLinefeed =
1916 $this->inRAWTEXT =
false;
1918 # The stack is constructed with an <html> element already on it.
1919 # Set this up as a fragment parsed with <body> as the context.
1920 $this->fragmentContext =
1923 $this->formElementPointer = null;
1924 for (
$e = $this->fragmentContext;
$e != null;
$e =
$e->parent ) {
1925 if (
$e->isHtmlNamed(
'form' ) ) {
1926 $this->formElementPointer =
$e;
1932 $x = $this->bitsIterator->current();
1933 $this->bitsIterator->next();
# Escape any stray '>' in the leading text as the "gt" character entity;
# replacing '>' with itself would leave the text unescaped.
1934 $this->insertToken( 'text', str_replace( '>', '&gt;', $x ) );
1936 while ( $this->bitsIterator->valid() ) {
1940 $result = $this->stack->getOutput();
1942 $this->bitsIterator = null;
1944 $this->stack = null;
1945 $this->fragmentContext = null;
1946 $this->formElementPointer = null;
1956 if ( $token ===
'tag' || $token ===
'endtag' ) {
1958 # As described in "simplifications" above, these tags are
1959 # not supported in the balancer.
1962 "Unsupported $token <$value> found."
1966 } elseif ( $token ===
'text' &&
$value ===
'' ) {
1967 # Don't actually inject the empty string as a text token.
1971 if ( $this->ignoreLinefeed ) {
1972 $this->ignoreLinefeed =
false;
1973 if ( $token ===
'text' ) {
1974 if (
$value[0] ===
"\n" ) {
1976 # Nothing would be left, don't inject the empty string.
1984 $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
1988 $this->stack->length() === 0 ||
1989 $adjusted->isHtml() ||
1993 } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
1994 if ( $token ===
'text' ) {
2004 $adjusted->localName ===
'annotation-xml' &&
2005 $token ===
'tag' &&
$value ===
'svg'
2009 $adjusted->isHtmlIntegrationPoint() &&
2010 ( $token ===
'tag' || $token ===
'text' )
2023 if ( $token ===
'text' ) {
2024 $this->stack->insertText(
$value );
2026 } elseif ( $token ===
'tag' ) {
2080 if ( $this->fragmentContext ) {
2084 $this->stack->pop();
2085 $node = $this->stack->currentNode;
2087 $node->isMathmlTextIntegrationPoint() ||
2088 $node->isHtmlIntegrationPoint() ||
2097 $adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
2098 $this->fragmentContext : $this->stack->currentNode;
2099 $this->stack->insertForeignElement(
2103 $this->stack->pop();
2106 } elseif ( $token ===
'endtag' ) {
2108 foreach ( $this->stack
as $i => $node ) {
2109 if ( $node->isHtml() && !$first ) {
2113 } elseif ( $i === 0 ) {
2115 } elseif ( $node->localName ===
$value ) {
2116 $this->stack->popTag( $node );
2129 $x = $this->bitsIterator->current();
2130 $this->bitsIterator->next();
2132 # Handle comments. These won't be generated by MediaWiki (they
2133 # are stripped in the Sanitizer) but may be generated by extensions.
2135 $this->allowComments &&
2136 !( $this->inRCDATA || $this->inRAWTEXT ) &&
2139 ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2141 $contents = $regs[2][0];
2142 $rest = $regs[5][0];
# Text following the comment: escape any stray '>' as the "gt" character
# entity; replacing '>' with itself would leave the text unescaped.
2144 $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
2147 # $slash: Does the current element start with a '/'?
2148 # $t: Current element name
2149 # $attribStr: String between element name and >
2150 # $brace: Ending '>' or '/>'
2151 # $rest: Everything until the next element from the $bitsIterator
2153 list( , $slash,
$t, $attribStr, $brace, $rest ) = $regs;
2154 $t = strtolower(
$t );
2155 if ( $this->strict ) {
2159 '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2161 "Bad attribute string found"
2166 !$this->strict,
"< found which does not start a valid tag"
2168 $slash =
$t = $attribStr = $brace = $rest = null;
2171 if ( $this->inRCDATA ) {
2172 if ( $slash &&
$t === $this->inRCDATA ) {
2173 $this->inRCDATA =
false;
2179 if ( $this->inRAWTEXT ) {
2180 if ( $slash &&
$t === $this->inRAWTEXT ) {
2181 $this->inRAWTEXT =
false;
2187 $sanitize = $this->allowedHtmlElements !== null;
2189 $goodtag =
$t && isset( $this->allowedHtmlElements[
$t] );
2192 if ( is_callable( $this->processingCallback ) ) {
2193 call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2207 $slash ?
'endtag' :
'tag',
$t,
$attribs, $brace ===
'/>'
# Escape any stray '>' in the text that follows the tag as the "gt"
# character entity, then emit it as a text token. One pass is enough:
# after the replacement no bare '>' remains in $rest.
2211 $rest = str_replace( '>', '&gt;', $rest );
2212 $this->insertToken( 'text', $rest );
2213 } elseif ( $this->inRAWTEXT ) {
2216 # bad tag; serialize entire thing as text.
# The rejected tag must come out as text, so both the leading '<' and any
# '>' inside it are written as character entities ("lt" / "gt").
2217 $this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
2223 substr( $mode, -4 )===
'Mode',
'$mode',
'should end in Mode'
2226 $this->parseMode = $mode;
2237 foreach ( $this->stack
as $i => $node ) {
2240 if ( $this->fragmentContext ) {
2244 if ( $node->isHtml() ) {
2245 switch ( $node->localName ) {
2247 $stacklen = $this->stack->length();
2248 for ( $j = $i + 1; $j < $stacklen-1; $j++ ) {
2249 $ancestor = $this->stack->node( $stacklen-$j-1 );
2250 if ( $ancestor->isHtmlNamed(
'template' ) ) {
2253 if ( $ancestor->isHtmlNamed(
'table' ) ) {
2279 array_slice( $this->templateInsertionModes, -1 )[0]
2285 # OMITTED: <frameset>
2306 # Most of the spec methods are inapplicable, other than step 2:
2307 # "pop all the nodes off the stack of open elements".
2308 # We're going to keep the top-most <html> element on the stack, though.
2310 # Clear the AFE list first, otherwise the element objects will stay live
2311 # during serialization, potentially using O(N^2) memory. Note that
2312 # popping the stack will never result in reconstructing the active
2313 # formatting elements.
2315 $this->stack->popTo( 1 );
2320 $this->inRAWTEXT =
$value;
2321 $this->originalInsertionMode = $this->
switchMode(
'inTextMode' );
2326 if ( $token ===
'text' ) {
2327 $this->stack->insertText(
$value );
2329 } elseif ( $token ===
'eof' ) {
2330 $this->stack->pop();
2332 $this->originalInsertionMode, $token,
$value,
$attribs, $selfclose
2334 } elseif ( $token ===
'endtag' ) {
2335 $this->stack->pop();
2336 $this->
switchMode( $this->originalInsertionMode );
2343 if ( $token ===
'text' ) {
2344 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
2345 $this->stack->insertText(
$matches[0] );
2348 if ( strlen(
$value ) === 0 ) {
2352 } elseif ( $token ===
'tag' ) {
2355 # OMITTED: in a full HTML parser, this might change the encoding.
2363 $this->stack->pop();
2366 # OMITTED: <noscript>
2373 $this->afe->insertMarker();
2374 # OMITTED: frameset_ok
2380 } elseif ( $token ===
'endtag' ) {
2388 if ( $this->stack->indexOf(
$value ) < 0 ) {
2391 $this->stack->generateImpliedEndTags( null,
true );
2392 $this->stack->popTag(
$value );
2393 $this->afe->clearToMarker();
2394 array_pop( $this->templateInsertionModes );
2401 } elseif ( $token ===
'comment' ) {
2402 $this->stack->insertComment(
$value );
2413 if ( $token ===
'text' ) {
2414 $this->afe->reconstruct( $this->stack );
2415 $this->stack->insertText(
$value );
2417 } elseif ( $token ===
'eof' ) {
2418 if ( !empty( $this->templateInsertionModes ) ) {
2423 } elseif ( $token ===
'tag' ) {
2438 # OMITTED: <frameset>
2464 if ( $this->stack->inButtonScope(
'p' ) ) {
2476 if ( $this->stack->inButtonScope(
'p' ) ) {
2480 $this->stack->pop();
2487 if ( $this->stack->inButtonScope(
'p' ) ) {
2491 $this->ignoreLinefeed =
true;
2492 # OMITTED: frameset_ok
2497 $this->formElementPointer &&
2498 $this->stack->indexOf(
'template' ) < 0
2502 if ( $this->stack->inButtonScope(
"p" ) ) {
2506 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2507 $this->formElementPointer = $elt;
2512 # OMITTED: frameset_ok
2513 foreach ( $this->stack
as $node ) {
2514 if ( $node->isHtmlNamed(
'li' ) ) {
2525 if ( $this->stack->inButtonScope(
'p' ) ) {
2533 # OMITTED: frameset_ok
2534 foreach ( $this->stack
as $node ) {
2535 if ( $node->isHtmlNamed(
'dd' ) ) {
2539 if ( $node->isHtmlNamed(
'dt' ) ) {
2550 if ( $this->stack->inButtonScope(
'p' ) ) {
2556 # OMITTED: <plaintext>
2559 if ( $this->stack->inScope(
'button' ) ) {
2563 $this->afe->reconstruct( $this->stack );
2568 $activeElement = $this->afe->findElementByTag(
'a' );
2569 if ( $activeElement ) {
2571 if ( $this->afe->isInList( $activeElement ) ) {
2572 $this->afe->remove( $activeElement );
2576 $this->stack->removeElement( $activeElement,
false );
2592 $this->afe->reconstruct( $this->stack );
2597 $this->afe->reconstruct( $this->stack );
2598 if ( $this->stack->inScope(
'nobr' ) ) {
2600 $this->afe->reconstruct( $this->stack );
2608 $this->afe->reconstruct( $this->stack );
2610 $this->afe->insertMarker();
2611 # OMITTED: frameset_ok
2615 # The document is never in "quirks mode"; see simplifications
2617 if ( $this->stack->inButtonScope(
'p' ) ) {
2621 # OMITTED: frameset_ok
2631 $this->afe->reconstruct( $this->stack );
2633 $this->stack->pop();
2634 # OMITTED: frameset_ok
2638 $this->afe->reconstruct( $this->stack );
2640 $this->stack->pop();
2641 # OMITTED: frameset_ok
2642 # (hence we don't need to examine the tag's "type" attribute)
2650 $this->stack->pop();
2654 if ( $this->stack->inButtonScope(
'p' ) ) {
2658 $this->stack->pop();
2665 # OMITTED: <isindex>
2669 $this->ignoreLinefeed =
true;
2670 $this->inRCDATA =
$value;
2671 # OMITTED: frameset_ok
2676 # OMITTED: <noembed>
2677 # OMITTED: <noscript>
2680 $this->afe->reconstruct( $this->stack );
2682 switch ( $this->parseMode ) {
2684 case 'inCaptionMode':
2685 case 'inTableBodyMode':
2697 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
2700 $this->afe->reconstruct( $this->stack );
2706 if ( $this->stack->inScope(
'ruby' ) ) {
2707 $this->stack->generateImpliedEndTags();
2714 if ( $this->stack->inScope(
'ruby' ) ) {
2715 $this->stack->generateImpliedEndTags(
'rtc' );
2721 $this->afe->reconstruct( $this->stack );
2722 # We skip the spec's "adjust MathML attributes" and
2723 # "adjust foreign attributes" steps, since the browser will
2724 # do this later when it parses the output and it doesn't affect
2726 $this->stack->insertForeignElement(
2730 # emit explicit </math> tag.
2731 $this->stack->pop();
2736 $this->afe->reconstruct( $this->stack );
2737 # We skip the spec's "adjust SVG attributes" and
2738 # "adjust foreign attributes" steps, since the browser will
2739 # do this later when it parses the output and it doesn't affect
2741 $this->stack->insertForeignElement(
2745 # emit explicit </svg> tag.
2746 $this->stack->pop();
2766 $this->afe->reconstruct( $this->stack );
2769 } elseif ( $token ===
'endtag' ) {
2771 # </body>,</html> are unsupported.
2803 if ( !$this->stack->inScope(
$value ) ) {
2806 $this->stack->generateImpliedEndTags();
2807 $this->stack->popTag(
$value );
2811 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2813 $this->formElementPointer = null;
2814 if ( !$openform || !$this->stack->inScope( $openform ) ) {
2817 $this->stack->generateImpliedEndTags();
2820 $flatten = ( $this->stack->currentNode === $openform );
2821 $this->stack->removeElement( $openform, $flatten );
2823 if ( !$this->stack->inScope(
'form' ) ) {
2826 $this->stack->generateImpliedEndTags();
2827 $this->stack->popTag(
'form' );
2832 if ( !$this->stack->inButtonScope(
'p' ) ) {
2836 $this->stack->generateImpliedEndTags(
$value );
2837 $this->stack->popTag(
$value );
2841 if ( !$this->stack->inListItemScope(
$value ) ) {
2842 return true; # ignore
2844 $this->stack->generateImpliedEndTags(
$value );
2845 $this->stack->popTag(
$value );
2850 if ( !$this->stack->inScope(
$value ) ) {
2851 return true; # ignore
2853 $this->stack->generateImpliedEndTags(
$value );
2854 $this->stack->popTag(
$value );
2864 return true; # ignore
2866 $this->stack->generateImpliedEndTags();
2871 # Take a deep breath, then:
2888 if ( $this->stack->adoptionAgency(
$value, $this->afe ) ) {
2889 return true; # If we did something, we
're done.
2891 break; # Go to the "any other end tag" case.
2896 if ( !$this->stack->inScope( $value ) ) {
2897 return true; # ignore
2899 $this->stack->generateImpliedEndTags();
2900 $this->stack->popTag( $value );
2901 $this->afe->clearToMarker();
2905 # Turn </br> into <br>
2906 return $this->inBodyMode( 'tag
', $value, [] );
2909 // Any other end tag goes here
2910 foreach ( $this->stack as $i => $node ) {
2911 if ( $node->isHtmlNamed( $value ) ) {
2912 $this->stack->generateImpliedEndTags( $value );
2913 $this->stack->popTo( $i ); # including $i
2915 } elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2916 return true; // ignore this close token.
2920 } elseif ( $token === 'comment
' ) {
2921 $this->stack->insertComment( $value );
2924 Assert::invariant( false, "Bad token type: $token" );
// Token handler for the "in table" parse mode.
//
// Dispatches on $token ('text', 'eof', 'tag', 'endtag', 'comment'). Branches
// that do not fully handle a token fall through to the "anything else" case
// at the bottom, which runs the in-body rules with foster parenting enabled
// on the stack.
//
// $token     token type string
// $value     text content, comment content, or tag name, depending on $token
// $attribs   attributes for 'tag' tokens (defaults to null)
// $selfclose self-closing flag, passed through unchanged when delegating
//
// NOTE(review): this excerpt omits lines of the original (case labels,
// closing braces, some returns), so the per-tag dispatch is only partly
// visible; confirm details against the full file.
2928 private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
2929 if ( $token === 'text' ) {
// In text-integration mode, text is handled by the in-body rules.
2930 if ( $this->textIntegrationMode ) {
2931 return $this->inBodyMode( $token, $value, $attribs, $selfclose );
2932 } elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
// Start buffering table text: reset the buffer, remember the current mode
// so it can be restored later, and reprocess this token in inTableTextMode.
2933 $this->pendingTableText = '';
2934 $this->originalInsertionMode = $this->parseMode;
2935 return $this->switchModeAndReprocess( 'inTableTextMode', $token, $value, $attribs, $selfclose );
2937 // fall through to default case.
2938 } elseif ( $token === 'eof
' ) {
2939 $this->stopParsing();
2941 } elseif ( $token === 'tag
' ) {
2944 $this->afe->insertMarker();
2945 $this->stack->insertHTMLElement( $value, $attribs );
2949 $this->stack->clearToContext( BalanceSets::$tableContextSet );
2950 $this->stack->insertHTMLElement( $value, $attribs );
// Synthesize a colgroup start tag, then reprocess the current token.
2954 $this->inTableMode( 'tag
', 'colgroup
', [] );
2955 return $this->insertToken( $token, $value, $attribs, $selfclose );
2959 $this->stack->clearToContext( BalanceSets::$tableContextSet );
2960 $this->stack->insertHTMLElement( $value, $attribs );
// Synthesize a tbody start tag, then reprocess the current token.
2966 $this->inTableMode( 'tag
', 'tbody
', [] );
2967 return $this->insertToken( $token, $value, $attribs, $selfclose );
// If no element named $value is in table scope the tag is ignored; otherwise
// that element is closed with a synthetic end tag and the token reprocessed.
2969 if ( !$this->stack->inTableScope( $value ) ) {
2970 return true; // Ignore this tag.
2972 $this->inTableMode( 'endtag
', $value );
2973 return $this->insertToken( $token, $value, $attribs, $selfclose );
2978 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
// Only a "type" attribute equal to "hidden" (case-insensitive) is handled
// here: the element is inserted and immediately popped. Anything else is
// left to the "everything else" case.
2981 if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden
' ) !== 0 ) {
2982 break; // Handle this as "everything else"
2984 $this->stack->insertHTMLElement( $value, $attribs );
2985 $this->stack->pop();
// The token is ignored when a form element pointer is already set or a
// template is on the stack; otherwise the new element is recorded as the
// form element pointer and popped again right away.
// NOTE(review): the enclosing "if (" line is missing from this excerpt.
2990 $this->formElementPointer ||
2991 $this->stack->indexOf( 'template' ) >= 0
2993 return true; // ignore this token
2995 $this->formElementPointer =
2996 $this->stack->insertHTMLElement( $value, $attribs );
2997 $this->stack->popTag( $this->formElementPointer );
3000 // Fall through for "anything else" clause.
3001 } elseif ( $token === 'endtag
' ) {
3004 if ( !$this->stack->inTableScope( $value ) ) {
3005 return true; // Ignore.
3007 $this->stack->popTag( $value );
3008 $this->resetInsertionMode();
3021 return true; // Ignore the token.
3023 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3025 // Fall through for "anything else" clause.
3026 } elseif ( $token === 'comment
' ) {
3027 $this->stack->insertComment( $value );
3030 // This is the "anything else" case:
// Foster parenting is switched on only for the duration of the in-body call.
3031 $this->stack->fosterParentMode = true;
3032 $this->inBodyMode( $token, $value, $attribs, $selfclose );
3033 $this->stack->fosterParentMode = false;
// Token handler for the "in table text" parse mode.
//
// Text tokens are appended to $this->pendingTableText. The remaining code
// flushes that buffer and then reprocesses the token in the mode saved in
// $this->originalInsertionMode.
//
// NOTE(review): the lines between the text branch and the flush are missing
// from this excerpt; presumably the flush runs only for non-text tokens --
// confirm against the full file.
3037 private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
3038 if ( $token === 'text' ) {
3039 $this->pendingTableText .= $value;
// Take the buffered text and clear the buffer before emitting it.
3043 $text = $this->pendingTableText;
3044 $this->pendingTableText = '';
// Text containing anything other than tab, LF, FF, CR or space goes through
// the in-body rules with foster parenting enabled.
3045 if ( preg_match( '/[^\x09\x0A\x0C\x0D\x20]/
', $text ) ) {
3046 // This should match the "anything else" case inTableMode
3047 $this->stack->fosterParentMode = true;
3048 $this->inBodyMode( 'text', $text );
3049 $this->stack->fosterParentMode = false;
3051 // Pending text is just whitespace.
3052 $this->stack->insertText( $text );
// Return to the saved mode and reprocess the current token there.
3054 return $this->switchModeAndReprocess(
3055 $this->originalInsertionMode, $token, $value, $attribs, $selfclose
3059 // helper for inCaptionMode
// Closes the open caption: after the table-scope check, generates implied
// end tags, pops the caption from the stack, and calls
// $this->afe->clearToMarker().
// Callers in inCaptionMode test the result for truthiness before
// reprocessing a token.
// NOTE(review): the return statements and the rest of the scope-check branch
// are missing from this excerpt; presumably it returns false when no caption
// is in table scope and true otherwise -- confirm.
3060 private function endCaption() {
3061 if ( !$this->stack->inTableScope( 'caption
' ) ) {
3064 $this->stack->generateImpliedEndTags();
3065 $this->stack->popTag( 'caption
' );
3066 $this->afe->clearToMarker();
// Token handler for the "in caption" parse mode.
//
// In the branches shown, the caption is closed via endCaption(); where the
// result is tested and is truthy, the current token is reprocessed through
// insertToken(). Everything not handled by a specific branch is delegated to
// inBodyMode().
//
// NOTE(review): the case labels selecting each branch are missing from this
// excerpt.
3071 private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
3072 if ( $token === 'tag
' ) {
3083 if ( $this->endCaption() ) {
3084 $this->insertToken( $token, $value, $attribs, $selfclose );
3088 // Fall through to "anything else" case.
3089 } elseif ( $token === 'endtag
' ) {
// Here the caption is closed without reprocessing the token.
3092 $this->endCaption();
3095 if ( $this->endCaption() ) {
3096 $this->insertToken( $token, $value, $attribs, $selfclose );
3112 // Fall through to "anything else" case.
3114 // The Anything Else case
3115 return $this->inBodyMode( $token, $value, $attribs, $selfclose );
// Token handler for the "in column group" parse mode.
//
// - text:    leading whitespace is inserted as text and stripped from
//            $value; if nothing remains the token is done, otherwise the
//            rest falls through to the final case.
// - tag:     the visible branches insert and immediately pop an element, or
//            delegate to inHeadMode().
// - endtag:  the visible branches pop the current node when it is a
//            colgroup, ignore the token, or delegate to inHeadMode().
// - eof:     delegated to inBodyMode().
// - comment: inserted via the stack.
// - else:    if the current node is not a colgroup the token is ignored;
//            otherwise a synthetic colgroup end tag is processed and the
//            token is reprocessed through insertToken().
//
// NOTE(review): the case labels selecting each tag branch are missing from
// this excerpt.
3118 private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
3119 if ( $token === 'text' ) {
// Split off the leading run of tab/LF/FF/CR/space characters.
3120 if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/
', $value, $matches ) ) {
3121 $this->stack->insertText( $matches[0] );
3122 $value = substr( $value, strlen( $matches[0] ) );
3124 if ( strlen( $value ) === 0 ) {
3125 return true; // All text handled.
3127 // Fall through to handle non-whitespace below.
3128 } elseif ( $token === 'tag
' ) {
3132 $this->stack->insertHTMLElement( $value, $attribs );
3133 $this->stack->pop();
3136 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3138 // Fall through for "anything else".
3139 } elseif ( $token === 'endtag
' ) {
3142 if ( !$this->stack->currentNode->isHtmlNamed( 'colgroup
' ) ) {
3143 return true; // Ignore the token.
3145 $this->stack->pop();
3149 return true; // Ignore the token.
3151 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3153 // Fall through for "anything else".
3154 } elseif ( $token === 'eof
' ) {
3155 return $this->inBodyMode( $token, $value, $attribs, $selfclose );
3156 } elseif ( $token === 'comment
' ) {
3157 $this->stack->insertComment( $value );
// Final case: close the colgroup if it is the current node, then reprocess
// the token.
3162 if ( !$this->stack->currentNode->isHtmlNamed( 'colgroup
' ) ) {
3163 return true; // Ignore the token.
3165 $this->inColumnGroupMode( 'endtag
', 'colgroup
' );
3166 return $this->insertToken( $token, $value, $attribs, $selfclose );
3169 // Helper function for inTableBodyMode
// Checks whether a tbody, thead or tfoot is in table scope, clears the stack
// to the table-body context set and pops the current node.
// Callers in inTableBodyMode test the result for truthiness before
// reprocessing a token.
// NOTE(review): the "if" wrapper around the scope checks and the return
// statements are missing from this excerpt; presumably it returns false when
// none of the three sections is in table scope -- confirm.
3170 private function endSection() {
3172 $this->stack->inTableScope( 'tbody
' ) ||
3173 $this->stack->inTableScope( 'thead
' ) ||
3174 $this->stack->inTableScope( 'tfoot
' )
3178 $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
3179 $this->stack->pop();
// Token handler for the "in table body" parse mode.
//
// Visible start-tag branches: clear the stack to the table-body context and
// insert the element; synthesize a tr start tag and reprocess the token; or
// close the section via endSection() and reprocess the token on success.
// Visible end-tag branches: close the section (reprocessing the token where
// the result is tested), or ignore the token.
// Everything else is delegated to inTableMode().
//
// NOTE(review): the case labels selecting each branch are missing from this
// excerpt.
3183 private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
3184 if ( $token === 'tag
' ) {
3187 $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
3188 $this->stack->insertHTMLElement( $value, $attribs );
// Synthesize a tr start tag, then reprocess the current token.
3193 $this->inTableBodyMode( 'tag
', 'tr
', [] );
3194 $this->insertToken( $token, $value, $attribs, $selfclose );
3202 if ( $this->endSection() ) {
3203 $this->insertToken( $token, $value, $attribs, $selfclose );
3207 } elseif ( $token === 'endtag
' ) {
3210 if ( $this->endSection() ) {
3211 $this->insertToken( $token, $value, $attribs, $selfclose );
// Close the section only when the named element is in table scope.
3217 if ( $this->stack->inTableScope( $value ) ) {
3218 $this->endSection();
3229 return true; // Ignore the token.
3233 return $this->inTableMode( $token, $value, $attribs, $selfclose );
3236 // Helper function for inRowMode
// Closes the current row: after the table-scope check for tr, clears the
// stack to the table-row context set and pops the current node.
// Callers in inRowMode test the result for truthiness before reprocessing a
// token.
// NOTE(review): the return statements are missing from this excerpt;
// presumably it returns false when no tr is in table scope -- confirm.
3237 private function endRow() {
3238 if ( !$this->stack->inTableScope( 'tr
' ) ) {
3241 $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
3242 $this->stack->pop();
// Token handler for the "in row" parse mode.
//
// Visible start-tag branches: clear the stack to the table-row context,
// insert the element and call $this->afe->insertMarker(); or close the row
// via endRow() and reprocess the token on success.
// Visible end-tag branches: close the row and reprocess the token, reprocess
// subject to a table-scope check on $value, or ignore the token.
// Everything else is delegated to inTableMode().
//
// NOTE(review): the case labels and parts of the conditions are missing from
// this excerpt.
3246 private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
3247 if ( $token === 'tag
' ) {
3251 $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
3252 $this->stack->insertHTMLElement( $value, $attribs );
3254 $this->afe->insertMarker();
3263 if ( $this->endRow() ) {
3264 $this->insertToken( $token, $value, $attribs, $selfclose );
3268 } elseif ( $token === 'endtag
' ) {
3274 if ( $this->endRow() ) {
3275 $this->insertToken( $token, $value, $attribs, $selfclose );
3282 $this->stack->inTableScope( $value ) &&
3285 $this->insertToken( $token, $value, $attribs, $selfclose );
3295 return true; // Ignore the token.
3299 return $this->inTableMode( $token, $value, $attribs, $selfclose );
3302 // Helper for inCellMode
// Closes the open table cell by feeding inCellMode() a synthetic end tag:
// td when a td is in table scope, otherwise th when a th is in table scope.
// Callers in inCellMode test the result for truthiness before reprocessing a
// token.
// NOTE(review): the return statements are missing from this excerpt.
3303 private function endCell() {
3304 if ( $this->stack->inTableScope( 'td
' ) ) {
3305 $this->inCellMode( 'endtag
', 'td
' );
3307 } elseif ( $this->stack->inTableScope( 'th
' ) ) {
3308 $this->inCellMode( 'endtag
', 'th
' );
// Token handler for the "in cell" parse mode.
//
// Visible start-tag branch: close the cell via endCell() and reprocess the
// token on success.
// Visible end-tag branches, both guarded by a table-scope check on $value:
//  - generate implied end tags, pop the element named $value, and call
//    $this->afe->clearToMarker();
//  - the same, but popping by BalanceSets::$tableCellSet, followed by
//    reprocessing the token.
// Everything else is delegated to inBodyMode().
//
// NOTE(review): the case labels selecting each branch are missing from this
// excerpt.
3314 private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
3315 if ( $token === 'tag
' ) {
3326 if ( $this->endCell() ) {
3327 $this->insertToken( $token, $value, $attribs, $selfclose );
3331 } elseif ( $token === 'endtag
' ) {
3335 if ( $this->stack->inTableScope( $value ) ) {
3336 $this->stack->generateImpliedEndTags();
3337 $this->stack->popTag( $value );
3338 $this->afe->clearToMarker();
3354 if ( $this->stack->inTableScope( $value ) ) {
3355 $this->stack->generateImpliedEndTags();
3356 $this->stack->popTag( BalanceSets::$tableCellSet );
3357 $this->afe->clearToMarker();
3359 $this->insertToken( $token, $value, $attribs, $selfclose );
3365 return $this->inBodyMode( $token, $value, $attribs, $selfclose );
// Token handler for the "in select" parse mode.
//
// - text:    inserted directly.
// - eof:     delegated to inBodyMode().
// - tag:     visible branches pop an open option (and, in one branch, an
//            open optgroup as well) before inserting the new element; treat
//            the tag as the matching end tag; close the select with a
//            synthetic end tag and reprocess the token; or delegate to
//            inHeadMode().
// - endtag:  visible branches pop option/optgroup nodes, close the select
//            (pop the named tag and reset the insertion mode), or delegate
//            to inHeadMode().
// - comment: inserted via the stack.
// Any other token is ignored.
//
// NOTE(review): the case labels selecting each branch are missing from this
// excerpt.
3368 private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
3369 if ( $token === 'text' ) {
3370 $this->stack->insertText( $value );
3372 } elseif ( $token === 'eof
' ) {
3373 return $this->inBodyMode( $token, $value, $attribs, $selfclose );
3374 } elseif ( $token === 'tag
' ) {
// Pop an open option before inserting the new element.
3378 if ( $this->stack->currentNode->isHtmlNamed( 'option
' ) ) {
3379 $this->stack->pop();
3381 $this->stack->insertHTMLElement( $value, $attribs );
// Pop an open option, then an open optgroup, before inserting.
3384 if ( $this->stack->currentNode->isHtmlNamed( 'option
' ) ) {
3385 $this->stack->pop();
3387 if ( $this->stack->currentNode->isHtmlNamed( 'optgroup
' ) ) {
3388 $this->stack->pop();
3390 $this->stack->insertHTMLElement( $value, $attribs );
3393 $this->inSelectMode( 'endtag
', $value ); // treat it like endtag
3398 if ( !$this->stack->inSelectScope( 'select' ) ) {
3399 return true; // ignore token (fragment case)
// Close the select with a synthetic end tag, then reprocess the token.
3401 $this->inSelectMode( 'endtag
', 'select' );
3402 return $this->insertToken( $token, $value, $attribs, $selfclose );
3405 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3407 } elseif ( $token === 'endtag
' ) {
// If the current node is an option whose parent on the stack is an
// optgroup, pop the option first so that the optgroup becomes current.
3411 $this->stack->currentNode->isHtmlNamed( 'option
' ) &&
3412 $this->stack->length() >= 2 &&
3413 $this->stack->node( $this->stack->length() - 2 )->isHtmlNamed( 'optgroup
' )
3415 $this->stack->pop();
3417 if ( $this->stack->currentNode->isHtmlNamed( 'optgroup
' ) ) {
3418 $this->stack->pop();
3422 if ( $this->stack->currentNode->isHtmlNamed( 'option
' ) ) {
3423 $this->stack->pop();
3427 if ( !$this->stack->inSelectScope( $value ) ) {
3428 return true; // fragment case
3430 $this->stack->popTag( $value );
3431 $this->resetInsertionMode();
3434 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3436 } elseif ( $token === 'comment
' ) {
3437 $this->stack->insertComment( $value );
3440 // anything else: just ignore the token
// Token handler for the "in select in table" parse mode.
//
// In the visible start-tag branch the select is closed with a synthetic end
// tag and the token is reprocessed through insertToken(). The visible
// end-tag branch does the same, but only when an element named $value is in
// table scope. Everything else is delegated to inSelectMode().
//
// NOTE(review): the lines that restrict these branches to particular tag
// names are missing from this excerpt.
3444 private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
3454 if ( $token === 'tag
' ) {
3455 $this->inSelectInTableMode( 'endtag
', 'select' );
3456 return $this->insertToken( $token, $value, $attribs, $selfclose );
3457 } elseif ( $token === 'endtag
' ) {
3458 if ( $this->stack->inTableScope( $value ) ) {
3459 $this->inSelectInTableMode( 'endtag
', 'select' );
3460 return $this->insertToken( $token, $value, $attribs, $selfclose );
3466 return $this->inSelectMode( $token, $value, $attribs, $selfclose );
// Token handler for the "in template" parse mode.
//
// - text / comment: delegated to inBodyMode().
// - eof:    with no template on the stack, parsing stops; otherwise the
//           template is popped, $this->afe->clearToMarker() is called, the
//           top entry of $this->templateInsertionModes is removed, the
//           insertion mode is reset, and the token is reprocessed.
// - tag:    visible branches delegate to inHeadMode() or switch to another
//           mode (the visible targets are inTableMode, inRowMode and
//           inBodyMode) and reprocess the token there.
// - endtag: the visible branch delegates to inHeadMode().
// Any other token type trips the invariant assertion at the end.
//
// NOTE(review): the case labels and some switchModeAndReprocess() arguments
// are missing from this excerpt.
3469 private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
3470 if ( $token === 'text' || $token === 'comment
' ) {
3471 return $this->inBodyMode( $token, $value, $attribs, $selfclose );
3472 } elseif ( $token === 'eof
' ) {
// A negative index means no template element is on the stack.
3473 if ( $this->stack->indexOf( 'template' ) < 0 ) {
3474 $this->stopParsing();
3476 $this->stack->popTag( 'template' );
3477 $this->afe->clearToMarker();
3478 array_pop( $this->templateInsertionModes );
3479 $this->resetInsertionMode();
3480 $this->insertToken( $token, $value, $attribs, $selfclose );
3483 } elseif ( $token === 'tag
' ) {
3495 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3502 return $this->switchModeAndReprocess(
3503 'inTableMode', $token, $value, $attribs, $selfclose
3507 return $this->switchModeAndReprocess(
3512 return $this->switchModeAndReprocess(
3518 return $this->switchModeAndReprocess(
3519 'inRowMode', $token, $value, $attribs, $selfclose
3522 return $this->switchModeAndReprocess(
3523 'inBodyMode', $token, $value, $attribs, $selfclose
3525 } elseif ( $token === 'endtag
' ) {
3528 return $this->inHeadMode( $token, $value, $attribs, $selfclose );
3532 Assert::invariant( false, "Bad token type: $token" );
null means default in associative array form
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
the array() calling protocol came about after MediaWiki 1.4rc1.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their contents
Convenience class for iterating over an array in reverse order.
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given element type.
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like select() and insert() are usually more convenient.They take care of things like table prefixes and escaping for you.If you really need to make your own SQL
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED!Use HtmlPageLinkRendererBegin instead.Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
const ELEMENT_BITS_REGEX
Acceptable tag name charset from HTML5 parsing spec http://www.w3.org/TR/html5/syntax.html#tag-open-state.
static validateTag($params, $element)
Takes attribute names and values for a tag and the tag name and validates that the tag is allowed to be present.
An iterator which works exactly like:
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
static encodeAttribute($text)
Encode an attribute value for HTML output.
Allows to change the fields on the form that will be generated $name