MediaWiki  REL1_23
Html.php
Go to the documentation of this file.
00001 <?php
/**
 * Static helper class for building HTML (or, when $wgWellFormedXml is set,
 * XML-well-formed) markup from element names, attribute arrays and contents.
 *
 * The generators in this class:
 *  - lower-case element and attribute names,
 *  - drop attributes whose value equals the element's default,
 *  - omit optional tags and attribute quotes when $wgWellFormedXml is false,
 *  - support boolean attributes via array( 'checked' ) or
 *    array( 'checked' => true/false ).
 */
class Html {
    /**
     * List of void elements from HTML5, section 8.1.2 as of 2011-08-12.
     * These elements never get contents or a closing tag.
     */
    private static $voidElements = array(
        'area',
        'base',
        'br',
        'col',
        'command',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    );

    /**
     * Boolean attributes, which may have the value omitted entirely. Manually
     * collected from the HTML5 spec as of 2011-08-12 ('itemscope' comes from
     * HTML5 Microdata).
     */
    private static $boolAttribs = array(
        'async',
        'autofocus',
        'autoplay',
        'checked',
        'controls',
        'default',
        'defer',
        'disabled',
        'formnovalidate',
        'hidden',
        'ismap',
        'itemscope',
        'loop',
        'multiple',
        'muted',
        'novalidate',
        'open',
        'pubdate',
        'readonly',
        'required',
        'reversed',
        'scoped',
        'seamless',
        'selected',
        'truespeed',
        'typemustmatch',
    );

    /**
     * Build an element whose contents are inserted verbatim (NOT escaped).
     *
     * For void elements the contents are ignored and no closing tag is
     * emitted; with $wgWellFormedXml the tag is self-closed (" />").
     *
     * @param string $element Element name (any letter case)
     * @param array $attribs Attributes, see expandAttributes()
     * @param string $contents Raw HTML to place inside the element
     * @return string Raw HTML
     */
    public static function rawElement( $element, $attribs = array(), $contents = '' ) {
        global $wgWellFormedXml;

        $start = self::openElement( $element, $attribs );

        // openElement() lower-cases the tag name, so the void-element lookup
        // has to do the same; otherwise 'IMG' would get a bogus </img>.
        if ( !in_array( strtolower( $element ), self::$voidElements, true ) ) {
            return "$start$contents" . self::closeElement( $element );
        }

        if ( $wgWellFormedXml ) {
            // Silly XML: turn the trailing '>' into ' />'.
            return substr( $start, 0, -1 ) . ' />';
        }
        return $start;
    }

    /**
     * Build an element whose contents are plain text; '&' and '<' in the
     * contents are escaped before being passed to rawElement().
     *
     * @param string $element Element name
     * @param array $attribs Attributes, see expandAttributes()
     * @param string $contents Text to place inside the element
     * @return string Raw HTML
     */
    public static function element( $element, $attribs = array(), $contents = '' ) {
        // There's no point in escaping quotes, >, etc. in the contents of
        // elements.
        $escaped = strtr( $contents, array(
            '&' => '&amp;',
            '<' => '&lt;',
        ) );

        return self::rawElement( $element, $attribs, $escaped );
    }

    /**
     * Build only the opening tag of an element.
     *
     * Returns the empty string for attribute-less <html> and <head> when
     * $wgWellFormedXml is false. Invalid "type" values are removed from
     * <input>, and <button> gets an explicit type="submit" when none is given.
     *
     * @param string $element Element name (lower-cased in the output)
     * @param array $attribs Attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function openElement( $element, $attribs = array() ) {
        global $wgWellFormedXml;

        $attribs = (array)$attribs;
        // This is not required in HTML5, but let's do it anyway, for
        // consistency and better compression.
        $element = strtolower( $element );

        // In text/html, initial <html> and <head> tags can be omitted under
        // pretty much any sane circumstances, if they have no attributes.  See:
        // <http://www.whatwg.org/html/syntax.html#optional-tags>
        if ( !$wgWellFormedXml && !$attribs
            && in_array( $element, array( 'html', 'head' ), true )
        ) {
            return '';
        }

        // Remove invalid input types
        if ( $element === 'input' ) {
            $validTypes = array(
                'hidden',
                'text',
                'password',
                'checkbox',
                'radio',
                'file',
                'submit',
                'image',
                'reset',
                'button',

                // HTML input types
                'datetime',
                'datetime-local',
                'date',
                'month',
                'time',
                'week',
                'number',
                'range',
                'email',
                'url',
                'search',
                'tel',
                'color',
            );
            // Strict comparison: a loose in_array() would accept values such
            // as true (and 0 on PHP < 8) as if they were a valid type name.
            if ( isset( $attribs['type'] )
                && !in_array( $attribs['type'], $validTypes, true )
            ) {
                unset( $attribs['type'] );
            }
        }

        // According to standard the default type for <button> elements is "submit".
        // Depending on compatibility mode IE might use "button", instead.
        // We enforce the standard "submit".
        if ( $element === 'button' && !isset( $attribs['type'] ) ) {
            $attribs['type'] = 'submit';
        }

        return "<$element" . self::expandAttributes(
            self::dropDefaults( $element, $attribs ) ) . '>';
    }

    /**
     * Build the closing tag of an element.
     *
     * When $wgWellFormedXml is false, the end tags listed below are omitted
     * (empty string returned). No check for void elements is made here.
     *
     * @param string $element Element name (lower-cased in the output)
     * @return string Raw HTML, possibly empty
     */
    public static function closeElement( $element ) {
        global $wgWellFormedXml;

        $element = strtolower( $element );

        // Reference:
        // http://www.whatwg.org/html/syntax.html#optional-tags
        $omittable = array(
            'html',
            'head',
            'body',
            'li',
            'dt',
            'dd',
            'tr',
            'td',
            'th',
        );
        if ( !$wgWellFormedXml && in_array( $element, $omittable, true ) ) {
            return '';
        }
        return "</$element>";
    }

    /**
     * Remove attributes whose value equals the default for the given element,
     * plus empty "class" attributes.
     *
     * Attribute names are matched case-insensitively against the defaults
     * table; array values are joined with spaces before comparison. The
     * "more subtle checks" (link type, input value, select size) look at the
     * exact keys 'type', 'value' and 'size'.
     *
     * @param string $element Element name
     * @param array $attribs Attributes as passed to expandAttributes()
     * @return array The attributes with defaults removed, original order kept
     */
    private static function dropDefaults( $element, $attribs ) {

        // Whenever altering this array, please provide a covering test case
        // in HtmlTest::provideElementsWithAttributesHavingDefaultValues
        // NOTE(review): 'action'/'formaction' => 'GET' look like they were
        // meant to describe 'method'/'formmethod'; left as-is because the
        // test referenced above covers this table - confirm before changing.
        static $attribDefaults = array(
            'area' => array( 'shape' => 'rect' ),
            'button' => array(
                'formaction' => 'GET',
                'formenctype' => 'application/x-www-form-urlencoded',
            ),
            'canvas' => array(
                'height' => '150',
                'width' => '300',
            ),
            'command' => array( 'type' => 'command' ),
            'form' => array(
                'action' => 'GET',
                'autocomplete' => 'on',
                'enctype' => 'application/x-www-form-urlencoded',
            ),
            'input' => array(
                'formaction' => 'GET',
                'type' => 'text',
            ),
            'keygen' => array( 'keytype' => 'rsa' ),
            'link' => array( 'media' => 'all' ),
            'menu' => array( 'type' => 'list' ),
            // Note: the use of text/javascript here instead of other JavaScript
            // MIME types follows the HTML5 spec.
            'script' => array( 'type' => 'text/javascript' ),
            'style' => array(
                'media' => 'all',
                'type' => 'text/css',
            ),
            'textarea' => array( 'wrap' => 'soft' ),
        );

        $element = strtolower( $element );

        foreach ( $attribs as $attrib => $value ) {
            $lcattrib = strtolower( $attrib );
            $value = is_array( $value ) ? implode( ' ', $value ) : strval( $value );

            // Simple checks using $attribDefaults. Both sides are strings, so
            // compare strictly to avoid numeric-string juggling.
            if ( isset( $attribDefaults[$element][$lcattrib] )
                && $attribDefaults[$element][$lcattrib] === $value
            ) {
                unset( $attribs[$attrib] );
            }

            if ( $lcattrib === 'class' && $value === '' ) {
                unset( $attribs[$attrib] );
            }
        }

        // More subtle checks
        if ( $element === 'link' && isset( $attribs['type'] )
            && strval( $attribs['type'] ) === 'text/css'
        ) {
            unset( $attribs['type'] );
        }

        if ( $element === 'input' ) {
            $type = isset( $attribs['type'] ) ? $attribs['type'] : null;
            $value = isset( $attribs['value'] ) ? $attribs['value'] : null;
            if ( $type === 'checkbox' || $type === 'radio' ) {
                // The default value for checkboxes and radio buttons is 'on'
                // not ''. By stripping value="" we break radio boxes that
                // actually wants empty values.
                if ( $value === 'on' ) {
                    unset( $attribs['value'] );
                }
            } elseif ( $type === 'submit' ) {
                // The default value for submit appears to be "Submit" but
                // let's not bother stripping out localized text that matches
                // that.
            } else {
                // The default value for nearly every other field type is ''
                // The 'range' and 'color' types use different defaults but
                // stripping a value="" does not hurt them.
                if ( $value === '' ) {
                    unset( $attribs['value'] );
                }
            }
        }

        if ( $element === 'select' && isset( $attribs['size'] ) ) {
            // The default size is 4 for a multi-select and 1 otherwise.
            $defaultSize = self::isMultiSelect( $attribs ) ? '4' : '1';
            if ( strval( $attribs['size'] ) === $defaultSize ) {
                unset( $attribs['size'] );
            }
        }

        return $attribs;
    }

    /**
     * Tell whether an attribute array requests the boolean "multiple"
     * attribute, in either the array( 'multiple' ) or the
     * array( 'multiple' => <not false/null> ) form.
     *
     * This deliberately inspects keys and values separately instead of using
     * a loose in_array(), which would also match unrelated attributes whose
     * value is true (e.g. 'disabled' => true).
     *
     * @param array $attribs Attributes as passed to expandAttributes()
     * @return bool
     */
    private static function isMultiSelect( array $attribs ) {
        foreach ( $attribs as $key => $value ) {
            if ( is_int( $key ) ) {
                if ( is_string( $value ) && strtolower( $value ) === 'multiple' ) {
                    return true;
                }
            } elseif ( strtolower( $key ) === 'multiple'
                && $value !== false && $value !== null
            ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Turn an attribute array into a string of the form ' key="value" ...'
     * (with a leading space per attribute, or '' when nothing is emitted).
     *
     * Behaviour visible in this method:
     *  - attributes whose value is false or null are skipped;
     *  - array( 'foo' ) is accepted for boolean attributes;
     *  - attribute names are lower-cased;
     *  - max, min, pattern, required and step (unless step="any") are dropped;
     *  - class, accesskey and rel may be arrays (plain lists and/or
     *    'name' => bool pairs) and are whitespace-normalised and de-duplicated;
     *  - quotes are omitted where possible unless $wgWellFormedXml is set;
     *  - '&', '"', newline, carriage return and tab (plus '<' in XML mode)
     *    are entity-encoded in values.
     *
     * @param array $attribs Attribute name => value pairs
     * @return string Raw HTML attribute string
     */
    public static function expandAttributes( $attribs ) {
        global $wgWellFormedXml;

        // Bug 23769: Blacklist all form validation attributes for now.  Current
        // (June 2010) WebKit has no UI, so the form just refuses to submit
        // without telling the user why, which is much worse than failing
        // server-side validation.  Opera is the only other implementation at
        // this time, and has ugly UI, so just kill the feature entirely until
        // we have at least one good implementation.
        $blacklisted = array( 'max', 'min', 'pattern', 'required' );

        // http://www.w3.org/TR/html401/index/attributes.html ("space-separated")
        // http://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
        $spaceSeparatedListAttributes = array(
            'class', // html4, html5
            'accesskey', // as of html5, multiple space-separated values allowed
            // html4-spec doesn't document rel= as space-separated
            // but has been used like that and is now documented as such
            // in the html5-spec.
            'rel',
        );

        // See the "Attributes" section in the HTML syntax part of HTML5,
        // 9.1.2.3 as of 2009-08-10.  Most attributes can have quotation
        // marks omitted, but not all.  (Although a literal " is not
        // permitted, we don't check for that, since it will be escaped
        // anyway.)
        #
        // See also research done on further characters that need to be
        // escaped: http://code.google.com/p/html5lib/issues/detail?id=93
        $badChars = "\\x00- '=<>`/\x{00a0}\x{1680}\x{180e}\x{180F}\x{2000}\x{2001}"
            . "\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}"
            . "\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";
        $needsQuotePattern = "![$badChars]!u";

        // Apparently we need to entity-encode \n, \r, \t, although the
        // spec doesn't mention that.  Since we're doing strtr() anyway,
        // and we don't need <> escaped here, we may as well not call
        // htmlspecialchars().
        // @todo FIXME: Verify that we actually need to
        // escape \n\r\t here, and explain why, exactly.
        #
        // We could call Sanitizer::encodeAttribute() for this, but we
        // don't because we're stubborn and like our marginal savings on
        // byte size from not having to encode unnecessary quotes.
        $map = array(
            '&' => '&amp;',
            '"' => '&quot;',
            "\n" => '&#10;',
            "\r" => '&#13;',
            "\t" => '&#9;',
        );
        if ( $wgWellFormedXml ) {
            // This is allowed per spec: <http://www.w3.org/TR/xml/#NT-AttValue>
            // But reportedly it breaks some XML tools?
            // @todo FIXME: Is this really true?
            $map['<'] = '&lt;';
        }

        $ret = '';
        $attribs = (array)$attribs;
        foreach ( $attribs as $key => $value ) {
            // Support intuitive array( 'checked' => true/false ) form
            if ( $value === false || is_null( $value ) ) {
                continue;
            }

            // For boolean attributes, support array( 'foo' ) instead of
            // requiring array( 'foo' => 'meaningless' ).
            if ( is_int( $key )
                && in_array( strtolower( $value ), self::$boolAttribs, true )
            ) {
                $key = $value;
            }

            // Not technically required in HTML5 but we'd like consistency
            // and better compression anyway.
            $key = strtolower( $key );

            // As the default value of "1" for "step" rejects decimal
            // numbers to be entered in 'type="number"' fields, allow
            // the special case 'step="any"'.
            if ( in_array( $key, $blacklisted, true )
                || ( $key === 'step' && $value !== 'any' )
            ) {
                continue;
            }

            // Specific features for attributes that allow a list of space-separated values
            if ( in_array( $key, $spaceSeparatedListAttributes, true ) ) {
                // Apply some normalization and remove duplicates

                // Convert into correct array. Array can contain space-separated
                // values. Implode/explode to get those into the main array as well.
                if ( is_array( $value ) ) {
                    // If input wasn't an array, we can skip this step
                    $newValue = array();
                    foreach ( $value as $k => $v ) {
                        if ( is_string( $v ) ) {
                            // String values should be normal `array( 'foo' )`
                            // Just append them
                            if ( !isset( $value[$v] ) ) {
                                // As a special case don't set 'foo' if a
                                // separate 'foo' => true/false exists in the array
                                // keys should be authoritative
                                $newValue[] = $v;
                            }
                        } elseif ( $v ) {
                            // If the value is truthy but not a string this is likely
                            // an array( 'foo' => true ), falsy values don't add strings
                            $newValue[] = $k;
                        }
                    }
                    $value = implode( ' ', $newValue );
                }
                $value = explode( ' ', $value );

                // Normalize spacing by fixing up cases where people used
                // more than 1 space and/or a trailing/leading space
                $value = array_diff( $value, array( '', ' ' ) );

                // Remove duplicates and create the string
                $value = implode( ' ', array_unique( $value ) );
            }

            if ( in_array( $key, self::$boolAttribs, true ) ) {
                // In HTML5, we can leave the value empty. If we don't need
                // well-formed XML, we can omit the = entirely.
                $ret .= $wgWellFormedXml ? " $key=\"\"" : " $key";
                continue;
            }

            // Quotes are mandatory in XML, for empty values, and whenever the
            // value contains a character that is unsafe in unquoted form.
            if ( $wgWellFormedXml || $value === ''
                || preg_match( $needsQuotePattern, $value )
            ) {
                $quote = '"';
            } else {
                $quote = '';
            }

            $ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
        }
        return $ret;
    }

    /**
     * Build a <script> element containing the given script verbatim.
     *
     * In $wgWellFormedXml mode, contents containing '<' or '&' are wrapped in
     * a CDATA section hidden inside script comments.
     *
     * @param string $contents Script source
     * @return string Raw HTML
     */
    public static function inlineScript( $contents ) {
        global $wgWellFormedXml;

        if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
            $contents = "/*<![CDATA[*/$contents/*]]>*/";
        }

        return self::rawElement( 'script', array(), $contents );
    }

    /**
     * Build a <script src="..."></script> element for an external script.
     *
     * @param string $url Value of the src attribute
     * @return string Raw HTML
     */
    public static function linkedScript( $url ) {
        return self::element( 'script', array( 'src' => $url ) );
    }

    /**
     * Build a <style> element containing the given CSS verbatim.
     *
     * In $wgWellFormedXml mode, contents containing '<' or '&' are wrapped in
     * a CDATA section hidden inside CSS comments.
     *
     * @param string $contents CSS source
     * @param string $media Value of the media attribute (default 'all')
     * @return string Raw HTML
     */
    public static function inlineStyle( $contents, $media = 'all' ) {
        global $wgWellFormedXml;

        if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
            $contents = "/*<![CDATA[*/$contents/*]]>*/";
        }

        return self::rawElement( 'style', array(
            'type' => 'text/css',
            'media' => $media,
        ), $contents );
    }

    /**
     * Build a <link rel="stylesheet"> element for an external style sheet.
     *
     * @param string $url Value of the href attribute
     * @param string $media Value of the media attribute (default 'all')
     * @return string Raw HTML
     */
    public static function linkedStyle( $url, $media = 'all' ) {
        return self::element( 'link', array(
            'rel' => 'stylesheet',
            'href' => $url,
            'type' => 'text/css',
            'media' => $media,
        ) );
    }

    /**
     * Build an <input> element. The $name, $value and $type arguments
     * override same-named entries in $attribs.
     *
     * @param string $name Value of the name attribute
     * @param string $value Value of the value attribute
     * @param string $type Value of the type attribute
     * @param array $attribs Further attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function input( $name, $value = '', $type = 'text', $attribs = array() ) {
        $attribs['type'] = $type;
        $attribs['value'] = $value;
        $attribs['name'] = $name;

        return self::element( 'input', $attribs );
    }

    /**
     * Build an <input type="hidden"> element.
     *
     * @param string $name Value of the name attribute
     * @param string $value Value of the value attribute
     * @param array $attribs Further attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function hidden( $name, $value, $attribs = array() ) {
        return self::input( $name, $value, 'hidden', $attribs );
    }

    /**
     * Build a <textarea> element; the value is escaped as element text.
     *
     * @param string $name Value of the name attribute (overrides $attribs)
     * @param string $value Text contents of the textarea
     * @param array $attribs Further attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function textarea( $name, $value = '', $attribs = array() ) {
        $attribs['name'] = $name;

        if ( substr( $value, 0, 1 ) == "\n" ) {
            // Workaround for bug 12130: browsers eat the initial newline
            // assuming that it's just for show, but they do keep the later
            // newlines, which we may want to preserve during editing.
            // Prepending a single newline
            $value = "\n" . $value;
        }
        return self::element( 'textarea', $attribs, $value );
    }

    /**
     * Build a drop-down box for selecting a namespace, optionally preceded
     * by a <label>.
     *
     * Keys of $params read by this method:
     *  - selected: namespace to pre-select; digit-only strings are converted
     *    to int, anything else is compared as-is
     *  - all: if set, an extra "all namespaces" option with this value is added
     *  - label: if set, text of a <label> emitted before the <select>
     *  - exclude: array of namespace ids to leave out
     *  - disable: array of namespace ids to mark as disabled
     *
     * @param array $params See above
     * @param array $selectAttribs Attributes for the <select>; 'id' and
     *   'name' default to 'namespace'
     * @return string Raw HTML
     */
    public static function namespaceSelector( array $params = array(), array $selectAttribs = array() ) {
        global $wgContLang;

        ksort( $selectAttribs );

        // Is a namespace selected?
        if ( isset( $params['selected'] ) ) {
            // If string only contains digits, convert to clean int. Selected could also
            // be "all" or "" etc. which needs to be left untouched.
            // PHP is_numeric() has issues with large strings, PHP ctype_digit has other issues
            // and returns false for already clean ints. Use regex instead..
            // The D modifier stops '$' from matching before a trailing newline.
            if ( preg_match( '/^\d+$/D', $params['selected'] ) ) {
                $params['selected'] = intval( $params['selected'] );
            }
            // else: leaves it untouched for later processing
        } else {
            $params['selected'] = '';
        }

        if ( !isset( $params['exclude'] ) || !is_array( $params['exclude'] ) ) {
            $params['exclude'] = array();
        }
        if ( !isset( $params['disable'] ) || !is_array( $params['disable'] ) ) {
            $params['disable'] = array();
        }

        // Compare ids as strings so that a non-numeric option value such as
        // "all" can never loosely equal namespace 0 (which PHP < 8 allows).
        $excluded = array_map( 'strval', $params['exclude'] );
        $disabled = array_map( 'strval', $params['disable'] );

        // Associative array between option-values and option-labels
        $options = array();

        if ( isset( $params['all'] ) ) {
            // add an option that would let the user select all namespaces.
            // Value is provided by user, the name shown is localized for the user.
            $options[$params['all']] = wfMessage( 'namespacesall' )->text();
        }
        // Add all namespaces as options (in the content language)
        $options += $wgContLang->getFormattedNamespaces();

        // Convert $options to HTML and filter out namespaces below 0
        $optionsHtml = array();
        foreach ( $options as $nsId => $nsName ) {
            // Only real (integer) namespace ids are range-checked; a string
            // key like '' for the "all" option must not be compared with
            // NS_MAIN, because '' < 0 is true on PHP 8.
            if ( ( is_int( $nsId ) && $nsId < NS_MAIN )
                || in_array( (string)$nsId, $excluded, true )
            ) {
                continue;
            }
            if ( $nsId === NS_MAIN ) {
                // For other namespaces use the namespace prefix as label, but for
                // main we don't use "" but the user message describing it (e.g. "(Main)" or "(Article)")
                $nsName = wfMessage( 'blanknamespace' )->text();
            } elseif ( is_int( $nsId ) ) {
                $nsName = $wgContLang->convertNamespace( $nsId );
            }
            $optionsHtml[] = Html::element(
                'option', array(
                    'disabled' => in_array( (string)$nsId, $disabled, true ),
                    'value' => $nsId,
                    'selected' => $nsId === $params['selected'],
                ), $nsName
            );
        }

        if ( !array_key_exists( 'id', $selectAttribs ) ) {
            $selectAttribs['id'] = 'namespace';
        }

        if ( !array_key_exists( 'name', $selectAttribs ) ) {
            $selectAttribs['name'] = 'namespace';
        }

        $ret = '';
        if ( isset( $params['label'] ) ) {
            $ret .= Html::element(
                'label', array(
                    'for' => isset( $selectAttribs['id'] ) ? $selectAttribs['id'] : null,
                ), $params['label']
            ) . '&#160;';
        }

        // Wrap options in a <select>
        $ret .= Html::openElement( 'select', $selectAttribs )
            . "\n"
            . implode( "\n", $optionsHtml )
            . "\n"
            . Html::closeElement( 'select' );

        return $ret;
    }

    /**
     * Build the document prologue and the opening <html> tag.
     *
     * For an XML MIME type ($wgMimeType) an XML declaration is emitted and
     * xmlns attributes (including $wgXhtmlNamespaces) are added; otherwise
     * "<!DOCTYPE html>" is emitted. $wgHtml5Version, when truthy, is added
     * as the "version" attribute.
     *
     * @param array $attribs Attributes for the <html> element
     * @return string Raw HTML
     */
    public static function htmlHeader( $attribs = array() ) {
        global $wgHtml5Version, $wgMimeType, $wgXhtmlNamespaces;

        $ret = '';

        if ( self::isXmlMimeType( $wgMimeType ) ) { // XHTML5
            // XML mimetyped markup should have an xml header.
            // However a DOCTYPE is not needed.
            $ret .= "<?xml version=\"1.0\" encoding=\"UTF-8\" ?" . ">\n";

            // Add the standard xmlns
            $attribs['xmlns'] = 'http://www.w3.org/1999/xhtml';

            // And support custom namespaces
            foreach ( $wgXhtmlNamespaces as $tag => $ns ) {
                $attribs["xmlns:$tag"] = $ns;
            }
        } else { // HTML5
            // DOCTYPE
            $ret .= "<!DOCTYPE html>\n";
        }

        if ( $wgHtml5Version ) {
            $attribs['version'] = $wgHtml5Version;
        }

        $html = Html::openElement( 'html', $attribs );

        // openElement() may return '' when the tag can be omitted; only
        // add the line break after an actual tag.
        if ( $html ) {
            $html .= "\n";
        }

        return $ret . $html;
    }

    /**
     * Tell whether a MIME type is an XML MIME type: text/xml,
     * application/xml, or any type whose subtype ends in "+xml".
     *
     * @param string $mimetype MIME type to test
     * @return bool
     */
    public static function isXmlMimeType( $mimetype ) {
        # http://www.whatwg.org/html/infrastructure.html#xml-mime-type
        # * text/xml
        # * application/xml
        # * Any mimetype with a subtype ending in +xml (this implicitly includes application/xhtml+xml)
        return (bool)preg_match( '!^(text|application)/xml$|^.+/.+\+xml$!', $mimetype );
    }

    /**
     * Build an "info box": an icon on the left and the given HTML on the
     * right, followed by clearing <div>s.
     *
     * @param string $text Raw HTML for the right-hand side (inserted verbatim)
     * @param string $icon Icon file name, or full path if $useStylePath is false
     * @param string $alt Alternate text for the icon
     * @param string|bool $class Extra class name(s) for the outer <div>
     * @param bool $useStylePath Whether to prefix $icon with
     *   "$wgStylePath/common/images/"
     * @return string Raw HTML
     */
    public static function infoBox( $text, $icon, $alt, $class = false, $useStylePath = true ) {
        global $wgStylePath;

        if ( $useStylePath ) {
            $icon = $wgStylePath . '/common/images/' . $icon;
        }

        $s = Html::openElement( 'div', array( 'class' => "mw-infobox $class" ) );

        $s .= Html::openElement( 'div', array( 'class' => 'mw-infobox-left' ) ) .
                Html::element( 'img',
                    array(
                        'src' => $icon,
                        'alt' => $alt,
                    )
                ) .
                Html::closeElement( 'div' );

        $s .= Html::openElement( 'div', array( 'class' => 'mw-infobox-right' ) ) .
                $text .
                Html::closeElement( 'div' );
        $s .= Html::element( 'div', array( 'style' => 'clear: left;' ), ' ' );

        $s .= Html::closeElement( 'div' );

        $s .= Html::element( 'div', array( 'style' => 'clear: left;' ), ' ' );

        return $s;
    }

    /**
     * Build the value of an "srcset" attribute from a map of pixel density
     * to URL, e.g. array( '1.5' => 'a.png' ) gives "a.png 1.5x".
     *
     * @param array $urls Map of density => URL
     * @return string Comma-separated list of image candidates
     */
    public static function srcSet( $urls ) {
        $candidates = array();
        foreach ( $urls as $density => $url ) {
            // Image candidate syntax per current whatwg live spec, 2012-09-23:
            // http://www.whatwg.org/html/embedded-content-1.html#attr-img-srcset
            $candidates[] = "{$url} {$density}x";
        }
        return implode( ", ", $candidates );
    }
}