MediaWiki  REL1_22
Html.php
Go to the documentation of this file.
00001 <?php
/**
 * Static helpers that build HTML markup strings.
 *
 * Several methods read the global $wgWellFormedXml. When it is true the
 * output is kept well-formed as XML: void elements are self-closed, every
 * attribute value is quoted and no tag is omitted. When it is false the
 * shorter HTML5 serialization is produced instead.
 */
class Html {
    /**
     * Elements that have neither contents nor a closing tag.
     * List of void elements from HTML5, section 8.1.2 as of 2011-08-12.
     *
     * @var array
     */
    private static $voidElements = array(
        'area',
        'base',
        'br',
        'col',
        'command',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    );

    /**
     * Boolean attributes, which may have the value omitted entirely. Manually
     * collected from the HTML5 spec as of 2011-08-12.
     *
     * @var array
     */
    private static $boolAttribs = array(
        'async',
        'autofocus',
        'autoplay',
        'checked',
        'controls',
        'default',
        'defer',
        'disabled',
        'formnovalidate',
        'hidden',
        'ismap',
        'itemscope', // HTML5 Microdata
        'loop',
        'multiple',
        'muted',
        'novalidate',
        'open',
        'pubdate',
        'readonly',
        'required',
        'reversed',
        'scoped',
        'seamless',
        'selected',
        'truespeed',
        'typemustmatch',
    );

    /**
     * Build an element whose contents are inserted as-is (not escaped).
     *
     * For void elements $contents is ignored and no closing tag is emitted;
     * with $wgWellFormedXml the opening tag is turned into a self-closing one.
     *
     * @param string $element Tag name; matched case-insensitively
     * @param array $attribs Attributes, in the format accepted by expandAttributes()
     * @param string $contents Raw HTML to place between the tags
     * @return string Raw HTML
     */
    public static function rawElement( $element, $attribs = array(), $contents = '' ) {
        global $wgWellFormedXml;

        $start = self::openElement( $element, $attribs );

        // openElement() and closeElement() lower-case the tag name, so the
        // void-element lookup must do the same; otherwise 'BR' would be
        // rendered with a bogus closing tag.
        if ( !in_array( strtolower( $element ), self::$voidElements, true ) ) {
            return "$start$contents" . self::closeElement( $element );
        }

        if ( $wgWellFormedXml ) {
            // Silly XML: replace the trailing '>' with ' />'.
            return substr( $start, 0, -1 ) . ' />';
        }
        return $start;
    }

    /**
     * Same as rawElement(), but '&' and '<' in $contents are escaped first.
     *
     * @param string $element Tag name
     * @param array $attribs Attributes, in the format accepted by expandAttributes()
     * @param string $contents Text to place between the tags
     * @return string Raw HTML
     */
    public static function element( $element, $attribs = array(), $contents = '' ) {
        // There's no point in escaping quotes, >, etc. in the contents of
        // elements.
        $escaped = strtr( $contents, array(
            '&' => '&amp;',
            '<' => '&lt;'
        ) );

        return self::rawElement( $element, $attribs, $escaped );
    }

    /**
     * Build only the opening tag of an element.
     *
     * Returns an empty string for attribute-less <html> and <head> when
     * $wgWellFormedXml is false. Unknown 'type' values on <input> are removed,
     * <button> without a type gets type="submit", and attributes equal to
     * their defaults are stripped via dropDefaults().
     *
     * @param string $element Tag name; lower-cased in the output
     * @param array $attribs Attributes, in the format accepted by expandAttributes()
     * @return string Raw HTML
     */
    public static function openElement( $element, $attribs = array() ) {
        global $wgWellFormedXml;
        $attribs = (array)$attribs;
        // This is not required in HTML5, but let's do it anyway, for
        // consistency and better compression.
        $element = strtolower( $element );

        // In text/html, initial <html> and <head> tags can be omitted under
        // pretty much any sane circumstances, if they have no attributes.  See:
        // <http://www.whatwg.org/html/syntax.html#optional-tags>
        if ( !$wgWellFormedXml && !$attribs
            && in_array( $element, array( 'html', 'head' ), true )
        ) {
            return '';
        }

        // Remove invalid input types
        if ( $element === 'input' && isset( $attribs['type'] ) ) {
            $validTypes = array(
                'hidden',
                'text',
                'password',
                'checkbox',
                'radio',
                'file',
                'submit',
                'image',
                'reset',
                'button',

                // HTML input types
                'datetime',
                'datetime-local',
                'date',
                'month',
                'time',
                'week',
                'number',
                'range',
                'email',
                'url',
                'search',
                'tel',
                'color',
            );
            // Strict lookup on strings only: a loose in_array() would accept
            // values such as 0 or true as a match for the first list entry.
            $type = $attribs['type'];
            if ( !is_string( $type ) || !in_array( $type, $validTypes, true ) ) {
                unset( $attribs['type'] );
            }
        }

        // According to standard the default type for <button> elements is "submit".
        // Depending on compatibility mode IE might use "button", instead.
        // We enforce the standard "submit".
        if ( $element === 'button' && !isset( $attribs['type'] ) ) {
            $attribs['type'] = 'submit';
        }

        $attribs = self::dropDefaults( $element, $attribs );

        return "<$element" . self::expandAttributes( $attribs ) . '>';
    }

    /**
     * Build the closing tag of an element.
     *
     * When $wgWellFormedXml is false, an empty string is returned for the
     * elements whose end tag is optional (html, head, body, li, dt, dd, tr,
     * td, th).
     *
     * @param string $element Tag name; lower-cased in the output
     * @return string Closing tag, or '' when it is omitted
     */
    public static function closeElement( $element ) {
        global $wgWellFormedXml;

        $element = strtolower( $element );

        // Reference:
        // http://www.whatwg.org/html/syntax.html#optional-tags
        $optionalEndTags = array(
            'html',
            'head',
            'body',
            'li',
            'dt',
            'dd',
            'tr',
            'td',
            'th',
        );
        if ( !$wgWellFormedXml && in_array( $element, $optionalEndTags, true ) ) {
            return '';
        }
        return "</$element>";
    }

    /**
     * Tell whether a <select> attribute array requests a multi-select.
     *
     * That is the case when a 'multiple' key holds anything but false (or
     * null), or when 'multiple' is given as a bare, integer-keyed value, which
     * is the shorthand expandAttributes() accepts for boolean attributes.
     *
     * @param array $attribs Attribute array as passed to openElement()
     * @return bool
     */
    private static function isMultipleSelect( array $attribs ) {
        if ( isset( $attribs['multiple'] ) && $attribs['multiple'] !== false ) {
            return true;
        }
        foreach ( $attribs as $key => $value ) {
            // Only integer-keyed strings count. A loose in_array() would also
            // match unrelated values such as 'disabled' => true.
            if ( is_int( $key ) && is_string( $value )
                && strtolower( $value ) === 'multiple'
            ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Remove attributes whose value equals the default for the element, so
     * that they need not be output.
     *
     * @param string $element Tag name
     * @param array $attribs Attribute array as passed to openElement()
     * @return array $attribs without the entries that match a known default
     */
    private static function dropDefaults( $element, $attribs ) {

        // Whenever altering this array, please provide a covering test case
        // in HtmlTest::provideElementsWithAttributesHavingDefaultValues
        //
        // NOTE(review): 'GET' looks like the default of (form)method rather
        // than of (form)action - confirm before relying on those entries.
        // They are kept as they were.
        static $attribDefaults = array(
            'area' => array( 'shape' => 'rect' ),
            'button' => array(
                'formaction' => 'GET',
                'formenctype' => 'application/x-www-form-urlencoded',
            ),
            'canvas' => array(
                'height' => '150',
                'width' => '300',
            ),
            'command' => array( 'type' => 'command' ),
            'form' => array(
                'action' => 'GET',
                'autocomplete' => 'on',
                'enctype' => 'application/x-www-form-urlencoded',
            ),
            'input' => array(
                'formaction' => 'GET',
                'type' => 'text',
            ),
            'keygen' => array( 'keytype' => 'rsa' ),
            'link' => array( 'media' => 'all' ),
            'menu' => array( 'type' => 'list' ),
            // Note: the use of text/javascript here instead of other JavaScript
            // MIME types follows the HTML5 spec.
            'script' => array( 'type' => 'text/javascript' ),
            'style' => array(
                'media' => 'all',
                'type' => 'text/css',
            ),
            'textarea' => array( 'wrap' => 'soft' ),
        );

        $element = strtolower( $element );

        foreach ( $attribs as $attrib => $value ) {
            $lcattrib = strtolower( $attrib );
            // Array values are compared in their space-joined form
            if ( is_array( $value ) ) {
                $value = implode( ' ', $value );
            } else {
                $value = strval( $value );
            }

            // Simple checks using $attribDefaults. Both sides are strings, so
            // compare strictly: a loose comparison would treat numeric-looking
            // strings such as '150.0' and '150' as equal.
            if ( isset( $attribDefaults[$element][$lcattrib] )
                && $attribDefaults[$element][$lcattrib] === $value
            ) {
                unset( $attribs[$attrib] );
            }

            // An empty class attribute carries no information
            if ( $lcattrib === 'class' && $value === '' ) {
                unset( $attribs[$attrib] );
            }
        }

        // More subtle checks
        if ( $element === 'link' && isset( $attribs['type'] )
            && strval( $attribs['type'] ) === 'text/css'
        ) {
            unset( $attribs['type'] );
        }

        if ( $element === 'input' ) {
            $type = isset( $attribs['type'] ) ? $attribs['type'] : null;
            $value = isset( $attribs['value'] ) ? $attribs['value'] : null;
            if ( $type === 'checkbox' || $type === 'radio' ) {
                // The default value for checkboxes and radio buttons is 'on'
                // not ''. By stripping value="" we break radio boxes that
                // actually wants empty values.
                if ( $value === 'on' ) {
                    unset( $attribs['value'] );
                }
            } elseif ( $type === 'submit' ) {
                // The default value for submit appears to be "Submit" but
                // let's not bother stripping out localized text that matches
                // that.
            } else {
                // The default value for nearly every other field type is ''
                // The 'range' and 'color' types use different defaults but
                // stripping a value="" does not hurt them.
                if ( $value === '' ) {
                    unset( $attribs['value'] );
                }
            }
        }

        if ( $element === 'select' && isset( $attribs['size'] ) ) {
            // The default size is 4 for a multi-select and 1 for a single select
            $defaultSize = self::isMultipleSelect( $attribs ) ? '4' : '1';
            if ( strval( $attribs['size'] ) === $defaultSize ) {
                unset( $attribs['size'] );
            }
        }

        return $attribs;
    }

    /**
     * Turn an attribute array into a string of HTML attributes.
     *
     * Accepted forms:
     * - 'name' => 'value': regular attribute; the value is escaped.
     * - 'name' => false or null: the attribute is skipped.
     * - a bare boolean attribute name with an integer key, e.g.
     *   array( 'checked' ), is equivalent to array( 'checked' => true ).
     * - for class, accesskey and rel the value may be an array mixing plain
     *   strings and 'token' => bool entries; tokens are split on spaces,
     *   de-duplicated and joined with single spaces.
     *
     * The attributes max, min, pattern and required, and step unless its
     * value is 'any', are never output.
     *
     * @param array $attribs Attribute array as described above
     * @return string Attributes, each preceded by a space; '' if there are none
     */
    public static function expandAttributes( $attribs ) {
        global $wgWellFormedXml;

        // Bug 23769: Blacklist all form validation attributes for now.  Current
        // (June 2010) WebKit has no UI, so the form just refuses to submit
        // without telling the user why, which is much worse than failing
        // server-side validation.  Opera is the only other implementation at
        // this time, and has ugly UI, so just kill the feature entirely until
        // we have at least one good implementation.
        $blacklistedAttribs = array( 'max', 'min', 'pattern', 'required' );

        // http://www.w3.org/TR/html401/index/attributes.html ("space-separated")
        // http://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
        $spaceSeparatedListAttributes = array(
            'class', // html4, html5
            'accesskey', // as of html5, multiple space-separated values allowed
            // html4-spec doesn't document rel= as space-separated
            // but has been used like that and is now documented as such
            // in the html5-spec.
            'rel',
        );

        // See the "Attributes" section in the HTML syntax part of HTML5,
        // 9.1.2.3 as of 2009-08-10.  Most attributes can have quotation
        // marks omitted, but not all.  (Although a literal " is not
        // permitted, we don't check for that, since it will be escaped
        // anyway.)
        //
        // See also research done on further characters that need to be
        // escaped: http://code.google.com/p/html5lib/issues/detail?id=93
        $badChars = "\\x00- '=<>`/\x{00a0}\x{1680}\x{180e}\x{180F}\x{2000}\x{2001}"
            . "\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}"
            . "\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";
        $needsQuotesRegex = "![$badChars]!u";

        // Apparently we need to entity-encode \n, \r, \t, although the
        // spec doesn't mention that.  Since we're doing strtr() anyway,
        // and we don't need <> escaped here, we may as well not call
        // htmlspecialchars().
        // @todo FIXME: Verify that we actually need to
        // escape \n\r\t here, and explain why, exactly.
        //
        // We could call Sanitizer::encodeAttribute() for this, but we
        // don't because we're stubborn and like our marginal savings on
        // byte size from not having to encode unnecessary quotes.
        $map = array(
            '&' => '&amp;',
            '"' => '&quot;',
            "\n" => '&#10;',
            "\r" => '&#13;',
            "\t" => '&#9;'
        );
        if ( $wgWellFormedXml ) {
            // This is allowed per spec: <http://www.w3.org/TR/xml/#NT-AttValue>
            // But reportedly it breaks some XML tools?
            // @todo FIXME: Is this really true?
            $map['<'] = '&lt;';
        }

        $ret = '';
        foreach ( (array)$attribs as $key => $value ) {
            // Support intuitive array( 'checked' => true/false ) form
            if ( $value === false || is_null( $value ) ) {
                continue;
            }

            // For boolean attributes, support array( 'foo' ) instead of
            // requiring array( 'foo' => 'meaningless' ).
            if ( is_int( $key )
                && in_array( strtolower( $value ), self::$boolAttribs, true )
            ) {
                $key = $value;
            }

            // Not technically required in HTML5 but we'd like consistency
            // and better compression anyway.
            $key = strtolower( $key );

            // As the default value of "1" for "step" rejects decimal
            // numbers to be entered in 'type="number"' fields, allow
            // the special case 'step="any"'.
            if ( in_array( $key, $blacklistedAttribs, true )
                || ( $key === 'step' && $value !== 'any' )
            ) {
                continue;
            }

            // Specific features for attributes that allow a list of space-separated values
            if ( in_array( $key, $spaceSeparatedListAttributes, true ) ) {
                // Apply some normalization and remove duplicates

                // Convert into correct array. Array can contain space-separated
                // values. Implode/explode to get those into the main array as well.
                if ( is_array( $value ) ) {
                    // If input wasn't an array, we can skip this step
                    $newValue = array();
                    foreach ( $value as $k => $v ) {
                        if ( is_string( $v ) ) {
                            // String values should be normal `array( 'foo' )`
                            // Just append them
                            if ( !isset( $value[$v] ) ) {
                                // As a special case don't set 'foo' if a
                                // separate 'foo' => true/false exists in the array
                                // keys should be authoritative
                                $newValue[] = $v;
                            }
                        } elseif ( $v ) {
                            // If the value is truthy but not a string this is likely
                            // an array( 'foo' => true ), falsy values don't add strings
                            $newValue[] = $k;
                        }
                    }
                    $value = implode( ' ', $newValue );
                }
                $value = explode( ' ', $value );

                // Normalize spacing by fixing up cases where people used
                // more than 1 space and/or a trailing/leading space
                $value = array_diff( $value, array( '', ' ' ) );

                // Remove duplicates and create the string
                $value = implode( ' ', array_unique( $value ) );
            }

            if ( in_array( $key, self::$boolAttribs, true ) ) {
                // In HTML5, we can leave the value empty. If we don't need
                // well-formed XML, we can omit the = entirely.
                $ret .= $wgWellFormedXml ? " $key=\"\"" : " $key";
                continue;
            }

            // Quotes are mandatory in XML, around an empty value, and around
            // any value containing one of the characters listed above.
            if ( $wgWellFormedXml || $value === ''
                || preg_match( $needsQuotesRegex, $value )
            ) {
                $quote = '"';
            } else {
                $quote = '';
            }

            $ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
        }
        return $ret;
    }

    /**
     * Build a <script> element with the given contents.
     *
     * With $wgWellFormedXml, contents containing '<' or '&' are wrapped in a
     * CDATA section hidden inside JavaScript comments. The contents are not
     * escaped in any other way.
     *
     * @param string $contents JavaScript
     * @return string Raw HTML
     */
    public static function inlineScript( $contents ) {
        global $wgWellFormedXml;

        if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
            $contents = "/*<![CDATA[*/$contents/*]]>*/";
        }

        return self::rawElement( 'script', array(), $contents );
    }

    /**
     * Build an empty <script> element that loads the given URL.
     *
     * @param string $url Value of the src attribute
     * @return string Raw HTML
     */
    public static function linkedScript( $url ) {
        return self::element( 'script', array( 'src' => $url ) );
    }

    /**
     * Build a <style> element with the given contents.
     *
     * With $wgWellFormedXml, contents containing '<' or '&' are wrapped in a
     * CDATA section hidden inside CSS comments. The type and media attributes
     * are passed along but dropped from the output when they equal the
     * defaults ('text/css' and 'all').
     *
     * @param string $contents CSS
     * @param string $media Value of the media attribute
     * @return string Raw HTML
     */
    public static function inlineStyle( $contents, $media = 'all' ) {
        global $wgWellFormedXml;

        if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
            $contents = "/*<![CDATA[*/$contents/*]]>*/";
        }

        $attribs = array(
            'type' => 'text/css',
            'media' => $media,
        );

        return self::rawElement( 'style', $attribs, $contents );
    }

    /**
     * Build a <link rel=stylesheet> element pointing at the given URL.
     *
     * @param string $url Value of the href attribute
     * @param string $media Value of the media attribute
     * @return string Raw HTML
     */
    public static function linkedStyle( $url, $media = 'all' ) {
        $attribs = array(
            'rel' => 'stylesheet',
            'href' => $url,
            'type' => 'text/css',
            'media' => $media,
        );

        return self::element( 'link', $attribs );
    }

    /**
     * Build an <input> element.
     *
     * The type, value and name arguments override any entries with the same
     * keys in $attribs.
     *
     * @param string $name Value of the name attribute
     * @param string $value Value of the value attribute
     * @param string $type Value of the type attribute
     * @param array $attribs Further attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function input( $name, $value = '', $type = 'text', $attribs = array() ) {
        $attribs['type'] = $type;
        $attribs['value'] = $value;
        $attribs['name'] = $name;

        return self::element( 'input', $attribs );
    }

    /**
     * Build an <input type=hidden> element.
     *
     * @param string $name Value of the name attribute
     * @param string $value Value of the value attribute
     * @param array $attribs Further attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function hidden( $name, $value, $attribs = array() ) {
        return self::input( $name, $value, 'hidden', $attribs );
    }

    /**
     * Build a <textarea> element; the value is escaped by element().
     *
     * @param string $name Value of the name attribute (overrides $attribs['name'])
     * @param string $value Initial contents of the text area
     * @param array $attribs Further attributes, see expandAttributes()
     * @return string Raw HTML
     */
    public static function textarea( $name, $value = '', $attribs = array() ) {
        $attribs['name'] = $name;

        if ( substr( $value, 0, 1 ) === "\n" ) {
            // Workaround for bug 12130: browsers eat the initial newline
            // assuming that it's just for show, but they do keep the later
            // newlines, which we may want to preserve during editing.
            // Prepending a single newline
            $value = "\n" . $value;
        }

        return self::element( 'textarea', $attribs, $value );
    }

    /**
     * Build a drop-down box (<select>) for choosing a namespace.
     *
     * Recognised keys of $params:
     * - selected: option value to pre-select; a string made only of digits is
     *   converted to an integer first.
     * - all: if set, an extra option with this value and the 'namespacesall'
     *   message as label is listed first.
     * - label: if set, a <label> with this text, followed by a non-breaking
     *   space, is output before the <select>.
     * - exclude: array of namespace IDs to leave out.
     * - disable: array of namespace IDs to show as disabled options.
     *
     * Namespaces with an ID below NS_MAIN are never listed.
     *
     * @param array $params See above
     * @param array $selectAttribs Attributes of the <select>; 'id' and 'name'
     *   default to 'namespace'
     * @return string Raw HTML
     */
    public static function namespaceSelector( array $params = array(), array $selectAttribs = array() ) {
        global $wgContLang;

        ksort( $selectAttribs );

        // Is a namespace selected?
        if ( isset( $params['selected'] ) ) {
            // If string only contains digits, convert to clean int. Selected could also
            // be "all" or "" etc. which needs to be left untouched.
            // PHP is_numeric() has issues with large strings, PHP ctype_digit has other issues
            // and returns false for already clean ints. Use regex instead..
            if ( preg_match( '/^\d+$/', $params['selected'] ) ) {
                $params['selected'] = intval( $params['selected'] );
            }
            // else: leaves it untouched for later processing
        } else {
            $params['selected'] = '';
        }

        if ( !isset( $params['exclude'] ) || !is_array( $params['exclude'] ) ) {
            $params['exclude'] = array();
        }
        if ( !isset( $params['disable'] ) || !is_array( $params['disable'] ) ) {
            $params['disable'] = array();
        }

        // Associative array between option-values and option-labels
        $options = array();

        if ( isset( $params['all'] ) ) {
            // add an option that would let the user select all namespaces.
            // Value is provided by user, the name shown is localized for the user.
            $options[$params['all']] = wfMessage( 'namespacesall' )->text();
        }
        // Add all namespaces as options (in the content language)
        $options += $wgContLang->getFormattedNamespaces();

        // Convert $options to HTML and filter out namespaces below 0
        $optionsHtml = array();
        foreach ( $options as $nsId => $nsName ) {
            // The exclude/disable lists hold namespace IDs, so they are only
            // applied to integer keys. A string key such as 'all' or '' would
            // otherwise compare loosely equal to 0 on PHP versions before 8,
            // and the "all" option would vanish or be disabled whenever the
            // main namespace is listed (presumably NS_MAIN is 0 - it is
            // defined outside this file).
            $isNamespace = is_int( $nsId );

            if ( $isNamespace
                && ( $nsId < NS_MAIN || in_array( $nsId, $params['exclude'] ) )
            ) {
                continue;
            }

            if ( $nsId === NS_MAIN ) {
                // For other namespaces use use the namespace prefix as label, but for
                // main we don't use "" but the user message describing it (e.g. "(Main)" or "(Article)")
                $nsName = wfMessage( 'blanknamespace' )->text();
            } elseif ( $isNamespace ) {
                $nsName = $wgContLang->convertNamespace( $nsId );
            }

            $optionsHtml[] = self::element(
                'option', array(
                    'disabled' => $isNamespace && in_array( $nsId, $params['disable'] ),
                    'value' => $nsId,
                    'selected' => $nsId === $params['selected'],
                ), $nsName
            );
        }

        if ( !array_key_exists( 'id', $selectAttribs ) ) {
            $selectAttribs['id'] = 'namespace';
        }

        if ( !array_key_exists( 'name', $selectAttribs ) ) {
            $selectAttribs['name'] = 'namespace';
        }

        $ret = '';
        if ( isset( $params['label'] ) ) {
            $ret .= self::element(
                'label', array(
                    'for' => isset( $selectAttribs['id'] ) ? $selectAttribs['id'] : null,
                ), $params['label']
            ) . '&#160;';
        }

        // Wrap options in a <select>
        $ret .= self::openElement( 'select', $selectAttribs )
            . "\n"
            . implode( "\n", $optionsHtml )
            . "\n"
            . self::closeElement( 'select' );

        return $ret;
    }

    /**
     * Build the start of the document: XML declaration or DOCTYPE, followed
     * by the opening <html> tag.
     *
     * If $wgMimeType is an XML MIME type, an XML declaration is output and the
     * XHTML namespace plus every entry of $wgXhtmlNamespaces is added to the
     * attributes; otherwise the HTML5 DOCTYPE is output. A truthy
     * $wgHtml5Version is added as the 'version' attribute.
     *
     * @param array $attribs Attributes of the <html> element
     * @return string Raw HTML
     */
    public static function htmlHeader( $attribs = array() ) {
        global $wgHtml5Version, $wgMimeType, $wgXhtmlNamespaces;

        if ( self::isXmlMimeType( $wgMimeType ) ) { // XHTML5
            // XML mimetyped markup should have an xml header.
            // However a DOCTYPE is not needed.
            // (The end of the declaration is split into two string literals.)
            $ret = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?" . ">\n";

            // Add the standard xmlns
            $attribs['xmlns'] = 'http://www.w3.org/1999/xhtml';

            // And support custom namespaces
            foreach ( $wgXhtmlNamespaces as $tag => $ns ) {
                $attribs["xmlns:$tag"] = $ns;
            }
        } else { // HTML5
            // DOCTYPE
            $ret = "<!DOCTYPE html>\n";
        }

        if ( $wgHtml5Version ) {
            $attribs['version'] = $wgHtml5Version;
        }

        $html = self::openElement( 'html', $attribs );

        // openElement() may return '' for <html>; only add the line break
        // when a tag was actually produced.
        if ( $html ) {
            $html .= "\n";
        }

        return $ret . $html;
    }

    /**
     * Tell whether the given MIME type is an XML MIME type.
     *
     * @param string $mimetype MIME type
     * @return bool
     */
    public static function isXmlMimeType( $mimetype ) {
        // http://www.whatwg.org/html/infrastructure.html#xml-mime-type
        // * text/xml
        // * application/xml
        // * Any mimetype with a subtype ending in +xml (this implicitly includes application/xhtml+xml)
        return (bool)preg_match( '!^(text|application)/xml$|^.+/.+\+xml$!', $mimetype );
    }

    /**
     * Build an info box: an icon on the left and a text block on the right.
     *
     * @param string $text Raw HTML of the right-hand block; inserted unescaped
     * @param string $icon Icon file; prefixed with "$wgStylePath/common/images/"
     *   if $useStylePath is true, otherwise used as the image URL as-is
     * @param string $alt Alternate text of the icon
     * @param string|bool $class Extra class(es) for the outer <div>
     * @param bool $useStylePath Whether $icon is relative to the style path
     * @return string Raw HTML
     */
    public static function infoBox( $text, $icon, $alt, $class = false, $useStylePath = true ) {
        global $wgStylePath;

        if ( $useStylePath ) {
            $icon = $wgStylePath . '/common/images/' . $icon;
        }

        $image = self::element( 'img', array(
            'src' => $icon,
            'alt' => $alt,
        ) );
        $clear = self::element( 'div', array( 'style' => 'clear: left;' ), ' ' );

        // The surplus space left behind by a false $class is removed by the
        // class normalization in expandAttributes().
        $s = self::openElement( 'div', array( 'class' => "mw-infobox $class" ) );

        $s .= self::openElement( 'div', array( 'class' => 'mw-infobox-left' ) )
            . $image
            . self::closeElement( 'div' );

        $s .= self::openElement( 'div', array( 'class' => 'mw-infobox-right' ) )
            . $text
            . self::closeElement( 'div' );
        $s .= $clear;

        $s .= self::closeElement( 'div' );

        $s .= $clear;

        return $s;
    }

    /**
     * Build the value of a srcset attribute.
     *
     * @param array $urls Map of pixel density => image URL
     * @return string Comma-separated "URL DENSITYx" image candidates
     */
    public static function srcSet( $urls ) {
        $candidates = array();
        foreach ( $urls as $density => $url ) {
            // Image candidate syntax per current whatwg live spec, 2012-09-23:
            // http://www.whatwg.org/html/embedded-content-1.html#attr-img-srcset
            $candidates[] = "{$url} {$density}x";
        }
        return implode( ", ", $candidates );
    }
}