[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * HTML validation and correction
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Class used to hide mw:editsection tokens from Tidy so that it doesn't break them
 * or break on them. This is a bit of a hack for now, but hopefully in the future
 * we may create a real postprocessor or something that will replace this.
 * It's called wrapper because for now it basically takes over MWTidy::tidy's task
 * of wrapping the text in a xhtml block
 *
 * This re-uses some of the parser's UNIQ tricks, though some of it is private so it's
 * duplicated. Perhaps we should create an abstract marker hiding class.
 *
 * Usage is a two step round trip: getWrapped() swaps the protected fragments for
 * unique markers and wraps the text in a full document, and postprocess() swaps
 * the markers back after Tidy has run.
 *
 * @ingroup Parser
 */
class MWTidyWrapper {

	/**
	 * Marker => original fragment pairs recorded by replaceCallback().
	 *
	 * @var ReplacementArray
	 */
	protected $mTokens;

	/**
	 * Random prefix shared by every marker generated in one getWrapped() call.
	 *
	 * @var string|null
	 */
	protected $mUniqPrefix;

	/**
	 * Counter used to number the markers; reset to 0 by getWrapped().
	 *
	 * @var int|null
	 */
	protected $mMarkerIndex;

	public function __construct() {
		$this->mTokens = null;
		$this->mUniqPrefix = null;
	}

	/**
	 * Hide fragments Tidy must not touch and wrap the text in a complete
	 * XHTML document.
	 *
	 * Each call starts a fresh token table and marker prefix, so a previous
	 * call's markers can no longer be restored by postprocess() afterwards.
	 *
	 * @param string $text
	 * @return string
	 */
	public function getWrapped( $text ) {
		$this->mTokens = new ReplacementArray;
		$this->mUniqPrefix = "\x7fUNIQ" .
			dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
		$this->mMarkerIndex = 0;

		// Objects are passed by handle, so a plain $this is enough for the
		// callback; no reference to $this is needed.
		$callback = array( $this, 'replaceCallback' );

		// Replace <mw:editsection> elements with placeholders
		$wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
			$callback, $text );
		// ...and <mw:toc> markers
		$wrappedtext = preg_replace_callback( '/\<\\/?mw:toc\>/',
			$callback, $wrappedtext );
		// ... and <math> tags
		$wrappedtext = preg_replace_callback( '/\<math(.*?)\<\\/math\>/s',
			$callback, $wrappedtext );
		// Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
		// we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
		$wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );

		// Wrap the whole thing in a doctype and body for Tidy.
		$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
			' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' .
			'<head><title>test</title></head><body>' . $wrappedtext . '</body></html>';

		return $wrappedtext;
	}

	/**
	 * preg_replace_callback() handler: record the matched fragment under a
	 * new unique marker and return that marker as the replacement.
	 *
	 * @param array $m Regex match; only the full match $m[0] is stored
	 *
	 * @return string The marker that now stands in for $m[0]
	 */
	public function replaceCallback( $m ) {
		$marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
		$this->mMarkerIndex++;
		$this->mTokens->setPair( $marker, $m[0] );
		return $marker;
	}

	/**
	 * Undo the substitutions made by getWrapped() on Tidy's output.
	 *
	 * @param string $text
	 * @return string
	 */
	public function postprocess( $text ) {
		// Revert <html-{link,meta}> back to <{link,meta}>
		$text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );

		// Restore the contents of placeholder tokens
		$text = $this->mTokens->replace( $text );

		return $text;
	}

}

/**
 * Class to interact with HTML tidy
 *
 * Either the external tidy program or the in-process tidy extension
 * will be used depending on availability. Override the default
 * $wgTidyInternal setting to disable the internal if it's not working.
 *
 * @ingroup Parser
 */
class MWTidy {
	/**
	 * Interface with html tidy, used if $wgUseTidy = true.
	 * If tidy isn't able to correct the markup, the original will be
	 * returned in all its glory with a warning comment appended.
	 *
	 * @param string $text Hideous HTML input
	 * @return string Corrected HTML output
	 */
	public static function tidy( $text ) {
		global $wgTidyInternal;

		$wrapper = new MWTidyWrapper;
		$wrappedtext = $wrapper->getWrapped( $text );

		$retVal = null;
		if ( $wgTidyInternal ) {
			$correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal );
		} else {
			$correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal );
		}

		if ( $retVal < 0 ) {
			// A negative status is what the exec* helpers report when tidy
			// could not be started or loaded at all.
			wfDebug( "Possible tidy configuration error!\n" );
			return $text . "\n<!-- Tidy was unable to run -->\n";
		} elseif ( is_null( $correctedtext ) ) {
			// Tidy ran but produced no usable output.
			wfDebug( "Tidy error detected!\n" );
			return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
		}

		$correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens

		return $correctedtext;
	}

	/**
	 * Check HTML for errors, used if $wgValidateAllHtml = true.
	 *
	 * @param string $text
	 * @param string &$errorStr Return the error string
	 * @return bool Whether the HTML is valid
	 */
	public static function checkErrors( $text, &$errorStr = null ) {
		global $wgTidyInternal;

		$retval = 0;
		if ( $wgTidyInternal ) {
			$errorStr = self::execInternalTidy( $text, true, $retval );
		} else {
			$errorStr = self::execExternalTidy( $text, true, $retval );
		}

		// Valid when tidy reported status 0, or when it reported a negative
		// status without producing any error text.
		return ( $retval < 0 && $errorStr == '' ) || $retval == 0;
	}

	/**
	 * Spawn an external HTML tidy process and get corrected markup back from it.
	 * Also called in OutputHandler.php for full page validation
	 *
	 * @param string $text HTML to check
	 * @param bool $stderr Whether to read result from STDERR rather than STDOUT
	 * @param int &$retval Exit code (-1 on internal error)
	 * @return string|null The requested stream's contents, or null when
	 *  reading STDOUT yielded nothing for a non-empty input
	 */
	private static function execExternalTidy( $text, $stderr = false, &$retval = null ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		wfProfileIn( __METHOD__ );

		$cleansource = '';
		$opts = ' -utf8';

		// Pipe the stream we want to read; send the other one to the null device.
		if ( $stderr ) {
			$descriptorspec = array(
				0 => array( 'pipe', 'r' ),
				1 => array( 'file', wfGetNull(), 'a' ),
				2 => array( 'pipe', 'w' )
			);
		} else {
			$descriptorspec = array(
				0 => array( 'pipe', 'r' ),
				1 => array( 'pipe', 'w' ),
				2 => array( 'file', wfGetNull(), 'a' )
			);
		}

		$readpipe = $stderr ? 2 : 1;
		$pipes = array();

		$process = proc_open(
			"$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );

		//NOTE: At least on linux, the process will be created even if tidy is not installed.
		//      This means that missing tidy will be treated as a validation failure.

		if ( is_resource( $process ) ) {
			// Theoretically, this style of communication could cause a deadlock
			// here. If the stdout buffer fills up, then writes to stdin could
			// block. This doesn't appear to happen with tidy, because tidy only
			// writes to stdout after it's finished reading from stdin. Search
			// for tidyParseStdin and tidySaveStdout in console/tidy.c
			fwrite( $pipes[0], $text );
			fclose( $pipes[0] );
			while ( !feof( $pipes[$readpipe] ) ) {
				$cleansource .= fgets( $pipes[$readpipe], 1024 );
			}
			fclose( $pipes[$readpipe] );
			$retval = proc_close( $process );
		} else {
			wfWarn( "Unable to start external tidy process" );
			$retval = -1;
		}

		if ( !$stderr && $cleansource == '' && $text != '' ) {
			// Some kind of error happened, so we couldn't get the corrected text.
			// Just give up; we'll use the source text and append a warning.
			$cleansource = null;
		}

		wfProfileOut( __METHOD__ );
		return $cleansource;
	}

	/**
	 * Use the HTML tidy extension to use the tidy library in-process,
	 * saving the overhead of spawning a new process.
	 *
	 * @param string $text HTML to check
	 * @param bool $stderr Whether to read result from error status instead of output
	 * @param int &$retval Exit code (-1 on internal error)
	 * @return string|null Repaired markup (or the error buffer when $stderr
	 *  is true); null if the tidy class is missing or tidy reports status 2
	 */
	private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
		global $wgTidyConf, $wgDebugTidy;
		wfProfileIn( __METHOD__ );

		if ( !class_exists( 'tidy' ) ) {
			wfWarn( "Unable to load internal tidy class." );
			$retval = -1;

			wfProfileOut( __METHOD__ );
			return null;
		}

		$tidy = new tidy;
		$tidy->parseString( $text, $wgTidyConf, 'utf8' );

		if ( $stderr ) {
			$retval = $tidy->getStatus();

			wfProfileOut( __METHOD__ );
			return $tidy->errorBuffer;
		}

		$tidy->cleanRepair();
		$retval = $tidy->getStatus();
		if ( $retval == 2 ) {
			// 2 is magic number for fatal error
			// http://www.php.net/manual/en/function.tidy-get-status.php
			$cleansource = null;
		} else {
			$cleansource = tidy_get_output( $tidy );
			if ( $wgDebugTidy && $retval > 0 ) {
				// Escape any "-->" in tidy's report so it cannot close the
				// surrounding HTML comment early and leak into the page.
				$cleansource .= "<!--\nTidy reports:\n" .
					str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
					"\n-->";
			}
		}

		wfProfileOut( __METHOD__ );
		return $cleansource;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |