MediaWiki  REL1_21
Tidy.php
Go to the documentation of this file.
00001 <?php
/**
 * Prepares an HTML fragment for a Tidy run and undoes that preparation
 * afterwards.
 *
 * getWrapped() hides content Tidy must not touch behind unique placeholder
 * markers, renames inline <link>/<meta> elements and wraps the fragment in a
 * full XHTML document. postprocess() reverses the renaming and puts the hidden
 * content back.
 */
class MWTidyWrapper {

	/**
	 * Marker => original markup pairs recorded by the last getWrapped() call,
	 * or null when getWrapped() has not been called yet.
	 *
	 * @var ReplacementArray|null
	 */
	protected $mTokens;

	/**
	 * Random prefix shared by all markers of the current getWrapped() call.
	 *
	 * @var string|null
	 */
	protected $mUniqPrefix;

	/**
	 * Sequence number used for the next marker.
	 *
	 * @var int
	 */
	protected $mMarkerIndex;

	public function __construct() {
		$this->mTokens = null;
		$this->mUniqPrefix = null;
		// Initialised here so that every property has a defined starting value.
		$this->mMarkerIndex = 0;
	}

	/**
	 * Turn an HTML fragment into a complete document that can be fed to Tidy.
	 *
	 * Any state left over from a previous call is discarded.
	 *
	 * @param string $text HTML fragment
	 * @return string Complete XHTML document containing the prepared fragment
	 */
	public function getWrapped( $text ) {
		$this->mTokens = new ReplacementArray;
		$this->mUniqPrefix = "\x7fUNIQ" .
			dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
		$this->mMarkerIndex = 0;

		// Replace <mw:editsection> elements with placeholders.
		// $this is passed by value: objects are handles already, so taking a
		// reference to $this is unnecessary.
		$wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
			array( $this, 'replaceEditSectionLinksCallback' ), $text );

		// Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
		// we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
		$wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );

		// Wrap the whole thing in a doctype and body for Tidy.
		$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
			' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' .
			'<head><title>test</title></head><body>' . $wrappedtext . '</body></html>';

		return $wrappedtext;
	}

	/**
	 * preg_replace_callback() handler: remember the matched markup under a
	 * fresh marker and return that marker as the replacement.
	 *
	 * @param array $m Match array; only the full match $m[0] is used
	 * @return string The placeholder marker
	 */
	public function replaceEditSectionLinksCallback( $m ) {
		$marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
		$this->mMarkerIndex++;
		$this->mTokens->setPair( $marker, $m[0] );
		return $marker;
	}

	/**
	 * Undo the changes made by getWrapped() on text that has been through Tidy.
	 *
	 * @param string $text Tidy output
	 * @return string Text with <link>/<meta> names and hidden markup restored
	 */
	public function postprocess( $text ) {
		// Revert <html-{link,meta}> back to <{link,meta}>
		$text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );

		// Restore the contents of placeholder tokens. There is nothing to
		// restore (and no token table to call) if getWrapped() never ran.
		if ( $this->mTokens !== null ) {
			$text = $this->mTokens->replace( $text );
		}

		return $text;
	}

}
00105 
/**
 * Static entry points for running HTML through Tidy, using either the PHP
 * tidy class or an external tidy binary depending on $wgTidyInternal.
 */
class MWTidy {
	/**
	 * Clean up an HTML fragment with Tidy.
	 *
	 * The fragment is wrapped with MWTidyWrapper, run through Tidy and then
	 * unwrapped again. When Tidy cannot be run, or yields no result, the
	 * original text is returned with an explanatory HTML comment appended.
	 *
	 * @param string $text HTML fragment to clean
	 * @return string Cleaned text, or the original text plus a warning comment
	 */
	public static function tidy( $text ) {
		global $wgTidyInternal;

		$wrapper = new MWTidyWrapper;
		$input = $wrapper->getWrapped( $text );

		$retVal = null;
		$output = $wgTidyInternal
			? self::execInternalTidy( $input, false, $retVal )
			: self::execExternalTidy( $input, false, $retVal );

		// A negative status means Tidy could not be run at all.
		if ( $retVal < 0 ) {
			wfDebug( "Possible tidy configuration error!\n" );
			return $text . "\n<!-- Tidy was unable to run -->\n";
		}

		// Tidy ran, but produced nothing usable.
		if ( is_null( $output ) ) {
			wfDebug( "Tidy error detected!\n" );
			return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
		}

		// restore any hidden tokens
		return $wrapper->postprocess( $output );
	}

	/**
	 * Run Tidy in error-reporting mode over the given text.
	 *
	 * @param string $text Text to check; it is passed to Tidy as is
	 * @param string &$errorStr Receives Tidy's error output
	 * @return bool True when the status is 0, or when the status is negative
	 *   and no error output was produced; false otherwise
	 */
	public static function checkErrors( $text, &$errorStr = null ) {
		global $wgTidyInternal;

		$status = 0;
		$errorStr = $wgTidyInternal
			? self::execInternalTidy( $text, true, $status )
			: self::execExternalTidy( $text, true, $status );

		if ( $status == 0 ) {
			return true;
		}

		return $status < 0 && $errorStr == '';
	}

	/**
	 * Run the external tidy binary ($wgTidyBin) on the given text.
	 *
	 * @param string $text Text sent to tidy's stdin
	 * @param bool $stderr When true, capture and return stderr instead of stdout
	 * @param int &$retval Receives the value returned by proc_close(), or -1
	 *   when the process could not be started
	 * @return string|null Captured output; null when stdout was requested,
	 *   nothing came back and the input was not empty
	 */
	private static function execExternalTidy( $text, $stderr = false, &$retval = null ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		wfProfileIn( __METHOD__ );

		// The stream we are not interested in is appended to the null device.
		$feed = array( 'pipe', 'r' );
		$capture = array( 'pipe', 'w' );
		$discard = array( 'file', wfGetNull(), 'a' );

		if ( $stderr ) {
			$readpipe = 2;
			$descriptorspec = array( 0 => $feed, 1 => $discard, 2 => $capture );
		} else {
			$readpipe = 1;
			$descriptorspec = array( 0 => $feed, 1 => $capture, 2 => $discard );
		}

		$command = "$wgTidyBin -config $wgTidyConf $wgTidyOpts" . ' -utf8';
		$pipes = array();
		$process = proc_open( $command, $descriptorspec, $pipes );

		$collected = '';
		if ( !is_resource( $process ) ) {
			wfWarn( "Unable to start external tidy process" );
			$retval = -1;
		} else {
			// Theoretically, this style of communication could cause a deadlock
			// here. If the stdout buffer fills up, then writes to stdin could
			// block. This doesn't appear to happen with tidy, because tidy only
			// writes to stdout after it's finished reading from stdin. Search
			// for tidyParseStdin and tidySaveStdout in console/tidy.c
			fwrite( $pipes[0], $text );
			fclose( $pipes[0] );

			$source = $pipes[$readpipe];
			while ( !feof( $source ) ) {
				$collected .= fgets( $source, 1024 );
			}
			fclose( $source );

			$retval = proc_close( $process );
		}

		if ( !$stderr && $collected == '' && $text != '' ) {
			// Some kind of error happened, so we couldn't get the corrected text.
			// Just give up; the caller uses the source text and appends a warning.
			$collected = null;
		}

		wfProfileOut( __METHOD__ );
		return $collected;
	}

	/**
	 * Run Tidy through PHP's tidy class.
	 *
	 * @param string $text Text to parse
	 * @param bool $stderr When true, return the error buffer instead of the
	 *   repaired markup
	 * @param int &$retval Receives tidy's status, or -1 when the tidy class is
	 *   not available
	 * @return string|null Repaired markup or error buffer; null when the tidy
	 *   class is missing or tidy reports status 2
	 */
	private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
		global $wgTidyConf, $wgDebugTidy;
		wfProfileIn( __METHOD__ );

		if ( !MWInit::classExists( 'tidy' ) ) {
			wfWarn( "Unable to load internal tidy class." );
			$retval = -1;

			wfProfileOut( __METHOD__ );
			return null;
		}

		$tidy = new tidy;
		$tidy->parseString( $text, $wgTidyConf, 'utf8' );

		if ( $stderr ) {
			// Error-report mode: no repair pass, just hand back the diagnostics.
			$retval = $tidy->getStatus();

			wfProfileOut( __METHOD__ );
			return $tidy->errorBuffer;
		}

		$tidy->cleanRepair();
		$retval = $tidy->getStatus();

		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		$cleansource = null;
		if ( $retval != 2 ) {
			$cleansource = tidy_get_output( $tidy );
			if ( $wgDebugTidy && $retval > 0 ) {
				// Append tidy's report as an HTML comment, escaping any
				// comment terminator inside the report itself.
				$cleansource .= "<!--\nTidy reports:\n" .
					str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
					"\n-->";
			}
		}

		wfProfileOut( __METHOD__ );
		return $cleansource;
	}
}