MediaWiki  REL1_20
Tidy.php
Go to the documentation of this file.
00001 <?php
/**
 * Prepares text for a tidy run and restores it afterwards.
 *
 * getWrapped() swaps every match of ParserOutput::EDITSECTION_REGEX for a
 * unique marker string and embeds the result in a minimal XHTML document.
 * postprocess() substitutes the original matches back in place of the
 * markers.
 */
class MWTidyWrapper {

        /**
         * Marker => original text pairs; null until getWrapped() has run.
         * @var ReplacementArray|null
         */
        protected $mTokens;

        /**
         * Random prefix shared by all markers of the current getWrapped() run.
         * @var string|null
         */
        protected $mUniqPrefix;

        /**
         * Sequence number used for the next marker.
         * @var int
         */
        protected $mMarkerIndex;

        public function __construct() {
                $this->mTokens = null;
                $this->mUniqPrefix = null;
                // Initialise explicitly so the property never holds an implicit null.
                $this->mMarkerIndex = 0;
        }

        /**
         * Hide edit-section markers and wrap the text in an XHTML document.
         *
         * Resets the token table, the marker prefix and the marker counter on
         * every call.
         *
         * @param $text String: text to wrap
         * @return String: complete XHTML document containing the marked-up text
         */
        public function getWrapped( $text ) {
                $this->mTokens = new ReplacementArray;
                $this->mUniqPrefix = "\x7fUNIQ" .
                        dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
                $this->mMarkerIndex = 0;

                // Objects are passed by handle, so no reference to $this is needed
                // for the callback.
                $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
                        array( $this, 'replaceEditSectionLinksCallback' ), $text );

                $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
                        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
                        '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';

                return $wrappedtext;
        }

        /**
         * preg_replace_callback() handler: remember the matched text under a
         * fresh marker and return that marker as the replacement.
         *
         * @param $m Array: match array; only the full match ($m[0]) is used
         * @return String: the marker that now stands in for the match
         */
        public function replaceEditSectionLinksCallback( $m ) {
                $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
                $this->mMarkerIndex++;
                $this->mTokens->setPair( $marker, $m[0] );
                return $marker;
        }

        /**
         * Put the text hidden by getWrapped() back in place of its markers.
         *
         * @param $text String: tidy output
         * @return String: text with markers replaced; returned unchanged when
         *   getWrapped() has not been called yet
         */
        public function postprocess( $text ) {
                if ( $this->mTokens === null ) {
                        // Nothing was hidden, so there is nothing to restore.
                        return $text;
                }
                return $this->mTokens->replace( $text );
        }

}
00093 
/**
 * Static entry points for running tidy over a piece of text, either through
 * PHP's tidy class or through an external tidy binary, depending on
 * $wgTidyInternal.
 */
class MWTidy {
        /**
         * Run tidy over the given text and return the corrected result.
         *
         * The text is passed through MWTidyWrapper before tidy sees it and
         * post-processed by the same wrapper afterwards. If tidy could not be
         * run or reported a fatal problem, the original text is returned with
         * an explanatory HTML comment appended.
         *
         * @param $text String: text to clean up
         * @return String: corrected text, or the input plus a warning comment
         */
        public static function tidy( $text ) {
                global $wgTidyInternal;

                $wrapper = new MWTidyWrapper;
                $document = $wrapper->getWrapped( $text );

                $status = null;
                $tidied = $wgTidyInternal
                        ? self::execInternalTidy( $document, false, $status )
                        : self::execExternalTidy( $document, false, $status );

                if ( $status < 0 ) {
                        wfDebug( "Possible tidy configuration error!\n" );
                        return $text . "\n<!-- Tidy was unable to run -->\n";
                }

                if ( is_null( $tidied ) ) {
                        wfDebug( "Tidy error detected!\n" );
                        return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
                }

                // restore any hidden tokens
                return $wrapper->postprocess( $tidied );
        }

        /**
         * Check a piece of text with tidy and collect its error output.
         *
         * @param $text String: text to check
         * @param &$errorStr String|null: receives tidy's error output
         * @return Boolean: true when the tidy status is 0, or when the status
         *   is negative and no error output was produced; false otherwise
         */
        public static function checkErrors( $text, &$errorStr = null ) {
                global $wgTidyInternal;

                $status = 0;
                $errorStr = $wgTidyInternal
                        ? self::execInternalTidy( $text, true, $status )
                        : self::execExternalTidy( $text, true, $status );

                if ( $status == 0 ) {
                        return true;
                }

                return $status < 0 && $errorStr == '';
        }

        /**
         * Feed the text to an external tidy process and collect its output.
         *
         * @param $text String: text written to the process' stdin
         * @param $stderr Boolean: read stderr (and discard stdout) instead of
         *   reading stdout (and discarding stderr)
         * @param &$retval Integer|null: receives the proc_close() result, or
         *   -1 when the process could not be started
         * @return String|null: the collected output; null when stdout was
         *   requested and came back empty although $text was not empty
         */
        private static function execExternalTidy( $text, $stderr = false, &$retval = null ) {
                global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
                wfProfileIn( __METHOD__ );

                $encodingFlag = ' -utf8';
                $collected = '';

                // Exactly one of stdout/stderr is captured; the other is sent
                // to the null device.
                $capture = array( 'pipe', 'w' );
                $discard = array( 'file', wfGetNull(), 'a' );
                $descriptorspec = array(
                        0 => array( 'pipe', 'r' ),
                        1 => ( $stderr ? $discard : $capture ),
                        2 => ( $stderr ? $capture : $discard )
                );
                $readpipe = $stderr ? 2 : 1;

                $pipes = array();
                $command = "$wgTidyBin -config $wgTidyConf $wgTidyOpts$encodingFlag";
                $process = proc_open( $command, $descriptorspec, $pipes );

                if ( !is_resource( $process ) ) {
                        wfWarn( "Unable to start external tidy process" );
                        $retval = -1;
                } else {
                        // Theoretically, this style of communication could cause a deadlock
                        // here. If the stdout buffer fills up, then writes to stdin could
                        // block. This doesn't appear to happen with tidy, because tidy only
                        // writes to stdout after it's finished reading from stdin. Search
                        // for tidyParseStdin and tidySaveStdout in console/tidy.c
                        fwrite( $pipes[0], $text );
                        fclose( $pipes[0] );

                        $source = $pipes[$readpipe];
                        while ( !feof( $source ) ) {
                                $collected .= fgets( $source, 1024 );
                        }
                        fclose( $source );

                        $retval = proc_close( $process );
                }

                // Empty stdout for non-empty input means some kind of error
                // happened, so the corrected text is unavailable. Give up; the
                // caller uses the source text and appends a warning.
                $gaveUp = !$stderr && $collected == '' && $text != '';

                wfProfileOut( __METHOD__ );
                return $gaveUp ? null : $collected;
        }

        /**
         * Process the text with PHP's tidy class.
         *
         * @param $text String: text to parse
         * @param $stderr Boolean: return the error buffer instead of the
         *   repaired output
         * @param &$retval Integer|null: receives tidy's status, or -1 when the
         *   tidy class is not available
         * @return String|null: error buffer or repaired output; null when the
         *   tidy class is missing or the repair status is 2
         */
        private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
                global $wgTidyConf, $wgDebugTidy;
                wfProfileIn( __METHOD__ );

                if ( !MWInit::classExists( 'tidy' ) ) {
                        wfWarn( "Unable to load internal tidy class." );
                        $retval = -1;

                        wfProfileOut( __METHOD__ );
                        return null;
                }

                $tidy = new tidy;
                $tidy->parseString( $text, $wgTidyConf, 'utf8' );

                if ( $stderr ) {
                        $retval = $tidy->getStatus();
                        $result = $tidy->errorBuffer;
                } else {
                        $tidy->cleanRepair();
                        $retval = $tidy->getStatus();

                        // 2 is magic number for fatal error
                        // http://www.php.net/manual/en/function.tidy-get-status.php
                        if ( $retval == 2 ) {
                                $result = null;
                        } else {
                                $result = tidy_get_output( $tidy );
                                if ( $wgDebugTidy && $retval > 0 ) {
                                        // Escape "-->" so the report cannot close the
                                        // enclosing HTML comment early.
                                        $report = str_replace( '-->', '--&gt;', $tidy->errorBuffer );
                                        $result .= "<!--\nTidy reports:\n" . $report . "\n-->";
                                }
                        }
                }

                wfProfileOut( __METHOD__ );
                return $result;
        }
}