MediaWiki  REL1_19
Tidy.php
Go to the documentation of this file.
00001 <?php
/**
 * Prepares an HTML fragment for tidy and undoes that preparation afterwards.
 *
 * getWrapped() swaps every match of ParserOutput::EDITSECTION_REGEX for a
 * unique marker string and embeds the result in a minimal XHTML document.
 * postprocess() substitutes the original matched text back in place of the
 * markers.
 */
class MWTidyWrapper {

        /**
         * Marker => original text pairs recorded by the last getWrapped() call;
         * null until getWrapped() has been called.
         * @var ReplacementArray|null
         */
        protected $mTokens;

        /**
         * Random prefix shared by all markers of the last getWrapped() call.
         * @var string|null
         */
        protected $mUniqPrefix;

        /**
         * Sequence number used for the next marker.
         * @var int
         */
        protected $mMarkerIndex;

        public function __construct() {
                $this->mTokens = null;
                $this->mUniqPrefix = null;
                $this->mMarkerIndex = 0;
        }

        /**
         * Replace edit section placeholders with markers and wrap the text in
         * an XHTML 1.0 Transitional document skeleton.
         *
         * @param $text String: HTML fragment to wrap
         * @return String: complete document with $text as the <body> content
         */
        public function getWrapped( $text ) {
                $this->mTokens = new ReplacementArray;
                $this->mUniqPrefix = "\x7fUNIQ" .
                        dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
                $this->mMarkerIndex = 0;

                // Plain $this is enough for a callback; objects are handles, so no
                // reference is needed (and &$this is restricted in newer PHP).
                $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
                        array( $this, 'replaceEditSectionLinksCallback' ), $text );

                if ( $wrappedtext === null ) {
                        // preg_replace_callback() returns null on a PCRE error. Fall back
                        // to the unmodified text instead of emitting an empty <body>.
                        $wrappedtext = $text;
                }

                $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
                        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
                        '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';

                return $wrappedtext;
        }

        /**
         * preg_replace_callback() handler: record the matched text under a new
         * marker and return the marker as the replacement.
         *
         * Must stay public because it is invoked as a callback.
         *
         * @param $m Array: regex match; $m[0] is the full matched text
         * @return String: the marker standing in for $m[0]
         */
        public function replaceEditSectionLinksCallback( $m ) {
                $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
                $this->mMarkerIndex++;
                $this->mTokens->setPair( $marker, $m[0] );
                return $marker;
        }

        /**
         * Substitute the recorded original text back in place of the markers.
         *
         * @param $text String: text that may contain markers from getWrapped()
         * @return String: text with markers replaced; returned unchanged when
         *   getWrapped() has not been called on this instance
         */
        public function postprocess( $text ) {
                if ( $this->mTokens === null ) {
                        // No markers were ever issued, so there is nothing to restore.
                        return $text;
                }
                return $this->mTokens->replace( $text );
        }

}
00075 
/**
 * Static entry points for running HTML Tidy over text, using either the PHP
 * tidy class ($wgTidyInternal set) or an external tidy executable.
 */
class MWTidy {
        /**
         * Run tidy over an HTML fragment and return the corrected markup.
         *
         * The text is wrapped by MWTidyWrapper before tidy runs and passed
         * through MWTidyWrapper::postprocess() afterwards.
         *
         * @param $text String: HTML fragment to clean up
         * @return String: corrected text, or the original text followed by an
         *   HTML comment when tidy reported a negative status or produced no
         *   usable output
         */
        public static function tidy( $text ) {
                global $wgTidyInternal;

                $wrapper = new MWTidyWrapper;
                $wrappedtext = $wrapper->getWrapped( $text );

                $retVal = null;
                if ( $wgTidyInternal ) {
                        $correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal );
                } else {
                        $correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal );
                }

                if ( $retVal < 0 ) {
                        wfDebug( "Possible tidy configuration error!\n" );
                        return $text . "\n<!-- Tidy was unable to run -->\n";
                } elseif ( is_null( $correctedtext ) ) {
                        wfDebug( "Tidy error detected!\n" );
                        return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
                }

                $correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens

                return $correctedtext;
        }

        /**
         * Run tidy in error-reporting mode over the given text.
         *
         * @param $text String: markup to check (passed to tidy as is, unwrapped)
         * @param &$errorStr String: receives tidy's error output
         * @return Boolean: true when tidy's status is 0, or when the status is
         *   negative and no error output was produced
         */
        public static function checkErrors( $text, &$errorStr = null ) {
                global $wgTidyInternal;

                $retval = 0;
                if( $wgTidyInternal ) {
                        $errorStr = self::execInternalTidy( $text, true, $retval );
                } else {
                        $errorStr = self::execExternalTidy( $text, true, $retval );
                }

                return ( $retval < 0 && $errorStr == '' ) || $retval == 0;
        }

        /**
         * Spawn the external tidy executable ($wgTidyBin) and pipe $text
         * through it.
         *
         * @param $text String: input written to tidy's stdin
         * @param $stderr Boolean: if true, return what tidy wrote to stderr and
         *   discard stdout; if false, return stdout and discard stderr
         * @param &$retval Integer: receives the proc_close() result, or -1 when
         *   the process could not be started
         * @return String|null: captured output; null when $stderr is false and
         *   nothing was captured for non-empty input
         */
        private static function execExternalTidy( $text, $stderr = false, &$retval = null ) {
                global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
                wfProfileIn( __METHOD__ );

                $cleansource = '';
                $opts = ' -utf8';

                // Capture only the stream we are interested in; the other one is
                // sent to the null device.
                if ( $stderr ) {
                        $descriptorspec = array(
                                0 => array( 'pipe', 'r' ),
                                1 => array( 'file', wfGetNull(), 'a' ),
                                2 => array( 'pipe', 'w' )
                        );
                } else {
                        $descriptorspec = array(
                                0 => array( 'pipe', 'r' ),
                                1 => array( 'pipe', 'w' ),
                                2 => array( 'file', wfGetNull(), 'a' )
                        );
                }

                $readpipe = $stderr ? 2 : 1;
                $pipes = array();

                $process = proc_open(
                        "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );

                if ( is_resource( $process ) ) {
                        // Theoretically, this style of communication could cause a deadlock
                        // here. If the stdout buffer fills up, then writes to stdin could
                        // block. This doesn't appear to happen with tidy, because tidy only
                        // writes to stdout after it's finished reading from stdin. Search
                        // for tidyParseStdin and tidySaveStdout in console/tidy.c
                        fwrite( $pipes[0], $text );
                        fclose( $pipes[0] );

                        // Read everything up to EOF in one call. A failed read yields
                        // false, in which case the output stays empty.
                        $output = stream_get_contents( $pipes[$readpipe] );
                        if ( $output !== false ) {
                                $cleansource = $output;
                        }

                        fclose( $pipes[$readpipe] );
                        $retval = proc_close( $process );
                } else {
                        wfWarn( "Unable to start external tidy process" );
                        $retval = -1;
                }

                if ( !$stderr && $cleansource == '' && $text != '' ) {
                        // Some kind of error happened, so we couldn't get the corrected text.
                        // Just give up; we'll use the source text and append a warning.
                        $cleansource = null;
                }

                wfProfileOut( __METHOD__ );
                return $cleansource;
        }

        /**
         * Run $text through the PHP tidy class, configured from $wgTidyConf.
         *
         * @param $text String: input to parse
         * @param $stderr Boolean: if true, return tidy's error buffer instead
         *   of the repaired markup
         * @param &$retval Integer: receives tidy's status, or -1 when the tidy
         *   class is not available
         * @return String|null: error buffer or repaired markup; null when the
         *   tidy class is missing or tidy reported status 2 (fatal error)
         */
        private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
                global $wgTidyConf, $wgDebugTidy;
                wfProfileIn( __METHOD__ );

                if ( !MWInit::classExists( 'tidy' ) ) {
                        wfWarn( "Unable to load internal tidy class." );
                        $retval = -1;

                        wfProfileOut( __METHOD__ );
                        return null;
                }

                $tidy = new tidy;
                $tidy->parseString( $text, $wgTidyConf, 'utf8' );

                if ( $stderr ) {
                        $retval = $tidy->getStatus();

                        wfProfileOut( __METHOD__ );
                        return $tidy->errorBuffer;
                } else {
                        $tidy->cleanRepair();
                        $retval = $tidy->getStatus();
                        if ( $retval == 2 ) {
                                // 2 is magic number for fatal error
                                // http://www.php.net/manual/en/function.tidy-get-status.php
                                $cleansource = null;
                        } else {
                                $cleansource = tidy_get_output( $tidy );
                                if ( $wgDebugTidy && $retval > 0 ) {
                                        // Append tidy's report as an HTML comment, escaping any
                                        // comment terminator inside the report itself.
                                        $cleansource .= "<!--\nTidy reports:\n" .
                                                str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
                                                "\n-->";
                                }
                        }

                        wfProfileOut( __METHOD__ );
                        return $cleansource;
                }
        }
}