MediaWiki  REL1_20
renderDump.php
Go to the documentation of this file.
00001 <?php
00031 require_once( __DIR__ . '/Maintenance.php' );
00032 
/**
 * Maintenance script that reads an XML dump from standard input, parses the
 * text of every revision it is handed, and writes one HTML file per revision
 * into the directory given by --output-dir.
 *
 * @ingroup Maintenance
 */
class DumpRenderer extends Maintenance {

	/** @var int Number of revisions handled so far; also embedded in each output file name */
	private $count = 0;

	/** @var string Value of the --output-dir option */
	private $outputDirectory;

	/**
	 * @var string File name prefix: the --prefix option (default "wiki"), with
	 *  "-<parser class>" appended when --parser is given
	 */
	private $prefix;

	/** @var float microtime( true ) taken at the start of execute() */
	private $startTime;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Read the options, run the import over standard input with
	 * handleRevision() as the per-revision callback, then report how many
	 * pages were rendered and how long it took.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = microtime( true );

		if ( $this->hasOption( 'parser' ) ) {
			// Overriding the global makes handleRevision() instantiate the
			// requested class; the class name is also made part of the file
			// prefix.
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are passed by handle, so no reference to $this is needed.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$importer->doImport();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		// Skip the rate when no measurable time has passed (avoids dividing by zero).
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Callback for the importer: render a single revision and write it to
	 * "<output-dir>/<prefix>-<7-digit counter>-<url-encoded title>.html".
	 *
	 * Revisions without a title are reported and skipped. A failed write is
	 * reported and the import continues with the next revision.
	 *
	 * @param object $rev Revision supplied by the importer; must provide
	 *  getTitle() and getText()
	 */
	public function handleRevision( $rev ) {
		global $wgParserConf;

		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// rawurlencode() keeps characters such as "/" out of the file name.
		$sanitized = rawurlencode( $display );
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			$sanitized );
		$this->output( "$filename\n" );

		$user = new User();
		$parser = new $wgParserConf['class']();
		$options = ParserOptions::newFromUser( $user );

		$output = $parser->parse( $rev->getText(), $title, $options );

		$html =
			"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " .
			"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" .
			"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" .
			"<head>\n" .
			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$output->getText() .
			"</body>\n" .
			"</html>";

		// file_put_contents() returns false on failure; say which file was
		// lost instead of silently carrying on.
		if ( file_put_contents( $filename, $html ) === false ) {
			$this->error( "Could not write rendered page to $filename" );
		}
	}
}
00122 
// Name of the Maintenance subclass defined in this file. Presumably read by
// the script included below to decide which class to run - confirm against
// Maintenance.php.
00123 $maintClass = "DumpRenderer";
// RUN_MAINTENANCE_IF_MAIN is not defined in this file; it is expected to be a
// path constant provided by the Maintenance.php include at the top.
00124 require_once( RUN_MAINTENANCE_IF_MAIN );