MediaWiki  REL1_19
renderDump.php
Go to the documentation of this file.
00001 <?php
00031 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
00032 
/**
 * Maintenance script that takes page text out of an XML dump read from
 * standard input and writes one basic HTML file per revision into an
 * output directory.
 */
class DumpRenderer extends Maintenance {

	/** Number of revisions handled so far; also used to number the output files. */
	private $count = 0;

	/** Directory the HTML files are written to (the 'output-dir' option). */
	private $outputDirectory;

	/** Value of wfTime() taken when execute() starts; used for the final statistics. */
	private $startTime;

	/**
	 * File name prefix: the 'prefix' option (default "wiki"), with
	 * "-<parser class>" appended when the 'parser' option is given.
	 */
	private $prefix;

	/**
	 * Registers the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Reads the options, runs the import over standard input with
	 * handleRevision() as the per-revision callback, then reports how many
	 * pages were rendered and how long it took.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = wfTime();

		if ( $this->hasOption( 'parser' ) ) {
			// Override the configured parser class globally and tag the
			// output file names with it.
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are passed by handle, so the callback does not need a
		// reference to $this.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$importer->doImport();

		$delta = wfTime() - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		// Skip the rate when no measurable time has elapsed (avoids a division by zero).
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Import callback: parses the text of one revision and writes the result
	 * to "<output-dir>/<prefix>-<7-digit counter>-<url-encoded title>.html".
	 *
	 * Revisions without a title are reported and skipped without being counted.
	 *
	 * @param $rev Revision object handed over by the importer; must provide
	 *   getTitle() and getText()
	 */
	public function handleRevision( $rev ) {
		global $wgParserConf;

		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// rawurlencode() keeps characters such as "/" out of the file name.
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			rawurlencode( $display ) );
		$this->output( $filename . "\n" );

		$user = new User();
		$parser = new $wgParserConf['class']();
		$options = ParserOptions::newFromUser( $user );

		$output = $parser->parse( $rev->getText(), $title, $options );

		$html =
			"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " .
			"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" .
			"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" .
			"<head>\n" .
			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$output->getText() .
			"</body>\n" .
			"</html>";

		// file_put_contents() returns false on failure; report it instead of
		// silently treating the page as written. The run continues.
		if ( file_put_contents( $filename, $html ) === false ) {
			$this->error( "Could not write rendered page to {$filename}" );
		}
	}
}
00116 
// Name of the maintenance class defined in this file; presumably read by the
// file that RUN_MAINTENANCE_IF_MAIN points to (defined outside this file).
$maintClass = 'DumpRenderer';
require_once RUN_MAINTENANCE_IF_MAIN;