MediaWiki  REL1_24
renderDump.php
Go to the documentation of this file.
00001 <?php
00031 require_once __DIR__ . '/Maintenance.php';
00032 
/**
 * Maintenance script that reads an XML dump from standard input and writes a
 * basic HTML rendering of every revision it contains to a separate file.
 */
class DumpRenderer extends Maintenance {

    /** @var int Number of revisions handled so far; also embedded in file names */
    private $count = 0;

    /** @var string Value of the 'output-dir' option: where HTML files are written */
    private $outputDirectory;

    /** @var float microtime( true ) captured at the start of execute() */
    private $startTime;

    /**
     * @var string File name prefix: the 'prefix' option (default 'wiki'), with
     *  "-<parser class>" appended when the 'parser' option is given
     */
    private $prefix;

    /**
     * Register the script description and its command line options.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
        $this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
        $this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
        $this->addOption( 'parser', 'Use an alternative parser class', false, true );
    }

    /**
     * Read the dump from stdin, render each revision through handleRevision(),
     * then report how many pages were rendered and how long it took.
     */
    public function execute() {
        $this->outputDirectory = $this->getOption( 'output-dir' );
        $this->prefix = $this->getOption( 'prefix', 'wiki' );
        $this->startTime = microtime( true );

        if ( $this->hasOption( 'parser' ) ) {
            global $wgParserConf;
            $wgParserConf['class'] = $this->getOption( 'parser' );
            // Tag the output files with the parser class so runs made with
            // different parsers do not share file names.
            $this->prefix .= "-{$wgParserConf['class']}";
        }

        $source = new ImportStreamSource( $this->getStdin() );
        $importer = new WikiImporter( $source );

        // Objects are handles, so the callback needs no reference to $this.
        $importer->setRevisionCallback( array( $this, 'handleRevision' ) );

        $importer->doImport();

        // The summary goes through error() rather than output(); presumably so
        // it stays separate from the file name listing -- confirm in Maintenance.
        $delta = microtime( true ) - $this->startTime;
        $this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
        if ( $delta > 0 ) {
            // Guarded to avoid dividing by zero when no time elapsed.
            $this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
        }
        $this->error( "\n" );
    }

    /**
     * Revision callback: render one revision to HTML and save it to a file
     * named "<output-dir>/<prefix>-<7 digit counter>-<url-encoded title>.html".
     *
     * @param object $rev Revision passed in by the importer callback
     *  (presumably a WikiRevision -- confirm against WikiImporter). Must
     *  provide getTitle() and getContent().
     */
    public function handleRevision( $rev ) {
        $title = $rev->getTitle();
        if ( !$title ) {
            $this->error( "Got bogus revision with null title!" );

            return;
        }
        $display = $title->getPrefixedText();

        $this->count++;

        // rawurlencode() escapes characters such as "/" so the title can be
        // used as a single path component.
        $sanitized = rawurlencode( $display );
        $filename = sprintf( "%s/%s-%07d-%s.html",
            $this->outputDirectory,
            $this->prefix,
            $this->count,
            $sanitized );
        $this->output( sprintf( "%s\n", $filename ) );

        // Render with the parser options of a freshly constructed User object.
        $user = new User();
        $options = ParserOptions::newFromUser( $user );

        $content = $rev->getContent();
        $output = $content->getParserOutput( $title, null, $options );

        $html = "<!DOCTYPE html>\n" .
            "<html lang=\"en\" dir=\"ltr\">\n" .
            "<head>\n" .
            "<meta charset=\"UTF-8\" />\n" .
            "<title>" . htmlspecialchars( $display ) . "</title>\n" .
            "</head>\n" .
            "<body>\n" .
            $output->getText() .
            "</body>\n" .
            "</html>";

        // file_put_contents() returns false on failure (for example a missing
        // output directory); report it instead of silently losing the page.
        if ( file_put_contents( $filename, $html ) === false ) {
            $this->error( "Failed to write {$filename}" );
        }
    }
}
00121 
// Name of the Maintenance subclass defined in this file; presumably picked up
// by the script included on the next line -- confirm against Maintenance.php.
$maintClass = 'DumpRenderer';
require_once RUN_MAINTENANCE_IF_MAIN;