MediaWiki  REL1_22
renderDump.php
Go to the documentation of this file.
00001 <?php
00031 require_once __DIR__ . '/Maintenance.php';
00032 
/**
 * Maintenance script that reads an XML dump from standard input and writes
 * one basic HTML file per imported revision into an output directory.
 */
class DumpRenderer extends Maintenance {

    /** @var int Number of revisions handled so far; also embedded in file names */
    private $count = 0;

    /** @var string Directory the HTML files are written to ('output-dir' option) */
    private $outputDirectory;

    /** @var string File-name prefix ('prefix' option, plus the parser class if one was given) */
    private $prefix;

    /** @var float microtime( true ) value captured when execute() starts */
    private $startTime;

    /**
     * Set the script description and register its command-line options.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
        $this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
        $this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
        $this->addOption( 'parser', 'Use an alternative parser class', false, true );
    }

    /**
     * Read the dump from stdin, render every revision through
     * handleRevision(), then report the total count and throughput.
     */
    public function execute() {
        $this->outputDirectory = $this->getOption( 'output-dir' );
        $this->prefix = $this->getOption( 'prefix', 'wiki' );
        $this->startTime = microtime( true );

        if ( $this->hasOption( 'parser' ) ) {
            // Swap the configured parser class and tag the output files with
            // its name so renders from different parsers are distinguishable.
            global $wgParserConf;
            $wgParserConf['class'] = $this->getOption( 'parser' );
            $this->prefix .= "-{$wgParserConf['class']}";
        }

        $source = new ImportStreamSource( $this->getStdin() );
        $importer = new WikiImporter( $source );

        // Objects are passed by handle, so no reference to $this is needed
        // to build the callback.
        $importer->setRevisionCallback( array( $this, 'handleRevision' ) );

        $importer->doImport();

        // The summary goes through error() while file names go through
        // output() -- presumably so the two streams stay separate.
        $delta = microtime( true ) - $this->startTime;
        $this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
        if ( $delta > 0 ) {
            // Guard against division by zero on a near-instant run.
            $this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
        }
        $this->error( "\n" );
    }

    /**
     * Revision callback for the importer: render one revision to an HTML file.
     *
     * The file is named "<output-dir>/<prefix>-<7-digit counter>-<url-encoded title>.html"
     * and its name is printed via output().
     *
     * @param object $rev Revision handed over by the importer; must provide
     *   getTitle() and getContent()
     */
    public function handleRevision( $rev ) {
        $title = $rev->getTitle();
        if ( !$title ) {
            $this->error( "Got bogus revision with null title!" );
            return;
        }
        $display = $title->getPrefixedText();

        $this->count++;

        // URL-encode the title so it is usable as a single path component.
        $sanitized = rawurlencode( $display );
        $filename = sprintf( "%s/%s-%07d-%s.html",
            $this->outputDirectory,
            $this->prefix,
            $this->count,
            $sanitized );
        $this->output( $filename . "\n" );

        $user = new User();
        $options = ParserOptions::newFromUser( $user );

        $content = $rev->getContent();
        $output = $content->getParserOutput( $title, null, $options );

        $html = "<!DOCTYPE html>\n" .
            "<html lang=\"en\" dir=\"ltr\">\n" .
            "<head>\n" .
            "<meta charset=\"UTF-8\" />\n" .
            "<title>" . htmlspecialchars( $display ) . "</title>\n" .
            "</head>\n" .
            "<body>\n" .
            $output->getText() .
            "</body>\n" .
            "</html>";

        // Report a failed write explicitly instead of silently continuing.
        if ( file_put_contents( $filename, $html ) === false ) {
            $this->error( "Failed to write $filename" );
        }
    }
}
00120 
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN (a constant defined outside this
// file -- presumably by Maintenance.php; confirm there).
$maintClass = 'DumpRenderer';
require_once RUN_MAINTENANCE_IF_MAIN;