MediaWiki
REL1_22
|
<?php

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that takes page text out of an XML dump file
 * and renders basic HTML out to files.
 *
 * The dump is read from standard input; one HTML file is written to the
 * output directory for every revision handed to handleRevision().
 */
class DumpRenderer extends Maintenance {

	/** Number of revisions rendered so far; also used to number the output files. */
	private $count = 0;

	/** Directory the HTML files are written to (value of --output-dir). */
	private $outputDirectory;

	/** Timestamp from microtime( true ) taken when execute() starts. */
	private $startTime;

	/** File name prefix (value of --prefix, plus the parser class name if --parser is given). */
	private $prefix;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Read the dump from stdin, render each revision through
	 * handleRevision(), then report the page count and throughput.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = microtime( true );

		if ( $this->hasOption( 'parser' ) ) {
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			// Tag the output files with the parser class that produced them.
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are passed by handle, so the callback needs no reference to $this.
		$importer->setRevisionCallback(
			array( $this, 'handleRevision' ) );

		$importer->doImport();

		// Statistics go through error() so they stay separate from the
		// list of file names printed with output().
		$delta = microtime( true ) - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Render one revision to an HTML file in the output directory.
	 *
	 * Registered in execute() as the importer's revision callback.
	 * The file name is built from the prefix, a zero-padded running
	 * counter and the URL-encoded prefixed page title.
	 *
	 * @param $rev Revision object supplied by the importer; must provide
	 *   getTitle() and getContent()
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// rawurlencode() escapes characters such as "/" that are not
		// usable inside a single file name.
		$sanitized = rawurlencode( $display );
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			$sanitized );
		$this->output( $filename . "\n" );

		// Parse with the options of a freshly constructed User object.
		$user = new User();
		$options = ParserOptions::newFromUser( $user );

		$content = $rev->getContent();
		$output = $content->getParserOutput( $title, null, $options );

		$html = "<!DOCTYPE html>\n" .
			"<html lang=\"en\" dir=\"ltr\">\n" .
			"<head>\n" .
			"<meta charset=\"UTF-8\" />\n" .
			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$output->getText() .
			"</body>\n" .
			"</html>";

		// Report write failures instead of silently counting the page as rendered.
		if ( file_put_contents( $filename, $html ) === false ) {
			$this->error( "Failed to write $filename" );
		}
	}
}

$maintClass = "DumpRenderer";
require_once RUN_MAINTENANCE_IF_MAIN;