MediaWiki
REL1_21
|
<?php
require_once( __DIR__ . '/Maintenance.php' );

/**
 * Maintenance script that reads an XML dump from standard input and writes
 * the rendered HTML of every revision it encounters to its own file.
 *
 * Output files are named "<output-dir>/<prefix>-<counter>-<encoded title>.html".
 */
class DumpRenderer extends Maintenance {

	/** Number of revisions handled so far; also used to number the output files. */
	private $count = 0;

	/** Directory that receives the HTML files (value of --output-dir). */
	private $outputDirectory;

	/** File name prefix (value of --prefix, plus the parser class when --parser is given). */
	private $prefix;

	/** microtime( true ) taken when execute() started; used for the final statistics. */
	private $startTime;

	/**
	 * Registers the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Runs the import over stdin, rendering each revision through
	 * handleRevision(), then reports how many pages were rendered and how fast.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = microtime( true );

		if ( $this->hasOption( 'parser' ) ) {
			// Swap the configured parser class and tag the output files with it,
			// so runs with different parsers do not share file names.
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are passed by handle, so no reference to $this is needed here.
		$importer->setRevisionCallback(
			array( $this, 'handleRevision' ) );

		$importer->doImport();

		// Statistics go through error() while the per-file names go through output().
		$delta = microtime( true ) - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			// Guarded to avoid a division by zero when no time has elapsed.
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Callback invoked by the importer for each revision: renders the revision
	 * content and writes it to a numbered HTML file in the output directory.
	 *
	 * @param $rev object Revision handed over by WikiImporter; must provide
	 *   getTitle() and getContent() (presumably a WikiRevision - confirm).
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// URL-encode the title so it can be embedded in a file name.
		$sanitized = rawurlencode( $display );
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			$sanitized );
		$this->output( sprintf( "%s\n", $filename ) );

		// Render with the parser options derived from a freshly constructed User.
		$user = new User();
		$options = ParserOptions::newFromUser( $user );

		$content = $rev->getContent();
		$output = $content->getParserOutput( $title, null, $options );

		$html =
			"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " .
			"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" .
			"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" .
			"<head>\n" .
			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$output->getText() .
			"</body>\n" .
			"</html>";

		// Report a failed write but keep going, so one bad file does not abort the run.
		if ( file_put_contents( $filename, $html ) === false ) {
			$this->error( "Failed to write $filename" );
		}
	}
}

$maintClass = "DumpRenderer";
require_once( RUN_MAINTENANCE_IF_MAIN );