MediaWiki
REL1_20
|
<?php
require_once( __DIR__ . '/Maintenance.php' );

/**
 * Maintenance script that reads an XML dump from standard input and writes
 * one rendered HTML file per revision into an output directory.
 */
class DumpRenderer extends Maintenance {

	/** Number of revisions handled so far; also used to number output files. */
	private $count = 0;

	/** Directory the HTML files are written to (value of --output-dir). */
	private $outputDirectory;

	/** microtime( true ) timestamp taken when execute() starts. */
	private $startTime;

	/**
	 * File name prefix (value of --prefix, default "wiki"), with the parser
	 * class name appended when --parser is given.
	 */
	private $prefix;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Read the dump from stdin, render every revision through
	 * handleRevision(), then report totals and throughput.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = microtime( true );

		if ( $this->hasOption( 'parser' ) ) {
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			// Tag the output files with the parser class so runs with
			// different parsers do not share file names.
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are passed by handle; no reference to $this is needed.
		$importer->setRevisionCallback(
			array( $this, 'handleRevision' ) );

		$importer->doImport();

		// Statistics go through error() rather than output(), keeping them
		// apart from the file names that handleRevision() prints via output().
		$delta = microtime( true ) - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Callback invoked by the importer for each revision: parse its text and
	 * write the result to a numbered HTML file in the output directory.
	 *
	 * @param $rev object Revision handed over by the importer; must provide
	 *   getTitle() and getText() (presumably a WikiRevision -- confirm
	 *   against WikiImporter).
	 */
	public function handleRevision( $rev ) {
		global $wgParserConf;

		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// URL-encode the title so it can be embedded in a file name.
		$sanitized = rawurlencode( $display );
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			$sanitized );
		$this->output( $filename . "\n" );

		$user = new User();
		$parser = new $wgParserConf['class']();
		$options = ParserOptions::newFromUser( $user );

		$output = $parser->parse( $rev->getText(), $title, $options );

		$html = $this->buildPage( $display, $output->getText() );
		if ( file_put_contents( $filename, $html ) === false ) {
			// Report the failure instead of silently counting the page as rendered.
			$this->error( "Failed to write $filename" );
		}
	}

	/**
	 * Wrap a rendered body in a minimal XHTML 1.0 Transitional document.
	 *
	 * @param $titleText string Plain-text page title; HTML-escaped here.
	 * @param $bodyHtml string Already-rendered HTML, inserted unchanged.
	 * @return string Complete HTML document.
	 */
	private function buildPage( $titleText, $bodyHtml ) {
		return "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " .
			"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" .
			"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" .
			"<head>\n" .
			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
			"<title>" . htmlspecialchars( $titleText ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$bodyHtml .
			"</body>\n" .
			"</html>";
	}
}

$maintClass = "DumpRenderer";
require_once( RUN_MAINTENANCE_IF_MAIN );