MediaWiki
REL1_19
|
<?php
/**
 * Maintenance script that reads an XML dump from standard input, parses the
 * text of every revision it contains and writes the result as a standalone
 * HTML file into an output directory.
 */

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

/**
 * Take page text out of an XML dump file and render basic HTML out to files.
 */
class DumpRenderer extends Maintenance {

	/** Number of revisions handled so far; also used to number the output files. */
	private $count = 0;

	/** Directory the HTML files are written to (value of --output-dir). */
	private $outputDirectory;

	/** Timestamp taken with wfTime() when execute() starts, for the final statistics. */
	private $startTime;

	/**
	 * File name prefix for rendered files (value of --prefix, default "wiki"),
	 * with "-<parser class>" appended when --parser is given.
	 */
	private $prefix;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Read the dump from stdin, render every revision through
	 * handleRevision() and finally report how many pages were rendered and
	 * how long it took.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = wfTime();

		if ( $this->hasOption( 'parser' ) ) {
			// Override the configured parser class globally and tag the
			// output file names with it, so that runs with different
			// parsers can share one output directory.
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are handles already; taking $this by reference is a PHP 4
		// leftover that newer PHP versions restrict.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$importer->doImport();

		// NOTE(review): the statistics go through error() rather than
		// output(), presumably to keep them separate from the list of file
		// names printed per revision -- confirm against Maintenance::error().
		$delta = wfTime() - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Revision callback for the importer: parse one revision and write the
	 * resulting HTML, wrapped in a minimal XHTML page, to
	 * "<output-dir>/<prefix>-<7 digit counter>-<url-encoded title>.html".
	 *
	 * Revisions without a title are reported and skipped without being
	 * counted.
	 *
	 * @param $rev Revision object handed over by the importer; must provide
	 *   getTitle() and getText() (presumably a WikiRevision -- confirm)
	 */
	public function handleRevision( $rev ) {
		global $wgParserConf;

		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// rawurlencode() keeps characters such as "/" in the title from
		// being interpreted as part of the path.
		$sanitized = rawurlencode( $display );
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			$sanitized );
		$this->output( sprintf( "%s\n", $filename ) );

		// A fresh User object and a fresh parser are created for every
		// revision.
		$user = new User();
		$parserClass = $wgParserConf['class'];
		$parser = new $parserClass();
		$options = ParserOptions::newFromUser( $user );

		$output = $parser->parse( $rev->getText(), $title, $options );

		$html =
			"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " .
			"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" .
			"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" .
			"<head>\n" .
			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$output->getText() .
			"</body>\n" .
			"</html>";

		// file_put_contents() returns false on failure; report it and carry
		// on with the remaining revisions instead of losing the page
		// silently.
		if ( file_put_contents( $filename, $html ) === false ) {
			$this->error( "Failed to write $filename" );
		}
	}
}

$maintClass = "DumpRenderer";
require_once( RUN_MAINTENANCE_IF_MAIN );