[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Take page text out of an XML dump file and render basic HTML out to files.
 * This is *NOT* suitable for publishing or offline use; it's intended for
 * running comparative tests of parsing behavior using real-world data.
 *
 * Templates etc are pulled from the local wiki database, not from the dump.
 *
 * Copyright (C) 2006 Brion Vibber <[email protected]>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that takes page text out of an XML dump file
 * and renders basic HTML out to files.
 *
 * The dump is read from standard input; one HTML file is written per
 * revision encountered in the dump.
 *
 * @ingroup Maintenance
 */
class DumpRenderer extends Maintenance {

	/** @var int Number of revisions handled so far; also used in output file names */
	private $count = 0;

	/** @var string Directory the HTML files are written to (the 'output-dir' option) */
	private $outputDirectory;

	/** @var float Timestamp from microtime( true ) taken when execute() starts */
	private $startTime;

	/** @var string File name prefix; 'wiki' by default, suffixed with the parser class if one is given */
	private $prefix;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Take page text out of an XML dump file and render basic HTML out to files";
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Read an XML dump from stdin, render every revision through
	 * handleRevision(), then report the page count and throughput.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = microtime( true );

		if ( $this->hasOption( 'parser' ) ) {
			global $wgParserConf;
			$wgParserConf['class'] = $this->getOption( 'parser' );
			// Tag the output files with the parser class so that runs made
			// with different parsers do not share file names.
			$this->prefix .= "-{$wgParserConf['class']}";
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		// Objects are passed by handle, so no reference to $this is needed.
		$importer->setRevisionCallback(
			array( $this, 'handleRevision' ) );

		$importer->doImport();

		// Statistics are reported through error() rather than output(), which
		// is used for the list of generated file names.
		$delta = microtime( true ) - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			// Guarded to avoid a division by zero on a zero elapsed time.
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Callback function for each revision, turn into HTML and save
	 *
	 * The file is named "<output-dir>/<prefix>-<7 digit counter>-<title>.html",
	 * where the title is the prefixed page title passed through rawurlencode().
	 *
	 * @param Revision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// Percent-encode the title so it can be used as a single path component.
		$sanitized = rawurlencode( $display );
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			$sanitized );
		$this->output( sprintf( "%s\n", $filename ) );

		// Parser options are derived from a freshly constructed User object
		// (presumably an anonymous user with default preferences -- confirm
		// against User/ParserOptions).
		$user = new User();
		$options = ParserOptions::newFromUser( $user );

		$content = $rev->getContent();
		$output = $content->getParserOutput( $title, null, $options );

		$html = "<!DOCTYPE html>\n" .
			"<html lang=\"en\" dir=\"ltr\">\n" .
			"<head>\n" .
			"<meta charset=\"UTF-8\" />\n" .
			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$output->getText() .
			"</body>\n" .
			"</html>";

		// Report a failed write explicitly instead of silently carrying on;
		// the run itself continues with the next revision.
		if ( file_put_contents( $filename, $html ) === false ) {
			$this->error( "Failed to write $filename" );
		}
	}
}

$maintClass = "DumpRenderer";
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |