MediaWiki
REL1_24
|
<?php
/**
 * XML export of wiki pages and log entries.
 *
 * Contains the exporter (WikiExporter), the XML serializer (XmlDumpWriter),
 * the output sinks (DumpOutput and subclasses) and the sink filters
 * (DumpFilter and subclasses, DumpMultiWriter).
 */

/**
 * Runs the database query for a dump and pushes every row through an
 * XmlDumpWriter into an output sink.
 */
class WikiExporter {
	/** @var bool When true, current-revision dumps with a condition also collect a contributor list */
	public $list_authors = false;

	/** @var bool When true, upload records are written before each page is closed */
	public $dumpUploads = false;

	/** @var bool Passed to XmlDumpWriter::writeUploads() as its $dumpContents argument */
	public $dumpUploadFileContents = false;

	/** @var string XML fragment built by do_list_authors(); appended to the last page of a dump */
	public $author_list = "";

	/** @var mixed Database handle given to the constructor */
	public $db;

	/** @var int|array One of the history constants below, or an array with 'dir', 'offset', 'limit' keys */
	public $history;

	/** @var XmlDumpWriter Serializer turning rows into XML fragments */
	public $writer;

	// History modes, tested as bit flags in dumpFrom()
	const FULL = 1; // full history dumps
	const CURRENT = 2; // latest revision dumps
	const STABLE = 4; // extension defined
	const LOGS = 8; // logging table dumps
	const RANGE = 16; // revisions within a specified range

	// Buffer modes: STREAM makes dumpFrom() call bufferResults( false ) around the query
	const BUFFER = 0;
	const STREAM = 1;

	// Text modes: anything other than STUB joins the text table in dumpFrom()
	const TEXT = 0;
	const STUB = 1;

	/** @var int One of the BUFFER / STREAM constants */
	public $buffer;

	/** @var int One of the TEXT / STUB constants */
	public $text;

	/** @var DumpOutput Sink receiving the generated XML */
	public $sink;

	/**
	 * Returns the export schema version.
	 *
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.9";
	}

	/**
	 * @param mixed $db Database handle used for all queries
	 * @param int|array $history One of the history constants, or an array
	 *   with 'dir', 'offset' and 'limit' keys (see dumpFrom())
	 * @param int $buffer One of WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param int $text One of WikiExporter::TEXT or WikiExporter::STUB
	 */
	function __construct( $db, $history = WikiExporter::CURRENT,
		$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink. The default sink is a plain DumpOutput,
	 * which prints to standard output.
	 *
	 * @param object $sink Any object offering the DumpOutput write* methods
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Writes the stream header produced by the writer to the sink.
	 */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/**
	 * Writes the stream footer produced by the writer to the sink.
	 */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/**
	 * Dumps with no extra condition.
	 */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages whose page_id lies in [$start, $end).
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means no upper bound
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions whose rev_id lies in [$start, $end).
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means no upper bound
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the page identified by a Title object.
	 *
	 * @param Title $title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the page with the given name.
	 *
	 * @param string $name
	 * @throws MWException When Title::newFromText() returns null for $name
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Dumps each named page in turn.
	 *
	 * @param array $names List of page names
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dumps with no extra condition (meant for exporters built with LOGS).
	 */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log entries whose log_id lies in [$start, $end).
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means no upper bound
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds the <contributors> fragment in $this->author_list from the
	 * distinct, non-suppressed revision authors of the pages matching $cond.
	 *
	 * @param string $cond SQL condition selecting the pages
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Revisions belong to a page through rev_page, as in every
				// join built by dumpFrom(); rev_id is the revision's own key.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars(), not htmlentities(): the output is XML, where
			// HTML named entities other than the basic five are undefined.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Best-effort cleanup after a failed dump query: frees the result set and
	 * puts the connection back into its previous buffering mode. Errors raised
	 * during cleanup are ignored because the original exception has priority.
	 *
	 * @param mixed $result Result object, or null when the query never ran
	 * @param mixed $prev Value returned by the earlier bufferResults( false ) call
	 */
	private function cleanupFailedDump( $result, $prev ) {
		// Freeing result
		try {
			if ( $result ) {
				$result->free();
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2 as the caller's
			// exception has higher priority
		}

		// Putting database back in previous buffer mode
		try {
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2 as the caller's
			// exception has higher priority
		}
	}

	/**
	 * Builds and runs the dump query for the configured history mode and
	 * streams the rows to the sink.
	 *
	 * @param string $cond Extra SQL condition, or '' for none
	 * @throws MWException When the history mode is not recognised, or when no
	 *   hook handles a STABLE dump
	 * @throws Exception Anything thrown by the query or the output stage is
	 *   rethrown after cleanup
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		$prev = null;
		# For logging dumps...
		# (an array-valued history is never a log dump; checking first avoids
		# applying a bitwise operator to an array)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$result = null; // Assuring $result is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$this->outputLogStream( $result );
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanupFailedDump( $result, $prev );

				// Inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			$result = null; // Assuring $result is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				# Output dump results
				$this->outputPageStream( $result );

				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanupFailedDump( $result, $prev );

				// Inform caller about problem; close the profiling section
				// first, exactly as the log branch does.
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Walks a page/revision result set and emits page open, revision and page
	 * close events. A new page starts whenever namespace or title differ from
	 * the previous row. The contributor list, if any, is attached only to the
	 * final page of the result set.
	 *
	 * @param mixed $resultset Iterable of joined page/revision rows
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( $last === null ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( $last !== null ) {
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( $last !== null ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Emits one log item event per row.
	 *
	 * @param mixed $resultset Iterable of logging rows
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Turns database rows and file objects into XML fragments.
 */
class XmlDumpWriter {
	/**
	 * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
	 * @return string
	 */
	function schemaVersion() {
		wfDeprecated( __METHOD__, '1.20' );
		return WikiExporter::schemaVersion();
	}

	/**
	 * Returns the opening <mediawiki> tag followed by the <siteinfo> block.
	 *
	 * @return string
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = WikiExporter::schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			/*
			 * When a new version of the schema is created, it needs staging on mediawiki.org.
			 * This requires a change in the operations/mediawiki-config git repo.
			 *
			 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
			 * you copy in the new xsd file.
			 *
			 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
			 * echo "http://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
			 */
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				"http://www.mediawiki.org/xml/export-$ver.xsd",
			'version' => $ver,
			'xml:lang' => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Returns the <siteinfo> block assembled from the helper methods below.
	 *
	 * @return string
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->dbname(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element built from $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <dbname> element built from $wgDBname
	 */
	function dbname() {
		global $wgDBname;
		return Xml::element( 'dbname', array(), $wgDBname );
	}

	/**
	 * @return string <generator> element built from $wgVersion
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalURL() );
	}

	/**
	 * @return string <case> element derived from $wgCapitalLinks
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> block with one <namespace> per formatted namespace
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array(
						'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * Returns the closing tag matching openStream().
	 *
	 * @return string
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opens a <page> element and writes title, namespace, id and, where
	 * applicable, redirect target and restrictions.
	 *
	 * @param object $row Row carrying the page_* fields
	 * @return string
	 */
	public function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ';
				$out .= Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) );
				$out .= "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * Returns the closing tag matching openPage().
	 *
	 * @return string
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Serializes one revision row as a <revision> element. Suppressed user,
	 * comment and text fields are written as empty elements carrying a
	 * deleted="deleted" attribute.
	 *
	 * @param object $row Joined page/revision row (plus text fields unless stubbed)
	 * @return string
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		// Evaluated once; rev_deleted may be absent from the row.
		$textDeleted = isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
			$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
		}

		if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
			$content_model = strval( $row->rev_content_model );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$content_model = ContentHandler::getDefaultModelFor( $title );
		}

		$content_handler = ContentHandler::getForModelID( $content_model );

		if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
			$content_format = strval( $row->rev_content_format );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			$content_format = $content_handler->getDefaultFormat();
		}

		$text = '';
		if ( $textDeleted ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$text = $content_handler->exportTransform( $text, $content_format );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		if ( isset( $row->rev_sha1 )
			&& $row->rev_sha1
			&& !$textDeleted
		) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
		$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

		// The hook receives the writer by reference; $this itself cannot be
		// referenced on newer PHP versions, so hand over a local copy.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Serializes one logging row as a <logitem> element. Suppressed user,
	 * comment and action fields are written as empty elements carrying a
	 * deleted="deleted" attribute.
	 *
	 * @param object $row Logging row joined with user_name
	 * @return string
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, " " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, " " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Formats a timestamp as an indented <timestamp> element in ISO 8601.
	 *
	 * @param string $timestamp Any value accepted by wfTimestamp()
	 * @param string $indent Text placed before the element
	 * @return string
	 */
	function writeTimestamp( $timestamp, $indent = " " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Writes a <contributor> element. A non-zero id, or a name that is not a
	 * valid IP address, yields <username> plus <id>; otherwise <ip>.
	 *
	 * @param int $id User id
	 * @param string $text User name or IP address
	 * @param string $indent Text placed before each line
	 * @return string
	 */
	function writeContributor( $id, $text, $indent = " " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * Writes <upload> records for a file page: the history entries in
	 * reversed order, followed by the current version. Returns '' for rows
	 * outside NS_FILE or when the file does not exist.
	 *
	 * @param object $row Row carrying page_namespace and page_title
	 * @param bool $dumpContents Whether to embed the file contents
	 * @return string
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Writes a single <upload> record.
	 *
	 * @param File $file
	 * @param bool $dumpContents Whether to embed the file contents as base64
	 * @return string
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			$be = $file->getRepo()->getBackend();
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			# @todo Too bad this loads the contents into memory (script might swap)
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode(
					$be->getFileContents( array( 'src' => $file->getPath() ) ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
		} else {
			$comment = Xml::elementClean( 'comment', null, $file->getDescription() );
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . $comment . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalURL() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Returns the title text used in dumps: the content-language namespace
	 * name (underscores turned into spaces), a colon, and the title text.
	 * External titles are returned as their prefixed text.
	 *
	 * @param Title $title
	 * @return string
	 */
	public static function canonicalTitle( Title $title ) {
		global $wgContLang;

		if ( $title->isExternal() ) {
			return $title->getPrefixedText();
		}

		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}

/**
 * Base output sink: every event is passed to write(), which prints it.
 */
class DumpOutput {

	/**
	 * @param string $string
	 */
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Prints the string. Subclasses override this to write elsewhere.
	 *
	 * @param string $string
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * No-op in the base class; file-backed sinks override it.
	 *
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
	}

	/**
	 * No-op in the base class; file-backed sinks override it.
	 *
	 * @param string|array $newname
	 * @param bool $open
	 */
	function closeAndRename( $newname, $open = false ) {
	}

	/**
	 * @return null The base sink has no file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Sink writing to a file.
 */
class DumpFileOutput extends DumpOutput {
	protected $handle = false, $filename;

	/**
	 * @param string $file Path opened with mode "wt"
	 * @throws MWException When the file cannot be opened
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
		if ( $this->handle === false ) {
			// Fail here rather than on every later write to a false handle.
			throw new MWException( __METHOD__ . ": could not open file $file for writing\n" );
		}
	}

	/**
	 * Writes the footer, then closes the file handle if one is open.
	 *
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * @param string $string
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Renames the current file.
	 *
	 * @param string $newname
	 * @throws MWException When rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Unwraps a one-element array of names.
	 *
	 * @param string|array $newname
	 * @return string
	 * @throws MWException When the array holds more than one name
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			} else {
				$newname = $newname[0];
			}
		}
		return $newname;
	}

	/**
	 * Closes the file, renames it, and optionally starts a fresh file under
	 * the original name. Does nothing when $newname is empty.
	 *
	 * @param string|array $newname
	 * @param bool $open Whether to reopen the original filename afterwards
	 * @throws MWException When the rename or the reopen fails
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
				if ( $this->handle === false ) {
					throw new MWException( __METHOD__ .
						": could not open file {$this->filename} for writing\n" );
				}
			}
		}
	}

	/**
	 * @return string|null
	 */
	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Sink writing to the standard input of an external command, optionally
 * redirecting that command's output to a file.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	protected $procOpenResource = false;

	/**
	 * @param string $command Shell command to run
	 * @param string|null $file When given, "> file" is appended to the command
	 * @throws MWException When the command cannot be started
	 */
	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Writes the footer and closes the pipe, then closes the process.
	 *
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Starts the command and keeps its stdin pipe as the write handle.
	 *
	 * @param string $command
	 * @throws MWException When proc_open() fails
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		if ( $this->procOpenResource === false || !isset( $pipes[0] ) ) {
			// No process means no pipe to write to; stop before write() is
			// handed an invalid handle.
			$this->procOpenResource = false;
			$this->handle = false;
			throw new MWException( __METHOD__ . ": could not start command $command\n" );
		}
		$this->handle = $pipes[0];
	}

	/**
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Closes pipe and process, renames the output file, and optionally
	 * restarts the command. Does nothing when $newname is empty.
	 *
	 * @param string|array $newname
	 * @param bool $open Whether to restart the command afterwards
	 * @throws MWException When the rename or the restart fails
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				// NOTE(review): when a file was passed to the constructor,
				// $this->command already ends with a "> file" redirect, so
				// this appends a second one. Kept as is; confirm whether
				// that is intended.
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Sink piping through "gzip" into a file.
 */
class DumpGZipOutput extends DumpPipeOutput {
	/**
	 * @param string $file
	 */
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Sink piping through "bzip2" into a file.
 */
class DumpBZip2Output extends DumpPipeOutput {
	/**
	 * @param string $file
	 */
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Sink piping into "7za", which writes the archive file itself.
 */
class Dump7ZipOutput extends DumpPipeOutput {
	/**
	 * @param string $file Archive path
	 */
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * Builds the 7za command line for the given archive.
	 *
	 * @param string $file
	 * @return string
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return $command;
	}

	/**
	 * Closes pipe and process, renames the archive, and optionally restarts
	 * 7za on the original filename. Does nothing when $newname is empty.
	 *
	 * @param string|array $newname
	 * @param bool $open Whether to restart the command afterwards
	 * @throws MWException When the rename or the restart fails
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			// Same guards as the parent class: the stream may already have
			// been closed by writeCloseStream().
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Pass-through filter in front of another sink. Page events are forwarded
 * only for pages accepted by pass(); everything else is always forwarded.
 */
class DumpFilter {
	/** @var object Sink (or further filter) receiving the forwarded events */
	public $sink;

	/** @var bool Whether the page currently being written was accepted by pass() */
	protected $sendingThisPage;

	/**
	 * @param object $sink
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * @param string $string
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item unfiltered.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	function writeLogItem( $rev, $string ) {
		// Forward as a log item, so that sinks treating log items and
		// revisions differently receive the right event.
		$this->sink->writeLogItem( $rev, $string );
	}

	/**
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param string|array $newname
	 * @param bool $open
	 */
	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return mixed Whatever the wrapped sink reports
	 */
	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Decides whether a page is forwarded. The base filter accepts all pages.
	 *
	 * @param object $page
	 * @return bool
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Filter dropping pages in talk namespaces.
 */
class DumpNotalkFilter extends DumpFilter {
	/**
	 * @param object $page
	 * @return bool
	 */
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Filter keeping (or, when inverted, dropping) pages in a set of namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	/** @var bool True when the parameter started with '!' */
	public $invert = false;

	/** @var array Map of namespace id => true */
	public $namespaces = array();

	/**
	 * @param object $sink
	 * @param string $param Comma-separated namespace keys, each either one of
	 *   the NS_* names listed below or a number; a leading '!' inverts the match
	 * @throws MWException When a key is neither a known name nor numeric
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// substr() instead of the curly-brace offset syntax, which newer PHP
		// versions reject; it also copes with an empty parameter.
		if ( substr( $param, 0, 1 ) === '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * @param object $page
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return ( $this->invert xor $match );
	}
}

/**
 * Filter keeping only the revision whose id equals the page's page_latest.
 * The page is held back until it closes and is dropped if no such revision
 * was seen.
 */
class DumpLatestFilter extends DumpFilter {
	/** @var object|null Row of the page being buffered */
	public $page;

	/** @var string|null XML given with the page open event */
	public $pageString;

	/** @var object|null Row of the latest revision, once seen */
	public $rev;

	/** @var string|null XML of the latest revision, once seen */
	public $revString;

	/**
	 * Remembers the page instead of forwarding it.
	 *
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	/**
	 * Forwards the buffered page with its latest revision, if one was seen,
	 * then resets the buffer.
	 *
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	/**
	 * Keeps the revision only when it is the page's latest one.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Fans every event out to several sinks.
 */
class DumpMultiWriter {
	/** @var array List of sinks, indexed from 0 */
	public $sinks;

	/** @var int Number of sinks */
	public $count;

	/**
	 * @param array $sinks List of sinks, indexed from 0
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param string $string
	 */
	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	/**
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	/**
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item to every sink, so that this writer can also be the
	 * sink of a log dump.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	/**
	 * @param array $newnames One new name per sink, in sink order
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param array $newnames One new name per sink, in sink order
	 * @param bool $open
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return array Result of getFilenames() for each sink, in sink order
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}
}