MediaWiki
REL1_22
|
<?php
/**
 * Base classes for XML dumps and export: the exporter that queries the
 * database, the XML writer that serialises rows, and the output sinks and
 * filters the generated XML is pushed through.
 */

/**
 * Queries pages/revisions or log entries from the database and streams them,
 * serialised by an XmlDumpWriter, into an output sink.
 */
class WikiExporter {
	var $list_authors = false; # Return distinct author list (when not returning full history)
	var $author_list = "";

	var $dumpUploads = false;
	var $dumpUploadFileContents = false;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	var $buffer;

	var $text;

	/** Output sink; a DumpOutput unless replaced through setOutputSink() */
	var $sink;

	/** Database handle passed to the constructor */
	var $db;

	/** One of the history constants, or an array with 'dir', 'offset' and 'limit' keys */
	var $history;

	/** XmlDumpWriter used to serialise rows */
	var $writer;

	/**
	 * Returns the export schema version.
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.8";
	}

	/**
	 * @param $db Database handle used for all queries
	 * @param $history Mixed: one of WikiExporter::FULL, CURRENT, STABLE, LOGS,
	 *   RANGE, or an array with 'dir', 'offset' and 'limit' keys
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param $text Int: WikiExporter::TEXT or WikiExporter::STUB
	 */
	function __construct( $db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replace the output sink; the sink is held by reference.
	 *
	 * @param $sink mixed Object implementing the write*() sink methods
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/** Write the opening of the dump to the sink. */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/** Write the closing of the dump to the sink. */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/** Dump with no extra condition. */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dump pages with page_id >= $start and, when $end is truthy, page_id < $end.
	 *
	 * @param $start Int: inclusive lower limit
	 * @param $end Int: exclusive upper limit, ignored when falsy
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dump revisions with rev_id >= $start and, when $end is truthy, rev_id < $end.
	 *
	 * @param $start Int: inclusive lower limit
	 * @param $end Int: exclusive upper limit, ignored when falsy
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dump the page matching the namespace and DB key of $title.
	 *
	 * @param $title Title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dump the page with the given name.
	 *
	 * @param $name string
	 * @throws MWException when Title::newFromText() returns null for $name
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Dump every page named in $names, one after another.
	 *
	 * @param $names array
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/** Dump with no extra condition (log output requires the LOGS history flag). */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dump log entries with log_id >= $start and, when $end is truthy, log_id < $end.
	 *
	 * @param $start int
	 * @param $end int
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Build the <contributors> fragment listing the distinct, non-suppressed
	 * authors of the revisions matched by $cond, and store it in
	 * $this->author_list.
	 *
	 * @param $cond string SQL condition selecting the page(s)
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Revisions belong to a page through rev_page; rev_id is the
				// revision's own id and must not be compared with page_id.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars() rather than htmlentities(): XML only defines
			// the five basic entities, HTML named entities would be invalid.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Run the dump query for the configured history mode and stream the
	 * result to the sink. On failure the result set is freed, the previous
	 * buffering mode is restored, and the exception is rethrown.
	 *
	 * @param $cond string SQL condition restricting the dump, '' for none
	 * @throws MWException when the history mode is not recognised
	 * @throws Exception anything raised while querying or writing
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		# ($this->history may be an array of offset/limit options, which must
		# not be used as a bit field)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$wrapper = $this->db->resultObject( $result );
				$this->outputLogStream( $wrapper );
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.

				// Freeing result
				try {
					if ( $wrapper ) {
						$wrapper->free();
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Putting database back in previous buffer mode
				try {
					if ( $this->buffer == WikiExporter::STREAM ) {
						$this->db->bufferResults( $prev );
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				$wrapper = $this->db->resultObject( $result );
				# Output dump results
				$this->outputPageStream( $wrapper );

				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.

				// Freeing result
				try {
					if ( $wrapper ) {
						$wrapper->free();
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Putting database back in previous buffer mode
				try {
					if ( $this->buffer == WikiExporter::STREAM ) {
						$this->db->bufferResults( $prev );
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Inform caller about problem; close the profiling section
				// first, exactly as the log-dump branch does.
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Write the rows of a page/revision result set to the sink. A new <page>
	 * is opened whenever the namespace or title differs from the previous
	 * row's; every row is written as a revision. The stored author list is
	 * appended only when the final page is closed.
	 *
	 * @param $resultset array|Traversable Rows to output
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( $last === null ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( $last !== null ) {
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( $last !== null ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Write every row of a log result set to the sink as a log item.
	 *
	 * @param $resultset array|Traversable
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Serialises database rows into the XML fragments that make up a dump.
 */
class XmlDumpWriter {
	/**
	 * Returns the export schema version.
	 * @deprecated in 1.20; use WikiExporter::schemaVersion() instead
	 * @return string
	 */
	function schemaVersion() {
		wfDeprecated( __METHOD__, '1.20' );
		return WikiExporter::schemaVersion();
	}

	/**
	 * Opens the <mediawiki> root element (namespace, schema location,
	 * version and language attributes) followed by the <siteinfo> block.
	 *
	 * @return string
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = WikiExporter::schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				"http://www.mediawiki.org/xml/export-$ver.xsd", #TODO: how do we get a new version up there?
			'version' => $ver,
			'xml:lang' => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Builds the <siteinfo> block from the site name, base URL, generator,
	 * case setting and namespace list.
	 *
	 * @return string
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element holding $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <generator> element holding "MediaWiki $wgVersion"
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalURL() );
	}

	/**
	 * @return string <case> element derived from $wgCapitalLinks
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> block with one <namespace> per formatted namespace
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array(
						'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * Closes the root element opened by openStream().
	 *
	 * @return string
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opens a <page> section: title, namespace, id, plus optional redirect
	 * and restrictions elements. The XmlDumpWriterOpenPage hook may alter
	 * the output.
	 *
	 * @param $row object Database row with page_* fields
	 * @return string
	 */
	function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceOf Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * Closes a <page> section.
	 *
	 * @return string
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Serialises one revision row into a <revision> element. Suppressed
	 * contributor, comment or text are emitted as empty elements carrying a
	 * deleted="deleted" attribute.
	 *
	 * @param $row object Database row with rev_* (and optionally old_text) fields
	 * @return string
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
			$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
		}

		$text = '';
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
			$content_model = strval( $row->rev_content_model );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo test!
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$content_model = ContentHandler::getDefaultModelFor( $title );
		}

		$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

		if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
			$content_format = strval( $row->rev_content_format );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo test!
			$content_handler = ContentHandler::getForModelID( $content_model );
			$content_format = $content_handler->getDefaultFormat();
		}

		$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

		// $this cannot be passed by reference on PHP >= 7.1, so hand the
		// hook a by-reference alias instead.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Serialises one log row into a <logitem> element. Suppressed
	 * contributor, comment or action are emitted as empty elements carrying
	 * a deleted="deleted" attribute.
	 *
	 * @param $row object Database row with log_* and user_name fields
	 * @return string
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, " " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, " " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * @param $timestamp string Timestamp accepted by wfTimestamp()
	 * @param $indent string Prefix for the emitted line
	 * @return string <timestamp> element in ISO 8601 format, newline-terminated
	 */
	function writeTimestamp( $timestamp, $indent = " " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Writes a <contributor> element: <username> and <id> when $id is truthy
	 * or $text is not a valid IP, otherwise a single <ip>.
	 *
	 * @param $id int User id
	 * @param $text string User name or IP address
	 * @param $indent string Prefix for every emitted line
	 * @return string
	 */
	function writeContributor( $id, $text, $indent = " " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * For a row in the file namespace whose local file exists, writes an
	 * <upload> element for each history entry (in reversed getHistory()
	 * order) followed by the current version. Returns '' otherwise.
	 *
	 * @param $row object Database row with page_namespace and page_title
	 * @param $dumpContents bool Whether to embed the file contents
	 * @return string
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Serialises one file version into an <upload> element.
	 *
	 * @param $file File
	 * @param $dumpContents bool Whether to embed the contents base64-encoded
	 * @return string
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			$be = $file->getRepo()->getBackend();
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			# @TODO: too bad this loads the contents into memory (script might swap)
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode(
					$be->getFileContents( array( 'src' => $file->getPath() ) ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
		} else {
			$comment = Xml::elementClean( 'comment', null, $file->getDescription() );
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . $comment . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalURL() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Returns the prefixed text of $title. For non-interwiki titles the
	 * prefix is the content language's namespace text with underscores
	 * turned into spaces.
	 *
	 * @param $title Title
	 * @return string
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->getInterwiki() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}

/**
 * Base class for output stream; prints to stdout or buffer or wherever.
 */
class DumpOutput {

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $page object Page row (unused here)
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev object Revision row (unused here)
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev object Log row (unused here)
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Override to write to a different stream type.
	 *
	 * @param $string string
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * No-op in the base class; file-backed subclasses close, rename and reopen.
	 *
	 * @param $newname mixed
	 */
	function closeRenameAndReopen( $newname ) {
	}

	/**
	 * No-op in the base class; file-backed subclasses close and rename.
	 *
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
	}

	/**
	 * @return null The base class writes to no file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Stream outputter to send data to a file.
 */
class DumpFileOutput extends DumpOutput {
	protected $handle = false, $filename;

	/**
	 * @param $file string Path of the file to write
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	/**
	 * Write the closing text, then close the file handle if one is open.
	 *
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * @param $string string
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * @param $newname mixed
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * @param $newname string
	 * @throws MWException when the rename fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Unwrap a single-element array of names; scalars pass through unchanged.
	 *
	 * @param $newname array|string
	 * @return mixed
	 * @throws MWException when more than one name is supplied
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			} else {
				$newname = $newname[0];
			}
		}
		return $newname;
	}

	/**
	 * Close the file, rename it to $newname and, when $open is true, reopen
	 * the original filename for writing. Does nothing when $newname is falsy.
	 *
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	/**
	 * @return string|null
	 */
	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Stream outputter to send data to a file via some filter program.
 * Even if compression is available in a library, using a separate
 * program can allow us to make use of a multi-processor system.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	protected $procOpenResource = false;

	/**
	 * @param $command string Shell command whose stdin receives the dump
	 * @param $file string|null When given, the command's stdout is redirected to it
	 */
	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Write the closing text, close the pipe, then close the process.
	 *
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Start $command and keep the write end of its stdin pipe as the handle.
	 *
	 * @param $command string
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		$this->handle = $pipes[0];
	}

	/**
	 * @param $newname mixed
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Close the pipe and process, rename the output file and, when $open is
	 * true, restart the command. Does nothing when $newname is falsy.
	 *
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}

}

/**
 * Sends dump output via the gzip compressor.
 */
class DumpGZipOutput extends DumpPipeOutput {

	/**
	 * @param $file string
	 */
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Sends dump output via the bzip2 compressor.
 */
class DumpBZip2Output extends DumpPipeOutput {

	/**
	 * @param $file string
	 */
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Sends dump output via the p7zip compressor.
 */
class Dump7ZipOutput extends DumpPipeOutput {

	/**
	 * @param $file string
	 */
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * @param $file string
	 * @return string 7za command line writing to $file, with its output discarded
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return $command;
	}

	/**
	 * Close the pipe and process, rename the archive and, when $open is
	 * true, start a fresh 7za process on the original filename. Does nothing
	 * when $newname is falsy.
	 *
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			// Guard and reset both resources (as DumpPipeOutput does) so a
			// later writeCloseStream() does not close them a second time.
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Dump output filter class.
 * This just does output filtering and streaming; XML formatting is done
 * higher up, so be careful in what you do.
 */
class DumpFilter {

	/**
	 * Sink that receives whatever passes the filter; held by reference
	 */
	public $sink;

	/**
	 * Whether the page currently being written passed the filter
	 */
	protected $sendingThisPage;

	/**
	 * @param $sink mixed Downstream sink or filter
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * Decide via pass() whether this page is forwarded, and remember the
	 * decision for the page's revisions and closing tag.
	 *
	 * @param $page object
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * @param $rev object
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forward a log item unfiltered to the sink's writeLogItem().
	 *
	 * @param $rev object
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		$this->sink->writeLogItem( $rev, $string );
	}

	/**
	 * @param $newname mixed
	 */
	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return mixed Whatever the sink reports
	 */
	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Override for page-based filter types.
	 *
	 * @param $page object
	 * @return bool True to forward the page; the base class forwards everything
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Simple dump output filter to exclude all talk pages.
 */
class DumpNotalkFilter extends DumpFilter {

	/**
	 * @param $page object
	 * @return bool
	 */
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Dump output filter to include or exclude pages in a given set of namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	var $invert = false;
	var $namespaces = array();

	/**
	 * @param $sink mixed Downstream sink or filter
	 * @param $param string Comma-separated NS_* constant names or numeric
	 *   ids; a leading '!' inverts the selection
	 * @throws MWException on an unrecognised key
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// substr() instead of the removed $param{0} offset syntax; it is
		// also safe on an empty string.
		if ( substr( $param, 0, 1 ) === '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * @param $page object
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return $this->invert xor $match;
	}
}

/**
 * Dump output filter to include only the last revision in each page sequence.
 */
class DumpLatestFilter extends DumpFilter {
	var $page, $pageString, $rev, $revString;

	/**
	 * Hold the page header back until the closing tag arrives.
	 *
	 * @param $page object
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	/**
	 * Emit the held page with its remembered revision, if one was seen,
	 * then reset the held state.
	 *
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	/**
	 * Remember the revision only when it is the held page's latest one.
	 *
	 * @param $rev object
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Base class for output stream; prints to stdout or buffer or wherever.
 * Fans every write out to a list of sinks.
 */
class DumpMultiWriter {

	/** Zero-indexed list of sinks */
	public $sinks;

	/** Number of sinks, fixed at construction time */
	public $count;

	/**
	 * @param $sinks array Zero-indexed list of sinks
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	/**
	 * @param $page object
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	/**
	 * @param $rev object
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forward a log item to every sink. WikiExporter::outputLogStream()
	 * calls this on its sink, so the multi-writer must provide it.
	 *
	 * @param $rev object
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	/**
	 * @param $newnames array One new name per sink
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param $newnames array One new name per sink, indexed like the sinks
	 * @param $open bool
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return array getFilenames() result of every sink, in sink order
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}

}

/**
 * Clean up $string with UtfNormal::cleanUp() and escape it with
 * htmlspecialchars().
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( __FUNCTION__ );
	return $string;
}