MediaWiki
REL1_21
|
<?php
/**
 * Base classes for XML dumps and exports: the exporter that queries the
 * database, the writer that renders rows as XML fragments, and a family of
 * output sinks and filters the fragments are pushed through.
 *
 * NOTE(review): this copy of the file was recovered from a listing in which
 * runs of whitespace appear to have been collapsed. Indentation-only string
 * literals (e.g. " <page>\n") are reproduced exactly as found; confirm them
 * against the upstream file before relying on the exact output layout.
 */

/**
 * Drives an export: builds the page/revision or logging query, streams the
 * rows through an XmlDumpWriter and hands the resulting XML to a sink.
 */
class WikiExporter {
	var $list_authors = false; # Return distinct author list (when not returning full history)
	var $author_list = "";

	var $dumpUploads = false;
	var $dumpUploadFileContents = false;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	/** Database connection the dump queries are run against */
	var $db;

	/** One of the history constants above, or an array with 'dir', 'offset', 'limit' */
	var $history;

	/** WikiExporter::BUFFER or WikiExporter::STREAM */
	var $buffer;

	/** WikiExporter::TEXT or WikiExporter::STUB */
	var $text;

	/** Renders result rows as XML fragments */
	var $writer;

	/** Receives the XML fragments; a DumpOutput unless replaced via setOutputSink() */
	var $sink;

	/**
	 * Returns the export schema version.
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.8";
	}

	/**
	 * @param $db Database connection used for all queries
	 * @param $history Mixed: one of WikiExporter::FULL, CURRENT, STABLE, LOGS or
	 *   RANGE, or an associative array with the keys 'dir' ('asc' or anything
	 *   else for descending), 'offset' (timestamp) and 'limit' (row count)
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param $text Int: WikiExporter::TEXT or WikiExporter::STUB
	 */
	function __construct( $db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink. The sink is held by reference.
	 *
	 * @param $sink Object exposing the write*() methods of DumpOutput
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/** Writes the document header produced by the writer to the sink. */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/** Writes the document footer produced by the writer to the sink. */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/** Dumps with no extra condition. */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages whose page_id lies in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions whose rev_id lies in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the page identified by a title object.
	 *
	 * @param $title Object providing getNamespace() and getDBkey()
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the page with the given title text.
	 *
	 * @param $name String
	 * @throws MWException when Title::newFromText() returns null for $name
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Dumps each named page in turn.
	 *
	 * @param $names Array of title strings
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/** Dumps with no extra condition (intended for LOGS mode). */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log entries whose log_id lies in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds $this->author_list: a <contributors> XML fragment holding the
	 * distinct, non-suppressed authors of the revisions matched by $cond.
	 *
	 * @param $cond String: SQL condition selecting the page(s)
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Revisions belong to a page through rev_page (as in every
				// other join in this class), not through their own rev_id.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars(), not htmlentities(): the output is XML, where
			// HTML named entities such as &eacute; are undefined.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Runs the dump query and streams the result to the sink.
	 *
	 * In LOGS mode the logging table is dumped; otherwise page/revision rows
	 * are dumped according to $this->history.
	 *
	 * @param $cond String: extra SQL condition, '' for none
	 * @throws MWException on an unknown history mode, or in STABLE mode when
	 *   the WikiExporter::dumpStableQuery hook run returns true
	 * @throws Exception anything thrown while querying or writing is rethrown
	 *   after the result set and buffer mode have been cleaned up
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		# (an array-valued history never means LOGS; testing is_array() first
		# also keeps the bitwise test from being applied to an array)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			$prev = null;
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$wrapper = $this->db->resultObject( $result );
				$this->outputLogStream( $wrapper );
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanupAfterDumpFailure( $wrapper, $prev );

				// Inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			$prev = null;
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				$wrapper = $this->db->resultObject( $result );
				# Output dump results
				$this->outputPageStream( $wrapper );

				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanupAfterDumpFailure( $wrapper, $prev );

				// Inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Best-effort cleanup after a failed dump: frees the result set and, in
	 * STREAM mode, restores the previous buffering mode. Exceptions raised
	 * here are swallowed on purpose so the original failure is what the
	 * caller gets to see.
	 *
	 * @param $wrapper Mixed: result wrapper, or null if the query never ran
	 * @param $prev Mixed: value returned by the earlier bufferResults( false )
	 */
	protected function cleanupAfterDumpFailure( $wrapper, $prev ) {
		// Freeing result
		try {
			if ( $wrapper ) {
				$wrapper->free();
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2 as the original
			// exception has higher priority
		}

		// Putting database back in previous buffer mode
		try {
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2 as the original
			// exception has higher priority
		}
	}

	/**
	 * Walks page+revision rows and emits them grouped by page: a page is
	 * opened whenever namespace or title differs from the previous row, and
	 * closed before the next one opens. $this->author_list is appended only
	 * when closing the final page.
	 *
	 * @param $resultset Iterable of rows with page_* and rev_* fields
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( $last === null ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( $last !== null ) {
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( $last !== null ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Emits one log item per row.
	 *
	 * @param $resultset Iterable of logging rows
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Renders database rows as fragments of the export XML document.
 */
class XmlDumpWriter {
	/**
	 * Returns the export schema version.
	 * @deprecated in 1.20; use WikiExporter::schemaVersion() instead
	 * @return string
	 */
	function schemaVersion() {
		wfDeprecated( __METHOD__, '1.20' );
		return WikiExporter::schemaVersion();
	}

	/**
	 * Opens the XML output stream's root <mediawiki> element, followed by the
	 * site info block. The element is left open; closeStream() ends it.
	 *
	 * @return string
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = WikiExporter::schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				"http://www.mediawiki.org/xml/export-$ver.xsd", #TODO: how do we get a new version up there?
			'version' => $ver,
			'xml:lang' => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Builds the <siteinfo> block from the individual site elements.
	 * @return string
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element holding $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <generator> element holding "MediaWiki $wgVersion"
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalUrl() );
	}

	/**
	 * @return string <case> element derived from $wgCapitalLinks
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> block with one <namespace> per formatted
	 *   namespace of the content language
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array( 'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * Closes the root element opened by openStream().
	 * @return string
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opens a <page> section with the page's title, namespace, id and, where
	 * applicable, redirect target and restrictions. The XmlDumpWriterOpenPage
	 * hook may modify the output.
	 *
	 * @param $row object: row with page_namespace, page_title, page_id,
	 *   page_is_redirect and page_restrictions
	 * @return string
	 */
	function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * Closes a <page> section.
	 * @return string
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Renders one <revision> element. Contributor, comment and text are
	 * replaced by deleted="deleted" placeholders when the matching
	 * rev_deleted bit is set. The XmlDumpWriterWriteRevision hook may modify
	 * the output.
	 *
	 * @param $row object: joined page/revision row (plus text fields unless
	 *   dumping stubs)
	 * @return string
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
			$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
		}

		$text = '';
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
			$content_model = strval( $row->rev_content_model );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo: test!
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$content_model = ContentHandler::getDefaultModelFor( $title );
		}

		$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

		if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
			$content_format = strval( $row->rev_content_format );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo: test!
			$content_handler = ContentHandler::getForModelID( $content_model );
			$content_format = $content_handler->getDefaultFormat();
		}

		$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

		// $this cannot be passed by reference on newer PHP versions; hand the
		// hook a reference to a local alias of the same object instead.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Renders one <logitem> element. Contributor, comment and action details
	 * are replaced by deleted="deleted" placeholders when the matching
	 * log_deleted bit is set.
	 *
	 * @param $row object: logging row joined with user_name
	 * @return string
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, " " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, " " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Renders a <timestamp> element in ISO 8601 format.
	 *
	 * @param $timestamp string: timestamp in any format wfTimestamp() accepts
	 * @param $indent string: prefix placed before the element
	 * @return string
	 */
	function writeTimestamp( $timestamp, $indent = " " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Renders a <contributor> element: <username>/<id> when $id is truthy or
	 * $text is not a valid IP address, otherwise a single <ip>.
	 *
	 * @param $id int: user id
	 * @param $text string: user name or IP address
	 * @param $indent string: prefix placed before each line
	 * @return string
	 */
	function writeContributor( $id, $text, $indent = " " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * Renders <upload> elements for every version of the file a page in
	 * NS_FILE describes: the history in reverse of the order getHistory()
	 * returns it, then the current version.
	 *
	 * @param $row object: row with page_namespace and page_title
	 * @param $dumpContents bool: embed the file contents as base64
	 * @return string '' when the page is not a file page or the file is missing
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Renders a single <upload> element.
	 *
	 * @param $file File: the file version to describe
	 * @param $dumpContents bool: embed the file contents as base64
	 * @return string
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
		} else {
			$comment = Xml::elementClean( 'comment', null, $file->getDescription() );
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . $comment . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Returns the title text used in dumps. Interwiki titles are returned as
	 * their prefixed text; for all others the prefix is the content
	 * language's namespace text with underscores turned into spaces.
	 *
	 * @param $title Title
	 * @return string
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->getInterwiki() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}

/**
 * Base class for output sinks; prints everything it is given.
 */
class DumpOutput {

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $page object: page row (unused here)
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev object: revision row (unused here)
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev object: log row (unused here)
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Emits the text. Subclasses override this to redirect the output.
	 *
	 * @param $string string
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * Close the old file, move it to a specified name, and reopen a new file
	 * with the old name. No-op in this base class.
	 *
	 * @param $newname mixed: file name, or array of file names
	 */
	function closeRenameAndReopen( $newname ) {
	}

	/**
	 * Close the old file and move it to a specified name, optionally
	 * reopening a new file with the old name. No-op in this base class.
	 *
	 * @param $newname mixed: file name, or array of file names
	 * @param $open bool: whether to reopen afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
	}

	/**
	 * Returns the name(s) of the file(s) being written to.
	 *
	 * @return null in this base class, which writes to no file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Sink that writes to a file.
 */
class DumpFileOutput extends DumpOutput {
	protected $handle = false, $filename;

	/**
	 * @param $file string: path opened for writing in "wt" mode
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	/**
	 * Writes the final text, then closes the file.
	 *
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * @param $string string
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * @param $newname mixed: file name, or single-element array
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Renames the output file.
	 *
	 * @param $newname string
	 * @throws MWException when rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Normalises the rename argument to a single name.
	 *
	 * @param $newname mixed: file name, or array holding exactly one
	 * @return string
	 * @throws MWException when an array with more than one entry is passed
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			} else {
				$newname = $newname[0];
			}
		}
		return $newname;
	}

	/**
	 * Closes the file, renames it and optionally reopens the old name.
	 * Does nothing when $newname is empty.
	 *
	 * @param $newname mixed: file name, or single-element array
	 * @param $open bool: whether to reopen the original file name afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	/**
	 * @return string|null
	 */
	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Sink that pipes the output into the standard input of a shell command,
 * optionally redirecting the command's output to a file.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	protected $procOpenResource = false;

	/**
	 * @param $command string: shell command to start
	 * @param $file string|null: when given, " > <file>" is appended to the command
	 */
	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Writes the final text, closes the pipe and then the process.
	 *
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Starts the command and keeps the write end of its stdin pipe as the
	 * output handle.
	 *
	 * @param $command string
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		$this->handle = $pipes[0];
	}

	/**
	 * @param $newname mixed: file name, or single-element array
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Closes pipe and process, renames the output file and optionally
	 * restarts the command. Does nothing when $newname is empty.
	 *
	 * @param $newname mixed: file name, or single-element array
	 * @param $open bool: whether to restart the command afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				// NOTE(review): when the constructor was given a file,
				// $this->command already ends in the same redirect, so it
				// is appended a second time here - confirm this is intended.
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}

}

/**
 * Sink that pipes the output through gzip.
 */
class DumpGZipOutput extends DumpPipeOutput {

	/**
	 * @param $file string: destination file
	 */
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Sink that pipes the output through bzip2.
 */
class DumpBZip2Output extends DumpPipeOutput {

	/**
	 * @param $file string: destination file
	 */
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Sink that pipes the output through 7za.
 */
class Dump7ZipOutput extends DumpPipeOutput {

	/**
	 * @param $file string: destination archive
	 */
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * Builds the 7za command line that reads from stdin into $file.
	 *
	 * @param $file string
	 * @return string
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return( $command );
	}

	/**
	 * Closes pipe and process, renames the archive and optionally starts a
	 * fresh 7za process. Does nothing when $newname is empty.
	 *
	 * @param $newname mixed: file name, or single-element array
	 * @param $open bool: whether to restart 7za afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			// Guard and reset both resources, as DumpPipeOutput does, so a
			// later writeCloseStream() does not close them a second time.
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Pass-through filter that sits in front of another sink. Page-level output
 * is forwarded only for pages that pass(); subclasses override pass().
 */
class DumpFilter {

	/**
	 * The sink output is forwarded to; exposes the same write*() methods
	 * as DumpOutput. Held by reference.
	 */
	public $sink;

	/**
	 * @var bool whether the page currently being written passed the filter
	 */
	protected $sendingThisPage;

	/**
	 * @param $sink Object exposing the write*() methods of DumpOutput
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * Decides whether this page passes and, if so, forwards the opening.
	 *
	 * @param $page object: page row
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * @param $rev object: revision row
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item unfiltered.
	 *
	 * @param $rev object: log row
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		// Forward to the sink's log item handler, not its revision handler.
		$this->sink->writeLogItem( $rev, $string );
	}

	/**
	 * @param $newname mixed: file name, or array of file names
	 */
	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param $newname mixed: file name, or array of file names
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return mixed whatever the sink reports
	 */
	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Override for page-based filter types.
	 *
	 * @param $page object: page row
	 * @return bool true to forward the page; always true in this base class
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Filter that drops pages in talk namespaces.
 */
class DumpNotalkFilter extends DumpFilter {

	/**
	 * @param $page object: row with page_namespace
	 * @return bool
	 */
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Filter that keeps (or, when inverted, drops) pages in listed namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	var $invert = false;
	var $namespaces = array();

	/**
	 * @param $sink Object exposing the write*() methods of DumpOutput
	 * @param $param string: comma-separated list of NS_* constant names
	 *   and/or numeric namespace ids; a leading '!' inverts the filter
	 * @throws MWException for a key that is neither a known name nor numeric
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// Square-bracket offset (the curly-brace form is gone from current
		// PHP), guarded so an empty parameter does not read a missing offset.
		if ( isset( $param[0] ) && $param[0] == '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * @param $page object: row with page_namespace
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return $this->invert xor $match;
	}
}

/**
 * Filter that forwards only the revision matching the page's page_latest.
 * A page is buffered and emitted on close, and only if such a revision was
 * seen.
 */
class DumpLatestFilter extends DumpFilter {
	var $page, $pageString, $rev, $revString;

	/**
	 * Remembers the page instead of forwarding it right away.
	 *
	 * @param $page object: page row
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	/**
	 * Emits the buffered page with its latest revision, if one was seen,
	 * then resets the buffer.
	 *
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	/**
	 * Keeps the revision only when it is the page's latest one.
	 *
	 * @param $rev object: revision row
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Fans every write out to several sinks.
 */
class DumpMultiWriter {

	/** Array of sinks, indexed 0..count-1 */
	var $sinks;

	/** Number of sinks */
	var $count;

	/**
	 * @param $sinks array: list of sinks with consecutive integer keys from 0
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	/**
	 * @param $page object: page row
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	/**
	 * @param $rev object: revision row
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Counterpart of DumpOutput::writeLogItem(), so that this writer can be
	 * used as the sink of a log dump as well.
	 *
	 * @param $rev object: log row
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	/**
	 * @param $newnames array: one new name per sink, in sink order
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param $newnames array: one new name per sink, in sink order
	 * @param $open bool
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return array what each sink reports, in sink order
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}

}

/**
 * Makes a string safe for inclusion in XML: runs it through
 * UtfNormal::cleanUp() and then escapes it with htmlspecialchars().
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( __FUNCTION__ );
	return $string;
}