MediaWiki
REL1_23
|
<?php

/**
 * Drives an XML export: builds the database query for the requested pages,
 * revisions or log entries, renders each row with an XmlDumpWriter and hands
 * the rendered text to an output sink (a DumpOutput, DumpFilter or
 * DumpMultiWriter).
 */
class WikiExporter {
	/** Return distinct author list (when not returning full history) */
	var $list_authors = false;

	/** XML fragment built by do_list_authors(), appended to the last page */
	var $author_list = "";

	/** Whether writeUploads() output is added when a page is closed */
	var $dumpUploads = false;

	/** Passed to XmlDumpWriter::writeUploads() as its $dumpContents flag */
	var $dumpUploadFileContents = false;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	/** One of the BUFFER / STREAM constants */
	var $buffer;

	/** One of the TEXT / STUB constants */
	var $text;

	/** Object receiving the rendered output (writeOpenPage() etc.) */
	var $sink;

	// These three were previously created dynamically by the constructor;
	// they are declared here (still public) so newer PHP does not complain.
	/** Database handle used for all queries */
	public $db;

	/** History mode: a bitfield of the class constants, or an array with 'dir', 'offset', 'limit' */
	public $history;

	/** XmlDumpWriter used to render rows */
	public $writer;

	/**
	 * Returns the export schema version.
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.8";
	}

	/**
	 * @param object $db Database handle the dump queries are run on
	 * @param int|array $history One of the FULL/CURRENT/STABLE/LOGS/RANGE
	 *   constants, or an array with the keys 'dir' ('asc' or anything else
	 *   for descending), 'offset' (timestamp) and 'limit' (row count)
	 * @param int $buffer WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param int $text WikiExporter::TEXT or WikiExporter::STUB
	 */
	function __construct( $db, $history = WikiExporter::CURRENT,
		$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink; the default one prints to standard output.
	 *
	 * @param object $sink Sink object, held by reference
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/** Writes the dump header to the sink. */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/** Writes the dump footer to the sink. */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/** Dumps with no extra condition, i.e. every page. */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages with $start <= page_id < $end.
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means "no upper bound"
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions with $start <= rev_id < $end.
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means "no upper bound"
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the single page identified by a Title object.
	 *
	 * @param Title $title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the single page identified by its title text.
	 *
	 * @param string $name
	 * @throws MWException If the name does not form a valid title
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Dumps each named page in turn.
	 *
	 * @param array $names List of title strings
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/** Dumps with no extra condition; yields log entries when the history mode has LOGS set. */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log entries with $start <= log_id < $end.
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means "no upper bound"
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds $this->author_list: a <contributors> element listing the distinct,
	 * non-suppressed authors of the revisions of the pages matched by $cond.
	 *
	 * @param string $cond SQL condition selecting the page(s)
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// A revision belongs to a page through rev_page (as in every
				// join in dumpFrom()); this used to compare against rev_id.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars(), not htmlentities(): the latter emits HTML-only
			// named entities (e.g. &eacute;) that are undefined in XML.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Runs the dump query for the configured history mode and streams the
	 * result through outputLogStream() or outputPageStream().
	 *
	 * @param string $cond Extra SQL condition, '' for none
	 * @throws MWException On an unknown history mode, or when no hook handles a STABLE dump
	 * @throws Exception Anything thrown while querying/outputting is rethrown
	 *   after the result set and buffering mode have been cleaned up
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		# ($history may be an array, which must not reach the bitwise test)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			$prev = null;
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$wrapper = $this->db->resultObject( $result );
				$this->outputLogStream( $wrapper );
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				$this->cleanUpAfterFailedDump( $wrapper, $prev );

				// Inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			$prev = null;
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				$wrapper = $this->db->resultObject( $result );
				# Output dump results
				$this->outputPageStream( $wrapper );

				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				$this->cleanUpAfterFailedDump( $wrapper, $prev );

				// Inform caller about problem. The profiling section is closed
				// first, exactly as the log branch above does.
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Best-effort cleanup after a dump query failed. Throwing the exception
	 * does not reliably free the result set, and would also leave the
	 * connection in unbuffered mode. Secondary exceptions are swallowed on
	 * purpose: the original exception has higher priority.
	 *
	 * @param object|null $wrapper Result wrapper, or null if the query never got that far
	 * @param mixed $prev Value returned by bufferResults( false ), restored in STREAM mode
	 */
	private function cleanUpAfterFailedDump( $wrapper, $prev ) {
		// Freeing result
		try {
			if ( $wrapper ) {
				$wrapper->free();
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2
		}

		// Putting database back in previous buffer mode
		try {
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2
		}
	}

	/**
	 * Walks a result set of page+revision rows, opening a new <page> whenever
	 * namespace or title changes and writing every row as a revision.
	 * The collected author list is appended to the final page only.
	 *
	 * @param object $resultset Iterable result wrapper
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( $last === null ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( $last !== null ) {
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( $last !== null ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Writes every row of a logging result set as a <logitem>.
	 *
	 * @param object $resultset Iterable result wrapper
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Renders database rows as fragments of the export XML format.
 */
class XmlDumpWriter {
	/**
	 * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
	 * @return string
	 */
	function schemaVersion() {
		wfDeprecated( __METHOD__, '1.20' );
		return WikiExporter::schemaVersion();
	}

	/**
	 * Returns the opening <mediawiki> tag (schema namespace, version and
	 * content language) followed by the <siteinfo> block.
	 *
	 * @return string
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = WikiExporter::schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				#TODO: how do we get a new version up there?
				"http://www.mediawiki.org/xml/export-$ver.xsd",
			'version' => $ver,
			'xml:lang' => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Returns the <siteinfo> element: site name, base URL, generator,
	 * case setting and namespace list.
	 *
	 * @return string
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element built from $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <generator> element naming the MediaWiki version
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalURL() );
	}

	/**
	 * @return string <case> element derived from $wgCapitalLinks
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> element with one <namespace> per formatted
	 *   namespace of the content language
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array(
						'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * @return string Closing </mediawiki> tag
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opens a <page> element and writes title, namespace, id and, where
	 * applicable, redirect target and restrictions.
	 *
	 * @param object $row Row carrying page_namespace, page_title, page_id,
	 *   page_is_redirect and page_restrictions
	 * @return string
	 */
	public function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ';
				$out .= Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) );
				$out .= "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * @return string Closing </page> tag
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Renders one revision row as a <revision> element. Suppressed user,
	 * comment or text are replaced by deleted="deleted" placeholders.
	 *
	 * @param object $row Joined page/revision (and optionally text) row
	 * @return string
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
			$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
		}

		$text = '';
		$textDeleted = isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT );
		if ( $textDeleted ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		// Same isset() guard on rev_deleted as the branches above.
		if ( isset( $row->rev_sha1 )
			&& $row->rev_sha1
			&& !$textDeleted
		) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
			$content_model = strval( $row->rev_content_model );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo test!
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$content_model = ContentHandler::getDefaultModelFor( $title );
		}

		$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

		if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
			$content_format = strval( $row->rev_content_format );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo test!
			$content_handler = ContentHandler::getForModelID( $content_model );
			$content_format = $content_handler->getDefaultFormat();
		}

		$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

		// The hook takes the writer by reference, but newer PHP refuses
		// "&$this"; pass a local alias of the same object instead.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Renders one logging row as a <logitem> element. Suppressed user,
	 * comment or action are replaced by deleted="deleted" placeholders.
	 *
	 * @param object $row Joined logging/user row
	 * @return string
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, " " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, " " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * @param string $timestamp Timestamp in any format wfTimestamp() accepts
	 * @param string $indent Prefix placed before the element
	 * @return string <timestamp> element in ISO 8601 form
	 */
	function writeTimestamp( $timestamp, $indent = " " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Writes a <contributor> element: <username>/<id> when an id is given or
	 * the name is not a valid IP address, otherwise <ip>.
	 *
	 * @param int $id User id
	 * @param string $text User name or IP address
	 * @param string $indent Prefix placed before each line
	 * @return string
	 */
	function writeContributor( $id, $text, $indent = " " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * For a page in the File namespace whose local file exists, writes one
	 * <upload> per old version (history reversed) followed by the current one.
	 *
	 * @param object $row Row carrying page_namespace and page_title
	 * @param bool $dumpContents Whether to embed the file contents
	 * @return string Empty string when there is nothing to write
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Writes a single <upload> element for one file version.
	 *
	 * @param object $file File object (current or old version)
	 * @param bool $dumpContents Whether to embed the contents, base64-encoded
	 * @return string
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			$be = $file->getRepo()->getBackend();
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			# @todo Too bad this loads the contents into memory (script might swap)
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode(
					$be->getFileContents( array( 'src' => $file->getPath() ) ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
		} else {
			$comment = Xml::elementClean( 'comment', null, $file->getDescription() );
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . $comment . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalURL() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Returns the title text used in dumps: external titles keep their
	 * prefixed text, everything else gets the content language's namespace
	 * name (underscores turned into spaces) as prefix.
	 *
	 * @param Title $title
	 * @return string
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->isExternal() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}

/**
 * Base output sink: every write*() call ends up in write(), which prints.
 */
class DumpOutput {

	/**
	 * @param string $string
	 */
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Prints the string; subclasses override this to write elsewhere.
	 *
	 * @param string $string
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * No-op here; file-backed sinks override it.
	 *
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
	}

	/**
	 * No-op here; file-backed sinks override it.
	 *
	 * @param string|array $newname
	 * @param bool $open
	 */
	function closeAndRename( $newname, $open = false ) {
	}

	/**
	 * @return null This sink has no file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Sink writing to a file opened with fopen().
 */
class DumpFileOutput extends DumpOutput {
	protected $handle = false, $filename;

	/**
	 * @param string $file Path opened for writing
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	/**
	 * Writes the footer, then closes the file handle if one is open.
	 *
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * @param string $string
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * @param string $newname
	 * @throws MWException If rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Accepts either a name or a one-element array of names.
	 *
	 * @param string|array $newname
	 * @return string The single new name
	 * @throws MWException If an array with more than one entry is passed
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			} else {
				$newname = $newname[0];
			}
		}
		return $newname;
	}

	/**
	 * Closes the file, renames it to $newname and optionally reopens the
	 * original file name for further output. Does nothing for a falsy name.
	 *
	 * @param string|array $newname
	 * @param bool $open Whether to reopen the original file name afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	/**
	 * @return string|null
	 */
	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Sink piping output into the standard input of a shell command, whose own
 * output may be redirected to a file.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	protected $procOpenResource = false;

	/**
	 * @param string $command Shell command to run
	 * @param string|null $file If given, the command's output is redirected there
	 */
	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Writes the footer, closes the pipe (via the parent) and then the process.
	 *
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Starts the command and keeps its stdin pipe as the write handle.
	 *
	 * @param string $command
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		$this->handle = $pipes[0];
	}

	/**
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Closes pipe and process, renames the output file and optionally
	 * restarts the command on the original file name.
	 *
	 * @param string|array $newname
	 * @param bool $open
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				// NOTE: $this->command already carries the redirect added by the
				// constructor, so the restarted command redirects twice to the
				// same file. Left as is to keep behavior unchanged.
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}

}

/**
 * Sink piping output through gzip into a file.
 */
class DumpGZipOutput extends DumpPipeOutput {

	/**
	 * @param string $file
	 */
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Sink piping output through bzip2 into a file.
 */
class DumpBZip2Output extends DumpPipeOutput {

	/**
	 * @param string $file
	 */
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Sink piping output into "7za", which writes the archive file itself.
 */
class Dump7ZipOutput extends DumpPipeOutput {

	/**
	 * @param string $file Archive path
	 */
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * @param string $file Archive path
	 * @return string Shell command reading the dump from stdin
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return $command;
	}

	/**
	 * Closes pipe and process, renames the archive and optionally starts a
	 * new 7za process on the original archive name.
	 *
	 * @param string|array $newname
	 * @param bool $open
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			// Guard and reset like DumpPipeOutput::closeAndRename(), so a later
			// writeCloseStream() does not close the same resources twice.
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Pass-through sink decorator. Pages for which pass() returns false are
 * dropped together with their revisions; everything else is forwarded.
 */
class DumpFilter {

	/**
	 * Sink the filtered output is forwarded to.
	 */
	public $sink;

	/**
	 * @var bool Whether the page currently being written passed the filter
	 */
	protected $sendingThisPage;

	/**
	 * @param object $sink Wrapped sink, held by reference
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * @param string $string
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item unfiltered.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	function writeLogItem( $rev, $string ) {
		// Used to call writeRevision() on the sink, which misroutes log items
		// in any sink that treats the two differently.
		$this->sink->writeLogItem( $rev, $string );
	}

	/**
	 * @param string|array $newname
	 */
	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param string|array $newname
	 * @param bool $open
	 */
	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return mixed Whatever the wrapped sink reports
	 */
	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Decides whether a page is forwarded; the base filter lets all through.
	 *
	 * @param object $page
	 * @return bool
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Filter dropping pages in talk namespaces.
 */
class DumpNotalkFilter extends DumpFilter {

	/**
	 * @param object $page
	 * @return bool
	 */
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Filter keeping (or, when inverted, dropping) pages in listed namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	var $invert = false;
	var $namespaces = array();

	/**
	 * @param object $sink Wrapped sink
	 * @param string $param Comma-separated NS_* constant names and/or numeric
	 *   namespace ids; a leading "!" inverts the filter
	 * @throws MWException On an entry that is neither a known name nor numeric
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// substr() rather than "$param { 0 }": the brace offset syntax no
		// longer parses on current PHP, and substr() copes with ''.
		if ( substr( $param, 0, 1 ) === '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * @param object $page
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return $this->invert xor $match;
	}
}

/**
 * Filter keeping only the revision whose id equals the page's page_latest.
 * Output is held back until the page is closed.
 */
class DumpLatestFilter extends DumpFilter {
	var $page, $pageString, $rev, $revString;

	/**
	 * Remembers the page header instead of forwarding it right away.
	 *
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	/**
	 * Forwards the buffered page and revision, if a latest revision was seen,
	 * then resets the buffer.
	 *
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	/**
	 * Remembers the revision only if it is the page's latest one.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Sink fanning every call out to a list of sinks.
 */
class DumpMultiWriter {
	// Previously created dynamically by the constructor; declared (still
	// public) so newer PHP does not complain.
	/** List of sinks, indexed from 0 */
	public $sinks;

	/** Number of sinks */
	public $count;

	/**
	 * @param array $sinks List of sinks, indexed from 0
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param string $string
	 */
	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	/**
	 * @param string $string
	 */
	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	/**
	 * @param object $page
	 * @param string $string
	 */
	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param string $string
	 */
	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	/**
	 * @param object $rev
	 * @param string $string
	 */
	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Added so that log dumps (WikiExporter::outputLogStream()) can be
	 * written through a multi-writer like through any other sink.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	/**
	 * @param array $newnames One new name per sink
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param array $newnames One new name per sink, in sink order
	 * @param bool $open
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return array One getFilenames() result per sink
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}

}

/**
 * Cleans a string with UtfNormal::cleanUp() and escapes it with
 * htmlspecialchars().
 *
 * @param string $string
 * @return string
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( __FUNCTION__ );
	return $string;
}