MediaWiki
REL1_20
|
<?php
/**
 * XML dump/export support: the exporter that runs the database queries,
 * the writer that renders rows as XML fragments, and the output sinks and
 * filters those fragments are passed through.
 *
 * NOTE(review): this file was reconstructed from a rendered source listing.
 * Whitespace inside string literals is reproduced exactly as it appeared
 * there; the listing may have collapsed runs of spaces, so the indentation
 * literals used for pretty-printing should be confirmed against the
 * canonical file.
 */

/**
 * Runs page, revision and log queries and streams the resulting rows
 * through an XmlDumpWriter into an output sink.
 */
class WikiExporter {
	/** Return distinct author list (when not returning full history) */
	public $list_authors = false;
	/** XML fragment built by do_list_authors(), appended to the last page */
	public $author_list = "";

	public $dumpUploads = false;
	public $dumpUploadFileContents = false;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	/** One of WikiExporter::BUFFER or WikiExporter::STREAM */
	public $buffer;

	/** One of WikiExporter::TEXT or WikiExporter::STUB */
	public $text;

	/** Output sink; a DumpOutput unless replaced via setOutputSink() */
	public $sink;

	/** Database connection passed to the constructor */
	public $db;

	/** History selector: a bit field of the class constants, or an array */
	public $history;

	/** XmlDumpWriter used to render rows */
	public $writer;

	/**
	 * Returns the export schema version.
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.7";
	}

	/**
	 * @param $db Database connection, held by reference
	 * @param $history Mixed: one of the FULL/CURRENT/STABLE/LOGS/RANGE
	 *   constants, or an array with 'dir', 'offset' and 'limit' keys
	 *   (see dumpFrom())
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param $text Int: WikiExporter::TEXT or WikiExporter::STUB
	 */
	function __construct( &$db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db =& $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink. The sink is held by reference.
	 * @param $sink Object receiving the write*() calls
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/** Writes the stream header produced by the writer to the sink. */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/** Writes the stream footer produced by the writer to the sink. */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/** Dumps with no extra query condition. */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages with $start <= page_id, and page_id < $end when $end is
	 * truthy.
	 * @param $start Int
	 * @param $end Int
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions with $start <= rev_id, and rev_id < $end when $end is
	 * truthy.
	 * @param $start Int
	 * @param $end Int
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the page matching the namespace and DB key of $title.
	 * @param $title Title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the page named by $name.
	 * @param $name String
	 * @throws MWException when Title::newFromText() returns null
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Calls pageByName() for each entry.
	 * @param $names Array of page names
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/** Dumps with no extra query condition (log dump when LOGS is set). */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log rows with $start <= log_id, and log_id < $end when $end is
	 * truthy.
	 * @param $start Int
	 * @param $end Int
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds $this->author_list, a <contributors> XML fragment listing the
	 * distinct (rev_user_text, rev_user) pairs of the revisions that belong
	 * to the pages matched by $cond, skipping revisions whose user is
	 * flagged as deleted.
	 *
	 * @param $cond String: SQL condition selecting the pages
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Join revisions to their page; rev_id is the revision's own
				// id and must not be compared against page_id.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars() only emits the entities XML predefines;
			// htmlentities() may emit HTML-only named entities.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Best-effort clean-up after a failed dump query: frees the result set
	 * and restores the connection's previous result-buffering mode. Errors
	 * raised here are deliberately ignored because the original exception,
	 * which the caller re-throws, has higher priority.
	 *
	 * @param $wrapper Mixed: result wrapper, or null if none was obtained
	 * @param $streaming Bool: whether buffering was switched off beforehand
	 * @param $prev Mixed: value previously returned by bufferResults( false )
	 */
	protected function cleanUpFailedDump( $wrapper, $streaming, $prev ) {
		// Freeing result
		try {
			if ( $wrapper ) {
				$wrapper->free();
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2
		}

		// Putting database back in previous buffer mode
		try {
			if ( $streaming ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2
		}
	}

	/**
	 * Runs the dump query and streams the rows to the sink.
	 *
	 * When the LOGS bit of $this->history is set, the logging table joined
	 * with the user table is dumped. Otherwise pages and revisions are
	 * dumped; the join depends on $this->history (array form, FULL,
	 * CURRENT, STABLE or RANGE).
	 *
	 * @param $cond String: extra SQL condition, or '' for none
	 * @throws MWException for an unknown history specification, or when no
	 *   hook handles a STABLE dump
	 * @throws Exception any exception raised while querying or writing is
	 *   re-thrown after the result set and buffer mode are cleaned up
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		// Evaluated once so that buffering is restored exactly when it was
		// switched off here.
		$streaming = ( $this->buffer == WikiExporter::STREAM );
		$prev = null;
		# For logging dumps...
		if ( $this->history & self::LOGS ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			if ( $streaming ) {
				$prev = $this->db->bufferResults( false );
			}
			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$wrapper = $this->db->resultObject( $result );
				$this->outputLogStream( $wrapper );
				if ( $streaming ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanUpFailedDump( $wrapper, $streaming, $prev );

				// Inform caller about problem
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			if ( $streaming ) {
				$prev = $this->db->bufferResults( false );
			}

			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				$wrapper = $this->db->resultObject( $result );
				# Output dump results
				$this->outputPageStream( $wrapper );

				if ( $streaming ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanUpFailedDump( $wrapper, $streaming, $prev );

				// Inform caller about problem
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Walks a page/revision result set and writes it to the sink. A new
	 * page is opened whenever the namespace or title differs from the
	 * previous row's; every row is written as a revision. Upload records
	 * are written before each page is closed when $this->dumpUploads is
	 * set, and $this->author_list is appended to the final page only.
	 *
	 * @param $resultset Iterable result wrapper
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( is_null( $last ) ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( isset( $last ) ) {
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( isset( $last ) ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Writes every row of a logging result set to the sink as a log item.
	 * @param $resultset Iterable result wrapper
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Renders database rows and site information as XML fragments.
 */
class XmlDumpWriter {
	/**
	 * Returns the export schema version.
	 * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
	 * @return string
	 */
	function schemaVersion() {
		wfDeprecated( __METHOD__, '1.20' );
		return WikiExporter::schemaVersion();
	}

	/**
	 * Returns the opening <mediawiki> tag, with namespace, schema location,
	 * version and language attributes, followed by the <siteinfo> block.
	 * @return string
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = WikiExporter::schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				"http://www.mediawiki.org/xml/export-$ver.xsd",
			'version' => $ver,
			'xml:lang' => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Returns the <siteinfo> block: site name, base URL, generator, case
	 * setting and namespaces.
	 * @return string
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element holding $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <generator> element holding "MediaWiki $wgVersion"
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalUrl() );
	}

	/**
	 * @return string <case> element: 'first-letter' when $wgCapitalLinks is
	 *   truthy, 'case-sensitive' otherwise
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> block with one <namespace> element per
	 *   entry of $wgContLang->getFormattedNamespaces()
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array( 'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * @return string Closing </mediawiki> tag
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Returns the opening <page> tag with title, namespace and id, plus a
	 * <redirect> element for redirects with a valid target and a
	 * <restrictions> element when page_restrictions is non-empty. The
	 * 'XmlDumpWriterOpenPage' hook may modify the output.
	 *
	 * @param $row Object: page row
	 * @return string
	 */
	function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * @return string Closing </page> tag
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Returns a <revision> block for one row. Contributor, comment and
	 * text are replaced by deleted="deleted" placeholders when the
	 * matching rev_deleted bit is set. Without an old_text field a stub
	 * <text> element carrying the text id is written instead of the text.
	 * The 'XmlDumpWriterWriteRevision' hook may modify the output.
	 *
	 * @param $row Object: revision row
	 * @return string
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		if ( $row->rev_parent_id ) {
			$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( $row->rev_deleted & Revision::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( $row->rev_deleted & Revision::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
		}

		if ( $row->rev_sha1 && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		$text = '';
		if ( $row->rev_deleted & Revision::DELETED_TEXT ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		// Pass a local alias by reference rather than &$this, which newer
		// PHP versions no longer treat as a reference.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Returns a <logitem> block for one logging row. Contributor, comment
	 * and action details are replaced by deleted="deleted" placeholders
	 * when the matching log_deleted bit is set.
	 *
	 * @param $row Object: logging row, including user_name
	 * @return string
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, " " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, " " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Returns a <timestamp> element holding $timestamp converted with
	 * wfTimestamp( TS_ISO_8601, ... ).
	 *
	 * @param $timestamp String
	 * @param $indent String: prefix placed before the element
	 * @return string
	 */
	function writeTimestamp( $timestamp, $indent = " " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Returns a <contributor> block. A <username>/<id> pair is written when
	 * $id is truthy or $text is not a valid IP; otherwise an <ip> element.
	 *
	 * @param $id Mixed: user id
	 * @param $text String: user name or IP
	 * @param $indent String: prefix placed before each line
	 * @return string
	 */
	function writeContributor( $id, $text, $indent = " " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * Returns <upload> blocks for a page in the file namespace: the file's
	 * history in reversed order, followed by the current version. Returns
	 * '' for other namespaces or when the local file does not exist.
	 *
	 * @param $row Object: page row
	 * @param $dumpContents Bool: passed through to writeUpload()
	 * @return string
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Returns one <upload> block describing a file version.
	 *
	 * @param $file File
	 * @param $dumpContents Bool: embed the file contents as base64
	 * @return string
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
		} else {
			$comment = Xml::elementClean( 'comment', null, $file->getDescription() );
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . $comment . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Returns the title text used in dumps. Interwiki titles use their
	 * prefixed text. Otherwise the content language's namespace name is
	 * used, with underscores turned into spaces, followed by ':' when
	 * non-empty, and then the title text.
	 *
	 * @param $title Title
	 * @return string
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->getInterwiki() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}

/**
 * Base output sink: every write*() method forwards its string to write(),
 * which prints it.
 */
class DumpOutput {

	/**
	 * @param $string String
	 */
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $page Object: page row (unused here)
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Object: revision row (unused here)
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Object: logging row (unused here)
	 * @param $string String
	 */
	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Prints the string; subclasses override this to write elsewhere.
	 * @param $string String
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * No-op in the base class.
	 * @param $newname Mixed
	 */
	function closeRenameAndReopen( $newname ) {
		return;
	}

	/**
	 * No-op in the base class.
	 * @param $newname Mixed
	 * @param $open Bool
	 */
	function closeAndRename( $newname, $open = false ) {
		return;
	}

	/**
	 * @return null The base class has no file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Sink that writes to a file opened with fopen( $file, "wt" ).
 */
class DumpFileOutput extends DumpOutput {
	protected $handle = false, $filename;

	/**
	 * @param $file String: path of the file to write
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	/**
	 * Writes the string, then closes the file handle if it is open.
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * @param $string String
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * @param $newname Mixed: see closeAndRename()
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Renames the current file to $newname.
	 * @param $newname String
	 * @throws MWException when rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Accepts either a name or an array holding a single name, and returns
	 * the name.
	 * @param $newname Mixed
	 * @return Mixed
	 * @throws MWException when the array holds more than one entry
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			} else {
				$newname = $newname[0];
			}
		}
		return $newname;
	}

	/**
	 * Closes the file, renames it to $newname and, when $open is set,
	 * opens the original file name again for writing. Does nothing when
	 * $newname is falsy.
	 *
	 * @param $newname Mixed: new name, or array holding a single name
	 * @param $open Bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	/**
	 * @return string File name given to the constructor
	 */
	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Sink that writes to the standard input of a shell command started with
 * proc_open(). When a file is given, the command's output is redirected to
 * it.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	protected $procOpenResource = false;

	/**
	 * @param $command String: shell command
	 * @param $file String|null: output file appended as " > file"
	 */
	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Writes the string, closes the pipe (via the parent) and then closes
	 * the process if it is open.
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Starts $command and keeps its stdin pipe as the write handle.
	 * @param $command String
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		// No pipe is available when proc_open() fails.
		$this->handle = isset( $pipes[0] ) ? $pipes[0] : false;
	}

	/**
	 * @param $newname Mixed: see closeAndRename()
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Closes the pipe and the process, renames the output file and, when
	 * $open is set, restarts the command redirected to the original file
	 * name. Does nothing when $newname is falsy.
	 *
	 * @param $newname Mixed: new name, or array holding a single name
	 * @param $open Bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}

}

/**
 * Pipes output through "gzip" into a file.
 */
class DumpGZipOutput extends DumpPipeOutput {

	/**
	 * @param $file String
	 */
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Pipes output through "bzip2" into a file.
 */
class DumpBZip2Output extends DumpPipeOutput {

	/**
	 * @param $file String
	 */
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Pipes output into "7za", which writes the archive file itself.
 */
class Dump7ZipOutput extends DumpPipeOutput {

	/**
	 * @param $file String
	 */
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * Builds the 7za command line for $file, with its output discarded.
	 * @param $file String
	 * @return string
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return $command;
	}

	/**
	 * Closes the pipe and the process, renames the archive and, when $open
	 * is set, restarts 7za on the original file name. Does nothing when
	 * $newname is falsy.
	 *
	 * @param $newname Mixed: new name, or array holding a single name
	 * @param $open Bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			// Guard and reset like DumpPipeOutput::closeAndRename() so that
			// an already closed stream is not closed a second time.
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Base filter: sits in front of a sink and forwards page events only for
 * pages accepted by pass().
 */
class DumpFilter {

	/** Downstream sink receiving the forwarded calls */
	public $sink;

	/** Whether the page currently open was accepted by pass() */
	protected $sendingThisPage;

	/**
	 * @param $sink Object: downstream sink, held by reference
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * @param $string String
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * Records whether the page passes the filter and forwards it if so.
	 * @param $page Object: page row
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * @param $rev Object: revision row
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item unconditionally; log items are not tied to a
	 * page, so the page filter state does not apply to them.
	 * @param $rev Object: logging row
	 * @param $string String
	 */
	function writeLogItem( $rev, $string ) {
		$this->sink->writeLogItem( $rev, $string );
	}

	/**
	 * @param $newname Mixed
	 */
	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param $newname Mixed
	 * @param $open Bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return Mixed Whatever the sink reports
	 */
	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Decides whether a page is forwarded; the base class accepts all.
	 * @param $page Object: page row
	 * @return bool
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Filter that drops pages in talk namespaces.
 */
class DumpNotalkFilter extends DumpFilter {

	/**
	 * @param $page Object: page row
	 * @return bool
	 */
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Filter that keeps (or, when inverted, drops) pages in a given set of
 * namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	public $invert = false;
	public $namespaces = array();

	/**
	 * @param $sink Object: downstream sink
	 * @param $param String: comma-separated NS_* constant names or numeric
	 *   namespace ids; a leading '!' inverts the filter
	 * @throws MWException for an unrecognized key
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN" => NS_MAIN,
			"NS_TALK" => NS_TALK,
			"NS_USER" => NS_USER,
			"NS_USER_TALK" => NS_USER_TALK,
			"NS_PROJECT" => NS_PROJECT,
			"NS_PROJECT_TALK" => NS_PROJECT_TALK,
			"NS_FILE" => NS_FILE,
			"NS_FILE_TALK" => NS_FILE_TALK,
			"NS_IMAGE" => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK" => NS_IMAGE_TALK,
			"NS_MEDIAWIKI" => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE" => NS_TEMPLATE,
			"NS_TEMPLATE_TALK" => NS_TEMPLATE_TALK,
			"NS_HELP" => NS_HELP,
			"NS_HELP_TALK" => NS_HELP_TALK,
			"NS_CATEGORY" => NS_CATEGORY,
			"NS_CATEGORY_TALK" => NS_CATEGORY_TALK );

		// Square-bracket offset; the curly-brace form is rejected by PHP 8.
		// isset() keeps an empty $param from raising an offset notice.
		if ( isset( $param[0] ) && $param[0] == '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * @param $page Object: page row
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return ( $this->invert xor $match );
	}
}

/**
 * Filter that forwards only the latest revision of each page. The page
 * header is held back until the page is closed, and is forwarded only if a
 * revision whose id equals page_latest was seen.
 */
class DumpLatestFilter extends DumpFilter {
	public $page, $pageString, $rev, $revString;

	/**
	 * @param $page Object: page row
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	/**
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	/**
	 * @param $rev Object: revision row
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Sink that forwards every call to each of several sinks.
 */
class DumpMultiWriter {
	/** Array of sinks, indexed from 0 */
	public $sinks;
	/** Number of sinks */
	public $count;

	/**
	 * @param $sinks Array of sinks, indexed from 0
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param $string String
	 */
	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	/**
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	/**
	 * @param $page Object: page row
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	/**
	 * @param $rev Object: revision row
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item to every sink. WikiExporter::outputLogStream()
	 * calls this method on its sink, so a multi-writer must provide it.
	 * @param $rev Object: logging row
	 * @param $string String
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	/**
	 * @param $newnames Array: one new name per sink
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param $newnames Array: one new name per sink, in sink order
	 * @param $open Bool
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return Array of each sink's getFilenames() result, in sink order
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}

}

/**
 * Cleans a string with UtfNormal::cleanUp() and escapes it with
 * htmlspecialchars().
 *
 * @param $string String
 * @return string
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( __FUNCTION__ );
	return $string;
}