MediaWiki  REL1_21
Export.php
Go to the documentation of this file.
00001 <?php
/**
 * Runs page, revision or log queries against a database connection, turns
 * each result row into XML with an XmlDumpWriter and hands the XML to an
 * output sink.
 */
class WikiExporter {
	var $list_authors = false; # Return distinct author list (when not returning full history)
	var $author_list = "";

	var $dumpUploads = false;
	var $dumpUploadFileContents = false;

	# History selection flags, tested bitwise in dumpFrom()
	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	# Result buffering mode; STREAM switches the connection to unbuffered results
	const BUFFER = 0;
	const STREAM = 1;

	# Text mode; STUB leaves the text table out of the page query
	const TEXT = 0;
	const STUB = 1;

	var $buffer;

	var $text;

	# Output sink receiving the generated XML fragments
	var $sink;

	# Database connection all queries are run against
	var $db;

	# One of the history constants, or an array with 'dir', 'offset' and 'limit' keys
	var $history;

	# XmlDumpWriter producing the XML fragments
	var $writer;

	/**
	 * Returns the export format version of the XML this class outputs.
	 *
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.8";
	}

	/**
	 * @param $db Database connection used for every query
	 * @param $history Mixed: one of the history constants (FULL, CURRENT,
	 *   STABLE, LOGS, RANGE), or an array with the keys 'dir' ('asc' or
	 *   anything else for descending), 'offset' (timestamp) and 'limit'
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param $text Int: WikiExporter::TEXT or WikiExporter::STUB
	 */
	function __construct( $db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink; the sink is bound by reference.
	 *
	 * @param $sink Object receiving the write*() calls
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Writes the opening part of the dump to the sink.
	 */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/**
	 * Writes the closing part of the dump to the sink.
	 */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/**
	 * Dumps without any extra condition.
	 */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages whose page_id is >= $start and, if $end is truthy, < $end.
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions whose rev_id is >= $start and, if $end is truthy, < $end.
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the page matching the namespace and DB key of $title.
	 *
	 * @param $title Title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the page with the given name.
	 *
	 * @param $name String
	 * @throws MWException when Title::newFromText() returns null for $name
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Dumps each named page in turn via pageByName().
	 *
	 * @param $names Array of page names
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dumps without any extra condition; log entries are only produced when
	 * the exporter was constructed with the LOGS history flag.
	 */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log entries whose log_id is >= $start and, if $end is truthy, < $end.
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds the <contributors> XML fragment in $this->author_list from the
	 * distinct authors of all revisions of the pages matching $cond whose
	 * user name is not hidden.
	 *
	 * @param $cond String: SQL condition selecting the pages
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Revisions belong to a page through rev_page (as in every
				// join in dumpFrom()); rev_id is the revision's own ID.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars() escapes only the XML-significant characters;
			// htmlentities() would emit HTML named entities that are not
			// defined in the XML dump.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text, ENT_COMPAT, 'UTF-8' ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Runs the dump query and streams the result to the sink.
	 *
	 * With the LOGS history flag the logging table is dumped; otherwise pages
	 * and revisions are dumped according to $this->history.
	 *
	 * @param $cond String: SQL condition restricting the dump; '' for none
	 * @throws MWException when the history type is not recognised, or when it
	 *   is STABLE and the 'WikiExporter::dumpStableQuery' hook returns true
	 * @throws Exception anything thrown while querying or writing is rethrown
	 *   after the result set and buffering mode have been cleaned up
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		# (the array form of $this->history has no flags to test, and a
		# bitwise AND on an array is a TypeError on PHP 8)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			$prev = null;
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$wrapper = $this->db->resultObject( $result );
				$this->outputLogStream( $wrapper );
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanUpFailedDump( $wrapper, $prev );

				// Close the profiling section like the other throw sites do,
				// then inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			$prev = null;
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				$wrapper = $this->db->resultObject( $result );
				# Output dump results
				$this->outputPageStream( $wrapper );

				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.
				$this->cleanUpFailedDump( $wrapper, $prev );

				// Close the profiling section like the other throw sites do,
				// then inform caller about problem
				wfProfileOut( __METHOD__ );
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Best-effort cleanup after a failed dump: frees the result set and puts
	 * the connection back into its previous buffering mode.
	 *
	 * Exceptions raised during cleanup are deliberately ignored because the
	 * exception that caused the failure has priority and is rethrown by the
	 * caller.
	 *
	 * @param $wrapper Mixed: result wrapper to free, or null if none exists yet
	 * @param $prev Mixed: value returned by bufferResults( false ) earlier
	 */
	private function cleanUpFailedDump( $wrapper, $prev ) {
		// Freeing result
		try {
			if ( $wrapper ) {
				$wrapper->free();
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2 as the original
			// exception has higher priority
		}

		// Putting database back in previous buffer mode
		try {
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e2 ) {
			// Already in panic mode -> ignoring $e2 as the original
			// exception has higher priority
		}
	}

	/**
	 * Writes a page/revision result set to the sink.
	 *
	 * A new <page> is opened whenever the namespace or title of a row differs
	 * from the page currently open, so rows of one page must be adjacent in
	 * the result set. The author list is appended only when the last page is
	 * closed.
	 *
	 * @param $resultset Iterable result rows carrying page and revision fields
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( $last === null ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( $last !== null ) {
					// Close the previous page before opening the next one
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( $last !== null ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Writes every row of a logging result set to the sink as a log item.
	 *
	 * @param $resultset Iterable result rows from the logging query
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}
00469 
00473 class XmlDumpWriter {
/**
 * Deprecated accessor for the export schema version: reports itself through
 * wfDeprecated() with version '1.20' and forwards to
 * WikiExporter::schemaVersion().
 *
 * @return string
 */
function schemaVersion() {
	wfDeprecated( __METHOD__, '1.20' );
	$version = WikiExporter::schemaVersion();
	return $version;
}
00483 
/**
 * Builds the opening <mediawiki> root tag, carrying the schema namespace,
 * schema location, schema version and the wiki's language code, followed by
 * a newline and the <siteinfo> block.
 *
 * @return string
 */
function openStream() {
	global $wgLanguageCode;

	$version = WikiExporter::schemaVersion();
	$namespaceUri = "http://www.mediawiki.org/xml/export-$version/";
	#TODO: how do we get a new version up there?
	$schemaFile = "http://www.mediawiki.org/xml/export-$version.xsd";

	$attribs = array(
		'xmlns'              => $namespaceUri,
		'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $namespaceUri . " " . $schemaFile,
		'version'            => $version,
		'xml:lang'           => $wgLanguageCode,
	);

	// Null contents are passed through to Xml::element() unchanged
	$rootTag = Xml::element( 'mediawiki', $attribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
00508 
/**
 * Assembles the <siteinfo> block from the site name, base URL, generator,
 * case setting and namespace list, one item per line.
 *
 * @return string
 */
function siteInfo() {
	$items = array();
	$items[] = $this->sitename();
	$items[] = $this->homelink();
	$items[] = $this->generator();
	$items[] = $this->caseSetting();
	$items[] = $this->namespaces();

	$body = implode( "\n    ", $items );

	return "  <siteinfo>\n    " . $body . "\n  </siteinfo>\n";
}
00523 
/**
 * Builds the <sitename> element from $wgSitename.
 *
 * @return string
 */
function sitename() {
	global $wgSitename;

	$element = Xml::element( 'sitename', array(), $wgSitename );
	return $element;
}
00531 
/**
 * Builds the <generator> element naming MediaWiki and $wgVersion.
 *
 * @return string
 */
function generator() {
	global $wgVersion;

	$label = "MediaWiki $wgVersion";
	return Xml::element( 'generator', array(), $label );
}
00539 
/**
 * Builds the <base> element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$url = $mainPage->getCanonicalUrl();
	return Xml::element( 'base', array(), $url );
}
00546 
/**
 * Builds the <case> element: 'first-letter' when $wgCapitalLinks is truthy,
 * 'case-sensitive' otherwise.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;

	// "case-insensitive" option is reserved for future
	if ( $wgCapitalLinks ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}

	return Xml::element( 'case', array(), $sensitivity );
}
00556 
/**
 * Builds the <namespaces> block with one <namespace> element per formatted
 * namespace of the content language, each carrying its key and case rule.
 *
 * @return string
 */
function namespaces() {
	global $wgContLang;

	$entries = array();
	foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
		$case = MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive';
		$attribs = array(
			'key' => $ns,
			'case' => $case,
		);
		$entries[] = '      ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
	}

	return "<namespaces>\n" . implode( '', $entries ) . "    </namespaces>";
}
00573 
/**
 * Returns the closing tag of the <mediawiki> root element.
 *
 * @return string
 */
function closeStream() {
	$closingTag = "</mediawiki>";
	return $closingTag . "\n";
}
00583 
/**
 * Opens a <page> element and writes the page-level metadata: title,
 * namespace number, page ID, the redirect target (for redirects with a valid
 * target) and the restrictions string (when not empty).
 *
 * Handlers of the 'XmlDumpWriterOpenPage' hook may modify the output before
 * it is returned.
 *
 * @param $row Object: result row with page_namespace, page_title, page_id,
 *   page_is_redirect and page_restrictions fields
 * @return string
 */
function openPage( $row ) {
	$indent = '    ';
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$out = "  <page>\n";
	$out .= $indent . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
	$out .= $indent . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
	$out .= $indent . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$target = WikiPage::factory( $title )->getRedirectTarget();
		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = array( 'title' => self::canonicalTitle( $target ) );
			$out .= $indent . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$out .= $indent . Xml::element( 'restrictions', array(), $restrictions ) . "\n";
	}

	wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

	return $out;
}
00615 
/**
 * Returns the closing tag of a <page> element.
 *
 * @return string
 */
function closePage() {
	$closingTag = "  </page>";
	return $closingTag . "\n";
}
00625 
/**
 * Builds a <revision> element for one result row: IDs, timestamp,
 * contributor, minor flag, comment, text (full, stub or marked deleted),
 * sha1, content model and content format.
 *
 * Parts hidden through the rev_deleted bit field are replaced by an empty
 * element with deleted="deleted". Handlers of the
 * 'XmlDumpWriterWriteRevision' hook may modify the output before the element
 * is closed.
 *
 * @param $row Object: result row with revision fields, plus page_namespace
 *   and page_title when rev_content_model is not set
 * @return string
 */
function writeRevision( $row ) {
	wfProfileIn( __METHOD__ );

	$out = "    <revision>\n";
	$out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= "      " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
		$out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= "      <minor/>\n";
	}
	if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
		$out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
	} elseif ( $row->rev_comment != '' ) {
		$out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
	}

	// Evaluated once and reused for the sha1 decision below; the isset()
	// guard keeps rows without a rev_deleted field from raising a notice.
	$textDeleted = isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT );

	$text = '';
	if ( $textDeleted ) {
		$out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$out .= "      " . Xml::elementClean( 'text',
			array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= "      " . Xml::element( 'text',
			array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
			"" ) . "\n";
	}

	if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !$textDeleted ) {
		$out .= "      " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= "      <sha1/>\n";
	}

	if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		// @todo: test!
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$out .= "      " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

	if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		// @todo: test!
		$content_handler = ContentHandler::getForModelID( $content_model );
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= "      " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// Pass a local alias rather than &$this: taking a reference to $this is
	// problematic on newer PHP versions, and hook handlers still receive the
	// same writer object.
	$writer = $this;
	wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

	$out .= "    </revision>\n";

	wfProfileOut( __METHOD__ );
	return $out;
}
00711 
/**
 * Builds a <logitem> element for one logging row: ID, timestamp,
 * contributor, comment, type, action, and either the log title and
 * parameters or an empty <text deleted="deleted"/> element when the action
 * is hidden.
 *
 * @param $row Object: logging result row that also carries user_name
 * @return string
 */
function writeLogItem( $row ) {
	wfProfileIn( __METHOD__ );

	$pad = "    ";
	$hidden = array( 'deleted' => 'deleted' );

	$out = "  <logitem>\n";
	$out .= $pad . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$out .= $this->writeTimestamp( $row->log_timestamp, $pad );

	// Contributor, unless the user name is hidden
	if ( !( $row->log_deleted & LogPage::DELETED_USER ) ) {
		$out .= $this->writeContributor( $row->log_user, $row->user_name, $pad );
	} else {
		$out .= $pad . Xml::element( 'contributor', $hidden ) . "\n";
	}

	// Comment: hidden marker, the comment itself, or nothing when empty
	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$out .= $pad . Xml::element( 'comment', $hidden ) . "\n";
	} elseif ( $row->log_comment != '' ) {
		$out .= $pad . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
	}

	$out .= $pad . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= $pad . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	// Target title and parameters, unless the action is hidden
	if ( !( $row->log_deleted & LogPage::DELETED_ACTION ) ) {
		$title = Title::makeTitle( $row->log_namespace, $row->log_title );
		$out .= $pad . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
		$paramAttribs = array( 'xml:space' => 'preserve' );
		$out .= $pad . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	} else {
		$out .= $pad . Xml::element( 'text', $hidden ) . "\n";
	}

	$out .= "  </logitem>\n";

	wfProfileOut( __METHOD__ );
	return $out;
}
00758 
/**
 * Builds an indented <timestamp> line with the timestamp converted to
 * TS_ISO_8601 by wfTimestamp().
 *
 * @param $timestamp String: timestamp in a format wfTimestamp() accepts
 * @param $indent String: leading whitespace for the line
 * @return string
 */
function writeTimestamp( $timestamp, $indent = "      " ) {
	$iso = wfTimestamp( TS_ISO_8601, $timestamp );
	$element = Xml::element( 'timestamp', null, $iso );
	return $indent . $element . "\n";
}
00768 
/**
 * Builds a <contributor> element. A contributor with a falsy ID whose name
 * is a valid IP address is written as <ip>; every other contributor is
 * written as <username> plus <id>.
 *
 * @param $id Int: user ID
 * @param $text String: user name or IP address
 * @param $indent String: leading whitespace for the element
 * @return string
 */
function writeContributor( $id, $text, $indent = "      " ) {
	$inner = $indent . "  ";

	// IP::isValid() is only consulted when there is no user ID
	$isAnonymousIp = !$id && IP::isValid( $text );

	if ( $isAnonymousIp ) {
		$body = $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$body = $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$body .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $body . $indent . "</contributor>\n";
}
00786 
/**
 * Builds the <upload> elements for a page in the file namespace: the
 * entries of the file's history in reversed order, followed by the file
 * itself. Returns an empty string for pages outside NS_FILE or when the
 * local file does not exist.
 *
 * @param $row Object: result row with page_namespace and page_title
 * @param $dumpContents Boolean: passed on to writeUpload()
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$img = wfLocalFile( $row->page_title );
	if ( !$img || !$img->exists() ) {
		return '';
	}

	$out = '';
	$history = array_reverse( $img->getHistory() );
	foreach ( $history as $ver ) {
		$out .= $this->writeUpload( $ver, $dumpContents );
	}

	return $out . $this->writeUpload( $img, $dumpContents );
}
00807 
/**
 * Build a single <upload> element describing one version of a file.
 *
 * @param $file File Version to describe
 * @param $dumpContents bool When true, embed the file's bytes as base64
 *   inside a <contents> element
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
        $pad = "      ";

        // Only old (archived) versions carry an <archivename>.
        $archiveName = $file->isOld()
                ? $pad . Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n"
                : '';

        $contents = '';
        if ( $dumpContents ) {
                // Base64 consists of XML-safe characters only, so the payload
                // is emitted without escaping. chunk_split() breaks it into
                // lines of its default length.
                $encoded = base64_encode( file_get_contents( $file->getPath() ) );
                $contents = $pad . '<contents encoding="base64">' .
                        chunk_split( $encoded ) .
                        $pad . "</contents>\n";
        }

        // A suppressed comment is represented by an empty, flagged element.
        $comment = $file->isDeleted( File::DELETED_COMMENT )
                ? Xml::element( 'comment', array( 'deleted' => 'deleted' ) )
                : Xml::elementClean( 'comment', null, $file->getDescription() );

        $xml = "    <upload>\n";
        $xml .= $this->writeTimestamp( $file->getTimestamp() );
        $xml .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
        $xml .= $pad . $comment . "\n";
        $xml .= $pad . Xml::element( 'filename', null, $file->getName() ) . "\n";
        $xml .= $archiveName;
        $xml .= $pad . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n";
        $xml .= $pad . Xml::element( 'size', null, $file->getSize() ) . "\n";
        $xml .= $pad . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n";
        $xml .= $pad . Xml::element( 'rel', null, $file->getRel() ) . "\n";
        $xml .= $contents;
        $xml .= "    </upload>\n";

        return $xml;
}
00847 
/**
 * Return the title text prefixed with the content language's name for
 * the title's namespace.
 *
 * Interwiki titles are returned via getPrefixedText() unchanged. For all
 * other titles, underscores in the namespace name are turned into spaces
 * and a colon separator is added only when the namespace name is
 * non-empty.
 *
 * @param $title Title
 * @return string
 */
public static function canonicalTitle( Title $title ) {
        global $wgContLang;

        if ( $title->getInterwiki() ) {
                return $title->getPrefixedText();
        }

        $nsName = $wgContLang->getNsText( $title->getNamespace() );
        $nsName = str_replace( '_', ' ', $nsName );

        $prefix = ( $nsName === '' ) ? '' : $nsName . ':';

        return $prefix . $title->getText();
}
00872 }
00873 
/**
 * Base dump sink: every write* callback sends its string to write(),
 * which prints it to standard output. The rename and filename hooks do
 * nothing here; subclasses in this file override them.
 */
class DumpOutput {

        /**
         * @param $string string Opening text of the dump stream
         */
        public function writeOpenStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $string string Closing text of the dump stream
         */
        public function writeCloseStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $page object Page row (unused here)
         * @param $string string Opening text of the page
         */
        public function writeOpenPage( $page, $string ) {
                $this->write( $string );
        }

        /**
         * @param $string string Closing text of the page
         */
        public function writeClosePage( $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev object Revision row (unused here)
         * @param $string string Text of the revision
         */
        public function writeRevision( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev object Log row (unused here)
         * @param $string string Text of the log item
         */
        public function writeLogItem( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * Emit a string on standard output. Overridden by file-backed sinks.
         *
         * @param $string string
         */
        public function write( $string ) {
                echo $string;
        }

        /**
         * No-op in the base class.
         *
         * @param $newname mixed
         */
        public function closeRenameAndReopen( $newname ) {
        }

        /**
         * No-op in the base class.
         *
         * @param $newname mixed
         * @param $open bool
         */
        public function closeAndRename( $newname, $open = false ) {
        }

        /**
         * The base class has no output file.
         *
         * @return null
         */
        public function getFilenames() {
                return null;
        }
}
00963 
/**
 * Dump sink that writes to a file opened with fopen() in "wt" mode.
 */
class DumpFileOutput extends DumpOutput {
        protected $handle = false;
        protected $filename;

        /**
         * @param $file string Path of the file to write to
         */
        function __construct( $file ) {
                $this->handle = fopen( $file, "wt" );
                $this->filename = $file;
        }

        /**
         * Write the closing text, then close the file handle if it is open.
         *
         * @param $string string
         */
        function writeCloseStream( $string ) {
                parent::writeCloseStream( $string );
                if ( !$this->handle ) {
                        return;
                }
                fclose( $this->handle );
                $this->handle = false;
        }

        /**
         * @param $string string Text appended to the open file handle
         */
        function write( $string ) {
                fputs( $this->handle, $string );
        }

        /**
         * Close the file, rename it, and open a fresh file under the
         * original name.
         *
         * @param $newname string|array
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Rename the current file to $newname.
         *
         * @param $newname string
         * @throws MWException when rename() fails
         */
        function renameOrException( $newname ) {
                if ( rename( $this->filename, $newname ) ) {
                        return;
                }
                throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
        }

        /**
         * Reduce a rename argument to a single name: a one-element array is
         * unwrapped, anything that is not an array is returned as given.
         *
         * @param $newname string|array
         * @return string
         * @throws MWException when an array with more than one entry is passed
         */
        function checkRenameArgCount( $newname ) {
                if ( !is_array( $newname ) ) {
                        return $newname;
                }
                if ( count( $newname ) > 1 ) {
                        throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
                }
                return $newname[0];
        }

        /**
         * Close the file and rename it. Nothing happens when the resolved
         * new name is empty.
         *
         * @param $newname string|array
         * @param $open bool Reopen the original filename for writing afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $target = $this->checkRenameArgCount( $newname );
                if ( !$target ) {
                        return;
                }

                if ( $this->handle ) {
                        fclose( $this->handle );
                        $this->handle = false;
                }

                $this->renameOrException( $target );

                if ( $open ) {
                        $this->handle = fopen( $this->filename, "wt" );
                }
        }

        /**
         * @return string Name of the file this sink writes to
         */
        function getFilenames() {
                return $this->filename;
        }
}
01055 
/**
 * Dump sink that feeds its output to the standard input of an external
 * command started with proc_open(). When a file is given, the command's
 * output is redirected into it by the shell.
 */
class DumpPipeOutput extends DumpFileOutput {
        protected $command;
        protected $filename;
        protected $procOpenResource = false;

        /**
         * @param $command string Shell command to pipe the dump into
         * @param $file string|null Optional file the command's output is
         *   redirected to
         */
        function __construct( $command, $file = null ) {
                if ( $file !== null ) {
                        $command = $command . " > " . wfEscapeShellArg( $file );
                }

                $this->startCommand( $command );
                // The stored command includes the redirect appended above.
                $this->command = $command;
                $this->filename = $file;
        }

        /**
         * Write the closing text, close the pipe (done by the parent), then
         * close the process if one is running.
         *
         * @param $string string
         */
        function writeCloseStream( $string ) {
                parent::writeCloseStream( $string );
                if ( !$this->procOpenResource ) {
                        return;
                }
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
        }

        /**
         * Start the command and keep the write end of its stdin pipe as
         * this sink's handle.
         *
         * @param $command string
         */
        function startCommand( $command ) {
                $descriptors = array(
                        0 => array( "pipe", "r" ),
                );
                $pipes = array();
                $this->procOpenResource = proc_open( $command, $descriptors, $pipes );
                $this->handle = $pipes[0];
        }

        /**
         * Close, rename, and restart the command on the original filename.
         *
         * @param $newname string|array
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Close the pipe and the process, then rename the output file.
         * Nothing happens when the resolved new name is empty.
         *
         * @param $newname string|array
         * @param $open bool Restart the command afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $target = $this->checkRenameArgCount( $newname );
                if ( !$target ) {
                        return;
                }

                if ( $this->handle ) {
                        fclose( $this->handle );
                        $this->handle = false;
                }
                if ( $this->procOpenResource ) {
                        proc_close( $this->procOpenResource );
                        $this->procOpenResource = false;
                }

                $this->renameOrException( $target );

                if ( $open ) {
                        // NOTE(review): $this->command already ends with a
                        // redirect when the constructor was given a file, so
                        // the redirect appears twice here - confirm intended.
                        $restart = $this->command . " > " . wfEscapeShellArg( $this->filename );
                        $this->startCommand( $restart );
                }
        }

}
01135 
/**
 * Dump sink that pipes output through the "gzip" command into a file.
 */
class DumpGZipOutput extends DumpPipeOutput {

        /**
         * @param $file string File that receives the gzip output
         */
        function __construct( $file ) {
                $compressor = "gzip";
                parent::__construct( $compressor, $file );
        }
}
01149 
/**
 * Dump sink that pipes output through the "bzip2" command into a file.
 */
class DumpBZip2Output extends DumpPipeOutput {

        /**
         * @param $file string File that receives the bzip2 output
         */
        function __construct( $file ) {
                $compressor = "bzip2";
                parent::__construct( $compressor, $file );
        }
}
01163 
/**
 * Dump sink that pipes output into "7za", which writes the archive file
 * itself (no shell redirect to the target file is used).
 */
class Dump7ZipOutput extends DumpPipeOutput {

        /**
         * @param $file string Archive file to create
         */
        function __construct( $file ) {
                $command = $this->setup7zCommand( $file );
                parent::__construct( $command );
                $this->filename = $file;
        }

        /**
         * Build the 7za command line that reads from stdin and adds the
         * data to the given archive.
         *
         * @param $file string
         * @return string
         */
        function setup7zCommand( $file ) {
                $command = "7za a -bd -si " . wfEscapeShellArg( $file );
                // Suppress annoying useless crap from p7zip
                // Unfortunately this could suppress real error messages too
                $command .= ' >' . wfGetNull() . ' 2>&1';
                return( $command );
        }

        /**
         * Close the pipe and the 7za process, then rename the archive.
         * Nothing happens when the resolved new name is empty.
         *
         * The handle and process resource are closed only when still set,
         * and are reset to false afterwards, matching
         * DumpPipeOutput::closeAndRename(). Without this, calling the method
         * after writeCloseStream() would pass false to fclose()/proc_close(),
         * and a later writeCloseStream() would close an already-closed
         * resource.
         *
         * @param $newname string|array
         * @param $open bool Restart 7za on the original filename afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( $newname ) {
                        if ( $this->handle ) {
                                fclose( $this->handle );
                                $this->handle = false;
                        }
                        if ( $this->procOpenResource ) {
                                proc_close( $this->procOpenResource );
                                $this->procOpenResource = false;
                        }
                        $this->renameOrException( $newname );
                        if ( $open ) {
                                $command = $this->setup7zCommand( $this->filename );
                                $this->startCommand( $command );
                        }
                }
        }
}
01208 
/**
 * Pass-through dump filter. Every callback is forwarded to the wrapped
 * sink; page and revision callbacks are forwarded only for pages that
 * pass() accepts. Subclasses override pass() to select pages.
 */
class DumpFilter {

        /**
         * Sink that receives the filtered output.
         */
        public $sink;

        /**
         * Whether the page currently being written was accepted by pass().
         */
        protected $sendingThisPage;

        /**
         * @param $sink object Sink to forward to (held by reference)
         */
        function __construct( &$sink ) {
                $this->sink =& $sink;
        }

        /**
         * @param $string string
         */
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }

        /**
         * @param $string string
         */
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }

        /**
         * Decide whether this page is forwarded, and forward its opening
         * text if so.
         *
         * @param $page object Page row
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                $this->sendingThisPage = $this->pass( $page, $string );
                if ( $this->sendingThisPage ) {
                        $this->sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string string
         */
        function writeClosePage( $string ) {
                if ( $this->sendingThisPage ) {
                        $this->sink->writeClosePage( $string );
                        $this->sendingThisPage = false;
                }
        }

        /**
         * @param $rev object Revision row
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                if ( $this->sendingThisPage ) {
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Forward a log item to the sink's own log-item handler, so that
         * sinks with revision-specific logic do not receive log rows as
         * revisions.
         *
         * Sinks that do not implement writeLogItem() fall back to
         * writeRevision(), which is what this method always called before.
         *
         * @param $rev object Log row
         * @param $string string
         */
        function writeLogItem( $rev, $string ) {
                if ( is_callable( array( $this->sink, 'writeLogItem' ) ) ) {
                        $this->sink->writeLogItem( $rev, $string );
                } else {
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * @param $newname string|array
         */
        function closeRenameAndReopen( $newname ) {
                $this->sink->closeRenameAndReopen( $newname );
        }

        /**
         * @param $newname string|array
         * @param $open bool
         */
        function closeAndRename( $newname, $open = false ) {
                $this->sink->closeAndRename( $newname, $open );
        }

        /**
         * @return mixed Whatever the sink reports as its filename(s)
         */
        function getFilenames() {
                return $this->sink->getFilenames();
        }

        /**
         * Override for page-based filter types. The base filter accepts
         * every page.
         *
         * @param $page object Page row
         * @return bool
         */
        function pass( $page ) {
                return true;
        }
}
01320 
/**
 * Dump filter that drops pages in talk namespaces.
 */
class DumpNotalkFilter extends DumpFilter {

        /**
         * @param $page object Page row; page_namespace is read
         * @return bool True when the page is not in a talk namespace
         */
        function pass( $page ) {
                $inTalkNamespace = MWNamespace::isTalk( $page->page_namespace );
                return !$inTalkNamespace;
        }
}
01335 
/**
 * Dump filter that includes or excludes pages by namespace.
 */
class DumpNamespaceFilter extends DumpFilter {
        /** True when the listed namespaces are excluded instead of included */
        var $invert = false;

        /** Map of namespace number => true for every listed namespace */
        var $namespaces = array();

        /**
         * @param $sink object Sink to forward to
         * @param $param string Comma-separated list of namespace numbers
         *   and/or NS_* constant names; a leading "!" inverts the selection
         * @throws MWException on an entry that is neither a known constant
         *   name nor numeric
         */
        function __construct( &$sink, $param ) {
                parent::__construct( $sink );

                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
                        "NS_USER"           => NS_USER,
                        "NS_USER_TALK"      => NS_USER_TALK,
                        "NS_PROJECT"        => NS_PROJECT,
                        "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
                        "NS_FILE"           => NS_FILE,
                        "NS_FILE_TALK"      => NS_FILE_TALK,
                        "NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
                        "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
                        "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
                        "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
                        "NS_TEMPLATE"       => NS_TEMPLATE,
                        "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
                        "NS_HELP"           => NS_HELP,
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

                // substr() replaces the curly-brace string offset, which is a
                // parse error on PHP 8, and is also safe on an empty string.
                if ( substr( $param, 0, 1 ) === '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }

                foreach ( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
                        if ( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                                $this->namespaces[$ns] = true;
                        } elseif ( is_numeric( $key ) ) {
                                $ns = intval( $key );
                                $this->namespaces[$ns] = true;
                        } else {
                                throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                }
        }

        /**
         * @param $page object Page row; page_namespace is read
         * @return bool True when the page's namespace is listed, or, with
         *   inversion, when it is not listed
         */
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
        }
}
01400 
/**
 * Dump filter that forwards only the latest revision of each page.
 *
 * Page and revision data are buffered until the page is closed. The page
 * is then sent to the sink only if a revision whose rev_id equals the
 * page's page_latest was seen.
 */
class DumpLatestFilter extends DumpFilter {
        var $page;
        var $pageString;
        var $rev;
        var $revString;

        /**
         * Remember the page instead of forwarding it right away.
         *
         * @param $page object Page row
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                $this->pageString = $string;
                $this->page = $page;
        }

        /**
         * Flush the buffered page and its latest revision, if one was
         * captured, then clear the buffer.
         *
         * @param $string string
         */
        function writeClosePage( $string ) {
                $haveLatest = (bool)$this->rev;
                if ( $haveLatest ) {
                        $this->sink->writeOpenPage( $this->page, $this->pageString );
                        $this->sink->writeRevision( $this->rev, $this->revString );
                        $this->sink->writeClosePage( $string );
                }

                $this->rev = $this->revString = null;
                $this->page = $this->pageString = null;
        }

        /**
         * Keep the revision only when it is the page's latest one.
         *
         * @param $rev object Revision row; rev_id is read
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                if ( $rev->rev_id != $this->page->page_latest ) {
                        return;
                }
                $this->rev = $rev;
                $this->revString = $string;
        }
}
01443 
/**
 * Dump sink that repeats every callback on each of a list of sinks.
 */
class DumpMultiWriter {

        /** List of sinks, indexed 0..count-1 */
        public $sinks = array();

        /** Number of sinks in the list */
        public $count = 0;

        /**
         * @param $sinks array List of sink objects (sequential integer keys)
         */
        function __construct( $sinks ) {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }

        /**
         * @param $string string
         */
        function writeOpenStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenStream( $string );
                }
        }

        /**
         * @param $string string
         */
        function writeCloseStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeCloseStream( $string );
                }
        }

        /**
         * @param $page object Page row
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string string
         */
        function writeClosePage( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeClosePage( $string );
                }
        }

        /**
         * @param $rev object Revision row
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeRevision( $rev, $string );
                }
        }

        /**
         * Repeat a log item on every sink. The other sink classes in this
         * file (DumpOutput, DumpFilter) provide this callback, so the
         * multi-writer needs it to be usable in their place.
         *
         * @param $rev object Log row
         * @param $string string
         */
        function writeLogItem( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeLogItem( $rev, $string );
                }
        }

        /**
         * @param $newnames array One new name per sink, in sink order
         */
        function closeRenameAndReopen( $newnames ) {
                $this->closeAndRename( $newnames, true );
        }

        /**
         * @param $newnames array One new name per sink, in sink order
         * @param $open bool Passed through to every sink
         */
        function closeAndRename( $newnames, $open = false ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->closeAndRename( $newnames[$i], $open );
                }
        }

        /**
         * @return array Result of getFilenames() from each sink, in sink order
         */
        function getFilenames() {
                $filenames = array();
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $filenames[] = $this->sinks[$i]->getFilenames();
                }
                return $filenames;
        }

}
01534 
/**
 * Prepare a string for inclusion in XML output: run it through
 * UtfNormal::cleanUp() and then escape it with htmlspecialchars().
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
        wfProfileIn( __FUNCTION__ );

        $cleaned = UtfNormal::cleanUp( $string );
        $escaped = htmlspecialchars( $cleaned );

        wfProfileOut( __FUNCTION__ );
        return $escaped;
}