MediaWiki  REL1_24
Export.php
Go to the documentation of this file.
00001 <?php
/**
 * Exports pages, revisions or log entries from the wiki database as XML.
 *
 * Rows are selected through the database handle given to the constructor,
 * rendered to XML fragments by an XmlDumpWriter and handed to a DumpOutput
 * sink (a plain DumpOutput unless setOutputSink() replaces it).
 */
class WikiExporter {
    /** @var bool When true, current-revision dumps with a condition also build $author_list */
    public $list_authors = false;

    /** @var bool When true, upload records are written before each page is closed */
    public $dumpUploads = false;

    /** @var bool Forwarded to XmlDumpWriter::writeUploads() as its $dumpContents flag */
    public $dumpUploadFileContents = false;

    /** @var string XML fragment produced by do_list_authors() */
    public $author_list = "";

    // History selectors, tested as bit flags in dumpFrom()
    const FULL = 1;
    const CURRENT = 2;
    const STABLE = 4; // extension defined
    const LOGS = 8;
    const RANGE = 16;

    // Result buffering modes; STREAM switches the connection to unbuffered results
    const BUFFER = 0;
    const STREAM = 1;

    // Text modes; STUB skips the join against the text table
    const TEXT = 0;
    const STUB = 1;

    /** @var int One of the BUFFER / STREAM constants */
    public $buffer;

    /** @var int One of the TEXT / STUB constants */
    public $text;

    /** @var DumpOutput Destination of the generated XML */
    public $sink;

    /** @var mixed Database handle used for every query of this exporter */
    public $db;

    /** @var int|array History selector: a class constant, or an array with 'dir', 'offset', 'limit' */
    public $history;

    /** @var XmlDumpWriter Renders rows into XML fragments */
    public $writer;

    /**
     * Returns the version of the XML export schema in use.
     *
     * @return string
     */
    public static function schemaVersion() {
        return "0.9";
    }

    /**
     * @param mixed $db Database handle used for the export queries
     * @param int|array $history One of the FULL / CURRENT / STABLE / LOGS / RANGE
     *   constants, or an array whose 'dir', 'offset' and 'limit' keys restrict
     *   the revisions by timestamp
     * @param int $buffer WikiExporter::BUFFER or WikiExporter::STREAM
     * @param int $text WikiExporter::TEXT or WikiExporter::STUB
     */
    public function __construct( $db, $history = WikiExporter::CURRENT,
            $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
        $this->db = $db;
        $this->history = $history;
        $this->buffer = $buffer;
        $this->writer = new XmlDumpWriter();
        $this->sink = new DumpOutput();
        $this->text = $text;
    }

    /**
     * Replaces the output sink. The sink is bound by reference, exactly as
     * callers of this long-standing signature expect.
     *
     * @param DumpOutput $sink
     */
    public function setOutputSink( &$sink ) {
        $this->sink =& $sink;
    }

    /**
     * Writes the document header produced by the writer to the sink.
     */
    public function openStream() {
        $output = $this->writer->openStream();
        $this->sink->writeOpenStream( $output );
    }

    /**
     * Writes the document footer produced by the writer to the sink.
     */
    public function closeStream() {
        $output = $this->writer->closeStream();
        $this->sink->writeCloseStream( $output );
    }

    /**
     * Dumps every page, i.e. runs the dump without any extra condition.
     */
    public function allPages() {
        $this->dumpFrom( '' );
    }

    /**
     * Dumps the pages whose page_id lies in [$start, $end).
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means "no upper bound"
     */
    public function pagesByRange( $start, $end ) {
        $condition = 'page_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND page_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Dumps the revisions whose rev_id lies in [$start, $end).
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means "no upper bound"
     */
    public function revsByRange( $start, $end ) {
        $condition = 'rev_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND rev_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Dumps the single page identified by a Title object.
     *
     * @param Title $title
     */
    public function pageByTitle( $title ) {
        $this->dumpFrom(
            'page_namespace=' . $title->getNamespace() .
            ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
    }

    /**
     * Dumps the single page identified by its textual name.
     *
     * @param string $name
     * @throws MWException When $name cannot be turned into a Title
     */
    public function pageByName( $name ) {
        $title = Title::newFromText( $name );
        if ( is_null( $title ) ) {
            throw new MWException( "Can't export invalid title" );
        }
        $this->pageByTitle( $title );
    }

    /**
     * Dumps several pages, one dump query per name.
     *
     * @param array $names Page names
     */
    public function pagesByName( $names ) {
        foreach ( $names as $name ) {
            $this->pageByName( $name );
        }
    }

    /**
     * Runs the dump without any extra condition; dumpFrom() produces log
     * items only when the history selector carries the LOGS flag.
     */
    public function allLogs() {
        $this->dumpFrom( '' );
    }

    /**
     * Dumps the log entries whose log_id lies in [$start, $end).
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means "no upper bound"
     */
    public function logsByRange( $start, $end ) {
        $condition = 'log_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND log_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Rebuilds $this->author_list as a <contributors> fragment holding the
     * distinct authors of all revisions of the pages matched by $cond.
     * Revisions whose user is hidden (DELETED_USER) are left out.
     *
     * @param string $cond SQL condition selecting the pages
     */
    protected function do_list_authors( $cond ) {
        wfProfileIn( __METHOD__ );
        $this->author_list = "<contributors>";
        // rev_deleted

        $res = $this->db->select(
            array( 'page', 'revision' ),
            array( 'DISTINCT rev_user_text', 'rev_user' ),
            array(
                $this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
                $cond,
                // A revision belongs to a page through rev_page; comparing
                // page_id with rev_id would pair unrelated rows.
                'page_id = rev_page',
            ),
            __METHOD__
        );

        foreach ( $res as $row ) {
            // htmlspecialchars() escapes only the XML metacharacters;
            // htmlentities() would emit HTML-only named entities, which an
            // XML parser does not know.
            $this->author_list .= "<contributor>" .
                "<username>" .
                htmlspecialchars( $row->rev_user_text ) .
                "</username>" .
                "<id>" .
                $row->rev_user .
                "</id>" .
                "</contributor>";
        }
        $this->author_list .= "</contributors>";
        wfProfileOut( __METHOD__ );
    }

    /**
     * Runs the export query matching the configured history selector and
     * streams every resulting row through the writer into the sink.
     *
     * With the LOGS flag the logging table is dumped; otherwise pages and
     * their revisions are dumped. In STREAM mode the connection is switched
     * to unbuffered results for the duration of the query and restored
     * afterwards, including when the dump fails.
     *
     * @param string $cond Extra SQL condition; '' for none
     * @throws MWException When the history selector is not recognised
     * @throws Exception Any failure of the query or the output is rethrown
     */
    protected function dumpFrom( $cond = '' ) {
        wfProfileIn( __METHOD__ );
        $prev = null; // previous buffering mode; only set and used in STREAM mode
        $result = null; // Assuring $result is not undefined, if exception occurs early

        # For logging dumps...
        # The array form of the history selector never denotes a log dump, and
        # it must not reach the bitwise test: PHP 8 rejects "array & int".
        if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
            $where = array( 'user_id = log_user' );
            # Hide private logs
            $hideLogs = LogEventsList::getExcludeClause( $this->db );
            if ( $hideLogs ) {
                $where[] = $hideLogs;
            }
            # Add on any caller specified conditions
            if ( $cond ) {
                $where[] = $cond;
            }
            # Get logging table name for logging.* clause
            $logging = $this->db->tableName( 'logging' );

            if ( $this->buffer == WikiExporter::STREAM ) {
                $prev = $this->db->bufferResults( false );
            }
            try {
                $result = $this->db->select( array( 'logging', 'user' ),
                    array( "{$logging}.*", 'user_name' ), // grab the user name
                    $where,
                    __METHOD__,
                    array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
                );
                $this->outputLogStream( $result );
                if ( $this->buffer == WikiExporter::STREAM ) {
                    $this->db->bufferResults( $prev );
                }
            } catch ( Exception $e ) {
                $this->cleanUpFailedDump( $result, $prev );

                // Inform caller about problem
                wfProfileOut( __METHOD__ );
                throw $e;
            }
        # For page dumps...
        } else {
            $tables = array( 'page', 'revision' );
            $opts = array( 'ORDER BY' => 'page_id ASC' );
            $opts['USE INDEX'] = array();
            $join = array();
            if ( is_array( $this->history ) ) {
                # Time offset/limit for all pages/history...
                $revJoin = 'page_id=rev_page';
                # Set time order
                if ( $this->history['dir'] == 'asc' ) {
                    $op = '>';
                    $opts['ORDER BY'] = 'rev_timestamp ASC';
                } else {
                    $op = '<';
                    $opts['ORDER BY'] = 'rev_timestamp DESC';
                }
                # Set offset
                if ( !empty( $this->history['offset'] ) ) {
                    $revJoin .= " AND rev_timestamp $op " .
                        $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
                }
                $join['revision'] = array( 'INNER JOIN', $revJoin );
                # Set query limit
                if ( !empty( $this->history['limit'] ) ) {
                    $opts['LIMIT'] = intval( $this->history['limit'] );
                }
            } elseif ( $this->history & WikiExporter::FULL ) {
                # Full history dumps...
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
            } elseif ( $this->history & WikiExporter::CURRENT ) {
                # Latest revision dumps...
                if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
                    $this->do_list_authors( $cond );
                }
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
            } elseif ( $this->history & WikiExporter::STABLE ) {
                # "Stable" revision dumps...
                # Default JOIN, to be overridden...
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
                # One, and only one hook should set this, and return false
                if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
                    wfProfileOut( __METHOD__ );
                    throw new MWException( __METHOD__ . " given invalid history dump type." );
                }
            } elseif ( $this->history & WikiExporter::RANGE ) {
                # Dump of revisions within a specified range
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
                $opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
            } else {
                # Unknown history specification parameter?
                wfProfileOut( __METHOD__ );
                throw new MWException( __METHOD__ . " given invalid history dump type." );
            }
            # Query optimization hacks
            if ( $cond == '' ) {
                $opts[] = 'STRAIGHT_JOIN';
                $opts['USE INDEX']['page'] = 'PRIMARY';
            }
            # Build text join options
            if ( $this->text != WikiExporter::STUB ) { // 1-pass
                $tables[] = 'text';
                $join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
            }

            if ( $this->buffer == WikiExporter::STREAM ) {
                $prev = $this->db->bufferResults( false );
            }

            try {
                wfRunHooks( 'ModifyExportQuery',
                        array( $this->db, &$tables, &$cond, &$opts, &$join ) );

                # Do the query!
                $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
                # Output dump results
                $this->outputPageStream( $result );

                if ( $this->buffer == WikiExporter::STREAM ) {
                    $this->db->bufferResults( $prev );
                }
            } catch ( Exception $e ) {
                $this->cleanUpFailedDump( $result, $prev );

                // Inform caller about problem; the profiler section has to be
                // closed on this path as well, as the log branch does.
                wfProfileOut( __METHOD__ );
                throw $e;
            }
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Best-effort cleanup after a failed dump query.
     *
     * Throwing the exception does not reliably free the resultset, and would
     * also leave the connection in unbuffered mode. Secondary failures are
     * swallowed on purpose: the original exception has higher priority.
     *
     * @param mixed $result Result set of the failed dump, or null if none was obtained
     * @param mixed $prev Buffering mode to restore when streaming
     */
    protected function cleanUpFailedDump( $result, $prev ) {
        // Freeing result
        try {
            if ( $result ) {
                $result->free();
            }
        } catch ( Exception $e2 ) {
            // Already in panic mode -> ignoring $e2
        }

        // Putting database back in previous buffer mode
        try {
            if ( $this->buffer == WikiExporter::STREAM ) {
                $this->db->bufferResults( $prev );
            }
        } catch ( Exception $e2 ) {
            // Already in panic mode -> ignoring $e2
        }
    }

    /**
     * Turns a page/revision result set into page and revision XML.
     *
     * A new <page> is opened whenever the namespace or title differs from the
     * previously opened page, so the rows of one page must be adjacent in the
     * result. Every row yields one revision. The author list is appended only
     * when the final page of the result set is closed.
     *
     * @param mixed $resultset Iterable result of the page dump query
     */
    protected function outputPageStream( $resultset ) {
        $last = null;
        foreach ( $resultset as $row ) {
            $startsNewPage = $last === null ||
                $last->page_namespace != $row->page_namespace ||
                $last->page_title != $row->page_title;
            if ( $startsNewPage ) {
                if ( $last !== null ) {
                    // Finish the previous page before opening the next one
                    $output = '';
                    if ( $this->dumpUploads ) {
                        $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
                    }
                    $output .= $this->writer->closePage();
                    $this->sink->writeClosePage( $output );
                }
                $output = $this->writer->openPage( $row );
                $this->sink->writeOpenPage( $row, $output );
                $last = $row;
            }
            $output = $this->writer->writeRevision( $row );
            $this->sink->writeRevision( $row, $output );
        }
        if ( $last !== null ) {
            // Finish the page that was still open when the rows ran out
            $output = '';
            if ( $this->dumpUploads ) {
                $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
            }
            $output .= $this->author_list;
            $output .= $this->writer->closePage();
            $this->sink->writeClosePage( $output );
        }
    }

    /**
     * Turns a logging result set into log item XML, one item per row.
     *
     * @param mixed $resultset Iterable result of the log dump query
     */
    protected function outputLogStream( $resultset ) {
        foreach ( $resultset as $row ) {
            $output = $this->writer->writeLogItem( $row );
            $this->sink->writeLogItem( $row, $output );
        }
    }
}
00477 
/**
 * Renders database rows (pages, revisions, log entries, uploads) and the
 * surrounding document frame as fragments of the XML export format.
 *
 * Every method returns a string; nothing is written to an output here.
 */
class XmlDumpWriter {
    /**
     * Returns the export schema version.
     *
     * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
     * @return string
     */
    public function schemaVersion() {
        wfDeprecated( __METHOD__, '1.20' );
        return WikiExporter::schemaVersion();
    }

    /**
     * Builds the document header: the <mediawiki> element carrying the schema
     * attributes, followed by the <siteinfo> block. The matching closing tag
     * comes from closeStream().
     *
     * @return string
     */
    public function openStream() {
        global $wgLanguageCode;
        $ver = WikiExporter::schemaVersion();
        return Xml::element( 'mediawiki', array(
            'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
            'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
            /*
             * When a new version of the schema is created, it needs staging on mediawiki.org.
             * This requires a change in the operations/mediawiki-config git repo.
             *
             * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
             * you copy in the new xsd file.
             *
             * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
             * echo "http://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
             */
            'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
                "http://www.mediawiki.org/xml/export-$ver.xsd",
            'version'            => $ver,
            'xml:lang'           => $wgLanguageCode ),
            null ) .
            "\n" .
            $this->siteInfo();
    }

    /**
     * Builds the <siteinfo> block from the individual site description
     * elements, one per line.
     *
     * @return string
     */
    public function siteInfo() {
        $info = array(
            $this->sitename(),
            $this->dbname(),
            $this->homelink(),
            $this->generator(),
            $this->caseSetting(),
            $this->namespaces() );
        return "  <siteinfo>\n    " .
            implode( "\n    ", $info ) .
            "\n  </siteinfo>\n";
    }

    /**
     * @return string <sitename> element holding $wgSitename
     */
    public function sitename() {
        global $wgSitename;
        return Xml::element( 'sitename', array(), $wgSitename );
    }

    /**
     * @return string <dbname> element holding $wgDBname
     */
    public function dbname() {
        global $wgDBname;
        return Xml::element( 'dbname', array(), $wgDBname );
    }

    /**
     * @return string <generator> element naming the MediaWiki version
     */
    public function generator() {
        global $wgVersion;
        return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
    }

    /**
     * @return string <base> element holding the canonical URL of the main page
     */
    public function homelink() {
        return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalURL() );
    }

    /**
     * @return string <case> element describing the site-wide title case rule
     */
    public function caseSetting() {
        global $wgCapitalLinks;
        // "case-insensitive" option is reserved for future
        $sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
        return Xml::element( 'case', array(), $sensitivity );
    }

    /**
     * Builds the <namespaces> block with one <namespace> element per
     * formatted namespace of the content language.
     *
     * @return string
     */
    public function namespaces() {
        global $wgContLang;
        $spaces = "<namespaces>\n";
        foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
            $spaces .= '      ' .
                Xml::element( 'namespace',
                    array(
                        'key' => $ns,
                        'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
                    ), $title ) . "\n";
        }
        $spaces .= "    </namespaces>";
        return $spaces;
    }

    /**
     * Builds the document footer closing the element opened by openStream().
     *
     * @return string
     */
    public function closeStream() {
        return "</mediawiki>\n";
    }

    /**
     * Opens a <page> element and writes its header fields: title, namespace,
     * id and, where applicable, redirect target and restrictions.
     *
     * @param object $row Database row; the page_* fields are read
     * @return string
     */
    public function openPage( $row ) {
        $out = "  <page>\n";
        $title = Title::makeTitle( $row->page_namespace, $row->page_title );
        $out .= '    ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
        $out .= '    ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
        $out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
        if ( $row->page_is_redirect ) {
            $page = WikiPage::factory( $title );
            $redirect = $page->getRedirectTarget();
            if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
                $out .= '    ';
                $out .= Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) );
                $out .= "\n";
            }
        }

        if ( $row->page_restrictions != '' ) {
            $out .= '    ' . Xml::element( 'restrictions', array(),
                strval( $row->page_restrictions ) ) . "\n";
        }

        wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

        return $out;
    }

    /**
     * Closes the element opened by openPage().
     *
     * @return string
     */
    public function closePage() {
        return "  </page>\n";
    }

    /**
     * Renders one <revision> element.
     *
     * Contributor, comment and text are replaced by deleted="deleted"
     * placeholders when the matching rev_deleted bit is set. Without an
     * old_text field only a text stub (text id and byte count) is written.
     *
     * @param object $row Database row; the rev_* fields are read, and the
     *   page_* fields when the row carries no content model
     * @return string
     */
    public function writeRevision( $row ) {
        wfProfileIn( __METHOD__ );

        // Evaluated once and guarded with isset(), so that rows lacking the
        // rev_deleted column trigger no notice in any of the checks below.
        $textDeleted = isset( $row->rev_deleted )
            && ( $row->rev_deleted & Revision::DELETED_TEXT );

        $out = "    <revision>\n";
        $out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
        if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
            $out .= "      " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
        }

        $out .= $this->writeTimestamp( $row->rev_timestamp );

        if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
            $out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
        }

        if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
            $out .= "      <minor/>\n";
        }
        if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
            $out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( $row->rev_comment != '' ) {
            $out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
        }

        if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
            $content_model = strval( $row->rev_content_model );
        } else {
            // probably using $wgContentHandlerUseDB = false;
            $title = Title::makeTitle( $row->page_namespace, $row->page_title );
            $content_model = ContentHandler::getDefaultModelFor( $title );
        }

        $content_handler = ContentHandler::getForModelID( $content_model );

        if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
            $content_format = strval( $row->rev_content_format );
        } else {
            // probably using $wgContentHandlerUseDB = false;
            $content_format = $content_handler->getDefaultFormat();
        }

        $text = '';
        if ( $textDeleted ) {
            $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( isset( $row->old_text ) ) {
            // Raw text from the database may have invalid chars
            $text = strval( Revision::getRevisionText( $row ) );
            $text = $content_handler->exportTransform( $text, $content_format );
            $out .= "      " . Xml::elementClean( 'text',
                array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
                strval( $text ) ) . "\n";
        } else {
            // Stub output
            $out .= "      " . Xml::element( 'text',
                array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
                "" ) . "\n";
        }

        if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !$textDeleted ) {
            $out .= "      " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
        } else {
            $out .= "      <sha1/>\n";
        }

        $out .= "      " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
        $out .= "      " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

        // Hook handlers receive the writer by reference. $this itself cannot
        // be referenced on PHP 7.1 and later, so a local alias is passed.
        $writer = $this;
        wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

        $out .= "    </revision>\n";

        wfProfileOut( __METHOD__ );
        return $out;
    }

    /**
     * Renders one <logitem> element.
     *
     * Contributor, comment and action details are replaced by
     * deleted="deleted" placeholders when the matching log_deleted bit is set.
     *
     * @param object $row Database row; the log_* fields and user_name are read
     * @return string
     */
    public function writeLogItem( $row ) {
        wfProfileIn( __METHOD__ );

        $out = "  <logitem>\n";
        $out .= "    " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

        $out .= $this->writeTimestamp( $row->log_timestamp, "    " );

        if ( $row->log_deleted & LogPage::DELETED_USER ) {
            $out .= "    " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $out .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
        }

        if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
            $out .= "    " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( $row->log_comment != '' ) {
            $out .= "    " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
        }

        $out .= "    " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
        $out .= "    " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

        if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
            $out .= "    " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $title = Title::makeTitle( $row->log_namespace, $row->log_title );
            $out .= "    " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
            $out .= "    " . Xml::elementClean( 'params',
                array( 'xml:space' => 'preserve' ),
                strval( $row->log_params ) ) . "\n";
        }

        $out .= "  </logitem>\n";

        wfProfileOut( __METHOD__ );
        return $out;
    }

    /**
     * Renders a <timestamp> element in ISO 8601 format.
     *
     * @param string $timestamp Timestamp in any format wfTimestamp() accepts
     * @param string $indent Leading whitespace of the line
     * @return string
     */
    public function writeTimestamp( $timestamp, $indent = "      " ) {
        $ts = wfTimestamp( TS_ISO_8601, $timestamp );
        return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
    }

    /**
     * Renders a <contributor> element.
     *
     * A user name and id are written when an id is present or the text is
     * not a valid IP address; otherwise the text is written as <ip>.
     *
     * @param int $id User id; a falsy value stands for an anonymous user
     * @param string $text User name or IP address
     * @param string $indent Leading whitespace of the lines
     * @return string
     */
    public function writeContributor( $id, $text, $indent = "      " ) {
        $out = $indent . "<contributor>\n";
        if ( $id || !IP::isValid( $text ) ) {
            $out .= $indent . "  " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
            $out .= $indent . "  " . Xml::element( 'id', null, strval( $id ) ) . "\n";
        } else {
            $out .= $indent . "  " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
        }
        $out .= $indent . "</contributor>\n";
        return $out;
    }

    /**
     * Renders the upload history of a file page: the history entries in
     * reversed order, followed by the current version. Rows outside the file
     * namespace, and files that do not exist, produce an empty string.
     *
     * @param object $row Database row; page_namespace and page_title are read
     * @param bool $dumpContents Whether the file contents are embedded
     * @return string
     */
    public function writeUploads( $row, $dumpContents = false ) {
        if ( $row->page_namespace == NS_FILE ) {
            $img = wfLocalFile( $row->page_title );
            if ( $img && $img->exists() ) {
                $out = '';
                foreach ( array_reverse( $img->getHistory() ) as $ver ) {
                    $out .= $this->writeUpload( $ver, $dumpContents );
                }
                $out .= $this->writeUpload( $img, $dumpContents );
                return $out;
            }
        }
        return '';
    }

    /**
     * Renders one <upload> element for a single file version.
     *
     * @param File $file
     * @param bool $dumpContents Whether the file contents are embedded as base64
     * @return string
     */
    public function writeUpload( $file, $dumpContents = false ) {
        if ( $file->isOld() ) {
            $archiveName = "      " .
                Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
        } else {
            $archiveName = '';
        }
        if ( $dumpContents ) {
            $be = $file->getRepo()->getBackend();
            # Dump file as base64
            # Uses only XML-safe characters, so does not need escaping
            # @todo Too bad this loads the contents into memory (script might swap)
            $contents = '      <contents encoding="base64">' .
                chunk_split( base64_encode(
                    $be->getFileContents( array( 'src' => $file->getPath() ) ) ) ) .
                "      </contents>\n";
        } else {
            $contents = '';
        }
        if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
            $comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
        } else {
            $comment = Xml::elementClean( 'comment', null, $file->getDescription() );
        }
        return "    <upload>\n" .
            $this->writeTimestamp( $file->getTimestamp() ) .
            $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
            "      " . $comment . "\n" .
            "      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
            $archiveName .
            "      " . Xml::element( 'src', null, $file->getCanonicalURL() ) . "\n" .
            "      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
            "      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
            "      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
            $contents .
            "    </upload>\n";
    }

    /**
     * Returns the prefixed text form of a title, built from the namespace
     * name of the content language with underscores turned into spaces.
     * External (interwiki) titles are returned as their prefixed text.
     *
     * @param Title $title
     * @return string
     */
    public static function canonicalTitle( Title $title ) {
        if ( $title->isExternal() ) {
            return $title->getPrefixedText();
        }

        global $wgContLang;
        $prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

        if ( $prefix !== '' ) {
            $prefix .= ':';
        }

        return $prefix . $title->getText();
    }
}
00907 
/**
 * Default dump sink: every fragment goes to standard output.
 *
 * The write*() methods all funnel into write(), so a subclass only has to
 * override write() to redirect the whole dump. The rename-related methods
 * are no-ops here because no file is involved.
 */
class DumpOutput {

    /**
     * @param string $string Document header
     */
    public function writeOpenStream( $string ) {
        $this->write( $string );
    }

    /**
     * @param string $string Document footer
     */
    public function writeCloseStream( $string ) {
        $this->write( $string );
    }

    /**
     * @param object $page Row of the page being opened; unused by this sink
     * @param string $string Opening fragment of the page
     */
    public function writeOpenPage( $page, $string ) {
        $this->write( $string );
    }

    /**
     * @param string $string Closing fragment of the page
     */
    public function writeClosePage( $string ) {
        $this->write( $string );
    }

    /**
     * @param object $rev Row of the revision; unused by this sink
     * @param string $string Revision fragment
     */
    public function writeRevision( $rev, $string ) {
        $this->write( $string );
    }

    /**
     * @param object $rev Row of the log entry; unused by this sink
     * @param string $string Log item fragment
     */
    public function writeLogItem( $rev, $string ) {
        $this->write( $string );
    }

    /**
     * Emits a fragment on standard output.
     *
     * @param string $string
     */
    public function write( $string ) {
        echo $string;
    }

    /**
     * No-op: this sink is not backed by a file.
     *
     * @param string|array $newname
     */
    public function closeRenameAndReopen( $newname ) {
    }

    /**
     * No-op: this sink is not backed by a file.
     *
     * @param string|array $newname
     * @param bool $open
     */
    public function closeAndRename( $newname, $open = false ) {
    }

    /**
     * @return null This sink writes to no file
     */
    public function getFilenames() {
        return null;
    }
}
00998 
/**
 * Dump sink writing to a file, with support for closing the file, renaming
 * it and optionally starting a fresh file under the original name.
 */
class DumpFileOutput extends DumpOutput {
    /** @var resource|bool Open file handle, or false when no file is open */
    protected $handle = false;

    /** @var string Path the sink was opened with */
    protected $filename;

    /**
     * Opens (and truncates) the output file.
     *
     * @param string $file Path of the output file
     */
    public function __construct( $file ) {
        $this->handle = fopen( $file, "wt" );
        $this->filename = $file;
    }

    /**
     * Writes the document footer and closes the file.
     *
     * @param string $string Document footer
     */
    public function writeCloseStream( $string ) {
        parent::writeCloseStream( $string );
        if ( $this->handle ) {
            fclose( $this->handle );
            $this->handle = false;
        }
    }

    /**
     * Appends a fragment to the open file.
     *
     * @param string $string
     */
    public function write( $string ) {
        fputs( $this->handle, $string );
    }

    /**
     * Closes the file, renames it and reopens the original path for writing.
     *
     * @param string|array $newname See closeAndRename()
     */
    public function closeRenameAndReopen( $newname ) {
        $this->closeAndRename( $newname, true );
    }

    /**
     * Renames the output file.
     *
     * @param string $newname Target path
     * @throws MWException When the rename fails
     */
    public function renameOrException( $newname ) {
        if ( !rename( $this->filename, $newname ) ) {
            throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
        }
    }

    /**
     * Normalises the rename argument: a one-element array is unwrapped to
     * its element at index 0, any non-array value is returned unchanged.
     *
     * @param string|array $newname
     * @return string|null Null when an array holds nothing at index 0
     * @throws MWException When the array holds more than one name
     */
    public function checkRenameArgCount( $newname ) {
        if ( is_array( $newname ) ) {
            if ( count( $newname ) > 1 ) {
                throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
            }
            // An empty array yields null, as before, but without reading an
            // undefined offset; closeAndRename() treats null as "no rename".
            $newname = isset( $newname[0] ) ? $newname[0] : null;
        }
        return $newname;
    }

    /**
     * Closes the file and renames it. With $open, a new file is afterwards
     * opened (truncated) under the original path, so that later writes go to
     * a fresh file. Nothing happens when the new name is empty.
     *
     * @param string|array $newname New name, or an array holding exactly one name
     * @param bool $open Whether to reopen the original path afterwards
     * @throws MWException When several names are given or the rename fails
     */
    public function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $this->handle = fopen( $this->filename, "wt" );
            }
        }
    }

    /**
     * @return string Path the sink was opened with
     */
    public function getFilenames() {
        return $this->filename;
    }
}
01090 
/**
 * Dump sink that streams the output into the standard input of a shell
 * command, optionally redirecting the command's own output into a file.
 */
class DumpPipeOutput extends DumpFileOutput {
    /** @var string Command line that was started (including any "> file" redirect) */
    protected $command;

    /** @var string|null File the command output is redirected to, if any */
    protected $filename;

    /** @var resource|bool Process resource from proc_open(), or false when none is running */
    protected $procOpenResource = false;

    /**
     * Start the command and connect this sink to its standard input.
     *
     * @param string $command Shell command to run
     * @param string|null $file When given, the command's output is redirected to this file
     */
    function __construct( $command, $file = null ) {
        if ( !is_null( $file ) ) {
            $command .= " > " . wfEscapeShellArg( $file );
        }

        $this->startCommand( $command );
        $this->command = $command;
        $this->filename = $file;
    }

    /**
     * Write the closing text, close the pipe (done by the parent) and then
     * wait for the process to finish.
     *
     * @param string $string Text to output
     */
    function writeCloseStream( $string ) {
        parent::writeCloseStream( $string );
        if ( $this->procOpenResource ) {
            proc_close( $this->procOpenResource );
            $this->procOpenResource = false;
        }
    }

    /**
     * Launch $command with a pipe on its standard input and keep the
     * writable end as this sink's handle.
     *
     * @param string $command Complete shell command line
     */
    function startCommand( $command ) {
        $spec = array(
            0 => array( "pipe", "r" ),
        );
        $pipes = array();
        $this->procOpenResource = proc_open( $command, $spec, $pipes );
        // proc_open() returns false on failure and then does not fill $pipes;
        // fall back to the "closed" marker instead of reading a missing index.
        $this->handle = isset( $pipes[0] ) ? $pipes[0] : false;
    }

    /**
     * Close the pipe, rename the output file and restart the command.
     *
     * @param string|array $newname New name, or a one-element array holding it
     */
    function closeRenameAndReopen( $newname ) {
        $this->closeAndRename( $newname, true );
    }

    /**
     * Close the pipe and the process, rename the output file and, with
     * $open set, restart the command writing to the original file name.
     * Does nothing when the new name is empty.
     *
     * @param string|array $newname New name, or a one-element array holding it
     * @param bool $open Whether to restart the command afterwards
     */
    function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            if ( $this->procOpenResource ) {
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $command = $this->command;
                $redirect = " > " . wfEscapeShellArg( $this->filename );
                // The constructor stores the command with the redirect already
                // appended when a file was given. Only add it when it is
                // missing, so the shell is not handed "cmd > file > file".
                if ( substr( $command, -strlen( $redirect ) ) !== $redirect ) {
                    $command .= $redirect;
                }
                $this->startCommand( $command );
            }
        }
    }
}
01169 
/**
 * Pipe sink that runs the dump through the "gzip" command.
 */
class DumpGZipOutput extends DumpPipeOutput {
    /**
     * @param string $file File the gzip output is redirected to
     */
    public function __construct( $file ) {
        $program = "gzip";
        parent::__construct( $program, $file );
    }
}
01182 
/**
 * Pipe sink that runs the dump through the "bzip2" command.
 */
class DumpBZip2Output extends DumpPipeOutput {
    /**
     * @param string $file File the bzip2 output is redirected to
     */
    public function __construct( $file ) {
        $program = "bzip2";
        parent::__construct( $program, $file );
    }
}
01195 
/**
 * Pipe sink that feeds the dump to "7za", which writes the archive itself
 * instead of having its standard output redirected.
 */
class Dump7ZipOutput extends DumpPipeOutput {
    /**
     * @param string $file Archive file to create
     */
    function __construct( $file ) {
        $command = $this->setup7zCommand( $file );
        // No file is passed to the parent: 7za gets the archive name as an
        // argument, so no "> file" redirect must be appended.
        parent::__construct( $command );
        $this->filename = $file;
    }

    /**
     * Build the 7za command line that adds standard input to $file.
     *
     * @param string $file Archive file to create
     * @return string Shell command
     */
    function setup7zCommand( $file ) {
        $command = "7za a -bd -si " . wfEscapeShellArg( $file );
        // Suppress annoying useless crap from p7zip
        // Unfortunately this could suppress real error messages too
        $command .= ' >' . wfGetNull() . ' 2>&1';
        return $command;
    }

    /**
     * Close the pipe and the process, rename the archive and, with $open
     * set, start a new 7za process writing to the original archive name.
     * Does nothing when the new name is empty.
     *
     * The handle and process are only closed when still open and are reset
     * to false afterwards, so this method and writeCloseStream() can be
     * called in either order without closing a resource twice.
     *
     * @param string|array $newname New name, or a one-element array holding it
     * @param bool $open Whether to restart 7za afterwards
     */
    function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            if ( $this->procOpenResource ) {
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $command = $this->setup7zCommand( $this->filename );
                $this->startCommand( $command );
            }
        }
    }
}
01239 
/**
 * Base class for dump filters: sits in front of another sink and decides,
 * page by page, whether the page and its revisions are forwarded.
 * This base implementation lets every page through.
 */
class DumpFilter {
    /** @var object Sink that receives whatever this filter lets through */
    public $sink;

    /** @var bool Whether the page currently being written passed the filter */
    protected $sendingThisPage;

    /**
     * @param object &$sink Sink to forward to
     */
    function __construct( &$sink ) {
        $this->sink =& $sink;
    }

    /**
     * Forward the opening text of the dump unconditionally.
     *
     * @param string $string Text to output
     */
    function writeOpenStream( $string ) {
        $this->sink->writeOpenStream( $string );
    }

    /**
     * Forward the closing text of the dump unconditionally.
     *
     * @param string $string Text to output
     */
    function writeCloseStream( $string ) {
        $this->sink->writeCloseStream( $string );
    }

    /**
     * Ask pass() whether the page is wanted, remember the answer for the
     * following revisions, and forward the page opening when it is.
     *
     * @param object $page Page being opened
     * @param string $string Text to output
     */
    function writeOpenPage( $page, $string ) {
        $this->sendingThisPage = $this->pass( $page, $string );
        if ( $this->sendingThisPage ) {
            $this->sink->writeOpenPage( $page, $string );
        }
    }

    /**
     * Forward the page closing when the page was forwarded, then reset
     * the per-page state.
     *
     * @param string $string Text to output
     */
    function writeClosePage( $string ) {
        if ( $this->sendingThisPage ) {
            $this->sink->writeClosePage( $string );
            $this->sendingThisPage = false;
        }
    }

    /**
     * Forward a revision when the current page was forwarded.
     *
     * @param object $rev Revision being written
     * @param string $string Text to output
     */
    function writeRevision( $rev, $string ) {
        if ( $this->sendingThisPage ) {
            $this->sink->writeRevision( $rev, $string );
        }
    }

    /**
     * Forward a log item unconditionally.
     *
     * The item goes to the sink's writeLogItem() so that sinks which treat
     * revisions specially do not mistake it for a revision. Sinks without
     * a writeLogItem() method still receive it through writeRevision().
     *
     * @param object $rev Log item being written
     * @param string $string Text to output
     */
    function writeLogItem( $rev, $string ) {
        if ( method_exists( $this->sink, 'writeLogItem' ) ) {
            $this->sink->writeLogItem( $rev, $string );
        } else {
            $this->sink->writeRevision( $rev, $string );
        }
    }

    /**
     * Pass the request on to the sink.
     *
     * @param string|array $newname New name(s) for the output
     */
    function closeRenameAndReopen( $newname ) {
        $this->sink->closeRenameAndReopen( $newname );
    }

    /**
     * Pass the request on to the sink.
     *
     * @param string|array $newname New name(s) for the output
     * @param bool $open Whether the output is reopened afterwards
     */
    function closeAndRename( $newname, $open = false ) {
        $this->sink->closeAndRename( $newname, $open );
    }

    /**
     * @return mixed Whatever the sink reports as its file name(s)
     */
    function getFilenames() {
        return $this->sink->getFilenames();
    }

    /**
     * Decide whether a page is forwarded. Subclasses override this.
     *
     * @param object $page Page being opened
     * @return bool Always true in this class
     */
    function pass( $page ) {
        return true;
    }
}
01350 
/**
 * Filter that drops every page whose namespace MWNamespace::isTalk()
 * reports as a talk namespace.
 */
class DumpNotalkFilter extends DumpFilter {
    /**
     * @param object $page Page being opened; its page_namespace is inspected
     * @return bool True when the page is not in a talk namespace
     */
    public function pass( $page ) {
        $isTalkPage = MWNamespace::isTalk( $page->page_namespace );
        return !$isTalkPage;
    }
}
01364 
/**
 * Filter that keeps (or, when inverted, drops) pages by namespace.
 */
class DumpNamespaceFilter extends DumpFilter {
    /** @var bool When true, listed namespaces are excluded instead of included */
    public $invert = false;

    /** @var array Set of selected namespace numbers (namespace => true) */
    public $namespaces = array();

    /**
     * @param object &$sink Sink to forward to
     * @param string $param Comma-separated list of namespace numbers and/or
     *   NS_* constant names; a leading "!" inverts the selection
     * @throws MWException When an entry is neither a known name nor numeric
     */
    function __construct( &$sink, $param ) {
        parent::__construct( $sink );

        $constants = array(
            "NS_MAIN"           => NS_MAIN,
            "NS_TALK"           => NS_TALK,
            "NS_USER"           => NS_USER,
            "NS_USER_TALK"      => NS_USER_TALK,
            "NS_PROJECT"        => NS_PROJECT,
            "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
            "NS_FILE"           => NS_FILE,
            "NS_FILE_TALK"      => NS_FILE_TALK,
            "NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
            "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
            "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
            "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
            "NS_TEMPLATE"       => NS_TEMPLATE,
            "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
            "NS_HELP"           => NS_HELP,
            "NS_HELP_TALK"      => NS_HELP_TALK,
            "NS_CATEGORY"       => NS_CATEGORY,
            "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

        // substr() instead of the $param{0} offset syntax: the curly-brace
        // form is gone in current PHP, and substr() copes with an empty
        // string without raising a notice.
        if ( substr( $param, 0, 1 ) === '!' ) {
            $this->invert = true;
            $param = substr( $param, 1 );
        }

        foreach ( explode( ',', $param ) as $key ) {
            $key = trim( $key );
            if ( isset( $constants[$key] ) ) {
                $ns = $constants[$key];
                $this->namespaces[$ns] = true;
            } elseif ( is_numeric( $key ) ) {
                $ns = intval( $key );
                $this->namespaces[$ns] = true;
            } else {
                throw new MWException( "Unrecognized namespace key '$key'\n" );
            }
        }
    }

    /**
     * @param object $page Page being opened; its page_namespace is inspected
     * @return bool True when the namespace is selected and the filter is not
     *   inverted, or when it is not selected and the filter is inverted
     */
    function pass( $page ) {
        $match = isset( $this->namespaces[$page->page_namespace] );
        return $this->invert xor $match;
    }
}
01432 
/**
 * Filter that reduces every page to the single revision whose rev_id
 * equals the page's page_latest. Page and revision are held back until
 * the page is closed.
 */
class DumpLatestFilter extends DumpFilter {
    /** @var object|null Page currently being buffered */
    public $page;

    /** @var string|null Opening text of the buffered page */
    public $pageString;

    /** @var object|null Revision matching page_latest, once seen */
    public $rev;

    /** @var string|null Text of the buffered revision */
    public $revString;

    /**
     * Hold back the page opening; nothing is forwarded yet.
     *
     * @param object $page Page being opened
     * @param string $string Text to output
     */
    public function writeOpenPage( $page, $string ) {
        $this->pageString = $string;
        $this->page = $page;
    }

    /**
     * Forward the buffered page with its one buffered revision, provided a
     * revision was captured, then clear the buffers.
     *
     * @param string $string Closing text of the page
     */
    public function writeClosePage( $string ) {
        if ( $this->rev ) {
            $this->sink->writeOpenPage( $this->page, $this->pageString );
            $this->sink->writeRevision( $this->rev, $this->revString );
            $this->sink->writeClosePage( $string );
        }

        $this->rev = $this->revString = null;
        $this->page = $this->pageString = null;
    }

    /**
     * Remember the revision when it is the page's latest one; any other
     * revision is discarded.
     *
     * @param object $rev Revision being written
     * @param string $string Text to output
     */
    public function writeRevision( $rev, $string ) {
        if ( $rev->rev_id != $this->page->page_latest ) {
            return;
        }
        $this->rev = $rev;
        $this->revString = $string;
    }
}
01481 
/**
 * Sink that repeats every call on each of several underlying sinks.
 */
class DumpMultiWriter {
    /** @var array Sinks that receive every call */
    public $sinks = array();

    /** @var int Number of sinks given to the constructor */
    public $count = 0;

    /**
     * @param array $sinks Sinks to write to
     */
    function __construct( $sinks ) {
        $this->sinks = $sinks;
        $this->count = count( $sinks );
    }

    /**
     * @param string $string Text to output
     */
    function writeOpenStream( $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeOpenStream( $string );
        }
    }

    /**
     * @param string $string Text to output
     */
    function writeCloseStream( $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeCloseStream( $string );
        }
    }

    /**
     * @param object $page Page being opened
     * @param string $string Text to output
     */
    function writeOpenPage( $page, $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeOpenPage( $page, $string );
        }
    }

    /**
     * @param string $string Text to output
     */
    function writeClosePage( $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeClosePage( $string );
        }
    }

    /**
     * @param object $rev Revision being written
     * @param string $string Text to output
     */
    function writeRevision( $rev, $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeRevision( $rev, $string );
        }
    }

    /**
     * Repeat a log item on every sink, so this writer offers the same
     * writeLogItem() call as the other sinks in this file.
     *
     * @param object $rev Log item being written
     * @param string $string Text to output
     */
    function writeLogItem( $rev, $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeLogItem( $rev, $string );
        }
    }

    /**
     * @param array $newnames New names, one per sink, in sink order
     */
    function closeRenameAndReopen( $newnames ) {
        $this->closeAndRename( $newnames, true );
    }

    /**
     * Hand each sink the new name stored under the same index.
     *
     * @param array $newnames New names, one per sink, in sink order
     * @param bool $open Whether the outputs are reopened afterwards
     */
    function closeAndRename( $newnames, $open = false ) {
        foreach ( $this->sinks as $i => $sink ) {
            $sink->closeAndRename( $newnames[$i], $open );
        }
    }

    /**
     * @return array What each sink reports from getFilenames(), in sink order
     */
    function getFilenames() {
        $filenames = array();
        foreach ( $this->sinks as $sink ) {
            $filenames[] = $sink->getFilenames();
        }
        return $filenames;
    }
}