MediaWiki  REL1_20
Export.php
Go to the documentation of this file.
00001 <?php
00033 class WikiExporter {
00034         var $list_authors = false ; # Return distinct author list (when not returning full history)
00035         var $author_list = "" ;
00036 
00037         var $dumpUploads = false;
00038         var $dumpUploadFileContents = false;
00039 
00040         const FULL = 1;
00041         const CURRENT = 2;
00042         const STABLE = 4; // extension defined
00043         const LOGS = 8;
00044         const RANGE = 16;
00045 
00046         const BUFFER = 0;
00047         const STREAM = 1;
00048 
00049         const TEXT = 0;
00050         const STUB = 1;
00051 
00052         var $buffer;
00053 
00054         var $text;
00055 
00059         var $sink;
00060 
00065         public static function schemaVersion() {
00066                 return "0.7";
00067         }
00068 
00086         function __construct( &$db, $history = WikiExporter::CURRENT,
00087                         $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
00088                 $this->db =& $db;
00089                 $this->history = $history;
00090                 $this->buffer  = $buffer;
00091                 $this->writer  = new XmlDumpWriter();
00092                 $this->sink    = new DumpOutput();
00093                 $this->text    = $text;
00094         }
00095 
00103         public function setOutputSink( &$sink ) {
00104                 $this->sink =& $sink;
00105         }
00106 
00107         public function openStream() {
00108                 $output = $this->writer->openStream();
00109                 $this->sink->writeOpenStream( $output );
00110         }
00111 
00112         public function closeStream() {
00113                 $output = $this->writer->closeStream();
00114                 $this->sink->writeCloseStream( $output );
00115         }
00116 
00122         public function allPages() {
00123                 $this->dumpFrom( '' );
00124         }
00125 
00133         public function pagesByRange( $start, $end ) {
00134                 $condition = 'page_id >= ' . intval( $start );
00135                 if ( $end ) {
00136                         $condition .= ' AND page_id < ' . intval( $end );
00137                 }
00138                 $this->dumpFrom( $condition );
00139         }
00140 
00148         public function revsByRange( $start, $end ) {
00149                 $condition = 'rev_id >= ' . intval( $start );
00150                 if ( $end ) {
00151                         $condition .= ' AND rev_id < ' . intval( $end );
00152                 }
00153                 $this->dumpFrom( $condition );
00154         }
00155 
00159         public function pageByTitle( $title ) {
00160                 $this->dumpFrom(
00161                         'page_namespace=' . $title->getNamespace() .
00162                         ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
00163         }
00164 
00169         public function pageByName( $name ) {
00170                 $title = Title::newFromText( $name );
00171                 if ( is_null( $title ) ) {
00172                         throw new MWException( "Can't export invalid title" );
00173                 } else {
00174                         $this->pageByTitle( $title );
00175                 }
00176         }
00177 
00181         public function pagesByName( $names ) {
00182                 foreach ( $names as $name ) {
00183                         $this->pageByName( $name );
00184                 }
00185         }
00186 
00187         public function allLogs() {
00188                 $this->dumpFrom( '' );
00189         }
00190 
00195         public function logsByRange( $start, $end ) {
00196                 $condition = 'log_id >= ' . intval( $start );
00197                 if ( $end ) {
00198                         $condition .= ' AND log_id < ' . intval( $end );
00199                 }
00200                 $this->dumpFrom( $condition );
00201         }
00202 
00210         protected function do_list_authors( $cond ) {
00211                 wfProfileIn( __METHOD__ );
00212                 $this->author_list = "<contributors>";
00213                 // rev_deleted
00214 
00215                 $res = $this->db->select(
00216                         array( 'page', 'revision' ),
00217                         array( 'DISTINCT rev_user_text', 'rev_user' ),
00218                         array(
00219                                 $this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
00220                                 $cond,
00221                                 'page_id = rev_id',
00222                         ),
00223                         __METHOD__
00224                 );
00225 
00226                 foreach ( $res as $row ) {
00227                         $this->author_list .= "<contributor>" .
00228                                 "<username>" .
00229                                 htmlentities( $row->rev_user_text )  .
00230                                 "</username>" .
00231                                 "<id>" .
00232                                 $row->rev_user .
00233                                 "</id>" .
00234                                 "</contributor>";
00235                 }
00236                 $this->author_list .= "</contributors>";
00237                 wfProfileOut( __METHOD__ );
00238         }
00239 
00245         protected function dumpFrom( $cond = '' ) {
00246                 wfProfileIn( __METHOD__ );
00247                 # For logging dumps...
00248                 if ( $this->history & self::LOGS ) {
00249                         $where = array( 'user_id = log_user' );
00250                         # Hide private logs
00251                         $hideLogs = LogEventsList::getExcludeClause( $this->db );
00252                         if ( $hideLogs ) $where[] = $hideLogs;
00253                         # Add on any caller specified conditions
00254                         if ( $cond ) $where[] = $cond;
00255                         # Get logging table name for logging.* clause
00256                         $logging = $this->db->tableName( 'logging' );
00257 
00258                         if ( $this->buffer == WikiExporter::STREAM ) {
00259                                 $prev = $this->db->bufferResults( false );
00260                         }
00261                         $wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
00262                         try {
00263                                 $result = $this->db->select( array( 'logging', 'user' ),
00264                                         array( "{$logging}.*", 'user_name' ), // grab the user name
00265                                         $where,
00266                                         __METHOD__,
00267                                         array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
00268                                 );
00269                                 $wrapper = $this->db->resultObject( $result );
00270                                 $this->outputLogStream( $wrapper );
00271                                 if ( $this->buffer == WikiExporter::STREAM ) {
00272                                         $this->db->bufferResults( $prev );
00273                                 }
00274                         } catch ( Exception $e ) {
00275                                 // Throwing the exception does not reliably free the resultset, and
00276                                 // would also leave the connection in unbuffered mode.
00277 
00278                                 // Freeing result
00279                                 try {
00280                                         if ( $wrapper ) {
00281                                                 $wrapper->free();
00282                                         }
00283                                 } catch ( Exception $e2 ) {
00284                                         // Already in panic mode -> ignoring $e2 as $e has
00285                                         // higher priority
00286                                 }
00287 
00288                                 // Putting database back in previous buffer mode
00289                                 try {
00290                                         if ( $this->buffer == WikiExporter::STREAM ) {
00291                                                 $this->db->bufferResults( $prev );
00292                                         }
00293                                 } catch ( Exception $e2 ) {
00294                                         // Already in panic mode -> ignoring $e2 as $e has
00295                                         // higher priority
00296                                 }
00297 
00298                                 // Inform caller about problem
00299                                 throw $e;
00300                         }
00301                 # For page dumps...
00302                 } else {
00303                         $tables = array( 'page', 'revision' );
00304                         $opts = array( 'ORDER BY' => 'page_id ASC' );
00305                         $opts['USE INDEX'] = array();
00306                         $join = array();
00307                         if ( is_array( $this->history ) ) {
00308                                 # Time offset/limit for all pages/history...
00309                                 $revJoin = 'page_id=rev_page';
00310                                 # Set time order
00311                                 if ( $this->history['dir'] == 'asc' ) {
00312                                         $op = '>';
00313                                         $opts['ORDER BY'] = 'rev_timestamp ASC';
00314                                 } else {
00315                                         $op = '<';
00316                                         $opts['ORDER BY'] = 'rev_timestamp DESC';
00317                                 }
00318                                 # Set offset
00319                                 if ( !empty( $this->history['offset'] ) ) {
00320                                         $revJoin .= " AND rev_timestamp $op " .
00321                                                 $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
00322                                 }
00323                                 $join['revision'] = array( 'INNER JOIN', $revJoin );
00324                                 # Set query limit
00325                                 if ( !empty( $this->history['limit'] ) ) {
00326                                         $opts['LIMIT'] = intval( $this->history['limit'] );
00327                                 }
00328                         } elseif ( $this->history & WikiExporter::FULL ) {
00329                                 # Full history dumps...
00330                                 $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
00331                         } elseif ( $this->history & WikiExporter::CURRENT ) {
00332                                 # Latest revision dumps...
00333                                 if ( $this->list_authors && $cond != '' )  { // List authors, if so desired
00334                                         $this->do_list_authors( $cond );
00335                                 }
00336                                 $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
00337                         } elseif ( $this->history & WikiExporter::STABLE ) {
00338                                 # "Stable" revision dumps...
00339                                 # Default JOIN, to be overridden...
00340                                 $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
00341                                 # One, and only one hook should set this, and return false
00342                                 if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
00343                                         wfProfileOut( __METHOD__ );
00344                                         throw new MWException( __METHOD__ . " given invalid history dump type." );
00345                                 }
00346                         } elseif ( $this->history & WikiExporter::RANGE ) {
00347                                 # Dump of revisions within a specified range
00348                                 $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
00349                                 $opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
00350                         } else {
00351                                 # Uknown history specification parameter?
00352                                 wfProfileOut( __METHOD__ );
00353                                 throw new MWException( __METHOD__ . " given invalid history dump type." );
00354                         }
00355                         # Query optimization hacks
00356                         if ( $cond == '' ) {
00357                                 $opts[] = 'STRAIGHT_JOIN';
00358                                 $opts['USE INDEX']['page'] = 'PRIMARY';
00359                         }
00360                         # Build text join options
00361                         if ( $this->text != WikiExporter::STUB ) { // 1-pass
00362                                 $tables[] = 'text';
00363                                 $join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
00364                         }
00365 
00366                         if ( $this->buffer == WikiExporter::STREAM ) {
00367                                 $prev = $this->db->bufferResults( false );
00368                         }
00369 
00370                         $wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
00371                         try {
00372                                 wfRunHooks( 'ModifyExportQuery',
00373                                                 array( $this->db, &$tables, &$cond, &$opts, &$join ) );
00374 
00375                                 # Do the query!
00376                                 $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
00377                                 $wrapper = $this->db->resultObject( $result );
00378                                 # Output dump results
00379                                 $this->outputPageStream( $wrapper );
00380 
00381                                 if ( $this->buffer == WikiExporter::STREAM ) {
00382                                         $this->db->bufferResults( $prev );
00383                                 }
00384                         } catch ( Exception $e ) {
00385                                 // Throwing the exception does not reliably free the resultset, and
00386                                 // would also leave the connection in unbuffered mode.
00387 
00388                                 // Freeing result
00389                                 try {
00390                                         if ( $wrapper ) {
00391                                                 $wrapper->free();
00392                                         }
00393                                 } catch ( Exception $e2 ) {
00394                                         // Already in panic mode -> ignoring $e2 as $e has
00395                                         // higher priority
00396                                 }
00397 
00398                                 // Putting database back in previous buffer mode
00399                                 try {
00400                                         if ( $this->buffer == WikiExporter::STREAM ) {
00401                                                 $this->db->bufferResults( $prev );
00402                                         }
00403                                 } catch ( Exception $e2 ) {
00404                                         // Already in panic mode -> ignoring $e2 as $e has
00405                                         // higher priority
00406                                 }
00407 
00408                                 // Inform caller about problem
00409                                 throw $e;
00410                         }
00411                 }
00412                 wfProfileOut( __METHOD__ );
00413         }
00414 
00427         protected function outputPageStream( $resultset ) {
00428                 $last = null;
00429                 foreach ( $resultset as $row ) {
00430                         if ( is_null( $last ) ||
00431                                 $last->page_namespace != $row->page_namespace ||
00432                                 $last->page_title     != $row->page_title ) {
00433                                 if ( isset( $last ) ) {
00434                                         $output = '';
00435                                         if ( $this->dumpUploads ) {
00436                                                 $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
00437                                         }
00438                                         $output .= $this->writer->closePage();
00439                                         $this->sink->writeClosePage( $output );
00440                                 }
00441                                 $output = $this->writer->openPage( $row );
00442                                 $this->sink->writeOpenPage( $row, $output );
00443                                 $last = $row;
00444                         }
00445                         $output = $this->writer->writeRevision( $row );
00446                         $this->sink->writeRevision( $row, $output );
00447                 }
00448                 if ( isset( $last ) ) {
00449                         $output = '';
00450                         if ( $this->dumpUploads ) {
00451                                 $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
00452                         }
00453                         $output .= $this->author_list;
00454                         $output .= $this->writer->closePage();
00455                         $this->sink->writeClosePage( $output );
00456                 }
00457         }
00458 
00462         protected function outputLogStream( $resultset ) {
00463                 foreach ( $resultset as $row ) {
00464                         $output = $this->writer->writeLogItem( $row );
00465                         $this->sink->writeLogItem( $row, $output );
00466                 }
00467         }
00468 }
00469 
00473 class XmlDumpWriter {
00479         function schemaVersion() {
00480                 wfDeprecated( __METHOD__, '1.20' );
00481                 return WikiExporter::schemaVersion();
00482         }
00483 
00494         function openStream() {
00495                 global $wgLanguageCode;
00496                 $ver = WikiExporter::schemaVersion();
00497                 return Xml::element( 'mediawiki', array(
00498                         'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
00499                         'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
00500                         'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
00501                                                 "http://www.mediawiki.org/xml/export-$ver.xsd",
00502                         'version'            => $ver,
00503                         'xml:lang'           => $wgLanguageCode ),
00504                         null ) .
00505                         "\n" .
00506                         $this->siteInfo();
00507         }
00508 
00512         function siteInfo() {
00513                 $info = array(
00514                         $this->sitename(),
00515                         $this->homelink(),
00516                         $this->generator(),
00517                         $this->caseSetting(),
00518                         $this->namespaces() );
00519                 return "  <siteinfo>\n    " .
00520                         implode( "\n    ", $info ) .
00521                         "\n  </siteinfo>\n";
00522         }
00523 
00527         function sitename() {
00528                 global $wgSitename;
00529                 return Xml::element( 'sitename', array(), $wgSitename );
00530         }
00531 
00535         function generator() {
00536                 global $wgVersion;
00537                 return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
00538         }
00539 
00543         function homelink() {
00544                 return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalUrl() );
00545         }
00546 
00550         function caseSetting() {
00551                 global $wgCapitalLinks;
00552                 // "case-insensitive" option is reserved for future
00553                 $sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
00554                 return Xml::element( 'case', array(), $sensitivity );
00555         }
00556 
00560         function namespaces() {
00561                 global $wgContLang;
00562                 $spaces = "<namespaces>\n";
00563                 foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
00564                         $spaces .= '      ' .
00565                                 Xml::element( 'namespace',
00566                                         array(  'key' => $ns,
00567                                                         'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
00568                                         ), $title ) . "\n";
00569                 }
00570                 $spaces .= "    </namespaces>";
00571                 return $spaces;
00572         }
00573 
00580         function closeStream() {
00581                 return "</mediawiki>\n";
00582         }
00583 
00592         function openPage( $row ) {
00593                 $out = "  <page>\n";
00594                 $title = Title::makeTitle( $row->page_namespace, $row->page_title );
00595                 $out .= '    ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
00596                 $out .= '    ' . Xml::element( 'ns', array(), strval( $row->page_namespace) ) . "\n";
00597                 $out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
00598                 if ( $row->page_is_redirect ) {
00599                         $page = WikiPage::factory( $title );
00600                         $redirect = $page->getRedirectTarget();
00601                         if ( $redirect instanceOf Title && $redirect->isValidRedirectTarget() ) {
00602                                 $out .= '    ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
00603                         }
00604                 }
00605 
00606                 if ( $row->page_restrictions != '' ) {
00607                         $out .= '    ' . Xml::element( 'restrictions', array(),
00608                                 strval( $row->page_restrictions ) ) . "\n";
00609                 }
00610 
00611                 wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );
00612 
00613                 return $out;
00614         }
00615 
00622         function closePage() {
00623                 return "  </page>\n";
00624         }
00625 
00634         function writeRevision( $row ) {
00635                 wfProfileIn( __METHOD__ );
00636 
00637                 $out  = "    <revision>\n";
00638                 $out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
00639                 if( $row->rev_parent_id ) {
00640                         $out .= "      " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
00641                 }
00642 
00643                 $out .= $this->writeTimestamp( $row->rev_timestamp );
00644 
00645                 if ( $row->rev_deleted & Revision::DELETED_USER ) {
00646                         $out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
00647                 } else {
00648                         $out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
00649                 }
00650 
00651                 if ( $row->rev_minor_edit ) {
00652                         $out .=  "      <minor/>\n";
00653                 }
00654                 if ( $row->rev_deleted & Revision::DELETED_COMMENT ) {
00655                         $out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
00656                 } elseif ( $row->rev_comment != '' ) {
00657                         $out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
00658                 }
00659 
00660                 if ( $row->rev_sha1 && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
00661                         $out .= "      " . Xml::element('sha1', null, strval( $row->rev_sha1 ) ) . "\n";
00662                 } else {
00663                         $out .= "      <sha1/>\n";
00664                 }
00665 
00666                 $text = '';
00667                 if ( $row->rev_deleted & Revision::DELETED_TEXT ) {
00668                         $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
00669                 } elseif ( isset( $row->old_text ) ) {
00670                         // Raw text from the database may have invalid chars
00671                         $text = strval( Revision::getRevisionText( $row ) );
00672                         $out .= "      " . Xml::elementClean( 'text',
00673                                 array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
00674                                 strval( $text ) ) . "\n";
00675                 } else {
00676                         // Stub output
00677                         $out .= "      " . Xml::element( 'text',
00678                                 array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
00679                                 "" ) . "\n";
00680                 }
00681 
00682                 wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$this, &$out, $row, $text ) );
00683 
00684                 $out .= "    </revision>\n";
00685 
00686                 wfProfileOut( __METHOD__ );
00687                 return $out;
00688         }
00689 
00698         function writeLogItem( $row ) {
00699                 wfProfileIn( __METHOD__ );
00700 
00701                 $out  = "  <logitem>\n";
00702                 $out .= "    " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
00703 
00704                 $out .= $this->writeTimestamp( $row->log_timestamp, "    " );
00705 
00706                 if ( $row->log_deleted & LogPage::DELETED_USER ) {
00707                         $out .= "    " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
00708                 } else {
00709                         $out .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
00710                 }
00711 
00712                 if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
00713                         $out .= "    " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
00714                 } elseif ( $row->log_comment != '' ) {
00715                         $out .= "    " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
00716                 }
00717 
00718                 $out .= "    " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
00719                 $out .= "    " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";
00720 
00721                 if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
00722                         $out .= "    " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
00723                 } else {
00724                         $title = Title::makeTitle( $row->log_namespace, $row->log_title );
00725                         $out .= "    " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
00726                         $out .= "    " . Xml::elementClean( 'params',
00727                                 array( 'xml:space' => 'preserve' ),
00728                                 strval( $row->log_params ) ) . "\n";
00729                 }
00730 
00731                 $out .= "  </logitem>\n";
00732 
00733                 wfProfileOut( __METHOD__ );
00734                 return $out;
00735         }
00736 
00742         function writeTimestamp( $timestamp, $indent = "      " ) {
00743                 $ts = wfTimestamp( TS_ISO_8601, $timestamp );
00744                 return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
00745         }
00746 
00753         function writeContributor( $id, $text, $indent = "      " ) {
00754                 $out = $indent . "<contributor>\n";
00755                 if ( $id || !IP::isValid( $text ) ) {
00756                         $out .= $indent . "  " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
00757                         $out .= $indent . "  " . Xml::element( 'id', null, strval( $id ) ) . "\n";
00758                 } else {
00759                         $out .= $indent . "  " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
00760                 }
00761                 $out .= $indent . "</contributor>\n";
00762                 return $out;
00763         }
00764 
00771         function writeUploads( $row, $dumpContents = false ) {
00772                 if ( $row->page_namespace == NS_FILE ) {
00773                         $img = wfLocalFile( $row->page_title );
00774                         if ( $img && $img->exists() ) {
00775                                 $out = '';
00776                                 foreach ( array_reverse( $img->getHistory() ) as $ver ) {
00777                                         $out .= $this->writeUpload( $ver, $dumpContents );
00778                                 }
00779                                 $out .= $this->writeUpload( $img, $dumpContents );
00780                                 return $out;
00781                         }
00782                 }
00783                 return '';
00784         }
00785 
00791         function writeUpload( $file, $dumpContents = false ) {
00792                 if ( $file->isOld() ) {
00793                         $archiveName = "      " .
00794                                 Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
00795                 } else {
00796                         $archiveName = '';
00797                 }
00798                 if ( $dumpContents ) {
00799                         # Dump file as base64
00800                         # Uses only XML-safe characters, so does not need escaping
00801                         $contents = '      <contents encoding="base64">' .
00802                                 chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
00803                                 "      </contents>\n";
00804                 } else {
00805                         $contents = '';
00806                 }
00807                 if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
00808                         $comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
00809                 } else {
00810                         $comment = Xml::elementClean( 'comment', null, $file->getDescription() );
00811                 }
00812                 return "    <upload>\n" .
00813                         $this->writeTimestamp( $file->getTimestamp() ) .
00814                         $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
00815                         "      " . $comment . "\n" .
00816                         "      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
00817                         $archiveName .
00818                         "      " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
00819                         "      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
00820                         "      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
00821                         "      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
00822                         $contents .
00823                         "    </upload>\n";
00824         }
00825 
00836         public static function canonicalTitle( Title $title ) {
00837                 if ( $title->getInterwiki() ) {
00838                         return $title->getPrefixedText();
00839                 }
00840 
00841                 global $wgContLang;
00842                 $prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );
00843 
00844                 if ( $prefix !== '' ) {
00845                         $prefix .= ':';
00846                 }
00847 
00848                 return $prefix . $title->getText();
00849         }
00850 }
00851 
00852 
/**
 * Base output sink for dumps: every write*() hook passes its string to
 * write(), which prints it. The rename and filename hooks do nothing here.
 */
class DumpOutput {

        /**
         * @param $string string
         */
        public function writeOpenStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $string string
         */
        public function writeCloseStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $page Object: page row (not used by this class)
         * @param $string string
         */
        public function writeOpenPage( $page, $string ) {
                $this->write( $string );
        }

        /**
         * @param $string string
         */
        public function writeClosePage( $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev Object: revision row (not used by this class)
         * @param $string string
         */
        public function writeRevision( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev Object: log row (not used by this class)
         * @param $string string
         */
        public function writeLogItem( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * Emit the string; subclasses override this to write elsewhere.
         *
         * @param $string string
         */
        public function write( $string ) {
                echo $string;
        }

        /**
         * No-op here; overridden by sinks that write to a file.
         *
         * @param $newname mixed
         */
        public function closeRenameAndReopen( $newname ) {
        }

        /**
         * No-op here; overridden by sinks that write to a file.
         *
         * @param $newname mixed
         * @param $open bool
         */
        public function closeAndRename( $newname, $open = false ) {
        }

        /**
         * @return null This sink has no file name
         */
        public function getFilenames() {
                return null;
        }
}
00944 
/**
 * Output sink that writes the dump to a file opened with fopen().
 */
class DumpFileOutput extends DumpOutput {
        protected $handle = false, $filename;

        /**
         * @param $file string: path of the file to open for writing
         */
        function __construct( $file ) {
                $this->filename = $file;
                $this->handle = fopen( $file, "wt" );
        }

        /**
         * Write the closing string, then close the file if it is still open.
         *
         * @param $string string
         */
        function writeCloseStream( $string ) {
                parent::writeCloseStream( $string );
                if ( !$this->handle ) {
                        return;
                }
                fclose( $this->handle );
                $this->handle = false;
        }

        /**
         * @param $string string
         */
        function write( $string ) {
                fputs( $this->handle, $string );
        }

        /**
         * @param $newname mixed: see closeAndRename()
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Rename the output file to $newname.
         *
         * @param $newname string
         * @throws MWException when rename() reports failure
         */
        function renameOrException( $newname ) {
                if ( rename( $this->filename, $newname ) ) {
                        return;
                }
                throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
        }

        /**
         * Reduce the rename argument to a single name.
         *
         * @param $newname mixed: a name, or an array holding at most one name
         * @return mixed $newname itself when it is not an array, otherwise its element 0
         * @throws MWException when the array holds more than one entry
         */
        function checkRenameArgCount( $newname ) {
                if ( !is_array( $newname ) ) {
                        return $newname;
                }
                if ( count( $newname ) > 1 ) {
                        throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
                }
                return $newname[0];
        }

        /**
         * Close the file, rename it, and optionally open a fresh file under
         * the original name. Does nothing when the resolved name is empty.
         *
         * @param $newname mixed: new name, or a one-element array holding it
         * @param $open bool: reopen the original file name for writing afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $target = $this->checkRenameArgCount( $newname );
                if ( !$target ) {
                        return;
                }

                if ( $this->handle ) {
                        fclose( $this->handle );
                        $this->handle = false;
                }

                $this->renameOrException( $target );

                if ( $open ) {
                        $this->handle = fopen( $this->filename, "wt" );
                }
        }

        /**
         * @return string|null The file name given to the constructor
         */
        function getFilenames() {
                return $this->filename;
        }
}
01036 
/**
 * Output sink that pipes the dump into the standard input of an external
 * command started with proc_open(). When a file name is given, the
 * command's standard output is redirected to that file.
 */
class DumpPipeOutput extends DumpFileOutput {
        protected $command, $filename;
        protected $procOpenResource = false;

        /**
         * @param $command string: shell command to run
         * @param $file string|null: if not null, " > <escaped file>" is appended
         *   to the command
         */
        function __construct( $command, $file = null ) {
                if ( !is_null( $file ) ) {
                        $command .=  " > " . wfEscapeShellArg( $file );
                }

                $this->startCommand( $command );
                // Stored with the redirect already applied
                $this->command = $command;
                $this->filename = $file;
        }

        /**
         * Write the closing string, close the pipe (done by the parent), then
         * close the process if it is still open.
         *
         * @param $string string
         */
        function writeCloseStream( $string ) {
                parent::writeCloseStream( $string );
                if ( $this->procOpenResource ) {
                        proc_close( $this->procOpenResource );
                        $this->procOpenResource = false;
                }
        }

        /**
         * Start the command and keep its stdin pipe as the write handle.
         *
         * @param $command string
         */
        function startCommand( $command ) {
                $spec = array(
                        0 => array( "pipe", "r" ),
                );
                $pipes = array();
                $this->procOpenResource = proc_open( $command, $spec, $pipes );
                $this->handle = $pipes[0];
        }

        /**
         * @param $newname mixed: see closeAndRename()
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Close the pipe and the process, rename the output file, and
         * optionally restart the command. Does nothing when the resolved
         * name is empty.
         *
         * @param $newname mixed: new name, or a one-element array holding it
         * @param $open bool: restart the command afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( $newname ) {
                        if ( $this->handle ) {
                                fclose( $this->handle );
                                $this->handle = false;
                        }
                        if ( $this->procOpenResource ) {
                                proc_close( $this->procOpenResource );
                                $this->procOpenResource = false;
                        }
                        $this->renameOrException( $newname );
                        if ( $open ) {
                                // $this->command already ends with the "> file" redirect
                                // added by the constructor; appending it again here
                                // produced "cmd > 'file' > 'file'".
                                $this->startCommand( $this->command );
                        }
                }
        }

}
01116 
/**
 * Pipe sink that sends the dump through "gzip".
 */
class DumpGZipOutput extends DumpPipeOutput {

        /**
         * @param $file string: file the command's output is redirected to
         */
        function __construct( $file ) {
                $compressor = "gzip";
                parent::__construct( $compressor, $file );
        }
}
01130 
/**
 * Pipe sink that sends the dump through "bzip2".
 */
class DumpBZip2Output extends DumpPipeOutput {

        /**
         * @param $file string: file the command's output is redirected to
         */
        function __construct( $file ) {
                $compressor = "bzip2";
                parent::__construct( $compressor, $file );
        }
}
01144 
/**
 * Pipe sink that sends the dump to "7za", which reads it from standard
 * input (-si) and adds it to the archive named by the file argument.
 */
class Dump7ZipOutput extends DumpPipeOutput {

        /**
         * @param $file string: archive file name handed to 7za
         */
        function __construct( $file ) {
                $command = $this->setup7zCommand( $file );
                // No file is passed on: 7za writes the archive itself, so the
                // parent must not add a "> file" redirect.
                parent::__construct( $command );
                $this->filename = $file;
        }

        /**
         * Build the 7za command line for the given archive.
         *
         * @param $file string
         * @return string
         */
        function setup7zCommand( $file ) {
                $command = "7za a -bd -si " . wfEscapeShellArg( $file );
                // Suppress annoying useless crap from p7zip
                // Unfortunately this could suppress real error messages too
                $command .= ' >' . wfGetNull() . ' 2>&1';
                return $command;
        }

        /**
         * Close the pipe and the process, rename the archive, and optionally
         * start a new 7za process for the original file name. Does nothing
         * when the resolved name is empty.
         *
         * @param $newname mixed: new name, or a one-element array holding it
         * @param $open bool: start a fresh 7za process afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( $newname ) {
                        // Close only what is still open and clear the members, as
                        // DumpPipeOutput::closeAndRename() does, so a later
                        // writeCloseStream() does not close stale resources.
                        if ( $this->handle ) {
                                fclose( $this->handle );
                                $this->handle = false;
                        }
                        if ( $this->procOpenResource ) {
                                proc_close( $this->procOpenResource );
                                $this->procOpenResource = false;
                        }
                        $this->renameOrException( $newname );
                        if ( $open ) {
                                $command = $this->setup7zCommand( $this->filename );
                                $this->startCommand( $command );
                        }
                }
        }
}
01189 
/**
 * Pass-through filter in front of another sink. pass() is asked about each
 * page as it is opened; the page and its revisions are forwarded only when
 * it returns a true value. This base class lets everything through.
 */
class DumpFilter {

        /**
         * Sink that receives whatever this filter lets through.
         */
        public $sink;

        /**
         * Result of pass() for the page currently being written.
         */
        protected $sendingThisPage;

        /**
         * @param $sink Object: sink to forward to (bound by reference)
         */
        function __construct( &$sink ) {
                $this->sink =& $sink;
        }

        /**
         * @param $string string
         */
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }

        /**
         * @param $string string
         */
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }

        /**
         * Decide whether this page is wanted, and forward its opening if so.
         *
         * @param $page Object
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                $this->sendingThisPage = $this->pass( $page, $string );
                if ( !$this->sendingThisPage ) {
                        return;
                }
                $this->sink->writeOpenPage( $page, $string );
        }

        /**
         * Forward the page close only for a page that was forwarded, then
         * clear the flag.
         *
         * @param $string string
         */
        function writeClosePage( $string ) {
                if ( !$this->sendingThisPage ) {
                        return;
                }
                $this->sink->writeClosePage( $string );
                $this->sendingThisPage = false;
        }

        /**
         * @param $rev Object
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                if ( !$this->sendingThisPage ) {
                        return;
                }
                $this->sink->writeRevision( $rev, $string );
        }

        /**
         * Log items are not subject to the page filter.
         *
         * NOTE(review): this forwards to the sink's writeRevision(), not its
         * writeLogItem() -- confirm whether that is intended.
         *
         * @param $rev Object
         * @param $string string
         */
        function writeLogItem( $rev, $string ) {
                $this->sink->writeRevision( $rev, $string );
        }

        /**
         * @param $newname mixed
         */
        function closeRenameAndReopen( $newname ) {
                $this->sink->closeRenameAndReopen( $newname );
        }

        /**
         * @param $newname mixed
         * @param $open bool
         */
        function closeAndRename( $newname, $open = false ) {
                $this->sink->closeAndRename( $newname, $open );
        }

        /**
         * @return mixed Whatever the sink reports
         */
        function getFilenames() {
                return $this->sink->getFilenames();
        }

        /**
         * Override in subclasses to drop pages.
         *
         * @param $page Object
         * @return bool Always true in this class
         */
        function pass( $page ) {
                return true;
        }
}
01301 
/**
 * Filter that drops pages whose namespace MWNamespace::isTalk() reports as
 * a talk namespace.
 */
class DumpNotalkFilter extends DumpFilter {

        /**
         * @param $page Object: page row; its page_namespace field is read
         * @return bool False for talk pages, true otherwise
         */
        function pass( $page ) {
                $isTalkPage = MWNamespace::isTalk( $page->page_namespace );
                return !$isTalkPage;
        }
}
01316 
/**
 * Filter that keeps (or, when inverted, drops) pages in a given set of
 * namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
        var $invert = false;
        var $namespaces = array();

        /**
         * @param $sink Object: sink to forward to
         * @param $param string: comma-separated namespace keys, each either
         *   one of the NS_* names listed below or a numeric id; a leading "!"
         *   inverts the selection
         * @throws MWException for a key that is neither a known name nor numeric
         */
        function __construct( &$sink, $param ) {
                parent::__construct( $sink );

                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
                        "NS_USER"           => NS_USER,
                        "NS_USER_TALK"      => NS_USER_TALK,
                        "NS_PROJECT"        => NS_PROJECT,
                        "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
                        "NS_FILE"           => NS_FILE,
                        "NS_FILE_TALK"      => NS_FILE_TALK,
                        "NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
                        "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
                        "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
                        "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
                        "NS_TEMPLATE"       => NS_TEMPLATE,
                        "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
                        "NS_HELP"           => NS_HELP,
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

                // substr() replaces the "$param { 0 }" curly-brace offset, which
                // PHP 8 rejects, and it is also safe on an empty string.
                if ( substr( $param, 0, 1 ) === '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }

                foreach ( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
                        if ( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                        } elseif ( is_numeric( $key ) ) {
                                $ns = intval( $key );
                        } else {
                                throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                        $this->namespaces[$ns] = true;
                }
        }

        /**
         * @param $page Object: page row; its page_namespace field is read
         * @return bool True when the namespace is listed and the filter is not
         *   inverted, or when it is not listed and the filter is inverted
         */
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
        }
}
01380 
01381 
/**
 * Filter that forwards, for each page, only the revision whose rev_id
 * equals the page's page_latest. Output is held back until the page is
 * closed; a page for which no such revision was seen is not forwarded.
 */
class DumpLatestFilter extends DumpFilter {
        var $page, $pageString, $rev, $revString;

        /**
         * Remember the page; nothing is forwarded yet.
         *
         * @param $page Object
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                $this->pageString = $string;
                $this->page = $page;
        }

        /**
         * Forward the buffered page and revision if one was kept, then reset
         * the buffered state.
         *
         * @param $string string
         */
        function writeClosePage( $string ) {
                if ( $this->rev ) {
                        $sink = $this->sink;
                        $sink->writeOpenPage( $this->page, $this->pageString );
                        $sink->writeRevision( $this->rev, $this->revString );
                        $sink->writeClosePage( $string );
                }
                $this->rev = $this->revString = null;
                $this->page = $this->pageString = null;
        }

        /**
         * Keep the revision only when it is the page's latest one.
         *
         * @param $rev Object: revision row; its rev_id field is read
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                if ( $rev->rev_id != $this->page->page_latest ) {
                        return;
                }
                $this->rev = $rev;
                $this->revString = $string;
        }
}
01424 
/**
 * Sink that repeats every call on each of several sinks, in order.
 */
class DumpMultiWriter {

        /**
         * Sinks to write to, indexed from 0.
         */
        public $sinks = array();

        /**
         * Number of sinks, taken once in the constructor.
         */
        public $count = 0;

        /**
         * @param $sinks array: sinks indexed 0..n-1
         */
        function __construct( $sinks ) {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }

        /**
         * @param $string string
         */
        function writeOpenStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenStream( $string );
                }
        }

        /**
         * @param $string string
         */
        function writeCloseStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeCloseStream( $string );
                }
        }

        /**
         * @param $page Object
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string string
         */
        function writeClosePage( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeClosePage( $string );
                }
        }

        /**
         * @param $rev Object
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeRevision( $rev, $string );
                }
        }

        /**
         * Added so this class offers the same hooks as DumpOutput and
         * DumpFilter, both of which define writeLogItem().
         *
         * @param $rev Object
         * @param $string string
         */
        function writeLogItem( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeLogItem( $rev, $string );
                }
        }

        /**
         * @param $newnames array: see closeAndRename()
         */
        function closeRenameAndReopen( $newnames ) {
                $this->closeAndRename( $newnames, true );
        }

        /**
         * @param $newnames array: one new name per sink, in the same order as the sinks
         * @param $open bool
         */
        function closeAndRename( $newnames, $open = false ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->closeAndRename( $newnames[$i], $open );
                }
        }

        /**
         * @return array The getFilenames() result of each sink, in sink order
         */
        function getFilenames() {
                $filenames = array();
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $filenames[] =  $this->sinks[$i]->getFilenames();
                }
                return $filenames;
        }

}
01515 
/**
 * Run the string through UtfNormal::cleanUp() and then htmlspecialchars(),
 * inside a profiling section named after this function.
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
        wfProfileIn( __FUNCTION__ );

        $cleaned = UtfNormal::cleanUp( $string );
        $escaped = htmlspecialchars( $cleaned );

        wfProfileOut( __FUNCTION__ );
        return $escaped;
}