MediaWiki  REL1_23
Export.php
Go to the documentation of this file.
00001 <?php
/**
 * Exports wiki pages, revisions or log entries as an XML dump.
 *
 * Rows are selected through the database handle given to the constructor,
 * rendered into XML fragments by an XmlDumpWriter and handed to a DumpOutput
 * sink, which can be replaced with setOutputSink().
 */
class WikiExporter {
    /** @var bool Return distinct author list (when not returning full history) */
    public $list_authors = false;

    /** @var string "<contributors>" XML fragment built by do_list_authors() */
    public $author_list = "";

    /** @var bool When true, outputPageStream() asks the writer for upload records */
    public $dumpUploads = false;

    /** @var bool Passed to the writer's writeUploads() as its $dumpContents flag */
    public $dumpUploadFileContents = false;

    // History selectors; tested as bit flags in dumpFrom()
    const FULL = 1;
    const CURRENT = 2;
    const STABLE = 4; // extension defined
    const LOGS = 8;
    const RANGE = 16;

    // Result buffering modes
    const BUFFER = 0;
    const STREAM = 1;

    // Text modes; STUB skips the join against the text table
    const TEXT = 0;
    const STUB = 1;

    /** @var int One of self::BUFFER or self::STREAM */
    public $buffer;

    /** @var int One of self::TEXT or self::STUB */
    public $text;

    /** @var DumpOutput Receiver of the generated XML fragments */
    public $sink;

    /** @var object Database handle all queries are run on */
    public $db;

    /** @var int|array History selector, see __construct() */
    public $history;

    /** @var XmlDumpWriter Renders rows into XML fragments */
    public $writer;

    /**
     * Returns the export schema version.
     *
     * @return string
     */
    public static function schemaVersion() {
        return "0.8";
    }

    /**
     * @param object $db Database handle used for every query
     * @param int|array $history One of the FULL, CURRENT, STABLE, LOGS or
     *   RANGE constants, or an array with the keys 'dir' ('asc' selects
     *   ascending timestamps, anything else descending) and the optional
     *   keys 'offset' (timestamp) and 'limit' (maximum row count)
     * @param int $buffer self::BUFFER or self::STREAM
     * @param int $text self::TEXT or self::STUB
     */
    function __construct( $db, $history = WikiExporter::CURRENT,
            $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
        $this->db = $db;
        $this->history = $history;
        $this->buffer = $buffer;
        $this->writer = new XmlDumpWriter();
        $this->sink = new DumpOutput();
        $this->text = $text;
    }

    /**
     * Replaces the output sink. The sink is bound by reference.
     *
     * @param DumpOutput $sink
     */
    public function setOutputSink( &$sink ) {
        $this->sink =& $sink;
    }

    /**
     * Writes the writer's stream header to the sink.
     */
    public function openStream() {
        $output = $this->writer->openStream();
        $this->sink->writeOpenStream( $output );
    }

    /**
     * Writes the writer's stream footer to the sink.
     */
    public function closeStream() {
        $output = $this->writer->closeStream();
        $this->sink->writeCloseStream( $output );
    }

    /**
     * Dumps without any caller-supplied condition.
     */
    public function allPages() {
        $this->dumpFrom( '' );
    }

    /**
     * Dumps rows whose page_id lies in [$start, $end).
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means no upper bound
     */
    public function pagesByRange( $start, $end ) {
        $condition = 'page_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND page_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Dumps rows whose rev_id lies in [$start, $end).
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means no upper bound
     */
    public function revsByRange( $start, $end ) {
        $condition = 'rev_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND rev_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Dumps the page matching the namespace and DB key of a title.
     *
     * @param Title $title
     */
    public function pageByTitle( $title ) {
        $this->dumpFrom(
            'page_namespace=' . $title->getNamespace() .
            ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
    }

    /**
     * Dumps the page with the given textual name.
     *
     * @param string $name
     * @throws MWException If Title::newFromText() yields null for the name
     */
    public function pageByName( $name ) {
        $title = Title::newFromText( $name );
        if ( is_null( $title ) ) {
            throw new MWException( "Can't export invalid title" );
        } else {
            $this->pageByTitle( $title );
        }
    }

    /**
     * Dumps every named page, one pageByName() call per entry.
     *
     * @param array $names List of page names
     */
    public function pagesByName( $names ) {
        foreach ( $names as $name ) {
            $this->pageByName( $name );
        }
    }

    /**
     * Dumps without any caller-supplied condition; which table is read is
     * decided by the history selector inside dumpFrom().
     */
    public function allLogs() {
        $this->dumpFrom( '' );
    }

    /**
     * Dumps rows whose log_id lies in [$start, $end).
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means no upper bound
     */
    public function logsByRange( $start, $end ) {
        $condition = 'log_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND log_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Builds the "<contributors>" fragment in $this->author_list from the
     * distinct authors of the revisions matching $cond. Revisions whose
     * rev_deleted has the DELETED_USER bit set are left out.
     *
     * @param string $cond SQL condition selecting the pages
     */
    protected function do_list_authors( $cond ) {
        wfProfileIn( __METHOD__ );
        $this->author_list = "<contributors>";
        // rev_deleted

        $res = $this->db->select(
            array( 'page', 'revision' ),
            array( 'DISTINCT rev_user_text', 'rev_user' ),
            array(
                $this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
                $cond,
                // Revisions belong to a page through rev_page, as in every
                // other join of this class; rev_id is the revision's own key.
                'page_id = rev_page',
            ),
            __METHOD__
        );

        foreach ( $res as $row ) {
            // htmlspecialchars() escapes only the XML-significant characters;
            // htmlentities() would emit HTML-only named entities, which are
            // undefined in XML.
            $this->author_list .= "<contributor>" .
                "<username>" .
                htmlspecialchars( $row->rev_user_text ) .
                "</username>" .
                "<id>" .
                $row->rev_user .
                "</id>" .
                "</contributor>";
        }
        $this->author_list .= "</contributors>";
        wfProfileOut( __METHOD__ );
    }

    /**
     * Runs the dump query and streams its rows to the sink.
     *
     * Log entries are dumped when the history selector has the LOGS bit set;
     * otherwise pages and revisions are dumped according to the selector.
     *
     * @param string $cond Extra SQL condition, '' for none
     * @throws MWException For an unknown history selector, or when the
     *   'WikiExporter::dumpStableQuery' hook run returns true
     * @throws Exception Anything raised while querying or writing is rethrown
     *   after the result set and the buffering mode have been cleaned up
     */
    protected function dumpFrom( $cond = '' ) {
        wfProfileIn( __METHOD__ );
        $prev = null; // Previous buffering mode, only set in STREAM mode
        $wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early

        # For logging dumps...
        # (an array selector is never a log dump; bitwise operators on arrays
        # are a TypeError on PHP 8, so it is excluded before testing the bit)
        if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
            $where = array( 'user_id = log_user' );
            # Hide private logs
            $hideLogs = LogEventsList::getExcludeClause( $this->db );
            if ( $hideLogs ) {
                $where[] = $hideLogs;
            }
            # Add on any caller specified conditions
            if ( $cond ) {
                $where[] = $cond;
            }
            # Get logging table name for logging.* clause
            $logging = $this->db->tableName( 'logging' );

            if ( $this->buffer == WikiExporter::STREAM ) {
                $prev = $this->db->bufferResults( false );
            }
            try {
                $result = $this->db->select( array( 'logging', 'user' ),
                    array( "{$logging}.*", 'user_name' ), // grab the user name
                    $where,
                    __METHOD__,
                    array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
                );
                $wrapper = $this->db->resultObject( $result );
                $this->outputLogStream( $wrapper );
                if ( $this->buffer == WikiExporter::STREAM ) {
                    $this->db->bufferResults( $prev );
                }
            } catch ( Exception $e ) {
                // Throwing the exception does not reliably free the resultset, and
                // would also leave the connection in unbuffered mode.
                $this->cleanUpFailedDump( $wrapper, $prev );

                // Inform caller about problem
                wfProfileOut( __METHOD__ );
                throw $e;
            }
        # For page dumps...
        } else {
            $tables = array( 'page', 'revision' );
            $opts = array( 'ORDER BY' => 'page_id ASC' );
            $opts['USE INDEX'] = array();
            $join = array();
            if ( is_array( $this->history ) ) {
                # Time offset/limit for all pages/history...
                $revJoin = 'page_id=rev_page';
                # Set time order
                if ( $this->history['dir'] == 'asc' ) {
                    $op = '>';
                    $opts['ORDER BY'] = 'rev_timestamp ASC';
                } else {
                    $op = '<';
                    $opts['ORDER BY'] = 'rev_timestamp DESC';
                }
                # Set offset
                if ( !empty( $this->history['offset'] ) ) {
                    $revJoin .= " AND rev_timestamp $op " .
                        $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
                }
                $join['revision'] = array( 'INNER JOIN', $revJoin );
                # Set query limit
                if ( !empty( $this->history['limit'] ) ) {
                    $opts['LIMIT'] = intval( $this->history['limit'] );
                }
            } elseif ( $this->history & WikiExporter::FULL ) {
                # Full history dumps...
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
            } elseif ( $this->history & WikiExporter::CURRENT ) {
                # Latest revision dumps...
                if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
                    $this->do_list_authors( $cond );
                }
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
            } elseif ( $this->history & WikiExporter::STABLE ) {
                # "Stable" revision dumps...
                # Default JOIN, to be overridden...
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
                # One, and only one hook should set this, and return false
                if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
                    wfProfileOut( __METHOD__ );
                    throw new MWException( __METHOD__ . " given invalid history dump type." );
                }
            } elseif ( $this->history & WikiExporter::RANGE ) {
                # Dump of revisions within a specified range
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
                $opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
            } else {
                # Unknown history specification parameter?
                wfProfileOut( __METHOD__ );
                throw new MWException( __METHOD__ . " given invalid history dump type." );
            }
            # Query optimization hacks
            if ( $cond == '' ) {
                $opts[] = 'STRAIGHT_JOIN';
                $opts['USE INDEX']['page'] = 'PRIMARY';
            }
            # Build text join options
            if ( $this->text != WikiExporter::STUB ) { // 1-pass
                $tables[] = 'text';
                $join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
            }

            if ( $this->buffer == WikiExporter::STREAM ) {
                $prev = $this->db->bufferResults( false );
            }

            try {
                wfRunHooks( 'ModifyExportQuery',
                        array( $this->db, &$tables, &$cond, &$opts, &$join ) );

                # Do the query!
                $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
                $wrapper = $this->db->resultObject( $result );
                # Output dump results
                $this->outputPageStream( $wrapper );

                if ( $this->buffer == WikiExporter::STREAM ) {
                    $this->db->bufferResults( $prev );
                }
            } catch ( Exception $e ) {
                // Throwing the exception does not reliably free the resultset, and
                // would also leave the connection in unbuffered mode.
                $this->cleanUpFailedDump( $wrapper, $prev );

                // Inform caller about problem; the profiling section is closed
                // first, exactly as the log branch does.
                wfProfileOut( __METHOD__ );
                throw $e;
            }
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Best-effort clean-up after a failed dump: frees the result set and puts
     * the connection back into its previous buffering mode. Exceptions raised
     * by the clean-up itself are dropped on purpose, because the exception
     * that caused the failure has priority.
     *
     * @param object|null $wrapper Result wrapper, null if the query never ran
     * @param mixed $prev Buffering mode returned by bufferResults( false )
     */
    private function cleanUpFailedDump( $wrapper, $prev ) {
        // Freeing result
        try {
            if ( $wrapper ) {
                $wrapper->free();
            }
        } catch ( Exception $e2 ) {
            // Already in panic mode -> ignoring $e2
        }

        // Putting database back in previous buffer mode
        try {
            if ( $this->buffer == WikiExporter::STREAM ) {
                $this->db->bufferResults( $prev );
            }
        } catch ( Exception $e2 ) {
            // Already in panic mode -> ignoring $e2
        }
    }

    /**
     * Streams page/revision rows to the sink.
     *
     * Consecutive rows with the same namespace and title are emitted as the
     * revisions of one page; a change of either closes the open page and
     * opens the next one. $this->author_list is appended only before the
     * closing tag of the final page.
     *
     * @param Traversable|array $resultset Rows to iterate over
     */
    protected function outputPageStream( $resultset ) {
        $last = null; // First row of the page currently open, if any
        foreach ( $resultset as $row ) {
            if ( $last === null ||
                $last->page_namespace != $row->page_namespace ||
                $last->page_title != $row->page_title ) {
                if ( $last !== null ) {
                    $output = '';
                    if ( $this->dumpUploads ) {
                        $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
                    }
                    $output .= $this->writer->closePage();
                    $this->sink->writeClosePage( $output );
                }
                $output = $this->writer->openPage( $row );
                $this->sink->writeOpenPage( $row, $output );
                $last = $row;
            }
            $output = $this->writer->writeRevision( $row );
            $this->sink->writeRevision( $row, $output );
        }
        if ( $last !== null ) {
            $output = '';
            if ( $this->dumpUploads ) {
                $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
            }
            $output .= $this->author_list;
            $output .= $this->writer->closePage();
            $this->sink->writeClosePage( $output );
        }
    }

    /**
     * Streams log rows to the sink, one log item per row.
     *
     * @param Traversable|array $resultset Rows to iterate over
     */
    protected function outputLogStream( $resultset ) {
        foreach ( $resultset as $row ) {
            $output = $this->writer->writeLogItem( $row );
            $this->sink->writeLogItem( $row, $output );
        }
    }
}
00473 
/**
 * Renders database rows into the XML fragments of an export dump.
 *
 * Every method returns a string; nothing is written anywhere by this class.
 */
class XmlDumpWriter {
    /**
     * Returns the export schema version.
     *
     * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
     * @return string
     */
    function schemaVersion() {
        wfDeprecated( __METHOD__, '1.20' );
        return WikiExporter::schemaVersion();
    }

    /**
     * Builds the dump header: the "mediawiki" root element carrying the
     * schema namespace, schema location, version and content language,
     * followed by the <siteinfo> block.
     *
     * @return string
     */
    function openStream() {
        global $wgLanguageCode;
        $ver = WikiExporter::schemaVersion();
        // NOTE(review): null contents presumably make Xml::element() emit
        // only the opening tag - confirm against Xml::element().
        return Xml::element( 'mediawiki', array(
            'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
            'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
            'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
                #TODO: how do we get a new version up there?
                "http://www.mediawiki.org/xml/export-$ver.xsd",
            'version'            => $ver,
            'xml:lang'           => $wgLanguageCode ),
            null ) .
            "\n" .
            $this->siteInfo();
    }

    /**
     * Builds the <siteinfo> block from the site name, base URL, generator,
     * case setting and namespace list.
     *
     * @return string
     */
    function siteInfo() {
        $info = array(
            $this->sitename(),
            $this->homelink(),
            $this->generator(),
            $this->caseSetting(),
            $this->namespaces() );
        return "  <siteinfo>\n    " .
            implode( "\n    ", $info ) .
            "\n  </siteinfo>\n";
    }

    /**
     * @return string <sitename> element holding $wgSitename
     */
    function sitename() {
        global $wgSitename;
        return Xml::element( 'sitename', array(), $wgSitename );
    }

    /**
     * @return string <generator> element holding "MediaWiki $wgVersion"
     */
    function generator() {
        global $wgVersion;
        return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
    }

    /**
     * @return string <base> element holding the main page's canonical URL
     */
    function homelink() {
        return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalURL() );
    }

    /**
     * @return string <case> element: 'first-letter' when $wgCapitalLinks is
     *   truthy, 'case-sensitive' otherwise
     */
    function caseSetting() {
        global $wgCapitalLinks;
        // "case-insensitive" option is reserved for future
        $sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
        return Xml::element( 'case', array(), $sensitivity );
    }

    /**
     * Builds the <namespaces> block with one <namespace> element per entry
     * of $wgContLang->getFormattedNamespaces().
     *
     * @return string
     */
    function namespaces() {
        global $wgContLang;
        $spaces = "<namespaces>\n";
        foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
            $spaces .= '      ' .
                Xml::element( 'namespace',
                    array(
                        'key' => $ns,
                        'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
                    ), $title ) . "\n";
        }
        $spaces .= "    </namespaces>";
        return $spaces;
    }

    /**
     * @return string Closing tag of the dump's root element
     */
    function closeStream() {
        return "</mediawiki>\n";
    }

    /**
     * Builds the opening part of a <page> element: title, namespace, id and,
     * where applicable, the redirect target and the restrictions. The
     * 'XmlDumpWriterOpenPage' hook may modify the fragment.
     *
     * @param object $row Row with page_namespace, page_title, page_id,
     *   page_is_redirect and page_restrictions
     * @return string
     */
    public function openPage( $row ) {
        $out = "  <page>\n";
        $title = Title::makeTitle( $row->page_namespace, $row->page_title );
        $out .= '    ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
        $out .= '    ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
        $out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
        if ( $row->page_is_redirect ) {
            $page = WikiPage::factory( $title );
            $redirect = $page->getRedirectTarget();
            if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
                $out .= '    ';
                $out .= Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) );
                $out .= "\n";
            }
        }

        if ( $row->page_restrictions != '' ) {
            $out .= '    ' . Xml::element( 'restrictions', array(),
                strval( $row->page_restrictions ) ) . "\n";
        }

        wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

        return $out;
    }

    /**
     * @return string Closing tag of a <page> element
     */
    function closePage() {
        return "  </page>\n";
    }

    /**
     * Builds a <revision> element from a revision row.
     *
     * Contributor, comment and text are replaced by an element marked
     * deleted="deleted" when the matching bit of rev_deleted is set. The
     * text is written in full when the row carries old_text and as a stub
     * referencing rev_text_id otherwise. The 'XmlDumpWriterWriteRevision'
     * hook may modify the fragment before it is closed.
     *
     * @param object $row Revision row; rev_deleted, rev_parent_id,
     *   rev_minor_edit, rev_sha1, rev_content_model and rev_content_format
     *   are optional
     * @return string
     */
    function writeRevision( $row ) {
        wfProfileIn( __METHOD__ );

        // A missing rev_deleted means nothing is suppressed. Reading it once
        // keeps every check below guarded, including the sha1 one.
        $deleted = isset( $row->rev_deleted ) ? $row->rev_deleted : 0;

        $out = "    <revision>\n";
        $out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
        if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
            $out .= "      " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
        }

        $out .= $this->writeTimestamp( $row->rev_timestamp );

        if ( $deleted & Revision::DELETED_USER ) {
            $out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
        }

        if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
            $out .= "      <minor/>\n";
        }
        if ( $deleted & Revision::DELETED_COMMENT ) {
            $out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( $row->rev_comment != '' ) {
            $out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
        }

        $text = '';
        if ( $deleted & Revision::DELETED_TEXT ) {
            $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( isset( $row->old_text ) ) {
            // Raw text from the database may have invalid chars
            $text = strval( Revision::getRevisionText( $row ) );
            $out .= "      " . Xml::elementClean( 'text',
                array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
                strval( $text ) ) . "\n";
        } else {
            // Stub output
            $out .= "      " . Xml::element( 'text',
                array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
                "" ) . "\n";
        }

        if ( isset( $row->rev_sha1 )
            && $row->rev_sha1
            && !( $deleted & Revision::DELETED_TEXT )
        ) {
            $out .= "      " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
        } else {
            $out .= "      <sha1/>\n";
        }

        if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
            $content_model = strval( $row->rev_content_model );
        } else {
            // probably using $wgContentHandlerUseDB = false;
            // @todo test!
            $title = Title::makeTitle( $row->page_namespace, $row->page_title );
            $content_model = ContentHandler::getDefaultModelFor( $title );
        }

        $out .= "      " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

        if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
            $content_format = strval( $row->rev_content_format );
        } else {
            // probably using $wgContentHandlerUseDB = false;
            // @todo test!
            $content_handler = ContentHandler::getForModelID( $content_model );
            $content_format = $content_handler->getDefaultFormat();
        }

        $out .= "      " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

        // From PHP 7.1 on, &$this no longer yields a usable reference, so
        // handlers declaring their first parameter by reference break. A
        // local alias can be referenced on every PHP version.
        $writer = $this;
        wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

        $out .= "    </revision>\n";

        wfProfileOut( __METHOD__ );
        return $out;
    }

    /**
     * Builds a <logitem> element from a logging row joined with user_name.
     *
     * Contributor and comment are replaced by an element marked
     * deleted="deleted" when the matching bit of log_deleted is set; with
     * DELETED_ACTION set, a deleted <text> element replaces the log title
     * and the parameters.
     *
     * @param object $row
     * @return string
     */
    function writeLogItem( $row ) {
        wfProfileIn( __METHOD__ );

        $out = "  <logitem>\n";
        $out .= "    " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

        $out .= $this->writeTimestamp( $row->log_timestamp, "    " );

        if ( $row->log_deleted & LogPage::DELETED_USER ) {
            $out .= "    " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $out .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
        }

        if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
            $out .= "    " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( $row->log_comment != '' ) {
            $out .= "    " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
        }

        $out .= "    " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
        $out .= "    " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

        if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
            $out .= "    " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $title = Title::makeTitle( $row->log_namespace, $row->log_title );
            $out .= "    " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
            $out .= "    " . Xml::elementClean( 'params',
                array( 'xml:space' => 'preserve' ),
                strval( $row->log_params ) ) . "\n";
        }

        $out .= "  </logitem>\n";

        wfProfileOut( __METHOD__ );
        return $out;
    }

    /**
     * Builds a <timestamp> element in TS_ISO_8601 format.
     *
     * @param string $timestamp Timestamp accepted by wfTimestamp()
     * @param string $indent Whitespace placed before the element
     * @return string
     */
    function writeTimestamp( $timestamp, $indent = "      " ) {
        $ts = wfTimestamp( TS_ISO_8601, $timestamp );
        return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
    }

    /**
     * Builds a <contributor> element. A <username>/<id> pair is written when
     * $id is truthy or $text is not a valid IP; otherwise a single <ip>
     * element is written.
     *
     * @param int $id User id
     * @param string $text User name or IP address
     * @param string $indent Whitespace placed before the element
     * @return string
     */
    function writeContributor( $id, $text, $indent = "      " ) {
        $out = $indent . "<contributor>\n";
        if ( $id || !IP::isValid( $text ) ) {
            $out .= $indent . "  " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
            $out .= $indent . "  " . Xml::element( 'id', null, strval( $id ) ) . "\n";
        } else {
            $out .= $indent . "  " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
        }
        $out .= $indent . "</contributor>\n";
        return $out;
    }

    /**
     * Builds the <upload> records of a file page: the reversed getHistory()
     * entries first, then the current file. Rows outside NS_FILE, or whose
     * file does not exist, yield an empty string.
     *
     * @param object $row Row with page_namespace and page_title
     * @param bool $dumpContents Whether file contents are embedded
     * @return string
     */
    function writeUploads( $row, $dumpContents = false ) {
        if ( $row->page_namespace == NS_FILE ) {
            $img = wfLocalFile( $row->page_title );
            if ( $img && $img->exists() ) {
                $out = '';
                foreach ( array_reverse( $img->getHistory() ) as $ver ) {
                    $out .= $this->writeUpload( $ver, $dumpContents );
                }
                $out .= $this->writeUpload( $img, $dumpContents );
                return $out;
            }
        }
        return '';
    }

    /**
     * Builds one <upload> element for a file version.
     *
     * @param File $file
     * @param bool $dumpContents Whether to embed the file contents, base64
     *   encoded
     * @return string
     */
    function writeUpload( $file, $dumpContents = false ) {
        if ( $file->isOld() ) {
            $archiveName = "      " .
                Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
        } else {
            $archiveName = '';
        }
        if ( $dumpContents ) {
            $be = $file->getRepo()->getBackend();
            # Dump file as base64
            # Uses only XML-safe characters, so does not need escaping
            # @todo Too bad this loads the contents into memory (script might swap)
            $contents = '      <contents encoding="base64">' .
                chunk_split( base64_encode(
                    $be->getFileContents( array( 'src' => $file->getPath() ) ) ) ) .
                "      </contents>\n";
        } else {
            $contents = '';
        }
        if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
            $comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
        } else {
            $comment = Xml::elementClean( 'comment', null, $file->getDescription() );
        }
        return "    <upload>\n" .
            $this->writeTimestamp( $file->getTimestamp() ) .
            $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
            "      " . $comment . "\n" .
            "      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
            $archiveName .
            "      " . Xml::element( 'src', null, $file->getCanonicalURL() ) . "\n" .
            "      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
            "      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
            "      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
            $contents .
            "    </upload>\n";
    }

    /**
     * Returns the title text used in dumps. External titles use their
     * prefixed text; all others use the content language's namespace name
     * (underscores turned into spaces) followed by ':' and the title text,
     * with no prefix when the namespace name is empty.
     *
     * @param Title $title
     * @return string
     */
    public static function canonicalTitle( Title $title ) {
        if ( $title->isExternal() ) {
            return $title->getPrefixedText();
        }

        global $wgContLang;
        $prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

        if ( $prefix !== '' ) {
            $prefix .= ':';
        }

        return $prefix . $title->getText();
    }
}
00886 
/**
 * Default dump sink: everything handed to it is printed to standard output.
 *
 * Each write*() method forwards its string to write(), so a subclass only
 * has to override write() to redirect the whole dump.
 */
class DumpOutput {

    /**
     * @param string $string Stream header fragment
     */
    public function writeOpenStream( $string ) {
        $this->write( $string );
    }

    /**
     * @param string $string Stream footer fragment
     */
    public function writeCloseStream( $string ) {
        $this->write( $string );
    }

    /**
     * @param object $page Row of the page being opened; not used here
     * @param string $string Page opening fragment
     */
    public function writeOpenPage( $page, $string ) {
        $this->write( $string );
    }

    /**
     * @param string $string Page closing fragment
     */
    public function writeClosePage( $string ) {
        $this->write( $string );
    }

    /**
     * @param object $rev Revision row; not used here
     * @param string $string Revision fragment
     */
    public function writeRevision( $rev, $string ) {
        $this->write( $string );
    }

    /**
     * @param object $rev Log row; not used here
     * @param string $string Log item fragment
     */
    public function writeLogItem( $rev, $string ) {
        $this->write( $string );
    }

    /**
     * Emits a fragment on standard output.
     *
     * @param string $string
     */
    public function write( $string ) {
        echo $string;
    }

    /**
     * Does nothing in this class; file-backed sinks override it.
     *
     * @param string|array $newname
     */
    public function closeRenameAndReopen( $newname ) {
        // Intentionally empty
    }

    /**
     * Does nothing in this class; file-backed sinks override it.
     *
     * @param string|array $newname
     * @param bool $open
     */
    public function closeAndRename( $newname, $open = false ) {
        // Intentionally empty
    }

    /**
     * @return null This sink writes to no file
     */
    public function getFilenames() {
        return null;
    }
}
00977 
/**
 * Dump sink that writes every fragment to a file.
 */
class DumpFileOutput extends DumpOutput {
    /** @var resource|bool Open file handle, false once closed */
    protected $handle = false;

    /** @var string Name of the file the handle was opened on */
    protected $filename;

    /**
     * Opens the target file in "wt" mode.
     *
     * @param string $file
     */
    function __construct( $file ) {
        $this->filename = $file;
        $this->handle = fopen( $file, "wt" );
    }

    /**
     * Writes the stream footer, then closes the file if it is still open.
     *
     * @param string $string
     */
    function writeCloseStream( $string ) {
        parent::writeCloseStream( $string );
        if ( !$this->handle ) {
            return;
        }
        fclose( $this->handle );
        $this->handle = false;
    }

    /**
     * Writes a fragment to the file handle.
     *
     * @param string $string
     */
    function write( $string ) {
        fwrite( $this->handle, $string );
    }

    /**
     * Same as closeAndRename() with reopening enabled.
     *
     * @param string|array $newname
     */
    function closeRenameAndReopen( $newname ) {
        $this->closeAndRename( $newname, true );
    }

    /**
     * Renames the output file.
     *
     * @param string $newname
     * @throws MWException When rename() reports failure
     */
    function renameOrException( $newname ) {
        $renamed = rename( $this->filename, $newname );
        if ( !$renamed ) {
            throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
        }
    }

    /**
     * Reduces a rename argument to a single name. Non-array values are
     * returned unchanged; for an array its element 0 is returned.
     *
     * @param string|array $newname
     * @return string
     * @throws MWException When an array with more than one entry is passed
     */
    function checkRenameArgCount( $newname ) {
        if ( !is_array( $newname ) ) {
            return $newname;
        }
        if ( count( $newname ) > 1 ) {
            throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
        }
        return $newname[0];
    }

    /**
     * Closes the file and renames it to $newname. Nothing happens when the
     * resolved name is falsy. With $open set, the original filename is
     * opened again in "wt" mode afterwards.
     *
     * @param string|array $newname
     * @param bool $open
     */
    function closeAndRename( $newname, $open = false ) {
        $target = $this->checkRenameArgCount( $newname );
        if ( !$target ) {
            return;
        }
        if ( $this->handle ) {
            fclose( $this->handle );
            $this->handle = false;
        }
        $this->renameOrException( $target );
        if ( $open ) {
            $this->handle = fopen( $this->filename, "wt" );
        }
    }

    /**
     * @return string Name of the file this sink was opened on
     */
    function getFilenames() {
        return $this->filename;
    }
}
01069 
/**
 * Dump sink that feeds the stream to the standard input of an external
 * shell command, optionally redirecting that command's output to a file.
 */
class DumpPipeOutput extends DumpFileOutput {
    protected $command, $filename;
    protected $procOpenResource = false;

    /**
     * @param string $command Shell command to run
     * @param string|null $file When given, the command's output is
     *   redirected to this file (the redirection is appended to $command)
     */
    function __construct( $command, $file = null ) {
        if ( !is_null( $file ) ) {
            $command .= " > " . wfEscapeShellArg( $file );
        }

        $this->startCommand( $command );
        // Note: the stored command includes the redirection appended above.
        $this->command = $command;
        $this->filename = $file;
    }

    /**
     * Write the stream footer and close the pipe through the parent, then
     * close the process resource if it is still open.
     *
     * @param string $string Closing text of the stream
     */
    function writeCloseStream( $string ) {
        parent::writeCloseStream( $string );
        if ( $this->procOpenResource ) {
            proc_close( $this->procOpenResource );
            $this->procOpenResource = false;
        }
    }

    /**
     * Launch the command with a pipe attached to its stdin and keep our end
     * of that pipe as the write handle.
     *
     * @param string $command Full shell command line
     */
    function startCommand( $command ) {
        $spec = array(
            0 => array( "pipe", "r" ),
        );
        $pipes = array();
        $this->procOpenResource = proc_open( $command, $spec, $pipes );
        $this->handle = $pipes[0];
    }

    /**
     * Close the pipe and process, rename the output file, and restart the
     * command writing to the original file name.
     *
     * @param string|array $newname New name (or one-element array holding it)
     */
    function closeRenameAndReopen( $newname ) {
        $this->closeAndRename( $newname, true );
    }

    /**
     * Close the pipe and process (if open) and rename the output file;
     * optionally restart the command afterwards. Does nothing when the
     * resolved new name is empty.
     *
     * @param string|array $newname New name (or one-element array holding it)
     * @param bool $open Whether to restart the command after renaming
     * @throws MWException When the rename fails or several names are passed
     */
    function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            if ( $this->procOpenResource ) {
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $command = $this->command;
                $redirect = " > " . wfEscapeShellArg( $this->filename );
                // The constructor already appended this redirection when it
                // was given a file; only add it when it is not there yet so
                // the command does not end up as "cmd > file > file".
                if ( substr( $command, -strlen( $redirect ) ) !== $redirect ) {
                    $command .= $redirect;
                }
                $this->startCommand( $command );
            }
        }
    }

}
01149 
/**
 * Pipe sink that runs the stream through the "gzip" command.
 */
class DumpGZipOutput extends DumpPipeOutput {

    /**
     * @param string $file File that receives the gzip output
     */
    function __construct( $file ) {
        $compressor = "gzip";
        parent::__construct( $compressor, $file );
    }
}
01163 
/**
 * Pipe sink that runs the stream through the "bzip2" command.
 */
class DumpBZip2Output extends DumpPipeOutput {

    /**
     * @param string $file File that receives the bzip2 output
     */
    function __construct( $file ) {
        $compressor = "bzip2";
        parent::__construct( $compressor, $file );
    }
}
01177 
/**
 * Pipe sink that feeds the stream to the "7za" command, which writes the
 * archive file itself (no shell redirection to the target file is used).
 */
class Dump7ZipOutput extends DumpPipeOutput {

    /**
     * @param string $file Archive file to create
     */
    function __construct( $file ) {
        $command = $this->setup7zCommand( $file );
        parent::__construct( $command );
        // The parent was called without a file, so record the name here.
        $this->filename = $file;
    }

    /**
     * Build the 7za command line for the given archive file.
     *
     * @param string $file Archive file name
     * @return string Shell command with all of its output discarded
     */
    function setup7zCommand( $file ) {
        $command = "7za a -bd -si " . wfEscapeShellArg( $file );
        // Suppress annoying useless crap from p7zip
        // Unfortunately this could suppress real error messages too
        $command .= ' >' . wfGetNull() . ' 2>&1';
        return $command;
    }

    /**
     * Close the pipe and process (if open) and rename the archive;
     * optionally start a new 7za process on the original file name.
     * Does nothing when the resolved new name is empty.
     *
     * @param string|array $newname New name (or one-element array holding it)
     * @param bool $open Whether to restart 7za after renaming
     * @throws MWException When the rename fails or several names are passed
     */
    function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            // Guard and reset both resources, as DumpPipeOutput does, so a
            // call after writeCloseStream() does not close `false`, and a
            // later writeCloseStream() does not close them a second time.
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            if ( $this->procOpenResource ) {
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $command = $this->setup7zCommand( $this->filename );
                $this->startCommand( $command );
            }
        }
    }
}
01222 
/**
 * Pass-through stage that sits in front of another sink and decides, page
 * by page, whether the page and its revisions are forwarded.
 * Subclasses override pass() to implement the actual filtering rule.
 */
class DumpFilter {

    /**
     * @var object Sink that receives whatever this filter lets through
     */
    public $sink;

    /**
     * @var bool Whether the page currently being written passed the filter
     */
    protected $sendingThisPage;

    /**
     * @param object $sink Downstream sink (bound by reference)
     */
    function __construct( &$sink ) {
        $this->sink =& $sink;
    }

    /**
     * @param string $string Opening text of the stream
     */
    function writeOpenStream( $string ) {
        $this->sink->writeOpenStream( $string );
    }

    /**
     * @param string $string Closing text of the stream
     */
    function writeCloseStream( $string ) {
        $this->sink->writeCloseStream( $string );
    }

    /**
     * Ask pass() whether the page is wanted and forward its opening text
     * only if so.
     *
     * @param object $page Page row
     * @param string $string Opening text of the page
     */
    function writeOpenPage( $page, $string ) {
        $this->sendingThisPage = $this->pass( $page, $string );
        if ( $this->sendingThisPage ) {
            $this->sink->writeOpenPage( $page, $string );
        }
    }

    /**
     * Forward the page's closing text if the page was being sent, and
     * reset the per-page flag.
     *
     * @param string $string Closing text of the page
     */
    function writeClosePage( $string ) {
        if ( $this->sendingThisPage ) {
            $this->sink->writeClosePage( $string );
            $this->sendingThisPage = false;
        }
    }

    /**
     * Forward a revision only while an accepted page is open.
     *
     * @param object $rev Revision row
     * @param string $string Revision text
     */
    function writeRevision( $rev, $string ) {
        if ( $this->sendingThisPage ) {
            $this->sink->writeRevision( $rev, $string );
        }
    }

    /**
     * Forward a log item unconditionally (log items are not page-gated).
     *
     * The item is handed to the sink's writeLogItem() so that a downstream
     * filter does not run it through its page-gated writeRevision() and
     * drop it. Sinks without a writeLogItem() method still receive it via
     * writeRevision().
     *
     * @param object $rev Log row
     * @param string $string Log item text
     */
    function writeLogItem( $rev, $string ) {
        if ( method_exists( $this->sink, 'writeLogItem' ) ) {
            $this->sink->writeLogItem( $rev, $string );
        } else {
            $this->sink->writeRevision( $rev, $string );
        }
    }

    /**
     * @param string|array $newname New name(s), passed to the sink as is
     */
    function closeRenameAndReopen( $newname ) {
        $this->sink->closeRenameAndReopen( $newname );
    }

    /**
     * @param string|array $newname New name(s), passed to the sink as is
     * @param bool $open Whether the sink should reopen afterwards
     */
    function closeAndRename( $newname, $open = false ) {
        $this->sink->closeAndRename( $newname, $open );
    }

    /**
     * @return mixed Whatever the sink reports as its file name(s)
     */
    function getFilenames() {
        return $this->sink->getFilenames();
    }

    /**
     * Decide whether a page is forwarded. The base filter accepts all pages.
     *
     * @param object $page Page row
     * @return bool
     */
    function pass( $page ) {
        return true;
    }
}
01334 
/**
 * Filter that rejects pages whose namespace MWNamespace reports as a talk
 * namespace.
 */
class DumpNotalkFilter extends DumpFilter {

    /**
     * @param object $page Page row; its page_namespace field is inspected
     * @return bool True for pages outside talk namespaces
     */
    function pass( $page ) {
        $isTalkPage = MWNamespace::isTalk( $page->page_namespace );
        return !$isTalkPage;
    }
}
01349 
/**
 * Filter that includes or excludes pages according to a list of namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
    /** @var bool When true, listed namespaces are excluded instead of kept */
    var $invert = false;

    /** @var array Set of selected namespace ids (id => true) */
    var $namespaces = array();

    /**
     * @param object $sink Downstream sink
     * @param string $param Comma-separated namespace keys; each key is one
     *   of the NS_* names listed below or a numeric id. A leading "!"
     *   inverts the selection.
     * @throws MWException When a key is neither a known name nor numeric
     */
    function __construct( &$sink, $param ) {
        parent::__construct( $sink );

        $constants = array(
            "NS_MAIN"           => NS_MAIN,
            "NS_TALK"           => NS_TALK,
            "NS_USER"           => NS_USER,
            "NS_USER_TALK"      => NS_USER_TALK,
            "NS_PROJECT"        => NS_PROJECT,
            "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
            "NS_FILE"           => NS_FILE,
            "NS_FILE_TALK"      => NS_FILE_TALK,
            "NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
            "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
            "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
            "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
            "NS_TEMPLATE"       => NS_TEMPLATE,
            "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
            "NS_HELP"           => NS_HELP,
            "NS_HELP_TALK"      => NS_HELP_TALK,
            "NS_CATEGORY"       => NS_CATEGORY,
            "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

        // substr() instead of a string offset: the "$param{0}" curly-brace
        // form no longer parses on PHP 8, and an offset read on an empty
        // string raises an "uninitialized string offset" notice.
        if ( substr( $param, 0, 1 ) === '!' ) {
            $this->invert = true;
            $param = substr( $param, 1 );
        }

        foreach ( explode( ',', $param ) as $key ) {
            $key = trim( $key );
            if ( isset( $constants[$key] ) ) {
                $ns = $constants[$key];
                $this->namespaces[$ns] = true;
            } elseif ( is_numeric( $key ) ) {
                $ns = intval( $key );
                $this->namespaces[$ns] = true;
            } else {
                throw new MWException( "Unrecognized namespace key '$key'\n" );
            }
        }
    }

    /**
     * @param object $page Page row; its page_namespace field is inspected
     * @return bool True when the page's namespace is selected (or, in
     *   inverted mode, when it is not)
     */
    function pass( $page ) {
        $match = isset( $this->namespaces[$page->page_namespace] );
        return $this->invert xor $match;
    }
}
01414 
/**
 * Filter that buffers each page and emits it with only the revision whose
 * id equals the page's page_latest; pages with no such revision are dropped.
 */
class DumpLatestFilter extends DumpFilter {
    /** @var object|null Page row being buffered */
    var $page;

    /** @var string|null Opening text of the buffered page */
    var $pageString;

    /** @var object|null Matching revision row, once seen */
    var $rev;

    /** @var string|null Text of the matching revision */
    var $revString;

    /**
     * Remember the page instead of forwarding it right away.
     *
     * @param object $page Page row
     * @param string $string Opening text of the page
     */
    function writeOpenPage( $page, $string ) {
        $this->pageString = $string;
        $this->page = $page;
    }

    /**
     * Flush the buffered page with its latest revision, if one was seen,
     * then clear the buffer.
     *
     * @param string $string Closing text of the page
     */
    function writeClosePage( $string ) {
        $haveLatest = (bool)$this->rev;
        if ( $haveLatest ) {
            $this->sink->writeOpenPage( $this->page, $this->pageString );
            $this->sink->writeRevision( $this->rev, $this->revString );
            $this->sink->writeClosePage( $string );
        }

        $this->page = null;
        $this->pageString = null;
        $this->rev = null;
        $this->revString = null;
    }

    /**
     * Keep the revision only when it is the page's latest one.
     *
     * @param object $rev Revision row
     * @param string $string Revision text
     */
    function writeRevision( $rev, $string ) {
        if ( $rev->rev_id != $this->page->page_latest ) {
            return;
        }
        $this->rev = $rev;
        $this->revString = $string;
    }
}
01457 
/**
 * Sink that fans every call out to a list of other sinks.
 */
class DumpMultiWriter {

    /** @var array Sinks that receive every call */
    public $sinks;

    /** @var int Number of sinks */
    public $count;

    /**
     * @param array $sinks Sinks to write to
     */
    function __construct( $sinks ) {
        $this->sinks = $sinks;
        $this->count = count( $sinks );
    }

    /**
     * @param string $string Opening text of the stream
     */
    function writeOpenStream( $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeOpenStream( $string );
        }
    }

    /**
     * @param string $string Closing text of the stream
     */
    function writeCloseStream( $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeCloseStream( $string );
        }
    }

    /**
     * @param object $page Page row
     * @param string $string Opening text of the page
     */
    function writeOpenPage( $page, $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeOpenPage( $page, $string );
        }
    }

    /**
     * @param string $string Closing text of the page
     */
    function writeClosePage( $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeClosePage( $string );
        }
    }

    /**
     * @param object $rev Revision row
     * @param string $string Revision text
     */
    function writeRevision( $rev, $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeRevision( $rev, $string );
        }
    }

    /**
     * Fan a log item out to every sink, mirroring the writeLogItem() entry
     * point that DumpFilter offers.
     *
     * @param object $rev Log row
     * @param string $string Log item text
     */
    function writeLogItem( $rev, $string ) {
        foreach ( $this->sinks as $sink ) {
            $sink->writeLogItem( $rev, $string );
        }
    }

    /**
     * @param array $newnames New names, one per sink, in sink order
     */
    function closeRenameAndReopen( $newnames ) {
        $this->closeAndRename( $newnames, true );
    }

    /**
     * Close and rename every sink, pairing sinks and names by array key.
     *
     * @param array $newnames New names, one per sink, in sink order
     * @param bool $open Whether each sink should reopen afterwards
     */
    function closeAndRename( $newnames, $open = false ) {
        foreach ( $this->sinks as $i => $sink ) {
            $sink->closeAndRename( $newnames[$i], $open );
        }
    }

    /**
     * @return array File name(s) reported by each sink, in sink order
     */
    function getFilenames() {
        $filenames = array();
        foreach ( $this->sinks as $sink ) {
            $filenames[] = $sink->getFilenames();
        }
        return $filenames;
    }

}
01548 
/**
 * Prepare a string for inclusion in XML output: run it through
 * UtfNormal::cleanUp() and then escape it with htmlspecialchars().
 *
 * @param string $string Raw text
 * @return string Cleaned and escaped text
 */
function xmlsafe( $string ) {
    wfProfileIn( __FUNCTION__ );

    // Clean up first, then escape the cleaned text.
    $escaped = htmlspecialchars( UtfNormal::cleanUp( $string ) );

    wfProfileOut( __FUNCTION__ );
    return $escaped;
}