MediaWiki  REL1_22
Export.php
Go to the documentation of this file.
00001 <?php
/**
 * Runs the database queries for an XML export and feeds the resulting rows
 * through an XmlDumpWriter (which formats them) into a DumpOutput sink
 * (which writes them somewhere).
 */
class WikiExporter {
    /** Return distinct author list (when not returning full history) */
    var $list_authors = false;

    /** XML fragment built by do_list_authors(); emitted before the final page close */
    var $author_list = "";

    /** When true, upload records are written for pages in the file namespace */
    var $dumpUploads = false;

    /** Passed through to XmlDumpWriter::writeUploads() as its $dumpContents flag */
    var $dumpUploadFileContents = false;

    // History selection flags, tested bitwise in dumpFrom()
    const FULL = 1;
    const CURRENT = 2;
    const STABLE = 4; // extension defined
    const LOGS = 8;
    const RANGE = 16;

    // Result buffering: STREAM turns off result buffering on the connection
    const BUFFER = 0;
    const STREAM = 1;

    // Text handling: STUB skips the join against the text table
    const TEXT = 0;
    const STUB = 1;

    /** One of the BUFFER / STREAM constants */
    var $buffer;

    /** One of the TEXT / STUB constants */
    var $text;

    /** DumpOutput (or compatible) object receiving the formatted output */
    var $sink;

    /** Database connection used for all queries */
    var $db;

    /** One of the history constants above, or an array with 'dir', 'offset', 'limit' keys */
    var $history;

    /** XmlDumpWriter that turns rows into XML fragments */
    var $writer;

    /**
     * Returns the export schema version used in the dump namespace URLs.
     *
     * @return string
     */
    public static function schemaVersion() {
        return "0.8";
    }

    /**
     * @param $db Database connection to query
     * @param $history Mixed: one of the FULL/CURRENT/STABLE/LOGS/RANGE
     *   constants, or an array that may carry 'dir', 'offset' and 'limit'
     * @param int $buffer One of WikiExporter::BUFFER or WikiExporter::STREAM
     * @param int $text One of WikiExporter::TEXT or WikiExporter::STUB
     */
    function __construct( $db, $history = WikiExporter::CURRENT,
            $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
        $this->db = $db;
        $this->history = $history;
        $this->buffer = $buffer;
        $this->writer = new XmlDumpWriter();
        $this->sink = new DumpOutput();
        $this->text = $text;
    }

    /**
     * Replaces the output sink. The sink is held by reference.
     *
     * @param $sink DumpOutput-compatible object
     */
    public function setOutputSink( &$sink ) {
        $this->sink =& $sink;
    }

    /**
     * Writes the document header produced by the writer to the sink.
     */
    public function openStream() {
        $output = $this->writer->openStream();
        $this->sink->writeOpenStream( $output );
    }

    /**
     * Writes the document footer produced by the writer to the sink.
     */
    public function closeStream() {
        $output = $this->writer->closeStream();
        $this->sink->writeCloseStream( $output );
    }

    /**
     * Dumps with no extra condition, i.e. every page the history mode selects.
     */
    public function allPages() {
        $this->dumpFrom( '' );
    }

    /**
     * Dumps pages with page_id >= $start and, when $end is truthy,
     * page_id < $end.
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means no upper bound
     */
    public function pagesByRange( $start, $end ) {
        $condition = 'page_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND page_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Dumps revisions with rev_id >= $start and, when $end is truthy,
     * rev_id < $end.
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means no upper bound
     */
    public function revsByRange( $start, $end ) {
        $condition = 'rev_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND rev_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Dumps the single page identified by a Title object.
     *
     * @param $title Title
     */
    public function pageByTitle( $title ) {
        $this->dumpFrom(
            'page_namespace=' . $title->getNamespace() .
            ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
    }

    /**
     * Dumps the single page identified by its title text.
     *
     * @param string $name
     * @throws MWException when the text cannot be turned into a Title
     */
    public function pageByName( $name ) {
        $title = Title::newFromText( $name );
        if ( is_null( $title ) ) {
            throw new MWException( "Can't export invalid title" );
        } else {
            $this->pageByTitle( $title );
        }
    }

    /**
     * Dumps each named page in turn via pageByName().
     *
     * @param array $names List of title texts
     */
    public function pagesByName( $names ) {
        foreach ( $names as $name ) {
            $this->pageByName( $name );
        }
    }

    /**
     * Dumps with no extra condition; produces log items when the exporter
     * was constructed with the LOGS history flag.
     */
    public function allLogs() {
        $this->dumpFrom( '' );
    }

    /**
     * Dumps log entries with log_id >= $start and, when $end is truthy,
     * log_id < $end.
     *
     * @param int $start Inclusive lower bound
     * @param int $end Exclusive upper bound; a falsy value means no upper bound
     */
    public function logsByRange( $start, $end ) {
        $condition = 'log_id >= ' . intval( $start );
        if ( $end ) {
            $condition .= ' AND log_id < ' . intval( $end );
        }
        $this->dumpFrom( $condition );
    }

    /**
     * Builds the <contributors> fragment for the pages matched by $cond and
     * stores it in $this->author_list. Revisions whose user is hidden
     * (Revision::DELETED_USER) are left out.
     *
     * @param string $cond SQL condition selecting the pages
     */
    protected function do_list_authors( $cond ) {
        wfProfileIn( __METHOD__ );
        $this->author_list = "<contributors>";
        // rev_deleted

        $res = $this->db->select(
            array( 'page', 'revision' ),
            array( 'DISTINCT rev_user_text', 'rev_user' ),
            array(
                $this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
                $cond,
                // Join on the revision's page, as every other page/revision
                // join in this class does; rev_id is the revision's own id.
                'page_id = rev_page',
            ),
            __METHOD__
        );

        foreach ( $res as $row ) {
            // htmlspecialchars() rather than htmlentities(): the latter emits
            // HTML-only named entities that are undefined in an XML document.
            $this->author_list .= "<contributor>" .
                "<username>" .
                htmlspecialchars( $row->rev_user_text ) .
                "</username>" .
                "<id>" .
                $row->rev_user .
                "</id>" .
                "</contributor>";
        }
        $this->author_list .= "</contributors>";
        wfProfileOut( __METHOD__ );
    }

    /**
     * Best-effort cleanup after a dump query or its output failed: frees the
     * result set (if one exists) and puts the connection back into its
     * previous buffering mode. Errors raised while cleaning up are ignored
     * because the original exception takes priority.
     *
     * @param $wrapper Result wrapper, or null if the query never returned
     * @param $prev Value previously returned by bufferResults( false )
     */
    private function cleanUpFailedDump( $wrapper, $prev ) {
        // Freeing result
        try {
            if ( $wrapper ) {
                $wrapper->free();
            }
        } catch ( Exception $e2 ) {
            // Already in panic mode -> ignoring $e2 as the original
            // exception has higher priority
        }

        // Putting database back in previous buffer mode
        try {
            if ( $this->buffer == WikiExporter::STREAM ) {
                $this->db->bufferResults( $prev );
            }
        } catch ( Exception $e2 ) {
            // Already in panic mode -> ignoring $e2 as the original
            // exception has higher priority
        }
    }

    /**
     * Runs the export query for the configured history mode, restricted by
     * $cond, and streams the rows to outputLogStream() or outputPageStream().
     *
     * @param string $cond SQL condition, or '' for no restriction
     * @throws MWException on an unknown history mode, or when no hook
     *   handled the STABLE query
     * @throws Exception Anything raised by the query or the output stage is
     *   rethrown after the result set and buffering mode have been cleaned up
     */
    protected function dumpFrom( $cond = '' ) {
        wfProfileIn( __METHOD__ );
        $prev = null; // only meaningful in STREAM mode
        # For logging dumps...
        # (an array-valued history is never a log dump; checking first avoids
        # applying a bitwise operator to an array)
        if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
            $where = array( 'user_id = log_user' );
            # Hide private logs
            $hideLogs = LogEventsList::getExcludeClause( $this->db );
            if ( $hideLogs ) {
                $where[] = $hideLogs;
            }
            # Add on any caller specified conditions
            if ( $cond ) {
                $where[] = $cond;
            }
            # Get logging table name for logging.* clause
            $logging = $this->db->tableName( 'logging' );

            if ( $this->buffer == WikiExporter::STREAM ) {
                $prev = $this->db->bufferResults( false );
            }
            $wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
            try {
                $result = $this->db->select( array( 'logging', 'user' ),
                    array( "{$logging}.*", 'user_name' ), // grab the user name
                    $where,
                    __METHOD__,
                    array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
                );
                $wrapper = $this->db->resultObject( $result );
                $this->outputLogStream( $wrapper );
                if ( $this->buffer == WikiExporter::STREAM ) {
                    $this->db->bufferResults( $prev );
                }
            } catch ( Exception $e ) {
                // Throwing the exception does not reliably free the resultset, and
                // would also leave the connection in unbuffered mode.
                $this->cleanUpFailedDump( $wrapper, $prev );

                // Inform caller about problem
                wfProfileOut( __METHOD__ );
                throw $e;
            }
        # For page dumps...
        } else {
            $tables = array( 'page', 'revision' );
            $opts = array( 'ORDER BY' => 'page_id ASC' );
            $opts['USE INDEX'] = array();
            $join = array();
            if ( is_array( $this->history ) ) {
                # Time offset/limit for all pages/history...
                $revJoin = 'page_id=rev_page';
                # Set time order
                if ( $this->history['dir'] == 'asc' ) {
                    $op = '>';
                    $opts['ORDER BY'] = 'rev_timestamp ASC';
                } else {
                    $op = '<';
                    $opts['ORDER BY'] = 'rev_timestamp DESC';
                }
                # Set offset
                if ( !empty( $this->history['offset'] ) ) {
                    $revJoin .= " AND rev_timestamp $op " .
                        $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
                }
                $join['revision'] = array( 'INNER JOIN', $revJoin );
                # Set query limit
                if ( !empty( $this->history['limit'] ) ) {
                    $opts['LIMIT'] = intval( $this->history['limit'] );
                }
            } elseif ( $this->history & WikiExporter::FULL ) {
                # Full history dumps...
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
            } elseif ( $this->history & WikiExporter::CURRENT ) {
                # Latest revision dumps...
                if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
                    $this->do_list_authors( $cond );
                }
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
            } elseif ( $this->history & WikiExporter::STABLE ) {
                # "Stable" revision dumps...
                # Default JOIN, to be overridden...
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
                # One, and only one hook should set this, and return false
                if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
                    wfProfileOut( __METHOD__ );
                    throw new MWException( __METHOD__ . " given invalid history dump type." );
                }
            } elseif ( $this->history & WikiExporter::RANGE ) {
                # Dump of revisions within a specified range
                $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
                $opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
            } else {
                # Unknown history specification parameter?
                wfProfileOut( __METHOD__ );
                throw new MWException( __METHOD__ . " given invalid history dump type." );
            }
            # Query optimization hacks
            if ( $cond == '' ) {
                $opts[] = 'STRAIGHT_JOIN';
                $opts['USE INDEX']['page'] = 'PRIMARY';
            }
            # Build text join options
            if ( $this->text != WikiExporter::STUB ) { // 1-pass
                $tables[] = 'text';
                $join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
            }

            if ( $this->buffer == WikiExporter::STREAM ) {
                $prev = $this->db->bufferResults( false );
            }

            $wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
            try {
                wfRunHooks( 'ModifyExportQuery',
                        array( $this->db, &$tables, &$cond, &$opts, &$join ) );

                # Do the query!
                $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
                $wrapper = $this->db->resultObject( $result );
                # Output dump results
                $this->outputPageStream( $wrapper );

                if ( $this->buffer == WikiExporter::STREAM ) {
                    $this->db->bufferResults( $prev );
                }
            } catch ( Exception $e ) {
                // Throwing the exception does not reliably free the resultset, and
                // would also leave the connection in unbuffered mode.
                $this->cleanUpFailedDump( $wrapper, $prev );

                // Inform caller about problem; close the profiling section
                // first so this path matches the log-dump failure path.
                wfProfileOut( __METHOD__ );
                throw $e;
            }
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Walks a page/revision result set and emits page open, revision and
     * page close fragments. A new page starts whenever the namespace or
     * title differs from the previous row, so rows of one page are expected
     * to be adjacent. The author list (if any) is added only before the
     * final page close.
     *
     * @param $resultset Iterable of rows from the page dump query
     */
    protected function outputPageStream( $resultset ) {
        $last = null;
        foreach ( $resultset as $row ) {
            if ( $last === null ||
                $last->page_namespace != $row->page_namespace ||
                $last->page_title != $row->page_title ) {
                if ( $last !== null ) {
                    // Finish the previous page before opening the next one
                    $output = '';
                    if ( $this->dumpUploads ) {
                        $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
                    }
                    $output .= $this->writer->closePage();
                    $this->sink->writeClosePage( $output );
                }
                $output = $this->writer->openPage( $row );
                $this->sink->writeOpenPage( $row, $output );
                $last = $row;
            }
            $output = $this->writer->writeRevision( $row );
            $this->sink->writeRevision( $row, $output );
        }
        if ( $last !== null ) {
            // Close the page that was still open when the rows ran out
            $output = '';
            if ( $this->dumpUploads ) {
                $output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
            }
            $output .= $this->author_list;
            $output .= $this->writer->closePage();
            $this->sink->writeClosePage( $output );
        }
    }

    /**
     * Emits one log item per row of the result set.
     *
     * @param $resultset Iterable of rows from the log dump query
     */
    protected function outputLogStream( $resultset ) {
        foreach ( $resultset as $row ) {
            $output = $this->writer->writeLogItem( $row );
            $this->sink->writeLogItem( $row, $output );
        }
    }
}
00474 
/**
 * Formats database rows and site metadata as fragments of the XML export
 * document. Every method returns a string; nothing is written here.
 */
class XmlDumpWriter {
    /**
     * Returns the export schema version.
     *
     * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
     * @return string
     */
    public function schemaVersion() {
        wfDeprecated( __METHOD__, '1.20' );
        return WikiExporter::schemaVersion();
    }

    /**
     * Opens the XML output stream's root "<mediawiki>" element, followed by
     * the <siteinfo> block. The root element is left open; closeStream()
     * supplies the matching end tag.
     *
     * @return string
     */
    public function openStream() {
        global $wgLanguageCode;
        $ver = WikiExporter::schemaVersion();
        return Xml::element( 'mediawiki', array(
            'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
            'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
            'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
                                    "http://www.mediawiki.org/xml/export-$ver.xsd", #TODO: how do we get a new version up there?
            'version'            => $ver,
            'xml:lang'           => $wgLanguageCode ),
            null ) .
            "\n" .
            $this->siteInfo();
    }

    /**
     * Builds the <siteinfo> block from the individual site metadata elements.
     *
     * @return string
     */
    public function siteInfo() {
        $info = array(
            $this->sitename(),
            $this->homelink(),
            $this->generator(),
            $this->caseSetting(),
            $this->namespaces() );
        return "  <siteinfo>\n    " .
            implode( "\n    ", $info ) .
            "\n  </siteinfo>\n";
    }

    /**
     * @return string <sitename> element holding $wgSitename
     */
    public function sitename() {
        global $wgSitename;
        return Xml::element( 'sitename', array(), $wgSitename );
    }

    /**
     * @return string <generator> element naming the MediaWiki version
     */
    public function generator() {
        global $wgVersion;
        return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
    }

    /**
     * @return string <base> element holding the main page's canonical URL
     */
    public function homelink() {
        return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalURL() );
    }

    /**
     * @return string <case> element: 'first-letter' when $wgCapitalLinks is
     *   set, 'case-sensitive' otherwise
     */
    public function caseSetting() {
        global $wgCapitalLinks;
        // "case-insensitive" option is reserved for future
        $sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
        return Xml::element( 'case', array(), $sensitivity );
    }

    /**
     * @return string <namespaces> block with one <namespace> element per
     *   formatted namespace of the content language
     */
    public function namespaces() {
        global $wgContLang;
        $spaces = "<namespaces>\n";
        foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
            $spaces .= '      ' .
                Xml::element( 'namespace',
                    array(
                        'key' => $ns,
                        'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
                    ), $title ) . "\n";
        }
        $spaces .= "    </namespaces>";
        return $spaces;
    }

    /**
     * Closes the output stream with the closing root element.
     *
     * @return string
     */
    public function closeStream() {
        return "</mediawiki>\n";
    }

    /**
     * Opens a "<page>" section: title, namespace, id, and - when present -
     * the redirect target and restrictions. The section is left open for
     * the revisions that follow.
     *
     * @param $row object Row carrying the page_* fields
     * @return string
     */
    public function openPage( $row ) {
        $out = "  <page>\n";
        $title = Title::makeTitle( $row->page_namespace, $row->page_title );
        $out .= '    ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
        $out .= '    ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
        $out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
        if ( $row->page_is_redirect ) {
            $page = WikiPage::factory( $title );
            $redirect = $page->getRedirectTarget();
            if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
                $out .= '    ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
            }
        }

        if ( $row->page_restrictions != '' ) {
            $out .= '    ' . Xml::element( 'restrictions', array(),
                strval( $row->page_restrictions ) ) . "\n";
        }

        wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

        return $out;
    }

    /**
     * Closes a "<page>" section.
     *
     * @return string
     */
    public function closePage() {
        return "  </page>\n";
    }

    /**
     * Dumps a "<revision>" section for one row. Contributor, comment and
     * text are replaced by deleted="deleted" placeholders when the matching
     * rev_deleted bit is set. Without an old_text field on the row, a text
     * stub referencing rev_text_id is written instead of the text.
     *
     * @param $row object Row carrying the rev_* (and page_*) fields
     * @return string
     */
    public function writeRevision( $row ) {
        wfProfileIn( __METHOD__ );

        // Read the visibility bitfield once; a row without the field is
        // treated as fully visible. This also keeps the sha1 check below
        // from touching an unset property.
        $deleted = isset( $row->rev_deleted ) ? intval( $row->rev_deleted ) : 0;

        $out = "    <revision>\n";
        $out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
        if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
            $out .= "      " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
        }

        $out .= $this->writeTimestamp( $row->rev_timestamp );

        if ( $deleted & Revision::DELETED_USER ) {
            $out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
        }

        if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
            $out .= "      <minor/>\n";
        }
        if ( $deleted & Revision::DELETED_COMMENT ) {
            $out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( $row->rev_comment != '' ) {
            $out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
        }

        $text = '';
        if ( $deleted & Revision::DELETED_TEXT ) {
            $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( isset( $row->old_text ) ) {
            // Raw text from the database may have invalid chars
            $text = strval( Revision::getRevisionText( $row ) );
            $out .= "      " . Xml::elementClean( 'text',
                array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
                $text ) . "\n";
        } else {
            // Stub output
            $out .= "      " . Xml::element( 'text',
                array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
                "" ) . "\n";
        }

        if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !( $deleted & Revision::DELETED_TEXT ) ) {
            $out .= "      " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
        } else {
            $out .= "      <sha1/>\n";
        }

        if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
            $content_model = strval( $row->rev_content_model );
        } else {
            // probably using $wgContentHandlerUseDB = false;
            // @todo test!
            $title = Title::makeTitle( $row->page_namespace, $row->page_title );
            $content_model = ContentHandler::getDefaultModelFor( $title );
        }

        $out .= "      " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

        if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
            $content_format = strval( $row->rev_content_format );
        } else {
            // probably using $wgContentHandlerUseDB = false;
            // @todo test!
            $content_handler = ContentHandler::getForModelID( $content_model );
            $content_format = $content_handler->getDefaultFormat();
        }

        $out .= "      " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

        // Hand the hook a reference to a local alias: taking a reference to
        // $this itself is rejected by newer PHP versions.
        $writer = $this;
        wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

        $out .= "    </revision>\n";

        wfProfileOut( __METHOD__ );
        return $out;
    }

    /**
     * Dumps a "<logitem>" section for one row. Contributor, comment and
     * action details are replaced by deleted="deleted" placeholders when the
     * matching log_deleted bit is set.
     *
     * @param $row object Row carrying the log_* fields and user_name
     * @return string
     */
    public function writeLogItem( $row ) {
        wfProfileIn( __METHOD__ );

        $out = "  <logitem>\n";
        $out .= "    " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

        $out .= $this->writeTimestamp( $row->log_timestamp, "    " );

        if ( $row->log_deleted & LogPage::DELETED_USER ) {
            $out .= "    " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $out .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
        }

        if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
            $out .= "    " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
        } elseif ( $row->log_comment != '' ) {
            $out .= "    " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
        }

        $out .= "    " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
        $out .= "    " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

        if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
            $out .= "    " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
        } else {
            $title = Title::makeTitle( $row->log_namespace, $row->log_title );
            $out .= "    " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
            $out .= "    " . Xml::elementClean( 'params',
                array( 'xml:space' => 'preserve' ),
                strval( $row->log_params ) ) . "\n";
        }

        $out .= "  </logitem>\n";

        wfProfileOut( __METHOD__ );
        return $out;
    }

    /**
     * Formats a timestamp as an ISO 8601 <timestamp> element.
     *
     * @param $timestamp string Timestamp in any form wfTimestamp() accepts
     * @param $indent string Leading whitespace for the line
     * @return string
     */
    public function writeTimestamp( $timestamp, $indent = "      " ) {
        $ts = wfTimestamp( TS_ISO_8601, $timestamp );
        return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
    }

    /**
     * Formats a <contributor> block. A contributor with a non-zero id, or
     * whose name is not a valid IP, is written as username + id; otherwise
     * only an <ip> element is written.
     *
     * @param $id int User id
     * @param $text string User name or IP address
     * @param $indent string Leading whitespace for each line
     * @return string
     */
    public function writeContributor( $id, $text, $indent = "      " ) {
        $out = $indent . "<contributor>\n";
        if ( $id || !IP::isValid( $text ) ) {
            $out .= $indent . "  " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
            $out .= $indent . "  " . Xml::element( 'id', null, strval( $id ) ) . "\n";
        } else {
            $out .= $indent . "  " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
        }
        $out .= $indent . "</contributor>\n";
        return $out;
    }

    /**
     * Writes <upload> records for every version of the file behind a page
     * in the file namespace: the file's history in reverse of the order
     * getHistory() returns, then the current version. Returns '' for pages
     * outside NS_FILE or without an existing local file.
     *
     * @param $row object Row carrying page_namespace and page_title
     * @param $dumpContents bool Whether to embed the file contents
     * @return string
     */
    public function writeUploads( $row, $dumpContents = false ) {
        if ( $row->page_namespace == NS_FILE ) {
            $img = wfLocalFile( $row->page_title );
            if ( $img && $img->exists() ) {
                $out = '';
                foreach ( array_reverse( $img->getHistory() ) as $ver ) {
                    $out .= $this->writeUpload( $ver, $dumpContents );
                }
                $out .= $this->writeUpload( $img, $dumpContents );
                return $out;
            }
        }
        return '';
    }

    /**
     * Writes a single <upload> record for one file version.
     *
     * @param $file File Version to describe
     * @param $dumpContents bool Whether to embed the contents as base64
     * @return string
     */
    public function writeUpload( $file, $dumpContents = false ) {
        if ( $file->isOld() ) {
            $archiveName = "      " .
                Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
        } else {
            $archiveName = '';
        }
        if ( $dumpContents ) {
            $be = $file->getRepo()->getBackend();
            # Dump file as base64
            # Uses only XML-safe characters, so does not need escaping
            # @TODO: too bad this loads the contents into memory (script might swap)
            $contents = '      <contents encoding="base64">' .
                chunk_split( base64_encode(
                    $be->getFileContents( array( 'src' => $file->getPath() ) ) ) ) .
                "      </contents>\n";
        } else {
            $contents = '';
        }
        if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
            $comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
        } else {
            $comment = Xml::elementClean( 'comment', null, $file->getDescription() );
        }
        return "    <upload>\n" .
            $this->writeTimestamp( $file->getTimestamp() ) .
            $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
            "      " . $comment . "\n" .
            "      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
            $archiveName .
            "      " . Xml::element( 'src', null, $file->getCanonicalURL() ) . "\n" .
            "      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
            "      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
            "      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
            $contents .
            "    </upload>\n";
    }

    /**
     * Returns the title text used in dumps. Interwiki titles are returned
     * as their prefixed text; for all others the namespace name comes from
     * the content language (underscores turned into spaces), followed by
     * ':' when the prefix is non-empty.
     *
     * @param $title Title
     * @return string
     */
    public static function canonicalTitle( Title $title ) {
        if ( $title->getInterwiki() ) {
            return $title->getPrefixedText();
        }

        global $wgContLang;
        $prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

        if ( $prefix !== '' ) {
            $prefix .= ':';
        }

        return $prefix . $title->getText();
    }
}
00882 
/**
 * Base output sink: every write*() event is forwarded unchanged to write(),
 * which sends the string to standard output. The rename-related methods do
 * nothing here; DumpFileOutput overrides them.
 */
class DumpOutput {

    /**
     * @param $string string Document header
     */
    public function writeOpenStream( $string ) {
        $this->write( $string );
    }

    /**
     * @param $string string Document footer
     */
    public function writeCloseStream( $string ) {
        $this->write( $string );
    }

    /**
     * @param $page object Row of the page being opened (unused here)
     * @param $string string Formatted page header
     */
    public function writeOpenPage( $page, $string ) {
        $this->write( $string );
    }

    /**
     * @param $string string Formatted page footer
     */
    public function writeClosePage( $string ) {
        $this->write( $string );
    }

    /**
     * @param $rev object Row of the revision (unused here)
     * @param $string string Formatted revision
     */
    public function writeRevision( $rev, $string ) {
        $this->write( $string );
    }

    /**
     * @param $rev object Row of the log entry (unused here)
     * @param $string string Formatted log item
     */
    public function writeLogItem( $rev, $string ) {
        $this->write( $string );
    }

    /**
     * Emits the string on standard output. Subclasses override this to
     * redirect the whole stream elsewhere.
     *
     * @param $string string
     */
    public function write( $string ) {
        echo $string;
    }

    /**
     * No-op in the base sink.
     *
     * @param $newname mixed
     */
    public function closeRenameAndReopen( $newname ) {
    }

    /**
     * No-op in the base sink.
     *
     * @param $newname mixed
     * @param $open bool
     */
    public function closeAndRename( $newname, $open = false ) {
    }

    /**
     * The base sink writes to no file, so there is no name to report.
     *
     * @return null
     */
    public function getFilenames() {
        return null;
    }
}
00972 
/**
 * Output sink that writes the stream to a file opened at construction time,
 * with support for closing the file and renaming it afterwards.
 */
class DumpFileOutput extends DumpOutput {
    /** Open file handle, or false when no file is open */
    protected $handle = false;

    /** Path the handle was opened on */
    protected $filename;

    /**
     * @param $file string Path of the file to write (opened in "wt" mode)
     */
    public function __construct( $file ) {
        $this->handle = fopen( $file, "wt" );
        $this->filename = $file;
    }

    /**
     * Writes the document footer, then closes the file if it is open.
     *
     * @param $string string
     */
    public function writeCloseStream( $string ) {
        parent::writeCloseStream( $string );
        if ( $this->handle ) {
            fclose( $this->handle );
            $this->handle = false;
        }
    }

    /**
     * Writes the string to the current file handle.
     *
     * @param $string string
     */
    public function write( $string ) {
        fputs( $this->handle, $string );
    }

    /**
     * Closes the file, renames it, and opens a fresh file under the
     * original name.
     *
     * @param $newname mixed Target name, or a one-element array holding it
     */
    public function closeRenameAndReopen( $newname ) {
        $this->closeAndRename( $newname, true );
    }

    /**
     * Renames the output file.
     *
     * @param $newname string Target name
     * @throws MWException when rename() reports failure
     */
    public function renameOrException( $newname ) {
        if ( !rename( $this->filename, $newname ) ) {
            throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
        }
    }

    /**
     * Normalises the rename argument to a single name. A scalar is returned
     * as is; a one-element array is unwrapped; an array without an element
     * at index 0 yields null, which callers treat as "nothing to rename".
     *
     * @param $newname mixed Name, or array of names
     * @return mixed The single name, or null
     * @throws MWException when the array holds more than one name
     */
    public function checkRenameArgCount( $newname ) {
        if ( is_array( $newname ) ) {
            if ( count( $newname ) > 1 ) {
                throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
            }
            // Guard the lookup so an empty array does not raise an
            // undefined-offset notice.
            $newname = isset( $newname[0] ) ? $newname[0] : null;
        }
        return $newname;
    }

    /**
     * Closes the file (if open) and renames it. When $newname is empty
     * nothing happens at all - the file is neither closed nor renamed.
     *
     * @param $newname mixed Target name, or a one-element array holding it
     * @param $open bool Whether to reopen a new file under the original name
     * @throws MWException when the rename fails or several names are given
     */
    public function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $this->handle = fopen( $this->filename, "wt" );
            }
        }
    }

    /**
     * @return string Path the sink writes to
     */
    public function getFilenames() {
        return $this->filename;
    }
}
01064 
/**
 * Dump sink that pipes the stream into the standard input of an external
 * command started with proc_open(). When a file name is given, the command's
 * standard output is redirected into that file.
 */
class DumpPipeOutput extends DumpFileOutput {
    /**
     * @var string Full shell command as it was started, including the
     *   " > file" redirect when a file name was supplied
     */
    protected $command;

    /** @var string|null File the command output is redirected to, if any */
    protected $filename;

    /** @var resource|bool Process resource from proc_open(), or false */
    protected $procOpenResource = false;

    /**
     * @param string $command Shell command that reads the dump on stdin
     * @param string|null $file Optional file to redirect the command output to
     */
    function __construct( $command, $file = null ) {
        if ( !is_null( $file ) ) {
            $command .= " > " . wfEscapeShellArg( $file );
        }

        $this->startCommand( $command );
        $this->command = $command;
        $this->filename = $file;
    }

    /**
     * Let the parent emit the closing text and close the pipe, then wait for
     * the child process to finish.
     *
     * @param string $string
     */
    function writeCloseStream( $string ) {
        parent::writeCloseStream( $string );
        if ( $this->procOpenResource ) {
            proc_close( $this->procOpenResource );
            $this->procOpenResource = false;
        }
    }

    /**
     * Start $command and use the write end of its stdin pipe as the handle.
     *
     * @param string $command
     */
    function startCommand( $command ) {
        $spec = array(
            0 => array( "pipe", "r" ),
        );
        $pipes = array();
        $this->procOpenResource = proc_open( $command, $spec, $pipes );
        // proc_open() returns false on failure and leaves $pipes empty; keep the
        // handle in its "closed" state instead of reading an undefined index.
        $this->handle = ( $this->procOpenResource && isset( $pipes[0] ) ) ? $pipes[0] : false;
    }

    /**
     * Shorthand for closeAndRename() with reopening enabled.
     *
     * @param string|array $newname
     */
    function closeRenameAndReopen( $newname ) {
        $this->closeAndRename( $newname, true );
    }

    /**
     * Close the pipe and the child process, rename the output file to
     * $newname and, when $open is set, start the command again so that it
     * writes a fresh file under the original file name.
     *
     * @param string|array $newname
     * @param bool $open
     */
    function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            if ( $this->procOpenResource ) {
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                // $this->command already carries the " > file" redirect added by
                // the constructor; appending it again would produce a command
                // with two redirects to the same file.
                $this->startCommand( $this->command );
            }
        }
    }

}
01144 
/**
 * Pipe sink that runs the dump through the "gzip" command and redirects the
 * result into the given file.
 */
class DumpGZipOutput extends DumpPipeOutput {

    /**
     * @param string $file File that receives the gzip output
     */
    function __construct( $file ) {
        $compressor = "gzip";
        parent::__construct( $compressor, $file );
    }
}
01158 
/**
 * Pipe sink that runs the dump through the "bzip2" command and redirects the
 * result into the given file.
 */
class DumpBZip2Output extends DumpPipeOutput {

    /**
     * @param string $file File that receives the bzip2 output
     */
    function __construct( $file ) {
        $compressor = "bzip2";
        parent::__construct( $compressor, $file );
    }
}
01172 
/**
 * Pipe sink that feeds the dump to "7za", which writes the archive itself
 * (the file name is an argument of the command rather than a redirect).
 */
class Dump7ZipOutput extends DumpPipeOutput {

    /**
     * @param string $file Archive file for 7za to create
     */
    function __construct( $file ) {
        $command = $this->setup7zCommand( $file );
        // No file is handed to the parent: the archive name is already part of
        // the command, so no stdout redirect into it must be appended.
        parent::__construct( $command );
        $this->filename = $file;
    }

    /**
     * Build the 7za command line that reads from stdin and adds to $file.
     *
     * @param string $file
     * @return string
     */
    function setup7zCommand( $file ) {
        $command = "7za a -bd -si " . wfEscapeShellArg( $file );
        // Suppress annoying useless crap from p7zip
        // Unfortunately this could suppress real error messages too
        $command .= ' >' . wfGetNull() . ' 2>&1';
        return $command;
    }

    /**
     * Close the pipe and the 7za process, rename the archive to $newname
     * and, when $open is set, start a new 7za process writing to the original
     * file name.
     *
     * @param string|array $newname
     * @param bool $open
     */
    function closeAndRename( $newname, $open = false ) {
        $newname = $this->checkRenameArgCount( $newname );
        if ( $newname ) {
            // Guard and reset both resources, as DumpPipeOutput::closeAndRename
            // does: closing unconditionally fails when writeCloseStream() has
            // already run, and leaving stale resources behind makes a later
            // writeCloseStream() close them a second time.
            if ( $this->handle ) {
                fclose( $this->handle );
                $this->handle = false;
            }
            if ( $this->procOpenResource ) {
                proc_close( $this->procOpenResource );
                $this->procOpenResource = false;
            }
            $this->renameOrException( $newname );
            if ( $open ) {
                $command = $this->setup7zCommand( $this->filename );
                $this->startCommand( $command );
            }
        }
    }
}
01217 
/**
 * Pass-through filter that sits in front of another sink. Pages are forwarded
 * only when pass() accepts them; this base class accepts every page.
 */
class DumpFilter {

    /** @var object Sink (output or another filter) that receives the forwarded calls */
    public $sink;

    /** @var bool Whether the page currently being written was accepted by pass() */
    protected $sendingThisPage;

    /**
     * @param object $sink Sink to forward to; held by reference
     */
    function __construct( &$sink ) {
        $this->sink =& $sink;
    }

    /**
     * @param string $string
     */
    function writeOpenStream( $string ) {
        $this->sink->writeOpenStream( $string );
    }

    /**
     * @param string $string
     */
    function writeCloseStream( $string ) {
        $this->sink->writeCloseStream( $string );
    }

    /**
     * Decide via pass() whether this page is forwarded, and forward the page
     * opening if it is.
     *
     * @param object $page
     * @param string $string
     */
    function writeOpenPage( $page, $string ) {
        $this->sendingThisPage = $this->pass( $page, $string );
        if ( $this->sendingThisPage ) {
            $this->sink->writeOpenPage( $page, $string );
        }
    }

    /**
     * Forward the page closing for an accepted page and reset the page state.
     *
     * @param string $string
     */
    function writeClosePage( $string ) {
        if ( $this->sendingThisPage ) {
            $this->sink->writeClosePage( $string );
            $this->sendingThisPage = false;
        }
    }

    /**
     * Forward a revision, but only while an accepted page is open.
     *
     * @param object $rev
     * @param string $string
     */
    function writeRevision( $rev, $string ) {
        if ( $this->sendingThisPage ) {
            $this->sink->writeRevision( $rev, $string );
        }
    }

    /**
     * Forward a log item unconditionally.
     *
     * Log items are not part of a page, so they must not go through the
     * sink's writeRevision(): a chained DumpFilter would drop them there
     * because no page is open. The sink's own writeLogItem() is used when it
     * has one; sinks without it still get the item through writeRevision().
     *
     * @param object $rev
     * @param string $string
     */
    function writeLogItem( $rev, $string ) {
        if ( method_exists( $this->sink, 'writeLogItem' ) ) {
            $this->sink->writeLogItem( $rev, $string );
        } else {
            $this->sink->writeRevision( $rev, $string );
        }
    }

    /**
     * @param string|array $newname
     */
    function closeRenameAndReopen( $newname ) {
        $this->sink->closeRenameAndReopen( $newname );
    }

    /**
     * @param string|array $newname
     * @param bool $open
     */
    function closeAndRename( $newname, $open = false ) {
        $this->sink->closeAndRename( $newname, $open );
    }

    /**
     * @return mixed Whatever the sink reports as its file name(s)
     */
    function getFilenames() {
        return $this->sink->getFilenames();
    }

    /**
     * Tell whether a page should be forwarded. Always true here; subclasses
     * override this to filter.
     *
     * @param object $page
     * @return bool
     */
    function pass( $page ) {
        return true;
    }
}
01329 
/**
 * Filter that lets through only pages whose namespace is not a talk namespace.
 */
class DumpNotalkFilter extends DumpFilter {

    /**
     * @param object $page Page row; its page_namespace field is inspected
     * @return bool True when the page is not in a talk namespace
     */
    function pass( $page ) {
        $inTalkNamespace = MWNamespace::isTalk( $page->page_namespace );
        return !$inTalkNamespace;
    }
}
01344 
/**
 * Filter that includes or excludes pages based on their namespace.
 */
class DumpNamespaceFilter extends DumpFilter {
    /** @var bool When true, listed namespaces are excluded instead of included */
    var $invert = false;

    /** @var array Set of selected namespace ids, stored as id => true */
    var $namespaces = array();

    /**
     * @param object $sink Sink to forward accepted pages to
     * @param string $param Comma-separated list of NS_* constant names or
     *   numeric namespace ids; a leading "!" inverts the selection
     * @throws MWException When an entry is neither a known name nor numeric
     */
    function __construct( &$sink, $param ) {
        parent::__construct( $sink );

        $constants = array(
            "NS_MAIN"           => NS_MAIN,
            "NS_TALK"           => NS_TALK,
            "NS_USER"           => NS_USER,
            "NS_USER_TALK"      => NS_USER_TALK,
            "NS_PROJECT"        => NS_PROJECT,
            "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
            "NS_FILE"           => NS_FILE,
            "NS_FILE_TALK"      => NS_FILE_TALK,
            "NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
            "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
            "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
            "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
            "NS_TEMPLATE"       => NS_TEMPLATE,
            "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
            "NS_HELP"           => NS_HELP,
            "NS_HELP_TALK"      => NS_HELP_TALK,
            "NS_CATEGORY"       => NS_CATEGORY,
            "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

        // Square-bracket offset guarded by isset(): the curly-brace form is a
        // parse error on PHP 8, and an unguarded offset on an empty string
        // raises a notice.
        if ( isset( $param[0] ) && $param[0] === '!' ) {
            $this->invert = true;
            $param = substr( $param, 1 );
        }

        foreach ( explode( ',', $param ) as $key ) {
            $key = trim( $key );
            if ( isset( $constants[$key] ) ) {
                $ns = $constants[$key];
                $this->namespaces[$ns] = true;
            } elseif ( is_numeric( $key ) ) {
                $ns = intval( $key );
                $this->namespaces[$ns] = true;
            } else {
                throw new MWException( "Unrecognized namespace key '$key'\n" );
            }
        }
    }

    /**
     * @param object $page Page row; its page_namespace field is inspected
     * @return bool True when the page's namespace is selected (or, with an
     *   inverted filter, when it is not)
     */
    function pass( $page ) {
        $match = isset( $this->namespaces[$page->page_namespace] );
        return ( $this->invert xor $match );
    }
}
01409 
/**
 * Filter that buffers each page and forwards it with only the revision whose
 * rev_id equals the page's page_latest.
 */
class DumpLatestFilter extends DumpFilter {
    /** @var object|null Page row buffered by writeOpenPage() */
    var $page;

    /** @var string|null Page opening text buffered by writeOpenPage() */
    var $pageString;

    /** @var object|null Latest revision row seen for the buffered page */
    var $rev;

    /** @var string|null Text belonging to the buffered revision */
    var $revString;

    /**
     * Remember the page instead of forwarding it right away.
     *
     * @param object $page
     * @param string $string
     */
    function writeOpenPage( $page, $string ) {
        $this->pageString = $string;
        $this->page = $page;
    }

    /**
     * Emit the buffered page with its single buffered revision, if one was
     * found, then clear the buffer.
     *
     * @param string $string
     */
    function writeClosePage( $string ) {
        if ( $this->rev ) {
            $this->sink->writeOpenPage( $this->page, $this->pageString );
            $this->sink->writeRevision( $this->rev, $this->revString );
            $this->sink->writeClosePage( $string );
        }
        $this->rev = $this->revString = null;
        $this->page = $this->pageString = null;
    }

    /**
     * Buffer the revision when it is the page's latest; ignore it otherwise.
     *
     * @param object $rev
     * @param string $string
     */
    function writeRevision( $rev, $string ) {
        if ( $rev->rev_id != $this->page->page_latest ) {
            return;
        }
        $this->rev = $rev;
        $this->revString = $string;
    }
}
01452 
/**
 * Sink that fans every call out to a list of sinks.
 */
class DumpMultiWriter {

    /** @var array List of sinks, indexed 0..count-1 */
    public $sinks;

    /** @var int Number of sinks */
    public $count;

    /**
     * @param array $sinks Zero-indexed list of sinks to write to
     */
    function __construct( $sinks ) {
        $this->sinks = $sinks;
        $this->count = count( $sinks );
    }

    /**
     * @param string $string
     */
    function writeOpenStream( $string ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            $this->sinks[$i]->writeOpenStream( $string );
        }
    }

    /**
     * @param string $string
     */
    function writeCloseStream( $string ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            $this->sinks[$i]->writeCloseStream( $string );
        }
    }

    /**
     * @param object $page
     * @param string $string
     */
    function writeOpenPage( $page, $string ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            $this->sinks[$i]->writeOpenPage( $page, $string );
        }
    }

    /**
     * @param string $string
     */
    function writeClosePage( $string ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            $this->sinks[$i]->writeClosePage( $string );
        }
    }

    /**
     * @param object $rev
     * @param string $string
     */
    function writeRevision( $rev, $string ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            $this->sinks[$i]->writeRevision( $rev, $string );
        }
    }

    /**
     * Fan a log item out to every sink, so this class offers the same
     * writeLogItem() entry point as DumpFilter. Sinks that do not implement
     * writeLogItem() receive the item through writeRevision() instead.
     *
     * @param object $rev
     * @param string $string
     */
    function writeLogItem( $rev, $string ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            if ( method_exists( $this->sinks[$i], 'writeLogItem' ) ) {
                $this->sinks[$i]->writeLogItem( $rev, $string );
            } else {
                $this->sinks[$i]->writeRevision( $rev, $string );
            }
        }
    }

    /**
     * Shorthand for closeAndRename() with reopening enabled.
     *
     * @param array $newnames
     */
    function closeRenameAndReopen( $newnames ) {
        $this->closeAndRename( $newnames, true );
    }

    /**
     * Close and rename every sink; entry $i of $newnames goes to sink $i.
     *
     * @param array $newnames New names, in the same order as the sinks
     * @param bool $open
     */
    function closeAndRename( $newnames, $open = false ) {
        for ( $i = 0; $i < $this->count; $i++ ) {
            $this->sinks[$i]->closeAndRename( $newnames[$i], $open );
        }
    }

    /**
     * @return array File names reported by each sink, in sink order
     */
    function getFilenames() {
        $filenames = array();
        for ( $i = 0; $i < $this->count; $i++ ) {
            $filenames[] = $this->sinks[$i]->getFilenames();
        }
        return $filenames;
    }

}
01543 
/**
 * Prepare a string for inclusion in XML output: run it through
 * UtfNormal::cleanUp() and then escape it with htmlspecialchars().
 *
 * @param string $string
 * @return string
 */
function xmlsafe( $string ) {
    wfProfileIn( __FUNCTION__ );

    // Clean up the UTF-8 first, then escape the markup characters.
    $cleaned = UtfNormal::cleanUp( $string );
    $escaped = htmlspecialchars( $cleaned );

    wfProfileOut( __FUNCTION__ );
    return $escaped;
}