MediaWiki  REL1_19
Export.php
Go to the documentation of this file.
00001 <?php
/**
 * Exports wiki pages, revisions or log entries from the database as an XML stream.
 *
 * dumpFrom() builds and runs the query, the writer (an XmlDumpWriter unless
 * replaced) renders each row as XML, and the sink (a DumpOutput unless replaced
 * through setOutputSink()) receives the rendered fragments.
 */
class WikiExporter {
	/** Return distinct author list (when not returning full history) */
	public $list_authors = false;
	/** XML fragment produced by do_list_authors(); appended when the last page of a stream is closed */
	public $author_list = "";

	/** When true, upload records are written before each page is closed */
	public $dumpUploads = false;
	/** When true, upload records also carry the base64-encoded file contents */
	public $dumpUploadFileContents = false;

	/** Database handle used for every query; stored by reference */
	public $db;
	/** One of the history constants below, or an array with 'dir', 'offset' and 'limit' keys */
	public $history;
	/** WikiExporter::BUFFER or WikiExporter::STREAM */
	public $buffer;
	/** Object that renders rows as XML */
	public $writer;
	/** Object that receives the rendered XML */
	public $sink;
	/** WikiExporter::TEXT or WikiExporter::STUB */
	public $text;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	/**
	 * @param $db Database handle, taken by reference
	 * @param $history Mixed: one of the history constants, or an array with
	 *   the keys 'dir', 'offset' and 'limit' describing a time-based slice
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM; STREAM
	 *   turns result buffering off for the duration of the dump query
	 * @param $text Int: WikiExporter::TEXT to join the text table, or
	 *   WikiExporter::STUB to leave it out
	 */
	public function __construct( &$db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db =& $db;
		$this->history = $history;
		$this->buffer  = $buffer;
		$this->text    = $text;
		$this->writer  = new XmlDumpWriter();
		$this->sink    = new DumpOutput();
	}

	/**
	 * Replace the object that receives the generated XML.
	 *
	 * @param $sink Object exposing the DumpOutput write*() methods, taken by reference
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Send the stream header produced by the writer to the sink.
	 */
	public function openStream() {
		$this->sink->writeOpenStream( $this->writer->openStream() );
	}

	/**
	 * Send the stream footer produced by the writer to the sink.
	 */
	public function closeStream() {
		$this->sink->writeCloseStream( $this->writer->closeStream() );
	}

	/**
	 * Dump every page (no additional query condition).
	 */
	public function allPages() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dump the pages whose page_id lies in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means "no upper bound"
	 */
	public function pagesByRange( $start, $end ) {
		return $this->dumpFrom( $this->idRangeCondition( 'page_id', $start, $end ) );
	}

	/**
	 * Dump the revisions whose rev_id lies in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means "no upper bound"
	 */
	public function revsByRange( $start, $end ) {
		return $this->dumpFrom( $this->idRangeCondition( 'rev_id', $start, $end ) );
	}

	/**
	 * Dump the page identified by a title object.
	 *
	 * @param $title Object providing getNamespace() and getDBkey()
	 */
	public function pageByTitle( $title ) {
		$nsClause = 'page_namespace=' . $title->getNamespace();
		$titleClause = 'page_title=' . $this->db->addQuotes( $title->getDBkey() );
		return $this->dumpFrom( $nsClause . ' AND ' . $titleClause );
	}

	/**
	 * Dump the page with the given name.
	 *
	 * @param $name String: page name as accepted by Title::newFromText()
	 * @throws MWException when the name does not yield a title
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		}
		return $this->pageByTitle( $title );
	}

	/**
	 * Dump each of the named pages in turn.
	 *
	 * @param $names Array of page names
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dump every log entry (no additional query condition).
	 */
	public function allLogs() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dump the log entries whose log_id lies in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means "no upper bound"
	 */
	public function logsByRange( $start, $end ) {
		return $this->dumpFrom( $this->idRangeCondition( 'log_id', $start, $end ) );
	}

	/**
	 * Build a "field >= start [AND field < end]" SQL condition.
	 * Both bounds are forced through intval() before being embedded.
	 *
	 * @param $field String: column name (supplied by this class only)
	 * @param $start Mixed: inclusive lower bound
	 * @param $end Mixed: exclusive upper bound, skipped when falsy
	 * @return String
	 */
	private function idRangeCondition( $field, $start, $end ) {
		$condition = $field . ' >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND ' . $field . ' < ' . intval( $end );
		}
		return $condition;
	}

	/**
	 * Generates the distinct list of authors of an article and stores it,
	 * as an XML fragment, in $this->author_list.
	 *
	 * Not called by default (depends on $this->list_authors).
	 * Can be set by Special:Export when not exporting whole history.
	 *
	 * @param $cond String: SQL condition selecting the page(s)
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				// Skip revisions whose author has been hidden (rev_deleted)
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// A revision belongs to a page through rev_page; joining on
				// rev_id would pair each page with an unrelated revision.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars() escapes only the characters that are special
			// in XML; htmlentities() would emit HTML-only named entities.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				intval( $row->rev_user ) .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Build the dump query for the configured history mode, run it and pass
	 * the rows to outputLogStream() or outputPageStream().
	 *
	 * @param $cond String: extra SQL condition, '' for none
	 * @throws MWException when the history setting matches no known mode, or
	 *   when STABLE is requested and no hook handler returned false
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		# (an array-valued history never selects log mode; checking is_array()
		# first avoids applying a bitwise operator to an array)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );
			$result = $this->db->select( array( 'logging', 'user' ),
				array( "{$logging}.*", 'user_name' ), // grab the user name
				$where,
				__METHOD__,
				array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
			);
			$wrapper = $this->db->resultObject( $result );
			$this->outputLogStream( $wrapper );
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = 'rev_page ASC, rev_id ASC';
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			wfRunHooks( 'ModifyExportQuery',
				array( $this->db, &$tables, &$cond, &$opts, &$join ) );

			# Do the query!
			$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
			$wrapper = $this->db->resultObject( $result );
			# Output dump results. This is done exactly once: the author list
			# is appended by outputPageStream() itself, so iterating the result
			# a second time would only write every page again.
			$this->outputPageStream( $wrapper );

			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Render a result set of page+revision rows.
	 *
	 * A new page element is opened whenever the namespace or title differs
	 * from the row that opened the current page, so rows of one page must be
	 * adjacent. The author list is added only when the final page is closed.
	 *
	 * @param $resultset Iterable result of the page dump query
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			$startsNewPage = is_null( $last )
				|| $last->page_namespace != $row->page_namespace
				|| $last->page_title != $row->page_title;
			if ( $startsNewPage ) {
				if ( isset( $last ) ) {
					// Finish the previous page before starting the next one
					$closing = '';
					if ( $this->dumpUploads ) {
						$closing .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$closing .= $this->writer->closePage();
					$this->sink->writeClosePage( $closing );
				}
				$opening = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $opening );
				$last = $row;
			}
			$revisionXml = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $revisionXml );
		}
		if ( isset( $last ) ) {
			$closing = '';
			if ( $this->dumpUploads ) {
				$closing .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$closing .= $this->author_list;
			$closing .= $this->writer->closePage();
			$this->sink->writeClosePage( $closing );
		}
	}

	/**
	 * Render a result set of log rows, one log item per row.
	 *
	 * @param $resultset Iterable result of the log dump query
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$itemXml = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $itemXml );
		}
	}
}
00373 
/**
 * Renders database rows as fragments of the MediaWiki XML export format.
 *
 * Every method returns a string; nothing is written anywhere by this class.
 */
class XmlDumpWriter {
	/**
	 * Version of the export schema named in the stream header.
	 *
	 * @return String
	 */
	public function schemaVersion() {
		return "0.6";
	}

	/**
	 * Opening <mediawiki> tag, carrying the schema references and the wiki
	 * language, followed by the <siteinfo> section.
	 *
	 * @return String
	 */
	public function openStream() {
		global $wgLanguageCode;
		$ver = $this->schemaVersion();
		$rootAttribs = array(
			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
						"http://www.mediawiki.org/xml/export-$ver.xsd",
			'version'            => $ver,
			'xml:lang'           => $wgLanguageCode,
		);
		// A null content argument is passed so that only the start tag is produced
		return Xml::element( 'mediawiki', $rootAttribs, null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * The <siteinfo> section: site name, base URL, generator, case setting
	 * and namespace list, one per line.
	 *
	 * @return String
	 */
	public function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return "  <siteinfo>\n    " .
			implode( "\n    ", $info ) .
			"\n  </siteinfo>\n";
	}

	/**
	 * @return String <sitename> element holding $wgSitename
	 */
	public function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return String <generator> element holding "MediaWiki" and $wgVersion
	 */
	public function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return String <base> element holding the canonical URL of the main page
	 */
	public function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalUrl() );
	}

	/**
	 * @return String <case> element derived from $wgCapitalLinks
	 */
	public function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return String <namespaces> element listing every formatted namespace
	 *   of the content language with its key and case setting
	 */
	public function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$attribs = array(
				'key' => $ns,
				'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
			);
			$spaces .= '      ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
		}
		$spaces .= "    </namespaces>";
		return $spaces;
	}

	/**
	 * Closing tag matching openStream().
	 *
	 * @return String
	 */
	public function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opening <page> tag plus the page-level elements: title, namespace, id,
	 * an optional redirect target and optional restrictions.
	 *
	 * Only page_* columns are used here; per-revision data such as the
	 * content hash is written by writeRevision().
	 *
	 * @param $row Object: row carrying page_namespace, page_title, page_id,
	 *   page_is_redirect and page_restrictions
	 * @return String
	 */
	public function openPage( $row ) {
		$out = "  <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= '    ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= '    ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= '    ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= '    ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * Closing tag matching openPage().
	 *
	 * @return String
	 */
	public function closePage() {
		return "  </page>\n";
	}

	/**
	 * A complete <revision> element: id, timestamp, contributor, minor flag,
	 * comment, content hash and text (or a text stub).
	 *
	 * Parts hidden through rev_deleted are replaced by an empty element
	 * marked deleted="deleted".
	 *
	 * @param $row Object: row carrying the rev_* columns and, for full-text
	 *   dumps, old_text
	 * @return String
	 */
	public function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out  = "    <revision>\n";
		$out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( $row->rev_deleted & Revision::DELETED_USER ) {
			$out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( $row->rev_minor_edit ) {
			$out .= "      <minor/>\n";
		}
		if ( $row->rev_deleted & Revision::DELETED_COMMENT ) {
			$out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= "      " . Xml::elementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n";
		}

		// rev_sha1 differs from revision to revision, so it is written per
		// revision. It is left empty when the text itself is hidden, because
		// the hash would let a reader confirm a guess at the hidden content.
		if ( !empty( $row->rev_sha1 ) && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= "      " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= "      <sha1/>\n";
		}

		$text = '';
		if ( $row->rev_deleted & Revision::DELETED_TEXT ) {
			$out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= "      " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= "      " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		// Hook handlers receive the writer through a local alias; $this
		// itself is not passed by reference.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= "    </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * A complete <logitem> element: id, timestamp, contributor, comment,
	 * type, action and, unless the action is hidden, target title and params.
	 *
	 * @param $row Object: row carrying the log_* columns and user_name
	 * @return String
	 */
	public function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out  = "    <logitem>\n";
		$out .= "      " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= "      " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= "      " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= "      " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= "      " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= "      " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= "    </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * @param $timestamp Mixed: timestamp in any form wfTimestamp() accepts
	 * @return String <timestamp> element in ISO 8601 form
	 */
	public function writeTimestamp( $timestamp ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return "      " . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * A <contributor> element. A username/id pair is written when $id is
	 * truthy or $text is not a valid IP address; otherwise $text is written
	 * as an <ip> element.
	 *
	 * @param $id Int: user id
	 * @param $text String: user name or IP address
	 * @return String
	 */
	public function writeContributor( $id, $text ) {
		$out = "      <contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= "        " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= "        " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= "        " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= "      </contributor>\n";
		return $out;
	}

	/**
	 * Upload records for a page in the NS_IMAGE namespace: the versions
	 * returned by getHistory() in reverse order, then the current version.
	 * Returns '' for other namespaces or when the file does not exist.
	 *
	 * @param $row Object: row carrying page_namespace and page_title
	 * @param $dumpContents Bool: also embed the file contents
	 * @return String
	 */
	public function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace != NS_IMAGE ) {
			return '';
		}
		$img = wfLocalFile( $row->page_title );
		if ( !$img || !$img->exists() ) {
			return '';
		}
		$out = '';
		foreach ( array_reverse( $img->getHistory() ) as $ver ) {
			$out .= $this->writeUpload( $ver, $dumpContents );
		}
		$out .= $this->writeUpload( $img, $dumpContents );
		return $out;
	}

	/**
	 * A single <upload> element describing one version of a file.
	 *
	 * @param $file Object: file version to describe
	 * @param $dumpContents Bool: also embed the base64-encoded contents
	 * @return String
	 */
	public function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = "      " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			$contents = '      <contents encoding="base64">' .
				chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
				"      </contents>\n";
		} else {
			$contents = '';
		}
		return "    <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			"      " . Xml::elementClean( 'comment', null, $file->getDescription() ) . "\n" .
			"      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			"      " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
			"      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			"      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			"      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			"    </upload>\n";
	}

	/**
	 * Title text as used in dumps. Interwiki titles are returned through
	 * getPrefixedText(); all others are prefixed with the content-language
	 * namespace name (underscores turned into spaces) and a colon, the prefix
	 * being omitted when that name is empty.
	 *
	 * @param $title Title
	 * @return String
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->getInterwiki() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}
00712 
00713 
/**
 * Default dump sink: prints everything it is given.
 *
 * Every write*() method forwards its string to write(), so a subclass only
 * has to override write() to send the dump somewhere else.
 */
class DumpOutput {
	/**
	 * @param $string String: stream header
	 */
	public function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string String: stream footer
	 */
	public function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $page Object: row the page was opened from (not used here)
	 * @param $string String: page header
	 */
	public function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string String: page footer
	 */
	public function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Object: row the revision was rendered from (not used here)
	 * @param $string String: rendered revision
	 */
	public function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Object: row the log item was rendered from (not used here)
	 * @param $string String: rendered log item
	 */
	public function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Emit a string on standard output.
	 *
	 * @param $string String
	 */
	public function write( $string ) {
		echo $string;
	}

	/**
	 * Does nothing in this class; file-backed subclasses override it.
	 *
	 * @param $newname Mixed: ignored
	 */
	public function closeRenameAndReopen( $newname ) {
	}

	/**
	 * Does nothing in this class; file-backed subclasses override it.
	 *
	 * @param $newname Mixed: ignored
	 * @param $open Bool: ignored
	 */
	public function closeAndRename( $newname, $open = false ) {
	}

	/**
	 * @return null This sink has no file behind it
	 */
	public function getFilenames() {
		return null;
	}
}
00781 
/**
 * Dump sink that writes to a file.
 */
class DumpFileOutput extends DumpOutput {
	protected $handle, $filename;

	/**
	 * @param $file String: path of the file to create or truncate
	 * @throws MWException when the file cannot be opened for writing
	 */
	function __construct( $file ) {
		$this->handle = $this->openForWriting( $file );
		$this->filename = $file;
	}

	/**
	 * Append a string to the open file.
	 *
	 * @param $string String
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * Close the file, move it to $newname and start a fresh file under the
	 * original name.
	 *
	 * @param $newname Mixed: see closeAndRename()
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Move the output file to $newname.
	 *
	 * @param $newname String: target path
	 * @throws MWException when rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Reduce a rename argument to a single name.
	 *
	 * @param $newname Mixed: a name, or an array holding at most one name
	 * @return Mixed: the name; null when an array without element 0 was given
	 * @throws MWException when the array holds more than one entry
	 */
	function checkRenameArgCount( $newname ) {
		if ( !is_array( $newname ) ) {
			return $newname;
		}
		if ( count( $newname ) > 1 ) {
			throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
		}
		// An empty array yields null (which closeAndRename() treats as
		// "nothing to do") instead of reading a missing offset.
		return isset( $newname[0] ) ? $newname[0] : null;
	}

	/**
	 * Close the file and move it to $newname. Nothing happens when no name
	 * is supplied.
	 *
	 * @param $newname Mixed: a name, or an array holding at most one name
	 * @param $open Bool: reopen the original file name afterwards
	 * @throws MWException when the rename or the reopen fails
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( !$newname ) {
			return;
		}
		fclose( $this->handle );
		$this->renameOrException( $newname );
		if ( $open ) {
			$this->handle = $this->openForWriting( $this->filename );
		}
	}

	/**
	 * @return String path this sink was constructed with
	 */
	function getFilenames() {
		return $this->filename;
	}

	/**
	 * Open a file in "wt" mode, failing loudly rather than leaving a false
	 * handle behind for later writes.
	 *
	 * @param $file String: path to open
	 * @return resource
	 * @throws MWException when fopen() fails
	 */
	private function openForWriting( $file ) {
		$handle = fopen( $file, "wt" );
		if ( $handle === false ) {
			throw new MWException( __METHOD__ . ": could not open file $file for writing\n" );
		}
		return $handle;
	}
}
00834 
/**
 * Dump output that feeds the text into the standard input of an external
 * command, optionally redirecting that command's standard output to a file.
 */
class DumpPipeOutput extends DumpFileOutput {
        protected $command, $filename;

        /** Process handle returned by proc_open() for the running command */
        protected $procOpenResource = false;

        /**
         * @param $command string Shell command that consumes the dump on stdin
         * @param $file string|null If given, the command's stdout is redirected to this path
         */
        function __construct( $command, $file = null ) {
                if ( !is_null( $file ) ) {
                        $command .=  " > " . wfEscapeShellArg( $file );
                }

                $this->startCommand( $command );
                // Note: the stored command already carries the redirect, if any.
                $this->command = $command;
                $this->filename = $file;
        }

        /**
         * Launch the command and keep the writable end of its stdin pipe
         * as the output handle.
         *
         * @param $command string Complete shell command line
         */
        function startCommand( $command ) {
                $spec = array(
                        0 => array( "pipe", "r" ),
                );
                $pipes = array();
                $this->procOpenResource = proc_open( $command, $spec, $pipes );
                $this->handle = $pipes[0];
        }

        /**
         * Stop the command, move its output file to $newname and start the
         * command again writing to the original file name.
         *
         * @param $newname string|array New name, or a one-element array holding it
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Stop the command and move its output file to $newname. Nothing
         * happens when the resolved name is empty.
         *
         * @param $newname string|array New name, or a one-element array holding it
         * @param $open bool If true, restart the command afterwards
         * @throws MWException when the rename fails or several names are passed
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( !$newname ) {
                        return;
                }

                fclose( $this->handle );
                proc_close( $this->procOpenResource );
                $this->renameOrException( $newname );

                if ( $open ) {
                        $command = $this->command;
                        $redirect = " > " . wfEscapeShellArg( $this->filename );
                        // The constructor stores the command with the redirect
                        // already attached; only append it when it is missing so
                        // the restarted command does not redirect stdout twice.
                        if ( substr( $command, -strlen( $redirect ) ) !== $redirect ) {
                                $command .= $redirect;
                        }
                        $this->startCommand( $command );
                }
        }

}
00882 
/**
 * Pipe output that runs the dump through the "gzip" command and stores
 * the result in a file.
 */
class DumpGZipOutput extends DumpPipeOutput {
        /**
         * @param $file string Path that receives the command's output
         */
        function __construct( $file ) {
                $compressor = "gzip";
                parent::__construct( $compressor, $file );
        }
}
00892 
/**
 * Pipe output that runs the dump through the "bzip2" command and stores
 * the result in a file.
 */
class DumpBZip2Output extends DumpPipeOutput {
        /**
         * @param $file string Path that receives the command's output
         */
        function __construct( $file ) {
                $compressor = "bzip2";
                parent::__construct( $compressor, $file );
        }
}
00902 
/**
 * Pipe output that hands the dump to the "7za" command. Unlike the other
 * pipe outputs, the archive path is passed as a command argument rather
 * than through a stdout redirect.
 */
class Dump7ZipOutput extends DumpPipeOutput {
        /**
         * @param $file string Path of the archive to write
         */
        function __construct( $file ) {
                parent::__construct( $this->setup7zCommand( $file ) );
                // The parent constructor was called without a file, so record it here.
                $this->filename = $file;
        }

        /**
         * Build the 7za command line for the given archive.
         *
         * @param $file string Path of the archive to write
         * @return string Shell command
         */
        function setup7zCommand( $file ) {
                // Everything the command prints is discarded to silence p7zip's
                // chatter; the downside is that genuine error messages vanish too.
                $silence = ' >' . wfGetNull() . ' 2>&1';
                return "7za a -bd -si " . wfEscapeShellArg( $file ) . $silence;
        }

        /**
         * Stop the command and move the archive to $newname. Nothing happens
         * when the resolved name is empty.
         *
         * @param $newname string|array New name, or a one-element array holding it
         * @param $open bool If true, restart the command for the original archive path
         */
        function closeAndRename( $newname, $open = false ) {
                $target = $this->checkRenameArgCount( $newname );
                if ( !$target ) {
                        return;
                }

                fclose( $this->handle );
                proc_close( $this->procOpenResource );
                $this->renameOrException( $target );

                if ( $open ) {
                        $this->startCommand( $this->setup7zCommand( $this->filename ) );
                }
        }
}
00935 
00936 
00937 
/**
 * Base class for dump filters: sits in front of another sink and forwards
 * page output only for pages accepted by pass(). This base class accepts
 * every page; subclasses override pass().
 */
class DumpFilter {
        /** Downstream sink that receives whatever this filter lets through */
        var $sink;

        /**
         * Whether the page currently being written was accepted by pass().
         * Declared with a default so that close/revision calls arriving
         * before any page was opened read a defined value.
         */
        var $sendingThisPage = false;

        /**
         * @param $sink object Sink to forward to (held by reference)
         */
        function __construct( &$sink ) {
                $this->sink =& $sink;
        }

        /**
         * @param $string string Stream header, forwarded unconditionally
         */
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }

        /**
         * @param $string string Stream footer, forwarded unconditionally
         */
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }

        /**
         * Decide whether the page passes the filter and, if so, forward its
         * opening text.
         *
         * @param $page object Page row
         * @param $string string Serialized page header
         */
        function writeOpenPage( $page, $string ) {
                // $string is passed along as an extra argument, as before;
                // the pass() implementations in this file ignore it.
                $this->sendingThisPage = $this->pass( $page, $string );
                if ( $this->sendingThisPage ) {
                        $this->sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * Forward the page's closing text if the page was accepted.
         *
         * @param $string string Serialized page footer
         */
        function writeClosePage( $string ) {
                if ( $this->sendingThisPage ) {
                        $this->sink->writeClosePage( $string );
                        $this->sendingThisPage = false;
                }
        }

        /**
         * Forward a revision if its page was accepted.
         *
         * @param $rev object Revision row
         * @param $string string Serialized revision
         */
        function writeRevision( $rev, $string ) {
                if ( $this->sendingThisPage ) {
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Forward a log item. Log items are not tied to an open page, so they
         * must not go through the sink's per-page revision gate; otherwise a
         * filter used as the sink would silently drop them.
         *
         * @param $rev object Log row
         * @param $string string Serialized log item
         */
        function writeLogItem( $rev, $string ) {
                if ( method_exists( $this->sink, 'writeLogItem' ) ) {
                        $this->sink->writeLogItem( $rev, $string );
                } else {
                        // Sinks without a dedicated log hook keep receiving log
                        // items through writeRevision(), as before.
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * @param $newname string|array Passed through to the sink
         */
        function closeRenameAndReopen( $newname ) {
                $this->sink->closeRenameAndReopen( $newname );
        }

        /**
         * @param $newname string|array Passed through to the sink
         * @param $open bool Passed through to the sink
         */
        function closeAndRename( $newname, $open = false ) {
                $this->sink->closeAndRename( $newname, $open );
        }

        /**
         * @return mixed Whatever the sink reports as its file name(s)
         */
        function getFilenames() {
                return $this->sink->getFilenames();
        }

        /**
         * Filter predicate; override in subclasses.
         *
         * @param $page object Page row
         * @return bool True to let the page through
         */
        function pass( $page ) {
                return true;
        }
}
01001 
/**
 * Filter that lets through only pages whose namespace is not a talk
 * namespace according to MWNamespace::isTalk().
 */
class DumpNotalkFilter extends DumpFilter {
        /**
         * @param $page object Page row; its page_namespace field is examined
         * @return bool True when the page is not in a talk namespace
         */
        function pass( $page ) {
                $inTalkNamespace = MWNamespace::isTalk( $page->page_namespace );
                return !$inTalkNamespace;
        }
}
01011 
/**
 * Filter that includes or excludes pages based on a list of namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
        /** When true, listed namespaces are excluded instead of included */
        var $invert = false;

        /** Set of selected namespace numbers, stored as array keys */
        var $namespaces = array();

        /**
         * @param $sink object Sink to forward accepted pages to
         * @param $param string Comma-separated list of namespace numbers and/or
         *   NS_* constant names; a leading "!" inverts the selection
         * @throws MWException when an entry is neither a known name nor numeric
         */
        function __construct( &$sink, $param ) {
                parent::__construct( $sink );

                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
                        "NS_USER"           => NS_USER,
                        "NS_USER_TALK"      => NS_USER_TALK,
                        "NS_PROJECT"        => NS_PROJECT,
                        "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
                        "NS_FILE"           => NS_FILE,
                        "NS_FILE_TALK"      => NS_FILE_TALK,
                        "NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
                        "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
                        "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
                        "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
                        "NS_TEMPLATE"       => NS_TEMPLATE,
                        "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
                        "NS_HELP"           => NS_HELP,
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

                // substr() instead of a string offset: it is safe on an empty
                // string and avoids the curly-brace offset syntax, which newer
                // PHP versions no longer parse.
                if ( substr( $param, 0, 1 ) === '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }

                foreach ( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
                        if ( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                                $this->namespaces[$ns] = true;
                        } elseif ( is_numeric( $key ) ) {
                                $ns = intval( $key );
                                $this->namespaces[$ns] = true;
                        } else {
                                throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                }
        }

        /**
         * @param $page object Page row; its page_namespace field is examined
         * @return bool True when the page's namespace is selected (or, with
         *   inversion, when it is not)
         */
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
        }
}
01067 
01068 
/**
 * Filter that reduces each page to the one revision whose id equals the
 * page's page_latest field. Page output is buffered until the page closes.
 */
class DumpLatestFilter extends DumpFilter {
        var $page, $pageString, $rev, $revString;

        /**
         * Remember the page header instead of forwarding it right away.
         *
         * @param $page object Page row
         * @param $string string Serialized page header
         */
        function writeOpenPage( $page, $string ) {
                $this->pageString = $string;
                $this->page = $page;
        }

        /**
         * Emit the buffered page, but only if its latest revision was seen,
         * then clear the buffer for the next page.
         *
         * @param $string string Serialized page footer
         */
        function writeClosePage( $string ) {
                $sawLatest = (bool)$this->rev;
                if ( $sawLatest ) {
                        $this->sink->writeOpenPage( $this->page, $this->pageString );
                        $this->sink->writeRevision( $this->rev, $this->revString );
                        $this->sink->writeClosePage( $string );
                }

                $this->rev = $this->revString = null;
                $this->page = $this->pageString = null;
        }

        /**
         * Keep the revision only when it is the page's latest one.
         *
         * @param $rev object Revision row
         * @param $string string Serialized revision
         */
        function writeRevision( $rev, $string ) {
                if ( $rev->rev_id != $this->page->page_latest ) {
                        return;
                }
                $this->revString = $string;
                $this->rev = $rev;
        }
}
01100 
/**
 * Sink that fans every call out to a list of other sinks, in list order.
 * The list is expected to be indexed 0..n-1.
 */
class DumpMultiWriter {
        /** Sinks that receive every call */
        var $sinks = array();

        /** Number of sinks, fixed at construction time */
        var $count = 0;

        /**
         * @param $sinks array Zero-indexed list of sink objects
         */
        function __construct( $sinks ) {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }

        /**
         * @param $string string Stream header
         */
        function writeOpenStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenStream( $string );
                }
        }

        /**
         * @param $string string Stream footer
         */
        function writeCloseStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeCloseStream( $string );
                }
        }

        /**
         * @param $page object Page row
         * @param $string string Serialized page header
         */
        function writeOpenPage( $page, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string string Serialized page footer
         */
        function writeClosePage( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeClosePage( $string );
                }
        }

        /**
         * @param $rev object Revision row
         * @param $string string Serialized revision
         */
        function writeRevision( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeRevision( $rev, $string );
                }
        }

        /**
         * Fan a log item out to every sink, matching the writeLogItem()
         * method the other sinks in this file provide.
         *
         * @param $rev object Log row
         * @param $string string Serialized log item
         */
        function writeLogItem( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeLogItem( $rev, $string );
                }
        }

        /**
         * @param $newnames array One new name per sink, in sink order
         */
        function closeRenameAndReopen( $newnames ) {
                $this->closeAndRename( $newnames, true );
        }

        /**
         * Close and rename every sink's output; sink $i receives $newnames[$i].
         *
         * @param $newnames array One new name per sink, in sink order
         * @param $open bool Whether each sink should reopen afterwards
         */
        function closeAndRename( $newnames, $open = false ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->closeAndRename( $newnames[$i], $open );
                }
        }

        /**
         * @return array Each sink's getFilenames() result, in sink order
         */
        function getFilenames() {
                $filenames = array();
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $filenames[] =  $this->sinks[$i]->getFilenames();
                }
                return $filenames;
        }

}
01160 
/**
 * Prepare a string for inclusion in XML output: run it through
 * UtfNormal::cleanUp() and then escape markup characters with
 * htmlspecialchars().
 *
 * @param $string string Raw text
 * @return string Cleaned and escaped text
 */
function xmlsafe( $string ) {
        $profileSection = __FUNCTION__;
        wfProfileIn( $profileSection );

        // Order matters: clean up first, escape the cleaned text second.
        $cleaned = UtfNormal::cleanUp( $string );
        $escaped = htmlspecialchars( $cleaned );

        wfProfileOut( $profileSection );
        return $escaped;
}