MediaWiki
REL1_19
|
<?php
/**
 * Base classes for XML dumps and exports: the exporter that runs the
 * database queries, the writer that renders rows as XML fragments, and the
 * output sinks and filters the fragments are pushed through.
 */

/**
 * Runs export queries and streams the resulting rows, rendered as XML by an
 * XmlDumpWriter, into an output sink.
 */
class WikiExporter {
	var $list_authors = false; # Return distinct author list (when not returning full history)
	var $author_list = "";

	var $dumpUploads = false;
	var $dumpUploadFileContents = false;

	# Set by the constructor / setOutputSink()
	var $db, $history, $buffer, $writer, $sink, $text;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	/**
	 * @param $db Database connection the export queries are run on
	 * @param $history Mixed: one of the FULL/CURRENT/STABLE/LOGS/RANGE
	 *   constants, or an array with the keys 'dir' ('asc' or anything else
	 *   for descending), 'offset' (timestamp) and 'limit' (row count)
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM;
	 *   STREAM turns off result buffering on $db while the dump query runs
	 * @param $text Int: WikiExporter::TEXT to join the text table, or
	 *   WikiExporter::STUB to leave it out
	 */
	function __construct( &$db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db =& $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replace the output sink. The default sink prints to standard output.
	 *
	 * @param $sink Object implementing the DumpOutput write*() methods
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Write the stream header produced by the writer to the sink.
	 */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/**
	 * Write the stream footer produced by the writer to the sink.
	 */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/**
	 * Dump with no additional condition.
	 */
	public function allPages() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dump pages whose page_id is in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a false-ish value means no upper bound
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		return $this->dumpFrom( $condition );
	}

	/**
	 * Dump revisions whose rev_id is in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a false-ish value means no upper bound
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		return $this->dumpFrom( $condition );
	}

	/**
	 * Dump the page identified by a Title object.
	 *
	 * @param $title Title
	 */
	public function pageByTitle( $title ) {
		return $this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dump the page with the given name.
	 *
	 * @param $name String: page name, parsed with Title::newFromText()
	 * @throws MWException when the name does not yield a title
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		}
		return $this->pageByTitle( $title );
	}

	/**
	 * Dump each of the named pages in turn.
	 *
	 * @param $names Array of page names
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dump with no additional condition (log rows when history is LOGS).
	 */
	public function allLogs() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dump log entries whose log_id is in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a false-ish value means no upper bound
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		return $this->dumpFrom( $condition );
	}

	# Generates the distinct list of authors of an article
	# Not called by default (depends on $this->list_authors)
	# Can be set by Special:Export when not exporting whole history
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Revisions belong to a page through rev_page; the
				// revision's own ID is unrelated to the page ID.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars(), not htmlentities(): the output is XML,
			// where HTML named entities are not defined.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Build and run the dump query for the configured history mode and push
	 * every row through the writer into the sink.
	 *
	 * @param $cond String: extra SQL condition, or '' for none
	 * @throws MWException on an unrecognised history mode, or when no hook
	 *   handles a STABLE dump
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		# (an array $history is an offset/limit page dump, never a log dump;
		# test that first so the bitwise check only ever sees an integer)
		if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );
			$result = $this->db->select( array( 'logging', 'user' ),
				array( "{$logging}.*", 'user_name' ), // grab the user name
				$where,
				__METHOD__,
				array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
			);
			$wrapper = $this->db->resultObject( $result );
			$this->outputLogStream( $wrapper );
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = 'rev_page ASC, rev_id ASC';
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			wfRunHooks( 'ModifyExportQuery',
				array( $this->db, &$tables, &$cond, &$opts, &$join ) );

			# Do the query!
			$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
			$wrapper = $this->db->resultObject( $result );
			# Output dump results. The collected author list (if any) is
			# appended by outputPageStream() itself, so the result set is
			# walked exactly once; walking it again would emit every page twice.
			$this->outputPageStream( $wrapper );

			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Walk a result set of page+revision rows and emit page open/close
	 * fragments around runs of rows that share namespace and title.
	 * The result set is expected to be grouped by page for this to produce
	 * one <page> element per page.
	 *
	 * @param $resultset Iterable result of the page dump query
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			$pageChanged = is_null( $last ) ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title;
			if ( $pageChanged ) {
				if ( isset( $last ) ) {
					# Close the previous page before opening the next one
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( isset( $last ) ) {
			# Close the final page; only this one carries the author list
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Emit one log item fragment per row of the result set.
	 *
	 * @param $resultset Iterable result of the log dump query
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Renders database rows and site metadata as XML dump fragments.
 */
class XmlDumpWriter {
	/**
	 * @return String: version of the export schema written by this class
	 */
	function schemaVersion() {
		return "0.6";
	}

	/**
	 * Opening <mediawiki> tag with its namespace/schema attributes, followed
	 * by the <siteinfo> block. closeStream() supplies the matching end tag.
	 *
	 * @return String
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = $this->schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				"http://www.mediawiki.org/xml/export-$ver.xsd",
			'version'            => $ver,
			'xml:lang'           => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * @return String: <siteinfo> element wrapping the site name, base URL,
	 *   generator, case setting and namespace list
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return String: <sitename> element built from $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return String: <generator> element built from $wgVersion
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return String: <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalUrl() );
	}

	/**
	 * @return String: <case> element derived from $wgCapitalLinks
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return String: <namespaces> element with one <namespace> child per
	 *   formatted namespace of the content language
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array( 'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * @return String: closing tag matching openStream()
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opening <page> tag plus the page-level child elements. closePage()
	 * supplies the matching end tag.
	 *
	 * @param $row Object: row carrying the page_* fields and rev_sha1
	 * @return String
	 */
	function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
			}
		}

		if ( $row->rev_sha1 ) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * @return String: closing tag matching openPage()
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Render one revision row as a <revision> element. Fields flagged in
	 * rev_deleted are replaced by empty elements marked deleted="deleted".
	 *
	 * @param $row Object: row carrying the rev_* fields and, unless this is
	 *   a stub dump, the text table fields
	 * @return String
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( $row->rev_deleted & Revision::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( $row->rev_deleted & Revision::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n";
		}

		$text = '';
		if ( $row->rev_deleted & Revision::DELETED_TEXT ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		// Hook handlers receive the writer by reference; PHP 7.1+ refuses
		// a reference to $this itself, so hand over a local copy.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Render one logging row as a <logitem> element. Fields flagged in
	 * log_deleted are replaced by empty elements marked deleted="deleted".
	 *
	 * @param $row Object: row carrying the log_* fields and user_name
	 * @return String
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * @param $timestamp Mixed: timestamp in any format wfTimestamp() accepts
	 * @return String: <timestamp> element in ISO 8601 format
	 */
	function writeTimestamp( $timestamp ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return " " . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Render a <contributor> element: <username> and <id> when there is a
	 * user ID or the text is not a valid IP address, otherwise just <ip>.
	 *
	 * @param $id Int: user ID (0 / empty when there is none)
	 * @param $text String: user name or IP address
	 * @return String
	 */
	function writeContributor( $id, $text ) {
		$out = " <contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= " </contributor>\n";
		return $out;
	}

	/**
	 * Render <upload> elements for a file page: every entry of the file's
	 * history in reversed order, then the current version. Returns '' for
	 * rows outside the file namespace or when the file does not exist.
	 *
	 * @param $row Object: row carrying page_namespace and page_title
	 * @param $dumpContents Bool: embed the file contents as base64
	 * @return String
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_IMAGE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Render a single <upload> element for one file version.
	 *
	 * @param $file File object (current or old version)
	 * @param $dumpContents Bool: embed the file contents as base64
	 * @return String
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . Xml::elementClean( 'comment', null, $file->getDescription() ) . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Title text for the dump: interwiki titles use their prefixed text;
	 * all others get the content language's namespace name (underscores
	 * turned into spaces) as prefix.
	 *
	 * @param $title Title
	 * @return String
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->getInterwiki() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}


/**
 * Base class for output stream; prints to stdout or buffer or wherever.
 * Every write*() method funnels into write().
 */
class DumpOutput {
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	function writeClosePage( $string ) {
		$this->write( $string );
	}

	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Override to write to a different stream type.
	 *
	 * @param $string String
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * No-op here; file-backed subclasses close the current output, rename
	 * it and reopen a fresh one.
	 *
	 * @param $newname Mixed: file name, or array of file names
	 */
	function closeRenameAndReopen( $newname ) {
		return;
	}

	/**
	 * No-op here; file-backed subclasses close the current output and
	 * rename it.
	 *
	 * @param $newname Mixed: file name, or array of file names
	 * @param $open Bool: reopen the output afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		return;
	}

	/**
	 * @return null: this sink has no file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Stream outputter to send data to a file.
 */
class DumpFileOutput extends DumpOutput {
	protected $handle, $filename;

	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	function write( $string ) {
		fputs( $this->handle, $string );
	}

	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Rename the current file.
	 *
	 * @param $newname String
	 * @throws MWException when rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Accept either a plain name or a one-element array of names.
	 *
	 * @param $newname Mixed: file name, or array holding one file name
	 * @return String: the file name
	 * @throws MWException when the array holds more than one name
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			}
			$newname = $newname[0];
		}
		return $newname;
	}

	/**
	 * Close the file and rename it; optionally start a new file under the
	 * original name. Does nothing when $newname is empty.
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			fclose( $this->handle );
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Stream outputter to send data to a file via some filter program.
 * The dump text is written to the command's standard input; when a file
 * name is given the command's output is redirected to that file.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	var $procOpenResource;

	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Start the command and keep its stdin pipe as the write handle.
	 *
	 * @param $command String: complete shell command line
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		$this->handle = $pipes[0];
	}

	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Close the pipe and the process, rename the output file, and
	 * optionally restart the stored command with a redirect to the
	 * original file name appended. Does nothing when $newname is empty.
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			fclose( $this->handle );
			proc_close( $this->procOpenResource );
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}

}

/**
 * Sends dump output via the gzip compressor.
 */
class DumpGZipOutput extends DumpPipeOutput {
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Sends dump output via the bzip2 compressor.
 */
class DumpBZip2Output extends DumpPipeOutput {
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Sends dump output via the p7zip compressor.
 */
class Dump7ZipOutput extends DumpPipeOutput {
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * @param $file String: archive to write
	 * @return String: 7za command line reading from stdin
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return $command;
	}

	/**
	 * Same as the parent, except that the command is rebuilt with
	 * setup7zCommand() on reopen, since 7za takes the file as an argument.
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			fclose( $this->handle );
			proc_close( $this->procOpenResource );
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}



/**
 * Dump output filter class.
 * This just does output filtering and streaming; XML formatting is done
 * higher up, so be careful in what you do. Pages for which pass() returns
 * false are withheld from the wrapped sink, along with their revisions.
 */
class DumpFilter {
	var $sink;
	var $sendingThisPage = false;

	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	function writeLogItem( $rev, $string ) {
		// Forward as a log item, not as a revision, so sinks that treat
		// the two differently see the right call.
		$this->sink->writeLogItem( $rev, $string );
	}

	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Override for page-based filter types.
	 *
	 * @param $page Object: page row
	 * @return Bool: true to let the page through
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Simple dump output filter to exclude all talk pages.
 */
class DumpNotalkFilter extends DumpFilter {
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Dump output filter to include or exclude pages in a given set of namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	var $invert = false;
	var $namespaces = array();

	/**
	 * @param $sink Object: sink to forward to
	 * @param $param String: comma-separated namespace keys, each an NS_*
	 *   constant name or a number; a leading '!' inverts the filter
	 * @throws MWException on an unrecognised key
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// Square-bracket offset (the {} form is gone in PHP 8), guarded so
		// an empty parameter does not raise an offset notice.
		if ( isset( $param[0] ) && $param[0] == '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return $this->invert xor $match;
	}
}


/**
 * Dump output filter used in two-pass exports; holds each page back until
 * it is closed and then forwards only the revision whose rev_id equals the
 * page's page_latest. Pages without such a revision are dropped.
 */
class DumpLatestFilter extends DumpFilter {
	var $page, $pageString, $rev, $revString;

	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Base class for output stream; fans every call out to a list of sinks.
 */
class DumpMultiWriter {
	var $sinks, $count;

	/**
	 * @param $sinks Array: zero-indexed list of sinks
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forward a log item to every sink, so a multi-writer can be used as
	 * the sink of a log dump as well as of a page dump.
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param $newnames Array: new names, indexed like the sinks
	 * @param $open Bool: reopen the outputs afterwards
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return Array: getFilenames() result of each sink, in sink order
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}

}

/**
 * Pass the string through UtfNormal::cleanUp() and then escape it with
 * htmlspecialchars().
 *
 * @param $string String
 * @return String
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( __FUNCTION__ );
	return $string;
}