MediaWiki  REL1_21
Import.php
Go to the documentation of this file.
00001 <?php
00033 class WikiImporter {
00034         private $reader = null;
00035         private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
00036         private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
00037         private $mNoticeCallback, $mDebug;
00038         private $mImportUploads, $mImageBasePath;
00039         private $mNoUpdates = false;
00040 
00045         function __construct( $source ) {
00046                 $this->reader = new XMLReader();
00047 
00048                 stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
00049                 $id = UploadSourceAdapter::registerSource( $source );
00050                 if (defined( 'LIBXML_PARSEHUGE' ) ) {
00051                         $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
00052                 } else {
00053                         $this->reader->open( "uploadsource://$id" );
00054                 }
00055 
00056                 // Default callbacks
00057                 $this->setRevisionCallback( array( $this, "importRevision" ) );
00058                 $this->setUploadCallback( array( $this, 'importUpload' ) );
00059                 $this->setLogItemCallback( array( $this, 'importLogItem' ) );
00060                 $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
00061         }
00062 
00063         private function throwXmlError( $err ) {
00064                 $this->debug( "FAILURE: $err" );
00065                 wfDebug( "WikiImporter XML error: $err\n" );
00066         }
00067 
00068         private function debug( $data ) {
00069                 if( $this->mDebug ) {
00070                         wfDebug( "IMPORT: $data\n" );
00071                 }
00072         }
00073 
00074         private function warn( $data ) {
00075                 wfDebug( "IMPORT: $data\n" );
00076         }
00077 
00078         private function notice( $msg /*, $param, ...*/ ) {
00079                 $params = func_get_args();
00080                 array_shift( $params );
00081 
00082                 if ( is_callable( $this->mNoticeCallback ) ) {
00083                         call_user_func( $this->mNoticeCallback, $msg, $params );
00084                 } else { # No ImportReporter -> CLI
00085                         echo wfMessage( $msg, $params )->text() . "\n";
00086                 }
00087         }
00088 
00093         function setDebug( $debug ) {
00094                 $this->mDebug = $debug;
00095         }
00096 
00101         function setNoUpdates( $noupdates ) {
00102                 $this->mNoUpdates = $noupdates;
00103         }
00104 
00111         public function setNoticeCallback( $callback ) {
00112                 return wfSetVar( $this->mNoticeCallback, $callback );
00113         }
00114 
00120         public function setPageCallback( $callback ) {
00121                 $previous = $this->mPageCallback;
00122                 $this->mPageCallback = $callback;
00123                 return $previous;
00124         }
00125 
00135         public function setPageOutCallback( $callback ) {
00136                 $previous = $this->mPageOutCallback;
00137                 $this->mPageOutCallback = $callback;
00138                 return $previous;
00139         }
00140 
00146         public function setRevisionCallback( $callback ) {
00147                 $previous = $this->mRevisionCallback;
00148                 $this->mRevisionCallback = $callback;
00149                 return $previous;
00150         }
00151 
00157         public function setUploadCallback( $callback ) {
00158                 $previous = $this->mUploadCallback;
00159                 $this->mUploadCallback = $callback;
00160                 return $previous;
00161         }
00162 
00168         public function setLogItemCallback( $callback ) {
00169                 $previous = $this->mLogItemCallback;
00170                 $this->mLogItemCallback = $callback;
00171                 return $previous;
00172         }
00173 
00179         public function setSiteInfoCallback( $callback ) {
00180                 $previous = $this->mSiteInfoCallback;
00181                 $this->mSiteInfoCallback = $callback;
00182                 return $previous;
00183         }
00184 
00190         public function setTargetNamespace( $namespace ) {
00191                 if( is_null( $namespace ) ) {
00192                         // Don't override namespaces
00193                         $this->mTargetNamespace = null;
00194                 } elseif( $namespace >= 0 ) {
00195                         // @todo FIXME: Check for validity
00196                         $this->mTargetNamespace = intval( $namespace );
00197                 } else {
00198                         return false;
00199                 }
00200         }
00201 
00207         public function setTargetRootPage( $rootpage ) {
00208                 $status = Status::newGood();
00209                 if( is_null( $rootpage ) ) {
00210                         // No rootpage
00211                         $this->mTargetRootPage = null;
00212                 } elseif( $rootpage !== '' ) {
00213                         $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
00214                         $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN );
00215                         if( !$title || $title->isExternal() ) {
00216                                 $status->fatal( 'import-rootpage-invalid' );
00217                         } else {
00218                                 if( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
00219                                         global $wgContLang;
00220 
00221                                         $displayNSText = $title->getNamespace() == NS_MAIN
00222                                                 ? wfMessage( 'blanknamespace' )->text()
00223                                                 : $wgContLang->getNsText( $title->getNamespace() );
00224                                         $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
00225                                 } else {
00226                                         // set namespace to 'all', so the namespace check in processTitle() can passed
00227                                         $this->setTargetNamespace( null );
00228                                         $this->mTargetRootPage = $title->getPrefixedDBkey();
00229                                 }
00230                         }
00231                 }
00232                 return $status;
00233         }
00234 
00238         public function setImageBasePath( $dir ) {
00239                 $this->mImageBasePath = $dir;
00240         }
00241 
00245         public function setImportUploads( $import ) {
00246                 $this->mImportUploads = $import;
00247         }
00248 
00254         public function importRevision( $revision ) {
00255                 try {
00256                         $dbw = wfGetDB( DB_MASTER );
00257                         return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
00258                 } catch ( MWContentSerializationException $ex ) {
00259                         $this->notice( 'import-error-unserialize',
00260                                 $revision->getTitle()->getPrefixedText(),
00261                                 $revision->getID(),
00262                                 $revision->getModel(),
00263                                 $revision->getFormat() );
00264                 }
00265         }
00266 
00272         public function importLogItem( $rev ) {
00273                 $dbw = wfGetDB( DB_MASTER );
00274                 return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
00275         }
00276 
00282         public function importUpload( $revision ) {
00283                 $dbw = wfGetDB( DB_MASTER );
00284                 return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
00285         }
00286 
00296         public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
00297                 $args = func_get_args();
00298                 return wfRunHooks( 'AfterImportPage', $args );
00299         }
00300 
00305         public function debugRevisionHandler( &$revision ) {
00306                 $this->debug( "Got revision:" );
00307                 if( is_object( $revision->title ) ) {
00308                         $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
00309                 } else {
00310                         $this->debug( "-- Title: <invalid>" );
00311                 }
00312                 $this->debug( "-- User: " . $revision->user_text );
00313                 $this->debug( "-- Timestamp: " . $revision->timestamp );
00314                 $this->debug( "-- Comment: " . $revision->comment );
00315                 $this->debug( "-- Text: " . $revision->text );
00316         }
00317 
00322         function pageCallback( $title ) {
00323                 if( isset( $this->mPageCallback ) ) {
00324                         call_user_func( $this->mPageCallback, $title );
00325                 }
00326         }
00327 
00336         private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
00337                 if( isset( $this->mPageOutCallback ) ) {
00338                         $args = func_get_args();
00339                         call_user_func_array( $this->mPageOutCallback, $args );
00340                 }
00341         }
00342 
00348         private function revisionCallback( $revision ) {
00349                 if ( isset( $this->mRevisionCallback ) ) {
00350                         return call_user_func_array( $this->mRevisionCallback,
00351                                         array( $revision, $this ) );
00352                 } else {
00353                         return false;
00354                 }
00355         }
00356 
00362         private function logItemCallback( $revision ) {
00363                 if ( isset( $this->mLogItemCallback ) ) {
00364                         return call_user_func_array( $this->mLogItemCallback,
00365                                         array( $revision, $this ) );
00366                 } else {
00367                         return false;
00368                 }
00369         }
00370 
00378         private function nodeContents() {
00379                 if( $this->reader->isEmptyElement ) {
00380                         return "";
00381                 }
00382                 $buffer = "";
00383                 while( $this->reader->read() ) {
00384                         switch( $this->reader->nodeType ) {
00385                         case XmlReader::TEXT:
00386                         case XmlReader::SIGNIFICANT_WHITESPACE:
00387                                 $buffer .= $this->reader->value;
00388                                 break;
00389                         case XmlReader::END_ELEMENT:
00390                                 return $buffer;
00391                         }
00392                 }
00393 
00394                 $this->reader->close();
00395                 return '';
00396         }
00397 
00398         # --------------
00399 
00401         private function dumpElement() {
00402                 static $lookup = null;
00403                 if ( !$lookup ) {
00404                         $xmlReaderConstants = array(
00405                                 "NONE",
00406                                 "ELEMENT",
00407                                 "ATTRIBUTE",
00408                                 "TEXT",
00409                                 "CDATA",
00410                                 "ENTITY_REF",
00411                                 "ENTITY",
00412                                 "PI",
00413                                 "COMMENT",
00414                                 "DOC",
00415                                 "DOC_TYPE",
00416                                 "DOC_FRAGMENT",
00417                                 "NOTATION",
00418                                 "WHITESPACE",
00419                                 "SIGNIFICANT_WHITESPACE",
00420                                 "END_ELEMENT",
00421                                 "END_ENTITY",
00422                                 "XML_DECLARATION",
00423                                 );
00424                         $lookup = array();
00425 
00426                         foreach( $xmlReaderConstants as $name ) {
00427                                 $lookup[constant("XmlReader::$name")] = $name;
00428                         }
00429                 }
00430 
00431                 print( var_dump(
00432                         $lookup[$this->reader->nodeType],
00433                         $this->reader->name,
00434                         $this->reader->value
00435                 )."\n\n" );
00436         }
00437 
00443         public function doImport() {
00444 
00445                 // Calls to reader->read need to be wrapped in calls to
00446                 // libxml_disable_entity_loader() to avoid local file
00447                 // inclusion attacks (bug 46932).
00448                 $oldDisable = libxml_disable_entity_loader( true );
00449                 $this->reader->read();
00450 
00451                 if ( $this->reader->name != 'mediawiki' ) {
00452                         libxml_disable_entity_loader( $oldDisable );
00453                         throw new MWException( "Expected <mediawiki> tag, got ".
00454                                 $this->reader->name );
00455                 }
00456                 $this->debug( "<mediawiki> tag is correct." );
00457 
00458                 $this->debug( "Starting primary dump processing loop." );
00459 
00460                 $keepReading = $this->reader->read();
00461                 $skip = false;
00462                 while ( $keepReading ) {
00463                         $tag = $this->reader->name;
00464                         $type = $this->reader->nodeType;
00465 
00466                         if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this ) ) {
00467                                 // Do nothing
00468                         } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
00469                                 break;
00470                         } elseif ( $tag == 'siteinfo' ) {
00471                                 $this->handleSiteInfo();
00472                         } elseif ( $tag == 'page' ) {
00473                                 $this->handlePage();
00474                         } elseif ( $tag == 'logitem' ) {
00475                                 $this->handleLogItem();
00476                         } elseif ( $tag != '#text' ) {
00477                                 $this->warn( "Unhandled top-level XML tag $tag" );
00478 
00479                                 $skip = true;
00480                         }
00481 
00482                         if ( $skip ) {
00483                                 $keepReading = $this->reader->next();
00484                                 $skip = false;
00485                                 $this->debug( "Skip" );
00486                         } else {
00487                                 $keepReading = $this->reader->read();
00488                         }
00489                 }
00490 
00491                 libxml_disable_entity_loader( $oldDisable );
00492                 return true;
00493         }
00494 
00499         private function handleSiteInfo() {
00500                 // Site info is useful, but not actually used for dump imports.
00501                 // Includes a quick short-circuit to save performance.
00502                 if ( ! $this->mSiteInfoCallback ) {
00503                         $this->reader->next();
00504                         return true;
00505                 }
00506                 throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
00507         }
00508 
00509         private function handleLogItem() {
00510                 $this->debug( "Enter log item handler." );
00511                 $logInfo = array();
00512 
00513                 // Fields that can just be stuffed in the pageInfo object
00514                 $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
00515                                         'logtitle', 'params' );
00516 
00517                 while ( $this->reader->read() ) {
00518                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00519                                         $this->reader->name == 'logitem' ) {
00520                                 break;
00521                         }
00522 
00523                         $tag = $this->reader->name;
00524 
00525                         if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
00526                                                 $this, $logInfo ) ) {
00527                                 // Do nothing
00528                         } elseif ( in_array( $tag, $normalFields ) ) {
00529                                 $logInfo[$tag] = $this->nodeContents();
00530                         } elseif ( $tag == 'contributor' ) {
00531                                 $logInfo['contributor'] = $this->handleContributor();
00532                         } elseif ( $tag != '#text' ) {
00533                                 $this->warn( "Unhandled log-item XML tag $tag" );
00534                         }
00535                 }
00536 
00537                 $this->processLogItem( $logInfo );
00538         }
00539 
00544         private function processLogItem( $logInfo ) {
00545                 $revision = new WikiRevision;
00546 
00547                 $revision->setID( $logInfo['id'] );
00548                 $revision->setType( $logInfo['type'] );
00549                 $revision->setAction( $logInfo['action'] );
00550                 $revision->setTimestamp( $logInfo['timestamp'] );
00551                 $revision->setParams( $logInfo['params'] );
00552                 $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
00553                 $revision->setNoUpdates( $this->mNoUpdates );
00554 
00555                 if ( isset( $logInfo['comment'] ) ) {
00556                         $revision->setComment( $logInfo['comment'] );
00557                 }
00558 
00559                 if ( isset( $logInfo['contributor']['ip'] ) ) {
00560                         $revision->setUserIP( $logInfo['contributor']['ip'] );
00561                 }
00562                 if ( isset( $logInfo['contributor']['username'] ) ) {
00563                         $revision->setUserName( $logInfo['contributor']['username'] );
00564                 }
00565 
00566                 return $this->logItemCallback( $revision );
00567         }
00568 
00569         private function handlePage() {
00570                 // Handle page data.
00571                 $this->debug( "Enter page handler." );
00572                 $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
00573 
00574                 // Fields that can just be stuffed in the pageInfo object
00575                 $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
00576 
00577                 $skip = false;
00578                 $badTitle = false;
00579 
00580                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00581                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00582                                         $this->reader->name == 'page' ) {
00583                                 break;
00584                         }
00585 
00586                         $tag = $this->reader->name;
00587 
00588                         if ( $badTitle ) {
00589                                 // The title is invalid, bail out of this page
00590                                 $skip = true;
00591                         } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
00592                                                 &$pageInfo ) ) ) {
00593                                 // Do nothing
00594                         } elseif ( in_array( $tag, $normalFields ) ) {
00595                                 $pageInfo[$tag] = $this->nodeContents();
00596                                 if ( $tag == 'title' ) {
00597                                         $title = $this->processTitle( $pageInfo['title'] );
00598 
00599                                         if ( !$title ) {
00600                                                 $badTitle = true;
00601                                                 $skip = true;
00602                                         }
00603 
00604                                         $this->pageCallback( $title );
00605                                         list( $pageInfo['_title'], $origTitle ) = $title;
00606                                 }
00607                         } elseif ( $tag == 'revision' ) {
00608                                 $this->handleRevision( $pageInfo );
00609                         } elseif ( $tag == 'upload' ) {
00610                                 $this->handleUpload( $pageInfo );
00611                         } elseif ( $tag != '#text' ) {
00612                                 $this->warn( "Unhandled page XML tag $tag" );
00613                                 $skip = true;
00614                         }
00615                 }
00616 
00617                 $this->pageOutCallback( $pageInfo['_title'], $origTitle,
00618                                         $pageInfo['revisionCount'],
00619                                         $pageInfo['successfulRevisionCount'],
00620                                         $pageInfo );
00621         }
00622 
00626         private function handleRevision( &$pageInfo ) {
00627                 $this->debug( "Enter revision handler" );
00628                 $revisionInfo = array();
00629 
00630                 $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );
00631 
00632                 $skip = false;
00633 
00634                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00635                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00636                                         $this->reader->name == 'revision' ) {
00637                                 break;
00638                         }
00639 
00640                         $tag = $this->reader->name;
00641 
00642                         if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this,
00643                                                 $pageInfo, $revisionInfo ) ) {
00644                                 // Do nothing
00645                         } elseif ( in_array( $tag, $normalFields ) ) {
00646                                 $revisionInfo[$tag] = $this->nodeContents();
00647                         } elseif ( $tag == 'contributor' ) {
00648                                 $revisionInfo['contributor'] = $this->handleContributor();
00649                         } elseif ( $tag != '#text' ) {
00650                                 $this->warn( "Unhandled revision XML tag $tag" );
00651                                 $skip = true;
00652                         }
00653                 }
00654 
00655                 $pageInfo['revisionCount']++;
00656                 if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
00657                         $pageInfo['successfulRevisionCount']++;
00658                 }
00659         }
00660 
00666         private function processRevision( $pageInfo, $revisionInfo ) {
00667                 $revision = new WikiRevision;
00668 
00669                 if( isset( $revisionInfo['id'] ) ) {
00670                         $revision->setID( $revisionInfo['id'] );
00671                 }
00672                 if ( isset( $revisionInfo['text'] ) ) {
00673                         $revision->setText( $revisionInfo['text'] );
00674                 }
00675                 if ( isset( $revisionInfo['model'] ) ) {
00676                         $revision->setModel( $revisionInfo['model'] );
00677                 }
00678                 if ( isset( $revisionInfo['format'] ) ) {
00679                         $revision->setFormat( $revisionInfo['format'] );
00680                 }
00681                 $revision->setTitle( $pageInfo['_title'] );
00682 
00683                 if ( isset( $revisionInfo['timestamp'] ) ) {
00684                         $revision->setTimestamp( $revisionInfo['timestamp'] );
00685                 } else {
00686                         $revision->setTimestamp( wfTimestampNow() );
00687                 }
00688 
00689                 if ( isset( $revisionInfo['comment'] ) ) {
00690                         $revision->setComment( $revisionInfo['comment'] );
00691                 }
00692 
00693                 if ( isset( $revisionInfo['minor'] ) ) {
00694                         $revision->setMinor( true );
00695                 }
00696                 if ( isset( $revisionInfo['contributor']['ip'] ) ) {
00697                         $revision->setUserIP( $revisionInfo['contributor']['ip'] );
00698                 }
00699                 if ( isset( $revisionInfo['contributor']['username'] ) ) {
00700                         $revision->setUserName( $revisionInfo['contributor']['username'] );
00701                 }
00702                 $revision->setNoUpdates( $this->mNoUpdates );
00703 
00704                 return $this->revisionCallback( $revision );
00705         }
00706 
00711         private function handleUpload( &$pageInfo ) {
00712                 $this->debug( "Enter upload handler" );
00713                 $uploadInfo = array();
00714 
00715                 $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
00716                                         'src', 'size', 'sha1base36', 'archivename', 'rel' );
00717 
00718                 $skip = false;
00719 
00720                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00721                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00722                                         $this->reader->name == 'upload' ) {
00723                                 break;
00724                         }
00725 
00726                         $tag = $this->reader->name;
00727 
00728                         if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this,
00729                                                 $pageInfo ) ) {
00730                                 // Do nothing
00731                         } elseif ( in_array( $tag, $normalFields ) ) {
00732                                 $uploadInfo[$tag] = $this->nodeContents();
00733                         } elseif ( $tag == 'contributor' ) {
00734                                 $uploadInfo['contributor'] = $this->handleContributor();
00735                         } elseif ( $tag == 'contents' ) {
00736                                 $contents = $this->nodeContents();
00737                                 $encoding = $this->reader->getAttribute( 'encoding' );
00738                                 if ( $encoding === 'base64' ) {
00739                                         $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
00740                                         $uploadInfo['isTempSrc'] = true;
00741                                 }
00742                         } elseif ( $tag != '#text' ) {
00743                                 $this->warn( "Unhandled upload XML tag $tag" );
00744                                 $skip = true;
00745                         }
00746                 }
00747 
00748                 if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
00749                         $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
00750                         if ( file_exists( $path ) ) {
00751                                 $uploadInfo['fileSrc'] = $path;
00752                                 $uploadInfo['isTempSrc'] = false;
00753                         }
00754                 }
00755 
00756                 if ( $this->mImportUploads ) {
00757                         return $this->processUpload( $pageInfo, $uploadInfo );
00758                 }
00759         }
00760 
00765         private function dumpTemp( $contents ) {
00766                 $filename = tempnam( wfTempDir(), 'importupload' );
00767                 file_put_contents( $filename, $contents );
00768                 return $filename;
00769         }
00770 
00776         private function processUpload( $pageInfo, $uploadInfo ) {
00777                 $revision = new WikiRevision;
00778                 $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
00779 
00780                 $revision->setTitle( $pageInfo['_title'] );
00781                 $revision->setID( $pageInfo['id'] );
00782                 $revision->setTimestamp( $uploadInfo['timestamp'] );
00783                 $revision->setText( $text );
00784                 $revision->setFilename( $uploadInfo['filename'] );
00785                 if ( isset( $uploadInfo['archivename'] ) ) {
00786                         $revision->setArchiveName( $uploadInfo['archivename'] );
00787                 }
00788                 $revision->setSrc( $uploadInfo['src'] );
00789                 if ( isset( $uploadInfo['fileSrc'] ) ) {
00790                         $revision->setFileSrc( $uploadInfo['fileSrc'],
00791                                 !empty( $uploadInfo['isTempSrc'] ) );
00792                 }
00793                 if ( isset( $uploadInfo['sha1base36'] ) ) {
00794                         $revision->setSha1Base36( $uploadInfo['sha1base36'] );
00795                 }
00796                 $revision->setSize( intval( $uploadInfo['size'] ) );
00797                 $revision->setComment( $uploadInfo['comment'] );
00798 
00799                 if ( isset( $uploadInfo['contributor']['ip'] ) ) {
00800                         $revision->setUserIP( $uploadInfo['contributor']['ip'] );
00801                 }
00802                 if ( isset( $uploadInfo['contributor']['username'] ) ) {
00803                         $revision->setUserName( $uploadInfo['contributor']['username'] );
00804                 }
00805                 $revision->setNoUpdates( $this->mNoUpdates );
00806 
00807                 return call_user_func( $this->mUploadCallback, $revision );
00808         }
00809 
00813         private function handleContributor() {
00814                 $fields = array( 'id', 'ip', 'username' );
00815                 $info = array();
00816 
00817                 while ( $this->reader->read() ) {
00818                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00819                                         $this->reader->name == 'contributor' ) {
00820                                 break;
00821                         }
00822 
00823                         $tag = $this->reader->name;
00824 
00825                         if ( in_array( $tag, $fields ) ) {
00826                                 $info[$tag] = $this->nodeContents();
00827                         }
00828                 }
00829 
00830                 return $info;
00831         }
00832 
/**
 * Work out which local title a page from the dump should be imported as,
 * and check that importing to it is acceptable.
 *
 * If a target namespace is configured and the original text parses as a
 * title, the page is moved into that namespace. Otherwise the configured
 * target root page (if any) is prepended as a parent page.
 *
 * A notice is emitted and false is returned when the resulting title is
 * invalid, is an interwiki title, cannot exist, or (outside command line
 * mode) may not be edited/created by the importing user.
 *
 * @param string $text Title text as read from the dump
 * @return array|bool array( $title, $origTitle ) on success, false if the
 *   page has to be skipped
 */
private function processTitle( $text ) {
	global $wgCommandLineMode;

	$workTitle = $text;
	$origTitle = Title::newFromText( $workTitle );

	$useTargetNamespace = !is_null( $this->mTargetNamespace ) && !is_null( $origTitle );
	if ( $useTargetNamespace ) {
		# makeTitleSafe, because $origTitle can carry an interwiki prefix
		# (different interwiki map on the source wiki), in which case the
		# DB key can begin with a lowercase character
		$title = Title::makeTitleSafe( $this->mTargetNamespace, $origTitle->getDBkey() );
	} else {
		if ( !is_null( $this->mTargetRootPage ) ) {
			$workTitle = "{$this->mTargetRootPage}/{$workTitle}";
		}
		$title = Title::newFromText( $workTitle );
	}

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $workTitle );
		return false;
	}

	# Pick the first applicable reason for refusing the title, if any
	$rejection = null;
	if ( $title->isExternal() ) {
		$rejection = 'import-error-interwiki';
	} elseif ( !$title->canExist() ) {
		$rejection = 'import-error-special';
	} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$rejection = 'import-error-edit';
	} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$rejection = 'import-error-create';
	}

	if ( $rejection !== null ) {
		$this->notice( $rejection, $title->getPrefixedText() );
		return false;
	}

	return array( $title, $origTitle );
}
00877 }
00878 
/**
 * Stream wrapper that exposes a registered import source object as a
 * read-only stream, so it can be opened through a "scheme://id" URL.
 *
 * A source is any object offering atEnd() and readChunk(); it is stored
 * with registerSource() and looked up again by id in stream_open().
 */
class UploadSourceAdapter {
	/** @var array Map of registration id => source object */
	public static $sourceRegistrations = array();

	/** @var object|null Source being read; set by stream_open() */
	private $mSource;

	/** @var string Data fetched from the source but not yet handed out */
	private $mBuffer = '';

	/** @var int Number of bytes handed out by stream_read() so far */
	private $mPosition = 0;

	/**
	 * Store a source object and return the id under which it can be opened.
	 *
	 * @param object $source Object providing atEnd() and readChunk()
	 * @return string Random id to use as the host part of the stream URL
	 */
	static function registerSource( $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Open the stream: resolve the host part of $path to a registered source.
	 *
	 * @param string $path URL of the form "scheme://id"
	 * @param string $mode Unused
	 * @param int $options Unused
	 * @param string &$opened_path Unused
	 * @return bool False if the URL has no host part or the id is unknown
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );

		// parse_url() returns false for malformed URLs and omits 'host'
		// when there is none; treat both as "no such source".
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];
		$this->mBuffer = '';
		$this->mPosition = 0;

		return true;
	}

	/**
	 * Return up to $count bytes, pulling chunks from the source as needed.
	 *
	 * Chunks are read until the buffer holds at least $count bytes, the
	 * source reports its end, or a chunk comes back empty.
	 *
	 * @param int $count Maximum number of bytes to return
	 * @return string Possibly empty string of at most $count bytes
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
				strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			// readChunk() may return false or ''; both mean "nothing more now"
			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			// substr() can return false on older PHP versions when the
			// offset reaches the end of the string; keep the buffer a string.
			$return = (string)substr( $this->mBuffer, 0, $count );
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 *
	 * @param string $data Ignored
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes returned by stream_read() so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * Report whether the stream has been read to its end.
	 *
	 * The source can already be exhausted while data it delivered is still
	 * waiting in the buffer (e.g. a source that returns everything in one
	 * chunk), so both conditions have to hold.
	 *
	 * @return bool
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && !strlen( $this->mBuffer );
	}

	/**
	 * Return a stat array with every field set to zero.
	 *
	 * @return array Entries 0..12 plus the matching named keys
	 */
	function url_stat() {
		$result = array();

		$result['dev'] = $result[0] = 0;
		$result['ino'] = $result[1] = 0;
		$result['mode'] = $result[2] = 0;
		$result['nlink'] = $result[3] = 0;
		$result['uid'] = $result[4] = 0;
		$result['gid'] = $result[5] = 0;
		$result['rdev'] = $result[6] = 0;
		$result['size'] = $result[7] = 0;
		$result['atime'] = $result[8] = 0;
		$result['mtime'] = $result[9] = 0;
		$result['ctime'] = $result[10] = 0;
		$result['blksize'] = $result[11] = 0;
		$result['blocks'] = $result[12] = 0;

		return $result;
	}
}
00993 
/**
 * XMLReader with a helper for fetching the text of the current element.
 */
class XMLReader2 extends XMLReader {

	/**
	 * Read the text content of the element the reader is positioned on.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated until
	 * the next end-element node is reached. Note that the first end tag
	 * encountered stops the read, including that of a nested child element.
	 *
	 * If the input ends before an end tag is found, the reader is closed
	 * and an empty string is returned.
	 *
	 * @return string
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
			case XMLReader::TEXT:
			case XMLReader::CDATA:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}

		// Ran out of input without seeing the end tag
		$this->close();
		return "";
	}
}
01017 
/**
 * Holder for one imported item (page revision, log entry or file upload)
 * together with the routines that write it into the local wiki.
 *
 * The setters are filled in while the dump is parsed; importOldRevision(),
 * importLogItem() and importUpload() then perform the actual import.
 */
class WikiRevision {
	public $importer = null;

	/** @var Title|null Target title; must be set before importing */
	public $title = null;
	public $id = 0;
	/** @var string Timestamp in TS_MW format */
	public $timestamp = "20010115000000";
	public $user = 0;
	public $user_text = "";
	/** @var string|null Content model; derived from the title when null */
	public $model = null;
	/** @var string|null Serialization format; derived from the title when null */
	public $format = null;
	public $text = "";
	/** @var Content|null Lazily built by getContent() */
	public $content = null;
	public $comment = "";
	public $minor = false;
	public $type = "";
	public $action = "";
	public $params = "";
	/** @var string|null Remote source of an uploaded file; see downloadSource() */
	public $src = null;
	/** @var string Local path of the uploaded file's data, if available */
	public $fileSrc = '';
	/** @var string|null */
	public $filename = null;
	/** @var int|null */
	public $size = null;
	/** @var string|bool Base-36 SHA-1 of the file, or false if unknown */
	public $sha1base36 = false;
	/** @var bool Whether $fileSrc is a temporary file; read by isTempSrc() */
	public $isTemp = false;
	public $archiveName = '';
	/** @var bool|null Same flag as $isTemp, kept for code reading it directly */
	public $fileIsTemp;
	/** @var bool When true, importOldRevision() skips the edit updates */
	private $mNoUpdates = false;

	/**
	 * @param Title $title
	 * @throws MWException if $title is null or not an object
	 */
	function setTitle( $title ) {
		if( is_object( $title ) ) {
			$this->title = $title;
		} elseif( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param int $id
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * @param string $ts Timestamp in any format wfTimestamp() accepts;
	 *   stored in TS_MW format
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param string $user User name; stored in the same field as setUserIP()
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * @param string $ip IP address; stored in the same field as setUsername()
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $model Content model
	 */
	function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param string $format Serialization format
	 */
	function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param string $text Serialized revision text
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text Edit summary / log comment
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param mixed $minor Cast to bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param string $src Location handed to Http::get() by downloadSource()
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * @param string $src Local path of the file data
	 * @param bool $isTemp Whether $src is a temporary file
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		// isTempSrc() reads $isTemp, so it has to be kept in sync;
		// otherwise the flag passed here would never be reported.
		$this->isTemp = $isTemp;
	}

	/**
	 * @param string $sha1base36 SHA-1 hash in base 36
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param string $filename
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param string $archiveName
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param mixed $size Converted with intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param string $type Log type
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param string $action Log action
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param string $params Log parameters
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param bool $noupdates When true, importOldRevision() does not run
	 *   the page's edit updates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title|null
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string Timestamp in TS_MW format
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string User name or IP, whichever was set last
	 */
	function getUser() {
		return $this->user_text;
	}

	/**
	 * @deprecated since 1.21; see getContent()
	 * @return string
	 */
	function getText() {
		ContentHandler::deprecated( __METHOD__, '1.21' );

		return $this->text;
	}

	/**
	 * Build (once) and return the Content object for the revision text.
	 *
	 * @return Content
	 */
	function getContent() {
		if ( is_null( $this->content ) ) {
			$this->content =
				ContentHandler::makeContent(
					$this->text,
					$this->getTitle(),
					$this->getModel(),
					$this->getFormat()
				);
		}

		return $this->content;
	}

	/**
	 * @return string Content model; taken from the title if none was set
	 */
	function getModel() {
		if ( is_null( $this->model ) ) {
			$this->model = $this->getTitle()->getContentModel();
		}

		return $this->model;
	}

	/**
	 * @return string Serialization format; the default format of the
	 *   title's content handler if none was set
	 */
	function getFormat() {
		// Test the format itself. Testing the model here would return null
		// when a model but no format was set, and would overwrite an
		// explicitly set format when no model was set.
		if ( is_null( $this->format ) ) {
			$this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat();
		}

		return $this->format;
	}

	/**
	 * @return string
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * @return bool
	 */
	function getMinor() {
		return $this->minor;
	}

	/**
	 * @return string|null Null if setSrc() was never called
	 */
	function getSrc() {
		return $this->src;
	}

	/**
	 * @return string|bool SHA-1 converted from base 36 to base 16, or false
	 *   if no hash was set
	 */
	function getSha1() {
		if ( $this->sha1base36 ) {
			return wfBaseConvert( $this->sha1base36, 36, 16 );
		}
		return false;
	}

	/**
	 * @return string
	 */
	function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * @return bool Whether the file given to setFileSrc() is temporary
	 */
	function isTempSrc() {
		return $this->isTemp;
	}

	/**
	 * @return string|null Null if setFilename() was never called
	 */
	function getFilename() {
		return $this->filename;
	}

	/**
	 * @return string
	 */
	function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @return int|null Null if setSize() was never called
	 */
	function getSize() {
		return $this->size;
	}

	/**
	 * @return string
	 */
	function getType() {
		return $this->type;
	}

	/**
	 * @return string
	 */
	function getAction() {
		return $this->action;
	}

	/**
	 * @return string
	 */
	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this revision into the database, creating the page if needed.
	 *
	 * If the page already exists and has a revision with the same
	 * timestamp, user text and comment, nothing is inserted.
	 *
	 * @return bool False if such a revision already existed, true otherwise
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
			$userObj = $user;
		} else {
			// No User object could be made from the name: attribute the
			// revision to user id 0 with the raw text.
			$userId = 0;
			$userText = $this->getUser();
			$userObj = new User;
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$page = WikiPage::factory( $this->title );
		if( !$page->exists() ) {
			# must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
			$oldcountable = null;
		} else {
			$pageId = $page->getId();
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ),
				__METHOD__
			);
			if( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
			$oldcountable = $page->isCountable();
		}

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'title' => $this->title,
			'page' => $pageId,
			'content_model' => $this->getModel(),
			'content_format' => $this->getFormat(),
			'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()?
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
			) );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			$page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
		}

		return true;
	}

	/**
	 * Insert this item into the logging table unless an entry with the same
	 * type, action, timestamp, title, comment and params is already there.
	 *
	 * Does nothing (apart from a debug message) when no title is set.
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# @todo FIXME: This will not record autoblocks
		if( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				'log_comment' => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params ),
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );
	}

	/**
	 * Store the file belonging to this item in the local file repository.
	 *
	 * The file goes to the archive when an archive name is set, or when a
	 * local file with a newer timestamp already exists; otherwise it
	 * becomes the current version. The data comes from getFileSrc() or,
	 * failing that, from downloadSource(). If a SHA-1 is known and does not
	 * match the data, the import is refused.
	 *
	 * @return bool True on success, false on any failure
	 */
	function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				// The local file is newer: import ours as an old version
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
			}
		}
		if( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
		if ( !$source ) {
			$source = $this->downloadSource();
			// A downloaded copy is always a temporary file
			$flags |= File::DELETE_SOURCE;
		}
		if( !$source ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}
		$sha1 = $this->getSha1();
		if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
			if ( $flags & File::DELETE_SOURCE ) {
				# Broken file; delete it if it is a temporary file
				unlink( $source );
			}
			wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
			return false;
		}

		$user = User::newFromName( $this->user_text );

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user, $flags );
		} else {
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Successful\n" );
			return true;
		} else {
			wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
			return false;
		}
	}

	/**
	 * Fetch getSrc() with Http::get() into a new temporary file.
	 *
	 * @return string|bool Path of the temporary file, or false if uploads
	 *   are disabled, the temporary file cannot be opened for writing, or
	 *   nothing could be fetched
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		$f = fopen( $tempo, 'wb' );
		if( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		fwrite( $f, $data );
		fclose( $f );

		return $tempo;
	}

}
01582 
/**
 * Import source backed by a string held in memory. The whole string is
 * delivered as a single chunk.
 */
class ImportStringSource {
	/** @var string Data to deliver */
	public $mString;

	/** @var bool Whether the string has already been handed out */
	public $mRead = false;

	/**
	 * @param string $string Complete data to be read
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool True once readChunk() has returned the string
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return string|bool The whole string on the first call, false afterwards
	 */
	function readChunk() {
		if( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
01611 
/**
 * Import source backed by an open stream handle, read in 32 KiB chunks.
 *
 * The static factories return a Status object; on success its value is
 * the ImportStreamSource.
 */
class ImportStreamSource {
	/** @var resource Stream handle the data is read from */
	public $mHandle;

	/**
	 * @param resource $handle Open, readable stream
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the handle
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return string|bool Up to 32768 bytes, as returned by fread()
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a local file as import source.
	 *
	 * @param string $filename
	 * @return Status Fatal 'importcantopen' if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Use a file uploaded with the current request as import source.
	 *
	 * @param string $fieldname Name of the upload form field
	 * @return Status Fatal with an 'importnofile' or 'importuploaderror*'
	 *   message if there is no usable upload
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		// Check with isset()/empty() instead of taking a reference, which
		// would create a null entry in $_FILES for a missing field.
		if( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
			return Status::newFatal( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			switch( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_FORM_SIZE: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# Any other code falls through to the is_uploaded_file() check below
			}

		}
		$fname = isset( $upload['tmp_name'] ) ? $upload['tmp_name'] : '';
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and use the response body as import source.
	 *
	 * The body is written to a temporary file which is then rewound.
	 *
	 * @param string $url
	 * @param string $method HTTP method passed to Http::request()
	 * @return Status Fatal 'importcantopen' if the request fails or no
	 *   temporary file can be created
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url, array( 'followRedirects' => true ) );
		if( $data === false ) {
			return Status::newFatal( 'importcantopen' );
		}

		$file = tmpfile();
		if( $file === false ) {
			// tmpfile() failed; without this check the source would
			// wrap false instead of a stream handle
			return Status::newFatal( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		fseek( $file, 0 );
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Fetch a page through Special:Export on another wiki, addressed by
	 * interwiki prefix, and use the result as import source.
	 *
	 * @param string $interwiki Interwiki prefix of the source wiki
	 * @param string $page Page name on the source wiki
	 * @param bool $history Adds history=1 to the export request
	 * @param bool $templates Adds templates=1 to the export request
	 * @param int $pageLinkDepth Sent as pagelink-depth when non-zero
	 * @return Status Fatal 'import-noarticle' for an empty page name,
	 *   'importbadinterwiki' if the prefix does not yield an interwiki title
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if( is_null( $link ) || $link->getInterwiki() == '' ) {
			return Status::newFatal( 'importbadinterwiki' );
		} else {
			$params = array();
			if ( $history ) $params['history'] = 1;
			if ( $templates ) $params['templates'] = 1;
			if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth;
			$url = $link->getFullUrl( $params );
			# For interwikis, use POST to avoid redirects.
			return ImportStreamSource::newFromURL( $url, "POST" );
		}
	}
}