MediaWiki  REL1_24
Import.php
Go to the documentation of this file.
00001 <?php
00033 class WikiImporter {
00034     private $reader = null;
00035     private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
00036     private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
00037     private $mNoticeCallback, $mDebug;
00038     private $mImportUploads, $mImageBasePath;
00039     private $mNoUpdates = false;
00040 
00045     function __construct( ImportStreamSource $source ) {
00046         $this->reader = new XMLReader();
00047 
00048         if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
00049             stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
00050         }
00051         $id = UploadSourceAdapter::registerSource( $source );
00052         if ( defined( 'LIBXML_PARSEHUGE' ) ) {
00053             $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
00054         } else {
00055             $this->reader->open( "uploadsource://$id" );
00056         }
00057 
00058         // Default callbacks
00059         $this->setRevisionCallback( array( $this, "importRevision" ) );
00060         $this->setUploadCallback( array( $this, 'importUpload' ) );
00061         $this->setLogItemCallback( array( $this, 'importLogItem' ) );
00062         $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
00063     }
00064 
00068     public function getReader() {
00069         return $this->reader;
00070     }
00071 
00072     public function throwXmlError( $err ) {
00073         $this->debug( "FAILURE: $err" );
00074         wfDebug( "WikiImporter XML error: $err\n" );
00075     }
00076 
00077     public function debug( $data ) {
00078         if ( $this->mDebug ) {
00079             wfDebug( "IMPORT: $data\n" );
00080         }
00081     }
00082 
00083     public function warn( $data ) {
00084         wfDebug( "IMPORT: $data\n" );
00085     }
00086 
00087     public function notice( $msg /*, $param, ...*/ ) {
00088         $params = func_get_args();
00089         array_shift( $params );
00090 
00091         if ( is_callable( $this->mNoticeCallback ) ) {
00092             call_user_func( $this->mNoticeCallback, $msg, $params );
00093         } else { # No ImportReporter -> CLI
00094             echo wfMessage( $msg, $params )->text() . "\n";
00095         }
00096     }
00097 
00102     function setDebug( $debug ) {
00103         $this->mDebug = $debug;
00104     }
00105 
00110     function setNoUpdates( $noupdates ) {
00111         $this->mNoUpdates = $noupdates;
00112     }
00113 
00120     public function setNoticeCallback( $callback ) {
00121         return wfSetVar( $this->mNoticeCallback, $callback );
00122     }
00123 
00129     public function setPageCallback( $callback ) {
00130         $previous = $this->mPageCallback;
00131         $this->mPageCallback = $callback;
00132         return $previous;
00133     }
00134 
00144     public function setPageOutCallback( $callback ) {
00145         $previous = $this->mPageOutCallback;
00146         $this->mPageOutCallback = $callback;
00147         return $previous;
00148     }
00149 
00155     public function setRevisionCallback( $callback ) {
00156         $previous = $this->mRevisionCallback;
00157         $this->mRevisionCallback = $callback;
00158         return $previous;
00159     }
00160 
00166     public function setUploadCallback( $callback ) {
00167         $previous = $this->mUploadCallback;
00168         $this->mUploadCallback = $callback;
00169         return $previous;
00170     }
00171 
00177     public function setLogItemCallback( $callback ) {
00178         $previous = $this->mLogItemCallback;
00179         $this->mLogItemCallback = $callback;
00180         return $previous;
00181     }
00182 
00188     public function setSiteInfoCallback( $callback ) {
00189         $previous = $this->mSiteInfoCallback;
00190         $this->mSiteInfoCallback = $callback;
00191         return $previous;
00192     }
00193 
00199     public function setTargetNamespace( $namespace ) {
00200         if ( is_null( $namespace ) ) {
00201             // Don't override namespaces
00202             $this->mTargetNamespace = null;
00203         } elseif ( $namespace >= 0 ) {
00204             // @todo FIXME: Check for validity
00205             $this->mTargetNamespace = intval( $namespace );
00206         } else {
00207             return false;
00208         }
00209     }
00210 
00216     public function setTargetRootPage( $rootpage ) {
00217         $status = Status::newGood();
00218         if ( is_null( $rootpage ) ) {
00219             // No rootpage
00220             $this->mTargetRootPage = null;
00221         } elseif ( $rootpage !== '' ) {
00222             $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
00223             $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace )
00224                 ? $this->mTargetNamespace
00225                 : NS_MAIN
00226             );
00227 
00228             if ( !$title || $title->isExternal() ) {
00229                 $status->fatal( 'import-rootpage-invalid' );
00230             } else {
00231                 if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
00232                     global $wgContLang;
00233 
00234                     $displayNSText = $title->getNamespace() == NS_MAIN
00235                         ? wfMessage( 'blanknamespace' )->text()
00236                         : $wgContLang->getNsText( $title->getNamespace() );
00237                     $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
00238                 } else {
00239                     // set namespace to 'all', so the namespace check in processTitle() can passed
00240                     $this->setTargetNamespace( null );
00241                     $this->mTargetRootPage = $title->getPrefixedDBkey();
00242                 }
00243             }
00244         }
00245         return $status;
00246     }
00247 
00251     public function setImageBasePath( $dir ) {
00252         $this->mImageBasePath = $dir;
00253     }
00254 
00258     public function setImportUploads( $import ) {
00259         $this->mImportUploads = $import;
00260     }
00261 
00267     public function importRevision( $revision ) {
00268         if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
00269             $this->notice( 'import-error-bad-location',
00270                 $revision->getTitle()->getPrefixedText(),
00271                 $revision->getID(),
00272                 $revision->getModel(),
00273                 $revision->getFormat() );
00274 
00275             return false;
00276         }
00277 
00278         try {
00279             $dbw = wfGetDB( DB_MASTER );
00280             return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
00281         } catch ( MWContentSerializationException $ex ) {
00282             $this->notice( 'import-error-unserialize',
00283                 $revision->getTitle()->getPrefixedText(),
00284                 $revision->getID(),
00285                 $revision->getModel(),
00286                 $revision->getFormat() );
00287         }
00288 
00289         return false;
00290     }
00291 
00297     public function importLogItem( $revision ) {
00298         $dbw = wfGetDB( DB_MASTER );
00299         return $dbw->deadlockLoop( array( $revision, 'importLogItem' ) );
00300     }
00301 
00307     public function importUpload( $revision ) {
00308         $dbw = wfGetDB( DB_MASTER );
00309         return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
00310     }
00311 
00321     public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
00322         $args = func_get_args();
00323         return wfRunHooks( 'AfterImportPage', $args );
00324     }
00325 
00330     public function debugRevisionHandler( &$revision ) {
00331         $this->debug( "Got revision:" );
00332         if ( is_object( $revision->title ) ) {
00333             $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
00334         } else {
00335             $this->debug( "-- Title: <invalid>" );
00336         }
00337         $this->debug( "-- User: " . $revision->user_text );
00338         $this->debug( "-- Timestamp: " . $revision->timestamp );
00339         $this->debug( "-- Comment: " . $revision->comment );
00340         $this->debug( "-- Text: " . $revision->text );
00341     }
00342 
00347     function pageCallback( $title ) {
00348         if ( isset( $this->mPageCallback ) ) {
00349             call_user_func( $this->mPageCallback, $title );
00350         }
00351     }
00352 
00361     private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
00362         if ( isset( $this->mPageOutCallback ) ) {
00363             $args = func_get_args();
00364             call_user_func_array( $this->mPageOutCallback, $args );
00365         }
00366     }
00367 
00373     private function revisionCallback( $revision ) {
00374         if ( isset( $this->mRevisionCallback ) ) {
00375             return call_user_func_array( $this->mRevisionCallback,
00376                     array( $revision, $this ) );
00377         } else {
00378             return false;
00379         }
00380     }
00381 
00387     private function logItemCallback( $revision ) {
00388         if ( isset( $this->mLogItemCallback ) ) {
00389             return call_user_func_array( $this->mLogItemCallback,
00390                     array( $revision, $this ) );
00391         } else {
00392             return false;
00393         }
00394     }
00395 
00401     public function nodeAttribute( $attr ) {
00402         return $this->reader->getAttribute( $attr );
00403     }
00404 
00412     public function nodeContents() {
00413         if ( $this->reader->isEmptyElement ) {
00414             return "";
00415         }
00416         $buffer = "";
00417         while ( $this->reader->read() ) {
00418             switch ( $this->reader->nodeType ) {
00419             case XmlReader::TEXT:
00420             case XmlReader::SIGNIFICANT_WHITESPACE:
00421                 $buffer .= $this->reader->value;
00422                 break;
00423             case XmlReader::END_ELEMENT:
00424                 return $buffer;
00425             }
00426         }
00427 
00428         $this->reader->close();
00429         return '';
00430     }
00431 
00437     public function doImport() {
00438         // Calls to reader->read need to be wrapped in calls to
00439         // libxml_disable_entity_loader() to avoid local file
00440         // inclusion attacks (bug 46932).
00441         $oldDisable = libxml_disable_entity_loader( true );
00442         $this->reader->read();
00443 
00444         if ( $this->reader->name != 'mediawiki' ) {
00445             libxml_disable_entity_loader( $oldDisable );
00446             throw new MWException( "Expected <mediawiki> tag, got " .
00447                 $this->reader->name );
00448         }
00449         $this->debug( "<mediawiki> tag is correct." );
00450 
00451         $this->debug( "Starting primary dump processing loop." );
00452 
00453         $keepReading = $this->reader->read();
00454         $skip = false;
00455         while ( $keepReading ) {
00456             $tag = $this->reader->name;
00457             $type = $this->reader->nodeType;
00458 
00459             if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
00460                 // Do nothing
00461             } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
00462                 break;
00463             } elseif ( $tag == 'siteinfo' ) {
00464                 $this->handleSiteInfo();
00465             } elseif ( $tag == 'page' ) {
00466                 $this->handlePage();
00467             } elseif ( $tag == 'logitem' ) {
00468                 $this->handleLogItem();
00469             } elseif ( $tag != '#text' ) {
00470                 $this->warn( "Unhandled top-level XML tag $tag" );
00471 
00472                 $skip = true;
00473             }
00474 
00475             if ( $skip ) {
00476                 $keepReading = $this->reader->next();
00477                 $skip = false;
00478                 $this->debug( "Skip" );
00479             } else {
00480                 $keepReading = $this->reader->read();
00481             }
00482         }
00483 
00484         libxml_disable_entity_loader( $oldDisable );
00485         return true;
00486     }
00487 
00492     private function handleSiteInfo() {
00493         // Site info is useful, but not actually used for dump imports.
00494         // Includes a quick short-circuit to save performance.
00495         if ( !$this->mSiteInfoCallback ) {
00496             $this->reader->next();
00497             return true;
00498         }
00499         throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
00500     }
00501 
00502     private function handleLogItem() {
00503         $this->debug( "Enter log item handler." );
00504         $logInfo = array();
00505 
00506         // Fields that can just be stuffed in the pageInfo object
00507         $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
00508                     'logtitle', 'params' );
00509 
00510         while ( $this->reader->read() ) {
00511             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00512                     $this->reader->name == 'logitem' ) {
00513                 break;
00514             }
00515 
00516             $tag = $this->reader->name;
00517 
00518             if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
00519                 $this, $logInfo
00520             ) ) ) {
00521                 // Do nothing
00522             } elseif ( in_array( $tag, $normalFields ) ) {
00523                 $logInfo[$tag] = $this->nodeContents();
00524             } elseif ( $tag == 'contributor' ) {
00525                 $logInfo['contributor'] = $this->handleContributor();
00526             } elseif ( $tag != '#text' ) {
00527                 $this->warn( "Unhandled log-item XML tag $tag" );
00528             }
00529         }
00530 
00531         $this->processLogItem( $logInfo );
00532     }
00533 
00538     private function processLogItem( $logInfo ) {
00539         $revision = new WikiRevision;
00540 
00541         $revision->setID( $logInfo['id'] );
00542         $revision->setType( $logInfo['type'] );
00543         $revision->setAction( $logInfo['action'] );
00544         $revision->setTimestamp( $logInfo['timestamp'] );
00545         $revision->setParams( $logInfo['params'] );
00546         $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
00547         $revision->setNoUpdates( $this->mNoUpdates );
00548 
00549         if ( isset( $logInfo['comment'] ) ) {
00550             $revision->setComment( $logInfo['comment'] );
00551         }
00552 
00553         if ( isset( $logInfo['contributor']['ip'] ) ) {
00554             $revision->setUserIP( $logInfo['contributor']['ip'] );
00555         }
00556         if ( isset( $logInfo['contributor']['username'] ) ) {
00557             $revision->setUserName( $logInfo['contributor']['username'] );
00558         }
00559 
00560         return $this->logItemCallback( $revision );
00561     }
00562 
00563     private function handlePage() {
00564         // Handle page data.
00565         $this->debug( "Enter page handler." );
00566         $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
00567 
00568         // Fields that can just be stuffed in the pageInfo object
00569         $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
00570 
00571         $skip = false;
00572         $badTitle = false;
00573 
00574         while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00575             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00576                     $this->reader->name == 'page' ) {
00577                 break;
00578             }
00579 
00580             $tag = $this->reader->name;
00581 
00582             if ( $badTitle ) {
00583                 // The title is invalid, bail out of this page
00584                 $skip = true;
00585             } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
00586                         &$pageInfo ) ) ) {
00587                 // Do nothing
00588             } elseif ( in_array( $tag, $normalFields ) ) {
00589                 // An XML snippet:
00590                 // <page>
00591                 //     <id>123</id>
00592                 //     <title>Page</title>
00593                 //     <redirect title="NewTitle"/>
00594                 //     ...
00595                 // Because the redirect tag is built differently, we need special handling for that case.
00596                 if ( $tag == 'redirect' ) {
00597                     $pageInfo[$tag] = $this->nodeAttribute( 'title' );
00598                 } else {
00599                     $pageInfo[$tag] = $this->nodeContents();
00600                     if ( $tag == 'title' ) {
00601                         $title = $this->processTitle( $pageInfo['title'] );
00602 
00603                         if ( !$title ) {
00604                             $badTitle = true;
00605                             $skip = true;
00606                         }
00607 
00608                         $this->pageCallback( $title );
00609                         list( $pageInfo['_title'], $origTitle ) = $title;
00610                     }
00611                 }
00612             } elseif ( $tag == 'revision' ) {
00613                 $this->handleRevision( $pageInfo );
00614             } elseif ( $tag == 'upload' ) {
00615                 $this->handleUpload( $pageInfo );
00616             } elseif ( $tag != '#text' ) {
00617                 $this->warn( "Unhandled page XML tag $tag" );
00618                 $skip = true;
00619             }
00620         }
00621 
00622         $this->pageOutCallback( $pageInfo['_title'], $origTitle,
00623                     $pageInfo['revisionCount'],
00624                     $pageInfo['successfulRevisionCount'],
00625                     $pageInfo );
00626     }
00627 
00631     private function handleRevision( &$pageInfo ) {
00632         $this->debug( "Enter revision handler" );
00633         $revisionInfo = array();
00634 
00635         $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );
00636 
00637         $skip = false;
00638 
00639         while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00640             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00641                     $this->reader->name == 'revision' ) {
00642                 break;
00643             }
00644 
00645             $tag = $this->reader->name;
00646 
00647             if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
00648                 $this, $pageInfo, $revisionInfo
00649             ) ) ) {
00650                 // Do nothing
00651             } elseif ( in_array( $tag, $normalFields ) ) {
00652                 $revisionInfo[$tag] = $this->nodeContents();
00653             } elseif ( $tag == 'contributor' ) {
00654                 $revisionInfo['contributor'] = $this->handleContributor();
00655             } elseif ( $tag != '#text' ) {
00656                 $this->warn( "Unhandled revision XML tag $tag" );
00657                 $skip = true;
00658             }
00659         }
00660 
00661         $pageInfo['revisionCount']++;
00662         if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
00663             $pageInfo['successfulRevisionCount']++;
00664         }
00665     }
00666 
00672     private function processRevision( $pageInfo, $revisionInfo ) {
00673         $revision = new WikiRevision;
00674 
00675         if ( isset( $revisionInfo['id'] ) ) {
00676             $revision->setID( $revisionInfo['id'] );
00677         }
00678         if ( isset( $revisionInfo['model'] ) ) {
00679             $revision->setModel( $revisionInfo['model'] );
00680         }
00681         if ( isset( $revisionInfo['format'] ) ) {
00682             $revision->setFormat( $revisionInfo['format'] );
00683         }
00684         $revision->setTitle( $pageInfo['_title'] );
00685 
00686         if ( isset( $revisionInfo['text'] ) ) {
00687             $handler = $revision->getContentHandler();
00688             $text = $handler->importTransform(
00689                 $revisionInfo['text'],
00690                 $revision->getFormat() );
00691 
00692             $revision->setText( $text );
00693         }
00694         if ( isset( $revisionInfo['timestamp'] ) ) {
00695             $revision->setTimestamp( $revisionInfo['timestamp'] );
00696         } else {
00697             $revision->setTimestamp( wfTimestampNow() );
00698         }
00699 
00700         if ( isset( $revisionInfo['comment'] ) ) {
00701             $revision->setComment( $revisionInfo['comment'] );
00702         }
00703 
00704         if ( isset( $revisionInfo['minor'] ) ) {
00705             $revision->setMinor( true );
00706         }
00707         if ( isset( $revisionInfo['contributor']['ip'] ) ) {
00708             $revision->setUserIP( $revisionInfo['contributor']['ip'] );
00709         }
00710         if ( isset( $revisionInfo['contributor']['username'] ) ) {
00711             $revision->setUserName( $revisionInfo['contributor']['username'] );
00712         }
00713         $revision->setNoUpdates( $this->mNoUpdates );
00714 
00715         return $this->revisionCallback( $revision );
00716     }
00717 
00722     private function handleUpload( &$pageInfo ) {
00723         $this->debug( "Enter upload handler" );
00724         $uploadInfo = array();
00725 
00726         $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
00727                     'src', 'size', 'sha1base36', 'archivename', 'rel' );
00728 
00729         $skip = false;
00730 
00731         while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00732             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00733                     $this->reader->name == 'upload' ) {
00734                 break;
00735             }
00736 
00737             $tag = $this->reader->name;
00738 
00739             if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
00740                 $this, $pageInfo
00741             ) ) ) {
00742                 // Do nothing
00743             } elseif ( in_array( $tag, $normalFields ) ) {
00744                 $uploadInfo[$tag] = $this->nodeContents();
00745             } elseif ( $tag == 'contributor' ) {
00746                 $uploadInfo['contributor'] = $this->handleContributor();
00747             } elseif ( $tag == 'contents' ) {
00748                 $contents = $this->nodeContents();
00749                 $encoding = $this->reader->getAttribute( 'encoding' );
00750                 if ( $encoding === 'base64' ) {
00751                     $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
00752                     $uploadInfo['isTempSrc'] = true;
00753                 }
00754             } elseif ( $tag != '#text' ) {
00755                 $this->warn( "Unhandled upload XML tag $tag" );
00756                 $skip = true;
00757             }
00758         }
00759 
00760         if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
00761             $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
00762             if ( file_exists( $path ) ) {
00763                 $uploadInfo['fileSrc'] = $path;
00764                 $uploadInfo['isTempSrc'] = false;
00765             }
00766         }
00767 
00768         if ( $this->mImportUploads ) {
00769             return $this->processUpload( $pageInfo, $uploadInfo );
00770         }
00771     }
00772 
00777     private function dumpTemp( $contents ) {
00778         $filename = tempnam( wfTempDir(), 'importupload' );
00779         file_put_contents( $filename, $contents );
00780         return $filename;
00781     }
00782 
00788     private function processUpload( $pageInfo, $uploadInfo ) {
00789         $revision = new WikiRevision;
00790         $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
00791 
00792         $revision->setTitle( $pageInfo['_title'] );
00793         $revision->setID( $pageInfo['id'] );
00794         $revision->setTimestamp( $uploadInfo['timestamp'] );
00795         $revision->setText( $text );
00796         $revision->setFilename( $uploadInfo['filename'] );
00797         if ( isset( $uploadInfo['archivename'] ) ) {
00798             $revision->setArchiveName( $uploadInfo['archivename'] );
00799         }
00800         $revision->setSrc( $uploadInfo['src'] );
00801         if ( isset( $uploadInfo['fileSrc'] ) ) {
00802             $revision->setFileSrc( $uploadInfo['fileSrc'],
00803                 !empty( $uploadInfo['isTempSrc'] ) );
00804         }
00805         if ( isset( $uploadInfo['sha1base36'] ) ) {
00806             $revision->setSha1Base36( $uploadInfo['sha1base36'] );
00807         }
00808         $revision->setSize( intval( $uploadInfo['size'] ) );
00809         $revision->setComment( $uploadInfo['comment'] );
00810 
00811         if ( isset( $uploadInfo['contributor']['ip'] ) ) {
00812             $revision->setUserIP( $uploadInfo['contributor']['ip'] );
00813         }
00814         if ( isset( $uploadInfo['contributor']['username'] ) ) {
00815             $revision->setUserName( $uploadInfo['contributor']['username'] );
00816         }
00817         $revision->setNoUpdates( $this->mNoUpdates );
00818 
00819         return call_user_func( $this->mUploadCallback, $revision );
00820     }
00821 
00825     private function handleContributor() {
00826         $fields = array( 'id', 'ip', 'username' );
00827         $info = array();
00828 
00829         while ( $this->reader->read() ) {
00830             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00831                     $this->reader->name == 'contributor' ) {
00832                 break;
00833             }
00834 
00835             $tag = $this->reader->name;
00836 
00837             if ( in_array( $tag, $fields ) ) {
00838                 $info[$tag] = $this->nodeContents();
00839             }
00840         }
00841 
00842         return $info;
00843     }
00844 
00849     private function processTitle( $text ) {
00850         global $wgCommandLineMode;
00851 
00852         $workTitle = $text;
00853         $origTitle = Title::newFromText( $workTitle );
00854 
00855         if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
00856             # makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map)
00857             # and than dbKey can begin with a lowercase char
00858             $title = Title::makeTitleSafe( $this->mTargetNamespace,
00859                 $origTitle->getDBkey() );
00860         } else {
00861             if ( !is_null( $this->mTargetRootPage ) ) {
00862                 $workTitle = $this->mTargetRootPage . '/' . $workTitle;
00863             }
00864             $title = Title::newFromText( $workTitle );
00865         }
00866 
00867         if ( is_null( $title ) ) {
00868             # Invalid page title? Ignore the page
00869             $this->notice( 'import-error-invalid', $workTitle );
00870             return false;
00871         } elseif ( $title->isExternal() ) {
00872             $this->notice( 'import-error-interwiki', $title->getPrefixedText() );
00873             return false;
00874         } elseif ( !$title->canExist() ) {
00875             $this->notice( 'import-error-special', $title->getPrefixedText() );
00876             return false;
00877         } elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
00878             # Do not import if the importing wiki user cannot edit this page
00879             $this->notice( 'import-error-edit', $title->getPrefixedText() );
00880             return false;
00881         } elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
00882             # Do not import if the importing wiki user cannot create this page
00883             $this->notice( 'import-error-create', $title->getPrefixedText() );
00884             return false;
00885         }
00886 
00887         return array( $title, $origTitle );
00888     }
00889 }
00890 
/**
 * Stream wrapper that exposes a registered import source as a read-only
 * stream, so that it can be opened by URL ("<scheme>://<id>").
 */
class UploadSourceAdapter {
    /** @var array Registered sources, keyed by the id used as URL host */
    public static $sourceRegistrations = array();

    /** @var object Source being read; must offer atEnd() and readChunk() */
    private $mSource;

    /** @var string Data read from the source but not yet handed out */
    private $mBuffer = '';

    /** @var int Number of bytes handed out so far */
    private $mPosition = 0;

    /**
     * Registers a source and returns the id under which it can be opened.
     *
     * @param ImportStreamSource $source
     * @return string Id to use as the host part of the stream URL
     */
    static function registerSource( ImportStreamSource $source ) {
        $id = wfRandomString();

        self::$sourceRegistrations[$id] = $source;

        return $id;
    }

    /**
     * Opens the stream for the source whose id is the host part of $path.
     *
     * @param string $path
     * @param string $mode Unused
     * @param int $options Unused
     * @param string &$opened_path Unused
     * @return bool False when the path has no host or the id is unknown
     */
    function stream_open( $path, $mode, $options, &$opened_path ) {
        $url = parse_url( $path );
        if ( !is_array( $url ) || !isset( $url['host'] ) ) {
            return false;
        }
        $id = $url['host'];

        if ( !isset( self::$sourceRegistrations[$id] ) ) {
            return false;
        }

        $this->mSource = self::$sourceRegistrations[$id];

        return true;
    }

    /**
     * Returns up to $count bytes, pulling chunks from the source until the
     * buffer is large enough, the source is exhausted, or a chunk comes
     * back empty.
     *
     * @param int $count
     * @return string
     */
    function stream_read( $count ) {
        while ( !$this->mSource->atEnd() && strlen( $this->mBuffer ) < $count ) {
            $read = (string)$this->mSource->readChunk();

            if ( !strlen( $read ) ) {
                break;
            }

            $this->mBuffer .= $read;
        }

        // substr() may yield false instead of '' on older PHP versions
        $return = (string)substr( $this->mBuffer, 0, $count );
        $this->mBuffer = (string)substr( $this->mBuffer, $count );

        $this->mPosition += strlen( $return );

        return $return;
    }

    /**
     * Writing is not supported.
     *
     * @param string $data
     * @return bool Always false
     */
    function stream_write( $data ) {
        return false;
    }

    /**
     * @return int Number of bytes handed out so far
     */
    function stream_tell() {
        return $this->mPosition;
    }

    /**
     * The stream is at its end only when the source is exhausted AND the
     * internal buffer has been drained; the source alone may report its end
     * while bytes are still waiting in the buffer.
     *
     * @return bool
     */
    function stream_eof() {
        return !strlen( $this->mBuffer ) && $this->mSource->atEnd();
    }

    /**
     * Returns a stat array in which every field is zero.
     *
     * @return array
     */
    function url_stat() {
        $result = array();

        $result['dev'] = $result[0] = 0;
        $result['ino'] = $result[1] = 0;
        $result['mode'] = $result[2] = 0;
        $result['nlink'] = $result[3] = 0;
        $result['uid'] = $result[4] = 0;
        $result['gid'] = $result[5] = 0;
        $result['rdev'] = $result[6] = 0;
        $result['size'] = $result[7] = 0;
        $result['atime'] = $result[8] = 0;
        $result['mtime'] = $result[9] = 0;
        $result['ctime'] = $result[10] = 0;
        $result['blksize'] = $result[11] = 0;
        $result['blocks'] = $result[12] = 0;

        return $result;
    }
}
01011 
/**
 * Holds the data of one imported item (a page revision, a log entry or an
 * uploaded file version) and knows how to write it to the local wiki.
 *
 * The setters are used to fill the object in; importOldRevision(),
 * importLogItem() and importUpload() then perform the actual import.
 */
class WikiRevision {
    /** @var mixed Not referenced by any method of this class */
    public $importer = null;

    /** @var object|null Title object of the page; see setTitle() */
    public $title = null;

    /** @var int Identifier as given to setID() */
    public $id = 0;

    /** @var string Timestamp in TS_MW format; see setTimestamp() */
    public $timestamp = "20010115000000";

    /** @var int Not referenced by any method of this class */
    public $user = 0;

    /** @var string User name or IP address of the author */
    public $user_text = "";

    /** @var string|null Content model ID; resolved lazily by getModel() */
    public $model = null;

    /** @var string|null Serialization format; resolved lazily by getFormat() */
    public $format = null;

    /** @var string Serialized revision text */
    public $text = "";

    /** @var int Size as given to setSize() */
    protected $size;

    /** @var object|null Content object; built lazily by getContent() */
    public $content = null;

    /** @var object|null Content handler; resolved lazily by getContentHandler() */
    protected $contentHandler = null;

    /** @var string Edit summary / log comment */
    public $comment = "";

    /** @var bool Minor-edit flag */
    public $minor = false;

    /** @var string Log type (log items only) */
    public $type = "";

    /** @var string Log action (log items only) */
    public $action = "";

    /** @var string Log parameters (log items only) */
    public $params = "";

    /** @var string Local path of the file to import (uploads only) */
    public $fileSrc = '';

    /** @var string|bool SHA-1 of the file in base 36, or false if unknown */
    public $sha1base36 = false;

    /**
     * @var bool Whether $fileSrc is a temporary file that may be deleted.
     *   Kept in sync with $fileIsTemp by setFileSrc(); read by isTempSrc().
     */
    public $isTemp = false;

    /** @var string Archive name of an old file version (uploads only) */
    public $archiveName = '';

    /** @var string File name as given to setFilename() */
    protected $filename;

    /** @var string Remote location the file can be downloaded from */
    protected $src;

    /** @var bool Same flag as $isTemp; set by setFileSrc() */
    public $fileIsTemp;

    /** @var bool When true, importOldRevision() skips the post-edit updates */
    private $mNoUpdates = false;

    /**
     * Set the page title.
     *
     * @param object $title Title object
     * @throws MWException If $title is null or any other non-object value
     */
    function setTitle( $title ) {
        if ( is_object( $title ) ) {
            $this->title = $title;
        } elseif ( is_null( $title ) ) {
            throw new MWException( "WikiRevision given a null title in import. "
                . "You may need to adjust \$wgLegalTitleChars." );
        } else {
            throw new MWException( "WikiRevision given non-object title in import." );
        }
    }

    /**
     * @param int $id
     */
    function setID( $id ) {
        $this->id = $id;
    }

    /**
     * Set the timestamp; the value is converted to TS_MW format.
     *
     * @param string $ts Timestamp, e.g. "2003-08-05T18:30:02Z"
     */
    function setTimestamp( $ts ) {
        # 2003-08-05T18:30:02Z
        $this->timestamp = wfTimestamp( TS_MW, $ts );
    }

    /**
     * @param string $user User name of the author
     */
    function setUsername( $user ) {
        $this->user_text = $user;
    }

    /**
     * Stores the IP address in the same field as a user name.
     *
     * @param string $ip
     */
    function setUserIP( $ip ) {
        $this->user_text = $ip;
    }

    /**
     * @param string $model Content model ID
     */
    function setModel( $model ) {
        $this->model = $model;
    }

    /**
     * @param string $format Serialization format
     */
    function setFormat( $format ) {
        $this->format = $format;
    }

    /**
     * @param string $text Serialized revision text
     */
    function setText( $text ) {
        $this->text = $text;
    }

    /**
     * @param string $text Edit summary / log comment
     */
    function setComment( $text ) {
        $this->comment = $text;
    }

    /**
     * @param mixed $minor Cast to bool before being stored
     */
    function setMinor( $minor ) {
        $this->minor = (bool)$minor;
    }

    /**
     * @param string $src Location downloadSource() fetches the file from
     */
    function setSrc( $src ) {
        $this->src = $src;
    }

    /**
     * Set the local file to import and whether it is temporary.
     *
     * @param string $src Local file path
     * @param bool $isTemp Whether the file may be deleted after the import
     */
    function setFileSrc( $src, $isTemp ) {
        $this->fileSrc = $src;
        $this->fileIsTemp = $isTemp;
        // isTempSrc() reads $isTemp, so it has to be updated as well;
        // otherwise the flag passed in here would never reach importUpload().
        $this->isTemp = $isTemp;
    }

    /**
     * @param string $sha1base36 SHA-1 of the file, base 36
     */
    function setSha1Base36( $sha1base36 ) {
        $this->sha1base36 = $sha1base36;
    }

    /**
     * @param string $filename
     */
    function setFilename( $filename ) {
        $this->filename = $filename;
    }

    /**
     * @param string $archiveName
     */
    function setArchiveName( $archiveName ) {
        $this->archiveName = $archiveName;
    }

    /**
     * @param int|string $size Converted with intval() before being stored
     */
    function setSize( $size ) {
        $this->size = intval( $size );
    }

    /**
     * @param string $type Log type
     */
    function setType( $type ) {
        $this->type = $type;
    }

    /**
     * @param string $action Log action
     */
    function setAction( $action ) {
        $this->action = $action;
    }

    /**
     * @param string $params Log parameters
     */
    function setParams( $params ) {
        $this->params = $params;
    }

    /**
     * @param bool $noupdates True to make importOldRevision() skip doEditUpdates()
     */
    public function setNoUpdates( $noupdates ) {
        $this->mNoUpdates = $noupdates;
    }

    /**
     * @return object|null Title object, or null if none was set
     */
    function getTitle() {
        return $this->title;
    }

    /**
     * @return int
     */
    function getID() {
        return $this->id;
    }

    /**
     * @return string Timestamp in TS_MW format
     */
    function getTimestamp() {
        return $this->timestamp;
    }

    /**
     * @return string User name or IP address stored in $user_text
     */
    function getUser() {
        return $this->user_text;
    }

    /**
     * Get the raw serialized text.
     *
     * @deprecated Flagged via ContentHandler::deprecated() as of 1.21
     * @return string
     */
    function getText() {
        ContentHandler::deprecated( __METHOD__, '1.21' );

        return $this->text;
    }

    /**
     * Get the content handler for this revision's model, looking it up on
     * first use and caching it afterwards.
     *
     * @return object
     */
    function getContentHandler() {
        if ( is_null( $this->contentHandler ) ) {
            $this->contentHandler = ContentHandler::getForModelID( $this->getModel() );
        }

        return $this->contentHandler;
    }

    /**
     * Get the content object, unserializing $text on first use and caching
     * the result afterwards.
     *
     * @return object
     */
    function getContent() {
        if ( is_null( $this->content ) ) {
            $handler = $this->getContentHandler();
            $this->content = $handler->unserializeContent( $this->text, $this->getFormat() );
        }

        return $this->content;
    }

    /**
     * Get the content model; falls back to the title's content model when
     * none was set explicitly.
     *
     * @return string
     */
    function getModel() {
        if ( is_null( $this->model ) ) {
            $this->model = $this->getTitle()->getContentModel();
        }

        return $this->model;
    }

    /**
     * Get the serialization format; falls back to the content handler's
     * default format when none was set explicitly.
     *
     * @return string
     */
    function getFormat() {
        if ( is_null( $this->format ) ) {
            $this->format = $this->getContentHandler()->getDefaultFormat();
        }

        return $this->format;
    }

    /**
     * @return string
     */
    function getComment() {
        return $this->comment;
    }

    /**
     * @return bool
     */
    function getMinor() {
        return $this->minor;
    }

    /**
     * @return string Location set with setSrc()
     */
    function getSrc() {
        return $this->src;
    }

    /**
     * Get the file's SHA-1 converted from base 36 to base 16.
     *
     * @return string|bool The converted hash, or false if no hash was set
     */
    function getSha1() {
        if ( $this->sha1base36 ) {
            return wfBaseConvert( $this->sha1base36, 36, 16 );
        }
        return false;
    }

    /**
     * @return string Local file path set with setFileSrc()
     */
    function getFileSrc() {
        return $this->fileSrc;
    }

    /**
     * @return bool Whether the file set with setFileSrc() is temporary
     */
    function isTempSrc() {
        return $this->isTemp;
    }

    /**
     * @return string
     */
    function getFilename() {
        return $this->filename;
    }

    /**
     * @return string
     */
    function getArchiveName() {
        return $this->archiveName;
    }

    /**
     * @return int
     */
    function getSize() {
        return $this->size;
    }

    /**
     * @return string Log type
     */
    function getType() {
        return $this->type;
    }

    /**
     * @return string Log action
     */
    function getAction() {
        return $this->action;
    }

    /**
     * @return string Log parameters
     */
    function getParams() {
        return $this->params;
    }

    /**
     * Insert this revision into the database, creating the page if needed.
     *
     * A revision is considered to exist already when the page has a row
     * with the same timestamp, user text and comment; in that case nothing
     * is written.
     *
     * @return bool False if the revision was skipped as a duplicate, true otherwise
     */
    function importOldRevision() {
        $dbw = wfGetDB( DB_MASTER );

        # Sneak a single revision into place
        $user = User::newFromName( $this->getUser() );
        if ( $user ) {
            $userId = intval( $user->getId() );
            $userText = $user->getName();
            $userObj = $user;
        } else {
            // No usable user object: keep the raw name and attribute to user ID 0
            $userId = 0;
            $userText = $this->getUser();
            $userObj = new User;
        }

        // avoid memory leak...?
        $linkCache = LinkCache::singleton();
        $linkCache->clear();

        $page = WikiPage::factory( $this->title );
        $page->loadPageData( 'fromdbmaster' );
        if ( !$page->exists() ) {
            # must create the page...
            $pageId = $page->insertOn( $dbw );
            $created = true;
            $oldcountable = null;
        } else {
            $pageId = $page->getId();
            $created = false;

            $prior = $dbw->selectField( 'revision', '1',
                array( 'rev_page' => $pageId,
                    'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
                    'rev_user_text' => $userText,
                    'rev_comment' => $this->getComment() ),
                __METHOD__
            );
            if ( $prior ) {
                // @todo FIXME: This could fail slightly for multiple matches :P
                wfDebug( __METHOD__ . ": skipping existing revision for [[" .
                    $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                return false;
            }
            $oldcountable = $page->isCountable();
        }

        # @todo FIXME: Use original rev_id optionally (better for backups)
        # Insert the row
        $revision = new Revision( array(
            'title' => $this->title,
            'page' => $pageId,
            'content_model' => $this->getModel(),
            'content_format' => $this->getFormat(),
            //XXX: just set 'content' => $this->getContent()?
            'text' => $this->getContent()->serialize( $this->getFormat() ),
            'comment' => $this->getComment(),
            'user' => $userId,
            'user_text' => $userText,
            'timestamp' => $this->timestamp,
            'minor_edit' => $this->minor,
            ) );
        $revision->insertOn( $dbw );
        $changed = $page->updateIfNewerOn( $dbw, $revision );

        if ( $changed !== false && !$this->mNoUpdates ) {
            wfDebug( __METHOD__ . ": running updates\n" );
            $page->doEditUpdates(
                $revision,
                $userObj,
                array( 'created' => $created, 'oldcountable' => $oldcountable )
            );
        }

        return true;
    }

    /**
     * Insert this log entry into the logging table.
     *
     * Nothing is written when no title is set, or when a row with the same
     * type, action, timestamp, target, comment and parameters already exists.
     */
    function importLogItem() {
        $dbw = wfGetDB( DB_MASTER );
        # @todo FIXME: This will not record autoblocks
        if ( !$this->getTitle() ) {
            wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log item, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        # Check if it exists already
        // @todo FIXME: Use original log ID (better for backups)
        $prior = $dbw->selectField( 'logging', '1',
            array( 'log_type' => $this->getType(),
                'log_action' => $this->getAction(),
                'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                'log_namespace' => $this->getTitle()->getNamespace(),
                'log_title' => $this->getTitle()->getDBkey(),
                'log_comment' => $this->getComment(),
                #'log_user_text' => $this->user_text,
                'log_params' => $this->params ),
            __METHOD__
        );
        // @todo FIXME: This could fail slightly for multiple matches :P
        if ( $prior ) {
            wfDebug( __METHOD__
                . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp "
                . $this->timestamp . "\n" );
            return;
        }
        $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
        $data = array(
            'log_id' => $log_id,
            'log_type' => $this->type,
            'log_action' => $this->action,
            'log_timestamp' => $dbw->timestamp( $this->timestamp ),
            'log_user' => User::idFromName( $this->user_text ),
            #'log_user_text' => $this->user_text,
            'log_namespace' => $this->getTitle()->getNamespace(),
            'log_title' => $this->getTitle()->getDBkey(),
            'log_comment' => $this->getComment(),
            'log_params' => $this->params
        );
        $dbw->insert( 'logging', $data, __METHOD__ );
    }

    /**
     * Import this file version into the local file repository.
     *
     * The file is taken from the path set with setFileSrc(), or downloaded
     * with downloadSource() when no path was set. If a SHA-1 was supplied
     * it must match the file's contents.
     *
     * @return bool True if the upload status is good, false on any failure
     */
    function importUpload() {
        # Construct a file
        $archiveName = $this->getArchiveName();
        if ( $archiveName ) {
            wfDebug( __METHOD__ . ": Importing archived file as $archiveName\n" );
            $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                RepoGroup::singleton()->getLocalRepo(), $archiveName );
        } else {
            $file = wfLocalFile( $this->getTitle() );
            wfDebug( __METHOD__ . ': Importing new file as ' . $file->getName() . "\n" );
            if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
                // The existing file is newer, so this version goes into the archive
                $archiveName = $file->getTimestamp() . '!' . $file->getName();
                $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                    RepoGroup::singleton()->getLocalRepo(), $archiveName );
                wfDebug( __METHOD__ . ": File already exists; importing as $archiveName\n" );
            }
        }
        if ( !$file ) {
            wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
            return false;
        }

        # Get the file source or download if necessary
        $source = $this->getFileSrc();
        $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
        if ( !$source ) {
            $source = $this->downloadSource();
            // A downloaded copy is always ours to delete
            $flags |= File::DELETE_SOURCE;
        }
        if ( !$source ) {
            wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
            return false;
        }
        $sha1 = $this->getSha1();
        if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
            if ( $flags & File::DELETE_SOURCE ) {
                # Broken file; delete it if it is a temporary file
                unlink( $source );
            }
            wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
            return false;
        }

        $user = User::newFromName( $this->user_text );

        # Do the actual upload
        if ( $archiveName ) {
            $status = $file->uploadOld( $source, $archiveName,
                $this->getTimestamp(), $this->getComment(), $user, $flags );
        } else {
            $status = $file->upload( $source, $this->getComment(), $this->getComment(),
                $flags, false, $this->getTimestamp(), $user );
        }

        if ( $status->isGood() ) {
            wfDebug( __METHOD__ . ": Successful\n" );
            return true;
        } else {
            wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
            return false;
        }
    }

    /**
     * Download the file from the location set with setSrc() into a new
     * temporary file.
     *
     * @return string|bool Path of the temporary file, or false if uploads are
     *   disabled, the temporary file could not be created or written, or
     *   the download returned nothing
     */
    function downloadSource() {
        global $wgEnableUploads;
        if ( !$wgEnableUploads ) {
            return false;
        }

        $tempo = tempnam( wfTempDir(), 'download' );
        if ( $tempo === false ) {
            wfDebug( "IMPORT: couldn't create a temp file\n" );
            return false;
        }
        $f = fopen( $tempo, 'wb' );
        if ( !$f ) {
            wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
            return false;
        }

        // @todo FIXME!
        $src = $this->getSrc();
        $data = Http::get( $src );
        if ( !$data ) {
            wfDebug( "IMPORT: couldn't fetch source $src\n" );
            fclose( $f );
            unlink( $tempo );
            return false;
        }

        $written = fwrite( $f, $data );
        fclose( $f );
        if ( $written !== strlen( $data ) ) {
            // A short write would leave a truncated file behind; do not hand it out
            wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
            unlink( $tempo );
            return false;
        }

        return $tempo;
    }

}
01640 
/**
 * Import source backed by an in-memory string, handed out as one chunk.
 */
class ImportStringSource {
    /**
     * @var string The complete data to hand out.
     *   Public because it used to be created dynamically, which made it public.
     */
    public $mString;

    /** @var bool Whether the string has already been returned by readChunk() */
    public $mRead = false;

    /**
     * @param string $string Data to serve
     */
    function __construct( $string ) {
        $this->mString = $string;
        $this->mRead = false;
    }

    /**
     * @return bool True once readChunk() has returned the string
     */
    function atEnd() {
        return $this->mRead;
    }

    /**
     * Return the whole string on the first call.
     *
     * @return string|bool The string, or false on every later call
     */
    function readChunk() {
        if ( $this->atEnd() ) {
            return false;
        }
        $this->mRead = true;
        return $this->mString;
    }
}
01669 
/**
 * Import source backed by an open stream handle, read in fixed-size chunks.
 *
 * The static factories wrap their result in a Status object: a good status
 * carrying the new source, or a fatal status carrying a message key.
 */
class ImportStreamSource {
    /**
     * @var resource Stream handle the data is read from.
     *   Public because it used to be created dynamically, which made it public.
     */
    public $mHandle;

    /**
     * @param resource $handle Open, readable stream handle
     */
    function __construct( $handle ) {
        $this->mHandle = $handle;
    }

    /**
     * @return bool Result of feof() on the handle
     */
    function atEnd() {
        return feof( $this->mHandle );
    }

    /**
     * Read the next chunk of at most 32768 bytes.
     *
     * @return string|bool Result of fread() on the handle
     */
    function readChunk() {
        return fread( $this->mHandle, 32768 );
    }

    /**
     * Create a source that reads from a local file.
     *
     * @param string $filename Path of the file to open
     * @return Status Fatal 'importcantopen' if the file cannot be opened
     */
    static function newFromFile( $filename ) {
        wfSuppressWarnings();
        $file = fopen( $filename, 'rt' );
        wfRestoreWarnings();
        if ( !$file ) {
            return Status::newFatal( "importcantopen" );
        }
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * Create a source that reads from a file uploaded with the current request.
     *
     * @param string $fieldname Name of the upload field in $_FILES
     * @return Status Fatal 'importnofile' if nothing usable was uploaded, or
     *   one of the 'importuploaderror*' keys for upload error codes 1, 2, 3 and 6
     */
    static function newFromUpload( $fieldname = "xmlimport" ) {
        // Look the entry up by value: taking a reference to a missing key
        // would insert a null entry into $_FILES as a side effect.
        $upload = isset( $_FILES[$fieldname] ) ? $_FILES[$fieldname] : null;

        if ( $upload === null || empty( $upload['name'] ) ) {
            return Status::newFatal( 'importnofile' );
        }
        if ( !empty( $upload['error'] ) ) {
            switch ( $upload['error'] ) {
                case 1:
                    # The uploaded file exceeds the upload_max_filesize directive in php.ini.
                    return Status::newFatal( 'importuploaderrorsize' );
                case 2:
                    # The uploaded file exceeds the MAX_FILE_SIZE directive that
                    # was specified in the HTML form.
                    return Status::newFatal( 'importuploaderrorsize' );
                case 3:
                    # The uploaded file was only partially uploaded
                    return Status::newFatal( 'importuploaderrorpartial' );
                case 6:
                    # Missing a temporary folder.
                    return Status::newFatal( 'importuploaderrortemp' );
                # case else: # Currently impossible
            }

        }
        $fname = $upload['tmp_name'];
        if ( is_uploaded_file( $fname ) ) {
            return ImportStreamSource::newFromFile( $fname );
        } else {
            return Status::newFatal( 'importnofile' );
        }
    }

    /**
     * Create a source from the body of an HTTP response.
     *
     * The response is copied into a temporary file, which is then rewound
     * and used as the stream.
     *
     * @param string $url URL to fetch
     * @param string $method HTTP method to use
     * @return Status Fatal 'importcantopen' if the request fails or the
     *   temporary file cannot be created or written
     */
    static function newFromURL( $url, $method = 'GET' ) {
        wfDebug( __METHOD__ . ": opening $url\n" );
        # Use the standard HTTP fetch function; it times out
        # quicker and sorts out user-agent problems which might
        # otherwise prevent importing from large sites, such
        # as the Wikimedia cluster, etc.
        $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
        if ( $data === false ) {
            return Status::newFatal( 'importcantopen' );
        }

        $file = tmpfile();
        if ( $file === false ) {
            wfDebug( __METHOD__ . ": could not create a temporary file\n" );
            return Status::newFatal( 'importcantopen' );
        }
        if ( fwrite( $file, $data ) !== strlen( $data ) ) {
            // A short write would silently truncate the data to import
            wfDebug( __METHOD__ . ": could not write to the temporary file\n" );
            fclose( $file );
            return Status::newFatal( 'importcantopen' );
        }
        fflush( $file );
        fseek( $file, 0 );

        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * Create a source from the Special:Export page of another wiki.
     *
     * @param string $interwiki Interwiki prefix of the remote wiki
     * @param string $page Name of the page to export
     * @param bool $history Whether to add history=1 to the request
     * @param bool $templates Whether to add templates=1 to the request
     * @param int $pageLinkDepth Value for pagelink-depth; omitted when 0
     * @return Status Fatal 'import-noarticle' for an empty page name, fatal
     *   'importbadinterwiki' if the link is not an external title, otherwise
     *   the result of newFromURL()
     */
    public static function newFromInterwiki( $interwiki, $page, $history = false,
        $templates = false, $pageLinkDepth = 0
    ) {
        if ( $page == '' ) {
            return Status::newFatal( 'import-noarticle' );
        }
        $link = Title::newFromText( "$interwiki:Special:Export/$page" );
        if ( is_null( $link ) || !$link->isExternal() ) {
            return Status::newFatal( 'importbadinterwiki' );
        }

        $params = array();
        if ( $history ) {
            $params['history'] = 1;
        }
        if ( $templates ) {
            $params['templates'] = 1;
        }
        if ( $pageLinkDepth ) {
            $params['pagelink-depth'] = $pageLinkDepth;
        }
        $url = $link->getFullURL( $params );
        # For interwikis, use POST to avoid redirects.
        return ImportStreamSource::newFromURL( $url, "POST" );
    }
}