MediaWiki  REL1_20
Import.php
Go to the documentation of this file.
00001 <?php
00033 class WikiImporter {
00034         private $reader = null;
00035         private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
00036         private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
00037         private $mNoticeCallback, $mDebug;
00038         private $mImportUploads, $mImageBasePath;
00039         private $mNoUpdates = false;
00040 
00045         function __construct( $source ) {
00046                 $this->reader = new XMLReader();
00047 
00048                 stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
00049                 $id = UploadSourceAdapter::registerSource( $source );
00050                 if (defined( 'LIBXML_PARSEHUGE' ) ) {
00051                         $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
00052                 } else {
00053                         $this->reader->open( "uploadsource://$id" );
00054                 }
00055 
00056                 // Default callbacks
00057                 $this->setRevisionCallback( array( $this, "importRevision" ) );
00058                 $this->setUploadCallback( array( $this, 'importUpload' ) );
00059                 $this->setLogItemCallback( array( $this, 'importLogItem' ) );
00060                 $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
00061         }
00062 
00063         private function throwXmlError( $err ) {
00064                 $this->debug( "FAILURE: $err" );
00065                 wfDebug( "WikiImporter XML error: $err\n" );
00066         }
00067 
00068         private function debug( $data ) {
00069                 if( $this->mDebug ) {
00070                         wfDebug( "IMPORT: $data\n" );
00071                 }
00072         }
00073 
00074         private function warn( $data ) {
00075                 wfDebug( "IMPORT: $data\n" );
00076         }
00077 
00078         private function notice( $msg /*, $param, ...*/ ) {
00079                 $params = func_get_args();
00080                 array_shift( $params );
00081 
00082                 if ( is_callable( $this->mNoticeCallback ) ) {
00083                         call_user_func( $this->mNoticeCallback, $msg, $params );
00084                 } else { # No ImportReporter -> CLI
00085                         echo wfMessage( $msg, $params )->text() . "\n";
00086                 }
00087         }
00088 
00093         function setDebug( $debug ) {
00094                 $this->mDebug = $debug;
00095         }
00096 
00101         function setNoUpdates( $noupdates ) {
00102                 $this->mNoUpdates = $noupdates;
00103         }
00104 
00111         public function setNoticeCallback( $callback ) {
00112                 return wfSetVar( $this->mNoticeCallback, $callback );
00113         }
00114 
00120         public function setPageCallback( $callback ) {
00121                 $previous = $this->mPageCallback;
00122                 $this->mPageCallback = $callback;
00123                 return $previous;
00124         }
00125 
00135         public function setPageOutCallback( $callback ) {
00136                 $previous = $this->mPageOutCallback;
00137                 $this->mPageOutCallback = $callback;
00138                 return $previous;
00139         }
00140 
00146         public function setRevisionCallback( $callback ) {
00147                 $previous = $this->mRevisionCallback;
00148                 $this->mRevisionCallback = $callback;
00149                 return $previous;
00150         }
00151 
00157         public function setUploadCallback( $callback ) {
00158                 $previous = $this->mUploadCallback;
00159                 $this->mUploadCallback = $callback;
00160                 return $previous;
00161         }
00162 
00168         public function setLogItemCallback( $callback ) {
00169                 $previous = $this->mLogItemCallback;
00170                 $this->mLogItemCallback = $callback;
00171                 return $previous;
00172         }
00173 
00179         public function setSiteInfoCallback( $callback ) {
00180                 $previous = $this->mSiteInfoCallback;
00181                 $this->mSiteInfoCallback = $callback;
00182                 return $previous;
00183         }
00184 
00190         public function setTargetNamespace( $namespace ) {
00191                 if( is_null( $namespace ) ) {
00192                         // Don't override namespaces
00193                         $this->mTargetNamespace = null;
00194                 } elseif( $namespace >= 0 ) {
00195                         // @todo FIXME: Check for validity
00196                         $this->mTargetNamespace = intval( $namespace );
00197                 } else {
00198                         return false;
00199                 }
00200         }
00201 
00207         public function setTargetRootPage( $rootpage ) {
00208                 $status = Status::newGood();
00209                 if( is_null( $rootpage ) ) {
00210                         // No rootpage
00211                         $this->mTargetRootPage = null;
00212                 } elseif( $rootpage !== '' ) {
00213                         $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
00214                         $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN );
00215                         if( !$title || $title->isExternal() ) {
00216                                 $status->fatal( 'import-rootpage-invalid' );
00217                         } else {
00218                                 if( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
00219                                         global $wgContLang;
00220 
00221                                         $displayNSText = $title->getNamespace() == NS_MAIN
00222                                                 ? wfMessage( 'blanknamespace' )->text()
00223                                                 : $wgContLang->getNsText( $title->getNamespace() );
00224                                         $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
00225                                 } else {
00226                                         // set namespace to 'all', so the namespace check in processTitle() can passed
00227                                         $this->setTargetNamespace( null );
00228                                         $this->mTargetRootPage = $title->getPrefixedDBKey();
00229                                 }
00230                         }
00231                 }
00232                 return $status;
00233         }
00234 
00238         public function setImageBasePath( $dir ) {
00239                 $this->mImageBasePath = $dir;
00240         }
00241 
00245         public function setImportUploads( $import ) {
00246                 $this->mImportUploads = $import;
00247         }
00248 
00254         public function importRevision( $revision ) {
00255                 $dbw = wfGetDB( DB_MASTER );
00256                 return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
00257         }
00258 
00264         public function importLogItem( $rev ) {
00265                 $dbw = wfGetDB( DB_MASTER );
00266                 return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
00267         }
00268 
00274         public function importUpload( $revision ) {
00275                 $dbw = wfGetDB( DB_MASTER );
00276                 return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
00277         }
00278 
00288         public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
00289                 $args = func_get_args();
00290                 return wfRunHooks( 'AfterImportPage', $args );
00291         }
00292 
00297         public function debugRevisionHandler( &$revision ) {
00298                 $this->debug( "Got revision:" );
00299                 if( is_object( $revision->title ) ) {
00300                         $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
00301                 } else {
00302                         $this->debug( "-- Title: <invalid>" );
00303                 }
00304                 $this->debug( "-- User: " . $revision->user_text );
00305                 $this->debug( "-- Timestamp: " . $revision->timestamp );
00306                 $this->debug( "-- Comment: " . $revision->comment );
00307                 $this->debug( "-- Text: " . $revision->text );
00308         }
00309 
00314         function pageCallback( $title ) {
00315                 if( isset( $this->mPageCallback ) ) {
00316                         call_user_func( $this->mPageCallback, $title );
00317                 }
00318         }
00319 
00328         private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
00329                 if( isset( $this->mPageOutCallback ) ) {
00330                         $args = func_get_args();
00331                         call_user_func_array( $this->mPageOutCallback, $args );
00332                 }
00333         }
00334 
00340         private function revisionCallback( $revision ) {
00341                 if ( isset( $this->mRevisionCallback ) ) {
00342                         return call_user_func_array( $this->mRevisionCallback,
00343                                         array( $revision, $this ) );
00344                 } else {
00345                         return false;
00346                 }
00347         }
00348 
00354         private function logItemCallback( $revision ) {
00355                 if ( isset( $this->mLogItemCallback ) ) {
00356                         return call_user_func_array( $this->mLogItemCallback,
00357                                         array( $revision, $this ) );
00358                 } else {
00359                         return false;
00360                 }
00361         }
00362 
00370         private function nodeContents() {
00371                 if( $this->reader->isEmptyElement ) {
00372                         return "";
00373                 }
00374                 $buffer = "";
00375                 while( $this->reader->read() ) {
00376                         switch( $this->reader->nodeType ) {
00377                         case XmlReader::TEXT:
00378                         case XmlReader::SIGNIFICANT_WHITESPACE:
00379                                 $buffer .= $this->reader->value;
00380                                 break;
00381                         case XmlReader::END_ELEMENT:
00382                                 return $buffer;
00383                         }
00384                 }
00385 
00386                 $this->reader->close();
00387                 return '';
00388         }
00389 
00390         # --------------
00391 
00393         private function dumpElement() {
00394                 static $lookup = null;
00395                 if (!$lookup) {
00396                         $xmlReaderConstants = array(
00397                                 "NONE",
00398                                 "ELEMENT",
00399                                 "ATTRIBUTE",
00400                                 "TEXT",
00401                                 "CDATA",
00402                                 "ENTITY_REF",
00403                                 "ENTITY",
00404                                 "PI",
00405                                 "COMMENT",
00406                                 "DOC",
00407                                 "DOC_TYPE",
00408                                 "DOC_FRAGMENT",
00409                                 "NOTATION",
00410                                 "WHITESPACE",
00411                                 "SIGNIFICANT_WHITESPACE",
00412                                 "END_ELEMENT",
00413                                 "END_ENTITY",
00414                                 "XML_DECLARATION",
00415                                 );
00416                         $lookup = array();
00417 
00418                         foreach( $xmlReaderConstants as $name ) {
00419                                 $lookup[constant("XmlReader::$name")] = $name;
00420                         }
00421                 }
00422 
00423                 print( var_dump(
00424                         $lookup[$this->reader->nodeType],
00425                         $this->reader->name,
00426                         $this->reader->value
00427                 )."\n\n" );
00428         }
00429 
00434         public function doImport() {
00435 
00436                 // Calls to reader->read need to be wrapped in calls to
00437                 // libxml_disable_entity_loader() to avoid local file
00438                 // inclusion attacks (bug 46932).
00439                 $oldDisable = libxml_disable_entity_loader( true );
00440                 $this->reader->read();
00441 
00442                 if ( $this->reader->name != 'mediawiki' ) {
00443                         libxml_disable_entity_loader( $oldDisable );
00444                         throw new MWException( "Expected <mediawiki> tag, got " .
00445                                 $this->reader->name );
00446                 }
00447                 $this->debug( "<mediawiki> tag is correct." );
00448 
00449                 $this->debug( "Starting primary dump processing loop." );
00450 
00451                 $keepReading = $this->reader->read();
00452                 $skip = false;
00453                 while ( $keepReading ) {
00454                         $tag = $this->reader->name;
00455                         $type = $this->reader->nodeType;
00456 
00457                         if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this ) ) {
00458                                 // Do nothing
00459                         } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
00460                                 break;
00461                         } elseif ( $tag == 'siteinfo' ) {
00462                                 $this->handleSiteInfo();
00463                         } elseif ( $tag == 'page' ) {
00464                                 $this->handlePage();
00465                         } elseif ( $tag == 'logitem' ) {
00466                                 $this->handleLogItem();
00467                         } elseif ( $tag != '#text' ) {
00468                                 $this->warn( "Unhandled top-level XML tag $tag" );
00469 
00470                                 $skip = true;
00471                         }
00472 
00473                         if ($skip) {
00474                                 $keepReading = $this->reader->next();
00475                                 $skip = false;
00476                                 $this->debug( "Skip" );
00477                         } else {
00478                                 $keepReading = $this->reader->read();
00479                         }
00480                 }
00481 
00482                 libxml_disable_entity_loader( $oldDisable );
00483                 return true;
00484         }
00485 
00490         private function handleSiteInfo() {
00491                 // Site info is useful, but not actually used for dump imports.
00492                 // Includes a quick short-circuit to save performance.
00493                 if ( ! $this->mSiteInfoCallback ) {
00494                         $this->reader->next();
00495                         return true;
00496                 }
00497                 throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
00498         }
00499 
00500         private function handleLogItem() {
00501                 $this->debug( "Enter log item handler." );
00502                 $logInfo = array();
00503 
00504                 // Fields that can just be stuffed in the pageInfo object
00505                 $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
00506                                         'logtitle', 'params' );
00507 
00508                 while ( $this->reader->read() ) {
00509                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00510                                         $this->reader->name == 'logitem') {
00511                                 break;
00512                         }
00513 
00514                         $tag = $this->reader->name;
00515 
00516                         if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
00517                                                 $this, $logInfo ) ) {
00518                                 // Do nothing
00519                         } elseif ( in_array( $tag, $normalFields ) ) {
00520                                 $logInfo[$tag] = $this->nodeContents();
00521                         } elseif ( $tag == 'contributor' ) {
00522                                 $logInfo['contributor'] = $this->handleContributor();
00523                         } elseif ( $tag != '#text' ) {
00524                                 $this->warn( "Unhandled log-item XML tag $tag" );
00525                         }
00526                 }
00527 
00528                 $this->processLogItem( $logInfo );
00529         }
00530 
00535         private function processLogItem( $logInfo ) {
00536                 $revision = new WikiRevision;
00537 
00538                 $revision->setID( $logInfo['id'] );
00539                 $revision->setType( $logInfo['type'] );
00540                 $revision->setAction( $logInfo['action'] );
00541                 $revision->setTimestamp( $logInfo['timestamp'] );
00542                 $revision->setParams( $logInfo['params'] );
00543                 $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
00544                 $revision->setNoUpdates( $this->mNoUpdates );
00545 
00546                 if ( isset( $logInfo['comment'] ) ) {
00547                         $revision->setComment( $logInfo['comment'] );
00548                 }
00549 
00550                 if ( isset( $logInfo['contributor']['ip'] ) ) {
00551                         $revision->setUserIP( $logInfo['contributor']['ip'] );
00552                 }
00553                 if ( isset( $logInfo['contributor']['username'] ) ) {
00554                         $revision->setUserName( $logInfo['contributor']['username'] );
00555                 }
00556 
00557                 return $this->logItemCallback( $revision );
00558         }
00559 
00560         private function handlePage() {
00561                 // Handle page data.
00562                 $this->debug( "Enter page handler." );
00563                 $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
00564 
00565                 // Fields that can just be stuffed in the pageInfo object
00566                 $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
00567 
00568                 $skip = false;
00569                 $badTitle = false;
00570 
00571                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00572                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00573                                         $this->reader->name == 'page') {
00574                                 break;
00575                         }
00576 
00577                         $tag = $this->reader->name;
00578 
00579                         if ( $badTitle ) {
00580                                 // The title is invalid, bail out of this page
00581                                 $skip = true;
00582                         } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
00583                                                 &$pageInfo ) ) ) {
00584                                 // Do nothing
00585                         } elseif ( in_array( $tag, $normalFields ) ) {
00586                                 $pageInfo[$tag] = $this->nodeContents();
00587                                 if ( $tag == 'title' ) {
00588                                         $title = $this->processTitle( $pageInfo['title'] );
00589 
00590                                         if ( !$title ) {
00591                                                 $badTitle = true;
00592                                                 $skip = true;
00593                                         }
00594 
00595                                         $this->pageCallback( $title );
00596                                         list( $pageInfo['_title'], $origTitle ) = $title;
00597                                 }
00598                         } elseif ( $tag == 'revision' ) {
00599                                 $this->handleRevision( $pageInfo );
00600                         } elseif ( $tag == 'upload' ) {
00601                                 $this->handleUpload( $pageInfo );
00602                         } elseif ( $tag != '#text' ) {
00603                                 $this->warn( "Unhandled page XML tag $tag" );
00604                                 $skip = true;
00605                         }
00606                 }
00607 
00608                 $this->pageOutCallback( $pageInfo['_title'], $origTitle,
00609                                         $pageInfo['revisionCount'],
00610                                         $pageInfo['successfulRevisionCount'],
00611                                         $pageInfo );
00612         }
00613 
00617         private function handleRevision( &$pageInfo ) {
00618                 $this->debug( "Enter revision handler" );
00619                 $revisionInfo = array();
00620 
00621                 $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );
00622 
00623                 $skip = false;
00624 
00625                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00626                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00627                                         $this->reader->name == 'revision') {
00628                                 break;
00629                         }
00630 
00631                         $tag = $this->reader->name;
00632 
00633                         if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this,
00634                                                 $pageInfo, $revisionInfo ) ) {
00635                                 // Do nothing
00636                         } elseif ( in_array( $tag, $normalFields ) ) {
00637                                 $revisionInfo[$tag] = $this->nodeContents();
00638                         } elseif ( $tag == 'contributor' ) {
00639                                 $revisionInfo['contributor'] = $this->handleContributor();
00640                         } elseif ( $tag != '#text' ) {
00641                                 $this->warn( "Unhandled revision XML tag $tag" );
00642                                 $skip = true;
00643                         }
00644                 }
00645 
00646                 $pageInfo['revisionCount']++;
00647                 if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
00648                         $pageInfo['successfulRevisionCount']++;
00649                 }
00650         }
00651 
00657         private function processRevision( $pageInfo, $revisionInfo ) {
00658                 $revision = new WikiRevision;
00659 
00660                 if( isset( $revisionInfo['id'] ) ) {
00661                         $revision->setID( $revisionInfo['id'] );
00662                 }
00663                 if ( isset( $revisionInfo['text'] ) ) {
00664                         $revision->setText( $revisionInfo['text'] );
00665                 }
00666                 $revision->setTitle( $pageInfo['_title'] );
00667 
00668                 if ( isset( $revisionInfo['timestamp'] ) ) {
00669                         $revision->setTimestamp( $revisionInfo['timestamp'] );
00670                 } else {
00671                         $revision->setTimestamp( wfTimestampNow() );
00672                 }
00673 
00674                 if ( isset( $revisionInfo['comment'] ) ) {
00675                         $revision->setComment( $revisionInfo['comment'] );
00676                 }
00677 
00678                 if ( isset( $revisionInfo['minor'] ) ) {
00679                         $revision->setMinor( true );
00680                 }
00681                 if ( isset( $revisionInfo['contributor']['ip'] ) ) {
00682                         $revision->setUserIP( $revisionInfo['contributor']['ip'] );
00683                 }
00684                 if ( isset( $revisionInfo['contributor']['username'] ) ) {
00685                         $revision->setUserName( $revisionInfo['contributor']['username'] );
00686                 }
00687                 $revision->setNoUpdates( $this->mNoUpdates );
00688 
00689                 return $this->revisionCallback( $revision );
00690         }
00691 
00696         private function handleUpload( &$pageInfo ) {
00697                 $this->debug( "Enter upload handler" );
00698                 $uploadInfo = array();
00699 
00700                 $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
00701                                         'src', 'size', 'sha1base36', 'archivename', 'rel' );
00702 
00703                 $skip = false;
00704 
00705                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00706                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00707                                         $this->reader->name == 'upload') {
00708                                 break;
00709                         }
00710 
00711                         $tag = $this->reader->name;
00712 
00713                         if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this,
00714                                                 $pageInfo ) ) {
00715                                 // Do nothing
00716                         } elseif ( in_array( $tag, $normalFields ) ) {
00717                                 $uploadInfo[$tag] = $this->nodeContents();
00718                         } elseif ( $tag == 'contributor' ) {
00719                                 $uploadInfo['contributor'] = $this->handleContributor();
00720                         } elseif ( $tag == 'contents' ) {
00721                                 $contents = $this->nodeContents();
00722                                 $encoding = $this->reader->getAttribute( 'encoding' );
00723                                 if ( $encoding === 'base64' ) {
00724                                         $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
00725                                         $uploadInfo['isTempSrc'] = true;
00726                                 }
00727                         } elseif ( $tag != '#text' ) {
00728                                 $this->warn( "Unhandled upload XML tag $tag" );
00729                                 $skip = true;
00730                         }
00731                 }
00732 
00733                 if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
00734                         $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
00735                         if ( file_exists( $path ) ) {
00736                                 $uploadInfo['fileSrc'] = $path;
00737                                 $uploadInfo['isTempSrc'] = false;
00738                         }
00739                 }
00740 
00741                 if ( $this->mImportUploads ) {
00742                         return $this->processUpload( $pageInfo, $uploadInfo );
00743                 }
00744         }
00745 
00750         private function dumpTemp( $contents ) {
00751                 $filename = tempnam( wfTempDir(), 'importupload' );
00752                 file_put_contents( $filename, $contents );
00753                 return $filename;
00754         }
00755 
00761         private function processUpload( $pageInfo, $uploadInfo ) {
00762                 $revision = new WikiRevision;
00763                 $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
00764 
00765                 $revision->setTitle( $pageInfo['_title'] );
00766                 $revision->setID( $pageInfo['id'] );
00767                 $revision->setTimestamp( $uploadInfo['timestamp'] );
00768                 $revision->setText( $text );
00769                 $revision->setFilename( $uploadInfo['filename'] );
00770                 if ( isset( $uploadInfo['archivename'] ) ) {
00771                         $revision->setArchiveName( $uploadInfo['archivename'] );
00772                 }
00773                 $revision->setSrc( $uploadInfo['src'] );
00774                 if ( isset( $uploadInfo['fileSrc'] ) ) {
00775                         $revision->setFileSrc( $uploadInfo['fileSrc'],
00776                                 !empty( $uploadInfo['isTempSrc'] ) );
00777                 }
00778                 if ( isset( $uploadInfo['sha1base36'] ) ) {
00779                         $revision->setSha1Base36( $uploadInfo['sha1base36'] );
00780                 }
00781                 $revision->setSize( intval( $uploadInfo['size'] ) );
00782                 $revision->setComment( $uploadInfo['comment'] );
00783 
00784                 if ( isset( $uploadInfo['contributor']['ip'] ) ) {
00785                         $revision->setUserIP( $uploadInfo['contributor']['ip'] );
00786                 }
00787                 if ( isset( $uploadInfo['contributor']['username'] ) ) {
00788                         $revision->setUserName( $uploadInfo['contributor']['username'] );
00789                 }
00790                 $revision->setNoUpdates( $this->mNoUpdates );
00791 
00792                 return call_user_func( $this->mUploadCallback, $revision );
00793         }
00794 
00798         private function handleContributor() {
00799                 $fields = array( 'id', 'ip', 'username' );
00800                 $info = array();
00801 
00802                 while ( $this->reader->read() ) {
00803                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00804                                         $this->reader->name == 'contributor') {
00805                                 break;
00806                         }
00807 
00808                         $tag = $this->reader->name;
00809 
00810                         if ( in_array( $tag, $fields ) ) {
00811                                 $info[$tag] = $this->nodeContents();
00812                         }
00813                 }
00814 
00815                 return $info;
00816         }
00817 
00822         private function processTitle( $text ) {
00823                 global $wgCommandLineMode;
00824 
00825                 $workTitle = $text;
00826                 $origTitle = Title::newFromText( $workTitle );
00827 
00828                 if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
00829                         # makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map)
00830                         # and than dbKey can begin with a lowercase char
00831                         $title = Title::makeTitleSafe( $this->mTargetNamespace,
00832                                 $origTitle->getDBkey() );
00833                 } else {
00834                         if( !is_null( $this->mTargetRootPage ) ) {
00835                                 $workTitle = $this->mTargetRootPage . '/' . $workTitle;
00836                         }
00837                         $title = Title::newFromText( $workTitle );
00838                 }
00839 
00840                 if( is_null( $title ) ) {
00841                         # Invalid page title? Ignore the page
00842                         $this->notice( 'import-error-invalid', $workTitle );
00843                         return false;
00844                 } elseif( $title->isExternal() ) {
00845                         $this->notice( 'import-error-interwiki', $title->getPrefixedText() );
00846                         return false;
00847                 } elseif( !$title->canExist() ) {
00848                         $this->notice( 'import-error-special', $title->getPrefixedText() );
00849                         return false;
00850                 } elseif( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
00851                         # Do not import if the importing wiki user cannot edit this page
00852                         $this->notice( 'import-error-edit', $title->getPrefixedText() );
00853                         return false;
00854                 } elseif( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
00855                         # Do not import if the importing wiki user cannot create this page
00856                         $this->notice( 'import-error-create', $title->getPrefixedText() );
00857                         return false;
00858                 }
00859 
00860                 return array( $title, $origTitle );
00861         }
00862 }
00863 
/**
 * Stream wrapper exposing an import source object as a read-only PHP stream.
 *
 * A source is any object offering atEnd() and readChunk(). It is registered
 * under a random id with registerSource() and then opened through a URL whose
 * host part is that id (the importer uses "uploadsource://<id>").
 */
class UploadSourceAdapter {
        /** Registered source objects, keyed by the id handed out by registerSource() */
        static $sourceRegistrations = array();

        /** Stream context; PHP assigns this on wrapper instances when one is in use */
        public $context;

        /** Source object backing the currently opened stream */
        private $mSource;

        /** Bytes already pulled from the source but not yet handed to the reader */
        private $mBuffer = '';

        /** Number of bytes handed out by stream_read() so far */
        private $mPosition = 0;

        /**
         * Register a source object and obtain the id to open it with.
         *
         * @param $source object Object providing atEnd() and readChunk()
         * @return string Id to use as the host part of the stream URL
         */
        static function registerSource( $source ) {
                $id = wfRandomString();

                self::$sourceRegistrations[$id] = $source;

                return $id;
        }

        /**
         * Open the stream for a previously registered source.
         *
         * @param $path string Stream URL; its host part is the registration id
         * @param $mode string Unused
         * @param $options int Unused
         * @param &$opened_path string Unused
         * @return bool False when the URL has no host or the id is unknown
         */
        function stream_open( $path, $mode, $options, &$opened_path ) {
                $url = parse_url( $path );

                # parse_url() returns false on malformed input and omits
                # 'host' when the URL has none
                if ( !is_array( $url ) || !isset( $url['host'] ) ) {
                        return false;
                }

                $id = $url['host'];
                if ( !isset( self::$sourceRegistrations[$id] ) ) {
                        return false;
                }

                $this->mSource = self::$sourceRegistrations[$id];

                return true;
        }

        /**
         * Hand out up to $count bytes, refilling the buffer from the source
         * as needed.
         *
         * @param $count int Maximum number of bytes to return
         * @return string Possibly empty when no more data is available
         */
        function stream_read( $count ) {
                $return = '';
                $leave = false;

                while ( !$leave && !$this->mSource->atEnd() &&
                                strlen( $this->mBuffer ) < $count ) {
                        # readChunk() may return false; normalise to a string
                        $read = (string)$this->mSource->readChunk();

                        if ( $read === '' ) {
                                # Source produced nothing; stop instead of spinning
                                $leave = true;
                        }

                        $this->mBuffer .= $read;
                }

                if ( $this->mBuffer !== '' ) {
                        $return = (string)substr( $this->mBuffer, 0, $count );
                        # substr() yields false on PHP < 8 once the buffer is
                        # consumed; keep the buffer a string
                        $this->mBuffer = (string)substr( $this->mBuffer, $count );
                }

                $this->mPosition += strlen( $return );

                return $return;
        }

        /**
         * Writing is not supported.
         *
         * @param $data string Ignored
         * @return bool Always false
         */
        function stream_write( $data ) {
                return false;
        }

        /**
         * @return int Number of bytes returned by stream_read() so far
         */
        function stream_tell() {
                return $this->mPosition;
        }

        /**
         * The stream is at EOF only when the source is exhausted AND every
         * buffered byte has been handed out; otherwise data still sitting in
         * the buffer would be reported as unavailable.
         *
         * @return bool
         */
        function stream_eof() {
                return $this->mBuffer === '' && $this->mSource->atEnd();
        }

        /**
         * Return a stat structure with every field set to zero.
         *
         * @return array Entries under both numeric and named keys
         */
        function url_stat() {
                $names = array(
                        'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
                        'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks'
                );

                $result = array();
                foreach ( $names as $index => $name ) {
                        $result[$name] = $result[$index] = 0;
                }

                return $result;
        }
}
00978 
/**
 * XMLReader with a convenience method for reading an element's text.
 */
class XMLReader2 extends XMLReader {

        /**
         * Collect the text of the element the reader is positioned on.
         *
         * Reading stops at the first end-element node encountered. Only text
         * and significant-whitespace nodes contribute to the result.
         *
         * @return string|bool The collected text ("" for an empty element).
         *   If the input ends before an end-element node is seen, the reader
         *   is closed and the result of close() is returned instead.
         */
        function nodeContents() {
                if ( $this->isEmptyElement ) {
                        return "";
                }

                $collected = "";
                while ( $this->read() ) {
                        $kind = $this->nodeType;

                        if ( $kind == XMLReader::END_ELEMENT ) {
                                return $collected;
                        }

                        if ( $kind == XMLReader::TEXT
                                || $kind == XMLReader::SIGNIFICANT_WHITESPACE
                        ) {
                                $collected .= $this->value;
                        }
                }

                # Ran out of input without meeting an end tag
                return $this->close();
        }
}
01002 
/**
 * One item read from an import dump: a page revision, a log entry or a file
 * upload. The importer fills it through the setters and then calls one of
 * importOldRevision(), importLogItem() or importUpload().
 */
class WikiRevision {
        public $importer = null;

        /** Title object the item belongs to (any object is accepted by setTitle()) */
        public $title = null;
        public $id = 0;
        /** Timestamp, kept in TS_MW form once set through setTimestamp() */
        public $timestamp = "20010115000000";
        public $user = 0;
        /** User name or IP address of the contributor */
        public $user_text = "";
        public $text = "";
        public $comment = "";
        public $minor = false;
        /** Log type, for log items */
        public $type = "";
        /** Log action, for log items */
        public $action = "";
        /** Log parameters, for log items */
        public $params = "";
        /** Local path of the file to upload, if already available */
        public $fileSrc = '';
        /** Expected SHA-1 of the file in base 36, or false when unknown */
        public $sha1base36 = false;
        /** Whether $fileSrc is a temporary file; read by isTempSrc() */
        public $isTemp = false;
        public $archiveName = '';
        /** Mirror of $isTemp, kept for code reading this property directly */
        public $fileIsTemp;
        /** Remote location of the file, used by downloadSource() */
        public $src = null;
        public $filename = null;
        public $size = null;
        private $mNoUpdates = false;

        /**
         * @param $title object Title the item belongs to
         * @throws MWException when $title is not an object
         */
        public function setTitle( $title ) {
                if ( is_object( $title ) ) {
                        $this->title = $title;
                } elseif ( is_null( $title ) ) {
                        throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
                } else {
                        throw new MWException( "WikiRevision given non-object title in import." );
                }
        }

        /**
         * @param $id int
         */
        public function setID( $id ) {
                $this->id = $id;
        }

        /**
         * @param $ts string Timestamp in any format wfTimestamp() accepts,
         *   e.g. 2003-08-05T18:30:02Z; stored converted to TS_MW
         */
        public function setTimestamp( $ts ) {
                # 2003-08-05T18:30:02Z
                $this->timestamp = wfTimestamp( TS_MW, $ts );
        }

        /**
         * @param $user string Contributor's user name
         */
        public function setUsername( $user ) {
                $this->user_text = $user;
        }

        /**
         * @param $ip string Contributor's IP; stored in the same field as the user name
         */
        public function setUserIP( $ip ) {
                $this->user_text = $ip;
        }

        /**
         * @param $text string
         */
        public function setText( $text ) {
                $this->text = $text;
        }

        /**
         * @param $text string
         */
        public function setComment( $text ) {
                $this->comment = $text;
        }

        /**
         * @param $minor mixed Cast to bool
         */
        public function setMinor( $minor ) {
                $this->minor = (bool)$minor;
        }

        /**
         * @param $src string Remote location to fetch the file from
         */
        public function setSrc( $src ) {
                $this->src = $src;
        }

        /**
         * @param $src string Local path of the file to import
         * @param $isTemp bool Whether that file is temporary and may be
         *   deleted once imported
         */
        public function setFileSrc( $src, $isTemp ) {
                $this->fileSrc = $src;
                $this->fileIsTemp = $isTemp;
                # isTempSrc() reads $isTemp; without this assignment the flag
                # passed here was never visible to importUpload()
                $this->isTemp = $isTemp;
        }

        /**
         * @param $sha1base36 string
         */
        public function setSha1Base36( $sha1base36 ) {
                $this->sha1base36 = $sha1base36;
        }

        /**
         * @param $filename string
         */
        public function setFilename( $filename ) {
                $this->filename = $filename;
        }

        /**
         * @param $archiveName string
         */
        public function setArchiveName( $archiveName ) {
                $this->archiveName = $archiveName;
        }

        /**
         * @param $size mixed Converted with intval()
         */
        public function setSize( $size ) {
                $this->size = intval( $size );
        }

        /**
         * @param $type string
         */
        public function setType( $type ) {
                $this->type = $type;
        }

        /**
         * @param $action string
         */
        public function setAction( $action ) {
                $this->action = $action;
        }

        /**
         * @param $params string
         */
        public function setParams( $params ) {
                $this->params = $params;
        }

        /**
         * @param $noupdates bool When true, importOldRevision() skips the
         *   edit updates after inserting the revision
         */
        public function setNoUpdates( $noupdates ) {
                $this->mNoUpdates = $noupdates;
        }

        /**
         * @return object|null
         */
        public function getTitle() {
                return $this->title;
        }

        /**
         * @return int
         */
        public function getID() {
                return $this->id;
        }

        /**
         * @return string
         */
        public function getTimestamp() {
                return $this->timestamp;
        }

        /**
         * @return string The contributor's name or IP (the text, not a user object)
         */
        public function getUser() {
                return $this->user_text;
        }

        /**
         * @return string
         */
        public function getText() {
                return $this->text;
        }

        /**
         * @return string
         */
        public function getComment() {
                return $this->comment;
        }

        /**
         * @return bool
         */
        public function getMinor() {
                return $this->minor;
        }

        /**
         * @return string|null Null when setSrc() was never called
         */
        public function getSrc() {
                return $this->src;
        }

        /**
         * @return string|bool The SHA-1 converted from base 36 to base 16,
         *   or false when none was set
         */
        public function getSha1() {
                if ( $this->sha1base36 ) {
                        return wfBaseConvert( $this->sha1base36, 36, 16 );
                }
                return false;
        }

        /**
         * @return string
         */
        public function getFileSrc() {
                return $this->fileSrc;
        }

        /**
         * @return bool Whether the file source was flagged as temporary
         */
        public function isTempSrc() {
                return $this->isTemp;
        }

        /**
         * @return string|null Null when setFilename() was never called
         */
        public function getFilename() {
                return $this->filename;
        }

        /**
         * @return string
         */
        public function getArchiveName() {
                return $this->archiveName;
        }

        /**
         * @return int|null Null when setSize() was never called
         */
        public function getSize() {
                return $this->size;
        }

        /**
         * @return string
         */
        public function getType() {
                return $this->type;
        }

        /**
         * @return string
         */
        public function getAction() {
                return $this->action;
        }

        /**
         * @return string
         */
        public function getParams() {
                return $this->params;
        }

        /**
         * Insert this item as a revision of its page, creating the page when
         * it does not exist yet.
         *
         * @return bool False when a revision with the same page, timestamp,
         *   user text and comment already exists; true otherwise
         */
        public function importOldRevision() {
                $dbw = wfGetDB( DB_MASTER );

                # Sneak a single revision into place
                $user = User::newFromName( $this->getUser() );
                if ( $user ) {
                        $userId = intval( $user->getId() );
                        $userText = $user->getName();
                        $userObj = $user;
                } else {
                        # Name not usable as a user name: attribute by text only
                        $userId = 0;
                        $userText = $this->getUser();
                        $userObj = new User;
                }

                // avoid memory leak...?
                $linkCache = LinkCache::singleton();
                $linkCache->clear();

                $page = WikiPage::factory( $this->title );
                if ( !$page->exists() ) {
                        # must create the page...
                        $pageId = $page->insertOn( $dbw );
                        $created = true;
                        $oldcountable = null;
                } else {
                        $pageId = $page->getId();
                        $created = false;

                        $prior = $dbw->selectField( 'revision', '1',
                                array( 'rev_page' => $pageId,
                                        'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
                                        'rev_user_text' => $userText,
                                        'rev_comment'   => $this->getComment() ),
                                __METHOD__
                        );
                        if ( $prior ) {
                                // @todo FIXME: This could fail slightly for multiple matches :P
                                wfDebug( __METHOD__ . ": skipping existing revision for [[" .
                                        $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                                return false;
                        }
                        $oldcountable = $page->isCountable();
                }

                # @todo FIXME: Use original rev_id optionally (better for backups)
                # Insert the row
                $revision = new Revision( array(
                        'page'       => $pageId,
                        'text'       => $this->getText(),
                        'comment'    => $this->getComment(),
                        'user'       => $userId,
                        'user_text'  => $userText,
                        'timestamp'  => $this->timestamp,
                        'minor_edit' => $this->minor,
                        ) );
                $revision->insertOn( $dbw );
                $changed = $page->updateIfNewerOn( $dbw, $revision );

                if ( $changed !== false && !$this->mNoUpdates ) {
                        wfDebug( __METHOD__ . ": running updates\n" );
                        $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
                }

                return true;
        }

        /**
         * Insert this item into the logging table unless a row with the same
         * type, action, timestamp, title, comment and params already exists.
         * Items without a title are skipped.
         */
        public function importLogItem() {
                $dbw = wfGetDB( DB_MASTER );
                # @todo FIXME: This will not record autoblocks
                if ( !$this->getTitle() ) {
                        wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
                                $this->timestamp . "\n" );
                        return;
                }
                # Check if it exists already
                // @todo FIXME: Use original log ID (better for backups)
                $prior = $dbw->selectField( 'logging', '1',
                        array( 'log_type' => $this->getType(),
                                'log_action'    => $this->getAction(),
                                'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                                'log_namespace' => $this->getTitle()->getNamespace(),
                                'log_title'     => $this->getTitle()->getDBkey(),
                                'log_comment'   => $this->getComment(),
                                #'log_user_text' => $this->user_text,
                                'log_params'    => $this->params ),
                        __METHOD__
                );
                // @todo FIXME: This could fail slightly for multiple matches :P
                if ( $prior ) {
                        wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
                                $this->timestamp . "\n" );
                        return;
                }
                $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
                $data = array(
                        'log_id' => $log_id,
                        'log_type' => $this->type,
                        'log_action' => $this->action,
                        'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                        'log_user' => User::idFromName( $this->user_text ),
                        #'log_user_text' => $this->user_text,
                        'log_namespace' => $this->getTitle()->getNamespace(),
                        'log_title' => $this->getTitle()->getDBkey(),
                        'log_comment' => $this->getComment(),
                        'log_params' => $this->params
                );
                $dbw->insert( 'logging', $data, __METHOD__ );
        }

        /**
         * Import this item as a file upload, either as the current version or
         * as an archived one.
         *
         * The file is taken from getFileSrc() when set, otherwise downloaded
         * with downloadSource(). When a SHA-1 was supplied the file is
         * verified against it before uploading.
         *
         * @return bool True when the upload status is good, false otherwise
         */
        public function importUpload() {
                # Construct a file
                $archiveName = $this->getArchiveName();
                if ( $archiveName ) {
                        wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
                        $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                                RepoGroup::singleton()->getLocalRepo(), $archiveName );
                } else {
                        $file = wfLocalFile( $this->getTitle() );
                        wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
                        if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
                                # The existing file is newer: store this one as an old version
                                $archiveName = $file->getTimestamp() . '!' . $file->getName();
                                $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                                        RepoGroup::singleton()->getLocalRepo(), $archiveName );
                                wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
                        }
                }
                if ( !$file ) {
                        wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
                        return false;
                }

                # Get the file source or download if necessary
                $source = $this->getFileSrc();
                $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
                if ( !$source ) {
                        $source = $this->downloadSource();
                        # Downloads always land in a temp file of our own
                        $flags |= File::DELETE_SOURCE;
                }
                if ( !$source ) {
                        wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
                        return false;
                }
                $sha1 = $this->getSha1();
                if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
                        if ( $flags & File::DELETE_SOURCE ) {
                                # Broken file; delete it if it is a temporary file
                                unlink( $source );
                        }
                        wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
                        return false;
                }

                $user = User::newFromName( $this->user_text );

                # Do the actual upload
                if ( $archiveName ) {
                        $status = $file->uploadOld( $source, $archiveName,
                                $this->getTimestamp(), $this->getComment(), $user, $flags );
                } else {
                        $status = $file->upload( $source, $this->getComment(), $this->getComment(),
                                $flags, false, $this->getTimestamp(), $user );
                }

                if ( $status->isGood() ) {
                        wfDebug( __METHOD__ . ": Succesful\n" );
                        return true;
                } else {
                        wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
                        return false;
                }
        }

        /**
         * Fetch the file at getSrc() into a new temporary file.
         *
         * @return string|bool Path of the temporary file, or false when
         *   uploads are disabled, the temp file cannot be opened or nothing
         *   could be fetched
         */
        public function downloadSource() {
                global $wgEnableUploads;
                if ( !$wgEnableUploads ) {
                        return false;
                }

                $tempo = tempnam( wfTempDir(), 'download' );
                $f = fopen( $tempo, 'wb' );
                if ( !$f ) {
                        wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
                        return false;
                }

                // @todo FIXME!
                $src = $this->getSrc();
                $data = Http::get( $src );
                if ( !$data ) {
                        wfDebug( "IMPORT: couldn't fetch source $src\n" );
                        # Do not leave the empty temp file behind
                        fclose( $f );
                        unlink( $tempo );
                        return false;
                }

                fwrite( $f, $data );
                fclose( $f );

                return $tempo;
        }

}
01504 
/**
 * Import source backed by an in-memory string, delivered as a single chunk.
 */
class ImportStringSource {
        /** The complete data to deliver */
        public $mString;

        /** Whether the data has already been handed out */
        public $mRead = false;

        /**
         * @param $string string Data to deliver
         */
        function __construct( $string ) {
                $this->mString = $string;
                $this->mRead = false;
        }

        /**
         * @return bool True once readChunk() has returned the string
         */
        function atEnd() {
                return $this->mRead;
        }

        /**
         * @return string|bool The whole string on the first call, false afterwards
         */
        function readChunk() {
                if ( $this->atEnd() ) {
                        return false;
                }
                $this->mRead = true;
                return $this->mString;
        }
}
01533 
/**
 * Import source backed by an open stream handle, read in 32 KiB chunks.
 * The static constructors return a Status wrapping the new source.
 */
class ImportStreamSource {
        /** Stream resource the data is read from */
        public $mHandle;

        /**
         * @param $handle resource Open, readable stream
         */
        function __construct( $handle ) {
                $this->mHandle = $handle;
        }

        /**
         * @return bool Whether the handle reports end-of-file
         */
        function atEnd() {
                return feof( $this->mHandle );
        }

        /**
         * @return string|bool Up to 32768 bytes from the handle (fread() result)
         */
        function readChunk() {
                return fread( $this->mHandle, 32768 );
        }

        /**
         * Open a local file as an import source.
         *
         * @param $filename string
         * @return Status Good with the source, or fatal 'importcantopen'
         */
        static function newFromFile( $filename ) {
                wfSuppressWarnings();
                $file = fopen( $filename, 'rt' );
                wfRestoreWarnings();
                if ( !$file ) {
                        return Status::newFatal( "importcantopen" );
                }
                return Status::newGood( new ImportStreamSource( $file ) );
        }

        /**
         * Open the file uploaded in the given form field as an import source.
         *
         * @param $fieldname string Name of the upload field in $_FILES
         * @return Status
         */
        static function newFromUpload( $fieldname = "xmlimport" ) {
                # Look the field up by value: binding $_FILES[$fieldname] by
                # reference would create a null entry when the field is absent
                if ( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
                        return Status::newFatal( 'importnofile' );
                }
                $upload = $_FILES[$fieldname];

                if ( !empty( $upload['error'] ) ) {
                        switch ( $upload['error'] ) {
                                case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
                                case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
                                        return Status::newFatal( 'importuploaderrorsize' );
                                case 3: # The uploaded file was only partially uploaded
                                        return Status::newFatal( 'importuploaderrorpartial' );
                                case 6: # Missing a temporary folder.
                                        return Status::newFatal( 'importuploaderrortemp' );
                                # Other codes fall through to the is_uploaded_file() check below
                        }
                }

                $fname = $upload['tmp_name'];
                if ( is_uploaded_file( $fname ) ) {
                        return ImportStreamSource::newFromFile( $fname );
                } else {
                        return Status::newFatal( 'importnofile' );
                }
        }

        /**
         * Fetch a URL and expose the response body as an import source.
         *
         * @param $url string
         * @param $method string HTTP method
         * @return Status Good with the source, or fatal 'importcantopen'
         */
        static function newFromURL( $url, $method = 'GET' ) {
                wfDebug( __METHOD__ . ": opening $url\n" );
                # Use the standard HTTP fetch function; it times out
                # quicker and sorts out user-agent problems which might
                # otherwise prevent importing from large sites, such
                # as the Wikimedia cluster, etc.
                $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
                if ( $data === false ) {
                        return Status::newFatal( 'importcantopen' );
                }

                $file = tmpfile();
                if ( $file === false ) {
                        # No temporary file available to spool the response into
                        return Status::newFatal( 'importcantopen' );
                }
                fwrite( $file, $data );
                fflush( $file );
                fseek( $file, 0 );
                return Status::newGood( new ImportStreamSource( $file ) );
        }

        /**
         * Build an import source from Special:Export on another wiki.
         *
         * @param $interwiki string Interwiki prefix of the source wiki
         * @param $page string Page to export
         * @param $history bool Request the page history
         * @param $templates bool Request templates as well
         * @param $pageLinkDepth int Sent as 'pagelink-depth' when non-zero
         * @return Status
         */
        public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
                if ( $page == '' ) {
                        return Status::newFatal( 'import-noarticle' );
                }
                $link = Title::newFromText( "$interwiki:Special:Export/$page" );
                if ( is_null( $link ) || $link->getInterwiki() == '' ) {
                        return Status::newFatal( 'importbadinterwiki' );
                }

                $params = array();
                if ( $history ) {
                        $params['history'] = 1;
                }
                if ( $templates ) {
                        $params['templates'] = 1;
                }
                if ( $pageLinkDepth ) {
                        $params['pagelink-depth'] = $pageLinkDepth;
                }
                $url = $link->getFullUrl( $params );
                # For interwikis, use POST to avoid redirects.
                return ImportStreamSource::newFromURL( $url, "POST" );
        }
}