MediaWiki  REL1_19
Import.php
Go to the documentation of this file.
00001 <?php
00033 class WikiImporter {
00034         private $reader = null;
00035         private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
00036         private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
00037         private $mNoticeCallback, $mDebug;
00038         private $mImportUploads, $mImageBasePath;
00039         private $mNoUpdates = false;
00040 
00045         function __construct( $source ) {
00046                 $this->reader = new XMLReader();
00047 
00048                 stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
00049                 $id = UploadSourceAdapter::registerSource( $source );
00050                 if (defined( 'LIBXML_PARSEHUGE' ) ) {
00051                         $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
00052                 } else {
00053                         $this->reader->open( "uploadsource://$id" );
00054                 }
00055 
00056                 // Default callbacks
00057                 $this->setRevisionCallback( array( $this, "importRevision" ) );
00058                 $this->setUploadCallback( array( $this, 'importUpload' ) );
00059                 $this->setLogItemCallback( array( $this, 'importLogItem' ) );
00060                 $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
00061         }
00062 
00063         private function throwXmlError( $err ) {
00064                 $this->debug( "FAILURE: $err" );
00065                 wfDebug( "WikiImporter XML error: $err\n" );
00066         }
00067 
00068         private function debug( $data ) {
00069                 if( $this->mDebug ) {
00070                         wfDebug( "IMPORT: $data\n" );
00071                 }
00072         }
00073 
00074         private function warn( $data ) {
00075                 wfDebug( "IMPORT: $data\n" );
00076         }
00077 
00078         private function notice( $msg /*, $param, ...*/ ) {
00079                 $params = func_get_args();
00080                 array_shift( $params );
00081 
00082                 if ( is_callable( $this->mNoticeCallback ) ) {
00083                         call_user_func( $this->mNoticeCallback, $msg, $params );
00084                 } else { # No ImportReporter -> CLI
00085                         echo wfMessage( $msg, $params )->text() . "\n";
00086                 }
00087         }
00088 
00093         function setDebug( $debug ) {
00094                 $this->mDebug = $debug;
00095         }
00096 
00101         function setNoUpdates( $noupdates ) {
00102                 $this->mNoUpdates = $noupdates;
00103         }
00104 
00111         public function setNoticeCallback( $callback ) {
00112                 return wfSetVar( $this->mNoticeCallback, $callback );
00113         }
00114 
00120         public function setPageCallback( $callback ) {
00121                 $previous = $this->mPageCallback;
00122                 $this->mPageCallback = $callback;
00123                 return $previous;
00124         }
00125 
00135         public function setPageOutCallback( $callback ) {
00136                 $previous = $this->mPageOutCallback;
00137                 $this->mPageOutCallback = $callback;
00138                 return $previous;
00139         }
00140 
00146         public function setRevisionCallback( $callback ) {
00147                 $previous = $this->mRevisionCallback;
00148                 $this->mRevisionCallback = $callback;
00149                 return $previous;
00150         }
00151 
00157         public function setUploadCallback( $callback ) {
00158                 $previous = $this->mUploadCallback;
00159                 $this->mUploadCallback = $callback;
00160                 return $previous;
00161         }
00162 
00168         public function setLogItemCallback( $callback ) {
00169                 $previous = $this->mLogItemCallback;
00170                 $this->mLogItemCallback = $callback;
00171                 return $previous;
00172         }
00173 
00179         public function setSiteInfoCallback( $callback ) {
00180                 $previous = $this->mSiteInfoCallback;
00181                 $this->mSiteInfoCallback = $callback;
00182                 return $previous;
00183         }
00184 
00190         public function setTargetNamespace( $namespace ) {
00191                 if( is_null( $namespace ) ) {
00192                         // Don't override namespaces
00193                         $this->mTargetNamespace = null;
00194                 } elseif( $namespace >= 0 ) {
00195                         // @todo FIXME: Check for validity
00196                         $this->mTargetNamespace = intval( $namespace );
00197                 } else {
00198                         return false;
00199                 }
00200         }
00201 
00205         public function setImageBasePath( $dir ) {
00206                 $this->mImageBasePath = $dir;
00207         }
00208 
00212         public function setImportUploads( $import ) {
00213                 $this->mImportUploads = $import;
00214         }
00215 
00221         public function importRevision( $revision ) {
00222                 $dbw = wfGetDB( DB_MASTER );
00223                 return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
00224         }
00225 
00231         public function importLogItem( $rev ) {
00232                 $dbw = wfGetDB( DB_MASTER );
00233                 return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
00234         }
00235 
00241         public function importUpload( $revision ) {
00242                 $dbw = wfGetDB( DB_MASTER );
00243                 return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
00244         }
00245 
00255         public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
00256                 $args = func_get_args();
00257                 return wfRunHooks( 'AfterImportPage', $args );
00258         }
00259 
00264         public function debugRevisionHandler( &$revision ) {
00265                 $this->debug( "Got revision:" );
00266                 if( is_object( $revision->title ) ) {
00267                         $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
00268                 } else {
00269                         $this->debug( "-- Title: <invalid>" );
00270                 }
00271                 $this->debug( "-- User: " . $revision->user_text );
00272                 $this->debug( "-- Timestamp: " . $revision->timestamp );
00273                 $this->debug( "-- Comment: " . $revision->comment );
00274                 $this->debug( "-- Text: " . $revision->text );
00275         }
00276 
00281         function pageCallback( $title ) {
00282                 if( isset( $this->mPageCallback ) ) {
00283                         call_user_func( $this->mPageCallback, $title );
00284                 }
00285         }
00286 
00295         private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
00296                 if( isset( $this->mPageOutCallback ) ) {
00297                         $args = func_get_args();
00298                         call_user_func_array( $this->mPageOutCallback, $args );
00299                 }
00300         }
00301 
00306         private function revisionCallback( $revision ) {
00307                 if ( isset( $this->mRevisionCallback ) ) {
00308                         return call_user_func_array( $this->mRevisionCallback,
00309                                         array( $revision, $this ) );
00310                 } else {
00311                         return false;
00312                 }
00313         }
00314 
00319         private function logItemCallback( $revision ) {
00320                 if ( isset( $this->mLogItemCallback ) ) {
00321                         return call_user_func_array( $this->mLogItemCallback,
00322                                         array( $revision, $this ) );
00323                 } else {
00324                         return false;
00325                 }
00326         }
00327 
00335         private function nodeContents() {
00336                 if( $this->reader->isEmptyElement ) {
00337                         return "";
00338                 }
00339                 $buffer = "";
00340                 while( $this->reader->read() ) {
00341                         switch( $this->reader->nodeType ) {
00342                         case XmlReader::TEXT:
00343                         case XmlReader::SIGNIFICANT_WHITESPACE:
00344                                 $buffer .= $this->reader->value;
00345                                 break;
00346                         case XmlReader::END_ELEMENT:
00347                                 return $buffer;
00348                         }
00349                 }
00350 
00351                 $this->reader->close();
00352                 return '';
00353         }
00354 
00355         # --------------
00356 
00358         private function dumpElement() {
00359                 static $lookup = null;
00360                 if (!$lookup) {
00361                         $xmlReaderConstants = array(
00362                                 "NONE",
00363                                 "ELEMENT",
00364                                 "ATTRIBUTE",
00365                                 "TEXT",
00366                                 "CDATA",
00367                                 "ENTITY_REF",
00368                                 "ENTITY",
00369                                 "PI",
00370                                 "COMMENT",
00371                                 "DOC",
00372                                 "DOC_TYPE",
00373                                 "DOC_FRAGMENT",
00374                                 "NOTATION",
00375                                 "WHITESPACE",
00376                                 "SIGNIFICANT_WHITESPACE",
00377                                 "END_ELEMENT",
00378                                 "END_ENTITY",
00379                                 "XML_DECLARATION",
00380                                 );
00381                         $lookup = array();
00382 
00383                         foreach( $xmlReaderConstants as $name ) {
00384                                 $lookup[constant("XmlReader::$name")] = $name;
00385                         }
00386                 }
00387 
00388                 print( var_dump(
00389                         $lookup[$this->reader->nodeType],
00390                         $this->reader->name,
00391                         $this->reader->value
00392                 )."\n\n" );
00393         }
00394 
00398         public function doImport() {
00399 
00400                 // Calls to reader->read need to be wrapped in calls to
00401                 // libxml_disable_entity_loader() to avoid local file
00402                 // inclusion attacks (bug 46932).
00403                 $oldDisable = libxml_disable_entity_loader( true );
00404                 $this->reader->read();
00405 
00406                 if ( $this->reader->name != 'mediawiki' ) {
00407                         libxml_disable_entity_loader( $oldDisable );
00408                         throw new MWException( "Expected <mediawiki> tag, got " .
00409                                 $this->reader->name );
00410                 }
00411                 $this->debug( "<mediawiki> tag is correct." );
00412 
00413                 $this->debug( "Starting primary dump processing loop." );
00414 
00415                 $keepReading = $this->reader->read();
00416                 $skip = false;
00417                 while ( $keepReading ) {
00418                         $tag = $this->reader->name;
00419                         $type = $this->reader->nodeType;
00420 
00421                         if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this ) ) {
00422                                 // Do nothing
00423                         } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
00424                                 break;
00425                         } elseif ( $tag == 'siteinfo' ) {
00426                                 $this->handleSiteInfo();
00427                         } elseif ( $tag == 'page' ) {
00428                                 $this->handlePage();
00429                         } elseif ( $tag == 'logitem' ) {
00430                                 $this->handleLogItem();
00431                         } elseif ( $tag != '#text' ) {
00432                                 $this->warn( "Unhandled top-level XML tag $tag" );
00433 
00434                                 $skip = true;
00435                         }
00436 
00437                         if ($skip) {
00438                                 $keepReading = $this->reader->next();
00439                                 $skip = false;
00440                                 $this->debug( "Skip" );
00441                         } else {
00442                                 $keepReading = $this->reader->read();
00443                         }
00444                 }
00445 
00446                 libxml_disable_entity_loader( $oldDisable );
00447                 return true;
00448         }
00449 
00454         private function handleSiteInfo() {
00455                 // Site info is useful, but not actually used for dump imports.
00456                 // Includes a quick short-circuit to save performance.
00457                 if ( ! $this->mSiteInfoCallback ) {
00458                         $this->reader->next();
00459                         return true;
00460                 }
00461                 throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
00462         }
00463 
00464         private function handleLogItem() {
00465                 $this->debug( "Enter log item handler." );
00466                 $logInfo = array();
00467 
00468                 // Fields that can just be stuffed in the pageInfo object
00469                 $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
00470                                         'logtitle', 'params' );
00471 
00472                 while ( $this->reader->read() ) {
00473                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00474                                         $this->reader->name == 'logitem') {
00475                                 break;
00476                         }
00477 
00478                         $tag = $this->reader->name;
00479 
00480                         if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
00481                                                 $this, $logInfo ) ) {
00482                                 // Do nothing
00483                         } elseif ( in_array( $tag, $normalFields ) ) {
00484                                 $logInfo[$tag] = $this->nodeContents();
00485                         } elseif ( $tag == 'contributor' ) {
00486                                 $logInfo['contributor'] = $this->handleContributor();
00487                         } elseif ( $tag != '#text' ) {
00488                                 $this->warn( "Unhandled log-item XML tag $tag" );
00489                         }
00490                 }
00491 
00492                 $this->processLogItem( $logInfo );
00493         }
00494 
00499         private function processLogItem( $logInfo ) {
00500                 $revision = new WikiRevision;
00501 
00502                 $revision->setID( $logInfo['id'] );
00503                 $revision->setType( $logInfo['type'] );
00504                 $revision->setAction( $logInfo['action'] );
00505                 $revision->setTimestamp( $logInfo['timestamp'] );
00506                 $revision->setParams( $logInfo['params'] );
00507                 $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
00508                 $revision->setNoUpdates( $this->mNoUpdates );
00509 
00510                 if ( isset( $logInfo['comment'] ) ) {
00511                         $revision->setComment( $logInfo['comment'] );
00512                 }
00513 
00514                 if ( isset( $logInfo['contributor']['ip'] ) ) {
00515                         $revision->setUserIP( $logInfo['contributor']['ip'] );
00516                 }
00517                 if ( isset( $logInfo['contributor']['username'] ) ) {
00518                         $revision->setUserName( $logInfo['contributor']['username'] );
00519                 }
00520 
00521                 return $this->logItemCallback( $revision );
00522         }
00523 
00524         private function handlePage() {
00525                 // Handle page data.
00526                 $this->debug( "Enter page handler." );
00527                 $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
00528 
00529                 // Fields that can just be stuffed in the pageInfo object
00530                 $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
00531 
00532                 $skip = false;
00533                 $badTitle = false;
00534 
00535                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00536                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00537                                         $this->reader->name == 'page') {
00538                                 break;
00539                         }
00540 
00541                         $tag = $this->reader->name;
00542 
00543                         if ( $badTitle ) {
00544                                 // The title is invalid, bail out of this page
00545                                 $skip = true;
00546                         } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
00547                                                 &$pageInfo ) ) ) {
00548                                 // Do nothing
00549                         } elseif ( in_array( $tag, $normalFields ) ) {
00550                                 $pageInfo[$tag] = $this->nodeContents();
00551                                 if ( $tag == 'title' ) {
00552                                         $title = $this->processTitle( $pageInfo['title'] );
00553 
00554                                         if ( !$title ) {
00555                                                 $badTitle = true;
00556                                                 $skip = true;
00557                                         }
00558 
00559                                         $this->pageCallback( $title );
00560                                         list( $pageInfo['_title'], $origTitle ) = $title;
00561                                 }
00562                         } elseif ( $tag == 'revision' ) {
00563                                 $this->handleRevision( $pageInfo );
00564                         } elseif ( $tag == 'upload' ) {
00565                                 $this->handleUpload( $pageInfo );
00566                         } elseif ( $tag != '#text' ) {
00567                                 $this->warn( "Unhandled page XML tag $tag" );
00568                                 $skip = true;
00569                         }
00570                 }
00571 
00572                 $this->pageOutCallback( $pageInfo['_title'], $origTitle,
00573                                         $pageInfo['revisionCount'],
00574                                         $pageInfo['successfulRevisionCount'],
00575                                         $pageInfo );
00576         }
00577 
00581         private function handleRevision( &$pageInfo ) {
00582                 $this->debug( "Enter revision handler" );
00583                 $revisionInfo = array();
00584 
00585                 $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );
00586 
00587                 $skip = false;
00588 
00589                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00590                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00591                                         $this->reader->name == 'revision') {
00592                                 break;
00593                         }
00594 
00595                         $tag = $this->reader->name;
00596 
00597                         if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this,
00598                                                 $pageInfo, $revisionInfo ) ) {
00599                                 // Do nothing
00600                         } elseif ( in_array( $tag, $normalFields ) ) {
00601                                 $revisionInfo[$tag] = $this->nodeContents();
00602                         } elseif ( $tag == 'contributor' ) {
00603                                 $revisionInfo['contributor'] = $this->handleContributor();
00604                         } elseif ( $tag != '#text' ) {
00605                                 $this->warn( "Unhandled revision XML tag $tag" );
00606                                 $skip = true;
00607                         }
00608                 }
00609 
00610                 $pageInfo['revisionCount']++;
00611                 if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
00612                         $pageInfo['successfulRevisionCount']++;
00613                 }
00614         }
00615 
00621         private function processRevision( $pageInfo, $revisionInfo ) {
00622                 $revision = new WikiRevision;
00623 
00624                 if( isset( $revisionInfo['id'] ) ) {
00625                         $revision->setID( $revisionInfo['id'] );
00626                 }
00627                 if ( isset( $revisionInfo['text'] ) ) {
00628                         $revision->setText( $revisionInfo['text'] );
00629                 }
00630                 $revision->setTitle( $pageInfo['_title'] );
00631 
00632                 if ( isset( $revisionInfo['timestamp'] ) ) {
00633                         $revision->setTimestamp( $revisionInfo['timestamp'] );
00634                 } else {
00635                         $revision->setTimestamp( wfTimestampNow() );
00636                 }
00637 
00638                 if ( isset( $revisionInfo['comment'] ) ) {
00639                         $revision->setComment( $revisionInfo['comment'] );
00640                 }
00641 
00642                 if ( isset( $revisionInfo['minor'] ) ) {
00643                         $revision->setMinor( true );
00644                 }
00645                 if ( isset( $revisionInfo['contributor']['ip'] ) ) {
00646                         $revision->setUserIP( $revisionInfo['contributor']['ip'] );
00647                 }
00648                 if ( isset( $revisionInfo['contributor']['username'] ) ) {
00649                         $revision->setUserName( $revisionInfo['contributor']['username'] );
00650                 }
00651                 $revision->setNoUpdates( $this->mNoUpdates );
00652 
00653                 return $this->revisionCallback( $revision );
00654         }
00655 
00660         private function handleUpload( &$pageInfo ) {
00661                 $this->debug( "Enter upload handler" );
00662                 $uploadInfo = array();
00663 
00664                 $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
00665                                         'src', 'size', 'sha1base36', 'archivename', 'rel' );
00666 
00667                 $skip = false;
00668 
00669                 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00670                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00671                                         $this->reader->name == 'upload') {
00672                                 break;
00673                         }
00674 
00675                         $tag = $this->reader->name;
00676 
00677                         if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this,
00678                                                 $pageInfo ) ) {
00679                                 // Do nothing
00680                         } elseif ( in_array( $tag, $normalFields ) ) {
00681                                 $uploadInfo[$tag] = $this->nodeContents();
00682                         } elseif ( $tag == 'contributor' ) {
00683                                 $uploadInfo['contributor'] = $this->handleContributor();
00684                         } elseif ( $tag == 'contents' ) {
00685                                 $contents = $this->nodeContents();
00686                                 $encoding = $this->reader->getAttribute( 'encoding' );
00687                                 if ( $encoding === 'base64' ) {
00688                                         $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
00689                                         $uploadInfo['isTempSrc'] = true;
00690                                 }
00691                         } elseif ( $tag != '#text' ) {
00692                                 $this->warn( "Unhandled upload XML tag $tag" );
00693                                 $skip = true;
00694                         }
00695                 }
00696 
00697                 if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
00698                         $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
00699                         if ( file_exists( $path ) ) {
00700                                 $uploadInfo['fileSrc'] = $path;
00701                                 $uploadInfo['isTempSrc'] = false;
00702                         }
00703                 }
00704 
00705                 if ( $this->mImportUploads ) {
00706                         return $this->processUpload( $pageInfo, $uploadInfo );
00707                 }
00708         }
00709 
00714         private function dumpTemp( $contents ) {
00715                 $filename = tempnam( wfTempDir(), 'importupload' );
00716                 file_put_contents( $filename, $contents );
00717                 return $filename;
00718         }
00719 
00725         private function processUpload( $pageInfo, $uploadInfo ) {
00726                 $revision = new WikiRevision;
00727                 $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
00728 
00729                 $revision->setTitle( $pageInfo['_title'] );
00730                 $revision->setID( $pageInfo['id'] );
00731                 $revision->setTimestamp( $uploadInfo['timestamp'] );
00732                 $revision->setText( $text );
00733                 $revision->setFilename( $uploadInfo['filename'] );
00734                 if ( isset( $uploadInfo['archivename'] ) ) {
00735                         $revision->setArchiveName( $uploadInfo['archivename'] );
00736                 }
00737                 $revision->setSrc( $uploadInfo['src'] );
00738                 if ( isset( $uploadInfo['fileSrc'] ) ) {
00739                         $revision->setFileSrc( $uploadInfo['fileSrc'],
00740                                 !empty( $uploadInfo['isTempSrc'] ) );
00741                 }
00742                 if ( isset( $uploadInfo['sha1base36'] ) ) {
00743                         $revision->setSha1Base36( $uploadInfo['sha1base36'] );
00744                 }
00745                 $revision->setSize( intval( $uploadInfo['size'] ) );
00746                 $revision->setComment( $uploadInfo['comment'] );
00747 
00748                 if ( isset( $uploadInfo['contributor']['ip'] ) ) {
00749                         $revision->setUserIP( $uploadInfo['contributor']['ip'] );
00750                 }
00751                 if ( isset( $uploadInfo['contributor']['username'] ) ) {
00752                         $revision->setUserName( $uploadInfo['contributor']['username'] );
00753                 }
00754                 $revision->setNoUpdates( $this->mNoUpdates );
00755 
00756                 return call_user_func( $this->mUploadCallback, $revision );
00757         }
00758 
00762         private function handleContributor() {
00763                 $fields = array( 'id', 'ip', 'username' );
00764                 $info = array();
00765 
00766                 while ( $this->reader->read() ) {
00767                         if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00768                                         $this->reader->name == 'contributor') {
00769                                 break;
00770                         }
00771 
00772                         $tag = $this->reader->name;
00773 
00774                         if ( in_array( $tag, $fields ) ) {
00775                                 $info[$tag] = $this->nodeContents();
00776                         }
00777                 }
00778 
00779                 return $info;
00780         }
00781 
00786         private function processTitle( $text ) {
00787                 global $wgCommandLineMode;
00788 
00789                 $workTitle = $text;
00790                 $origTitle = Title::newFromText( $workTitle );
00791 
00792                 if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
00793                         $title = Title::makeTitle( $this->mTargetNamespace,
00794                                 $origTitle->getDBkey() );
00795                 } else {
00796                         $title = Title::newFromText( $workTitle );
00797                 }
00798 
00799                 if( is_null( $title ) ) {
00800                         # Invalid page title? Ignore the page
00801                         $this->notice( 'import-error-invalid', $workTitle );
00802                         return false;
00803                 } elseif( $title->isExternal() ) {
00804                         $this->notice( 'import-error-interwiki', $title->getPrefixedText() );
00805                         return false;
00806                 } elseif( !$title->canExist() ) {
00807                         $this->notice( 'import-error-special', $title->getPrefixedText() );
00808                         return false;
00809                 } elseif( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
00810                         # Do not import if the importing wiki user cannot edit this page
00811                         $this->notice( 'import-error-edit', $title->getPrefixedText() );
00812                         return false;
00813                 } elseif( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
00814                         # Do not import if the importing wiki user cannot create this page
00815                         $this->notice( 'import-error-create', $title->getPrefixedText() );
00816                         return false;
00817                 }
00818 
00819                 return array( $title, $origTitle );
00820         }
00821 }
00822 
/**
 * Stream wrapper that exposes a registered import source as a read-only
 * stream, so that XMLReader can open it through a URL.
 *
 * A source is any object providing atEnd() and readChunk().
 */
class UploadSourceAdapter {
        /** Map of token => source object, filled by registerSource() */
        static $sourceRegistrations = array();

        /** Source object selected in stream_open() */
        private $mSource;
        /** Bytes read from the source but not yet handed out */
        private $mBuffer = '';
        /** Number of bytes handed out so far */
        private $mPosition = 0;

        /**
         * Registers a source and returns the token under which it can be
         * opened as "<protocol>://<token>".
         *
         * @param $source Object providing atEnd() and readChunk()
         * @return string Token generated by wfGenerateToken()
         */
        static function registerSource( $source ) {
                $id = wfGenerateToken();

                self::$sourceRegistrations[$id] = $source;

                return $id;
        }

        /**
         * Opens the stream for the source whose token is the host part of $path.
         *
         * @param $path string URL passed to the stream function
         * @param $mode string Unused
         * @param $options int Unused
         * @param $opened_path string Unused
         * @return bool False when the URL has no host part or names no
         *   registered source
         */
        function stream_open( $path, $mode, $options, &$opened_path ) {
                $url = parse_url( $path );
                // parse_url() returns false for malformed URLs and omits 'host'
                // when there is none.
                if ( !is_array( $url ) || !isset( $url['host'] ) ) {
                        return false;
                }
                $id = $url['host'];

                if ( !isset( self::$sourceRegistrations[$id] ) ) {
                        return false;
                }

                $this->mSource = self::$sourceRegistrations[$id];

                return true;
        }

        /**
         * Returns up to $count bytes, pulling chunks from the source until
         * enough data is buffered, the source ends or it returns an empty chunk.
         *
         * @param $count int Maximum number of bytes to return
         * @return string
         */
        function stream_read( $count ) {
                $leave = false;

                while ( !$leave && !$this->mSource->atEnd() &&
                                strlen( $this->mBuffer ) < $count ) {
                        $read = (string)$this->mSource->readChunk();

                        if ( $read === '' ) {
                                $leave = true;
                        }

                        $this->mBuffer .= $read;
                }

                $return = '';
                if ( $this->mBuffer !== '' ) {
                        // substr() may return false on older PHP versions; keep
                        // both values strings.
                        $return = (string)substr( $this->mBuffer, 0, $count );
                        $this->mBuffer = (string)substr( $this->mBuffer, $count );
                }

                $this->mPosition += strlen( $return );

                return $return;
        }

        /**
         * Writing is not supported.
         *
         * @param $data string Ignored
         * @return bool Always false
         */
        function stream_write( $data ) {
                return false;
        }

        /**
         * @return int Number of bytes handed out by stream_read() so far
         */
        function stream_tell() {
                return $this->mPosition;
        }

        /**
         * @return bool True only when the source is exhausted and no buffered
         *   bytes are left to read
         */
        function stream_eof() {
                // The source may already be at its end while data read ahead
                // is still waiting in the buffer.
                return $this->mSource->atEnd() && $this->mBuffer === '';
        }

        /**
         * Returns a stat array in which every field is zero.
         *
         * @return array Entries under both numeric and named keys
         */
        function url_stat() {
                $names = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
                        'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );
                $result = array();

                foreach ( $names as $index => $name ) {
                        $result[$index] = 0;
                        $result[$name] = 0;
                }

                return $result;
        }
}
00937 
/**
 * XMLReader with a convenience method for fetching an element's text.
 */
class XMLReader2 extends XMLReader {

	/**
	 * Collect the text of the element the reader is positioned on, reading
	 * forward until the next end-element node. Text, CDATA and significant
	 * whitespace nodes are concatenated; every other node type is ignored,
	 * so the first end tag encountered (even that of a child element)
	 * terminates the collection.
	 *
	 * @return String the collected text; empty for an empty element, and
	 *   also empty if the input ended before an end tag was found (the
	 *   reader is closed in that case)
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return '';
		}

		$buffer = '';
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
			case XMLReader::TEXT:
			case XMLReader::CDATA:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}

		// read() failed before the element was closed: release the reader
		// and report no content rather than leaking close()'s boolean.
		$this->close();
		return '';
	}
}
00961 
/**
 * Holder for the data of one imported item, together with the routines
 * that write it to the wiki: importOldRevision() for page revisions,
 * importLogItem() for log entries and importUpload() for files.
 */
class WikiRevision {
	public $importer = null;

	/** Title object set through setTitle() */
	public $title = null;
	public $id = 0;
	public $timestamp = "20010115000000";
	public $user = 0;
	public $user_text = "";
	public $text = "";
	public $comment = "";
	public $minor = false;
	public $type = "";
	public $action = "";
	public $params = "";
	public $fileSrc = '';
	public $sha1base36 = false;
	public $isTemp = false;
	public $archiveName = '';
	public $fileIsTemp;
	/** Value given to setSrc(); downloadSource() fetches it over HTTP */
	public $src = '';
	public $filename = '';
	public $size = 0;
	private $mNoUpdates = false;

	/**
	 * @param $title Object the title this item belongs to
	 * @throws MWException if $title is null or not an object
	 */
	function setTitle( $title ) {
		if( is_object( $title ) ) {
			$this->title = $title;
		} elseif( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param $id Integer
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp, converted with wfTimestamp( TS_MW, ... ).
	 *
	 * @param $ts String
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param $user String stored as the user text
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * @param $ip String stored in the same user text field as a user name
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param $text String
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param $text String
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param $minor Mixed cast to boolean
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param $src String location downloadSource() should fetch
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * @param $src String local path of the file to import
	 * @param $isTemp Boolean whether that file is temporary
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		// isTempSrc() reads $isTemp, so keep both flags in step; otherwise
		// importUpload() never learns that the source may be deleted.
		$this->isTemp = $isTemp;
	}

	/**
	 * @param $sha1base36 String
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param $filename String
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param $archiveName String
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param $size Mixed converted with intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param $type String log type, see importLogItem()
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param $action String log action, see importLogItem()
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param $params String log parameters, see importLogItem()
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param $noupdates Boolean when true, importOldRevision() skips the
	 *   edit updates after inserting the revision
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Object|null the title given to setTitle()
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return Integer
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return String
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return String the user text (name or IP), not a user object
	 */
	function getUser() {
		return $this->user_text;
	}

	/**
	 * @return String
	 */
	function getText() {
		return $this->text;
	}

	/**
	 * @return String
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * @return Boolean
	 */
	function getMinor() {
		return $this->minor;
	}

	/**
	 * @return String
	 */
	function getSrc() {
		return $this->src;
	}

	/**
	 * @return String|Boolean the stored hash converted from base 36 to
	 *   base 16 by wfBaseConvert(), or false if no hash was set
	 */
	function getSha1() {
		if ( $this->sha1base36 ) {
			return wfBaseConvert( $this->sha1base36, 36, 16 );
		}
		return false;
	}

	/**
	 * @return String
	 */
	function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * @return Boolean whether the file source was flagged as temporary
	 */
	function isTempSrc() {
		return $this->isTemp;
	}

	/**
	 * @return String
	 */
	function getFilename() {
		return $this->filename;
	}

	/**
	 * @return String
	 */
	function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @return Integer
	 */
	function getSize() {
		return $this->size;
	}

	/**
	 * @return String
	 */
	function getType() {
		return $this->type;
	}

	/**
	 * @return String
	 */
	function getAction() {
		return $this->action;
	}

	/**
	 * @return String
	 */
	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this item as a revision of its page, creating the page when
	 * it does not exist yet.
	 *
	 * @return Boolean false if a revision with the same page, timestamp,
	 *   user text and comment is already stored; true otherwise
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
			$userObj = $user;
		} else {
			// No user object could be built from the name: attribute the
			// revision to user id 0 with the raw user text.
			$userId = 0;
			$userText = $this->getUser();
			$userObj = new User;
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$page = WikiPage::factory( $this->title );
		if( !$page->exists() ) {
			# must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
			$oldcountable = null;
		} else {
			$pageId = $page->getId();
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment'   => $this->getComment() ),
				__METHOD__
			);
			if( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
			$oldcountable = $page->isCountable();
		}

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page'       => $pageId,
			'text'       => $this->getText(),
			'comment'    => $this->getComment(),
			'user'       => $userId,
			'user_text'  => $userText,
			'timestamp'  => $this->timestamp,
			'minor_edit' => $this->minor,
			) );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			$page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
		}

		return true;
	}

	/**
	 * Insert this item into the logging table, unless it has no title or
	 * a row with the same type, action, timestamp, title, comment and
	 * parameters already exists. Returns nothing in any case.
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# @todo FIXME: This will not record autoblocks
		if( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action'    => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title'     => $this->getTitle()->getDBkey(),
				'log_comment'   => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params'    => $this->params ),
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );
	}

	/**
	 * Store this item's file, either as the current version or as an
	 * archived one, taking the data from the local file source or, when
	 * none was set, from downloadSource().
	 *
	 * @return Boolean true if the upload status is good, false otherwise
	 */
	function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				// The stored file is newer than the imported one, so the
				// import goes in as an archived version instead.
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
			}
		}
		if( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
		if ( !$source ) {
			$source = $this->downloadSource();
			$flags |= File::DELETE_SOURCE;
		}
		if( !$source ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}
		$sha1 = $this->getSha1();
		// Compare with leading zeros stripped on both sides: sha1_file()
		// always yields 40 hex digits, while a numeric base conversion may
		// drop leading zeros from the expected value.
		if ( $sha1 && ( ltrim( $sha1, '0' ) !== ltrim( (string)sha1_file( $source ), '0' ) ) ) {
			if ( $flags & File::DELETE_SOURCE ) {
				# Broken file; delete it if it is a temporary file
				unlink( $source );
			}
			wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
			return false;
		}

		$user = User::newFromName( $this->user_text );

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user, $flags );
		} else {
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Succesful\n" );
			return true;
		} else {
			wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
			return false;
		}
	}

	/**
	 * Fetch the location given to setSrc() over HTTP into a new temporary
	 * file. Does nothing when $wgEnableUploads is off.
	 *
	 * @return String|Boolean path of the temporary file, or false if
	 *   uploads are disabled or the file could not be fetched or written
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		$f = fopen( $tempo, 'wb' );
		if( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );
		if ( $written !== strlen( $data ) ) {
			// A failed or short write would leave a truncated file behind;
			// discard it instead of importing it.
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}
01463 
/**
 * Import source backed by a string held in memory. The whole string is
 * delivered as a single chunk.
 */
class ImportStringSource {
	/** The complete data to hand out */
	public $mString;

	/** Whether the string has already been handed out */
	public $mRead = false;

	/**
	 * @param $string String the data to serve
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return Boolean true once readChunk() has delivered the string
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return String|Boolean the whole string on the first call, false on
	 *   every later call
	 */
	function readChunk() {
		if( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
01492 
/**
 * Import source backed by an open stream handle, with factory methods
 * that build one from a local file, an uploaded file, a URL or an
 * interwiki page. The factories return Status objects.
 */
class ImportStreamSource {
	/** Stream resource the data is read from */
	public $mHandle;

	/**
	 * @param $handle Resource an open, readable stream
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return Boolean result of feof() on the handle
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return String up to 32768 bytes read from the handle
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * @param $filename String path of the file to open for reading
	 * @return Status good with an ImportStreamSource as value, or fatal
	 *   'importcantopen' if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build a source from a file uploaded in the current request.
	 *
	 * @param $fieldname String name of the upload form field
	 * @return Status result of newFromFile() on the uploaded file, or a
	 *   fatal status naming the upload problem
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		// Read $_FILES directly: binding a reference to a missing key
		// would create that key as a side effect.
		if( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
			return Status::newFatal( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			switch( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
				case UPLOAD_ERR_FORM_SIZE: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# Any other error code falls through to the is_uploaded_file() check below
			}
		}

		$fname = isset( $upload['tmp_name'] ) ? $upload['tmp_name'] : '';
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and serve the response body from a temporary file.
	 *
	 * @param $url String
	 * @param $method String HTTP method
	 * @return Status good with an ImportStreamSource as value, or fatal
	 *   'importcantopen' if the request or the temporary file fails
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url, array( 'followRedirects' => true ) );
		if( $data === false ) {
			return Status::newFatal( 'importcantopen' );
		}

		$file = tmpfile();
		if( !$file ) {
			// tmpfile() returns false when no temporary file can be created
			return Status::newFatal( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		// Rewind so the source starts reading at the beginning of the data
		fseek( $file, 0 );
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build a source from Special:Export on another wiki.
	 *
	 * @param $interwiki String interwiki prefix
	 * @param $page String page name to export
	 * @param $history Boolean adds history=1 to the request
	 * @param $templates Boolean adds templates=1 to the request
	 * @param $pageLinkDepth Integer sent as pagelink-depth when non-zero
	 * @return Status result of newFromURL(), or fatal 'import-noarticle' /
	 *   'importbadinterwiki'
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}

		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if( is_null( $link ) || $link->getInterwiki() == '' ) {
			return Status::newFatal( 'importbadinterwiki' );
		}

		$params = array();
		if ( $history ) {
			$params['history'] = 1;
		}
		if ( $templates ) {
			$params['templates'] = 1;
		}
		if ( $pageLinkDepth ) {
			$params['pagelink-depth'] = $pageLinkDepth;
		}
		$url = $link->getFullUrl( $params );
		# For interwikis, use POST to avoid redirects.
		return ImportStreamSource::newFromURL( $url, "POST" );
	}
}