MediaWiki  REL1_23
Import.php
Go to the documentation of this file.
00001 <?php
/**
 * Streaming importer for MediaWiki XML dumps.
 *
 * The dump is read with XMLReader through the "uploadsource" stream wrapper.
 * Each <page>, <revision>, <upload> and <logitem> element is collected into
 * an array and handed to a replaceable callback; the defaults write to the
 * database via the WikiRevision import methods.
 */
class WikiImporter {
    /** @var XMLReader Reader positioned on the dump being imported */
    private $reader = null;
    private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
    private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
    private $mNoticeCallback, $mDebug;
    private $mImportUploads, $mImageBasePath;
    private $mNoUpdates = false;

    /**
     * Open the dump for reading and install the default callbacks.
     *
     * @param object $source Import source handed to
     *   UploadSourceAdapter::registerSource(); the adapter calls atEnd() and
     *   readChunk() on it.
     */
    public function __construct( $source ) {
        $this->reader = new XMLReader();

        // Registering a protocol twice raises a warning, so only register the
        // wrapper if an earlier importer has not already done so.
        if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
            stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
        }
        $id = UploadSourceAdapter::registerSource( $source );
        if ( defined( 'LIBXML_PARSEHUGE' ) ) {
            $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
        } else {
            $this->reader->open( "uploadsource://$id" );
        }

        // Default callbacks
        $this->setRevisionCallback( array( $this, "importRevision" ) );
        $this->setUploadCallback( array( $this, 'importUpload' ) );
        $this->setLogItemCallback( array( $this, 'importLogItem' ) );
        $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
    }

    /**
     * Log an XML error. Despite the name, nothing is thrown here.
     *
     * @param string $err Error description
     */
    private function throwXmlError( $err ) {
        $this->debug( "FAILURE: $err" );
        wfDebug( "WikiImporter XML error: $err\n" );
    }

    /**
     * Write a message to the debug log, but only when debugging is enabled
     * through setDebug().
     *
     * @param string $data
     */
    private function debug( $data ) {
        if ( $this->mDebug ) {
            wfDebug( "IMPORT: $data\n" );
        }
    }

    /**
     * Write a message to the debug log regardless of the debug flag.
     *
     * @param string $data
     */
    private function warn( $data ) {
        wfDebug( "IMPORT: $data\n" );
    }

    /**
     * Report a problem to the user. Any arguments after $msg are passed on
     * as message parameters.
     *
     * @param string $msg Message key
     */
    private function notice( $msg /*, $param, ...*/ ) {
        $params = func_get_args();
        array_shift( $params );

        if ( is_callable( $this->mNoticeCallback ) ) {
            call_user_func( $this->mNoticeCallback, $msg, $params );
        } else { # No ImportReporter -> CLI
            echo wfMessage( $msg, $params )->text() . "\n";
        }
    }

    /**
     * Enable or disable debug logging for this importer.
     *
     * @param bool $debug
     */
    public function setDebug( $debug ) {
        $this->mDebug = $debug;
    }

    /**
     * Set the flag that is copied onto every WikiRevision via its
     * setNoUpdates() method.
     *
     * @param bool $noupdates
     */
    public function setNoUpdates( $noupdates ) {
        $this->mNoUpdates = $noupdates;
    }

    /**
     * Set the callback used by notice(). It is called with the message key
     * and an array of message parameters.
     *
     * @param callable $callback
     * @return mixed Result of wfSetVar() (presumably the previous callback;
     *   confirm against wfSetVar)
     */
    public function setNoticeCallback( $callback ) {
        return wfSetVar( $this->mNoticeCallback, $callback );
    }

    /**
     * Set the callback invoked once a page's title has been processed.
     *
     * @param callable $callback
     * @return callable|null The previously installed callback
     */
    public function setPageCallback( $callback ) {
        $previous = $this->mPageCallback;
        $this->mPageCallback = $callback;
        return $previous;
    }

    /**
     * Set the callback invoked when a <page> element has been fully read.
     * It receives the title, the original title, the revision count, the
     * successful revision count and the page info array.
     *
     * @param callable $callback
     * @return callable|null The previously installed callback
     */
    public function setPageOutCallback( $callback ) {
        $previous = $this->mPageOutCallback;
        $this->mPageOutCallback = $callback;
        return $previous;
    }

    /**
     * Set the callback invoked for every revision. It receives the
     * WikiRevision and this importer.
     *
     * @param callable $callback
     * @return callable|null The previously installed callback
     */
    public function setRevisionCallback( $callback ) {
        $previous = $this->mRevisionCallback;
        $this->mRevisionCallback = $callback;
        return $previous;
    }

    /**
     * Set the callback invoked for every upload. It receives the
     * WikiRevision describing the upload.
     *
     * @param callable $callback
     * @return callable|null The previously installed callback
     */
    public function setUploadCallback( $callback ) {
        $previous = $this->mUploadCallback;
        $this->mUploadCallback = $callback;
        return $previous;
    }

    /**
     * Set the callback invoked for every log item. It receives the
     * WikiRevision and this importer.
     *
     * @param callable $callback
     * @return callable|null The previously installed callback
     */
    public function setLogItemCallback( $callback ) {
        $previous = $this->mLogItemCallback;
        $this->mLogItemCallback = $callback;
        return $previous;
    }

    /**
     * Set the site info callback. Note that handleSiteInfo() throws when a
     * callback is set, because <siteinfo> handling is not implemented.
     *
     * @param callable $callback
     * @return callable|null The previously installed callback
     */
    public function setSiteInfoCallback( $callback ) {
        $previous = $this->mSiteInfoCallback;
        $this->mSiteInfoCallback = $callback;
        return $previous;
    }

    /**
     * Set a namespace that all imported pages are moved into.
     *
     * @param int|null $namespace Namespace index, or null to keep the
     *   namespaces found in the dump
     * @return bool True when the value was accepted, false for a negative
     *   namespace (the previous setting is then left untouched)
     */
    public function setTargetNamespace( $namespace ) {
        if ( is_null( $namespace ) ) {
            // Don't override namespaces
            $this->mTargetNamespace = null;
            return true;
        } elseif ( $namespace >= 0 ) {
            // @todo FIXME: Check for validity
            $this->mTargetNamespace = intval( $namespace );
            return true;
        } else {
            return false;
        }
    }

    /**
     * Set a root page under which all imported pages become subpages.
     *
     * An empty string is ignored and leaves the current setting untouched.
     * On success the target namespace is reset to null.
     *
     * @param string|null $rootpage Page title, or null for no root page
     * @return Status Fatal when the title is invalid, external, or in a
     *   namespace without subpages
     */
    public function setTargetRootPage( $rootpage ) {
        $status = Status::newGood();
        if ( is_null( $rootpage ) ) {
            // No rootpage
            $this->mTargetRootPage = null;
        } elseif ( $rootpage !== '' ) {
            $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
            $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN );
            if ( !$title || $title->isExternal() ) {
                $status->fatal( 'import-rootpage-invalid' );
            } else {
                if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
                    global $wgContLang;

                    $displayNSText = $title->getNamespace() == NS_MAIN
                        ? wfMessage( 'blanknamespace' )->text()
                        : $wgContLang->getNsText( $title->getNamespace() );
                    $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
                } else {
                    // set namespace to 'all', so the namespace check in processTitle() can pass
                    $this->setTargetNamespace( null );
                    $this->mTargetRootPage = $title->getPrefixedDBkey();
                }
            }
        }
        return $status;
    }

    /**
     * Set the directory checked for upload files referenced by a <rel> tag.
     *
     * @param string $dir
     */
    public function setImageBasePath( $dir ) {
        $this->mImageBasePath = $dir;
    }

    /**
     * Choose whether <upload> elements are actually imported.
     *
     * @param bool $import
     */
    public function setImportUploads( $import ) {
        $this->mImportUploads = $import;
    }

    /**
     * Default revision callback: store one revision in the database.
     *
     * @param WikiRevision $revision
     * @return mixed Result of the import, or false when the content model
     *   cannot be used on the title or the content cannot be unserialized
     */
    public function importRevision( $revision ) {
        if ( !$revision->getContent()->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
            $this->notice( 'import-error-bad-location',
                $revision->getTitle()->getPrefixedText(),
                $revision->getID(),
                $revision->getModel(),
                $revision->getFormat() );

            return false;
        }

        try {
            $dbw = wfGetDB( DB_MASTER );
            return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
        } catch ( MWContentSerializationException $ex ) {
            $this->notice( 'import-error-unserialize',
                $revision->getTitle()->getPrefixedText(),
                $revision->getID(),
                $revision->getModel(),
                $revision->getFormat() );
        }

        return false;
    }

    /**
     * Default log item callback: store one log entry in the database.
     *
     * @param WikiRevision $rev
     * @return mixed Result of deadlockLoop()
     */
    public function importLogItem( $rev ) {
        $dbw = wfGetDB( DB_MASTER );
        return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
    }

    /**
     * Default upload callback: store one upload in the database.
     *
     * @param WikiRevision $revision
     * @return mixed Result of deadlockLoop()
     */
    public function importUpload( $revision ) {
        $dbw = wfGetDB( DB_MASTER );
        return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
    }

    /**
     * Default page-out callback: run the AfterImportPage hook with all
     * arguments this method received.
     *
     * @param Title|null $title
     * @param Title|null $origTitle
     * @param int $revCount
     * @param int $sRevCount
     * @param array $pageInfo
     * @return mixed Result of wfRunHooks()
     */
    public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
        $args = func_get_args();
        return wfRunHooks( 'AfterImportPage', $args );
    }

    /**
     * Alternative revision callback that only writes the revision's fields
     * to the debug log.
     *
     * @param WikiRevision $revision
     */
    public function debugRevisionHandler( &$revision ) {
        $this->debug( "Got revision:" );
        if ( is_object( $revision->title ) ) {
            $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
        } else {
            $this->debug( "-- Title: <invalid>" );
        }
        $this->debug( "-- User: " . $revision->user_text );
        $this->debug( "-- Timestamp: " . $revision->timestamp );
        $this->debug( "-- Comment: " . $revision->comment );
        $this->debug( "-- Text: " . $revision->text );
    }

    /**
     * Invoke the page callback, if one is set.
     *
     * @param array|bool $title Return value of processTitle()
     */
    public function pageCallback( $title ) {
        if ( isset( $this->mPageCallback ) ) {
            call_user_func( $this->mPageCallback, $title );
        }
    }

    /**
     * Invoke the page-out callback, if one is set, with all arguments given.
     *
     * @param Title|null $title
     * @param Title|null $origTitle
     * @param int $revCount
     * @param int $sucCount
     * @param array $pageInfo
     */
    private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
        if ( isset( $this->mPageOutCallback ) ) {
            $args = func_get_args();
            call_user_func_array( $this->mPageOutCallback, $args );
        }
    }

    /**
     * Invoke the revision callback with the revision and this importer.
     *
     * @param WikiRevision $revision
     * @return mixed Callback result, or false when no callback is set
     */
    private function revisionCallback( $revision ) {
        if ( isset( $this->mRevisionCallback ) ) {
            return call_user_func_array( $this->mRevisionCallback,
                    array( $revision, $this ) );
        } else {
            return false;
        }
    }

    /**
     * Invoke the log item callback with the revision and this importer.
     *
     * @param WikiRevision $revision
     * @return mixed Callback result, or false when no callback is set
     */
    private function logItemCallback( $revision ) {
        if ( isset( $this->mLogItemCallback ) ) {
            return call_user_func_array( $this->mLogItemCallback,
                    array( $revision, $this ) );
        } else {
            return false;
        }
    }

    /**
     * Collect the character data of the current element and advance the
     * reader to the first end tag encountered.
     *
     * Text, CDATA and significant-whitespace nodes are concatenated; other
     * node types are ignored.
     *
     * @return string Element contents; '' for an empty element or when the
     *   input ends before an end tag is found (the reader is then closed)
     */
    private function nodeContents() {
        if ( $this->reader->isEmptyElement ) {
            return "";
        }
        $buffer = "";
        while ( $this->reader->read() ) {
            switch ( $this->reader->nodeType ) {
            case XmlReader::TEXT:
            case XmlReader::CDATA: // CDATA sections carry text too
            case XmlReader::SIGNIFICANT_WHITESPACE:
                $buffer .= $this->reader->value;
                break;
            case XmlReader::END_ELEMENT:
                return $buffer;
            }
        }

        $this->reader->close();
        return '';
    }

    # --------------

    /**
     * Debugging aid: dump the type name, name and value of the node the
     * reader is currently on, followed by two newlines.
     */
    private function dumpElement() {
        static $lookup = null;
        if ( !$lookup ) {
            $xmlReaderConstants = array(
                "NONE",
                "ELEMENT",
                "ATTRIBUTE",
                "TEXT",
                "CDATA",
                "ENTITY_REF",
                "ENTITY",
                "PI",
                "COMMENT",
                "DOC",
                "DOC_TYPE",
                "DOC_FRAGMENT",
                "NOTATION",
                "WHITESPACE",
                "SIGNIFICANT_WHITESPACE",
                "END_ELEMENT",
                "END_ENTITY",
                "XML_DECLARATION",
            );
            $lookup = array();

            // Map each numeric node type back to its constant name
            foreach ( $xmlReaderConstants as $name ) {
                $lookup[constant( "XmlReader::$name" )] = $name;
            }
        }

        // var_dump() prints directly and returns nothing, so it must not be
        // used as an operand of print.
        var_dump(
            $lookup[$this->reader->nodeType],
            $this->reader->name,
            $this->reader->value
        );
        print "\n\n";
    }

    /**
     * Run the import: read the whole dump and dispatch every top-level
     * element to its handler.
     *
     * @throws MWException When the document element is not <mediawiki>
     * @return bool Always true when the loop completes
     */
    public function doImport() {

        // Calls to reader->read need to be wrapped in calls to
        // libxml_disable_entity_loader() to avoid local file
        // inclusion attacks (bug 46932).
        $oldDisable = libxml_disable_entity_loader( true );
        $this->reader->read();

        if ( $this->reader->name != 'mediawiki' ) {
            libxml_disable_entity_loader( $oldDisable );
            throw new MWException( "Expected <mediawiki> tag, got " .
                $this->reader->name );
        }
        $this->debug( "<mediawiki> tag is correct." );

        $this->debug( "Starting primary dump processing loop." );

        try {
            $keepReading = $this->reader->read();
            $skip = false;
            while ( $keepReading ) {
                $tag = $this->reader->name;
                $type = $this->reader->nodeType;

                if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
                    // Do nothing
                } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
                    break;
                } elseif ( $tag == 'siteinfo' ) {
                    $this->handleSiteInfo();
                } elseif ( $tag == 'page' ) {
                    $this->handlePage();
                } elseif ( $tag == 'logitem' ) {
                    $this->handleLogItem();
                } elseif ( $tag != '#text' ) {
                    $this->warn( "Unhandled top-level XML tag $tag" );

                    $skip = true;
                }

                if ( $skip ) {
                    // next() jumps over the subtree of the unknown element
                    $keepReading = $this->reader->next();
                    $skip = false;
                    $this->debug( "Skip" );
                } else {
                    $keepReading = $this->reader->read();
                }
            }
        } catch ( Exception $ex ) {
            // The entity loader setting is process-wide, so it has to be
            // restored even when a handler or callback throws. "finally" is
            // not used here to stay compatible with older PHP versions.
            libxml_disable_entity_loader( $oldDisable );
            throw $ex;
        }

        libxml_disable_entity_loader( $oldDisable );
        return true;
    }

    /**
     * Skip the <siteinfo> element.
     *
     * @throws MWException When a site info callback has been set
     * @return bool
     */
    private function handleSiteInfo() {
        // Site info is useful, but not actually used for dump imports.
        // Includes a quick short-circuit to save performance.
        if ( ! $this->mSiteInfoCallback ) {
            $this->reader->next();
            return true;
        }
        throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
    }

    /**
     * Read one <logitem> element into an array and pass it to
     * processLogItem().
     */
    private function handleLogItem() {
        $this->debug( "Enter log item handler." );
        $logInfo = array();

        // Fields that can just be stuffed in the pageInfo object
        $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
                    'logtitle', 'params' );

        while ( $this->reader->read() ) {
            if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                    $this->reader->name == 'logitem' ) {
                break;
            }

            $tag = $this->reader->name;

            if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
                $this, $logInfo
            ) ) ) {
                // Do nothing
            } elseif ( in_array( $tag, $normalFields ) ) {
                $logInfo[$tag] = $this->nodeContents();
            } elseif ( $tag == 'contributor' ) {
                $logInfo['contributor'] = $this->handleContributor();
            } elseif ( $tag != '#text' ) {
                $this->warn( "Unhandled log-item XML tag $tag" );
            }
        }

        $this->processLogItem( $logInfo );
    }

    /**
     * Build a WikiRevision from collected log item fields and pass it to the
     * log item callback.
     *
     * @param array $logInfo Fields read by handleLogItem(); 'id', 'type',
     *   'action', 'timestamp', 'params' and 'logtitle' are read
     *   unconditionally
     * @return mixed Result of the log item callback
     */
    private function processLogItem( $logInfo ) {
        $revision = new WikiRevision;

        $revision->setID( $logInfo['id'] );
        $revision->setType( $logInfo['type'] );
        $revision->setAction( $logInfo['action'] );
        $revision->setTimestamp( $logInfo['timestamp'] );
        $revision->setParams( $logInfo['params'] );
        $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
        $revision->setNoUpdates( $this->mNoUpdates );

        if ( isset( $logInfo['comment'] ) ) {
            $revision->setComment( $logInfo['comment'] );
        }

        if ( isset( $logInfo['contributor']['ip'] ) ) {
            $revision->setUserIP( $logInfo['contributor']['ip'] );
        }
        if ( isset( $logInfo['contributor']['username'] ) ) {
            $revision->setUserName( $logInfo['contributor']['username'] );
        }

        return $this->logItemCallback( $revision );
    }

    /**
     * Read one <page> element: process its title, dispatch its revisions and
     * uploads, then invoke the page-out callback.
     *
     * When processTitle() rejects the title, the rest of the page is skipped.
     * The page callback then receives false and the page-out callback
     * receives null for both titles.
     */
    private function handlePage() {
        // Handle page data.
        $this->debug( "Enter page handler." );
        $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

        // Fields that can just be stuffed in the pageInfo object
        $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

        $skip = false;
        $badTitle = false;
        // Stays null when the page has no usable <title>
        $origTitle = null;

        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
            if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                    $this->reader->name == 'page' ) {
                break;
            }

            $tag = $this->reader->name;

            if ( $badTitle ) {
                // The title is invalid, bail out of this page
                $skip = true;
            } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
                        &$pageInfo ) ) ) {
                // Do nothing
            } elseif ( in_array( $tag, $normalFields ) ) {
                $pageInfo[$tag] = $this->nodeContents();
                if ( $tag == 'title' ) {
                    $title = $this->processTitle( $pageInfo['title'] );

                    if ( !$title ) {
                        $badTitle = true;
                        $skip = true;
                    }

                    $this->pageCallback( $title );

                    // processTitle() returns either array( Title, Title ) or
                    // false; only an array can be destructured.
                    if ( is_array( $title ) ) {
                        list( $pageInfo['_title'], $origTitle ) = $title;
                    } else {
                        $pageInfo['_title'] = null;
                    }
                }
            } elseif ( $tag == 'revision' ) {
                $this->handleRevision( $pageInfo );
            } elseif ( $tag == 'upload' ) {
                $this->handleUpload( $pageInfo );
            } elseif ( $tag != '#text' ) {
                $this->warn( "Unhandled page XML tag $tag" );
                $skip = true;
            }
        }

        // A <page> without any <title> never sets '_title'
        $finalTitle = isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null;

        $this->pageOutCallback( $finalTitle, $origTitle,
                    $pageInfo['revisionCount'],
                    $pageInfo['successfulRevisionCount'],
                    $pageInfo );
    }

    /**
     * Read one <revision> element, process it and update the page's
     * revision counters.
     *
     * @param array $pageInfo Page info array; its 'revisionCount' and
     *   'successfulRevisionCount' entries are incremented in place
     */
    private function handleRevision( &$pageInfo ) {
        $this->debug( "Enter revision handler" );
        $revisionInfo = array();

        $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

        $skip = false;

        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
            if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                    $this->reader->name == 'revision' ) {
                break;
            }

            $tag = $this->reader->name;

            if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
                $this, $pageInfo, $revisionInfo
            ) ) ) {
                // Do nothing
            } elseif ( in_array( $tag, $normalFields ) ) {
                $revisionInfo[$tag] = $this->nodeContents();
            } elseif ( $tag == 'contributor' ) {
                $revisionInfo['contributor'] = $this->handleContributor();
            } elseif ( $tag != '#text' ) {
                $this->warn( "Unhandled revision XML tag $tag" );
                $skip = true;
            }
        }

        $pageInfo['revisionCount']++;
        if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
            $pageInfo['successfulRevisionCount']++;
        }
    }

    /**
     * Build a WikiRevision from collected revision fields and pass it to the
     * revision callback. A missing timestamp defaults to the current time;
     * the mere presence of a 'minor' field marks the revision as minor.
     *
     * @param array $pageInfo Page info; '_title' is used as the title
     * @param array $revisionInfo Fields read by handleRevision()
     * @return mixed Result of the revision callback
     */
    private function processRevision( $pageInfo, $revisionInfo ) {
        $revision = new WikiRevision;

        if ( isset( $revisionInfo['id'] ) ) {
            $revision->setID( $revisionInfo['id'] );
        }
        if ( isset( $revisionInfo['text'] ) ) {
            $revision->setText( $revisionInfo['text'] );
        }
        if ( isset( $revisionInfo['model'] ) ) {
            $revision->setModel( $revisionInfo['model'] );
        }
        if ( isset( $revisionInfo['format'] ) ) {
            $revision->setFormat( $revisionInfo['format'] );
        }
        $revision->setTitle( $pageInfo['_title'] );

        if ( isset( $revisionInfo['timestamp'] ) ) {
            $revision->setTimestamp( $revisionInfo['timestamp'] );
        } else {
            $revision->setTimestamp( wfTimestampNow() );
        }

        if ( isset( $revisionInfo['comment'] ) ) {
            $revision->setComment( $revisionInfo['comment'] );
        }

        if ( isset( $revisionInfo['minor'] ) ) {
            $revision->setMinor( true );
        }
        if ( isset( $revisionInfo['contributor']['ip'] ) ) {
            $revision->setUserIP( $revisionInfo['contributor']['ip'] );
        }
        if ( isset( $revisionInfo['contributor']['username'] ) ) {
            $revision->setUserName( $revisionInfo['contributor']['username'] );
        }
        $revision->setNoUpdates( $this->mNoUpdates );

        return $this->revisionCallback( $revision );
    }

    /**
     * Read one <upload> element and, when upload importing is enabled,
     * process it.
     *
     * Base64 <contents> are written to a temporary file. A file found under
     * the image base path via the <rel> tag takes precedence over that.
     *
     * @param array $pageInfo
     * @return mixed Result of processUpload(), or null when uploads are not
     *   being imported
     */
    private function handleUpload( &$pageInfo ) {
        $this->debug( "Enter upload handler" );
        $uploadInfo = array();

        $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
                    'src', 'size', 'sha1base36', 'archivename', 'rel' );

        $skip = false;

        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
            if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                    $this->reader->name == 'upload' ) {
                break;
            }

            $tag = $this->reader->name;

            if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
                $this, $pageInfo
            ) ) ) {
                // Do nothing
            } elseif ( in_array( $tag, $normalFields ) ) {
                $uploadInfo[$tag] = $this->nodeContents();
            } elseif ( $tag == 'contributor' ) {
                $uploadInfo['contributor'] = $this->handleContributor();
            } elseif ( $tag == 'contents' ) {
                $contents = $this->nodeContents();
                $encoding = $this->reader->getAttribute( 'encoding' );
                if ( $encoding === 'base64' ) {
                    $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
                    $uploadInfo['isTempSrc'] = true;
                }
            } elseif ( $tag != '#text' ) {
                $this->warn( "Unhandled upload XML tag $tag" );
                $skip = true;
            }
        }

        if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
            $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
            if ( file_exists( $path ) ) {
                $uploadInfo['fileSrc'] = $path;
                $uploadInfo['isTempSrc'] = false;
            }
        }

        if ( $this->mImportUploads ) {
            return $this->processUpload( $pageInfo, $uploadInfo );
        }
    }

    /**
     * Write data to a new temporary file in wfTempDir().
     *
     * @param string $contents
     * @return string Name of the temporary file
     */
    private function dumpTemp( $contents ) {
        $filename = tempnam( wfTempDir(), 'importupload' );
        file_put_contents( $filename, $contents );
        return $filename;
    }

    /**
     * Build a WikiRevision from collected upload fields and pass it to the
     * upload callback.
     *
     * @param array $pageInfo Page info; '_title' and 'id' are read
     * @param array $uploadInfo Fields read by handleUpload(); 'timestamp',
     *   'filename', 'src', 'size' and 'comment' are read unconditionally
     * @return mixed Result of the upload callback
     */
    private function processUpload( $pageInfo, $uploadInfo ) {
        $revision = new WikiRevision;
        $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

        $revision->setTitle( $pageInfo['_title'] );
        $revision->setID( $pageInfo['id'] );
        $revision->setTimestamp( $uploadInfo['timestamp'] );
        $revision->setText( $text );
        $revision->setFilename( $uploadInfo['filename'] );
        if ( isset( $uploadInfo['archivename'] ) ) {
            $revision->setArchiveName( $uploadInfo['archivename'] );
        }
        $revision->setSrc( $uploadInfo['src'] );
        if ( isset( $uploadInfo['fileSrc'] ) ) {
            $revision->setFileSrc( $uploadInfo['fileSrc'],
                !empty( $uploadInfo['isTempSrc'] ) );
        }
        if ( isset( $uploadInfo['sha1base36'] ) ) {
            $revision->setSha1Base36( $uploadInfo['sha1base36'] );
        }
        $revision->setSize( intval( $uploadInfo['size'] ) );
        $revision->setComment( $uploadInfo['comment'] );

        if ( isset( $uploadInfo['contributor']['ip'] ) ) {
            $revision->setUserIP( $uploadInfo['contributor']['ip'] );
        }
        if ( isset( $uploadInfo['contributor']['username'] ) ) {
            $revision->setUserName( $uploadInfo['contributor']['username'] );
        }
        $revision->setNoUpdates( $this->mNoUpdates );

        return call_user_func( $this->mUploadCallback, $revision );
    }

    /**
     * Read one <contributor> element.
     *
     * @return array Any of the keys 'id', 'ip' and 'username' that were
     *   present, mapped to their text contents
     */
    private function handleContributor() {
        $fields = array( 'id', 'ip', 'username' );
        $info = array();

        while ( $this->reader->read() ) {
            if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                    $this->reader->name == 'contributor' ) {
                break;
            }

            $tag = $this->reader->name;

            if ( in_array( $tag, $fields ) ) {
                $info[$tag] = $this->nodeContents();
            }
        }

        return $info;
    }

    /**
     * Turn the title text from the dump into the title to import to,
     * applying the target namespace or root page, and check that the page
     * may be imported. Every rejection is reported through notice().
     *
     * @param string $text Title text from the dump
     * @return array|bool array( target Title, original Title|null ), or
     *   false when the page must not be imported
     */
    private function processTitle( $text ) {
        global $wgCommandLineMode;

        $workTitle = $text;
        $origTitle = Title::newFromText( $workTitle );

        if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
            # makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map)
            # and than dbKey can begin with a lowercase char
            $title = Title::makeTitleSafe( $this->mTargetNamespace,
                $origTitle->getDBkey() );
        } else {
            if ( !is_null( $this->mTargetRootPage ) ) {
                $workTitle = $this->mTargetRootPage . '/' . $workTitle;
            }
            $title = Title::newFromText( $workTitle );
        }

        if ( is_null( $title ) ) {
            # Invalid page title? Ignore the page
            $this->notice( 'import-error-invalid', $workTitle );
            return false;
        } elseif ( $title->isExternal() ) {
            $this->notice( 'import-error-interwiki', $title->getPrefixedText() );
            return false;
        } elseif ( !$title->canExist() ) {
            $this->notice( 'import-error-special', $title->getPrefixedText() );
            return false;
        } elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
            # Do not import if the importing wiki user cannot edit this page
            $this->notice( 'import-error-edit', $title->getPrefixedText() );
            return false;
        } elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
            # Do not import if the importing wiki user cannot create this page
            $this->notice( 'import-error-create', $title->getPrefixedText() );
            return false;
        }

        return array( $title, $origTitle );
    }
}
00893 
/**
 * Stream wrapper that exposes a registered import source as a readable
 * stream under a URL of the form "<scheme>://<id>".
 *
 * A source is any object providing atEnd() and readChunk().
 */
class UploadSourceAdapter {
    /** @var array Registered sources, keyed by the id used as URL host */
    public static $sourceRegistrations = array();

    /** @var object Source selected by stream_open() */
    private $mSource;
    /** @var string Data read from the source but not yet handed out */
    private $mBuffer = '';
    /** @var int Number of bytes handed out so far */
    private $mPosition = 0;

    /**
     * Register a source so it can later be opened by id.
     *
     * @param object $source
     * @return string Id to use as the host part of the stream URL
     */
    public static function registerSource( $source ) {
        $id = wfRandomString();

        self::$sourceRegistrations[$id] = $source;

        return $id;
    }

    /**
     * Open the source whose id is the host part of $path.
     *
     * @param string $path
     * @param string $mode Unused
     * @param int $options Unused
     * @param string &$opened_path Unused
     * @return bool False when the URL has no host or the id is unknown
     */
    public function stream_open( $path, $mode, $options, &$opened_path ) {
        $url = parse_url( $path );

        // parse_url() returns false for malformed URLs and omits 'host'
        // when there is none
        if ( !is_array( $url ) || !isset( $url['host'] ) ) {
            return false;
        }
        $id = $url['host'];

        if ( !isset( self::$sourceRegistrations[$id] ) ) {
            return false;
        }

        $this->mSource = self::$sourceRegistrations[$id];

        return true;
    }

    /**
     * Return up to $count bytes, pulling chunks from the source until the
     * buffer is large enough, the source ends or a chunk comes back empty.
     *
     * @param int $count
     * @return string
     */
    public function stream_read( $count ) {
        $return = '';
        $leave = false;

        while ( !$leave && !$this->mSource->atEnd() &&
                strlen( $this->mBuffer ) < $count ) {
            $read = (string)$this->mSource->readChunk();

            if ( !strlen( $read ) ) {
                $leave = true;
            }

            $this->mBuffer .= $read;
        }

        if ( strlen( $this->mBuffer ) ) {
            $return = substr( $this->mBuffer, 0, $count );
            // substr() yields false on older PHP versions once the buffer
            // is used up; keep the buffer a string
            $this->mBuffer = (string)substr( $this->mBuffer, $count );
        }

        $this->mPosition += strlen( $return );

        return $return;
    }

    /**
     * Writing is not supported.
     *
     * @param string $data
     * @return bool Always false
     */
    public function stream_write( $data ) {
        return false;
    }

    /**
     * @return int Number of bytes returned by stream_read() so far
     */
    public function stream_tell() {
        return $this->mPosition;
    }

    /**
     * @return bool True once the source is exhausted and all buffered data
     *   has been handed out
     */
    public function stream_eof() {
        // The source can reach its end while data it delivered is still
        // waiting in the buffer; that data must be read before EOF.
        return $this->mSource->atEnd() && $this->mBuffer === '';
    }

    /**
     * @return array Stat structure with every field set to 0
     */
    public function url_stat() {
        $result = array();

        $result['dev'] = $result[0] = 0;
        $result['ino'] = $result[1] = 0;
        $result['mode'] = $result[2] = 0;
        $result['nlink'] = $result[3] = 0;
        $result['uid'] = $result[4] = 0;
        $result['gid'] = $result[5] = 0;
        $result['rdev'] = $result[6] = 0;
        $result['size'] = $result[7] = 0;
        $result['atime'] = $result[8] = 0;
        $result['mtime'] = $result[9] = 0;
        $result['ctime'] = $result[10] = 0;
        $result['blksize'] = $result[11] = 0;
        $result['blocks'] = $result[12] = 0;

        return $result;
    }
}
01008 
/**
 * XMLReader with a helper for reading an element's character data.
 */
class XMLReader2 extends XMLReader {

    /**
     * Collect the character data of the current element and advance to the
     * first end tag encountered.
     *
     * Text, CDATA and significant-whitespace nodes are concatenated; other
     * node types are ignored.
     *
     * @return string Element contents; '' for an empty element or when the
     *   input ends before an end tag is found (the reader is then closed)
     */
    public function nodeContents() {
        if ( $this->isEmptyElement ) {
            return "";
        }
        $buffer = "";
        while ( $this->read() ) {
            switch ( $this->nodeType ) {
            case XmlReader::TEXT:
            case XmlReader::CDATA: // CDATA sections carry text too
            case XmlReader::SIGNIFICANT_WHITESPACE:
                $buffer .= $this->value;
                break;
            case XmlReader::END_ELEMENT:
                return $buffer;
            }
        }

        // Ran out of input: always hand back a string rather than the
        // boolean result of close()
        $this->close();
        return '';
    }
}
01032 
/**
 * Value holder for one imported item (page revision, log entry or file
 * upload) together with the routines that write it into the local wiki.
 */
class WikiRevision {
    public $importer = null;

    /** Title object of the page / log target / file; set via setTitle() */
    public $title = null;
    public $id = 0;
    /** Timestamp in TS_MW form */
    public $timestamp = "20010115000000";
    public $user = 0;
    public $user_text = "";
    public $model = null;
    public $format = null;
    public $text = "";
    public $content = null;
    public $comment = "";
    public $minor = false;
    public $type = "";
    public $action = "";
    public $params = "";
    public $fileSrc = '';
    public $sha1base36 = false;
    public $isTemp = false;
    public $archiveName = '';
    public $fileIsTemp;
    /** Remote source of an upload; null until setSrc() is called */
    public $src;
    /** File name of an upload; null until setFilename() is called */
    public $filename;
    /** Size of an upload; null until setSize() is called */
    public $size;
    private $mNoUpdates = false;

    /**
     * @param Title $title
     * @throws MWException If $title is null or not an object
     */
    function setTitle( $title ) {
        if ( is_object( $title ) ) {
            $this->title = $title;
        } elseif ( is_null( $title ) ) {
            throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
        } else {
            throw new MWException( "WikiRevision given non-object title in import." );
        }
    }

    /**
     * @param int $id
     */
    function setID( $id ) {
        $this->id = $id;
    }

    /**
     * Store the timestamp, normalised to TS_MW by wfTimestamp().
     *
     * @param string $ts
     */
    function setTimestamp( $ts ) {
        # 2003-08-05T18:30:02Z
        $this->timestamp = wfTimestamp( TS_MW, $ts );
    }

    /**
     * @param string $user
     */
    function setUsername( $user ) {
        $this->user_text = $user;
    }

    /**
     * Stores the IP in the same field as setUsername().
     *
     * @param string $ip
     */
    function setUserIP( $ip ) {
        $this->user_text = $ip;
    }

    /**
     * @param string $model
     */
    function setModel( $model ) {
        $this->model = $model;
    }

    /**
     * @param string $format
     */
    function setFormat( $format ) {
        $this->format = $format;
    }

    /**
     * @param string $text
     */
    function setText( $text ) {
        $this->text = $text;
    }

    /**
     * @param string $text
     */
    function setComment( $text ) {
        $this->comment = $text;
    }

    /**
     * @param mixed $minor Cast to bool
     */
    function setMinor( $minor ) {
        $this->minor = (bool)$minor;
    }

    /**
     * @param mixed $src Source passed to Http::get() by downloadSource()
     */
    function setSrc( $src ) {
        $this->src = $src;
    }

    /**
     * Record the local path of the file to import.
     *
     * Both $fileIsTemp and $isTemp are updated so that isTempSrc() reflects
     * what the caller passed here.
     *
     * @param string $src
     * @param bool $isTemp
     */
    function setFileSrc( $src, $isTemp ) {
        $this->fileSrc = $src;
        $this->fileIsTemp = $isTemp;
        $this->isTemp = $isTemp;
    }

    /**
     * @param string $sha1base36
     */
    function setSha1Base36( $sha1base36 ) {
        $this->sha1base36 = $sha1base36;
    }

    /**
     * @param string $filename
     */
    function setFilename( $filename ) {
        $this->filename = $filename;
    }

    /**
     * @param string $archiveName
     */
    function setArchiveName( $archiveName ) {
        $this->archiveName = $archiveName;
    }

    /**
     * @param mixed $size Converted with intval()
     */
    function setSize( $size ) {
        $this->size = intval( $size );
    }

    /**
     * @param string $type
     */
    function setType( $type ) {
        $this->type = $type;
    }

    /**
     * @param string $action
     */
    function setAction( $action ) {
        $this->action = $action;
    }

    /**
     * @param string $params
     */
    function setParams( $params ) {
        $this->params = $params;
    }

    /**
     * @param bool $noupdates When true, importOldRevision() skips doEditUpdates()
     */
    public function setNoUpdates( $noupdates ) {
        $this->mNoUpdates = $noupdates;
    }

    /**
     * @return Title|null
     */
    function getTitle() {
        return $this->title;
    }

    /**
     * @return int
     */
    function getID() {
        return $this->id;
    }

    /**
     * @return string
     */
    function getTimestamp() {
        return $this->timestamp;
    }

    /**
     * @return string The user name or IP stored in $user_text
     */
    function getUser() {
        return $this->user_text;
    }

    /**
     * @deprecated since 1.21; use getContent() instead
     * @return string
     */
    function getText() {
        ContentHandler::deprecated( __METHOD__, '1.21' );

        return $this->text;
    }

    /**
     * Build (once) and return the Content object for this revision's text.
     *
     * @return Content
     */
    function getContent() {
        if ( is_null( $this->content ) ) {
            $this->content =
                ContentHandler::makeContent(
                    $this->text,
                    $this->getTitle(),
                    $this->getModel(),
                    $this->getFormat()
                );
        }

        return $this->content;
    }

    /**
     * @return string Content model; falls back to the title's content model
     */
    function getModel() {
        if ( is_null( $this->model ) ) {
            $this->model = $this->getTitle()->getContentModel();
        }

        return $this->model;
    }

    /**
     * @return string Serialization format; when none was set explicitly, the
     *   default format of the handler for this revision's content model
     */
    function getFormat() {
        // Test the format itself (not the model), and derive the default from
        // the effective model so an explicitly set model is respected.
        if ( is_null( $this->format ) ) {
            $this->format = ContentHandler::getForModelID( $this->getModel() )->getDefaultFormat();
        }

        return $this->format;
    }

    /**
     * @return string
     */
    function getComment() {
        return $this->comment;
    }

    /**
     * @return bool
     */
    function getMinor() {
        return $this->minor;
    }

    /**
     * @return mixed Null if setSrc() was never called
     */
    function getSrc() {
        return $this->src;
    }

    /**
     * @return bool|string The stored base-36 hash converted to base 16 by
     *   wfBaseConvert(), or false if no hash was set
     */
    function getSha1() {
        if ( $this->sha1base36 ) {
            return wfBaseConvert( $this->sha1base36, 36, 16 );
        }
        return false;
    }

    /**
     * @return string
     */
    function getFileSrc() {
        return $this->fileSrc;
    }

    /**
     * @return bool Whether the file source was flagged as temporary
     */
    function isTempSrc() {
        return $this->isTemp;
    }

    /**
     * @return mixed Null if setFilename() was never called
     */
    function getFilename() {
        return $this->filename;
    }

    /**
     * @return string
     */
    function getArchiveName() {
        return $this->archiveName;
    }

    /**
     * @return mixed Null if setSize() was never called
     */
    function getSize() {
        return $this->size;
    }

    /**
     * @return string
     */
    function getType() {
        return $this->type;
    }

    /**
     * @return string
     */
    function getAction() {
        return $this->action;
    }

    /**
     * @return string
     */
    function getParams() {
        return $this->params;
    }

    /**
     * Insert this revision into the database, creating the page if needed.
     *
     * A revision is skipped when the page already has one with the same
     * timestamp, user text and comment.
     *
     * @return bool False if an equivalent revision already existed, else true
     */
    function importOldRevision() {
        $dbw = wfGetDB( DB_MASTER );

        # Sneak a single revision into place
        $user = User::newFromName( $this->getUser() );
        if ( $user ) {
            $userId = intval( $user->getId() );
            $userText = $user->getName();
            $userObj = $user;
        } else {
            $userId = 0;
            $userText = $this->getUser();
            $userObj = new User;
        }

        // avoid memory leak...?
        $linkCache = LinkCache::singleton();
        $linkCache->clear();

        $page = WikiPage::factory( $this->title );
        if ( !$page->exists() ) {
            # must create the page...
            $pageId = $page->insertOn( $dbw );
            $created = true;
            $oldcountable = null;
        } else {
            $pageId = $page->getId();
            $created = false;

            $prior = $dbw->selectField( 'revision', '1',
                array( 'rev_page' => $pageId,
                    'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
                    'rev_user_text' => $userText,
                    'rev_comment' => $this->getComment() ),
                __METHOD__
            );
            if ( $prior ) {
                // @todo FIXME: This could fail slightly for multiple matches :P
                wfDebug( __METHOD__ . ": skipping existing revision for [[" .
                    $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                return false;
            }
            $oldcountable = $page->isCountable();
        }

        # @todo FIXME: Use original rev_id optionally (better for backups)
        # Insert the row
        $revision = new Revision( array(
            'title' => $this->title,
            'page' => $pageId,
            'content_model' => $this->getModel(),
            'content_format' => $this->getFormat(),
            'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()?
            'comment' => $this->getComment(),
            'user' => $userId,
            'user_text' => $userText,
            'timestamp' => $this->timestamp,
            'minor_edit' => $this->minor,
            ) );
        $revision->insertOn( $dbw );
        $changed = $page->updateIfNewerOn( $dbw, $revision );

        // Secondary updates only run when the page row actually changed and
        // the caller did not disable them via setNoUpdates().
        if ( $changed !== false && !$this->mNoUpdates ) {
            wfDebug( __METHOD__ . ": running updates\n" );
            $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
        }

        return true;
    }

    /**
     * Insert this item into the logging table unless an identical row
     * (same type, action, timestamp, target, comment and params) exists.
     *
     * Returns nothing; entries without a title or with an existing match
     * are skipped with a debug message.
     */
    function importLogItem() {
        $dbw = wfGetDB( DB_MASTER );
        # @todo FIXME: This will not record autoblocks
        if ( !$this->getTitle() ) {
            wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        # Check if it exists already
        // @todo FIXME: Use original log ID (better for backups)
        $prior = $dbw->selectField( 'logging', '1',
            array( 'log_type' => $this->getType(),
                'log_action' => $this->getAction(),
                'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                'log_namespace' => $this->getTitle()->getNamespace(),
                'log_title' => $this->getTitle()->getDBkey(),
                'log_comment' => $this->getComment(),
                #'log_user_text' => $this->user_text,
                'log_params' => $this->params ),
            __METHOD__
        );
        // @todo FIXME: This could fail slightly for multiple matches :P
        if ( $prior ) {
            wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
        $data = array(
            'log_id' => $log_id,
            'log_type' => $this->type,
            'log_action' => $this->action,
            'log_timestamp' => $dbw->timestamp( $this->timestamp ),
            'log_user' => User::idFromName( $this->user_text ),
            #'log_user_text' => $this->user_text,
            'log_namespace' => $this->getTitle()->getNamespace(),
            'log_title' => $this->getTitle()->getDBkey(),
            'log_comment' => $this->getComment(),
            'log_params' => $this->params
        );
        $dbw->insert( 'logging', $data, __METHOD__ );
    }

    /**
     * Import this item as a file upload (current or archived version).
     *
     * The source is the local path from getFileSrc(), or a download obtained
     * through downloadSource() when no local path is set. If a SHA-1 was
     * supplied, the file is verified against it before uploading.
     *
     * @return bool True if the upload status is good, false otherwise
     */
    function importUpload() {
        # Construct a file
        $archiveName = $this->getArchiveName();
        if ( $archiveName ) {
            wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
            $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                RepoGroup::singleton()->getLocalRepo(), $archiveName );
        } else {
            $file = wfLocalFile( $this->getTitle() );
            wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
            // An existing newer local file wins; the import becomes an
            // archived version named after that file's timestamp.
            if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
                $archiveName = $file->getTimestamp() . '!' . $file->getName();
                $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                    RepoGroup::singleton()->getLocalRepo(), $archiveName );
                wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
            }
        }
        if ( !$file ) {
            wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
            return false;
        }

        # Get the file source or download if necessary
        $source = $this->getFileSrc();
        $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
        if ( !$source ) {
            $source = $this->downloadSource();
            // Downloads always land in a temp file we own.
            $flags |= File::DELETE_SOURCE;
        }
        if ( !$source ) {
            wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
            return false;
        }
        $sha1 = $this->getSha1();
        if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
            if ( $flags & File::DELETE_SOURCE ) {
                # Broken file; delete it if it is a temporary file
                unlink( $source );
            }
            wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
            return false;
        }

        $user = User::newFromName( $this->user_text );

        # Do the actual upload
        if ( $archiveName ) {
            $status = $file->uploadOld( $source, $archiveName,
                $this->getTimestamp(), $this->getComment(), $user, $flags );
        } else {
            $status = $file->upload( $source, $this->getComment(), $this->getComment(),
                $flags, false, $this->getTimestamp(), $user );
        }

        if ( $status->isGood() ) {
            wfDebug( __METHOD__ . ": Successful\n" );
            return true;
        } else {
            wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
            return false;
        }
    }

    /**
     * Fetch getSrc() over HTTP into a new temporary file.
     *
     * @return bool|string Path of the temporary file, or false when uploads
     *   are disabled, the temp file cannot be opened, or the fetch fails
     */
    function downloadSource() {
        global $wgEnableUploads;
        if ( !$wgEnableUploads ) {
            return false;
        }

        $tempo = tempnam( wfTempDir(), 'download' );
        $f = fopen( $tempo, 'wb' );
        if ( !$f ) {
            wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
            return false;
        }

        // @todo FIXME!
        $src = $this->getSrc();
        $data = Http::get( $src );
        if ( !$data ) {
            wfDebug( "IMPORT: couldn't fetch source $src\n" );
            // Do not leave the empty temp file behind.
            fclose( $f );
            unlink( $tempo );
            return false;
        }

        fwrite( $f, $data );
        fclose( $f );

        return $tempo;
    }

}
01597 
/**
 * Import source that serves an in-memory string as a single chunk.
 */
class ImportStringSource {
    /**
     * Payload handed out by readChunk(). Declared explicitly (it used to be
     * created dynamically); kept public so existing access keeps working.
     */
    public $mString;

    /** Whether the payload has already been handed out */
    public $mRead = false;

    /**
     * @param string $string Complete data to serve
     */
    function __construct( $string ) {
        $this->mString = $string;
        $this->mRead = false;
    }

    /**
     * @return bool True once readChunk() has returned the string
     */
    function atEnd() {
        return $this->mRead;
    }

    /**
     * Return the whole string on the first call.
     *
     * @return bool|string The string, or false if it was already returned
     */
    function readChunk() {
        if ( $this->atEnd() ) {
            return false;
        }
        $this->mRead = true;
        return $this->mString;
    }
}
01626 
/**
 * Import source backed by an open stream handle, plus factories that build
 * such a source from a local file, an HTTP upload, a URL or an interwiki
 * export page. The factories return Status objects.
 */
class ImportStreamSource {
    /**
     * Open stream the chunks are read from. Declared explicitly (it used to
     * be created dynamically); kept public so existing access keeps working.
     */
    public $mHandle;

    /**
     * @param resource $handle Readable stream
     */
    function __construct( $handle ) {
        $this->mHandle = $handle;
    }

    /**
     * @return bool Result of feof() on the handle
     */
    function atEnd() {
        return feof( $this->mHandle );
    }

    /**
     * @return bool|string Up to 32768 bytes read from the handle
     */
    function readChunk() {
        return fread( $this->mHandle, 32768 );
    }

    /**
     * @param string $filename Path of the file to open for reading
     * @return Status Good status wrapping an ImportStreamSource, or fatal
     *   'importcantopen' when the file cannot be opened
     */
    static function newFromFile( $filename ) {
        wfSuppressWarnings();
        $file = fopen( $filename, 'rt' );
        wfRestoreWarnings();
        if ( !$file ) {
            return Status::newFatal( "importcantopen" );
        }
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * @param string $fieldname Name of the upload field in $_FILES
     * @return Status Good status wrapping an ImportStreamSource, or a fatal
     *   status describing why the upload cannot be used
     */
    static function newFromUpload( $fieldname = "xmlimport" ) {
        // Look the field up without taking a reference: binding by reference
        // would create an empty $_FILES entry when the field is absent.
        $upload = isset( $_FILES[$fieldname] ) ? $_FILES[$fieldname] : null;

        if ( $upload === null || !$upload['name'] ) {
            return Status::newFatal( 'importnofile' );
        }
        if ( !empty( $upload['error'] ) ) {
            switch ( $upload['error'] ) {
                case UPLOAD_ERR_INI_SIZE: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
                    return Status::newFatal( 'importuploaderrorsize' );
                case UPLOAD_ERR_FORM_SIZE: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
                    return Status::newFatal( 'importuploaderrorsize' );
                case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
                    return Status::newFatal( 'importuploaderrorpartial' );
                case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder.
                    return Status::newFatal( 'importuploaderrortemp' );
                # Any other code falls through to the is_uploaded_file() check below
            }

        }
        $fname = $upload['tmp_name'];
        if ( is_uploaded_file( $fname ) ) {
            return ImportStreamSource::newFromFile( $fname );
        } else {
            return Status::newFatal( 'importnofile' );
        }
    }

    /**
     * Fetch a URL and expose the response body as an import source.
     *
     * @param string $url
     * @param string $method HTTP method passed to Http::request()
     * @return Status Good status wrapping an ImportStreamSource, or fatal
     *   'importcantopen' if the request or the temporary file fails
     */
    static function newFromURL( $url, $method = 'GET' ) {
        wfDebug( __METHOD__ . ": opening $url\n" );
        # Use the standard HTTP fetch function; it times out
        # quicker and sorts out user-agent problems which might
        # otherwise prevent importing from large sites, such
        # as the Wikimedia cluster, etc.
        $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
        if ( $data === false ) {
            return Status::newFatal( 'importcantopen' );
        }

        $file = tmpfile();
        if ( $file === false ) {
            // No temporary file could be created, so there is nothing to read from.
            return Status::newFatal( 'importcantopen' );
        }
        fwrite( $file, $data );
        fflush( $file );
        // Rewind so the source starts reading at the beginning of the body.
        fseek( $file, 0 );
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * Build a source from Special:Export on another wiki.
     *
     * @param string $interwiki Interwiki prefix of the source wiki
     * @param string $page Page name to export
     * @param bool $history Adds history=1 to the request when true
     * @param bool $templates Adds templates=1 to the request when true
     * @param int $pageLinkDepth Sent as pagelink-depth when non-zero
     * @return Status
     */
    public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
        if ( $page == '' ) {
            return Status::newFatal( 'import-noarticle' );
        }
        $link = Title::newFromText( "$interwiki:Special:Export/$page" );
        if ( is_null( $link ) || !$link->isExternal() ) {
            return Status::newFatal( 'importbadinterwiki' );
        } else {
            $params = array();
            if ( $history ) {
                $params['history'] = 1;
            }
            if ( $templates ) {
                $params['templates'] = 1;
            }
            if ( $pageLinkDepth ) {
                $params['pagelink-depth'] = $pageLinkDepth;
            }
            $url = $link->getFullURL( $params );
            # For interwikis, use POST to avoid redirects.
            return ImportStreamSource::newFromURL( $url, "POST" );
        }
    }
}