MediaWiki  REL1_22
Import.php
Go to the documentation of this file.
00001 <?php
00033 class WikiImporter {
00034     private $reader = null;
00035     private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
00036     private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
00037     private $mNoticeCallback, $mDebug;
00038     private $mImportUploads, $mImageBasePath;
00039     private $mNoUpdates = false;
00040 
00045     function __construct( $source ) {
00046         $this->reader = new XMLReader();
00047 
00048         stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
00049         $id = UploadSourceAdapter::registerSource( $source );
00050         if ( defined( 'LIBXML_PARSEHUGE' ) ) {
00051             $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
00052         } else {
00053             $this->reader->open( "uploadsource://$id" );
00054         }
00055 
00056         // Default callbacks
00057         $this->setRevisionCallback( array( $this, "importRevision" ) );
00058         $this->setUploadCallback( array( $this, 'importUpload' ) );
00059         $this->setLogItemCallback( array( $this, 'importLogItem' ) );
00060         $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
00061     }
00062 
00063     private function throwXmlError( $err ) {
00064         $this->debug( "FAILURE: $err" );
00065         wfDebug( "WikiImporter XML error: $err\n" );
00066     }
00067 
00068     private function debug( $data ) {
00069         if ( $this->mDebug ) {
00070             wfDebug( "IMPORT: $data\n" );
00071         }
00072     }
00073 
00074     private function warn( $data ) {
00075         wfDebug( "IMPORT: $data\n" );
00076     }
00077 
00078     private function notice( $msg /*, $param, ...*/ ) {
00079         $params = func_get_args();
00080         array_shift( $params );
00081 
00082         if ( is_callable( $this->mNoticeCallback ) ) {
00083             call_user_func( $this->mNoticeCallback, $msg, $params );
00084         } else { # No ImportReporter -> CLI
00085             echo wfMessage( $msg, $params )->text() . "\n";
00086         }
00087     }
00088 
00093     function setDebug( $debug ) {
00094         $this->mDebug = $debug;
00095     }
00096 
00101     function setNoUpdates( $noupdates ) {
00102         $this->mNoUpdates = $noupdates;
00103     }
00104 
00111     public function setNoticeCallback( $callback ) {
00112         return wfSetVar( $this->mNoticeCallback, $callback );
00113     }
00114 
00120     public function setPageCallback( $callback ) {
00121         $previous = $this->mPageCallback;
00122         $this->mPageCallback = $callback;
00123         return $previous;
00124     }
00125 
00135     public function setPageOutCallback( $callback ) {
00136         $previous = $this->mPageOutCallback;
00137         $this->mPageOutCallback = $callback;
00138         return $previous;
00139     }
00140 
00146     public function setRevisionCallback( $callback ) {
00147         $previous = $this->mRevisionCallback;
00148         $this->mRevisionCallback = $callback;
00149         return $previous;
00150     }
00151 
00157     public function setUploadCallback( $callback ) {
00158         $previous = $this->mUploadCallback;
00159         $this->mUploadCallback = $callback;
00160         return $previous;
00161     }
00162 
00168     public function setLogItemCallback( $callback ) {
00169         $previous = $this->mLogItemCallback;
00170         $this->mLogItemCallback = $callback;
00171         return $previous;
00172     }
00173 
00179     public function setSiteInfoCallback( $callback ) {
00180         $previous = $this->mSiteInfoCallback;
00181         $this->mSiteInfoCallback = $callback;
00182         return $previous;
00183     }
00184 
00190     public function setTargetNamespace( $namespace ) {
00191         if ( is_null( $namespace ) ) {
00192             // Don't override namespaces
00193             $this->mTargetNamespace = null;
00194         } elseif ( $namespace >= 0 ) {
00195             // @todo FIXME: Check for validity
00196             $this->mTargetNamespace = intval( $namespace );
00197         } else {
00198             return false;
00199         }
00200     }
00201 
00207     public function setTargetRootPage( $rootpage ) {
00208         $status = Status::newGood();
00209         if ( is_null( $rootpage ) ) {
00210             // No rootpage
00211             $this->mTargetRootPage = null;
00212         } elseif ( $rootpage !== '' ) {
00213             $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
00214             $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN );
00215             if ( !$title || $title->isExternal() ) {
00216                 $status->fatal( 'import-rootpage-invalid' );
00217             } else {
00218                 if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
00219                     global $wgContLang;
00220 
00221                     $displayNSText = $title->getNamespace() == NS_MAIN
00222                         ? wfMessage( 'blanknamespace' )->text()
00223                         : $wgContLang->getNsText( $title->getNamespace() );
00224                     $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
00225                 } else {
00226                     // set namespace to 'all', so the namespace check in processTitle() can passed
00227                     $this->setTargetNamespace( null );
00228                     $this->mTargetRootPage = $title->getPrefixedDBkey();
00229                 }
00230             }
00231         }
00232         return $status;
00233     }
00234 
00238     public function setImageBasePath( $dir ) {
00239         $this->mImageBasePath = $dir;
00240     }
00241 
00245     public function setImportUploads( $import ) {
00246         $this->mImportUploads = $import;
00247     }
00248 
00254     public function importRevision( $revision ) {
00255         try {
00256             $dbw = wfGetDB( DB_MASTER );
00257             return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
00258         } catch ( MWContentSerializationException $ex ) {
00259             $this->notice( 'import-error-unserialize',
00260                 $revision->getTitle()->getPrefixedText(),
00261                 $revision->getID(),
00262                 $revision->getModel(),
00263                 $revision->getFormat() );
00264         }
00265     }
00266 
00272     public function importLogItem( $rev ) {
00273         $dbw = wfGetDB( DB_MASTER );
00274         return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
00275     }
00276 
00282     public function importUpload( $revision ) {
00283         $dbw = wfGetDB( DB_MASTER );
00284         return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
00285     }
00286 
00296     public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
00297         $args = func_get_args();
00298         return wfRunHooks( 'AfterImportPage', $args );
00299     }
00300 
00305     public function debugRevisionHandler( &$revision ) {
00306         $this->debug( "Got revision:" );
00307         if ( is_object( $revision->title ) ) {
00308             $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
00309         } else {
00310             $this->debug( "-- Title: <invalid>" );
00311         }
00312         $this->debug( "-- User: " . $revision->user_text );
00313         $this->debug( "-- Timestamp: " . $revision->timestamp );
00314         $this->debug( "-- Comment: " . $revision->comment );
00315         $this->debug( "-- Text: " . $revision->text );
00316     }
00317 
00322     function pageCallback( $title ) {
00323         if ( isset( $this->mPageCallback ) ) {
00324             call_user_func( $this->mPageCallback, $title );
00325         }
00326     }
00327 
00336     private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
00337         if ( isset( $this->mPageOutCallback ) ) {
00338             $args = func_get_args();
00339             call_user_func_array( $this->mPageOutCallback, $args );
00340         }
00341     }
00342 
00348     private function revisionCallback( $revision ) {
00349         if ( isset( $this->mRevisionCallback ) ) {
00350             return call_user_func_array( $this->mRevisionCallback,
00351                     array( $revision, $this ) );
00352         } else {
00353             return false;
00354         }
00355     }
00356 
00362     private function logItemCallback( $revision ) {
00363         if ( isset( $this->mLogItemCallback ) ) {
00364             return call_user_func_array( $this->mLogItemCallback,
00365                     array( $revision, $this ) );
00366         } else {
00367             return false;
00368         }
00369     }
00370 
00378     private function nodeContents() {
00379         if ( $this->reader->isEmptyElement ) {
00380             return "";
00381         }
00382         $buffer = "";
00383         while ( $this->reader->read() ) {
00384             switch ( $this->reader->nodeType ) {
00385             case XmlReader::TEXT:
00386             case XmlReader::SIGNIFICANT_WHITESPACE:
00387                 $buffer .= $this->reader->value;
00388                 break;
00389             case XmlReader::END_ELEMENT:
00390                 return $buffer;
00391             }
00392         }
00393 
00394         $this->reader->close();
00395         return '';
00396     }
00397 
00398     # --------------
00399 
00401     private function dumpElement() {
00402         static $lookup = null;
00403         if ( !$lookup ) {
00404             $xmlReaderConstants = array(
00405                 "NONE",
00406                 "ELEMENT",
00407                 "ATTRIBUTE",
00408                 "TEXT",
00409                 "CDATA",
00410                 "ENTITY_REF",
00411                 "ENTITY",
00412                 "PI",
00413                 "COMMENT",
00414                 "DOC",
00415                 "DOC_TYPE",
00416                 "DOC_FRAGMENT",
00417                 "NOTATION",
00418                 "WHITESPACE",
00419                 "SIGNIFICANT_WHITESPACE",
00420                 "END_ELEMENT",
00421                 "END_ENTITY",
00422                 "XML_DECLARATION",
00423             );
00424             $lookup = array();
00425 
00426             foreach ( $xmlReaderConstants as $name ) {
00427                 $lookup[constant( "XmlReader::$name" )] = $name;
00428             }
00429         }
00430 
00431         print var_dump(
00432             $lookup[$this->reader->nodeType],
00433             $this->reader->name,
00434             $this->reader->value
00435         ) . "\n\n";
00436     }
00437 
00443     public function doImport() {
00444 
00445         // Calls to reader->read need to be wrapped in calls to
00446         // libxml_disable_entity_loader() to avoid local file
00447         // inclusion attacks (bug 46932).
00448         $oldDisable = libxml_disable_entity_loader( true );
00449         $this->reader->read();
00450 
00451         if ( $this->reader->name != 'mediawiki' ) {
00452             libxml_disable_entity_loader( $oldDisable );
00453             throw new MWException( "Expected <mediawiki> tag, got " .
00454                 $this->reader->name );
00455         }
00456         $this->debug( "<mediawiki> tag is correct." );
00457 
00458         $this->debug( "Starting primary dump processing loop." );
00459 
00460         $keepReading = $this->reader->read();
00461         $skip = false;
00462         while ( $keepReading ) {
00463             $tag = $this->reader->name;
00464             $type = $this->reader->nodeType;
00465 
00466             if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
00467                 // Do nothing
00468             } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
00469                 break;
00470             } elseif ( $tag == 'siteinfo' ) {
00471                 $this->handleSiteInfo();
00472             } elseif ( $tag == 'page' ) {
00473                 $this->handlePage();
00474             } elseif ( $tag == 'logitem' ) {
00475                 $this->handleLogItem();
00476             } elseif ( $tag != '#text' ) {
00477                 $this->warn( "Unhandled top-level XML tag $tag" );
00478 
00479                 $skip = true;
00480             }
00481 
00482             if ( $skip ) {
00483                 $keepReading = $this->reader->next();
00484                 $skip = false;
00485                 $this->debug( "Skip" );
00486             } else {
00487                 $keepReading = $this->reader->read();
00488             }
00489         }
00490 
00491         libxml_disable_entity_loader( $oldDisable );
00492         return true;
00493     }
00494 
00499     private function handleSiteInfo() {
00500         // Site info is useful, but not actually used for dump imports.
00501         // Includes a quick short-circuit to save performance.
00502         if ( ! $this->mSiteInfoCallback ) {
00503             $this->reader->next();
00504             return true;
00505         }
00506         throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
00507     }
00508 
00509     private function handleLogItem() {
00510         $this->debug( "Enter log item handler." );
00511         $logInfo = array();
00512 
00513         // Fields that can just be stuffed in the pageInfo object
00514         $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
00515                     'logtitle', 'params' );
00516 
00517         while ( $this->reader->read() ) {
00518             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00519                     $this->reader->name == 'logitem' ) {
00520                 break;
00521             }
00522 
00523             $tag = $this->reader->name;
00524 
00525             if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
00526                 $this, $logInfo
00527             ) ) ) {
00528                 // Do nothing
00529             } elseif ( in_array( $tag, $normalFields ) ) {
00530                 $logInfo[$tag] = $this->nodeContents();
00531             } elseif ( $tag == 'contributor' ) {
00532                 $logInfo['contributor'] = $this->handleContributor();
00533             } elseif ( $tag != '#text' ) {
00534                 $this->warn( "Unhandled log-item XML tag $tag" );
00535             }
00536         }
00537 
00538         $this->processLogItem( $logInfo );
00539     }
00540 
00545     private function processLogItem( $logInfo ) {
00546         $revision = new WikiRevision;
00547 
00548         $revision->setID( $logInfo['id'] );
00549         $revision->setType( $logInfo['type'] );
00550         $revision->setAction( $logInfo['action'] );
00551         $revision->setTimestamp( $logInfo['timestamp'] );
00552         $revision->setParams( $logInfo['params'] );
00553         $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
00554         $revision->setNoUpdates( $this->mNoUpdates );
00555 
00556         if ( isset( $logInfo['comment'] ) ) {
00557             $revision->setComment( $logInfo['comment'] );
00558         }
00559 
00560         if ( isset( $logInfo['contributor']['ip'] ) ) {
00561             $revision->setUserIP( $logInfo['contributor']['ip'] );
00562         }
00563         if ( isset( $logInfo['contributor']['username'] ) ) {
00564             $revision->setUserName( $logInfo['contributor']['username'] );
00565         }
00566 
00567         return $this->logItemCallback( $revision );
00568     }
00569 
00570     private function handlePage() {
00571         // Handle page data.
00572         $this->debug( "Enter page handler." );
00573         $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
00574 
00575         // Fields that can just be stuffed in the pageInfo object
00576         $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
00577 
00578         $skip = false;
00579         $badTitle = false;
00580 
00581         while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00582             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00583                     $this->reader->name == 'page' ) {
00584                 break;
00585             }
00586 
00587             $tag = $this->reader->name;
00588 
00589             if ( $badTitle ) {
00590                 // The title is invalid, bail out of this page
00591                 $skip = true;
00592             } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
00593                         &$pageInfo ) ) ) {
00594                 // Do nothing
00595             } elseif ( in_array( $tag, $normalFields ) ) {
00596                 $pageInfo[$tag] = $this->nodeContents();
00597                 if ( $tag == 'title' ) {
00598                     $title = $this->processTitle( $pageInfo['title'] );
00599 
00600                     if ( !$title ) {
00601                         $badTitle = true;
00602                         $skip = true;
00603                     }
00604 
00605                     $this->pageCallback( $title );
00606                     list( $pageInfo['_title'], $origTitle ) = $title;
00607                 }
00608             } elseif ( $tag == 'revision' ) {
00609                 $this->handleRevision( $pageInfo );
00610             } elseif ( $tag == 'upload' ) {
00611                 $this->handleUpload( $pageInfo );
00612             } elseif ( $tag != '#text' ) {
00613                 $this->warn( "Unhandled page XML tag $tag" );
00614                 $skip = true;
00615             }
00616         }
00617 
00618         $this->pageOutCallback( $pageInfo['_title'], $origTitle,
00619                     $pageInfo['revisionCount'],
00620                     $pageInfo['successfulRevisionCount'],
00621                     $pageInfo );
00622     }
00623 
00627     private function handleRevision( &$pageInfo ) {
00628         $this->debug( "Enter revision handler" );
00629         $revisionInfo = array();
00630 
00631         $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );
00632 
00633         $skip = false;
00634 
00635         while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00636             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00637                     $this->reader->name == 'revision' ) {
00638                 break;
00639             }
00640 
00641             $tag = $this->reader->name;
00642 
00643             if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
00644                 $this, $pageInfo, $revisionInfo
00645             ) ) ) {
00646                 // Do nothing
00647             } elseif ( in_array( $tag, $normalFields ) ) {
00648                 $revisionInfo[$tag] = $this->nodeContents();
00649             } elseif ( $tag == 'contributor' ) {
00650                 $revisionInfo['contributor'] = $this->handleContributor();
00651             } elseif ( $tag != '#text' ) {
00652                 $this->warn( "Unhandled revision XML tag $tag" );
00653                 $skip = true;
00654             }
00655         }
00656 
00657         $pageInfo['revisionCount']++;
00658         if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
00659             $pageInfo['successfulRevisionCount']++;
00660         }
00661     }
00662 
00668     private function processRevision( $pageInfo, $revisionInfo ) {
00669         $revision = new WikiRevision;
00670 
00671         if ( isset( $revisionInfo['id'] ) ) {
00672             $revision->setID( $revisionInfo['id'] );
00673         }
00674         if ( isset( $revisionInfo['text'] ) ) {
00675             $revision->setText( $revisionInfo['text'] );
00676         }
00677         if ( isset( $revisionInfo['model'] ) ) {
00678             $revision->setModel( $revisionInfo['model'] );
00679         }
00680         if ( isset( $revisionInfo['format'] ) ) {
00681             $revision->setFormat( $revisionInfo['format'] );
00682         }
00683         $revision->setTitle( $pageInfo['_title'] );
00684 
00685         if ( isset( $revisionInfo['timestamp'] ) ) {
00686             $revision->setTimestamp( $revisionInfo['timestamp'] );
00687         } else {
00688             $revision->setTimestamp( wfTimestampNow() );
00689         }
00690 
00691         if ( isset( $revisionInfo['comment'] ) ) {
00692             $revision->setComment( $revisionInfo['comment'] );
00693         }
00694 
00695         if ( isset( $revisionInfo['minor'] ) ) {
00696             $revision->setMinor( true );
00697         }
00698         if ( isset( $revisionInfo['contributor']['ip'] ) ) {
00699             $revision->setUserIP( $revisionInfo['contributor']['ip'] );
00700         }
00701         if ( isset( $revisionInfo['contributor']['username'] ) ) {
00702             $revision->setUserName( $revisionInfo['contributor']['username'] );
00703         }
00704         $revision->setNoUpdates( $this->mNoUpdates );
00705 
00706         return $this->revisionCallback( $revision );
00707     }
00708 
00713     private function handleUpload( &$pageInfo ) {
00714         $this->debug( "Enter upload handler" );
00715         $uploadInfo = array();
00716 
00717         $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
00718                     'src', 'size', 'sha1base36', 'archivename', 'rel' );
00719 
00720         $skip = false;
00721 
00722         while ( $skip ? $this->reader->next() : $this->reader->read() ) {
00723             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00724                     $this->reader->name == 'upload' ) {
00725                 break;
00726             }
00727 
00728             $tag = $this->reader->name;
00729 
00730             if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
00731                 $this, $pageInfo
00732             ) ) ) {
00733                 // Do nothing
00734             } elseif ( in_array( $tag, $normalFields ) ) {
00735                 $uploadInfo[$tag] = $this->nodeContents();
00736             } elseif ( $tag == 'contributor' ) {
00737                 $uploadInfo['contributor'] = $this->handleContributor();
00738             } elseif ( $tag == 'contents' ) {
00739                 $contents = $this->nodeContents();
00740                 $encoding = $this->reader->getAttribute( 'encoding' );
00741                 if ( $encoding === 'base64' ) {
00742                     $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
00743                     $uploadInfo['isTempSrc'] = true;
00744                 }
00745             } elseif ( $tag != '#text' ) {
00746                 $this->warn( "Unhandled upload XML tag $tag" );
00747                 $skip = true;
00748             }
00749         }
00750 
00751         if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
00752             $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
00753             if ( file_exists( $path ) ) {
00754                 $uploadInfo['fileSrc'] = $path;
00755                 $uploadInfo['isTempSrc'] = false;
00756             }
00757         }
00758 
00759         if ( $this->mImportUploads ) {
00760             return $this->processUpload( $pageInfo, $uploadInfo );
00761         }
00762     }
00763 
00768     private function dumpTemp( $contents ) {
00769         $filename = tempnam( wfTempDir(), 'importupload' );
00770         file_put_contents( $filename, $contents );
00771         return $filename;
00772     }
00773 
00779     private function processUpload( $pageInfo, $uploadInfo ) {
00780         $revision = new WikiRevision;
00781         $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
00782 
00783         $revision->setTitle( $pageInfo['_title'] );
00784         $revision->setID( $pageInfo['id'] );
00785         $revision->setTimestamp( $uploadInfo['timestamp'] );
00786         $revision->setText( $text );
00787         $revision->setFilename( $uploadInfo['filename'] );
00788         if ( isset( $uploadInfo['archivename'] ) ) {
00789             $revision->setArchiveName( $uploadInfo['archivename'] );
00790         }
00791         $revision->setSrc( $uploadInfo['src'] );
00792         if ( isset( $uploadInfo['fileSrc'] ) ) {
00793             $revision->setFileSrc( $uploadInfo['fileSrc'],
00794                 !empty( $uploadInfo['isTempSrc'] ) );
00795         }
00796         if ( isset( $uploadInfo['sha1base36'] ) ) {
00797             $revision->setSha1Base36( $uploadInfo['sha1base36'] );
00798         }
00799         $revision->setSize( intval( $uploadInfo['size'] ) );
00800         $revision->setComment( $uploadInfo['comment'] );
00801 
00802         if ( isset( $uploadInfo['contributor']['ip'] ) ) {
00803             $revision->setUserIP( $uploadInfo['contributor']['ip'] );
00804         }
00805         if ( isset( $uploadInfo['contributor']['username'] ) ) {
00806             $revision->setUserName( $uploadInfo['contributor']['username'] );
00807         }
00808         $revision->setNoUpdates( $this->mNoUpdates );
00809 
00810         return call_user_func( $this->mUploadCallback, $revision );
00811     }
00812 
00816     private function handleContributor() {
00817         $fields = array( 'id', 'ip', 'username' );
00818         $info = array();
00819 
00820         while ( $this->reader->read() ) {
00821             if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
00822                     $this->reader->name == 'contributor' ) {
00823                 break;
00824             }
00825 
00826             $tag = $this->reader->name;
00827 
00828             if ( in_array( $tag, $fields ) ) {
00829                 $info[$tag] = $this->nodeContents();
00830             }
00831         }
00832 
00833         return $info;
00834     }
00835 
00840     private function processTitle( $text ) {
00841         global $wgCommandLineMode;
00842 
00843         $workTitle = $text;
00844         $origTitle = Title::newFromText( $workTitle );
00845 
00846         if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
00847             # makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map)
00848             # and than dbKey can begin with a lowercase char
00849             $title = Title::makeTitleSafe( $this->mTargetNamespace,
00850                 $origTitle->getDBkey() );
00851         } else {
00852             if ( !is_null( $this->mTargetRootPage ) ) {
00853                 $workTitle = $this->mTargetRootPage . '/' . $workTitle;
00854             }
00855             $title = Title::newFromText( $workTitle );
00856         }
00857 
00858         if ( is_null( $title ) ) {
00859             # Invalid page title? Ignore the page
00860             $this->notice( 'import-error-invalid', $workTitle );
00861             return false;
00862         } elseif ( $title->isExternal() ) {
00863             $this->notice( 'import-error-interwiki', $title->getPrefixedText() );
00864             return false;
00865         } elseif ( !$title->canExist() ) {
00866             $this->notice( 'import-error-special', $title->getPrefixedText() );
00867             return false;
00868         } elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
00869             # Do not import if the importing wiki user cannot edit this page
00870             $this->notice( 'import-error-edit', $title->getPrefixedText() );
00871             return false;
00872         } elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
00873             # Do not import if the importing wiki user cannot create this page
00874             $this->notice( 'import-error-create', $title->getPrefixedText() );
00875             return false;
00876         }
00877 
00878         return array( $title, $origTitle );
00879     }
00880 }
00881 
/**
 * Read-only stream wrapper that exposes a registered import source (an object
 * providing atEnd() and readChunk()) as a PHP stream.
 */
class UploadSourceAdapter {
    /** Registered sources, keyed by the id returned from registerSource() */
    public static $sourceRegistrations = array();

    /** Source object this stream reads from */
    private $mSource;
    /** Data already fetched from the source but not yet handed out */
    private $mBuffer;
    /** Number of bytes handed out so far */
    private $mPosition;

    /**
     * Registers a source under a random id.
     *
     * @param $source object Import source
     * @return string The id, usable as the host part of the stream URL
     */
    public static function registerSource( $source ) {
        $key = wfRandomString();
        self::$sourceRegistrations[$key] = $source;

        return $key;
    }

    /**
     * Opens the stream for the source whose id is the URL's host part.
     *
     * @param $path string Stream URL
     * @param $mode string Unused
     * @param $options int Unused
     * @param &$opened_path string Unused
     * @return bool False when no source is registered under that id
     */
    public function stream_open( $path, $mode, $options, &$opened_path ) {
        $parts = parse_url( $path );
        $id = $parts['host'];

        if ( isset( self::$sourceRegistrations[$id] ) ) {
            $this->mSource = self::$sourceRegistrations[$id];
            return true;
        }

        return false;
    }

    /**
     * Returns up to $count bytes, pulling chunks from the source until the
     * buffer is large enough, the source is at its end, or a chunk is empty.
     *
     * @param $count int Maximum number of bytes to return
     * @return string
     */
    public function stream_read( $count ) {
        while ( !$this->mSource->atEnd() && strlen( $this->mBuffer ) < $count ) {
            $chunk = $this->mSource->readChunk();
            $this->mBuffer .= $chunk;

            if ( !strlen( $chunk ) ) {
                // Nothing more available right now
                break;
            }
        }

        $return = '';
        if ( strlen( $this->mBuffer ) ) {
            $return = substr( $this->mBuffer, 0, $count );
            $this->mBuffer = substr( $this->mBuffer, $count );
        }

        $this->mPosition += strlen( $return );

        return $return;
    }

    /**
     * Writing is not supported.
     *
     * @param $data string
     * @return bool Always false
     */
    public function stream_write( $data ) {
        return false;
    }

    /**
     * @return int Number of bytes handed out by stream_read() so far
     */
    public function stream_tell() {
        return $this->mPosition;
    }

    /**
     * @return bool Whether the underlying source reports its end
     */
    public function stream_eof() {
        return $this->mSource->atEnd();
    }

    /**
     * Returns a stat array in which every field is zero.
     *
     * @return array Thirteen stat fields under both numeric and named keys
     */
    public function url_stat() {
        $fieldNames = array(
            'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
            'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks',
        );

        $result = array();
        foreach ( $fieldNames as $index => $fieldName ) {
            $result[$fieldName] = $result[$index] = 0;
        }

        return $result;
    }
}
00996 
/**
 * XMLReader with a helper for reading an element's text content.
 */
class XMLReader2 extends XMLReader {

    /**
     * Collects the text content of the element the reader is positioned on.
     *
     * Reads forward until the next end-element node, concatenating text,
     * CDATA and significant-whitespace nodes. An empty element yields "" and
     * the reader is not advanced.
     *
     * @return string The collected text; "" if the input ends before an end
     *   element is found (the reader is closed in that case)
     */
    function nodeContents() {
        if ( $this->isEmptyElement ) {
            return "";
        }
        $buffer = "";
        while ( $this->read() ) {
            switch ( $this->nodeType ) {
                case XmlReader::TEXT:
                case XmlReader::CDATA: // CDATA sections carry element text too
                case XmlReader::SIGNIFICANT_WHITESPACE:
                    $buffer .= $this->value;
                    break;
                case XmlReader::END_ELEMENT:
                    return $buffer;
            }
        }

        // Ran out of input: close the reader but keep the return type a
        // string rather than leaking close()'s boolean result.
        $this->close();
        return '';
    }
}
01020 
/**
 * Value holder for one imported item (a page revision, a log entry or an
 * uploaded file version) together with the routines that write it to the wiki.
 */
class WikiRevision {
    /** Not referenced by any method of this class; kept for external users. */
    public $importer = null;

    /** @var object|null Title object the item belongs to (see setTitle()) */
    public $title = null;
    public $id = 0;
    /** @var string Timestamp as produced by wfTimestamp( TS_MW, ... ) */
    public $timestamp = "20010115000000";
    public $user = 0;
    public $user_text = "";
    public $model = null;
    public $format = null;
    public $text = "";
    /** @var object|null Lazily built by getContent() */
    public $content = null;
    public $comment = "";
    public $minor = false;
    public $type = "";
    public $action = "";
    public $params = "";
    public $fileSrc = '';
    public $sha1base36 = false;
    /** @var bool Whether $fileSrc is a temporary file; kept in sync with $fileIsTemp */
    public $isTemp = false;
    public $archiveName = '';
    /** @var bool|null Whether $fileSrc is a temporary file; kept in sync with $isTemp */
    public $fileIsTemp;
    /** @var string|null Remote source used by downloadSource(); null until setSrc() is called */
    public $src = null;
    /** @var string|null Null until setFilename() is called */
    public $filename = null;
    /** @var int|null Null until setSize() is called */
    public $size = null;
    private $mNoUpdates = false;

    /**
     * @param object $title Title object
     * @throws MWException if $title is null or not an object
     */
    public function setTitle( $title ) {
        if ( is_object( $title ) ) {
            $this->title = $title;
        } elseif ( is_null( $title ) ) {
            throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
        } else {
            throw new MWException( "WikiRevision given non-object title in import." );
        }
    }

    /**
     * @param int $id
     */
    public function setID( $id ) {
        $this->id = $id;
    }

    /**
     * @param string $ts Timestamp in any form wfTimestamp() accepts,
     *   e.g. 2003-08-05T18:30:02Z; stored converted to TS_MW
     */
    public function setTimestamp( $ts ) {
        $this->timestamp = wfTimestamp( TS_MW, $ts );
    }

    /**
     * @param string $user User name; stored in the same field as setUserIP()
     */
    public function setUsername( $user ) {
        $this->user_text = $user;
    }

    /**
     * @param string $ip IP address; stored in the same field as setUsername()
     */
    public function setUserIP( $ip ) {
        $this->user_text = $ip;
    }

    /**
     * @param string $model Content model identifier
     */
    public function setModel( $model ) {
        $this->model = $model;
    }

    /**
     * @param string $format Content serialization format
     */
    public function setFormat( $format ) {
        $this->format = $format;
    }

    /**
     * @param string $text Serialized revision text
     */
    public function setText( $text ) {
        $this->text = $text;
    }

    /**
     * @param string $text Edit summary / log comment
     */
    public function setComment( $text ) {
        $this->comment = $text;
    }

    /**
     * @param mixed $minor Cast to bool
     */
    public function setMinor( $minor ) {
        $this->minor = (bool)$minor;
    }

    /**
     * @param string $src Remote location fetched by downloadSource()
     */
    public function setSrc( $src ) {
        $this->src = $src;
    }

    /**
     * @param string $src Local path of the file to import
     * @param bool $isTemp Whether $src is a temporary file
     */
    public function setFileSrc( $src, $isTemp ) {
        $this->fileSrc = $src;
        // Both flags are written so that isTempSrc() and any direct readers
        // of either public property observe the same value.
        $this->fileIsTemp = $isTemp;
        $this->isTemp = $isTemp;
    }

    /**
     * @param string $sha1base36 SHA-1 of the file in base 36
     */
    public function setSha1Base36( $sha1base36 ) {
        $this->sha1base36 = $sha1base36;
    }

    /**
     * @param string $filename
     */
    public function setFilename( $filename ) {
        $this->filename = $filename;
    }

    /**
     * @param string $archiveName
     */
    public function setArchiveName( $archiveName ) {
        $this->archiveName = $archiveName;
    }

    /**
     * @param mixed $size Converted with intval()
     */
    public function setSize( $size ) {
        $this->size = intval( $size );
    }

    /**
     * @param string $type Log type
     */
    public function setType( $type ) {
        $this->type = $type;
    }

    /**
     * @param string $action Log action
     */
    public function setAction( $action ) {
        $this->action = $action;
    }

    /**
     * @param string $params Log parameters
     */
    public function setParams( $params ) {
        $this->params = $params;
    }

    /**
     * @param bool $noupdates When true, importOldRevision() skips doEditUpdates()
     */
    public function setNoUpdates( $noupdates ) {
        $this->mNoUpdates = $noupdates;
    }

    /**
     * @return object|null Title object, or null if none was set
     */
    public function getTitle() {
        return $this->title;
    }

    /**
     * @return int
     */
    public function getID() {
        return $this->id;
    }

    /**
     * @return string
     */
    public function getTimestamp() {
        return $this->timestamp;
    }

    /**
     * @return string The user name or IP text (not a user object)
     */
    public function getUser() {
        return $this->user_text;
    }

    /**
     * @deprecated since 1.21 (flagged through ContentHandler::deprecated())
     * @return string
     */
    public function getText() {
        ContentHandler::deprecated( __METHOD__, '1.21' );

        return $this->text;
    }

    /**
     * Build (once) and return the content object for the stored text.
     *
     * @return object Result of ContentHandler::makeContent()
     */
    public function getContent() {
        if ( is_null( $this->content ) ) {
            $this->content =
                ContentHandler::makeContent(
                    $this->text,
                    $this->getTitle(),
                    $this->getModel(),
                    $this->getFormat()
                );
        }

        return $this->content;
    }

    /**
     * @return string Content model; defaults to the title's content model
     */
    public function getModel() {
        if ( is_null( $this->model ) ) {
            $this->model = $this->getTitle()->getContentModel();
        }

        return $this->model;
    }

    /**
     * @return string Content format; defaults to the default format of the
     *   title's content handler when none was set
     */
    public function getFormat() {
        // The guard must look at the format itself. Testing the model here
        // overwrote an explicitly set format whenever the model was unset,
        // and never filled in a missing format once the model was known.
        if ( is_null( $this->format ) ) {
            $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat();
        }

        return $this->format;
    }

    /**
     * @return string
     */
    public function getComment() {
        return $this->comment;
    }

    /**
     * @return bool
     */
    public function getMinor() {
        return $this->minor;
    }

    /**
     * @return string|null
     */
    public function getSrc() {
        return $this->src;
    }

    /**
     * @return string|bool Base-16 form of the stored base-36 SHA-1, or false if none was set
     */
    public function getSha1() {
        if ( $this->sha1base36 ) {
            // NOTE(review): no pad length is passed, so a hash with leading
            // zeros may come back shorter than 40 hex digits and fail the
            // comparison with sha1_file() in importUpload() - confirm against
            // wfBaseConvert()'s signature.
            return wfBaseConvert( $this->sha1base36, 36, 16 );
        }
        return false;
    }

    /**
     * @return string
     */
    public function getFileSrc() {
        return $this->fileSrc;
    }

    /**
     * @return bool Whether the file source set through setFileSrc() is temporary
     */
    public function isTempSrc() {
        // setFileSrc() historically stored the flag only in $fileIsTemp while
        // this method read $isTemp; honour either property.
        return $this->isTemp || $this->fileIsTemp;
    }

    /**
     * @return string|null
     */
    public function getFilename() {
        return $this->filename;
    }

    /**
     * @return string
     */
    public function getArchiveName() {
        return $this->archiveName;
    }

    /**
     * @return int|null
     */
    public function getSize() {
        return $this->size;
    }

    /**
     * @return string
     */
    public function getType() {
        return $this->type;
    }

    /**
     * @return string
     */
    public function getAction() {
        return $this->action;
    }

    /**
     * @return string
     */
    public function getParams() {
        return $this->params;
    }

    /**
     * Insert this item as a revision of its page, creating the page if needed.
     *
     * A revision with the same page, timestamp, user text and comment is
     * treated as already imported and skipped.
     *
     * @return bool False if a matching revision already exists, true otherwise
     */
    public function importOldRevision() {
        $dbw = wfGetDB( DB_MASTER );

        # Sneak a single revision into place
        $user = User::newFromName( $this->getUser() );
        if ( $user ) {
            $userId = intval( $user->getId() );
            $userText = $user->getName();
            $userObj = $user;
        } else {
            $userId = 0;
            $userText = $this->getUser();
            $userObj = new User;
        }

        // avoid memory leak...?
        $linkCache = LinkCache::singleton();
        $linkCache->clear();

        $page = WikiPage::factory( $this->title );
        if ( !$page->exists() ) {
            # must create the page...
            $pageId = $page->insertOn( $dbw );
            $created = true;
            $oldcountable = null;
        } else {
            $pageId = $page->getId();
            $created = false;

            $prior = $dbw->selectField( 'revision', '1',
                array( 'rev_page' => $pageId,
                    'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
                    'rev_user_text' => $userText,
                    'rev_comment' => $this->getComment() ),
                __METHOD__
            );
            if ( $prior ) {
                // @todo FIXME: This could fail slightly for multiple matches :P
                wfDebug( __METHOD__ . ": skipping existing revision for [[" .
                    $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                return false;
            }
            $oldcountable = $page->isCountable();
        }

        # @todo FIXME: Use original rev_id optionally (better for backups)
        # Insert the row
        $revision = new Revision( array(
            'title' => $this->title,
            'page' => $pageId,
            'content_model' => $this->getModel(),
            'content_format' => $this->getFormat(),
            'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()?
            'comment' => $this->getComment(),
            'user' => $userId,
            'user_text' => $userText,
            'timestamp' => $this->timestamp,
            'minor_edit' => $this->minor,
            ) );
        $revision->insertOn( $dbw );
        $changed = $page->updateIfNewerOn( $dbw, $revision );

        // Secondary updates run only if the page row changed and the caller
        // has not disabled them through setNoUpdates().
        if ( $changed !== false && !$this->mNoUpdates ) {
            wfDebug( __METHOD__ . ": running updates\n" );
            $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
        }

        return true;
    }

    /**
     * Insert this item as a row of the logging table.
     *
     * Nothing is written when no title is set or when an identical row
     * (type, action, timestamp, namespace, title, comment, params) exists.
     *
     * @return void
     */
    public function importLogItem() {
        $dbw = wfGetDB( DB_MASTER );
        # @todo FIXME: This will not record autoblocks
        if ( !$this->getTitle() ) {
            wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        # Check if it exists already
        // @todo FIXME: Use original log ID (better for backups)
        $prior = $dbw->selectField( 'logging', '1',
            array( 'log_type' => $this->getType(),
                'log_action' => $this->getAction(),
                'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                'log_namespace' => $this->getTitle()->getNamespace(),
                'log_title' => $this->getTitle()->getDBkey(),
                'log_comment' => $this->getComment(),
                #'log_user_text' => $this->user_text,
                'log_params' => $this->params ),
            __METHOD__
        );
        // @todo FIXME: This could fail slightly for multiple matches :P
        if ( $prior ) {
            wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
        $data = array(
            'log_id' => $log_id,
            'log_type' => $this->type,
            'log_action' => $this->action,
            'log_timestamp' => $dbw->timestamp( $this->timestamp ),
            'log_user' => User::idFromName( $this->user_text ),
            #'log_user_text' => $this->user_text,
            'log_namespace' => $this->getTitle()->getNamespace(),
            'log_title' => $this->getTitle()->getDBkey(),
            'log_comment' => $this->getComment(),
            'log_params' => $this->params
        );
        $dbw->insert( 'logging', $data, __METHOD__ );
    }

    /**
     * Store this item as a file version in the local repository.
     *
     * The file comes from getFileSrc() or, failing that, from
     * downloadSource(). When a SHA-1 was supplied it must match the file.
     *
     * @return bool True if the upload status is good, false on any failure
     */
    public function importUpload() {
        # Construct a file
        $archiveName = $this->getArchiveName();
        if ( $archiveName ) {
            wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
            $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                RepoGroup::singleton()->getLocalRepo(), $archiveName );
        } else {
            $file = wfLocalFile( $this->getTitle() );
            wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
            // An existing file newer than the imported one stays current;
            // the imported version is filed under an archive name instead.
            if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
                $archiveName = $file->getTimestamp() . '!' . $file->getName();
                $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                    RepoGroup::singleton()->getLocalRepo(), $archiveName );
                wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
            }
        }
        if ( !$file ) {
            wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
            return false;
        }

        # Get the file source or download if necessary
        $source = $this->getFileSrc();
        $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
        if ( !$source ) {
            $source = $this->downloadSource();
            $flags |= File::DELETE_SOURCE;
        }
        if ( !$source ) {
            wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
            return false;
        }
        $sha1 = $this->getSha1();
        if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
            if ( $flags & File::DELETE_SOURCE ) {
                # Broken file; delete it if it is a temporary file
                unlink( $source );
            }
            wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
            return false;
        }

        $user = User::newFromName( $this->user_text );

        # Do the actual upload
        if ( $archiveName ) {
            $status = $file->uploadOld( $source, $archiveName,
                $this->getTimestamp(), $this->getComment(), $user, $flags );
        } else {
            $status = $file->upload( $source, $this->getComment(), $this->getComment(),
                $flags, false, $this->getTimestamp(), $user );
        }

        if ( $status->isGood() ) {
            wfDebug( __METHOD__ . ": Successful\n" );
            return true;
        } else {
            wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
            return false;
        }
    }

    /**
     * Fetch getSrc() over HTTP into a new temporary file.
     *
     * @return string|bool Path of the temporary file, or false when uploads
     *   are disabled, the temp file cannot be opened, or the fetch returns
     *   nothing
     */
    public function downloadSource() {
        global $wgEnableUploads;
        if ( !$wgEnableUploads ) {
            return false;
        }

        $tempo = tempnam( wfTempDir(), 'download' );
        $f = fopen( $tempo, 'wb' );
        if ( !$f ) {
            wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
            return false;
        }

        // @todo FIXME!
        $src = $this->getSrc();
        $data = Http::get( $src );
        if ( !$data ) {
            wfDebug( "IMPORT: couldn't fetch source $src\n" );
            fclose( $f );
            unlink( $tempo );
            return false;
        }

        fwrite( $f, $data );
        fclose( $f );

        return $tempo;
    }

}
01585 
/**
 * Import source that hands out an in-memory string as a single chunk.
 */
class ImportStringSource {
    /** @var string The complete data to hand out */
    private $mString;

    /** @var bool Whether the string has already been handed out */
    private $mRead = false;

    /**
     * @param string $string Data to serve
     */
    function __construct( $string ) {
        $this->mString = $string;
        $this->mRead = false;
    }

    /**
     * @return bool True once readChunk() has returned the string
     */
    function atEnd() {
        return $this->mRead;
    }

    /**
     * @return string|bool The whole string on the first call, false afterwards
     */
    function readChunk() {
        if ( $this->atEnd() ) {
            return false;
        }
        $this->mRead = true;
        return $this->mString;
    }
}
01614 
/**
 * Import source that reads from an open stream handle in chunks.
 */
class ImportStreamSource {
    /** @var resource Open stream the data is read from */
    private $mHandle;

    /**
     * @param resource $handle Open, readable stream
     */
    function __construct( $handle ) {
        $this->mHandle = $handle;
    }

    /**
     * @return bool Result of feof() on the handle
     */
    function atEnd() {
        return feof( $this->mHandle );
    }

    /**
     * @return string|bool Up to 32768 bytes from the handle (result of fread())
     */
    function readChunk() {
        return fread( $this->mHandle, 32768 );
    }

    /**
     * Open a local file as an import source.
     *
     * @param string $filename
     * @return Status Good status wrapping an ImportStreamSource, or fatal
     *   'importcantopen' if the file cannot be opened
     */
    static function newFromFile( $filename ) {
        wfSuppressWarnings();
        $file = fopen( $filename, 'rt' );
        wfRestoreWarnings();
        if ( !$file ) {
            return Status::newFatal( "importcantopen" );
        }
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * Build an import source from a file uploaded in the current request.
     *
     * @param string $fieldname Name of the upload form field
     * @return Status Good status wrapping an ImportStreamSource, or a fatal
     *   status naming the problem
     */
    static function newFromUpload( $fieldname = "xmlimport" ) {
        // Look the field up without taking a reference: "=& $_FILES[...]"
        // would create a null entry in the superglobal for a missing field.
        $upload = isset( $_FILES[$fieldname] ) ? $_FILES[$fieldname] : null;

        if ( $upload === null || empty( $upload['name'] ) ) {
            return Status::newFatal( 'importnofile' );
        }
        if ( !empty( $upload['error'] ) ) {
            switch ( $upload['error'] ) {
                case UPLOAD_ERR_INI_SIZE: # Exceeds the upload_max_filesize directive in php.ini.
                case UPLOAD_ERR_FORM_SIZE: # Exceeds the MAX_FILE_SIZE directive given in the HTML form.
                    return Status::newFatal( 'importuploaderrorsize' );
                case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
                    return Status::newFatal( 'importuploaderrorpartial' );
                case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder.
                    return Status::newFatal( 'importuploaderrortemp' );
                # Any other code falls through to the is_uploaded_file() check below.
            }

        }
        $fname = $upload['tmp_name'];
        if ( is_uploaded_file( $fname ) ) {
            return ImportStreamSource::newFromFile( $fname );
        } else {
            return Status::newFatal( 'importnofile' );
        }
    }

    /**
     * Fetch a URL and expose the response body as an import source.
     *
     * @param string $url
     * @param string $method HTTP method
     * @return Status Good status wrapping an ImportStreamSource, or fatal
     *   'importcantopen' if the request fails or no temporary file can be
     *   created
     */
    static function newFromURL( $url, $method = 'GET' ) {
        wfDebug( __METHOD__ . ": opening $url\n" );
        # Use the standard HTTP fetch function; it times out
        # quicker and sorts out user-agent problems which might
        # otherwise prevent importing from large sites, such
        # as the Wikimedia cluster, etc.
        $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
        if ( $data === false ) {
            return Status::newFatal( 'importcantopen' );
        }

        $file = tmpfile();
        if ( $file === false ) {
            // tmpfile() can fail (e.g. unwritable temp dir); do not write to a non-handle.
            return Status::newFatal( 'importcantopen' );
        }
        fwrite( $file, $data );
        fflush( $file );
        // Rewind so the source starts reading at the first byte.
        fseek( $file, 0 );

        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * Build an import source from Special:Export on another wiki.
     *
     * @param string $interwiki Interwiki prefix of the source wiki
     * @param string $page Page name to export
     * @param bool $history Request the 'history' export option
     * @param bool $templates Request the 'templates' export option
     * @param int $pageLinkDepth Value for the 'pagelink-depth' export option; omitted when 0
     * @return Status
     */
    public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
        if ( $page == '' ) {
            return Status::newFatal( 'import-noarticle' );
        }
        $link = Title::newFromText( "$interwiki:Special:Export/$page" );
        if ( is_null( $link ) || $link->getInterwiki() == '' ) {
            return Status::newFatal( 'importbadinterwiki' );
        } else {
            $params = array();
            if ( $history ) {
                $params['history'] = 1;
            }
            if ( $templates ) {
                $params['templates'] = 1;
            }
            if ( $pageLinkDepth ) {
                $params['pagelink-depth'] = $pageLinkDepth;
            }
            $url = $link->getFullURL( $params );
            # For interwikis, use POST to avoid redirects.
            return ImportStreamSource::newFromURL( $url, "POST" );
        }
    }
}