MediaWiki
REL1_22
|
<?php

/**
 * XML file reader for the page data importer.
 *
 * Walks a MediaWiki XML dump with an XMLReader and hands each page, revision,
 * upload and log item it finds to a configurable callback.
 */
class WikiImporter {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Creates an importer that reads from the given source object.
	 *
	 * The source is exposed to XMLReader through the "uploadsource://" stream
	 * wrapper implemented by UploadSourceAdapter.
	 *
	 * @param object $source Source handed to UploadSourceAdapter::registerSource()
	 * @throws MWException If the XML reader cannot be opened on the source
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering an already registered protocol raises a PHP warning, so
		// only register the wrapper the first time an importer is constructed.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$opened = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$opened = $this->reader->open( "uploadsource://$id" );
		}
		if ( !$opened ) {
			// Fail here instead of letting later read() calls fail obscurely.
			$error = libxml_get_last_error();
			throw new MWException( 'Encountered an internal error while initializing ' .
				'WikiImporter object' . ( $error ? ': ' . $error->message : '' ) );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * Logs an XML error. Despite its name this method does not throw.
	 *
	 * @param string $err Error description
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes a line to the debug log, but only when debugging is enabled.
	 *
	 * @param string $data
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes a line to the debug log regardless of the debug setting.
	 *
	 * @param string $data
	 */
	private function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Reports a message to the notice callback, or echoes it when no callable
	 * notice callback is set.
	 *
	 * @param string $msg Message key; any further arguments are message parameters
	 */
	private function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params );

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Enables or disables debug logging.
	 *
	 * @param bool $debug
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the "no updates" flag that is copied onto every WikiRevision built
	 * by this importer.
	 *
	 * @param bool $noupdates
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Sets the callback that receives notices.
	 *
	 * @param callable $callback Called with the message key and a parameter array
	 * @return mixed Whatever wfSetVar() returns
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the callback invoked when a page title has been read.
	 *
	 * @param callable $callback
	 * @return callable|null The previously set callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked when a page element has been fully read.
	 * It receives the title, the original title, the revision count, the
	 * successful revision count and the page info array.
	 *
	 * @param callable $callback
	 * @return callable|null The previously set callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every revision.
	 *
	 * @param callable $callback Called with the WikiRevision and this importer
	 * @return callable|null The previously set callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every upload.
	 *
	 * @param callable $callback
	 * @return callable|null The previously set callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every log item.
	 *
	 * @param callable $callback Called with the WikiRevision and this importer
	 * @return callable|null The previously set callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the site info callback. Note that handling of <siteinfo> throws
	 * once a callback is set.
	 *
	 * @param callable $callback
	 * @return callable|null The previously set callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the namespace all imported pages are moved into.
	 *
	 * @param int|null $namespace Namespace index, or null to keep the namespaces
	 *   found in the dump
	 * @return bool True when the value was accepted, false for a negative index
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
			return true;
		} elseif ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
			return true;
		} else {
			return false;
		}
	}

	/**
	 * Sets a page under which all imported pages are placed as subpages.
	 *
	 * An empty string leaves the current setting untouched.
	 *
	 * @param string|null $rootpage Root page title, or null for no root page
	 * @return Status Fatal when the title is invalid, external, or lies in a
	 *   namespace without subpages
	 */
	public function setTargetRootPage( $rootpage ) {
		$status = Status::newGood();
		if ( is_null( $rootpage ) ) {
			// No rootpage
			$this->mTargetRootPage = null;
		} elseif ( $rootpage !== '' ) {
			$rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
			$defaultNs = !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN;
			$title = Title::newFromText( $rootpage, $defaultNs );
			if ( !$title || $title->isExternal() ) {
				$status->fatal( 'import-rootpage-invalid' );
			} elseif ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
				global $wgContLang;

				$displayNSText = $title->getNamespace() == NS_MAIN
					? wfMessage( 'blanknamespace' )->text()
					: $wgContLang->getNsText( $title->getNamespace() );
				$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
			} else {
				// set namespace to 'all', so the namespace check in processTitle() can pass
				$this->setTargetNamespace( null );
				$this->mTargetRootPage = $title->getPrefixedDBkey();
			}
		}
		return $status;
	}

	/**
	 * Sets the directory searched for upload files referenced by a "rel" path.
	 *
	 * @param string $dir
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Enables or disables processing of <upload> elements.
	 *
	 * @param bool $import
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default revision callback: stores the revision via
	 * WikiRevision::importOldRevision() inside a deadlock loop.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Result of the deadlock loop, or false when the content
	 *   could not be unserialized (a notice is emitted in that case)
	 */
	public function importRevision( $revision ) {
		try {
			$dbw = wfGetDB( DB_MASTER );
			return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
		} catch ( MWContentSerializationException $ex ) {
			$this->notice( 'import-error-unserialize',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );
		}

		// Report the failure explicitly instead of falling off the end.
		return false;
	}

	/**
	 * Default log item callback: stores the item via
	 * WikiRevision::importLogItem() inside a deadlock loop.
	 *
	 * @param WikiRevision $rev
	 * @return mixed Result of the deadlock loop
	 */
	public function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Default upload callback: stores the file via
	 * WikiRevision::importUpload() inside a deadlock loop.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Result of the deadlock loop
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Default page-out callback: runs the AfterImportPage hook with the
	 * arguments it was given.
	 *
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sRevCount
	 * @param array $pageInfo
	 * @return mixed Result of wfRunHooks()
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternate revision callback that only writes the revision to the debug log.
	 *
	 * @param WikiRevision $revision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notifies the page callback, if one is set.
	 *
	 * @param mixed $title Value returned by processTitle()
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notifies the page-out callback, if one is set, passing all arguments on.
	 *
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sucCount
	 * @param array $pageInfo
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Notifies the revision callback, if one is set.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( isset( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Notifies the log item callback, if one is set.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( isset( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Returns the text content of the element the reader is positioned on and
	 * advances the reader to the first end element it meets.
	 *
	 * @return string Concatenated text, CDATA and significant whitespace; an
	 *   empty string for an empty element or when the input ends early (the
	 *   reader is closed in the latter case)
	 */
	private function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XMLReader::TEXT:
				// CDATA sections are reported as their own node type; without
				// this case their content would be silently dropped.
				case XMLReader::CDATA:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

# --------------

	/**
	 * Debugging aid: dumps the type, name and value of the current node.
	 */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			foreach ( $xmlReaderConstants as $name ) {
				$lookup[constant( "XmlReader::$name" )] = $name;
			}
		}

		// var_dump() prints by itself and returns nothing, so emit the
		// trailing blank lines separately.
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print "\n\n";
	}

	/**
	 * Primary entry point: reads the whole dump and dispatches its contents.
	 *
	 * @return bool Always true
	 * @throws MWException If the root element is not <mediawiki>
	 */
	public function doImport() {
		// Calls to reader->read need to be wrapped in calls to
		// libxml_disable_entity_loader() to avoid local file
		// inclusion attacks (bug 46932).
		$oldDisable = libxml_disable_entity_loader( true );
		try {
			$this->readDump();
		} catch ( Exception $ex ) {
			// Restore the global libxml setting even when a handler, hook or
			// callback throws, then let the exception continue.
			libxml_disable_entity_loader( $oldDisable );
			throw $ex;
		}

		libxml_disable_entity_loader( $oldDisable );
		return true;
	}

	/**
	 * Checks the root element and runs the top-level dispatch loop.
	 *
	 * @throws MWException If the root element is not <mediawiki>
	 */
	private function readDump() {
		$this->reader->read();

		if ( $this->reader->name !== 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->name );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->name;
			$type = $this->reader->nodeType;

			if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
				// Do nothing
			} elseif ( $tag === 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag === 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag === 'page' ) {
				$this->handlePage();
			} elseif ( $tag === 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag !== '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the subtree of the unhandled element
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	}

	/**
	 * Skips the <siteinfo> element.
	 *
	 * @return bool True
	 * @throws MWException If a site info callback has been set
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( ! $this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads a <logitem> element and passes the collected data to processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name === 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
				$this, $logInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields, true ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag === 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag !== '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from log item data and passes it to the log item
	 * callback.
	 *
	 * @param array $logInfo Reads the keys id, type, action, timestamp, params
	 *   and logtitle unconditionally; comment and contributor are optional
	 * @return mixed Result of the log item callback
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads a <page> element: resolves the title, dispatches its revisions and
	 * uploads, and finally notifies the page-out callback.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Stays null when the page has no <title> or the title is rejected, so
		// the page-out call below never touches an undefined variable.
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name === 'page' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
						&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields, true ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag === 'title' ) {
					$title = $this->processTitle( $pageInfo['title'] );

					if ( !$title ) {
						$badTitle = true;
						$skip = true;
					}

					$this->pageCallback( $title );
					if ( is_array( $title ) ) {
						list( $pageInfo['_title'], $origTitle ) = $title;
					} else {
						// processTitle() returned false: record "no title"
						$pageInfo['_title'] = null;
						$origTitle = null;
					}
				}
			} elseif ( $tag === 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag === 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag !== '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		$this->pageOutCallback(
			isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null,
			$origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}

	/**
	 * Reads a <revision> element and updates the revision counters.
	 *
	 * @param array &$pageInfo Page data; revisionCount and
	 *   successfulRevisionCount are incremented here
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name === 'revision' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
				$this, $pageInfo, $revisionInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields, true ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag === 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag !== '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from revision data and passes it to the revision
	 * callback.
	 *
	 * @param array $pageInfo Must contain the resolved title under '_title'
	 * @param array $revisionInfo Fields read from the <revision> element
	 * @return mixed Result of the revision callback
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['text'] ) ) {
			$revision->setText( $revisionInfo['text'] );
		}
		if ( isset( $revisionInfo['model'] ) ) {
			$revision->setModel( $revisionInfo['model'] );
		}
		if ( isset( $revisionInfo['format'] ) ) {
			$revision->setFormat( $revisionInfo['format'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of the <minor> element marks a minor edit
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads an <upload> element and, when upload import is enabled, passes
	 * the collected data to processUpload().
	 *
	 * @param array &$pageInfo
	 * @return mixed Result of processUpload(), or null when uploads are not imported
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
					'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name === 'upload' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
				$this, $pageInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields, true ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag === 'contents' ) {
				// Read the attribute while the reader is still on the start
				// tag; nodeContents() moves it on to the end element.
				$encoding = $this->reader->getAttribute( 'encoding' );
				$contents = $this->nodeContents();
				if ( $encoding === 'base64' ) {
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag === 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag !== '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		// A file found below the image base path takes precedence over
		// embedded contents.
		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes data to a new temporary file.
	 *
	 * @param string $contents
	 * @return string Path of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision from upload data and passes it to the upload callback.
	 *
	 * @param array $pageInfo Reads '_title' and 'id'
	 * @param array $uploadInfo Reads timestamp, filename, src, size and comment
	 *   unconditionally; the remaining keys are optional
	 * @return mixed Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads a <contributor> element.
	 *
	 * @return array Any of the keys 'id', 'ip' and 'username' that were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		// An empty element (<contributor ... />) has no end tag of its own.
		// Looping on read() would run past it and consume the siblings that
		// follow while searching for a </contributor> that never comes.
		if ( $this->reader->isEmptyElement ) {
			return $info;
		}

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name === 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields, true ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Resolves the title text found in the dump to the title to import to,
	 * applying the target namespace or root page.
	 *
	 * @param string $text Title text from the dump
	 * @return array|bool array( Title to import to, Title as given in the dump ),
	 *   or false when the page must be skipped (a notice is emitted)
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			# makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map)
			# and than dbKey can begin with a lowercase char
			$title = Title::makeTitleSafe( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			if ( !is_null( $this->mTargetRootPage ) ) {
				$workTitle = $this->mTargetRootPage . '/' . $workTitle;
			}
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif ( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}

/**
 * Stream wrapper that lets XMLReader read from a registered source object.
 * The source must provide atEnd() and readChunk().
 */
class UploadSourceAdapter {
	static $sourceRegistrations = array();

	private $mSource;
	// Start with real values so the string and arithmetic operations below
	// never work on null.
	private $mBuffer = '';
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 *
	 * @param object $source
	 * @return string Id to use as the host part of an uploadsource:// URL
	 */
	static function registerSource( $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the source whose id is the host part of the given URL.
	 *
	 * @param string $path
	 * @param string $mode Unused
	 * @param int $options Unused
	 * @param string &$opened_path Unused
	 * @return bool False when the URL is malformed or the id is unknown
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );
		// parse_url() yields false for a malformed URL and may omit the host
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling chunks from the source as needed.
	 *
	 * @param int $count
	 * @return string
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
				strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			// An empty chunk means nothing more is available right now
			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// Before PHP 8 substr() returns false once the buffer is used up;
			// keep the buffer a string.
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * The stream is read-only.
	 *
	 * @param string $data
	 * @return bool False
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes handed out so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool True once the source is exhausted and the buffer is drained
	 */
	function stream_eof() {
		// Bytes still waiting in the buffer have not been delivered yet, so
		// the stream is not at its end even if the source is.
		return $this->mSource->atEnd() && !strlen( $this->mBuffer );
	}

	/**
	 * Returns a stat structure in which every field is zero.
	 *
	 * @return array
	 */
	function url_stat() {
		$names = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
			'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );
		$result = array();

		foreach ( $names as $index => $name ) {
			$result[$index] = 0;
			$result[$name] = 0;
		}

		return $result;
	}
}

/**
 * XMLReader with a helper for reading the text content of an element.
 */
class XMLReader2 extends XMLReader {

	/**
	 * Returns the text content of the current element and advances to the
	 * first end element met.
	 *
	 * @return string Empty string for an empty element or when the input ends
	 *   early (the reader is closed in the latter case)
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
				case XMLReader::TEXT:
				// Keep CDATA content instead of silently dropping it
				case XMLReader::CDATA:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}
		// Always hand back a string; close() itself returns a bool
		$this->close();
		return '';
	}
}

/**
 * Holds the data of one revision, upload or log item read from a dump.
 */
class WikiRevision {
	public $importer = null;

	public $title = null;
	public $id = 0;
	public $timestamp = "20010115000000";
	public $user = 0;
	public $user_text = "";
	public $model = null;
	public $format = null;
	public $text = "";
	public $content = null;
	public $comment = "";
	public $minor = false;
	public $type = "";
	public $action = "";
	public $params = "";
	public $fileSrc = '';
	public $sha1base36 = false;
	public $isTemp = false;
	public $archiveName = '';
	public $fileIsTemp;
	// Written by setSrc(), setFilename() and setSize(); declared so they are
	// not created as dynamic properties and read as null before being set.
	public $src = null;
	public $filename = null;
	public $size = null;
	private $mNoUpdates = false;

	/**
	 * @param Title $title
	 * @throws MWException If $title is not an object
	 */
	function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
		} elseif ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param int $id
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * @param string $ts Timestamp in any format wfTimestamp() accepts
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param string $user User name; stored in the same field as setUserIP()
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * @param string $ip IP address; stored in the same field as setUsername()
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $model Content model id
	 */
	function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param string $format Content format
	 */
	function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param string $text
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param mixed $minor Cast to bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param mixed $src
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * @param string $src Path of the file to import
	 * @param bool $isTemp Whether that file is a temporary one
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		// isTempSrc() reports $isTemp, so keep it in step; otherwise the
		// flag passed here would never be visible to callers.
		$this->isTemp = $isTemp;
	}

	/**
	 * @param string $sha1base36
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param string $filename
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param string $archiveName
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param mixed $size Converted with intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param string $type
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param string $action
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param string $params
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param bool $noupdates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title|null
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string User name or IP address, whichever was set last
	 */
	function getUser() {
		return $this->user_text;
	}

// WikiRevision, continued: remaining read accessors, then the import operations.

	/**
	 * Return the raw text given to setText().
	 *
	 * @deprecated Reports itself through ContentHandler::deprecated() as
	 *   deprecated since 1.21; getContent() wraps the same text.
	 * @return string
	 */
	function getText() {
		ContentHandler::deprecated( __METHOD__, '1.21' );

		return $this->text;
	}

	/**
	 * Return the revision's content object, building it on first use from
	 * the stored text, title, model and format.
	 *
	 * @return mixed Result of ContentHandler::makeContent(), cached in $content
	 */
	function getContent() {
		if ( is_null( $this->content ) ) {
			$this->content =
				ContentHandler::makeContent(
					$this->text,
					$this->getTitle(),
					$this->getModel(),
					$this->getFormat()
				);
		}

		return $this->content;
	}

	/**
	 * Return the content model, falling back to (and remembering) the
	 * title's content model when none was set.
	 *
	 * @return string
	 */
	function getModel() {
		if ( is_null( $this->model ) ) {
			$this->model = $this->getTitle()->getContentModel();
		}

		return $this->model;
	}

	/**
	 * Return the serialization format, falling back to (and remembering) the
	 * default format of the title's content handler when none was set.
	 *
	 * @return string
	 */
	function getFormat() {
		// This must test $format. The test used to be on $model, which callers
		// in this class have already resolved through getModel() before asking
		// for the format, so the default was never applied.
		if ( is_null( $this->format ) ) {
			$this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat();
		}

		return $this->format;
	}

	/**
	 * @return string The edit summary / log comment
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * @return bool The minor-edit flag
	 */
	function getMinor() {
		return $this->minor;
	}

	/**
	 * @return mixed The value given to setSrc()
	 */
	function getSrc() {
		return $this->src;
	}

	/**
	 * Return the upload's SHA-1 as a hexadecimal string.
	 *
	 * @return string|bool 40-character hex hash, or false if no hash was set
	 */
	function getSha1() {
		if ( $this->sha1base36 ) {
			// Pad to the full 40 hex digits. importUpload() compares this
			// against sha1_file(), so a hash with leading zeros would
			// otherwise never match.
			return wfBaseConvert( $this->sha1base36, 36, 16, 40 );
		}
		return false;
	}

	/**
	 * @return string The local file path given to setFileSrc()
	 */
	function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * Tell whether the local file source is a temporary file.
	 *
	 * @return bool
	 */
	function isTempSrc() {
		// setFileSrc() records its flag in $fileIsTemp, so look at both fields.
		return $this->isTemp || $this->fileIsTemp;
	}

	/**
	 * @return mixed The value given to setFilename()
	 */
	function getFilename() {
		return $this->filename;
	}

	/**
	 * @return string The archive name ('' if unset)
	 */
	function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @return mixed The value stored by setSize()
	 */
	function getSize() {
		return $this->size;
	}

	/**
	 * @return string The log type
	 */
	function getType() {
		return $this->type;
	}

	/**
	 * @return string The log action
	 */
	function getAction() {
		return $this->action;
	}

	/**
	 * @return mixed The log parameters
	 */
	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this item into the database as a page revision.
	 *
	 * The page is created when it does not exist yet. On an existing page
	 * the revision is skipped if a row with the same timestamp, user text
	 * and comment is already there.
	 *
	 * @return bool False when the revision was skipped as already present,
	 *   true otherwise
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if ( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
			$userObj = $user;
		} else {
			// No user object for this name: keep the raw text, attribute to ID 0
			$userId = 0;
			$userText = $this->getUser();
			$userObj = new User;
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$page = WikiPage::factory( $this->title );
		if ( !$page->exists() ) {
			# must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
			$oldcountable = null;
		} else {
			$pageId = $page->getId();
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ),
				__METHOD__
			);
			if ( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
			$oldcountable = $page->isCountable();
		}

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'title' => $this->title,
			'page' => $pageId,
			'content_model' => $this->getModel(),
			'content_format' => $this->getFormat(),
			'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()?
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
		) );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		// Secondary updates run only when updateIfNewerOn() did not return
		// false and they were not switched off through setNoUpdates().
		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			$page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
		}

		return true;
	}

	/**
	 * Insert this item into the 'logging' table as a log entry.
	 *
	 * Nothing is written when the item has no title or when a row with the
	 * same type, action, timestamp, target, comment and parameters exists.
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# @todo FIXME: This will not record autoblocks
		if ( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				'log_comment' => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params ),
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if ( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );
	}

	/**
	 * Store this item as a file upload.
	 *
	 * The file is stored as an archived version when an archive name was
	 * given, or when the existing file has a later timestamp than this
	 * item. The data comes from the local file source or, failing that,
	 * from downloadSource(). If a SHA-1 hash was set, it must match the data.
	 *
	 * @return bool True if the upload status is good, false on any failure
	 */
	function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				// The existing file has the later timestamp: file the import
				// under an archive name instead of replacing it.
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
			}
		}
		if ( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
		if ( !$source ) {
			$source = $this->downloadSource();
			// downloadSource() returns a temp file path, so mark it for deletion
			$flags |= File::DELETE_SOURCE;
		}
		if ( !$source ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}
		$sha1 = $this->getSha1();
		if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
			if ( $flags & File::DELETE_SOURCE ) {
				# Broken file; delete it if it is a temporary file
				unlink( $source );
			}
			wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
			return false;
		}

		$user = User::newFromName( $this->user_text );

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user, $flags );
		} else {
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Successful\n" );
			return true;
		} else {
			wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
			return false;
		}
	}

	/**
	 * Fetch the remote source (getSrc()) over HTTP into a new temporary file.
	 *
	 * @return string|bool Path of the temporary file, or false when
	 *   $wgEnableUploads is off, the temp file cannot be opened, or the
	 *   fetch yields no data
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if ( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		$f = fopen( $tempo, 'wb' );
		if ( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if ( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			// Do not leave the empty temp file behind
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		fwrite( $f, $data );
		fclose( $f );

		return $tempo;
	}

}

/**
 * Import source that serves one in-memory string as a single chunk.
 */
class ImportStringSource {
	/** The string handed out by readChunk() */
	public $mString;

	/** Whether the string has been handed out already */
	public $mRead;

	/**
	 * @param $string string Complete data to serve
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool True once readChunk() has returned the string
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return string|bool The whole string on the first call, false afterwards
	 */
	function readChunk() {
		if ( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}

/**
 * Import source that reads its data in chunks from a stream handle.
 */
class ImportStreamSource {
	/** The stream handle read by readChunk() */
	public $mHandle;

	/**
	 * @param $handle resource Stream handle to read from
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the handle
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return string|bool Result of fread() for up to 32768 bytes
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a local file as an import source.
	 *
	 * @param $filename string
	 * @return Status Good status wrapping an ImportStreamSource, or a fatal
	 *   'importcantopen' status when fopen() fails
	 */
	static function newFromFile( $filename ) {
		// fopen() warnings are suppressed; failure is reported via the Status
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if ( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build an import source from a file uploaded in the current request.
	 *
	 * @param $fieldname string Key into $_FILES
	 * @return Status Result of newFromFile() for the uploaded temp file, or
	 *   a fatal status naming the upload problem
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		// Reference assignment: a missing field yields null and no notice
		$upload =& $_FILES[$fieldname];

		if ( $upload === null || !$upload['name'] ) {
			return Status::newFatal( 'importnofile' );
		}
		if ( !empty( $upload['error'] ) ) {
			switch ( $upload['error'] ) {
				case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
				case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case 3: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case 6: #Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# case else: # Currently impossible
			}

		}
		$fname = $upload['tmp_name'];
		if ( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and serve the response body as an import source.
	 *
	 * @param $url string
	 * @param $method string HTTP method handed to Http::request()
	 * @return Status Good status wrapping an ImportStreamSource over a
	 *   temporary file holding the body, or a fatal 'importcantopen' status
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url, array( 'followRedirects' => true ) );
		if ( $data === false ) {
			return Status::newFatal( 'importcantopen' );
		}

		$file = tmpfile();
		if ( $file === false ) {
			// tmpfile() can fail; report it instead of writing to a non-handle
			return Status::newFatal( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		// Rewind so the source starts reading at the beginning of the data
		fseek( $file, 0 );
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build an import source from Special:Export on another wiki, addressed
	 * through an interwiki prefix.
	 *
	 * @param $interwiki string Interwiki prefix
	 * @param $page string Page name to export
	 * @param $history bool Adds history=1 to the export request
	 * @param $templates bool Adds templates=1 to the export request
	 * @param $pageLinkDepth int Sent as pagelink-depth when non-zero
	 * @return Status Result of newFromURL(), or a fatal status for an empty
	 *   page name or an unusable interwiki prefix
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if ( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if ( is_null( $link ) || $link->getInterwiki() == '' ) {
			return Status::newFatal( 'importbadinterwiki' );
		} else {
			$params = array();
			if ( $history ) {
				$params['history'] = 1;
			}
			if ( $templates ) {
				$params['templates'] = 1;
			}
			if ( $pageLinkDepth ) {
				$params['pagelink-depth'] = $pageLinkDepth;
			}
			$url = $link->getFullURL( $params );
			# For interwikis, use POST to avoid redirects.
			return ImportStreamSource::newFromURL( $url, "POST" );
		}
	}
}