MediaWiki
REL1_19
|
<?php

/**
 * XML dump importer.
 *
 * Pulls a MediaWiki XML export through XMLReader and turns each <page>,
 * <revision>, <upload> and <logitem> element into a WikiRevision object that
 * is handed to the configured callbacks.
 */
class WikiImporter {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Creates an importer reading from the given source and installs the
	 * default revision/upload/log-item/page-out callbacks.
	 *
	 * @param $source object An object offering atEnd() and readChunk()
	 *   (see ImportStringSource / ImportStreamSource)
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering a protocol twice raises a warning, so only register the
		// wrapper the first time an importer is constructed in this request.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * Logs an XML error. Despite its name this method does not throw; it
	 * only writes to the debug log.
	 *
	 * @param $err string
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes a message to the debug log when debugging is enabled.
	 *
	 * @param $data string
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes a message to the debug log unconditionally.
	 *
	 * @param $data string
	 */
	private function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Reports a notice, either through the notice callback or, when none is
	 * callable, by echoing the message text.
	 *
	 * @param $msg string Message key; any further arguments are message parameters
	 */
	private function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params );

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Turns debug logging on or off.
	 *
	 * @param $debug bool
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the flag that is passed on to every WikiRevision via setNoUpdates().
	 *
	 * @param $noupdates bool
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Sets the callback used by notice().
	 *
	 * @param $callback callback
	 * @return mixed Whatever wfSetVar() returns
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the callback invoked once a page title has been processed.
	 *
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked when a <page> element has been fully read.
	 * It receives the title, the original title, the revision count, the
	 * successful revision count and the page info array.
	 *
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for each revision.
	 *
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for each upload.
	 *
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for each log item.
	 *
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the site info callback. Note that handleSiteInfo() throws when a
	 * callback is set, because <siteinfo> handling is not implemented.
	 *
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Sets a namespace into which all imported pages are moved.
	 *
	 * @param $namespace int|null Null keeps the namespaces from the dump
	 * @return bool False if the namespace was rejected (negative), true otherwise
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
		} elseif ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
		} else {
			return false;
		}
		return true;
	}

	/**
	 * Sets the directory in which files named by an upload's <rel> element
	 * are looked up.
	 *
	 * @param $dir string
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Enables or disables processing of <upload> elements.
	 *
	 * @param $import bool
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default revision callback: runs WikiRevision::importOldRevision()
	 * inside the master database's deadlock loop.
	 *
	 * @param $revision WikiRevision
	 * @return mixed Result of the deadlock loop
	 */
	public function importRevision( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
	}

	/**
	 * Default log item callback: runs WikiRevision::importLogItem() inside
	 * the master database's deadlock loop.
	 *
	 * @param $rev WikiRevision
	 * @return mixed Result of the deadlock loop
	 */
	public function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Default upload callback: runs WikiRevision::importUpload() inside the
	 * master database's deadlock loop.
	 *
	 * @param $revision WikiRevision
	 * @return mixed Result of the deadlock loop
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Default page-out callback: forwards all of its arguments to the
	 * AfterImportPage hook.
	 *
	 * @param $title Title|null
	 * @param $origTitle Title|null
	 * @param $revCount int
	 * @param $sRevCount int
	 * @param $pageInfo array
	 * @return mixed Result of wfRunHooks()
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternate revision callback that only writes the revision's fields to
	 * the debug log.
	 *
	 * @param $revision WikiRevision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Invokes the page callback, if one is set.
	 *
	 * @param $title array|bool Result of processTitle()
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Invokes the page-out callback, if one is set, with all arguments.
	 *
	 * @param $title Title|null
	 * @param $origTitle Title|null
	 * @param $revCount int
	 * @param $sucCount int
	 * @param $pageInfo array
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Invokes the revision callback with the revision and this importer.
	 *
	 * @param $revision WikiRevision
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( isset( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Invokes the log item callback with the revision and this importer.
	 *
	 * @param $revision WikiRevision
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( isset( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Collects the character data of the current element, reading up to the
	 * next end-element node. Text, CDATA and significant whitespace nodes
	 * are concatenated; everything else is ignored.
	 *
	 * @return string The collected text; '' for an empty element or when the
	 *   input ends before an end element is seen (the reader is then closed)
	 */
	private function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XMLReader::TEXT:
				case XMLReader::CDATA:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	# --------------

	/**
	 * Debugging aid: dumps the type name, name and value of the current node.
	 */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			foreach ( $xmlReaderConstants as $name ) {
				$lookup[constant( "XMLReader::$name" )] = $name;
			}
		}

		// var_dump() prints by itself and returns nothing, so the trailing
		// blank lines are emitted separately.
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print( "\n\n" );
	}

	/**
	 * Primary entry point: reads the dump and dispatches each top-level
	 * element to its handler.
	 *
	 * @return bool Always true when no exception was thrown
	 * @throws MWException If the root element is not <mediawiki>; exceptions
	 *   raised by handlers are re-thrown after the libxml state is restored
	 */
	public function doImport() {
		// Calls to reader->read need to be wrapped in calls to
		// libxml_disable_entity_loader() to avoid local file
		// inclusion attacks (bug 46932).
		$oldDisable = libxml_disable_entity_loader( true );

		// Any exception is captured so that the entity loader setting is
		// restored on every exit path before it propagates.
		$failure = null;
		try {
			$this->reader->read();

			if ( $this->reader->name != 'mediawiki' ) {
				throw new MWException( "Expected <mediawiki> tag, got " .
					$this->reader->name );
			}
			$this->debug( "<mediawiki> tag is correct." );

			$this->debug( "Starting primary dump processing loop." );

			$keepReading = $this->reader->read();
			$skip = false;
			while ( $keepReading ) {
				$tag = $this->reader->name;
				$type = $this->reader->nodeType;

				// Hook arguments have to be passed as one array.
				if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
					// Do nothing
				} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
					break;
				} elseif ( $tag == 'siteinfo' ) {
					$this->handleSiteInfo();
				} elseif ( $tag == 'page' ) {
					$this->handlePage();
				} elseif ( $tag == 'logitem' ) {
					$this->handleLogItem();
				} elseif ( $tag != '#text' ) {
					$this->warn( "Unhandled top-level XML tag $tag" );

					$skip = true;
				}

				if ( $skip ) {
					$keepReading = $this->reader->next();
					$skip = false;
					$this->debug( "Skip" );
				} else {
					$keepReading = $this->reader->read();
				}
			}
		} catch ( Exception $ex ) {
			$failure = $ex;
		}

		libxml_disable_entity_loader( $oldDisable );
		if ( $failure ) {
			throw $failure;
		}
		return true;
	}

	/**
	 * Skips the <siteinfo> element.
	 *
	 * @return bool True when the element was skipped
	 * @throws MWException If a site info callback has been set
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( !$this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads a <logitem> element and passes the collected fields to
	 * processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments have to be passed as one array.
			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
					array( $this, &$logInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from the log item fields and passes it to the
	 * log item callback.
	 *
	 * @param $logInfo array
	 * @return mixed Result of the log item callback
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads a <page> element: collects the simple fields, resolves the
	 * title, dispatches <revision> and <upload> children and finally invokes
	 * the page-out callback.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Stays null when the page has no usable title.
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'page' ) {
				break;
			}

			// The skip request only applies to the node it was raised for.
			$skip = false;

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
					&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag == 'title' ) {
					// Either array( Title, Title ) or false
					$title = $this->processTitle( $pageInfo['title'] );

					$this->pageCallback( $title );

					if ( $title ) {
						list( $pageInfo['_title'], $origTitle ) = $title;
					} else {
						$pageInfo['_title'] = null;
						$origTitle = null;
						$badTitle = true;
						$skip = true;
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// A page without a <title> element never sets '_title'.
		$finalTitle = isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null;

		$this->pageOutCallback( $finalTitle, $origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}

	/**
	 * Reads a <revision> element, processes it and updates the revision
	 * counters in $pageInfo.
	 *
	 * @param $pageInfo array
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'revision' ) {
				break;
			}

			// The skip request only applies to the node it was raised for.
			$skip = false;

			$tag = $this->reader->name;

			// Hook arguments have to be passed as one array.
			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
					array( $this, $pageInfo, &$revisionInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from the revision fields and passes it to the
	 * revision callback. A missing timestamp defaults to the current time.
	 *
	 * @param $pageInfo array
	 * @param $revisionInfo array
	 * @return mixed Result of the revision callback
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['text'] ) ) {
			$revision->setText( $revisionInfo['text'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads an <upload> element. Base64 <contents> are written to a temp
	 * file; a file found under the image base path takes precedence.
	 *
	 * @param $pageInfo array
	 * @return mixed Result of processUpload() when upload import is enabled,
	 *   nothing otherwise
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'upload' ) {
				break;
			}

			// The skip request only applies to the node it was raised for.
			$skip = false;

			$tag = $this->reader->name;

			// Hook arguments have to be passed as one array.
			if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
					array( $this, $pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag == 'contents' ) {
				// Read the attribute while still positioned on the start tag,
				// before nodeContents() advances the reader.
				$encoding = $this->reader->getAttribute( 'encoding' );
				$contents = $this->nodeContents();
				if ( $encoding === 'base64' ) {
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes the given data to a new temporary file.
	 *
	 * @param $contents string
	 * @return string Name of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision from the upload fields and passes it to the
	 * upload callback.
	 *
	 * @param $pageInfo array
	 * @param $uploadInfo array
	 * @return mixed Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads a <contributor> element.
	 *
	 * @return array Any of the keys 'id', 'ip' and 'username' that were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Resolves the title text from the dump into a target title, applying
	 * the target namespace if one is set, and checks that the page may be
	 * imported. A notice is emitted for every rejected title.
	 *
	 * @param $text string
	 * @return array|bool array( target Title, original Title ), or false if
	 *   the title was rejected
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			$title = Title::makeTitle( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif ( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}

/**
 * Stream wrapper that exposes a registered import source (an object with
 * atEnd() and readChunk()) under the uploadsource:// protocol.
 */
class UploadSourceAdapter {
	static $sourceRegistrations = array();

	private $mSource;
	// Initialised so that strlen() and stream_tell() never see null.
	private $mBuffer = '';
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 *
	 * @param $source object
	 * @return string
	 */
	static function registerSource( $source ) {
		$id = wfGenerateToken();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the source whose id is the host part of the given path.
	 *
	 * @param $path string
	 * @param $mode string Unused
	 * @param $options int Unused
	 * @param $opened_path string Unused
	 * @return bool False if no source is registered under that id
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling chunks from the source until the
	 * buffer is large enough, the source ends or a chunk comes back empty.
	 *
	 * @param $count int
	 * @return string
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
				strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// substr() can yield false when nothing is left over.
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 *
	 * @param $data string
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes handed out by stream_read() so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool True once the source is exhausted and no buffered bytes
	 *   remain to be returned by stream_read()
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && !strlen( $this->mBuffer );
	}

	/**
	 * @return array A stat array with every field set to zero
	 */
	function url_stat() {
		$result = array();

		$result['dev'] = $result[0] = 0;
		$result['ino'] = $result[1] = 0;
		$result['mode'] = $result[2] = 0;
		$result['nlink'] = $result[3] = 0;
		$result['uid'] = $result[4] = 0;
		$result['gid'] = $result[5] = 0;
		$result['rdev'] = $result[6] = 0;
		$result['size'] = $result[7] = 0;
		$result['atime'] = $result[8] = 0;
		$result['mtime'] = $result[9] = 0;
		$result['ctime'] = $result[10] = 0;
		$result['blksize'] = $result[11] = 0;
		$result['blocks'] = $result[12] = 0;

		return $result;
	}
}

/**
 * XMLReader with a helper for reading an element's text content.
 */
class XMLReader2 extends XMLReader {

	/**
	 * Collects the character data of the current element up to the next
	 * end-element node.
	 *
	 * @return string The collected text; '' for an empty element or when the
	 *   input ends before an end element is seen (the reader is then closed)
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
				case XMLReader::TEXT:
				case XMLReader::CDATA:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}
		// Return a string rather than the boolean result of close().
		$this->close();
		return '';
	}
}

/**
 * One imported item (page revision, upload or log entry) together with the
 * methods that write it to the wiki.
 */
class WikiRevision {
	var $importer = null;

	var $title = null;
	var $id = 0;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $text = "";
	var $comment = "";
	var $minor = false;
	var $type = "";
	var $action = "";
	var $params = "";
	var $fileSrc = '';
	var $sha1base36 = false;
	var $isTemp = false;
	var $archiveName = '';
	var $fileIsTemp;
	// Declared so the getters work before the matching setter was called.
	var $src = null;
	var $filename = null;
	var $size = null;
	private $mNoUpdates = false;

	/**
	 * @param $title Title
	 * @throws MWException If $title is not an object
	 */
	function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
		} elseif ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param $id int
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Stores the timestamp converted to TS_MW format.
	 *
	 * @param $ts string
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param $user string
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Stores the IP address as the user text.
	 *
	 * @param $ip string
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param $text string
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param $text string
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param $minor bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param $src string Passed to Http::get() by downloadSource()
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * @param $src string Local path of the file to import
	 * @param $isTemp bool Whether the file is temporary
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		// isTempSrc() reads $isTemp; $fileIsTemp is kept for existing readers.
		$this->fileIsTemp = $isTemp;
		$this->isTemp = $isTemp;
	}

	/**
	 * @param $sha1base36 string
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param $filename string
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param $archiveName string
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param $size int
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param $type string
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param $action string
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param $params string
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param $noupdates bool When true, importOldRevision() skips edit updates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title|null
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string The user text (user name or IP address)
	 */
	function getUser() {
		return $this->user_text;
	}

	/**
	 * @return string
	 */
	function getText() {
		return $this->text;
	}

	/**
	 * @return string
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * @return bool
	 */
	function getMinor() {
		return $this->minor;
	}

	/**
	 * @return string|null Null until setSrc() has been called
	 */
	function getSrc() {
		return $this->src;
	}

	/**
	 * @return string|bool The SHA-1 converted from base 36 to base 16, or
	 *   false if none was set
	 */
	function getSha1() {
		if ( $this->sha1base36 ) {
			return wfBaseConvert( $this->sha1base36, 36, 16 );
		}
		return false;
	}

	/**
	 * @return string
	 */
	function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * @return bool Whether the file source was flagged as temporary
	 */
	function isTempSrc() {
		return $this->isTemp;
	}

	/**
	 * @return string|null Null until setFilename() has been called
	 */
	function getFilename() {
		return $this->filename;
	}

	/**
	 * @return string
	 */
	function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @return int|null Null until setSize() has been called
	 */
	function getSize() {
		return $this->size;
	}

	/**
	 * @return string
	 */
	function getType() {
		return $this->type;
	}

	/**
	 * @return string
	 */
	function getAction() {
		return $this->action;
	}

	/**
	 * @return string
	 */
	function getParams() {
		return $this->params;
	}

	/**
	 * Inserts this revision, creating the page if needed. A revision with
	 * the same page, timestamp, user text and comment is treated as already
	 * imported.
	 *
	 * @return bool False if a matching revision already exists, true otherwise
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if ( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
			$userObj = $user;
		} else {
			$userId = 0;
			$userText = $this->getUser();
			$userObj = new User;
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$page = WikiPage::factory( $this->title );
		if ( !$page->exists() ) {
			# must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
			$oldcountable = null;
		} else {
			$pageId = $page->getId();
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ),
				__METHOD__
			);
			if ( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
			$oldcountable = $page->isCountable();
		}

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page' => $pageId,
			'text' => $this->getText(),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
		) );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			$page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
		}

		return true;
	}

	/**
	 * Inserts this item into the logging table unless it has no title or a
	 * matching row already exists.
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# @todo FIXME: This will not record autoblocks
		if ( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				'log_comment' => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params ),
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if ( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );
	}

	/**
	 * Imports this item as a file upload, downloading the source first if
	 * no local file source was set.
	 *
	 * @return bool Whether the upload succeeded
	 */
	function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
			}
		}
		if ( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
		if ( !$source ) {
			$source = $this->downloadSource();
			$flags |= File::DELETE_SOURCE;
		}
		if ( !$source ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}
		$sha1 = $this->getSha1();
		if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
			if ( $flags & File::DELETE_SOURCE ) {
				# Broken file; delete it if it is a temporary file
				unlink( $source );
			}
			wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
			return false;
		}

		$user = User::newFromName( $this->user_text );

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user, $flags );
		} else {
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Succesful\n" );
			return true;
		} else {
			wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
			return false;
		}
	}

	/**
	 * Fetches the source URL into a temporary file.
	 *
	 * @return string|bool Name of the temporary file, or false if uploads
	 *   are disabled, the temp file cannot be opened or the fetch fails
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if ( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		$f = fopen( $tempo, 'wb' );
		if ( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if ( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		fwrite( $f, $data );
		fclose( $f );

		return $tempo;
	}

}

/**
 * Import source that serves a string as a single chunk.
 */
class ImportStringSource {
	// Declared explicitly instead of being created dynamically.
	public $mString;
	public $mRead;

	/**
	 * @param $string string
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool True once the string has been handed out
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return string|bool The whole string on the first call, false afterwards
	 */
	function readChunk() {
		if ( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}

/**
 * Import source that reads from a file handle.
 */
class ImportStreamSource {
	// Declared explicitly instead of being created dynamically.
	public $mHandle;

	/**
	 * @param $handle resource
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the handle
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return string|bool Up to 32768 bytes read from the handle
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Opens a file as an import source.
	 *
	 * @param $filename string
	 * @return Status Good with an ImportStreamSource, or fatal 'importcantopen'
	 */
	static function newFromFile( $filename ) {
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if ( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Opens an uploaded file from $_FILES as an import source.
	 *
	 * @param $fieldname string Name of the upload form field
	 * @return Status
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		$upload =& $_FILES[$fieldname];

		if ( !isset( $upload ) || !$upload['name'] ) {
			return Status::newFatal( 'importnofile' );
		}
		if ( !empty( $upload['error'] ) ) {
			switch ( $upload['error'] ) {
				case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
					return Status::newFatal( 'importuploaderrorsize' );
				case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case 3: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case 6: #Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# case else: # Currently impossible
			}

		}
		$fname = $upload['tmp_name'];
		if ( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
01573 $data = Http::request( $method, $url, array( 'followRedirects' => true ) ); 01574 if( $data !== false ) { 01575 $file = tmpfile(); 01576 fwrite( $file, $data ); 01577 fflush( $file ); 01578 fseek( $file, 0 ); 01579 return Status::newGood( new ImportStreamSource( $file ) ); 01580 } else { 01581 return Status::newFatal( 'importcantopen' ); 01582 } 01583 } 01584 01593 public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) { 01594 if( $page == '' ) { 01595 return Status::newFatal( 'import-noarticle' ); 01596 } 01597 $link = Title::newFromText( "$interwiki:Special:Export/$page" ); 01598 if( is_null( $link ) || $link->getInterwiki() == '' ) { 01599 return Status::newFatal( 'importbadinterwiki' ); 01600 } else { 01601 $params = array(); 01602 if ( $history ) $params['history'] = 1; 01603 if ( $templates ) $params['templates'] = 1; 01604 if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth; 01605 $url = $link->getFullUrl( $params ); 01606 # For interwikis, use POST to avoid redirects. 01607 return ImportStreamSource::newFromURL( $url, "POST" ); 01608 } 01609 } 01610 }