MediaWiki
REL1_20
|
<?php
/**
 * Special page that exports a set of wiki pages as an XML dump.
 *
 * Without a usable export request the page renders an HTML form; otherwise
 * normal page output is disabled and the dump produced by WikiExporter is
 * streamed directly as application/xml.
 */
class SpecialExport extends SpecialPage {

	private $curonly, $doExport, $pageLinkDepth, $templates;
	private $images;

	public function __construct() {
		parent::__construct( 'Export' );
	}

	/**
	 * Entry point: interpret the request and either stream an export or
	 * show the export form.
	 *
	 * The request is handled as one of five mutually exclusive cases:
	 * "add pages from category", "add pages from namespace", "export all",
	 * a posted form submission, or a plain GET request.
	 *
	 * @param $par String|null: subpage parameter; used as the default page
	 *   list for GET requests, and must be empty for a POST to be treated
	 *   as a form submission
	 */
	public function execute( $par ) {
		global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
		global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
		global $wgExportAllowAll;

		$this->setHeaders();
		$this->outputHeader();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$request = $this->getRequest();
		$this->templates = $request->getCheck( 'templates' );
		$this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
		$this->pageLinkDepth = $this->validateLinkDepth(
			$request->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		$exportall = false;

		if ( $request->getCheck( 'addcat' ) ) {
			// Append the members of the named category to the page list;
			// this only refills the form, it does not start an export.
			$page = $request->getText( 'pages' );
			$catname = $request->getText( 'catname' );

			if ( $catname !== '' && $catname !== null && $catname !== false ) {
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						$page .= "\n" . implode( "\n", $catpages );
					}
				}
			}
		}
		elseif( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
			// Append the pages of the selected namespace to the page list;
			// this only refills the form, it does not start an export.
			$page = $request->getText( 'pages' );
			$nsindex = $request->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				$nspages = $this->getPagesFromNamespace( $nsindex );
				if ( $nspages ) {
					$page .= "\n" . implode( "\n", $nspages );
				}
			}
		}
		elseif( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
			$this->doExport = true;
			$exportall = true;

			/* Although $page and $history are not used later on, we
			nevertheless set them to avoid that PHP notices about using
			undefined variables foul up our XML output (see call to
			doExport(...) further down) */
			$page = '';
			$history = '';
		}
		elseif( $request->wasPosted() && $par == '' ) {
			$page = $request->getText( 'pages' );
			$this->curonly = $request->getCheck( 'curonly' );
			$rawOffset = $request->getVal( 'offset' );

			if( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$limit = $request->getInt( 'limit' );
			$dir = $request->getVal( 'dir' );
			// Default history specification; refined below from the request
			$history = array(
				'dir' => 'asc',
				'offset' => false,
				'limit' => $wgExportMaxHistory,
			);
			$historyCheck = $request->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// A requested limit is honoured only if it is positive and
				// below the configured maximum (0 meaning "no maximum").
				if ( $limit > 0 && ( $wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
					$history['limit'] = $limit;
				}
				if ( !is_null( $offset ) ) {
					$history['offset'] = $offset;
				}
				if ( strtolower( $dir ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $request->getText( 'pages', $par );
			$historyCheck = $request->getCheck( 'history' );

			if( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if( $page != '' ) {
				$this->doExport = true;
			}
		}

		if( !$wgExportAllowHistory ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		$list_authors = $request->getCheck( 'listauthors' );
		if ( !$this->curonly || !$wgExportAllowListContributors ) {
			$list_authors = false ;
		}

		if ( $this->doExport ) {
			$this->getOutput()->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$request->response()->header( "Content-type: application/xml; charset=utf-8" );

			if( $request->getCheck( 'wpDownload' ) ) {
				// Provide a sane filename suggestion
				$filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
				$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			$this->doExport( $page, $history, $list_authors, $exportall );

			return;
		}

		// No export requested (or nothing to export): show the form
		$out = $this->getOutput();
		$out->addWikiMsg( 'exporttext' );

		$form = Xml::openElement( 'form', array( 'method' => 'post',
			'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
		$form .= Xml::inputLabel( $this->msg( 'export-addcattext' )->text(), 'catname', 'catname', 40 ) . '&#160;';
		$form .= Xml::submitButton( $this->msg( 'export-addcat' )->text(), array( 'name' => 'addcat' ) ) . '<br />';

		if ( $wgExportFromNamespaces ) {
			$form .= Html::namespaceSelector(
				array(
					'selected' => $nsindex,
					'label' => $this->msg( 'export-addnstext' )->text()
				), array(
					'name' => 'nsindex',
					'id' => 'namespace',
					'class' => 'namespaceselector',
				)
			) . '&#160;';
			$form .= Xml::submitButton( $this->msg( 'export-addns' )->text(), array( 'name' => 'addns' ) ) . '<br />';
		}

		if ( $wgExportAllowAll ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportall' )->text(),
				'exportall',
				'exportall',
				$request->wasPosted() ? $request->getCheck( 'exportall' ) : false
			) . '<br />';
		}

		$form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
		$form .= '<br />';

		if( $wgExportAllowHistory ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportcuronly' )->text(),
				'curonly',
				'curonly',
				$request->wasPosted() ? $request->getCheck( 'curonly' ) : true
			) . '<br />';
		} else {
			$out->addWikiMsg( 'exportnohistory' );
		}

		$form .= Xml::checkLabel(
			$this->msg( 'export-templates' )->text(),
			'templates',
			'wpExportTemplates',
			$request->wasPosted() ? $request->getCheck( 'templates' ) : false
		) . '<br />';

		if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
			$form .= Xml::inputLabel( $this->msg( 'export-pagelinks' )->text(), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
		}
		// Enable this when we can do something useful exporting/importing image information. :)
		//$form .= Xml::checkLabel( $this->msg( 'export-images' )->text(), 'images', 'wpExportImages', false ) . '<br />';
		$form .= Xml::checkLabel(
			$this->msg( 'export-download' )->text(),
			'wpDownload',
			'wpDownload',
			$request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
		) . '<br />';

		if ( $wgExportAllowListContributors ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportlistauthors' )->text(),
				'listauthors',
				'listauthors',
				$request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
			) . '<br />';
		}

		$form .= Xml::submitButton( $this->msg( 'export-submit' )->text(), Linker::tooltipAndAccesskeyAttribs( 'export' ) );
		$form .= Xml::closeElement( 'form' );

		$out->addHTML( $form );
	}

	/**
	 * Whether the current user holds the 'override-export-depth' right.
	 *
	 * @return bool
	 */
	private function userCanOverrideExportDepth() {
		return $this->getUser()->isAllowed( 'override-export-depth' );
	}

	/**
	 * Do the actual page exporting: resolve the page list and stream the
	 * dump through WikiExporter.
	 *
	 * @param $page String: newline-separated page titles entered by the user
	 * @param $history Mixed: WikiExporter::CURRENT, WikiExporter::FULL, or an
	 *   array with 'dir', 'offset' and 'limit' keys as built by execute()
	 * @param $list_authors Boolean: value assigned to the exporter's
	 *   list_authors property
	 * @param $exportall Boolean: export every page with full history and
	 *   ignore $page
	 */
	private function doExport( $page, $history, $list_authors, $exportall ) {

		// Always defined, even though it is only filled and read when
		// individual pages (rather than everything) are exported.
		$pages = array();

		// If we are grabbing everything, enable full history and ignore the rest
		if ( $exportall ) {
			$history = WikiExporter::FULL;
		} else {

			$pageSet = array(); // Inverted index of all pages to look up

			// Split up and normalize input
			foreach( explode( "\n", $page ) as $pageName ) {
				$pageName = trim( $pageName );
				$title = Title::newFromText( $pageName );
				if( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
					// Only record each page once!
					$pageSet[$title->getPrefixedText()] = true;
				}
			}

			// Set of original pages to pass on to further manipulation...
			$inputPages = array_keys( $pageSet );

			// Look up any linked pages if asked...
			if( $this->templates ) {
				$pageSet = $this->getTemplates( $inputPages, $pageSet );
			}
			$linkDepth = $this->pageLinkDepth;
			if( $linkDepth ) {
				$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
			}

			/*
			 // Enable this when we can do something useful exporting/importing image information. :)
			 if( $this->images ) ) {
				$pageSet = $this->getImages( $inputPages, $pageSet );
			 }
			*/

			$pages = array_keys( $pageSet );

			// Normalize titles to the same format and remove dupes, see bug 17374
			foreach( $pages as $k => $v ) {
				$pages[$k] = str_replace( " ", "_", $v );
			}

			$pages = array_unique( $pages );
		}

		/* Ok, let's get to it... */
		if( $history == WikiExporter::CURRENT ) {
			$lb = false;
			$db = wfGetDB( DB_SLAVE );
			$buffer = WikiExporter::BUFFER;
		} else {
			// Use an unbuffered query; histories may be very long!
			$lb = wfGetLBFactory()->newMainLB();
			$db = $lb->getConnection( DB_SLAVE );
			$buffer = WikiExporter::STREAM;

			// This might take a while... :D
			wfSuppressWarnings();
			set_time_limit(0);
			wfRestoreWarnings();
		}

		$exporter = new WikiExporter( $db, $history, $buffer );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		if ( $exportall ) {
			$exporter->allPages();
		} else {
			foreach( $pages as $page ) {
				/*
				 if( $wgExportMaxHistory && !$this->curonly ) {
				 $title = Title::newFromText( $page );
				 if( $title ) {
				 $count = Revision::countByTitle( $db, $title );
				 if( $count > $wgExportMaxHistory ) {
				 wfDebug( __FUNCTION__ .
				 ": Skipped $page, $count revisions too big\n" );
				 continue;
				 }
				 }
				 }*/
				#Bug 8824: Only export pages the user can read
				$title = Title::newFromText( $page );
				if( is_null( $title ) ) {
					continue; #TODO: perhaps output an <error> tag or something.
				}
				if( !$title->userCan( 'read', $this->getUser() ) ) {
					continue; #TODO: perhaps output an <error> tag or something.
				}

				$exporter->pageByTitle( $title );
			}
		}

		$exporter->closeStream();

		// Only the dedicated load balancer created for streaming is closed
		if( $lb ) {
			$lb->closeAll();
		}
	}

	/**
	 * List the pages that are members of a category (at most 5000).
	 *
	 * @param $title Title: its DB key is matched against cl_to
	 * @return array of page names, namespace-prefixed where applicable
	 */
	private function getPagesFromCategory( $title ) {
		$name = $title->getDBkey();

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			array( 'page', 'categorylinks' ),
			array( 'page_namespace', 'page_title' ),
			array( 'cl_from=page_id', 'cl_to' => $name ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->rowsToPageNames( $res );
	}

	/**
	 * List the pages that live in a namespace (at most 5000).
	 *
	 * @param $nsindex int|string: namespace index as submitted by the form
	 * @return array of page names, namespace-prefixed where applicable
	 */
	private function getPagesFromNamespace( $nsindex ) {
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			'page',
			array( 'page_namespace', 'page_title' ),
			array( 'page_namespace' => $nsindex ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->rowsToPageNames( $res );
	}

	/**
	 * Turn rows carrying page_namespace / page_title into page names.
	 *
	 * Rows with a falsy page_namespace yield the bare page_title; all other
	 * rows are prefixed with the content language's namespace text and ':'.
	 *
	 * @param $res Iterable result of a select on the page table
	 * @return array of page names, in result order
	 */
	private function rowsToPageNames( $res ) {
		global $wgContLang;

		$pages = array();

		foreach ( $res as $row ) {
			$n = $row->page_title;

			if ( $row->page_namespace ) {
				$ns = $wgContLang->getNsText( $row->page_namespace );
				$n = $ns . ':' . $n;
			}

			$pages[] = $n;
		}

		return $pages;
	}

	/**
	 * Expand a list of pages to include templates used in those pages.
	 *
	 * @param $inputPages array: list of titles to look up
	 * @param $pageSet array: associative array indexed by titles for output
	 * @return array associative array indexed by titles
	 */
	private function getTemplates( $inputPages, $pageSet ) {
		return $this->getLinks( $inputPages, $pageSet,
			'templatelinks',
			array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' ),
			array( 'page_id=tl_from' )
		);
	}

	/**
	 * Validate the requested link depth.
	 *
	 * @param $depth int|null: requested depth, null when none was submitted
	 * @return int depth clamped to 0..5, and additionally to
	 *   $wgExportMaxLinkDepth for users who may not override it
	 */
	private function validateLinkDepth( $depth ) {
		global $wgExportMaxLinkDepth;

		// No depth submitted (null) is treated like "no link expansion"
		if ( $depth === null || $depth < 0 ) {
			return 0;
		}

		if ( !$this->userCanOverrideExportDepth() ) {
			if( $depth > $wgExportMaxLinkDepth ) {
				return $wgExportMaxLinkDepth;
			}
		}

		/*
		 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
		 * crazy-big export from being done by someone setting the depth
		 * number too high. In other words, last resort safety net.
		 */
		return intval( min( $depth, 5 ) );
	}

	/**
	 * Expand a list of pages to include pages linked to from those pages,
	 * following links up to $depth levels deep.
	 *
	 * Each level looks at every page collected so far, but a page whose
	 * links have already been fetched is not queried again on later levels.
	 *
	 * @param $inputPages array: list of titles to start from
	 * @param $pageSet array: associative array indexed by titles for output
	 * @param $depth int: number of link levels to follow
	 * @return array associative array indexed by titles
	 */
	private function getPageLinks( $inputPages, $pageSet, $depth ) {
		$expanded = array(); // Pages whose links were already looked up

		for ( ; $depth > 0; --$depth ) {
			$pending = array();
			foreach ( $inputPages as $page ) {
				if ( !isset( $expanded[$page] ) ) {
					$expanded[$page] = true;
					$pending[] = $page;
				}
			}

			if ( !$pending ) {
				// Nothing new was discovered; deeper levels cannot add pages
				break;
			}

			$pageSet = $this->getLinks(
				$pending, $pageSet, 'pagelinks',
				array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
				array( 'page_id=pl_from' )
			);
			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Expand a list of pages to include images used in those pages.
	 *
	 * Not called by any active code in this class; the only call site in
	 * doExport() is commented out.
	 *
	 * @param $inputPages array: list of titles to look up
	 * @param $pageSet array: associative array indexed by titles for output
	 * @return array associative array indexed by titles
	 */
	private function getImages( $inputPages, $pageSet ) {
		return $this->getLinks(
			$inputPages,
			$pageSet,
			'imagelinks',
			array( 'namespace' => NS_FILE, 'title' => 'il_to' ),
			array( 'page_id=il_from' )
		);
	}

	/**
	 * Expand a list of pages to include items linked from those pages
	 * through the given link table. Runs one query per input page.
	 *
	 * @param $inputPages array: list of titles to look up; entries that do
	 *   not parse as a title are skipped
	 * @param $pageSet array: associative array indexed by titles for output
	 * @param $table String: link table joined against the page table
	 * @param $fields array: select fields; must provide the aliases
	 *   'namespace' and 'title'
	 * @param $join array: join condition(s) between page and $table
	 * @return array $pageSet with the input pages and their targets added
	 */
	private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = wfGetDB( DB_SLAVE );

		foreach( $inputPages as $page ) {
			$title = Title::newFromText( $page );

			if( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				$result = $dbr->select(
					array( 'page', $table ),
					$fields,
					array_merge(
						$join,
						array(
							'page_namespace' => $title->getNamespace(),
							'page_title' => $title->getDBkey()
						)
					),
					__METHOD__
				);

				foreach( $result as $row ) {
					$template = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$template->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

}