MediaWiki
REL1_22
|
<?php

/**
 * Special page that shows the page export form and, when an export is
 * requested, streams the selected pages as XML through WikiExporter.
 */
class SpecialExport extends SpecialPage {
	private $curonly, $doExport, $pageLinkDepth, $templates;
	private $images;

	public function __construct() {
		parent::__construct( 'Export' );
	}

	/**
	 * Handle a request: either stream an XML export or render the export form.
	 *
	 * The request is classified into one of five cases:
	 *  - 'addcat':    append the members of a category to the page list, show the form
	 *  - 'addns':     append the pages of a namespace to the page list, show the form
	 *  - 'exportall': export every page (only if $wgExportAllowAll is set)
	 *  - POST with an empty $par: export the listed pages using the form options
	 *  - anything else (e.g. GET): export the pages from the 'pages' parameter,
	 *    falling back to $par, current revision only unless 'history' is set
	 *
	 * @param string|null $par Parameter supplied by the caller; used as the
	 *   default value of the 'pages' request parameter for non-POST requests
	 */
	public function execute( $par ) {
		global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
		global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
		global $wgExportAllowAll;

		$this->setHeaders();
		$this->outputHeader();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$request = $this->getRequest();
		$this->templates = $request->getCheck( 'templates' );
		$this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
		$this->pageLinkDepth = $this->validateLinkDepth(
			$request->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		$exportall = false;

		// Defined up front so that no branch below can leave them unset; an
		// undefined-variable notice would otherwise end up in the XML output.
		$page = '';
		$history = WikiExporter::CURRENT;

		if ( $request->getCheck( 'addcat' ) ) {
			$page = $request->getText( 'pages' );
			$catname = $request->getText( 'catname' );

			if ( strval( $catname ) !== '' ) {
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						$page .= "\n" . implode( "\n", $catpages );
					}
				}
			}
		} elseif ( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
			$page = $request->getText( 'pages' );
			$nsindex = $request->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				$nspages = $this->getPagesFromNamespace( $nsindex );
				if ( $nspages ) {
					$page .= "\n" . implode( "\n", $nspages );
				}
			}
		} elseif ( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
			// $page and $history keep their defaults; doExport() ignores the
			// page list and forces full history when exporting everything.
			$this->doExport = true;
			$exportall = true;
		} elseif ( $request->wasPosted() && $par == '' ) {
			$page = $request->getText( 'pages' );
			$this->curonly = $request->getCheck( 'curonly' );
			$rawOffset = $request->getVal( 'offset' );

			if ( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$limit = $request->getInt( 'limit' );
			$dir = $request->getVal( 'dir' );
			$history = array(
				'dir' => 'asc',
				'offset' => false,
				'limit' => $wgExportMaxHistory,
			);
			$historyCheck = $request->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// A requested limit is honoured only if it is positive and
				// below the configured maximum (0 means "no maximum").
				if ( $limit > 0 && ( $wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
					$history['limit'] = $limit;
				}

				if ( !is_null( $offset ) ) {
					$history['offset'] = $offset;
				}

				// 'dir' may be absent from the request, so cast before lowercasing
				if ( strtolower( (string)$dir ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $request->getText( 'pages', $par );
			$historyCheck = $request->getCheck( 'history' );

			if ( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		}

		if ( !$wgExportAllowHistory ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		$list_authors = $request->getCheck( 'listauthors' );
		if ( !$this->curonly || !$wgExportAllowListContributors ) {
			$list_authors = false;
		}

		if ( $this->doExport ) {
			$this->getOutput()->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$request->response()->header( "Content-type: application/xml; charset=utf-8" );

			if ( $request->getCheck( 'wpDownload' ) ) {
				// Provide a sane filename suggestion
				$filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
				$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			$this->doExport( $page, $history, $list_authors, $exportall );

			return;
		}

		// No export requested (or nothing to export): build the form
		$out = $this->getOutput();
		$out->addWikiMsg( 'exporttext' );

		$form = Xml::openElement( 'form', array( 'method' => 'post',
			'action' => $this->getTitle()->getLocalURL( 'action=submit' ) ) );
		$form .= Xml::inputLabel(
			$this->msg( 'export-addcattext' )->text(),
			'catname',
			'catname',
			40
		) . ' ';
		$form .= Xml::submitButton(
			$this->msg( 'export-addcat' )->text(),
			array( 'name' => 'addcat' )
		) . '<br />';

		if ( $wgExportFromNamespaces ) {
			$form .= Html::namespaceSelector(
				array(
					'selected' => $nsindex,
					'label' => $this->msg( 'export-addnstext' )->text()
				), array(
					'name' => 'nsindex',
					'id' => 'namespace',
					'class' => 'namespaceselector',
				)
			) . ' ';
			$form .= Xml::submitButton(
				$this->msg( 'export-addns' )->text(),
				array( 'name' => 'addns' )
			) . '<br />';
		}

		if ( $wgExportAllowAll ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportall' )->text(),
				'exportall',
				'exportall',
				$request->wasPosted() ? $request->getCheck( 'exportall' ) : false
			) . '<br />';
		}

		$form .= Xml::element(
			'textarea',
			array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ),
			$page,
			false
		);
		$form .= '<br />';

		if ( $wgExportAllowHistory ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportcuronly' )->text(),
				'curonly',
				'curonly',
				$request->wasPosted() ? $request->getCheck( 'curonly' ) : true
			) . '<br />';
		} else {
			$out->addWikiMsg( 'exportnohistory' );
		}

		$form .= Xml::checkLabel(
			$this->msg( 'export-templates' )->text(),
			'templates',
			'wpExportTemplates',
			$request->wasPosted() ? $request->getCheck( 'templates' ) : false
		) . '<br />';

		if ( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
			$form .= Xml::inputLabel(
				$this->msg( 'export-pagelinks' )->text(),
				'pagelink-depth',
				'pagelink-depth',
				20,
				0
			) . '<br />';
		}

		// Enable this when we can do something useful exporting/importing image information. :)
		//$form .= Xml::checkLabel( $this->msg( 'export-images' )->text(), 'images', 'wpExportImages', false ) . '<br />';
		$form .= Xml::checkLabel(
			$this->msg( 'export-download' )->text(),
			'wpDownload',
			'wpDownload',
			$request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
		) . '<br />';

		if ( $wgExportAllowListContributors ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportlistauthors' )->text(),
				'listauthors',
				'listauthors',
				$request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
			) . '<br />';
		}

		$form .= Xml::submitButton(
			$this->msg( 'export-submit' )->text(),
			Linker::tooltipAndAccesskeyAttribs( 'export' )
		);
		$form .= Xml::closeElement( 'form' );

		$out->addHTML( $form );
	}

	/**
	 * Whether the current user holds the 'override-export-depth' right.
	 *
	 * @return bool
	 */
	private function userCanOverrideExportDepth() {
		return $this->getUser()->isAllowed( 'override-export-depth' );
	}

	/**
	 * Stream the XML export to the client.
	 *
	 * @param string $page Newline-separated list of page names
	 *   (ignored when $exportall is true)
	 * @param int|array $history WikiExporter::CURRENT, WikiExporter::FULL, or
	 *   an array with 'dir', 'offset' and 'limit' keys as built by execute()
	 * @param bool $list_authors Value assigned to the exporter's list_authors
	 * @param bool $exportall If true, export all pages with full history
	 */
	private function doExport( $page, $history, $list_authors, $exportall ) {

		// If we are grabbing everything, enable full history and ignore the rest
		if ( $exportall ) {
			$history = WikiExporter::FULL;
		} else {

			$pageSet = array(); // Inverted index of all pages to look up

			// Split up and normalize input
			foreach ( explode( "\n", $page ) as $pageName ) {
				$pageName = trim( $pageName );
				$title = Title::newFromText( $pageName );
				if ( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
					// Only record each page once!
					$pageSet[$title->getPrefixedText()] = true;
				}
			}

			// Set of original pages to pass on to further manipulation...
			$inputPages = array_keys( $pageSet );

			// Look up any linked pages if asked...
			if ( $this->templates ) {
				$pageSet = $this->getTemplates( $inputPages, $pageSet );
			}
			$linkDepth = $this->pageLinkDepth;
			if ( $linkDepth ) {
				$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
			}

			// Enable this when we can do something useful exporting/importing image information.
			// if ( $this->images ) {
			// 	$pageSet = $this->getImages( $inputPages, $pageSet );
			// }

			$pages = array_keys( $pageSet );

			// Normalize titles to the same format and remove dupes, see bug 17374
			foreach ( $pages as $k => $v ) {
				$pages[$k] = str_replace( " ", "_", $v );
			}

			$pages = array_unique( $pages );
		}

		/* Ok, let's get to it... */
		if ( $history == WikiExporter::CURRENT ) {
			$lb = false;
			$db = wfGetDB( DB_SLAVE );
			$buffer = WikiExporter::BUFFER;
		} else {
			// Use an unbuffered query; histories may be very long!
			$lb = wfGetLBFactory()->newMainLB();
			$db = $lb->getConnection( DB_SLAVE );
			$buffer = WikiExporter::STREAM;

			// This might take a while... :D
			wfSuppressWarnings();
			set_time_limit( 0 );
			wfRestoreWarnings();
		}

		$exporter = new WikiExporter( $db, $history, $buffer );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		if ( $exportall ) {
			$exporter->allPages();
		} else {
			// Distinct loop variable so the $page parameter is not overwritten
			foreach ( $pages as $exportName ) {
				#Bug 8824: Only export pages the user can read
				$title = Title::newFromText( $exportName );
				if ( is_null( $title ) ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				if ( !$title->userCan( 'read', $this->getUser() ) ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				$exporter->pageByTitle( $title );
			}
		}

		$exporter->closeStream();

		// Only set when a dedicated load balancer was created for streaming
		if ( $lb ) {
			$lb->closeAll();
		}
	}

	/**
	 * List the pages that are members of a category (at most 5000).
	 *
	 * @param Title $title Title whose DB key is used as the category name
	 * @return array Page names, namespace-prefixed where applicable
	 */
	private function getPagesFromCategory( $title ) {
		$name = $title->getDBkey();

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			array( 'page', 'categorylinks' ),
			array( 'page_namespace', 'page_title' ),
			array( 'cl_from=page_id', 'cl_to' => $name ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->getPageNamesFromRows( $res );
	}

	/**
	 * List the pages in a namespace (at most 5000).
	 *
	 * @param int|string $nsindex Namespace index to match against page_namespace
	 * @return array Page names, namespace-prefixed where applicable
	 */
	private function getPagesFromNamespace( $nsindex ) {
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			'page',
			array( 'page_namespace', 'page_title' ),
			array( 'page_namespace' => $nsindex ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->getPageNamesFromRows( $res );
	}

	/**
	 * Convert rows carrying page_namespace and page_title into page names.
	 *
	 * @param Traversable|array $res Rows with page_namespace and page_title fields
	 * @return array One "Namespace:Title" (or bare "Title") string per row
	 */
	private function getPageNamesFromRows( $res ) {
		global $wgContLang;

		$pages = array();

		foreach ( $res as $row ) {
			$n = $row->page_title;

			// Rows with a falsy page_namespace (e.g. 0) get no prefix
			if ( $row->page_namespace ) {
				$ns = $wgContLang->getNsText( $row->page_namespace );
				$n = $ns . ':' . $n;
			}

			$pages[] = $n;
		}

		return $pages;
	}

	/**
	 * Expand a page set with the targets recorded in 'templatelinks'
	 * for the given pages.
	 *
	 * @param array $inputPages List of page names
	 * @param array $pageSet Associative array indexed by page name
	 * @return array The expanded page set
	 */
	private function getTemplates( $inputPages, $pageSet ) {
		return $this->getLinks( $inputPages, $pageSet,
			'templatelinks',
			array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' ),
			array( 'page_id=tl_from' )
		);
	}

	/**
	 * Clamp the requested link depth to the allowed range.
	 *
	 * @param int|null $depth Requested depth, or null if none was given
	 * @return int Depth between 0 and 5
	 */
	private function validateLinkDepth( $depth ) {
		global $wgExportMaxLinkDepth;

		// A missing or negative depth means "do not follow links"
		if ( $depth === null || $depth < 0 ) {
			return 0;
		}

		// Users without the override right are limited to the configured maximum
		if ( !$this->userCanOverrideExportDepth() && $depth > $wgExportMaxLinkDepth ) {
			$depth = $wgExportMaxLinkDepth;
		}

		/*
		 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
		 * crazy-big export from being done by someone setting the depth
		 * number too high. In other words, last resort safety net. It is
		 * applied after the configured maximum so that a configured value
		 * above 5 cannot bypass it.
		 */
		return intval( min( $depth, 5 ) );
	}

	/**
	 * Expand a page set with the targets recorded in 'pagelinks', repeating
	 * the expansion $depth times on the growing set.
	 *
	 * @param array $inputPages List of page names
	 * @param array $pageSet Associative array indexed by page name
	 * @param int $depth Number of expansion rounds
	 * @return array The expanded page set
	 */
	private function getPageLinks( $inputPages, $pageSet, $depth ) {
		for ( ; $depth > 0; --$depth ) {
			$pageSet = $this->getLinks(
				$inputPages, $pageSet, 'pagelinks',
				array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
				array( 'page_id=pl_from' )
			);
			// The next round starts from everything found so far
			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Expand a page set with the NS_FILE targets recorded in 'imagelinks'
	 * for the given pages. Not called by the active code in this class;
	 * the only call sites are commented out.
	 *
	 * @param array $inputPages List of page names
	 * @param array $pageSet Associative array indexed by page name
	 * @return array The expanded page set
	 */
	private function getImages( $inputPages, $pageSet ) {
		return $this->getLinks(
			$inputPages,
			$pageSet,
			'imagelinks',
			array( 'namespace' => NS_FILE, 'title' => 'il_to' ),
			array( 'page_id=il_from' )
		);
	}

	/**
	 * Expand a page set with the link targets of the given pages, one query
	 * per input page.
	 *
	 * @param array $inputPages List of page names
	 * @param array $pageSet Associative array indexed by page name
	 * @param string $table Link table to join against 'page'
	 * @param array $fields Select list that must yield 'namespace' and 'title'
	 * @param array $join Conditions joining 'page' to $table
	 * @return array The expanded page set
	 */
	private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = wfGetDB( DB_SLAVE );

		foreach ( $inputPages as $page ) {
			$title = Title::newFromText( $page );

			if ( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				$result = $dbr->select(
					array( 'page', $table ),
					$fields,
					array_merge(
						$join,
						array(
							'page_namespace' => $title->getNamespace(),
							'page_title' => $title->getDBkey()
						)
					),
					__METHOD__
				);

				foreach ( $result as $row ) {
					$template = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$template->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

	/**
	 * @return string Name of the group this special page is listed under
	 */
	protected function getGroupName() {
		return 'pagetools';
	}
}