MediaWiki
REL1_24
|
<?php
/**
 * Special page that lets a user pick a set of pages and streams them back as
 * an XML export produced by WikiExporter.
 *
 * Without an export request it renders a form: a textarea of page names, plus
 * helpers that append the members of a category or a namespace to that list.
 */
class SpecialExport extends SpecialPage {
	/**
	 * Request state filled in by execute():
	 *  - $curonly: export only the current revision of each page
	 *  - $doExport: this request should stream XML instead of showing the form
	 *  - $pageLinkDepth: validated depth for following page links
	 *  - $templates: also export the templates used by the selected pages
	 */
	private $curonly, $doExport, $pageLinkDepth, $templates;

	/** Value of the 'images' request flag; read but not acted upon yet. */
	private $images;

	public function __construct() {
		parent::__construct( 'Export' );
	}

	/**
	 * Entry point: interpret the request and either stream the export or
	 * render the export form.
	 *
	 * @param string|null $par Subpage text; used as the default page list for
	 *   requests that fall through to the last (GET-style) branch
	 */
	public function execute( $par ) {
		$this->setHeaders();
		$this->outputHeader();
		$config = $this->getConfig();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$request = $this->getRequest();
		$this->templates = $request->getCheck( 'templates' );
		$this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
		$this->pageLinkDepth = $this->validateLinkDepth(
			$request->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		$exportall = false;

		if ( $request->getCheck( 'addcat' ) ) {
			// "Add pages from category": extend the page list and redisplay the form.
			$page = $request->getText( 'pages' );
			$catname = $request->getText( 'catname' );

			if ( $catname !== '' && $catname !== null && $catname !== false ) {
				// Only the DB key of this title is used (for the cl_to lookup),
				// so the title is built in NS_MAIN rather than NS_CATEGORY.
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						$page .= "\n" . implode( "\n", $catpages );
					}
				}
			}
		} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
			// "Add pages from namespace": extend the page list and redisplay the form.
			$page = $request->getText( 'pages' );
			$nsindex = $request->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				$nspages = $this->getPagesFromNamespace( $nsindex );
				if ( $nspages ) {
					$page .= "\n" . implode( "\n", $nspages );
				}
			}
		} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
			$this->doExport = true;
			$exportall = true;

			/* Although $page and $history are not used later on, we
			nevertheless set them to avoid that PHP notices about using
			undefined variables foul up our XML output (see call to
			doExport(...) further down) */
			$page = '';
			$history = '';
		} elseif ( $request->wasPosted() && $par == '' ) {
			// Form submission: honour the history options from the request.
			$page = $request->getText( 'pages' );
			$this->curonly = $request->getCheck( 'curonly' );
			$rawOffset = $request->getVal( 'offset' );

			if ( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$maxHistory = $config->get( 'ExportMaxHistory' );
			$limit = $request->getInt( 'limit' );
			$dir = $request->getVal( 'dir' );
			$history = array(
				'dir' => 'asc',
				'offset' => false,
				'limit' => $maxHistory,
			);
			$historyCheck = $request->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// limit/offset/dir are only applied when 'history' is absent;
				// otherwise the defaults set above are kept.
				if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
					$history['limit'] = $limit;
				}

				if ( !is_null( $offset ) ) {
					$history['offset'] = $offset;
				}

				if ( strtolower( $dir ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $request->getText( 'pages', $par );
			$historyCheck = $request->getCheck( 'history' );

			if ( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		}

		if ( !$config->get( 'ExportAllowHistory' ) ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		$list_authors = $request->getCheck( 'listauthors' );
		if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
			$list_authors = false;
		}

		if ( $this->doExport ) {
			$this->getOutput()->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$request->response()->header( "Content-type: application/xml; charset=utf-8" );

			if ( $request->getCheck( 'wpDownload' ) ) {
				// Provide a sane filename suggestion
				$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
				$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			$this->doExport( $page, $history, $list_authors, $exportall );

			return;
		}

		// No export requested (or nothing to export): show the form.
		$out = $this->getOutput();
		$out->addWikiMsg( 'exporttext' );

		$form = Xml::openElement( 'form', array( 'method' => 'post',
			'action' => $this->getPageTitle()->getLocalURL( 'action=submit' ) ) );
		$form .= Xml::inputLabel(
			$this->msg( 'export-addcattext' )->text(),
			'catname',
			'catname',
			40
		) . '&#160;';
		$form .= Xml::submitButton(
			$this->msg( 'export-addcat' )->text(),
			array( 'name' => 'addcat' )
		) . '<br />';

		if ( $config->get( 'ExportFromNamespaces' ) ) {
			$form .= Html::namespaceSelector(
				array(
					'selected' => $nsindex,
					'label' => $this->msg( 'export-addnstext' )->text()
				), array(
					'name' => 'nsindex',
					'id' => 'namespace',
					'class' => 'namespaceselector',
				)
			) . '&#160;';
			$form .= Xml::submitButton(
				$this->msg( 'export-addns' )->text(),
				array( 'name' => 'addns' )
			) . '<br />';
		}

		if ( $config->get( 'ExportAllowAll' ) ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportall' )->text(),
				'exportall',
				'exportall',
				$request->wasPosted() ? $request->getCheck( 'exportall' ) : false
			) . '<br />';
		}

		$form .= Xml::element(
			'textarea',
			array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ),
			$page,
			false
		);
		$form .= '<br />';

		if ( $config->get( 'ExportAllowHistory' ) ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportcuronly' )->text(),
				'curonly',
				'curonly',
				$request->wasPosted() ? $request->getCheck( 'curonly' ) : true
			) . '<br />';
		} else {
			$out->addWikiMsg( 'exportnohistory' );
		}

		$form .= Xml::checkLabel(
			$this->msg( 'export-templates' )->text(),
			'templates',
			'wpExportTemplates',
			$request->wasPosted() ? $request->getCheck( 'templates' ) : false
		) . '<br />';

		if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
			$form .= Xml::inputLabel(
				$this->msg( 'export-pagelinks' )->text(),
				'pagelink-depth',
				'pagelink-depth',
				20,
				0
			) . '<br />';
		}

		/* Enable this when we can do something useful exporting/importing image information.
		$form .= Xml::checkLabel(
				$this->msg( 'export-images' )->text(),
				'images',
				'wpExportImages',
				false
			) . '<br />';
		*/
		$form .= Xml::checkLabel(
			$this->msg( 'export-download' )->text(),
			'wpDownload',
			'wpDownload',
			$request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
		) . '<br />';

		if ( $config->get( 'ExportAllowListContributors' ) ) {
			$form .= Xml::checkLabel(
				$this->msg( 'exportlistauthors' )->text(),
				'listauthors',
				'listauthors',
				$request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
			) . '<br />';
		}

		$form .= Xml::submitButton(
			$this->msg( 'export-submit' )->text(),
			Linker::tooltipAndAccesskeyAttribs( 'export' )
		);
		$form .= Xml::closeElement( 'form' );

		$out->addHTML( $form );
	}

	/**
	 * Whether the current user holds the 'override-export-depth' right.
	 *
	 * @return bool
	 */
	private function userCanOverrideExportDepth() {
		return $this->getUser()->isAllowed( 'override-export-depth' );
	}

	/**
	 * Stream the XML export for the requested pages (or for all pages).
	 *
	 * @param string $page Newline-separated list of page names; ignored when
	 *   $exportall is true
	 * @param int|array $history WikiExporter::CURRENT, WikiExporter::FULL, or
	 *   an array with 'dir', 'offset' and 'limit' keys as built by execute();
	 *   forced to WikiExporter::FULL when $exportall is true
	 * @param bool $list_authors Assigned to the exporter's list_authors property
	 * @param bool $exportall Export every page instead of the given list
	 */
	private function doExport( $page, $history, $list_authors, $exportall ) {
		// Defined on every path so that the export-all branch never leaves it unset.
		$pages = array();

		// If we are grabbing everything, enable full history and ignore the rest
		if ( $exportall ) {
			$history = WikiExporter::FULL;
		} else {
			$pageSet = array(); // Inverted index of all pages to look up

			// Split up and normalize input
			foreach ( explode( "\n", $page ) as $pageName ) {
				$pageName = trim( $pageName );
				$title = Title::newFromText( $pageName );
				if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
					// Only record each page once!
					$pageSet[$title->getPrefixedText()] = true;
				}
			}

			// Set of original pages to pass on to further manipulation...
			$inputPages = array_keys( $pageSet );

			// Look up any linked pages if asked...
			if ( $this->templates ) {
				$pageSet = $this->getTemplates( $inputPages, $pageSet );
			}
			$linkDepth = $this->pageLinkDepth;
			if ( $linkDepth ) {
				$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
			}

			// Enable this when we can do something useful exporting/importing image information.
			// if( $this->images ) ) {
			// 	$pageSet = $this->getImages( $inputPages, $pageSet );
			// }

			$pages = array_keys( $pageSet );

			// Normalize titles to the same format and remove dupes, see bug 17374
			foreach ( $pages as $k => $v ) {
				$pages[$k] = str_replace( " ", "_", $v );
			}

			$pages = array_unique( $pages );
		}

		/* Ok, let's get to it... */
		if ( $history == WikiExporter::CURRENT ) {
			$lb = false;
			$db = wfGetDB( DB_SLAVE );
			$buffer = WikiExporter::BUFFER;
		} else {
			// Use an unbuffered query; histories may be very long!
			$lb = wfGetLBFactory()->newMainLB();
			$db = $lb->getConnection( DB_SLAVE );
			$buffer = WikiExporter::STREAM;

			// This might take a while... :D
			wfSuppressWarnings();
			set_time_limit( 0 );
			wfRestoreWarnings();
		}

		$exporter = new WikiExporter( $db, $history, $buffer );
		$exporter->list_authors = $list_authors;

		try {
			$exporter->openStream();

			if ( $exportall ) {
				$exporter->allPages();
			} else {
				// A distinct loop variable keeps the $page argument intact.
				foreach ( $pages as $exportName ) {
					#Bug 8824: Only export pages the user can read
					$title = Title::newFromText( $exportName );
					if ( is_null( $title ) ) {
						// @todo Perhaps output an <error> tag or something.
						continue;
					}

					if ( !$title->userCan( 'read', $this->getUser() ) ) {
						// @todo Perhaps output an <error> tag or something.
						continue;
					}

					$exporter->pageByTitle( $title );
				}
			}

			$exporter->closeStream();
		} catch ( Exception $e ) {
			// Release the dedicated load balancer's connections before
			// propagating the failure (no `finally` on the PHP versions this
			// file targets).
			if ( $lb ) {
				$lb->closeAll();
			}

			throw $e;
		}

		if ( $lb ) {
			$lb->closeAll();
		}
	}

	/**
	 * List the pages that are members of a category (at most 5000).
	 *
	 * @param Title $title Title whose DB key is matched against cl_to
	 * @return array Page names, namespace-prefixed where applicable
	 */
	private function getPagesFromCategory( $title ) {
		$name = $title->getDBkey();

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			array( 'page', 'categorylinks' ),
			array( 'page_namespace', 'page_title' ),
			array( 'cl_from=page_id', 'cl_to' => $name ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->pageNamesFromResult( $res );
	}

	/**
	 * List the pages that live in a namespace (at most 5000).
	 *
	 * @param int|string $nsindex Namespace index to match against page_namespace
	 * @return array Page names, namespace-prefixed where applicable
	 */
	private function getPagesFromNamespace( $nsindex ) {
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			'page',
			array( 'page_namespace', 'page_title' ),
			array( 'page_namespace' => $nsindex ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->pageNamesFromResult( $res );
	}

	/**
	 * Turn rows carrying page_namespace/page_title into page names.
	 *
	 * Rows with a falsy page_namespace are returned as the bare page_title;
	 * all others get the content language's namespace text and a colon
	 * prepended.
	 *
	 * @param Traversable|array $res Rows with page_namespace and page_title fields
	 * @return array
	 */
	private function pageNamesFromResult( $res ) {
		global $wgContLang;

		$pages = array();

		foreach ( $res as $row ) {
			$n = $row->page_title;
			if ( $row->page_namespace ) {
				$ns = $wgContLang->getNsText( $row->page_namespace );
				$n = $ns . ':' . $n;
			}

			$pages[] = $n;
		}

		return $pages;
	}

	/**
	 * Expand a set of pages with the targets of their templatelinks rows.
	 *
	 * @param array $inputPages List of page names to look up
	 * @param array $pageSet Associative array indexed by page name
	 * @return array The enlarged $pageSet
	 */
	private function getTemplates( $inputPages, $pageSet ) {
		return $this->getLinks( $inputPages, $pageSet,
			'templatelinks',
			array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' ),
			array( 'page_id=tl_from' )
		);
	}

	/**
	 * Clamp a requested link depth to what the user is allowed.
	 *
	 * Negative values become 0. Unless the user may override the export
	 * depth, the value is capped at the 'ExportMaxLinkDepth' setting. The
	 * result never exceeds 5.
	 *
	 * @param int|null $depth Requested depth (null when not supplied)
	 * @return int
	 */
	private function validateLinkDepth( $depth ) {
		if ( $depth < 0 ) {
			return 0;
		}

		if ( !$this->userCanOverrideExportDepth() ) {
			$maxLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' );
			if ( $depth > $maxLinkDepth ) {
				return $maxLinkDepth;
			}
		}

		/*
		 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
		 * crazy-big export from being done by someone setting the depth
		 * number too high. In other words, last resort safety net.
		 */

		return intval( min( $depth, 5 ) );
	}

	/**
	 * Expand a set of pages by following pagelinks rows $depth levels deep.
	 *
	 * Each pass feeds the whole accumulated set back in as the next input.
	 *
	 * @param array $inputPages List of page names to start from
	 * @param array $pageSet Associative array indexed by page name
	 * @param int $depth Number of passes to run
	 * @return array The enlarged $pageSet
	 */
	private function getPageLinks( $inputPages, $pageSet, $depth ) {
		// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
		for ( ; $depth > 0; --$depth ) {
			// @codingStandardsIgnoreEnd
			$pageSet = $this->getLinks(
				$inputPages, $pageSet, 'pagelinks',
				array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
				array( 'page_id=pl_from' )
			);
			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Expand a set of pages with the targets of their imagelinks rows.
	 *
	 * Its only call site in this class is commented out.
	 *
	 * @param array $inputPages List of page names to look up
	 * @param array $pageSet Associative array indexed by page name
	 * @return array The enlarged $pageSet
	 */
	private function getImages( $inputPages, $pageSet ) {
		return $this->getLinks(
			$inputPages,
			$pageSet,
			'imagelinks',
			array( 'namespace' => NS_FILE, 'title' => 'il_to' ),
			array( 'page_id=il_from' )
		);
	}

	/**
	 * Expand a set of pages with the link targets found in a links table.
	 *
	 * One query is issued per input page that parses to a valid title.
	 *
	 * @param array $inputPages List of page names to look up
	 * @param array $pageSet Associative array indexed by page name
	 * @param string $table Links table to join against 'page'
	 * @param array $fields Select list; must expose 'namespace' and 'title'
	 *   columns, which are read from every result row
	 * @param array $join Extra conditions tying $table to 'page'
	 * @return array The enlarged $pageSet
	 */
	private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = wfGetDB( DB_SLAVE );

		foreach ( $inputPages as $page ) {
			$title = Title::newFromText( $page );

			if ( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				$result = $dbr->select(
					array( 'page', $table ),
					$fields,
					array_merge(
						$join,
						array(
							'page_namespace' => $title->getNamespace(),
							'page_title' => $title->getDBkey()
						)
					),
					__METHOD__
				);

				foreach ( $result as $row ) {
					$linked = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$linked->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

	protected function getGroupName() {
		return 'pagetools';
	}
}