MediaWiki
REL1_19
|
<?php
/**
 * Special page that either shows the export form or streams the selected
 * pages as an XML dump through WikiExporter.
 */
class SpecialExport extends SpecialPage {

	// Request state gathered in execute() and consumed by doExport().
	private $curonly, $doExport, $pageLinkDepth, $templates;
	// Read from the request but not acted upon yet (see doExport()).
	private $images;

	public function __construct() {
		parent::__construct( 'Export' );
	}

	/**
	 * Entry point: work out from the request which pages and which history
	 * mode are wanted, then either stream the export or render the form.
	 *
	 * @param $par String|null: subpage text; used as the default page list
	 *   for non-POST requests, and a POST is only treated as a form
	 *   submission when it is empty.
	 */
	public function execute( $par ) {
		global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
		global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
		global $wgExportAllowAll;

		$this->setHeaders();
		$this->outputHeader();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$request = $this->getRequest();
		$this->templates = $request->getCheck( 'templates' );
		$this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
		$this->pageLinkDepth = $this->validateLinkDepth(
			$request->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		$exportall = false;

		// Defaults so that every branch below leaves these defined. The
		// 'exportall' branch assigns neither, and both are handed to
		// doExport(); without defaults PHP raises undefined-variable
		// notices at a point where normal output is already disabled and
		// XML is about to be streamed.
		$page = '';
		$history = WikiExporter::CURRENT;

		if ( $request->getCheck( 'addcat' ) ) {
			// "Add pages from category": extend the page list, then
			// fall through to re-display the form.
			$page = $request->getText( 'pages' );
			$catname = $request->getText( 'catname' );

			if ( $catname !== '' && $catname !== null && $catname !== false ) {
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						$page .= "\n" . implode( "\n", $catpages );
					}
				}
			}
		} elseif ( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
			// "Add pages from namespace": same idea as above.
			$page = $request->getText( 'pages' );
			$nsindex = $request->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				$nspages = $this->getPagesFromNamespace( $nsindex );
				if ( $nspages ) {
					$page .= "\n" . implode( "\n", $nspages );
				}
			}
		} elseif ( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
			// Whole-wiki export: $page and $history keep their defaults;
			// doExport() ignores the page list and forces full history.
			$this->doExport = true;
			$exportall = true;
		} elseif ( $request->wasPosted() && $par == '' ) {
			$page = $request->getText( 'pages' );
			$this->curonly = $request->getCheck( 'curonly' );
			$rawOffset = $request->getVal( 'offset' );

			if ( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$limit = $request->getInt( 'limit' );
			$dir = $request->getVal( 'dir' );
			$history = array(
				'dir' => 'asc',
				'offset' => false,
				'limit' => $wgExportMaxHistory,
			);
			$historyCheck = $request->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// Only honour a requested limit that is positive and below
				// the configured maximum (0 means no configured maximum).
				if ( $limit > 0 && ( $wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
					$history['limit'] = $limit;
				}
				if ( !is_null( $offset ) ) {
					$history['offset'] = $offset;
				}
				if ( strtolower( $dir ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $request->getText( 'pages', $par );
			$historyCheck = $request->getCheck( 'history' );

			if ( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		}

		if ( !$wgExportAllowHistory ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		$list_authors = $request->getCheck( 'listauthors' );
		if ( !$this->curonly || !$wgExportAllowListContributors ) {
			$list_authors = false;
		}

		if ( $this->doExport ) {
			$this->getOutput()->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$request->response()->header( "Content-type: application/xml; charset=utf-8" );

			if ( $request->getCheck( 'wpDownload' ) ) {
				// Provide a sane filename suggestion
				$filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
				$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			$this->doExport( $page, $history, $list_authors, $exportall );

			return;
		}

		// No export requested (or nothing to export): render the form.
		$out = $this->getOutput();
		$out->addWikiMsg( 'exporttext' );

		$form = Xml::openElement( 'form', array( 'method' => 'post',
			'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
		$form .= Xml::inputLabel( wfMsg( 'export-addcattext' ), 'catname', 'catname', 40 ) . ' ';
		$form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';

		if ( $wgExportFromNamespaces ) {
			$form .= Xml::namespaceSelector( $nsindex, null, 'nsindex', wfMsg( 'export-addnstext' ) ) . ' ';
			$form .= Xml::submitButton( wfMsg( 'export-addns' ), array( 'name' => 'addns' ) ) . '<br />';
		}

		if ( $wgExportAllowAll ) {
			$form .= Xml::checkLabel(
				wfMsg( 'exportall' ),
				'exportall',
				'exportall',
				$request->wasPosted() ? $request->getCheck( 'exportall' ) : false
			) . '<br />';
		}

		$form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
		$form .= '<br />';

		if ( $wgExportAllowHistory ) {
			$form .= Xml::checkLabel(
				wfMsg( 'exportcuronly' ),
				'curonly',
				'curonly',
				$request->wasPosted() ? $request->getCheck( 'curonly' ) : true
			) . '<br />';
		} else {
			$out->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
		}

		$form .= Xml::checkLabel(
			wfMsg( 'export-templates' ),
			'templates',
			'wpExportTemplates',
			$request->wasPosted() ? $request->getCheck( 'templates' ) : false
		) . '<br />';

		if ( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
			$form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
		}
		// Enable this when we can do something useful exporting/importing image information. :)
		//$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
		$form .= Xml::checkLabel(
			wfMsg( 'export-download' ),
			'wpDownload',
			'wpDownload',
			$request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
		) . '<br />';

		if ( $wgExportAllowListContributors ) {
			$form .= Xml::checkLabel(
				wfMsg( 'exportlistauthors' ),
				'listauthors',
				'listauthors',
				$request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
			) . '<br />';
		}

		$form .= Xml::submitButton( wfMsg( 'export-submit' ), Linker::tooltipAndAccesskeyAttribs( 'export' ) );
		$form .= Xml::closeElement( 'form' );

		$out->addHTML( $form );
	}

	/**
	 * Whether the current user holds the 'override-export-depth' right.
	 *
	 * @return bool
	 */
	private function userCanOverrideExportDepth() {
		return $this->getUser()->isAllowed( 'override-export-depth' );
	}

	/**
	 * Stream the export through a WikiExporter.
	 *
	 * @param $page String: newline-separated page names; ignored when
	 *   $exportall is true
	 * @param $history Mixed: history mode handed to WikiExporter (one of
	 *   its constants, or the dir/offset/limit array built in execute());
	 *   replaced by WikiExporter::FULL when $exportall is true
	 * @param $list_authors Boolean: assigned to the exporter's list_authors
	 * @param $exportall Boolean: export every page instead of $page
	 */
	private function doExport( $page, $history, $list_authors, $exportall ) {

		// If we are grabbing everything, enable full history and ignore the rest
		if ( $exportall ) {
			$history = WikiExporter::FULL;
		} else {

			$pageSet = array(); // Inverted index of all pages to look up

			// Split up and normalize input
			foreach ( explode( "\n", $page ) as $pageName ) {
				$pageName = trim( $pageName );
				$title = Title::newFromText( $pageName );
				if ( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
					// Only record each page once!
					$pageSet[$title->getPrefixedText()] = true;
				}
			}

			// Set of original pages to pass on to further manipulation...
			$inputPages = array_keys( $pageSet );

			// Look up any linked pages if asked...
			if ( $this->templates ) {
				$pageSet = $this->getTemplates( $inputPages, $pageSet );
			}
			$linkDepth = $this->pageLinkDepth;
			if ( $linkDepth ) {
				$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
			}

			// Enable this when we can do something useful exporting/importing
			// image information. :)
			// if ( $this->images ) {
			// 	$pageSet = $this->getImages( $inputPages, $pageSet );
			// }

			$pages = array_keys( $pageSet );

			// Normalize titles to the same format and remove dupes, see bug 17374
			foreach ( $pages as $k => $v ) {
				$pages[$k] = str_replace( " ", "_", $v );
			}

			$pages = array_unique( $pages );
		}

		/* Ok, let's get to it... */
		if ( $history == WikiExporter::CURRENT ) {
			$lb = false;
			$db = wfGetDB( DB_SLAVE );
			$buffer = WikiExporter::BUFFER;
		} else {
			// Use an unbuffered query; histories may be very long!
			$lb = wfGetLBFactory()->newMainLB();
			$db = $lb->getConnection( DB_SLAVE );
			$buffer = WikiExporter::STREAM;

			// This might take a while... :D
			wfSuppressWarnings();
			set_time_limit( 0 );
			wfRestoreWarnings();
		}

		$exporter = new WikiExporter( $db, $history, $buffer );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		if ( $exportall ) {
			$exporter->allPages();
		} else {
			foreach ( $pages as $page ) {
				/*
				if( $wgExportMaxHistory && !$this->curonly ) {
					$title = Title::newFromText( $page );
					if( $title ) {
						$count = Revision::countByTitle( $db, $title );
						if( $count > $wgExportMaxHistory ) {
							wfDebug( __FUNCTION__ .
								": Skipped $page, $count revisions too big\n" );
							continue;
						}
					}
				}*/
				# Bug 8824: Only export pages the user can read
				$title = Title::newFromText( $page );
				if ( is_null( $title ) ) {
					continue; # TODO: perhaps output an <error> tag or something.
				}
				if ( !$title->userCan( 'read', $this->getUser() ) ) {
					continue; # TODO: perhaps output an <error> tag or something.
				}

				$exporter->pageByTitle( $title );
			}
		}

		$exporter->closeStream();

		// Only set when a dedicated load balancer was created above.
		if ( $lb ) {
			$lb->closeAll();
		}
	}

	/**
	 * List the pages that are members of a category (at most 5000).
	 *
	 * @param $title Title: its DB key is matched against cl_to
	 * @return Array of page names, namespace-prefixed where applicable
	 */
	private function getPagesFromCategory( $title ) {
		$name = $title->getDBkey();

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			array( 'page', 'categorylinks' ),
			array( 'page_namespace', 'page_title' ),
			array( 'cl_from=page_id', 'cl_to' => $name ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->getPageNamesFromRows( $res );
	}

	/**
	 * List the pages in a namespace (at most 5000).
	 *
	 * @param $nsindex Mixed: value matched against page_namespace
	 * @return Array of page names, namespace-prefixed where applicable
	 */
	private function getPagesFromNamespace( $nsindex ) {
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			'page',
			array( 'page_namespace', 'page_title' ),
			array( 'page_namespace' => $nsindex ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->getPageNamesFromRows( $res );
	}

	/**
	 * Turn rows carrying page_namespace/page_title into page names,
	 * prefixing the content-language namespace text when page_namespace
	 * is truthy (i.e. anything but the main namespace, 0).
	 *
	 * @param $res Iterable result of rows with page_namespace and page_title
	 * @return Array of page name strings
	 */
	private function getPageNamesFromRows( $res ) {
		global $wgContLang;

		$pages = array();

		foreach ( $res as $row ) {
			$n = $row->page_title;

			if ( $row->page_namespace ) {
				$ns = $wgContLang->getNsText( $row->page_namespace );
				$n = $ns . ':' . $n;
			}

			$pages[] = $n;
		}

		return $pages;
	}

	/**
	 * Add the templates used by the given pages to the page set.
	 *
	 * @param $inputPages Array: list of page names
	 * @param $pageSet Array: associative array indexed by page name
	 * @return Array: the extended page set
	 */
	private function getTemplates( $inputPages, $pageSet ) {
		return $this->getLinks( $inputPages, $pageSet,
			'templatelinks',
			array( 'tl_namespace AS namespace', 'tl_title AS title' ),
			array( 'page_id=tl_from' )
		);
	}

	/**
	 * Clamp the requested link depth to what is permitted.
	 *
	 * Negative values become 0. Users without the override right are capped
	 * at $wgExportMaxLinkDepth. Everything else is capped at 5.
	 *
	 * @param $depth Integer|null: value of the 'pagelink-depth' request
	 *   parameter (null when absent)
	 * @return Integer-like depth to use
	 */
	private function validateLinkDepth( $depth ) {
		global $wgExportMaxLinkDepth;

		if ( $depth < 0 ) {
			return 0;
		}

		if ( !$this->userCanOverrideExportDepth() ) {
			if ( $depth > $wgExportMaxLinkDepth ) {
				return $wgExportMaxLinkDepth;
			}
		}

		/*
		 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
		 * crazy-big export from being done by someone setting the depth
		 * number too high. In other words, last resort safety net.
		 */
		return intval( min( $depth, 5 ) );
	}

	/**
	 * Add the pages linked from the given pages, following links up to
	 * $depth levels: each pass feeds the whole accumulated set back in.
	 *
	 * @param $inputPages Array: list of page names
	 * @param $pageSet Array: associative array indexed by page name
	 * @param $depth Integer: number of passes to run
	 * @return Array: the extended page set
	 */
	private function getPageLinks( $inputPages, $pageSet, $depth ) {
		for ( ; $depth > 0; --$depth ) {
			$pageSet = $this->getLinks(
				$inputPages, $pageSet, 'pagelinks',
				array( 'pl_namespace AS namespace', 'pl_title AS title' ),
				array( 'page_id=pl_from' )
			);
			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Add the files used by the given pages to the page set.
	 * Not called from live code in this class at present (its only call
	 * site in doExport() is commented out).
	 *
	 * @param $inputPages Array: list of page names
	 * @param $pageSet Array: associative array indexed by page name
	 * @return Array: the extended page set
	 */
	private function getImages( $inputPages, $pageSet ) {
		return $this->getLinks(
			$inputPages,
			$pageSet,
			'imagelinks',
			array( NS_FILE . ' AS namespace', 'il_to AS title' ),
			array( 'page_id=il_from' )
		);
	}

	/**
	 * Shared worker for the link-expansion methods: for every input page,
	 * record the page itself and every target found in the given link table.
	 *
	 * @param $inputPages Array: list of page names to expand
	 * @param $pageSet Array: associative array indexed by page name
	 * @param $table String: link table joined against 'page'
	 * @param $fields Array: select list; must yield 'namespace' and 'title'
	 * @param $join Array: join condition(s) between 'page' and $table
	 * @return Array: the extended page set
	 */
	private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = wfGetDB( DB_SLAVE );

		foreach ( $inputPages as $page ) {
			$title = Title::newFromText( $page );

			if ( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				$result = $dbr->select(
					array( 'page', $table ),
					$fields,
					array_merge(
						$join,
						array(
							'page_namespace' => $title->getNamespace(),
							'page_title' => $title->getDBkey()
						)
					),
					__METHOD__
				);

				foreach ( $result as $row ) {
					$template = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$template->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

}