MediaWiki
REL1_20
|
<?php
/**
 * Maintenance script that creates a sitemap for the site: a set of sitemap
 * files (one or more per namespace) and a sitemap index that lists them.
 */

require_once( __DIR__ . '/Maintenance.php' );

/**
 * Writes sitemap XML files for every page of the selected namespaces, plus a
 * sitemap index file referencing each generated file.
 */
class GenerateSitemap extends Maintenance {
	/** Key in $priorities holding the fallback priority for subject namespaces */
	const GS_MAIN = -2;
	/** Key in $priorities holding the fallback priority for talk namespaces */
	const GS_TALK = -1;

	/**
	 * Number of pages after which a new sitemap file is started
	 * (set to 50000 in execute()).
	 *
	 * @var int
	 */
	public $url_limit;

	/**
	 * Size in bytes after which a new sitemap file is started
	 * (set to 10 * 2^20 in execute()).
	 *
	 * @var int
	 */
	public $size_limit;

	/**
	 * Directory the files are written to, including a trailing directory separator.
	 *
	 * @var string
	 */
	public $fspath;

	/**
	 * Prefix prepended to filenames in the index; empty or ending in '/'.
	 *
	 * @var string
	 */
	public $urlpath;

	/**
	 * Whether the sitemap files are written through the gz* functions.
	 *
	 * @var bool
	 */
	public $compress;

	/**
	 * Whether rows with page_is_redirect set are left out of the sitemap.
	 *
	 * @var bool
	 */
	public $skipRedirects;

	/**
	 * Byte lengths computed by generateLimit():
	 * [0] file header, [1] longest expected entry, [2] file footer.
	 *
	 * @var array
	 */
	public $limit = array();

	/**
	 * Map of namespace index => priority value written to <priority>.
	 *
	 * @var array
	 */
	public $priorities = array();

	/**
	 * Namespace indexes to generate sitemaps for.
	 *
	 * @var array
	 */
	public $namespaces = array();

	/**
	 * ISO 8601 timestamp of this run, written as <lastmod> in the index.
	 *
	 * @var string
	 */
	public $timestamp;

	/**
	 * Database connection obtained with wfGetDB( DB_SLAVE ).
	 *
	 * @var object
	 */
	public $dbr;

	/**
	 * Handle of the sitemap index file.
	 *
	 * @var resource
	 */
	public $findex;

	/**
	 * Handle of the sitemap file currently being written, or false if none is open.
	 *
	 * @var resource|bool
	 */
	public $file;

	/**
	 * Site identifier used in the generated filenames.
	 *
	 * @var string
	 */
	private $identifier;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Creates a sitemap for the site";
		$this->addOption( 'fspath', 'The file system path to save to, e.g. /tmp/sitemap; defaults to current directory', false, true );
		$this->addOption( 'urlpath', 'The URL path corresponding to --fspath, prepended to filenames in the index; defaults to an empty string', false, true );
		$this->addOption( 'compress', 'Compress the sitemap files, can take value yes|no, default yes', false, true );
		$this->addOption( 'skip-redirects', 'Do not include redirecting articles in the sitemap' );
		$this->addOption( 'identifier', 'What site identifier to use for the wiki, defaults to $wgDBname', false, true );
	}

	/**
	 * Read the options, open the index file and generate everything.
	 */
	public function execute() {
		$this->setNamespacePriorities();
		$this->url_limit = 50000;
		$this->size_limit = pow( 2, 20 ) * 10;
		$this->fspath = self::init_path( $this->getOption( 'fspath', getcwd() ) );

		// Make sure a non-empty URL prefix ends with a slash so that
		// filenames can be appended to it directly.
		$this->urlpath = $this->getOption( 'urlpath', "" );
		if ( $this->urlpath !== "" && substr( $this->urlpath, -1 ) !== '/' ) {
			$this->urlpath .= '/';
		}

		$this->identifier = $this->getOption( 'identifier', wfWikiID() );
		// Anything other than an explicit "no" enables compression
		$this->compress = $this->getOption( 'compress', 'yes' ) !== 'no';
		$this->skipRedirects = $this->getOption( 'skip-redirects', false ) !== false;
		$this->dbr = wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() );

		$indexPath = "{$this->fspath}sitemap-index-{$this->identifier}.xml";
		$this->findex = fopen( $indexPath, 'wb' );
		if ( $this->findex === false ) {
			// Without this check main() would call fwrite()/fclose() on false
			$this->error( "Can not open $indexPath for writing. Check permissions?", 1 );
		}

		$this->main();
	}

	/**
	 * Fill $this->priorities with the built-in defaults, then apply the
	 * overrides from $wgSitemapNamespacesPriorities, clamped to 0.0 - 1.0.
	 */
	private function setNamespacePriorities() {
		global $wgSitemapNamespacesPriorities;

		// Custom main namespaces
		$this->priorities[self::GS_MAIN] = '0.5';
		// Custom talk namespaces
		$this->priorities[self::GS_TALK] = '0.1';
		// MediaWiki standard namespaces
		$this->priorities[NS_MAIN] = '1.0';
		$this->priorities[NS_TALK] = '0.1';
		$this->priorities[NS_USER] = '0.5';
		$this->priorities[NS_USER_TALK] = '0.1';
		$this->priorities[NS_PROJECT] = '0.5';
		$this->priorities[NS_PROJECT_TALK] = '0.1';
		$this->priorities[NS_FILE] = '0.5';
		$this->priorities[NS_FILE_TALK] = '0.1';
		$this->priorities[NS_MEDIAWIKI] = '0.0';
		$this->priorities[NS_MEDIAWIKI_TALK] = '0.1';
		$this->priorities[NS_TEMPLATE] = '0.0';
		$this->priorities[NS_TEMPLATE_TALK] = '0.1';
		$this->priorities[NS_HELP] = '0.5';
		$this->priorities[NS_HELP_TALK] = '0.1';
		$this->priorities[NS_CATEGORY] = '0.5';
		$this->priorities[NS_CATEGORY_TALK] = '0.1';

		// Custom priorities; anything that is not an array (such as false)
		// means "no overrides" and must not be iterated.
		if ( is_array( $wgSitemapNamespacesPriorities ) ) {
			foreach ( $wgSitemapNamespacesPriorities as $namespace => $priority ) {
				$float = floatval( $priority );
				if ( $float > 1.0 ) {
					$priority = '1.0';
				} elseif ( $float < 0.0 ) {
					$priority = '0.0';
				}
				$this->priorities[$namespace] = $priority;
			}
		}
	}

	/**
	 * Create the target directory if needed and return its resolved path.
	 *
	 * @param $fspath string|null Directory to write the files to
	 * @return string|null Resolved path with a trailing directory separator,
	 *  or null if $fspath is not set
	 */
	private static function init_path( $fspath ) {
		if ( !isset( $fspath ) ) {
			return null;
		}

		# Create directory if needed
		if ( $fspath && !is_dir( $fspath ) ) {
			if ( !wfMkdirParents( $fspath, null, __METHOD__ ) ) {
				die( "Can not create directory $fspath.\n" );
			}
		}

		$resolved = realpath( $fspath );
		if ( $resolved === false ) {
			// A false result would otherwise turn into a bare directory
			// separator, i.e. the file system root.
			die( "Can not resolve directory $fspath.\n" );
		}

		return $resolved . DIRECTORY_SEPARATOR;
	}

	/**
	 * Fill $this->namespaces, either from $wgSitemapNamespaces or with every
	 * namespace that occurs in the page table.
	 */
	public function generateNamespaces() {
		// Only generate for specific namespaces if $wgSitemapNamespaces is an array.
		global $wgSitemapNamespaces;
		if ( is_array( $wgSitemapNamespaces ) ) {
			$this->namespaces = $wgSitemapNamespaces;
			return;
		}

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			__METHOD__,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		foreach ( $res as $row ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Get the priority of a namespace, falling back to guessPriority() when
	 * none is configured for it.
	 *
	 * @param $namespace int Namespace index
	 * @return string Priority value
	 */
	public function priority( $namespace ) {
		return isset( $this->priorities[$namespace] )
			? $this->priorities[$namespace]
			: $this->guessPriority( $namespace );
	}

	/**
	 * Fallback priority for a namespace without an explicit entry: the
	 * GS_MAIN value for subject namespaces, the GS_TALK value otherwise.
	 *
	 * @param $namespace int Namespace index
	 * @return string Priority value
	 */
	public function guessPriority( $namespace ) {
		return MWNamespace::isSubject( $namespace )
			? $this->priorities[self::GS_MAIN]
			: $this->priorities[self::GS_TALK];
	}

	/**
	 * Select the rows of all pages in a namespace.
	 *
	 * @param $namespace int Namespace index
	 * @return mixed Result of the select() call, iterated row by row in main()
	 */
	public function getPageRes( $namespace ) {
		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_touched',
				'page_is_redirect'
			),
			array( 'page_namespace' => $namespace ),
			__METHOD__
		);
	}

	/**
	 * Write the sitemap index and, for each namespace, its sitemap files.
	 */
	public function main() {
		global $wgContLang;

		fwrite( $this->findex, $this->openIndex() );

		// The variant codes do not depend on the page, so work them out once.
		// The content language's own code is left out: that is the default URL.
		$variantCodes = array();
		if ( $wgContLang->hasVariants() ) {
			$defaultCode = $wgContLang->getCode();
			foreach ( $wgContLang->getVariants() as $vCode ) {
				if ( $vCode !== $defaultCode ) {
					$variantCodes[] = $vCode;
				}
			}
		}

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			$this->file = false;
			$this->generateLimit( $namespace );
			$priority = $this->priority( $namespace );
			$length = $this->limit[0];
			$i = $smcount = 0;

			$fns = $wgContLang->getFormattedNsText( $namespace );
			$this->output( "$namespace ($fns)\n" );
			$skippedRedirects = 0; // Number of redirects skipped for that namespace
			foreach ( $res as $row ) {
				if ( $this->skipRedirects && $row->page_is_redirect ) {
					$skippedRedirects++;
					continue;
				}

				// Start a new file for the first page, once the page count
				// passes the URL limit, or when another worst-case entry plus
				// the footer would no longer fit into the size limit.
				// NOTE(review): $i counts pages and the size check reserves
				// room for one entry only, but each page also writes one entry
				// per language variant below; with variants enabled a file may
				// exceed both limits. TODO confirm and account for variants.
				if ( $i++ === 0
					|| $i === $this->url_limit + 1
					|| $length + $this->limit[1] + $this->limit[2] > $this->size_limit
				) {
					if ( $this->file !== false ) {
						$this->write( $this->file, $this->closeFile() );
						$this->close( $this->file );
					}
					$filename = $this->sitemapFilename( $namespace, $smcount++ );
					$this->file = $this->open( $this->fspath . $filename, 'wb' );
					$this->write( $this->file, $this->openFile() );
					fwrite( $this->findex, $this->indexEntry( $filename ) );
					$this->output( "\t$this->fspath$filename\n" );
					$length = $this->limit[0];
					$i = 1;
				}

				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = wfTimestamp( TS_ISO_8601, $row->page_touched );
				$entry = $this->fileEntry( $title->getCanonicalURL(), $date, $priority );
				$length += strlen( $entry );
				$this->write( $this->file, $entry );

				// generate pages for language variants
				foreach ( $variantCodes as $vCode ) {
					$entry = $this->fileEntry( $title->getCanonicalURL( '', $vCode ), $date, $priority );
					$length += strlen( $entry );
					$this->write( $this->file, $entry );
				}
			}

			if ( $this->skipRedirects && $skippedRedirects > 0 ) {
				$this->output( " skipped $skippedRedirects redirect(s)\n" );
			}

			if ( $this->file ) {
				$this->write( $this->file, $this->closeFile() );
				$this->close( $this->file );
			}
		}

		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * Open a sitemap file with gzopen() or fopen(), depending on $this->compress.
	 *
	 * @param $file string Path of the file to open
	 * @param $flags string Mode passed on to gzopen()/fopen()
	 * @return resource File handle
	 */
	public function open( $file, $flags ) {
		$resource = $this->compress ? gzopen( $file, $flags ) : fopen( $file, $flags );
		if ( $resource === false ) {
			wfDebugDieBacktrace( __METHOD__ . " error opening file $file with flags $flags. Check permissions?" );
		}
		return $resource;
	}

	/**
	 * Write a string to a handle returned by open().
	 *
	 * @param $handle resource File handle
	 * @param $str string Data to write
	 */
	public function write( &$handle, $str ) {
		if ( is_bool( $handle ) ) {
			wfDebugDieBacktrace( __METHOD__ . " was passed a boolean as a file handle.\n" );
		}
		if ( $this->compress ) {
			gzwrite( $handle, $str );
		} else {
			fwrite( $handle, $str );
		}
	}

	/**
	 * Close a handle returned by open().
	 *
	 * @param $handle resource File handle
	 */
	public function close( &$handle ) {
		if ( $this->compress ) {
			gzclose( $handle );
		} else {
			fclose( $handle );
		}
	}

	/**
	 * Build the name of a sitemap file.
	 *
	 * @param $namespace int Namespace index
	 * @param $count int Sequence number of the file within the namespace
	 * @return string Filename, with a .gz suffix when compressing
	 */
	public function sitemapFilename( $namespace, $count ) {
		$ext = $this->compress ? '.gz' : '';
		return "sitemap-{$this->identifier}-NS_$namespace-$count.xml$ext";
	}

	/**
	 * Return the XML declaration used by the index and the sitemap files.
	 *
	 * @return string
	 */
	public function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * Return the XML namespace URI of the sitemap schema.
	 *
	 * @return string
	 */
	public function xmlSchema() {
		return 'http://www.sitemaps.org/schemas/sitemap/0.9';
	}

	/**
	 * Return the opening of the sitemap index file.
	 *
	 * @return string
	 */
	public function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the index entry for one sitemap file.
	 *
	 * @param $filename string Name of the sitemap file
	 * @return string
	 */
	public function indexEntry( $filename ) {
		// The location is text content of an XML element, so characters
		// such as ampersands have to be escaped.
		$loc = htmlspecialchars( $this->urlpath . $filename );
		return
			"\t<sitemap>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * Return the closing tag of the sitemap index file.
	 *
	 * @return string
	 */
	public function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * Return the opening of a sitemap file.
	 *
	 * @return string
	 */
	public function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the sitemap entry for one URL.
	 *
	 * @param $url string URL of the page (unescaped)
	 * @param $date string Value written to <lastmod>
	 * @param $priority string Value written to <priority>
	 * @return string
	 */
	public function fileEntry( $url, $date, $priority ) {
		// $url may contain characters such as ampersands (for example in
		// query strings), which would break the XML if written as-is.
		$loc = htmlspecialchars( $url );
		return
			"\t<url>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * Return the closing tag of a sitemap file.
	 *
	 * @return string
	 */
	public function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Fill $this->limit with the byte lengths of the file header, of an
	 * entry for a maximum-length title in the given namespace, and of the
	 * file footer. main() uses them to decide when to start a new file.
	 *
	 * @param $namespace int Namespace index
	 */
	public function generateLimit( $namespace ) {
		// bug 17961: make a title with the longest possible URL in this namespace
		$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );

		$this->limit = array(
			strlen( $this->openFile() ),
			strlen( $this->fileEntry( $title->getCanonicalURL(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), $this->priority( $namespace ) ) ),
			strlen( $this->closeFile() )
		);
	}
}

$maintClass = "GenerateSitemap";
require_once( RUN_MAINTENANCE_IF_MAIN );