MediaWiki
REL1_19
|
<?php
/**
 * Maintenance script that writes sitemap files (one or more per namespace)
 * plus a sitemap index file referencing them.
 */

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

class GenerateSitemap extends Maintenance {
	/** Key into $priorities holding the fallback priority for custom main namespaces */
	const GS_MAIN = -2;

	/** Key into $priorities holding the fallback priority for custom talk namespaces */
	const GS_TALK = -1;

	/**
	 * Maximum number of page rows per sitemap file (set to 50000 in execute())
	 *
	 * @var int
	 */
	public $url_limit;

	/**
	 * Maximum uncompressed size of a sitemap file in bytes (set to 10 * 2^20 in execute())
	 *
	 * @var int
	 */
	public $size_limit;

	/**
	 * File system directory the files are written to, with a trailing separator
	 *
	 * @var string
	 */
	public $fspath;

	/**
	 * URL prefix prepended to file names in the index; empty or ending in '/'
	 *
	 * @var string
	 */
	public $urlpath;

	/**
	 * Whether the sitemap files are written through the gz* functions
	 *
	 * @var bool
	 */
	public $compress;

	/**
	 * Byte lengths computed by generateLimit(): array( opening markup,
	 * one entry for a very long title, closing markup )
	 *
	 * @var array
	 */
	public $limit = array();

	/**
	 * Map of namespace id => priority string
	 *
	 * @var array
	 */
	public $priorities = array();

	/**
	 * List of namespace ids to generate sitemaps for
	 *
	 * @var array
	 */
	public $namespaces = array();

	/**
	 * ISO 8601 timestamp of the run, used as <lastmod> in the index
	 *
	 * @var string
	 */
	public $timestamp;

	/**
	 * Database handle obtained from wfGetDB( DB_SLAVE )
	 *
	 * @var object
	 */
	public $dbr;

	/**
	 * File handle of the sitemap index file
	 *
	 * @var resource
	 */
	public $findex;

	/**
	 * Handle of the sitemap file currently being written, or false if none
	 *
	 * @var resource|bool
	 */
	public $file;

	/**
	 * Site identifier used in the generated file names
	 *
	 * @var string
	 */
	private $identifier;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Creates a sitemap for the site";
		$this->addOption( 'fspath', 'The file system path to save to, e.g. /tmp/sitemap; defaults to current directory', false, true );
		$this->addOption( 'urlpath', 'The URL path corresponding to --fspath, prepended to filenames in the index; defaults to an empty string', false, true );
		$this->addOption( 'compress', 'Compress the sitemap files, can take value yes|no, default yes', false, true );
		$this->addOption( 'identifier', 'What site identifier to use for the wiki, defaults to $wgDBname', false, true );
	}

	/**
	 * Read the options, open the index file and generate all sitemaps.
	 */
	public function execute() {
		$this->setNamespacePriorities();
		$this->url_limit = 50000;
		$this->size_limit = pow( 2, 20 ) * 10;
		$this->fspath = self::init_path( $this->getOption( 'fspath', getcwd() ) );

		$this->urlpath = $this->getOption( 'urlpath', "" );
		if ( $this->urlpath !== "" && substr( $this->urlpath, -1 ) !== '/' ) {
			// File names are appended directly, so make sure a separator is there
			$this->urlpath .= '/';
		}

		$this->identifier = $this->getOption( 'identifier', wfWikiID() );
		// Anything other than an explicit 'no' enables compression
		$this->compress = $this->getOption( 'compress', 'yes' ) !== 'no';
		$this->dbr = wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() );

		$indexPath = "{$this->fspath}sitemap-index-{$this->identifier}.xml";
		$this->findex = fopen( $indexPath, 'wb' );
		if ( $this->findex === false ) {
			// Fail early instead of calling fwrite()/fclose() on false later on
			wfDebugDieBacktrace( __METHOD__ . " error opening file $indexPath with flags wb. Check permissions?" );
		}

		$this->main();
	}

	/**
	 * Fill $this->priorities with the built-in defaults and then apply the
	 * overrides from $wgSitemapNamespacesPriorities, clamped to 0.0 - 1.0.
	 */
	private function setNamespacePriorities() {
		global $wgSitemapNamespacesPriorities;

		// Custom main namespaces
		$this->priorities[self::GS_MAIN] = '0.5';
		// Custom talk namespaces
		$this->priorities[self::GS_TALK] = '0.1';
		// MediaWiki standard namespaces
		$this->priorities[NS_MAIN] = '1.0';
		$this->priorities[NS_TALK] = '0.1';
		$this->priorities[NS_USER] = '0.5';
		$this->priorities[NS_USER_TALK] = '0.1';
		$this->priorities[NS_PROJECT] = '0.5';
		$this->priorities[NS_PROJECT_TALK] = '0.1';
		$this->priorities[NS_FILE] = '0.5';
		$this->priorities[NS_FILE_TALK] = '0.1';
		$this->priorities[NS_MEDIAWIKI] = '0.0';
		$this->priorities[NS_MEDIAWIKI_TALK] = '0.1';
		$this->priorities[NS_TEMPLATE] = '0.0';
		$this->priorities[NS_TEMPLATE_TALK] = '0.1';
		$this->priorities[NS_HELP] = '0.5';
		$this->priorities[NS_HELP_TALK] = '0.1';
		$this->priorities[NS_CATEGORY] = '0.5';
		$this->priorities[NS_CATEGORY_TALK] = '0.1';

		// Custom priorities; is_array() also covers an unset/null global,
		// which would otherwise make foreach emit a warning
		if ( is_array( $wgSitemapNamespacesPriorities ) ) {
			foreach ( $wgSitemapNamespacesPriorities as $namespace => $priority ) {
				$float = floatval( $priority );
				if ( $float > 1.0 ) {
					$priority = '1.0';
				} elseif ( $float < 0.0 ) {
					$priority = '0.0';
				}
				$this->priorities[$namespace] = $priority;
			}
		}
	}

	/**
	 * Create the target directory if needed and return its resolved path
	 * with a trailing directory separator.
	 *
	 * @param $fspath string|null Directory to write to
	 * @return string|null Null if $fspath is not set
	 */
	private static function init_path( $fspath ) {
		if ( !isset( $fspath ) ) {
			return null;
		}

		# Create directory if needed
		if ( $fspath && !is_dir( $fspath ) ) {
			if ( !wfMkdirParents( $fspath, null, __METHOD__ ) ) {
				die( "Can not create directory $fspath.\n" );
			}
		}

		$resolved = realpath( $fspath );
		if ( $resolved === false ) {
			// Without this check the result would be "/" (false . separator)
			die( "Can not resolve directory $fspath.\n" );
		}

		return $resolved . DIRECTORY_SEPARATOR;
	}

	/**
	 * Populate $this->namespaces, either from $wgSitemapNamespaces or with
	 * every namespace that occurs in the page table.
	 */
	function generateNamespaces() {
		// Only generate for specific namespaces if $wgSitemapNamespaces is an array.
		global $wgSitemapNamespaces;
		if ( is_array( $wgSitemapNamespaces ) ) {
			$this->namespaces = $wgSitemapNamespaces;
			return;
		}

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			__METHOD__,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		foreach ( $res as $row ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Get the priority of a namespace, falling back to guessPriority()
	 * when none is configured.
	 *
	 * @param $namespace int Namespace id
	 * @return string
	 */
	function priority( $namespace ) {
		if ( isset( $this->priorities[$namespace] ) ) {
			return $this->priorities[$namespace];
		}
		return $this->guessPriority( $namespace );
	}

	/**
	 * Pick the generic main or talk priority for a namespace, depending on
	 * what MWNamespace::isMain() reports for it.
	 *
	 * @param $namespace int Namespace id
	 * @return string
	 */
	function guessPriority( $namespace ) {
		$key = MWNamespace::isMain( $namespace ) ? self::GS_MAIN : self::GS_TALK;
		return $this->priorities[$key];
	}

	/**
	 * Select namespace, title and touched timestamp of every page in a namespace.
	 *
	 * @param $namespace int Namespace id
	 * @return mixed Result of the database select
	 */
	function getPageRes( $namespace ) {
		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_touched',
			),
			array( 'page_namespace' => $namespace ),
			__METHOD__
		);
	}

	/**
	 * Write the sitemap files for every namespace and the index listing them.
	 */
	public function main() {
		global $wgContLang;

		fwrite( $this->findex, $this->openIndex() );

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			$this->file = false;
			$this->generateLimit( $namespace );
			$length = $this->limit[0];
			$i = $smcount = 0;

			$fns = $wgContLang->getFormattedNsText( $namespace );
			$this->output( "$namespace ($fns)\n" );

			foreach ( $res as $row ) {
				// Start a new file for the first row, once the row limit is
				// hit, or when one more worst-case entry plus the closing
				// markup would exceed the size limit.
				// NOTE(review): $i counts page rows only, while the variant
				// loop below writes extra entries per row, so a file can hold
				// more than url_limit URLs on wikis with language variants.
				if ( $i++ === 0
					|| $i === $this->url_limit + 1
					|| $length + $this->limit[1] + $this->limit[2] > $this->size_limit
				) {
					if ( $this->file !== false ) {
						$this->write( $this->file, $this->closeFile() );
						$this->close( $this->file );
					}
					$filename = $this->sitemapFilename( $namespace, $smcount++ );
					$this->file = $this->open( $this->fspath . $filename, 'wb' );
					$this->write( $this->file, $this->openFile() );
					fwrite( $this->findex, $this->indexEntry( $filename ) );
					$this->output( "\t{$this->fspath}{$filename}\n" );
					$length = $this->limit[0];
					$i = 1;
				}

				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = wfTimestamp( TS_ISO_8601, $row->page_touched );
				$entry = $this->fileEntry( $title->getCanonicalURL(), $date, $this->priority( $namespace ) );
				$length += strlen( $entry );
				$this->write( $this->file, $entry );

				// generate pages for language variants
				if ( $wgContLang->hasVariants() ) {
					$variants = $wgContLang->getVariants();
					foreach ( $variants as $vCode ) {
						if ( $vCode == $wgContLang->getCode() ) {
							// we don't want default variant
							continue;
						}
						$entry = $this->fileEntry( $title->getCanonicalURL( '', $vCode ), $date, $this->priority( $namespace ) );
						$length += strlen( $entry );
						$this->write( $this->file, $entry );
					}
				}
			}

			if ( $this->file ) {
				$this->write( $this->file, $this->closeFile() );
				$this->close( $this->file );
			}
		}

		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * Open a sitemap file with gzopen() or fopen(), depending on $this->compress.
	 * Aborts through wfDebugDieBacktrace() if the file can not be opened.
	 *
	 * @param $file string Path of the file
	 * @param $flags string Mode passed to gzopen()/fopen()
	 * @return resource
	 */
	function open( $file, $flags ) {
		$resource = $this->compress ? gzopen( $file, $flags ) : fopen( $file, $flags );
		if ( $resource === false ) {
			wfDebugDieBacktrace( __METHOD__ . " error opening file $file with flags $flags. Check permissions?" );
		}
		return $resource;
	}

	/**
	 * Write a string with gzwrite() or fwrite(), depending on $this->compress.
	 *
	 * @param $handle resource Handle returned by open()
	 * @param $str string Data to write
	 */
	function write( &$handle, $str ) {
		if ( $handle === true || $handle === false ) {
			wfDebugDieBacktrace( __METHOD__ . " was passed a boolean as a file handle.\n" );
		}
		if ( $this->compress ) {
			gzwrite( $handle, $str );
		} else {
			fwrite( $handle, $str );
		}
	}

	/**
	 * Close a handle with gzclose() or fclose(), depending on $this->compress.
	 *
	 * @param $handle resource Handle returned by open()
	 */
	function close( &$handle ) {
		if ( $this->compress ) {
			gzclose( $handle );
		} else {
			fclose( $handle );
		}
	}

	/**
	 * Build the file name of a sitemap file.
	 *
	 * @param $namespace int Namespace id
	 * @param $count int Running number of the file within the namespace
	 * @return string
	 */
	function sitemapFilename( $namespace, $count ) {
		$ext = $this->compress ? '.gz' : '';
		return "sitemap-{$this->identifier}-NS_$namespace-$count.xml$ext";
	}

	/**
	 * Return the XML declaration line.
	 *
	 * @return string
	 */
	function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * Return the XML namespace URI used for both index and sitemap files.
	 *
	 * @return string
	 */
	function xmlSchema() {
		return 'http://www.sitemaps.org/schemas/sitemap/0.9';
	}

	/**
	 * Return the opening markup of the sitemap index file.
	 *
	 * @return string
	 */
	function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the index entry for one sitemap file.
	 *
	 * @param $filename string File name as returned by sitemapFilename()
	 * @return string
	 */
	function indexEntry( $filename ) {
		// Escape the location: --urlpath and --identifier are free-form user
		// input and must not break the XML
		$loc = htmlspecialchars( $this->urlpath . $filename );
		return
			"\t<sitemap>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * Return the closing markup of the sitemap index file.
	 *
	 * @return string
	 */
	function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * Return the opening markup of a sitemap file.
	 *
	 * @return string
	 */
	function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Return the sitemap entry for one URL.
	 *
	 * @param $url string Absolute URL of the page
	 * @param $date string Last modification date (ISO 8601)
	 * @param $priority string Priority value for the entry
	 * @return string
	 */
	function fileEntry( $url, $date, $priority ) {
		// $url may contain characters that are special in XML, such as the
		// ampersand of a query string, so it has to be entity-escaped
		$loc = htmlspecialchars( $url );
		return
			"\t<url>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * Return the closing markup of a sitemap file.
	 *
	 * @return string
	 */
	function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Populate $this->limit with the byte lengths of the opening markup,
	 * of an entry for a very long title in this namespace, and of the
	 * closing markup. main() uses them to decide when to start a new file.
	 *
	 * @param $namespace int Namespace id
	 */
	function generateLimit( $namespace ) {
		// bug 17961: make a title with the longest possible URL in this namespace
		$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );

		$this->limit = array(
			strlen( $this->openFile() ),
			strlen( $this->fileEntry( $title->getCanonicalURL(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), $this->priority( $namespace ) ) ),
			strlen( $this->closeFile() )
		);
	}
}

$maintClass = "GenerateSitemap";
require_once( RUN_MAINTENANCE_IF_MAIN );