MediaWiki  REL1_19
updateCollation.php
Go to the documentation of this file.
00001 <?php
00027 #$optionsWithArgs = array( 'begin', 'max-slave-lag' );
00028 
00029 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
00030 
/**
 * Maintenance script that rewrites cl_sortkey, cl_sortkey_prefix,
 * cl_collation and cl_type for rows of the categorylinks table.
 *
 * Without --force only rows matching the collation condition (either
 * "cl_collation != current collation" or "cl_collation = previous-collation")
 * are selected; with --force every row joined to a page is processed in
 * (cl_from, cl_to) order.
 */
class UpdateCollation extends Maintenance {
	const BATCH_SIZE = 50; // Number of rows to process in one batch
	const SYNC_INTERVAL = 20; // Wait for slaves after this many batches

	/**
	 * Set the script description and register the command-line options
	 * ('force' and 'previous-collation').
	 */
	public function __construct() {
		parent::__construct();

		global $wgCategoryCollation;
		$this->mDescription = <<<TEXT
This script will find all rows in the categorylinks table whose collation is
out-of-date (cl_collation != '$wgCategoryCollation') and repopulate cl_sortkey
using the page title and cl_sortkey_prefix.  If everything's collation is
up-to-date, it will do nothing.
TEXT;

		$this->addOption( 'force', 'Run on all rows, even if the collation is ' .
			'supposed to be up-to-date.' );
		$this->addOption( 'previous-collation', 'Set the previous value of ' .
			'$wgCategoryCollation here to speed up this script, especially if your ' .
			'categorylinks table is large. This will only update rows with that ' .
			'collation, though, so it may miss out-of-date rows with a different, ' .
			'even older collation.', false, true );
	}

	/**
	 * Select categorylinks rows in batches of BATCH_SIZE, recompute their
	 * sortkey data and write it back, one transaction per batch. After every
	 * SYNC_INTERVAL batches wfWaitForSlaves() is called. The loop stops as
	 * soon as a batch comes back with fewer than BATCH_SIZE rows.
	 */
	public function execute() {
		global $wgCategoryCollation, $wgMiserMode;

		$dbw = $this->getDB( DB_MASTER );
		$force = $this->getOption( 'force' );

		$options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' );

		// Always start from an explicit empty array instead of relying on
		// implicit array creation in one of the branches below.
		$collationConds = array();

		if ( $force ) {
			// A stable order is required because --force pages through the
			// table with the (cl_from, cl_to) cursor built further down.
			$options['ORDER BY'] = 'cl_from, cl_to';
		} else {
			if ( $this->hasOption( 'previous-collation' ) ) {
				$collationConds['cl_collation'] = $this->getOption( 'previous-collation' );
			} else {
				$collationConds[] =
					'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation );
			}

			// The up-front COUNT(*) is skipped when $wgMiserMode is set.
			if ( !$wgMiserMode ) {
				$count = $dbw->selectField(
					'categorylinks',
					'COUNT(*)',
					$collationConds,
					__METHOD__
				);

				if ( $count == 0 ) {
					$this->output( "Collations up-to-date.\n" );
					return;
				}
				$this->output( "Fixing collation for $count rows.\n" );
			}
		}

		$count = 0;
		$batchCount = 0;
		$batchConds = array();
		do {
			$this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
			$res = $dbw->select(
				array( 'categorylinks', 'page' ),
				array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
					'cl_sortkey', 'page_namespace', 'page_title'
				),
				array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
				__METHOD__,
				$options
			);
			$this->output( " processing..." );

			// Last row seen in THIS batch. Tracked explicitly so an empty
			// result set never leads to reading an unassigned loop variable.
			$lastRow = null;

			$dbw->begin();
			foreach ( $res as $row ) {
				$lastRow = $row;
				$title = Title::newFromRow( $row );
				$prefix = $this->getSortkeyPrefix( $row, $title );

				$dbw->update(
					'categorylinks',
					array(
						'cl_sortkey' => Collation::singleton()->getSortKey(
							$title->getCategorySortkey( $prefix ) ),
						'cl_sortkey_prefix' => $prefix,
						'cl_collation' => $wgCategoryCollation,
						# cl_type will be wrong for lots of pages if cl_collation is 0,
						# so let's update it while we're here.
						'cl_type' => $this->getLinkType( $title ),
						// NOTE(review): self-assignment, presumably so the
						// database does not auto-touch cl_timestamp - confirm
						// against the table definition.
						'cl_timestamp = cl_timestamp',
					),
					array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
					__METHOD__
				);
			}
			$dbw->commit();

			if ( $force && $lastRow ) {
				// Continue strictly after the last processed (cl_from, cl_to) pair.
				$encFrom = $dbw->addQuotes( $lastRow->cl_from );
				$encTo = $dbw->addQuotes( $lastRow->cl_to );
				$batchConds = array(
					"(cl_from = $encFrom AND cl_to > $encTo) " .
					" OR cl_from > $encFrom" );
			}

			$count += $res->numRows();
			$this->output( "$count done.\n" );

			if ( ++$batchCount % self::SYNC_INTERVAL == 0 ) {
				$this->output( "Waiting for slaves ... " );
				wfWaitForSlaves();
				$this->output( "done\n" );
			}
		} while ( $res->numRows() == self::BATCH_SIZE );
	}

	/**
	 * Work out the sortkey prefix to store for a categorylinks row.
	 *
	 * Rows with a non-empty cl_collation keep their cl_sortkey_prefix. Rows
	 * with an empty cl_collation are old-style: if their cl_sortkey is just
	 * the page's text or prefixed text there is no custom prefix, otherwise
	 * the old sortkey itself is used as the prefix.
	 *
	 * @param $row Object: result row providing cl_collation, cl_sortkey and
	 *   cl_sortkey_prefix
	 * @param $title Title built from the same row
	 * @return String
	 */
	private function getSortkeyPrefix( $row, $title ) {
		if ( $row->cl_collation ) {
			return $row->cl_sortkey_prefix;
		}

		# This is an old-style row, so the sortkey needs to be converted.
		// Strict comparison: with ==, PHP treats numeric-looking strings
		// such as "007" and "7" as equal, which would silently discard a
		// custom sortkey.
		$oldSortkey = (string)$row->cl_sortkey;
		if ( $oldSortkey === $title->getText()
			|| $oldSortkey === $title->getPrefixedText()
		) {
			return '';
		}

		# Custom sortkey, use it as a prefix
		return $row->cl_sortkey;
	}

	/**
	 * Map the namespace of the linking page to a cl_type value.
	 *
	 * @param $title Title
	 * @return String: 'subcat' for NS_CATEGORY, 'file' for NS_FILE,
	 *   'page' for everything else
	 */
	private function getLinkType( $title ) {
		$ns = $title->getNamespace();
		if ( $ns == NS_CATEGORY ) {
			return 'subcat';
		}
		if ( $ns == NS_FILE ) {
			return 'file';
		}
		return 'page';
	}
}
00167 
// Name the class defined in this file, then include whatever file the
// RUN_MAINTENANCE_IF_MAIN constant points at.
// NOTE(review): presumably that include reads $maintClass and runs the
// script when invoked directly - confirm in Maintenance.php.
$maintClass = 'UpdateCollation';
require_once RUN_MAINTENANCE_IF_MAIN;