MediaWiki  REL1_19
userDupes.inc
Go to the documentation of this file.
00001 <?php
/**
 * Finds, reports and optionally removes duplicate rows in the `user` table
 * (several rows sharing one user_name) so that a unique index on user_name
 * can be applied afterwards.
 *
 * All progress messages are sent through the output callback handed to the
 * constructor.
 */
class UserDupes {
	/** Database handle; every query issued by this class goes through it. */
	public $db;

	/** Number of duplicate records that were found to have edits attached. */
	public $reassigned = 0;

	/** Number of duplicate records that were (or, in dry-run mode, could be) deleted. */
	public $trimmed = 0;

	/** Number of duplicate records that still had edits after reassignment. */
	public $failed = 0;

	/** Callable that receives every message produced via out(). */
	private $outputCallback;

	/**
	 * @param $database Object: database handle used for all queries
	 * @param $outputCallback Callable: invoked with one string argument per message
	 */
	public function __construct( &$database, $outputCallback ) {
		$this->db = $database;
		$this->outputCallback = $outputCallback;
	}

	/**
	 * Forward a message to the configured output callback.
	 *
	 * @param $str String: text to emit
	 */
	private function out( $str ) {
		call_user_func( $this->outputCallback, $str );
	}

	/**
	 * Check whether the user table's user_name index is unique.
	 * Emits a warning and returns false when the index is missing altogether.
	 *
	 * @return Boolean
	 */
	public function hasUniqueIndex() {
		$info = $this->db->indexInfo( 'user', 'user_name', __METHOD__ );
		if ( !$info ) {
			$this->out( "WARNING: doesn't seem to have user_name index at all!\n" );
			return false;
		}

		# Note the inverted sense of the flag: 'Non_unique' is 0 for
		# *unique* indexes and 1 for *non-unique* ones.
		return ( $info[0]->Non_unique == 0 );
	}

	/**
	 * Run checkDupes() in deleting mode.
	 *
	 * @return Boolean: result of checkDupes( true )
	 */
	public function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Look for duplicate user records, report on them and, when $doDelete is
	 * set, reassign their edits to the kept record and delete them.
	 *
	 * The affected tables are locked for the duration of the scan.
	 *
	 * @param $doDelete Boolean: actually reassign edits and delete rows
	 *                  instead of only reporting
	 * @return Boolean: true when the unique index already exists or can now
	 *                  be applied; false when duplicates remain or a
	 *                  reassignment failed
	 */
	public function checkDupes( $doDelete = false ) {
		if ( $this->hasUniqueIndex() ) {
			// Routed through out() like every other message so that the
			// caller's output callback sees (and can suppress) it too.
			$this->out( wfWikiID() . " already has a unique index on its user table.\n" );
			return true;
		}

		$this->lock();

		$this->out( "Checking for duplicate accounts...\n" );
		$dupes = $this->getDupes();
		$count = count( $dupes );

		$this->out( "Found $count accounts with duplicate records on " . wfWikiID() . ".\n" );
		$this->trimmed    = 0;
		$this->reassigned = 0;
		$this->failed     = 0;
		foreach ( $dupes as $name ) {
			$this->examine( $name, $doDelete );
		}

		$this->unlock();

		$this->out( "\n" );

		if ( $this->reassigned > 0 ) {
			if ( $doDelete ) {
				$this->out( "$this->reassigned duplicate accounts had edits reassigned to a canonical record id.\n" );
			} else {
				$this->out( "$this->reassigned duplicate accounts need to have edits reassigned.\n" );
			}
		}

		if ( $this->trimmed > 0 ) {
			if ( $doDelete ) {
				$this->out( "$this->trimmed duplicate user records were deleted from " . wfWikiID() . ".\n" );
			} else {
				$this->out( "$this->trimmed duplicate user accounts were found on " . wfWikiID() . " which can be removed safely.\n" );
			}
		}

		if ( $this->failed > 0 ) {
			$this->out( "Something terribly awry; $this->failed duplicate accounts were not removed.\n" );
			return false;
		}

		if ( $this->trimmed == 0 || $doDelete ) {
			$this->out( "It is now safe to apply the unique index on user_name.\n" );
			return true;
		} else {
			$this->out( "Run this script again with the --fix option to automatically delete them.\n" );
			return false;
		}
	}

	/**
	 * Take WRITE locks on the user table and on the table(s) holding edits:
	 * 'revision' when newSchema() is true, otherwise 'cur' and 'old'.
	 */
	public function lock() {
		if ( $this->newSchema() ) {
			$set = array( 'user', 'revision' );
		} else {
			$set = array( 'user', 'cur', 'old' );
		}
		$names = array_map( array( $this, 'lockTable' ), $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", __METHOD__ );
	}

	/**
	 * Build the "<table> WRITE" fragment of a LOCK TABLES statement.
	 *
	 * @param $table String: unprefixed table name
	 * @return String
	 */
	public function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * Decide which table layout to work on, based on whether the
	 * 'Revision' class is available.
	 *
	 * @return Boolean: true to use 'revision', false to use 'cur' and 'old'
	 */
	public function newSchema() {
		return MWInit::classExists( 'Revision' );
	}

	/**
	 * Release the locks taken by lock().
	 */
	public function unlock() {
		$this->db->query( "UNLOCK TABLES", __METHOD__ );
	}

	/**
	 * List every user_name that occurs on more than one row of the user table.
	 *
	 * @return Array of user name strings
	 */
	public function getDupes() {
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			 "SELECT user_name,COUNT(*) AS n
				FROM $user
			GROUP BY user_name
			  HAVING n > 1", __METHOD__ );

		$list = array();
		foreach ( $result as $row ) {
			$list[] = $row->user_name;
		}
		return $list;
	}

	/**
	 * Examine all records sharing one user name. The first row returned is
	 * kept as the canonical record; every further row is treated as a
	 * duplicate, has its edits counted and, when $doDelete is set, has them
	 * reassigned to the canonical id before the row is deleted.
	 *
	 * Updates the $reassigned, $trimmed and $failed counters.
	 *
	 * @param $name String: the duplicated user name
	 * @param $doDelete Boolean: reassign and delete instead of only reporting
	 */
	public function examine( $name, $doDelete ) {
		$result = $this->db->select( 'user',
			array( 'user_id' ),
			array( 'user_name' => $name ),
			__METHOD__ );

		// Read all ids in a single pass. Fetching the first row manually and
		// then running foreach over the same result is unsafe: foreach
		// rewinds an Iterator before starting, which can hand the canonical
		// row back as if it were one of its own duplicates.
		$ids = array();
		foreach ( $result as $row ) {
			$ids[] = $row->user_id;
		}
		if ( !$ids ) {
			// No record carries this name (any more); nothing to examine.
			return;
		}

		$firstId = array_shift( $ids );
		$this->out( "Record that will be used for '$name' is user_id=$firstId\n" );

		foreach ( $ids as $dupeId ) {
			$this->out( "... dupe id $dupeId: " );
			$edits = $this->editCount( $dupeId );
			if ( $edits > 0 ) {
				$this->reassigned++;
				$this->out( "has $edits edits! " );
				if ( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					// Re-count to verify the reassignment before deleting anything.
					$newEdits = $this->editCount( $dupeId );
					if ( $newEdits == 0 ) {
						$this->out( "confirmed cleaned. " );
					} else {
						$this->failed++;
						$this->out( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
						continue;
					}
				} else {
					$this->out( "(will need to reassign edits on fix)" );
				}
			} else {
				$this->out( "ok, no edits. " );
			}
			$this->trimmed++;
			if ( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			$this->out( "\n" );
		}
	}

	/**
	 * Count the edits attributed to a user id, across whichever edit
	 * table(s) newSchema() selects.
	 *
	 * @param $userid Integer
	 * @return Integer
	 */
	public function editCount( $userid ) {
		if ( $this->newSchema() ) {
			return $this->editCountOn( 'revision', 'rev_user', $userid );
		} else {
			return $this->editCountOn( 'cur', 'cur_user', $userid ) +
				$this->editCountOn( 'old', 'old_user', $userid );
		}
	}

	/**
	 * Count the rows of one table whose user field equals the given id.
	 *
	 * @param $table String: table to count in
	 * @param $field String: column holding the user id
	 * @param $userid Integer
	 * @return Integer
	 */
	public function editCountOn( $table, $field, $userid ) {
		return intval( $this->db->selectField(
			$table,
			'COUNT(*)',
			array( $field => $userid ),
			__METHOD__ ) );
	}

	/**
	 * Move all edits from one user id to another, in whichever edit
	 * table(s) newSchema() selects.
	 *
	 * @param $from Integer: user id currently owning the edits
	 * @param $to Integer: user id that should own them afterwards
	 */
	public function reassignEdits( $from, $to ) {
		$set = $this->newSchema()
			? array( 'revision' => 'rev_user' )
			: array( 'cur' => 'cur_user', 'old' => 'old_user' );
		foreach ( $set as $table => $field ) {
			$this->reassignEditsOn( $table, $field, $from, $to );
		}
	}

	/**
	 * Rewrite the user field of one table from one id to another,
	 * reporting progress through out().
	 *
	 * @param $table String: table to update
	 * @param $field String: column holding the user id
	 * @param $from Integer: id to replace
	 * @param $to Integer: id to write instead
	 */
	public function reassignEditsOn( $table, $field, $from, $to ) {
		$this->out( "reassigning on $table... " );
		$this->db->update( $table,
			array( $field => $to ),
			array( $field => $from ),
			__METHOD__ );
		$this->out( "ok. " );
	}

	/**
	 * Delete the user row with the given id, reporting progress through out().
	 *
	 * @param $userid Integer
	 */
	public function trimAccount( $userid ) {
		$this->out( "deleting..." );
		$this->db->delete( 'user', array( 'user_id' => $userid ), __METHOD__ );
		$this->out( " ok" );
	}

}