MediaWiki  REL1_22
orphans.php
Go to the documentation of this file.
00001 <?php
00031 require_once __DIR__ . '/Maintenance.php';
00032 
/**
 * Maintenance script that reports, and with --fix repairs, inconsistencies
 * between the page and revision tables:
 *  - "orphan" revisions whose rev_page matches no row in page;
 *  - "childless" pages whose page_latest matches no row in revision
 *    (implemented in checkWidows(), but not invoked by execute());
 *  - pages whose page_latest revision does not carry the newest
 *    rev_timestamp recorded for that page.
 *
 * Without --fix the script only reports what it finds.
 */
class Orphans extends Maintenance {
    /**
     * Set the script description and register the --fix option.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Look for 'orphan' revisions hooked to pages which don't exist\n" .
            "and 'childless' pages with no revisions\n" .
            "Then, kill the poor widows and orphans\n" .
            "Man this is depressing";
        $this->addOption( 'fix', 'Actually fix broken entries' );
    }

    /**
     * Entry point: run the orphan check and the page_latest check, passing
     * along whether --fix was given.
     */
    public function execute() {
        global $wgTitle;
        $wgTitle = Title::newFromText( 'Orphan revision cleanup script' );

        $fix = $this->hasOption( 'fix' );
        $this->checkOrphans( $fix );
        $this->checkSeparation( $fix );
        # Does not work yet, do not use
        # $this->checkWidows( $fix );
    }

    /**
     * Lock the page, revision and redirect tables, plus any extra tables.
     *
     * The table names are handed to $db->lockTables() as its second argument,
     * with an empty array as the first (NOTE(review): presumably the read-lock
     * and write-lock lists respectively -- confirm against the database class).
     *
     * @param object $db Database connection to take the locks on
     * @param array $extraTable Additional table names to lock
     */
    private function lockTables( $db, $extraTable = array() ) {
        $tbls = array( 'page', 'revision', 'redirect' );
        if ( $extraTable ) {
            $tbls = array_merge( $tbls, $extraTable );
        }
        $db->lockTables( array(), $tbls, __METHOD__, false );
    }

    /**
     * Report revisions whose rev_page has no matching page row and, when
     * $fix is set, delete each of them by rev_id.
     *
     * @param bool $fix Whether to delete the orphan revisions that are found
     */
    private function checkOrphans( $fix ) {
        $dbw = wfGetDB( DB_MASTER );
        $page = $dbw->tableName( 'page' );
        $revision = $dbw->tableName( 'revision' );

        if ( $fix ) {
            $this->lockTables( $dbw );
        }

        $this->output( "Checking for orphan revision table entries... (this may take a while on a large wiki)\n" );
        // The LEFT OUTER JOIN leaves page_id NULL for revisions without a page.
        $result = $dbw->query( "
            SELECT *
            FROM $revision LEFT OUTER JOIN $page ON rev_page=page_id
            WHERE page_id IS NULL
        ", __METHOD__ );
        $orphans = $result->numRows();
        if ( $orphans > 0 ) {
            global $wgContLang;
            $this->output( "$orphans orphan revisions...\n" );
            $this->output( sprintf( "%10s %10s %14s %20s %s\n", 'rev_id', 'rev_page', 'rev_timestamp', 'rev_user_text', 'rev_comment' ) );
            foreach ( $result as $row ) {
                $comment = ( $row->rev_comment == '' )
                    ? ''
                    : '(' . $wgContLang->truncate( $row->rev_comment, 40 ) . ')';
                $this->output( sprintf( "%10d %10d %14s %20s %s\n",
                    $row->rev_id,
                    $row->rev_page,
                    $row->rev_timestamp,
                    $wgContLang->truncate( $row->rev_user_text, 17 ),
                    $comment ) );
                if ( $fix ) {
                    $dbw->delete( 'revision', array( 'rev_id' => $row->rev_id ), __METHOD__ );
                }
            }
            if ( !$fix ) {
                $this->output( "Run again with --fix to remove these entries automatically.\n" );
            }
        } else {
            $this->output( "No orphans! Yay!\n" );
        }

        if ( $fix ) {
            $dbw->unlockTables( __METHOD__ );
        }
    }

    /**
     * Report pages whose page_latest has no matching revision row and, when
     * $fix is set, delete each of them by page_id.
     *
     * Not called from execute(); the call there is commented out with the
     * note "Does not work yet, do not use".
     *
     * @param bool $fix Whether to delete the childless pages that are found
     */
    private function checkWidows( $fix ) {
        $dbw = wfGetDB( DB_MASTER );
        $page = $dbw->tableName( 'page' );
        $revision = $dbw->tableName( 'revision' );

        if ( $fix ) {
            $this->lockTables( $dbw );
        }

        $this->output( "\nChecking for childless page table entries... (this may take a while on a large wiki)\n" );
        // The LEFT OUTER JOIN leaves rev_id NULL for pages without a latest revision row.
        $result = $dbw->query( "
            SELECT *
            FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
            WHERE rev_id IS NULL
        ", __METHOD__ );
        $widows = $result->numRows();
        if ( $widows > 0 ) {
            $this->output( "$widows childless pages...\n" );
            $this->output( sprintf( "%10s %11s %2s %s\n", 'page_id', 'page_latest', 'ns', 'page_title' ) );
            foreach ( $result as $row ) {
                // Routed through output() like every other line of this script,
                // rather than printing directly with printf().
                $this->output( sprintf( "%10d %11d %2d %s\n",
                    $row->page_id,
                    $row->page_latest,
                    $row->page_namespace,
                    $row->page_title ) );
                if ( $fix ) {
                    $dbw->delete( 'page', array( 'page_id' => $row->page_id ), __METHOD__ );
                }
            }
            if ( !$fix ) {
                $this->output( "Run again with --fix to remove these entries automatically.\n" );
            }
        } else {
            $this->output( "No childless pages! Yay!\n" );
        }

        if ( $fix ) {
            $dbw->unlockTables( __METHOD__ );
        }
    }

    /**
     * Report pages whose page_latest revision does not have the newest
     * rev_timestamp among the page's revisions. When $fix is set, point each
     * such page at the revision carrying that newest timestamp.
     *
     * @param bool $fix Whether to update the affected pages
     */
    private function checkSeparation( $fix ) {
        $dbw = wfGetDB( DB_MASTER );
        $page = $dbw->tableName( 'page' );
        $revision = $dbw->tableName( 'revision' );

        if ( $fix ) {
            $this->lockTables( $dbw, array( 'user', 'text' ) );
        }

        $this->output( "\nChecking for pages whose page_latest links are incorrect... (this may take a while on a large wiki)\n" );
        $result = $dbw->query( "
            SELECT *
            FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
        ", __METHOD__ );
        $found = 0;
        foreach ( $result as $row ) {
            // Newest timestamp recorded for this page. The condition array lets
            // the database layer quote page_id instead of interpolating it.
            $row2 = $dbw->selectRow(
                'revision',
                'MAX(rev_timestamp) AS max_timestamp',
                array( 'rev_page' => $row->page_id ),
                __METHOD__
            );
            if ( !$row2 ) {
                $this->output( "wtf\n" );
                continue;
            }
            if ( $row->rev_timestamp == $row2->max_timestamp ) {
                // page_latest already carries the newest timestamp.
                continue;
            }

            if ( $found == 0 ) {
                // Print the column header once, before the first mismatch.
                $this->output( sprintf( "%10s %10s %14s %14s\n",
                    'page_id', 'rev_id', 'timestamp', 'max timestamp' ) );
            }
            ++$found;
            $this->output( sprintf( "%10d %10d %14s %14s\n",
                $row->page_id,
                $row->page_latest,
                $row->rev_timestamp,
                $row2->max_timestamp ) );

            if ( !$fix ) {
                continue;
            }

            $maxId = $dbw->selectField(
                'revision',
                'rev_id',
                array(
                    'rev_page' => $row->page_id,
                    'rev_timestamp' => $row2->max_timestamp ),
                __METHOD__ );
            if ( $maxId === false ) {
                // The page has no revision with that timestamp (for instance
                // it has no revisions at all, so max_timestamp is NULL):
                // there is nothing to point page_latest at.
                $this->output( "... no revision found for page {$row->page_id}, skipping\n" );
                continue;
            }

            $this->output( "... updating to revision $maxId\n" );
            $maxRev = Revision::newFromId( $maxId );
            if ( !$maxRev ) {
                // Do not hand a missing revision to updateRevisionOn().
                $this->output( "... could not load revision $maxId, skipping\n" );
                continue;
            }
            $title = Title::makeTitle( $row->page_namespace, $row->page_title );
            $article = WikiPage::factory( $title );
            $article->updateRevisionOn( $dbw, $maxRev );
        }

        if ( $found ) {
            $this->output( "Found $found pages with incorrect latest revision.\n" );
        } else {
            $this->output( "No pages with incorrect latest revision. Yay!\n" );
        }
        if ( !$fix && $found > 0 ) {
            $this->output( "Run again with --fix to remove these entries automatically.\n" );
        }

        if ( $fix ) {
            $dbw->unlockTables( __METHOD__ );
        }
    }
}
00240 
// Name of the maintenance class defined in this file.
// NOTE(review): presumably read by the script included on the next line to
// decide which class to run -- confirm against Maintenance.php.
$maintClass = "Orphans";
// Include the file whose path is held in the RUN_MAINTENANCE_IF_MAIN constant
// (not defined in this file; assumed to come from Maintenance.php -- confirm).
require_once RUN_MAINTENANCE_IF_MAIN;