MediaWiki  REL1_24
orphans.php
Go to the documentation of this file.
00001 <?php
00031 require_once __DIR__ . '/Maintenance.php';
00032 
/**
 * Maintenance script that reports revision rows attached to pages that do not
 * exist ("orphans") and pages whose page_latest does not point at their newest
 * revision, and repairs them when run with --fix.
 *
 * @ingroup Maintenance
 */
class Orphans extends Maintenance {
    /**
     * Set the script description and register the --fix option.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Look for 'orphan' revisions hooked to pages which don't exist\n" .
            "and 'childless' pages with no revisions\n" .
            "Then, kill the poor widows and orphans\n" .
            "Man this is depressing";
        $this->addOption( 'fix', 'Actually fix broken entries' );
    }

    /**
     * Run the orphan-revision check followed by the page_latest check,
     * telling each one whether --fix was given.
     */
    public function execute() {
        $fix = $this->hasOption( 'fix' );

        $this->checkOrphans( $fix );
        $this->checkSeparation( $fix );
        # checkWidows() is deliberately not called: it does not work yet, do not use
        # $this->checkWidows( $fix );
    }

    /**
     * Lock the tables this script works on.
     *
     * The table names are handed to $db->lockTables() as its second argument,
     * with an empty array as the first.
     *
     * @param DatabaseBase $db Connection to take the locks on
     * @param string[] $extraTable Names of additional tables to lock besides
     *   page, revision and redirect
     */
    private function lockTables( $db, $extraTable = array() ) {
        $tbls = array( 'page', 'revision', 'redirect' );
        if ( $extraTable ) {
            $tbls = array_merge( $tbls, $extraTable );
        }
        $db->lockTables( array(), $tbls, __METHOD__, false );
    }

    /**
     * Report revision rows whose rev_page matches no page row.
     *
     * @param bool $fix Whether to delete the orphan revision rows that are found
     */
    private function checkOrphans( $fix ) {
        $dbw = wfGetDB( DB_MASTER );
        $page = $dbw->tableName( 'page' );
        $revision = $dbw->tableName( 'revision' );

        if ( $fix ) {
            $this->lockTables( $dbw );
        }

        $this->output( "Checking for orphan revision table entries... "
            . "(this may take a while on a large wiki)\n" );
        # Anti-join: keep only the revision rows for which no page row matched
        $result = $dbw->query( "
            SELECT *
            FROM $revision LEFT OUTER JOIN $page ON rev_page=page_id
            WHERE page_id IS NULL
        ", __METHOD__ );
        $orphans = $result->numRows();
        if ( $orphans > 0 ) {
            global $wgContLang;

            $this->output( "$orphans orphan revisions...\n" );
            $this->output( sprintf(
                "%10s %10s %14s %20s %s\n",
                'rev_id', 'rev_page', 'rev_timestamp', 'rev_user_text', 'rev_comment'
            ) );

            foreach ( $result as $row ) {
                $comment = ( $row->rev_comment == '' )
                    ? ''
                    : '(' . $wgContLang->truncate( $row->rev_comment, 40 ) . ')';
                $this->output( sprintf( "%10d %10d %14s %20s %s\n",
                    $row->rev_id,
                    $row->rev_page,
                    $row->rev_timestamp,
                    $wgContLang->truncate( $row->rev_user_text, 17 ),
                    $comment ) );
                if ( $fix ) {
                    $dbw->delete( 'revision', array( 'rev_id' => $row->rev_id ), __METHOD__ );
                }
            }
            if ( !$fix ) {
                $this->output( "Run again with --fix to remove these entries automatically.\n" );
            }
        } else {
            $this->output( "No orphans! Yay!\n" );
        }

        if ( $fix ) {
            $dbw->unlockTables( __METHOD__ );
        }
    }

    /**
     * Report page rows whose page_latest matches no revision row.
     *
     * Not called from execute(). Because the join is on page_latest=rev_id
     * rather than on rev_page, a page whose page_latest is stale but which
     * still owns revision rows is reported too, and with $fix it would be
     * deleted. Do not enable this until that is addressed.
     *
     * @param bool $fix Whether to delete the page rows that are found
     */
    private function checkWidows( $fix ) {
        $dbw = wfGetDB( DB_MASTER );
        $page = $dbw->tableName( 'page' );
        $revision = $dbw->tableName( 'revision' );

        if ( $fix ) {
            $this->lockTables( $dbw );
        }

        $this->output( "\nChecking for childless page table entries... "
            . "(this may take a while on a large wiki)\n" );
        $result = $dbw->query( "
            SELECT *
            FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
            WHERE rev_id IS NULL
        ", __METHOD__ );
        $widows = $result->numRows();
        if ( $widows > 0 ) {
            $this->output( "$widows childless pages...\n" );
            $this->output( sprintf( "%10s %11s %2s %s\n", 'page_id', 'page_latest', 'ns', 'page_title' ) );
            foreach ( $result as $row ) {
                # Route rows through output() like every other line of the report,
                # instead of writing to stdout directly
                $this->output( sprintf( "%10d %11d %2d %s\n",
                    $row->page_id,
                    $row->page_latest,
                    $row->page_namespace,
                    $row->page_title ) );
                if ( $fix ) {
                    $dbw->delete( 'page', array( 'page_id' => $row->page_id ), __METHOD__ );
                }
            }
            if ( !$fix ) {
                $this->output( "Run again with --fix to remove these entries automatically.\n" );
            }
        } else {
            $this->output( "No childless pages! Yay!\n" );
        }

        if ( $fix ) {
            $dbw->unlockTables( __METHOD__ );
        }
    }

    /**
     * Report pages where the timestamp of the revision named by page_latest
     * differs from the newest rev_timestamp among the page's revisions.
     *
     * @param bool $fix Whether to repoint such pages at the revision carrying
     *   the newest timestamp
     */
    private function checkSeparation( $fix ) {
        $dbw = wfGetDB( DB_MASTER );
        $page = $dbw->tableName( 'page' );
        $revision = $dbw->tableName( 'revision' );

        if ( $fix ) {
            $this->lockTables( $dbw, array( 'user', 'text' ) );
        }

        $this->output( "\nChecking for pages whose page_latest links are incorrect... "
            . "(this may take a while on a large wiki)\n" );
        $result = $dbw->query( "
            SELECT *
            FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
        ", __METHOD__ );
        $found = 0;
        foreach ( $result as $row ) {
            # page_id comes from the database; the cast just keeps the raw SQL
            # below strictly numeric
            $pageId = (int)$row->page_id;
            $result2 = $dbw->query( "
                SELECT MAX(rev_timestamp) as max_timestamp
                FROM $revision
                WHERE rev_page=$pageId
            ", __METHOD__ );
            $row2 = $dbw->fetchObject( $result2 );
            if ( $row2 ) {
                if ( $row->rev_timestamp != $row2->max_timestamp ) {
                    if ( $found == 0 ) {
                        # Print the column header once, before the first mismatch
                        $this->output( sprintf( "%10s %10s %14s %14s\n",
                            'page_id', 'rev_id', 'timestamp', 'max timestamp' ) );
                    }
                    ++$found;
                    $this->output( sprintf( "%10d %10d %14s %14s\n",
                        $row->page_id,
                        $row->page_latest,
                        $row->rev_timestamp,
                        $row2->max_timestamp ) );
                    if ( $fix ) {
                        # Look up the id of the revision that carries the newest timestamp
                        $maxId = $dbw->selectField(
                            'revision',
                            'rev_id',
                            array(
                                'rev_page' => $row->page_id,
                                'rev_timestamp' => $row2->max_timestamp ),
                            __METHOD__ );
                        if ( !$maxId ) {
                            # No revision id resolved: skip rather than pass a
                            # null revision on to updateRevisionOn()
                            $this->output( "... no revision found for page $pageId, skipping\n" );
                            continue;
                        }
                        $this->output( "... updating to revision $maxId\n" );
                        $maxRev = Revision::newFromId( $maxId );
                        if ( !$maxRev ) {
                            $this->output( "... could not load revision $maxId, skipping\n" );
                            continue;
                        }
                        $title = Title::makeTitle( $row->page_namespace, $row->page_title );
                        $article = WikiPage::factory( $title );
                        $article->updateRevisionOn( $dbw, $maxRev );
                    }
                }
            } else {
                $this->output( "wtf\n" );
            }
        }

        if ( $found ) {
            $this->output( "Found $found pages with incorrect latest revision.\n" );
        } else {
            $this->output( "No pages with incorrect latest revision. Yay!\n" );
        }
        if ( !$fix && $found > 0 ) {
            # NOTE(review): the wording says "remove", but --fix in this method
            # repoints page_latest rather than deleting anything
            $this->output( "Run again with --fix to remove these entries automatically.\n" );
        }

        if ( $fix ) {
            $dbw->unlockTables( __METHOD__ );
        }
    }
}
00246 
# Name the maintenance class defined in this file, then include the file named
# by RUN_MAINTENANCE_IF_MAIN (defined outside this file; presumably it runs the
# class named in $maintClass when this script is the entry point - confirm).
$maintClass = 'Orphans';
require_once RUN_MAINTENANCE_IF_MAIN;