MediaWiki  REL1_24
cleanupImages.php
Go to the documentation of this file.
00001 <?php
00032 require_once __DIR__ . '/cleanupTable.inc';
00033 
/**
 * Maintenance script to clean up broken, unparseable upload filenames.
 *
 * Each row of the image table is passed to processRow(), which decodes and
 * normalizes img_name and, when the result differs from what is stored,
 * renames the file on disk together with the matching image, oldimage and
 * page rows. Rows without a name, or whose file is missing, are deleted.
 *
 * NOTE(review): $this->dryrun, progress(), output(), error() and hexChar()
 * are not defined in this class; they are presumably inherited from
 * TableCleanup / its parents - confirm against cleanupTable.inc.
 */
class ImageCleanup extends TableCleanup {
    /**
     * Scan parameters: walk the image table by img_name with no extra
     * conditions and hand every row to processRow().
     * Presumably consumed by TableCleanup - confirm against cleanupTable.inc.
     */
    protected $defaultParams = array(
        'table' => 'image',
        'conds' => array(),
        'index' => 'img_name',
        'callback' => 'processRow',
    );

    /**
     * Local file repository, fetched lazily by filePath().
     *
     * Declared explicitly so it is no longer created as a dynamic property;
     * kept public because that is the visibility the dynamic property had.
     */
    public $repo = null;

    public function __construct() {
        parent::__construct();
        $this->mDescription = "Script to clean up broken, unparseable upload filenames";
    }

    /**
     * Check a single image row and repair its name if needed.
     *
     * @param object $row Row object exposing at least ->img_name
     * @return mixed Whatever progress() returns; progress( 1 ) is called when
     *   the row was deleted or a rename was attempted, progress( 0 ) otherwise
     */
    protected function processRow( $row ) {
        global $wgContLang;

        $source = $row->img_name;
        if ( $source == '' ) {
            // Ye olde empty rows. Just kill them.
            $this->killRow( $source );

            return $this->progress( 1 );
        }

        $cleaned = $source;

        // About half of old bad image names have percent-codes
        $cleaned = rawurldecode( $cleaned );

        // We also have some HTML entities there
        $cleaned = Sanitizer::decodeCharReferences( $cleaned );

        // Some are old latin-1
        $cleaned = $wgContLang->checkTitleEncoding( $cleaned );

        // Many of remainder look like non-normalized unicode
        $cleaned = $wgContLang->normalize( $cleaned );

        $title = Title::makeTitleSafe( NS_FILE, $cleaned );

        if ( is_null( $title ) ) {
            // Still not a valid title after decoding: try to escape the
            // offending characters instead.
            $this->output( "page $source ($cleaned) is illegal.\n" );
            $safe = $this->buildSafeTitle( $cleaned );
            if ( $safe === false ) {
                return $this->progress( 0 );
            }
            $this->pokeFile( $source, $safe );

            return $this->progress( 1 );
        }

        if ( $title->getDBkey() !== $source ) {
            // Valid title, but the stored name is not its canonical DB key.
            $munged = $title->getDBkey();
            $this->output( "page $source ($munged) doesn't match self.\n" );
            $this->pokeFile( $source, $munged );

            return $this->progress( 1 );
        }

        return $this->progress( 0 );
    }

    /**
     * Delete the image row(s) with the given name, or only report what would
     * be deleted when $this->dryrun is set.
     *
     * @param string $name Value of img_name to delete
     */
    private function killRow( $name ) {
        if ( $this->dryrun ) {
            $this->output( "DRY RUN: would delete bogus row '$name'\n" );
        } else {
            $this->output( "deleting bogus row '$name'\n" );
            $db = wfGetDB( DB_MASTER );
            $db->delete( 'image',
                array( 'img_name' => $name ),
                __METHOD__ );
        }
    }

    /**
     * Build the path of a file inside the local repo: root directory, hash
     * path for the name, then the name itself.
     *
     * @param string $name File name
     * @return string
     */
    private function filePath( $name ) {
        if ( !isset( $this->repo ) ) {
            $this->repo = RepoGroup::singleton()->getLocalRepo();
        }

        return $this->repo->getRootDirectory() . '/' . $this->repo->getHashPath( $name ) . $name;
    }

    /**
     * Whether the image table has a row with this name.
     *
     * @param string $name
     * @param object $db Database handle providing selectField()
     * @return bool
     */
    private function imageExists( $name, $db ) {
        return (bool)$db->selectField( 'image', '1', array( 'img_name' => $name ), __METHOD__ );
    }

    /**
     * Whether the page table has a page with this title in NS_FILE.
     *
     * @param string $name
     * @param object $db Database handle providing selectField()
     * @return bool
     */
    private function pageExists( $name, $db ) {
        return (bool)$db->selectField(
            'page',
            '1',
            array( 'page_namespace' => NS_FILE, 'page_title' => $name ),
            __METHOD__
        );
    }

    /**
     * Rename a file on disk and update the image, oldimage and page rows to
     * match, inside one transaction that is rolled back if the target
     * directory cannot be created or the rename fails.
     *
     * If the original file does not exist on disk, its image row is removed
     * via killRow() instead. In dry-run mode only the intended rename is
     * printed.
     *
     * @param string $orig Current (broken) name
     * @param string $new Desired name; a numeric suffix may be added to
     *   avoid collisions
     */
    private function pokeFile( $orig, $new ) {
        $path = $this->filePath( $orig );
        if ( !file_exists( $path ) ) {
            $this->output( "missing file: $path\n" );
            $this->killRow( $orig );

            return;
        }

        $db = wfGetDB( DB_MASTER );

        /*
         * To prevent key collisions in the update() statements below,
         * if the target title exists in the image table, or if both the
         * original and target titles exist in the page table, append
         * increasing version numbers until the target title exists in
         * neither.  (See also bug 16916.)
         */
        $version = 0;
        $final = $new;
        $conflict = ( $this->imageExists( $final, $db ) ||
            ( $this->pageExists( $orig, $db ) && $this->pageExists( $final, $db ) ) );

        while ( $conflict ) {
            $this->output( "Rename conflicts with '$final'...\n" );
            $version++;
            // appendTitle() always yields a new candidate, so this loop
            // makes progress on every iteration.
            $final = $this->appendTitle( $new, "_$version" );
            $conflict = ( $this->imageExists( $final, $db ) || $this->pageExists( $final, $db ) );
        }

        $finalPath = $this->filePath( $final );

        if ( $this->dryrun ) {
            $this->output( "DRY RUN: would rename $path to $finalPath\n" );
        } else {
            $this->output( "renaming $path to $finalPath\n" );
            // @todo FIXME: Should this use File::move()?
            $db->begin( __METHOD__ );
            $db->update( 'image',
                array( 'img_name' => $final ),
                array( 'img_name' => $orig ),
                __METHOD__ );
            $db->update( 'oldimage',
                array( 'oi_name' => $final ),
                array( 'oi_name' => $orig ),
                __METHOD__ );
            $db->update( 'page',
                array( 'page_title' => $final ),
                array( 'page_title' => $orig, 'page_namespace' => NS_FILE ),
                __METHOD__ );
            $dir = dirname( $finalPath );
            if ( !file_exists( $dir ) ) {
                if ( !wfMkdirParents( $dir, null, __METHOD__ ) ) {
                    // Reported through error(), like the rename failure below.
                    $this->error( "RENAME FAILED, COULD NOT CREATE $dir" );
                    $db->rollback( __METHOD__ );

                    return;
                }
            }
            if ( rename( $path, $finalPath ) ) {
                $db->commit( __METHOD__ );
            } else {
                $this->error( "RENAME FAILED" );
                $db->rollback( __METHOD__ );
            }
        }
    }

    /**
     * Insert a suffix in front of the file extension, i.e. before the last
     * dot: ( 'Foo.bar.png', '_1' ) gives 'Foo.bar_1.png'.
     *
     * A name without any dot gets the suffix appended at the end, so the
     * result always differs from $name for a non-empty suffix; the collision
     * loop in pokeFile() relies on that to terminate.
     *
     * @param string $name
     * @param string $suffix
     * @return string
     */
    private function appendTitle( $name, $suffix ) {
        $dot = strrpos( $name, '.' );
        if ( $dot === false ) {
            return $name . $suffix;
        }

        return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
    }

    /**
     * Replace every character outside Title::legalChars(), and every '~',
     * using the hexChar() callback, then verify that the result is a valid
     * title whose DB key equals the generated string.
     *
     * @param string $name
     * @return string|bool The safe name, or false if none could be generated
     */
    private function buildSafeTitle( $name ) {
        $x = preg_replace_callback(
            '/([^' . Title::legalChars() . ']|~)/',
            array( $this, 'hexChar' ),
            $name );

        $test = Title::makeTitleSafe( NS_FILE, $x );
        if ( is_null( $test ) || $test->getDBkey() !== $x ) {
            $this->error( "Unable to generate safe title from '$name', got '$x'" );

            return false;
        }

        return $x;
    }
}
00226 
// Name of the maintenance class defined in this file; presumably picked up
// by the script required below - confirm against the Maintenance bootstrap.
$maintClass = 'ImageCleanup';
require_once( RUN_MAINTENANCE_IF_MAIN );