MediaWiki  REL1_22
cleanupImages.php
Go to the documentation of this file.
00001 <?php
00032 require_once __DIR__ . '/cleanupTable.inc';
00033 
/**
 * Maintenance script that walks the image table and repairs rows whose
 * img_name is empty, cannot be turned into a legal File: title, or differs
 * from the DB key that the title code produces for it.
 *
 * Repair means renaming the file on disk and updating the image, oldimage
 * and page rows inside one transaction; rows whose name is empty or whose
 * file is missing on disk are deleted instead.
 */
class ImageCleanup extends TableCleanup {
    /**
     * Table-walk settings: iterate the 'image' table by 'img_name' with no
     * extra conditions and hand each row to processRow().
     * NOTE(review): presumably consumed by TableCleanup; confirm there.
     */
    protected $defaultParams = array(
        'table' => 'image',
        'conds' => array(),
        'index' => 'img_name',
        'callback' => 'processRow',
    );

    /**
     * Local file repository, fetched on first use by filePath().
     * Declared explicitly so the class does not rely on a dynamic property.
     */
    protected $repo = null;

    public function __construct() {
        parent::__construct();
        $this->mDescription = "Script to clean up broken, unparseable upload filenames";
    }

    /**
     * Examine one image row and fix its name if necessary.
     *
     * @param object $row Row object; only $row->img_name is read.
     * @return mixed Whatever $this->progress() returns; it is called with 1
     *   when the row was acted upon and 0 when it was left alone.
     */
    protected function processRow( $row ) {
        global $wgContLang;

        $source = $row->img_name;
        if ( $source == '' ) {
            // Ye olde empty rows. Just kill them.
            $this->killRow( $source );
            return $this->progress( 1 );
        }

        // Build the candidate name step by step; each step undoes one
        // historical source of breakage.

        // About half of old bad image names have percent-codes
        $cleaned = rawurldecode( $source );

        // We also have some HTML entities there
        $cleaned = Sanitizer::decodeCharReferences( $cleaned );

        // Some are old latin-1
        $cleaned = $wgContLang->checkTitleEncoding( $cleaned );

        // Many of remainder look like non-normalized unicode
        $cleaned = $wgContLang->normalize( $cleaned );

        $title = Title::makeTitleSafe( NS_FILE, $cleaned );

        if ( is_null( $title ) ) {
            // Even the cleaned name is not a legal title: escape the
            // offending characters and rename to that.
            $this->output( "page $source ($cleaned) is illegal.\n" );
            $safe = $this->buildSafeTitle( $cleaned );
            if ( $safe === false ) {
                return $this->progress( 0 );
            }
            $this->pokeFile( $source, $safe );
            return $this->progress( 1 );
        }

        $munged = $title->getDBkey();
        if ( $munged !== $source ) {
            // Legal title, but the stored name is not its DB key.
            $this->output( "page $source ($munged) doesn't match self.\n" );
            $this->pokeFile( $source, $munged );
            return $this->progress( 1 );
        }

        return $this->progress( 0 );
    }

    /**
     * Delete the image row with the given name, or only report it when
     * running in dry-run mode.
     *
     * @param string $name Value of img_name to delete.
     */
    private function killRow( $name ) {
        if ( $this->dryrun ) {
            $this->output( "DRY RUN: would delete bogus row '$name'\n" );
            return;
        }

        $this->output( "deleting bogus row '$name'\n" );
        $db = wfGetDB( DB_MASTER );
        $db->delete( 'image',
            array( 'img_name' => $name ),
            __METHOD__ );
    }

    /**
     * Build the on-disk path for a file name: the local repo's root
     * directory, its hash path for the name, then the name itself.
     *
     * @param string $name File name (DB key form).
     * @return string
     */
    private function filePath( $name ) {
        if ( !isset( $this->repo ) ) {
            $this->repo = RepoGroup::singleton()->getLocalRepo();
        }
        return $this->repo->getRootDirectory() . '/' . $this->repo->getHashPath( $name ) . $name;
    }

    /**
     * Check whether an image row with this name exists.
     *
     * @param string $name
     * @param object $db Database handle providing selectField().
     * @return bool
     */
    private function imageExists( $name, $db ) {
        return (bool)$db->selectField( 'image', '1', array( 'img_name' => $name ), __METHOD__ );
    }

    /**
     * Check whether a page with this title exists in the File namespace.
     *
     * @param string $name
     * @param object $db Database handle providing selectField().
     * @return bool
     */
    private function pageExists( $name, $db ) {
        return (bool)$db->selectField(
            'page',
            '1',
            array( 'page_namespace' => NS_FILE, 'page_title' => $name ),
            __METHOD__
        );
    }

    /**
     * Rename a file from $orig to $new, both on disk and in the image,
     * oldimage and page tables. If the file is missing on disk the row is
     * deleted instead. In dry-run mode the rename is only reported.
     *
     * @param string $orig Current (broken) img_name.
     * @param string $new Desired name; a numeric suffix may be added to
     *   avoid colliding with existing rows.
     */
    private function pokeFile( $orig, $new ) {
        $path = $this->filePath( $orig );
        if ( !file_exists( $path ) ) {
            $this->output( "missing file: $path\n" );
            $this->killRow( $orig );
            return;
        }

        $db = wfGetDB( DB_MASTER );

        /*
         * To prevent key collisions in the update() statements below,
         * if the target title exists in the image table, or if both the
         * original and target titles exist in the page table, append
         * increasing version numbers until the target title exists in
         * neither.  (See also bug 16916.)
         */
        $version = 0;
        $final = $new;
        $conflict = ( $this->imageExists( $final, $db ) ||
                ( $this->pageExists( $orig, $db ) && $this->pageExists( $final, $db ) ) );

        while ( $conflict ) {
            $this->output( "Rename conflicts with '$final'...\n" );
            $version++;
            // appendTitle() always yields a new candidate, even for names
            // without an extension, so this loop cannot spin on one name.
            $final = $this->appendTitle( $new, "_$version" );
            $conflict = ( $this->imageExists( $final, $db ) || $this->pageExists( $final, $db ) );
        }

        $finalPath = $this->filePath( $final );

        if ( $this->dryrun ) {
            $this->output( "DRY RUN: would rename $path to $finalPath\n" );
            return;
        }

        $this->output( "renaming $path to $finalPath\n" );
        // @todo FIXME: Should this use File::move()?
        // The row updates are made first and only committed once the file
        // has actually been moved; any failure rolls them back.
        $db->begin( __METHOD__ );
        $db->update( 'image',
            array( 'img_name' => $final ),
            array( 'img_name' => $orig ),
            __METHOD__ );
        $db->update( 'oldimage',
            array( 'oi_name' => $final ),
            array( 'oi_name' => $orig ),
            __METHOD__ );
        $db->update( 'page',
            array( 'page_title' => $final ),
            array( 'page_title' => $orig, 'page_namespace' => NS_FILE ),
            __METHOD__ );

        $dir = dirname( $finalPath );
        if ( !file_exists( $dir ) && !wfMkdirParents( $dir, null, __METHOD__ ) ) {
            // Reported through error() like the rename failure below.
            $this->error( "RENAME FAILED, COULD NOT CREATE $dir" );
            $db->rollback( __METHOD__ );
            return;
        }

        if ( rename( $path, $finalPath ) ) {
            $db->commit( __METHOD__ );
        } else {
            $this->error( "RENAME FAILED" );
            $db->rollback( __METHOD__ );
        }
    }

    /**
     * Insert $suffix immediately before the last dot of $name, i.e. before
     * the extension ("Foo.png" + "_1" gives "Foo_1.png"). A name without
     * any dot gets the suffix appended at the end, so the result always
     * differs from the input when $suffix is non-empty.
     *
     * @param string $name
     * @param string $suffix
     * @return string
     */
    private function appendTitle( $name, $suffix ) {
        $dot = strrpos( $name, '.' );
        if ( $dot === false ) {
            return $name . $suffix;
        }
        return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
    }

    /**
     * Produce a legal title from an illegal name by passing every
     * character outside Title::legalChars(), and every '~', through the
     * hexChar callback.
     * NOTE(review): hexChar is not defined in this class; presumably it is
     * inherited from TableCleanup - confirm.
     *
     * @param string $name
     * @return string|bool The escaped name, or false when the result is
     *   still not a title that round-trips to itself.
     */
    private function buildSafeTitle( $name ) {
        $x = preg_replace_callback(
            '/([^' . Title::legalChars() . ']|~)/',
            array( $this, 'hexChar' ),
            $name );

        $test = Title::makeTitleSafe( NS_FILE, $x );
        if ( is_null( $test ) || $test->getDBkey() !== $x ) {
            $this->error( "Unable to generate safe title from '$name', got '$x'" );
            return false;
        }

        return $x;
    }
}
00214 
// Record the name of this script's class in $maintClass, then include the
// file named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that file instantiates and runs $maintClass when
// this script is the entry point - confirm against the maintenance framework.
00215 $maintClass = "ImageCleanup";
00216 require_once RUN_MAINTENANCE_IF_MAIN;