MediaWiki  REL1_24
purgeChangedFiles.php
Go to the documentation of this file.
00001 <?php
00025 require_once __DIR__ . '/Maintenance.php';
00026 
/**
 * Maintenance script that scans the logging table for file changes made
 * within a time window and purges the cached copies and thumbnails of the
 * affected files.
 *
 * @ingroup Maintenance
 */
class PurgeChangedFiles extends Maintenance {
    /**
     * Mapping from a change type accepted by --type to the log entries that
     * represent it, shaped as:
     *   change type => array( log_type => array( log_action, ... ) )
     *
     * @var array
     */
    private static $typeMappings = array(
        'created' => array(
            'upload' => array( 'upload' ),
            'import' => array( 'upload', 'interwiki' ),
        ),
        'deleted' => array(
            'delete' => array( 'delete', 'revision' ),
            'suppress' => array( 'delete', 'revision' ),
        ),
        'modified' => array(
            'upload' => array( 'overwrite', 'revert' ),
            'move' => array( 'move', 'move_redir' ),
        ),
    );

    /**
     * @var string Lower bound (inclusive) of the scanned log window, in the
     *  format produced by the database's timestamp() method
     */
    private $startTimestamp;

    /**
     * @var string Upper bound (inclusive) of the scanned log window, in the
     *  format produced by the database's timestamp() method
     */
    private $endTimestamp;

    /**
     * Register the script description, its command line options and the
     * default batch size.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Scan the logging table and purge files and thumbnails.";
        $this->addOption( 'starttime', 'Starting timestamp', true, true );
        $this->addOption( 'endtime', 'Ending timestamp', true, true );
        $this->addOption( 'type', 'Comma-separated list of types of changes to send purges for (' .
            implode( ',', array_keys( self::$typeMappings ) ) . ',all)', false, true );
        $this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
        $this->addOption( 'dry-run', 'Do not send purge requests' );
        $this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
        $this->addOption( 'verbose', 'Show more output', false, false, 'v' );
        $this->setBatchSize( 100 );
    }

    /**
     * Entry point: validate the options, then purge every file that has a
     * matching log entry for each requested change type.
     */
    public function execute() {
        global $wgHTCPRouting;

        if ( $this->hasOption( 'htcp-dest' ) ) {
            $parts = explode( ':', $this->getOption( 'htcp-dest' ) );
            if ( count( $parts ) < 2 ) {
                // Add default htcp port
                $parts[] = '4827';
            }

            // Route all HTCP messages to provided host:port
            $wgHTCPRouting = array(
                '' => array( 'host' => $parts[0], 'port' => $parts[1] ),
            );
            $this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
        }

        // Find out which actions we should be concerned with
        $typeOpt = $this->getOption( 'type', 'all' );
        $validTypes = array_keys( self::$typeMappings );
        if ( $typeOpt === 'all' ) {
            // Convert 'all' to all registered types
            $typeOpt = implode( ',', $validTypes );
        }
        // Tolerate whitespace around the commas and drop repeated entries so
        // that the same change type is never scanned and purged twice.
        $typeList = array_unique( array_map( 'trim', explode( ',', $typeOpt ) ) );
        foreach ( $typeList as $type ) {
            if ( !in_array( $type, $validTypes, true ) ) {
                $this->error( "\nERROR: Unknown type: {$type}\n" );
                $this->maybeHelp( true );
            }
        }

        // Validate the timestamps
        $dbr = $this->getDB( DB_SLAVE );
        $this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
        $this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

        // The conversion yields false for input it cannot parse. Without this
        // check a bogus starttime would slip past the range comparison below
        // and turn into an empty lower bound in the log query, i.e. purge
        // everything up to endtime.
        if ( $this->startTimestamp === false || $this->endTimestamp === false ) {
            $this->error( "\nERROR: invalid starttime or endtime\n" );
            $this->maybeHelp( true );
        }

        if ( $this->startTimestamp > $this->endTimestamp ) {
            $this->error( "\nERROR: starttime after endtime\n" );
            $this->maybeHelp( true );
        }

        // Turn on verbose when dry-run is enabled
        if ( $this->hasOption( 'dry-run' ) ) {
            $this->mOptions['verbose'] = 1;
        }

        $this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
        foreach ( $typeList as $type ) {
            $this->verbose( "Checking for {$type} files...\n" );
            $this->purgeFromLogType( $type );
            if ( !$this->hasOption( 'dry-run' ) ) {
                $this->verbose( "...{$type} files purged.\n\n" );
            }
        }
    }

    /**
     * Purge cache and thumbnails for files that have log entries of the
     * given change type inside the configured time window.
     *
     * @param string $type Change type; must be a key of self::$typeMappings
     */
    protected function purgeFromLogType( $type ) {
        $repo = RepoGroup::singleton()->getLocalRepo();
        $dbr = $this->getDB( DB_SLAVE );
        $dryRun = $this->hasOption( 'dry-run' );

        foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
            $this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

            $res = $dbr->select(
                'logging',
                array( 'log_title', 'log_timestamp', 'log_params' ),
                array(
                    'log_namespace' => NS_FILE,
                    'log_type' => $logType,
                    'log_action' => $logActions,
                    'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
                    'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
                ),
                __METHOD__
            );

            $bSize = 0;
            foreach ( $res as $row ) {
                if ( $dryRun ) {
                    // Only report what would be purged; no file object needed
                    $this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
                    continue;
                }

                $file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

                // Purge current version and any versions in oldimage table
                $file->purgeCache();
                $file->purgeHistory();

                // NOTE(review): 'suppress' entries are mapped under 'deleted'
                // but do not take this branch, so they skip the orphan and
                // filearchive cleanup - confirm that this is intended.
                if ( $logType === 'delete' ) {
                    // If there is an orphaned storage file... delete it
                    if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
                        $dpath = $this->getDeletedPath( $repo, $file );
                        if ( $repo->fileExists( $dpath ) ) {
                            // Sanity check to avoid data loss
                            $repo->getBackend()->delete( array( 'src' => $file->getPath() ) );
                            $this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
                        } else {
                            $this->error( "File was not deleted: {$file->getPath()}.\n" );
                        }
                    }

                    // Purge items from filearchive table (rows are likely here)
                    $this->purgeFromArchiveTable( $repo, $file );
                } elseif ( $logType === 'move' ) {
                    // Purge the target file as well
                    $this->purgeMoveTarget( $repo, $row );
                }

                $this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

                // Pause once every mBatchSize files. The counter is
                // pre-incremented, so ">=" yields batches of exactly
                // mBatchSize (">" would let one extra file through).
                if ( $this->hasOption( 'sleep-per-batch' ) && ++$bSize >= $this->mBatchSize ) {
                    $bSize = 0;
                    // sleep-per-batch is milliseconds, usleep wants micro seconds.
                    usleep( 1000 * (int)$this->getOption( 'sleep-per-batch' ) );
                }
            }
        }
    }

    /**
     * Purge the destination file of a move log entry, when the entry records
     * one under the '4::target' parameter.
     *
     * @param LocalRepo $repo
     * @param stdClass $row Row of the logging table with log_params and
     *  log_timestamp
     */
    private function purgeMoveTarget( LocalRepo $repo, $row ) {
        // NOTE: unserialize() must never be fed untrusted data; log_params is
        // read from the wiki's own logging table here.
        $params = unserialize( $row->log_params );
        if ( !is_array( $params ) || !isset( $params['4::target'] )
            || !is_string( $params['4::target'] )
        ) {
            return;
        }

        $target = $params['4::target'];
        // Parse the stored name instead of using it verbatim as a DB key:
        // newFromText() resolves a "File:" prefix if one is present and
        // normalizes the text, while the NS_FILE default covers bare names.
        $targetTitle = Title::newFromText( $target, NS_FILE );
        if ( !$targetTitle || $targetTitle->getNamespace() !== NS_FILE ) {
            $this->error( "Skipped invalid move target: {$target}.\n" );

            return;
        }

        $targetFile = $repo->newFile( $targetTitle );
        $targetFile->purgeCache();
        $targetFile->purgeHistory();
        $this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
    }

    /**
     * Purge thumbnails of the old versions recorded in the filearchive table
     * for a file, deleting any orphaned storage copy that is still present.
     *
     * @param LocalRepo $repo
     * @param LocalFile $file
     */
    protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
        $dbr = $repo->getSlaveDB();
        $res = $dbr->select(
            'filearchive',
            array( 'fa_archive_name' ),
            array( 'fa_name' => $file->getName() ),
            __METHOD__
        );

        foreach ( $res as $row ) {
            if ( $row->fa_archive_name === null ) {
                // Was not an old version (current version names checked already)
                continue;
            }
            $ofile = $repo->newFromArchiveName( $file->getTitle(), $row->fa_archive_name );
            // If there is an orphaned storage file still there...delete it
            if ( !$file->exists() && $repo->fileExists( $ofile->getPath() ) ) {
                $dpath = $this->getDeletedPath( $repo, $ofile );
                if ( $repo->fileExists( $dpath ) ) {
                    // Sanity check to avoid data loss
                    $repo->getBackend()->delete( array( 'src' => $ofile->getPath() ) );
                    $this->output( "Deleted orphan file: {$ofile->getPath()}.\n" );
                } else {
                    $this->error( "File was not deleted: {$ofile->getPath()}.\n" );
                }
            }
            $file->purgeOldThumbnails( $row->fa_archive_name );
        }
    }

    /**
     * Build the path the given file would have in the deleted-file storage,
     * keyed by the SHA-1 of its current content plus its extension.
     *
     * @param LocalRepo $repo
     * @param LocalFile $file
     * @return string
     */
    protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
        $hash = $repo->getFileSha1( $file->getPath() );
        $key = "{$hash}.{$file->getExtension()}";

        return $repo->getDeletedHashPath( $key ) . $key;
    }

    /**
     * Send a message to the output only when the 'verbose' option is set.
     *
     * @param string $msg Message to print
     */
    protected function verbose( $msg ) {
        if ( $this->hasOption( 'verbose' ) ) {
            $this->output( $msg );
        }
    }
}
00259 
// Name the class that implements this script, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates and
// executes $maintClass when this file is invoked directly - confirm against
// Maintenance.php).
$maintClass = 'PurgeChangedFiles';
require_once RUN_MAINTENANCE_IF_MAIN;