MediaWiki  REL1_22
purgeChangedFiles.php
Go to the documentation of this file.
<?php
/**
 * Scan the logging table and purge files and thumbnails that changed
 * within a given time range.
 *
 * @file
 * @ingroup Maintenance
 */

// Pulls in the Maintenance base class that PurgeChangedFiles extends.
require_once __DIR__ . '/Maintenance.php';
00026 
/**
 * Maintenance script that scans the logging table for file changes made
 * between two timestamps and purges the affected files and thumbnails.
 *
 * @ingroup Maintenance
 */
class PurgeChangedFiles extends Maintenance {
    /**
     * Maps every value accepted by --type to the log entries that represent
     * it, in the form: change type => ( log_type => list of log_action ).
     *
     * @var array
     */
    private static $typeMappings = array(
        'created' => array(
            'upload' => array( 'upload' ),
            'import' => array( 'upload', 'interwiki' ),
        ),
        'deleted' => array(
            'delete' => array( 'delete', 'revision' ),
            'suppress' => array( 'delete', 'revision' ),
        ),
        'modified' => array(
            'upload' => array( 'overwrite', 'revert' ),
            'move' => array( 'move', 'move_redir' ),
        ),
    );

    /**
     * @var string Lower bound (inclusive) for log_timestamp, in the format
     *   produced by the database object's timestamp() method
     */
    private $startTimestamp;

    /**
     * @var string Upper bound (inclusive) for log_timestamp, in the format
     *   produced by the database object's timestamp() method
     */
    private $endTimestamp;

    /**
     * Register the script description and its command line options.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Scan the logging table and purge files and thumbnails.";
        $this->addOption( 'starttime', 'Starting timestamp', true, true );
        $this->addOption( 'endtime', 'Ending timestamp', true, true );
        $this->addOption( 'type', 'Comma-separated list of types of changes to send purges for (' .
            implode( ',', array_keys( self::$typeMappings ) ) . ',all)', false, true );
        $this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
        $this->addOption( 'dry-run', 'Do not send purge requests' );
        $this->addOption( 'verbose', 'Show more output', false, false, 'v' );
    }

    /**
     * Entry point: validate the options, then purge files for each requested
     * change type in turn.
     *
     * Invalid input (unknown type, unparsable timestamp, starttime after
     * endtime) is reported via error() followed by maybeHelp( true ).
     */
    public function execute() {
        global $wgHTCPRouting;

        if ( $this->hasOption( 'htcp-dest' ) ) {
            $parts = explode( ':', $this->getOption( 'htcp-dest' ) );
            if ( count( $parts ) < 2 ) {
                // Add default htcp port
                $parts[] = '4827';
            }

            // Route all HTCP messages to provided host:port
            $wgHTCPRouting = array(
                '' => array( 'host' => $parts[0], 'port' => $parts[1] ),
            );
            $this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
        }

        // Find out which actions we should be concerned with
        $typeOpt = $this->getOption( 'type', 'all' );
        $validTypes = array_keys( self::$typeMappings );
        if ( $typeOpt === 'all' ) {
            // Convert 'all' to all registered types
            $typeOpt = implode( ',', $validTypes );
        }
        $typeList = explode( ',', $typeOpt );
        foreach ( $typeList as $type ) {
            // Both sides are strings, so a strict comparison is safe here
            if ( !in_array( $type, $validTypes, true ) ) {
                $this->error( "\nERROR: Unknown type: {$type}\n" );
                $this->maybeHelp( true );
            }
        }

        // Validate the timestamps
        $dbr = $this->getDB( DB_SLAVE );
        $this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
        $this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

        // timestamp() is expected to yield false for input it cannot parse
        // (see wfTimestamp()). Without this check a bogus value would be
        // quoted into the query as an empty string and silently change the
        // range that gets purged.
        $parsedTimestamps = array(
            'starttime' => $this->startTimestamp,
            'endtime' => $this->endTimestamp,
        );
        foreach ( $parsedTimestamps as $optName => $parsed ) {
            if ( $parsed === false ) {
                $this->error( "\nERROR: Invalid {$optName}: " . $this->getOption( $optName ) . "\n" );
                $this->maybeHelp( true );
            }
        }

        if ( $this->startTimestamp > $this->endTimestamp ) {
            $this->error( "\nERROR: starttime after endtime\n" );
            $this->maybeHelp( true );
        }

        // Turn on verbose when dry-run is enabled
        if ( $this->hasOption( 'dry-run' ) ) {
            $this->mOptions['verbose'] = 1;
        }

        $this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
        foreach ( $typeList as $type ) {
            $this->verbose( "Checking for {$type} files...\n" );
            $this->purgeFromLogType( $type );
            if ( !$this->hasOption( 'dry-run' ) ) {
                $this->verbose( "...{$type} files purged.\n\n" );
            }
        }
    }

    /**
     * Purge cache and thumbnails for files with matching log entries in the
     * configured time window.
     *
     * In dry-run mode the matching entries are only listed.
     *
     * @param string $type Change type; must be a key of self::$typeMappings
     */
    protected function purgeFromLogType( $type ) {
        $repo = RepoGroup::singleton()->getLocalRepo();
        $dbr = $this->getDB( DB_SLAVE );

        foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
            $this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

            $res = $dbr->select(
                'logging',
                array( 'log_title', 'log_timestamp', 'log_params' ),
                array(
                    'log_namespace' => NS_FILE,
                    'log_type' => $logType,
                    'log_action' => $logActions,
                    'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
                    'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
                ),
                __METHOD__
            );

            foreach ( $res as $row ) {
                $file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

                if ( $this->hasOption( 'dry-run' ) ) {
                    $this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
                    continue;
                }

                // Purge current version and any versions in oldimage table
                $file->purgeCache();
                $file->purgeHistory();

                if ( $logType === 'delete' ) {
                    // If there is an orphaned storage file... delete it
                    if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
                        $dpath = $this->getDeletedPath( $repo, $file );
                        if ( $repo->fileExists( $dpath ) ) {
                            // Sanity check to avoid data loss: only remove the public
                            // copy once a copy is known to exist at the deleted path.
                            // NOTE(review): the result of delete() is not inspected, so
                            // the message below is printed even if deletion failed.
                            $repo->getBackend()->delete( array( 'src' => $file->getPath() ) );
                            $this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );

                        } else {
                            $this->error( "File was not deleted: {$file->getPath()}.\n" );
                        }
                    }

                    // Purge items from filearchive table (rows are likely here)
                    $this->purgeFromArchiveTable( $repo, $file );

                } elseif ( $logType === 'move' ) {
                    // Purge the target file as well

                    // A value that does not unserialize to an array with this key
                    // simply fails the isset() check and is skipped.
                    $params = unserialize( $row->log_params );
                    if ( isset( $params['4::target'] ) ) {
                        // NOTE(review): assumes '4::target' holds the title without a
                        // namespace prefix - confirm against the move log format.
                        $target = $params['4::target'];
                        $targetFile = $repo->newFile( Title::makeTitle( NS_FILE, $target ) );
                        $targetFile->purgeCache();
                        $targetFile->purgeHistory();
                        $this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
                    }
                }

                $this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );
            }
        }
    }

    /**
     * Purge old-version thumbnails for every archived version of a file
     * listed in the filearchive table, removing orphaned storage files that
     * already have a copy at the deleted path.
     *
     * @param LocalRepo $repo
     * @param LocalFile $file
     */
    protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
        $dbr = $repo->getSlaveDB();
        $res = $dbr->select(
            'filearchive',
            array( 'fa_archive_name' ),
            array( 'fa_name' => $file->getName() ),
            __METHOD__
        );

        foreach ( $res as $row ) {
            if ( $row->fa_archive_name === null ) {
                // Was not an old version (current version names checked already)
                continue;
            }
            $ofile = $repo->newFromArchiveName( $file->getTitle(), $row->fa_archive_name );
            // If there is an orphaned storage file still there...delete it
            // NOTE(review): existence is tested on $file (the current version),
            // not on $ofile - confirm this is intended.
            if ( !$file->exists() && $repo->fileExists( $ofile->getPath() ) ) {
                $dpath = $this->getDeletedPath( $repo, $ofile );
                if ( $repo->fileExists( $dpath ) ) {
                    // Sanity check to avoid data loss
                    $repo->getBackend()->delete( array( 'src' => $ofile->getPath() ) );
                    $this->output( "Deleted orphan file: {$ofile->getPath()}.\n" );

                } else {
                    $this->error( "File was not deleted: {$ofile->getPath()}.\n" );
                }
            }
            $file->purgeOldThumbnails( $row->fa_archive_name );
        }
    }

    /**
     * Build the deleted-storage path for a file: the repo's deleted hash
     * path for the key "<sha1>.<extension>", followed by that key.
     *
     * @param LocalRepo $repo
     * @param LocalFile $file
     * @return string
     */
    protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
        $hash = $repo->getFileSha1( $file->getPath() );
        $key = "{$hash}.{$file->getExtension()}";
        return $repo->getDeletedHashPath( $key ) . $key;
    }

    /**
     * Send a message to output, but only when the 'verbose' option is set
     * (execute() also sets it for dry runs).
     *
     * @param string $msg Message to print
     */
    protected function verbose( $msg ) {
        if ( $this->hasOption( 'verbose' ) ) {
            $this->output( $msg );
        }
    }

}
00253 
// Name the class to run, then hand over to the maintenance bootstrap.
// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it comes
// from Maintenance.php - confirm there.
$maintClass = "PurgeChangedFiles";
require_once RUN_MAINTENANCE_IF_MAIN;