MediaWiki  master
purgeChangedFiles.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Mapping from a value of the 'type' option to the log types scanned for it
 * and, per log type, the log actions that are matched.
 *
 * Shape: [ type option => [ log_type => [ log_action, ... ] ] ].
 */
37  private static $typeMappings = [
38  'created' => [
39  'upload' => [ 'upload' ],
40  'import' => [ 'upload', 'interwiki' ],
41  ],
42  'deleted' => [
43  'delete' => [ 'delete', 'revision' ],
44  'suppress' => [ 'delete', 'revision' ],
45  ],
46  'modified' => [
47  'upload' => [ 'overwrite', 'revert' ],
48  'move' => [ 'move', 'move_redir' ],
49  ],
50  ];
51 
/**
 * Inclusive lower bound on log_timestamp for the scan: the 'starttime' option
 * after conversion by the database's timestamp() method in execute().
 */
55  private $startTimestamp;
56 
/**
 * Inclusive upper bound on log_timestamp for the scan: the 'endtime' option
 * after conversion by the database's timestamp() method in execute().
 */
60  private $endTimestamp;
61 
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	// Values accepted by --type: every key of the mapping table, plus "all".
	$typeChoices = implode( ',', array_keys( self::$typeMappings ) ) . ',all';

	$this->addDescription( 'Scan the logging table and purge files and thumbnails.' );

	// Required options, each taking an argument.
	$this->addOption( 'starttime', 'Starting timestamp', true, true );
	$this->addOption( 'endtime', 'Ending timestamp', true, true );

	// Optional settings.
	$this->addOption(
		'type',
		"Comma-separated list of types of changes to send purges for ({$typeChoices})",
		false,
		true
	);
	$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
	$this->addOption( 'dry-run', 'Do not send purge requests' );
	$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
	$this->addOption( 'verbose', 'Show more output', false, false, 'v' );

	$this->setBatchSize( 100 );
}
75 
/**
 * Validate the options, then purge files for every requested change type.
 *
 * Steps, in order:
 *  - when 'htcp-dest' is given, route all HTCP messages to that host:port
 *    (port 4827 is appended when none is supplied);
 *  - expand the 'type' option ('all' means every key of self::$typeMappings)
 *    and report any unknown type;
 *  - convert 'starttime' and 'endtime' with the database's timestamp() method
 *    and report an error when the start lies after the end;
 *  - call purgeFromLogType() once per requested type.
 *
 * 'dry-run' forces the 'verbose' option on.
 */
public function execute() {
	// $wgHTCPRouting must be declared global here: otherwise the assignment
	// below only creates a local variable and --htcp-dest has no effect.
	global $wgHTCPRouting;

	if ( $this->hasOption( 'htcp-dest' ) ) {
		$parts = explode( ':', $this->getOption( 'htcp-dest' ) );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}

		// Route all HTCP messages to provided host:port
		$wgHTCPRouting = [
			'' => [ 'host' => $parts[0], 'port' => $parts[1] ],
		];
		$this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
	}

	// Find out which actions we should be concerned with
	$typeOpt = $this->getOption( 'type', 'all' );
	$validTypes = array_keys( self::$typeMappings );
	if ( $typeOpt === 'all' ) {
		// Convert 'all' to all registered types
		$typeOpt = implode( ',', $validTypes );
	}
	$typeList = explode( ',', $typeOpt );
	foreach ( $typeList as $type ) {
		// Both sides are strings, so a strict comparison is safe here.
		if ( !in_array( $type, $validTypes, true ) ) {
			$this->error( "\nERROR: Unknown type: {$type}\n" );
			$this->maybeHelp( true );
		}
	}

	// Validate the timestamps
	$dbr = $this->getDB( DB_SLAVE );
	$this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) );
	$this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) );

	if ( $this->startTimestamp > $this->endTimestamp ) {
		$this->error( "\nERROR: starttime after endtime\n" );
		$this->maybeHelp( true );
	}

	// Turn on verbose when dry-run is enabled
	if ( $this->hasOption( 'dry-run' ) ) {
		$this->mOptions['verbose'] = 1;
	}

	$this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n" );
	foreach ( $typeList as $type ) {
		$this->verbose( "Checking for {$type} files...\n" );
		$this->purgeFromLogType( $type );
		if ( !$this->hasOption( 'dry-run' ) ) {
			$this->verbose( "...{$type} files purged.\n\n" );
		}
	}
}
132 
/**
 * Purge cache and thumbnails for changes of the given type.
 *
 * For every log type registered under $type in self::$typeMappings, selects
 * the File-namespace log rows whose timestamp lies between the start and end
 * timestamps (both inclusive) and purges the cache of each logged file.
 *
 *  - 'delete' rows: a storage file that is still present although the file
 *    no longer exists is deleted, but only when a copy is found at the path
 *    computed by getDeletedPath(); the filearchive versions are purged too.
 *  - 'move' rows: the move target is purged as well.
 *
 * With 'dry-run' the matching rows are only listed. With 'sleep-per-batch'
 * the loop pauses each time the row counter exceeds the batch size.
 *
 * @param string $type Type of change; must be a key of self::$typeMappings
 */
protected function purgeFromLogType( $type ) {
	$repo = RepoGroup::singleton()->getLocalRepo();
	$dbr = $this->getDB( DB_SLAVE );

	foreach ( self::$typeMappings[$type] as $logType => $logActions ) {
		$this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" );

		$res = $dbr->select(
			'logging',
			[ 'log_title', 'log_timestamp', 'log_params' ],
			[
				'log_namespace' => NS_FILE,
				'log_type' => $logType,
				'log_action' => $logActions,
				'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ),
				'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ),
			],
			__METHOD__
		);

		$bSize = 0;
		foreach ( $res as $row ) {
			$file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) );

			if ( $this->hasOption( 'dry-run' ) ) {
				$this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
				continue;
			}

			// Purge current version and any versions in oldimage table
			$file->purgeCache();

			if ( $logType === 'delete' ) {
				// If there is an orphaned storage file... delete it
				if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) {
					$dpath = $this->getDeletedPath( $repo, $file );
					if ( $repo->fileExists( $dpath ) ) {
						// Sanity check to avoid data loss
						$repo->getBackend()->delete( [ 'src' => $file->getPath() ] );
						$this->verbose( "Deleted orphan file: {$file->getPath()}.\n" );
					} else {
						$this->error( "File was not deleted: {$file->getPath()}.\n" );
					}
				}

				// Purge items from filearchive table (rows are likely here)
				$this->purgeFromArchiveTable( $repo, $file );
			} elseif ( $logType === 'move' ) {
				// Purge the target file as well.
				// unserialize() returns false for a blob that is not serialized
				// PHP data; such rows carry no usable target and are skipped.
				$params = unserialize( $row->log_params );
				if (
					is_array( $params ) &&
					isset( $params['4::target'] ) &&
					is_string( $params['4::target'] )
				) {
					$target = $params['4::target'];
					// The move log stores the target as prefixed title text
					// (e.g. "File:New name.png"), so it has to be parsed rather
					// than used as a bare DB key; NS_FILE is only the default
					// for a value without a namespace prefix.
					$targetTitle = Title::newFromText( $target, NS_FILE );
					$targetFile = $targetTitle ? $repo->newFile( $targetTitle ) : null;
					if ( $targetFile ) {
						$targetFile->purgeCache();
						$this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" );
					} else {
						$this->verbose( "Skipped move target {$target}: not a valid file title.\n" );
					}
				}
			}

			$this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" );

			if ( $this->hasOption( 'sleep-per-batch' ) && ++$bSize > $this->mBatchSize ) {
				$bSize = 0;
				// sleep-per-batch is milliseconds, usleep wants micro seconds.
				usleep( 1000 * (int)$this->getOption( 'sleep-per-batch' ) );
			}
		}
	}
}
207 
/**
 * Purge the archived (old) versions of a file that are listed in the
 * filearchive table.
 *
 * For each filearchive row with a non-null archive name: when $file no
 * longer exists but the old version is still in storage, that storage file
 * is deleted, provided a copy is found at the path computed by
 * getDeletedPath(); afterwards the thumbnails of that archived version are
 * purged.
 *
 * @param LocalRepo $repo Repository the file belongs to
 * @param LocalFile $file File whose archived versions are processed
 */
protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) {
	$archiveRows = $repo->getSlaveDB()->select(
		'filearchive',
		[ 'fa_archive_name' ],
		[ 'fa_name' => $file->getName() ],
		__METHOD__
	);

	foreach ( $archiveRows as $archiveRow ) {
		$archiveName = $archiveRow->fa_archive_name;
		if ( $archiveName === null ) {
			// Row describes a current version, not an old one; nothing to do here
			continue;
		}

		$oldFile = $repo->newFromArchiveName( $file->getTitle(), $archiveName );

		// An old version still in storage for a file that no longer exists is an orphan
		$isOrphan = !$file->exists() && $repo->fileExists( $oldFile->getPath() );
		if ( $isOrphan ) {
			$deletedPath = $this->getDeletedPath( $repo, $oldFile );
			if ( !$repo->fileExists( $deletedPath ) ) {
				$this->error( "File was not deleted: {$oldFile->getPath()}.\n" );
			} else {
				// Only remove the orphan once the deleted copy is confirmed (avoids data loss)
				$repo->getBackend()->delete( [ 'src' => $oldFile->getPath() ] );
				$this->output( "Deleted orphan file: {$oldFile->getPath()}.\n" );
			}
		}

		$file->purgeOldThumbnails( $archiveName );
	}
}
237 
/**
 * Build the path under which a deleted copy of the given file is looked up.
 *
 * The key is "<sha1 of the stored file>.<file extension>"; the result is
 * the repository's deleted hash path for that key followed by the key.
 *
 * @param LocalRepo $repo Repository used to hash the file and derive the hash path
 * @param LocalFile $file File whose storage path and extension are used
 * @return string
 */
protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
	$sha1 = $repo->getFileSha1( $file->getPath() );
	$storageKey = $sha1 . '.' . $file->getExtension();
	$hashDir = $repo->getDeletedHashPath( $storageKey );

	return $hashDir . $storageKey;
}
244 
/**
 * Send an output message iff the 'verbose' option has been provided.
 *
 * @param string $msg Message to output
 */
protected function verbose( $msg ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}

	$this->output( $msg );
}
255 }
256 
// Name the class this script provides, then include RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; presumably
// it runs $maintClass when the script is invoked directly - confirm.
257 $maintClass = "PurgeChangedFiles";
258 require_once RUN_MAINTENANCE_IF_MAIN;
static array string $startTimestamp
exists()
canRender inherited
Definition: LocalFile.php:843
newFromArchiveName($title, $archiveName)
Definition: LocalRepo.php:84
getSlaveDB()
Get a connection to the slave DB.
Definition: LocalRepo.php:459
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
verbose($msg)
Send an output message iff the 'verbose' option has been provided.
A repository that stores files in the local filesystem and registers them in the wiki's own database...
Definition: LocalRepo.php:31
getDB($db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption($name)
Checks to see if a particular param exists.
getName()
Return the name of this file.
Definition: File.php:296
static array $typeMappings
Mapping from type option to log type and actions.
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition: design.txt:93
getTitle()
Return the associated title object.
Definition: File.php:325
getBackend()
Get the file backend instance.
Definition: FileRepo.php:215
getPath()
Return the storage path to the file.
Definition: File.php:416
addOption($name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getFileSha1($virtualUrl)
Get the sha1 (base 36) of a file with a given virtual URL/storage path.
Definition: FileRepo.php:1577
purgeFromArchiveTable(LocalRepo $repo, LocalFile $file)
unserialize($serialized)
Definition: ApiMessage.php:102
$res
Definition: database.txt:21
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
$params
purgeFromLogType($type)
Purge cache and thumbnails for changes of the given type.
const DB_SLAVE
Definition: Defines.php:46
fileExists($file)
Checks existence of a file.
Definition: FileRepo.php:1350
const NS_FILE
Definition: Defines.php:75
addDescription($text)
Set the description text.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
getOption($name, $default=null)
Get an option, or return the default.
Maintenance script that scans the deletion log and purges affected files within a timeframe...
output($out, $channel=null)
Throw some output to the user.
purgeOldThumbnails($archiveName)
Delete cached transformed files for an archived version only.
Definition: LocalFile.php:916
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
maybeHelp($force=false)
Maybe show the help.
getDeletedHashPath($key)
Get a relative path for a deletion archive key, e.g.
Definition: FileRepo.php:1479
error($err, $die=0)
Throw an error to the user.
Class to represent a local file in the wiki's own database.
Definition: LocalFile.php:48
setBatchSize($s=0)
Set the batch size.
getDeletedPath(LocalRepo $repo, LocalFile $file)
$wgHTCPRouting
Routing configuration for HTCP multicast purging.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2376
static makeTitle($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:503