MediaWiki REL1_24 — includes/api/ApiQueryDuplicateFiles.php (generated source listing)
<?php

/**
 * API query module that lists all files which are duplicates of the given
 * file(s), matched by SHA-1 hash. Usable both as a prop module
 * (prop=duplicatefiles) and as a generator (generator=duplicatefiles).
 *
 * Duplicates are looked up across all configured file repos unless the
 * 'localonly' parameter restricts the search to the local repo.
 */
class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

	public function __construct( ApiQuery $query, $moduleName ) {
		// 'df' is this module's parameter prefix (dflimit, dfcontinue, ...).
		parent::__construct( $query, $moduleName, 'df' );
	}

	public function execute() {
		$this->run();
	}

	/**
	 * Results depend only on wiki content, not on the requesting user,
	 * so the response is publicly cacheable.
	 *
	 * @param array $params
	 * @return string
	 */
	public function getCacheMode( $params ) {
		return 'public';
	}

	public function executeGenerator( $resultPageSet ) {
		$this->run( $resultPageSet );
	}

	/**
	 * Shared implementation for execute() and executeGenerator().
	 *
	 * @param ApiPageSet|null $resultPageSet When set (generator mode), duplicate
	 *   titles are collected into the page set instead of the result.
	 */
	private function run( $resultPageSet = null ) {
		$params = $this->extractRequestParams();
		$namespaces = $this->getPageSet()->getAllTitlesByNamespace();
		if ( empty( $namespaces[NS_FILE] ) ) {
			// None of the input titles are file pages; nothing to do.
			return;
		}
		// Map of file DB key => page ID for all input file titles.
		$images = $namespaces[NS_FILE];

		if ( $params['dir'] == 'descending' ) {
			$images = array_reverse( $images );
		}

		// Continuation state: the continue param is "<image>|<dupName>".
		// $skipUntilThisDup stays a string until we reach the duplicate we
		// stopped at last time, then becomes false.
		$skipUntilThisDup = false;
		if ( isset( $params['continue'] ) ) {
			$cont = explode( '|', $params['continue'] );
			$this->dieContinueUsageIf( count( $cont ) != 2 );
			$fromImage = $cont[0];
			$skipUntilThisDup = $cont[1];
			// Filter out any images before $fromImage
			// NOTE(review): this '<' comparison assumes $images is in
			// ascending key order; with dir=descending the array was just
			// reversed, so the loop breaks on the first element and filters
			// nothing — verify whether continuation is meant to work with
			// descending order.
			foreach ( $images as $image => $pageId ) {
				if ( $image < $fromImage ) {
					unset( $images[$image] );
				} else {
					break;
				}
			}
		}

		// Resolve the remaining titles to File objects (honouring localonly).
		$filesToFind = array_keys( $images );
		if ( $params['localonly'] ) {
			$files = RepoGroup::singleton()->getLocalRepo()->findFiles( $filesToFind );
		} else {
			$files = RepoGroup::singleton()->findFiles( $filesToFind );
		}

		$fit = true;
		$count = 0;
		$titles = array();

		// Map each existing file's name to its SHA-1 hash.
		$sha1s = array();
		foreach ( $files as $file ) {
			/** @var $file File */
			$sha1s[$file->getName()] = $file->getSha1();
		}

		// find all files with the hashes, result format is:
		// array( hash => array( dup1, dup2 ), hash1 => ... )
		$filesToFindBySha1s = array_unique( array_values( $sha1s ) );
		if ( $params['localonly'] ) {
			$filesBySha1s = RepoGroup::singleton()->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
		} else {
			$filesBySha1s = RepoGroup::singleton()->findBySha1s( $filesToFindBySha1s );
		}

		// iterate over $images (not $filesBySha1s) so the continue param is
		// handled correctly and results are grouped per input page.
		foreach ( $images as $image => $pageId ) {
			if ( !isset( $sha1s[$image] ) ) {
				continue; //file does not exist
			}
			$sha1 = $sha1s[$image];
			$dupFiles = $filesBySha1s[$sha1];
			if ( $params['dir'] == 'descending' ) {
				$dupFiles = array_reverse( $dupFiles );
			}
			/** @var $dupFile File */
			foreach ( $dupFiles as $dupFile ) {
				$dupName = $dupFile->getName();
				if ( $image == $dupName && $dupFile->isLocal() ) {
					continue; //ignore the local file itself
				}
				// NOTE(review): as with the filter above, this '<' skip test
				// presumes ascending name order; confirm behaviour for
				// dir=descending where $dupFiles was reversed.
				if ( $skipUntilThisDup !== false && $dupName < $skipUntilThisDup ) {
					continue; //skip to pos after the image from continue param
				}
				$skipUntilThisDup = false;
				if ( ++$count > $params['limit'] ) {
					$fit = false; //break outer loop
					// We're one over limit which shows that
					// there are additional images to be had. Stop here...
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
				if ( !is_null( $resultPageSet ) ) {
					// Generator mode: just collect the duplicate's title.
					$titles[] = $dupFile->getTitle();
				} else {
					// Prop mode: emit name/user/timestamp, plus a 'shared'
					// flag (empty-string boolean, API style) for non-local files.
					$r = array(
						'name' => $dupName,
						'user' => $dupFile->getUser( 'text' ),
						'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
					);
					if ( !$dupFile->isLocal() ) {
						$r['shared'] = '';
					}
					$fit = $this->addPageSubItem( $pageId, $r );
					if ( !$fit ) {
						// Result is full; record where to resume.
						$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
						break;
					}
				}
			}
			if ( !$fit ) {
				break;
			}
		}
		if ( !is_null( $resultPageSet ) ) {
			$resultPageSet->populateFromTitles( $titles );
		}
	}

	public function getAllowedParams() {
		return array(
			'limit' => array(
				ApiBase::PARAM_DFLT => 10,
				ApiBase::PARAM_TYPE => 'limit',
				ApiBase::PARAM_MIN => 1,
				ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
				ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
			),
			'continue' => null,
			'dir' => array(
				ApiBase::PARAM_DFLT => 'ascending',
				ApiBase::PARAM_TYPE => array(
					'ascending',
					'descending'
				)
			),
			'localonly' => false,
		);
	}

	public function getParamDescription() {
		return array(
			'limit' => 'How many duplicate files to return',
			'continue' => 'When more results are available, use this to continue',
			'dir' => 'The direction in which to list',
			'localonly' => 'Look only for files in the local repository',
		);
	}

	public function getDescription() {
		return 'List all files that are duplicates of the given file(s) based on hash values.';
	}

	public function getExamples() {
		return array(
			'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
			'api.php?action=query&generator=allimages&prop=duplicatefiles',
		);
	}

	public function getHelpUrls() {
		return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';
	}
}