MediaWiki
REL1_24
|
<?php
/**
 * Reads metadata out of a PNG file by walking its chunk list.
 *
 * Collected information: APNG animation data (acTL/fcTL), textual chunks
 * (tEXt, zTXt, iTXt) whose keyword is on a fixed list, the tIME and pHYs
 * chunks, and the bit depth / colour type stored in IHDR.
 */
class PNGMetadataExtractor {
	/** @var string The 8-byte PNG file signature; assigned in getMetadata() */
	private static $pngSig;

	/** @var int Number of CRC bytes that follow each chunk's data; assigned in getMetadata() */
	private static $crcSize;

	/** @var array Map of lower-cased PNG text keywords to output field names */
	private static $textChunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Parse a PNG file and return the metadata found in it.
	 *
	 * @param string $filename Path of the PNG file to read
	 * @return array With keys 'frameCount', 'loopCount', 'duration', 'text',
	 *  'bitDepth' and 'colorType'
	 * @throws Exception If the file is missing, cannot be opened, is not a PNG,
	 *  or contains a chunk that cannot be read or is malformed
	 */
	static function getMetadata( $filename ) {
		self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$crcSize = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$textChunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that can throw while the handle is open lives inside this
		// try block, so the handle is always closed before the exception
		// propagates. (catch + rethrow rather than finally, to stay
		// compatible with PHP versions that lack finally.)
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$pngSig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length, 4-byte
			// type, <length> bytes of data, then self::$crcSize bytes of CRC.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Byte 8 is the bit depth, byte 9 the colour type.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Go through self::read() so the MAX_CHUNK_SIZE limit
					// applies here like for every other chunk type.
					$buf = self::read( $fh, $chunk_size );
					// The unpack() below consumes two 32-bit integers, so
					// anything shorter than 8 bytes is unusable.
					if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The delay numerator/denominator start at offset 20.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 100.
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4]= compression type.
						 * $items[5] = content
						 */
						// NOTE(review): $items[4] looks like it captures the byte
						// that follows the language tag rather than the compression
						// method byte - confirm against the PNG spec before relying
						// on it.

						// Theoretically should be case-sensitive, but in practice...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$textChunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
									fseek( $fh, self::$crcSize, SEEK_CUR );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method\n" );
								fseek( $fh, self::$crcSize, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$textChunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}
				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practice...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practice...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$textChunks[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}
						// First byte after the keyword separator is the
						// compression method; the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$textChunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not care about: skip its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that trails the chunk data.
				fseek( $fh, self::$crcSize, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so the last element is not left aliased.
			unset( $value );
		}

		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);
	}

	/**
	 * Read a chunk's data, refusing chunks larger than MAX_CHUNK_SIZE.
	 *
	 * @param resource $fh File handle positioned at the start of the data
	 * @param int $size Number of bytes to read
	 * @return string|bool The bytes read ('' when $size is 0), or whatever
	 *  fread() returns on failure
	 * @throws Exception If $size exceeds MAX_CHUNK_SIZE
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}

		if ( $size === 0 ) {
			// fread() rejects a zero length (warning, or ValueError on
			// PHP 8). An empty string is falsy, so callers still take
			// their existing "Read error" paths.
			return '';
		}

		return fread( $fh, $size );
	}
}