MediaWiki  REL1_20
PNGMetadataExtractor.php
Go to the documentation of this file.
00001 <?php
/**
 * Extracts metadata from a PNG / APNG file by walking its chunk list.
 *
 * Only the chunks needed for metadata are decoded (IHDR, acTL, fcTL,
 * iTXt, tEXt, zTXt, tIME, pHYs); every other chunk is skipped without
 * being read into memory.
 */
class PNGMetadataExtractor {
	/** @var string The 8-byte PNG file signature. (Re)initialised by getMetadata(). */
	static $png_sig;

	/** @var int Size in bytes of the CRC that trails every chunk. (Re)initialised by getMetadata(). */
	static $CRC_size;

	/**
	 * @var array Map of lower-cased PNG text keyword => metadata field name.
	 *   Text chunks whose keyword is not in this map are ignored.
	 *   (Re)initialised by getMetadata().
	 */
	static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Parse a PNG file and return the metadata found in it.
	 *
	 * @param $filename String: path of the file to inspect
	 * @return Array with the keys:
	 *   - 'frameCount': number of frames from the acTL chunk (0 if absent)
	 *   - 'loopCount':  number of plays from the acTL chunk (1 if absent)
	 *   - 'duration':   sum of all fcTL frame delays in seconds, multiplied
	 *                   by loopCount when loopCount is greater than 1
	 *   - 'text':       textual metadata keyed by metadata field name
	 *   - 'bitDepth':   bit depth from IHDR (0 if IHDR was not seen)
	 *   - 'colorType':  colour type name from IHDR, or 'unknown'
	 * @throws Exception if no usable file was given, the file cannot be
	 *   opened, it is not a PNG, or a chunk is truncated or malformed
	 */
	static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist'      => 'Artist',
			'model'       => 'Model',
			'make'        => 'Make',
			'author'      => 'Artist',
			'comment'     => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title'       => 'ObjectName',
			'copyright'   => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source'      => 'Model',
			'software'    => 'Software',
			'disclaimer'  => 'Disclaimer',
			'warning'     => 'ContentWarning',
			'url'         => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label'       => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that can throw while the file is open lives inside this
		// try block so that the handle is always released. Catch-and-rethrow
		// is used instead of "finally" so the code keeps working on PHP
		// versions that do not have it.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf != self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length, 4-byte
			// type, <length> bytes of data, then a CRC of self::$CRC_size bytes.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Byte 8 is the bit depth, byte 9 the colour type.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// The unpack() below needs two 32-bit integers, so anything
					// shorter than 8 bytes cannot be a usable acTL chunk.
					if ( $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Go through self::read() so MAX_CHUNK_SIZE is enforced
					// here just like for every other chunk that is loaded.
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The delay numerator/denominator start at offset 20.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 100 (hundredths of a second).
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[5] = content.
						 * The compression method byte is matched by the literal
						 * \x00 that follows the flag, so only method 0 gets this
						 * far. $items[4] is the single byte following the lang
						 * code, which for a well-formed chunk is the NUL that
						 * terminates it.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. ' or potentially invalid compression method' );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[ $items[1] ];
						$text[ $finalKeyword ][ $items[3] ] = $items[5];
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					// The content is converted from ISO-8859-1 to UTF-8.
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[ $keyword ];
					$text[ $finalKeyword ][ 'x-default' ] = $content;
					$text[ $finalKeyword ]['_type'] = 'lang';

				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword is the compression
						// method; the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[ $keyword ];
						$text[ $finalKeyword ][ 'x-default' ] = $content;
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}

				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk we do not care about: skip its data unread.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that follows the chunk data.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later use of $value cannot
			// accidentally write into the metadata array.
			unset( $value );
		}
		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);

	}

	/**
	 * Read a chunk's data, refusing chunks larger than MAX_CHUNK_SIZE.
	 *
	 * @param $fh resource: open file handle positioned at the chunk data
	 * @param $size Integer: number of bytes to read
	 * @return String|bool the bytes read (possibly fewer than $size at end
	 *   of file), '' when $size is not positive, or false if fread() fails
	 * @throws Exception if $size exceeds MAX_CHUNK_SIZE
	 */
	static private function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size < 1 ) {
			// fread() rejects a non-positive length (warning on older PHP,
			// ValueError on PHP 8). An empty string lets every caller fall
			// into its normal "read error" handling instead.
			return '';
		}
		return fread( $fh, $size );
	}
}