MediaWiki  REL1_19
PNGMetadataExtractor.php
Go to the documentation of this file.
00001 <?php
/**
 * Reads a PNG (or APNG) file chunk by chunk and collects animation
 * information, colour information and textual metadata from it.
 *
 * Only getMetadata() is meant to be called from outside; it walks the
 * chunk stream and interprets IHDR, acTL, fcTL, iTXt, tEXt, zTXt, tIME and
 * pHYs chunks. Every other chunk type is skipped.
 */
class PNGMetadataExtractor {
	/** Eight-byte PNG file signature; assigned on every getMetadata() call. */
	static $png_sig;
	/** Number of CRC bytes that trail each chunk; assigned on every getMetadata() call. */
	static $CRC_size;
	/** Map of lower-cased PNG text keywords to metadata property names; assigned on every getMetadata() call. */
	static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Extract metadata from a PNG file.
	 *
	 * @param $filename String: path of the file to inspect
	 * @return Array with the keys:
	 *   - 'frameCount': frame count taken from acTL (0 if there is no acTL chunk)
	 *   - 'loopCount': play count taken from acTL (1 if there is no acTL chunk)
	 *   - 'duration': sum of the fcTL frame delays in seconds, multiplied by
	 *     loopCount when loopCount is greater than 1
	 *   - 'text': array of extracted textual/time/resolution properties
	 *   - 'bitDepth': bit depth byte from IHDR (0 if no IHDR was seen)
	 *   - 'colorType': 'greyscale', 'truecolour', 'index-coloured',
	 *     'greyscale-alpha', 'truecolour-alpha' or 'unknown'
	 * @throws Exception if the file is missing, cannot be opened, does not
	 *   start with the PNG signature, or contains a chunk that cannot be read
	 */
	static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist'      => 'Artist',
			'model'       => 'Model',
			'make'        => 'Make',
			'author'      => 'Artist',
			'comment'     => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title'       => 'ObjectName',
			'copyright'   => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source'      => 'Model',
			'software'    => 'Software',
			'disclaimer'  => 'Disclaimer',
			'warning'     => 'ContentWarning',
			'url'         => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label'       => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that reads from $fh lives inside this try block so the
		// handle is closed again when a malformed file makes us throw.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf != self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length, 4-byte
			// type, <length> bytes of data, then a CRC of self::$CRC_size bytes.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Byte 8 is the bit depth, byte 9 the colour type.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Use self::read() like every other chunk so that
					// MAX_CHUNK_SIZE is enforced here as well.
					$buf = self::read( $fh, $chunk_size );
					// The unpack() format below consumes two 32-bit integers,
					// so anything shorter than 8 bytes cannot be decoded.
					if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Skip the first 20 bytes; the two 16-bit delay fields follow.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					// A zero denominator is treated as 100.
					if ( $fctldur['delay_den'] == 0 ) {
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					// NOTE(review): the pattern matches the byte after the
					// compression flag literally as \x00, and group 4 is the
					// single byte following the language tag - confirm this
					// is the intended reading of the iTXt layout.
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4]= compression type.
						 * $items[5] = content
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. ' or potentially invalid compression method' );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[ $items[1] ];
						$text[ $finalKeyword ][ $items[3] ] = $items[5];
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[ $keyword ];
					$text[ $finalKeyword ][ 'x-default' ] = $content;
					$text[ $finalKeyword ]['_type'] = 'lang';

				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword separator is the
						// compression method; the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[ $keyword ];
						$text[ $finalKeyword ][ 'x-default' ] = $content;
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}

				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not interpret: jump over its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that follows the chunk data.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			// Do not leak the file handle when parsing fails.
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so $value no longer aliases the last element.
			unset( $value );
		}
		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);

	}

	/**
	 * Read a chunk's data from the file, refusing oversized chunks.
	 *
	 * @param $fh resource: open file handle positioned at the chunk data
	 * @param $size Integer: number of bytes to read
	 * @return String|false: the bytes read; an empty string when $size is not
	 *   positive; otherwise whatever fread() returns
	 * @throws Exception if $size is larger than self::MAX_CHUNK_SIZE
	 */
	static private function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size <= 0 ) {
			// fread() rejects a non-positive length, so hand back an empty
			// buffer and let the caller's own validation report the error.
			return '';
		}
		return fread( $fh, $size );
	}
}