MediaWiki  REL1_24
GIFMetadataExtractor.php
Go to the documentation of this file.
00001 <?php
/**
 * Extracts animation and textual metadata from GIF files.
 *
 * The extractor walks the block structure of a GIF stream and collects the
 * number of frames, the summed frame delays, whether the animation loops,
 * an embedded XMP packet and the contents of comment blocks.
 */
class GIFMetadataExtractor {
    /** @var string Byte that introduces an image (frame) descriptor: "," */
    private static $gifFrameSep;

    /** @var string Byte that introduces an extension block: "!" */
    private static $gifExtensionSep;

    /** @var string Byte that terminates the GIF stream: ";" */
    private static $gifTerm;

    // NOTE(review): presumably bumped whenever the shape of the extracted
    // metadata changes -- confirm against the callers of this class.
    const VERSION = 1;

    // Upper bound on the number of sub-blocks readBlock() will accept.
    // Each sub-block is less than or equal to 255 bytes.
    // Most of the time it's 255 bytes, except in XMP
    // blocks, where it's usually between 32-127 bytes each.
    const MAX_SUBBLOCKS = 262144; // 5 MB (5 * 1024 * 1024 bytes) divided by 20.

    /**
     * Read the metadata of a GIF file.
     *
     * @param string $filename Path of the GIF file to inspect
     * @return array Map with the keys 'frameCount' (int), 'looped' (bool),
     *   'duration' (float, in seconds), 'xmp' (string, empty when absent)
     *   and 'comment' (array of strings)
     * @throws Exception If the file is missing, cannot be opened, or is
     *   not a well-formed GIF
     */
    public static function getMetadata( $filename ) {
        self::$gifFrameSep = ',';
        self::$gifExtensionSep = '!';
        self::$gifTerm = ';';

        if ( !$filename ) {
            throw new Exception( "No file name specified" );
        } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
            throw new Exception( "File $filename does not exist" );
        }

        $fh = fopen( $filename, 'rb' );

        if ( !$fh ) {
            throw new Exception( "Unable to open file $filename" );
        }

        // Always release the handle, whether parsing succeeds or not.
        // Written as catch-and-rethrow instead of "finally" so that the
        // code does not require PHP 5.5.
        try {
            $metadata = self::parseStream( $fh );
        } catch ( Exception $e ) {
            fclose( $fh );
            throw $e;
        }
        fclose( $fh );

        return $metadata;
    }

    /**
     * Parse an already opened GIF stream from its first byte.
     *
     * @param resource $fh File handle positioned at the start of the file
     * @return array Same structure as returned by getMetadata()
     * @throws Exception On malformed or truncated input
     */
    private static function parseStream( $fh ) {
        $frameCount = 0;
        $duration = 0.0;
        $isLooped = false;
        $xmp = "";
        $comment = array();

        // Check for the GIF header
        $buf = fread( $fh, 6 );
        if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
            throw new Exception( "Not a valid GIF file; header: $buf" );
        }

        // Skip over width and height.
        fread( $fh, 4 );

        // The packed byte tells whether a colour table follows and how big it is.
        $bpp = self::decodeBPP( fread( $fh, 1 ) );

        // Skip over background and aspect ratio
        fread( $fh, 2 );

        // Skip over the global colour table
        self::readGCT( $fh, $bpp );

        while ( !feof( $fh ) ) {
            $buf = fread( $fh, 1 );

            if ( $buf === self::$gifFrameSep ) {
                // Found a frame
                $frameCount++;
                self::skipFrame( $fh );
            } elseif ( $buf === self::$gifExtensionSep ) {
                $buf = fread( $fh, 1 );
                if ( strlen( $buf ) < 1 ) {
                    throw new Exception( "Ran out of input" );
                }
                $extensionCode = ord( $buf );

                if ( $extensionCode === 0xF9 ) {
                    // Graphics Control Extension.
                    $duration += self::readFrameDelay( $fh );
                } elseif ( $extensionCode === 0xFE ) {
                    // Comment block(s).
                    $data = self::readComment( $fh );

                    $commentCount = count( $comment );
                    if ( $commentCount === 0
                        || $comment[$commentCount - 1] !== $data
                    ) {
                        // Some applications repeat the same comment on each
                        // frame of an animated GIF image, so if this comment
                        // is identical to the last, only extract once.
                        $comment[] = $data;
                    }
                } elseif ( $extensionCode === 0xFF ) {
                    // Application extension (Netscape info about the animated gif)
                    // or XMP (or theoretically any other type of extension block)
                    $info = self::readApplicationExtension( $fh );

                    // The loop flag is only ever switched on, never back off.
                    if ( isset( $info['looped'] ) && $info['looped'] ) {
                        $isLooped = true;
                    }
                    // A later XMP block replaces an earlier one.
                    if ( isset( $info['xmp'] ) ) {
                        $xmp = $info['xmp'];
                    }
                } else {
                    // Any other extension is of no interest.
                    self::skipBlock( $fh );
                }
            } elseif ( $buf === self::$gifTerm ) {
                break;
            } else {
                if ( strlen( $buf ) < 1 ) {
                    throw new Exception( "Ran out of input" );
                }
                throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . ord( $buf ) );
            }
        }

        return array(
            'frameCount' => $frameCount,
            'looped' => $isLooped,
            'duration' => $duration,
            'xmp' => $xmp,
            'comment' => $comment,
        );
    }

    /**
     * Skip the remainder of a frame whose separator byte was just consumed.
     *
     * @param resource $fh File handle positioned right after the "," byte
     * @throws Exception If the input ends prematurely
     */
    private static function skipFrame( $fh ) {
        // Skip bounding box
        fread( $fh, 8 );

        // Packed byte: tells whether the frame has its own colour table
        $bpp = self::decodeBPP( fread( $fh, 1 ) );

        // Skip that colour table, if any
        self::readGCT( $fh, $bpp );

        // One more byte precedes the image data (the LZW minimum code size
        // in the GIF89a layout), then the data itself comes as sub-blocks.
        fread( $fh, 1 );
        self::skipBlock( $fh );
    }

    /**
     * Read a Graphics Control Extension and return the frame delay it holds.
     *
     * @param resource $fh File handle positioned right after the 0xF9 byte
     * @return float Delay in seconds
     * @throws Exception If the block is truncated or not properly terminated
     */
    private static function readFrameDelay( $fh ) {
        fread( $fh, 1 ); // Block size

        fread( $fh, 1 ); // Transparency, disposal method, user input

        $buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
        if ( strlen( $buf ) < 2 ) {
            throw new Exception( "Ran out of input" );
        }
        $delay = unpack( 'v', $buf );

        fread( $fh, 1 ); // Transparent colour index

        $term = fread( $fh, 1 ); // Should be a terminator
        if ( strlen( $term ) < 1 ) {
            throw new Exception( "Ran out of input" );
        }
        if ( ord( $term ) !== 0 ) {
            throw new Exception( "Malformed Graphics Control Extension block" );
        }

        return $delay[1] * 0.01;
    }

    /**
     * Read a comment extension and return its text as UTF-8.
     *
     * @param resource $fh File handle positioned right after the 0xFE byte
     * @return string Comment text
     * @throws Exception If the comment is empty or the input is truncated
     */
    private static function readComment( $fh ) {
        $data = self::readBlock( $fh );
        if ( $data === "" ) {
            throw new Exception( 'Read error, zero-length comment block' );
        }

        // The standard says this should be ASCII, however it's unclear if
        // that's true in practice. Check to see if it's valid UTF-8; if so
        // assume it's that, otherwise assume it's windows-1252 (iso-8859-1).
        $dataCopy = $data;
        // quickIsNFCVerify has the side effect of replacing any invalid characters
        UtfNormal::quickIsNFCVerify( $dataCopy );

        if ( $dataCopy !== $data ) {
            wfSuppressWarnings();
            $converted = iconv( 'windows-1252', 'UTF-8', $data );
            wfRestoreWarnings();

            // iconv() returns false when it cannot convert the input; use
            // the copy cleaned up above rather than storing false as a comment.
            $data = ( $converted !== false ) ? $converted : $dataCopy;
        }

        return $data;
    }

    /**
     * Read an application extension block.
     *
     * Only the NETSCAPE2.0 (animation loop count) and XMP applications are
     * interpreted; every other application block is skipped.
     *
     * @param resource $fh File handle positioned right after the 0xFF byte
     * @return array Empty when nothing of interest was found, otherwise
     *   array( 'looped' => bool ) or array( 'xmp' => string )
     * @throws Exception On malformed or truncated input
     */
    private static function readApplicationExtension( $fh ) {
        $buf = fread( $fh, 1 );
        if ( strlen( $buf ) < 1 ) {
            throw new Exception( "Ran out of input" );
        }
        $blockLength = ord( $buf );

        // fread() does not accept a length of zero, so only read when the
        // block actually announces some bytes.
        $data = $blockLength > 0 ? (string)fread( $fh, $blockLength ) : '';

        // Distance back to the length byte. Based on what was really read,
        // so a truncated block cannot make us seek back too far.
        $rewind = strlen( $data ) + 1;

        if ( $blockLength !== 11 ) {
            // Same text as when this code lived directly in getMetadata().
            wfDebug( __CLASS__ . "::getMetadata GIF application block with wrong length\n" );
            fseek( $fh, -$rewind, SEEK_CUR );
            self::skipBlock( $fh );

            return array();
        }

        // NETSCAPE2.0 (application name for animated gif)
        if ( $data === 'NETSCAPE2.0' ) {
            $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

            if ( $data !== "\x03\x01" ) {
                throw new Exception( "Expected \x03\x01, got $data" );
            }

            // Unsigned little-endian integer, loop count or zero for "forever"
            $loopData = fread( $fh, 2 );
            if ( strlen( $loopData ) < 2 ) {
                throw new Exception( "Ran out of input" );
            }
            $loopData = unpack( 'v', $loopData );

            // Read out terminator byte
            fread( $fh, 1 );

            return array( 'looped' => $loopData[1] !== 1 );
        }

        if ( $data === 'XMP DataXMP' ) {
            // application name for XMP data.
            // see pg 18 of XMP spec part 3.

            // The sub-block length bytes are kept: they are part of the packet.
            $xmp = self::readBlock( $fh, true );

            if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
                || substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
            ) {
                // this is just a sanity check.
                throw new Exception( "XMP does not have magic trailer!" );
            }

            // strip out trailer.
            return array( 'xmp' => substr( $xmp, 0, -257 ) );
        }

        // unrecognized extension block
        fseek( $fh, -$rewind, SEEK_CUR );
        self::skipBlock( $fh );

        return array();
    }

    /**
     * Skip over a colour table.
     *
     * @param resource $fh File handle positioned at the start of the table
     * @param int $bpp Bits per pixel as returned by decodeBPP(); nothing is
     *   read when this is 0
     */
    public static function readGCT( $fh, $bpp ) {
        if ( $bpp > 0 ) {
            // The table holds 2^bpp entries of three bytes each.
            $entries = pow( 2, $bpp );
            for ( $i = 0; $i < $entries; $i++ ) {
                fread( $fh, 3 );
            }
        }
    }

    /**
     * Decode the packed byte that describes a colour table.
     *
     * @param string $data The packed byte as read from the file
     * @return int Bits per pixel of the colour table (1-8), or 0 when the
     *   byte says that no colour table is present
     * @throws Exception If $data is empty
     */
    public static function decodeBPP( $data ) {
        if ( strlen( $data ) < 1 ) {
            throw new Exception( "Ran out of input" );
        }
        $packed = ord( $data );

        // Top bit: colour table present. Low three bits: table size minus one.
        if ( ( $packed & 0x80 ) === 0 ) {
            return 0;
        }

        return ( $packed & 7 ) + 1;
    }

    /**
     * Skip a chain of sub-blocks up to and including its zero-length terminator.
     *
     * @param resource $fh File handle positioned at the first length byte
     * @throws Exception If a length byte cannot be read
     */
    public static function skipBlock( $fh ) {
        while ( !feof( $fh ) ) {
            $buf = fread( $fh, 1 );
            if ( strlen( $buf ) < 1 ) {
                throw new Exception( "Ran out of input" );
            }

            $blockLen = ord( $buf );
            if ( $blockLen === 0 ) {
                // Terminator reached
                return;
            }

            fread( $fh, $blockLen );
        }
    }

    /**
     * Read a chain of sub-blocks and return the concatenated contents.
     *
     * Reading stops at the zero-length terminator, which is consumed but
     * never included in the result.
     *
     * @param resource $fh File handle positioned at the first length byte
     * @param bool $includeLengths Whether the length byte of every sub-block
     *   should be kept in the returned data
     * @return string Contents of the sub-blocks
     * @throws Exception If the input ends before the terminator, or if there
     *   are more than MAX_SUBBLOCKS sub-blocks
     */
    public static function readBlock( $fh, $includeLengths = false ) {
        $data = '';
        $blocks = 0;
        $subLength = fread( $fh, 1 );

        while ( $subLength !== "\0" ) {
            $blocks++;
            if ( $blocks > self::MAX_SUBBLOCKS ) {
                throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
            }
            // An empty read means the length byte itself is missing; check
            // for it explicitly so fread() below never gets a zero length.
            if ( feof( $fh ) || strlen( $subLength ) < 1 ) {
                throw new Exception( "Read error: Unexpected EOF." );
            }
            if ( $includeLengths ) {
                $data .= $subLength;
            }

            $data .= fread( $fh, ord( $subLength ) );
            $subLength = fread( $fh, 1 );
        }

        return $data;
    }
}