[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Extraction of JPEG image metadata.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */

/**
 * Class for reading jpegs and extracting metadata.
 * see also BitmapMetadataHandler.
 *
 * Based somewhat on GIFMetadataExtractor.
 *
 * @ingroup Media
 */
class JpegMetadataExtractor {
	/**
	 * Sanity limit on the number of segments examined in one file.
	 * A jpeg file should never even remotely have that many segments;
	 * your average file has about 10.
	 */
	const MAX_JPEG_SEGMENTS = 200;

	/**
	 * Function to extract metadata segments of interest from jpeg files
	 * based on GIFMetadataExtractor.
	 *
	 * We can almost use getimagesize to do this, but gis doesn't support
	 * having multiple app1 segments, and so can't extract xmp on files
	 * containing both exif and xmp data.
	 *
	 * The returned array always has the list-valued keys 'XMP_ext', 'COM'
	 * and 'PSIR'. The keys 'XMP' (string) and 'byteOrder' ('BE' or 'LE')
	 * are only present when the corresponding APP1 segment was found.
	 *
	 * @param string $filename Name of jpeg file
	 * @return array Array of interesting segments.
	 * @throws MWException If given invalid file.
	 */
	public static function segmentSplitter( $filename ) {
		if ( !$filename ) {
			throw new MWException( "No filename specified for " . __METHOD__ );
		}
		if ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new MWException( "Invalid file $filename passed to " . __METHOD__ );
		}

		$fh = fopen( $filename, "rb" );

		if ( !$fh ) {
			throw new MWException( "Could not open file $filename" );
		}

		// Release the handle on every exit path. The file targets PHP
		// versions without "finally", hence the catch-and-rethrow.
		try {
			$segments = self::scanSegments( $fh, __METHOD__ );
		} catch ( Exception $e ) {
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		return $segments;
	}

	/**
	 * Walk the marker segments of an already opened jpeg, from the SOI
	 * marker up to the first SOS or EOI marker, collecting the segments
	 * that carry metadata.
	 *
	 * @param resource $fh File handle positioned at the start of the file
	 * @param string $logPrefix Text prepended to debug log lines (the
	 *  public entry point passes its own __METHOD__)
	 * @throws MWException If the file is not a well formed jpeg
	 * @return array Same structure as segmentSplitter() returns
	 */
	private static function scanSegments( $fh, $logPrefix ) {
		// XMP segments are only collected when the namespace-aware XML
		// parser function is available.
		$showXMP = function_exists( 'xml_parser_create_ns' );

		$segments = array(
			'XMP_ext' => array(),
			'COM' => array(),
			'PSIR' => array(),
		);

		if ( fread( $fh, 2 ) !== "\xFF\xD8" ) {
			throw new MWException( "Not a jpeg, no SOI" );
		}

		$segmentCount = 0;
		while ( !feof( $fh ) ) {
			$buffer = fread( $fh, 1 );
			if ( $buffer === '' || $buffer === false ) {
				// feof() only turns true once a read has come back short,
				// so a file that stops on a segment boundary lands here.
				break;
			}

			$segmentCount++;
			if ( $segmentCount > self::MAX_JPEG_SEGMENTS ) {
				// this is just a sanity check
				throw new MWException( 'Too many jpeg segments. Aborting' );
			}
			if ( $buffer !== "\xFF" ) {
				throw new MWException( "Error reading jpeg file marker. " .
					"Expected 0xFF but got " . bin2hex( $buffer ) );
			}

			$buffer = fread( $fh, 1 );
			while ( $buffer === "\xFF" && !feof( $fh ) ) {
				// Skip through any 0xFF padding bytes.
				$buffer = fread( $fh, 1 );
			}

			if ( $buffer === "\xFE" ) {
				// COM section -- file comment
				$comment = self::decodeComment( self::jpegExtractMarker( $fh ) );
				if ( $comment !== null ) {
					$segments["COM"][] = $comment;
				} else {
					wfDebug( $logPrefix . " Ignoring JPEG comment as is garbage.\n" );
				}
			} elseif ( $buffer === "\xE1" ) {
				// APP1 section (Exif, XMP, and XMP extended)
				// XMP is only extracted if XMP is enabled.
				$temp = self::jpegExtractMarker( $fh );
				// check what type of app segment this is.
				if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
					$segments["XMP"] = substr( $temp, 29 );
				} elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP ) {
					$segments["XMP_ext"][] = substr( $temp, 35 );
				} elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
					// Some images (especially flickr images) seem to have this
					// malformed identifier. Accept it anyway.
					$segments["XMP"] = substr( $temp, 29 );
					wfDebug( $logPrefix . ' Found XMP section with wrong app identifier '
						. "Using anyways.\n" );
				} elseif ( substr( $temp, 0, 6 ) === "Exif\0\0" ) {
					// Only the byte order is needed from the Exif segment.
					// This is II for little endian, MM for big endian.
					// It is not a unicode BOM.
					$byteOrderMarker = substr( $temp, 6, 2 );
					if ( $byteOrderMarker === 'MM' ) {
						$segments['byteOrder'] = 'BE';
					} elseif ( $byteOrderMarker === 'II' ) {
						$segments['byteOrder'] = 'LE';
					} else {
						wfDebug( $logPrefix . " Invalid byte ordering?!\n" );
					}
				}
			} elseif ( $buffer === "\xED" ) {
				// APP13 - PSIR. IPTC and some photoshop stuff
				$temp = self::jpegExtractMarker( $fh );
				if ( substr( $temp, 0, 14 ) === "Photoshop 3.0\x00" ) {
					$segments["PSIR"][] = $temp;
				}
			} elseif ( $buffer === "\xD9" || $buffer === "\xDA" ) {
				// EOI - end of image or SOS - start of scan.
				// Either way we're past any interesting segments.
				return $segments;
			} else {
				// Segment we don't care about, so skip its payload.
				// Seeking past the end of a truncated file is not reported
				// by fseek(); the next marker read comes back empty instead
				// and ends the loop.
				fseek( $fh, self::readPayloadLength( $fh ), SEEK_CUR );
			}
		}

		// shouldn't get here.
		throw new MWException( "Reached end of jpeg file unexpectedly" );
	}

	/**
	 * Turn the raw payload of a COM segment into a usable comment string.
	 *
	 * The payload is first checked for being valid UTF-8. If it is not,
	 * it is converted from windows-1252 and checked again.
	 *
	 * @param string $raw Payload of the COM segment
	 * @return string|null The comment, or null if it looks like binary
	 *  junk or some unknown encoding
	 */
	private static function decodeComment( $raw ) {
		$com = $oldCom = trim( $raw );
		// quickIsNFCVerify() rewrites its argument into valid UTF-8,
		// thus if nothing changed the input was UTF-8 already.
		UtfNormal::quickIsNFCVerify( $com );
		if ( $com !== $oldCom ) {
			wfSuppressWarnings();
			$com = $oldCom = iconv( 'windows-1252', 'UTF-8//IGNORE', $oldCom );
			wfRestoreWarnings();
		}
		// Try it again. If it is still not a valid string, then it is
		// probably binary junk or some really weird encoding.
		UtfNormal::quickIsNFCVerify( $com );

		return $com === $oldCom ? $oldCom : null;
	}

	/**
	 * Read the two byte, big endian length field that follows a marker
	 * and return how many payload bytes come after it.
	 *
	 * The stored length counts the length field itself, so 2 is the
	 * smallest legal value and denotes an empty segment.
	 *
	 * @param resource $fh File handle positioned on the length field
	 * @throws MWException If the stored length is smaller than 2 (wfUnpack()
	 *  is also expected to throw when fewer than 2 bytes could be read)
	 * @return int Number of payload bytes, possibly 0
	 */
	private static function readPayloadLength( $fh ) {
		$size = wfUnpack( "nint", fread( $fh, 2 ), 2 );
		if ( $size['int'] < 2 ) {
			throw new MWException( "invalid marker size in jpeg" );
		}

		return $size['int'] - 2;
	}

	/**
	 * Helper function for segmentSplitter
	 * @param resource &$fh File handle for JPEG file
	 * @throws MWException
	 * @return string Data content of segment.
	 */
	private static function jpegExtractMarker( &$fh ) {
		$length = self::readPayloadLength( $fh );
		if ( $length === 0 ) {
			// Empty segment. fread() must not be asked for zero bytes.
			return '';
		}

		$segment = fread( $fh, $length );
		if ( $segment === false || strlen( $segment ) !== $length ) {
			throw new MWException( "Segment shorter than expected" );
		}

		return $segment;
	}

	/**
	 * This reads the photoshop image resource.
	 * Currently it only compares the iptc/iim hash
	 * with the stored hash, which is used to determine the precedence
	 * of the iptc data. In future it may extract some other info, like
	 * url of copyright license.
	 *
	 * Resource 0x0404 contains the IPTC data and 0x0425 the recorded hash.
	 * The hash tells whether the iptc is newer than the xmp data, as xmp
	 * programs update the hash where non-xmp programs don't.
	 *
	 * This should generally be called by BitmapMetadataHandler::doApp13()
	 *
	 * @param string $app13 Photoshop psir app13 block from jpg.
	 * @throws MWException (It gets caught next level up though)
	 * @return string If the iptc hash is good or not. One of 'iptc-no-hash',
	 *  'iptc-good-hash', 'iptc-bad-hash'.
	 */
	public static function doPSIR( $app13 ) {
		if ( !$app13 ) {
			throw new MWException( "No App13 segment given" );
		}

		// Skip past the "Photoshop 3.0" identifier, which the caller
		// should already have checked.
		$offset = 14;
		$appLen = strlen( $app13 );
		$realHash = "";
		$recordedHash = "";

		// the +12 is the length of an empty item.
		while ( $offset + 12 <= $appLen ) {
			// The signature is supposed to be 8BIM, but apparently
			// sometimes isn't, esp. in really old jpg's. Such items are
			// still stepped over, just not interpreted.
			$valid = substr( $app13, $offset, 4 ) === '8BIM';
			$offset += 4;

			// 2 byte id number which identifies the piece of info
			// this record contains.
			$id = substr( $app13, $offset, 2 );
			$offset += 2;

			// Some record types can contain a name, which is a pascal
			// string 0-padded to be an even number of bytes. Most times
			// (and any time we care) this is empty, making it two null
			// bytes. The name is never used, so skip it. +1 for the
			// length byte.
			$lenName = ord( substr( $app13, $offset, 1 ) ) + 1;
			if ( $lenName % 2 === 1 ) {
				// pad to even.
				$lenName++;
			}
			$offset += $lenName;

			// now length of data (unsigned long big endian)
			$lenData = wfUnpack( 'Nlen', substr( $app13, $offset, 4 ), 4 );
			// PHP can take issue with very large unsigned ints and make
			// them negative. Which should never ever happen, as this has
			// to be inside a segment which is limited to a 16 bit number.
			if ( $lenData['len'] < 0 ) {
				throw new MWException( "Too big PSIR (" . $lenData['len'] . ')' );
			}

			$offset += 4; // 4bytes length field;

			// this should not happen, but check.
			if ( $lenData['len'] + $offset > $appLen ) {
				throw new MWException( "PSIR data too long. (item length=" . $lenData['len']
					. "; offset=$offset; total length=$appLen)" );
			}

			if ( $valid ) {
				switch ( $id ) {
					case "\x04\x04":
						// IPTC block
						$realHash = md5( substr( $app13, $offset, $lenData['len'] ), true );
						break;
					case "\x04\x25":
						// Hash recorded by the program that wrote the file
						$recordedHash = substr( $app13, $offset, $lenData['len'] );
						break;
				}
			}

			// if odd, add 1 to length to account for
			// null pad byte.
			if ( $lenData['len'] % 2 === 1 ) {
				$lenData['len']++;
			}
			$offset += $lenData['len'];
		}

		if ( !$realHash || !$recordedHash ) {
			return 'iptc-no-hash';
		} elseif ( $realHash === $recordedHash ) {
			return 'iptc-good-hash';
		} else { /*$realHash !== $recordedHash */
			return 'iptc-bad-hash';
		}
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |