[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Class for some IPTC functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */

/**
 * Class for some IPTC functions.
 *
 * @ingroup Media
 */
class IPTC {
	/**
	 * IIM tags whose (charset-converted) values are stored unchanged under a
	 * single metadata key. Keys are in the "record#dataset" form used by
	 * iptcparse(); values are the metadata field names.
	 *
	 * @var array
	 */
	private static $simpleTags = array(
		// IPTC caption. Mapped with exif ImageDescription.
		'2#120' => 'ImageDescription',
		// Copyright. Mapped with exif Copyright.
		'2#116' => 'Copyright',
		// Keywords.
		'2#025' => 'Keywords',
		// Country (shown).
		'2#101' => 'CountryDest',
		// State/province (shown).
		'2#095' => 'ProvinceOrStateDest',
		// City (shown).
		'2#090' => 'CityDest',
		// Sublocation (shown).
		'2#092' => 'SublocationDest',
		// Object name/title.
		'2#005' => 'ObjectName',
		// Special instructions.
		'2#040' => 'SpecialInstructions',
		// Headline.
		'2#105' => 'Headline',
		// Credit: "Identifies the provider of the objectdata,
		// not necessarily the owner/creator".
		'2#110' => 'Credit',
		// Source: "Identifies the original owner of the intellectual content
		// of the objectdata. This could be an agency, a member of an agency
		// or an individual."
		'2#115' => 'Source',
		// Edit status (lead, correction, etc).
		'2#007' => 'EditStatus',
		// Category. Deprecated. Max 3 letters in theory, often more.
		'2#015' => 'iimCategory',
		// Supplemental category. Deprecated.
		'2#020' => 'iimSupplementalCategory',
		// Urgency (1-8. 1 most, 5 normal, 8 low priority).
		'2#010' => 'Urgency',
		// "Identifies objectdata that recurs often and predictably...
		// Example: Euroweather"
		'2#022' => 'FixtureIdentifier',
		// Content location code (iso 3166 + some custom things)
		// ex: TUR (for turkey), XUN (for UN), XSP (outer space).
		// See wikipedia article on iso 3166 and appendix D of iim std.
		'2#026' => 'LocationDestCode',
		// Content location name. Full printable name of location of photo.
		'2#027' => 'LocationDest',
		// Object cycle. a for morning (am), p for evening, b for both.
		'2#075' => 'ObjectCycle',
		// Country/Primary location code.
		// "Indicates the code of the country/primary location where the
		// intellectual property of the objectdata was created".
		// Unclear how this differs from 2#026.
		'2#100' => 'CountryCodeDest',
		// Original transmission ref.
		// "A code representing the location of original transmission
		// according to practises of the provider."
		'2#103' => 'OriginalTransmissionRef',
		// Contact.
		'2#118' => 'Contact',
		// Writer/Editor.
		// "Identification of the name of the person involved in the writing,
		// editing or correcting the objectdata or caption/abstract."
		'2#122' => 'Writer',
		// Language code.
		'2#135' => 'LanguageCode',
	);

	/**
	 * Date tags, each mapped to array( matching time tag, metadata field ).
	 *
	 * Incomplete dates are not accepted even though they are valid according
	 * to the spec. Should potentially store timezone as well.
	 *
	 * @var array
	 */
	private static $dateTags = array(
		// Date created (not date digitized). Maps to exif DateTimeOriginal.
		'2#055' => array( '2#060', 'DateTimeOriginal' ),
		// Date converted to digital representation. Maps to exif DateTimeDigitized.
		'2#062' => array( '2#063', 'DateTimeDigitized' ),
		// Date released.
		'2#030' => array( '2#035', 'DateTimeReleased' ),
		// Date expires.
		'2#037' => array( '2#038', 'DateTimeExpires' ),
	);

	/**
	 * This takes the results of iptcparse() and puts it into a
	 * form that can be handled by mediawiki. Generally called from
	 * BitmapMetadataHandler::doApp13.
	 *
	 * @see http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
	 *
	 * @param string $rawData The app13 block from jpeg containing iptc/iim data
	 * @return array IPTC metadata array. Empty if the data cannot be parsed
	 *  or if tag 1:90 names a charset that is not recognised.
	 */
	public static function parse( $rawData ) {
		$parsed = iptcparse( $rawData );
		$data = array();
		if ( !is_array( $parsed ) ) {
			return $data;
		}

		$c = '';
		// Charset info is contained in tag 1:90.
		if ( isset( $parsed['1#090'] ) && isset( $parsed['1#090'][0] ) ) {
			$c = self::getCharset( $parsed['1#090'][0] );
			if ( $c === false ) {
				// Unknown charset. Refuse to parse.
				// Note: there is a difference between an
				// unknown charset and no charset specified.
				return array();
			}
			unset( $parsed['1#090'] );
		}

		foreach ( $parsed as $tag => $val ) {
			if ( isset( $val[0] ) && trim( $val[0] ) === '' ) {
				wfDebugLog( 'iptc', "IPTC tag $tag had only whitespace as its value." );
				continue;
			}

			// Tags that map one-to-one onto a metadata field.
			if ( isset( self::$simpleTags[$tag] ) ) {
				$data[self::$simpleTags[$tag]] = self::convIPTC( $val, $c );
				continue;
			}

			// Date tags, combined with their companion time tag if present.
			if ( isset( self::$dateTags[$tag] ) ) {
				list( $timeTag, $field ) = self::$dateTags[$tag];
				$time = isset( $parsed[$timeTag] ) ? $parsed[$timeTag] : array();
				$timestamp = self::timeHelper( $val, $time, $c );
				if ( $timestamp ) {
					$data[$field] = $timestamp;
				}
				continue;
			}

			switch ( $tag ) {
				case '2#080': /* byline. Mapped with exif Artist */
					/* merge with byline title (2:85)
					 * like how exif does it with
					 * Title, person. Not sure if this is best
					 * approach since we no longer have the two fields
					 * separate. each byline title entry corresponds to a
					 * specific byline. */

					$bylines = self::convIPTC( $val, $c );
					if ( isset( $parsed['2#085'] ) ) {
						$titles = self::convIPTC( $parsed['2#085'], $c );
					} else {
						$titles = array();
					}

					$titleCount = count( $titles );
					for ( $i = 0; $i < $titleCount; $i++ ) {
						if ( isset( $bylines[$i] ) ) {
							// theoretically this should always be set
							// but doesn't hurt to be careful.
							$bylines[$i] = $titles[$i] . ', ' . $bylines[$i];
						}
					}
					$data['Artist'] = $bylines;
					break;

				case '2#065':
					/* Originating Program.
					 * Combine with Program version (2:70) if present.
					 */
					$software = self::convIPTC( $val, $c );

					if ( count( $software ) !== 1 ) {
						// according to iim standard this cannot have multiple values
						// so if there is more than one, something weird is happening,
						// and we skip it.
						wfDebugLog( 'iptc', 'IPTC: Wrong count on 2:65 Software field' );
						break;
					}

					if ( isset( $parsed['2#070'][0] ) ) {
						// if a version is set for the software.
						$softwareVersion = self::convIPTC( $parsed['2#070'], $c );
						unset( $parsed['2#070'] );
						$data['Software'] = array( array( $software[0], $softwareVersion[0] ) );
					} else {
						$data['Software'] = $software;
					}
					break;

				case '2#000': /* iim version */
					// unlike other tags, this is a 2-byte binary number.
					// technically this is required if there is iptc data
					// but in practise it isn't always there.
					if ( strlen( $val[0] ) === 2 ) {
						// the length check is just to be paranoid.
						$versionValue = ord( substr( $val[0], 0, 1 ) ) * 256;
						$versionValue += ord( substr( $val[0], 1, 1 ) );
						$data['iimVersion'] = $versionValue;
					}
					break;

				case '2#004':
					// IntellectualGenre.
					// first 4 characters are an id code
					// that we're not really interested in.

					// This prop is weird, since it's
					// allowed to have multiple values
					// in iim 4.1, but not in the XMP
					// stuff. We're going to just
					// extract the first value.
					$con = self::convIPTC( $val, $c );
					if ( !isset( $con[0] ) || strlen( $con[0] ) < 5 ) {
						wfDebugLog( 'iptc', 'IPTC: '
							. '2:04 too short. '
							. 'Ignoring.' );
						break;
					}
					$extracted = substr( $con[0], 4 );
					$data['IntellectualGenre'] = $extracted;
					break;

				case '2#012':
					// Subject News code - this is a compound field
					// at the moment we only extract the subject news
					// code, which is an 8 digit (ascii) number
					// describing the subject matter of the content.
					$codes = self::convIPTC( $val, $c );
					foreach ( $codes as $ic ) {
						$fields = explode( ':', $ic, 3 );

						if ( count( $fields ) < 2 || $fields[0] !== 'IPTC' ) {
							wfDebugLog( 'iptc', 'IPTC: '
								. 'Invalid 2:12 - ' . $ic );
							// Leaves the inner foreach: codes after the
							// first invalid one are not examined.
							break;
						}
						// Each valid code overwrites the previous one.
						$data['SubjectNewsCode'] = $fields[1];
					}
					break;

				// purposely does not do 2:125, 2:130, 2:131,
				// 2:47, 2:50, 2:45, 2:42, 2:8, 2:3
				// 2:200, 2:201, 2:202
				// or the audio stuff (2:150 to 2:154)

				case '2#070':
				case '2#060':
				case '2#063':
				case '2#085':
				case '2#038':
				case '2#035':
					// ignore. Handled elsewhere.
					break;

				default:
					wfDebugLog( 'iptc', "Unsupported iptc tag: $tag. Value: " . implode( ',', $val ) );
					break;
			}
		}

		return $data;
	}

	/**
	 * Convert an iptc date and time tags into the exif format
	 *
	 * @todo Potentially this should also capture the timezone offset.
	 * @param array $date The date tag
	 * @param array $time The time tag
	 * @param string $c The charset
	 * @return string|null Date in EXIF format (date part only when no usable
	 *  time tag was supplied), or null if the input could not be converted.
	 */
	private static function timeHelper( $date, $time, $c ) {
		if ( count( $date ) === 1 ) {
			// the standard says this should always be 1
			// just double checking.
			list( $date ) = self::convIPTC( $date, $c );
		} else {
			return null;
		}

		if ( count( $time ) === 1 ) {
			list( $time ) = self::convIPTC( $time, $c );
			$dateOnly = false;
		} else {
			$time = '000000+0000'; // placeholder
			$dateOnly = true;
		}

		if ( !( preg_match( '/\d\d\d\d\d\d[-+]\d\d\d\d/', $time )
			&& preg_match( '/\d\d\d\d\d\d\d\d/', $date )
			&& substr( $date, 0, 4 ) !== '0000'
			&& substr( $date, 4, 2 ) !== '00'
			&& substr( $date, 6, 2 ) !== '00'
		) ) {
			// something wrong.
			// Note, this rejects some valid dates according to iptc spec
			// for example: the date 00000400 means the photo was taken in
			// April, but the year and day is unknown. We don't process these
			// types of incomplete dates atm.
			wfDebugLog( 'iptc', "IPTC: invalid time ( $time ) or date ( $date )" );

			return null;
		}

		$unixTS = wfTimestamp( TS_UNIX, $date . substr( $time, 0, 6 ) );
		if ( $unixTS === false ) {
			wfDebugLog( 'iptc', "IPTC: can't convert date to TS_UNIX: $date $time." );

			return null;
		}

		// Offset in seconds, taken from the "+HHMM"/"-HHMM" suffix of the time.
		$tz = ( intval( substr( $time, 7, 2 ) ) * 60 * 60 )
			+ ( intval( substr( $time, 9, 2 ) ) * 60 );

		if ( substr( $time, 6, 1 ) === '-' ) {
			$tz = -$tz;
		}

		// NOTE(review): the signed offset is added to the timestamp here;
		// confirm this is the intended direction of the adjustment.
		$finalTimestamp = wfTimestamp( TS_EXIF, $unixTS + $tz );
		if ( $finalTimestamp === false ) {
			wfDebugLog( 'iptc', "IPTC: can't make final timestamp. Date: " . ( $unixTS + $tz ) );

			return null;
		}
		if ( $dateOnly ) {
			// return the date only
			return substr( $finalTimestamp, 0, 10 );
		} else {
			return $finalTimestamp;
		}
	}

	/**
	 * Helper function to convert charset for iptc values.
	 * @param string|array $data The iptc string, or an array of them
	 * @param string $charset The charset
	 *
	 * @return string|array Converted value, same shape as $data
	 */
	private static function convIPTC( $data, $charset ) {
		if ( is_array( $data ) ) {
			foreach ( $data as &$val ) {
				$val = self::convIPTCHelper( $val, $charset );
			}
			// Break the reference so $val no longer aliases the last element.
			unset( $val );
		} else {
			$data = self::convIPTCHelper( $data, $charset );
		}

		return $data;
	}

	/**
	 * Helper function of a helper function to convert charset for iptc values.
	 * @param string $data The IPTC string
	 * @param string $charset The charset; a falsy value means "not specified"
	 *
	 * @return string
	 */
	private static function convIPTCHelper( $data, $charset ) {
		if ( $charset ) {
			wfSuppressWarnings();
			$data = iconv( $charset, "UTF-8//IGNORE", $data );
			wfRestoreWarnings();
			if ( $data === false ) {
				$data = "";
				wfDebugLog( 'iptc', __METHOD__ . " Error converting iptc data charset $charset to utf-8" );
			}
		} else {
			// treat as utf-8 if is valid utf-8. otherwise pretend its windows-1252
			// most of the time if there is no 1:90 tag, it is either ascii, latin1, or utf-8
			$oldData = $data;
			UtfNormal::quickIsNFCVerify( $data ); // make $data valid utf-8
			if ( $data === $oldData ) {
				// validation didn't change $data. Note that the value is
				// returned as-is on this path, without trimming.
				return $data;
			} else {
				return self::convIPTCHelper( $oldData, 'Windows-1252' );
			}
		}

		return trim( $data );
	}

	/**
	 * take the value of 1:90 tag and returns a charset
	 * @param string $tag 1:90 tag.
	 * @return string|bool Charset name, or false if the escape sequence
	 *  is not recognised.
	 * Warning, this function does not (and is not intended to) detect
	 * all iso 2022 escape codes. In practise, the code for utf-8 is the
	 * only code that seems to have wide use. It does detect that code.
	 */
	public static function getCharset( $tag ) {

		// According to iim standard, charset is defined by the tag 1:90.
		// in which there are iso 2022 escape sequences to specify the character set.
		// the iim standard seems to encourage that all necessary escape sequences are
		// in the 1:90 tag, but says it doesn't have to be.

		// This is in need of more testing probably. This is definitely not complete.
		// however reading the docs of some other iptc software, it appears that most iptc software
		// only recognizes utf-8. If 1:90 tag is not present content is
		// usually ascii or iso-8859-1 (and sometimes utf-8), but no guarantee.

		// This also won't work if there are more than one escape sequence in the 1:90 tag
		// or if something is put in the G2, or G3 charsets, etc. It will only reliably recognize utf-8.

		// This is just going through the charsets mentioned in appendix C of the iim standard.

		// \x1b = ESC.
		switch ( $tag ) {
			case "\x1b%G": // utf-8
			// Also call things that are compatible with utf-8, utf-8 (e.g. ascii)
			case "\x1b(B": // ascii
			case "\x1b(@": // iso-646-IRV (ascii in latest version, $ different in older version)
				$c = 'UTF-8';
				break;
			case "\x1b(A": // like ascii, but british.
				$c = 'ISO646-GB';
				break;
			case "\x1b(C": // some obscure swedish/finland encoding
				$c = 'ISO-IR-8-1';
				break;
			case "\x1b(D":
				$c = 'ISO-IR-8-2';
				break;
			case "\x1b(E": // some obscure danish/norway encoding
				$c = 'ISO-IR-9-1';
				break;
			case "\x1b(F":
				$c = 'ISO-IR-9-2';
				break;
			case "\x1b(G":
				$c = 'SEN_850200_B'; // aka iso 646-SE; ascii-like
				break;
			case "\x1b(I":
				$c = "ISO646-IT";
				break;
			case "\x1b(L":
				$c = "ISO646-PT";
				break;
			case "\x1b(Z":
				$c = "ISO646-ES";
				break;
			case "\x1b([":
				$c = "GREEK7-OLD";
				break;
			case "\x1b(K":
				$c = "ISO646-DE";
				break;
			case "\x1b(N": // cyrillic
				$c = "ISO_5427";
				break;
			case "\x1b(`": // iso646-NO
				$c = "NS_4551-1";
				break;
			case "\x1b(f": // iso646-FR
				$c = "NF_Z_62-010";
				break;
			case "\x1b(g":
				$c = "PT2"; // iso646-PT2
				break;
			case "\x1b(h":
				$c = "ES2";
				break;
			case "\x1b(i": // iso646-HU
				$c = "MSZ_7795.3";
				break;
			case "\x1b(w":
				$c = "CSA_Z243.4-1985-1";
				break;
			case "\x1b(x":
				$c = "CSA_Z243.4-1985-2";
				break;
			case "\x1b\$(B":
			case "\x1b\$B":
			case "\x1b&@\x1b\$B":
			case "\x1b&@\x1b\$(B":
				$c = "JIS_C6226-1983";
				break;
			case "\x1b-A": // iso-8859-1. at least for the high code characters.
			case "\x1b(@\x1b-A":
			case "\x1b(B\x1b-A":
				$c = 'ISO-8859-1';
				break;
			case "\x1b-B": // iso-8859-2. at least for the high code characters.
				$c = 'ISO-8859-2';
				break;
			case "\x1b-C": // iso-8859-3. at least for the high code characters.
				$c = 'ISO-8859-3';
				break;
			case "\x1b-D": // iso-8859-4. at least for the high code characters.
				$c = 'ISO-8859-4';
				break;
			case "\x1b-E": // iso-8859-5. at least for the high code characters.
				$c = 'ISO-8859-5';
				break;
			case "\x1b-F": // iso-8859-6. at least for the high code characters.
				$c = 'ISO-8859-6';
				break;
			case "\x1b-G": // iso-8859-7. at least for the high code characters.
				$c = 'ISO-8859-7';
				break;
			case "\x1b-H": // iso-8859-8. at least for the high code characters.
				$c = 'ISO-8859-8';
				break;
			case "\x1b-I": // CSN_369103. at least for the high code characters.
				$c = 'CSN_369103';
				break;
			default:
				wfDebugLog( 'iptc', __METHOD__ . ' Unknown charset in iptc 1:90: ' . bin2hex( $tag ) );
				// at this point just give up and refuse to parse iptc?
				$c = false;
		}
		return $c;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |