MediaWiki
REL1_22
|
<?php
/**
 * Streaming reader for XMP (RDF/XML) metadata.
 *
 * The class drives PHP's expat-based XML parser and keeps a small state
 * machine (see the MODE_* constants) to turn the RDF serialisation into a
 * nested PHP array. Which properties are recognised, how they are grouped
 * and how they are validated is taken from the table returned by
 * XMPInfo::getItems().
 *
 * Typical use, as far as this class shows: construct, call parse() (and
 * optionally parseExtended()) with the raw packet(s), then read the
 * outcome with getResults().
 *
 * Element and attribute names handed to the handlers are of the form
 * "<namespace uri> <local name>" because the parser is created with a
 * single space as namespace separator.
 */
class XMPReader {

	/** @var array Stack of element names being processed; index 0 is the current one. */
	private $curItem = array();

	/** @var string|bool Name of the structure being filled, or false when not in one. */
	private $ancestorStruct = false;

	/** @var string|bool Character data collected for the current value, or false if none. */
	private $charContent = false;

	/** @var array Stack of MODE_* states; index 0 is the current state. */
	private $mode = array();

	/** @var array Extracted values, keyed by 'xmp-<map_group>' then property name. */
	private $results = array();

	/** @var bool True while the items of a Seq, Bag or Alt are being read. */
	private $processingArray = false;

	/** @var string|bool Lower-cased xml:lang of the current language alternative, else false. */
	private $itemLang = false;

	/** @var resource|object|null Handle returned by xml_parser_create_ns(). */
	private $xmlParser;

	/** @var string|bool Detected source charset, or false if not detected yet. */
	private $charset = false;

	/** @var int Offset at which the next ExtendedXMP chunk is expected to start. */
	private $extendedXMPOffset = 0;

	/** @var array Property definitions, as returned by XMPInfo::getItems(). */
	protected $items;

	const MODE_INITIAL = 0;
	const MODE_IGNORE = 1;
	const MODE_LI = 2;
	const MODE_LI_LANG = 3;
	const MODE_QDESC = 4;

	// The following MODE constants are also used in the
	// $items array to denote what type of property the item is.
	const MODE_SIMPLE = 10;
	const MODE_STRUCT = 11; // structure (associative array)
	const MODE_SEQ = 12; // ordered list
	const MODE_BAG = 13; // unordered list
	const MODE_LANG = 14;
	const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
	const MODE_BAGSTRUCT = 16; // A BAG of Structs.

	const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
	const NS_XML = 'http://www.w3.org/XML/1998/namespace';

	/**
	 * Load the property table and create the XML parser.
	 *
	 * @throws MWException If PHP was built without the XML parser extension.
	 */
	public function __construct() {
		if ( !function_exists( 'xml_parser_create_ns' ) ) {
			// this should already be checked by this point
			throw new MWException( 'XMP support requires XML Parser' );
		}

		$this->items = XMPInfo::getItems();

		$this->resetXMLParser();
	}

	/**
	 * (Re)create the underlying XML parser and register the handlers.
	 *
	 * Only the parser is replaced; the state stacks and the results
	 * collected so far are left as they are.
	 */
	private function resetXMLParser() {
		if ( $this->xmlParser ) {
			xml_parser_free( $this->xmlParser );
		}

		// A single space separates namespace URI and local name in the
		// names passed to the handlers; the rest of the class relies on it.
		$this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' );
		xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 );
		xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 );

		xml_set_element_handler(
			$this->xmlParser,
			array( $this, 'startElement' ),
			array( $this, 'endElement' )
		);

		xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
	}

	/**
	 * Release the XML parser.
	 *
	 * The parser may never have been created (the constructor can throw
	 * before reaching resetXMLParser()), hence the guard.
	 */
	public function __destruct() {
		if ( $this->xmlParser ) {
			xml_parser_free( $this->xmlParser );
		}
	}

	/**
	 * Return the extracted metadata after post-processing.
	 *
	 * The 'xmp-special' group holds values that only influence how other
	 * values are extracted (for example xmpNote:HasExtendedXMP,
	 * photoshop:AuthorsPosition and the location structures). They are
	 * folded into the regular groups here and then removed.
	 *
	 * Runs the 'XMPGetResults' hook with the data passed by reference.
	 *
	 * @return array Results keyed by group ('xmp-general', 'xmp-exif', ...).
	 */
	public function getResults() {
		$data = $this->results;

		wfRunHooks( 'XMPGetResults', array( &$data ) );

		if ( isset( $data['xmp-special']['AuthorsPosition'] )
			&& is_string( $data['xmp-special']['AuthorsPosition'] )
			&& isset( $data['xmp-general']['Artist'][0] )
		) {
			// Note, if there is more than one creator,
			// this only applies to first. This also will
			// only apply to the dc:Creator prop, not the
			// exif:Artist prop.
			$data['xmp-general']['Artist'][0] =
				$data['xmp-special']['AuthorsPosition'] . ', '
				. $data['xmp-general']['Artist'][0];
		}

		// Go through the LocationShown and LocationCreated
		// changing it to the non-hierarchal form used by
		// the other location fields. Each field of each location is
		// appended to 'xmp-general' under "<field><suffix>".
		$locationSuffixes = array(
			'LocationShown' => 'Dest',
			'LocationCreated' => 'Created',
		);
		foreach ( $locationSuffixes as $special => $suffix ) {
			// the is_array is just paranoia. It should always
			// be an array.
			if ( !isset( $data['xmp-special'][$special][0] )
				|| !is_array( $data['xmp-special'][$special][0] )
			) {
				continue;
			}
			foreach ( $data['xmp-special'][$special] as $loc ) {
				if ( !is_array( $loc ) ) {
					// To avoid copying over the _type meta-fields.
					continue;
				}
				foreach ( $loc as $field => $val ) {
					$data['xmp-general'][$field . $suffix][] = $val;
				}
			}
		}

		// We don't want to return the special values, since they're
		// special and not info to be stored about the file.
		unset( $data['xmp-special'] );

		// Convert GPSAltitude to negative if below sea level.
		if ( isset( $data['xmp-exif']['GPSAltitudeRef'] )
			&& isset( $data['xmp-exif']['GPSAltitude'] )
		) {
			// Must convert to a real before multiplying by -1
			// XMPValidate guarantees there will always be a '/' in this value.
			list( $nom, $denom ) = explode( '/', $data['xmp-exif']['GPSAltitude'] );
			$data['xmp-exif']['GPSAltitude'] = $nom / $denom;

			if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) {
				$data['xmp-exif']['GPSAltitude'] *= -1;
			}
			unset( $data['xmp-exif']['GPSAltitudeRef'] );
		}

		return $data;
	}

	/**
	 * Feed a piece of XMP to the parser.
	 *
	 * On the first call the charset is detected from a byte order mark
	 * found anywhere in $content (it is expected inside the xpacket
	 * processing instruction); without one UTF-8 is assumed. Content in
	 * another charset is converted to UTF-8 before parsing.
	 *
	 * @param string $content XMP data, whole or partial.
	 * @param bool $allOfIt True if this is the final (or only) piece.
	 * @param bool $reset True to start over with a fresh XML parser.
	 * @return bool True on success. On an XML error or an MWException the
	 *   results are blanked, the problem is logged and false is returned.
	 */
	public function parse( $content, $allOfIt = true, $reset = false ) {
		if ( $reset ) {
			$this->resetXMLParser();
		}
		try {
			// detect encoding by looking for BOM which is supposed to be in processing instruction.
			// see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
			if ( !$this->charset ) {
				$bom = array();
				if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
					$content, $bom )
				) {
					switch ( $bom[0] ) {
						case "\xFE\xFF":
							$this->charset = 'UTF-16BE';
							break;
						case "\xFF\xFE":
							$this->charset = 'UTF-16LE';
							break;
						case "\x00\x00\xFE\xFF":
							$this->charset = 'UTF-32BE';
							break;
						case "\xFF\xFE\x00\x00":
							$this->charset = 'UTF-32LE';
							break;
						case "\xEF\xBB\xBF":
							$this->charset = 'UTF-8';
							break;
						default:
							// this should be impossible to get to
							throw new MWException( "Invalid BOM" );
					}
				} else {
					// standard specifically says, if no bom assume utf-8
					$this->charset = 'UTF-8';
				}
			}
			if ( $this->charset !== 'UTF-8' ) {
				// don't convert if already utf-8
				wfSuppressWarnings();
				$content = iconv( $this->charset, 'UTF-8//IGNORE', $content );
				wfRestoreWarnings();
			}

			$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
			if ( !$ok ) {
				$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
				$where = 'line: ' . xml_get_current_line_number( $this->xmlParser )
					. ' column: ' . xml_get_current_column_number( $this->xmlParser )
					. ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );

				wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" );
				$this->results = array(); // blank if error.
				return false;
			}
		} catch ( MWException $e ) {
			// The element handlers throw MWException for malformed XMP.
			wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
			$this->results = array();
			return false;
		}
		return true;
	}

	/**
	 * Parse one ExtendedXMP chunk.
	 *
	 * Layout of $content as read here: 32 bytes GUID, a 4 byte big-endian
	 * total length of the whole ExtendedXMP serialisation, a 4 byte
	 * big-endian offset of this chunk within it, then the chunk data.
	 *
	 * The chunk is ignored unless its GUID equals the HasExtendedXMP value
	 * found in the standard packet, and unless chunks arrive in order.
	 *
	 * @todo FIXME: This is untested. Hard to find example files
	 *   or programs that make such files..
	 * @param string $content Raw chunk, starting at the GUID.
	 * @return bool False if the chunk was rejected or failed to parse.
	 */
	public function parseExtended( $content ) {
		$guid = substr( $content, 0, 32 );
		if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
			|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid
		) {
			wfDebugLog( 'XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
			return false;
		}
		$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );

		if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
			wfDebugLog( 'XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' );
			return false;
		}

		// we're not very robust here. we should accept it in the wrong order. To quote
		// the xmp standard:
		// "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
		// StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
		// robust JPEG reader should tolerate the marker segments in any order."
		//
		// otoh the probability that an image will have more than 128k of metadata is rather low...
		// so the probability that it will have > 128k, and be in the wrong order is very low...

		if ( $len['offset'] !== $this->extendedXMPOffset ) {
			wfDebugLog( 'XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
				. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );
			return false;
		}

		if ( $len['offset'] === 0 ) {
			// if we're starting the extended block, we've probably already
			// done the XMPStandard block, so reset.
			$this->resetXMLParser();
		}

		$actualContent = substr( $content, 40 );

		// 'length' is the size of the complete ExtendedXMP, not of this
		// chunk, so the next chunk is expected right after the bytes
		// consumed here, and we are done once the declared total is reached.
		$this->extendedXMPOffset += strlen( $actualContent );
		$atEnd = $this->extendedXMPOffset >= $len['length'];

		wfDebugLog( 'XMP', __METHOD__ . 'Parsing a XMPExtended block' );
		return $this->parse( $actualContent, $atEnd );
	}

	/**
	 * Character data handler for the XML parser.
	 *
	 * Collects trimmed text into $charContent. Text is only accepted in
	 * MODE_SIMPLE and MODE_QDESC, and silently dropped in MODE_IGNORE.
	 * Also called by doAttribs() for rdf:value / rdf:resource attributes.
	 *
	 * NOTE(review): every chunk is trimmed on its own, so if the parser
	 * delivers one text node in several pieces, whitespace at the seams
	 * is lost — confirm whether that matters for real-world files.
	 *
	 * @param resource|object|null $parser XML parser (unused).
	 * @param string $data Character data.
	 * @throws MWException If non-whitespace text appears where no value is expected.
	 */
	public function char( $parser, $data ) {
		$data = trim( $data );
		if ( $data === "" ) {
			return;
		}

		if ( !isset( $this->mode[0] ) ) {
			throw new MWException( 'Unexpected character data before first rdf:Description element' );
		}

		if ( $this->mode[0] === self::MODE_IGNORE ) {
			return;
		}

		if ( $this->mode[0] !== self::MODE_SIMPLE
			&& $this->mode[0] !== self::MODE_QDESC
		) {
			throw new MWException( 'character data where not expected. (mode ' . $this->mode[0] . ')' );
		}

		// to check, how does this handle w.s.
		if ( $this->charContent === false ) {
			$this->charContent = $data;
		} else {
			$this->charContent .= $data;
		}
	}

	/**
	 * End of an element while ignoring a subtree.
	 *
	 * Leaves ignore mode only when the element that started it is closed.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 */
	private function endElementModeIgnore( $elm ) {
		if ( $this->curItem[0] === $elm ) {
			array_shift( $this->curItem );
			array_shift( $this->mode );
		}
	}

	/**
	 * End of an element holding a simple value.
	 *
	 * Saves the collected character data, if any, and pops the state.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 */
	private function endElementModeSimple( $elm ) {
		if ( $this->charContent !== false ) {
			if ( $this->processingArray ) {
				// if we're processing an array, use the original element
				// name instead of rdf:li.
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			} else {
				list( $ns, $tag ) = explode( ' ', $elm, 2 );
			}
			$this->saveValue( $ns, $tag, $this->charContent );

			$this->charContent = false; // reset
		}
		array_shift( $this->curItem );
		array_shift( $this->mode );
	}

	/**
	 * End of a compound property (struct, seq, bag, lang alt, bag of structs).
	 *
	 * Runs the property's 'validate' callback on the assembled value, drops
	 * the value if the callback nulled it, then pops the state and clears
	 * the struct/array/lang flags.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @throws MWException If $elm does not match the element being processed.
	 */
	private function endElementNested( $elm ) {
		/* cur item must be the same as $elm, unless if in MODE_STRUCT
		   in which case it could also be rdf:Description */
		if ( $this->curItem[0] !== $elm
			&& !( $elm === self::NS_RDF . ' Description'
				&& $this->mode[0] === self::MODE_STRUCT )
		) {
			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
		}

		// Validate structures.
		list( $ns, $tag ) = explode( ' ', $elm, 2 );
		if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
			$info =& $this->items[$ns][$tag];
			$finalName = isset( $info['map_name'] )
				? $info['map_name'] : $tag;

			// A bare name refers to a static method of XMPValidate.
			$validate = is_array( $info['validate'] ) ? $info['validate']
				: array( 'XMPValidate', $info['validate'] );

			if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
				// This can happen if all the members of the struct failed validation.
				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );
			} elseif ( is_callable( $validate ) ) {
				$val =& $this->results['xmp-' . $info['map_group']][$finalName];
				call_user_func_array( $validate, array( $info, &$val, false ) );
				if ( is_null( $val ) ) {
					// the idea being the validation function will unset the variable if
					// its invalid.
					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
					unset( $this->results['xmp-' . $info['map_group']][$finalName] );
				}
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
			}
		}

		array_shift( $this->curItem );
		array_shift( $this->mode );
		$this->ancestorStruct = false;
		$this->processingArray = false;
		$this->itemLang = false;
	}

	/**
	 * End of the rdf:Seq / rdf:Bag / rdf:Alt wrapper of a list.
	 *
	 * Tags the collected list with a '_type' of 'ol', 'ul' or 'lang'.
	 * Only the mode is popped; curItem is left for endElementNested().
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @throws MWException If $elm is not one of the three wrappers.
	 */
	private function endElementModeLi( $elm ) {
		list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
		$info = $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;

		array_shift( $this->mode );

		if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
			return;
		}

		if ( $elm === self::NS_RDF . ' Seq' ) {
			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ol';
		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ul';
		} elseif ( $elm === self::NS_RDF . ' Alt' ) {
			// extra if needed as you could theoretically have a non-language alt.
			if ( $info['mode'] === self::MODE_LANG ) {
				$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang';
			}
		} else {
			throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
		}
	}

	/**
	 * End of an element inside a qualified value.
	 *
	 * On </rdf:value> the collected text is saved as the value of the
	 * property and the buffer is cleared, so the same text is neither
	 * saved a second time when the property itself ends nor mistaken for
	 * stray text before the next property. Any other end tag closes the
	 * qualified description.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 */
	private function endElementModeQDesc( $elm ) {
		if ( $elm === self::NS_RDF . ' value' ) {
			if ( $this->charContent !== false ) {
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
				$this->saveValue( $ns, $tag, $this->charContent );
				$this->charContent = false; // consumed
			}
			return;
		}

		array_shift( $this->mode );
		array_shift( $this->curItem );
	}

	/**
	 * End element handler for the XML parser.
	 *
	 * Dispatches to the endElementMode* method for the current mode.
	 *
	 * @param resource|object|null $parser XML parser (unused).
	 * @param string $elm Namespace and tag name, space separated.
	 * @throws MWException On inconsistent parser state or malformed XMP.
	 */
	public function endElement( $parser, $elm ) {
		if ( $elm === ( self::NS_RDF . ' RDF' )
			|| $elm === 'adobe:ns:meta/ xmpmeta'
			|| $elm === 'adobe:ns:meta/ xapmeta'
		) {
			// ignore these.
			return;
		}

		if ( $elm === self::NS_RDF . ' type' ) {
			// these aren't really supported properly yet.
			// However, it appears they almost never used.
			wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
		}

		if ( strpos( $elm, ' ' ) === false ) {
			// This probably shouldn't happen.
			// However, there is a bug in an adobe product
			// that forgets the namespace on some things.
			// (Luckily they are unimportant things).
			wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
			return;
		}

		// Check the mode stack itself; its entries are integers and
		// cannot be counted.
		if ( count( $this->mode ) === 0 ) {
			// This should never ever happen and means
			// there is a pretty major bug in this class.
			throw new MWException( 'Encountered end element with no mode' );
		}

		if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
			// just to be paranoid. Should always have a curItem, except for initially
			// (aka during MODE_INITAL).
			throw new MWException( "Hit end element </$elm> but no curItem" );
		}

		switch ( $this->mode[0] ) {
			case self::MODE_IGNORE:
				$this->endElementModeIgnore( $elm );
				break;
			case self::MODE_SIMPLE:
				$this->endElementModeSimple( $elm );
				break;
			case self::MODE_STRUCT:
			case self::MODE_SEQ:
			case self::MODE_BAG:
			case self::MODE_LANG:
			case self::MODE_BAGSTRUCT:
				$this->endElementNested( $elm );
				break;
			case self::MODE_INITIAL:
				if ( $elm === self::NS_RDF . ' Description' ) {
					array_shift( $this->mode );
				} else {
					throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
				}
				break;
			case self::MODE_LI:
			case self::MODE_LI_LANG:
				$this->endElementModeLi( $elm );
				break;
			case self::MODE_QDESC:
				$this->endElementModeQDesc( $elm );
				break;
			default:
				wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
				break;
		}
	}

	/**
	 * Start of an element while ignoring a subtree.
	 *
	 * Only an element with the same name as the ignored one is tracked,
	 * so that its end tag is not mistaken for the end of the ignored
	 * element.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 */
	private function startElementModeIgnore( $elm ) {
		if ( $elm === $this->curItem[0] ) {
			array_unshift( $this->curItem, $elm );
			array_unshift( $this->mode, self::MODE_IGNORE );
		}
	}

	/**
	 * Start of the child of a bag property; must be rdf:Bag.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @throws MWException If $elm is anything else.
	 */
	private function startElementModeBag( $elm ) {
		if ( $elm === self::NS_RDF . ' Bag' ) {
			array_unshift( $this->mode, self::MODE_LI );
		} else {
			throw new MWException( "Expected <rdf:Bag> but got $elm." );
		}
	}

	/**
	 * Start of the child of a seq property; rdf:Seq, or rdf:Bag as a
	 * tolerated mistake.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @throws MWException If $elm is anything else.
	 */
	private function startElementModeSeq( $elm ) {
		if ( $elm === self::NS_RDF . ' Seq' ) {
			array_unshift( $this->mode, self::MODE_LI );
		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
			# bug 27105
			wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
				. ' it is a Seq, since some buggy software is known to screw this up.' );
			array_unshift( $this->mode, self::MODE_LI );
		} else {
			throw new MWException( "Expected <rdf:Seq> but got $elm." );
		}
	}

	/**
	 * Start of the child of a language alternative; must be rdf:Alt.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @throws MWException If $elm is anything else.
	 */
	private function startElementModeLang( $elm ) {
		if ( $elm === self::NS_RDF . ' Alt' ) {
			array_unshift( $this->mode, self::MODE_LI_LANG );
		} else {
			throw new MWException( "Expected <rdf:Alt> but got $elm." );
		}
	}

	/**
	 * Start of an element inside a simple value.
	 *
	 * An rdf:Description means the value carries qualifiers (the value
	 * itself is then in rdf:value, as attribute or child element). Any
	 * other element is ignored together with its children.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @param array $attribs Attributes of the element.
	 * @throws MWException If a bare rdf:value element is found here.
	 */
	private function startElementModeSimple( $elm, $attribs ) {
		if ( $elm === self::NS_RDF . ' Description' ) {
			// If this value has qualifiers
			array_unshift( $this->mode, self::MODE_QDESC );
			array_unshift( $this->curItem, $this->curItem[0] );

			if ( isset( $attribs[self::NS_RDF . ' value'] ) ) {
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
				$this->saveValue( $ns, $tag, $attribs[self::NS_RDF . ' value'] );
			}
		} elseif ( $elm === self::NS_RDF . ' value' ) {
			// This should not be here.
			throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
		} else {
			// something else we don't recognize, like a qualifier maybe.
			wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $elm );
		}
	}

	/**
	 * Start of an element inside a qualified value.
	 *
	 * rdf:value needs no state change (its text is collected by char());
	 * everything else is a qualifier and is ignored.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 */
	private function startElementModeQDesc( $elm ) {
		if ( $elm === self::NS_RDF . ' value' ) {
			return; // do nothing
		}

		// otherwise its a qualifier, which we ignore
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
	}

	/**
	 * Start of an element directly below the outer rdf:Description.
	 *
	 * Known properties push their own mode; unknown ones, and ones that
	 * are only valid as part of a structure, are ignored. Attributes are
	 * then processed through doAttribs().
	 *
	 * @param string $ns Namespace URI.
	 * @param string $tag Local tag name.
	 * @param array $attribs Attributes of the element.
	 * @throws MWException If text was collected before this element.
	 */
	private function startElementModeInitial( $ns, $tag, $attribs ) {
		if ( $ns !== self::NS_RDF ) {
			if ( isset( $this->items[$ns][$tag] ) ) {
				if ( isset( $this->items[$ns][$tag]['structPart'] ) ) {
					// If this element is supposed to appear only as
					// a child of a structure, but appears here (not as
					// a child of a struct), then something weird is
					// happening, so ignore this element and its children.

					wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside"
						. " of its expected parent. Ignoring." );

					array_unshift( $this->mode, self::MODE_IGNORE );
					array_unshift( $this->curItem, $ns . ' ' . $tag );
					return;
				}
				$mode = $this->items[$ns][$tag]['mode'];
				array_unshift( $this->mode, $mode );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				if ( $mode === self::MODE_STRUCT ) {
					$this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
						? $this->items[$ns][$tag]['map_name'] : $tag;
				}
				if ( $this->charContent !== false ) {
					// Something weird.
					// Should not happen in valid XMP.
					throw new MWException( 'tag nested in non-whitespace characters.' );
				}
			} else {
				// This element is not on our list of allowed elements so ignore.
				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
				array_unshift( $this->mode, self::MODE_IGNORE );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				return;
			}
		}
		// process attributes
		$this->doAttribs( $attribs );
	}

	/**
	 * Start of an element inside a structure.
	 *
	 * Known members push their own mode, unknown ones are ignored. A
	 * nested rdf:Description contributes its attributes as members and
	 * pushes another MODE_STRUCT level for the same item.
	 *
	 * @param string $ns Namespace URI.
	 * @param string $tag Local tag name.
	 * @param array $attribs Attributes of the element.
	 * @throws MWException If the member is not allowed in this structure,
	 *   or text was collected before this element.
	 */
	private function startElementModeStruct( $ns, $tag, $attribs ) {
		if ( $ns !== self::NS_RDF ) {
			if ( isset( $this->items[$ns][$tag] ) ) {
				if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
					&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
				) {
					// This assumes that we don't have inter-namespace nesting
					// which we don't in all the properties we're interested in.
					throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
						. "> where it is not allowed." );
				}
				array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				if ( $this->charContent !== false ) {
					// Something weird.
					// Should not happen in valid XMP.
					throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
				}
			} else {
				// Unknown member: ignore it and its children. The full
				// element name must be recorded, otherwise
				// endElementModeIgnore() can never match the end tag.
				array_unshift( $this->mode, self::MODE_IGNORE );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				return;
			}
		}

		if ( $ns === self::NS_RDF && $tag === 'Description' ) {
			$this->doAttribs( $attribs );
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $this->curItem[0] );
		}
	}

	/**
	 * Start of an item of a Seq or Bag; must be rdf:li.
	 *
	 * For a bag of structures the item becomes a structure named after
	 * the enclosing property, otherwise a simple value.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @param array $attribs Attributes of the element.
	 * @throws MWException If $elm is not rdf:li or the state is inconsistent.
	 */
	private function startElementModeLi( $elm, $attribs ) {
		if ( $elm !== self::NS_RDF . ' li' ) {
			throw new MWException( "<rdf:li> expected but got $elm." );
		}

		if ( !isset( $this->mode[1] ) ) {
			// This should never ever ever happen. Checking for it
			// to be paranoid.
			throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
		}

		if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
			// This list item contains a compound (STRUCT) value.
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $elm );
			$this->processingArray = true;

			if ( !isset( $this->curItem[1] ) ) {
				// be paranoid.
				throw new MWException( 'Can not find parent of BAGSTRUCT.' );
			}
			list( $curNS, $curTag ) = explode( ' ', $this->curItem[1], 2 );
			$this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] )
				? $this->items[$curNS][$curTag]['map_name'] : $curTag;

			$this->doAttribs( $attribs );
		} else {
			// Normal BAG or SEQ containing simple values.
			array_unshift( $this->mode, self::MODE_SIMPLE );
			// need to add curItem[0] on again since one is for the specific item
			// and one is for the entire group.
			array_unshift( $this->curItem, $this->curItem[0] );
			$this->processingArray = true;
		}
	}

	/**
	 * Start of an item of a language alternative; must be rdf:li with a
	 * plausible xml:lang attribute.
	 *
	 * @param string $elm Namespace and tag name, space separated.
	 * @param array $attribs Attributes of the element.
	 * @throws MWException If $elm is not rdf:li or xml:lang is missing/invalid.
	 */
	private function startElementModeLiLang( $elm, $attribs ) {
		if ( $elm !== self::NS_RDF . ' li' ) {
			throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
		}
		if ( !isset( $attribs[self::NS_XML . ' lang'] )
			|| !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] )
		) {
			throw new MWException( __METHOD__
				. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
		}

		// Lang is case-insensitive.
		$this->itemLang = strtolower( $attribs[self::NS_XML . ' lang'] );

		// need to add curItem[0] on again since one is for the specific item
		// and one is for the entire group.
		array_unshift( $this->curItem, $this->curItem[0] );
		array_unshift( $this->mode, self::MODE_SIMPLE );
		$this->processingArray = true;
	}

	/**
	 * Start element handler for the XML parser.
	 *
	 * Dispatches to the startElementMode* method for the current mode.
	 *
	 * @param resource|object|null $parser XML parser (unused).
	 * @param string $elm Namespace and tag name, space separated.
	 * @param array $attribs Attributes, keyed by namespaced name.
	 * @throws MWException On inconsistent parser state or malformed XMP.
	 */
	public function startElement( $parser, $elm, $attribs ) {
		if ( $elm === self::NS_RDF . ' RDF'
			|| $elm === 'adobe:ns:meta/ xmpmeta'
			|| $elm === 'adobe:ns:meta/ xapmeta'
		) {
			/* ignore. */
			return;
		} elseif ( $elm === self::NS_RDF . ' Description' ) {
			if ( count( $this->mode ) === 0 ) {
				// outer rdf:desc
				array_unshift( $this->mode, self::MODE_INITIAL );
			}
		} elseif ( $elm === self::NS_RDF . ' type' ) {
			// This doesn't support rdf:type properly.
			// In practise I have yet to see a file that
			// uses this element, however it is mentioned
			// on page 25 of part 1 of the xmp standard.
			//
			// also it seems as if exiv2 and exiftool do not support
			// this either (That or I misunderstand the standard)
			wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
		}

		if ( strpos( $elm, ' ' ) === false ) {
			// This probably shouldn't happen.
			wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
			return;
		}

		list( $ns, $tag ) = explode( ' ', $elm, 2 );

		if ( count( $this->mode ) === 0 ) {
			// This should not happen.
			throw new MWException( 'Error extracting XMP, '
				. "encountered <$elm> with no mode" );
		}

		switch ( $this->mode[0] ) {
			case self::MODE_IGNORE:
				$this->startElementModeIgnore( $elm );
				break;
			case self::MODE_SIMPLE:
				$this->startElementModeSimple( $elm, $attribs );
				break;
			case self::MODE_INITIAL:
				$this->startElementModeInitial( $ns, $tag, $attribs );
				break;
			case self::MODE_STRUCT:
				$this->startElementModeStruct( $ns, $tag, $attribs );
				break;
			case self::MODE_BAG:
			case self::MODE_BAGSTRUCT:
				$this->startElementModeBag( $elm );
				break;
			case self::MODE_SEQ:
				$this->startElementModeSeq( $elm );
				break;
			case self::MODE_LANG:
				$this->startElementModeLang( $elm );
				break;
			case self::MODE_LI_LANG:
				$this->startElementModeLiLang( $elm, $attribs );
				break;
			case self::MODE_LI:
				$this->startElementModeLi( $elm, $attribs );
				break;
			case self::MODE_QDESC:
				$this->startElementModeQDesc( $elm );
				break;
			default:
				throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
		}
	}

	/**
	 * Process the attributes of an element.
	 *
	 * rdf:parseType="Resource" on a simple value switches it to a
	 * qualified value. rdf:value and rdf:resource are treated as the
	 * element's character data. Attributes naming known properties are
	 * saved as values; everything else is logged and skipped.
	 *
	 * @param array $attribs Attributes, keyed by namespaced name.
	 * @throws MWException If a property attribute appears on a simple value.
	 */
	private function doAttribs( $attribs ) {
		// first check for rdf:parseType attribute, as that can change
		// how the attributes are interperted.

		if ( isset( $attribs[self::NS_RDF . ' parseType'] )
			&& $attribs[self::NS_RDF . ' parseType'] === 'Resource'
			&& $this->mode[0] === self::MODE_SIMPLE
		) {
			// this is equivalent to having an inner rdf:Description
			$this->mode[0] = self::MODE_QDESC;
		}
		foreach ( $attribs as $name => $val ) {
			if ( strpos( $name, ' ' ) === false ) {
				// This shouldn't happen, but so far some old software forgets namespace
				// on rdf:about.
				wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
					. " $name=\"$val\". Skipping. " );
				continue;
			}
			list( $ns, $tag ) = explode( ' ', $name, 2 );
			if ( $ns === self::NS_RDF ) {
				if ( $tag === 'value' || $tag === 'resource' ) {
					// resource is for url.
					// value attribute is a weird way of just putting the contents.
					$this->char( $this->xmlParser, $val );
				}
			} elseif ( isset( $this->items[$ns][$tag] ) ) {
				if ( $this->mode[0] === self::MODE_SIMPLE ) {
					throw new MWException( __METHOD__
						. " $ns:$tag found as attribute where not allowed" );
				}
				$this->saveValue( $ns, $tag, $val );
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			}
		}
	}

	/**
	 * Validate a value and store it in the results.
	 *
	 * Where the value ends up depends on the current context: inside an
	 * array of structures, inside a structure, inside a plain or language
	 * array, or as a plain property.
	 *
	 * @param string $ns Namespace URI of the property.
	 * @param string $tag Local name of the property.
	 * @param string $val Value to store; the validator may modify it or
	 *   set it to null to reject it.
	 */
	private function saveValue( $ns, $tag, $val ) {
		$info =& $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;
		if ( isset( $info['validate'] ) ) {
			// A bare name refers to a static method of XMPValidate.
			$validate = is_array( $info['validate'] ) ? $info['validate']
				: array( 'XMPValidate', $info['validate'] );

			if ( is_callable( $validate ) ) {
				call_user_func_array( $validate, array( $info, &$val, true ) );
				// the reasoning behind using &$val instead of using the return value
				// is to be consistent between here and validating structures.
				if ( is_null( $val ) ) {
					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
					return;
				}
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
			}
		}

		if ( $this->ancestorStruct && $this->processingArray ) {
			// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
			$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val;
		} elseif ( $this->ancestorStruct ) {
			$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val;
		} elseif ( $this->processingArray ) {
			if ( $this->itemLang === false ) {
				// normal array
				$this->results['xmp-' . $info['map_group']][$finalName][] = $val;
			} else {
				// lang array.
				$this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val;
			}
		} else {
			$this->results['xmp-' . $info['map_group']][$finalName] = $val;
		}
	}
}