MediaWiki
REL1_20
|
<?php
/**
 * Reader for XMP metadata packets.
 *
 * The packet is fed to an expat (ext/xml) namespace-aware parser. The
 * element and character-data callbacks of this class drive a small state
 * machine: $mode is a stack of MODE_* states and $curItem is the parallel
 * stack of element names ("<namespace uri> <local name>"). Recognised
 * properties (as listed by XMPInfo::getItems()) are validated and stored in
 * $results, grouped under 'xmp-<map_group>' keys.
 *
 * Typical use, as far as this class shows: construct, call parse() (and
 * optionally parseExtended()) and then read getResults().
 */
class XMPReader {

	/** @var array Stack of element names; index 0 is the current element, 1 its parent, ... */
	private $curItem = array();

	/** @var string|bool The structure name when processing nested structures, else false. */
	private $ancestorStruct = false;

	/** @var string|bool Temporary holder for character data that appears in the XMP doc. */
	private $charContent = false;

	/** @var array Stack of states the reader is in (see MODE_FOO constants); index 0 is current. */
	private $mode = array();

	/** @var array The extracted results. */
	private $results = array();

	/** @var bool True while inside an rdf:li (i.e. doing a seq, bag or alt). */
	private $processingArray = false;

	/** @var string|bool Lower-cased xml:lang of the current item; used for lang alts only. */
	private $itemLang = false;

	/** @var resource The expat parser created by resetXMLParser(). */
	private $xmlParser;

	/** @var string|bool Charset detected from the BOM on the first parse() call, or false. */
	private $charset = false;

	/** @var int Offset at which the next ExtendedXMP block is expected. */
	private $extendedXMPOffset = 0;

	/** @var array Description of the recognised properties, from XMPInfo::getItems(). */
	protected $items;

	/*
	 * States of the reader. These are stored in the $mode stack.
	 */
	const MODE_INITIAL = 0;
	const MODE_IGNORE = 1;
	const MODE_LI = 2;
	const MODE_LI_LANG = 3;
	const MODE_QDESC = 4;

	// The following MODE constants are also used in the
	// $items array to denote what type of property the item is.
	const MODE_SIMPLE = 10;
	const MODE_STRUCT = 11; // structure (associative array)
	const MODE_SEQ = 12; // ordered list
	const MODE_BAG = 13; // unordered list
	const MODE_LANG = 14;
	const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
	const MODE_BAGSTRUCT = 16; // A BAG of Structs.

	const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
	const NS_XML = 'http://www.w3.org/XML/1998/namespace';

	/**
	 * Load the property definitions and set up a fresh XML parser.
	 *
	 * @throws MWException If the XML parser extension is not available.
	 */
	public function __construct() {
		if ( !function_exists( 'xml_parser_create_ns' ) ) {
			// this should already be checked by this point
			throw new MWException( 'XMP support requires XML Parser' );
		}

		$this->items = XMPInfo::getItems();

		$this->resetXMLParser();
	}

	/**
	 * Discard the current XML parser (if any) and create a new one wired to
	 * this object's handlers.
	 *
	 * The parser is namespace aware and uses a single space as the separator
	 * between namespace URI and local name, which is why element names are
	 * split with explode( ' ', ..., 2 ) throughout this class.
	 */
	private function resetXMLParser() {
		if ( $this->xmlParser ) {
			// is this needed?
			xml_parser_free( $this->xmlParser );
		}

		$this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' );
		xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 );
		xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 );

		xml_set_element_handler( $this->xmlParser,
			array( $this, 'startElement' ),
			array( $this, 'endElement' ) );

		xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
	}

	/**
	 * Free the XML parser.
	 */
	public function __destruct() {
		// not sure if this is needed.
		xml_parser_free( $this->xmlParser );
	}

	/**
	 * Get the results collected so far, post-processed for storage.
	 *
	 * The 'xmp-special' group is consumed here and never returned:
	 * AuthorsPosition is prefixed to the first Artist entry, and the
	 * LocationShown / LocationCreated structures are flattened into
	 * 'xmp-general'. GPSAltitude is converted from a rational string to a
	 * number, negated when GPSAltitudeRef is '1'.
	 *
	 * Runs the 'XMPGetResults' hook on a copy of the raw results first.
	 *
	 * @return array Results grouped by 'xmp-<group>' key.
	 */
	public function getResults() {
		// xmp-special is for metadata that affects how stuff
		// is extracted. For example xmpNote:HasExtendedXMP.

		// It is also used to handle photoshop:AuthorsPosition
		// which is weird and really part of another property,
		// see 2:85 in IPTC. See also pg 21 of IPTC4XMP standard.
		// The location fields also use it.

		$data = $this->results;

		wfRunHooks( 'XMPGetResults', array( &$data ) );

		if ( isset( $data['xmp-special']['AuthorsPosition'] )
			&& is_string( $data['xmp-special']['AuthorsPosition'] )
			&& isset( $data['xmp-general']['Artist'][0] )
		) {
			// Note, if there is more than one creator,
			// this only applies to first. This also will
			// only apply to the dc:Creator prop, not the
			// exif:Artist prop.

			$data['xmp-general']['Artist'][0] =
				$data['xmp-special']['AuthorsPosition'] . ', '
				. $data['xmp-general']['Artist'][0];
		}

		// Go through the LocationShown and LocationCreated
		// changing it to the non-hierarchal form used by
		// the other location fields. The map value is the suffix
		// appended to each field name of the flattened structure.
		$locationSuffixes = array(
			'LocationShown' => 'Dest',
			'LocationCreated' => 'Created',
		);
		foreach ( $locationSuffixes as $special => $suffix ) {
			// the is_array is just paranoia. It should always
			// be an array.
			if ( !isset( $data['xmp-special'][$special][0] )
				|| !is_array( $data['xmp-special'][$special][0] )
			) {
				continue;
			}
			foreach ( $data['xmp-special'][$special] as $loc ) {
				if ( !is_array( $loc ) ) {
					// To avoid copying over the _type meta-fields.
					continue;
				}
				foreach ( $loc as $field => $val ) {
					$data['xmp-general'][$field . $suffix][] = $val;
				}
			}
		}

		// We don't want to return the special values, since they're
		// special and not info to be stored about the file.
		unset( $data['xmp-special'] );

		// Convert GPSAltitude to negative if below sea level.
		if ( isset( $data['xmp-exif']['GPSAltitudeRef'] )
			&& isset( $data['xmp-exif']['GPSAltitude'] )
		) {
			// Must convert to a real before multiplying by -1
			// XMPValidate guarantees there will always be a '/' in this value.
			list( $nom, $denom ) = explode( '/', $data['xmp-exif']['GPSAltitude'] );
			$data['xmp-exif']['GPSAltitude'] = $nom / $denom;

			if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) {
				$data['xmp-exif']['GPSAltitude'] *= -1;
			}
			unset( $data['xmp-exif']['GPSAltitudeRef'] );
		}

		return $data;
	}

	/**
	 * Parse a chunk of XMP.
	 *
	 * On the first call the character set is detected from a byte order
	 * mark anywhere in $content (UTF-8 is assumed when none is found) and
	 * remembered for later calls. Non UTF-8 content is converted with
	 * iconv() before being handed to the XML parser.
	 *
	 * On any failure (XML error or an MWException raised by the handlers)
	 * the results gathered so far are discarded and false is returned.
	 *
	 * @param string $content XMP data.
	 * @param bool $allOfIt Passed to xml_parse() as the "is final" flag;
	 *   use false when more chunks will follow.
	 * @param bool $reset Whether to start over with a fresh XML parser.
	 * @return bool True on success, false on error.
	 */
	public function parse( $content, $allOfIt = true, $reset = false ) {
		if ( $reset ) {
			$this->resetXMLParser();
		}
		try {

			// detect encoding by looking for BOM which is supposed to be in processing instruction.
			// see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
			if ( !$this->charset ) {
				$bom = array();
				if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
					$content, $bom )
				) {
					switch ( $bom[0] ) {
						case "\xFE\xFF":
							$this->charset = 'UTF-16BE';
							break;
						case "\xFF\xFE":
							$this->charset = 'UTF-16LE';
							break;
						case "\x00\x00\xFE\xFF":
							$this->charset = 'UTF-32BE';
							break;
						case "\xFF\xFE\x00\x00":
							$this->charset = 'UTF-32LE';
							break;
						case "\xEF\xBB\xBF":
							$this->charset = 'UTF-8';
							break;
						default:
							// this should be impossible to get to
							throw new MWException( "Invalid BOM" );
					}
				} else {
					// standard specifically says, if no bom assume utf-8
					$this->charset = 'UTF-8';
				}
			}
			if ( $this->charset !== 'UTF-8' ) {
				// don't convert if already utf-8
				wfSuppressWarnings();
				$content = iconv( $this->charset, 'UTF-8//IGNORE', $content );
				wfRestoreWarnings();
			}

			$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
			if ( !$ok ) {
				$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
				$where = 'line: ' . xml_get_current_line_number( $this->xmlParser )
					. ' column: ' . xml_get_current_column_number( $this->xmlParser )
					. ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );

				wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" );
				$this->results = array(); // blank if error.
				return false;
			}
		} catch ( MWException $e ) {
			wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
			$this->results = array();
			return false;
		}
		return true;
	}

	/**
	 * Parse an ExtendedXMP block.
	 *
	 * The block layout read here is: 32 bytes of GUID, a 4 byte big-endian
	 * length, a 4 byte big-endian offset, then the XMP data itself. The
	 * block is ignored unless its GUID equals the xmpNote:HasExtendedXMP
	 * value seen earlier and its offset is the one expected next.
	 *
	 * @todo FIXME: This is untested. Hard to find example files
	 *   or programs that make such files..
	 *
	 * @param string $content Raw ExtendedXMP block, without the marker prefix.
	 * @return bool Whether the block was accepted and parsed successfully.
	 */
	public function parseExtended( $content ) {
		$guid = substr( $content, 0, 32 );
		if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
			|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid
		) {
			wfDebugLog( 'XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid' )" );
			return false;
		}
		$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );

		if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
			wfDebugLog( 'XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' );
			return false;
		}

		// we're not very robust here. we should accept it in the wrong order. To quote
		// the xmp standard:
		// "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
		// StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
		// robust JPEG reader should tolerate the marker segments in any order."
		//
		// otoh the probability that an image will have more than 128k of metadata is rather low...
		// so the probability that it will have > 128k, and be in the wrong order is very low...

		if ( $len['offset'] !== $this->extendedXMPOffset ) {
			wfDebugLog( 'XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
				. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );
			return false;
		}

		if ( $len['offset'] === 0 ) {
			// if we're starting the extended block, we've probably already
			// done the XMPStandard block, so reset.
			$this->resetXMLParser();
		}

		// NOTE(review): the expected offset is advanced by the header's length
		// field and "at end" is decided by comparing it with this chunk's size.
		// If the length field is the total size of the ExtendedXMP rather than
		// of this chunk, multi-chunk data would not be handled - confirm against
		// the XMP specification before relying on this.
		$this->extendedXMPOffset += $len['length'];

		$actualContent = substr( $content, 40 );

		$atEnd = ( $this->extendedXMPOffset === strlen( $actualContent ) );

		wfDebugLog( 'XMP', __METHOD__ . 'Parsing a XMPExtended block' );
		return $this->parse( $actualContent, $atEnd );
	}

	/**
	 * Character data handler. Called by the XML parser (and by doAttribs()
	 * for rdf:value / rdf:resource attributes).
	 *
	 * Whitespace-only data is dropped. Anything else is appended (trimmed)
	 * to $charContent, which is consumed when the element ends.
	 *
	 * @param resource $parser XML parser (unused).
	 * @param string $data Character data.
	 * @throws MWException On character data outside a simple value or
	 *   qualified description, or before any mode has been entered.
	 */
	public function char( $parser, $data ) {
		$data = trim( $data );
		if ( $data === '' ) {
			return;
		}

		if ( !isset( $this->mode[0] ) ) {
			throw new MWException( 'Unexpected character data before first rdf:Description element' );
		}

		if ( $this->mode[0] === self::MODE_IGNORE ) {
			return;
		}

		if ( $this->mode[0] !== self::MODE_SIMPLE
			&& $this->mode[0] !== self::MODE_QDESC
		) {
			throw new MWException( 'character data where not expected. (mode ' . $this->mode[0] . ')' );
		}

		// to check, how does this handle w.s.
		if ( $this->charContent === false ) {
			$this->charContent = $data;
		} else {
			$this->charContent .= $data;
		}
	}

	/**
	 * End of an element while ignoring content.
	 *
	 * The ignore state is only left once the closing tag of the element
	 * that started it (the top of $curItem) is seen.
	 *
	 * @param string $elm Namespace <space> element name.
	 */
	private function endElementModeIgnore( $elm ) {
		if ( $this->curItem[0] === $elm ) {
			array_shift( $this->curItem );
			array_shift( $this->mode );
		}
	}

	/**
	 * End of a simple element: store the collected character data (if any)
	 * and pop the state.
	 *
	 * @param string $elm Namespace <space> element name.
	 */
	private function endElementModeSimple( $elm ) {
		if ( $this->charContent !== false ) {
			if ( $this->processingArray ) {
				// if we're processing an array, use the original element
				// name instead of rdf:li.
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			} else {
				list( $ns, $tag ) = explode( ' ', $elm, 2 );
			}
			$this->saveValue( $ns, $tag, $this->charContent );

			$this->charContent = false; // reset
		}
		array_shift( $this->curItem );
		array_shift( $this->mode );
	}

	/**
	 * End of a nested property (struct, seq, bag, lang or bag-of-structs).
	 *
	 * Runs the property's validation callback (if defined) over the whole
	 * collected value, then pops the state and clears the struct / array /
	 * language context.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @throws MWException On a nesting mismatch.
	 */
	private function endElementNested( $elm ) {
		/* cur item must be the same as $elm, unless if in MODE_STRUCT
		   in which case it could also be rdf:Description */
		if ( $this->curItem[0] !== $elm
			&& !( $elm === self::NS_RDF . ' Description'
				&& $this->mode[0] === self::MODE_STRUCT )
		) {
			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
		}

		// Validate structures.
		list( $ns, $tag ) = explode( ' ', $elm, 2 );
		if ( isset( $this->items[$ns][$tag]['validate'] ) ) {

			$info =& $this->items[$ns][$tag];
			$finalName = isset( $info['map_name'] )
				? $info['map_name'] : $tag;

			// A bare method name refers to a static method of XMPValidate.
			$validate = is_array( $info['validate'] ) ? $info['validate']
				: array( 'XMPValidate', $info['validate'] );

			if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
				// This can happen if all the members of the struct failed validation.
				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );

			} elseif ( is_callable( $validate ) ) {
				$val =& $this->results['xmp-' . $info['map_group']][$finalName];
				call_user_func_array( $validate, array( $info, &$val, false ) );
				if ( is_null( $val ) ) {
					// the idea being the validation function will unset the variable if
					// its invalid.
					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
					unset( $this->results['xmp-' . $info['map_group']][$finalName] );
				}
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
			}
		}

		array_shift( $this->curItem );
		array_shift( $this->mode );
		$this->ancestorStruct = false;
		$this->processingArray = false;
		$this->itemLang = false;
	}

	/**
	 * End of an rdf:Seq, rdf:Bag or rdf:Alt container.
	 *
	 * Records the list type ('ol', 'ul' or 'lang') in the '_type' key of
	 * the stored value. Only the mode is popped here; the enclosing property
	 * element pops $curItem when it ends.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @throws MWException If $elm is not one of the container elements.
	 */
	private function endElementModeLi( $elm ) {
		list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
		$info = $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;

		array_shift( $this->mode );

		if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
			return;
		}

		if ( $elm === self::NS_RDF . ' Seq' ) {
			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ol';
		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ul';
		} elseif ( $elm === self::NS_RDF . ' Alt' ) {
			// extra if needed as you could theoretically have a non-language alt.
			if ( $info['mode'] === self::MODE_LANG ) {
				$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang';
			}
		} else {
			throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
		}
	}

	/**
	 * End of an element inside a qualified description.
	 *
	 * A closing rdf:value stores the collected character data under the
	 * current property; any other closing tag leaves the QDESC state.
	 *
	 * @param string $elm Namespace <space> element name.
	 */
	private function endElementModeQDesc( $elm ) {
		if ( $elm === self::NS_RDF . ' value' ) {
			list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			$this->saveValue( $ns, $tag, $this->charContent );
			return;
		}

		array_shift( $this->mode );
		array_shift( $this->curItem );
	}

	/**
	 * End-element handler called by the XML parser; dispatches on the
	 * current mode.
	 *
	 * @param resource $parser XML parser (unused).
	 * @param string $elm Namespace <space> element name.
	 * @throws MWException On inconsistent internal state or unexpected nesting.
	 */
	public function endElement( $parser, $elm ) {
		if ( $elm === ( self::NS_RDF . ' RDF' )
			|| $elm === 'adobe:ns:meta/ xmpmeta'
			|| $elm === 'adobe:ns:meta/ xapmeta'
		) {
			// ignore these.
			return;
		}

		if ( $elm === self::NS_RDF . ' type' ) {
			// these aren't really supported properly yet.
			// However, it appears they almost never used.
			wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
		}

		if ( strpos( $elm, ' ' ) === false ) {
			// This probably shouldn't happen.
			// However, there is a bug in an adobe product
			// that forgets the namespace on some things.
			// (Luckily they are unimportant things).
			wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
			return;
		}

		// Check the mode stack itself. (This used to count $this->mode[0],
		// an integer, which is a warning on PHP 7.2+ and a TypeError on PHP 8.)
		if ( count( $this->mode ) === 0 ) {
			// This should never ever happen and means
			// there is a pretty major bug in this class.
			throw new MWException( 'Encountered end element with no mode' );
		}

		if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
			// just to be paranoid. Should always have a curItem, except for initially
			// (aka during MODE_INITAL).
			throw new MWException( "Hit end element </$elm> but no curItem" );
		}

		switch ( $this->mode[0] ) {
			case self::MODE_IGNORE:
				$this->endElementModeIgnore( $elm );
				break;
			case self::MODE_SIMPLE:
				$this->endElementModeSimple( $elm );
				break;
			case self::MODE_STRUCT:
			case self::MODE_SEQ:
			case self::MODE_BAG:
			case self::MODE_LANG:
			case self::MODE_BAGSTRUCT:
				$this->endElementNested( $elm );
				break;
			case self::MODE_INITIAL:
				if ( $elm === self::NS_RDF . ' Description' ) {
					array_shift( $this->mode );
				} else {
					throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
				}
				break;
			case self::MODE_LI:
			case self::MODE_LI_LANG:
				$this->endElementModeLi( $elm );
				break;
			case self::MODE_QDESC:
				$this->endElementModeQDesc( $elm );
				break;
			default:
				wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
				break;
		}
	}

	/**
	 * Start of an element while ignoring content.
	 *
	 * Only an element with the same name as the one being ignored is
	 * tracked, so that its closing tag does not end the ignore state early.
	 *
	 * @param string $elm Namespace <space> element name.
	 */
	private function startElementModeIgnore( $elm ) {
		if ( $elm === $this->curItem[0] ) {
			array_unshift( $this->curItem, $elm );
			array_unshift( $this->mode, self::MODE_IGNORE );
		}
	}

	/**
	 * Start element in MODE_BAG / MODE_BAGSTRUCT; this should always be
	 * an rdf:Bag.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @throws MWException If $elm is not rdf:Bag.
	 */
	private function startElementModeBag( $elm ) {
		if ( $elm === self::NS_RDF . ' Bag' ) {
			array_unshift( $this->mode, self::MODE_LI );
		} else {
			throw new MWException( "Expected <rdf:Bag> but got $elm." );
		}
	}

	/**
	 * Start element in MODE_SEQ; this should be an rdf:Seq, but an rdf:Bag
	 * is tolerated.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @throws MWException If $elm is neither rdf:Seq nor rdf:Bag.
	 */
	private function startElementModeSeq( $elm ) {
		if ( $elm === self::NS_RDF . ' Seq' ) {
			array_unshift( $this->mode, self::MODE_LI );
		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
			# bug 27105
			wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
				. ' it is a Seq, since some buggy software is known to screw this up.' );
			array_unshift( $this->mode, self::MODE_LI );
		} else {
			throw new MWException( "Expected <rdf:Seq> but got $elm." );
		}
	}

	/**
	 * Start element in MODE_LANG (language alternative); this should
	 * always be an rdf:Alt.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @throws MWException If $elm is not rdf:Alt.
	 */
	private function startElementModeLang( $elm ) {
		if ( $elm === self::NS_RDF . ' Alt' ) {
			array_unshift( $this->mode, self::MODE_LI_LANG );
		} else {
			// The message previously named rdf:Seq, although rdf:Alt is what is checked.
			throw new MWException( "Expected <rdf:Alt> but got $elm." );
		}
	}

	/**
	 * Start element while expecting a simple value.
	 *
	 * An inner rdf:Description means the value has qualifiers and switches
	 * to MODE_QDESC (saving an rdf:value attribute immediately if present).
	 * Any other element is ignored together with its children.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException On an unexpected rdf:value element.
	 */
	private function startElementModeSimple( $elm, $attribs ) {
		if ( $elm === self::NS_RDF . ' Description' ) {
			// If this value has qualifiers
			array_unshift( $this->mode, self::MODE_QDESC );
			array_unshift( $this->curItem, $this->curItem[0] );

			if ( isset( $attribs[self::NS_RDF . ' value'] ) ) {
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
				$this->saveValue( $ns, $tag, $attribs[self::NS_RDF . ' value'] );
			}
		} elseif ( $elm === self::NS_RDF . ' value' ) {
			// This should not be here.
			throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );

		} else {
			// something else we don't recognize, like a qualifier maybe.
			wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $elm );
		}
	}

	/**
	 * Start element inside a qualified description.
	 *
	 * rdf:value needs no state change (its character data is collected by
	 * char()); everything else is a qualifier and is ignored.
	 *
	 * @param string $elm Namespace <space> element name.
	 */
	private function startElementModeQDesc( $elm ) {
		if ( $elm === self::NS_RDF . ' value' ) {
			return; // do nothing
		}

		// otherwise its a qualifier, which we ignore
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
	}

	/**
	 * Start element directly inside the outer rdf:Description.
	 *
	 * Recognised properties push their configured mode; unknown elements,
	 * and elements that may only appear inside a structure, are ignored.
	 * Attributes are processed for recognised elements and for rdf elements.
	 *
	 * @param string $ns Namespace URI.
	 * @param string $tag Local element name.
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException If the tag is nested in non-whitespace characters.
	 */
	private function startElementModeInitial( $ns, $tag, $attribs ) {
		if ( $ns !== self::NS_RDF ) {

			if ( isset( $this->items[$ns][$tag] ) ) {
				if ( isset( $this->items[$ns][$tag]['structPart'] ) ) {
					// If this element is supposed to appear only as
					// a child of a structure, but appears here (not as
					// a child of a struct), then something weird is
					// happening, so ignore this element and its children.

					wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside"
						. " of its expected parent. Ignoring." );

					array_unshift( $this->mode, self::MODE_IGNORE );
					array_unshift( $this->curItem, $ns . ' ' . $tag );
					return;
				}
				$mode = $this->items[$ns][$tag]['mode'];
				array_unshift( $this->mode, $mode );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				if ( $mode === self::MODE_STRUCT ) {
					$this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
						? $this->items[$ns][$tag]['map_name'] : $tag;
				}
				if ( $this->charContent !== false ) {
					// Something weird.
					// Should not happen in valid XMP.
					throw new MWException( 'tag nested in non-whitespace characters.' );
				}
			} else {
				// This element is not on our list of allowed elements so ignore.
				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
				array_unshift( $this->mode, self::MODE_IGNORE );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				return;
			}

		}
		// process attributes
		$this->doAttribs( $attribs );
	}

	/**
	 * Start element inside a structure.
	 *
	 * Recognised members push their configured mode; unrecognised members
	 * are ignored together with their children. An inner rdf:Description
	 * re-enters MODE_STRUCT and may carry members as attributes.
	 *
	 * @param string $ns Namespace URI.
	 * @param string $tag Local element name.
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException If the member is not allowed in the current
	 *   structure, or is nested in non-whitespace characters.
	 */
	private function startElementModeStruct( $ns, $tag, $attribs ) {
		if ( $ns !== self::NS_RDF ) {

			if ( isset( $this->items[$ns][$tag] ) ) {
				if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
					&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
				) {
					// This assumes that we don't have inter-namespace nesting
					// which we don't in all the properties we're interested in.
					throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
						. "> where it is not allowed." );
				}
				array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				if ( $this->charContent !== false ) {
					// Something weird.
					// Should not happen in valid XMP.
					throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
				}
			} else {
				array_unshift( $this->mode, self::MODE_IGNORE );
				// Push the full element name so that endElementModeIgnore()
				// can match the closing tag. (An undefined variable was
				// pushed here before, so the ignore state was never left.)
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				return;
			}

		}

		if ( $ns === self::NS_RDF && $tag === 'Description' ) {
			$this->doAttribs( $attribs );
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $this->curItem[0] );
		}
	}

	/**
	 * Start element inside an rdf:Seq / rdf:Bag; this should always be an
	 * rdf:li.
	 *
	 * For a bag of structs the item becomes a MODE_STRUCT whose ancestor
	 * is the enclosing property; otherwise the item is a simple value.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException If $elm is not rdf:li or the state stacks are inconsistent.
	 */
	private function startElementModeLi( $elm, $attribs ) {
		if ( $elm !== self::NS_RDF . ' li' ) {
			throw new MWException( "<rdf:li> expected but got $elm." );
		}

		if ( !isset( $this->mode[1] ) ) {
			// This should never ever ever happen. Checking for it
			// to be paranoid.
			throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
		}

		if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
			// This list item contains a compound (STRUCT) value.
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $elm );
			$this->processingArray = true;

			if ( !isset( $this->curItem[1] ) ) {
				// be paranoid.
				throw new MWException( 'Can not find parent of BAGSTRUCT.' );
			}
			// Limit of 2, like every other split of an element name in this class.
			list( $curNS, $curTag ) = explode( ' ', $this->curItem[1], 2 );
			$this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] )
				? $this->items[$curNS][$curTag]['map_name'] : $curTag;

			$this->doAttribs( $attribs );

		} else {
			// Normal BAG or SEQ containing simple values.
			array_unshift( $this->mode, self::MODE_SIMPLE );
			// need to add curItem[0] on again since one is for the specific item
			// and one is for the entire group.
			array_unshift( $this->curItem, $this->curItem[0] );
			$this->processingArray = true;
		}
	}

	/**
	 * Start element inside an rdf:Alt of a language alternative; this
	 * should always be an rdf:li carrying a valid xml:lang attribute.
	 *
	 * @param string $elm Namespace <space> element name.
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException If $elm is not rdf:li, or xml:lang is missing or invalid.
	 */
	private function startElementModeLiLang( $elm, $attribs ) {
		if ( $elm !== self::NS_RDF . ' li' ) {
			throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
		}
		if ( !isset( $attribs[self::NS_XML . ' lang'] )
			|| !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] )
		) {
			throw new MWException( __METHOD__
				. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
		}

		// Lang is case-insensitive.
		$this->itemLang = strtolower( $attribs[self::NS_XML . ' lang'] );

		// need to add curItem[0] on again since one is for the specific item
		// and one is for the entire group.
		array_unshift( $this->curItem, $this->curItem[0] );
		array_unshift( $this->mode, self::MODE_SIMPLE );
		$this->processingArray = true;
	}

	/**
	 * Start-element handler called by the XML parser; dispatches on the
	 * current mode.
	 *
	 * The wrapper elements (rdf:RDF, x:xmpmeta, x:xapmeta) are skipped, and
	 * the first rdf:Description enters MODE_INITIAL.
	 *
	 * @param resource $parser XML parser (unused).
	 * @param string $elm Namespace <space> element name.
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException On an element outside any mode, or in an unknown mode.
	 */
	public function startElement( $parser, $elm, $attribs ) {
		if ( $elm === self::NS_RDF . ' RDF'
			|| $elm === 'adobe:ns:meta/ xmpmeta'
			|| $elm === 'adobe:ns:meta/ xapmeta'
		) {
			/* ignore. */
			return;
		} elseif ( $elm === self::NS_RDF . ' Description' ) {
			if ( count( $this->mode ) === 0 ) {
				// outer rdf:desc
				array_unshift( $this->mode, self::MODE_INITIAL );
			}
		} elseif ( $elm === self::NS_RDF . ' type' ) {
			// This doesn't support rdf:type properly.
			// In practise I have yet to see a file that
			// uses this element, however it is mentioned
			// on page 25 of part 1 of the xmp standard.
			//
			// also it seems as if exiv2 and exiftool do not support
			// this either (That or I misunderstand the standard)
			wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
		}

		if ( strpos( $elm, ' ' ) === false ) {
			// This probably shouldn't happen.
			wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
			return;
		}

		list( $ns, $tag ) = explode( ' ', $elm, 2 );

		if ( count( $this->mode ) === 0 ) {
			// This should not happen.
			throw new MWException( 'Error extracting XMP, '
				. "encountered <$elm> with no mode" );
		}

		switch ( $this->mode[0] ) {
			case self::MODE_IGNORE:
				$this->startElementModeIgnore( $elm );
				break;
			case self::MODE_SIMPLE:
				$this->startElementModeSimple( $elm, $attribs );
				break;
			case self::MODE_INITIAL:
				$this->startElementModeInitial( $ns, $tag, $attribs );
				break;
			case self::MODE_STRUCT:
				$this->startElementModeStruct( $ns, $tag, $attribs );
				break;
			case self::MODE_BAG:
			case self::MODE_BAGSTRUCT:
				$this->startElementModeBag( $elm );
				break;
			case self::MODE_SEQ:
				$this->startElementModeSeq( $elm );
				break;
			case self::MODE_LANG:
				$this->startElementModeLang( $elm );
				break;
			case self::MODE_LI_LANG:
				$this->startElementModeLiLang( $elm, $attribs );
				break;
			case self::MODE_LI:
				$this->startElementModeLi( $elm, $attribs );
				break;
			case self::MODE_QDESC:
				$this->startElementModeQDesc( $elm );
				break;
			default:
				throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
		}
	}

	/**
	 * Process the attributes of an element.
	 *
	 * rdf:value and rdf:resource are treated as character data of the
	 * current element; attributes naming a recognised property are saved
	 * as that property's value; everything else is logged and skipped.
	 *
	 * @param array $attribs Attributes, keyed by namespace <space> name.
	 * @throws MWException If a property attribute appears on a simple value.
	 */
	private function doAttribs( $attribs ) {
		// first check for rdf:parseType attribute, as that can change
		// how the attributes are interperted.

		if ( isset( $attribs[self::NS_RDF . ' parseType'] )
			&& $attribs[self::NS_RDF . ' parseType'] === 'Resource'
			&& $this->mode[0] === self::MODE_SIMPLE
		) {
			// this is equivalent to having an inner rdf:Description
			$this->mode[0] = self::MODE_QDESC;
		}
		foreach ( $attribs as $name => $val ) {

			if ( strpos( $name, ' ' ) === false ) {
				// This shouldn't happen, but so far some old software forgets namespace
				// on rdf:about.
				wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
					. " $name=\"$val\". Skipping. " );
				continue;
			}
			list( $ns, $tag ) = explode( ' ', $name, 2 );
			if ( $ns === self::NS_RDF ) {
				if ( $tag === 'value' || $tag === 'resource' ) {
					// resource is for url.
					// value attribute is a weird way of just putting the contents.
					$this->char( $this->xmlParser, $val );
				}
			} elseif ( isset( $this->items[$ns][$tag] ) ) {
				if ( $this->mode[0] === self::MODE_SIMPLE ) {
					throw new MWException( __METHOD__
						. " $ns:$tag found as attribute where not allowed" );
				}
				$this->saveValue( $ns, $tag, $val );
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			}
		}
	}

	/**
	 * Validate a single value and store it in the results.
	 *
	 * Where the value is stored depends on the current context: inside a
	 * bag of structs, inside a struct, inside a plain or language array,
	 * or as a plain scalar property.
	 *
	 * @param string $ns Namespace URI of the property.
	 * @param string $tag Local name of the property.
	 * @param string $val Value to save; dropped if validation sets it to null.
	 */
	private function saveValue( $ns, $tag, $val ) {
		$info =& $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;
		if ( isset( $info['validate'] ) ) {
			// A bare method name refers to a static method of XMPValidate.
			$validate = is_array( $info['validate'] ) ? $info['validate']
				: array( 'XMPValidate', $info['validate'] );

			if ( is_callable( $validate ) ) {
				call_user_func_array( $validate, array( $info, &$val, true ) );
				// the reasoning behind using &$val instead of using the return value
				// is to be consistent between here and validating structures.
				if ( is_null( $val ) ) {
					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
					return;
				}
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
			}
		}

		if ( $this->ancestorStruct && $this->processingArray ) {
			// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
			$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val;
		} elseif ( $this->ancestorStruct ) {
			$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val;
		} elseif ( $this->processingArray ) {
			if ( $this->itemLang === false ) {
				// normal array
				$this->results['xmp-' . $info['map_group']][$finalName][] = $val;
			} else {
				// lang array.
				$this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val;
			}
		} else {
			$this->results['xmp-' . $info['map_group']][$finalName] = $val;
		}
	}
}