MediaWiki
REL1_21
|
<?php
/**
 * Event-driven reader for XMP (RDF/XML) metadata packets.
 *
 * The class feeds the packet through PHP's namespace-aware expat parser and
 * keeps a small state machine (the $mode stack, see the MODE_* constants)
 * alongside a stack of the elements currently being processed ($curItem).
 * Recognised properties - those listed by XMPInfo::getItems() - are validated
 * and collected into $results, grouped as 'xmp-<map_group>'.
 *
 * Typical use: construct, call parse() (and parseExtended() for extended XMP
 * blocks), then fetch the data with getResults().
 */
class XMPReader {

	private $curItem = array(); // array to hold the current element (and previous element, and so on)
	private $ancestorStruct = false; // the structure name when processing nested structures.
	private $charContent = false; // temporary holder for character data that appears in xmp doc.
	private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants)
	private $results = array(); // array to hold results
	private $processingArray = false; // if we're doing a seq or bag.
	private $itemLang = false; // used for lang alts only

	private $xmlParser; // expat parser handle created by resetXMLParser()
	private $charset = false; // detected input charset, or false if not detected yet
	private $extendedXMPOffset = 0; // offset at which the next extended XMP block is expected

	protected $items; // property definitions, as returned by XMPInfo::getItems()

	/*
	 * Parser states. The element on top of $this->mode (index 0) decides how
	 * the next start/end element or character data is handled.
	 */
	const MODE_INITIAL = 0;
	const MODE_IGNORE = 1;
	const MODE_LI = 2;
	const MODE_LI_LANG = 3;
	const MODE_QDESC = 4;

	// The following MODE constants are also used in the
	// $items array to denote what type of property the item is.
	const MODE_SIMPLE = 10;
	const MODE_STRUCT = 11; // structure (associative array)
	const MODE_SEQ = 12; // ordered list
	const MODE_BAG = 13; // unordered list
	const MODE_LANG = 14;
	const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
	const MODE_BAGSTRUCT = 16; // A BAG of Structs.

	const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
	const NS_XML = 'http://www.w3.org/XML/1998/namespace';

	/**
	 * Load the property definitions and create the XML parser.
	 *
	 * @throws MWException if the XML parser extension is not available.
	 */
	function __construct() {

		if ( !function_exists( 'xml_parser_create_ns' ) ) {
			// this should already be checked by this point
			throw new MWException( 'XMP support requires XML Parser' );
		}

		$this->items = XMPInfo::getItems();

		$this->resetXMLParser();

	}

	/**
	 * (Re)create the expat parser and attach this object's handlers.
	 *
	 * Any existing parser is freed first. Element names are reported as
	 * "<namespace URI> <local name>" because a single space is used as the
	 * namespace separator; the rest of the class relies on that format.
	 */
	private function resetXMLParser() {

		if ( $this->xmlParser ) {
			//is this needed?
			xml_parser_free( $this->xmlParser );
		}

		$this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' );
		xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 );
		xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 );

		xml_set_element_handler( $this->xmlParser,
			array( $this, 'startElement' ),
			array( $this, 'endElement' ) );

		xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
	}

	/**
	 * Free the XML parser, if one was created.
	 */
	function __destruct() {
		// not sure if this is needed.
		// Same guard as resetXMLParser(): never hand an unset handle to xml_parser_free().
		if ( $this->xmlParser ) {
			xml_parser_free( $this->xmlParser );
		}
	}

	/**
	 * Get the collected metadata after post-processing.
	 *
	 * Runs the 'XMPGetResults' hook on a copy of the results, folds the
	 * 'xmp-special' helper values into the regular groups, removes
	 * 'xmp-special', and converts GPSAltitude to a signed number.
	 *
	 * @return Array Results keyed by group ('xmp-general', 'xmp-exif', ...).
	 */
	public function getResults() {
		// xmp-special is for metadata that affects how stuff
		// is extracted. For example xmpNote:HasExtendedXMP.

		// It is also used to handle photoshop:AuthorsPosition
		// which is weird and really part of another property,
		// see 2:85 in IPTC. See also pg 21 of IPTC4XMP standard.
		// The location fields also use it.

		$data = $this->results;

		wfRunHooks( 'XMPGetResults', array( &$data ) );

		if ( isset( $data['xmp-special']['AuthorsPosition'] )
			&& is_string( $data['xmp-special']['AuthorsPosition'] )
			&& isset( $data['xmp-general']['Artist'][0] )
		) {
			// Note, if there is more than one creator,
			// this only applies to first. This also will
			// only apply to the dc:Creator prop, not the
			// exif:Artist prop.

			$data['xmp-general']['Artist'][0] =
				$data['xmp-special']['AuthorsPosition'] . ', '
				. $data['xmp-general']['Artist'][0];
		}

		// Go through the LocationShown and LocationCreated
		// changing it to the non-hierarchal form used by
		// the other location fields. Each field of a location struct is
		// appended to xmp-general under "<field><suffix>".
		$locationSuffixes = array(
			'LocationShown' => 'Dest',
			'LocationCreated' => 'Created',
		);
		foreach ( $locationSuffixes as $special => $suffix ) {
			// the is_array is just paranoia. It should always
			// be an array.
			if ( !isset( $data['xmp-special'][$special][0] )
				|| !is_array( $data['xmp-special'][$special][0] )
			) {
				continue;
			}
			foreach ( $data['xmp-special'][$special] as $loc ) {
				if ( !is_array( $loc ) ) {
					// To avoid copying over the _type meta-fields.
					continue;
				}
				foreach ( $loc as $field => $val ) {
					$data['xmp-general'][$field . $suffix][] = $val;
				}
			}
		}

		// We don't want to return the special values, since they're
		// special and not info to be stored about the file.
		unset( $data['xmp-special'] );

		// Convert GPSAltitude to negative if below sea level.
		if ( isset( $data['xmp-exif']['GPSAltitudeRef'] )
			&& isset( $data['xmp-exif']['GPSAltitude'] )
		) {

			// Must convert to a real before multiplying by -1
			// XMPValidate guarantees there will always be a '/' in this value.
			list( $nom, $denom ) = explode( '/', $data['xmp-exif']['GPSAltitude'] );
			$data['xmp-exif']['GPSAltitude'] = $nom / $denom;

			if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) {
				$data['xmp-exif']['GPSAltitude'] *= -1;
			}
			unset( $data['xmp-exif']['GPSAltitudeRef'] );
		}

		return $data;
	}

	/**
	 * Parse a chunk of XMP data.
	 *
	 * The charset is detected from a byte order mark the first time this is
	 * called (UTF-8 is assumed if none is found) and remembered for later
	 * calls; non UTF-8 input is converted with iconv before parsing.
	 * On any failure the results collected so far are discarded.
	 *
	 * @param $content String The XMP data (or the next piece of it).
	 * @param $allOfIt Boolean Passed to xml_parse() as its "final chunk" flag.
	 * @param $reset Boolean Whether to recreate the XML parser before parsing.
	 * @return Boolean True on success, false if the content could not be parsed.
	 */
	public function parse( $content, $allOfIt = true, $reset = false ) {
		if ( $reset ) {
			$this->resetXMLParser();
		}
		try {

			// detect encoding by looking for BOM which is supposed to be in processing instruction.
			// see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
			if ( !$this->charset ) {
				$bom = array();
				// The 4-byte UTF-32LE mark is listed before the 2-byte UTF-16LE
				// mark it starts with, so the longer one wins at a given position.
				if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
					$content, $bom )
				) {
					switch ( $bom[0] ) {
						case "\xFE\xFF":
							$this->charset = 'UTF-16BE';
							break;
						case "\xFF\xFE":
							$this->charset = 'UTF-16LE';
							break;
						case "\x00\x00\xFE\xFF":
							$this->charset = 'UTF-32BE';
							break;
						case "\xFF\xFE\x00\x00":
							$this->charset = 'UTF-32LE';
							break;
						case "\xEF\xBB\xBF":
							$this->charset = 'UTF-8';
							break;
						default:
							//this should be impossible to get to
							throw new MWException( "Invalid BOM" );
					}
				} else {
					// standard specifically says, if no bom assume utf-8
					$this->charset = 'UTF-8';
				}
			}
			if ( $this->charset !== 'UTF-8' ) {
				//don't convert if already utf-8
				wfSuppressWarnings();
				$content = iconv( $this->charset, 'UTF-8//IGNORE', $content );
				wfRestoreWarnings();
			}

			$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
			if ( !$ok ) {
				$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
				$where = 'line: ' . xml_get_current_line_number( $this->xmlParser )
					. ' column: ' . xml_get_current_column_number( $this->xmlParser )
					. ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );

				wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" );
				$this->results = array(); // blank if error.
				return false;
			}
		} catch ( MWException $e ) {
			// The element/character handlers throw MWException on malformed XMP.
			wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
			$this->results = array();
			return false;
		}
		return true;
	}

	/**
	 * Parse an extended XMP block.
	 *
	 * The block is expected to consist of a 32 byte guid, two 32-bit
	 * big-endian integers (read here as 'length' and 'offset') and then the
	 * XMP data itself. The block is ignored unless its guid matches the
	 * HasExtendedXMP value already read, and unless it arrives in order.
	 *
	 * @param $content String The extended XMP block, starting at the guid.
	 * @return Boolean False if the block was ignored or failed to parse.
	 */
	public function parseExtended( $content ) {
		// @todo FIXME: This is untested. Hard to find example files
		// or programs that make such files..
		$guid = substr( $content, 0, 32 );
		if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
			|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) {
			wfDebugLog( 'XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
			return false;
		}
		$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );

		if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
			wfDebugLog( 'XMP', __METHOD__ . ' Error reading extended XMP block, invalid length or offset.' );
			return false;
		}

		// we're not very robust here. we should accept it in the wrong order. To quote
		// the xmp standard:
		// "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
		// StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
		// robust JPEG reader should tolerate the marker segments in any order."
		//
		// otoh the probability that an image will have more than 128k of metadata is rather low...
		// so the probability that it will have > 128k, and be in the wrong order is very low...

		if ( $len['offset'] !== $this->extendedXMPOffset ) {
			wfDebugLog( 'XMP', __METHOD__ . ' Ignoring XMPExtended block due to wrong order. (Offset was '
				. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );
			return false;
		}

		if ( $len['offset'] === 0 ) {
			// if we're starting the extended block, we've probably already
			// done the XMPStandard block, so reset.
			$this->resetXMLParser();
		}

		// NOTE(review): 'length' is treated here as the size of this chunk. If
		// it is actually the full length of the whole extended XMP, then the
		// running offset should advance by strlen( $actualContent ) and the
		// end test should compare against 'length' - confirm against the XMP
		// specification before relying on multi-block input.
		$this->extendedXMPOffset += $len['length'];

		$actualContent = substr( $content, 40 );

		$atEnd = ( $this->extendedXMPOffset === strlen( $actualContent ) );

		wfDebugLog( 'XMP', __METHOD__ . ' Parsing a XMPExtended block' );
		return $this->parse( $actualContent, $atEnd );
	}

	/**
	 * Character data handler for the XML parser.
	 *
	 * Whitespace-only data is dropped. Other data is accumulated in
	 * $charContent (it may arrive in several pieces) until the enclosing
	 * element ends. Also called directly by doAttribs() for rdf:value and
	 * rdf:resource attributes.
	 *
	 * @param $parser XMLParser handle (unused).
	 * @param $data String The character data.
	 * @throws MWException if character data shows up where none is allowed.
	 */
	function char( $parser, $data ) {

		$data = trim( $data );
		if ( $data === '' ) {
			return;
		}

		if ( !isset( $this->mode[0] ) ) {
			throw new MWException( 'Unexpected character data before first rdf:Description element' );
		}

		if ( $this->mode[0] === self::MODE_IGNORE ) return;

		if ( $this->mode[0] !== self::MODE_SIMPLE
			&& $this->mode[0] !== self::MODE_QDESC
		) {
			throw new MWException( 'character data where not expected. (mode ' . $this->mode[0] . ')' );
		}

		// to check, how does this handle w.s.
		if ( $this->charContent === false ) {
			$this->charContent = $data;
		} else {
			$this->charContent .= $data;
		}

	}

	/**
	 * End element while in MODE_IGNORE.
	 *
	 * Leaves ignore mode only when the element that started it closes;
	 * closing tags of other elements are skipped.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 */
	private function endElementModeIgnore ( $elm ) {
		if ( $this->curItem[0] === $elm ) {
			array_shift( $this->curItem );
			array_shift( $this->mode );
		}
	}

	/**
	 * End element while in MODE_SIMPLE.
	 *
	 * Saves any accumulated character data as the value of the property and
	 * pops the current item and mode.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 */
	private function endElementModeSimple ( $elm ) {
		if ( $this->charContent !== false ) {
			if ( $this->processingArray ) {
				// if we're processing an array, use the original element
				// name instead of rdf:li.
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			} else {
				list( $ns, $tag ) = explode( ' ', $elm, 2 );
			}
			$this->saveValue( $ns, $tag, $this->charContent );

			$this->charContent = false; // reset
		}
		array_shift( $this->curItem );
		array_shift( $this->mode );

	}

	/**
	 * End element for the nested modes (STRUCT, SEQ, BAG, LANG, BAGSTRUCT).
	 *
	 * Checks the closing tag matches the current item, runs the property's
	 * structure-level validation (if any) on the collected value, then pops
	 * the current item and mode and clears the nesting flags.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @throws MWException on a nesting mismatch.
	 */
	private function endElementNested( $elm ) {

		/* cur item must be the same as $elm, unless if in MODE_STRUCT
		   in which case it could also be rdf:Description */
		if ( $this->curItem[0] !== $elm
			&& !( $elm === self::NS_RDF . ' Description'
				&& $this->mode[0] === self::MODE_STRUCT )
		) {
			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
		}

		// Validate structures.
		list( $ns, $tag ) = explode( ' ', $elm, 2 );
		if ( isset( $this->items[$ns][$tag]['validate'] ) ) {

			$info =& $this->items[$ns][$tag];
			$finalName = isset( $info['map_name'] )
				? $info['map_name'] : $tag;

			// A bare name refers to a static method of XMPValidate.
			$validate = is_array( $info['validate'] ) ? $info['validate']
				: array( 'XMPValidate', $info['validate'] );

			if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
				// This can happen if all the members of the struct failed validation.
				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );

			} elseif ( is_callable( $validate ) ) {
				$val =& $this->results['xmp-' . $info['map_group']][$finalName];
				call_user_func_array( $validate, array( $info, &$val, false ) );
				if ( is_null( $val ) ) {
					// the idea being the validation function will unset the variable if
					// its invalid.
					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
					unset( $this->results['xmp-' . $info['map_group']][$finalName] );
				}
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
			}
		}

		array_shift( $this->curItem );
		array_shift( $this->mode );
		$this->ancestorStruct = false;
		$this->processingArray = false;
		$this->itemLang = false;
	}

	/**
	 * End element while in MODE_LI or MODE_LI_LANG, i.e. the closing
	 * rdf:Seq / rdf:Bag / rdf:Alt of a list.
	 *
	 * Pops the mode and, if the list produced any value, records the kind of
	 * list in its '_type' key ('ol', 'ul' or 'lang').
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @throws MWException if the closing tag is not Seq, Bag or Alt.
	 */
	private function endElementModeLi( $elm ) {

		list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
		$info = $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;

		array_shift( $this->mode );

		if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
			return;
		}

		if ( $elm === self::NS_RDF . ' Seq' ) {
			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ol';
		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ul';
		} elseif ( $elm === self::NS_RDF . ' Alt' ) {
			// extra if needed as you could theoretically have a non-language alt.
			if ( $info['mode'] === self::MODE_LANG ) {
				$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang';
			}

		} else {
			throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
		}
	}

	/**
	 * End element while in MODE_QDESC (a value with qualifiers).
	 *
	 * A closing rdf:value saves the accumulated character data as the value
	 * of the current item; any other closing tag pops the mode and item.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 */
	private function endElementModeQDesc( $elm ) {

		if ( $elm === self::NS_RDF . ' value' ) {
			list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			$this->saveValue( $ns, $tag, $this->charContent );
			return;
		} else {
			array_shift( $this->mode );
			array_shift( $this->curItem );
		}
	}

	/**
	 * End element handler for the XML parser.
	 *
	 * Skips the wrapper elements and elements without a namespace, then
	 * dispatches to the endElementMode* method for the current mode.
	 *
	 * @param $parser XMLParser handle (unused).
	 * @param $elm String Namespace, a space, then the tag name.
	 * @throws MWException if the parser state is inconsistent.
	 */
	function endElement( $parser, $elm ) {
		if ( $elm === ( self::NS_RDF . ' RDF' )
			|| $elm === 'adobe:ns:meta/ xmpmeta'
			|| $elm === 'adobe:ns:meta/ xapmeta' )
		{
			// ignore these.
			return;
		}

		if ( $elm === self::NS_RDF . ' type' ) {
			// these aren't really supported properly yet.
			// However, it appears they almost never used.
			wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
		}

		if ( strpos( $elm, ' ' ) === false ) {
			// This probably shouldn't happen.
			// However, there is a bug in an adobe product
			// that forgets the namespace on some things.
			// (Luckily they are unimportant things).
			wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
			return;
		}

		// Count the mode stack itself: counting $this->mode[0] (an integer)
		// is an error on current PHP versions and never tested the stack.
		if ( count( $this->mode ) === 0 ) {
			// This should never ever happen and means
			// there is a pretty major bug in this class.
			throw new MWException( 'Encountered end element with no mode' );
		}

		if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
			// just to be paranoid. Should always have a curItem, except for initially
			// (aka during MODE_INITAL).
			throw new MWException( "Hit end element </$elm> but no curItem" );
		}

		switch( $this->mode[0] ) {
			case self::MODE_IGNORE:
				$this->endElementModeIgnore( $elm );
				break;
			case self::MODE_SIMPLE:
				$this->endElementModeSimple( $elm );
				break;
			case self::MODE_STRUCT:
			case self::MODE_SEQ:
			case self::MODE_BAG:
			case self::MODE_LANG:
			case self::MODE_BAGSTRUCT:
				$this->endElementNested( $elm );
				break;
			case self::MODE_INITIAL:
				if ( $elm === self::NS_RDF . ' Description' ) {
					array_shift( $this->mode );
				} else {
					throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
				}
				break;
			case self::MODE_LI:
			case self::MODE_LI_LANG:
				$this->endElementModeLi( $elm );
				break;
			case self::MODE_QDESC:
				$this->endElementModeQDesc( $elm );
				break;
			default:
				wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
				break;
		}
	}

	/**
	 * Start element while in MODE_IGNORE.
	 *
	 * Only an element with the same name as the one being ignored is
	 * tracked, so that the matching closing tag can be told apart from the
	 * closing tag of the nested copy.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 */
	private function startElementModeIgnore( $elm ) {
		if ( $elm === $this->curItem[0] ) {
			array_unshift( $this->curItem, $elm );
			array_unshift( $this->mode, self::MODE_IGNORE );
		}
	}

	/**
	 * Start element while in MODE_BAG or MODE_BAGSTRUCT: must be rdf:Bag.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @throws MWException if the element is not rdf:Bag.
	 */
	private function startElementModeBag( $elm ) {
		if ( $elm === self::NS_RDF . ' Bag' ) {
			array_unshift( $this->mode, self::MODE_LI );
		} else {
			throw new MWException( "Expected <rdf:Bag> but got $elm." );
		}

	}

	/**
	 * Start element while in MODE_SEQ: must be rdf:Seq (rdf:Bag is tolerated).
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @throws MWException if the element is neither rdf:Seq nor rdf:Bag.
	 */
	private function startElementModeSeq( $elm ) {
		if ( $elm === self::NS_RDF . ' Seq' ) {
			array_unshift( $this->mode, self::MODE_LI );
		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
			# bug 27105
			wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
				. ' it is a Seq, since some buggy software is known to screw this up.' );
			array_unshift( $this->mode, self::MODE_LI );
		} else {
			throw new MWException( "Expected <rdf:Seq> but got $elm." );
		}

	}

	/**
	 * Start element while in MODE_LANG: must be rdf:Alt.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @throws MWException if the element is not rdf:Alt.
	 */
	private function startElementModeLang( $elm ) {
		if ( $elm === self::NS_RDF . ' Alt' ) {
			array_unshift( $this->mode, self::MODE_LI_LANG );
		} else {
			throw new MWException( "Expected <rdf:Alt> but got $elm." );
		}

	}

	/**
	 * Start element while in MODE_SIMPLE.
	 *
	 * A simple value normally contains only text. A nested rdf:Description
	 * means the value has qualifiers (switch to MODE_QDESC, taking the value
	 * from an rdf:value attribute if present); any other element is ignored.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @param $attribs Array Attributes of the element.
	 * @throws MWException on an rdf:value element directly inside the value.
	 */
	private function startElementModeSimple( $elm, $attribs ) {
		if ( $elm === self::NS_RDF . ' Description' ) {
			// If this value has qualifiers
			array_unshift( $this->mode, self::MODE_QDESC );
			array_unshift( $this->curItem, $this->curItem[0] );

			if ( isset( $attribs[self::NS_RDF . ' value'] ) ) {
				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
				$this->saveValue( $ns, $tag, $attribs[self::NS_RDF . ' value'] );
			}
		} elseif ( $elm === self::NS_RDF . ' value' ) {
			// This should not be here.
			throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );

		} else {
			// something else we don't recognize, like a qualifier maybe.
			wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $elm );

		}

	}

	/**
	 * Start element while in MODE_QDESC.
	 *
	 * rdf:value needs no state change (its text is picked up by char());
	 * everything else is a qualifier and is ignored.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 */
	private function startElementModeQDesc( $elm ) {
		if ( $elm === self::NS_RDF . ' value' ) {
			return; // do nothing
		} else {
			// otherwise its a qualifier, which we ignore
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $elm );
		}
	}

	/**
	 * Start element while in MODE_INITIAL (directly inside the outer
	 * rdf:Description).
	 *
	 * Known properties switch to the mode given in their definition; unknown
	 * properties, and properties that may only appear inside a structure,
	 * are ignored together with their children. Attributes are then
	 * processed with doAttribs().
	 *
	 * @param $ns String Namespace of the element.
	 * @param $tag String Local name of the element.
	 * @param $attribs Array Attributes of the element.
	 * @throws MWException if text preceded the element.
	 */
	private function startElementModeInitial( $ns, $tag, $attribs ) {
		if ( $ns !== self::NS_RDF ) {

			if ( isset( $this->items[$ns][$tag] ) ) {
				if ( isset( $this->items[$ns][$tag]['structPart'] ) ) {
					// If this element is supposed to appear only as
					// a child of a structure, but appears here (not as
					// a child of a struct), then something weird is
					// happening, so ignore this element and its children.

					wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside"
						. " of its expected parent. Ignoring." );

					array_unshift( $this->mode, self::MODE_IGNORE );
					array_unshift( $this->curItem, $ns . ' ' . $tag );
					return;
				}
				$mode = $this->items[$ns][$tag]['mode'];
				array_unshift( $this->mode, $mode );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				if ( $mode === self::MODE_STRUCT ) {
					$this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
						? $this->items[$ns][$tag]['map_name'] : $tag;
				}
				if ( $this->charContent !== false ) {
					// Something weird.
					// Should not happen in valid XMP.
					throw new MWException( 'tag nested in non-whitespace characters.' );
				}
			} else {
				// This element is not on our list of allowed elements so ignore.
				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
				array_unshift( $this->mode, self::MODE_IGNORE );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				return;
			}

		}
		// process attributes
		$this->doAttribs( $attribs );
	}

	/**
	 * Start element while in MODE_STRUCT.
	 *
	 * Members of the structure switch to the mode from their definition;
	 * unknown members are ignored. An rdf:Description wrapper has its
	 * attributes processed and keeps the reader in MODE_STRUCT.
	 *
	 * @param $ns String Namespace of the element.
	 * @param $tag String Local name of the element.
	 * @param $attribs Array Attributes of the element.
	 * @throws MWException if the member is not allowed in the current
	 *   structure, or if text preceded the element.
	 */
	private function startElementModeStruct( $ns, $tag, $attribs ) {
		if ( $ns !== self::NS_RDF ) {

			if ( isset( $this->items[$ns][$tag] ) ) {
				if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
					&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
				{
					// This assumes that we don't have inter-namespace nesting
					// which we don't in all the properties we're interested in.
					throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
						. "> where it is not allowed." );
				}
				array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				if ( $this->charContent !== false ) {
					// Something weird.
					// Should not happen in valid XMP.
					throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
				}
			} else {
				// Record the full element name: endElementModeIgnore() compares
				// it against the closing tag to know when to stop ignoring.
				array_unshift( $this->mode, self::MODE_IGNORE );
				array_unshift( $this->curItem, $ns . ' ' . $tag );
				return;
			}

		}

		if ( $ns === self::NS_RDF && $tag === 'Description' ) {
			$this->doAttribs( $attribs );
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $this->curItem[0] );
		}
	}

	/**
	 * Start element while in MODE_LI: must be an rdf:li list item.
	 *
	 * For a bag of structures the item is handled as a STRUCT named after
	 * the enclosing property; otherwise it is a SIMPLE value of that
	 * property.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @param $attribs Array Attributes of the element.
	 * @throws MWException if the element is not rdf:li or state is missing.
	 */
	private function startElementModeLi( $elm, $attribs ) {
		if ( ( $elm ) !== self::NS_RDF . ' li' ) {
			throw new MWException( "<rdf:li> expected but got $elm." );
		}

		if ( !isset( $this->mode[1] ) ) {
			// This should never ever ever happen. Checking for it
			// to be paranoid.
			throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
		}

		if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
			// This list item contains a compound (STRUCT) value.
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $elm );
			$this->processingArray = true;

			if ( !isset( $this->curItem[1] ) ) {
				// be paranoid.
				throw new MWException( 'Can not find parent of BAGSTRUCT.' );
			}
			list( $curNS, $curTag ) = explode( ' ', $this->curItem[1], 2 );
			$this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] )
				? $this->items[$curNS][$curTag]['map_name'] : $curTag;

			$this->doAttribs( $attribs );

		} else {
			// Normal BAG or SEQ containing simple values.
			array_unshift( $this->mode, self::MODE_SIMPLE );
			// need to add curItem[0] on again since one is for the specific item
			// and one is for the entire group.
			array_unshift( $this->curItem, $this->curItem[0] );
			$this->processingArray = true;
		}

	}

	/**
	 * Start element while in MODE_LI_LANG: an rdf:li of a language
	 * alternative, which must carry a valid xml:lang attribute.
	 *
	 * @param $elm String Namespace, a space, then the tag name.
	 * @param $attribs Array Attributes of the element.
	 * @throws MWException if the element is not rdf:li, or xml:lang is
	 *   missing or malformed.
	 */
	private function startElementModeLiLang( $elm, $attribs ) {
		if ( $elm !== self::NS_RDF . ' li' ) {
			throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
		}
		if ( !isset( $attribs[ self::NS_XML . ' lang'] )
			|| !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[ self::NS_XML . ' lang' ] ) )
		{
			throw new MWException( __METHOD__
				. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
		}

		// Lang is case-insensitive.
		$this->itemLang = strtolower( $attribs[ self::NS_XML . ' lang' ] );

		// need to add curItem[0] on again since one is for the specific item
		// and one is for the entire group.
		array_unshift( $this->curItem, $this->curItem[0] );
		array_unshift( $this->mode, self::MODE_SIMPLE );
		$this->processingArray = true;
	}

	/**
	 * Start element handler for the XML parser.
	 *
	 * Skips the wrapper elements, enters MODE_INITIAL on the outer
	 * rdf:Description, then dispatches to the startElementMode* method for
	 * the current mode.
	 *
	 * @param $parser XMLParser handle (unused).
	 * @param $elm String Namespace, a space, then the tag name.
	 * @param $attribs Array Attributes, keyed by "<namespace> <name>".
	 * @throws MWException if the element is not valid in the current mode.
	 */
	function startElement( $parser, $elm, $attribs ) {

		if ( $elm === self::NS_RDF . ' RDF'
			|| $elm === 'adobe:ns:meta/ xmpmeta'
			|| $elm === 'adobe:ns:meta/ xapmeta' )
		{
			/* ignore. */
			return;
		} elseif ( $elm === self::NS_RDF . ' Description' ) {
			if ( count( $this->mode ) === 0 ) {
				// outer rdf:desc
				array_unshift( $this->mode, self::MODE_INITIAL );
			}
		} elseif ( $elm === self::NS_RDF . ' type' ) {
			// This doesn't support rdf:type properly.
			// In practise I have yet to see a file that
			// uses this element, however it is mentioned
			// on page 25 of part 1 of the xmp standard.
			//
			// also it seems as if exiv2 and exiftool do not support
			// this either (That or I misunderstand the standard)
			wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
		}

		if ( strpos( $elm, ' ' ) === false ) {
			// This probably shouldn't happen.
			wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
			return;
		}

		list( $ns, $tag ) = explode( ' ', $elm, 2 );

		if ( count( $this->mode ) === 0 ) {
			// This should not happen.
			throw new MWException( 'Error extracting XMP, '
				. "encountered <$elm> with no mode" );
		}

		switch( $this->mode[0] ) {
			case self::MODE_IGNORE:
				$this->startElementModeIgnore( $elm );
				break;
			case self::MODE_SIMPLE:
				$this->startElementModeSimple( $elm, $attribs );
				break;
			case self::MODE_INITIAL:
				$this->startElementModeInitial( $ns, $tag, $attribs );
				break;
			case self::MODE_STRUCT:
				$this->startElementModeStruct( $ns, $tag, $attribs );
				break;
			case self::MODE_BAG:
			case self::MODE_BAGSTRUCT:
				$this->startElementModeBag( $elm );
				break;
			case self::MODE_SEQ:
				$this->startElementModeSeq( $elm );
				break;
			case self::MODE_LANG:
				$this->startElementModeLang( $elm );
				break;
			case self::MODE_LI_LANG:
				$this->startElementModeLiLang( $elm, $attribs );
				break;
			case self::MODE_LI:
				$this->startElementModeLi( $elm, $attribs );
				break;
			case self::MODE_QDESC:
				$this->startElementModeQDesc( $elm );
				break;
			default:
				throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
		}
	}

	/**
	 * Process the attributes of an element.
	 *
	 * XMP allows simple properties to be written as attributes. Recognised
	 * properties are saved as values; rdf:value and rdf:resource are fed to
	 * char() as if they were the element's text; rdf:parseType="Resource" on
	 * a simple value switches the current mode to MODE_QDESC.
	 *
	 * @param $attribs Array Attributes, keyed by "<namespace> <name>".
	 * @throws MWException if a known property appears as an attribute of a
	 *   simple value.
	 */
	private function doAttribs( $attribs ) {

		// first check for rdf:parseType attribute, as that can change
		// how the attributes are interperted.

		if ( isset( $attribs[self::NS_RDF . ' parseType'] )
			&& $attribs[self::NS_RDF . ' parseType'] === 'Resource'
			&& $this->mode[0] === self::MODE_SIMPLE )
		{
			// this is equivalent to having an inner rdf:Description
			$this->mode[0] = self::MODE_QDESC;
		}
		foreach ( $attribs as $name => $val ) {
			if ( strpos( $name, ' ' ) === false ) {
				// This shouldn't happen, but so far some old software forgets namespace
				// on rdf:about.
				wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
					. " $name=\"$val\". Skipping. " );
				continue;
			}
			list( $ns, $tag ) = explode( ' ', $name, 2 );
			if ( $ns === self::NS_RDF ) {
				if ( $tag === 'value' || $tag === 'resource' ) {
					// resource is for url.
					// value attribute is a weird way of just putting the contents.
					$this->char( $this->xmlParser, $val );
				}
			} elseif ( isset( $this->items[$ns][$tag] ) ) {
				if ( $this->mode[0] === self::MODE_SIMPLE ) {
					throw new MWException( __METHOD__
						. " $ns:$tag found as attribute where not allowed" );
				}
				$this->saveValue( $ns, $tag, $val );
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			}
		}
	}

	/**
	 * Validate a value and store it in the results.
	 *
	 * The validation callback receives the value by reference and signals
	 * rejection by setting it to null, in which case nothing is stored.
	 * Where the value ends up depends on the current nesting: inside a
	 * structure within a list, inside a structure, inside a (language)
	 * list, or as a plain property.
	 *
	 * @param $ns String Namespace of the property.
	 * @param $tag String Local name of the property.
	 * @param $val String The value to save.
	 */
	private function saveValue( $ns, $tag, $val ) {

		$info =& $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;
		if ( isset( $info['validate'] ) ) {
			// A bare name refers to a static method of XMPValidate.
			$validate = is_array( $info['validate'] ) ? $info['validate']
				: array( 'XMPValidate', $info['validate'] );

			if ( is_callable( $validate ) ) {
				call_user_func_array( $validate, array( $info, &$val, true ) );
				// the reasoning behind using &$val instead of using the return value
				// is to be consistent between here and validating structures.
				if ( is_null( $val ) ) {
					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
					return;
				}
			} else {
				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
			}
		}

		if ( $this->ancestorStruct && $this->processingArray ) {
			// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
			$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val;
		} elseif ( $this->ancestorStruct ) {
			$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val;
		} elseif ( $this->processingArray ) {
			if ( $this->itemLang === false ) {
				// normal array
				$this->results['xmp-' . $info['map_group']][$finalName][] = $val;
			} else {
				// lang array.
				$this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val;
			}
		} else {
			$this->results['xmp-' . $info['map_group']][$finalName] = $val;
		}
	}
}