MediaWiki  REL1_19
XMP.php
Go to the documentation of this file.
00001 <?php
00027 class XMPReader {
00028 
00029         private $curItem = array();        // stack of element names being processed; index 0 is the current element, followed by the enclosing ones
00030         private $ancestorStruct = false;   // the structure name when processing nested structures, false otherwise.
00031         private $charContent = false;      // temporary holder for character data that appears in xmp doc; false when nothing is buffered.
00032         private $mode = array();           // stack of states the xmpreader is in, index 0 being the current one (see MODE_FOO constants)
00033         private $results = array();        // array to hold results
00034         private $processingArray = false;  // if we're doing a seq or bag.
00035         private $itemLang = false;         // used for lang alts only
00036 
00037         private $xmlParser;                // parser handle created by xml_parser_create_ns() in resetXMLParser()
00038         private $charset = false;          // input encoding as determined by parse(); false until it has been determined
00039         private $extendedXMPOffset = 0;    // offset at which parseExtended() expects the next extended XMP block to start
00040 
00041         protected $items;                  // property definitions as returned by XMPInfo::getItems()
00042 
00052         const MODE_INITIAL = 0;            // outermost state; left again when </rdf:Description> is seen
00053         const MODE_IGNORE  = 1;            // skipping an element that is unrecognized, misplaced or otherwise unwanted
00054         const MODE_LI      = 2;            // inside the rdf:Seq / rdf:Bag container of an array property
00055         const MODE_LI_LANG = 3;            // inside the rdf:Alt container of a language alternative
00056         const MODE_QDESC   = 4;            // inside an rdf:Description that wraps a simple value which has qualifiers
00057 
00058         // The following MODE constants are also used in the
00059         // $items array to denote what type of property the item is.
00060         const MODE_SIMPLE    = 10; // simple value given as character data
00061         const MODE_STRUCT    = 11; // structure (associative array)
00062         const MODE_SEQ       = 12; // ordered list
00063         const MODE_BAG       = 13; // unordered list
00064         const MODE_LANG      = 14; // language alternative (expects an rdf:Alt container)
00065         const MODE_ALT       = 15; // non-language alt. Currently not implemented, and not needed atm.
00066         const MODE_BAGSTRUCT = 16; // A BAG of Structs.
00067 
00068         const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; // namespace URI of the rdf: elements
00069         const NS_XML = 'http://www.w3.org/XML/1998/namespace';        // namespace URI of the xml: attributes
00070 
00071 
/**
 * Prepare a reader: check that XML support is present, load the table of
 * known XMP properties and create a fresh XML parser.
 *
 * @throws MWException if xml_parser_create_ns() does not exist.
 */
function __construct() {
	// Callers are expected to have verified this before getting here.
	$hasXmlSupport = function_exists( 'xml_parser_create_ns' );
	if ( !$hasXmlSupport ) {
		throw new MWException( 'XMP support requires XML Parser' );
	}

	$this->items = XMPInfo::getItems();
	$this->resetXMLParser();
}
/**
 * Throw away the current XML parser (if there is one) and create a new
 * namespace-aware one wired to this object's element and character
 * data handlers.
 *
 * Namespace URI and local name are reported joined by a single space,
 * which is the form the start/end element handlers split on.
 */
private function resetXMLParser() {
	$previous = $this->xmlParser;
	if ( $previous ) {
		// Unclear whether explicitly freeing is required, but it is harmless.
		xml_parser_free( $previous );
	}

	$this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' );

	$options = array(
		XML_OPTION_CASE_FOLDING => 0,
		XML_OPTION_SKIP_WHITE => 1,
	);
	foreach ( $options as $option => $value ) {
		xml_parser_set_option( $this->xmlParser, $option, $value );
	}

	$onStart = array( $this, 'startElement' );
	$onEnd = array( $this, 'endElement' );
	xml_set_element_handler( $this->xmlParser, $onStart, $onEnd );

	$onText = array( $this, 'char' );
	xml_set_character_data_handler( $this->xmlParser, $onText );
}
00110 
/**
 * Release the XML parser when the reader goes away.
 *
 * The parser may never have been created (the constructor throws before
 * creating it when XML support is missing), so only free it if it exists.
 * This mirrors the guard used in resetXMLParser().
 */
function __destruct() {
	// not sure if this is needed.
	if ( $this->xmlParser ) {
		xml_parser_free( $this->xmlParser );
	}
}
00119 
/**
 * Return the collected metadata after post-processing.
 *
 * The 'xmp-special' group holds values that influence how other values
 * are extracted (e.g. xmpNote:HasExtendedXMP) or that really belong to
 * another property (photoshop:AuthorsPosition, see 2:85 in IPTC and
 * pg 21 of the IPTC4XMP standard; the location fields). It is folded
 * into the other groups here and is not part of the return value.
 *
 * The 'XMPGetResults' hook is run on the data (passed by reference)
 * before any of the post-processing below happens.
 *
 * @return array Results grouped by 'xmp-*' group name.
 */
public function getResults() {
	$data = $this->results;

	wfRunHooks('XMPGetResults', Array(&$data));

	// Prefix the first creator with the author's position. Only the
	// first creator is affected when there are several, and only the
	// dc:Creator prop, not the exif:Artist prop.
	$hasPosition = isset( $data['xmp-special']['AuthorsPosition'] )
		&& is_string( $data['xmp-special']['AuthorsPosition'] );
	if ( $hasPosition && isset( $data['xmp-general']['Artist'][0] ) ) {
		$firstArtist = $data['xmp-general']['Artist'][0];
		$data['xmp-general']['Artist'][0] =
			$data['xmp-special']['AuthorsPosition'] . ', ' . $firstArtist;
	}

	// Flatten LocationShown and LocationCreated into the
	// non-hierarchical form used by the other location fields:
	// each member <field> is appended to 'xmp-general' under
	// <field> followed by the suffix given here.
	$locationSuffixes = array(
		'LocationShown' => 'Dest',
		'LocationCreated' => 'Created',
	);
	foreach ( $locationSuffixes as $special => $suffix ) {
		// The is_array check is just paranoia; it should always be an array.
		if ( !isset( $data['xmp-special'][$special][0] )
			|| !is_array( $data['xmp-special'][$special][0] )
		) {
			continue;
		}
		foreach ( $data['xmp-special'][$special] as $loc ) {
			// Non-array entries are the _type meta-fields; don't copy those.
			if ( is_array( $loc ) ) {
				foreach ( $loc as $field => $val ) {
					$data['xmp-general'][$field . $suffix][] = $val;
				}
			}
		}
	}

	// The special values are not information to be stored about the file.
	unset( $data['xmp-special'] );

	// A GPSAltitudeRef of 1 means below sea level: express that as a
	// negative altitude and drop the reference field either way.
	if ( isset( $data['xmp-exif']['GPSAltitudeRef'] ) ) {
		$belowSeaLevel = ( $data['xmp-exif']['GPSAltitudeRef'] == '1' );
		if ( $belowSeaLevel && isset( $data['xmp-exif']['GPSAltitude'] ) ) {
			$data['xmp-exif']['GPSAltitude'] *= -1;
		}
		unset( $data['xmp-exif']['GPSAltitudeRef'] );
	}

	return $data;
}
00205 
/**
 * Feed a piece of XMP to the XML parser.
 *
 * On the first call the character set is determined by looking for a
 * byte order mark in $content (UTF-8 is assumed when there is none) and
 * remembered for later calls. Content that is not UTF-8 is converted to
 * UTF-8 before being parsed.
 *
 * Any failure (XML error, failed charset conversion, MWException thrown
 * by one of the handlers) is logged to the 'XMP' debug log, blanks the
 * results gathered so far and makes this method return false.
 *
 * @param $content String XMP data, or one chunk of it.
 * @param $allOfIt Boolean Whether this is the last (or only) chunk;
 *   passed to xml_parse() as its "is final" argument.
 * @param $reset Boolean Whether to start over with a fresh XML parser first.
 * @return Boolean true on success, false on failure.
 */
public function parse( $content, $allOfIt = true, $reset = false ) {
	if ( $reset ) {
		$this->resetXMLParser();
	}
	try {

		// detect encoding by looking for BOM which is supposed to be in processing instruction.
		// see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
		if ( !$this->charset ) {
			$bom = array();
			if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
				 $content, $bom )
			) {
				switch ( $bom[0] ) {
					case "\xFE\xFF":
						$this->charset = 'UTF-16BE';
						break;
					case "\xFF\xFE":
						$this->charset = 'UTF-16LE';
						break;
					case "\x00\x00\xFE\xFF":
						$this->charset = 'UTF-32BE';
						break;
					case "\xFF\xFE\x00\x00":
						$this->charset = 'UTF-32LE';
						break;
					case "\xEF\xBB\xBF":
						$this->charset = 'UTF-8';
						break;
					default:
						//this should be impossible to get to
						throw new MWException("Invalid BOM");
				}

			} else {
				// standard specifically says, if no bom assume utf-8
				$this->charset = 'UTF-8';
			}
		}
		if ( $this->charset !== 'UTF-8' ) {
			//don't convert if already utf-8
			wfSuppressWarnings();
			$content = iconv( $this->charset, 'UTF-8//IGNORE', $content );
			wfRestoreWarnings();

			if ( $content === false ) {
				// iconv() signals failure by returning false. Handing that to
				// xml_parse() would be treated as an empty chunk, which for a
				// non-final chunk is silently accepted. Fail loudly instead.
				wfDebugLog( 'XMP', __METHOD__ . ' Could not convert XMP content from '
					. $this->charset . ' to UTF-8.' );
				$this->results = array(); // blank if error.
				return false;
			}
		}

		$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
		if ( !$ok ) {
			$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
			$where = 'line: ' . xml_get_current_line_number( $this->xmlParser )
				. ' column: ' . xml_get_current_column_number( $this->xmlParser )
				. ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );

			wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" );
			$this->results = array(); // blank if error.
			return false;
		}
	} catch ( MWException $e ) {
		// Thrown by the element / character data handlers on malformed XMP.
		wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
		$this->results = array();
		return false;
	}
	return true;
}
00284 
/**
 * Parse one ExtendedXMP block.
 *
 * Layout of $content as read here: a 32 byte GUID, a 4 byte big-endian
 * full length of the whole ExtendedXMP, a 4 byte big-endian offset of
 * this portion within it, and then the portion itself.
 *
 * The block is ignored (false is returned) if the GUID does not match
 * the xmpNote:HasExtendedXMP value of the standard XMP already parsed,
 * if the header is invalid, or if the portion does not start where the
 * previously accepted portion ended.
 *
 * @todo FIXME: This is untested. Hard to find example files
 *   or programs that make such files..
 *
 * @param $content String The ExtendedXMP block, starting at the GUID.
 * @return Boolean Whether the block was accepted and parsed successfully.
 */
public function parseExtended( $content ) {
	$guid = substr( $content, 0, 32 );
	if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
		|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) {
		wfDebugLog('XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid' )");
		return false;
	}

	// Only unpack a complete header; unpack() warns on short input.
	$header = substr( $content, 32, 8 );
	$len = ( is_string( $header ) && strlen( $header ) === 8 )
		? unpack( 'Nlength/Noffset', $header )
		: false;

	if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
		wfDebugLog('XMP', __METHOD__ . ' Error reading extended XMP block, invalid length or offset.');
		return false;
	}


	// we're not very robust here. we should accept it in the wrong order. To quote
	// the xmp standard:
	// "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
	// StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
	// robust JPEG reader should tolerate the marker segments in any order."
	//
	// otoh the probability that an image will have more than 128k of metadata is rather low...
	// so the probability that it will have > 128k, and be in the wrong order is very low...

	if ( $len['offset'] !== $this->extendedXMPOffset ) {
		wfDebugLog('XMP', __METHOD__ . ' Ignoring XMPExtended block due to wrong order. (Offset was '
			. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')');
		return false;
	}

	if ( $len['offset'] === 0 ) {
		// if we're starting the extended block, we've probably already
		// done the XMPStandard block, so reset.
		$this->resetXMLParser();
	}

	// substr() may return false when there is nothing after the header.
	$actualContent = (string)substr( $content, 40 );

	// 'length' is the size of the complete ExtendedXMP, not of this
	// portion, so the next portion is expected right after the bytes
	// actually received here.
	$this->extendedXMPOffset += strlen( $actualContent );

	// The last portion is the one that brings us up to the full length.
	$atEnd = ( $this->extendedXMPOffset >= $len['length'] );

	wfDebugLog('XMP', __METHOD__ . ' Parsing a XMPExtended block');
	return $this->parse( $actualContent, $atEnd );
}
00343 
/**
 * Character data handler registered with the XML parser.
 *
 * Each piece of character data is trimmed on its own; pieces that are
 * empty after trimming are dropped. Anything else is appended to the
 * character content buffer, provided the current mode accepts
 * character data (MODE_SIMPLE or MODE_QDESC). In MODE_IGNORE the data
 * is silently discarded.
 *
 * @param $parser XMLParser reference to the xml parser (unused)
 * @param $data String character data
 * @throws MWException on character data outside of any mode or in a
 *   mode that does not expect it.
 */
function char( $parser, $data ) {

	$text = trim( $data );
	if ( $text === "" ) {
		return;
	}

	if ( !isset( $this->mode[0] ) ) {
		throw new MWException( 'Unexpected character data before first rdf:Description element' );
	}

	$currentMode = $this->mode[0];

	if ( $currentMode === self::MODE_IGNORE ) {
		return;
	}

	$acceptsText = ( $currentMode === self::MODE_SIMPLE
		|| $currentMode === self::MODE_QDESC );
	if ( !$acceptsText ) {
		throw new MWException( 'character data where not expected. (mode ' . $currentMode . ')' );
	}

	// to check, how does this handle w.s.
	$this->charContent = ( $this->charContent === false )
		? $text
		: $this->charContent . $text;
}
00387 
/**
 * End element handler while in MODE_IGNORE.
 *
 * Leaves the ignored state only when the element being closed is the
 * one on top of the current-item stack; any other end tag is skipped.
 *
 * @param $elm String Namespace and tag name of the closed element, separated by a space.
 */
private function endElementModeIgnore ( $elm ) {
	if ( $this->curItem[0] !== $elm ) {
		return;
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
00403 
/**
 * End element handler while in MODE_SIMPLE.
 *
 * Saves the buffered character content (if any) and pops the current
 * item and mode. While an array is being processed the value is stored
 * under the name of the current item rather than under the element
 * being closed (which would be rdf:li).
 *
 * @param $elm String Namespace and tag name of the closed element, separated by a space.
 */
private function endElementModeSimple ( $elm ) {
	if ( $this->charContent !== false ) {
		$nameSource = $this->processingArray ? $this->curItem[0] : $elm;
		list( $ns, $tag ) = explode( ' ', $nameSource, 2 );

		$this->saveValue( $ns, $tag, $this->charContent );

		// The buffer has been consumed.
		$this->charContent = false;
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
00436 
/**
 * End element handler for the compound modes (struct, seq, bag, lang
 * and bag of structs).
 *
 * Checks that the closed element matches the current item, runs the
 * item's validation callback (if it has one) on the stored result,
 * then pops the current item and mode and clears the struct / array /
 * language state.
 *
 * @param $elm String Namespace and tag name of the closed element, separated by a space.
 * @throws MWException if the closed element is not the one expected.
 */
private function endElementNested( $elm ) {

	// The closed element must be the current item, except in
	// MODE_STRUCT where it may also be rdf:Description.
	if ( $this->curItem[0] !== $elm ) {
		$closesStructDescription = ( $elm === self::NS_RDF . ' Description'
			&& $this->mode[0] === self::MODE_STRUCT );
		if ( !$closesStructDescription ) {
			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
		}
	}

	// Validate structures.
	list( $ns, $tag ) = explode( ' ', $elm, 2 );
	if ( isset( $this->items[$ns][$tag]['validate'] ) ) {

		$info =& $this->items[$ns][$tag];

		$finalName = $tag;
		if ( isset( $info['map_name'] ) ) {
			$finalName = $info['map_name'];
		}

		// A bare function name refers to a method of XMPValidate.
		if ( is_array( $info['validate'] ) ) {
			$validate = $info['validate'];
		} else {
			$validate = array( 'XMPValidate', $info['validate'] );
		}

		$group = 'xmp-' . $info['map_group'];

		if ( !isset( $this->results[$group][$finalName] ) ) {
			// This can happen if all the members of the struct failed validation.
			wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );

		} elseif ( !is_callable( $validate ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
				. $validate[0] . '::' . $validate[1] . '()) is not callable.' );

		} else {
			// The callback receives the value by reference and nulls
			// it out when it is invalid.
			$val =& $this->results[$group][$finalName];
			call_user_func_array( $validate, array( $info, &$val, false ) );
			if ( $val === null ) {
				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
				unset( $this->results[$group][$finalName] );
			}
		}
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );

	$this->ancestorStruct = false;
	$this->processingArray = false;
	$this->itemLang = false;
}
00499 
/**
 * End element handler while in MODE_LI or MODE_LI_LANG, i.e. when the
 * rdf:Seq, rdf:Bag or rdf:Alt container itself is closed.
 *
 * Pops the mode and, if the property collected any values, records the
 * kind of list in its '_type' meta-field: 'ol' for a Seq, 'ul' for a
 * Bag and 'lang' for an Alt belonging to a MODE_LANG property.
 *
 * @param $elm String Namespace and tag name of the closed element, separated by a space.
 * @throws MWException if the closed element is not Seq, Bag or Alt.
 */
private function endElementModeLi( $elm ) {

	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$info = $this->items[$ns][$tag];

	$finalName = $tag;
	if ( isset( $info['map_name'] ) ) {
		$finalName = $info['map_name'];
	}

	array_shift( $this->mode );

	$group = 'xmp-' . $info['map_group'];

	if ( !isset( $this->results[$group][$finalName] ) ) {
		wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
		return;
	}

	switch ( $elm ) {
		case self::NS_RDF . ' Seq':
			$this->results[$group][$finalName]['_type'] = 'ol';
			break;
		case self::NS_RDF . ' Bag':
			$this->results[$group][$finalName]['_type'] = 'ul';
			break;
		case self::NS_RDF . ' Alt':
			// extra if needed as you could theoretically have a non-language alt.
			if ( $info['mode'] === self::MODE_LANG ) {
				$this->results[$group][$finalName]['_type'] = 'lang';
			}
			break;
		default:
			throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
	}
}
00544 
/**
 * End element handler while in MODE_QDESC.
 *
 * Closing rdf:value saves the buffered character content under the
 * current item and stays in this mode; closing anything else pops the
 * mode and the current item.
 *
 * @param $elm String Namespace and tag name of the closed element, separated by a space.
 */
private function endElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		array_shift( $this->mode );
		array_shift( $this->curItem );
		return;
	}

	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$this->saveValue( $ns, $tag, $this->charContent );
}
00568 
/**
 * End element handler registered with the XML parser.
 *
 * Skips the wrapper elements (rdf:RDF, xmpmeta, xapmeta) and elements
 * without a namespace, then hands the element to the handler matching
 * the mode on top of the mode stack.
 *
 * @param $parser XMLParser reference to the xml parser (unused)
 * @param $elm String Namespace and tag name of the closed element, separated by a space.
 * @throws MWException if there is no current mode or no current item
 *   where one is required, or if an element other than rdf:Description
 *   ends while in MODE_INITIAL.
 */
function endElement( $parser, $elm ) {
	if ( $elm === ( self::NS_RDF . ' RDF' )
		|| $elm === 'adobe:ns:meta/ xmpmeta'
		|| $elm === 'adobe:ns:meta/ xapmeta' )
	{
		// ignore these.
		return;
	}

	if ( $elm === self::NS_RDF . ' type' ) {
		// these aren't really supported properly yet.
		// However, it appears they almost never used.
		wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		// However, there is a bug in an adobe product
		// that forgets the namespace on some things.
		// (Luckily they are unimportant things).
		wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
		return;
	}

	// Test the mode stack itself. (Counting $this->mode[0] would count
	// an integer, which is not countable.)
	if ( !isset( $this->mode[0] ) ) {
		// This should never ever happen and means
		// there is a pretty major bug in this class.
		throw new MWException( 'Encountered end element with no mode' );
	}

	if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
		// just to be paranoid. Should always have a curItem, except for initially
		// (aka during MODE_INITAL).
		throw new MWException( "Hit end element </$elm> but no curItem" );
	}

	switch( $this->mode[0] ) {
		case self::MODE_IGNORE:
			$this->endElementModeIgnore( $elm );
			break;
		case self::MODE_SIMPLE:
			$this->endElementModeSimple( $elm );
			break;
		case self::MODE_STRUCT:
		case self::MODE_SEQ:
		case self::MODE_BAG:
		case self::MODE_LANG:
		case self::MODE_BAGSTRUCT:
			$this->endElementNested( $elm );
			break;
		case self::MODE_INITIAL:
			if ( $elm === self::NS_RDF . ' Description' ) {
				array_shift( $this->mode );
			} else {
				throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
			}
			break;
		case self::MODE_LI:
		case self::MODE_LI_LANG:
			$this->endElementModeLi( $elm );
			break;
		case self::MODE_QDESC:
			$this->endElementModeQDesc( $elm );
			break;
		default:
			wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
			break;
	}
}
00650 
/**
 * Start element handler while in MODE_IGNORE.
 *
 * Only an element with the same name as the one currently being
 * ignored is tracked (by pushing another ignore level), so that its
 * end tag is not mistaken for the end of the outer ignored element.
 *
 * @param $elm String Namespace and tag name of the opened element, separated by a space.
 */
private function startElementModeIgnore( $elm ) {
	if ( $elm !== $this->curItem[0] ) {
		return;
	}

	array_unshift( $this->curItem, $elm );
	array_unshift( $this->mode, self::MODE_IGNORE );
}
00668 
/**
 * Start element handler while in MODE_BAG: the only element allowed
 * here is rdf:Bag, which switches to MODE_LI.
 *
 * @param $elm String Namespace and tag name of the opened element, separated by a space.
 * @throws MWException if the element is not rdf:Bag.
 */
private function startElementModeBag( $elm ) {
	if ( $elm !== self::NS_RDF . ' Bag' ) {
		throw new MWException( "Expected <rdf:Bag> but got $elm." );
	}

	array_unshift( $this->mode, self::MODE_LI );
}
00684 
/**
 * Start element handler while in MODE_SEQ: expects rdf:Seq and
 * switches to MODE_LI. An rdf:Bag is tolerated (and logged) because
 * some software writes a Bag where a Seq is required (bug 27105).
 *
 * @param $elm String Namespace and tag name of the opened element, separated by a space.
 * @throws MWException if the element is neither rdf:Seq nor rdf:Bag.
 */
private function startElementModeSeq( $elm ) {
	$isSeq = ( $elm === self::NS_RDF . ' Seq' );
	$isBag = ( $elm === self::NS_RDF . ' Bag' );

	if ( !$isSeq && !$isBag ) {
		throw new MWException( "Expected <rdf:Seq> but got $elm." );
	}

	if ( !$isSeq ) {
		# bug 27105
		wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
			. ' it is a Seq, since some buggy software is known to screw this up.' );
	}

	array_unshift( $this->mode, self::MODE_LI );
}
00705 
/**
 * Start element handler while in MODE_LANG: the only element allowed
 * here is rdf:Alt, which switches to MODE_LI_LANG.
 *
 * @param $elm String Namespace and tag name of the opened element, separated by a space.
 * @throws MWException if the element is not rdf:Alt.
 */
private function startElementModeLang( $elm ) {
	if ( $elm === self::NS_RDF . ' Alt' ) {
		array_unshift( $this->mode, self::MODE_LI_LANG );
	} else {
		// Name the element that is actually required here (rdf:Alt).
		throw new MWException( "Expected <rdf:Alt> but got $elm." );
	}

}
00728 
/**
 * Start element handler while in MODE_SIMPLE, i.e. when an element
 * shows up where only character data was expected.
 *
 * - rdf:Description means the value has qualifiers: switch to
 *   MODE_QDESC for the same item, saving the value right away if it is
 *   given as an rdf:value attribute.
 * - rdf:value is an error at this point.
 * - Anything else is logged and ignored.
 *
 * @param $elm String Namespace and tag name of the opened element, separated by a space.
 * @param $attribs Array Attributes of the element.
 * @throws MWException if the element is rdf:value.
 */
private function startElementModeSimple( $elm, $attribs ) {
	$valueName = self::NS_RDF . ' value';

	if ( $elm === $valueName ) {
		// This should not be here.
		throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
	}

	if ( $elm !== self::NS_RDF . ' Description' ) {
		// something else we don't recognize, like a qualifier maybe.
		wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
		return;
	}

	// The value has qualifiers. The current item is pushed a second
	// time so that leaving MODE_QDESC can pop it again.
	array_unshift( $this->mode, self::MODE_QDESC );
	array_unshift( $this->curItem, $this->curItem[0] );

	if ( isset( $attribs[$valueName] ) ) {
		list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
		$this->saveValue( $ns, $tag, $attribs[$valueName] );
	}
}
00769 
/**
 * Start element handler while in MODE_QDESC.
 *
 * rdf:value needs no action here; every other element is a qualifier
 * and gets ignored.
 *
 * @param $elm String Namespace and tag name of the opened element, separated by a space.
 */
private function startElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		// a qualifier, which we ignore
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
	}
}
00793 
/**
 * Start element handler while in MODE_INITIAL.
 *
 * Elements in the rdf namespace only have their attributes processed.
 * For any other element:
 * - if it is not a known property, it is ignored;
 * - if it is a known property that may only occur inside a structure,
 *   it is ignored as well;
 * - otherwise its mode and name are pushed and its attributes are
 *   processed.
 *
 * @param $ns String Namespace of the opened element.
 * @param $tag String Tag name of the opened element.
 * @param $attribs Array Attributes of the element.
 * @throws MWException if character data is pending when a known
 *   property starts.
 */
private function startElementModeInitial( $ns, $tag, $attribs ) {
	if ( $ns !== self::NS_RDF ) {
		$qualifiedName = $ns . ' ' . $tag;

		if ( !isset( $this->items[$ns][$tag] ) ) {
			// This element is not on our list of allowed elements so ignore.
			wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $qualifiedName );
			return;
		}

		if ( isset( $this->items[$ns][$tag]['structPart'] ) ) {
			// This element is only supposed to appear as a child of
			// a structure. Seeing it here means something weird is
			// going on, so skip it together with its children.
			wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside of its expected parent. Ignoring." );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $qualifiedName );
			return;
		}

		$mode = $this->items[$ns][$tag]['mode'];
		array_unshift( $this->mode, $mode );
		array_unshift( $this->curItem, $qualifiedName );

		if ( $mode === self::MODE_STRUCT ) {
			// Remember under which name the members are to be stored.
			if ( isset( $this->items[$ns][$tag]['map_name'] ) ) {
				$this->ancestorStruct = $this->items[$ns][$tag]['map_name'];
			} else {
				$this->ancestorStruct = $tag;
			}
		}

		if ( $this->charContent !== false ) {
			// Something weird.
			// Should not happen in valid XMP.
			throw new MWException( 'tag nested in non-whitespace characters.' );
		}
	}

	// process attributes
	$this->doAttribs( $attribs );
}
00846 
00865         private function startElementModeStruct( $ns, $tag, $attribs ) {
00866                 if ( $ns !== self::NS_RDF ) {
00867 
00868                         if ( isset( $this->items[$ns][$tag] ) ) {
00869                                 if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
00870                                         && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
00871                                 {
00872                                         // This assumes that we don't have inter-namespace nesting
00873                                         // which we don't in all the properties we're interested in.
00874                                         throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
00875                                                 . "> where it is not allowed." );
00876                                 }
00877                                 array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
00878                                 array_unshift( $this->curItem, $ns . ' ' . $tag );
00879                                 if ( $this->charContent !== false ) {
00880                                         // Something weird.
00881                                         // Should not happen in valid XMP.
00882                                         throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
00883                                 }
00884                         } else {
00885                                 array_unshift( $this->mode, self::MODE_IGNORE );
00886                                 array_unshift( $this->curItem, $elm );
00887                                 return;
00888                         }
00889 
00890                 }
00891 
00892                 if ( $ns === self::NS_RDF && $tag === 'Description' ) {
00893                         $this->doAttribs( $attribs );
00894                         array_unshift( $this->mode, self::MODE_STRUCT );
00895                         array_unshift( $this->curItem, $this->curItem[0] );
00896                 }
00897         }
00898 
00912         private function startElementModeLi( $elm, $attribs ) {
00913                 if ( ( $elm ) !== self::NS_RDF . ' li' ) {
00914                         throw new MWException( "<rdf:li> expected but got $elm." );
00915                 }
00916 
00917                 if ( !isset( $this->mode[1] ) ) {
00918                         // This should never ever ever happen. Checking for it
00919                         // to be paranoid.
00920                         throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
00921                 }
00922 
00923                 if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
00924                         // This list item contains a compound (STRUCT) value.
00925                         array_unshift( $this->mode, self::MODE_STRUCT );
00926                         array_unshift( $this->curItem, $elm );
00927                         $this->processingArray = true;
00928 
00929                         if ( !isset( $this->curItem[1] ) ) {
00930                                 // be paranoid.
00931                                 throw new MWException( 'Can not find parent of BAGSTRUCT.' );
00932                         }
00933                         list( $curNS, $curTag ) = explode( ' ', $this->curItem[1] );
00934                         $this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] )
00935                                 ? $this->items[$curNS][$curTag]['map_name'] : $curTag;
00936 
00937                         $this->doAttribs( $attribs );
00938 
00939                 } else {
00940                         // Normal BAG or SEQ containing simple values.
00941                         array_unshift( $this->mode, self::MODE_SIMPLE );
00942                         // need to add curItem[0] on again since one is for the specific item
00943                         // and one is for the entire group.
00944                         array_unshift( $this->curItem, $this->curItem[0] );
00945                         $this->processingArray = true;
00946                 }
00947 
00948         }
00949 
00964         private function startElementModeLiLang( $elm, $attribs ) {
00965                 if ( $elm !== self::NS_RDF . ' li' ) {
00966                         throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
00967                 }
00968                 if ( !isset( $attribs[ self::NS_XML . ' lang'] )
00969                         || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[ self::NS_XML . ' lang' ] ) )
00970                 {
00971                         throw new MWException( __METHOD__
00972                                 . " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
00973                 }
00974 
00975                 // Lang is case-insensitive.
00976                 $this->itemLang = strtolower( $attribs[ self::NS_XML . ' lang' ] );
00977 
00978                 // need to add curItem[0] on again since one is for the specific item
00979                 // and one is for the entire group.
00980                 array_unshift( $this->curItem, $this->curItem[0] );
00981                 array_unshift( $this->mode, self::MODE_SIMPLE );
00982                 $this->processingArray = true;
00983         }
00984 
00994         function startElement( $parser, $elm, $attribs ) {
00995 
00996                 if ( $elm === self::NS_RDF . ' RDF'
00997                         || $elm === 'adobe:ns:meta/ xmpmeta'
00998                         || $elm === 'adobe:ns:meta/ xapmeta')
00999                 {
01000                         /* ignore. */
01001                         return;
01002                 } elseif ( $elm === self::NS_RDF . ' Description' ) {
01003                         if ( count( $this->mode ) === 0 ) {
01004                                 // outer rdf:desc
01005                                 array_unshift( $this->mode, self::MODE_INITIAL );
01006                         }
01007                 } elseif ( $elm === self::NS_RDF . ' type' ) {
01008                         // This doesn't support rdf:type properly.
01009                         // In practise I have yet to see a file that
01010                         // uses this element, however it is mentioned
01011                         // on page 25 of part 1 of the xmp standard.
01012                         //
01013                         // also it seems as if exiv2 and exiftool do not support
01014                         // this either (That or I misunderstand the standard)
01015                         wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
01016                 }
01017 
01018                 if ( strpos( $elm, ' ' ) === false ) {
01019                         // This probably shouldn't happen.
01020                         wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
01021                         return;
01022                 }
01023 
01024                 list( $ns, $tag ) = explode( ' ', $elm, 2 );
01025 
01026                 if ( count( $this->mode ) === 0 ) {
01027                         // This should not happen.
01028                         throw new MWException('Error extracting XMP, '
01029                                 . "encountered <$elm> with no mode" );
01030                 }
01031 
01032                 switch( $this->mode[0] ) {
01033                         case self::MODE_IGNORE:
01034                                 $this->startElementModeIgnore( $elm );
01035                                 break;
01036                         case self::MODE_SIMPLE:
01037                                 $this->startElementModeSimple( $elm, $attribs );
01038                                 break;
01039                         case self::MODE_INITIAL:
01040                                 $this->startElementModeInitial( $ns, $tag, $attribs );
01041                                 break;
01042                         case self::MODE_STRUCT:
01043                                 $this->startElementModeStruct( $ns, $tag, $attribs );
01044                                 break;
01045                         case self::MODE_BAG:
01046                         case self::MODE_BAGSTRUCT:
01047                                 $this->startElementModeBag( $elm );
01048                                 break;
01049                         case self::MODE_SEQ:
01050                                 $this->startElementModeSeq( $elm );
01051                                 break;
01052                         case self::MODE_LANG:
01053                                 $this->startElementModeLang( $elm );
01054                                 break;
01055                         case self::MODE_LI_LANG:
01056                                 $this->startElementModeLiLang( $elm, $attribs );
01057                                 break;
01058                         case self::MODE_LI:
01059                                 $this->startElementModeLi( $elm, $attribs );
01060                                 break;
01061                         case self::MODE_QDESC:
01062                                 $this->startElementModeQDesc( $elm );
01063                                 break;
01064                         default:
01065                                 throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
01066                                 break;
01067                 }
01068         }
01069 
01082         private function doAttribs( $attribs ) {
01083 
01084                 // first check for rdf:parseType attribute, as that can change
01085                 // how the attributes are interperted.
01086 
01087                 if ( isset( $attribs[self::NS_RDF . ' parseType'] )
01088                         && $attribs[self::NS_RDF . ' parseType'] === 'Resource'
01089                         && $this->mode[0] === self::MODE_SIMPLE )
01090                 {
01091                         // this is equivalent to having an inner rdf:Description
01092                         $this->mode[0] = self::MODE_QDESC;
01093                 }
01094                 foreach ( $attribs as $name => $val ) {
01095 
01096 
01097                         if ( strpos( $name, ' ' ) === false ) {
01098                                 // This shouldn't happen, but so far some old software forgets namespace
01099                                 // on rdf:about.
01100                                 wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
01101                                         . " $name=\"$val\". Skipping. " );
01102                                 continue;
01103                         }
01104                         list( $ns, $tag ) = explode( ' ', $name, 2 );
01105                         if ( $ns === self::NS_RDF ) {
01106                                 if ( $tag === 'value' || $tag === 'resource' ) {
01107                                         // resource is for url.
01108                                         // value attribute is a weird way of just putting the contents.
01109                                         $this->char( $this->xmlParser, $val );
01110                                 }
01111                         } elseif ( isset( $this->items[$ns][$tag] ) ) {
01112                                 if ( $this->mode[0] === self::MODE_SIMPLE ) {
01113                                         throw new MWException( __METHOD__
01114                                                 . " $ns:$tag found as attribute where not allowed" );
01115                                 }
01116                                 $this->saveValue( $ns, $tag, $val );
01117                         } else {
01118                                 wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
01119                         }
01120                 }
01121         }
01122 
01134         private function saveValue( $ns, $tag, $val ) {
01135 
01136                 $info =& $this->items[$ns][$tag];
01137                 $finalName = isset( $info['map_name'] )
01138                         ? $info['map_name'] : $tag;
01139                 if ( isset( $info['validate'] ) ) {
01140                         $validate = is_array( $info['validate'] ) ? $info['validate']
01141                                 : array( 'XMPValidate', $info['validate'] );
01142 
01143                         if ( is_callable( $validate ) ) {
01144                                 call_user_func_array( $validate, array( $info, &$val, true ) );
01145                                 // the reasoning behind using &$val instead of using the return value
01146                                 // is to be consistent between here and validating structures.
01147                                 if ( is_null( $val ) ) {
01148                                         wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
01149                                         return;
01150                                 }
01151                         } else {
01152                                 wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
01153                                         . $validate[0] . '::' . $validate[1] . '()) is not callable.' );
01154                         }
01155                 }
01156 
01157                 if ( $this->ancestorStruct && $this->processingArray ) {
01158                         // Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
01159                         $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val;
01160                 } elseif ( $this->ancestorStruct ) {
01161                         $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val;
01162                 } elseif ( $this->processingArray ) {
01163                         if ( $this->itemLang === false ) {
01164                                 // normal array
01165                                 $this->results['xmp-' . $info['map_group']][$finalName][] = $val;
01166                         } else {
01167                                 // lang array.
01168                                 $this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val;
01169                         }
01170                 } else {
01171                         $this->results['xmp-' . $info['map_group']][$finalName] = $val;
01172                 }
01173         }
01174 }