MediaWiki  REL1_19
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
00031 class LanguageConverter {
/** Code of the main language, as passed to the constructor. */
public $mMainLanguageCode;

/** Variant codes in use: the constructor's list minus $wgDisabledVariants. */
public $mVariants;

/** Map of variant code => fallback variant(s), as passed to the constructor. */
public $mVariantFallbacks;

/** Return value of Language::getLanguageNames(), captured by the constructor. */
public $mVariantNames;

/** Set to true by loadTables() once it has run; reset by reloadTables(). */
public $mTablesLoaded = false;

/** Conversion tables indexed by variant code, plus a CACHE_VERSION_KEY marker entry. */
public $mTables;

/** Per-variant manual level: 'bidirectional', 'unidirectional' or 'disable'. */
public $mManualLevel;

/** Cache key under which loadTables() stores and fetches the tables. */
public $mCacheKey;

/** Language object handed to the constructor. */
public $mLangObj;

/** Flag map: the built-in rule flags, caller-supplied flags and one entry per variant. */
public $mFlags;

public $mDescCodeSep = ':';
public $mDescVarSep = ';';
public $mUcfirst = false;

/** Title set by the most recent manual rule that supplied one; false until then. */
public $mConvRuleTitle = false;

/** Memoized result of getURLVariant(). */
public $mURLVariant;

/** Last result of getUserVariant() (stored, but not used to short-circuit). */
public $mUserVariant;

/** Memoized result of getHeaderVariant(). */
public $mHeaderVariant;

/** Nesting depth at which recursiveConvertRule() stops descending into -{ }- rules. */
public $mMaxDepth = 10;

public $mVarSeparatorPattern;

/** Marker entry that a cached table set must contain to be accepted by loadTables(). */
const CACHE_VERSION_KEY = 'VERSION 6';
00056 
/**
 * Set up a converter for one language and its variants.
 *
 * @param $langobj Language object this converter is attached to
 * @param $maincode String: code of the main language
 * @param $variants Array: supported variant codes; entries listed in
 *   $wgDisabledVariants are removed
 * @param $variantfallbacks Array: variant code => fallback variant(s)
 * @param $flags Array: extra rule flags, merged over the built-in ones
 * @param $manualLevel Array: variant code => manual conversion level;
 *   variants that are not listed get 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = array(),
    $variantfallbacks = array(), $flags = array(),
    $manualLevel = array()
) {
    global $wgDisabledVariants;

    $this->mLangObj = $langobj;
    $this->mMainLanguageCode = $maincode;
    $this->mVariants = array_diff( $variants, $wgDisabledVariants );
    $this->mVariantFallbacks = $variantfallbacks;
    $this->mVariantNames = Language::getLanguageNames();
    $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

    // Flags reserved for internal use and therefore not listed here:
    //   'S' show converted text
    //   '+' add rules for all text
    //   'E' the given flags are erroneous
    $builtinFlags = array(
        'A' => 'A', // add rule for convert code (all text convert)
        'T' => 'T', // title convert
        'R' => 'R', // raw content
        'D' => 'D', // convert description (implemented by subclasses)
        '-' => '-', // remove convert (not implemented)
        'H' => 'H', // add rule for convert code (but no display in placed code)
        'N' => 'N'  // current variant name
    );
    $this->mFlags = array_merge( $builtinFlags, $flags );

    // Every variant gets a manual level and doubles as a flag of its own.
    foreach ( $this->mVariants as $code ) {
        $this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
            ? $manualLevel[$code]
            : 'bidirectional';
        $this->mFlags[$code] = $code;
    }
}
00101 
/**
 * Get the variant codes this converter works with (the constructor's
 * list with $wgDisabledVariants removed).
 *
 * @return Array of variant codes
 */
public function getVariants() {
    return $this->mVariants;
}
00111 
/**
 * Look up the fallback(s) configured for a variant.
 *
 * @param $variant String: variant code to look up
 * @return String|Array: the configured fallback entry, or the main
 *   language code when nothing is configured for $variant
 */
public function getVariantFallbacks( $variant ) {
    return isset( $this->mVariantFallbacks[$variant] )
        ? $this->mVariantFallbacks[$variant]
        : $this->mMainLanguageCode;
}
00129 
/**
 * Get the title recorded by applyManualConv() from a manual rule.
 *
 * @return Mixed: the stored rule title, or false if none has been set
 */
public function getConvRuleTitle() {
    return $this->mConvRuleTitle;
}
00137 
/**
 * Work out which variant the current request should be shown in.
 *
 * Sources are tried in this order: the URL, then the user option (logged
 * in) or the HTTP header (anonymous), then $wgDefaultLanguageVariant.
 * The main language code is returned when none yields a valid variant.
 *
 * Unlike the other get*Variant() functions the result is not memoized,
 * because new information might appear during processing after the
 * first call.
 *
 * @return String: variant code
 */
public function getPreferredVariant() {
    global $wgDefaultLanguageVariant, $wgUser;

    $candidate = $this->getURLVariant();
    $loggedIn = $wgUser->isLoggedIn();

    if ( !$candidate ) {
        $candidate = $loggedIn
            ? $this->getUserVariant()
            : $this->getHeaderVariant();
    }

    if ( $wgDefaultLanguageVariant && !$candidate ) {
        $candidate = $this->validateVariant( $wgDefaultLanguageVariant );
    }

    return $this->validateVariant( $candidate )
        ? $candidate
        : $this->mMainLanguageCode;
}
00166 
/**
 * Get the default variant, ignoring user options and HTTP headers.
 *
 * The URL variant wins; otherwise $wgDefaultLanguageVariant is used if
 * it is a valid variant; otherwise the main language code.
 *
 * @return String: variant code
 */
public function getDefaultVariant() {
    global $wgDefaultLanguageVariant;

    $candidate = $this->getURLVariant();

    if ( $wgDefaultLanguageVariant && !$candidate ) {
        $candidate = $this->validateVariant( $wgDefaultLanguageVariant );
    }

    return $candidate ? $candidate : $this->mMainLanguageCode;
}
00186 
/**
 * Check that a value is one of this converter's variant codes.
 *
 * The comparison is strict: a loose in_array() would accept values such
 * as true (and, on PHP < 8, the integer 0) against a list of strings.
 *
 * @param $variant String|null: candidate variant code
 * @return String|null: $variant when it is a known variant, null otherwise
 */
public function validateVariant( $variant = null ) {
    if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
        return $variant;
    }
    return null;
}
00198 
/**
 * Get the variant requested through the URL, if it is valid.
 *
 * Reads the "variant" request parameter and falls back to "uselang".
 * A valid result is memoized in $this->mURLVariant.
 *
 * @return String|null: variant code, or null if none was validly requested
 */
public function getURLVariant() {
    global $wgRequest;

    if ( $this->mURLVariant ) {
        return $this->mURLVariant;
    }

    $requested = $wgRequest->getText( 'variant' );
    if ( !$requested ) {
        $requested = $wgRequest->getVal( 'uselang' );
    }

    $this->mURLVariant = $this->validateVariant( $requested );
    return $this->mURLVariant;
}
00220 
/**
 * Get the variant stored in the current user's options, if it is valid.
 *
 * Logged-in users are asked for their "variant" option. For everyone
 * else the "language" option is used, which avoids constructing wgLang
 * and the infinite recursion that would cause. For the same reason this
 * must not be called on stub user objects during initialisation.
 *
 * The result is stored in $this->mUserVariant but deliberately never
 * reused: memoizing this function wreaks havoc on parserTest.php.
 *
 * @return String|null: variant code, or null if the option is not a
 *   valid variant
 */
protected function getUserVariant() {
    global $wgUser;

    $optionName = $wgUser->isLoggedIn() ? 'variant' : 'language';
    $this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );

    return $this->mUserVariant;
}
00249 
/**
 * Pick a variant from the request's accepted languages.
 *
 * The accepted languages are tried in order; the first one that is a
 * valid variant wins. If none is, the fallbacks of those languages are
 * tried in the order they were collected. A valid result is memoized in
 * $this->mHeaderVariant.
 *
 * @return String|null: variant code, or null if nothing matched
 */
protected function getHeaderVariant() {
    global $wgRequest;

    if ( $this->mHeaderVariant ) {
        return $this->mHeaderVariant;
    }

    $accepted = array_keys( $wgRequest->getAcceptLang() );
    if ( empty( $accepted ) ) {
        return null;
    }

    // First pass: direct matches. Fallbacks are only recorded here and
    // looked at once every accepted language has had its chance.
    $deferred = array();
    foreach ( $accepted as $code ) {
        $this->mHeaderVariant = $this->validateVariant( $code );
        if ( $this->mHeaderVariant ) {
            return $this->mHeaderVariant;
        }

        $fallbacks = $this->getVariantFallbacks( $code );
        if ( is_string( $fallbacks ) ) {
            $deferred[] = $fallbacks;
        } elseif ( is_array( $fallbacks ) ) {
            $deferred = array_merge( $deferred, $fallbacks );
        }
    }

    // Second pass: the recorded fallbacks, without duplicates.
    foreach ( array_unique( $deferred ) as $code ) {
        $this->mHeaderVariant = $this->validateVariant( $code );
        if ( $this->mHeaderVariant ) {
            break;
        }
    }

    return $this->mHeaderVariant;
}
00301 
/**
 * Dictionary-based conversion of a text fragment that leaves markup alone.
 *
 * The text is split on HTML tags, HTML entities, parser placeholders and
 * whole <code>, <script> and <pre> sections. Only the runs between those
 * pieces go through translate(), in one batch. The "title" and "alt"
 * attributes of matched tags are translated separately unless their
 * value contains "://".
 *
 * @param $text String: text to convert
 * @param $toVariant String|bool: target variant code, or false to use
 *   getPreferredVariant()
 * @return String: converted text; $text is returned unchanged when no
 *   target variant is available or guessVariant() returns true
 */
public function autoConvert( $text, $toVariant = false ) {
    wfProfileIn( __METHOD__ );

    $this->loadTables();

    if ( !$toVariant ) {
        $toVariant = $this->getPreferredVariant();
        if ( !$toVariant ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }
    }

    if ( $this->guessVariant( $text, $toVariant ) ) {
        wfProfileOut( __METHOD__ );
        return $text;
    }

    /* we convert everything except:
       1. HTML markups (anything between < and >)
       2. HTML entities
       3. placeholders created by the parser
    */
    global $wgParser;
    if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
        $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
    } else {
        $marker = '';
    }

    // this one is needed when the text is inside an HTML markup
    $htmlfix = '|<[^>]+$|^[^<>]*>';

    // disable conversion between <code></code> tags
    $codefix = '<code>.+?<\/code>|';
    // disable conversion of <script type="text/javascript"> ... </script>
    $scriptfix = '<script.*?>.*?<\/script>|';
    // disable conversion of <pre xxxx> ... </pre>
    $prefix = '<pre.*?>.*?<\/pre>|';

    $reg = '/' . $codefix . $scriptfix . $prefix .
        '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
    $startPos = 0;
    $sourceBlob = '';
    $literalBlob = '';

    // NUL is used below as the segment delimiter, so it must not occur
    // in the input itself.
    $text = str_replace( "\000", '', $text );

    // $text is not modified inside the loop, so measure it once.
    $textLength = strlen( $text );

    $markupMatches = null;
    $elementMatches = null;
    while ( $startPos < $textLength ) {
        if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
            $elementPos = $markupMatches[0][1];
            $element = $markupMatches[0][0];
        } else {
            // No more markup: the rest of the text is one translatable run
            $elementPos = $textLength;
            $element = '';
        }

        // Queue the part before the markup for translation in a batch
        $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

        // Advance to the next position
        $startPos = $elementPos + strlen( $element );

        // Translate any alt or title attributes inside the matched element
        if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
            $elementMatches ) )
        {
            $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
            $changed = false;
            foreach ( array( 'title', 'alt' ) as $attrName ) {
                if ( !isset( $attrs[$attrName] ) ) {
                    continue;
                }
                $attr = $attrs[$attrName];
                // Don't convert URLs. Compare against false explicitly:
                // strpos() returns 0 (also falsy) for a match at offset 0.
                if ( strpos( $attr, '://' ) === false ) {
                    $attr = $this->translate( $attr, $toVariant );
                }

                // Remove HTML tags to avoid disrupting the layout
                $attr = preg_replace( '/<[^>]+>/', '', $attr );
                if ( $attr !== $attrs[$attrName] ) {
                    $attrs[$attrName] = $attr;
                    $changed = true;
                }
            }
            if ( $changed ) {
                $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                    $elementMatches[3];
            }
        }
        $literalBlob .= $element . "\000";
    }

    // Do the main translation batch
    $translatedBlob = $this->translate( $sourceBlob, $toVariant );

    // Put the output back together: translated run, literal element,
    // translated run, literal element, ...
    $translatedIter = StringUtils::explode( "\000", $translatedBlob );
    $literalIter = StringUtils::explode( "\000", $literalBlob );
    $output = '';
    while ( $translatedIter->valid() && $literalIter->valid() ) {
        $output .= $translatedIter->current();
        $output .= $literalIter->current();
        $translatedIter->next();
        $literalIter->next();
    }

    wfProfileOut( __METHOD__ );
    return $output;
}
00426 
/**
 * Run a text through the conversion table of one variant.
 *
 * Text that is empty or whitespace-only is returned untouched. The same
 * happens when no usable table exists for $variant, instead of failing
 * with a fatal "call to a member function on a non-object" error.
 *
 * @param $text String: text to convert
 * @param $variant String: variant code whose table should be applied
 * @return String: converted text
 */
public function translate( $text, $variant ) {
    wfProfileIn( __METHOD__ );
    if ( trim( $text ) ) {
        $this->loadTables();
        // is_object() also rejects the CACHE_VERSION_KEY marker entry,
        // which lives in the same array but is not a table.
        if ( isset( $this->mTables[$variant] ) && is_object( $this->mTables[$variant] ) ) {
            $text = $this->mTables[$variant]->replace( $text );
        }
    }
    wfProfileOut( __METHOD__ );
    return $text;
}
00447 
/**
 * Convert a text into every variant of this converter.
 *
 * @param $text String: text to convert
 * @return Array: variant code => text as returned by translate()
 */
public function autoConvertToAllVariants( $text ) {
    wfProfileIn( __METHOD__ );
    $this->loadTables();

    $converted = array();
    foreach ( $this->mVariants as $code ) {
        $converted[$code] = $this->translate( $text, $code );
    }

    wfProfileOut( __METHOD__ );
    return $converted;
}
00466 
/**
 * Convert a link text into every variant. In this class the call is
 * simply forwarded to autoConvertToAllVariants().
 *
 * @param $text String: link text to convert
 * @return Array: variant code => converted text
 */
public function convertLinkToAllVariants( $text ) {
    $perVariant = $this->autoConvertToAllVariants( $text );
    return $perVariant;
}
00481 
/**
 * Apply a parsed manual conversion rule to this converter's state.
 *
 * A non-empty rule title replaces $this->mConvRuleTitle, and the rule's
 * conversion pairs are added to or removed from the per-variant tables,
 * depending on the rule's action.
 *
 * @param $convRule ConverterRule: rule that has already been parsed
 */
protected function applyManualConv( $convRule ) {
    // -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises title conversion.
    // Bug 24072: only overwrite the stored title when this rule actually
    // carries one; otherwise later non-title rules would wipe it out.
    $ruleTitle = $convRule->getTitle();
    if ( $ruleTitle ) {
        $this->mConvRuleTitle = $ruleTitle;
    }

    // Merge the rule's pairs into, or remove them from, the global tables
    $convTable = $convRule->getConvTable();
    $action = $convRule->getRulesAction();
    foreach ( $convTable as $variant => $pairs ) {
        if ( $this->validateVariant( $variant ) ) {
            switch ( $action ) {
                case 'add':
                    foreach ( $pairs as $from => $to ) {
                        // Skip pairs where both sides are blank, so that
                        // translate() can always return a string.
                        if ( $from || $to ) {
                            // setPair() is about 2.5 times more efficient
                            // than array_merge().
                            $this->mTables[$variant]->setPair( $from, $to );
                        }
                    }
                    break;
                case 'remove':
                    $this->mTables[$variant]->removeArray( $pairs );
                    break;
            }
        }
    }
}
00520 
/**
 * Build the display text of a title in the preferred variant.
 *
 * Outside the main namespace the text is prefixed with a namespace name
 * and ':'. The name comes from the "conversion-ns<index>" message when it
 * exists, otherwise from the namespace names of the variant's language.
 *
 * @param $title Title object to convert
 * @return String: converted title text
 */
public function convertTitle( $title ) {
    $variant = $this->getPreferredVariant();
    $nsIndex = $title->getNamespace();

    $prefix = '';
    if ( $nsIndex !== NS_MAIN ) {
        // A message may supply an already-converted namespace name
        $nsConvMsg = wfMessage( 'conversion-ns' . $nsIndex )->inContentLanguage();
        if ( $nsConvMsg->exists() ) {
            $prefix = $nsConvMsg->plain();
        } else {
            // No such message: use the current variant's namespace names
            $variantLang = $this->mLangObj->factory( $variant );
            $prefix = $variantLang->getFormattedNsText( $nsIndex );
        }
        $prefix .= ':';
    }

    return $this->translate( $prefix . $title->getText(), $variant );
}
00550 
/**
 * Convert a text to the preferred variant, honouring -{ }- markup.
 *
 * @param $text String: text to convert
 * @return String: converted text
 */
public function convert( $text ) {
    return $this->convertTo( $text, $this->getPreferredVariant() );
}
00569 
/**
 * Convert a text to a given variant, honouring -{ }- markup.
 *
 * The text is returned unchanged when $wgDisableLangConversion is set
 * or guessVariant() returns true for it.
 *
 * @param $text String: text to convert
 * @param $variant String: target variant code
 * @return String: converted text
 */
public function convertTo( $text, $variant ) {
    global $wgDisableLangConversion;

    if ( !$wgDisableLangConversion && !$this->guessVariant( $text, $variant ) ) {
        return $this->recursiveConvertTopLevel( $text, $variant );
    }
    return $text;
}
00584 
/**
 * Convert a text at the top level, i.e. outside any -{ }- rule.
 *
 * Plain stretches go through autoConvert(); each "-{" hands control to
 * recursiveConvertRule(), which moves the cursor past the rule it
 * consumed.
 *
 * @param $text String: text to convert
 * @param $variant String: target variant code
 * @param $depth Integer: current nesting depth
 * @return String: converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
    $out = '';
    $cursor = 0;
    $end = strlen( $text );

    while ( $cursor < $end ) {
        $open = strpos( $text, '-{', $cursor );

        if ( $open === false ) {
            // No more rules: convert the tail and finish
            return $out . $this->autoConvert( substr( $text, $cursor ), $variant );
        }

        // Convert the plain stretch in front of the rule
        $out .= $this->autoConvert( substr( $text, $cursor, $open - $cursor ), $variant );

        // Let the rule parser take over; $cursor is updated by reference
        $cursor = $open;
        $out .= $this->recursiveConvertRule( $text, $variant, $cursor, $depth + 1 );
    }

    return $out;
}
00620 
/**
 * Parse and apply one -{ }- rule, descending into nested rules.
 *
 * @param $text String: the full text being converted
 * @param $variant String: target variant code
 * @param &$startPos Integer: on entry the offset of the rule's "-{"; on
 *   return the offset just past its "}-", or strlen( $text ) when the
 *   rule is never closed
 * @param $depth Integer: nesting depth of this rule
 * @return String: the rule's display text; for an unclosed rule, "-{"
 *   followed by the auto-converted remainder
 * @throws MWException if $startPos does not point at "-{"
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
    // Quick sanity check. substr() is used instead of string offsets so
    // that a $startPos at or past the end of $text raises no notice.
    if ( substr( $text, $startPos, 2 ) !== '-{' ) {
        throw new MWException( __METHOD__ . ': invalid input string' );
    }

    $startPos += 2;
    $inner = '';
    $warningDone = false;
    $length = strlen( $text );

    while ( $startPos < $length ) {
        $m = false;
        preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
        if ( !$m ) {
            // Unclosed rule
            break;
        }

        $token = $m[0][0];
        $pos = $m[0][1];

        // Markup found
        // Append initial segment
        $inner .= substr( $text, $startPos, $pos - $startPos );

        // Advance position
        $startPos = $pos;

        switch ( $token ) {
            case '-{':
                // Check max depth
                if ( $depth >= $this->mMaxDepth ) {
                    // Too deep: keep the "-{" as literal text and emit
                    // the warning once per rule.
                    $inner .= '-{';
                    if ( !$warningDone ) {
                        $inner .= '<span class="error">' .
                            wfMsgForContent( 'language-converter-depth-warning',
                                $this->mMaxDepth ) .
                            '</span>';
                        $warningDone = true;
                    }
                    $startPos += 2;
                    // "continue 2" targets the while loop. A bare
                    // "continue" would target the switch, which acts
                    // like "break" and warns on PHP >= 7.3.
                    continue 2;
                }
                // Recursively parse another rule
                $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                break;
            case '}-':
                // Apply the rule
                $startPos += 2;
                $rule = new ConverterRule( $inner, $this );
                $rule->parse( $variant );
                $this->applyManualConv( $rule );
                return $rule->getDisplay();
            default:
                throw new MWException( __METHOD__ . ': invalid regex match' );
        }
    }

    // Unclosed rule: treat whatever is left as ordinary text
    if ( $startPos < $length ) {
        $inner .= substr( $text, $startPos );
    }
    $startPos = $length;
    return '-{' . $this->autoConvert( $inner, $variant );
}
00697 
/**
 * If a link target does not exist, look for an existing page under one
 * of the variant forms of the link text.
 *
 * All variant titles are checked in a single batch query. The first one
 * with an article ID replaces both $nt and $link.
 *
 * @param &$link String: link text; replaced by the found title's text
 * @param &$nt Mixed: Title object of the link target (may be a
 *   non-object); replaced by the found Title
 * @param $ignoreOtherCond Boolean: if true, skip the request-parameter
 *   and user-preference checks; the global disable switches still apply
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
    # If the article has already existed, there is no need to
    # check it again, otherwise it may cause a fault.
    if ( is_object( $nt ) && $nt->exists() ) {
        return;
    }

    global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
        $wgUser;
    $isredir = $wgRequest->getText( 'redirect', 'yes' );
    $action = $wgRequest->getText( 'action' );
    $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
    $disableLinkConversion = $wgDisableLangConversion
        || $wgDisableTitleConversion;

    if ( $disableLinkConversion ||
        ( !$ignoreOtherCond &&
            ( $isredir == 'no'
                || $action == 'edit'
                || $action == 'submit'
                || $linkconvert == 'no'
                || $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
        return;
    }

    $ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

    $variants = $this->autoConvertToAllVariants( $link );
    if ( !$variants ) { // give up
        return;
    }

    // The batch is only created once we know it may be needed.
    $linkBatch = new LinkBatch();
    $titles = array();

    foreach ( $variants as $v ) {
        // Compare as strings: a loose != treats numerically equal
        // strings such as "1e3" and "1000" as identical.
        if ( (string)$v === (string)$link ) {
            continue;
        }
        $varnt = Title::newFromText( $v, $ns );
        if ( !is_null( $varnt ) ) {
            $linkBatch->addObj( $varnt );
            $titles[] = $varnt;
        }
    }

    if ( !$titles ) {
        // No variant differs from the original link: nothing to look up
        return;
    }

    // fetch all variants in single query
    $linkBatch->execute();

    foreach ( $titles as $varnt ) {
        if ( $varnt->getArticleID() > 0 ) {
            $nt = $varnt;
            $link = $varnt->getText();
            break;
        }
    }
}
00769 
/**
 * Build the variant-dependent hash suffix: '!' followed by the
 * preferred variant code.
 *
 * @return String
 */
public function getExtraHashOptions() {
    return '!' . $this->getPreferredVariant();
}
00779 
/**
 * Guess whether a text is already written in the given variant.
 *
 * This implementation always answers false; autoConvert() and
 * convertTo() skip conversion when true is returned.
 *
 * @param $text String: text to examine
 * @param $variant String: variant code to test against
 * @return Boolean: always false here
 */
public function guessVariant( $text, $variant ) {
    return false;
}
00793 
/**
 * Load the default conversion tables. This implementation only throws;
 * the message asks the concrete class to implement the method.
 *
 * @throws MWException always
 */
public function loadDefaultTables() {
    throw new MWException(
        'Must implement loadDefaultTables() method in class ' . get_class( $this )
    );
}
00804 
/**
 * Load the conversion tables, from the cache or by rebuilding them.
 *
 * Does nothing if the tables were already loaded. A rebuild happens when
 * the cache is bypassed, is empty, or holds data without the
 * CACHE_VERSION_KEY marker. A rebuild loads the default tables, merges in
 * what parseCachedTable() returns for each variant, runs
 * postLoadTables() and stores the result in the cache for 43200 seconds.
 *
 * @param $fromCache Boolean: whether to try the cache first
 */
public function loadTables( $fromCache = true ) {
    global $wgMemc;

    if ( $this->mTablesLoaded ) {
        return;
    }

    wfProfileIn( __METHOD__ );
    $this->mTablesLoaded = true;
    $this->mTables = false;

    if ( $fromCache ) {
        wfProfileIn( __METHOD__ . '-cache' );
        $this->mTables = $wgMemc->get( $this->mCacheKey );
        wfProfileOut( __METHOD__ . '-cache' );
    }

    $needsRebuild = !$this->mTables
        || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables );

    if ( $needsRebuild ) {
        wfProfileIn( __METHOD__ . '-recache' );

        // Start from the default tables, then layer on the on-wiki
        // customisations from MediaWiki:Conversiontable/*
        $this->loadDefaultTables();
        foreach ( $this->mVariants as $code ) {
            $this->mTables[$code]->mergeArray( $this->parseCachedTable( $code ) );
        }

        $this->postLoadTables();
        $this->mTables[self::CACHE_VERSION_KEY] = true;

        $wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
        wfProfileOut( __METHOD__ . '-recache' );
    }

    wfProfileOut( __METHOD__ );
}
00843 
/**
 * Hook called by loadTables() after a rebuild, before the tables are
 * written to the cache. Empty in this class.
 */
public function postLoadTables() {
}
00848 
/**
 * Throw away the loaded tables and rebuild them, bypassing the cache
 * read (the rebuilt tables are still written back to the cache).
 */
public function reloadTables() {
    if ( $this->mTables ) {
        unset( $this->mTables );
    }

    // Clear the guard flag so that loadTables() does real work again
    $this->mTablesLoaded = false;
    $this->loadTables( false );
}
00861 
00881         function parseCachedTable( $code, $subpage = '', $recursive = true ) {
00882                 static $parsed = array();
00883 
00884                 $key = 'Conversiontable/' . $code;
00885                 if ( $subpage ) {
00886                         $key .= '/' . $subpage;
00887                 }
00888                 if ( array_key_exists( $key, $parsed ) ) {
00889                         return array();
00890                 }
00891 
00892                 $parsed[$key] = true;
00893 
00894                 if ( $subpage === '' ) {
00895                         $txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
00896                 } else {
00897                         $txt = false;
00898                         $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
00899                         if ( $title && $title->exists() ) {
00900                                 $revision = Revision::newFromTitle( $title );
00901                                 if ( $revision ) {
00902                                         $txt = $revision->getRawText();
00903                                 }
00904                         }
00905                 }
00906 
00907                 # Nothing to parse if there's no text
00908                 if ( $txt === false || $txt === null || $txt === '' ) {
00909                         return array();
00910                 }
00911 
00912                 // get all subpage links of the form
00913                 // [[MediaWiki:Conversiontable/zh-xx/...|...]]
00914                 $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
00915                         ':Conversiontable';
00916                 $subs = StringUtils::explode( '[[', $txt );
00917                 $sublinks = array();
00918                 foreach ( $subs as $sub ) {
00919                         $link = explode( ']]', $sub, 2 );
00920                         if ( count( $link ) != 2 ) {
00921                                 continue;
00922                         }
00923                         $b = explode( '|', $link[0], 2 );
00924                         $b = explode( '/', trim( $b[0] ), 3 );
00925                         if ( count( $b ) == 3 ) {
00926                                 $sublink = $b[2];
00927                         } else {
00928                                 $sublink = '';
00929                         }
00930 
00931                         if ( $b[0] == $linkhead && $b[1] == $code ) {
00932                                 $sublinks[] = $sublink;
00933                         }
00934                 }
00935 
00936                 // parse the mappings in this page
00937                 $blocks = StringUtils::explode( '-{', $txt );
00938                 $ret = array();
00939                 $first = true;
00940                 foreach ( $blocks as $block ) {
00941                         if ( $first ) {
00942                                 // Skip the part before the first -{
00943                                 $first = false;
00944                                 continue;
00945                         }
00946                         $mappings = explode( '}-', $block, 2 );
00947                         $stripped = str_replace( array( "'", '"', '*', '#' ), '',
00948                                                                          $mappings[0] );
00949                         $table = StringUtils::explode( ';', $stripped );
00950                         foreach ( $table as $t ) {
00951                                 $m = explode( '=>', $t, 3 );
00952                                 if ( count( $m ) != 2 ) {
00953                                         continue;
00954                                 }
00955                                 // trim any trailling comments starting with '//'
00956                                 $tt = explode( '//', $m[1], 2 );
00957                                 $ret[trim( $m[0] )] = trim( $tt[0] );
00958                         }
00959                 }
00960 
00961                 // recursively parse the subpages
00962                 if ( $recursive ) {
00963                         foreach ( $sublinks as $link ) {
00964                                 $s = $this->parseCachedTable( $code, $link, $recursive );
00965                                 $ret = array_merge( $ret, $s );
00966                         }
00967                 }
00968 
00969                 if ( $this->mUcfirst ) {
00970                         foreach ( $ret as $k => $v ) {
00971                                 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
00972                         }
00973                 }
00974                 return $ret;
00975         }
00976 
00985         public function markNoConversion( $text, $noParse = false ) {
00986                 # don't mark if already marked
00987                 if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
00988                         return $text;
00989                 }
00990 
00991                 $ret = "-{R|$text}-";
00992                 return $ret;
00993         }
00994 
01003         function convertCategoryKey( $key ) {
01004                 return $key;
01005         }
01006 
01023         function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
01024                         $isWatch, $section, $flags, $revision ) {
01025                 $titleobj = $article->getTitle();
01026                 if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
01027                         $title = $titleobj->getDBkey();
01028                         $t = explode( '/', $title, 3 );
01029                         $c = count( $t );
01030                         if ( $c > 1 && $t[0] == 'Conversiontable' ) {
01031                                 if ( $this->validateVariant( $t[1] ) ) {
01032                                         $this->reloadTables();
01033                                 }
01034                         }
01035                 }
01036                 return true;
01037         }
01038 
01047         public function armourMath( $text ) {
01048                 // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
01049                 // any unwanted markup appearing in the math image tag.
01050                 $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
01051                 return $text;
01052         }
01053 
01057         function getVarSeparatorPattern() {
01058                 if ( is_null( $this->mVarSeparatorPattern ) ) {
01059                         // varsep_pattern for preg_split:
01060                         // text should be splited by ";" only if a valid variant
01061                         // name exist after the markup, for example:
01062                         //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
01063                         //      <span style="font-size:120%;">yyy</span>;}-
01064                         // we should split it as:
01065                         //  array(
01066                         //        [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
01067                         //        [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
01068                         //        [2] => ''
01069                         //       )
01070                         $pat = '/;\s*(?=';
01071                         foreach ( $this->mVariants as $variant ) {
01072                                 // zh-hans:xxx;zh-hant:yyy
01073                                 $pat .= $variant . '\s*:|';
01074                                 // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
01075                                 $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
01076                         }
01077                         $pat .= '\s*$)/';
01078                         $this->mVarSeparatorPattern = $pat;
01079                 }
01080                 return $this->mVarSeparatorPattern;
01081         }
01082 }
01083 
/**
 * Parser for one manual conversion rule, i.e. the text found between
 * "-{" and "}-".
 *
 * A rule has the form "flags|rules": an optional ";"-separated flag list,
 * followed by either plain text or a list of per-variant texts such as
 * "zh-hans:xxx;zh-hant:yyy" (bidirectional) or "xxx=>zh-hans:yyy"
 * (unidirectional). parse() has to be called before any of the getters
 * return meaningful values.
 */
class ConverterRule {
        /** Original text in -{text}- */
        public $mText;
        /** Converter object; provides the variants, flags and fallbacks */
        public $mConverter;
        /** Shown when no flag of the rule produced any output */
        public $mManualCodeError = '<strong class="error">code error!</strong>';
        /** Text to show in place of the rule */
        public $mRuleDisplay = '';
        /** Title text set by the T flag, false if there is none */
        public $mRuleTitle = false;
        /** String: the text of the rules (everything after the flags) */
        public $mRules = '';
        /** 'add', 'remove' or 'none' */
        public $mRulesAction = 'none';
        /** Normalised flags, flag => true */
        public $mFlags = array();
        /** Variant codes that were given as flags */
        public $mVariantFlags = array();
        /** Conversion table generated from the rule, variant => ( from => to ) */
        public $mConvTable = array();
        /** Bidirectional rules: variant => text in that variant */
        public $mBidtable = array();
        /** Unidirectional rules: variant => ( from => to ) */
        public $mUnidtable = array();

        /**
         * Constructor
         *
         * @param $text String: the text between -{ and }-
         * @param $converter Object: the converter this rule belongs to
         */
        public function __construct( $text, $converter ) {
                $this->mText = $text;
                $this->mConverter = $converter;
        }

        /**
         * Tell whether a text counts as "not given".
         *
         * Only false, null and the empty string are blank; in particular the
         * text "0" is a perfectly valid conversion result.
         *
         * @param $text Mixed
         * @return Boolean
         */
        private static function isBlank( $text ) {
                return $text === false || $text === null || $text === '';
        }

        /**
         * Look up the text of the first of the given variants that has an
         * entry in the bidirectional table.
         *
         * @param $variants Array|String: variant code(s), in order of preference
         * @return String|false: the text, or false if no variant has an entry
         */
        public function getTextInBidtable( $variants ) {
                $variants = (array)$variants;
                if ( !$variants ) {
                        return false;
                }
                foreach ( $variants as $variant ) {
                        if ( isset( $this->mBidtable[$variant] ) ) {
                                return $this->mBidtable[$variant];
                        }
                }
                return false;
        }

        /**
         * Split mText at the first "|" into flags and rules, and normalise
         * the flags. The results are stored in mFlags, mVariantFlags and
         * mRules. Without a "|" the whole text is the rules part.
         */
        function parseFlags() {
                $text = $this->mText;
                $flags = array();
                $variantFlags = array();

                $sepPos = strpos( $text, '|' );
                if ( $sepPos !== false ) {
                        $validFlags = $this->mConverter->mFlags;
                        $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
                        foreach ( $f as $ff ) {
                                $ff = trim( $ff );
                                // Unknown flags are dropped silently.
                                if ( isset( $validFlags[$ff] ) ) {
                                        $flags[$validFlags[$ff]] = true;
                                }
                        }
                        // strval(): substr() may return false when nothing follows the "|"
                        $text = strval( substr( $text, $sepPos + 1 ) );
                }

                if ( !$flags ) {
                        $flags['S'] = true;
                } elseif ( isset( $flags['R'] ) ) {
                        $flags = array( 'R' => true );// remove other flags
                } elseif ( isset( $flags['N'] ) ) {
                        $flags = array( 'N' => true );// remove other flags
                } elseif ( isset( $flags['-'] ) ) {
                        $flags = array( '-' => true );// remove other flags
                } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
                        $flags['H'] = true;
                } elseif ( isset( $flags['H'] ) ) {
                        // replace A flag, and remove other flags except T and D
                        $temp = array( '+' => true, 'H' => true );
                        if ( isset( $flags['T'] ) ) {
                                $temp['T'] = true;
                        }
                        if ( isset( $flags['D'] ) ) {
                                $temp['D'] = true;
                        }
                        $flags = $temp;
                } else {
                        if ( isset( $flags['A'] ) ) {
                                $flags['+'] = true;
                                $flags['S'] = true;
                        }
                        if ( isset( $flags['D'] ) ) {
                                unset( $flags['S'] );
                        }
                        // try to find flags like "zh-hans", "zh-hant"
                        // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
                        $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
                        if ( $variantFlags ) {
                                $variantFlags = array_flip( $variantFlags );
                                $flags = array();
                        }
                }
                $this->mVariantFlags = $variantFlags;
                $this->mRules = $text;
                $this->mFlags = $flags;
        }

        /**
         * Parse mRules into the bidirectional table (mBidtable) and the
         * unidirectional table (mUnidtable).
         *
         * Parts without a ":" are skipped. A part whose variant code is not
         * listed in the converter's mVariantNames is a syntax error that
         * empties both tables and stops parsing.
         */
        function parseRules() {
                $rules = $this->mRules;
                $bidtable = array();
                $unidtable = array();
                $variants = $this->mConverter->mVariants;
                $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

                $choice = preg_split( $varsep_pattern, $rules );

                foreach ( $choice as $c ) {
                        $v = explode( ':', $c, 2 );
                        if ( count( $v ) != 2 ) {
                                // syntax error, skip
                                continue;
                        }
                        $to = trim( $v[1] );
                        $v = trim( $v[0] );
                        $u = explode( '=>', $v, 2 );
                        if ( count( $u ) == 1 ) {
                                // Bidirectional: "variant:text".
                                // An empty $to would later be used as a source string,
                                // and strtr() could return a wrong result for it.
                                // The text "0" is not empty and is accepted.
                                if ( $to !== '' && in_array( $v, $variants, true ) ) {
                                        $bidtable[$v] = $to;
                                }
                        } else {
                                // Unidirectional: "from=>variant:text".
                                $from = trim( $u[0] );
                                $v = trim( $u[1] );
                                // An empty $from would become an empty-string key of
                                // the conversion table, which strtr() cannot handle.
                                if ( $from !== '' && $to !== '' && in_array( $v, $variants, true ) ) {
                                        $unidtable[$v][$from] = $to;
                                }
                        }
                        // syntax error, pass
                        if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                                $bidtable = array();
                                $unidtable = array();
                                break;
                        }
                }
                $this->mBidtable = $bidtable;
                $this->mUnidtable = $unidtable;
        }

        /**
         * Build a description of the rule from the parsed tables, using the
         * converter's variant names and description separators.
         *
         * @return String: description of the rule
         */
        function getRulesDesc() {
                $codesep = $this->mConverter->mDescCodeSep;
                $varsep = $this->mConverter->mDescVarSep;
                $text = '';
                foreach ( $this->mBidtable as $k => $v ) {
                        $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
                }
                foreach ( $this->mUnidtable as $k => $a ) {
                        foreach ( $a as $from => $to ) {
                                $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
                                        "$codesep$to$varsep";
                        }
                }
                return $text;
        }

        /**
         * Get the text the rule stands for in the given variant.
         *
         * Tried in this order: the variant's bidirectional entry, the
         * bidirectional entry of one of its fallbacks, the first
         * unidirectional target of the variant and, if manual conversion is
         * disabled for the variant, the first text found in any table.
         * Without any parsed rule the raw rules text is returned.
         *
         * @param $variant String: variant code
         * @return String|false: the text, or false/'' if nothing applies
         */
        function getRuleConvertedStr( $variant ) {
                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;

                if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
                        return $this->mRules;
                }

                // display current variant in bidirectional array
                $disp = $this->getTextInBidtable( $variant );
                // or display current variant in fallbacks
                if ( self::isBlank( $disp ) ) {
                        $disp = $this->getTextInBidtable(
                                $this->mConverter->getVariantFallbacks( $variant ) );
                }
                // or display current variant in unidirectional array
                if ( self::isBlank( $disp ) && array_key_exists( $variant, $unidtable ) ) {
                        $disp = array_values( $unidtable[$variant] );
                        $disp = $disp[0];
                }
                // or display first text under disable manual convert
                if ( self::isBlank( $disp )
                        && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
                        if ( count( $bidtable ) > 0 ) {
                                $disp = array_values( $bidtable );
                                $disp = $disp[0];
                        } else {
                                $disp = array_values( $unidtable );
                                $disp = array_values( $disp[0] );
                                $disp = $disp[0];
                        }
                }
                return $disp;
        }

        /**
         * Generate the conversion table (mConvTable) from the parsed tables,
         * honouring the manual level configured for each variant.
         */
        function generateConvTable() {
                // Special case optimisation
                if ( !$this->mBidtable && !$this->mUnidtable ) {
                        $this->mConvTable = array();
                        return;
                }

                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;
                $manLevel = $this->mConverter->mManualLevel;

                $vmarked = array();
                foreach ( $this->mConverter->mVariants as $v ) {
                        /* for bidirectional array
                                fill in the missing variants, if any,
                                with fallbacks */
                        if ( !isset( $bidtable[$v] ) ) {
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $v );
                                $vf = $this->getTextInBidtable( $variantFallbacks );
                                if ( !self::isBlank( $vf ) ) {
                                        $bidtable[$v] = $vf;
                                }
                        }

                        if ( isset( $bidtable[$v] ) ) {
                                foreach ( $vmarked as $vo ) {
                                        // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                                        // or -{H|zh:WordZh;zh-tw:WordTw}-
                                        // or -{-|zh:WordZh;zh-tw:WordTw}-
                                        // to introduce a custom mapping between
                                        // words WordZh and WordTw in the whole text
                                        if ( $manLevel[$v] == 'bidirectional' ) {
                                                $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                                        }
                                        if ( $manLevel[$vo] == 'bidirectional' ) {
                                                $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                                        }
                                }
                                $vmarked[] = $v;
                        }
                        /* for unidirectional array fill to convert tables */
                        if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                                && isset( $unidtable[$v] ) )
                        {
                                if ( !isset( $this->mConvTable[$v] ) ) {
                                        $this->mConvTable[$v] = array();
                                }
                                // Copy entry by entry instead of using array_merge():
                                // array_merge() renumbers integer-like keys, which
                                // would corrupt a source string such as "5".
                                foreach ( $unidtable[$v] as $from => $to ) {
                                        $this->mConvTable[$v][$from] = $to;
                                }
                        }
                }
        }

        /**
         * Parse the rule and compute its display text, title, action and
         * conversion table.
         *
         * @param $variant String: variant to render for; the converter's
         *  preferred variant is used when none is given
         */
        public function parse( $variant = null ) {
                if ( !$variant ) {
                        $variant = $this->mConverter->getPreferredVariant();
                }

                $this->parseFlags();
                $flags = $this->mFlags;

                // convert to specified variant
                // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
                if ( $this->mVariantFlags ) {
                        // check if current variant in flags
                        if ( isset( $this->mVariantFlags[$variant] ) ) {
                                // then convert <text to convert> to current language
                                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
                                        $variant );
                        } else {
                                // if current variant is not in flags,
                                // then we check its fallback variants.
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $variant );
                                if ( is_array( $variantFallbacks ) ) {
                                        foreach ( $variantFallbacks as $variantFallback ) {
                                                // if current variant's fallback exist in flags
                                                if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
                                                        // then convert <text to convert> to fallback language
                                                        $this->mRules =
                                                                $this->mConverter->autoConvert( $this->mRules,
                                                                        $variantFallback );
                                                        break;
                                                }
                                        }
                                }
                        }
                        $this->mFlags = $flags = array( 'R' => true );
                }

                if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
                        // decode => HTML entities modified by Sanitizer::removeHTMLtags
                        $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
                        $this->parseRules();
                }
                $rules = $this->mRules;

                if ( !$this->mBidtable && !$this->mUnidtable ) {
                        if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                                // fill all variants if text in -{A/H/-|text} without rules
                                foreach ( $this->mConverter->mVariants as $v ) {
                                        $this->mBidtable[$v] = $rules;
                                }
                        } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
                                $this->mFlags = $flags = array( 'R' => true );
                        }
                }

                // false = "no flag has produced a display text yet"
                $this->mRuleDisplay = false;
                foreach ( $flags as $flag => $unused ) {
                        switch ( $flag ) {
                                case 'R':
                                        // if we don't do content convert, still strip the -{}- tags
                                        $this->mRuleDisplay = $rules;
                                        break;
                                case 'N':
                                        // process N flag: output current variant name
                                        $ruleVar = trim( $rules );
                                        if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
                                                $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
                                        } else {
                                                $this->mRuleDisplay = '';
                                        }
                                        break;
                                case 'D':
                                        // process D flag: output rules description
                                        $this->mRuleDisplay = $this->getRulesDesc();
                                        break;
                                case 'H':
                                        // process H,- flag or T only: output nothing
                                        $this->mRuleDisplay = '';
                                        break;
                                case '-':
                                        $this->mRulesAction = 'remove';
                                        $this->mRuleDisplay = '';
                                        break;
                                case '+':
                                        $this->mRulesAction = 'add';
                                        $this->mRuleDisplay = '';
                                        break;
                                case 'S':
                                        $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
                                        break;
                                case 'T':
                                        $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
                                        $this->mRuleDisplay = '';
                                        break;
                                default:
                                        // ignore unknown flags (but see error case below)
                        }
                }
                if ( $this->mRuleDisplay === false ) {
                        $this->mRuleDisplay = $this->mManualCodeError;
                }

                $this->generateConvTable();
        }

        /**
         * Not implemented: the body is empty, so a call evaluates to null.
         * TODO: implement.
         */
        public function hasRules() {
                // TODO:
        }

        /**
         * Get the text to show in place of the rule.
         *
         * @return String
         */
        public function getDisplay() {
                return $this->mRuleDisplay;
        }

        /**
         * Get the title text set by the T flag.
         *
         * @return String|false: false if the rule sets no title
         */
        public function getTitle() {
                return $this->mRuleTitle;
        }

        /**
         * Get what is to be done with the conversion table of this rule.
         *
         * @return String: 'add', 'remove' or 'none'
         */
        public function getRulesAction() {
                return $this->mRulesAction;
        }

        /**
         * Get the conversion table generated from this rule.
         *
         * @return Array: variant => ( from => to )
         */
        public function getConvTable() {
                return $this->mConvTable;
        }

        /**
         * Get the text of the rules, i.e. the part after the flags.
         *
         * @return String
         */
        public function getRules() {
                return $this->mRules;
        }

        /**
         * Get the normalised flags of the rule.
         *
         * @return Array: flag => true
         */
        public function getFlags() {
                return $this->mFlags;
        }
}