MediaWiki  REL1_20
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
00031 class LanguageConverter {
00032 
/**
 * Language codes listed as having variants.
 * NOTE(review): the consumers of this list are outside this section of the file.
 */
public static $languagesWithVariants = array(
    'gan',
    'iu',
    'kk',
    'ku',
    'shi',
    'sr',
    'tg',
    'uz',
    'zh',
);

/** Main language code given to the constructor; returned when no variant applies. */
public $mMainLanguageCode;

/** Variant codes given to the constructor, minus those in $wgDisabledVariants. */
public $mVariants;

/** Map of variant code => fallback (a string or an array of codes). */
public $mVariantFallbacks;

/** Result of Language::fetchLanguageNames(), captured at construction time. */
public $mVariantNames;

/** True once loadTables() has run; reset by reloadTables(). */
public $mTablesLoaded = false;

/**
 * Conversion tables keyed by variant code, plus a CACHE_VERSION_KEY marker entry.
 * The per-variant entries are objects used through replace(), setPair(),
 * mergeArray() and removeArray().
 */
public $mTables;

/** Per-variant manual conversion level: 'bidirectional', 'unidirectional' or 'disable'. */
public $mManualLevel;

/** Cache key built with wfMemcKey( 'conversiontables', <main code> ). */
public $mCacheKey;

/** Language object given to the constructor. */
public $mLangObj;

/** Known rule flags: the built-in defaults, caller-supplied flags and one entry per variant. */
public $mFlags;

/** NOTE(review): presumably the code/text separator for description rules - not used in this section. */
public $mDescCodeSep = ':';

/** NOTE(review): presumably the separator between variants in description rules - not used in this section. */
public $mDescVarSep = ';';

/** NOTE(review): not referenced in this section of the file. */
public $mUcfirst = false;

/** Title supplied by the last manual rule that carried one, or false. */
public $mConvRuleTitle = false;

/** Memoized result of getURLVariant(). */
public $mURLVariant;

/** Last result of getUserVariant(). */
public $mUserVariant;

/** Memoized result of getHeaderVariant(). */
public $mHeaderVariant;

/** Maximum nesting depth of -{ ... }- rules before a warning is emitted. */
public $mMaxDepth = 10;

/** NOTE(review): not referenced in this section of the file. */
public $mVarSeparatorPattern;

/** Key that must be present in cached tables for the cache to be accepted. */
const CACHE_VERSION_KEY = 'VERSION 6';
00074 
/**
 * Set up the converter for one language.
 *
 * @param $langobj Object Language object; stored in $mLangObj
 * @param $maincode String Main language code
 * @param $variants Array Supported variant codes; any code listed in
 *   $wgDisabledVariants is dropped
 * @param $variantfallbacks Array Map of variant code => fallback(s)
 * @param $flags Array Extra flags, merged over the built-in defaults
 * @param $manualLevel Array Map of variant code => manual conversion level;
 *   variants without an entry get 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = array(),
    $variantfallbacks = array(), $flags = array(),
    $manualLevel = array() ) {
    global $wgDisabledVariants;

    $this->mLangObj = $langobj;
    $this->mMainLanguageCode = $maincode;
    $this->mVariants = array_diff( $variants, $wgDisabledVariants );
    $this->mVariantFallbacks = $variantfallbacks;
    $this->mVariantNames = Language::fetchLanguageNames();
    $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

    // Flags reserved for internal use and therefore not listed here:
    //   'S' show converted text
    //   '+' add rules for all text
    //   'E' the given flags are erroneous
    $builtinFlags = array(
        'A' => 'A', // add rule for convert code (all text convert)
        'T' => 'T', // title convert
        'R' => 'R', // raw content
        'D' => 'D', // convert description (subclass implement)
        '-' => '-', // remove convert (not implement)
        'H' => 'H', // add rule for convert code (but no display in placed code)
        'N' => 'N'  // current variant name
    );
    $this->mFlags = array_merge( $builtinFlags, $flags );

    // Every enabled variant gets a manual level and is itself a valid flag.
    foreach ( $this->mVariants as $code ) {
        $this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
            ? $manualLevel[$code]
            : 'bidirectional';
        $this->mFlags[$code] = $code;
    }
}
00119 
/**
 * Get the variants this converter handles (disabled variants were already
 * removed by the constructor).
 *
 * @return Array List of variant codes
 */
public function getVariants() {
    return $this->mVariants;
}
00129 
/**
 * Look up the fallback(s) configured for a variant.
 *
 * @param $variant String Variant code to look up
 * @return String|Array The configured fallback entry for $variant, or the
 *   main language code when none is configured
 */
public function getVariantFallbacks( $variant ) {
    return isset( $this->mVariantFallbacks[$variant] )
        ? $this->mVariantFallbacks[$variant]
        : $this->mMainLanguageCode;
}
00147 
/**
 * Get the title recorded by the most recent manual rule that supplied one.
 *
 * @return String|bool The recorded title, or false if none was recorded
 */
public function getConvRuleTitle() {
    return $this->mConvRuleTitle;
}
00155 
/**
 * Work out which variant to use for the current request.
 *
 * Sources are tried in this order: the URL, then the user preference
 * (logged-in users) or the Accept-Language header (anonymous users), then
 * $wgDefaultLanguageVariant. The main language code is the final fallback.
 *
 * Unlike the other get*Variant methods, the result is deliberately not
 * memoized, because new information may become available after the
 * first call.
 *
 * @return String The preferred variant code
 */
public function getPreferredVariant() {
    global $wgDefaultLanguageVariant, $wgUser;

    $req = $this->getURLVariant();
    $loggedIn = $wgUser->isLoggedIn();

    if ( !$req ) {
        $req = $loggedIn
            ? $this->getUserVariant()
            : $this->getHeaderVariant();
    }

    if ( $wgDefaultLanguageVariant && !$req ) {
        $req = $this->validateVariant( $wgDefaultLanguageVariant );
    }

    return $this->validateVariant( $req )
        ? $req
        : $this->mMainLanguageCode;
}
00184 
/**
 * Get the default variant, ignoring user preferences and request headers.
 *
 * Uses the URL variant if there is one, otherwise a valid
 * $wgDefaultLanguageVariant, otherwise the main language code.
 *
 * @return String The default variant code
 */
public function getDefaultVariant() {
    global $wgDefaultLanguageVariant;

    $req = $this->getURLVariant();

    if ( !$req && $wgDefaultLanguageVariant ) {
        $req = $this->validateVariant( $wgDefaultLanguageVariant );
    }

    return $req ? $req : $this->mMainLanguageCode;
}
00204 
/**
 * Check that a variant code is one of this converter's variants.
 *
 * @param $variant String|null Variant code to check
 * @return String|null $variant itself when it is supported, null otherwise
 */
public function validateVariant( $variant = null ) {
    if ( $variant === null || !in_array( $variant, $this->mVariants ) ) {
        return null;
    }
    return $variant;
}
00216 
/**
 * Get the variant requested through the URL.
 *
 * Reads the 'variant' request parameter and falls back to 'uselang'. A
 * valid result is memoized in $mURLVariant.
 *
 * @return String|null A supported variant code, or null
 */
public function getURLVariant() {
    global $wgRequest;

    if ( !$this->mURLVariant ) {
        // See if the preference is set in the request
        $requested = $wgRequest->getText( 'variant' );
        if ( !$requested ) {
            $requested = $wgRequest->getVal( 'uselang' );
        }
        $this->mURLVariant = $this->validateVariant( $requested );
    }

    return $this->mURLVariant;
}
00238 
/**
 * Get the variant from the user's options.
 *
 * Logged-in users are asked for their 'variant' option, everybody else for
 * their 'language' option. The user language is read from the option rather
 * than by constructing $wgLang, to avoid infinite recursion. The original
 * comments also warn not to call this on stub user objects, because that
 * causes infinite recursion during initialisation.
 *
 * The result is stored in $mUserVariant but never read back from it:
 * memoizing this method wreaks havoc on parserTest.php.
 *
 * @return String|null A supported variant code, or null
 */
protected function getUserVariant() {
    global $wgUser;

    $optionName = $wgUser->isLoggedIn() ? 'variant' : 'language';
    $this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );

    return $this->mUserVariant;
}
00267 
/**
 * Get the variant requested through the Accept-Language HTTP header.
 *
 * The accepted languages are checked in order. The first one that is a
 * supported variant wins. If none is, the fallbacks collected for those
 * languages are tried in the same way. A successful result is memoized in
 * $mHeaderVariant.
 *
 * @return String|null A supported variant code, or null
 */
protected function getHeaderVariant() {
    global $wgRequest;

    if ( $this->mHeaderVariant ) {
        return $this->mHeaderVariant;
    }

    // See if some supported language variant is set in the HTTP header
    $acceptCodes = array_keys( $wgRequest->getAcceptLang() );
    if ( !$acceptCodes ) {
        return null;
    }

    $deferred = array();
    foreach ( $acceptCodes as $code ) {
        $this->mHeaderVariant = $this->validateVariant( $code );
        if ( $this->mHeaderVariant ) {
            break;
        }

        // Not a variant itself: remember its fallbacks and examine them
        // only after every directly requested language has been tried.
        $fallbacks = $this->getVariantFallbacks( $code );
        if ( is_array( $fallbacks ) ) {
            $deferred = array_merge( $deferred, $fallbacks );
        } elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
            $deferred[] = $fallbacks;
        }
    }

    if ( $this->mHeaderVariant ) {
        return $this->mHeaderVariant;
    }

    // Process the fallback languages now
    foreach ( array_unique( $deferred ) as $code ) {
        $this->mHeaderVariant = $this->validateVariant( $code );
        if ( $this->mHeaderVariant ) {
            break;
        }
    }

    return $this->mHeaderVariant;
}
00319 
/**
 * Convert text with the loaded tables, leaving markup untouched.
 *
 * The text is split at HTML tags, HTML entities, parser placeholders and
 * <code>/<script>/<pre> sections. The pieces in between are joined with
 * NUL separators and translated in one batch. The skipped markup is put
 * back unchanged, except that 'title' and 'alt' attributes of tags are
 * translated too (unless they look like URLs).
 *
 * @param $text String Text to convert
 * @param $toVariant String|bool Target variant; when false the preferred
 *   variant is used
 * @return String Converted text, or $text unchanged when guessVariant()
 *   reports that it is already in the target variant
 */
public function autoConvert( $text, $toVariant = false ) {
    wfProfileIn( __METHOD__ );

    $this->loadTables();

    if ( !$toVariant ) {
        $toVariant = $this->getPreferredVariant();
        if ( !$toVariant ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }
    }

    if ( $this->guessVariant( $text, $toVariant ) ) {
        wfProfileOut( __METHOD__ );
        return $text;
    }

    /* We convert everything except:
       1. HTML markups (anything between < and >)
       2. HTML entities
       3. placeholders created by the parser
    */
    global $wgParser;
    if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
        $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
    } else {
        $marker = '';
    }

    // This one is needed when the text is inside an HTML markup
    $htmlfix = '|<[^>]+$|^[^<>]*>';

    // Disable conversion between <code></code> tags
    $codefix = '<code>.+?<\/code>|';
    // Disable conversion of <script type="text/javascript"> ... </script>
    $scriptfix = '<script.*?>.*?<\/script>|';
    // Disable conversion of <pre xxxx> ... </pre>
    $prefix = '<pre.*?>.*?<\/pre>|';

    // The entity branch accepts upper-case characters after the first one
    // as well, so that entities such as &AElig; or &#xAB; are recognised
    // and kept out of the translation batch.
    $reg = '/' . $codefix . $scriptfix . $prefix .
        '<[^>]+>|&[a-zA-Z#][a-zA-Z0-9]+;' . $marker . $htmlfix . '/s';
    $startPos = 0;
    $sourceBlob = '';
    $literalBlob = '';

    // Guard against delimiter nulls in the input
    $text = str_replace( "\000", '', $text );
    $textLength = strlen( $text );

    $markupMatches = null;
    $elementMatches = null;
    while ( $startPos < $textLength ) {
        if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
            $elementPos = $markupMatches[0][1];
            $element = $markupMatches[0][0];
        } else {
            $elementPos = $textLength;
            $element = '';
        }

        // Queue the part before the markup for translation in a batch
        $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

        // Advance to the next position
        $startPos = $elementPos + strlen( $element );

        // Translate any alt or title attributes inside the matched element
        if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
            $elementMatches ) )
        {
            $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
            $changed = false;
            foreach ( array( 'title', 'alt' ) as $attrName ) {
                if ( !isset( $attrs[$attrName] ) ) {
                    continue;
                }
                $attr = $attrs[$attrName];
                // Don't convert URLs. strpos() returns 0 for a match at the
                // very start, so the result must be compared against false.
                if ( strpos( $attr, '://' ) === false ) {
                    $attr = $this->translate( $attr, $toVariant );
                }

                // Remove HTML tags to avoid disrupting the layout
                $attr = preg_replace( '/<[^>]+>/', '', $attr );
                if ( $attr !== $attrs[$attrName] ) {
                    $attrs[$attrName] = $attr;
                    $changed = true;
                }
            }
            if ( $changed ) {
                $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                    $elementMatches[3];
            }
        }
        $literalBlob .= $element . "\000";
    }

    // Do the main translation batch
    $translatedBlob = $this->translate( $sourceBlob, $toVariant );

    // Put the output back together: translated piece, literal piece, ...
    $translatedIter = StringUtils::explode( "\000", $translatedBlob );
    $literalIter = StringUtils::explode( "\000", $literalBlob );
    $output = '';
    while ( $translatedIter->valid() && $literalIter->valid() ) {
        $output .= $translatedIter->current();
        $output .= $literalIter->current();
        $translatedIter->next();
        $literalIter->next();
    }

    wfProfileOut( __METHOD__ );
    return $output;
}
00444 
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * No markup handling is done here; the text is handed to the table's
 * replace() method as it is. Text that is empty or consists only of
 * whitespace is returned unchanged.
 *
 * @param $text String Text to translate
 * @param $variant String Variant code; it must have an entry in the loaded
 *   tables
 * @return String Translated text
 */
public function translate( $text, $variant ) {
    wfProfileIn( __METHOD__ );
    // Compare against '' explicitly: a bare truthiness test would also
    // skip the string "0".
    if ( trim( $text ) !== '' ) {
        $this->loadTables();
        $text = $this->mTables[$variant]->replace( $text );
    }
    wfProfileOut( __METHOD__ );
    return $text;
}
00465 
/**
 * Translate a string into every supported variant.
 *
 * @param $text String Text to translate
 * @return Array Map of variant code => translated text
 */
public function autoConvertToAllVariants( $text ) {
    wfProfileIn( __METHOD__ );
    $this->loadTables();

    $translations = array();
    foreach ( $this->mVariants as $code ) {
        $translations[$code] = $this->translate( $text, $code );
    }

    wfProfileOut( __METHOD__ );
    return $translations;
}
00484 
/**
 * Translate link text into every supported variant.
 *
 * In this class it simply delegates to autoConvertToAllVariants().
 *
 * @param $text String Link text to translate
 * @return Array Map of variant code => translated text
 */
public function convertLinkToAllVariants( $text ) {
    $translations = $this->autoConvertToAllVariants( $text );
    return $translations;
}
00499 
/**
 * Apply a parsed manual conversion rule to the converter state.
 *
 * Records the rule's title, if it has one, and adds the rule's conversion
 * pairs to, or removes them from, the per-variant tables, depending on the
 * rule's action.
 *
 * @param $convRule Object Parsed rule exposing getTitle(), getConvTable()
 *   and getRulesAction()
 */
protected function applyManualConv( $convRule ) {
    // Syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises title conversion.
    // Bug 24072: only overwrite $mConvRuleTitle when this rule really
    // carries a title, otherwise later non-title rules would wipe it out.
    $ruleTitle = $convRule->getTitle();
    if ( $ruleTitle ) {
        $this->mConvRuleTitle = $ruleTitle;
    }

    // Merge/remove manual conversion rules to/from the global table
    $convTable = $convRule->getConvTable();
    $action = $convRule->getRulesAction();
    foreach ( $convTable as $variant => $pairs ) {
        if ( !$this->validateVariant( $variant ) ) {
            continue;
        }

        switch ( $action ) {
            case 'add':
                foreach ( $pairs as $from => $to ) {
                    // Skip pairs where both sides are blank, so that
                    // $this->translate() can always return a string.
                    if ( $from || $to ) {
                        // More efficient than array_merge(), about 2.5 times
                        $this->mTables[$variant]->setPair( $from, $to );
                    }
                }
                break;
            case 'remove':
                $this->mTables[$variant]->removeArray( $pairs );
                break;
        }
    }
}
00538 
/**
 * Convert a title, including its namespace prefix, to the preferred variant.
 *
 * For namespaces other than NS_MAIN the prefix comes from the
 * 'conversion-ns<index>' message when it exists, otherwise from the
 * namespace names of the preferred variant's language object.
 *
 * @param $title Object Title exposing getNamespace() and getText()
 * @return String Converted title text
 */
public function convertTitle( $title ) {
    $variant = $this->getPreferredVariant();
    $nsIndex = $title->getNamespace();

    $prefix = '';
    if ( $nsIndex !== NS_MAIN ) {
        // First check whether a message gives us a converted name
        $nsMessage = wfMessage( 'conversion-ns' . $nsIndex )->inContentLanguage();
        if ( $nsMessage->exists() ) {
            $prefix = $nsMessage->plain();
        } else {
            // No such message: take the name from the variant's own
            // namespace names.
            $variantLang = $this->mLangObj->factory( $variant );
            $prefix = $variantLang->getFormattedNsText( $nsIndex );
        }
        $prefix .= ':';
    }

    return $this->translate( $prefix . $title->getText(), $variant );
}
00568 
/**
 * Convert text to the preferred variant, honouring -{ ... }- markup.
 *
 * @param $text String Text to convert
 * @return String Converted text
 */
public function convert( $text ) {
    return $this->convertTo( $text, $this->getPreferredVariant() );
}
00587 
/**
 * Convert text to a given variant, honouring -{ ... }- markup.
 *
 * @param $text String Text to convert
 * @param $variant String Target variant code
 * @return String Converted text, or $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
    global $wgDisableLangConversion;

    return $wgDisableLangConversion
        ? $text
        : $this->recursiveConvertTopLevel( $text, $variant );
}
00602 
/**
 * Convert text that may contain -{ ... }- rules.
 *
 * The text outside rules is passed to autoConvert(), unless guessVariant()
 * reports that the text is already in the target variant. Each rule is
 * handed to recursiveConvertRule().
 *
 * @param $text String Text to convert
 * @param $variant String Target variant code
 * @param $depth Integer Current nesting depth
 * @return String Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
    $offset = 0;
    $result = '';
    $textLength = strlen( $text );
    $convertPlain = !$this->guessVariant( $text, $variant );

    while ( $offset < $textLength ) {
        $markupAt = strpos( $text, '-{', $offset );
        $isLast = ( $markupAt === false );

        // Plain segment: up to the next rule, or the whole rest of the text
        $plain = $isLast
            ? substr( $text, $offset )
            : substr( $text, $offset, $markupAt - $offset );
        if ( $convertPlain ) {
            $plain = $this->autoConvert( $plain, $variant );
        }
        $result .= $plain;

        if ( $isLast ) {
            // No more markup
            break;
        }

        // recursiveConvertRule() moves $offset past the rule it consumed
        $offset = $markupAt;
        $result .= $this->recursiveConvertRule( $text, $variant, $offset, $depth + 1 );
    }

    return $result;
}
00642 
/**
 * Parse and apply the -{ ... }- rule that starts at $startPos.
 *
 * Nested rules are converted recursively, up to $mMaxDepth levels. Beyond
 * that the nested opening marker is kept literally and a warning is
 * inserted once. A rule that is never closed is returned as '-{' followed
 * by its auto-converted content.
 *
 * @param $text String Full text being converted
 * @param $variant String Target variant code
 * @param $startPos Integer Offset of the opening '-{'; on return it points
 *   just past the consumed rule (or to the end of $text if unclosed)
 * @param $depth Integer Current nesting depth
 * @return String Text to display in place of the rule
 * @throws MWException If $text does not contain '-{' at $startPos
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
    // Quick sanity check (no function calls)
    if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
        throw new MWException( __METHOD__ . ': invalid input string' );
    }

    $startPos += 2;
    $inner = '';
    $warningDone = false;
    $length = strlen( $text );

    while ( $startPos < $length ) {
        $m = false;
        preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
        if ( !$m ) {
            // Unclosed rule
            break;
        }

        $token = $m[0][0];
        $pos = $m[0][1];

        // Markup found
        // Append initial segment
        $inner .= substr( $text, $startPos, $pos - $startPos );

        // Advance position
        $startPos = $pos;

        switch ( $token ) {
            case '-{':
                // Check max depth
                if ( $depth >= $this->mMaxDepth ) {
                    $inner .= '-{';
                    if ( !$warningDone ) {
                        $inner .= '<span class="error">' .
                            wfMessage( 'language-converter-depth-warning' )
                                ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
                            '</span>';
                        $warningDone = true;
                    }
                    $startPos += 2;
                    // "continue 2" targets the enclosing while loop. A bare
                    // "continue" inside a switch acts like "break" and
                    // raises a warning on PHP 7.3 and later.
                    continue 2;
                }
                // Recursively parse another rule
                $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                break;
            case '}-':
                // Apply the rule
                $startPos += 2;
                $rule = new ConverterRule( $inner, $this );
                $rule->parse( $variant );
                $this->applyManualConv( $rule );
                return $rule->getDisplay();
            default:
                throw new MWException( __METHOD__ . ': invalid regex match' );
        }
    }

    // Unclosed rule: consume the rest of the text
    if ( $startPos < $length ) {
        $inner .= substr( $text, $startPos );
    }
    $startPos = $length;
    return '-{' . $this->autoConvert( $inner, $variant );
}
00720 
/**
 * If a link target does not exist, look for an existing page under one of
 * its variant spellings and switch the link to it.
 *
 * Nothing is done when the target already exists, when language or title
 * conversion is disabled, or (unless $ignoreOtherCond is set) when the
 * request or the user opts out: redirect=no, action=edit, action=submit,
 * linkconvert=no, or the 'noconvertlink' user option.
 *
 * @param &$link String Link text; replaced with the text of the title found
 * @param &$nt Object|mixed Title of the link; replaced with the title found
 * @param $ignoreOtherCond Boolean Skip the request/user opt-out checks
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
    # If the article already exists there is no need to check it again;
    # doing so may cause a fault.
    if ( is_object( $nt ) && $nt->exists() ) {
        return;
    }

    global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
        $wgUser;

    $redirectParam = $wgRequest->getText( 'redirect', 'yes' );
    $actionParam = $wgRequest->getText( 'action' );
    $linkConvertParam = $wgRequest->getText( 'linkconvert', 'yes' );
    $batch = new LinkBatch();

    if ( $wgDisableLangConversion || $wgDisableTitleConversion ) {
        return;
    }

    if ( !$ignoreOtherCond ) {
        $optedOut = $redirectParam == 'no'
            || $actionParam == 'edit'
            || $actionParam == 'submit'
            || $linkConvertParam == 'no'
            || $wgUser->getOption( 'noconvertlink' ) == 1;
        if ( $optedOut ) {
            return;
        }
    }

    $ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

    $spellings = $this->autoConvertToAllVariants( $link );
    if ( !$spellings ) { // give up
        return;
    }

    // Collect a title for every spelling that differs from the original
    $candidates = array();
    foreach ( $spellings as $spelling ) {
        if ( $spelling == $link ) {
            continue;
        }
        $candidate = Title::newFromText( $spelling, $ns );
        if ( $candidate !== null ) {
            $batch->addObj( $candidate );
            $candidates[] = $candidate;
        }
    }

    // Fetch all variants in a single query
    $batch->execute();

    // The first candidate that exists wins
    foreach ( $candidates as $candidate ) {
        if ( $candidate->getArticleID() > 0 ) {
            $nt = $candidate;
            $link = $candidate->getText();
            break;
        }
    }
}
00792 
/**
 * Build a hash fragment that depends on the preferred variant.
 * NOTE(review): presumably appended to cache keys by callers - confirm.
 *
 * @return String '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
    return '!' . $this->getPreferredVariant();
}
00802 
/**
 * Guess whether a text is already written in the given variant.
 *
 * This implementation never guesses and always answers false.
 * NOTE(review): presumably meant to be overridden by subclasses - confirm.
 *
 * @param $text String Text to examine
 * @param $variant String Variant code
 * @return Boolean Always false here
 */
public function guessVariant( $text, $variant ) {
    return false;
}
00816 
/**
 * Load the default conversion tables.
 *
 * This implementation only throws; loadTables() calls this method when it
 * rebuilds the tables, so it has to be overridden.
 *
 * @throws MWException Always
 */
public function loadDefaultTables() {
    $name = get_class( $this );
    throw new MWException( "Must implement loadDefaultTables() method in class $name" );
}
00828 
/**
 * Load the conversion tables, from cache when possible.
 *
 * Does nothing if the tables were already loaded. Cached tables are only
 * accepted when they carry the CACHE_VERSION_KEY marker. Otherwise the
 * default tables are loaded, merged with the result of parseCachedTable()
 * for each variant, post-processed and written back to the cache.
 *
 * @param $fromCache Boolean Whether the cache may be read
 */
public function loadTables( $fromCache = true ) {
    global $wgLangConvMemc;

    if ( $this->mTablesLoaded ) {
        return;
    }

    wfProfileIn( __METHOD__ );
    $this->mTablesLoaded = true;
    $this->mTables = false;

    if ( $fromCache ) {
        wfProfileIn( __METHOD__ . '-cache' );
        $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
        wfProfileOut( __METHOD__ . '-cache' );
    }

    $cacheUsable = $this->mTables
        && array_key_exists( self::CACHE_VERSION_KEY, $this->mTables );

    if ( !$cacheUsable ) {
        wfProfileIn( __METHOD__ . '-recache' );
        // Not in cache, or a fresh reload was requested: start from the
        // default tables, then update them using things in
        // MediaWiki:Conversiontable/*
        $this->loadDefaultTables();
        foreach ( $this->mVariants as $code ) {
            $extra = $this->parseCachedTable( $code );
            $this->mTables[$code]->mergeArray( $extra );
        }

        $this->postLoadTables();
        $this->mTables[self::CACHE_VERSION_KEY] = true;

        $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
        wfProfileOut( __METHOD__ . '-recache' );
    }

    wfProfileOut( __METHOD__ );
}
00869 
/**
 * Hook called by loadTables() after the tables have been rebuilt and before
 * they are written to the cache. Does nothing in this class.
 */
public function postLoadTables() {
}
00874 
/**
 * Discard the loaded tables and rebuild them without reading the cache.
 */
public function reloadTables() {
    if ( $this->mTables ) {
        unset( $this->mTables );
    }

    // Clear the flag so that loadTables() does not return early
    $this->mTablesLoaded = false;
    $this->loadTables( false );
}
00887 
00907         function parseCachedTable( $code, $subpage = '', $recursive = true ) {
00908                 static $parsed = array();
00909 
00910                 $key = 'Conversiontable/' . $code;
00911                 if ( $subpage ) {
00912                         $key .= '/' . $subpage;
00913                 }
00914                 if ( array_key_exists( $key, $parsed ) ) {
00915                         return array();
00916                 }
00917 
00918                 $parsed[$key] = true;
00919 
00920                 if ( $subpage === '' ) {
00921                         $txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
00922                 } else {
00923                         $txt = false;
00924                         $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
00925                         if ( $title && $title->exists() ) {
00926                                 $revision = Revision::newFromTitle( $title );
00927                                 if ( $revision ) {
00928                                         $txt = $revision->getRawText();
00929                                 }
00930                         }
00931                 }
00932 
00933                 # Nothing to parse if there's no text
00934                 if ( $txt === false || $txt === null || $txt === '' ) {
00935                         return array();
00936                 }
00937 
00938                 // get all subpage links of the form
00939                 // [[MediaWiki:Conversiontable/zh-xx/...|...]]
00940                 $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
00941                         ':Conversiontable';
00942                 $subs = StringUtils::explode( '[[', $txt );
00943                 $sublinks = array();
00944                 foreach ( $subs as $sub ) {
00945                         $link = explode( ']]', $sub, 2 );
00946                         if ( count( $link ) != 2 ) {
00947                                 continue;
00948                         }
00949                         $b = explode( '|', $link[0], 2 );
00950                         $b = explode( '/', trim( $b[0] ), 3 );
00951                         if ( count( $b ) == 3 ) {
00952                                 $sublink = $b[2];
00953                         } else {
00954                                 $sublink = '';
00955                         }
00956 
00957                         if ( $b[0] == $linkhead && $b[1] == $code ) {
00958                                 $sublinks[] = $sublink;
00959                         }
00960                 }
00961 
00962                 // parse the mappings in this page
00963                 $blocks = StringUtils::explode( '-{', $txt );
00964                 $ret = array();
00965                 $first = true;
00966                 foreach ( $blocks as $block ) {
00967                         if ( $first ) {
00968                                 // Skip the part before the first -{
00969                                 $first = false;
00970                                 continue;
00971                         }
00972                         $mappings = explode( '}-', $block, 2 );
00973                         $stripped = str_replace( array( "'", '"', '*', '#' ), '',
00974                                                                          $mappings[0] );
00975                         $table = StringUtils::explode( ';', $stripped );
00976                         foreach ( $table as $t ) {
00977                                 $m = explode( '=>', $t, 3 );
00978                                 if ( count( $m ) != 2 ) {
00979                                         continue;
00980                                 }
00981                                 // trim any trailling comments starting with '//'
00982                                 $tt = explode( '//', $m[1], 2 );
00983                                 $ret[trim( $m[0] )] = trim( $tt[0] );
00984                         }
00985                 }
00986 
00987                 // recursively parse the subpages
00988                 if ( $recursive ) {
00989                         foreach ( $sublinks as $link ) {
00990                                 $s = $this->parseCachedTable( $code, $link, $recursive );
00991                                 $ret = array_merge( $ret, $s );
00992                         }
00993                 }
00994 
00995                 if ( $this->mUcfirst ) {
00996                         foreach ( $ret as $k => $v ) {
00997                                 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
00998                         }
00999                 }
01000                 return $ret;
01001         }
01002 
01011         public function markNoConversion( $text, $noParse = false ) {
01012                 # don't mark if already marked
01013                 if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
01014                         return $text;
01015                 }
01016 
01017                 $ret = "-{R|$text}-";
01018                 return $ret;
01019         }
01020 
01029         function convertCategoryKey( $key ) {
01030                 return $key;
01031         }
01032 
01049         function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
01050                         $isWatch, $section, $flags, $revision ) {
01051                 $titleobj = $article->getTitle();
01052                 if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
01053                         $title = $titleobj->getDBkey();
01054                         $t = explode( '/', $title, 3 );
01055                         $c = count( $t );
01056                         if ( $c > 1 && $t[0] == 'Conversiontable' ) {
01057                                 if ( $this->validateVariant( $t[1] ) ) {
01058                                         $this->reloadTables();
01059                                 }
01060                         }
01061                 }
01062                 return true;
01063         }
01064 
01073         public function armourMath( $text ) {
01074                 // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
01075                 // any unwanted markup appearing in the math image tag.
01076                 $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
01077                 return $text;
01078         }
01079 
01083         function getVarSeparatorPattern() {
01084                 if ( is_null( $this->mVarSeparatorPattern ) ) {
01085                         // varsep_pattern for preg_split:
01086                         // text should be splited by ";" only if a valid variant
01087                         // name exist after the markup, for example:
01088                         //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
01089                         //      <span style="font-size:120%;">yyy</span>;}-
01090                         // we should split it as:
01091                         //  array(
01092                         //        [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
01093                         //        [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
01094                         //        [2] => ''
01095                         //       )
01096                         $pat = '/;\s*(?=';
01097                         foreach ( $this->mVariants as $variant ) {
01098                                 // zh-hans:xxx;zh-hant:yyy
01099                                 $pat .= $variant . '\s*:|';
01100                                 // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
01101                                 $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
01102                         }
01103                         $pat .= '\s*$)/';
01104                         $this->mVarSeparatorPattern = $pat;
01105                 }
01106                 return $this->mVarSeparatorPattern;
01107         }
01108 }
01109 
/**
 * Parser for one manual conversion rule, i.e. the text found between the
 * "-{" and "}-" markers.
 *
 * A rule has the form "flags|rules". parse() splits off the flags, builds
 * the bidirectional and unidirectional tables from the rules, and derives
 * the text to display, an optional title, the action to apply to the
 * converter's tables and the conversion table contributed by the rule.
 */
class ConverterRule {
        var $mText; // original text in -{text}-
        var $mConverter; // LanguageConverter object
        var $mRuleDisplay = '';
        var $mRuleTitle = false;
        var $mRules = ''; // string : the text of the rules
        var $mRulesAction = 'none';
        var $mFlags = array();
        var $mVariantFlags = array();
        var $mConvTable = array();
        var $mBidtable = array(); // array of the translation in each variant
        var $mUnidtable = array(); // array of the translation in each variant

        /**
         * @param $text String: the text between -{ and }-
         * @param $converter LanguageConverter object
         */
        public function __construct( $text, $converter ) {
                $this->mText = $text;
                $this->mConverter = $converter;
        }

        /**
         * Look up the text of the first of the given variants that has an
         * entry in the bidirectional table.
         *
         * @param $variants Array|String: variant code(s), in order of preference
         * @return String|false: the text, or false when none of the variants
         *   has an entry
         */
        public function getTextInBidtable( $variants ) {
                $variants = (array)$variants;
                if ( !$variants ) {
                        return false;
                }
                foreach ( $variants as $variant ) {
                        if ( isset( $this->mBidtable[$variant] ) ) {
                                return $this->mBidtable[$variant];
                        }
                }
                return false;
        }

        /**
         * Split $this->mText at the first "|" into flags and rule text, then
         * normalise the flags.
         *
         * Results are stored in $this->mFlags, $this->mVariantFlags and
         * $this->mRules. Only flags known to the converter (its mFlags map)
         * are kept; with no valid flag at all the rule gets the 'S' flag.
         */
        function parseFlags() {
                $text = $this->mText;
                $flags = array();
                $variantFlags = array();

                $sepPos = strpos( $text, '|' );
                if ( $sepPos !== false ) {
                        $validFlags = $this->mConverter->mFlags;
                        $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
                        foreach ( $f as $ff ) {
                                $ff = trim( $ff );
                                if ( isset( $validFlags[$ff] ) ) {
                                        $flags[$validFlags[$ff]] = true;
                                }
                        }
                        // strval(): substr() may return false for an empty remainder
                        $text = strval( substr( $text, $sepPos + 1 ) );
                }

                if ( !$flags ) {
                        $flags['S'] = true;
                } elseif ( isset( $flags['R'] ) ) {
                        $flags = array( 'R' => true ); // remove other flags
                } elseif ( isset( $flags['N'] ) ) {
                        $flags = array( 'N' => true ); // remove other flags
                } elseif ( isset( $flags['-'] ) ) {
                        $flags = array( '-' => true ); // remove other flags
                } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
                        $flags['H'] = true;
                } elseif ( isset( $flags['H'] ) ) {
                        // replace A flag, and remove other flags except T
                        $temp = array( '+' => true, 'H' => true );
                        if ( isset( $flags['T'] ) ) {
                                $temp['T'] = true;
                        }
                        if ( isset( $flags['D'] ) ) {
                                $temp['D'] = true;
                        }
                        $flags = $temp;
                } else {
                        if ( isset( $flags['A'] ) ) {
                                $flags['+'] = true;
                                $flags['S'] = true;
                        }
                        if ( isset( $flags['D'] ) ) {
                                unset( $flags['S'] );
                        }
                        // try to find flags like "zh-hans", "zh-hant"
                        // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
                        $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
                        if ( $variantFlags ) {
                                $variantFlags = array_flip( $variantFlags );
                                $flags = array();
                        }
                }
                $this->mVariantFlags = $variantFlags;
                $this->mRules = $text;
                $this->mFlags = $flags;
        }

        /**
         * Build the bidirectional ("variant:text") and unidirectional
         * ("from=>variant:text") tables from $this->mRules and store them in
         * $this->mBidtable and $this->mUnidtable.
         *
         * A part naming something that is not a known variant name is treated
         * as a syntax error and empties both tables.
         */
        function parseRules() {
                $rules = $this->mRules;
                $bidtable = array();
                $unidtable = array();
                $variants = $this->mConverter->mVariants;
                $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

                $choice = preg_split( $varsep_pattern, $rules );

                foreach ( $choice as $c ) {
                        $v = explode( ':', $c, 2 );
                        if ( count( $v ) != 2 ) {
                                // syntax error, skip
                                continue;
                        }
                        $to = trim( $v[1] );
                        $v = trim( $v[0] );
                        $u = explode( '=>', $v, 2 );
                        // An empty string must never end up as a source text:
                        // strtr() misbehaves on empty keys. In the bidirectional
                        // table $to doubles as the source for the other variants.
                        // The test is against '' on purpose: "0" is valid text.
                        if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
                                $bidtable[$v] = $to;
                        } elseif ( count( $u ) == 2 ) {
                                $from = trim( $u[0] );
                                $v = trim( $u[1] );
                                if ( $from !== '' && $to !== '' && in_array( $v, $variants ) ) {
                                        $unidtable[$v][$from] = $to;
                                }
                        }
                        // syntax error, pass
                        if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                                $bidtable = array();
                                $unidtable = array();
                                break;
                        }
                }
                $this->mBidtable = $bidtable;
                $this->mUnidtable = $unidtable;
        }

        /**
         * Describe the rules in readable form, using the converter's variant
         * names and its code/variant separators.
         *
         * @return String
         */
        function getRulesDesc() {
                $codesep = $this->mConverter->mDescCodeSep;
                $varsep = $this->mConverter->mDescVarSep;
                $text = '';
                foreach ( $this->mBidtable as $k => $v ) {
                        $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
                }
                foreach ( $this->mUnidtable as $k => $a ) {
                        foreach ( $a as $from => $to ) {
                                $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
                                        "$codesep$to$varsep";
                        }
                }
                return $text;
        }

        /**
         * Pick the text this rule displays for a variant.
         *
         * Order of preference: the variant's own bidirectional entry, the
         * entry of one of its fallbacks, the first unidirectional target of
         * the variant and, only when manual conversion is disabled for the
         * variant, the first text found in any table.
         *
         * @param $variant String: variant code
         * @return String|false: the rule text itself when no table exists,
         *   otherwise the chosen text, or false when nothing applies
         */
        function getRuleConvertedStr( $variant ) {
                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;

                if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
                        return $this->mRules;
                }

                // "Nothing found" is tested with === false so that the text "0"
                // counts as a result.

                // display current variant in bidirectional array
                $disp = $this->getTextInBidtable( $variant );
                // or display current variant in fallbacks
                if ( $disp === false ) {
                        $disp = $this->getTextInBidtable(
                                $this->mConverter->getVariantFallbacks( $variant ) );
                }
                // or display current variant in unidirectional array
                if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
                        $disp = array_values( $unidtable[$variant] );
                        $disp = $disp[0];
                }
                // or display first text under disable manual convert
                if ( $disp === false
                        && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
                        if ( count( $bidtable ) > 0 ) {
                                $disp = array_values( $bidtable );
                                $disp = $disp[0];
                        } else {
                                $disp = array_values( $unidtable );
                                $disp = array_values( $disp[0] );
                                $disp = $disp[0];
                        }
                }
                return $disp;
        }

        /**
         * Build $this->mConvTable, the per-variant "from => to" table this
         * rule contributes, out of the bidirectional and unidirectional
         * tables and the converter's manual conversion level per variant.
         */
        function generateConvTable() {
                // Always start from scratch so that calling parse() again does
                // not leave pairs from a previous run behind.
                $this->mConvTable = array();

                // Special case optimisation
                if ( !$this->mBidtable && !$this->mUnidtable ) {
                        return;
                }

                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;
                $manLevel = $this->mConverter->mManualLevel;

                $vmarked = array();
                foreach ( $this->mConverter->mVariants as $v ) {
                        /* for bidirectional array
                                fill in the missing variants, if any,
                                with fallbacks */
                        if ( !isset( $bidtable[$v] ) ) {
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $v );
                                $vf = $this->getTextInBidtable( $variantFallbacks );
                                if ( $vf !== false ) {
                                        $bidtable[$v] = $vf;
                                }
                        }

                        if ( isset( $bidtable[$v] ) ) {
                                // Pair this variant with every variant handled so
                                // far, in both directions.
                                foreach ( $vmarked as $vo ) {
                                        // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                                        // or -{H|zh:WordZh;zh-tw:WordTw}-
                                        // or -{-|zh:WordZh;zh-tw:WordTw}-
                                        // to introduce a custom mapping between
                                        // words WordZh and WordTw in the whole text
                                        if ( $manLevel[$v] == 'bidirectional' ) {
                                                $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                                        }
                                        if ( $manLevel[$vo] == 'bidirectional' ) {
                                                $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                                        }
                                }
                                $vmarked[] = $v;
                        }
                        /* for unidirectional array fill to convert tables */
                        if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                                && isset( $unidtable[$v] ) )
                        {
                                if ( isset( $this->mConvTable[$v] ) ) {
                                        $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
                                } else {
                                        $this->mConvTable[$v] = $unidtable[$v];
                                }
                        }
                }
        }

        /**
         * Parse the rule: flags, tables, display text, title, action and
         * conversion table. Results are read back through the getters.
         *
         * @param $variant String|null: variant to display; the converter's
         *   preferred variant is used when empty
         */
        public function parse( $variant = null ) {
                if ( !$variant ) {
                        $variant = $this->mConverter->getPreferredVariant();
                }

                $this->parseFlags();
                $flags = $this->mFlags;

                // convert to specified variant
                // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
                if ( $this->mVariantFlags ) {
                        // check if current variant in flags
                        if ( isset( $this->mVariantFlags[$variant] ) ) {
                                // then convert <text to convert> to current language
                                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
                                        $variant );
                        } else {
                                // if current variant is not in flags,
                                // then we check its fallback variants.
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $variant );
                                if ( is_array( $variantFallbacks ) ) {
                                        foreach ( $variantFallbacks as $variantFallback ) {
                                                // if current variant's fallback exist in flags
                                                if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
                                                        // then convert <text to convert> to fallback language
                                                        $this->mRules =
                                                                $this->mConverter->autoConvert( $this->mRules,
                                                                        $variantFallback );
                                                        break;
                                                }
                                        }
                                }
                        }
                        // The text is final now: output it raw.
                        $this->mFlags = $flags = array( 'R' => true );
                }

                if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
                        // decode => HTML entities modified by Sanitizer::removeHTMLtags
                        $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
                        $this->parseRules();
                }
                $rules = $this->mRules;

                if ( !$this->mBidtable && !$this->mUnidtable ) {
                        if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                                // fill all variants if text in -{A/H/-|text} without rules
                                foreach ( $this->mConverter->mVariants as $v ) {
                                        $this->mBidtable[$v] = $rules;
                                }
                        } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
                                $this->mFlags = $flags = array( 'R' => true );
                        }
                }

                // false means "no flag produced any output"; see the error case
                // after the loop.
                $this->mRuleDisplay = false;
                foreach ( $flags as $flag => $unused ) {
                        switch ( $flag ) {
                                case 'R':
                                        // if we don't do content convert, still strip the -{}- tags
                                        $this->mRuleDisplay = $rules;
                                        break;
                                case 'N':
                                        // process N flag: output current variant name
                                        $ruleVar = trim( $rules );
                                        if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
                                                $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
                                        } else {
                                                $this->mRuleDisplay = '';
                                        }
                                        break;
                                case 'D':
                                        // process D flag: output rules description
                                        $this->mRuleDisplay = $this->getRulesDesc();
                                        break;
                                case 'H':
                                        // process H,- flag or T only: output nothing
                                        $this->mRuleDisplay = '';
                                        break;
                                case '-':
                                        $this->mRulesAction = 'remove';
                                        $this->mRuleDisplay = '';
                                        break;
                                case '+':
                                        $this->mRulesAction = 'add';
                                        $this->mRuleDisplay = '';
                                        break;
                                case 'S':
                                        $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
                                        break;
                                case 'T':
                                        $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
                                        $this->mRuleDisplay = '';
                                        break;
                                default:
                                        // ignore unknown flags (but see error case below)
                        }
                }
                if ( $this->mRuleDisplay === false ) {
                        $this->mRuleDisplay = '<span class="error">'
                                . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
                                . '</span>';
                }

                $this->generateConvTable();
        }

        /**
         * Not implemented yet: the body is empty, so the call yields null.
         */
        public function hasRules() {
                // TODO:
        }

        /**
         * @return String: text to display in place of the rule (set by parse())
         */
        public function getDisplay() {
                return $this->mRuleDisplay;
        }

        /**
         * @return String|false: title text set by a 'T' flag, false if none
         */
        public function getTitle() {
                return $this->mRuleTitle;
        }

        /**
         * @return String: 'add', 'remove' or 'none'
         */
        public function getRulesAction() {
                return $this->mRulesAction;
        }

        /**
         * @return Array: conversion table built by generateConvTable(),
         *   indexed by variant code
         */
        public function getConvTable() {
                return $this->mConvTable;
        }

        /**
         * @return String: the rule text left after the flags were split off
         */
        public function getRules() {
                return $this->mRules;
        }

        /**
         * @return Array: normalised flags, as "flag => true"
         */
        public function getFlags() {
                return $this->mFlags;
        }
}