MediaWiki  REL1_21
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
00031 class LanguageConverter {
00032 
00038         static public $languagesWithVariants = array(
00039                 'gan',
00040                 'iu',
00041                 'kk',
00042                 'ku',
00043                 'shi',
00044                 'sr',
00045                 'tg',
00046                 'uz',
00047                 'zh',
00048         );
00049 
00050         public $mMainLanguageCode;
00051         public $mVariants, $mVariantFallbacks, $mVariantNames;
00052         public $mTablesLoaded = false;
00053         public $mTables;
00054         // 'bidirectional' 'unidirectional' 'disable' for each variant
00055         public $mManualLevel;
00056 
00060         public $mCacheKey;
00061 
00062         public $mLangObj;
00063         public $mFlags;
00064         public $mDescCodeSep = ':', $mDescVarSep = ';';
00065         public $mUcfirst = false;
00066         public $mConvRuleTitle = false;
00067         public $mURLVariant;
00068         public $mUserVariant;
00069         public $mHeaderVariant;
00070         public $mMaxDepth = 10;
00071         public $mVarSeparatorPattern;
00072 
00073         const CACHE_VERSION_KEY = 'VERSION 6';
00074 
00085         public function __construct( $langobj, $maincode, $variants = array(),
00086                                                                 $variantfallbacks = array(), $flags = array(),
00087                                                                 $manualLevel = array() ) {
00088                 global $wgDisabledVariants;
00089                 $this->mLangObj = $langobj;
00090                 $this->mMainLanguageCode = $maincode;
00091                 $this->mVariants = array_diff( $variants, $wgDisabledVariants );
00092                 $this->mVariantFallbacks = $variantfallbacks;
00093                 $this->mVariantNames = Language::fetchLanguageNames();
00094                 $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
00095                 $defaultflags = array(
00096                         // 'S' show converted text
00097                         // '+' add rules for alltext
00098                         // 'E' the gave flags is error
00099                         // these flags above are reserved for program
00100                         'A' => 'A',       // add rule for convert code (all text convert)
00101                         'T' => 'T',       // title convert
00102                         'R' => 'R',       // raw content
00103                         'D' => 'D',       // convert description (subclass implement)
00104                         '-' => '-',       // remove convert (not implement)
00105                         'H' => 'H',       // add rule for convert code
00106                                                   // (but no display in placed code)
00107                         'N' => 'N'        // current variant name
00108                 );
00109                 $this->mFlags = array_merge( $defaultflags, $flags );
00110                 foreach ( $this->mVariants as $v ) {
00111                         if ( array_key_exists( $v, $manualLevel ) ) {
00112                                 $this->mManualLevel[$v] = $manualLevel[$v];
00113                         } else {
00114                                 $this->mManualLevel[$v] = 'bidirectional';
00115                         }
00116                         $this->mFlags[$v] = $v;
00117                 }
00118         }
00119 
00126         public function getVariants() {
00127                 return $this->mVariants;
00128         }
00129 
00141         public function getVariantFallbacks( $variant ) {
00142                 if ( isset( $this->mVariantFallbacks[$variant] ) ) {
00143                         return $this->mVariantFallbacks[$variant];
00144                 }
00145                 return $this->mMainLanguageCode;
00146         }
00147 
00152         public function getConvRuleTitle() {
00153                 return $this->mConvRuleTitle;
00154         }
00155 
00160         public function getPreferredVariant() {
00161                 global $wgDefaultLanguageVariant, $wgUser;
00162 
00163                 $req = $this->getURLVariant();
00164 
00165                 if ( $wgUser->isLoggedIn() && !$req ) {
00166                         $req = $this->getUserVariant();
00167                 } elseif ( !$req ) {
00168                         $req = $this->getHeaderVariant();
00169                 }
00170 
00171                 if ( $wgDefaultLanguageVariant && !$req ) {
00172                         $req = $this->validateVariant( $wgDefaultLanguageVariant );
00173                 }
00174 
00175                 // This function, unlike the other get*Variant functions, is
00176                 // not memoized (i.e. there return value is not cached) since
00177                 // new information might appear during processing after this
00178                 // is first called.
00179                 if ( $this->validateVariant( $req ) ) {
00180                         return $req;
00181                 }
00182                 return $this->mMainLanguageCode;
00183         }
00184 
00190         public function getDefaultVariant() {
00191                 global $wgDefaultLanguageVariant;
00192 
00193                 $req = $this->getURLVariant();
00194 
00195                 if ( !$req ) {
00196                         $req = $this->getHeaderVariant();
00197                 }
00198 
00199                 if ( $wgDefaultLanguageVariant && !$req ) {
00200                         $req = $this->validateVariant( $wgDefaultLanguageVariant );
00201                 }
00202 
00203                 if ( $req ) {
00204                         return $req;
00205                 }
00206                 return $this->mMainLanguageCode;
00207         }
00208 
00214         public function validateVariant( $variant = null ) {
00215                 if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
00216                         return $variant;
00217                 }
00218                 return null;
00219         }
00220 
00226         public function getURLVariant() {
00227                 global $wgRequest;
00228 
00229                 if ( $this->mURLVariant ) {
00230                         return $this->mURLVariant;
00231                 }
00232 
00233                 // see if the preference is set in the request
00234                 $ret = $wgRequest->getText( 'variant' );
00235 
00236                 if ( !$ret ) {
00237                         $ret = $wgRequest->getVal( 'uselang' );
00238                 }
00239 
00240                 return $this->mURLVariant = $this->validateVariant( $ret );
00241         }
00242 
00248         protected function getUserVariant() {
00249                 global $wgUser;
00250 
00251                 // memoizing this function wreaks havoc on parserTest.php
00252                 /*
00253                 if ( $this->mUserVariant ) {
00254                         return $this->mUserVariant;
00255                 }
00256                 */
00257 
00258                 // Get language variant preference from logged in users
00259                 // Don't call this on stub objects because that causes infinite
00260                 // recursion during initialisation
00261                 if ( $wgUser->isLoggedIn() )  {
00262                         $ret = $wgUser->getOption( 'variant' );
00263                 } else {
00264                         // figure out user lang without constructing wgLang to avoid
00265                         // infinite recursion
00266                         $ret = $wgUser->getOption( 'language' );
00267                 }
00268 
00269                 return $this->mUserVariant = $this->validateVariant( $ret );
00270         }
00271 
00277         protected function getHeaderVariant() {
00278                 global $wgRequest;
00279 
00280                 if ( $this->mHeaderVariant ) {
00281                         return $this->mHeaderVariant;
00282                 }
00283 
00284                 // see if some supported language variant is set in the
00285                 // HTTP header.
00286                 $languages = array_keys( $wgRequest->getAcceptLang() );
00287                 if ( empty( $languages ) ) {
00288                         return null;
00289                 }
00290 
00291                 $fallbackLanguages = array();
00292                 foreach ( $languages as $language ) {
00293                         $this->mHeaderVariant = $this->validateVariant( $language );
00294                         if ( $this->mHeaderVariant ) {
00295                                 break;
00296                         }
00297 
00298                         // To see if there are fallbacks of current language.
00299                         // We record these fallback variants, and process
00300                         // them later.
00301                         $fallbacks = $this->getVariantFallbacks( $language );
00302                         if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
00303                                 $fallbackLanguages[] = $fallbacks;
00304                         } elseif ( is_array( $fallbacks ) ) {
00305                                 $fallbackLanguages =
00306                                         array_merge( $fallbackLanguages, $fallbacks );
00307                         }
00308                 }
00309 
00310                 if ( !$this->mHeaderVariant ) {
00311                         // process fallback languages now
00312                         $fallback_languages = array_unique( $fallbackLanguages );
00313                         foreach ( $fallback_languages as $language ) {
00314                                 $this->mHeaderVariant = $this->validateVariant( $language );
00315                                 if ( $this->mHeaderVariant ) {
00316                                         break;
00317                                 }
00318                         }
00319                 }
00320 
00321                 return $this->mHeaderVariant;
00322         }
00323 
00334         public function autoConvert( $text, $toVariant = false ) {
00335                 wfProfileIn( __METHOD__ );
00336 
00337                 $this->loadTables();
00338 
00339                 if ( !$toVariant ) {
00340                         $toVariant = $this->getPreferredVariant();
00341                         if ( !$toVariant ) {
00342                                 wfProfileOut( __METHOD__ );
00343                                 return $text;
00344                         }
00345                 }
00346 
00347                 if( $this->guessVariant( $text, $toVariant ) ) {
00348                         wfProfileOut( __METHOD__ );
00349                         return $text;
00350                 }
00351 
00352                 /* we convert everything except:
00353                    1. HTML markups (anything between < and >)
00354                    2. HTML entities
00355                    3. placeholders created by the parser
00356                 */
00357                 global $wgParser;
00358                 if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
00359                         $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
00360                 } else {
00361                         $marker = '';
00362                 }
00363 
00364                 // this one is needed when the text is inside an HTML markup
00365                 $htmlfix = '|<[^>]+$|^[^<>]*>';
00366 
00367                 // disable convert to variants between <code></code> tags
00368                 $codefix = '<code>.+?<\/code>|';
00369                 // disable convertsion of <script type="text/javascript"> ... </script>
00370                 $scriptfix = '<script.*?>.*?<\/script>|';
00371                 // disable conversion of <pre xxxx> ... </pre>
00372                 $prefix = '<pre.*?>.*?<\/pre>|';
00373 
00374                 $reg = '/' . $codefix . $scriptfix . $prefix .
00375                         '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
00376                 $startPos = 0;
00377                 $sourceBlob = '';
00378                 $literalBlob = '';
00379 
00380                 // Guard against delimiter nulls in the input
00381                 $text = str_replace( "\000", '', $text );
00382 
00383                 $markupMatches = null;
00384                 $elementMatches = null;
00385                 while ( $startPos < strlen( $text ) ) {
00386                         if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
00387                                 $elementPos = $markupMatches[0][1];
00388                                 $element = $markupMatches[0][0];
00389                         } else {
00390                                 $elementPos = strlen( $text );
00391                                 $element = '';
00392                         }
00393 
00394                         // Queue the part before the markup for translation in a batch
00395                         $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
00396 
00397                         // Advance to the next position
00398                         $startPos = $elementPos + strlen( $element );
00399 
00400                         // Translate any alt or title attributes inside the matched element
00401                         if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
00402                                 $elementMatches ) )
00403                         {
00404                                 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
00405                                 $changed = false;
00406                                 foreach ( array( 'title', 'alt' ) as $attrName ) {
00407                                         if ( !isset( $attrs[$attrName] ) ) {
00408                                                 continue;
00409                                         }
00410                                         $attr = $attrs[$attrName];
00411                                         // Don't convert URLs
00412                                         if ( !strpos( $attr, '://' ) ) {
00413                                                 $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
00414                                         }
00415 
00416                                         // Remove HTML tags to avoid disrupting the layout
00417                                         $attr = preg_replace( '/<[^>]+>/', '', $attr );
00418                                         if ( $attr !== $attrs[$attrName] ) {
00419                                                 $attrs[$attrName] = $attr;
00420                                                 $changed = true;
00421                                         }
00422                                 }
00423                                 if ( $changed ) {
00424                                         $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
00425                                                 $elementMatches[3];
00426                                 }
00427                         }
00428                         $literalBlob .= $element . "\000";
00429                 }
00430 
00431                 // Do the main translation batch
00432                 $translatedBlob = $this->translate( $sourceBlob, $toVariant );
00433 
00434                 // Put the output back together
00435                 $translatedIter = StringUtils::explode( "\000", $translatedBlob );
00436                 $literalIter = StringUtils::explode( "\000", $literalBlob );
00437                 $output = '';
00438                 while ( $translatedIter->valid() && $literalIter->valid() ) {
00439                         $output .= $translatedIter->current();
00440                         $output .= $literalIter->current();
00441                         $translatedIter->next();
00442                         $literalIter->next();
00443                 }
00444 
00445                 wfProfileOut( __METHOD__ );
00446                 return $output;
00447         }
00448 
00458         public function translate( $text, $variant ) {
00459                 wfProfileIn( __METHOD__ );
00460                 // If $text is empty or only includes spaces, do nothing
00461                 // Otherwise translate it
00462                 if ( trim( $text ) ) {
00463                         $this->loadTables();
00464                         $text = $this->mTables[$variant]->replace( $text );
00465                 }
00466                 wfProfileOut( __METHOD__ );
00467                 return $text;
00468         }
00469 
00476         public function autoConvertToAllVariants( $text ) {
00477                 wfProfileIn( __METHOD__ );
00478                 $this->loadTables();
00479 
00480                 $ret = array();
00481                 foreach ( $this->mVariants as $variant ) {
00482                         $ret[$variant] = $this->translate( $text, $variant );
00483                 }
00484 
00485                 wfProfileOut( __METHOD__ );
00486                 return $ret;
00487         }
00488 
00500         public function convertLinkToAllVariants( $text ) {
00501                 return $this->autoConvertToAllVariants( $text );
00502         }
00503 
00509         protected function applyManualConv( $convRule ) {
00510                 // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
00511                 // title conversion.
00512                 // Bug 24072: $mConvRuleTitle was overwritten by other manual
00513                 // rule(s) not for title, this breaks the title conversion.
00514                 $newConvRuleTitle = $convRule->getTitle();
00515                 if ( $newConvRuleTitle ) {
00516                         // So I add an empty check for getTitle()
00517                         $this->mConvRuleTitle = $newConvRuleTitle;
00518                 }
00519 
00520                 // merge/remove manual conversion rules to/from global table
00521                 $convTable = $convRule->getConvTable();
00522                 $action = $convRule->getRulesAction();
00523                 foreach ( $convTable as $variant => $pair ) {
00524                         if ( !$this->validateVariant( $variant ) ) {
00525                                 continue;
00526                         }
00527 
00528                         if ( $action == 'add' ) {
00529                                 foreach ( $pair as $from => $to ) {
00530                                         // to ensure that $from and $to not be left blank
00531                                         // so $this->translate() could always return a string
00532                                         if ( $from || $to ) {
00533                                                 // more efficient than array_merge(), about 2.5 times.
00534                                                 $this->mTables[$variant]->setPair( $from, $to );
00535                                         }
00536                                 }
00537                         } elseif ( $action == 'remove' ) {
00538                                 $this->mTables[$variant]->removeArray( $pair );
00539                         }
00540                 }
00541         }
00542 
00550         public function convertTitle( $title ) {
00551                 $variant = $this->getPreferredVariant();
00552                 $index = $title->getNamespace();
00553                 if ( $index !== NS_MAIN ) {
00554                         $text = $this->convertNamespace( $index ) . ':';
00555                 } else {
00556                         $text = '';
00557                 }
00558                 $text .= $this->translate( $title->getText(), $variant );
00559                 return $text;
00560         }
00561 
00568         public function convertNamespace( $index ) {
00569                 $variant = $this->getPreferredVariant();
00570                 if ( $index === NS_MAIN ) {
00571                         return '';
00572                 } else {
00573                         // First check if a message gives a converted name in the target variant.
00574                         $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
00575                         if ( $nsConvMsg->exists() ) {
00576                                 return $nsConvMsg->plain();
00577                         }
00578                         // Then check if a message gives a converted name in content language
00579                         // which needs extra translation to the target variant.
00580                         $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
00581                         if ( $nsConvMsg->exists() ) {
00582                                 return $this->translate( $nsConvMsg->plain(), $variant );
00583                         }
00584                         // No message exists, retrieve it from the target variant's namespace names.
00585                         $langObj = $this->mLangObj->factory( $variant );
00586                         return $langObj->getFormattedNsText( $index );
00587                 }
00588         }
00589 
00604         public function convert( $text ) {
00605                 $variant = $this->getPreferredVariant();
00606                 return $this->convertTo( $text, $variant );
00607         }
00608 
00616         public function convertTo( $text, $variant ) {
00617                 global $wgDisableLangConversion;
00618                 if ( $wgDisableLangConversion ) {
00619                         return $text;
00620                 }
00621                 // Reset converter state for a new converter run.
00622                 $this->mConvRuleTitle = false;
00623                 return $this->recursiveConvertTopLevel( $text, $variant );
00624         }
00625 
00635         protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
00636                 $startPos = 0;
00637                 $out = '';
00638                 $length = strlen( $text );
00639                 $shouldConvert = !$this->guessVariant( $text, $variant );
00640 
00641                 while ( $startPos < $length ) {
00642                         $pos = strpos( $text, '-{', $startPos );
00643 
00644                         if ( $pos === false ) {
00645                                 // No more markup, append final segment
00646                                 $fragment = substr( $text, $startPos );
00647                                 $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
00648                                 return $out;
00649                         }
00650 
00651                         // Markup found
00652                         // Append initial segment
00653                         $fragment = substr( $text, $startPos, $pos - $startPos );
00654                         $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
00655 
00656                         // Advance position
00657                         $startPos = $pos;
00658 
00659                         // Do recursive conversion
00660                         $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
00661                 }
00662 
00663                 return $out;
00664         }
00665 
00677         protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
00678                 // Quick sanity check (no function calls)
00679                 if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
00680                         throw new MWException( __METHOD__ . ': invalid input string' );
00681                 }
00682 
00683                 $startPos += 2;
00684                 $inner = '';
00685                 $warningDone = false;
00686                 $length = strlen( $text );
00687 
00688                 while ( $startPos < $length ) {
00689                         $m = false;
00690                         preg_match( '/-\{|\}-/', $text, $m,  PREG_OFFSET_CAPTURE, $startPos );
00691                         if ( !$m ) {
00692                                 // Unclosed rule
00693                                 break;
00694                         }
00695 
00696                         $token = $m[0][0];
00697                         $pos = $m[0][1];
00698 
00699                         // Markup found
00700                         // Append initial segment
00701                         $inner .= substr( $text, $startPos, $pos - $startPos );
00702 
00703                         // Advance position
00704                         $startPos = $pos;
00705 
00706                         switch ( $token ) {
00707                                 case '-{':
00708                                         // Check max depth
00709                                         if ( $depth >= $this->mMaxDepth ) {
00710                                                 $inner .= '-{';
00711                                                 if ( !$warningDone ) {
00712                                                         $inner .= '<span class="error">' .
00713                                                                 wfMessage( 'language-converter-depth-warning' )
00714                                                                         ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
00715                                                                 '</span>';
00716                                                         $warningDone = true;
00717                                                 }
00718                                                 $startPos += 2;
00719                                                 continue;
00720                                         }
00721                                         // Recursively parse another rule
00722                                         $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
00723                                         break;
00724                                 case '}-':
00725                                         // Apply the rule
00726                                         $startPos += 2;
00727                                         $rule = new ConverterRule( $inner, $this );
00728                                         $rule->parse( $variant );
00729                                         $this->applyManualConv( $rule );
00730                                         return $rule->getDisplay();
00731                                 default:
00732                                         throw new MWException( __METHOD__ . ': invalid regex match' );
00733                         }
00734                 }
00735 
00736                 // Unclosed rule
00737                 if ( $startPos < $length ) {
00738                         $inner .= substr( $text, $startPos );
00739                 }
00740                 $startPos = $length;
00741                 return '-{' . $this->autoConvert( $inner, $variant );
00742         }
00743 
00755         public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
00756                 # If the article has already existed, there is no need to
00757                 # check it again, otherwise it may cause a fault.
00758                 if ( is_object( $nt ) && $nt->exists() ) {
00759                         return;
00760                 }
00761 
00762                 global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
00763                         $wgUser;
00764                 $isredir = $wgRequest->getText( 'redirect', 'yes' );
00765                 $action = $wgRequest->getText( 'action' );
00766                 $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
00767                 $disableLinkConversion = $wgDisableLangConversion
00768                         || $wgDisableTitleConversion;
00769                 $linkBatch = new LinkBatch();
00770 
00771                 $ns = NS_MAIN;
00772 
00773                 if ( $disableLinkConversion ||
00774                          ( !$ignoreOtherCond &&
00775                            ( $isredir == 'no'
00776                                  || $action == 'edit'
00777                                  || $action == 'submit'
00778                                  || $linkconvert == 'no'
00779                                  || $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
00780                         return;
00781                 }
00782 
00783                 if ( is_object( $nt ) ) {
00784                         $ns = $nt->getNamespace();
00785                 }
00786 
00787                 $variants = $this->autoConvertToAllVariants( $link );
00788                 if ( !$variants ) { // give up
00789                         return;
00790                 }
00791 
00792                 $titles = array();
00793 
00794                 foreach ( $variants as $v ) {
00795                         if ( $v != $link ) {
00796                                 $varnt = Title::newFromText( $v, $ns );
00797                                 if ( !is_null( $varnt ) ) {
00798                                         $linkBatch->addObj( $varnt );
00799                                         $titles[] = $varnt;
00800                                 }
00801                         }
00802                 }
00803 
00804                 // fetch all variants in single query
00805                 $linkBatch->execute();
00806 
00807                 foreach ( $titles as $varnt ) {
00808                         if ( $varnt->getArticleID() > 0 ) {
00809                                 $nt = $varnt;
00810                                 $link = $varnt->getText();
00811                                 break;
00812                         }
00813                 }
00814         }
00815 
00821         public function getExtraHashOptions() {
00822                 $variant = $this->getPreferredVariant();
00823                 return '!' . $variant;
00824         }
00825 
00836         public function guessVariant($text, $variant) {
00837                 return false;
00838         }
00839 
00847         function loadDefaultTables() {
00848                 $name = get_class( $this );
00849                 throw new MWException( "Must implement loadDefaultTables() method in class $name" );
00850         }
00851 
/**
 * Load the conversion tables, either from the cache or by rebuilding them.
 *
 * Does nothing when the tables are already marked as loaded. Otherwise the
 * tables are fetched from $wgLangConvMemc (unless $fromCache is false); if
 * nothing usable comes back, the default tables are loaded, merged with the
 * result of parseCachedTable() for every variant, post-processed, and
 * written back to the cache.
 *
 * @param bool $fromCache whether to try the cache before rebuilding
 */
function loadTables( $fromCache = true ) {
	global $wgLangConvMemc;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$section = __METHOD__;
	wfProfileIn( $section );

	// Flag as loaded up front, before any of the loading work below runs.
	$this->mTablesLoaded = true;
	$this->mTables = false;

	if ( $fromCache ) {
		wfProfileIn( $section . '-cache' );
		$this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
		wfProfileOut( $section . '-cache' );
	}

	// A cached value only counts when it carries the current version marker.
	$cacheUsable = $this->mTables
		&& array_key_exists( self::CACHE_VERSION_KEY, $this->mTables );

	if ( !$cacheUsable ) {
		wfProfileIn( $section . '-recache' );

		// Nothing usable in the cache (or a fresh reload was requested):
		// start from the default tables, then layer the contents of
		// MediaWiki:Conversiontable/* on top of them.
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $variantCode ) {
			$overrides = $this->parseCachedTable( $variantCode );
			$this->mTables[$variantCode]->mergeArray( $overrides );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// 43200 seconds = 12 hours
		$wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );

		wfProfileOut( $section . '-recache' );
	}

	wfProfileOut( $section );
}
00892 
/**
 * Hook invoked by loadTables() after the default tables have been loaded
 * and merged, and before they are written to the cache.
 *
 * Intentionally empty in this class.
 */
function postLoadTables() {
	// Nothing to do here.
}
00897 
/**
 * Discard the tables currently held in memory and rebuild them,
 * bypassing the cache (loadTables() is called with $fromCache = false).
 */
function reloadTables() {
	$haveTables = (bool)$this->mTables;
	if ( $haveTables ) {
		unset( $this->mTables );
	}

	// Clear the "loaded" flag so that loadTables() does not return early.
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
00910 
/**
 * Parse a conversion table stored in the MediaWiki namespace.
 *
 * The top-level table for a variant is read through the message cache
 * (message 'conversiontable' in language $code); subpages are read from
 * the page MediaWiki:Conversiontable/$code/$subpage when it exists and
 * holds wikitext.
 *
 * Inside the text, every "-{ ... }-" block is scanned for mappings of the
 * form "from => to" separated by ';'. Quote characters, '*' and '#' are
 * stripped from a block before it is split, and anything after '//' in
 * the "to" part is treated as a trailing comment. Links of the form
 * [[MediaWiki:Conversiontable/$code/...]] are collected and, when
 * $recursive is true, parsed as further tables whose mappings are merged
 * into the result.
 *
 * Each page key is remembered in a function-level static, so a page that
 * was already visited (by any earlier call in this PHP process) yields an
 * empty array. This also stops link cycles between subpages.
 *
 * @param string $code language code of the variant
 * @param string $subpage subpage name below Conversiontable/$code, or ''
 * @param bool $recursive whether to follow links to further subpages
 * @return array map of source string => replacement string
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = array();

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return array();
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				//@todo: in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return array();
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = array();
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		// Drop the link caption, then split the target into
		// "<ns>:Conversiontable" / "<code>" / "<subpage...>".
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) < 2 ) {
			// A target without any '/' has no variant segment, so it
			// cannot point below Conversiontable/<code>. Skipping it here
			// also avoids reading the non-existent $b[1].
			continue;
		}
		$sublink = isset( $b[2] ) ? $b[2] : '';

		if ( $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = array();
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 );
		$stripped = str_replace( array( "'", '"', '*', '#' ), '',
			$mappings[0] );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Exactly one '=>' is required; anything else is ignored.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			$ret = array_merge( $ret, $s );
		}
	}

	// When mUcfirst is set, also register the first-letter-uppercased
	// form of every mapping (the lowercase originals are kept).
	if ( $this->mUcfirst ) {
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
01029 
/**
 * Enclose a string in "-{R| ... }-" so that it is excluded from
 * conversion.
 *
 * Text that already contains a "-{" or "}-" marker anywhere (including
 * at offset 0) is returned unchanged.
 *
 * @param string $text text to be wrapped
 * @param bool $noParse not used by this implementation
 * @return string the wrapped text, or $text itself when already marked
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// strpos() yields 0 for a marker at the very start of the string, so
	// the result must be compared against false explicitly.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
01047 
/**
 * Convert the sorting key for a category link.
 *
 * This implementation is the identity: the key is handed back untouched.
 *
 * @param string $key the sort key
 * @return string the same key
 */
function convertCategoryKey( $key ) {
	$converted = $key;
	return $converted;
}
01059 
/**
 * Handler run after a page has been saved.
 *
 * When the saved page lives in the MediaWiki namespace, its DB key starts
 * with "Conversiontable/" and the path segment that follows passes
 * validateVariant(), the conversion tables are reloaded.
 *
 * Only $page is used; the remaining parameters are accepted but ignored.
 *
 * @param object $page page object providing getTitle()
 * @param mixed $user unused
 * @param mixed $content unused
 * @param mixed $summary unused
 * @param mixed $isMinor unused
 * @param mixed $isWatch unused
 * @param mixed $section unused
 * @param mixed $flags unused
 * @param mixed $revision unused
 * @return bool always true
 */
function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
		$isWatch, $section, $flags, $revision ) {
	$titleobj = $page->getTitle();
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return true;
	}

	// "Conversiontable/<variant>[/<subpage...>]"
	$segments = explode( '/', $titleobj->getDBkey(), 3 );
	$isTablePage = count( $segments ) > 1 && $segments[0] == 'Conversiontable';

	if ( $isTablePage && $this->validateVariant( $segments[1] ) ) {
		$this->reloadTables();
	}

	return true;
}
01091 
/**
 * Neutralise conversion markers inside math text.
 *
 * Every "-{" becomes "-&#123;" and every "}-" becomes "&#125;-", so that
 * no unwanted markup appears in the math image tag.
 *
 * @param string $text the text to armour
 * @return string the text with both markers replaced
 */
public function armourMath( $text ) {
	$escapes = array(
		'-{' => '-&#123;',
		'}-' => '&#125;-',
	);
	return strtr( $text, $escapes );
}
01106 
/**
 * Get the regular expression used to split a rule into its
 * per-variant parts. The pattern is built on first use and then kept in
 * $this->mVarSeparatorPattern.
 *
 * A ';' only counts as a separator when a valid variant name (or the end
 * of the text) follows it. For example
 *   -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
 *       <span style="font-size:120%;">yyy</span>;}-
 * is meant to be split into
 *   array(
 *     [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
 *     [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
 *     [2] => ''
 *   )
 *
 * @return string pattern suitable for preg_split()
 */
function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// Alternatives allowed inside the look-ahead that follows the ';'
	$alternatives = array();
	foreach ( $this->mVariants as $variant ) {
		// zh-hans:xxx;zh-hant:yyy
		$alternatives[] = $variant . '\s*:';
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		$alternatives[] = '[^;]*?=>\s*' . $variant . '\s*:';
	}
	// a trailing ';' at the very end of the rule
	$alternatives[] = '\s*$';

	$this->mVarSeparatorPattern = '/;\s*(?=' . implode( '|', $alternatives ) . ')/';
	return $this->mVarSeparatorPattern;
}
01135 }
01136 
/**
 * Parser for a single manual conversion rule, i.e. the text found
 * between "-{" and "}-".
 *
 * Typical use: construct with the rule text and the owning converter,
 * call parse(), then read the results through the getters.
 */
class ConverterRule {
	public $mText; // original text in -{text}-
	public $mConverter; // LanguageConverter object
	public $mRuleDisplay = ''; // text to output in place of the rule
	public $mRuleTitle = false; // title text set by the T flag, or false
	public $mRules = '';// string : the text of the rules
	public $mRulesAction = 'none'; // 'none', 'add' or 'remove'
	public $mFlags = array(); // normalised flags, flag => true
	public $mVariantFlags = array(); // variant codes used as flags, code => index
	public $mConvTable = array(); // variant => array( from => to )
	public $mBidtable = array();// array of the translation in each variant
	public $mUnidtable = array();// array of the translation in each variant

	/**
	 * @param string $text the text between -{ and }-
	 * @param LanguageConverter $converter the converter this rule belongs to
	 */
	public function __construct( $text, $converter ) {
		$this->mText = $text;
		$this->mConverter = $converter;
	}

	/**
	 * Look up the bidirectional-table text for the first of the given
	 * variants that has an entry.
	 *
	 * @param string|array $variants a variant code or a list of them
	 * @return string|bool the text, or false when none of the variants
	 *  has an entry (or no variant was given)
	 */
	public function getTextInBidtable( $variants ) {
		$variants = (array)$variants;
		if ( !$variants ) {
			return false;
		}
		foreach ( $variants as $variant ) {
			if ( isset( $this->mBidtable[$variant] ) ) {
				return $this->mBidtable[$variant];
			}
		}
		return false;
	}

	/**
	 * Split the flag part (everything before the first '|') off mText and
	 * normalise it. Sets mFlags, mVariantFlags and mRules.
	 *
	 * Only flags known to the converter (keys of its mFlags) are kept,
	 * mapped to their canonical name.
	 */
	function parseFlags() {
		$text = $this->mText;
		$flags = array();
		$variantFlags = array();

		$sepPos = strpos( $text, '|' );
		if ( $sepPos !== false ) {
			$validFlags = $this->mConverter->mFlags;
			$f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
			foreach ( $f as $ff ) {
				$ff = trim( $ff );
				if ( isset( $validFlags[$ff] ) ) {
					$flags[$validFlags[$ff]] = true;
				}
			}
			// strval() guards against substr() returning false when
			// nothing follows the '|'
			$text = strval( substr( $text, $sepPos + 1 ) );
		}

		if ( !$flags ) {
			// no (valid) flag at all: plain "show" rule
			$flags['S'] = true;
		} elseif ( isset( $flags['R'] ) ) {
			$flags = array( 'R' => true );// remove other flags
		} elseif ( isset( $flags['N'] ) ) {
			$flags = array( 'N' => true );// remove other flags
		} elseif ( isset( $flags['-'] ) ) {
			$flags = array( '-' => true );// remove other flags
		} elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
			// T on its own implies H
			$flags['H'] = true;
		} elseif ( isset( $flags['H'] ) ) {
			// replace A flag, and remove other flags except T
			// (D is carried over as well)
			$temp = array( '+' => true, 'H' => true );
			if ( isset( $flags['T'] ) ) {
				$temp['T'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				$temp['D'] = true;
			}
			$flags = $temp;
		} else {
			if ( isset( $flags['A'] ) ) {
				$flags['+'] = true;
				$flags['S'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				unset( $flags['S'] );
			}
			// try to find flags like "zh-hans", "zh-hant"
			// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
			$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
			if ( $variantFlags ) {
				$variantFlags = array_flip( $variantFlags );
				$flags = array();
			}
		}
		$this->mVariantFlags = $variantFlags;
		$this->mRules = $text;
		$this->mFlags = $flags;
	}

	/**
	 * Parse mRules into the bidirectional table (variant => text) and the
	 * unidirectional table (variant => array( from => to )).
	 *
	 * Parts without a ':' are skipped. A part whose variant is not a key
	 * of the converter's mVariantNames is treated as a syntax error and
	 * empties both tables.
	 */
	function parseRules() {
		$rules = $this->mRules;
		$bidtable = array();
		$unidtable = array();
		$variants = $this->mConverter->mVariants;
		$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

		$choice = preg_split( $varsep_pattern, $rules );

		foreach ( $choice as $c ) {
			$v = explode( ':', $c, 2 );
			if ( count( $v ) != 2 ) {
				// syntax error, skip
				continue;
			}
			$to = trim( $v[1] );
			$v = trim( $v[0] );
			$u = explode( '=>', $v, 2 );
			// If $to is empty, strtr() could return a wrong result.
			// Compare against '' explicitly: a plain truthiness test
			// would also discard the legitimate text '0'.
			if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants, true ) ) {
				$bidtable[$v] = $to;
			} elseif ( count( $u ) == 2 ) {
				$from = trim( $u[0] );
				$v = trim( $u[1] );
				// An empty $from would become an empty-string key in the
				// conversion table, which strtr() cannot handle either.
				if ( $from !== '' && $to !== '' && in_array( $v, $variants, true ) ) {
					$unidtable[$v][$from] = $to;
				}
			}
			// syntax error, pass
			if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
				$bidtable = array();
				$unidtable = array();
				break;
			}
		}
		$this->mBidtable = $bidtable;
		$this->mUnidtable = $unidtable;
	}

	/**
	 * Build a human-readable description of the parsed rules, using the
	 * converter's variant names and its code/variant separators.
	 *
	 * @return string
	 */
	function getRulesDesc() {
		$codesep = $this->mConverter->mDescCodeSep;
		$varsep = $this->mConverter->mDescVarSep;
		$text = '';
		foreach ( $this->mBidtable as $k => $v ) {
			$text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
		}
		foreach ( $this->mUnidtable as $k => $a ) {
			foreach ( $a as $from => $to ) {
				$text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
					"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Work out the text to display for the given variant.
	 *
	 * Lookup order: the variant's own bidirectional entry, the entries of
	 * its fallbacks, the first unidirectional target for the variant and,
	 * when manual conversion is 'disable' for the variant, the first text
	 * found in either table. With no parsed rules at all, the raw rule
	 * text is returned.
	 *
	 * @param string $variant variant code
	 * @return string|bool the text, or false when nothing applies
	 */
	function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
			return $this->mRules;
		}

		// "Not found" is signalled by false; compare strictly so that a
		// found text such as '0' is not mistaken for a miss.

		// display current variant in bidirectional array
		$disp = $this->getTextInBidtable( $variant );
		// or display current variant in fallbacks
		if ( $disp === false ) {
			$disp = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $variant ) );
		}
		// or display current variant in unidirectional array
		if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
			$disp = array_values( $unidtable[$variant] );
			$disp = $disp[0];
		}
		// or display first text under disable manual convert
		if ( $disp === false
			&& isset( $this->mConverter->mManualLevel[$variant] )
			&& $this->mConverter->mManualLevel[$variant] == 'disable'
		) {
			if ( count( $bidtable ) > 0 ) {
				$disp = array_values( $bidtable );
				$disp = $disp[0];
			} else {
				$disp = array_values( $unidtable );
				$disp = array_values( $disp[0] );
				$disp = $disp[0];
			}
		}
		return $disp;
	}

	/**
	 * Fill mConvTable (variant => array( from => to )) from the parsed
	 * bidirectional and unidirectional tables, honouring the converter's
	 * per-variant manual level.
	 */
	function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = array();
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->mManualLevel;

		$vmarked = array();
		foreach ( $this->mConverter->mVariants as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				// strict check: '0' is a valid fallback text
				if ( $vf !== false ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] == 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] == 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}
			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
				&& isset( $unidtable[$v] ) )
			{
				if ( isset( $this->mConvTable[$v] ) ) {
					$this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse the rule: flags first, then the rule text, then compute the
	 * display text, title, rule action and conversion table.
	 *
	 * @param string|null $variant variant to render for; when empty, the
	 *  converter's preferred variant is used
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else { // if current variant is not in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exists in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			// the (possibly converted) text is then output as-is
			$this->mFlags = $flags = array( 'R' => true );
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// decode => HTML entities modified by Sanitizer::removeHTMLtags
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if text in -{A/H/-|text} without rules
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				// nothing parsed and no special flag: output the raw text
				$this->mFlags = $flags = array( 'R' => true );
			}
		}

		// false means "no flag produced any display text yet"
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
						$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
					} else {
						$this->mRuleDisplay = '';
					}
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedStr( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Not implemented yet: the body is empty, so callers receive null.
	 */
	public function hasRules() {
		// TODO:
	}

	/**
	 * Get the text to display in place of the rule.
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get the title text set by the rule.
	 * @return string|bool false when parse() has not assigned one
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Get what should happen to the rule's mappings.
	 * @return string 'none', 'add' or 'remove'
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get the conversion table produced by generateConvTable().
	 * @return array variant => array( from => to )
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get the rule text (the part after the flags).
	 * @return string
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get the normalised flags.
	 * @return array flag => true
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}