MediaWiki  REL1_23
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
00029 class LanguageConverter {
00030 
00036     static public $languagesWithVariants = array(
00037         'gan',
00038         'iu',
00039         'kk',
00040         'ku',
00041         'shi',
00042         'sr',
00043         'tg',
00044         'uz',
00045         'zh',
00046     );
00047 
00048     public $mMainLanguageCode;
00049     public $mVariants, $mVariantFallbacks, $mVariantNames;
00050     public $mTablesLoaded = false;
00051     public $mTables;
00052     // 'bidirectional' 'unidirectional' 'disable' for each variant
00053     public $mManualLevel;
00054 
00058     public $mCacheKey;
00059 
00060     public $mLangObj;
00061     public $mFlags;
00062     public $mDescCodeSep = ':', $mDescVarSep = ';';
00063     public $mUcfirst = false;
00064     public $mConvRuleTitle = false;
00065     public $mURLVariant;
00066     public $mUserVariant;
00067     public $mHeaderVariant;
00068     public $mMaxDepth = 10;
00069     public $mVarSeparatorPattern;
00070 
00071     const CACHE_VERSION_KEY = 'VERSION 7';
00072 
00083     public function __construct( $langobj, $maincode, $variants = array(),
00084                                 $variantfallbacks = array(), $flags = array(),
00085                                 $manualLevel = array() ) {
00086         global $wgDisabledVariants;
00087         $this->mLangObj = $langobj;
00088         $this->mMainLanguageCode = $maincode;
00089         $this->mVariants = array_diff( $variants, $wgDisabledVariants );
00090         $this->mVariantFallbacks = $variantfallbacks;
00091         $this->mVariantNames = Language::fetchLanguageNames();
00092         $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
00093         $defaultflags = array(
00094             // 'S' show converted text
00095             // '+' add rules for alltext
00096             // 'E' the gave flags is error
00097             // these flags above are reserved for program
00098             'A' => 'A',   // add rule for convert code (all text convert)
00099             'T' => 'T',   // title convert
00100             'R' => 'R',   // raw content
00101             'D' => 'D',   // convert description (subclass implement)
00102             '-' => '-',   // remove convert (not implement)
00103             'H' => 'H',   // add rule for convert code (but no display in placed code)
00104             'N' => 'N'    // current variant name
00105         );
00106         $this->mFlags = array_merge( $defaultflags, $flags );
00107         foreach ( $this->mVariants as $v ) {
00108             if ( array_key_exists( $v, $manualLevel ) ) {
00109                 $this->mManualLevel[$v] = $manualLevel[$v];
00110             } else {
00111                 $this->mManualLevel[$v] = 'bidirectional';
00112             }
00113             $this->mFlags[$v] = $v;
00114         }
00115     }
00116 
00123     public function getVariants() {
00124         return $this->mVariants;
00125     }
00126 
00138     public function getVariantFallbacks( $variant ) {
00139         if ( isset( $this->mVariantFallbacks[$variant] ) ) {
00140             return $this->mVariantFallbacks[$variant];
00141         }
00142         return $this->mMainLanguageCode;
00143     }
00144 
00149     public function getConvRuleTitle() {
00150         return $this->mConvRuleTitle;
00151     }
00152 
00157     public function getPreferredVariant() {
00158         global $wgDefaultLanguageVariant, $wgUser;
00159 
00160         $req = $this->getURLVariant();
00161 
00162         if ( $wgUser->isLoggedIn() && !$req ) {
00163             $req = $this->getUserVariant();
00164         } elseif ( !$req ) {
00165             $req = $this->getHeaderVariant();
00166         }
00167 
00168         if ( $wgDefaultLanguageVariant && !$req ) {
00169             $req = $this->validateVariant( $wgDefaultLanguageVariant );
00170         }
00171 
00172         // This function, unlike the other get*Variant functions, is
00173         // not memoized (i.e. there return value is not cached) since
00174         // new information might appear during processing after this
00175         // is first called.
00176         if ( $this->validateVariant( $req ) ) {
00177             return $req;
00178         }
00179         return $this->mMainLanguageCode;
00180     }
00181 
00187     public function getDefaultVariant() {
00188         global $wgDefaultLanguageVariant;
00189 
00190         $req = $this->getURLVariant();
00191 
00192         if ( !$req ) {
00193             $req = $this->getHeaderVariant();
00194         }
00195 
00196         if ( $wgDefaultLanguageVariant && !$req ) {
00197             $req = $this->validateVariant( $wgDefaultLanguageVariant );
00198         }
00199 
00200         if ( $req ) {
00201             return $req;
00202         }
00203         return $this->mMainLanguageCode;
00204     }
00205 
00211     public function validateVariant( $variant = null ) {
00212         if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
00213             return $variant;
00214         }
00215         return null;
00216     }
00217 
00223     public function getURLVariant() {
00224         global $wgRequest;
00225 
00226         if ( $this->mURLVariant ) {
00227             return $this->mURLVariant;
00228         }
00229 
00230         // see if the preference is set in the request
00231         $ret = $wgRequest->getText( 'variant' );
00232 
00233         if ( !$ret ) {
00234             $ret = $wgRequest->getVal( 'uselang' );
00235         }
00236 
00237         $this->mURLVariant = $this->validateVariant( $ret );
00238         return $this->mURLVariant;
00239     }
00240 
00246     protected function getUserVariant() {
00247         global $wgUser, $wgContLang;
00248 
00249         // memoizing this function wreaks havoc on parserTest.php
00250         /*
00251         if ( $this->mUserVariant ) {
00252             return $this->mUserVariant;
00253         }
00254         */
00255 
00256         // Get language variant preference from logged in users
00257         // Don't call this on stub objects because that causes infinite
00258         // recursion during initialisation
00259         if ( $wgUser->isLoggedIn() ) {
00260             if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
00261                 $ret = $wgUser->getOption( 'variant' );
00262             } else {
00263                 $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
00264             }
00265         } else {
00266             // figure out user lang without constructing wgLang to avoid
00267             // infinite recursion
00268             $ret = $wgUser->getOption( 'language' );
00269         }
00270 
00271         $this->mUserVariant = $this->validateVariant( $ret );
00272         return $this->mUserVariant;
00273     }
00274 
00280     protected function getHeaderVariant() {
00281         global $wgRequest;
00282 
00283         if ( $this->mHeaderVariant ) {
00284             return $this->mHeaderVariant;
00285         }
00286 
00287         // see if some supported language variant is set in the
00288         // HTTP header.
00289         $languages = array_keys( $wgRequest->getAcceptLang() );
00290         if ( empty( $languages ) ) {
00291             return null;
00292         }
00293 
00294         $fallbackLanguages = array();
00295         foreach ( $languages as $language ) {
00296             $this->mHeaderVariant = $this->validateVariant( $language );
00297             if ( $this->mHeaderVariant ) {
00298                 break;
00299             }
00300 
00301             // To see if there are fallbacks of current language.
00302             // We record these fallback variants, and process
00303             // them later.
00304             $fallbacks = $this->getVariantFallbacks( $language );
00305             if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
00306                 $fallbackLanguages[] = $fallbacks;
00307             } elseif ( is_array( $fallbacks ) ) {
00308                 $fallbackLanguages =
00309                     array_merge( $fallbackLanguages, $fallbacks );
00310             }
00311         }
00312 
00313         if ( !$this->mHeaderVariant ) {
00314             // process fallback languages now
00315             $fallback_languages = array_unique( $fallbackLanguages );
00316             foreach ( $fallback_languages as $language ) {
00317                 $this->mHeaderVariant = $this->validateVariant( $language );
00318                 if ( $this->mHeaderVariant ) {
00319                     break;
00320                 }
00321             }
00322         }
00323 
00324         return $this->mHeaderVariant;
00325     }
00326 
00337     public function autoConvert( $text, $toVariant = false ) {
00338         wfProfileIn( __METHOD__ );
00339 
00340         $this->loadTables();
00341 
00342         if ( !$toVariant ) {
00343             $toVariant = $this->getPreferredVariant();
00344             if ( !$toVariant ) {
00345                 wfProfileOut( __METHOD__ );
00346                 return $text;
00347             }
00348         }
00349 
00350         if ( $this->guessVariant( $text, $toVariant ) ) {
00351             wfProfileOut( __METHOD__ );
00352             return $text;
00353         }
00354 
00355         /* we convert everything except:
00356            1. HTML markups (anything between < and >)
00357            2. HTML entities
00358            3. placeholders created by the parser
00359         */
00360         global $wgParser;
00361         if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
00362             $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
00363         } else {
00364             $marker = '';
00365         }
00366 
00367         // this one is needed when the text is inside an HTML markup
00368         $htmlfix = '|<[^>]+$|^[^<>]*>';
00369 
00370         // disable convert to variants between <code> tags
00371         $codefix = '<code>.+?<\/code>|';
00372         // disable conversion of <script> tags
00373         $scriptfix = '<script.*?>.*?<\/script>|';
00374         // disable conversion of <pre> tags
00375         $prefix = '<pre.*?>.*?<\/pre>|';
00376 
00377         $reg = '/' . $codefix . $scriptfix . $prefix .
00378             '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
00379         $startPos = 0;
00380         $sourceBlob = '';
00381         $literalBlob = '';
00382 
00383         // Guard against delimiter nulls in the input
00384         $text = str_replace( "\000", '', $text );
00385 
00386         $markupMatches = null;
00387         $elementMatches = null;
00388         while ( $startPos < strlen( $text ) ) {
00389             if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
00390                 $elementPos = $markupMatches[0][1];
00391                 $element = $markupMatches[0][0];
00392             } else {
00393                 $elementPos = strlen( $text );
00394                 $element = '';
00395             }
00396 
00397             // Queue the part before the markup for translation in a batch
00398             $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
00399 
00400             // Advance to the next position
00401             $startPos = $elementPos + strlen( $element );
00402 
00403             // Translate any alt or title attributes inside the matched element
00404             if ( $element !== ''
00405                 && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
00406             ) {
00407                 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
00408                 $changed = false;
00409                 foreach ( array( 'title', 'alt' ) as $attrName ) {
00410                     if ( !isset( $attrs[$attrName] ) ) {
00411                         continue;
00412                     }
00413                     $attr = $attrs[$attrName];
00414                     // Don't convert URLs
00415                     if ( !strpos( $attr, '://' ) ) {
00416                         $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
00417                     }
00418 
00419                     // Remove HTML tags to avoid disrupting the layout
00420                     $attr = preg_replace( '/<[^>]+>/', '', $attr );
00421                     if ( $attr !== $attrs[$attrName] ) {
00422                         $attrs[$attrName] = $attr;
00423                         $changed = true;
00424                     }
00425                 }
00426                 if ( $changed ) {
00427                     $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
00428                         $elementMatches[3];
00429                 }
00430             }
00431             $literalBlob .= $element . "\000";
00432         }
00433 
00434         // Do the main translation batch
00435         $translatedBlob = $this->translate( $sourceBlob, $toVariant );
00436 
00437         // Put the output back together
00438         $translatedIter = StringUtils::explode( "\000", $translatedBlob );
00439         $literalIter = StringUtils::explode( "\000", $literalBlob );
00440         $output = '';
00441         while ( $translatedIter->valid() && $literalIter->valid() ) {
00442             $output .= $translatedIter->current();
00443             $output .= $literalIter->current();
00444             $translatedIter->next();
00445             $literalIter->next();
00446         }
00447 
00448         wfProfileOut( __METHOD__ );
00449         return $output;
00450     }
00451 
00461     public function translate( $text, $variant ) {
00462         wfProfileIn( __METHOD__ );
00463         // If $text is empty or only includes spaces, do nothing
00464         // Otherwise translate it
00465         if ( trim( $text ) ) {
00466             $this->loadTables();
00467             $text = $this->mTables[$variant]->replace( $text );
00468         }
00469         wfProfileOut( __METHOD__ );
00470         return $text;
00471     }
00472 
00479     public function autoConvertToAllVariants( $text ) {
00480         wfProfileIn( __METHOD__ );
00481         $this->loadTables();
00482 
00483         $ret = array();
00484         foreach ( $this->mVariants as $variant ) {
00485             $ret[$variant] = $this->translate( $text, $variant );
00486         }
00487 
00488         wfProfileOut( __METHOD__ );
00489         return $ret;
00490     }
00491 
00497     protected function applyManualConv( $convRule ) {
00498         // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
00499         // title conversion.
00500         // Bug 24072: $mConvRuleTitle was overwritten by other manual
00501         // rule(s) not for title, this breaks the title conversion.
00502         $newConvRuleTitle = $convRule->getTitle();
00503         if ( $newConvRuleTitle ) {
00504             // So I add an empty check for getTitle()
00505             $this->mConvRuleTitle = $newConvRuleTitle;
00506         }
00507 
00508         // merge/remove manual conversion rules to/from global table
00509         $convTable = $convRule->getConvTable();
00510         $action = $convRule->getRulesAction();
00511         foreach ( $convTable as $variant => $pair ) {
00512             if ( !$this->validateVariant( $variant ) ) {
00513                 continue;
00514             }
00515 
00516             if ( $action == 'add' ) {
00517                 foreach ( $pair as $from => $to ) {
00518                     // to ensure that $from and $to not be left blank
00519                     // so $this->translate() could always return a string
00520                     if ( $from || $to ) {
00521                         // more efficient than array_merge(), about 2.5 times.
00522                         $this->mTables[$variant]->setPair( $from, $to );
00523                     }
00524                 }
00525             } elseif ( $action == 'remove' ) {
00526                 $this->mTables[$variant]->removeArray( $pair );
00527             }
00528         }
00529     }
00530 
00538     public function convertTitle( $title ) {
00539         $variant = $this->getPreferredVariant();
00540         $index = $title->getNamespace();
00541         if ( $index !== NS_MAIN ) {
00542             $text = $this->convertNamespace( $index, $variant ) . ':';
00543         } else {
00544             $text = '';
00545         }
00546         $text .= $this->translate( $title->getText(), $variant );
00547         return $text;
00548     }
00549 
00557     public function convertNamespace( $index, $variant = null ) {
00558         if ( $variant === null ) {
00559             $variant = $this->getPreferredVariant();
00560         }
00561         if ( $index === NS_MAIN ) {
00562             return '';
00563         } else {
00564             // First check if a message gives a converted name in the target variant.
00565             $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
00566             if ( $nsConvMsg->exists() ) {
00567                 return $nsConvMsg->plain();
00568             }
00569             // Then check if a message gives a converted name in content language
00570             // which needs extra translation to the target variant.
00571             $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
00572             if ( $nsConvMsg->exists() ) {
00573                 return $this->translate( $nsConvMsg->plain(), $variant );
00574             }
00575             // No message exists, retrieve it from the target variant's namespace names.
00576             $langObj = $this->mLangObj->factory( $variant );
00577             return $langObj->getFormattedNsText( $index );
00578         }
00579     }
00580 
00595     public function convert( $text ) {
00596         $variant = $this->getPreferredVariant();
00597         return $this->convertTo( $text, $variant );
00598     }
00599 
00607     public function convertTo( $text, $variant ) {
00608         global $wgDisableLangConversion;
00609         if ( $wgDisableLangConversion ) {
00610             return $text;
00611         }
00612         // Reset converter state for a new converter run.
00613         $this->mConvRuleTitle = false;
00614         return $this->recursiveConvertTopLevel( $text, $variant );
00615     }
00616 
00626     protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
00627         $startPos = 0;
00628         $out = '';
00629         $length = strlen( $text );
00630         $shouldConvert = !$this->guessVariant( $text, $variant );
00631 
00632         while ( $startPos < $length ) {
00633             $pos = strpos( $text, '-{', $startPos );
00634 
00635             if ( $pos === false ) {
00636                 // No more markup, append final segment
00637                 $fragment = substr( $text, $startPos );
00638                 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
00639                 return $out;
00640             }
00641 
00642             // Markup found
00643             // Append initial segment
00644             $fragment = substr( $text, $startPos, $pos - $startPos );
00645             $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
00646 
00647             // Advance position
00648             $startPos = $pos;
00649 
00650             // Do recursive conversion
00651             $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
00652         }
00653 
00654         return $out;
00655     }
00656 
00668     protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
00669         // Quick sanity check (no function calls)
00670         if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
00671             throw new MWException( __METHOD__ . ': invalid input string' );
00672         }
00673 
00674         $startPos += 2;
00675         $inner = '';
00676         $warningDone = false;
00677         $length = strlen( $text );
00678 
00679         while ( $startPos < $length ) {
00680             $m = false;
00681             preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
00682             if ( !$m ) {
00683                 // Unclosed rule
00684                 break;
00685             }
00686 
00687             $token = $m[0][0];
00688             $pos = $m[0][1];
00689 
00690             // Markup found
00691             // Append initial segment
00692             $inner .= substr( $text, $startPos, $pos - $startPos );
00693 
00694             // Advance position
00695             $startPos = $pos;
00696 
00697             switch ( $token ) {
00698                 case '-{':
00699                     // Check max depth
00700                     if ( $depth >= $this->mMaxDepth ) {
00701                         $inner .= '-{';
00702                         if ( !$warningDone ) {
00703                             $inner .= '<span class="error">' .
00704                                 wfMessage( 'language-converter-depth-warning' )
00705                                     ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
00706                                 '</span>';
00707                             $warningDone = true;
00708                         }
00709                         $startPos += 2;
00710                         continue;
00711                     }
00712                     // Recursively parse another rule
00713                     $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
00714                     break;
00715                 case '}-':
00716                     // Apply the rule
00717                     $startPos += 2;
00718                     $rule = new ConverterRule( $inner, $this );
00719                     $rule->parse( $variant );
00720                     $this->applyManualConv( $rule );
00721                     return $rule->getDisplay();
00722                 default:
00723                     throw new MWException( __METHOD__ . ': invalid regex match' );
00724             }
00725         }
00726 
00727         // Unclosed rule
00728         if ( $startPos < $length ) {
00729             $inner .= substr( $text, $startPos );
00730         }
00731         $startPos = $length;
00732         return '-{' . $this->autoConvert( $inner, $variant );
00733     }
00734 
00746     public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
00747         # If the article has already existed, there is no need to
00748         # check it again, otherwise it may cause a fault.
00749         if ( is_object( $nt ) && $nt->exists() ) {
00750             return;
00751         }
00752 
00753         global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
00754         $isredir = $wgRequest->getText( 'redirect', 'yes' );
00755         $action = $wgRequest->getText( 'action' );
00756         $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
00757         $disableLinkConversion = $wgDisableLangConversion
00758             || $wgDisableTitleConversion;
00759         $linkBatch = new LinkBatch();
00760 
00761         $ns = NS_MAIN;
00762 
00763         if ( $disableLinkConversion ||
00764             ( !$ignoreOtherCond &&
00765                 ( $isredir == 'no'
00766                     || $action == 'edit'
00767                     || $action == 'submit'
00768                     || $linkconvert == 'no' ) ) ) {
00769             return;
00770         }
00771 
00772         if ( is_object( $nt ) ) {
00773             $ns = $nt->getNamespace();
00774         }
00775 
00776         $variants = $this->autoConvertToAllVariants( $link );
00777         if ( !$variants ) { // give up
00778             return;
00779         }
00780 
00781         $titles = array();
00782 
00783         foreach ( $variants as $v ) {
00784             if ( $v != $link ) {
00785                 $varnt = Title::newFromText( $v, $ns );
00786                 if ( !is_null( $varnt ) ) {
00787                     $linkBatch->addObj( $varnt );
00788                     $titles[] = $varnt;
00789                 }
00790             }
00791         }
00792 
00793         // fetch all variants in single query
00794         $linkBatch->execute();
00795 
00796         foreach ( $titles as $varnt ) {
00797             if ( $varnt->getArticleID() > 0 ) {
00798                 $nt = $varnt;
00799                 $link = $varnt->getText();
00800                 break;
00801             }
00802         }
00803     }
00804 
00810     public function getExtraHashOptions() {
00811         $variant = $this->getPreferredVariant();
00812         return '!' . $variant;
00813     }
00814 
00825     public function guessVariant( $text, $variant ) {
00826         return false;
00827     }
00828 
00836     function loadDefaultTables() {
00837         $name = get_class( $this );
00838         throw new MWException( "Must implement loadDefaultTables() method in class $name" );
00839     }
00840 
00846     function loadTables( $fromCache = true ) {
00847         global $wgLangConvMemc;
00848 
00849         if ( $this->mTablesLoaded ) {
00850             return;
00851         }
00852 
00853         wfProfileIn( __METHOD__ );
00854         $this->mTablesLoaded = true;
00855         $this->mTables = false;
00856         if ( $fromCache ) {
00857             wfProfileIn( __METHOD__ . '-cache' );
00858             $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
00859             wfProfileOut( __METHOD__ . '-cache' );
00860         }
00861         if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
00862             wfProfileIn( __METHOD__ . '-recache' );
00863             // not in cache, or we need a fresh reload.
00864             // We will first load the default tables
00865             // then update them using things in MediaWiki:Conversiontable/*
00866             $this->loadDefaultTables();
00867             foreach ( $this->mVariants as $var ) {
00868                 $cached = $this->parseCachedTable( $var );
00869                 $this->mTables[$var]->mergeArray( $cached );
00870             }
00871 
00872             $this->postLoadTables();
00873             $this->mTables[self::CACHE_VERSION_KEY] = true;
00874 
00875             $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
00876             wfProfileOut( __METHOD__ . '-recache' );
00877         }
00878         wfProfileOut( __METHOD__ );
00879     }
00880 
00884     function postLoadTables() { }
00885 
00891     function reloadTables() {
00892         if ( $this->mTables ) {
00893             unset( $this->mTables );
00894         }
00895         $this->mTablesLoaded = false;
00896         $this->loadTables( false );
00897     }
00898 
00918     function parseCachedTable( $code, $subpage = '', $recursive = true ) {
00919         static $parsed = array();
00920 
00921         $key = 'Conversiontable/' . $code;
00922         if ( $subpage ) {
00923             $key .= '/' . $subpage;
00924         }
00925         if ( array_key_exists( $key, $parsed ) ) {
00926             return array();
00927         }
00928 
00929         $parsed[$key] = true;
00930 
00931         if ( $subpage === '' ) {
00932             $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
00933         } else {
00934             $txt = false;
00935             $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
00936             if ( $title && $title->exists() ) {
00937                 $revision = Revision::newFromTitle( $title );
00938                 if ( $revision ) {
00939                     if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
00940                         $txt = $revision->getContent( Revision::RAW )->getNativeData();
00941                     }
00942 
00943                     // @todo in the future, use a specialized content model, perhaps based on json!
00944                 }
00945             }
00946         }
00947 
00948         # Nothing to parse if there's no text
00949         if ( $txt === false || $txt === null || $txt === '' ) {
00950             return array();
00951         }
00952 
00953         // get all subpage links of the form
00954         // [[MediaWiki:Conversiontable/zh-xx/...|...]]
00955         $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
00956             ':Conversiontable';
00957         $subs = StringUtils::explode( '[[', $txt );
00958         $sublinks = array();
00959         foreach ( $subs as $sub ) {
00960             $link = explode( ']]', $sub, 2 );
00961             if ( count( $link ) != 2 ) {
00962                 continue;
00963             }
00964             $b = explode( '|', $link[0], 2 );
00965             $b = explode( '/', trim( $b[0] ), 3 );
00966             if ( count( $b ) == 3 ) {
00967                 $sublink = $b[2];
00968             } else {
00969                 $sublink = '';
00970             }
00971 
00972             if ( $b[0] == $linkhead && $b[1] == $code ) {
00973                 $sublinks[] = $sublink;
00974             }
00975         }
00976 
00977         // parse the mappings in this page
00978         $blocks = StringUtils::explode( '-{', $txt );
00979         $ret = array();
00980         $first = true;
00981         foreach ( $blocks as $block ) {
00982             if ( $first ) {
00983                 // Skip the part before the first -{
00984                 $first = false;
00985                 continue;
00986             }
00987             $mappings = explode( '}-', $block, 2 );
00988             $stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] );
00989             $table = StringUtils::explode( ';', $stripped );
00990             foreach ( $table as $t ) {
00991                 $m = explode( '=>', $t, 3 );
00992                 if ( count( $m ) != 2 ) {
00993                     continue;
00994                 }
00995                 // trim any trailling comments starting with '//'
00996                 $tt = explode( '//', $m[1], 2 );
00997                 $ret[trim( $m[0] )] = trim( $tt[0] );
00998             }
00999         }
01000 
01001         // recursively parse the subpages
01002         if ( $recursive ) {
01003             foreach ( $sublinks as $link ) {
01004                 $s = $this->parseCachedTable( $code, $link, $recursive );
01005                 $ret = array_merge( $ret, $s );
01006             }
01007         }
01008 
01009         if ( $this->mUcfirst ) {
01010             foreach ( $ret as $k => $v ) {
01011                 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
01012             }
01013         }
01014         return $ret;
01015     }
01016 
01025     public function markNoConversion( $text, $noParse = false ) {
01026         # don't mark if already marked
01027         if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
01028             return $text;
01029         }
01030 
01031         $ret = "-{R|$text}-";
01032         return $ret;
01033     }
01034 
01043     function convertCategoryKey( $key ) {
01044         return $key;
01045     }
01046 
01063     function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
01064             $isWatch, $section, $flags, $revision ) {
01065         $titleobj = $page->getTitle();
01066         if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
01067             $title = $titleobj->getDBkey();
01068             $t = explode( '/', $title, 3 );
01069             $c = count( $t );
01070             if ( $c > 1 && $t[0] == 'Conversiontable' ) {
01071                 if ( $this->validateVariant( $t[1] ) ) {
01072                     $this->reloadTables();
01073                 }
01074             }
01075         }
01076         return true;
01077     }
01078 
01088     public function armourMath( $text ) {
01089         // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
01090         // any unwanted markup appearing in the math image tag.
01091         $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
01092         return $text;
01093     }
01094 
01098     function getVarSeparatorPattern() {
01099         if ( is_null( $this->mVarSeparatorPattern ) ) {
01100             // varsep_pattern for preg_split:
01101             // text should be splited by ";" only if a valid variant
01102             // name exist after the markup, for example:
01103             //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
01104             //  <span style="font-size:120%;">yyy</span>;}-
01105             // we should split it as:
01106             //  array(
01107             //    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
01108             //    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
01109             //    [2] => ''
01110             //   )
01111             $pat = '/;\s*(?=';
01112             foreach ( $this->mVariants as $variant ) {
01113                 // zh-hans:xxx;zh-hant:yyy
01114                 $pat .= $variant . '\s*:|';
01115                 // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
01116                 $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
01117             }
01118             $pat .= '\s*$)/';
01119             $this->mVarSeparatorPattern = $pat;
01120         }
01121         return $this->mVarSeparatorPattern;
01122     }
01123 }