MediaWiki  REL1_24
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
/**
 * Base class for converting text between the written variants of one language.
 *
 * The class keeps one conversion table per variant in $mTables, decides which
 * variant the current request prefers (URL, user option, Accept-Language
 * header, site default) and converts text while honouring manual conversion
 * markup of the form -{ ... }-.
 *
 * Concrete converters must override loadDefaultTables(); the implementation
 * here only throws.
 */
class LanguageConverter {
    /**
     * Language codes listed as having variants.
     * @var array
     */
    public static $languagesWithVariants = array(
        'gan',
        'iu',
        'kk',
        'ku',
        'shi',
        'sr',
        'tg',
        'uz',
        'zh',
    );

    /** @var string Main language code; used as the last-resort variant */
    public $mMainLanguageCode;

    /** @var array Variant codes passed to the constructor minus $wgDisabledVariants */
    public $mVariants;

    /** @var array Map of variant code => fallback code (string) or codes (array) */
    public $mVariantFallbacks;

    /** @var array Result of Language::fetchLanguageNames() */
    public $mVariantNames;

    /** @var bool Set to true once loadTables() has run */
    public $mTablesLoaded = false;

    /**
     * @var array|bool Map of variant code => conversion table object, plus the
     *   marker entry self::CACHE_VERSION_KEY => true. False while not loaded.
     */
    public $mTables;

    /** @var array Map of variant => 'bidirectional', 'unidirectional' or 'disable' */
    public $mManualLevel;

    /** @var string Key under which $mTables is stored in $wgLangConvMemc */
    public $mCacheKey;

    /** @var object Language object handed to the constructor */
    public $mLangObj;

    /** @var array Known rule flags, keyed and valued by the flag itself */
    public $mFlags;

    /**
     * @var string Separators; not read inside this class
     *   (presumably consumed by ConverterRule - confirm).
     */
    public $mDescCodeSep = ':', $mDescVarSep = ';';

    /** @var bool When true, parseCachedTable() also adds ucfirst() forms of each pair */
    public $mUcfirst = false;

    /** @var string|bool Title produced by the last -{T|...}- rule, false if none */
    public $mConvRuleTitle = false;

    /** @var string|null Memoized result of getURLVariant() */
    public $mURLVariant;

    /** @var string|null Last result of getUserVariant() (written, never read back) */
    public $mUserVariant;

    /** @var string|null Memoized result of getHeaderVariant() */
    public $mHeaderVariant;

    /** @var int Maximum nesting depth of -{ }- rules before a warning is emitted */
    public $mMaxDepth = 10;

    /** @var string|null Memoized result of getVarSeparatorPattern() */
    public $mVarSeparatorPattern;

    /** Key whose presence in the cached tables marks them as current. */
    const CACHE_VERSION_KEY = 'VERSION 7';

    /**
     * @param object $langobj Language object; stored in $mLangObj
     * @param string $maincode Main language code
     * @param array $variants Supported variant codes
     * @param array $variantfallbacks Map of variant => fallback(s)
     * @param array $flags Extra rule flags, merged over the defaults
     * @param array $manualLevel Map of variant => manual conversion level;
     *   variants missing from the map default to 'bidirectional'
     */
    public function __construct( $langobj, $maincode, $variants = array(),
                                $variantfallbacks = array(), $flags = array(),
                                $manualLevel = array() ) {
        global $wgDisabledVariants;
        $this->mLangObj = $langobj;
        $this->mMainLanguageCode = $maincode;
        $this->mVariants = array_diff( $variants, $wgDisabledVariants );
        $this->mVariantFallbacks = $variantfallbacks;
        $this->mVariantNames = Language::fetchLanguageNames();
        $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
        $defaultflags = array(
            // 'S' show converted text
            // '+' add rules for alltext
            // 'E' the given flags are erroneous
            // The three flags above are reserved for the program itself.
            'A' => 'A',   // add rule for convert code (all text convert)
            'T' => 'T',   // title convert
            'R' => 'R',   // raw content
            'D' => 'D',   // convert description (subclass implement)
            '-' => '-',   // remove convert (not implemented)
            'H' => 'H',   // add rule for convert code (but no display in placed code)
            'N' => 'N'    // current variant name
        );
        $this->mFlags = array_merge( $defaultflags, $flags );
        foreach ( $this->mVariants as $v ) {
            $this->mManualLevel[$v] = array_key_exists( $v, $manualLevel )
                ? $manualLevel[$v]
                : 'bidirectional';
            // Every enabled variant code is itself a valid flag.
            $this->mFlags[$v] = $v;
        }
    }

    /**
     * Get the enabled variant codes.
     *
     * @return array
     */
    public function getVariants() {
        return $this->mVariants;
    }

    /**
     * Get the fallback(s) configured for a variant.
     *
     * @param string $variant Variant code
     * @return string|array The configured fallback(s), or the main language
     *   code when none is configured for $variant
     */
    public function getVariantFallbacks( $variant ) {
        if ( isset( $this->mVariantFallbacks[$variant] ) ) {
            return $this->mVariantFallbacks[$variant];
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the title set by the most recent title rule, if any.
     *
     * @return string|bool False when no rule has set a title
     */
    public function getConvRuleTitle() {
        return $this->mConvRuleTitle;
    }

    /**
     * Get the variant preferred for the current request.
     *
     * Order of precedence: URL variant, then the user option for logged-in
     * users or the Accept-Language header for everyone else, then
     * $wgDefaultLanguageVariant, then the main language code.
     *
     * @return string
     */
    public function getPreferredVariant() {
        global $wgDefaultLanguageVariant, $wgUser;

        $req = $this->getURLVariant();

        if ( $wgUser->isLoggedIn() && !$req ) {
            $req = $this->getUserVariant();
        } elseif ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        // This function, unlike the other get*Variant functions, is
        // not memoized (i.e. its return value is not cached) since
        // new information might appear during processing after this
        // is first called.
        if ( $this->validateVariant( $req ) ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the variant to use when the user's own option is ignored.
     *
     * Same precedence as getPreferredVariant() but without consulting
     * getUserVariant().
     *
     * @return string
     */
    public function getDefaultVariant() {
        global $wgDefaultLanguageVariant;

        $req = $this->getURLVariant();

        if ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        if ( $req ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Check that a code is one of the enabled variants.
     *
     * @param string|null $variant Code to check
     * @return string|null $variant itself when enabled, null otherwise
     */
    public function validateVariant( $variant = null ) {
        if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
            return $variant;
        }
        return null;
    }

    /**
     * Get the variant requested through the URL.
     *
     * Reads the 'variant' request parameter and falls back to 'uselang'.
     * A valid result is memoized in $mURLVariant.
     *
     * @return string|null Validated variant code, or null
     */
    public function getURLVariant() {
        global $wgRequest;

        if ( $this->mURLVariant ) {
            return $this->mURLVariant;
        }

        // see if the preference is set in the request
        $ret = $wgRequest->getText( 'variant' );

        if ( !$ret ) {
            $ret = $wgRequest->getVal( 'uselang' );
        }

        $this->mURLVariant = $this->validateVariant( $ret );
        return $this->mURLVariant;
    }

    /**
     * Get the variant stored in the user's options.
     *
     * Not memoized on purpose: memoizing this function wreaks havoc on
     * parserTest.php. The result is still written to $mUserVariant.
     *
     * @return string|null Validated variant code, or null
     */
    protected function getUserVariant() {
        global $wgUser, $wgContLang;

        // Get language variant preference from logged in users
        // Don't call this on stub objects because that causes infinite
        // recursion during initialisation
        if ( $wgUser->isLoggedIn() ) {
            if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
                $ret = $wgUser->getOption( 'variant' );
            } else {
                $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
            }
        } else {
            // figure out user lang without constructing wgLang to avoid
            // infinite recursion
            $ret = $wgUser->getOption( 'language' );
        }

        $this->mUserVariant = $this->validateVariant( $ret );
        return $this->mUserVariant;
    }

    /**
     * Get the variant implied by the request's Accept-Language header.
     *
     * Accepted languages are tried in the order returned by the request.
     * If none is an enabled variant, the fallbacks of those languages are
     * tried next. A valid result is memoized in $mHeaderVariant.
     *
     * @return string|null Validated variant code, or null
     */
    protected function getHeaderVariant() {
        global $wgRequest;

        if ( $this->mHeaderVariant ) {
            return $this->mHeaderVariant;
        }

        // see if some supported language variant is set in the
        // HTTP header.
        $languages = array_keys( $wgRequest->getAcceptLang() );
        if ( empty( $languages ) ) {
            return null;
        }

        $fallbackLanguages = array();
        foreach ( $languages as $language ) {
            $this->mHeaderVariant = $this->validateVariant( $language );
            if ( $this->mHeaderVariant ) {
                break;
            }

            // To see if there are fallbacks of current language.
            // We record these fallback variants, and process
            // them later.
            $fallbacks = $this->getVariantFallbacks( $language );
            if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
                $fallbackLanguages[] = $fallbacks;
            } elseif ( is_array( $fallbacks ) ) {
                $fallbackLanguages =
                    array_merge( $fallbackLanguages, $fallbacks );
            }
        }

        if ( !$this->mHeaderVariant ) {
            // process fallback languages now
            $fallbackLanguages = array_unique( $fallbackLanguages );
            foreach ( $fallbackLanguages as $language ) {
                $this->mHeaderVariant = $this->validateVariant( $language );
                if ( $this->mHeaderVariant ) {
                    break;
                }
            }
        }

        return $this->mHeaderVariant;
    }

    /**
     * Convert text with the conversion tables only (no -{ }- rule parsing).
     *
     * HTML tags, HTML entities, parser placeholders and the contents of
     * <code>, <script> and <pre> are passed through untouched; 'title' and
     * 'alt' attributes of tags are converted separately. The text between
     * those literal pieces is joined with NUL bytes and translated in one
     * batch.
     *
     * @param string $text Text to convert; NUL bytes are removed from it
     * @param string|bool $toVariant Target variant, or false to use
     *   getPreferredVariant()
     * @return string Converted text
     */
    public function autoConvert( $text, $toVariant = false ) {
        wfProfileIn( __METHOD__ );

        $this->loadTables();

        if ( !$toVariant ) {
            $toVariant = $this->getPreferredVariant();
            if ( !$toVariant ) {
                wfProfileOut( __METHOD__ );
                return $text;
            }
        }

        if ( $this->guessVariant( $text, $toVariant ) ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }

        /* we convert everything except:
           1. HTML markups (anything between < and >)
           2. HTML entities
           3. placeholders created by the parser
        */
        global $wgParser;
        $marker = '';
        if ( isset( $wgParser ) ) {
            $uniqPrefix = $wgParser->UniqPrefix();
            if ( $uniqPrefix != '' ) {
                // Quote the prefix so it can never be read as regex syntax.
                $marker = '|' . preg_quote( $uniqPrefix, '/' ) . '[\-a-zA-Z0-9]+';
            }
        }

        // this one is needed when the text is inside an HTML markup
        $htmlfix = '|<[^>]+$|^[^<>]*>';

        // disable convert to variants between <code> tags
        $codefix = '<code>.+?<\/code>|';
        // disable conversion of <script> tags
        $scriptfix = '<script.*?>.*?<\/script>|';
        // disable conversion of <pre> tags
        $prefix = '<pre.*?>.*?<\/pre>|';

        // Entity names and hexadecimal character references may contain
        // upper-case characters after the first one (&AElig;, &#x1F600;),
        // so the entity alternative accepts both cases.
        $reg = '/' . $codefix . $scriptfix . $prefix .
            '<[^>]+>|&[a-zA-Z#][a-zA-Z0-9]+;' . $marker . $htmlfix . '/s';
        $startPos = 0;
        $sourceBlob = '';
        $literalBlob = '';

        // Guard against delimiter nulls in the input
        $text = str_replace( "\000", '', $text );

        $markupMatches = null;
        $elementMatches = null;
        while ( $startPos < strlen( $text ) ) {
            if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
                $elementPos = $markupMatches[0][1];
                $element = $markupMatches[0][0];
            } else {
                // No more literal pieces: the rest of the text is convertible.
                $elementPos = strlen( $text );
                $element = '';
            }

            // Queue the part before the markup for translation in a batch
            $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

            // Advance to the next position
            $startPos = $elementPos + strlen( $element );

            // Translate any alt or title attributes inside the matched element
            if ( $element !== ''
                && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
            ) {
                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
                $changed = false;
                foreach ( array( 'title', 'alt' ) as $attrName ) {
                    if ( !isset( $attrs[$attrName] ) ) {
                        continue;
                    }
                    $attr = $attrs[$attrName];
                    // Don't convert URLs
                    if ( strpos( $attr, '://' ) === false ) {
                        $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
                    }

                    // Remove HTML tags to avoid disrupting the layout
                    $attr = preg_replace( '/<[^>]+>/', '', $attr );
                    if ( $attr !== $attrs[$attrName] ) {
                        $attrs[$attrName] = $attr;
                        $changed = true;
                    }
                }
                if ( $changed ) {
                    $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                        $elementMatches[3];
                }
            }
            $literalBlob .= $element . "\000";
        }

        // Do the main translation batch
        $translatedBlob = $this->translate( $sourceBlob, $toVariant );

        // Put the output back together
        $translatedIter = StringUtils::explode( "\000", $translatedBlob );
        $literalIter = StringUtils::explode( "\000", $literalBlob );
        $output = '';
        while ( $translatedIter->valid() && $literalIter->valid() ) {
            $output .= $translatedIter->current();
            $output .= $literalIter->current();
            $translatedIter->next();
            $literalIter->next();
        }

        wfProfileOut( __METHOD__ );
        return $output;
    }

    /**
     * Run text through the conversion table of one variant.
     *
     * Text that trim() reduces to an empty/falsy string is returned as is.
     * A table must exist in $mTables for $variant.
     *
     * @param string $text Text to convert
     * @param string $variant Variant code
     * @return string Converted text
     */
    public function translate( $text, $variant ) {
        wfProfileIn( __METHOD__ );
        // If $text is empty or only includes spaces, do nothing
        // Otherwise translate it
        if ( trim( $text ) ) {
            $this->loadTables();
            $text = $this->mTables[$variant]->replace( $text );
        }
        wfProfileOut( __METHOD__ );
        return $text;
    }

    /**
     * Translate text into every enabled variant.
     *
     * @param string $text Text to convert
     * @return array Map of variant code => converted text
     */
    public function autoConvertToAllVariants( $text ) {
        wfProfileIn( __METHOD__ );
        $this->loadTables();

        $ret = array();
        foreach ( $this->mVariants as $variant ) {
            $ret[$variant] = $this->translate( $text, $variant );
        }

        wfProfileOut( __METHOD__ );
        return $ret;
    }

    /**
     * Apply a parsed manual conversion rule to the converter state.
     *
     * Records the rule's title (if it has one) and adds the rule's pairs to,
     * or removes them from, the in-memory conversion tables.
     *
     * @param ConverterRule $convRule Parsed rule
     */
    protected function applyManualConv( $convRule ) {
        // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
        // title conversion.
        // Bug 24072: $mConvRuleTitle was overwritten by other manual
        // rule(s) not for title, this breaks the title conversion.
        // Hence the title is only replaced when the rule provides one.
        $newConvRuleTitle = $convRule->getTitle();
        if ( $newConvRuleTitle ) {
            $this->mConvRuleTitle = $newConvRuleTitle;
        }

        // merge/remove manual conversion rules to/from global table
        $convTable = $convRule->getConvTable();
        $action = $convRule->getRulesAction();
        foreach ( $convTable as $variant => $pair ) {
            if ( !$this->validateVariant( $variant ) ) {
                continue;
            }

            if ( $action == 'add' ) {
                foreach ( $pair as $from => $to ) {
                    // to ensure that $from and $to not be left blank
                    // so $this->translate() could always return a string
                    if ( $from || $to ) {
                        // more efficient than array_merge(), about 2.5 times.
                        $this->mTables[$variant]->setPair( $from, $to );
                    }
                }
            } elseif ( $action == 'remove' ) {
                $this->mTables[$variant]->removeArray( $pair );
            }
        }
    }

    /**
     * Convert a title, including its namespace, to the preferred variant.
     *
     * @param Title $title Title object
     * @return string Converted "Namespace:Text", or just the text for NS_MAIN
     */
    public function convertTitle( $title ) {
        $variant = $this->getPreferredVariant();
        $index = $title->getNamespace();
        if ( $index !== NS_MAIN ) {
            $text = $this->convertNamespace( $index, $variant ) . ':';
        } else {
            $text = '';
        }
        $text .= $this->translate( $title->getText(), $variant );
        return $text;
    }

    /**
     * Get the name of a namespace in the given variant.
     *
     * @param int $index Namespace index
     * @param string|null $variant Variant code; null means the preferred one
     * @return string Namespace name, '' for NS_MAIN
     */
    public function convertNamespace( $index, $variant = null ) {
        if ( $variant === null ) {
            $variant = $this->getPreferredVariant();
        }
        if ( $index === NS_MAIN ) {
            return '';
        }

        // First check if a message gives a converted name in the target variant.
        $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
        if ( $nsConvMsg->exists() ) {
            return $nsConvMsg->plain();
        }
        // Then check if a message gives a converted name in content language
        // which needs extra translation to the target variant.
        $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
        if ( $nsConvMsg->exists() ) {
            return $this->translate( $nsConvMsg->plain(), $variant );
        }
        // No message exists, retrieve it from the target variant's namespace names.
        $langObj = $this->mLangObj->factory( $variant );
        return $langObj->getFormattedNsText( $index );
    }

    /**
     * Convert text, including -{ }- rules, to the preferred variant.
     *
     * @param string $text Text to convert
     * @return string Converted text
     */
    public function convert( $text ) {
        $variant = $this->getPreferredVariant();
        return $this->convertTo( $text, $variant );
    }

    /**
     * Convert text, including -{ }- rules, to the given variant.
     *
     * Returns the text unchanged when $wgDisableLangConversion is set.
     *
     * @param string $text Text to convert
     * @param string $variant Target variant code
     * @return string Converted text
     */
    public function convertTo( $text, $variant ) {
        global $wgDisableLangConversion;
        if ( $wgDisableLangConversion ) {
            return $text;
        }
        // Reset converter state for a new converter run.
        $this->mConvRuleTitle = false;
        return $this->recursiveConvertTopLevel( $text, $variant );
    }

    /**
     * Convert text outside any rule, handing each -{ ... }- to
     * recursiveConvertRule().
     *
     * @param string $text Text to convert
     * @param string $variant Target variant code
     * @param int $depth Current nesting depth
     * @return string Converted text
     */
    protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
        $startPos = 0;
        $out = '';
        $length = strlen( $text );
        // Plain segments are left alone when the text is already in $variant.
        $shouldConvert = !$this->guessVariant( $text, $variant );

        while ( $startPos < $length ) {
            $pos = strpos( $text, '-{', $startPos );

            if ( $pos === false ) {
                // No more markup, append final segment
                $fragment = substr( $text, $startPos );
                $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
                return $out;
            }

            // Markup found
            // Append initial segment
            $fragment = substr( $text, $startPos, $pos - $startPos );
            $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

            // Advance position
            $startPos = $pos;

            // Do recursive conversion; $startPos is moved past the rule.
            $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
        }

        return $out;
    }

    /**
     * Convert one -{ ... }- rule, recursing into nested rules.
     *
     * @param string $text Full text being converted
     * @param string $variant Target variant code
     * @param int &$startPos Offset of the opening '-{' on entry; offset just
     *   past the rule (or the end of the text if unclosed) on return
     * @param int $depth Current nesting depth
     * @return string Display text of the rule; for an unclosed rule, '-{'
     *   followed by the auto-converted remainder
     * @throws MWException When $startPos does not point at '-{'
     */
    protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
        // Quick sanity check (no function calls)
        if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
            throw new MWException( __METHOD__ . ': invalid input string' );
        }

        $startPos += 2;
        $inner = '';
        $warningDone = false;
        $length = strlen( $text );

        while ( $startPos < $length ) {
            $m = false;
            preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
            if ( !$m ) {
                // Unclosed rule
                break;
            }

            $token = $m[0][0];
            $pos = $m[0][1];

            // Markup found
            // Append initial segment
            $inner .= substr( $text, $startPos, $pos - $startPos );

            // Advance position
            $startPos = $pos;

            switch ( $token ) {
                case '-{':
                    // Check max depth
                    if ( $depth >= $this->mMaxDepth ) {
                        $inner .= '-{';
                        if ( !$warningDone ) {
                            $inner .= '<span class="error">' .
                                wfMessage( 'language-converter-depth-warning' )
                                    ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
                                '</span>';
                            $warningDone = true;
                        }
                        $startPos += 2;
                        // "continue 2" targets the while loop. A bare
                        // "continue" would target the switch, which acts
                        // like "break" and warns on PHP >= 7.3.
                        continue 2;
                    }
                    // Recursively parse another rule
                    $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                    break;
                case '}-':
                    // Apply the rule
                    $startPos += 2;
                    $rule = new ConverterRule( $inner, $this );
                    $rule->parse( $variant );
                    $this->applyManualConv( $rule );
                    return $rule->getDisplay();
                default:
                    throw new MWException( __METHOD__ . ': invalid regex match' );
            }
        }

        // Unclosed rule
        if ( $startPos < $length ) {
            $inner .= substr( $text, $startPos );
        }
        $startPos = $length;
        return '-{' . $this->autoConvert( $inner, $variant );
    }

    /**
     * Look for an existing page under any variant of a link target.
     *
     * When $nt does not exist, the link text is converted to every variant
     * and the first resulting title with an article ID replaces both $link
     * and $nt. Nothing is done when conversion is disabled or, unless
     * $ignoreOtherCond is set, when the request is an edit/submit or carries
     * redirect=no or linkconvert=no.
     *
     * @param string &$link Link text; replaced with the found title's text
     * @param Title|null &$nt Title object; replaced with the found title
     * @param bool $ignoreOtherCond Skip the request-based conditions
     */
    public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
        # If the article has already existed, there is no need to
        # check it again, otherwise it may cause a fault.
        if ( is_object( $nt ) && $nt->exists() ) {
            return;
        }

        global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
        $isredir = $wgRequest->getText( 'redirect', 'yes' );
        $action = $wgRequest->getText( 'action' );
        if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
            // Editing via a red link is treated like viewing.
            $action = 'view';
        }
        $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
        $disableLinkConversion = $wgDisableLangConversion
            || $wgDisableTitleConversion;

        if ( $disableLinkConversion ||
            ( !$ignoreOtherCond &&
                ( $isredir == 'no'
                    || $action == 'edit'
                    || $action == 'submit'
                    || $linkconvert == 'no' ) ) ) {
            return;
        }

        $ns = NS_MAIN;
        if ( is_object( $nt ) ) {
            $ns = $nt->getNamespace();
        }

        $variants = $this->autoConvertToAllVariants( $link );
        if ( !$variants ) { // give up
            return;
        }

        $linkBatch = new LinkBatch();
        $titles = array();

        foreach ( $variants as $v ) {
            // Strict comparison: both sides are strings and must not be
            // compared as numbers.
            if ( $v !== $link ) {
                $varnt = Title::newFromText( $v, $ns );
                if ( !is_null( $varnt ) ) {
                    $linkBatch->addObj( $varnt );
                    $titles[] = $varnt;
                }
            }
        }

        // fetch all variants in single query
        $linkBatch->execute();

        foreach ( $titles as $varnt ) {
            if ( $varnt->getArticleID() > 0 ) {
                $nt = $varnt;
                $link = $varnt->getText();
                break;
            }
        }
    }

    /**
     * Get the variant-dependent suffix for hash/cache keys.
     *
     * @return string '!' followed by the preferred variant
     */
    public function getExtraHashOptions() {
        $variant = $this->getPreferredVariant();

        return '!' . $variant;
    }

    /**
     * Guess whether text is already written in the given variant.
     *
     * This implementation always answers false. autoConvert() and
     * recursiveConvertTopLevel() skip conversion when it returns true.
     *
     * @param string $text Text to inspect
     * @param string $variant Variant code
     * @return bool
     */
    public function guessVariant( $text, $variant ) {
        return false;
    }

    /**
     * Load the built-in conversion tables into $mTables.
     *
     * Must be overridden; this implementation always throws.
     *
     * @throws MWException
     */
    public function loadDefaultTables() {
        $name = get_class( $this );

        throw new MWException( "Must implement loadDefaultTables() method in class $name" );
    }

    /**
     * Load the conversion tables, from cache when possible.
     *
     * Does nothing once $mTablesLoaded is set. When the cached value is
     * missing, is not an array or lacks the CACHE_VERSION_KEY marker, the
     * default tables are loaded, merged with the MediaWiki:Conversiontable/*
     * pages and written back to the cache for 43200 seconds.
     *
     * @param bool $fromCache Whether the cache may be read
     */
    public function loadTables( $fromCache = true ) {
        global $wgLangConvMemc;

        if ( $this->mTablesLoaded ) {
            return;
        }

        wfProfileIn( __METHOD__ );
        $this->mTablesLoaded = true;
        $this->mTables = false;
        if ( $fromCache ) {
            wfProfileIn( __METHOD__ . '-cache' );
            $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
            wfProfileOut( __METHOD__ . '-cache' );
        }
        // The is_array() check keeps a corrupt (non-array) cache value from
        // reaching array_key_exists(); such a value simply forces a recache.
        if ( !$this->mTables
            || !is_array( $this->mTables )
            || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables )
        ) {
            wfProfileIn( __METHOD__ . '-recache' );
            // not in cache, or we need a fresh reload.
            // We will first load the default tables
            // then update them using things in MediaWiki:Conversiontable/*
            $this->loadDefaultTables();
            foreach ( $this->mVariants as $var ) {
                $cached = $this->parseCachedTable( $var );
                $this->mTables[$var]->mergeArray( $cached );
            }

            $this->postLoadTables();
            $this->mTables[self::CACHE_VERSION_KEY] = true;

            $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
            wfProfileOut( __METHOD__ . '-recache' );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Hook for subclasses, called by loadTables() after the tables have been
     * rebuilt and before they are cached. Does nothing here.
     */
    public function postLoadTables() {
    }

    /**
     * Discard the loaded tables and rebuild them without reading the cache.
     */
    public function reloadTables() {
        if ( $this->mTables ) {
            unset( $this->mTables );
        }

        $this->mTablesLoaded = false;
        $this->loadTables( false );
    }

    /**
     * Parse the conversion pairs defined on a MediaWiki:Conversiontable page.
     *
     * Pairs are written as "from => to;" inside -{ ... }- blocks. Links of
     * the form [[MediaWiki:Conversiontable/<code>/<subpage>]] name subpages
     * that are parsed as well when $recursive is set.
     *
     * Each page key is parsed at most once per request: a function-static
     * list of visited keys makes repeated calls return an empty array, which
     * also stops link cycles between subpages.
     * NOTE(review): because that list is never reset, a reloadTables() in
     * the same request appears to get no pairs from pages already parsed -
     * confirm whether that is intended.
     *
     * @param string $code Variant code
     * @param string $subpage Subpage name, '' for the variant's main page
     * @param bool $recursive Whether linked subpages are parsed too
     * @return array Map of source text => converted text
     */
    public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
        static $parsed = array();

        $key = 'Conversiontable/' . $code;
        if ( $subpage ) {
            $key .= '/' . $subpage;
        }
        if ( array_key_exists( $key, $parsed ) ) {
            return array();
        }

        $parsed[$key] = true;

        if ( $subpage === '' ) {
            $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
        } else {
            $txt = false;
            $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
            if ( $title && $title->exists() ) {
                $revision = Revision::newFromTitle( $title );
                if ( $revision ) {
                    if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
                        $txt = $revision->getContent( Revision::RAW )->getNativeData();
                    }

                    // @todo in the future, use a specialized content model, perhaps based on json!
                }
            }
        }

        # Nothing to parse if there's no text
        if ( $txt === false || $txt === null || $txt === '' ) {
            return array();
        }

        // get all subpage links of the form
        // [[MediaWiki:Conversiontable/zh-xx/...|...]]
        $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
            ':Conversiontable';
        $subs = StringUtils::explode( '[[', $txt );
        $sublinks = array();
        foreach ( $subs as $sub ) {
            $link = explode( ']]', $sub, 2 );
            if ( count( $link ) != 2 ) {
                continue;
            }
            // Drop the link label, then split target into head/code/subpage.
            $b = explode( '|', $link[0], 2 );
            $b = explode( '/', trim( $b[0] ), 3 );
            if ( count( $b ) == 3 ) {
                $sublink = $b[2];
            } else {
                $sublink = '';
            }

            if ( $b[0] == $linkhead && $b[1] == $code ) {
                $sublinks[] = $sublink;
            }
        }

        // parse the mappings in this page
        $blocks = StringUtils::explode( '-{', $txt );
        $ret = array();
        $first = true;
        foreach ( $blocks as $block ) {
            if ( $first ) {
                // Skip the part before the first -{
                $first = false;
                continue;
            }
            $mappings = explode( '}-', $block, 2 );
            $stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] );
            $table = StringUtils::explode( ';', $stripped );
            foreach ( $table as $t ) {
                // Entries without exactly one '=>' are ignored.
                $m = explode( '=>', $t, 3 );
                if ( count( $m ) != 2 ) {
                    continue;
                }
                // trim any trailing comments starting with '//'
                $tt = explode( '//', $m[1], 2 );
                $ret[trim( $m[0] )] = trim( $tt[0] );
            }
        }

        // recursively parse the subpages
        if ( $recursive ) {
            foreach ( $sublinks as $link ) {
                $s = $this->parseCachedTable( $code, $link, $recursive );
                $ret = array_merge( $ret, $s );
            }
        }

        if ( $this->mUcfirst ) {
            foreach ( $ret as $k => $v ) {
                $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
            }
        }
        return $ret;
    }

    /**
     * Wrap text in -{R|...}- so that it is not converted.
     *
     * Text that already contains '-{' or '}-' anywhere, including at the
     * very start, is returned unchanged.
     *
     * @param string $text Text to protect
     * @param bool $noParse Unused by this implementation
     * @return string
     */
    public function markNoConversion( $text, $noParse = false ) {
        # don't mark if already marked
        # (strpos() returns 0 for a match at the start, so compare with false)
        if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
            return $text;
        }

        $ret = "-{R|$text}-";
        return $ret;
    }

    /**
     * Convert a category sort key. This implementation returns it unchanged.
     *
     * @param string $key Sort key
     * @return string
     */
    public function convertCategoryKey( $key ) {
        return $key;
    }

    /**
     * Rebuild the conversion tables after a conversion table page is saved.
     *
     * Reacts only to pages in NS_MEDIAWIKI whose DB key is
     * "Conversiontable/<enabled variant>[/...]"; only $page is inspected.
     * NOTE(review): the name suggests a PageContentSaveComplete hook
     * handler - confirm against the hook registration.
     *
     * @param object $page Saved page; must provide getTitle()
     * @param mixed $user Unused
     * @param mixed $content Unused
     * @param mixed $summary Unused
     * @param mixed $isMinor Unused
     * @param mixed $isWatch Unused
     * @param mixed $section Unused
     * @param mixed $flags Unused
     * @param mixed $revision Unused
     * @return bool Always true
     */
    public function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
            $isWatch, $section, $flags, $revision ) {
        $titleobj = $page->getTitle();
        if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
            $title = $titleobj->getDBkey();
            $t = explode( '/', $title, 3 );
            $c = count( $t );
            if ( $c > 1 && $t[0] == 'Conversiontable' ) {
                if ( $this->validateVariant( $t[1] ) ) {
                    $this->reloadTables();
                }
            }
        }
        return true;
    }

    /**
     * Neutralise conversion markup inside math text.
     *
     * Converts '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
     * any unwanted markup appearing in the math image tag.
     *
     * @param string $text Text to armour
     * @return string
     */
    public function armourMath( $text ) {
        $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
        return $text;
    }

    /**
     * Get the preg_split() pattern that separates the parts of a rule.
     *
     * Text should be split at ";" only if a valid variant name (or the end
     * of the text) follows, for example:
     *  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
     *  <span style="font-size:120%;">yyy</span>;}-
     * should be split as:
     *  array(
     *    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
     *    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
     *    [2] => ''
     *   )
     * The pattern is built once and memoized in $mVarSeparatorPattern.
     *
     * @return string Regular expression including delimiters
     */
    public function getVarSeparatorPattern() {
        if ( is_null( $this->mVarSeparatorPattern ) ) {
            $pat = '/;\s*(?=';
            foreach ( $this->mVariants as $variant ) {
                // Quote the code so it is always matched literally.
                $quoted = preg_quote( $variant, '/' );
                // zh-hans:xxx;zh-hant:yyy
                $pat .= $quoted . '\s*:|';
                // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
                $pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
            }
            $pat .= '\s*$)/';
            $this->mVarSeparatorPattern = $pat;
        }
        return $this->mVarSeparatorPattern;
    }
}