MediaWiki  REL1_22
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
/**
 * Base class for converting text between the written variants of one
 * language (for example zh-hans / zh-hant).
 *
 * The per-variant conversion tables are not built here: a subclass has to
 * override loadDefaultTables(), the default implementation only throws.
 */
class LanguageConverter {

    /**
     * Language codes that have variants.
     * @var array
     */
    public static $languagesWithVariants = array(
        'gan',
        'iu',
        'kk',
        'ku',
        'shi',
        'sr',
        'tg',
        'uz',
        'zh',
    );

    /** Code of the main language; used as fallback when no variant is valid. */
    public $mMainLanguageCode;
    /** Variant list, fallback map (variant => string|array) and language names. */
    public $mVariants, $mVariantFallbacks, $mVariantNames;
    /** True once loadTables() has run for this object. */
    public $mTablesLoaded = false;
    /**
     * Conversion tables indexed by variant code, plus the CACHE_VERSION_KEY
     * marker entry. The table objects are used through replace(), setPair(),
     * removeArray() and mergeArray().
     */
    public $mTables;
    // 'bidirectional' 'unidirectional' 'disable' for each variant
    public $mManualLevel;

    /** Key under which the tables are stored in $wgLangConvMemc. */
    public $mCacheKey;

    /** Language object handed to the constructor. */
    public $mLangObj;
    /** Known rule flags (the defaults, caller supplied flags and one per variant). */
    public $mFlags;
    public $mDescCodeSep = ':', $mDescVarSep = ';';
    /** When true, parseCachedTable() also adds ucfirst() versions of each pair. */
    public $mUcfirst = false;
    /** Title set by the last manual rule that carried one, or false. */
    public $mConvRuleTitle = false;
    /** Memoized results of getURLVariant() / getUserVariant() / getHeaderVariant(). */
    public $mURLVariant;
    public $mUserVariant;
    public $mHeaderVariant;
    /** Maximum nesting depth of -{ }- rules handled by recursiveConvertRule(). */
    public $mMaxDepth = 10;
    /** Memoized result of getVarSeparatorPattern(). */
    public $mVarSeparatorPattern;

    /**
     * Marker key stored inside $mTables. Cached tables that lack this key
     * are considered stale and are rebuilt by loadTables().
     */
    const CACHE_VERSION_KEY = 'VERSION 6';

    /**
     * Constructor
     *
     * @param $langobj Language: the owning language object
     * @param string $maincode the main language code of this language
     * @param array $variants the supported variants of this language
     *        (entries listed in $wgDisabledVariants are removed)
     * @param array $variantfallbacks the fallback language(s) of each variant
     * @param array $flags extra flags, merged over the defaults
     * @param array $manualLevel manual conversion level per variant;
     *        variants not listed default to 'bidirectional'
     */
    public function __construct( $langobj, $maincode, $variants = array(),
                                $variantfallbacks = array(), $flags = array(),
                                $manualLevel = array() ) {
        global $wgDisabledVariants;
        $this->mLangObj = $langobj;
        $this->mMainLanguageCode = $maincode;
        $this->mVariants = array_diff( $variants, $wgDisabledVariants );
        $this->mVariantFallbacks = $variantfallbacks;
        $this->mVariantNames = Language::fetchLanguageNames();
        $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
        $defaultflags = array(
            // 'S' show converted text
            // '+' add rules for alltext
            // 'E' the given flags are erroneous
            // these flags above are reserved for program
            'A' => 'A',   // add rule for convert code (all text convert)
            'T' => 'T',   // title convert
            'R' => 'R',   // raw content
            'D' => 'D',   // convert description (subclass implement)
            '-' => '-',   // remove convert (not implement)
            'H' => 'H',   // add rule for convert code
                          // (but no display in placed code)
            'N' => 'N'    // current variant name
        );
        $this->mFlags = array_merge( $defaultflags, $flags );
        foreach ( $this->mVariants as $v ) {
            if ( array_key_exists( $v, $manualLevel ) ) {
                $this->mManualLevel[$v] = $manualLevel[$v];
            } else {
                $this->mManualLevel[$v] = 'bidirectional';
            }
            // every variant code is also usable as a flag
            $this->mFlags[$v] = $v;
        }
    }

    /**
     * Get all enabled variants of this language.
     *
     * @return array the variant codes
     */
    public function getVariants() {
        return $this->mVariants;
    }

    /**
     * Get the fallback(s) configured for a variant.
     *
     * @param string $variant the language code of the variant
     * @return string|array the configured fallback entry, or the main
     *         language code when nothing is configured for $variant
     */
    public function getVariantFallbacks( $variant ) {
        if ( isset( $this->mVariantFallbacks[$variant] ) ) {
            return $this->mVariantFallbacks[$variant];
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the title produced by the conversion rule.
     *
     * @return string|bool the converted title, or false if no rule set one
     */
    public function getConvRuleTitle() {
        return $this->mConvRuleTitle;
    }

    /**
     * Get the preferred language variant.
     *
     * Sources are tried in this order: the URL, then the user preference
     * (logged-in users) or the Accept-Language header (everyone else),
     * then $wgDefaultLanguageVariant.
     *
     * @return string the preferred variant, or the main language code when
     *         none of the sources yields a valid variant
     */
    public function getPreferredVariant() {
        global $wgDefaultLanguageVariant, $wgUser;

        $req = $this->getURLVariant();

        if ( $wgUser->isLoggedIn() && !$req ) {
            $req = $this->getUserVariant();
        } elseif ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        // This function, unlike the other get*Variant functions, is
        // not memoized (i.e. its return value is not cached) since
        // new information might appear during processing after this
        // is first called.
        if ( $this->validateVariant( $req ) ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the default variant, ignoring the user preference: URL first,
     * then the Accept-Language header, then $wgDefaultLanguageVariant.
     *
     * @return string the default variant, or the main language code
     */
    public function getDefaultVariant() {
        global $wgDefaultLanguageVariant;

        $req = $this->getURLVariant();

        if ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        if ( $req ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Check whether a code is one of this converter's variants.
     *
     * @param string|null $variant the variant to validate
     * @return string|null $variant if it is listed in $mVariants, else null
     */
    public function validateVariant( $variant = null ) {
        if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
            return $variant;
        }
        return null;
    }

    /**
     * Get the variant requested through the URL ('variant' parameter,
     * falling back to 'uselang'). A valid result is memoized.
     *
     * @return string|null the validated variant, or null
     */
    public function getURLVariant() {
        global $wgRequest;

        if ( $this->mURLVariant ) {
            return $this->mURLVariant;
        }

        // see if the preference is set in the request
        $ret = $wgRequest->getText( 'variant' );

        if ( !$ret ) {
            $ret = $wgRequest->getVal( 'uselang' );
        }

        return $this->mURLVariant = $this->validateVariant( $ret );
    }

    /**
     * Determine the variant from the user's options.
     *
     * @return string|null the validated variant, or null
     */
    protected function getUserVariant() {
        global $wgUser, $wgContLang;

        // memoizing this function wreaks havoc on parserTest.php
        /*
        if ( $this->mUserVariant ) {
            return $this->mUserVariant;
        }
        */

        // Get language variant preference from logged in users
        // Don't call this on stub objects because that causes infinite
        // recursion during initialisation
        if ( $wgUser->isLoggedIn() ) {
            if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
                $ret = $wgUser->getOption( 'variant' );
            } else {
                $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
            }
        } else {
            // figure out user lang without constructing wgLang to avoid
            // infinite recursion
            $ret = $wgUser->getOption( 'language' );
        }

        return $this->mUserVariant = $this->validateVariant( $ret );
    }

    /**
     * Determine the variant from the request's Accept-Language data.
     * Languages are tried in the order returned by the request; if none is
     * a valid variant, their configured fallbacks are tried afterwards.
     * A valid result is memoized.
     *
     * @return string|null the validated variant, or null
     */
    protected function getHeaderVariant() {
        global $wgRequest;

        if ( $this->mHeaderVariant ) {
            return $this->mHeaderVariant;
        }

        // see if some supported language variant is set in the
        // HTTP header.
        $languages = array_keys( $wgRequest->getAcceptLang() );
        if ( empty( $languages ) ) {
            return null;
        }

        $fallbackLanguages = array();
        foreach ( $languages as $language ) {
            $this->mHeaderVariant = $this->validateVariant( $language );
            if ( $this->mHeaderVariant ) {
                break;
            }

            // To see if there are fallbacks of current language.
            // We record these fallback variants, and process
            // them later.
            $fallbacks = $this->getVariantFallbacks( $language );
            if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
                $fallbackLanguages[] = $fallbacks;
            } elseif ( is_array( $fallbacks ) ) {
                $fallbackLanguages =
                    array_merge( $fallbackLanguages, $fallbacks );
            }
        }

        if ( !$this->mHeaderVariant ) {
            // process fallback languages now
            $fallback_languages = array_unique( $fallbackLanguages );
            foreach ( $fallback_languages as $language ) {
                $this->mHeaderVariant = $this->validateVariant( $language );
                if ( $this->mHeaderVariant ) {
                    break;
                }
            }
        }

        return $this->mHeaderVariant;
    }

    /**
     * Dictionary-based conversion.
     * HTML markup, HTML entities and parser placeholders are left alone,
     * except that the 'title' and 'alt' attributes of matched elements are
     * converted too.
     *
     * @param string $text the text to be converted
     * @param string|bool $toVariant the target variant; false means the
     *        preferred variant
     * @return string the converted text ($text unchanged when
     *         guessVariant() reports that no conversion is needed)
     */
    public function autoConvert( $text, $toVariant = false ) {
        wfProfileIn( __METHOD__ );

        $this->loadTables();

        if ( !$toVariant ) {
            $toVariant = $this->getPreferredVariant();
            if ( !$toVariant ) {
                wfProfileOut( __METHOD__ );
                return $text;
            }
        }

        if ( $this->guessVariant( $text, $toVariant ) ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }

        /* we convert everything except:
           1. HTML markups (anything between < and >)
           2. HTML entities
           3. placeholders created by the parser
        */
        global $wgParser;
        if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
            $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
        } else {
            $marker = '';
        }

        // this one is needed when the text is inside an HTML markup
        $htmlfix = '|<[^>]+$|^[^<>]*>';

        // disable convert to variants between <code> tags
        $codefix = '<code>.+?<\/code>|';
        // disable conversion of <script> tags
        $scriptfix = '<script.*?>.*?<\/script>|';
        // disable conversion of <pre> tags
        $prefix = '<pre.*?>.*?<\/pre>|';

        $reg = '/' . $codefix . $scriptfix . $prefix .
            '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
        $startPos = 0;
        $sourceBlob = '';
        $literalBlob = '';

        // Guard against delimiter nulls in the input
        $text = str_replace( "\000", '', $text );

        $markupMatches = null;
        $elementMatches = null;
        while ( $startPos < strlen( $text ) ) {
            if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
                $elementPos = $markupMatches[0][1];
                $element = $markupMatches[0][0];
            } else {
                $elementPos = strlen( $text );
                $element = '';
            }

            // Queue the part before the markup for translation in a batch
            $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

            // Advance to the next position
            $startPos = $elementPos + strlen( $element );

            // Translate any alt or title attributes inside the matched element
            if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
                $elementMatches ) )
            {
                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
                $changed = false;
                foreach ( array( 'title', 'alt' ) as $attrName ) {
                    if ( !isset( $attrs[$attrName] ) ) {
                        continue;
                    }
                    $attr = $attrs[$attrName];
                    // Don't convert URLs. A '://' at offset 0 has no scheme
                    // in front of it, so it is still converted.
                    if ( !strpos( $attr, '://' ) ) {
                        $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
                    }

                    // Remove HTML tags to avoid disrupting the layout
                    $attr = preg_replace( '/<[^>]+>/', '', $attr );
                    if ( $attr !== $attrs[$attrName] ) {
                        $attrs[$attrName] = $attr;
                        $changed = true;
                    }
                }
                if ( $changed ) {
                    $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                        $elementMatches[3];
                }
            }
            $literalBlob .= $element . "\000";
        }

        // Do the main translation batch
        $translatedBlob = $this->translate( $sourceBlob, $toVariant );

        // Put the output back together: translated chunk, literal element,
        // translated chunk, literal element, ...
        $translatedIter = StringUtils::explode( "\000", $translatedBlob );
        $literalIter = StringUtils::explode( "\000", $literalBlob );
        $output = '';
        while ( $translatedIter->valid() && $literalIter->valid() ) {
            $output .= $translatedIter->current();
            $output .= $literalIter->current();
            $translatedIter->next();
            $literalIter->next();
        }

        wfProfileOut( __METHOD__ );
        return $output;
    }

    /**
     * Translate a string to a variant using that variant's table.
     * No markup parsing is done here.
     *
     * @param string $text the text to be translated
     * @param string $variant the target variant code; must have an entry
     *        in $mTables
     * @return string the translated text; $text is returned untouched when
     *         trim( $text ) is falsy (empty, whitespace only, or "0")
     */
    public function translate( $text, $variant ) {
        wfProfileIn( __METHOD__ );
        // If $text is empty or only includes spaces, do nothing
        // Otherwise translate it
        if ( trim( $text ) ) {
            $this->loadTables();
            $text = $this->mTables[$variant]->replace( $text );
        }
        wfProfileOut( __METHOD__ );
        return $text;
    }

    /**
     * Translate a string into every enabled variant.
     *
     * @param string $text the text to be converted
     * @return array map of variant code => translated text
     */
    public function autoConvertToAllVariants( $text ) {
        wfProfileIn( __METHOD__ );
        $this->loadTables();

        $ret = array();
        foreach ( $this->mVariants as $variant ) {
            $ret[$variant] = $this->translate( $text, $variant );
        }

        wfProfileOut( __METHOD__ );
        return $ret;
    }

    /**
     * Convert a link text into every enabled variant. This base
     * implementation simply delegates to autoConvertToAllVariants().
     *
     * @param string $text the text to be converted
     * @return array map of variant code => translated text
     */
    public function convertLinkToAllVariants( $text ) {
        return $this->autoConvertToAllVariants( $text );
    }

    /**
     * Apply a manual conversion rule: remember its title (if any) and
     * add or remove its pairs in the loaded tables.
     *
     * @param $convRule ConverterRule: the parsed rule
     */
    protected function applyManualConv( $convRule ) {
        // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
        // title conversion.
        // Bug 24072: $mConvRuleTitle was overwritten by other manual
        // rule(s) not for title, this breaks the title conversion.
        $newConvRuleTitle = $convRule->getTitle();
        if ( $newConvRuleTitle ) {
            // So I add an empty check for getTitle()
            $this->mConvRuleTitle = $newConvRuleTitle;
        }

        // merge/remove manual conversion rules to/from global table
        $convTable = $convRule->getConvTable();
        $action = $convRule->getRulesAction();
        foreach ( $convTable as $variant => $pair ) {
            if ( !$this->validateVariant( $variant ) ) {
                continue;
            }

            if ( $action == 'add' ) {
                foreach ( $pair as $from => $to ) {
                    // to ensure that $from and $to not be left blank
                    // so $this->translate() could always return a string
                    if ( $from || $to ) {
                        // more efficient than array_merge(), about 2.5 times.
                        $this->mTables[$variant]->setPair( $from, $to );
                    }
                }
            } elseif ( $action == 'remove' ) {
                $this->mTables[$variant]->removeArray( $pair );
            }
        }
    }

    /**
     * Convert a title (namespace prefix plus text) to the preferred variant.
     *
     * @param $title Title: the title to convert
     * @return string the converted "Namespace:Text" (no prefix for NS_MAIN)
     */
    public function convertTitle( $title ) {
        $variant = $this->getPreferredVariant();
        $index = $title->getNamespace();
        if ( $index !== NS_MAIN ) {
            $text = $this->convertNamespace( $index, $variant ) . ':';
        } else {
            $text = '';
        }
        $text .= $this->translate( $title->getText(), $variant );
        return $text;
    }

    /**
     * Get the namespace display name in a variant.
     *
     * @param int $index the namespace id
     * @param string|null $variant the target variant; null means the
     *        preferred variant
     * @return string the namespace name ('' for NS_MAIN)
     */
    public function convertNamespace( $index, $variant = null ) {
        if ( $variant === null ) {
            $variant = $this->getPreferredVariant();
        }
        if ( $index === NS_MAIN ) {
            return '';
        } else {
            // First check if a message gives a converted name in the target variant.
            $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
            if ( $nsConvMsg->exists() ) {
                return $nsConvMsg->plain();
            }
            // Then check if a message gives a converted name in content language
            // which needs extra translation to the target variant.
            $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
            if ( $nsConvMsg->exists() ) {
                return $this->translate( $nsConvMsg->plain(), $variant );
            }
            // No message exists, retrieve it from the target variant's namespace names.
            $langObj = $this->mLangObj->factory( $variant );
            return $langObj->getFormattedNsText( $index );
        }
    }

    /**
     * Convert text to the preferred variant, honouring -{ }- markup.
     *
     * @param string $text the text to be converted
     * @return string the converted text
     */
    public function convert( $text ) {
        $variant = $this->getPreferredVariant();
        return $this->convertTo( $text, $variant );
    }

    /**
     * Same as convert(), but to an explicitly given variant.
     *
     * @param string $text the text to be converted
     * @param string $variant the target variant code
     * @return string the converted text; $text unchanged when
     *         $wgDisableLangConversion is set
     */
    public function convertTo( $text, $variant ) {
        global $wgDisableLangConversion;
        if ( $wgDisableLangConversion ) {
            return $text;
        }
        // Reset converter state for a new converter run.
        $this->mConvRuleTitle = false;
        return $this->recursiveConvertTopLevel( $text, $variant );
    }

    /**
     * Recursively convert text at the "top" level, i.e. outside any
     * -{ }- rule. Plain segments go through autoConvert() (unless
     * guessVariant() says the text needs no conversion); each rule is
     * handed to recursiveConvertRule().
     *
     * @param string $text the text to be converted
     * @param string $variant the target variant code
     * @param int $depth current nesting depth of the parser
     * @return string the converted text
     */
    protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
        $startPos = 0;
        $out = '';
        $length = strlen( $text );
        $shouldConvert = !$this->guessVariant( $text, $variant );

        while ( $startPos < $length ) {
            $pos = strpos( $text, '-{', $startPos );

            if ( $pos === false ) {
                // No more markup, append final segment
                $fragment = substr( $text, $startPos );
                $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
                return $out;
            }

            // Markup found
            // Append initial segment
            $fragment = substr( $text, $startPos, $pos - $startPos );
            $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

            // Advance position
            $startPos = $pos;

            // Do recursive conversion
            $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
        }

        return $out;
    }

    /**
     * Recursively convert text inside a -{ }- rule.
     *
     * @param string $text the text being parsed
     * @param string $variant the target variant code
     * @param int &$startPos in: offset of the opening '-{'; out: offset
     *        just past the processed rule
     * @param int $depth current nesting depth of the parser
     * @throws MWException if $startPos does not point at '-{'
     * @return string the display text of the rule; for an unclosed rule,
     *         '-{' followed by the auto-converted remainder
     */
    protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
        // Quick sanity check (no function calls)
        if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
            throw new MWException( __METHOD__ . ': invalid input string' );
        }

        $startPos += 2;
        $inner = '';
        $warningDone = false;
        $length = strlen( $text );

        while ( $startPos < $length ) {
            $m = false;
            preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
            if ( !$m ) {
                // Unclosed rule
                break;
            }

            $token = $m[0][0];
            $pos = $m[0][1];

            // Markup found
            // Append initial segment
            $inner .= substr( $text, $startPos, $pos - $startPos );

            // Advance position
            $startPos = $pos;

            switch ( $token ) {
                case '-{':
                    // Check max depth
                    if ( $depth >= $this->mMaxDepth ) {
                        $inner .= '-{';
                        if ( !$warningDone ) {
                            $inner .= '<span class="error">' .
                                wfMessage( 'language-converter-depth-warning' )
                                    ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
                                '</span>';
                            $warningDone = true;
                        }
                        $startPos += 2;
                        // Go on with the enclosing while loop. A bare
                        // "continue" would target the switch and triggers
                        // a warning on PHP >= 7.3.
                        continue 2;
                    }
                    // Recursively parse another rule
                    $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                    break;
                case '}-':
                    // Apply the rule
                    $startPos += 2;
                    $rule = new ConverterRule( $inner, $this );
                    $rule->parse( $variant );
                    $this->applyManualConv( $rule );
                    return $rule->getDisplay();
                default:
                    throw new MWException( __METHOD__ . ': invalid regex match' );
            }
        }

        // Unclosed rule
        if ( $startPos < $length ) {
            $inner .= substr( $text, $startPos );
        }
        $startPos = $length;
        return '-{' . $this->autoConvert( $inner, $variant );
    }

    /**
     * If a link's target does not exist, look for an existing page under
     * one of the link text's variant spellings and, if one is found,
     * replace both $link and $nt with it.
     *
     * @param string &$link the link text; updated on success
     * @param mixed &$nt the Title object of the link (or a non-object);
     *        updated on success
     * @param bool $ignoreOtherCond when true, skip the request/user based
     *        conditions (redirect=no, action=edit|submit, linkconvert=no,
     *        'noconvertlink' user option) that normally disable the lookup
     * @return null
     */
    public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
        # If the article has already existed, there is no need to
        # check it again, otherwise it may cause a fault.
        if ( is_object( $nt ) && $nt->exists() ) {
            return;
        }

        global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
            $wgUser;
        $isredir = $wgRequest->getText( 'redirect', 'yes' );
        $action = $wgRequest->getText( 'action' );
        $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
        $disableLinkConversion = $wgDisableLangConversion
            || $wgDisableTitleConversion;
        $linkBatch = new LinkBatch();

        $ns = NS_MAIN;

        if ( $disableLinkConversion ||
             ( !$ignoreOtherCond &&
               ( $isredir == 'no'
                 || $action == 'edit'
                 || $action == 'submit'
                 || $linkconvert == 'no'
                 || $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
            return;
        }

        if ( is_object( $nt ) ) {
            $ns = $nt->getNamespace();
        }

        $variants = $this->autoConvertToAllVariants( $link );
        if ( !$variants ) { // give up
            return;
        }

        $titles = array();

        foreach ( $variants as $v ) {
            if ( $v != $link ) {
                $varnt = Title::newFromText( $v, $ns );
                if ( !is_null( $varnt ) ) {
                    $linkBatch->addObj( $varnt );
                    $titles[] = $varnt;
                }
            }
        }

        // fetch all variants in single query
        $linkBatch->execute();

        // the first variant title that exists wins
        foreach ( $titles as $varnt ) {
            if ( $varnt->getArticleID() > 0 ) {
                $nt = $varnt;
                $link = $varnt->getText();
                break;
            }
        }
    }

    /**
     * Build an extra hash fragment from the preferred variant.
     *
     * @return string '!' followed by the preferred variant code
     */
    public function getExtraHashOptions() {
        $variant = $this->getPreferredVariant();
        return '!' . $variant;
    }

    /**
     * Guess whether $text is already written in $variant, in which case
     * callers skip the conversion. This base implementation always
     * answers false; subclasses may override it.
     *
     * @param string $text the text to be checked
     * @param string $variant the variant code to check against
     * @return bool true if $text conforms to $variant
     */
    public function guessVariant( $text, $variant ) {
        return false;
    }

    /**
     * Load the default conversion tables into $mTables.
     * Must be overridden by a subclass.
     *
     * @throws MWException always, in this base implementation
     */
    public function loadDefaultTables() {
        $name = get_class( $this );
        throw new MWException( "Must implement loadDefaultTables() method in class $name" );
    }

    /**
     * Load the conversion tables, either from $wgLangConvMemc or by
     * rebuilding them (default tables merged with the parsed
     * Conversiontable/<variant> pages) and caching the result.
     * Does nothing when the tables were already loaded.
     *
     * @param bool $fromCache whether to try the cache first
     */
    public function loadTables( $fromCache = true ) {
        global $wgLangConvMemc;

        if ( $this->mTablesLoaded ) {
            return;
        }

        wfProfileIn( __METHOD__ );
        $this->mTablesLoaded = true;
        $this->mTables = false;
        if ( $fromCache ) {
            wfProfileIn( __METHOD__ . '-cache' );
            $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
            wfProfileOut( __METHOD__ . '-cache' );
        }
        if ( !$this->mTables
             || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
            wfProfileIn( __METHOD__ . '-recache' );
            // not in cache, or we need a fresh reload.
            // We will first load the default tables
            // then update them using things in MediaWiki:Conversiontable/*
            $this->loadDefaultTables();
            foreach ( $this->mVariants as $var ) {
                $cached = $this->parseCachedTable( $var );
                $this->mTables[$var]->mergeArray( $cached );
            }

            $this->postLoadTables();
            $this->mTables[self::CACHE_VERSION_KEY] = true;

            // third argument is the expiry passed to the cache
            // (presumably seconds, i.e. 12 hours)
            $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
            wfProfileOut( __METHOD__ . '-recache' );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Hook for post-processing after the conversion tables are built.
     * Does nothing in this base implementation.
     */
    public function postLoadTables() { }

    /**
     * Discard the loaded tables and rebuild them, bypassing the cache.
     */
    public function reloadTables() {
        if ( $this->mTables ) {
            unset( $this->mTables );
        }
        $this->mTablesLoaded = false;
        $this->loadTables( false );
    }

    /**
     * Parse the conversion table stored in the message
     * MediaWiki:Conversiontable/<code> (or one of its subpages).
     *
     * The page text is scanned for blocks of the form
     *   -{ from => to ; from2 => to2 // comment ; ... }-
     * Quote characters, '*' and '#' are stripped from each block first.
     * Links of the form [[MediaWiki:Conversiontable/<code>/sub|...]] are
     * collected and, if $recursive is set, parsed as subpages.
     *
     * NOTE(review): the list of already parsed pages is a function-level
     * static, so a second call for the same code/subpage during one
     * request returns an empty array - including calls made through
     * reloadTables(). Confirm that this is intended.
     *
     * @param string $code the variant code
     * @param string $subpage subpage name below Conversiontable/<code>
     * @param bool $recursive whether to also parse linked subpages
     * @return array map of source string => replacement string
     */
    public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
        static $parsed = array();

        $key = 'Conversiontable/' . $code;
        if ( $subpage ) {
            $key .= '/' . $subpage;
        }
        if ( array_key_exists( $key, $parsed ) ) {
            return array();
        }

        $parsed[$key] = true;

        if ( $subpage === '' ) {
            $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
        } else {
            $txt = false;
            $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
            if ( $title && $title->exists() ) {
                $revision = Revision::newFromTitle( $title );
                if ( $revision ) {
                    if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
                        $txt = $revision->getContent( Revision::RAW )->getNativeData();
                    }

                    // @todo in the future, use a specialized content model, perhaps based on json!
                }
            }
        }

        # Nothing to parse if there's no text
        if ( $txt === false || $txt === null || $txt === '' ) {
            return array();
        }

        // get all subpage links of the form
        // [[MediaWiki:Conversiontable/zh-xx/...|...]]
        $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
            ':Conversiontable';
        $subs = StringUtils::explode( '[[', $txt );
        $sublinks = array();
        foreach ( $subs as $sub ) {
            $link = explode( ']]', $sub, 2 );
            if ( count( $link ) != 2 ) {
                continue;
            }
            $b = explode( '|', $link[0], 2 );
            $b = explode( '/', trim( $b[0] ), 3 );
            if ( count( $b ) == 3 ) {
                $sublink = $b[2];
            } else {
                $sublink = '';
            }

            if ( $b[0] == $linkhead && $b[1] == $code ) {
                $sublinks[] = $sublink;
            }
        }

        // parse the mappings in this page
        $blocks = StringUtils::explode( '-{', $txt );
        $ret = array();
        $first = true;
        foreach ( $blocks as $block ) {
            if ( $first ) {
                // Skip the part before the first -{
                $first = false;
                continue;
            }
            $mappings = explode( '}-', $block, 2 );
            $stripped = str_replace( array( "'", '"', '*', '#' ), '',
                                     $mappings[0] );
            $table = StringUtils::explode( ';', $stripped );
            foreach ( $table as $t ) {
                $m = explode( '=>', $t, 3 );
                if ( count( $m ) != 2 ) {
                    continue;
                }
                // trim any trailing comments starting with '//'
                $tt = explode( '//', $m[1], 2 );
                $ret[trim( $m[0] )] = trim( $tt[0] );
            }
        }

        // recursively parse the subpages
        if ( $recursive ) {
            foreach ( $sublinks as $link ) {
                $s = $this->parseCachedTable( $code, $link, $recursive );
                $ret = array_merge( $ret, $s );
            }
        }

        if ( $this->mUcfirst ) {
            foreach ( $ret as $k => $v ) {
                $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
            }
        }
        return $ret;
    }

    /**
     * Enclose a string in "-{R| }-" so that the converter leaves it as is.
     * Text that already contains '-{' or '}-' is returned unchanged.
     *
     * @param string $text the text to be wrapped
     * @param bool $noParse unused by this implementation
     * @return string the wrapped (or unchanged) text
     */
    public function markNoConversion( $text, $noParse = false ) {
        # don't mark if already marked; compare against false so that a
        # marker at offset 0 is recognised as well
        if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
            return $text;
        }

        $ret = "-{R|$text}-";
        return $ret;
    }

    /**
     * Convert the sort key of a category link. This base implementation
     * returns the key unchanged.
     *
     * @param string $key the sort key
     * @return string the (unchanged) sort key
     */
    public function convertCategoryKey( $key ) {
        return $key;
    }

    /**
     * Handler called after page content was saved: when the page is a
     * MediaWiki:Conversiontable/<variant>[/...] page for one of this
     * converter's variants, the conversion tables are rebuilt.
     *
     * Only $page is used; the remaining parameters are accepted to match
     * the caller's signature.
     *
     * @param $page object providing getTitle()
     * @param $user unused
     * @param $content unused
     * @param $summary unused
     * @param $isMinor unused
     * @param $isWatch unused
     * @param $section unused
     * @param $flags unused
     * @param $revision unused
     * @return bool always true
     */
    public function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
            $isWatch, $section, $flags, $revision ) {
        $titleobj = $page->getTitle();
        if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
            $title = $titleobj->getDBkey();
            $t = explode( '/', $title, 3 );
            $c = count( $t );
            if ( $c > 1 && $t[0] == 'Conversiontable' ) {
                if ( $this->validateVariant( $t[1] ) ) {
                    $this->reloadTables();
                }
            }
        }
        return true;
    }

    /**
     * Armour rendered math against conversion by escaping the braces of
     * any '-{' / '}-' sequence as HTML entities.
     *
     * @param string $text the text to armour
     * @return string the armoured text
     */
    public function armourMath( $text ) {
        // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
        // any unwanted markup appearing in the math image tag.
        $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
        return $text;
    }

    /**
     * Get the (memoized) pattern that splits the content of a rule into
     * its per-variant parts.
     *
     * @return string a PCRE pattern suitable for preg_split()
     */
    public function getVarSeparatorPattern() {
        if ( is_null( $this->mVarSeparatorPattern ) ) {
            // varsep_pattern for preg_split:
            // text should be split by ";" only if a valid variant
            // name exists after the markup, for example:
            //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
            //  <span style="font-size:120%;">yyy</span>;}-
            // we should split it as:
            //  array(
            //    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
            //    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
            //    [2] => ''
            //   )
            $pat = '/;\s*(?=';
            foreach ( $this->mVariants as $variant ) {
                // the code is matched literally, so escape regex metacharacters
                $quoted = preg_quote( $variant, '/' );
                // zh-hans:xxx;zh-hant:yyy
                $pat .= $quoted . '\s*:|';
                // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
                $pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
            }
            $pat .= '\s*$)/';
            $this->mVarSeparatorPattern = $pat;
        }
        return $this->mVarSeparatorPattern;
    }
}
01144 
/**
 * Parser for one manual conversion rule, i.e. the text found between
 * "-{" and "}-".
 *
 * Typical use: construct with the rule text and the owning converter,
 * call parse(), then read the results through the getters.
 */
class ConverterRule {
    public $mText; // original text in -{text}-
    public $mConverter; // LanguageConverter object
    public $mRuleDisplay = '';
    public $mRuleTitle = false;
    public $mRules = '';// string : the text of the rules
    public $mRulesAction = 'none';
    public $mFlags = array();
    public $mVariantFlags = array();
    public $mConvTable = array();
    public $mBidtable = array();// array of the translation in each variant
    public $mUnidtable = array();// array of the translation in each variant

    /**
     * @param string $text The text between -{ and }-
     * @param object $converter The owning LanguageConverter
     */
    public function __construct( $text, $converter ) {
        $this->mText = $text;
        $this->mConverter = $converter;
    }

    /**
     * Tell whether a looked-up display text is absent.
     *
     * Lookups signal "nothing found" with false (or an empty string). The
     * string "0" is a legitimate text and must not be treated as missing,
     * which a plain truthiness test would do.
     *
     * @param mixed $text Value returned by a table lookup
     * @return bool True when no usable text was found
     */
    private static function isMissingText( $text ) {
        return $text === false || $text === null || $text === '';
    }

    /**
     * Look up the bidirectional-table text for the first of the given
     * variants that has an entry.
     *
     * @param string|array $variants One variant code or a list of codes,
     *  tried in order
     * @return string|bool The stored text, or false when none of the
     *  variants has an entry
     */
    public function getTextInBidtable( $variants ) {
        $variants = (array)$variants;
        if ( !$variants ) {
            return false;
        }
        foreach ( $variants as $variant ) {
            if ( isset( $this->mBidtable[$variant] ) ) {
                return $this->mBidtable[$variant];
            }
        }
        return false;
    }

    /**
     * Split the rule text into flags and rules and normalise the flags.
     *
     * Everything before the first "|" is read as a ";"-separated flag list
     * and filtered through the converter's mFlags map; the remainder becomes
     * $this->mRules. Results are stored in $this->mFlags and
     * $this->mVariantFlags.
     */
    public function parseFlags() {
        $text = $this->mText;
        $flags = array();
        $variantFlags = array();

        $sepPos = strpos( $text, '|' );
        if ( $sepPos !== false ) {
            $validFlags = $this->mConverter->mFlags;
            $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
            foreach ( $f as $ff ) {
                $ff = trim( $ff );
                if ( isset( $validFlags[$ff] ) ) {
                    $flags[$validFlags[$ff]] = true;
                }
            }
            $text = strval( substr( $text, $sepPos + 1 ) );
        }

        if ( !$flags ) {
            // no (valid) flag given: show the converted text
            $flags['S'] = true;
        } elseif ( isset( $flags['R'] ) ) {
            $flags = array( 'R' => true );// remove other flags
        } elseif ( isset( $flags['N'] ) ) {
            $flags = array( 'N' => true );// remove other flags
        } elseif ( isset( $flags['-'] ) ) {
            $flags = array( '-' => true );// remove other flags
        } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
            // T alone also hides the rule text
            $flags['H'] = true;
        } elseif ( isset( $flags['H'] ) ) {
            // replace A flag, and remove other flags except T and D
            $temp = array( '+' => true, 'H' => true );
            if ( isset( $flags['T'] ) ) {
                $temp['T'] = true;
            }
            if ( isset( $flags['D'] ) ) {
                $temp['D'] = true;
            }
            $flags = $temp;
        } else {
            if ( isset( $flags['A'] ) ) {
                $flags['+'] = true;
                $flags['S'] = true;
            }
            if ( isset( $flags['D'] ) ) {
                unset( $flags['S'] );
            }
            // try to find flags like "zh-hans", "zh-hant"
            // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
            $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
            if ( $variantFlags ) {
                $variantFlags = array_flip( $variantFlags );
                $flags = array();
            }
        }
        $this->mVariantFlags = $variantFlags;
        $this->mRules = $text;
        $this->mFlags = $flags;
    }

    /**
     * Build the bidirectional and unidirectional tables from $this->mRules.
     *
     * Accepted item forms are "variant:text" (bidirectional) and
     * "from=>variant:text" (unidirectional). If any item names something
     * that is not a key of the converter's mVariantNames, both tables are
     * emptied and parsing stops.
     */
    public function parseRules() {
        $rules = $this->mRules;
        $bidtable = array();
        $unidtable = array();
        $variants = $this->mConverter->mVariants;
        $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

        // Split according to $varsep_pattern, but ignore semicolons from HTML entities
        $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
        $choice = preg_split( $varsep_pattern, $rules );
        $choice = str_replace( "\x01", ';', $choice );

        foreach ( $choice as $c ) {
            $v = explode( ':', $c, 2 );
            if ( count( $v ) != 2 ) {
                // syntax error, skip
                continue;
            }
            $to = trim( $v[1] );
            $v = trim( $v[0] );
            $u = explode( '=>', $v, 2 );
            if ( count( $u ) == 1 ) {
                // Bidirectional item. $to is later used as a strtr() key, so
                // it must not be empty; "0" however is a valid text.
                if ( $to !== '' && in_array( $v, $variants, true ) ) {
                    $bidtable[$v] = $to;
                }
            } else {
                // Unidirectional item. $from becomes a strtr() key and must
                // therefore not be empty either.
                $from = trim( $u[0] );
                $v = trim( $u[1] );
                if ( $from !== '' && $to !== '' && in_array( $v, $variants, true ) ) {
                    $unidtable[$v][$from] = $to;
                }
            }
            // syntax error, pass
            if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                $bidtable = array();
                $unidtable = array();
                break;
            }
        }
        $this->mBidtable = $bidtable;
        $this->mUnidtable = $unidtable;
    }

    /**
     * Produce a human-readable description of the parsed rules, using the
     * converter's variant names and description separators.
     *
     * @return string Description text
     */
    public function getRulesDesc() {
        $codesep = $this->mConverter->mDescCodeSep;
        $varsep = $this->mConverter->mDescVarSep;
        $text = '';
        foreach ( $this->mBidtable as $k => $v ) {
            $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
        }
        foreach ( $this->mUnidtable as $k => $a ) {
            foreach ( $a as $from => $to ) {
                $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
                    "$codesep$to$varsep";
            }
        }
        return $text;
    }

    /**
     * Pick the text to display for the given variant.
     *
     * Lookup order: the variant's bidirectional entry, its fallbacks'
     * bidirectional entries, the first unidirectional target for the
     * variant, and - only when manual conversion is 'disable' for the
     * variant - the first text found in either table.
     *
     * @param string $variant Variant code
     * @return string|bool The raw rules when both tables are empty, the
     *  selected text otherwise, or false when nothing matched
     */
    public function getRuleConvertedStr( $variant ) {
        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;

        if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
            return $this->mRules;
        }

        // display current variant in bidirectional array
        $disp = $this->getTextInBidtable( $variant );
        // or display current variant in fallbacks
        if ( self::isMissingText( $disp ) ) {
            $disp = $this->getTextInBidtable(
                $this->mConverter->getVariantFallbacks( $variant ) );
        }
        // or display current variant in unidirectional array
        if ( self::isMissingText( $disp ) && array_key_exists( $variant, $unidtable ) ) {
            $disp = array_values( $unidtable[$variant] );
            $disp = $disp[0];
        }
        // or display first text under disable manual convert
        if ( self::isMissingText( $disp )
            && $this->mConverter->mManualLevel[$variant] == 'disable'
        ) {
            if ( count( $bidtable ) > 0 ) {
                $disp = array_values( $bidtable );
                $disp = $disp[0];
            } else {
                $disp = array_values( $unidtable );
                $disp = array_values( $disp[0] );
                $disp = $disp[0];
            }
        }
        return $disp;
    }

    /**
     * Pick the title text for the given variant.
     *
     * For the main language code only a text targeting exactly that variant
     * is used; otherwise the result of the lookup (false when nothing is
     * set) is returned so the real page name can be shown. Other variants
     * are resolved through getRuleConvertedStr().
     *
     * @param string $variant Variant code
     * @return string|bool Title text, or false when none applies
     */
    public function getRuleConvertedTitle( $variant ) {
        if ( $variant !== $this->mConverter->mMainLanguageCode ) {
            return $this->getRuleConvertedStr( $variant );
        }

        $disp = $this->getTextInBidtable( $variant );
        if ( !self::isMissingText( $disp ) ) {
            return $disp;
        }
        if ( array_key_exists( $variant, $this->mUnidtable ) ) {
            $disp = array_values( $this->mUnidtable[$variant] );
            $disp = $disp[0];
        }
        // Assigned above or still the failed lookup result.
        return $disp;
    }

    /**
     * Fill $this->mConvTable from the bidirectional and unidirectional
     * tables, honouring the converter's per-variant manual level.
     */
    public function generateConvTable() {
        // Special case optimisation
        if ( !$this->mBidtable && !$this->mUnidtable ) {
            $this->mConvTable = array();
            return;
        }

        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;
        $manLevel = $this->mConverter->mManualLevel;

        $vmarked = array();
        foreach ( $this->mConverter->mVariants as $v ) {
            /* for bidirectional array
                fill in the missing variants, if any,
                with fallbacks */
            if ( !isset( $bidtable[$v] ) ) {
                $variantFallbacks =
                    $this->mConverter->getVariantFallbacks( $v );
                $vf = $this->getTextInBidtable( $variantFallbacks );
                if ( !self::isMissingText( $vf ) ) {
                    $bidtable[$v] = $vf;
                }
            }

            if ( isset( $bidtable[$v] ) ) {
                foreach ( $vmarked as $vo ) {
                    // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                    // or -{H|zh:WordZh;zh-tw:WordTw}-
                    // or -{-|zh:WordZh;zh-tw:WordTw}-
                    // to introduce a custom mapping between
                    // words WordZh and WordTw in the whole text
                    if ( $manLevel[$v] == 'bidirectional' ) {
                        $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                    }
                    if ( $manLevel[$vo] == 'bidirectional' ) {
                        $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                    }
                }
                $vmarked[] = $v;
            }
            /* for unidirectional array fill to convert tables */
            if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                && isset( $unidtable[$v] )
            ) {
                if ( isset( $this->mConvTable[$v] ) ) {
                    $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
                } else {
                    $this->mConvTable[$v] = $unidtable[$v];
                }
            }
        }
    }

    /**
     * Parse the rule: flags, rules, display text, title and conversion
     * table.
     *
     * @param string|null $variant Variant to render for; when empty the
     *  converter's preferred variant is used
     */
    public function parse( $variant = null ) {
        if ( !$variant ) {
            $variant = $this->mConverter->getPreferredVariant();
        }

        $this->parseFlags();
        $flags = $this->mFlags;

        // convert to specified variant
        // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
        if ( $this->mVariantFlags ) {
            // check if current variant in flags
            if ( isset( $this->mVariantFlags[$variant] ) ) {
                // then convert <text to convert> to current language
                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
                    $variant );
            } else {
                // if current variant is not in flags,
                // then we check its fallback variants.
                $variantFallbacks =
                    $this->mConverter->getVariantFallbacks( $variant );
                if ( is_array( $variantFallbacks ) ) {
                    foreach ( $variantFallbacks as $variantFallback ) {
                        // if current variant's fallback exist in flags
                        if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
                            // then convert <text to convert> to fallback language
                            $this->mRules =
                                $this->mConverter->autoConvert( $this->mRules,
                                    $variantFallback );
                            break;
                        }
                    }
                }
            }
            $this->mFlags = $flags = array( 'R' => true );
        }

        if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
            // decode => HTML entities modified by Sanitizer::removeHTMLtags
            $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
            $this->parseRules();
        }
        $rules = $this->mRules;

        if ( !$this->mBidtable && !$this->mUnidtable ) {
            if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                // fill all variants if text in -{A/H/-|text} without rules
                foreach ( $this->mConverter->mVariants as $v ) {
                    $this->mBidtable[$v] = $rules;
                }
            } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
                $this->mFlags = $flags = array( 'R' => true );
            }
        }

        // false marks "no flag produced a display text" (see error case below)
        $this->mRuleDisplay = false;
        foreach ( $flags as $flag => $unused ) {
            switch ( $flag ) {
                case 'R':
                    // if we don't do content convert, still strip the -{}- tags
                    $this->mRuleDisplay = $rules;
                    break;
                case 'N':
                    // process N flag: output current variant name
                    $ruleVar = trim( $rules );
                    if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
                        $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
                    } else {
                        $this->mRuleDisplay = '';
                    }
                    break;
                case 'D':
                    // process D flag: output rules description
                    $this->mRuleDisplay = $this->getRulesDesc();
                    break;
                case 'H':
                    // process H,- flag or T only: output nothing
                    $this->mRuleDisplay = '';
                    break;
                case '-':
                    $this->mRulesAction = 'remove';
                    $this->mRuleDisplay = '';
                    break;
                case '+':
                    $this->mRulesAction = 'add';
                    $this->mRuleDisplay = '';
                    break;
                case 'S':
                    $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
                    break;
                case 'T':
                    $this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
                    $this->mRuleDisplay = '';
                    break;
                default:
                    // ignore unknown flags (but see error case below)
            }
        }
        if ( $this->mRuleDisplay === false ) {
            $this->mRuleDisplay = '<span class="error">'
                . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
                . '</span>';
        }

        $this->generateConvTable();
    }

    /**
     * Not implemented: the body is empty, so callers receive null.
     *
     * @todo Implement this method
     */
    public function hasRules() {
        // TODO:
    }

    /**
     * @return string|bool Text to display in place of the rule, as set by
     *  parse()
     */
    public function getDisplay() {
        return $this->mRuleDisplay;
    }

    /**
     * @return string|bool Converted title set by a T flag, false if none
     */
    public function getTitle() {
        return $this->mRuleTitle;
    }

    /**
     * @return string 'none', 'add' or 'remove'
     */
    public function getRulesAction() {
        return $this->mRulesAction;
    }

    /**
     * @return array Conversion table built by generateConvTable(), keyed by
     *  variant code
     */
    public function getConvTable() {
        return $this->mConvTable;
    }

    /**
     * @return string The rule text with the flag prefix removed
     */
    public function getRules() {
        return $this->mRules;
    }

    /**
     * @return array Normalised flags, each mapped to true
     */
    public function getFlags() {
        return $this->mFlags;
    }
}