MediaWiki
REL1_23
|
<?php
/**
 * Base class for language variant conversion.
 *
 * Converts text between the variants of one language, using per-variant
 * replacement tables combined with manual conversion rules written inline
 * as -{ ... }- markup. Subclasses must implement loadDefaultTables() to
 * supply the built-in tables.
 */
class LanguageConverter {

	/**
	 * Codes of the languages that have variants.
	 *
	 * @var array
	 */
	public static $languagesWithVariants = array(
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	);

	/** Code of the main language these variants belong to. */
	public $mMainLanguageCode;

	/**
	 * $mVariants: enabled variant codes (constructor argument minus
	 * $wgDisabledVariants); $mVariantFallbacks: fallback(s) keyed by variant
	 * code; $mVariantNames: result of Language::fetchLanguageNames().
	 */
	public $mVariants, $mVariantFallbacks, $mVariantNames;

	/** True once loadTables() has run. */
	public $mTablesLoaded = false;

	/**
	 * Conversion tables keyed by variant code, plus a CACHE_VERSION_KEY
	 * marker entry; false until loaded.
	 */
	public $mTables;

	/** 'bidirectional', 'unidirectional' or 'disable' for each variant. */
	public $mManualLevel;

	/** Cache key under which the conversion tables are stored. */
	public $mCacheKey;

	/** Language object handed to the constructor. */
	public $mLangObj;

	/** Recognised rule flags (defaults, caller-supplied flags, variant codes). */
	public $mFlags;

	/** Separator characters; not read inside this class. */
	public $mDescCodeSep = ':', $mDescVarSep = ';';

	/** When true, parseCachedTable() also adds ucfirst()-ed copies of each pair. */
	public $mUcfirst = false;

	/** Title set by the most recent manual rule that had one, or false. */
	public $mConvRuleTitle = false;

	/** Memoized result of getURLVariant(). */
	public $mURLVariant;

	/** Last result of getUserVariant() (stored, but never used to short-circuit). */
	public $mUserVariant;

	/** Memoized result of getHeaderVariant(). */
	public $mHeaderVariant;

	/** Maximum nesting depth of -{ ... }- rules. */
	public $mMaxDepth = 10;

	/** Lazily built regex from getVarSeparatorPattern(). */
	public $mVarSeparatorPattern;

	/** Marker key stored in $mTables; cached tables lacking it are rebuilt. */
	const CACHE_VERSION_KEY = 'VERSION 7';

	/**
	 * @param Language $langobj Language object this converter works for
	 * @param string $maincode Main language code
	 * @param array $variants Supported variant codes
	 * @param array $variantfallbacks Fallback(s) keyed by variant code
	 * @param array $flags Extra flags, merged over the defaults
	 * @param array $manualLevel Manual conversion level keyed by variant
	 *   code; variants not listed get 'bidirectional'
	 */
	public function __construct( $langobj, $maincode, $variants = array(),
		$variantfallbacks = array(), $flags = array(),
		$manualLevel = array() ) {
		global $wgDisabledVariants;
		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
		$defaultflags = array(
			// 'S' show converted text
			// '+' add rules for all text
			// 'E' the given flags are erroneous
			// the flags above are reserved for the program
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implemented)
			'H' => 'H', // add rule for convert code (but no display in placed code)
			'N' => 'N' // current variant name
		);
		$this->mFlags = array_merge( $defaultflags, $flags );
		foreach ( $this->mVariants as $v ) {
			if ( array_key_exists( $v, $manualLevel ) ) {
				$this->mManualLevel[$v] = $manualLevel[$v];
			} else {
				$this->mManualLevel[$v] = 'bidirectional';
			}
			// Every variant code is itself a valid flag.
			$this->mFlags[$v] = $v;
		}
	}

	/**
	 * Get all enabled variants of this language.
	 *
	 * @return array Variant codes
	 */
	public function getVariants() {
		return $this->mVariants;
	}

	/**
	 * Get the configured fallback(s) for a variant.
	 *
	 * @param string $variant Variant code
	 * @return string|array The entry from the fallback map (callers in this
	 *   class handle both a string and an array), or the main language code
	 *   when the variant has no entry
	 */
	public function getVariantFallbacks( $variant ) {
		if ( isset( $this->mVariantFallbacks[$variant] ) ) {
			return $this->mVariantFallbacks[$variant];
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the title produced by the conversion rule, if any.
	 *
	 * @return string|bool The stored rule title, or false when none was set
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}

	/**
	 * Get the preferred variant for the current request and user.
	 *
	 * Order of precedence: URL, then user preference (logged-in users) or
	 * Accept-Language header (everyone else), then $wgDefaultLanguageVariant,
	 * and finally the main language code.
	 *
	 * @return string Variant code
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. its return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		if ( $this->validateVariant( $req ) ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the default variant, ignoring any user preference.
	 *
	 * Order of precedence: URL, Accept-Language header,
	 * $wgDefaultLanguageVariant, main language code.
	 *
	 * @return string Variant code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$req = $this->getURLVariant();

		if ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Check that a variant code is one of the enabled variants.
	 *
	 * @param string|null $variant Variant code to check
	 * @return string|null The code itself when valid, otherwise null
	 */
	public function validateVariant( $variant = null ) {
		// Strict comparison: a loose in_array() would accept values such as
		// true (and 0 on PHP < 8) as "matching" a variant code.
		if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		return null;
	}

	/**
	 * Get the variant requested in the URL ('variant', else 'uselang').
	 *
	 * A valid result is memoized in $mURLVariant.
	 *
	 * @return string|null Validated variant code, or null
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$ret = $wgRequest->getText( 'variant' );

		if ( !$ret ) {
			$ret = $wgRequest->getVal( 'uselang' );
		}

		$this->mURLVariant = $this->validateVariant( $ret );
		return $this->mURLVariant;
	}

	/**
	 * Get the variant from the user's preferences.
	 *
	 * Deliberately not memoized: caching the result wreaks havoc on
	 * parserTest.php.
	 *
	 * @return string|null Validated variant code, or null
	 */
	protected function getUserVariant() {
		global $wgUser, $wgContLang;

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( $wgUser->isLoggedIn() ) {
			if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
				$ret = $wgUser->getOption( 'variant' );
			} else {
				$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
			}
		} else {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$ret = $wgUser->getOption( 'language' );
		}

		$this->mUserVariant = $this->validateVariant( $ret );
		return $this->mUserVariant;
	}

	/**
	 * Determine the variant from the Accept-Language HTTP header.
	 *
	 * Languages listed in the header are tried first, in order; if none is
	 * an enabled variant, their configured fallbacks are tried. A valid
	 * result is memoized in $mHeaderVariant.
	 *
	 * @return string|null Validated variant code, or null
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$languages = array_keys( $wgRequest->getAcceptLang() );
		if ( empty( $languages ) ) {
			return null;
		}

		$fallbackLanguages = array();
		foreach ( $languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages =
					array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			$fallbackLanguages = array_unique( $fallbackLanguages );
			foreach ( $fallbackLanguages as $language ) {
				$this->mHeaderVariant = $this->validateVariant( $language );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}

	/**
	 * Dictionary-based conversion of text that may contain HTML.
	 *
	 * Markup (tags, entities, parser placeholders, and the content of
	 * <code>, <script> and <pre>) is left untouched, except that 'title'
	 * and 'alt' attribute values are converted too. Manual -{ }- rules in
	 * the main text are NOT handled here; see recursiveConvertTopLevel().
	 *
	 * @param string $text Text to convert
	 * @param string|bool $toVariant Target variant; false means the
	 *   preferred variant
	 * @return string Converted text
	 */
	public function autoConvert( $text, $toVariant = false ) {
		wfProfileIn( __METHOD__ );

		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				wfProfileOut( __METHOD__ );
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			wfProfileOut( __METHOD__ );
			return $text;
		}

		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		*/
		global $wgParser;
		if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
			$marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
		} else {
			$marker = '';
		}

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>]+$|^[^<>]*>';

		// disable convert to variants between <code> tags
		$codefix = '<code>.+?<\/code>|';
		// disable conversion of <script> tags
		$scriptfix = '<script.*?>.*?<\/script>|';
		// disable conversion of <pre> tags
		$prefix = '<pre.*?>.*?<\/pre>|';

		$reg = '/' . $codefix . $scriptfix . $prefix .
			'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		$text = str_replace( "\000", '', $text );

		$markupMatches = null;
		$elementMatches = null;
		while ( $startPos < strlen( $text ) ) {
			if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
			} else {
				$elementPos = strlen( $text );
				$element = '';
			}

			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== ''
				&& preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
			) {
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				$changed = false;
				foreach ( array( 'title', 'alt' ) as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs
					if ( !strpos( $attr, '://' ) ) {
						$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$attr = preg_replace( '/<[^>]+>/', '', $attr );
					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together, interleaving translated text
		// segments with the untouched markup segments
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		wfProfileOut( __METHOD__ );
		return $output;
	}

	/**
	 * Translate a string to a variant using that variant's table.
	 *
	 * Does no markup parsing at all; the whole string is handed to the
	 * variant's table.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Variant code
	 * @return string Translated text ($text unchanged if it is empty or
	 *   whitespace-only)
	 * @throws MWException If no conversion table exists for $variant
	 */
	public function translate( $text, $variant ) {
		wfProfileIn( __METHOD__ );
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			if ( !isset( $this->mTables[$variant] ) ) {
				// Fail with a catchable error instead of a fatal
				// "call to a member function on a non-object".
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ .
					": no conversion table for variant '$variant'" );
			}
			$text = $this->mTables[$variant]->replace( $text );
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Translate a string into every enabled variant.
	 *
	 * @param string $text Text to convert
	 * @return array Translated text keyed by variant code
	 */
	public function autoConvertToAllVariants( $text ) {
		wfProfileIn( __METHOD__ );
		$this->loadTables();

		$ret = array();
		foreach ( $this->mVariants as $variant ) {
			$ret[$variant] = $this->translate( $text, $variant );
		}

		wfProfileOut( __METHOD__ );
		return $ret;
	}

	/**
	 * Apply a parsed manual conversion rule to the converter state.
	 *
	 * Stores the rule's title (if it has one) and adds or removes the
	 * rule's pairs to/from the per-variant tables.
	 *
	 * @param ConverterRule $convRule Already-parsed rule
	 */
	protected function applyManualConv( $convRule ) {
		// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
		// title conversion.
		// Bug 24072: $mConvRuleTitle was overwritten by other manual
		// rule(s) not for title, this breaks the title conversion.
		$newConvRuleTitle = $convRule->getTitle();
		if ( $newConvRuleTitle ) {
			// Only overwrite when this rule actually carries a title
			$this->mConvRuleTitle = $newConvRuleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$convTable = $convRule->getConvTable();
		$action = $convRule->getRulesAction();
		foreach ( $convTable as $variant => $pair ) {
			if ( !$this->validateVariant( $variant ) ) {
				continue;
			}

			if ( $action == 'add' ) {
				foreach ( $pair as $from => $to ) {
					// to ensure that $from and $to not be left blank
					// so $this->translate() could always return a string
					if ( $from || $to ) {
						// more efficient than array_merge(), about 2.5 times.
						$this->mTables[$variant]->setPair( $from, $to );
					}
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$variant]->removeArray( $pair );
			}
		}
	}

	/**
	 * Convert a title (namespace prefix and text) to the preferred variant.
	 *
	 * @param Title $title Title to convert
	 * @return string Converted "Namespace:Text" (no prefix for NS_MAIN)
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$index = $title->getNamespace();
		if ( $index !== NS_MAIN ) {
			$text = $this->convertNamespace( $index, $variant ) . ':';
		} else {
			$text = '';
		}
		$text .= $this->translate( $title->getText(), $variant );
		return $text;
	}

	/**
	 * Get the namespace name as shown in a variant.
	 *
	 * @param int $index Namespace index
	 * @param string|null $variant Variant code; null means the preferred
	 *   variant
	 * @return string Namespace name ('' for NS_MAIN)
	 */
	public function convertNamespace( $index, $variant = null ) {
		if ( $variant === null ) {
			$variant = $this->getPreferredVariant();
		}
		if ( $index === NS_MAIN ) {
			return '';
		} else {
			// First check if a message gives a converted name in the target variant.
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
			if ( $nsConvMsg->exists() ) {
				return $nsConvMsg->plain();
			}
			// Then check if a message gives a converted name in content language
			// which needs extra translation to the target variant.
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
			if ( $nsConvMsg->exists() ) {
				return $this->translate( $nsConvMsg->plain(), $variant );
			}
			// No message exists, retrieve it from the target variant's namespace names.
			$langObj = $this->mLangObj->factory( $variant );
			return $langObj->getFormattedNsText( $index );
		}
	}

	/**
	 * Convert text to the preferred variant, honouring -{ }- rules.
	 *
	 * @param string $text Text to convert
	 * @return string Converted text
	 */
	public function convert( $text ) {
		$variant = $this->getPreferredVariant();
		return $this->convertTo( $text, $variant );
	}

	/**
	 * Convert text to a given variant, honouring -{ }- rules.
	 *
	 * Returns $text unchanged when $wgDisableLangConversion is set.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Target variant code
	 * @return string Converted text
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;
		if ( $wgDisableLangConversion ) {
			return $text;
		}
		// Reset converter state for a new converter run.
		$this->mConvRuleTitle = false;
		return $this->recursiveConvertTopLevel( $text, $variant );
	}

	/**
	 * Convert text, handing each top-level -{ }- rule to
	 * recursiveConvertRule() and the text in between to autoConvert().
	 *
	 * @param string $text Text to convert
	 * @param string $variant Target variant code
	 * @param int $depth Current nesting depth
	 * @return string Converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$startPos = 0;
		$out = '';
		$length = strlen( $text );
		$shouldConvert = !$this->guessVariant( $text, $variant );

		while ( $startPos < $length ) {
			$pos = strpos( $text, '-{', $startPos );

			if ( $pos === false ) {
				// No more markup, append final segment
				$fragment = substr( $text, $startPos );
				$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
				return $out;
			}

			// Markup found
			// Append initial segment
			$fragment = substr( $text, $startPos, $pos - $startPos );
			$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

			// Advance position
			$startPos = $pos;

			// Do recursive conversion; $startPos is advanced by reference
			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
		}

		return $out;
	}

	/**
	 * Convert one -{ }- rule, including any rules nested inside it.
	 *
	 * @param string $text Full text being converted
	 * @param string $variant Target variant code
	 * @param int &$startPos Offset of the opening '-{'; on return, the
	 *   offset just past the consumed rule
	 * @param int $depth Current nesting depth
	 * @return string Display text of the rule; for an unclosed rule, '-{'
	 *   followed by the auto-converted remainder
	 * @throws MWException If $startPos does not point at '-{'
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						// "continue 2" targets the while loop explicitly; a bare
						// "continue" inside a switch acts like "break" and
						// triggers a warning on PHP >= 7.3.
						continue 2;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}

	/**
	 * If a link target does not exist, look for an existing page under one
	 * of its variant spellings and redirect the link to it.
	 *
	 * @param string &$link Link text; replaced by the found variant's text
	 * @param Title|mixed &$nt Title of the link; replaced by the found
	 *   variant's Title
	 * @param bool $ignoreOtherCond When true, skip the request-based checks
	 *   (redirect=no, action=edit/submit, linkconvert=no)
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$disableLinkConversion = $wgDisableLangConversion
			|| $wgDisableTitleConversion;

		if ( $disableLinkConversion ||
			( !$ignoreOtherCond &&
				( $isredir == 'no'
					|| $action == 'edit'
					|| $action == 'submit'
					|| $linkconvert == 'no' ) ) ) {
			return;
		}

		$ns = NS_MAIN;
		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
		}

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		// Only build the batch once we know we will actually query
		$linkBatch = new LinkBatch();
		$titles = array();

		foreach ( $variants as $v ) {
			if ( $v != $link ) {
				$varnt = Title::newFromText( $v, $ns );
				if ( !is_null( $varnt ) ) {
					$linkBatch->addObj( $varnt );
					$titles[] = $varnt;
				}
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		foreach ( $titles as $varnt ) {
			if ( $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $varnt->getText();
				break;
			}
		}
	}

	/**
	 * Get the extra hash-key component for the preferred variant.
	 *
	 * @return string '!' followed by the preferred variant code
	 */
	public function getExtraHashOptions() {
		$variant = $this->getPreferredVariant();
		return '!' . $variant;
	}

	/**
	 * Guess whether a text is already written in the given variant.
	 *
	 * The base implementation always answers false; when this returns
	 * true, autoConvert() and recursiveConvertTopLevel() skip automatic
	 * conversion.
	 *
	 * @param string $text Text to inspect
	 * @param string $variant Variant code
	 * @return bool
	 */
	public function guessVariant( $text, $variant ) {
		return false;
	}

	/**
	 * Load the built-in conversion tables into $mTables.
	 *
	 * Must be overridden by subclasses.
	 *
	 * @throws MWException Always, in this base implementation
	 */
	public function loadDefaultTables() {
		$name = get_class( $this );
		throw new MWException( "Must implement loadDefaultTables() method in class $name" );
	}

	/**
	 * Load the conversion tables, from cache or by rebuilding them.
	 *
	 * Does nothing if the tables were already loaded. On a cache miss (or
	 * a cached value without the version marker) the default tables are
	 * loaded, merged with the on-wiki MediaWiki:Conversiontable/* pages,
	 * and stored in the cache for 43200 seconds.
	 *
	 * @param bool $fromCache Whether to try the cache first
	 */
	public function loadTables( $fromCache = true ) {
		global $wgLangConvMemc;

		if ( $this->mTablesLoaded ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		$this->mTablesLoaded = true;
		$this->mTables = false;
		if ( $fromCache ) {
			wfProfileIn( __METHOD__ . '-cache' );
			$this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
			wfProfileOut( __METHOD__ . '-cache' );
		}
		if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
			wfProfileIn( __METHOD__ . '-recache' );
			// not in cache, or we need a fresh reload.
			// We will first load the default tables
			// then update them using things in MediaWiki:Conversiontable/*
			$this->loadDefaultTables();
			foreach ( $this->mVariants as $var ) {
				$cached = $this->parseCachedTable( $var );
				$this->mTables[$var]->mergeArray( $cached );
			}

			$this->postLoadTables();
			$this->mTables[self::CACHE_VERSION_KEY] = true;

			$wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			wfProfileOut( __METHOD__ . '-recache' );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Hook called by loadTables() after the default and on-wiki tables
	 * have been merged, before the result is cached. No-op here.
	 */
	public function postLoadTables() {
	}

	/**
	 * Discard the loaded tables and rebuild them, bypassing the cache read.
	 */
	public function reloadTables() {
		if ( $this->mTables ) {
			unset( $this->mTables );
		}
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}

	/**
	 * Parse the conversion table stored on-wiki under
	 * MediaWiki:Conversiontable/<code>[/<subpage>].
	 *
	 * Mappings are written as -{ from => to ; from => to // comment }-.
	 * Links of the form [[MediaWiki:Conversiontable/<code>/...]] found on
	 * the page name subpages that are parsed too when $recursive is true.
	 * Each page is parsed at most once per request (static memo); later
	 * calls for the same page return an empty array.
	 *
	 * @param string $code Variant code
	 * @param string $subpage Subpage name, '' for the top-level page
	 * @param bool $recursive Whether to follow subpage links
	 * @return array Map of source string => replacement string
	 */
	public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		static $parsed = array();

		$key = 'Conversiontable/' . $code;
		if ( $subpage ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return array();
		}

		$parsed[$key] = true;

		if ( $subpage === '' ) {
			$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
		} else {
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision ) {
					if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
						$txt = $revision->getContent( Revision::RAW )->getNativeData();
					}

					// @todo in the future, use a specialized content model, perhaps based on json!
				}
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return array();
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = array();
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				continue;
			}
			$b = explode( '|', $link[0], 2 );
			$b = explode( '/', trim( $b[0] ), 3 );
			if ( count( $b ) == 3 ) {
				$sublink = $b[2];
			} else {
				$sublink = '';
			}

			// count() guard: a link to the bare "MediaWiki:Conversiontable"
			// page has no variant segment, so $b[1] would be undefined.
			if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
				$sublinks[] = $sublink;
			}
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = array();
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			$mappings = explode( '}-', $block, 2 );
			$stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				$ret = array_merge( $ret, $s );
			}
		}

		if ( $this->mUcfirst ) {
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}

	/**
	 * Wrap text in -{R| }- so that it is not converted.
	 *
	 * Text that already contains converter markup is returned unchanged.
	 *
	 * @param string $text Text to protect
	 * @param bool $noParse Unused in this implementation
	 * @return string
	 */
	public function markNoConversion( $text, $noParse = false ) {
		# don't mark if already marked
		# (strict comparison: a marker at offset 0 must count as found)
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		$ret = "-{R|$text}-";
		return $ret;
	}

	/**
	 * Convert a category sort key. The base implementation returns the key
	 * unchanged.
	 *
	 * @param string $key Sort key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/**
	 * Handler for the PageContentSaveComplete hook: rebuild the conversion
	 * tables when a MediaWiki:Conversiontable/<variant>[/...] page for an
	 * enabled variant is saved.
	 *
	 * Only $page is used; the remaining parameters are part of the hook
	 * signature.
	 *
	 * @param WikiPage $page Page that was saved
	 * @param User $user
	 * @param Content $content
	 * @param string $summary
	 * @param bool $isMinor
	 * @param bool $isWatch
	 * @param string $section
	 * @param int $flags
	 * @param Revision $revision
	 * @return bool Always true
	 */
	public function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
		$isWatch, $section, $flags, $revision ) {
		$titleobj = $page->getTitle();
		if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
			$title = $titleobj->getDBkey();
			$t = explode( '/', $title, 3 );
			$c = count( $t );
			if ( $c > 1 && $t[0] == 'Conversiontable' ) {
				if ( $this->validateVariant( $t[1] ) ) {
					$this->reloadTables();
				}
			}
		}
		return true;
	}

	/**
	 * Neutralise converter markup inside math text.
	 *
	 * @param string $text Math text
	 * @return string Text with '-{' and '}-' entity-escaped
	 */
	public function armourMath( $text ) {
		// convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
		// any unwanted markup appearing in the math image tag.
		$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
		return $text;
	}

	/**
	 * Get the preg_split() pattern that separates the variant parts of a
	 * manual conversion rule. Built once and memoized.
	 *
	 * @return string Regular expression
	 */
	public function getVarSeparatorPattern() {
		if ( is_null( $this->mVarSeparatorPattern ) ) {
			// varsep_pattern for preg_split:
			// text should be split by ";" only if a valid variant
			// name exists after the markup, for example:
			//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
			//  <span style="font-size:120%;">yyy</span>;}-
			// we should split it as:
			//  array(
			//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
			//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
			//    [2] => ''
			//  )
			$pat = '/;\s*(?=';
			foreach ( $this->mVariants as $variant ) {
				// Variant codes are literal text, not regex syntax
				$quoted = preg_quote( $variant, '/' );
				// zh-hans:xxx;zh-hant:yyy
				$pat .= $quoted . '\s*:|';
				// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
				$pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
			}
			$pat .= '\s*$)/';
			$this->mVarSeparatorPattern = $pat;
		}
		return $this->mVarSeparatorPattern;
	}
}