MediaWiki
REL1_22
|
<?php
/**
 * Base class for language variant conversion.
 *
 * Holds the per-variant conversion tables, decides which variant the
 * current request should be rendered in, and converts text either
 * automatically (table based) or according to manual -{...}- rules
 * embedded in the text.
 */
class LanguageConverter {

    /**
     * Language codes listed as having variants.
     */
    static public $languagesWithVariants = array(
        'gan',
        'iu',
        'kk',
        'ku',
        'shi',
        'sr',
        'tg',
        'uz',
        'zh',
    );

    public $mMainLanguageCode;
    public $mVariants, $mVariantFallbacks, $mVariantNames;
    public $mTablesLoaded = false;
    public $mTables;
    // 'bidirectional' 'unidirectional' 'disable' for each variant
    public $mManualLevel;

    /**
     * Cache key for the conversion tables, built in the constructor
     * with wfMemcKey( 'conversiontables', $maincode ).
     */
    public $mCacheKey;

    public $mLangObj;
    public $mFlags;
    public $mDescCodeSep = ':', $mDescVarSep = ';';
    public $mUcfirst = false;
    public $mConvRuleTitle = false;
    public $mURLVariant;
    public $mUserVariant;
    public $mHeaderVariant;
    public $mMaxDepth = 10;
    public $mVarSeparatorPattern;

    const CACHE_VERSION_KEY = 'VERSION 6';

    /**
     * @param object $langobj Language object this converter belongs to
     * @param string $maincode Main language code
     * @param array $variants Supported variant codes; entries listed in
     *   $wgDisabledVariants are removed
     * @param array $variantfallbacks Map of variant => fallback(s)
     * @param array $flags Extra flags merged over the default flag set
     * @param array $manualLevel Map of variant => manual conversion level;
     *   variants not listed default to 'bidirectional'
     */
    public function __construct( $langobj, $maincode, $variants = array(),
        $variantfallbacks = array(), $flags = array(),
        $manualLevel = array() ) {
        global $wgDisabledVariants;
        $this->mLangObj = $langobj;
        $this->mMainLanguageCode = $maincode;
        $this->mVariants = array_diff( $variants, $wgDisabledVariants );
        $this->mVariantFallbacks = $variantfallbacks;
        $this->mVariantNames = Language::fetchLanguageNames();
        $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
        $defaultflags = array(
            // 'S' show converted text
            // '+' add rules for alltext
            // 'E' the given flags are erroneous
            // these flags above are reserved for program
            'A' => 'A', // add rule for convert code (all text convert)
            'T' => 'T', // title convert
            'R' => 'R', // raw content
            'D' => 'D', // convert description (subclass implement)
            '-' => '-', // remove convert (not implement)
            'H' => 'H', // add rule for convert code
                        // (but no display in placed code)
            'N' => 'N' // current variant name
        );
        $this->mFlags = array_merge( $defaultflags, $flags );
        foreach ( $this->mVariants as $v ) {
            if ( array_key_exists( $v, $manualLevel ) ) {
                $this->mManualLevel[$v] = $manualLevel[$v];
            } else {
                $this->mManualLevel[$v] = 'bidirectional';
            }
            // Every variant code is also accepted as a flag
            $this->mFlags[$v] = $v;
        }
    }

    /**
     * Get all enabled variants.
     *
     * @return array Variant codes
     */
    public function getVariants() {
        return $this->mVariants;
    }

    /**
     * Get the configured fallback(s) for a variant.
     *
     * @param string $variant Variant code
     * @return array|string The entry from the fallback map, or the main
     *   language code when the variant has no entry
     */
    public function getVariantFallbacks( $variant ) {
        if ( isset( $this->mVariantFallbacks[$variant] ) ) {
            return $this->mVariantFallbacks[$variant];
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the title produced by the conversion rule, if any.
     *
     * @return string|bool The converted title text, or false
     */
    public function getConvRuleTitle() {
        return $this->mConvRuleTitle;
    }

    /**
     * Get the preferred language variant for the current request.
     *
     * Sources are tried in order: URL, then user preference (logged-in
     * users) or Accept-Language header (everyone else), then
     * $wgDefaultLanguageVariant.
     *
     * @return string The preferred variant, or the main language code if
     *   none of the sources yields a valid variant
     */
    public function getPreferredVariant() {
        global $wgDefaultLanguageVariant, $wgUser;

        $req = $this->getURLVariant();

        if ( $wgUser->isLoggedIn() && !$req ) {
            $req = $this->getUserVariant();
        } elseif ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        // This function, unlike the other get*Variant functions, is
        // not memoized (i.e. its return value is not cached) since
        // new information might appear during processing after this
        // is first called.
        if ( $this->validateVariant( $req ) ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the default variant, ignoring the user preference.
     *
     * Sources are tried in order: URL, Accept-Language header, then
     * $wgDefaultLanguageVariant.
     *
     * @return string The default variant, or the main language code
     */
    public function getDefaultVariant() {
        global $wgDefaultLanguageVariant;

        $req = $this->getURLVariant();

        if ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        if ( $req ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Check that a variant code is one of the enabled variants.
     *
     * @param string|null $variant The variant to validate
     * @return mixed The variant if it is enabled, otherwise null
     */
    public function validateVariant( $variant = null ) {
        if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
            return $variant;
        }
        return null;
    }

    /**
     * Get the variant requested in the URL ('variant', falling back to
     * 'uselang'). A valid result is memoized in $mURLVariant.
     *
     * @return mixed Variant if one is found, null otherwise
     */
    public function getURLVariant() {
        global $wgRequest;

        if ( $this->mURLVariant ) {
            return $this->mURLVariant;
        }

        // see if the preference is set in the request
        $ret = $wgRequest->getText( 'variant' );

        if ( !$ret ) {
            $ret = $wgRequest->getVal( 'uselang' );
        }

        return $this->mURLVariant = $this->validateVariant( $ret );
    }

    /**
     * Determine the language variant from the user's preferences.
     *
     * @return mixed Variant if one is found, null otherwise
     */
    protected function getUserVariant() {
        global $wgUser, $wgContLang;

        // memoizing this function wreaks havoc on parserTest.php
        /*
        if ( $this->mUserVariant ) {
            return $this->mUserVariant;
        }
        */

        // Get language variant preference from logged in users
        // Don't call this on stub objects because that causes infinite
        // recursion during initialisation
        if ( $wgUser->isLoggedIn() ) {
            if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
                $ret = $wgUser->getOption( 'variant' );
            } else {
                $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
            }
        } else {
            // figure out user lang without constructing wgLang to avoid
            // infinite recursion
            $ret = $wgUser->getOption( 'language' );
        }

        return $this->mUserVariant = $this->validateVariant( $ret );
    }

    /**
     * Determine the language variant from the Accept-Language header.
     *
     * Languages listed in the header are tried first; if none is a valid
     * variant, the fallbacks collected for those languages are tried.
     *
     * @return mixed Variant if one is found, null otherwise
     */
    protected function getHeaderVariant() {
        global $wgRequest;

        if ( $this->mHeaderVariant ) {
            return $this->mHeaderVariant;
        }

        // see if some supported language variant is set in the
        // HTTP header.
        $languages = array_keys( $wgRequest->getAcceptLang() );
        if ( empty( $languages ) ) {
            return null;
        }

        $fallbackLanguages = array();
        foreach ( $languages as $language ) {
            $this->mHeaderVariant = $this->validateVariant( $language );
            if ( $this->mHeaderVariant ) {
                break;
            }

            // To see if there are fallbacks of current language.
            // We record these fallback variants, and process
            // them later.
            $fallbacks = $this->getVariantFallbacks( $language );
            if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
                $fallbackLanguages[] = $fallbacks;
            } elseif ( is_array( $fallbacks ) ) {
                $fallbackLanguages =
                    array_merge( $fallbackLanguages, $fallbacks );
            }
        }

        if ( !$this->mHeaderVariant ) {
            // process fallback languages now
            $fallback_languages = array_unique( $fallbackLanguages );
            foreach ( $fallback_languages as $language ) {
                $this->mHeaderVariant = $this->validateVariant( $language );
                if ( $this->mHeaderVariant ) {
                    break;
                }
            }
        }

        return $this->mHeaderVariant;
    }

    /**
     * Dictionary-based conversion of text.
     *
     * Markup matched by the regex built below (tags, entities, parser
     * placeholders, <code>/<script>/<pre> sections) is passed through
     * untouched, except that 'title' and 'alt' attribute values of tags
     * are converted. Everything else is translated in one batch.
     *
     * @param string $text The text to be converted
     * @param string|bool $toVariant Target variant; false means the
     *   preferred variant
     * @return string Converted text
     */
    public function autoConvert( $text, $toVariant = false ) {
        wfProfileIn( __METHOD__ );

        $this->loadTables();

        if ( !$toVariant ) {
            $toVariant = $this->getPreferredVariant();
            if ( !$toVariant ) {
                wfProfileOut( __METHOD__ );
                return $text;
            }
        }

        if ( $this->guessVariant( $text, $toVariant ) ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }

        /* we convert everything except:
           1. HTML markups (anything between < and >)
           2. HTML entities
           3. placeholders created by the parser
        */
        global $wgParser;
        if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
            $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
        } else {
            $marker = '';
        }

        // this one is needed when the text is inside an HTML markup
        $htmlfix = '|<[^>]+$|^[^<>]*>';

        // disable convert to variants between <code> tags
        $codefix = '<code>.+?<\/code>|';
        // disable conversion of <script> tags
        $scriptfix = '<script.*?>.*?<\/script>|';
        // disable conversion of <pre> tags
        $prefix = '<pre.*?>.*?<\/pre>|';

        $reg = '/' . $codefix . $scriptfix . $prefix .
            '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
        $startPos = 0;
        $sourceBlob = '';
        $literalBlob = '';

        // Guard against delimiter nulls in the input
        $text = str_replace( "\000", '', $text );

        $markupMatches = null;
        $elementMatches = null;
        while ( $startPos < strlen( $text ) ) {
            if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
                $elementPos = $markupMatches[0][1];
                $element = $markupMatches[0][0];
            } else {
                $elementPos = strlen( $text );
                $element = '';
            }

            // Queue the part before the markup for translation in a batch
            $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

            // Advance to the next position
            $startPos = $elementPos + strlen( $element );

            // Translate any alt or title attributes inside the matched element
            if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
                $elementMatches ) )
            {
                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
                $changed = false;
                foreach ( array( 'title', 'alt' ) as $attrName ) {
                    if ( !isset( $attrs[$attrName] ) ) {
                        continue;
                    }
                    $attr = $attrs[$attrName];
                    // Don't convert URLs. Compare against false explicitly:
                    // strpos() returns 0 (falsy) for a match at offset 0.
                    if ( strpos( $attr, '://' ) === false ) {
                        $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
                    }

                    // Remove HTML tags to avoid disrupting the layout
                    $attr = preg_replace( '/<[^>]+>/', '', $attr );
                    if ( $attr !== $attrs[$attrName] ) {
                        $attrs[$attrName] = $attr;
                        $changed = true;
                    }
                }
                if ( $changed ) {
                    $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                        $elementMatches[3];
                }
            }
            $literalBlob .= $element . "\000";
        }

        // Do the main translation batch
        $translatedBlob = $this->translate( $sourceBlob, $toVariant );

        // Put the output back together
        $translatedIter = StringUtils::explode( "\000", $translatedBlob );
        $literalIter = StringUtils::explode( "\000", $literalBlob );
        $output = '';
        while ( $translatedIter->valid() && $literalIter->valid() ) {
            $output .= $translatedIter->current();
            $output .= $literalIter->current();
            $translatedIter->next();
            $literalIter->next();
        }

        wfProfileOut( __METHOD__ );
        return $output;
    }

    /**
     * Translate a string to a variant using that variant's table.
     * No markup handling is done here.
     *
     * @param string $text Text to convert
     * @param string $variant Variant language code
     * @return string Translated text
     */
    public function translate( $text, $variant ) {
        wfProfileIn( __METHOD__ );
        // If $text is empty or only includes spaces, do nothing
        // Otherwise translate it
        if ( trim( $text ) ) {
            $this->loadTables();
            $text = $this->mTables[$variant]->replace( $text );
        }
        wfProfileOut( __METHOD__ );
        return $text;
    }

    /**
     * Translate a text into every enabled variant.
     *
     * @param string $text The text to be converted
     * @return array Map of variant code => converted text
     */
    public function autoConvertToAllVariants( $text ) {
        wfProfileIn( __METHOD__ );
        $this->loadTables();

        $ret = array();
        foreach ( $this->mVariants as $variant ) {
            $ret[$variant] = $this->translate( $text, $variant );
        }

        wfProfileOut( __METHOD__ );
        return $ret;
    }

    /**
     * Convert a link text to all variants. In this class it simply
     * delegates to autoConvertToAllVariants().
     *
     * @param string $text The text to be converted
     * @return array Map of variant code => converted text
     */
    public function convertLinkToAllVariants( $text ) {
        return $this->autoConvertToAllVariants( $text );
    }

    /**
     * Apply a parsed manual conversion rule to the converter state.
     *
     * @param ConverterRule $convRule Parsed rule
     */
    protected function applyManualConv( $convRule ) {
        // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
        // title conversion.
        // Bug 24072: $mConvRuleTitle was overwritten by other manual
        // rule(s) not for title, this breaks the title conversion.
        $newConvRuleTitle = $convRule->getTitle();
        if ( $newConvRuleTitle ) {
            // So I add an empty check for getTitle()
            $this->mConvRuleTitle = $newConvRuleTitle;
        }

        // merge/remove manual conversion rules to/from global table
        $convTable = $convRule->getConvTable();
        $action = $convRule->getRulesAction();
        foreach ( $convTable as $variant => $pair ) {
            if ( !$this->validateVariant( $variant ) ) {
                continue;
            }

            if ( $action == 'add' ) {
                foreach ( $pair as $from => $to ) {
                    // to ensure that $from and $to not be left blank
                    // so $this->translate() could always return a string
                    if ( $from || $to ) {
                        // more efficient than array_merge(), about 2.5 times.
                        $this->mTables[$variant]->setPair( $from, $to );
                    }
                }
            } elseif ( $action == 'remove' ) {
                $this->mTables[$variant]->removeArray( $pair );
            }
        }
    }

    /**
     * Convert a title (namespace prefix and text) to the preferred
     * variant.
     *
     * @param Title $title Title object to convert
     * @return string Converted title text
     */
    public function convertTitle( $title ) {
        $variant = $this->getPreferredVariant();
        $index = $title->getNamespace();
        if ( $index !== NS_MAIN ) {
            $text = $this->convertNamespace( $index, $variant ) . ':';
        } else {
            $text = '';
        }
        $text .= $this->translate( $title->getText(), $variant );
        return $text;
    }

    /**
     * Get the namespace display name in a variant.
     *
     * @param int $index Namespace id
     * @param string|null $variant Variant code; null means the preferred
     *   variant
     * @return string Namespace name ('' for the main namespace)
     */
    public function convertNamespace( $index, $variant = null ) {
        if ( $variant === null ) {
            $variant = $this->getPreferredVariant();
        }
        if ( $index === NS_MAIN ) {
            return '';
        } else {
            // First check if a message gives a converted name in the target variant.
            $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
            if ( $nsConvMsg->exists() ) {
                return $nsConvMsg->plain();
            }
            // Then check if a message gives a converted name in content language
            // which needs extra translation to the target variant.
            $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
            if ( $nsConvMsg->exists() ) {
                return $this->translate( $nsConvMsg->plain(), $variant );
            }
            // No message exists, retrieve it from the target variant's namespace names.
            $langObj = $this->mLangObj->factory( $variant );
            return $langObj->getFormattedNsText( $index );
        }
    }

    /**
     * Convert text to the preferred variant, honouring manual -{...}-
     * rules.
     *
     * @param string $text Text to be converted
     * @return string Converted text
     */
    public function convert( $text ) {
        $variant = $this->getPreferredVariant();
        return $this->convertTo( $text, $variant );
    }

    /**
     * Same as convert(), with an explicit target variant. Returns the
     * text unchanged when $wgDisableLangConversion is set.
     *
     * @param string $text Text to be converted
     * @param string $variant Target variant code
     * @return string Converted text
     */
    public function convertTo( $text, $variant ) {
        global $wgDisableLangConversion;
        if ( $wgDisableLangConversion ) {
            return $text;
        }
        // Reset converter state for a new converter run.
        $this->mConvRuleTitle = false;
        return $this->recursiveConvertTopLevel( $text, $variant );
    }

    /**
     * Convert text outside of any rule; -{...}- rules found on the way
     * are handed to recursiveConvertRule().
     *
     * @param string $text Text to be converted
     * @param string $variant Target variant code
     * @param int $depth Depth of recursion
     * @return string Converted text
     */
    protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
        $startPos = 0;
        $out = '';
        $length = strlen( $text );
        $shouldConvert = !$this->guessVariant( $text, $variant );

        while ( $startPos < $length ) {
            $pos = strpos( $text, '-{', $startPos );

            if ( $pos === false ) {
                // No more markup, append final segment
                $fragment = substr( $text, $startPos );
                $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
                return $out;
            }

            // Markup found
            // Append initial segment
            $fragment = substr( $text, $startPos, $pos - $startPos );
            $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

            // Advance position
            $startPos = $pos;

            // Do recursive conversion
            $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
        }

        return $out;
    }

    /**
     * Convert the rule that starts at $startPos (which must point at
     * "-{"), recursing into nested rules.
     *
     * @param string $text Text to be converted
     * @param string $variant Target variant code
     * @param int &$startPos Position in $text; updated to just past the
     *   consumed rule
     * @param int $depth Depth of recursion
     * @throws MWException If $startPos does not point at "-{"
     * @return string Converted text
     */
    protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
        // Quick sanity check (no function calls)
        if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
            throw new MWException( __METHOD__ . ': invalid input string' );
        }

        $startPos += 2;
        $inner = '';
        $warningDone = false;
        $length = strlen( $text );

        while ( $startPos < $length ) {
            $m = false;
            preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
            if ( !$m ) {
                // Unclosed rule
                break;
            }

            $token = $m[0][0];
            $pos = $m[0][1];

            // Markup found
            // Append initial segment
            $inner .= substr( $text, $startPos, $pos - $startPos );

            // Advance position
            $startPos = $pos;

            switch ( $token ) {
                case '-{':
                    // Check max depth
                    if ( $depth >= $this->mMaxDepth ) {
                        $inner .= '-{';
                        if ( !$warningDone ) {
                            $inner .= '<span class="error">' .
                                wfMessage( 'language-converter-depth-warning' )
                                    ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
                                '</span>';
                            $warningDone = true;
                        }
                        $startPos += 2;
                        // Go on with the enclosing while loop. A bare
                        // "continue" would target the switch and raises a
                        // warning on PHP >= 7.3.
                        continue 2;
                    }
                    // Recursively parse another rule
                    $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                    break;
                case '}-':
                    // Apply the rule
                    $startPos += 2;
                    $rule = new ConverterRule( $inner, $this );
                    $rule->parse( $variant );
                    $this->applyManualConv( $rule );
                    return $rule->getDisplay();
                default:
                    throw new MWException( __METHOD__ . ': invalid regex match' );
            }
        }

        // Unclosed rule
        if ( $startPos < $length ) {
            $inner .= substr( $text, $startPos );
        }
        $startPos = $length;
        return '-{' . $this->autoConvert( $inner, $variant );
    }

    /**
     * If a link target does not exist, look for an existing page under
     * one of the variant spellings of the link and switch to it.
     *
     * @param string &$link The name of the link; replaced on success
     * @param mixed &$nt The title object of the link; replaced on success
     * @param bool $ignoreOtherCond When true, the request/user conditions
     *   that normally suppress link conversion are ignored
     */
    public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
        # If the article has already existed, there is no need to
        # check it again, otherwise it may cause a fault.
        if ( is_object( $nt ) && $nt->exists() ) {
            return;
        }

        global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
            $wgUser;
        $isredir = $wgRequest->getText( 'redirect', 'yes' );
        $action = $wgRequest->getText( 'action' );
        $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
        $disableLinkConversion = $wgDisableLangConversion
            || $wgDisableTitleConversion;
        $linkBatch = new LinkBatch();

        $ns = NS_MAIN;

        if ( $disableLinkConversion ||
            ( !$ignoreOtherCond &&
                ( $isredir == 'no'
                    || $action == 'edit'
                    || $action == 'submit'
                    || $linkconvert == 'no'
                    || $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
            return;
        }

        if ( is_object( $nt ) ) {
            $ns = $nt->getNamespace();
        }

        $variants = $this->autoConvertToAllVariants( $link );
        if ( !$variants ) { // give up
            return;
        }

        $titles = array();

        foreach ( $variants as $v ) {
            if ( $v != $link ) {
                $varnt = Title::newFromText( $v, $ns );
                if ( !is_null( $varnt ) ) {
                    $linkBatch->addObj( $varnt );
                    $titles[] = $varnt;
                }
            }
        }

        // fetch all variants in single query
        $linkBatch->execute();

        foreach ( $titles as $varnt ) {
            if ( $varnt->getArticleID() > 0 ) {
                $nt = $varnt;
                $link = $varnt->getText();
                break;
            }
        }
    }

    /**
     * Returns '!' followed by the preferred variant.
     *
     * @return string
     */
    public function getExtraHashOptions() {
        $variant = $this->getPreferredVariant();
        return '!' . $variant;
    }

    /**
     * Guess whether a text is already written in the given variant.
     * This base implementation always returns false.
     *
     * @param string $text The text to be checked
     * @param string $variant Language code of the variant to check for
     * @return bool
     */
    public function guessVariant( $text, $variant ) {
        return false;
    }

    /**
     * Load the default conversion tables. Must be overridden; this base
     * implementation always throws.
     *
     * @throws MWException
     */
    public function loadDefaultTables() {
        $name = get_class( $this );
        throw new MWException( "Must implement loadDefaultTables() method in class $name" );
    }

    /**
     * Load the conversion tables, either from the cache or by building
     * them from the defaults plus MediaWiki:Conversiontable/* pages.
     * Does nothing when the tables are already loaded.
     *
     * @param bool $fromCache Whether to try the cache first
     */
    public function loadTables( $fromCache = true ) {
        global $wgLangConvMemc;

        if ( $this->mTablesLoaded ) {
            return;
        }

        wfProfileIn( __METHOD__ );
        $this->mTablesLoaded = true;
        $this->mTables = false;
        if ( $fromCache ) {
            wfProfileIn( __METHOD__ . '-cache' );
            $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
            wfProfileOut( __METHOD__ . '-cache' );
        }
        if ( !$this->mTables
            || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
            wfProfileIn( __METHOD__ . '-recache' );
            // not in cache, or we need a fresh reload.
            // We will first load the default tables
            // then update them using things in MediaWiki:Conversiontable/*
            $this->loadDefaultTables();
            foreach ( $this->mVariants as $var ) {
                $cached = $this->parseCachedTable( $var );
                $this->mTables[$var]->mergeArray( $cached );
            }

            $this->postLoadTables();
            $this->mTables[self::CACHE_VERSION_KEY] = true;

            $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
            wfProfileOut( __METHOD__ . '-recache' );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Hook called by loadTables() after the tables are built and before
     * they are cached. Empty in this base class.
     */
    public function postLoadTables() { }

    /**
     * Discard the loaded tables and rebuild them, bypassing the cache
     * read.
     */
    public function reloadTables() {
        if ( $this->mTables ) {
            unset( $this->mTables );
        }
        $this->mTablesLoaded = false;
        $this->loadTables( false );
    }

    /**
     * Parse the conversion table stored in MediaWiki:Conversiontable/$code
     * (optionally a subpage of it) into a from => to map.
     *
     * Mappings are written as "from => to;" entries inside -{ }- blocks.
     * Each page key is parsed at most once per request (tracked in a
     * static variable); repeated calls for the same key return an empty
     * array.
     *
     * @param string $code Language code
     * @param string $subpage Subpage name
     * @param bool $recursive Also parse subpages linked from the page
     * @return array Map of source text => converted text
     */
    public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
        static $parsed = array();

        $key = 'Conversiontable/' . $code;
        if ( $subpage ) {
            $key .= '/' . $subpage;
        }
        if ( array_key_exists( $key, $parsed ) ) {
            return array();
        }

        $parsed[$key] = true;

        if ( $subpage === '' ) {
            $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
        } else {
            $txt = false;
            $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
            if ( $title && $title->exists() ) {
                $revision = Revision::newFromTitle( $title );
                if ( $revision ) {
                    if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
                        $txt = $revision->getContent( Revision::RAW )->getNativeData();
                    }

                    // @todo in the future, use a specialized content model, perhaps based on json!
                }
            }
        }

        # Nothing to parse if there's no text
        if ( $txt === false || $txt === null || $txt === '' ) {
            return array();
        }

        // get all subpage links of the form
        // [[MediaWiki:Conversiontable/zh-xx/...|...]]
        $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
            ':Conversiontable';
        $subs = StringUtils::explode( '[[', $txt );
        $sublinks = array();
        foreach ( $subs as $sub ) {
            $link = explode( ']]', $sub, 2 );
            if ( count( $link ) != 2 ) {
                continue;
            }
            $b = explode( '|', $link[0], 2 );
            $b = explode( '/', trim( $b[0] ), 3 );
            if ( count( $b ) == 3 ) {
                $sublink = $b[2];
            } else {
                $sublink = '';
            }

            if ( $b[0] == $linkhead && $b[1] == $code ) {
                $sublinks[] = $sublink;
            }
        }

        // parse the mappings in this page
        $blocks = StringUtils::explode( '-{', $txt );
        $ret = array();
        $first = true;
        foreach ( $blocks as $block ) {
            if ( $first ) {
                // Skip the part before the first -{
                $first = false;
                continue;
            }
            $mappings = explode( '}-', $block, 2 );
            $stripped = str_replace( array( "'", '"', '*', '#' ), '',
                $mappings[0] );
            $table = StringUtils::explode( ';', $stripped );
            foreach ( $table as $t ) {
                $m = explode( '=>', $t, 3 );
                if ( count( $m ) != 2 ) {
                    continue;
                }
                // trim any trailing comments starting with '//'
                $tt = explode( '//', $m[1], 2 );
                $ret[trim( $m[0] )] = trim( $tt[0] );
            }
        }

        // recursively parse the subpages
        if ( $recursive ) {
            foreach ( $sublinks as $link ) {
                $s = $this->parseCachedTable( $code, $link, $recursive );
                $ret = array_merge( $ret, $s );
            }
        }

        if ( $this->mUcfirst ) {
            foreach ( $ret as $k => $v ) {
                $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
            }
        }
        return $ret;
    }

    /**
     * Wrap text in -{R|...}- so that it is not converted. Text that
     * already contains "-{" or "}-" is returned unchanged.
     *
     * @param string $text Text to be tagged for no conversion
     * @param bool $noParse Unused
     * @return string The tagged text
     */
    public function markNoConversion( $text, $noParse = false ) {
        # don't mark if already marked
        # (strpos() returns 0 for a match at the very start of the string,
        # so the result has to be compared against false explicitly)
        if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
            return $text;
        }

        $ret = "-{R|$text}-";
        return $ret;
    }

    /**
     * Convert the sort key for a category link. This base implementation
     * returns the key unchanged.
     *
     * @param string $key
     * @return string
     */
    public function convertCategoryKey( $key ) {
        return $key;
    }

    /**
     * Save-complete handler: reloads the conversion tables when a
     * MediaWiki:Conversiontable/<variant> page for an enabled variant
     * was saved.
     *
     * Only $page is used; the remaining parameters are ignored.
     *
     * @return bool Always true
     */
    public function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
            $isWatch, $section, $flags, $revision ) {
        $titleobj = $page->getTitle();
        if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
            $title = $titleobj->getDBkey();
            $t = explode( '/', $title, 3 );
            $c = count( $t );
            if ( $c > 1 && $t[0] == 'Conversiontable' ) {
                if ( $this->validateVariant( $t[1] ) ) {
                    $this->reloadTables();
                }
            }
        }
        return true;
    }

    /**
     * Armour rendered math against conversion by escaping the braces of
     * the converter markup.
     *
     * @param string $text Text to armour against conversion
     * @return string Armoured text in which "-{" and "}-" no longer occur
     */
    public function armourMath( $text ) {
        // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
        // any unwanted markup appearing in the math image tag.
        $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
        return $text;
    }

    /**
     * Get (and lazily build) the regex used with preg_split() to split
     * manual rules on ";".
     *
     * @return string Regular expression
     */
    public function getVarSeparatorPattern() {
        if ( is_null( $this->mVarSeparatorPattern ) ) {
            // varsep_pattern for preg_split:
            // text should be split by ";" only if a valid variant
            // name exists after the markup, for example:
            //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
            //  <span style="font-size:120%;">yyy</span>;}-
            // we should split it as:
            //  array(
            //    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
            //    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
            //    [2] => ''
            //  )
            $pat = '/;\s*(?=';
            foreach ( $this->mVariants as $variant ) {
                // Escape the code so it is always matched literally
                $quoted = preg_quote( $variant, '/' );
                // zh-hans:xxx;zh-hant:yyy
                $pat .= $quoted . '\s*:|';
                // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
                $pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
            }
            $pat .= '\s*$)/';
            $this->mVarSeparatorPattern = $pat;
        }
        return $this->mVarSeparatorPattern;
    }
}

/**
 * Parser for one manual conversion rule, i.e. the text found between
 * "-{" and "}-".
 */
class ConverterRule {
    public $mText; // original text in -{text}-
    public $mConverter; // LanguageConverter object
    public $mRuleDisplay = '';
    public $mRuleTitle = false;
    public $mRules = '';// string : the text of the rules
    public $mRulesAction = 'none';
    public $mFlags = array();
    public $mVariantFlags = array();
    public $mConvTable = array();
    public $mBidtable = array();// array of the translation in each variant
    public $mUnidtable = array();// array of the translation in each variant

    /**
     * @param string $text The text between -{ and }-
     * @param LanguageConverter $converter
     */
    public function __construct( $text, $converter ) {
        $this->mText = $text;
        $this->mConverter = $converter;
    }

    /**
     * Look up the text for the first of the given variants that has an
     * entry in the bidirectional table.
     *
     * @param array|string $variants Variant code(s), in order of preference
     * @return string|bool The text, or false if none of the variants has one
     */
    public function getTextInBidtable( $variants ) {
        $variants = (array)$variants;
        if ( !$variants ) {
            return false;
        }
        foreach ( $variants as $variant ) {
            if ( isset( $this->mBidtable[$variant] ) ) {
                return $this->mBidtable[$variant];
            }
        }
        return false;
    }

    /**
     * Split the flag prefix (everything before the first "|") off the
     * rule text and normalise the flags. Results are stored in $mFlags,
     * $mVariantFlags and $mRules.
     */
    public function parseFlags() {
        $text = $this->mText;
        $flags = array();
        $variantFlags = array();

        $sepPos = strpos( $text, '|' );
        if ( $sepPos !== false ) {
            $validFlags = $this->mConverter->mFlags;
            $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
            foreach ( $f as $ff ) {
                $ff = trim( $ff );
                if ( isset( $validFlags[$ff] ) ) {
                    $flags[$validFlags[$ff]] = true;
                }
            }
            $text = strval( substr( $text, $sepPos + 1 ) );
        }

        if ( !$flags ) {
            $flags['S'] = true;
        } elseif ( isset( $flags['R'] ) ) {
            $flags = array( 'R' => true );// remove other flags
        } elseif ( isset( $flags['N'] ) ) {
            $flags = array( 'N' => true );// remove other flags
        } elseif ( isset( $flags['-'] ) ) {
            $flags = array( '-' => true );// remove other flags
        } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
            $flags['H'] = true;
        } elseif ( isset( $flags['H'] ) ) {
            // replace A flag, and remove other flags except T
            $temp = array( '+' => true, 'H' => true );
            if ( isset( $flags['T'] ) ) {
                $temp['T'] = true;
            }
            if ( isset( $flags['D'] ) ) {
                $temp['D'] = true;
            }
            $flags = $temp;
        } else {
            if ( isset( $flags['A'] ) ) {
                $flags['+'] = true;
                $flags['S'] = true;
            }
            if ( isset( $flags['D'] ) ) {
                unset( $flags['S'] );
            }
            // try to find flags like "zh-hans", "zh-hant"
            // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
            $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
            if ( $variantFlags ) {
                $variantFlags = array_flip( $variantFlags );
                $flags = array();
            }
        }
        $this->mVariantFlags = $variantFlags;
        $this->mRules = $text;
        $this->mFlags = $flags;
    }

    /**
     * Parse $mRules into the bidirectional ("variant:text") and
     * unidirectional ("from=>variant:text") tables. Both tables are
     * emptied when an entry names an unknown variant.
     */
    public function parseRules() {
        $rules = $this->mRules;
        $bidtable = array();
        $unidtable = array();
        $variants = $this->mConverter->mVariants;
        $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

        // Split according to $varsep_pattern, but ignore semicolons from HTML entities
        $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
        $choice = preg_split( $varsep_pattern, $rules );
        $choice = str_replace( "\x01", ';', $choice );

        foreach ( $choice as $c ) {
            $v = explode( ':', $c, 2 );
            if ( count( $v ) != 2 ) {
                // syntax error, skip
                continue;
            }
            $to = trim( $v[1] );
            $v = trim( $v[0] );
            $u = explode( '=>', $v, 2 );
            // if $to is empty, strtr() could return a wrong result
            if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) {
                $bidtable[$v] = $to;
            } elseif ( count( $u ) == 2 ) {
                $from = trim( $u[0] );
                $v = trim( $u[1] );
                if ( array_key_exists( $v, $unidtable )
                    && !is_array( $unidtable[$v] )
                    && $to
                    && in_array( $v, $variants ) ) {
                    $unidtable[$v] = array( $from => $to );
                } elseif ( $to && in_array( $v, $variants ) ) {
                    $unidtable[$v][$from] = $to;
                }
            }
            // syntax error, pass
            if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                $bidtable = array();
                $unidtable = array();
                break;
            }
        }
        $this->mBidtable = $bidtable;
        $this->mUnidtable = $unidtable;
    }

    /**
     * Build a textual description of the parsed rules using the variant
     * names and the converter's description separators.
     *
     * @return string
     */
    public function getRulesDesc() {
        $codesep = $this->mConverter->mDescCodeSep;
        $varsep = $this->mConverter->mDescVarSep;
        $text = '';
        foreach ( $this->mBidtable as $k => $v ) {
            $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
        }
        foreach ( $this->mUnidtable as $k => $a ) {
            foreach ( $a as $from => $to ) {
                $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
                    "$codesep$to$varsep";
            }
        }
        return $text;
    }

    /**
     * Pick the text this rule displays for a variant.
     *
     * @param string $variant Variant code
     * @return string|bool The text to display; the raw rule text when
     *   both tables are empty
     */
    public function getRuleConvertedStr( $variant ) {
        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;

        if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
            return $this->mRules;
        } else {
            // display current variant in bidirectional array
            $disp = $this->getTextInBidtable( $variant );
            // or display current variant in fallbacks
            if ( !$disp ) {
                $disp = $this->getTextInBidtable(
                    $this->mConverter->getVariantFallbacks( $variant ) );
            }
            // or display current variant in unidirectional array
            if ( !$disp && array_key_exists( $variant, $unidtable ) ) {
                $disp = array_values( $unidtable[$variant] );
                $disp = $disp[0];
            }
            // or display first text under disable manual convert
            if ( !$disp
                && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
                if ( count( $bidtable ) > 0 ) {
                    $disp = array_values( $bidtable );
                    $disp = $disp[0];
                } else {
                    $disp = array_values( $unidtable );
                    $disp = array_values( $disp[0] );
                    $disp = $disp[0];
                }
            }
            return $disp;
        }
    }

    /**
     * Like getRuleConvertedStr(), but for title rules: when the variant
     * is the main language code, no fallback is applied.
     *
     * @param string $variant Variant code
     * @return string|bool The title text, or false if the rule gives none
     */
    public function getRuleConvertedTitle( $variant ) {
        if ( $variant === $this->mConverter->mMainLanguageCode ) {
            // If a string targeting exactly this variant is set,
            // use it. Otherwise, just return false, so the real
            // page name can be shown (and because variant === main,
            // there'll be no further automatic conversion).
            $disp = $this->getTextInBidtable( $variant );
            if ( $disp ) {
                return $disp;
            }
            if ( array_key_exists( $variant, $this->mUnidtable ) ) {
                $disp = array_values( $this->mUnidtable[$variant] );
                $disp = $disp[0];
            }
            // Assigned above or still false.
            return $disp;
        } else {
            return $this->getRuleConvertedStr( $variant );
        }
    }

    /**
     * Build $mConvTable (variant => array( from => to )) from the
     * bidirectional and unidirectional tables, respecting each variant's
     * manual conversion level.
     */
    public function generateConvTable() {
        // Special case optimisation
        if ( !$this->mBidtable && !$this->mUnidtable ) {
            $this->mConvTable = array();
            return;
        }

        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;
        $manLevel = $this->mConverter->mManualLevel;

        $vmarked = array();
        foreach ( $this->mConverter->mVariants as $v ) {
            /* for bidirectional array
               fill in the missing variants, if any,
               with fallbacks */
            if ( !isset( $bidtable[$v] ) ) {
                $variantFallbacks =
                    $this->mConverter->getVariantFallbacks( $v );
                $vf = $this->getTextInBidtable( $variantFallbacks );
                if ( $vf ) {
                    $bidtable[$v] = $vf;
                }
            }

            if ( isset( $bidtable[$v] ) ) {
                foreach ( $vmarked as $vo ) {
                    // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                    // or -{H|zh:WordZh;zh-tw:WordTw}-
                    // or -{-|zh:WordZh;zh-tw:WordTw}-
                    // to introduce a custom mapping between
                    // words WordZh and WordTw in the whole text
                    if ( $manLevel[$v] == 'bidirectional' ) {
                        $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                    }
                    if ( $manLevel[$vo] == 'bidirectional' ) {
                        $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                    }
                }
                $vmarked[] = $v;
            }
            /* for unidirectional array fill to convert tables */
            if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                && isset( $unidtable[$v] ) )
            {
                if ( isset( $this->mConvTable[$v] ) ) {
                    $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
                } else {
                    $this->mConvTable[$v] = $unidtable[$v];
                }
            }
        }
    }

    public function parse( $variant = null ) {
        if ( !$variant ) {
            $variant = $this->mConverter->getPreferredVariant();
        }

01469 $this->parseFlags(); 01470 $flags = $this->mFlags; 01471 01472 // convert to specified variant 01473 // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}- 01474 if ( $this->mVariantFlags ) { 01475 // check if current variant in flags 01476 if ( isset( $this->mVariantFlags[$variant] ) ) { 01477 // then convert <text to convert> to current language 01478 $this->mRules = $this->mConverter->autoConvert( $this->mRules, 01479 $variant ); 01480 } else { // if current variant no in flags, 01481 // then we check its fallback variants. 01482 $variantFallbacks = 01483 $this->mConverter->getVariantFallbacks( $variant ); 01484 if ( is_array( $variantFallbacks ) ) { 01485 foreach ( $variantFallbacks as $variantFallback ) { 01486 // if current variant's fallback exist in flags 01487 if ( isset( $this->mVariantFlags[$variantFallback] ) ) { 01488 // then convert <text to convert> to fallback language 01489 $this->mRules = 01490 $this->mConverter->autoConvert( $this->mRules, 01491 $variantFallback ); 01492 break; 01493 } 01494 } 01495 } 01496 } 01497 $this->mFlags = $flags = array( 'R' => true ); 01498 } 01499 01500 if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) { 01501 // decode => HTML entities modified by Sanitizer::removeHTMLtags 01502 $this->mRules = str_replace( '=>', '=>', $this->mRules ); 01503 $this->parseRules(); 01504 } 01505 $rules = $this->mRules; 01506 01507 if ( !$this->mBidtable && !$this->mUnidtable ) { 01508 if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) { 01509 // fill all variants if text in -{A/H/-|text} without rules 01510 foreach ( $this->mConverter->mVariants as $v ) { 01511 $this->mBidtable[$v] = $rules; 01512 } 01513 } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) { 01514 $this->mFlags = $flags = array( 'R' => true ); 01515 } 01516 } 01517 01518 $this->mRuleDisplay = false; 01519 foreach ( $flags as $flag => $unused ) { 01520 switch ( $flag ) { 01521 case 'R': 01522 // if we don't do content convert, still strip the -{}- tags 
01523 $this->mRuleDisplay = $rules; 01524 break; 01525 case 'N': 01526 // process N flag: output current variant name 01527 $ruleVar = trim( $rules ); 01528 if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) { 01529 $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar]; 01530 } else { 01531 $this->mRuleDisplay = ''; 01532 } 01533 break; 01534 case 'D': 01535 // process D flag: output rules description 01536 $this->mRuleDisplay = $this->getRulesDesc(); 01537 break; 01538 case 'H': 01539 // process H,- flag or T only: output nothing 01540 $this->mRuleDisplay = ''; 01541 break; 01542 case '-': 01543 $this->mRulesAction = 'remove'; 01544 $this->mRuleDisplay = ''; 01545 break; 01546 case '+': 01547 $this->mRulesAction = 'add'; 01548 $this->mRuleDisplay = ''; 01549 break; 01550 case 'S': 01551 $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); 01552 break; 01553 case 'T': 01554 $this->mRuleTitle = $this->getRuleConvertedTitle( $variant ); 01555 $this->mRuleDisplay = ''; 01556 break; 01557 default: 01558 // ignore unknown flags (but see error case below) 01559 } 01560 } 01561 if ( $this->mRuleDisplay === false ) { 01562 $this->mRuleDisplay = '<span class="error">' 01563 . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped() 01564 . '</span>'; 01565 } 01566 01567 $this->generateConvTable(); 01568 } 01569 01573 public function hasRules() { 01574 // TODO: 01575 } 01576 01581 public function getDisplay() { 01582 return $this->mRuleDisplay; 01583 } 01584 01589 public function getTitle() { 01590 return $this->mRuleTitle; 01591 } 01592 01597 public function getRulesAction() { 01598 return $this->mRulesAction; 01599 } 01600 01606 public function getConvTable() { 01607 return $this->mConvTable; 01608 } 01609 01614 public function getRules() { 01615 return $this->mRules; 01616 } 01617 01622 public function getFlags() { 01623 return $this->mFlags; 01624 } 01625 }