MediaWiki
REL1_21
|
<?php

/**
 * Base class for language variant conversion.
 *
 * Holds the per-variant conversion tables, works out which variant the
 * current request/user wants, and converts text while honouring manual
 * conversion markup of the form -{ ... }-.
 *
 * Concrete converters must implement loadDefaultTables().
 */
class LanguageConverter {

	/**
	 * Language codes listed as having variants.
	 *
	 * @var array
	 */
	static public $languagesWithVariants = array(
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	);

	public $mMainLanguageCode;
	public $mVariants, $mVariantFallbacks, $mVariantNames;
	public $mTablesLoaded = false;
	public $mTables;
	// 'bidirectional' 'unidirectional' 'disable' for each variant
	public $mManualLevel;

	/**
	 * Key under which the conversion tables are stored in $wgLangConvMemc.
	 */
	public $mCacheKey;

	public $mLangObj;
	public $mFlags;
	public $mDescCodeSep = ':', $mDescVarSep = ';';
	public $mUcfirst = false;
	public $mConvRuleTitle = false;
	public $mURLVariant;
	public $mUserVariant;
	public $mHeaderVariant;
	public $mMaxDepth = 10;
	public $mVarSeparatorPattern;

	const CACHE_VERSION_KEY = 'VERSION 6';

	/**
	 * @param object $langobj Language object; stored in $mLangObj
	 * @param string $maincode Main language code
	 * @param array $variants Supported variant codes; entries listed in
	 *   $wgDisabledVariants are removed
	 * @param array $variantfallbacks Map of variant code => fallback(s)
	 * @param array $flags Extra flags merged over the default flag set
	 * @param array $manualLevel Map of variant code => manual conversion
	 *   level; variants not listed default to 'bidirectional'
	 */
	public function __construct( $langobj, $maincode, $variants = array(),
		$variantfallbacks = array(), $flags = array(),
		$manualLevel = array() ) {
		global $wgDisabledVariants;
		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
		$defaultflags = array(
			// 'S' show converted text
			// '+' add rules for alltext
			// 'E' the gave flags is error
			// these flags above are reserved for program
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code
						// (but no display in placed code)
			'N' => 'N' // current variant name
		);
		$this->mFlags = array_merge( $defaultflags, $flags );
		foreach ( $this->mVariants as $v ) {
			if ( array_key_exists( $v, $manualLevel ) ) {
				$this->mManualLevel[$v] = $manualLevel[$v];
			} else {
				$this->mManualLevel[$v] = 'bidirectional';
			}
			// Every variant code is itself a valid flag
			$this->mFlags[$v] = $v;
		}
	}

	/**
	 * Get all enabled variants of this language.
	 *
	 * @return array Variant codes
	 */
	public function getVariants() {
		return $this->mVariants;
	}

	/**
	 * Get the configured fallback(s) for a variant.
	 *
	 * @param string $variant Variant code
	 * @return mixed The entry stored in $mVariantFallbacks for $variant, or
	 *   the main language code when there is none
	 */
	public function getVariantFallbacks( $variant ) {
		if ( isset( $this->mVariantFallbacks[$variant] ) ) {
			return $this->mVariantFallbacks[$variant];
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the title produced by the conversion rule.
	 *
	 * @return string|bool The converted title, or false if no rule set one
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}

	/**
	 * Get the preferred language variant, consulting in order: the URL,
	 * the user preference (logged-in users) or the HTTP header (anonymous
	 * users), and finally $wgDefaultLanguageVariant.
	 *
	 * @return string The preferred variant code, or the main language code
	 *   when nothing valid was found
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. their return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		if ( $this->validateVariant( $req ) ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Like getPreferredVariant(), but never consults the user preference.
	 *
	 * @return string The variant code, or the main language code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$req = $this->getURLVariant();

		if ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Check that a variant code is one of the enabled variants.
	 *
	 * @param string|null $variant Variant code to check
	 * @return string|null The code if it is enabled, null otherwise
	 */
	public function validateVariant( $variant = null ) {
		// Strict comparison: a loose in_array() would let non-string
		// values such as true or 0 match an arbitrary variant code.
		if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		return null;
	}

	/**
	 * Get the variant requested through the 'variant' (or, failing that,
	 * 'uselang') request parameter. The result is memoized in $mURLVariant.
	 *
	 * @return string|null Validated variant code, or null
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$ret = $wgRequest->getText( 'variant' );

		if ( !$ret ) {
			$ret = $wgRequest->getVal( 'uselang' );
		}

		return $this->mURLVariant = $this->validateVariant( $ret );
	}

	/**
	 * Get the variant from the user's preferences.
	 *
	 * @return string|null Validated variant code, or null
	 */
	protected function getUserVariant() {
		global $wgUser;

		// memoizing this function wreaks havoc on parserTest.php
		/*
		if ( $this->mUserVariant ) {
			return $this->mUserVariant;
		}
		*/

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( $wgUser->isLoggedIn() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$ret = $wgUser->getOption( 'language' );
		}

		return $this->mUserVariant = $this->validateVariant( $ret );
	}

	/**
	 * Determine the variant from the request's accepted languages. Languages
	 * are tried in order first; if none is an enabled variant, their
	 * fallbacks are tried. The result is memoized in $mHeaderVariant.
	 *
	 * @return string|null Validated variant code, or null
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$languages = array_keys( $wgRequest->getAcceptLang() );
		if ( empty( $languages ) ) {
			return null;
		}

		$fallbackLanguages = array();
		foreach ( $languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages =
					array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			$fallback_languages = array_unique( $fallbackLanguages );
			foreach ( $fallback_languages as $language ) {
				$this->mHeaderVariant = $this->validateVariant( $language );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}

	/**
	 * Dictionary-based conversion. Converts the text nodes of $text using
	 * the conversion table of the target variant, leaving HTML markup,
	 * entities and parser placeholders alone; 'title' and 'alt' attributes
	 * of matched elements are converted as well. Manual -{ }- markup is NOT
	 * interpreted here.
	 *
	 * @param string $text Text to convert
	 * @param string|bool $toVariant Target variant; false means the
	 *   preferred variant
	 * @return string Converted text
	 */
	public function autoConvert( $text, $toVariant = false ) {
		wfProfileIn( __METHOD__ );

		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				wfProfileOut( __METHOD__ );
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			wfProfileOut( __METHOD__ );
			return $text;
		}

		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		*/
		global $wgParser;
		if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
			$marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
		} else {
			$marker = '';
		}

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>]+$|^[^<>]*>';

		// disable convert to variants between <code></code> tags
		$codefix = '<code>.+?<\/code>|';
		// disable conversion of <script type="text/javascript"> ... </script>
		$scriptfix = '<script.*?>.*?<\/script>|';
		// disable conversion of <pre xxxx> ... </pre>
		$prefix = '<pre.*?>.*?<\/pre>|';

		$reg = '/' . $codefix . $scriptfix . $prefix .
			'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		$text = str_replace( "\000", '', $text );

		$markupMatches = null;
		$elementMatches = null;
		while ( $startPos < strlen( $text ) ) {
			if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
			} else {
				$elementPos = strlen( $text );
				$element = '';
			}

			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
				$elementMatches ) )
			{
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				$changed = false;
				foreach ( array( 'title', 'alt' ) as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs. Compare against false explicitly:
					// a match at offset 0 is still a match.
					if ( strpos( $attr, '://' ) === false ) {
						$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$attr = preg_replace( '/<[^>]+>/', '', $attr );
					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		wfProfileOut( __METHOD__ );
		return $output;
	}

	/**
	 * Translate a string to a variant using that variant's conversion
	 * table. No markup of any kind is interpreted.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Variant code; must be a key of $mTables
	 * @return string Translated text
	 */
	public function translate( $text, $variant ) {
		wfProfileIn( __METHOD__ );
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			$text = $this->mTables[$variant]->replace( $text );
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Translate a string into every enabled variant.
	 *
	 * @param string $text Text to convert
	 * @return array Map of variant code => translated text
	 */
	public function autoConvertToAllVariants( $text ) {
		wfProfileIn( __METHOD__ );
		$this->loadTables();

		$ret = array();
		foreach ( $this->mVariants as $variant ) {
			$ret[$variant] = $this->translate( $text, $variant );
		}

		wfProfileOut( __METHOD__ );
		return $ret;
	}

	/**
	 * Convert a link text to all variants. Delegates to
	 * autoConvertToAllVariants().
	 *
	 * @param string $text Text to convert
	 * @return array Map of variant code => translated text
	 */
	public function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	/**
	 * Apply a parsed manual conversion rule: record its title (if any) and
	 * add its pairs to, or remove them from, the conversion tables.
	 *
	 * @param ConverterRule $convRule Rule that has already been parsed
	 */
	protected function applyManualConv( $convRule ) {
		// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
		// title conversion.
		// Bug 24072: $mConvRuleTitle was overwritten by other manual
		// rule(s) not for title, this breaks the title conversion.
		$newConvRuleTitle = $convRule->getTitle();
		if ( $newConvRuleTitle ) {
			// So I add an empty check for getTitle()
			$this->mConvRuleTitle = $newConvRuleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$convTable = $convRule->getConvTable();
		$action = $convRule->getRulesAction();
		foreach ( $convTable as $variant => $pair ) {
			if ( !$this->validateVariant( $variant ) ) {
				continue;
			}

			if ( $action == 'add' ) {
				foreach ( $pair as $from => $to ) {
					// to ensure that $from and $to not be left blank
					// so $this->translate() could always return a string
					if ( $from || $to ) {
						// more efficient than array_merge(), about 2.5 times.
						$this->mTables[$variant]->setPair( $from, $to );
					}
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$variant]->removeArray( $pair );
			}
		}
	}

	/**
	 * Convert a page title (namespace prefix and text) to the preferred
	 * variant.
	 *
	 * @param object $title Title-like object providing getNamespace() and
	 *   getText()
	 * @return string Converted title text
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$index = $title->getNamespace();
		if ( $index !== NS_MAIN ) {
			$text = $this->convertNamespace( $index ) . ':';
		} else {
			$text = '';
		}
		$text .= $this->translate( $title->getText(), $variant );
		return $text;
	}

	/**
	 * Get the namespace display name in the preferred variant.
	 *
	 * @param int $index Namespace id
	 * @return string Namespace name for display ('' for NS_MAIN)
	 */
	public function convertNamespace( $index ) {
		$variant = $this->getPreferredVariant();
		if ( $index === NS_MAIN ) {
			return '';
		} else {
			// First check if a message gives a converted name in the target variant.
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
			if ( $nsConvMsg->exists() ) {
				return $nsConvMsg->plain();
			}
			// Then check if a message gives a converted name in content language
			// which needs extra translation to the target variant.
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
			if ( $nsConvMsg->exists() ) {
				return $this->translate( $nsConvMsg->plain(), $variant );
			}
			// No message exists, retrieve it from the target variant's namespace names.
			$langObj = $this->mLangObj->factory( $variant );
			return $langObj->getFormattedNsText( $index );
		}
	}

	/**
	 * Convert text to the preferred variant, interpreting -{ }- markup.
	 *
	 * @param string $text Text to convert
	 * @return string Converted text
	 */
	public function convert( $text ) {
		$variant = $this->getPreferredVariant();
		return $this->convertTo( $text, $variant );
	}

	/**
	 * Same as convert(), but to an explicitly given variant. Returns the
	 * text unchanged when $wgDisableLangConversion is set.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Target variant code
	 * @return string Converted text
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;
		if ( $wgDisableLangConversion ) {
			return $text;
		}
		// Reset converter state for a new converter run.
		$this->mConvRuleTitle = false;
		return $this->recursiveConvertTopLevel( $text, $variant );
	}

	/**
	 * Convert text outside of any rule: plain segments go through
	 * autoConvert() (unless guessVariant() says the text already is in the
	 * target variant) and each -{ ... }- is handed to recursiveConvertRule().
	 *
	 * @param string $text Text to convert
	 * @param string $variant Target variant code
	 * @param int $depth Current nesting depth
	 * @return string Converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$startPos = 0;
		$out = '';
		$length = strlen( $text );
		$shouldConvert = !$this->guessVariant( $text, $variant );

		while ( $startPos < $length ) {
			$pos = strpos( $text, '-{', $startPos );

			if ( $pos === false ) {
				// No more markup, append final segment
				$fragment = substr( $text, $startPos );
				$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
				return $out;
			}

			// Markup found
			// Append initial segment
			$fragment = substr( $text, $startPos, $pos - $startPos );
			$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

			// Advance position
			$startPos = $pos;

			// Do recursive conversion
			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
		}

		return $out;
	}

	/**
	 * Convert the rule starting at $startPos (which must point at "-{"),
	 * recursing into nested rules up to $mMaxDepth.
	 *
	 * @param string $text Full text being converted
	 * @param string $variant Target variant code
	 * @param int &$startPos Position of the opening "-{"; on return, the
	 *   position just after the rule
	 * @param int $depth Current nesting depth
	 * @throws MWException If $startPos does not point at "-{"
	 * @return string Display text of the rule
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						// "continue 2" targets the while loop. A bare "continue"
						// inside a switch behaves like "break" and raises a
						// warning on PHP >= 7.3; the control flow is unchanged
						// because nothing follows the switch in the loop body.
						continue 2;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}

	/**
	 * If a link target does not exist, look for an existing page whose
	 * title is one of the variant conversions of $link and, if one is
	 * found, update $link and $nt to point at it.
	 *
	 * @param string &$link Link text; replaced on success
	 * @param mixed &$nt Title object of the link, if any; replaced on success
	 * @param bool $ignoreOtherCond When true, skip the request/user-option
	 *   checks that would otherwise suppress link conversion
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
			$wgUser;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$disableLinkConversion = $wgDisableLangConversion
			|| $wgDisableTitleConversion;
		$linkBatch = new LinkBatch();

		$ns = NS_MAIN;

		if ( $disableLinkConversion ||
			( !$ignoreOtherCond &&
				( $isredir == 'no'
					|| $action == 'edit'
					|| $action == 'submit'
					|| $linkconvert == 'no'
					|| $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
			return;
		}

		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
		}

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		$titles = array();

		foreach ( $variants as $v ) {
			if ( $v != $link ) {
				$varnt = Title::newFromText( $v, $ns );
				if ( !is_null( $varnt ) ) {
					$linkBatch->addObj( $varnt );
					$titles[] = $varnt;
				}
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		foreach ( $titles as $varnt ) {
			if ( $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $varnt->getText();
				break;
			}
		}
	}

	/**
	 * Get the extra hash-key fragment that identifies the preferred variant.
	 *
	 * @return string '!' followed by the preferred variant code
	 */
	public function getExtraHashOptions() {
		$variant = $this->getPreferredVariant();
		return '!' . $variant;
	}

	/**
	 * Guess whether a text is already written in the given variant. The
	 * base implementation always answers false; callers skip conversion
	 * when this returns true.
	 *
	 * @param string $text Text to inspect
	 * @param string $variant Variant code
	 * @return bool
	 */
	public function guessVariant( $text, $variant ) {
		return false;
	}

	/**
	 * Load the default conversion tables. Must be overridden.
	 *
	 * @throws MWException Always, in this base implementation
	 */
	public function loadDefaultTables() {
		$name = get_class( $this );
		throw new MWException( "Must implement loadDefaultTables() method in class $name" );
	}

	/**
	 * Load the conversion tables, from $wgLangConvMemc when possible and
	 * otherwise from the defaults plus the Conversiontable pages, storing
	 * the rebuilt tables back into the cache. Does nothing once the tables
	 * have been loaded.
	 *
	 * @param bool $fromCache Whether to try the cache first
	 */
	public function loadTables( $fromCache = true ) {
		global $wgLangConvMemc;

		if ( $this->mTablesLoaded ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		$this->mTablesLoaded = true;
		$this->mTables = false;
		if ( $fromCache ) {
			wfProfileIn( __METHOD__ . '-cache' );
			$this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
			wfProfileOut( __METHOD__ . '-cache' );
		}
		if ( !$this->mTables
			|| !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
			wfProfileIn( __METHOD__ . '-recache' );
			// not in cache, or we need a fresh reload.
			// We will first load the default tables
			// then update them using things in MediaWiki:Conversiontable/*
			$this->loadDefaultTables();
			foreach ( $this->mVariants as $var ) {
				$cached = $this->parseCachedTable( $var );
				$this->mTables[$var]->mergeArray( $cached );
			}

			$this->postLoadTables();
			$this->mTables[self::CACHE_VERSION_KEY] = true;

			$wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			wfProfileOut( __METHOD__ . '-recache' );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Hook for subclasses, called by loadTables() after the tables have
	 * been rebuilt and before they are cached. Does nothing here.
	 */
	public function postLoadTables() { }

	/**
	 * Discard the loaded tables and rebuild them, bypassing the cache.
	 */
	public function reloadTables() {
		if ( $this->mTables ) {
			unset( $this->mTables );
		}
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}

	/**
	 * Parse the conversion table stored for a variant (the
	 * 'conversiontable' message, or a MediaWiki:Conversiontable/<code>/...
	 * subpage) into a from => to map. Mappings are written inside -{ }-
	 * blocks as "from => to;" entries; text after '//' in an entry is
	 * ignored. Subpages linked as [[MediaWiki:Conversiontable/<code>/...]]
	 * are parsed too when $recursive is set.
	 *
	 * Each page is parsed at most once per process: a repeated request for
	 * the same page returns an empty array.
	 *
	 * @param string $code Variant code
	 * @param string $subpage Subpage name, '' for the top-level table
	 * @param bool $recursive Whether to follow subpage links
	 * @return array Map of source string => replacement string
	 */
	public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		static $parsed = array();

		$key = 'Conversiontable/' . $code;
		if ( $subpage ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return array();
		}

		$parsed[$key] = true;

		if ( $subpage === '' ) {
			$txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
		} else {
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision ) {
					if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
						$txt = $revision->getContent( Revision::RAW )->getNativeData();
					}

					//@todo: in the future, use a specialized content model, perhaps based on json!
				}
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return array();
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = array();
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				continue;
			}
			$b = explode( '|', $link[0], 2 );
			$b = explode( '/', trim( $b[0] ), 3 );
			if ( count( $b ) == 3 ) {
				$sublink = $b[2];
			} else {
				$sublink = '';
			}

			// A link without any '/' has no variant component; checking the
			// count first avoids reading the undefined offset $b[1].
			if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
				$sublinks[] = $sublink;
			}
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = array();
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			$mappings = explode( '}-', $block, 2 );
			$stripped = str_replace( array( "'", '"', '*', '#' ), '',
				$mappings[0] );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				$ret = array_merge( $ret, $s );
			}
		}

		if ( $this->mUcfirst ) {
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}

	/**
	 * Wrap text in -{R|...}- so that it is output raw, unless it already
	 * contains conversion markup.
	 *
	 * @param string $text Text to protect
	 * @param bool $noParse Unused by this implementation
	 * @return string The (possibly wrapped) text
	 */
	public function markNoConversion( $text, $noParse = false ) {
		# don't mark if already marked
		# (compare with false: markup at offset 0 must count as well)
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		$ret = "-{R|$text}-";
		return $ret;
	}

	/**
	 * Convert a category sort key. The base implementation returns the key
	 * unchanged.
	 *
	 * @param string $key Sort key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/**
	 * Save-complete hook handler: rebuild the conversion tables when a
	 * MediaWiki:Conversiontable/<variant>[/...] page for an enabled variant
	 * has been saved. Only $page is used.
	 *
	 * @param object $page Page object providing getTitle()
	 * @param mixed $user Unused
	 * @param mixed $content Unused
	 * @param mixed $summary Unused
	 * @param mixed $isMinor Unused
	 * @param mixed $isWatch Unused
	 * @param mixed $section Unused
	 * @param mixed $flags Unused
	 * @param mixed $revision Unused
	 * @return bool Always true
	 */
	public function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
		$isWatch, $section, $flags, $revision ) {
		$titleobj = $page->getTitle();
		if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
			$title = $titleobj->getDBkey();
			$t = explode( '/', $title, 3 );
			$c = count( $t );
			if ( $c > 1 && $t[0] == 'Conversiontable' ) {
				if ( $this->validateVariant( $t[1] ) ) {
					$this->reloadTables();
				}
			}
		}
		return true;
	}

	/**
	 * Neutralise conversion markup inside math content.
	 *
	 * @param string $text Text to armour
	 * @return string Text with the braces of "-{" and "}-" entity-encoded
	 */
	public function armourMath( $text ) {
		// convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
		// any unwanted markup appearing in the math image tag.
		$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
		return $text;
	}

	/**
	 * Get the preg_split() pattern used to split the body of a rule into
	 * per-variant parts. The pattern is built once and cached in
	 * $mVarSeparatorPattern.
	 *
	 * @return string Regular expression
	 */
	public function getVarSeparatorPattern() {
		if ( is_null( $this->mVarSeparatorPattern ) ) {
			// varsep_pattern for preg_split:
			// text should be splited by ";" only if a valid variant
			// name exist after the markup, for example:
			//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
			//  <span style="font-size:120%;">yyy</span>;}-
			// we should split it as:
			//  array(
			//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
			//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
			//    [2] => ''
			//  )
			$pat = '/;\s*(?=';
			foreach ( $this->mVariants as $variant ) {
				// Variant codes are literal text, not regex syntax
				$quoted = preg_quote( $variant, '/' );
				// zh-hans:xxx;zh-hant:yyy
				$pat .= $quoted . '\s*:|';
				// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
				$pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
			}
			$pat .= '\s*$)/';
			$this->mVarSeparatorPattern = $pat;
		}
		return $this->mVarSeparatorPattern;
	}
}

/**
 * Parser for a single manual conversion rule, i.e. the text found between
 * "-{" and "}-". After parse(), the rule exposes what to display, an
 * optional title, and the conversion-table changes it requests.
 */
class ConverterRule {
	public $mText; // original text in -{text}-
	public $mConverter; // LanguageConverter object
	public $mRuleDisplay = '';
	public $mRuleTitle = false;
	public $mRules = '';// string : the text of the rules
	public $mRulesAction = 'none';
	public $mFlags = array();
	public $mVariantFlags = array();
	public $mConvTable = array();
	public $mBidtable = array();// array of the translation in each variant
	public $mUnidtable = array();// array of the translation in each variant

	/**
	 * @param string $text The text between -{ and }-
	 * @param LanguageConverter $converter The owning converter
	 */
	public function __construct( $text, $converter ) {
		$this->mText = $text;
		$this->mConverter = $converter;
	}

	/**
	 * Look up the bidirectional table for the first of the given variants
	 * that has an entry.
	 *
	 * @param string|array $variants Variant code(s), in order of preference
	 * @return string|bool The text found, or false
	 */
	public function getTextInBidtable( $variants ) {
		$variants = (array)$variants;
		if ( !$variants ) {
			return false;
		}
		foreach ( $variants as $variant ) {
			if ( isset( $this->mBidtable[$variant] ) ) {
				return $this->mBidtable[$variant];
			}
		}
		return false;
	}

	/**
	 * Split $mText at the first '|' into flags and rule text, normalise the
	 * flag set, and store the results in $mFlags, $mVariantFlags and
	 * $mRules.
	 */
	public function parseFlags() {
		$text = $this->mText;
		$flags = array();
		$variantFlags = array();

		$sepPos = strpos( $text, '|' );
		if ( $sepPos !== false ) {
			$validFlags = $this->mConverter->mFlags;
			$f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
			foreach ( $f as $ff ) {
				$ff = trim( $ff );
				if ( isset( $validFlags[$ff] ) ) {
					$flags[$validFlags[$ff]] = true;
				}
			}
			$text = strval( substr( $text, $sepPos + 1 ) );
		}

		if ( !$flags ) {
			$flags['S'] = true;
		} elseif ( isset( $flags['R'] ) ) {
			$flags = array( 'R' => true );// remove other flags
		} elseif ( isset( $flags['N'] ) ) {
			$flags = array( 'N' => true );// remove other flags
		} elseif ( isset( $flags['-'] ) ) {
			$flags = array( '-' => true );// remove other flags
		} elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
			$flags['H'] = true;
		} elseif ( isset( $flags['H'] ) ) {
			// replace A flag, and remove other flags except T
			$temp = array( '+' => true, 'H' => true );
			if ( isset( $flags['T'] ) ) {
				$temp['T'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				$temp['D'] = true;
			}
			$flags = $temp;
		} else {
			if ( isset( $flags['A'] ) ) {
				$flags['+'] = true;
				$flags['S'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				unset( $flags['S'] );
			}
			// try to find flags like "zh-hans", "zh-hant"
			// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
			$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
			if ( $variantFlags ) {
				$variantFlags = array_flip( $variantFlags );
				$flags = array();
			}
		}
		$this->mVariantFlags = $variantFlags;
		$this->mRules = $text;
		$this->mFlags = $flags;
	}

	/**
	 * Parse $mRules into the bidirectional table ("variant:text") and the
	 * unidirectional table ("from=>variant:text"). Both tables are emptied
	 * if any part names a variant unknown to the converter's
	 * $mVariantNames.
	 */
	public function parseRules() {
		$rules = $this->mRules;
		$bidtable = array();
		$unidtable = array();
		$variants = $this->mConverter->mVariants;
		$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

		$choice = preg_split( $varsep_pattern, $rules );

		foreach ( $choice as $c ) {
			$v = explode( ':', $c, 2 );
			if ( count( $v ) != 2 ) {
				// syntax error, skip
				continue;
			}
			$to = trim( $v[1] );
			$v = trim( $v[0] );
			$u = explode( '=>', $v, 2 );
			// if $to is empty, strtr() could return a wrong result
			if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) {
				$bidtable[$v] = $to;
			} elseif ( count( $u ) == 2 ) {
				$from = trim( $u[0] );
				$v = trim( $u[1] );
				if ( array_key_exists( $v, $unidtable )
					&& !is_array( $unidtable[$v] )
					&& $to
					&& in_array( $v, $variants ) ) {
					$unidtable[$v] = array( $from => $to );
				} elseif ( $to && in_array( $v, $variants ) ) {
					$unidtable[$v][$from] = $to;
				}
			}
			// syntax error, pass
			if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
				$bidtable = array();
				$unidtable = array();
				break;
			}
		}
		$this->mBidtable = $bidtable;
		$this->mUnidtable = $unidtable;
	}

	/**
	 * Build a human-readable description of the parsed rules, using the
	 * converter's variant names and description separators.
	 *
	 * @return string
	 */
	public function getRulesDesc() {
		$codesep = $this->mConverter->mDescCodeSep;
		$varsep = $this->mConverter->mDescVarSep;
		$text = '';
		foreach ( $this->mBidtable as $k => $v ) {
			$text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
		}
		foreach ( $this->mUnidtable as $k => $a ) {
			foreach ( $a as $from => $to ) {
				$text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
					"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Work out the text this rule displays for a variant.
	 *
	 * @param string $variant Variant code
	 * @return string|bool Text to display; may be false when nothing in the
	 *   tables applies to $variant
	 */
	public function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
			return $this->mRules;
		} else {
			// display current variant in bidirectional array
			$disp = $this->getTextInBidtable( $variant );
			// or display current variant in fallbacks
			if ( !$disp ) {
				$disp = $this->getTextInBidtable(
					$this->mConverter->getVariantFallbacks( $variant ) );
			}
			// or display current variant in unidirectional array
			if ( !$disp && array_key_exists( $variant, $unidtable ) ) {
				$disp = array_values( $unidtable[$variant] );
				$disp = $disp[0];
			}
			// or display first text under disable manual convert
			if ( !$disp
				&& $this->mConverter->mManualLevel[$variant] == 'disable' ) {
				if ( count( $bidtable ) > 0 ) {
					$disp = array_values( $bidtable );
					$disp = $disp[0];
				} else {
					$disp = array_values( $unidtable );
					$disp = array_values( $disp[0] );
					$disp = $disp[0];
				}
			}
			return $disp;
		}
	}

	/**
	 * Fill $mConvTable (variant => array of from => to) from the parsed
	 * bidirectional and unidirectional tables, honouring each variant's
	 * manual conversion level.
	 */
	public function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = array();
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->mManualLevel;

		$vmarked = array();
		foreach ( $this->mConverter->mVariants as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				if ( $vf ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] == 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] == 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}
			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
				&& isset( $unidtable[$v] ) )
			{
				if ( isset( $this->mConvTable[$v] ) ) {
					$this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse the rule: flags, rule tables, display text, title and the
	 * resulting conversion table.
	 *
	 * @param string|null $variant Target variant; defaults to the
	 *   converter's preferred variant
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else { // if current variant no in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			$this->mFlags = $flags = array( 'R' => true );
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// decode => HTML entities modified by Sanitizer::removeHTMLtags
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if text in -{A/H/-|text} without rules
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				$this->mFlags = $flags = array( 'R' => true );
			}
		}

		// false means "no flag produced any display"; see the error case below
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
						$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
					} else {
						$this->mRuleDisplay = '';
					}
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedStr( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Not implemented; currently returns nothing.
	 *
	 * @todo Implement
	 */
	public function hasRules() {
		// TODO:
	}

	/**
	 * Get the text to display for this rule, as computed by parse().
	 *
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get the converted title set by a 'T' rule.
	 *
	 * @return string|bool The title, or false if the rule sets none
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Get how the conversion table should be applied.
	 *
	 * @return string 'add', 'remove' or 'none'
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get the conversion table generated by parse().
	 *
	 * @return array Map of variant code => array of from => to
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get the rule text (the part after the flags).
	 *
	 * @return string
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get the normalised flags of this rule.
	 *
	 * @return array Map of flag => true
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}