MediaWiki
REL1_20
|
<?php
/**
 * Base class for language variant conversion.
 *
 * Holds the per-variant conversion tables, works out which variant the
 * current request wants, and converts text (including inline -{...}-
 * manual conversion rules) to that variant. Subclasses must implement
 * loadDefaultTables() to populate $mTables.
 */
class LanguageConverter {

	/**
	 * Language codes that have variants.
	 */
	public static $languagesWithVariants = array(
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	);

	public $mMainLanguageCode;
	public $mVariants, $mVariantFallbacks, $mVariantNames;
	public $mTablesLoaded = false;
	public $mTables;
	// 'bidirectional' 'unidirectional' 'disable' for each variant
	public $mManualLevel;

	/**
	 * Cache key under which the conversion tables are stored
	 * (built with wfMemcKey() in the constructor).
	 */
	public $mCacheKey;

	public $mLangObj;
	public $mFlags;
	public $mDescCodeSep = ':', $mDescVarSep = ';';
	public $mUcfirst = false;
	public $mConvRuleTitle = false;
	public $mURLVariant;
	public $mUserVariant;
	public $mHeaderVariant;
	public $mMaxDepth = 10;
	public $mVarSeparatorPattern;

	const CACHE_VERSION_KEY = 'VERSION 6';

	/**
	 * Constructor
	 *
	 * @param $langobj Language object this converter belongs to
	 * @param $maincode String: the main language code of this language
	 * @param $variants Array: the supported variant codes; entries listed
	 *   in $wgDisabledVariants are removed
	 * @param $variantfallbacks Array: map of variant code => fallback
	 *   variant code (string) or list of fallback codes (array)
	 * @param $flags Array: extra rule flags, merged over the defaults
	 * @param $manualLevel Array: map of variant code => manual conversion
	 *   level; variants not listed default to 'bidirectional'
	 */
	public function __construct( $langobj, $maincode, $variants = array(),
		$variantfallbacks = array(), $flags = array(),
		$manualLevel = array() ) {
		global $wgDisabledVariants;
		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
		$defaultflags = array(
			// 'S' show converted text
			// '+' add rules for alltext
			// 'E' the given flags are erroneous
			// these flags above are reserved for program
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code
						// (but no display in placed code)
			'N' => 'N'  // current variant name
		);
		$this->mFlags = array_merge( $defaultflags, $flags );
		foreach ( $this->mVariants as $v ) {
			if ( array_key_exists( $v, $manualLevel ) ) {
				$this->mManualLevel[$v] = $manualLevel[$v];
			} else {
				$this->mManualLevel[$v] = 'bidirectional';
			}
			// every variant code is itself a valid rule flag
			$this->mFlags[$v] = $v;
		}
	}

	/**
	 * Get all enabled variants of this language.
	 *
	 * @return Array of variant codes
	 */
	public function getVariants() {
		return $this->mVariants;
	}

	/**
	 * Get the fallback(s) configured for a variant. When none is
	 * configured, the main language code is returned.
	 *
	 * @param $variant String: the language code of the variant
	 * @return String|Array: a single fallback code or a list of codes,
	 *   exactly as supplied to the constructor
	 */
	public function getVariantFallbacks( $variant ) {
		if ( isset( $this->mVariantFallbacks[$variant] ) ) {
			return $this->mVariantFallbacks[$variant];
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the title produced by the conversion rule.
	 *
	 * @return String|bool: the converted title text, or false if no
	 *   title rule has been applied
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}

	/**
	 * Get the preferred language variant.
	 *
	 * Sources are tried in this order: the URL, then the user preference
	 * (logged-in users) or the Accept-Language header (anonymous users),
	 * then $wgDefaultLanguageVariant, and finally the main language code.
	 *
	 * @return String: the preferred language code
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. the return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		if ( $this->validateVariant( $req ) ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the default variant, ignoring user preference and HTTP headers:
	 * the URL variant, else $wgDefaultLanguageVariant (if valid), else
	 * the main language code.
	 *
	 * @return String: the default variant code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$req = $this->getURLVariant();

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Validate a variant code against the enabled variants.
	 *
	 * @param $variant Mixed: the variant to validate
	 * @return Mixed: the variant if it is one of $mVariants, null otherwise
	 */
	public function validateVariant( $variant = null ) {
		// Strict comparison: a loose in_array() lets non-string values
		// such as integer 0 match arbitrary variant codes on older PHP.
		if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		return null;
	}

	/**
	 * Get the variant specified in the URL ('variant' parameter, falling
	 * back to 'uselang'). The validated result is memoized in
	 * $mURLVariant.
	 *
	 * @return Mixed: variant if one is found and valid, null otherwise
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$ret = $wgRequest->getText( 'variant' );

		if ( !$ret ) {
			$ret = $wgRequest->getVal( 'uselang' );
		}

		return $this->mURLVariant = $this->validateVariant( $ret );
	}

	/**
	 * Determine the variant from the user's preferences: the 'variant'
	 * option for logged-in users, the 'language' option otherwise.
	 *
	 * @return Mixed: variant if one is found and valid, null otherwise
	 */
	protected function getUserVariant() {
		global $wgUser;

		// Deliberately not short-circuited on $this->mUserVariant:
		// memoizing this function wreaks havoc on parserTest.php

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( $wgUser->isLoggedIn() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$ret = $wgUser->getOption( 'language' );
		}

		return $this->mUserVariant = $this->validateVariant( $ret );
	}

	/**
	 * Determine the variant from the request's Accept-Language header.
	 * Languages in the header are tried first; if none is a valid
	 * variant, their configured fallbacks are tried. The result is
	 * memoized in $mHeaderVariant.
	 *
	 * @return Mixed: variant if one is found and valid, null otherwise
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$languages = array_keys( $wgRequest->getAcceptLang() );
		if ( empty( $languages ) ) {
			return null;
		}

		$fallbackLanguages = array();
		foreach ( $languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages =
					array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			$fallback_languages = array_unique( $fallbackLanguages );
			foreach ( $fallback_languages as $language ) {
				$this->mHeaderVariant = $this->validateVariant( $language );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}

	/**
	 * Dictionary-based conversion.
	 *
	 * Converts the text to the given variant while leaving HTML markup,
	 * HTML entities, parser placeholders and the contents of <code>,
	 * <script> and <pre> untouched. 'title' and 'alt' attributes of
	 * matched elements are converted too, unless they look like URLs.
	 *
	 * @param $text String: the text to be converted
	 * @param $toVariant bool|string: the target variant; the preferred
	 *   variant is used when false
	 * @return String: the converted text
	 */
	public function autoConvert( $text, $toVariant = false ) {
		wfProfileIn( __METHOD__ );

		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				wfProfileOut( __METHOD__ );
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			// text is already in the target variant
			wfProfileOut( __METHOD__ );
			return $text;
		}

		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		*/
		global $wgParser;
		if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
			$marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
		} else {
			$marker = '';
		}

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>]+$|^[^<>]*>';

		// disable convert to variants between <code></code> tags
		$codefix = '<code>.+?<\/code>|';
		// disable conversion of <script type="text/javascript"> ... </script>
		$scriptfix = '<script.*?>.*?<\/script>|';
		// disable conversion of <pre xxxx> ... </pre>
		$prefix = '<pre.*?>.*?<\/pre>|';

		$reg = '/' . $codefix . $scriptfix . $prefix .
			'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		$text = str_replace( "\000", '', $text );

		$markupMatches = null;
		$elementMatches = null;
		while ( $startPos < strlen( $text ) ) {
			if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
			} else {
				$elementPos = strlen( $text );
				$element = '';
			}

			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
				$elementMatches ) )
			{
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				$changed = false;
				foreach ( array( 'title', 'alt' ) as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs
					if ( !strpos( $attr, '://' ) ) {
						$attr = $this->translate( $attr, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$attr = preg_replace( '/<[^>]+>/', '', $attr );
					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		wfProfileOut( __METHOD__ );
		return $output;
	}

	/**
	 * Translate a string to a variant using that variant's conversion
	 * table. Doesn't parse rules or do any of that other stuff; for that
	 * use convert(). Empty or whitespace-only text is returned unchanged.
	 *
	 * @param $text String: text to convert
	 * @param $variant String: variant language code
	 * @return String: translated text
	 */
	public function translate( $text, $variant ) {
		wfProfileIn( __METHOD__ );
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			$text = $this->mTables[$variant]->replace( $text );
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Call translate() for every enabled variant.
	 *
	 * @param $text String: the text to be converted
	 * @return Array: map of variant code => converted text
	 */
	public function autoConvertToAllVariants( $text ) {
		wfProfileIn( __METHOD__ );
		$this->loadTables();

		$ret = array();
		foreach ( $this->mVariants as $variant ) {
			$ret[$variant] = $this->translate( $text, $variant );
		}

		wfProfileOut( __METHOD__ );
		return $ret;
	}

	/**
	 * Convert a link text to all valid variants. In this class it is
	 * simply an alias of autoConvertToAllVariants().
	 *
	 * @param $text String: the text to be converted
	 * @return Array: map of variant code => converted text
	 */
	public function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	/**
	 * Apply a manual conversion rule: remember its title (if any) and
	 * merge its conversion table into, or remove it from, the tables of
	 * the affected variants.
	 *
	 * @param $convRule ConverterRule: the parsed rule
	 */
	protected function applyManualConv( $convRule ) {
		// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
		// title conversion.
		// Bug 24072: $mConvRuleTitle was overwritten by other manual
		// rule(s) not for title, this breaks the title conversion.
		$newConvRuleTitle = $convRule->getTitle();
		if ( $newConvRuleTitle ) {
			// Only overwrite when this rule actually carries a title
			$this->mConvRuleTitle = $newConvRuleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$convTable = $convRule->getConvTable();
		$action = $convRule->getRulesAction();
		foreach ( $convTable as $variant => $pair ) {
			if ( !$this->validateVariant( $variant ) ) {
				continue;
			}

			if ( $action == 'add' ) {
				foreach ( $pair as $from => $to ) {
					// to ensure that $from and $to not be left blank
					// so $this->translate() could always return a string
					if ( $from || $to ) {
						// more efficient than array_merge(), about 2.5 times.
						$this->mTables[$variant]->setPair( $from, $to );
					}
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$variant]->removeArray( $pair );
			}
		}
	}

	/**
	 * Auto convert a Title object to a readable string in the
	 * preferred variant, including the namespace prefix for titles
	 * outside the main namespace.
	 *
	 * @param $title Title: the title to convert
	 * @return String: converted title text
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$index = $title->getNamespace();
		if ( $index === NS_MAIN ) {
			$text = '';
		} else {
			// first let's check if a message has given us a converted name
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
			if ( $nsConvMsg->exists() ) {
				$text = $nsConvMsg->plain();
			} else {
				// the message does not exist, try retrieve it from the current
				// variant's namespace names.
				$langObj = $this->mLangObj->factory( $variant );
				$text = $langObj->getFormattedNsText( $index );
			}
			$text .= ':';
		}
		$text .= $title->getText();
		$text = $this->translate( $text, $variant );
		return $text;
	}

	/**
	 * Convert text to the preferred variant, processing -{...}- rules.
	 *
	 * @param $text String: text to be converted
	 * @return String: converted text
	 */
	public function convert( $text ) {
		$variant = $this->getPreferredVariant();
		return $this->convertTo( $text, $variant );
	}

	/**
	 * Same as convert() except a target variant is given explicitly.
	 * Returns the text unchanged when $wgDisableLangConversion is set.
	 *
	 * @param $text String: text to be converted
	 * @param $variant String: the target variant code
	 * @return String: converted text
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;
		if ( $wgDisableLangConversion ) {
			return $text;
		}
		return $this->recursiveConvertTopLevel( $text, $variant );
	}

	/**
	 * Recursively convert text on the outside of -{...}- markup.
	 * Segments between rules go through autoConvert() (unless
	 * guessVariant() says the text is already in the target variant);
	 * each rule is handed to recursiveConvertRule().
	 *
	 * @param $text String: text to be converted
	 * @param $variant String: the target variant code
	 * @param $depth Integer: depth of recursion
	 * @return String: converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$startPos = 0;
		$out = '';
		$length = strlen( $text );
		$shouldConvert = !$this->guessVariant( $text, $variant );

		while ( $startPos < $length ) {
			$pos = strpos( $text, '-{', $startPos );

			if ( $pos === false ) {
				// No more markup, append final segment
				$fragment = substr( $text, $startPos );
				$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
				return $out;
			}

			// Markup found
			// Append initial segment
			$fragment = substr( $text, $startPos, $pos - $startPos );
			$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

			// Advance position
			$startPos = $pos;

			// Do recursive conversion
			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
		}

		return $out;
	}

	/**
	 * Recursively convert text on the inside of a -{...}- rule.
	 *
	 * @param $text String: text to be converted
	 * @param $variant String: the target variant code
	 * @param $startPos Integer (by reference): on entry, the offset of
	 *   the opening "-{"; on return, the offset just past the rule
	 * @param $depth Integer: depth of recursion
	 * @throws MWException if $startPos does not point at "-{"
	 * @return String: converted text
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						// Resume scanning in the enclosing while loop. A bare
						// "continue" inside switch behaves like "break" and
						// triggers a warning on PHP >= 7.3.
						continue 2;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}

	/**
	 * If a link target does not exist, look for an existing page under
	 * one of the variant spellings of the link. On success both $link
	 * and $nt are replaced by the variant that exists.
	 *
	 * @param $link String (by reference): the name of the link
	 * @param $nt Mixed (by reference): the Title object of the link
	 * @param $ignoreOtherCond Boolean: when true, only the global
	 *   $wgDisableLangConversion / $wgDisableTitleConversion switches can
	 *   suppress the lookup; request parameters and the user's
	 *   'noconvertlink' option are ignored
	 * @return Null, the input parameters may be modified upon return
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
			$wgUser;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$disableLinkConversion = $wgDisableLangConversion
			|| $wgDisableTitleConversion;
		$linkBatch = new LinkBatch();

		$ns = NS_MAIN;

		if ( $disableLinkConversion ||
			( !$ignoreOtherCond &&
				( $isredir == 'no'
					|| $action == 'edit'
					|| $action == 'submit'
					|| $linkconvert == 'no'
					|| $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
			return;
		}

		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
		}

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		$titles = array();

		foreach ( $variants as $v ) {
			if ( $v != $link ) {
				$varnt = Title::newFromText( $v, $ns );
				if ( !is_null( $varnt ) ) {
					$linkBatch->addObj( $varnt );
					$titles[] = $varnt;
				}
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		foreach ( $titles as $varnt ) {
			if ( $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $varnt->getText();
				break;
			}
		}
	}

	/**
	 * Returns a language-specific hash option string: "!" followed by
	 * the preferred variant.
	 *
	 * @return String
	 */
	public function getExtraHashOptions() {
		$variant = $this->getPreferredVariant();
		return '!' . $variant;
	}

	/**
	 * Guess whether a text is already written in the given variant.
	 * The base implementation never does; subclasses may override.
	 *
	 * @param $text String: the text to be checked
	 * @param $variant String: language code of the variant to be checked for
	 * @return bool: true if $text appears to be written in $variant
	 */
	public function guessVariant( $text, $variant ) {
		return false;
	}

	/**
	 * Load the default conversion tables. This method must be
	 * implemented in derived classes.
	 *
	 * @throws MWException always, in this base implementation
	 */
	public function loadDefaultTables() {
		$name = get_class( $this );
		throw new MWException( "Must implement loadDefaultTables() method in class $name" );
	}

	/**
	 * Load conversion tables either from the cache or from the default
	 * tables plus the on-wiki Conversiontable pages. Does nothing if the
	 * tables were already loaded.
	 *
	 * @param $fromCache Boolean: whether to try the cache first
	 */
	public function loadTables( $fromCache = true ) {
		global $wgLangConvMemc;

		if ( $this->mTablesLoaded ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		$this->mTablesLoaded = true;
		$this->mTables = false;
		if ( $fromCache ) {
			wfProfileIn( __METHOD__ . '-cache' );
			$this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
			wfProfileOut( __METHOD__ . '-cache' );
		}
		if ( !$this->mTables
			|| !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
			wfProfileIn( __METHOD__ . '-recache' );
			// not in cache, or we need a fresh reload.
			// We will first load the default tables
			// then update them using things in MediaWiki:Conversiontable/*
			$this->loadDefaultTables();
			foreach ( $this->mVariants as $var ) {
				$cached = $this->parseCachedTable( $var );
				$this->mTables[$var]->mergeArray( $cached );
			}

			$this->postLoadTables();
			$this->mTables[self::CACHE_VERSION_KEY] = true;

			// 43200 seconds = 12 hours
			$wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			wfProfileOut( __METHOD__ . '-recache' );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Hook for post-processing after the conversion tables are loaded.
	 * Empty in the base class.
	 */
	public function postLoadTables() { }

	/**
	 * Reload the conversion tables, bypassing the cache.
	 */
	public function reloadTables() {
		if ( $this->mTables ) {
			unset( $this->mTables );
		}
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}

	/**
	 * Parse the conversion table stored on-wiki for a variant.
	 *
	 * For the top-level table (empty $subpage) the text is fetched as the
	 * 'conversiontable' message for $code; for a subpage it is the raw text
	 * of MediaWiki:Conversiontable/<code>/<subpage>. Mappings are written
	 * inside -{ ... }- blocks as "from => to" entries separated by ";",
	 * with optional trailing "//" comments. Links of the form
	 * [[MediaWiki:Conversiontable/<code>/...]] name further subpages,
	 * which are parsed too when $recursive is set.
	 *
	 * Each page is parsed at most once per request: a static registry
	 * makes repeat calls for the same page return an empty array.
	 *
	 * @param $code String: language code
	 * @param $subpage String: subpage name
	 * @param $recursive Boolean: parse subpages recursively? Defaults to true.
	 * @return Array: map of source text => converted text
	 */
	public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		static $parsed = array();

		$key = 'Conversiontable/' . $code;
		if ( $subpage ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return array();
		}

		$parsed[$key] = true;

		if ( $subpage === '' ) {
			$txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
		} else {
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision ) {
					$txt = $revision->getRawText();
				}
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return array();
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = array();
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				continue;
			}
			$b = explode( '|', $link[0], 2 );
			$b = explode( '/', trim( $b[0] ), 3 );
			if ( count( $b ) == 3 ) {
				$sublink = $b[2];
			} else {
				$sublink = '';
			}

			if ( $b[0] == $linkhead && $b[1] == $code ) {
				$sublinks[] = $sublink;
			}
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = array();
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			$mappings = explode( '}-', $block, 2 );
			$stripped = str_replace( array( "'", '"', '*', '#' ), '',
				$mappings[0] );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				$ret = array_merge( $ret, $s );
			}
		}

		if ( $this->mUcfirst ) {
			// also register the capitalised form of every mapping
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}

	/**
	 * Enclose a string with the "no conversion" tag -{R|...}-.
	 * Text that already contains "-{" or "}-" is returned unchanged.
	 *
	 * @param $text String: text to be tagged for no conversion
	 * @param $noParse Boolean: unused
	 * @return String: the tagged text
	 */
	public function markNoConversion( $text, $noParse = false ) {
		# don't mark if already marked
		# (compare against false: strpos() returns 0 when the text
		# starts with the marker, which must count as "marked")
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		$ret = "-{R|$text}-";
		return $ret;
	}

	/**
	 * Convert the sorting key for category links. The base
	 * implementation returns the key unchanged.
	 *
	 * @param $key String
	 * @return String
	 */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/**
	 * Handler called after an article is saved. When the saved page is
	 * MediaWiki:Conversiontable/<variant> (or a subpage of it) for a
	 * valid variant, the conversion tables are reloaded.
	 *
	 * Only $article is used; the remaining parameters are accepted to
	 * match the caller's signature.
	 *
	 * @return Boolean: always true
	 */
	public function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
		$isWatch, $section, $flags, $revision ) {
		$titleobj = $article->getTitle();
		if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
			$title = $titleobj->getDBkey();
			$t = explode( '/', $title, 3 );
			$c = count( $t );
			if ( $c > 1 && $t[0] == 'Conversiontable' ) {
				if ( $this->validateVariant( $t[1] ) ) {
					$this->reloadTables();
				}
			}
		}
		return true;
	}

	/**
	 * Armour rendered math against conversion by escaping the braces of
	 * the conversion markers as HTML entities.
	 *
	 * @param $text String: the text of the math image tag
	 * @return String: the text with "-{" and "}-" neutralised
	 */
	public function armourMath( $text ) {
		// convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
		// any unwanted markup appearing in the math image tag.
		$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
		return $text;
	}

	/**
	 * Get the cached separator pattern used to split the body of a
	 * -{...}- rule into per-variant pieces.
	 *
	 * @return String: a PCRE pattern for preg_split()
	 */
	public function getVarSeparatorPattern() {
		if ( is_null( $this->mVarSeparatorPattern ) ) {
			// varsep_pattern for preg_split:
			// text should be split by ";" only if a valid variant
			// name exists after the markup, for example:
			//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
			//  <span style="font-size:120%;">yyy</span>;}-
			// we should split it as:
			//  array(
			//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
			//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
			//    [2] => ''
			//  )
			$pat = '/;\s*(?=';
			foreach ( $this->mVariants as $variant ) {
				// variant codes are literals, not regex fragments
				$quoted = preg_quote( $variant, '/' );
				// zh-hans:xxx;zh-hant:yyy
				$pat .= $quoted . '\s*:|';
				// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
				$pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
			}
			$pat .= '\s*$)/';
			$this->mVarSeparatorPattern = $pat;
		}
		return $this->mVarSeparatorPattern;
	}
}

01115 class ConverterRule { 01116 var $mText; // original text in -{text}- 01117 var $mConverter; // LanguageConverter object 01118 var $mRuleDisplay = ''; 01119 var $mRuleTitle = false; 01120 var $mRules = '';// string : the text of the rules 01121 var $mRulesAction = 'none'; 01122 var $mFlags = array(); 01123 var $mVariantFlags = array(); 01124 var $mConvTable = array(); 01125 var $mBidtable = array();// array of the translation in each variant 01126 var $mUnidtable = array();// array of the translation in each variant 01127 01134 public function __construct( $text, $converter ) { 01135 $this->mText = $text; 01136 $this->mConverter = $converter; 01137 } 01138 01145 public function getTextInBidtable( $variants ) { 01146 $variants = (array)$variants; 01147 if ( !$variants ) { 01148 return false; 01149 } 01150 foreach ( $variants as $variant ) { 01151 if ( isset( $this->mBidtable[$variant] ) ) { 01152 return 
$this->mBidtable[$variant]; 01153 } 01154 } 01155 return false; 01156 } 01157 01162 function parseFlags() { 01163 $text = $this->mText; 01164 $flags = array(); 01165 $variantFlags = array(); 01166 01167 $sepPos = strpos( $text, '|' ); 01168 if ( $sepPos !== false ) { 01169 $validFlags = $this->mConverter->mFlags; 01170 $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) ); 01171 foreach ( $f as $ff ) { 01172 $ff = trim( $ff ); 01173 if ( isset( $validFlags[$ff] ) ) { 01174 $flags[$validFlags[$ff]] = true; 01175 } 01176 } 01177 $text = strval( substr( $text, $sepPos + 1 ) ); 01178 } 01179 01180 if ( !$flags ) { 01181 $flags['S'] = true; 01182 } elseif ( isset( $flags['R'] ) ) { 01183 $flags = array( 'R' => true );// remove other flags 01184 } elseif ( isset( $flags['N'] ) ) { 01185 $flags = array( 'N' => true );// remove other flags 01186 } elseif ( isset( $flags['-'] ) ) { 01187 $flags = array( '-' => true );// remove other flags 01188 } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) { 01189 $flags['H'] = true; 01190 } elseif ( isset( $flags['H'] ) ) { 01191 // replace A flag, and remove other flags except T 01192 $temp = array( '+' => true, 'H' => true ); 01193 if ( isset( $flags['T'] ) ) { 01194 $temp['T'] = true; 01195 } 01196 if ( isset( $flags['D'] ) ) { 01197 $temp['D'] = true; 01198 } 01199 $flags = $temp; 01200 } else { 01201 if ( isset( $flags['A'] ) ) { 01202 $flags['+'] = true; 01203 $flags['S'] = true; 01204 } 01205 if ( isset( $flags['D'] ) ) { 01206 unset( $flags['S'] ); 01207 } 01208 // try to find flags like "zh-hans", "zh-hant" 01209 // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" 01210 $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants ); 01211 if ( $variantFlags ) { 01212 $variantFlags = array_flip( $variantFlags ); 01213 $flags = array(); 01214 } 01215 } 01216 $this->mVariantFlags = $variantFlags; 01217 $this->mRules = $text; 01218 $this->mFlags = $flags; 01219 } 01220 01225 function 
parseRules() { 01226 $rules = $this->mRules; 01227 $bidtable = array(); 01228 $unidtable = array(); 01229 $variants = $this->mConverter->mVariants; 01230 $varsep_pattern = $this->mConverter->getVarSeparatorPattern(); 01231 01232 $choice = preg_split( $varsep_pattern, $rules ); 01233 01234 foreach ( $choice as $c ) { 01235 $v = explode( ':', $c, 2 ); 01236 if ( count( $v ) != 2 ) { 01237 // syntax error, skip 01238 continue; 01239 } 01240 $to = trim( $v[1] ); 01241 $v = trim( $v[0] ); 01242 $u = explode( '=>', $v, 2 ); 01243 // if $to is empty, strtr() could return a wrong result 01244 if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) { 01245 $bidtable[$v] = $to; 01246 } elseif ( count( $u ) == 2 ) { 01247 $from = trim( $u[0] ); 01248 $v = trim( $u[1] ); 01249 if ( array_key_exists( $v, $unidtable ) 01250 && !is_array( $unidtable[$v] ) 01251 && $to 01252 && in_array( $v, $variants ) ) { 01253 $unidtable[$v] = array( $from => $to ); 01254 } elseif ( $to && in_array( $v, $variants ) ) { 01255 $unidtable[$v][$from] = $to; 01256 } 01257 } 01258 // syntax error, pass 01259 if ( !isset( $this->mConverter->mVariantNames[$v] ) ) { 01260 $bidtable = array(); 01261 $unidtable = array(); 01262 break; 01263 } 01264 } 01265 $this->mBidtable = $bidtable; 01266 $this->mUnidtable = $unidtable; 01267 } 01268 01274 function getRulesDesc() { 01275 $codesep = $this->mConverter->mDescCodeSep; 01276 $varsep = $this->mConverter->mDescVarSep; 01277 $text = ''; 01278 foreach ( $this->mBidtable as $k => $v ) { 01279 $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep"; 01280 } 01281 foreach ( $this->mUnidtable as $k => $a ) { 01282 foreach ( $a as $from => $to ) { 01283 $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] . 
"$codesep$to$varsep"; } } return $text; }

	/**
	 * Pick the text that should be displayed for one variant, based on the
	 * rule tables (mBidtable / mUnidtable) of this rule.
	 *
	 * Lookup order:
	 *  1. the variant itself in the bidirectional table,
	 *  2. the variant's fallbacks in the bidirectional table,
	 *  3. the first entry of the variant's unidirectional table,
	 *  4. if manual conversion is set to 'disable' for the variant, the
	 *     first text found in either table.
	 *
	 * When both tables are empty the raw rule text (mRules) is returned.
	 *
	 * @param $variant String: variant code to look up
	 * @return mixed the text to display; a falsy value when nothing matched
	 */
	function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
			// no conversion tables at all: show the rule text unchanged
			return $this->mRules;
		}

		// NOTE(review): the !$disp checks below also treat a text of "0" as
		// "not found"; confirm against getTextInBidtable() before tightening.

		// display current variant in bidirectional array
		$disp = $this->getTextInBidtable( $variant );

		// or display current variant in fallbacks
		if ( !$disp ) {
			$disp = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $variant ) );
		}

		// or display current variant in unidirectional array
		if ( !$disp && array_key_exists( $variant, $unidtable ) ) {
			$disp = array_values( $unidtable[$variant] );
			$disp = $disp[0];
		}

		// or display first text under disable manual convert.
		// isset() avoids an undefined-index notice for a variant that has no
		// manual level entry; such a variant never matched 'disable' anyway.
		$manualLevel = $this->mConverter->mManualLevel;
		if ( !$disp
			&& isset( $manualLevel[$variant] )
			&& $manualLevel[$variant] == 'disable' ) {
			if ( count( $bidtable ) > 0 ) {
				$disp = array_values( $bidtable );
				$disp = $disp[0];
			} else {
				// $unidtable is non-empty here because the sum of both
				// counts was checked above
				$disp = array_values( $unidtable );
				$disp = array_values( $disp[0] );
				$disp = $disp[0];
			}
		}

		return $disp;
	}

	/**
	 * Build the per-variant conversion table (mConvTable) from the
	 * bidirectional and unidirectional rule tables.
	 *
	 * Variants missing from the bidirectional table are filled in from their
	 * fallbacks on a local copy; mBidtable itself is not modified.
	 * Entries are only added for variants whose manual level allows it.
	 */
	function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = array();
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->mManualLevel;

		// variants already processed that have a bidirectional text
		$vmarked = array();
		foreach ( $this->mConverter->mVariants as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				if ( $vf ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				// pair this variant with every earlier marked variant, in
				// both directions
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] == 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] == 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}

			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
				&& isset( $unidtable[$v] ) )
			{
				if ( isset( $this->mConvTable[$v] ) ) {
					$this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse this rule: work out its flags, its display text, its title text,
	 * its action ('add' / 'remove') and its conversion table.
	 *
	 * Results are stored on the object (mFlags, mRules, mRuleDisplay,
	 * mRuleTitle, mRulesAction, mConvTable) and read through the getters.
	 *
	 * @param $variant String|null: variant to parse for; when empty, the
	 *   converter's preferred variant is used
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else { // if current variant no in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			// the text is already converted; output it as raw content
			$this->mFlags = $flags = array( 'R' => true );
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// Decode the "=>" separator: per the original comment the ">" has
			// been turned into the HTML entity "&gt;" by
			// Sanitizer::removeHTMLtags. The search string must be the entity
			// form, otherwise this call is a no-op.
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if text in -{A/H/-|text} without rules
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				// no rules and no name/title flag: treat as raw content
				$this->mFlags = $flags = array( 'R' => true );
			}
		}

		// false marks "no flag produced a display text"; see error case below
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
						$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
					} else {
						$this->mRuleDisplay = '';
					}
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedStr( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			// none of the flags set a display text: show an error message
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Not implemented yet: the body is empty, so this currently returns null.
	 *
	 * @todo decide what "has rules" means and implement it
	 */
	public function hasRules() {
		// TODO:
	}

	/**
	 * Get the display text computed by parse().
	 *
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get the converted title; parse() sets it when the 'T' flag is present.
	 *
	 * @return mixed value of mRuleTitle
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Get how the conversion table should be applied; parse() sets it to
	 * 'add' for the '+' flag and to 'remove' for the '-' flag.
	 *
	 * @return mixed value of mRulesAction
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get the conversion table built by generateConvTable(), keyed by
	 * variant code.
	 *
	 * @return array
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get the rule text as it stands after parse().
	 *
	 * @return mixed value of mRules
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get the flags as they stand after parse().
	 *
	 * @return array
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}