MediaWiki
REL1_19
|
<?php
/**
 * Base class for language variant conversion.
 *
 * Keeps one conversion table per variant of a main language, converts text
 * to a requested (or preferred) variant, and interprets manual conversion
 * markup of the form -{...}-. Concrete converters must override
 * loadDefaultTables().
 */
class LanguageConverter {
    // Code of the main language; used whenever no variant can be validated.
    var $mMainLanguageCode;
    // Enabled variant codes, per-variant fallback(s), and language names.
    var $mVariants, $mVariantFallbacks, $mVariantNames;
    // Set to true by loadTables() once it has run.
    var $mTablesLoaded = false;
    // Per-variant table objects, plus a CACHE_VERSION_KEY marker entry.
    var $mTables;
    // 'bidirectional' 'unidirectional' 'disable' for each variant
    var $mManualLevel;

    // Cache key under which the conversion tables are stored in $wgMemc.
    var $mCacheKey;

    // Language object passed to the constructor.
    var $mLangObj;
    // Map of accepted flag => canonical flag for -{flags|...}- markup.
    var $mFlags;
    var $mDescCodeSep = ':', $mDescVarSep = ';';
    // When true, parseCachedTable() also adds ucfirst() forms of each pair.
    var $mUcfirst = false;
    // Title set by a manual rule (see applyManualConv()), or false.
    var $mConvRuleTitle = false;
    var $mURLVariant;
    var $mUserVariant;
    var $mHeaderVariant;
    // Maximum nesting depth of -{ ... }- handled by recursiveConvertRule().
    var $mMaxDepth = 10;
    // Lazily built regex, see getVarSeparatorPattern().
    var $mVarSeparatorPattern;

    const CACHE_VERSION_KEY = 'VERSION 6';

    /**
     * Constructor.
     *
     * @param $langobj Language object this converter belongs to
     * @param $maincode String: code of the main language
     * @param $variants Array: supported variant codes; entries listed in
     *   $wgDisabledVariants are removed
     * @param $variantfallbacks Array: variant code => fallback code(s)
     * @param $flags Array: extra flags, merged over the default flags
     * @param $manualLevel Array: variant code => manual conversion level;
     *   variants without an entry default to 'bidirectional'
     */
    public function __construct( $langobj, $maincode, $variants = array(),
        $variantfallbacks = array(), $flags = array(),
        $manualLevel = array() ) {
        global $wgDisabledVariants;
        $this->mLangObj = $langobj;
        $this->mMainLanguageCode = $maincode;
        $this->mVariants = array_diff( $variants, $wgDisabledVariants );
        $this->mVariantFallbacks = $variantfallbacks;
        $this->mVariantNames = Language::getLanguageNames();
        $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
        $defaultflags = array(
            // 'S' show converted text
            // '+' add rules for all text
            // 'E' the given flags are erroneous
            // the flags above are reserved for the program
            'A' => 'A', // add rule for convert code (all text convert)
            'T' => 'T', // title convert
            'R' => 'R', // raw content
            'D' => 'D', // convert description (subclass implement)
            '-' => '-', // remove convert (not implemented)
            'H' => 'H', // add rule for convert code
                        // (but no display in placed code)
            'N' => 'N'  // current variant name
        );
        $this->mFlags = array_merge( $defaultflags, $flags );
        foreach ( $this->mVariants as $v ) {
            if ( array_key_exists( $v, $manualLevel ) ) {
                $this->mManualLevel[$v] = $manualLevel[$v];
            } else {
                $this->mManualLevel[$v] = 'bidirectional';
            }
            // every variant code is itself a valid flag
            $this->mFlags[$v] = $v;
        }
    }

    /**
     * Get all enabled variants of the language.
     *
     * @return Array of variant codes
     */
    public function getVariants() {
        return $this->mVariants;
    }

    /**
     * Get the fallback(s) configured for a variant.
     *
     * @param $variant String: variant code
     * @return The entry of the fallback map for $variant (getHeaderVariant()
     *   handles both string and array entries), or the main language code
     *   when no entry exists
     */
    public function getVariantFallbacks( $variant ) {
        if ( isset( $this->mVariantFallbacks[$variant] ) ) {
            return $this->mVariantFallbacks[$variant];
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the title produced by a manual conversion rule.
     *
     * @return The title set by applyManualConv(), or false if none was set
     */
    public function getConvRuleTitle() {
        return $this->mConvRuleTitle;
    }

    /**
     * Get the preferred language variant.
     *
     * Sources are consulted in this order: the URL, then the user preference
     * (logged-in users) or the Accept-Language header (everyone else), then
     * $wgDefaultLanguageVariant, and finally the main language code.
     *
     * @return String: the preferred variant code
     */
    public function getPreferredVariant() {
        global $wgDefaultLanguageVariant, $wgUser;

        $req = $this->getURLVariant();

        if ( $wgUser->isLoggedIn() && !$req ) {
            $req = $this->getUserVariant();
        } elseif ( !$req ) {
            $req = $this->getHeaderVariant();
        }

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        // This function, unlike the other get*Variant functions, is
        // not memoized (i.e. its return value is not cached) since
        // new information might appear during processing after this
        // is first called.
        if ( $this->validateVariant( $req ) ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Get the default variant, ignoring user preference and HTTP headers.
     *
     * Uses the URL variant, then $wgDefaultLanguageVariant (if valid), then
     * the main language code.
     *
     * @return String: the default variant code
     */
    public function getDefaultVariant() {
        global $wgDefaultLanguageVariant;

        $req = $this->getURLVariant();

        if ( $wgDefaultLanguageVariant && !$req ) {
            $req = $this->validateVariant( $wgDefaultLanguageVariant );
        }

        if ( $req ) {
            return $req;
        }
        return $this->mMainLanguageCode;
    }

    /**
     * Validate a variant code against the enabled variants.
     *
     * @param $variant Mixed: the variant to validate
     * @return Mixed: $variant if it is one of the enabled variants,
     *   otherwise null
     */
    public function validateVariant( $variant = null ) {
        // Strict comparison: with loose comparison values such as true or 0
        // could match a variant code and be returned as a "valid variant".
        if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
            return $variant;
        }
        return null;
    }

    /**
     * Get the variant requested in the URL.
     *
     * Reads the 'variant' request parameter and falls back to 'uselang'.
     * A valid result is memoized in $mURLVariant.
     *
     * @return Mixed: variant code if one is found and valid, null otherwise
     */
    public function getURLVariant() {
        global $wgRequest;

        if ( $this->mURLVariant ) {
            return $this->mURLVariant;
        }

        // see if the preference is set in the request
        $ret = $wgRequest->getText( 'variant' );

        if ( !$ret ) {
            $ret = $wgRequest->getVal( 'uselang' );
        }

        return $this->mURLVariant = $this->validateVariant( $ret );
    }

    /**
     * Determine the variant from the user's preferences.
     *
     * Logged-in users are asked for their 'variant' option, everyone else
     * for their 'language' option.
     *
     * @return Mixed: variant code if the option is a valid variant,
     *   null otherwise
     */
    protected function getUserVariant() {
        global $wgUser;

        // memoizing this function wreaks havoc on parserTest.php
        /*
        if ( $this->mUserVariant ) {
            return $this->mUserVariant;
        }
        */

        // Get language variant preference from logged in users
        // Don't call this on stub objects because that causes infinite
        // recursion during initialisation
        if ( $wgUser->isLoggedIn() ) {
            $ret = $wgUser->getOption( 'variant' );
        } else {
            // figure out user lang without constructing wgLang to avoid
            // infinite recursion
            $ret = $wgUser->getOption( 'language' );
        }

        return $this->mUserVariant = $this->validateVariant( $ret );
    }

    /**
     * Determine the variant from the Accept-Language HTTP header.
     *
     * The first accepted language that is a valid variant wins. If none is,
     * the fallbacks of the accepted languages are tried in order. A valid
     * result is memoized in $mHeaderVariant.
     *
     * @return Mixed: variant code if one is found, null otherwise
     */
    protected function getHeaderVariant() {
        global $wgRequest;

        if ( $this->mHeaderVariant ) {
            return $this->mHeaderVariant;
        }

        // see if some supported language variant is set in the
        // HTTP header.
        $languages = array_keys( $wgRequest->getAcceptLang() );
        if ( empty( $languages ) ) {
            return null;
        }

        $fallbackLanguages = array();
        foreach ( $languages as $language ) {
            $this->mHeaderVariant = $this->validateVariant( $language );
            if ( $this->mHeaderVariant ) {
                break;
            }

            // To see if there are fallbacks of current language.
            // We record these fallback variants, and process
            // them later.
            $fallbacks = $this->getVariantFallbacks( $language );
            if ( is_string( $fallbacks ) ) {
                $fallbackLanguages[] = $fallbacks;
            } elseif ( is_array( $fallbacks ) ) {
                $fallbackLanguages =
                    array_merge( $fallbackLanguages, $fallbacks );
            }
        }

        if ( !$this->mHeaderVariant ) {
            // process fallback languages now
            $fallback_languages = array_unique( $fallbackLanguages );
            foreach ( $fallback_languages as $language ) {
                $this->mHeaderVariant = $this->validateVariant( $language );
                if ( $this->mHeaderVariant ) {
                    break;
                }
            }
        }

        return $this->mHeaderVariant;
    }

    /**
     * Dictionary-based conversion of a piece of text.
     *
     * HTML tags, HTML entities and parser placeholders are left untouched,
     * except that the 'title' and 'alt' attributes of tags are converted.
     * This function is NOT aware of -{...}- markup; see convert() for that.
     *
     * @param $text String: the text to be converted
     * @param $toVariant Mixed: target variant code; when false the preferred
     *   variant is used
     * @return String: the converted text ($text unchanged when
     *   guessVariant() reports that no conversion is needed)
     */
    public function autoConvert( $text, $toVariant = false ) {
        wfProfileIn( __METHOD__ );

        $this->loadTables();

        if ( !$toVariant ) {
            $toVariant = $this->getPreferredVariant();
            if ( !$toVariant ) {
                wfProfileOut( __METHOD__ );
                return $text;
            }
        }

        if ( $this->guessVariant( $text, $toVariant ) ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }

        /* we convert everything except:
           1. HTML markups (anything between < and >)
           2. HTML entities
           3. placeholders created by the parser
        */
        global $wgParser;
        if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
            $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
        } else {
            $marker = '';
        }

        // this one is needed when the text is inside an HTML markup
        $htmlfix = '|<[^>]+$|^[^<>]*>';

        // disable conversion between <code></code> tags
        $codefix = '<code>.+?<\/code>|';
        // disable conversion of <script type="text/javascript"> ... </script>
        $scriptfix = '<script.*?>.*?<\/script>|';
        // disable conversion of <pre xxxx> ... </pre>
        $prefix = '<pre.*?>.*?<\/pre>|';

        $reg = '/' . $codefix . $scriptfix . $prefix .
            '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
        $startPos = 0;
        $sourceBlob = '';
        $literalBlob = '';

        // Guard against delimiter nulls in the input
        $text = str_replace( "\000", '', $text );

        $markupMatches = null;
        $elementMatches = null;
        while ( $startPos < strlen( $text ) ) {
            if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
                $elementPos = $markupMatches[0][1];
                $element = $markupMatches[0][0];
            } else {
                $elementPos = strlen( $text );
                $element = '';
            }

            // Queue the part before the markup for translation in a batch
            $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

            // Advance to the next position
            $startPos = $elementPos + strlen( $element );

            // Translate any alt or title attributes inside the matched element
            if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
                $elementMatches ) )
            {
                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
                $changed = false;
                foreach ( array( 'title', 'alt' ) as $attrName ) {
                    if ( !isset( $attrs[$attrName] ) ) {
                        continue;
                    }
                    $attr = $attrs[$attrName];
                    // Don't convert URLs.
                    // NOTE: strpos() returning 0 ('://' at the very start)
                    // is also treated as "not a URL" here.
                    if ( !strpos( $attr, '://' ) ) {
                        $attr = $this->translate( $attr, $toVariant );
                    }

                    // Remove HTML tags to avoid disrupting the layout
                    $attr = preg_replace( '/<[^>]+>/', '', $attr );
                    if ( $attr !== $attrs[$attrName] ) {
                        $attrs[$attrName] = $attr;
                        $changed = true;
                    }
                }
                if ( $changed ) {
                    $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                        $elementMatches[3];
                }
            }
            $literalBlob .= $element . "\000";
        }

        // Do the main translation batch
        $translatedBlob = $this->translate( $sourceBlob, $toVariant );

        // Put the output back together: translated segments and literal
        // elements alternate, both lists are delimited by "\000".
        $translatedIter = StringUtils::explode( "\000", $translatedBlob );
        $literalIter = StringUtils::explode( "\000", $literalBlob );
        $output = '';
        while ( $translatedIter->valid() && $literalIter->valid() ) {
            $output .= $translatedIter->current();
            $output .= $literalIter->current();
            $translatedIter->next();
            $literalIter->next();
        }

        wfProfileOut( __METHOD__ );
        return $output;
    }

    /**
     * Translate a string to a variant using that variant's table.
     * Does not handle markup or -{...}- rules; see autoConvert().
     *
     * @param $text String: text to convert
     * @param $variant String: variant code
     * @return String: translated text ($text unchanged if trim( $text )
     *   is empty)
     */
    public function translate( $text, $variant ) {
        wfProfileIn( __METHOD__ );
        // If $text is empty or only includes spaces, do nothing
        // Otherwise translate it
        if ( trim( $text ) ) {
            $this->loadTables();
            $text = $this->mTables[$variant]->replace( $text );
        }
        wfProfileOut( __METHOD__ );
        return $text;
    }

    /**
     * Translate a piece of text into every enabled variant.
     *
     * @param $text String: the text to be converted
     * @return Array: variant code => converted text
     */
    public function autoConvertToAllVariants( $text ) {
        wfProfileIn( __METHOD__ );
        $this->loadTables();

        $ret = array();
        foreach ( $this->mVariants as $variant ) {
            $ret[$variant] = $this->translate( $text, $variant );
        }

        wfProfileOut( __METHOD__ );
        return $ret;
    }

    /**
     * Convert link text to all enabled variants.
     * Currently a plain alias of autoConvertToAllVariants().
     *
     * @param $text String: the text to be converted
     * @return Array: variant code => converted text
     */
    public function convertLinkToAllVariants( $text ) {
        return $this->autoConvertToAllVariants( $text );
    }

    /**
     * Apply a manual conversion rule: remember its title (if any) and merge
     * its pairs into, or remove them from, the per-variant tables.
     *
     * @param $convRule ConverterRule object that has already been parsed
     */
    protected function applyManualConv( $convRule ) {
        // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
        // title conversion.
        // Bug 24072: $mConvRuleTitle was overwritten by other manual
        // rule(s) not for title, this breaks the title conversion.
        $newConvRuleTitle = $convRule->getTitle();
        if ( $newConvRuleTitle ) {
            // Only overwrite when the rule actually carries a title
            $this->mConvRuleTitle = $newConvRuleTitle;
        }

        // merge/remove manual conversion rules to/from global table
        $convTable = $convRule->getConvTable();
        $action = $convRule->getRulesAction();
        foreach ( $convTable as $variant => $pair ) {
            if ( !$this->validateVariant( $variant ) ) {
                continue;
            }

            if ( $action == 'add' ) {
                foreach ( $pair as $from => $to ) {
                    // to ensure that $from and $to not be left blank
                    // so $this->translate() could always return a string
                    if ( $from || $to ) {
                        // more efficient than array_merge(), about 2.5 times.
                        $this->mTables[$variant]->setPair( $from, $to );
                    }
                }
            } elseif ( $action == 'remove' ) {
                $this->mTables[$variant]->removeArray( $pair );
            }
        }
    }

    /**
     * Convert a page title (including its namespace prefix) to the
     * preferred variant.
     *
     * @param $title Title object
     * @return String: converted "Namespace:Text" (no prefix for NS_MAIN)
     */
    public function convertTitle( $title ) {
        $variant = $this->getPreferredVariant();
        $index = $title->getNamespace();
        if ( $index === NS_MAIN ) {
            $text = '';
        } else {
            // first let's check if a message has given us a converted name
            $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
            if ( $nsConvMsg->exists() ) {
                $text = $nsConvMsg->plain();
            } else {
                // the message does not exist, try retrieve it from the current
                // variant's namespace names.
                $langObj = $this->mLangObj->factory( $variant );
                $text = $langObj->getFormattedNsText( $index );
            }
            $text .= ':';
        }
        $text .= $title->getText();
        $text = $this->translate( $text, $variant );
        return $text;
    }

    /**
     * Convert text to the preferred variant, honouring -{...}- markup.
     *
     * @param $text String: text to be converted
     * @return String: converted text
     */
    public function convert( $text ) {
        $variant = $this->getPreferredVariant();
        return $this->convertTo( $text, $variant );
    }

    /**
     * Same as convert(), with an explicit target variant.
     *
     * @param $text String: text to be converted
     * @param $variant String: target variant code
     * @return String: converted text; $text unchanged when
     *   $wgDisableLangConversion is set or guessVariant() returns true
     */
    public function convertTo( $text, $variant ) {
        global $wgDisableLangConversion;
        if ( $wgDisableLangConversion || $this->guessVariant( $text, $variant ) ) {
            return $text;
        }
        return $this->recursiveConvertTopLevel( $text, $variant );
    }

    /**
     * Convert text outside of any rule: plain segments go through
     * autoConvert(), each "-{" hands over to recursiveConvertRule().
     *
     * @param $text String: text to be converted
     * @param $variant String: target variant code
     * @param $depth Integer: current nesting depth
     * @return String: converted text
     */
    protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
        $startPos = 0;
        $out = '';
        $length = strlen( $text );
        while ( $startPos < $length ) {
            $pos = strpos( $text, '-{', $startPos );

            if ( $pos === false ) {
                // No more markup, append final segment
                $out .= $this->autoConvert( substr( $text, $startPos ), $variant );
                return $out;
            }

            // Markup found
            // Append initial segment
            $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant );

            // Advance position
            $startPos = $pos;

            // Do recursive conversion
            $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
        }

        return $out;
    }

    /**
     * Convert one -{...}- rule, including any rules nested inside it.
     *
     * @param $text String: the full text being converted
     * @param $variant String: target variant code
     * @param $startPos Integer: by reference; must point at the opening
     *   "-{" on entry and is moved past the consumed text
     * @param $depth Integer: current nesting depth, limited by $mMaxDepth
     * @return String: display text of the rule; for an unclosed rule, "-{"
     *   followed by the auto-converted remainder
     * @throws MWException if $startPos does not point at "-{"
     */
    protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
        // Quick sanity check. substr() is used instead of two string
        // offsets so that a position at the very end of $text cannot
        // read past the string.
        if ( substr( $text, $startPos, 2 ) !== '-{' ) {
            throw new MWException( __METHOD__ . ': invalid input string' );
        }

        $startPos += 2;
        $inner = '';
        $warningDone = false;
        $length = strlen( $text );

        while ( $startPos < $length ) {
            $m = false;
            preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
            if ( !$m ) {
                // Unclosed rule
                break;
            }

            $token = $m[0][0];
            $pos = $m[0][1];

            // Markup found
            // Append initial segment
            $inner .= substr( $text, $startPos, $pos - $startPos );

            // Advance position
            $startPos = $pos;

            switch ( $token ) {
                case '-{':
                    // Check max depth
                    if ( $depth >= $this->mMaxDepth ) {
                        $inner .= '-{';
                        if ( !$warningDone ) {
                            $inner .= '<span class="error">' .
                                wfMsgForContent( 'language-converter-depth-warning',
                                    $this->mMaxDepth ) .
                                '</span>';
                            $warningDone = true;
                        }
                        $startPos += 2;
                        // "continue 2" targets the while loop; a bare
                        // "continue" inside switch acts like "break".
                        continue 2;
                    }
                    // Recursively parse another rule
                    $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                    break;
                case '}-':
                    // Apply the rule
                    $startPos += 2;
                    $rule = new ConverterRule( $inner, $this );
                    $rule->parse( $variant );
                    $this->applyManualConv( $rule );
                    return $rule->getDisplay();
                default:
                    throw new MWException( __METHOD__ . ': invalid regex match' );
            }
        }

        // Unclosed rule
        if ( $startPos < $length ) {
            $inner .= substr( $text, $startPos );
        }
        $startPos = $length;
        return '-{' . $this->autoConvert( $inner, $variant );
    }

    /**
     * If a link target does not exist, look for an existing page under one
     * of its variant spellings and, if found, replace $link and $nt.
     *
     * @param $link String: by reference; the link text
     * @param $nt Mixed: by reference; the Title object of the link
     * @param $ignoreOtherCond Boolean: when true, skip the request/user
     *   based opt-outs (redirect=no, action=edit|submit, linkconvert=no,
     *   'noconvertlink' user option)
     */
    public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
        # If the article has already existed, there is no need to
        # check it again, otherwise it may cause a fault.
        if ( is_object( $nt ) && $nt->exists() ) {
            return;
        }

        global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
            $wgUser;
        $isredir = $wgRequest->getText( 'redirect', 'yes' );
        $action = $wgRequest->getText( 'action' );
        $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
        $disableLinkConversion = $wgDisableLangConversion
            || $wgDisableTitleConversion;
        $linkBatch = new LinkBatch();

        $ns = NS_MAIN;

        if ( $disableLinkConversion ||
            ( !$ignoreOtherCond &&
                ( $isredir == 'no'
                    || $action == 'edit'
                    || $action == 'submit'
                    || $linkconvert == 'no'
                    || $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
            return;
        }

        if ( is_object( $nt ) ) {
            $ns = $nt->getNamespace();
        }

        $variants = $this->autoConvertToAllVariants( $link );
        if ( !$variants ) { // give up
            return;
        }

        $titles = array();

        foreach ( $variants as $v ) {
            if ( $v != $link ) {
                $varnt = Title::newFromText( $v, $ns );
                if ( !is_null( $varnt ) ) {
                    $linkBatch->addObj( $varnt );
                    $titles[] = $varnt;
                }
            }
        }

        // fetch all variants in single query
        $linkBatch->execute();

        // the first variant title with an article ID wins
        foreach ( $titles as $varnt ) {
            if ( $varnt->getArticleID() > 0 ) {
                $nt = $varnt;
                $link = $varnt->getText();
                break;
            }
        }
    }

    /**
     * Get an extra hash component that depends on the preferred variant.
     *
     * @return String: '!' followed by the preferred variant code
     */
    public function getExtraHashOptions() {
        $variant = $this->getPreferredVariant();
        return '!' . $variant;
    }

    /**
     * Guess whether $text is already written in $variant. A true result
     * makes autoConvert() and convertTo() return the text unconverted.
     * The base implementation never guesses.
     *
     * @param $text String: text to be checked
     * @param $variant String: variant code to check against
     * @return Boolean: always false here
     */
    public function guessVariant( $text, $variant ) {
        return false;
    }

    /**
     * Load the default conversion tables into $mTables.
     * Must be overridden by subclasses.
     *
     * @throws MWException always, in this base implementation
     */
    function loadDefaultTables() {
        $name = get_class( $this );
        throw new MWException( "Must implement loadDefaultTables() method in class $name" );
    }

    /**
     * Load the conversion tables, from $wgMemc if possible; otherwise
     * rebuild them from loadDefaultTables() plus the on-wiki
     * Conversiontable pages, and store the result in $wgMemc.
     * Does nothing if the tables were already loaded.
     *
     * @param $fromCache Boolean: whether to try the cache first
     */
    function loadTables( $fromCache = true ) {
        if ( $this->mTablesLoaded ) {
            return;
        }
        global $wgMemc;
        wfProfileIn( __METHOD__ );
        $this->mTablesLoaded = true;
        $this->mTables = false;
        if ( $fromCache ) {
            wfProfileIn( __METHOD__ . '-cache' );
            $this->mTables = $wgMemc->get( $this->mCacheKey );
            wfProfileOut( __METHOD__ . '-cache' );
        }
        if ( !$this->mTables
            || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
            wfProfileIn( __METHOD__ . '-recache' );
            // not in cache, or we need a fresh reload.
            // We will first load the default tables
            // then update them using things in MediaWiki:Conversiontable/*
            $this->loadDefaultTables();
            foreach ( $this->mVariants as $var ) {
                $cached = $this->parseCachedTable( $var );
                $this->mTables[$var]->mergeArray( $cached );
            }

            $this->postLoadTables();
            $this->mTables[self::CACHE_VERSION_KEY] = true;

            // 43200 is the expiry argument (presumably seconds, i.e. 12 hours)
            $wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
            wfProfileOut( __METHOD__ . '-recache' );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Hook called by loadTables() after the tables have been rebuilt and
     * before they are cached. Does nothing by default.
     */
    function postLoadTables() { }

    /**
     * Discard the loaded tables and rebuild them, bypassing the cache.
     */
    function reloadTables() {
        if ( $this->mTables ) {
            unset( $this->mTables );
        }
        $this->mTablesLoaded = false;
        $this->loadTables( false );
    }

    /**
     * Parse the conversion table stored on-wiki for one variant.
     *
     * The top-level text comes from the 'conversiontable' message for
     * $code; subpages are read from MediaWiki:Conversiontable/$code/$subpage.
     * Mappings are written between -{ and }- as "from => to" pairs
     * separated by ';'; a trailing "// comment" after the target is dropped.
     * Links of the form [[MediaWiki:Conversiontable/$code/...]] name
     * subpages, which are parsed too when $recursive is true.
     *
     * NOTE(review): pages already seen are remembered in a function-static
     * array and yield an empty result on later calls within the same
     * request - confirm this is intended for reloadTables().
     *
     * @param $code String: variant code
     * @param $subpage String: subpage name, '' for the top-level table
     * @param $recursive Boolean: whether to follow subpage links
     * @return Array: from => to
     */
    function parseCachedTable( $code, $subpage = '', $recursive = true ) {
        static $parsed = array();

        $key = 'Conversiontable/' . $code;
        if ( $subpage ) {
            $key .= '/' . $subpage;
        }
        if ( array_key_exists( $key, $parsed ) ) {
            return array();
        }

        $parsed[$key] = true;

        if ( $subpage === '' ) {
            $txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
        } else {
            $txt = false;
            $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
            if ( $title && $title->exists() ) {
                $revision = Revision::newFromTitle( $title );
                if ( $revision ) {
                    $txt = $revision->getRawText();
                }
            }
        }

        # Nothing to parse if there's no text
        if ( $txt === false || $txt === null || $txt === '' ) {
            return array();
        }

        // get all subpage links of the form
        // [[MediaWiki:Conversiontable/zh-xx/...|...]]
        $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
            ':Conversiontable';
        $subs = StringUtils::explode( '[[', $txt );
        $sublinks = array();
        foreach ( $subs as $sub ) {
            $link = explode( ']]', $sub, 2 );
            if ( count( $link ) != 2 ) {
                continue;
            }
            $b = explode( '|', $link[0], 2 );
            $b = explode( '/', trim( $b[0] ), 3 );
            if ( count( $b ) == 3 ) {
                $sublink = $b[2];
            } else {
                $sublink = '';
            }

            // A link without any '/' has no variant part; skip it instead
            // of reading the missing $b[1].
            if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
                $sublinks[] = $sublink;
            }
        }

        // parse the mappings in this page
        $blocks = StringUtils::explode( '-{', $txt );
        $ret = array();
        $first = true;
        foreach ( $blocks as $block ) {
            if ( $first ) {
                // Skip the part before the first -{
                $first = false;
                continue;
            }
            $mappings = explode( '}-', $block, 2 );
            $stripped = str_replace( array( "'", '"', '*', '#' ), '',
                $mappings[0] );
            $table = StringUtils::explode( ';', $stripped );
            foreach ( $table as $t ) {
                $m = explode( '=>', $t, 3 );
                if ( count( $m ) != 2 ) {
                    continue;
                }
                // trim any trailing comments starting with '//'
                $tt = explode( '//', $m[1], 2 );
                $ret[trim( $m[0] )] = trim( $tt[0] );
            }
        }

        // recursively parse the subpages
        if ( $recursive ) {
            foreach ( $sublinks as $link ) {
                $s = $this->parseCachedTable( $code, $link, $recursive );
                $ret = array_merge( $ret, $s );
            }
        }

        if ( $this->mUcfirst ) {
            foreach ( $ret as $k => $v ) {
                $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
            }
        }
        return $ret;
    }

    /**
     * Enclose text in -{R|...}- so that it will not be converted.
     *
     * @param $text String: text to be tagged for no conversion
     * @param $noParse Boolean: not used by this implementation
     * @return String: the tagged text, or $text unchanged if it already
     *   contains "-{" or "}-"
     */
    public function markNoConversion( $text, $noParse = false ) {
        # don't mark if already marked
        # (compare against false: strpos() returns 0 for a match at the
        # very start of the text, which is the usual place for "-{")
        if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
            return $text;
        }

        $ret = "-{R|$text}-";
        return $ret;
    }

    /**
     * Convert the sort key of a category link. The base implementation
     * returns the key unchanged.
     *
     * @param $key String
     * @return String
     */
    function convertCategoryKey( $key ) {
        return $key;
    }

    /**
     * Handler for the ArticleSaveComplete hook: reload the conversion
     * tables when a MediaWiki:Conversiontable/<valid variant> page is saved.
     *
     * Only $article is used; the remaining parameters belong to the hook
     * signature.
     *
     * @return Boolean: always true
     */
    function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
        $isWatch, $section, $flags, $revision ) {
        $titleobj = $article->getTitle();
        if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
            $title = $titleobj->getDBkey();
            $t = explode( '/', $title, 3 );
            $c = count( $t );
            if ( $c > 1 && $t[0] == 'Conversiontable' ) {
                if ( $this->validateVariant( $t[1] ) ) {
                    $this->reloadTables();
                }
            }
        }
        return true;
    }

    /**
     * Armour rendered math against conversion.
     * Escapes the braces of the conversion delimiters as HTML entities.
     *
     * @param $text String: text to armour against conversion
     * @return String: armoured text in which "-{" and "}-" no longer occur
     */
    public function armourMath( $text ) {
        // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
        // any unwanted markup appearing in the math image tag.
        $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
        return $text;
    }

    /**
     * Get the (lazily built) regex used to split a rule into its
     * per-variant parts.
     *
     * @return String: a pattern for preg_split()
     */
    function getVarSeparatorPattern() {
        if ( is_null( $this->mVarSeparatorPattern ) ) {
            // varsep_pattern for preg_split:
            // text should be split by ";" only if a valid variant
            // name exists after the markup, for example:
            //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
            //  <span style="font-size:120%;">yyy</span>;}-
            // we should split it as:
            //  array(
            //    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
            //    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
            //    [2] => ''
            //  )
            $pat = '/;\s*(?=';
            foreach ( $this->mVariants as $variant ) {
                // zh-hans:xxx;zh-hant:yyy
                $pat .= $variant . '\s*:|';
                // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
                $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
            }
            $pat .= '\s*$)/';
            $this->mVarSeparatorPattern = $pat;
        }
        return $this->mVarSeparatorPattern;
    }
}

class ConverterRule {
    var $mText; // original text in -{text}-
    var $mConverter; // LanguageConverter object
    var $mManualCodeError = '<strong class="error">code error!</strong>';
    var $mRuleDisplay = '';
    var $mRuleTitle = false;
    var $mRules = '';// string : the text of the rules
    var $mRulesAction = 'none';
    var $mFlags = array();
    var $mVariantFlags = array();
    var $mConvTable = array();
    var $mBidtable = array();// array of the translation in each variant
    var $mUnidtable = array();// array of the translation in each variant

    public function __construct( $text, $converter ) {
        $this->mText = $text;
        $this->mConverter = $converter;
    }

    public function getTextInBidtable( $variants ) {
        $variants = (array)$variants;
        if ( !$variants ) {
            return false;
        }
        foreach ( $variants as $variant ) {
01126 if ( isset( $this->mBidtable[$variant] ) ) { 01127 return $this->mBidtable[$variant]; 01128 } 01129 } 01130 return false; 01131 } 01132 01137 function parseFlags() { 01138 $text = $this->mText; 01139 $flags = array(); 01140 $variantFlags = array(); 01141 01142 $sepPos = strpos( $text, '|' ); 01143 if ( $sepPos !== false ) { 01144 $validFlags = $this->mConverter->mFlags; 01145 $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) ); 01146 foreach ( $f as $ff ) { 01147 $ff = trim( $ff ); 01148 if ( isset( $validFlags[$ff] ) ) { 01149 $flags[$validFlags[$ff]] = true; 01150 } 01151 } 01152 $text = strval( substr( $text, $sepPos + 1 ) ); 01153 } 01154 01155 if ( !$flags ) { 01156 $flags['S'] = true; 01157 } elseif ( isset( $flags['R'] ) ) { 01158 $flags = array( 'R' => true );// remove other flags 01159 } elseif ( isset( $flags['N'] ) ) { 01160 $flags = array( 'N' => true );// remove other flags 01161 } elseif ( isset( $flags['-'] ) ) { 01162 $flags = array( '-' => true );// remove other flags 01163 } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) { 01164 $flags['H'] = true; 01165 } elseif ( isset( $flags['H'] ) ) { 01166 // replace A flag, and remove other flags except T 01167 $temp = array( '+' => true, 'H' => true ); 01168 if ( isset( $flags['T'] ) ) { 01169 $temp['T'] = true; 01170 } 01171 if ( isset( $flags['D'] ) ) { 01172 $temp['D'] = true; 01173 } 01174 $flags = $temp; 01175 } else { 01176 if ( isset( $flags['A'] ) ) { 01177 $flags['+'] = true; 01178 $flags['S'] = true; 01179 } 01180 if ( isset( $flags['D'] ) ) { 01181 unset( $flags['S'] ); 01182 } 01183 // try to find flags like "zh-hans", "zh-hant" 01184 // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" 01185 $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants ); 01186 if ( $variantFlags ) { 01187 $variantFlags = array_flip( $variantFlags ); 01188 $flags = array(); 01189 } 01190 } 01191 $this->mVariantFlags = $variantFlags; 01192 $this->mRules = $text; 01193 
$this->mFlags = $flags; 01194 } 01195 01200 function parseRules() { 01201 $rules = $this->mRules; 01202 $bidtable = array(); 01203 $unidtable = array(); 01204 $variants = $this->mConverter->mVariants; 01205 $varsep_pattern = $this->mConverter->getVarSeparatorPattern(); 01206 01207 $choice = preg_split( $varsep_pattern, $rules ); 01208 01209 foreach ( $choice as $c ) { 01210 $v = explode( ':', $c, 2 ); 01211 if ( count( $v ) != 2 ) { 01212 // syntax error, skip 01213 continue; 01214 } 01215 $to = trim( $v[1] ); 01216 $v = trim( $v[0] ); 01217 $u = explode( '=>', $v, 2 ); 01218 // if $to is empty, strtr() could return a wrong result 01219 if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) { 01220 $bidtable[$v] = $to; 01221 } elseif ( count( $u ) == 2 ) { 01222 $from = trim( $u[0] ); 01223 $v = trim( $u[1] ); 01224 if ( array_key_exists( $v, $unidtable ) 01225 && !is_array( $unidtable[$v] ) 01226 && $to 01227 && in_array( $v, $variants ) ) { 01228 $unidtable[$v] = array( $from => $to ); 01229 } elseif ( $to && in_array( $v, $variants ) ) { 01230 $unidtable[$v][$from] = $to; 01231 } 01232 } 01233 // syntax error, pass 01234 if ( !isset( $this->mConverter->mVariantNames[$v] ) ) { 01235 $bidtable = array(); 01236 $unidtable = array(); 01237 break; 01238 } 01239 } 01240 $this->mBidtable = $bidtable; 01241 $this->mUnidtable = $unidtable; 01242 } 01243 01249 function getRulesDesc() { 01250 $codesep = $this->mConverter->mDescCodeSep; 01251 $varsep = $this->mConverter->mDescVarSep; 01252 $text = ''; 01253 foreach ( $this->mBidtable as $k => $v ) { 01254 $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep"; 01255 } 01256 foreach ( $this->mUnidtable as $k => $a ) { 01257 foreach ( $a as $from => $to ) { 01258 $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] . 
"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Pick the text this rule should display for the given variant.
	 *
	 * When the rule defines no conversion tables the raw rule text is
	 * returned. Otherwise candidates are tried in this order: the variant's
	 * own bidirectional entry, the entry of one of its fallback variants,
	 * the first unidirectional target for the variant, and finally (only
	 * when manual conversion is disabled for the variant) the first text
	 * found in either table.
	 *
	 * @param $variant String: variant language code
	 * @return mixed the text to display; whatever falsy value the last
	 *         lookup produced when no candidate matched
	 */
	function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
			return $this->mRules;
		}

		// display current variant in bidirectional array
		$disp = $this->getTextInBidtable( $variant );
		// or display current variant in fallbacks
		if ( !$disp ) {
			$disp = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $variant ) );
		}
		// or display current variant in unidirectional array
		if ( !$disp && array_key_exists( $variant, $unidtable ) ) {
			$disp = array_values( $unidtable[$variant] );
			$disp = $disp[0];
		}
		// or display first text under disable manual convert.
		// The isset() guard avoids an undefined-index notice for a variant
		// that has no manual level registered on the converter.
		if ( !$disp
			&& isset( $this->mConverter->mManualLevel[$variant] )
			&& $this->mConverter->mManualLevel[$variant] === 'disable' )
		{
			if ( count( $bidtable ) > 0 ) {
				$disp = array_values( $bidtable );
				$disp = $disp[0];
			} else {
				// Both tables cannot be empty here (handled above), so the
				// unidirectional table has at least one variant entry.
				$disp = array_values( $unidtable );
				$disp = array_values( $disp[0] );
				$disp = $disp[0];
			}
		}
		return $disp;
	}

	/**
	 * Build $this->mConvTable from the parsed bidirectional and
	 * unidirectional tables.
	 *
	 * Variants missing from the bidirectional table are first filled in from
	 * their fallbacks. Each pair of variants present in the bidirectional
	 * table then gets a mapping in both directions, restricted to target
	 * variants whose manual level is 'bidirectional'. Unidirectional rules
	 * are merged in for variants whose manual level is 'bidirectional' or
	 * 'unidirectional'.
	 */
	function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = array();
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->mManualLevel;

		// Variants already seen that have a bidirectional entry
		$vmarked = array();
		foreach ( $this->mConverter->mVariants as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				if ( $vf ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] === 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] === 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}

			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
				&& isset( $unidtable[$v] ) )
			{
				if ( isset( $this->mConvTable[$v] ) ) {
					$this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse the rule: resolve its flags, compute the display text, title and
	 * rules action, and generate the conversion table.
	 *
	 * @param $variant String|null: variant language code; when empty the
	 *        converter's preferred variant is used
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else {
				// if current variant is not in flags,
				// then we check its fallback variants.
				// NOTE(review): a non-array fallback value is skipped here;
				// confirm that is intended.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			// The text has been converted already; treat it as raw content
			$this->mFlags = $flags = array( 'R' => true );
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// Decode the "=>" separator: its ">" arrives as the HTML entity
			// "&gt;" after Sanitizer::removeHTMLtags has processed the text.
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if text in -{A/H/-|text} without rules
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				$this->mFlags = $flags = array( 'R' => true );
			}
		}

		// false means "no flag produced any display text"
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
						$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
					} else {
						$this->mRuleDisplay = '';
					}
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedStr( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			// No recognised flag set a display value: show the error text
			$this->mRuleDisplay = $this->mManualCodeError;
		}

		$this->generateConvTable();
	}

	/**
	 * Not implemented yet: the body is empty, so callers currently
	 * receive null.
	 */
	public function hasRules() {
		// TODO:
	}

	/**
	 * Get the display text computed by parse().
	 *
	 * @return mixed value of $this->mRuleDisplay
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get the converted title text set by parse() for the 'T' flag.
	 *
	 * @return mixed value of $this->mRuleTitle
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Get the rules action; parse() sets it to 'add' for the '+' flag and
	 * to 'remove' for the '-' flag.
	 *
	 * @return mixed value of $this->mRulesAction
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get the conversion table built by generateConvTable().
	 *
	 * @return mixed value of $this->mConvTable
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get the rule text as it stands after parsing.
	 *
	 * @return mixed value of $this->mRules
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get the flags in effect for this rule.
	 *
	 * @return mixed value of $this->mFlags
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}