[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/languages/ -> LanguageConverter.php (source)

   1  <?php
   2  /**
   3   * This program is free software; you can redistribute it and/or modify
   4   * it under the terms of the GNU General Public License as published by
   5   * the Free Software Foundation; either version 2 of the License, or
   6   * (at your option) any later version.
   7   *
   8   * This program is distributed in the hope that it will be useful,
   9   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11   * GNU General Public License for more details.
  12   *
  13   * You should have received a copy of the GNU General Public License along
  14   * with this program; if not, write to the Free Software Foundation, Inc.,
  15   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16   * http://www.gnu.org/copyleft/gpl.html
  17   *
  18   * @file
  19   * @ingroup Language
  20   */
  21  
  22  /**
  23   * Base class for language conversion.
  24   * @ingroup Language
  25   *
  26   * @author Zhengzhu Feng <[email protected]>
  27   * @author fdcn <[email protected]>
  28   * @author shinjiman <[email protected]>
  29   * @author PhiLiP <[email protected]>
  30   */
  31  class LanguageConverter {
  32      /**
  33       * languages supporting variants
  34       * @since 1.20
  35       * @var array
  36       */
  37      static public $languagesWithVariants = array(
  38          'gan',
  39          'iu',
  40          'kk',
  41          'ku',
  42          'shi',
  43          'sr',
  44          'tg',
  45          'uz',
  46          'zh',
  47      );
  48  
  49      public $mMainLanguageCode;
  50      public $mVariants, $mVariantFallbacks, $mVariantNames;
  51      public $mTablesLoaded = false;
  52      public $mTables;
  53      // 'bidirectional' 'unidirectional' 'disable' for each variant
  54      public $mManualLevel;
  55  
  56      /**
  57       * @var string Memcached key name
  58       */
  59      public $mCacheKey;
  60  
  61      public $mLangObj;
  62      public $mFlags;
  63      public $mDescCodeSep = ':', $mDescVarSep = ';';
  64      public $mUcfirst = false;
  65      public $mConvRuleTitle = false;
  66      public $mURLVariant;
  67      public $mUserVariant;
  68      public $mHeaderVariant;
  69      public $mMaxDepth = 10;
  70      public $mVarSeparatorPattern;
  71  
  72      const CACHE_VERSION_KEY = 'VERSION 7';
  73  
  74      /**
  75       * Constructor
  76       *
  77       * @param Language $langobj
  78       * @param string $maincode The main language code of this language
  79       * @param array $variants The supported variants of this language
  80       * @param array $variantfallbacks The fallback language of each variant
  81       * @param array $flags Defining the custom strings that maps to the flags
  82       * @param array $manualLevel Limit for supported variants
  83       */
  84  	public function __construct( $langobj, $maincode, $variants = array(),
  85                                  $variantfallbacks = array(), $flags = array(),
  86                                  $manualLevel = array() ) {
  87          global $wgDisabledVariants;
  88          $this->mLangObj = $langobj;
  89          $this->mMainLanguageCode = $maincode;
  90          $this->mVariants = array_diff( $variants, $wgDisabledVariants );
  91          $this->mVariantFallbacks = $variantfallbacks;
  92          $this->mVariantNames = Language::fetchLanguageNames();
  93          $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
  94          $defaultflags = array(
  95              // 'S' show converted text
  96              // '+' add rules for alltext
  97              // 'E' the gave flags is error
  98              // these flags above are reserved for program
  99              'A' => 'A',      // add rule for convert code (all text convert)
 100              'T' => 'T',      // title convert
 101              'R' => 'R',      // raw content
 102              'D' => 'D',      // convert description (subclass implement)
 103              '-' => '-',      // remove convert (not implement)
 104              'H' => 'H',      // add rule for convert code (but no display in placed code)
 105              'N' => 'N'      // current variant name
 106          );
 107          $this->mFlags = array_merge( $defaultflags, $flags );
 108          foreach ( $this->mVariants as $v ) {
 109              if ( array_key_exists( $v, $manualLevel ) ) {
 110                  $this->mManualLevel[$v] = $manualLevel[$v];
 111              } else {
 112                  $this->mManualLevel[$v] = 'bidirectional';
 113              }
 114              $this->mFlags[$v] = $v;
 115          }
 116      }
 117  
 118      /**
 119       * Get all valid variants.
 120       * Call this instead of using $this->mVariants directly.
 121       *
 122       * @return array Contains all valid variants
 123       */
    public function getVariants() {
        // The constructor already removed the codes listed in $wgDisabledVariants.
        return $this->mVariants;
    }
 127  
 128      /**
 129       * In case some variant is not defined in the markup, we need
 130       * to have some fallback. For example, in zh, normally people
 131       * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
 132       * when zh-sg is preferred but not defined, we will pick zh-hans
 133       * in this case. Right now this is only used by zh.
 134       *
 135       * @param string $variant The language code of the variant
 136       * @return string|array The code of the fallback language or the
 137       *   main code if there is no fallback
 138       */
 139  	public function getVariantFallbacks( $variant ) {
 140          if ( isset( $this->mVariantFallbacks[$variant] ) ) {
 141              return $this->mVariantFallbacks[$variant];
 142          }
 143          return $this->mMainLanguageCode;
 144      }
 145  
 146      /**
 147       * Get the title produced by the conversion rule.
 148       * @return string The converted title text
 149       */
    public function getConvRuleTitle() {
        // Stays false until a manual rule supplies a title (see applyManualConv());
        // convertTo() resets it to false at the start of each run.
        return $this->mConvRuleTitle;
    }
 153  
 154      /**
 155       * Get preferred language variant.
 156       * @return string The preferred language code
 157       */
 158  	public function getPreferredVariant() {
 159          global $wgDefaultLanguageVariant, $wgUser;
 160  
 161          $req = $this->getURLVariant();
 162  
 163          if ( $wgUser->isLoggedIn() && !$req ) {
 164              $req = $this->getUserVariant();
 165          } elseif ( !$req ) {
 166              $req = $this->getHeaderVariant();
 167          }
 168  
 169          if ( $wgDefaultLanguageVariant && !$req ) {
 170              $req = $this->validateVariant( $wgDefaultLanguageVariant );
 171          }
 172  
 173          // This function, unlike the other get*Variant functions, is
 174          // not memoized (i.e. there return value is not cached) since
 175          // new information might appear during processing after this
 176          // is first called.
 177          if ( $this->validateVariant( $req ) ) {
 178              return $req;
 179          }
 180          return $this->mMainLanguageCode;
 181      }
 182  
 183      /**
 184       * Get default variant.
 185       * This function would not be affected by user's settings
 186       * @return string The default variant code
 187       */
 188  	public function getDefaultVariant() {
 189          global $wgDefaultLanguageVariant;
 190  
 191          $req = $this->getURLVariant();
 192  
 193          if ( !$req ) {
 194              $req = $this->getHeaderVariant();
 195          }
 196  
 197          if ( $wgDefaultLanguageVariant && !$req ) {
 198              $req = $this->validateVariant( $wgDefaultLanguageVariant );
 199          }
 200  
 201          if ( $req ) {
 202              return $req;
 203          }
 204          return $this->mMainLanguageCode;
 205      }
 206  
 207      /**
 208       * Validate the variant
 209       * @param string $variant The variant to validate
 210       * @return mixed Returns the variant if it is valid, null otherwise
 211       */
 212  	public function validateVariant( $variant = null ) {
 213          if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
 214              return $variant;
 215          }
 216          return null;
 217      }
 218  
 219      /**
 220       * Get the variant specified in the URL
 221       *
  222       * @return string|null Variant if one found, null otherwise.
 223       */
 224  	public function getURLVariant() {
 225          global $wgRequest;
 226  
 227          if ( $this->mURLVariant ) {
 228              return $this->mURLVariant;
 229          }
 230  
 231          // see if the preference is set in the request
 232          $ret = $wgRequest->getText( 'variant' );
 233  
 234          if ( !$ret ) {
 235              $ret = $wgRequest->getVal( 'uselang' );
 236          }
 237  
 238          $this->mURLVariant = $this->validateVariant( $ret );
 239          return $this->mURLVariant;
 240      }
 241  
 242      /**
 243       * Determine if the user has a variant set.
 244       *
  245       * @return string|null Variant if one found, null otherwise.
 246       */
 247  	protected function getUserVariant() {
 248          global $wgUser, $wgContLang;
 249  
 250          // memoizing this function wreaks havoc on parserTest.php
 251          /*
 252          if ( $this->mUserVariant ) {
 253              return $this->mUserVariant;
 254          }
 255          */
 256  
 257          // Get language variant preference from logged in users
 258          // Don't call this on stub objects because that causes infinite
 259          // recursion during initialisation
 260          if ( $wgUser->isLoggedIn() ) {
 261              if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
 262                  $ret = $wgUser->getOption( 'variant' );
 263              } else {
 264                  $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
 265              }
 266          } else {
 267              // figure out user lang without constructing wgLang to avoid
 268              // infinite recursion
 269              $ret = $wgUser->getOption( 'language' );
 270          }
 271  
 272          $this->mUserVariant = $this->validateVariant( $ret );
 273          return $this->mUserVariant;
 274      }
 275  
 276      /**
 277       * Determine the language variant from the Accept-Language header.
 278       *
  279       * @return string|null Variant if one found, null otherwise.
 280       */
 281  	protected function getHeaderVariant() {
 282          global $wgRequest;
 283  
 284          if ( $this->mHeaderVariant ) {
 285              return $this->mHeaderVariant;
 286          }
 287  
 288          // see if some supported language variant is set in the
 289          // HTTP header.
 290          $languages = array_keys( $wgRequest->getAcceptLang() );
 291          if ( empty( $languages ) ) {
 292              return null;
 293          }
 294  
 295          $fallbackLanguages = array();
 296          foreach ( $languages as $language ) {
 297              $this->mHeaderVariant = $this->validateVariant( $language );
 298              if ( $this->mHeaderVariant ) {
 299                  break;
 300              }
 301  
 302              // To see if there are fallbacks of current language.
 303              // We record these fallback variants, and process
 304              // them later.
 305              $fallbacks = $this->getVariantFallbacks( $language );
 306              if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
 307                  $fallbackLanguages[] = $fallbacks;
 308              } elseif ( is_array( $fallbacks ) ) {
 309                  $fallbackLanguages =
 310                      array_merge( $fallbackLanguages, $fallbacks );
 311              }
 312          }
 313  
 314          if ( !$this->mHeaderVariant ) {
 315              // process fallback languages now
 316              $fallback_languages = array_unique( $fallbackLanguages );
 317              foreach ( $fallback_languages as $language ) {
 318                  $this->mHeaderVariant = $this->validateVariant( $language );
 319                  if ( $this->mHeaderVariant ) {
 320                      break;
 321                  }
 322              }
 323          }
 324  
 325          return $this->mHeaderVariant;
 326      }
 327  
 328      /**
 329       * Dictionary-based conversion.
 330       * This function would not parse the conversion rules.
 331       * If you want to parse rules, try to use convert() or
 332       * convertTo().
 333       *
 334       * @param string $text The text to be converted
 335       * @param bool|string $toVariant The target language code
 336       * @return string The converted text
 337       */
    public function autoConvert( $text, $toVariant = false ) {
        // Splits $text into alternating "convertible text" and "literal markup"
        // pieces, translates all convertible pieces in one batch, and stitches
        // the result back together.
        wfProfileIn( __METHOD__ );

        $this->loadTables();

        // No explicit target: use the preferred variant, or give up unchanged.
        if ( !$toVariant ) {
            $toVariant = $this->getPreferredVariant();
            if ( !$toVariant ) {
                wfProfileOut( __METHOD__ );
                return $text;
            }
        }

        // Text that already appears to be in the target variant is left alone.
        if ( $this->guessVariant( $text, $toVariant ) ) {
            wfProfileOut( __METHOD__ );
            return $text;
        }

        /* we convert everything except:
           1. HTML markups (anything between < and >)
           2. HTML entities
           3. placeholders created by the parser
        */
        global $wgParser;
        if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
            // Alternative matching parser strip markers (prefix + marker id)
            $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
        } else {
            $marker = '';
        }

        // this one is needed when the text is inside an HTML markup
        // (a tag left open at the end, or a tag closed at the very start)
        $htmlfix = '|<[^>]+$|^[^<>]*>';

        // disable convert to variants between <code> tags
        $codefix = '<code>.+?<\/code>|';
        // disable conversion of <script> tags
        $scriptfix = '<script.*?>.*?<\/script>|';
        // disable conversion of <pre> tags
        $prefix = '<pre.*?>.*?<\/pre>|';

        // NOTE(review): the entity alternative only allows lowercase letters
        // and digits after the first character, so entities such as &#x4E2D;
        // are not matched as literals - confirm whether that is intended.
        $reg = '/' . $codefix . $scriptfix . $prefix .
            '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
        $startPos = 0;
        // Convertible text pieces, each terminated by a NUL byte
        $sourceBlob = '';
        // Literal (non-converted) pieces, each terminated by a NUL byte
        $literalBlob = '';

        // Guard against delimiter nulls in the input
        $text = str_replace( "\000", '', $text );

        $markupMatches = null;
        $elementMatches = null;
        while ( $startPos < strlen( $text ) ) {
            if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
                $elementPos = $markupMatches[0][1];
                $element = $markupMatches[0][0];
            } else {
                // No further markup: the rest of the text is one source piece
                $elementPos = strlen( $text );
                $element = '';
            }

            // Queue the part before the markup for translation in a batch
            $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

            // Advance to the next position
            $startPos = $elementPos + strlen( $element );

            // Translate any alt or title attributes inside the matched element
            if ( $element !== ''
                && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
            ) {
                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
                $changed = false;
                foreach ( array( 'title', 'alt' ) as $attrName ) {
                    if ( !isset( $attrs[$attrName] ) ) {
                        continue;
                    }
                    $attr = $attrs[$attrName];
                    // Don't convert URLs
                    // NOTE(review): !strpos() is also true when '://' sits at
                    // offset 0, so such a value is still converted.
                    if ( !strpos( $attr, '://' ) ) {
                        $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
                    }

                    // Remove HTML tags to avoid disrupting the layout
                    $attr = preg_replace( '/<[^>]+>/', '', $attr );
                    if ( $attr !== $attrs[$attrName] ) {
                        $attrs[$attrName] = $attr;
                        $changed = true;
                    }
                }
                // Rebuild the tag only when an attribute actually changed
                if ( $changed ) {
                    $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                        $elementMatches[3];
                }
            }
            $literalBlob .= $element . "\000";
        }

        // Do the main translation batch
        $translatedBlob = $this->translate( $sourceBlob, $toVariant );

        // Put the output back together
        // (pieces are interleaved: translated text, then the literal that followed it)
        $translatedIter = StringUtils::explode( "\000", $translatedBlob );
        $literalIter = StringUtils::explode( "\000", $literalBlob );
        $output = '';
        while ( $translatedIter->valid() && $literalIter->valid() ) {
            $output .= $translatedIter->current();
            $output .= $literalIter->current();
            $translatedIter->next();
            $literalIter->next();
        }

        wfProfileOut( __METHOD__ );
        return $output;
    }
 452  
 453      /**
 454       * Translate a string to a variant.
 455       * Doesn't parse rules or do any of that other stuff, for that use
 456       * convert() or convertTo().
 457       *
 458       * @param string $text Text to convert
 459       * @param string $variant Variant language code
 460       * @return string Translated text
 461       */
 462  	public function translate( $text, $variant ) {
 463          wfProfileIn( __METHOD__ );
 464          // If $text is empty or only includes spaces, do nothing
 465          // Otherwise translate it
 466          if ( trim( $text ) ) {
 467              $this->loadTables();
 468              $text = $this->mTables[$variant]->replace( $text );
 469          }
 470          wfProfileOut( __METHOD__ );
 471          return $text;
 472      }
 473  
 474      /**
 475       * Call translate() to convert text to all valid variants.
 476       *
 477       * @param string $text The text to be converted
 478       * @return array Variant => converted text
 479       */
 480  	public function autoConvertToAllVariants( $text ) {
 481          wfProfileIn( __METHOD__ );
 482          $this->loadTables();
 483  
 484          $ret = array();
 485          foreach ( $this->mVariants as $variant ) {
 486              $ret[$variant] = $this->translate( $text, $variant );
 487          }
 488  
 489          wfProfileOut( __METHOD__ );
 490          return $ret;
 491      }
 492  
 493      /**
 494       * Apply manual conversion rules.
 495       *
 496       * @param ConverterRule $convRule
 497       */
 498  	protected function applyManualConv( $convRule ) {
 499          // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
 500          // title conversion.
 501          // Bug 24072: $mConvRuleTitle was overwritten by other manual
 502          // rule(s) not for title, this breaks the title conversion.
 503          $newConvRuleTitle = $convRule->getTitle();
 504          if ( $newConvRuleTitle ) {
 505              // So I add an empty check for getTitle()
 506              $this->mConvRuleTitle = $newConvRuleTitle;
 507          }
 508  
 509          // merge/remove manual conversion rules to/from global table
 510          $convTable = $convRule->getConvTable();
 511          $action = $convRule->getRulesAction();
 512          foreach ( $convTable as $variant => $pair ) {
 513              if ( !$this->validateVariant( $variant ) ) {
 514                  continue;
 515              }
 516  
 517              if ( $action == 'add' ) {
 518                  foreach ( $pair as $from => $to ) {
 519                      // to ensure that $from and $to not be left blank
 520                      // so $this->translate() could always return a string
 521                      if ( $from || $to ) {
 522                          // more efficient than array_merge(), about 2.5 times.
 523                          $this->mTables[$variant]->setPair( $from, $to );
 524                      }
 525                  }
 526              } elseif ( $action == 'remove' ) {
 527                  $this->mTables[$variant]->removeArray( $pair );
 528              }
 529          }
 530      }
 531  
 532      /**
 533       * Auto convert a Title object to a readable string in the
 534       * preferred variant.
 535       *
  536       * @param Title $title A Title object
 537       * @return string Converted title text
 538       */
 539  	public function convertTitle( $title ) {
 540          $variant = $this->getPreferredVariant();
 541          $index = $title->getNamespace();
 542          if ( $index !== NS_MAIN ) {
 543              $text = $this->convertNamespace( $index, $variant ) . ':';
 544          } else {
 545              $text = '';
 546          }
 547          $text .= $this->translate( $title->getText(), $variant );
 548          return $text;
 549      }
 550  
 551      /**
 552       * Get the namespace display name in the preferred variant.
 553       *
 554       * @param int $index Namespace id
 555       * @param string|null $variant Variant code or null for preferred variant
 556       * @return string Namespace name for display
 557       */
 558  	public function convertNamespace( $index, $variant = null ) {
 559          if ( $variant === null ) {
 560              $variant = $this->getPreferredVariant();
 561          }
 562          if ( $index === NS_MAIN ) {
 563              return '';
 564          } else {
 565              // First check if a message gives a converted name in the target variant.
 566              $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
 567              if ( $nsConvMsg->exists() ) {
 568                  return $nsConvMsg->plain();
 569              }
 570              // Then check if a message gives a converted name in content language
 571              // which needs extra translation to the target variant.
 572              $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
 573              if ( $nsConvMsg->exists() ) {
 574                  return $this->translate( $nsConvMsg->plain(), $variant );
 575              }
 576              // No message exists, retrieve it from the target variant's namespace names.
 577              $langObj = $this->mLangObj->factory( $variant );
 578              return $langObj->getFormattedNsText( $index );
 579          }
 580      }
 581  
 582      /**
 583       * Convert text to different variants of a language. The automatic
 584       * conversion is done in autoConvert(). Here we parse the text
 585       * marked with -{}-, which specifies special conversions of the
 586       * text that can not be accomplished in autoConvert().
 587       *
 588       * Syntax of the markup:
 589       * -{code1:text1;code2:text2;...}-  or
 590       * -{flags|code1:text1;code2:text2;...}-  or
 591       * -{text}- in which case no conversion should take place for text
 592       *
 593       * @param string $text Text to be converted
 594       * @return string Converted text
 595       */
 596  	public function convert( $text ) {
 597          $variant = $this->getPreferredVariant();
 598          return $this->convertTo( $text, $variant );
 599      }
 600  
 601      /**
  602       * Same as convert(), except that the target variant is passed explicitly.
 603       *
 604       * @param string $text Text to be converted
 605       * @param string $variant The target variant code
 606       * @return string Converted text
 607       */
 608  	public function convertTo( $text, $variant ) {
 609          global $wgDisableLangConversion;
 610          if ( $wgDisableLangConversion ) {
 611              return $text;
 612          }
 613          // Reset converter state for a new converter run.
 614          $this->mConvRuleTitle = false;
 615          return $this->recursiveConvertTopLevel( $text, $variant );
 616      }
 617  
 618      /**
 619       * Recursively convert text on the outside. Allow to use nested
 620       * markups to custom rules.
 621       *
 622       * @param string $text Text to be converted
 623       * @param string $variant The target variant code
 624       * @param int $depth Depth of recursion
 625       * @return string Converted text
 626       */
 627  	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
 628          $startPos = 0;
 629          $out = '';
 630          $length = strlen( $text );
 631          $shouldConvert = !$this->guessVariant( $text, $variant );
 632  
 633          while ( $startPos < $length ) {
 634              $pos = strpos( $text, '-{', $startPos );
 635  
 636              if ( $pos === false ) {
 637                  // No more markup, append final segment
 638                  $fragment = substr( $text, $startPos );
 639                  $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
 640                  return $out;
 641              }
 642  
 643              // Markup found
 644              // Append initial segment
 645              $fragment = substr( $text, $startPos, $pos - $startPos );
 646              $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
 647  
 648              // Advance position
 649              $startPos = $pos;
 650  
 651              // Do recursive conversion
 652              $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
 653          }
 654  
 655          return $out;
 656      }
 657  
 658      /**
 659       * Recursively convert text on the inside.
 660       *
 661       * @param string $text Text to be converted
 662       * @param string $variant The target variant code
 663       * @param int $startPos
 664       * @param int $depth Depth of recursion
 665       *
 666       * @throws MWException
 667       * @return string Converted text
 668       */
 669  	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
 670          // Quick sanity check (no function calls)
 671          if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
 672              throw new MWException( __METHOD__ . ': invalid input string' );
 673          }
 674  
 675          $startPos += 2;
 676          $inner = '';
 677          $warningDone = false;
 678          $length = strlen( $text );
 679  
 680          while ( $startPos < $length ) {
 681              $m = false;
 682              preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
 683              if ( !$m ) {
 684                  // Unclosed rule
 685                  break;
 686              }
 687  
 688              $token = $m[0][0];
 689              $pos = $m[0][1];
 690  
 691              // Markup found
 692              // Append initial segment
 693              $inner .= substr( $text, $startPos, $pos - $startPos );
 694  
 695              // Advance position
 696              $startPos = $pos;
 697  
 698              switch ( $token ) {
 699                  case '-{':
 700                      // Check max depth
 701                      if ( $depth >= $this->mMaxDepth ) {
 702                          $inner .= '-{';
 703                          if ( !$warningDone ) {
 704                              $inner .= '<span class="error">' .
 705                                  wfMessage( 'language-converter-depth-warning' )
 706                                      ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
 707                                  '</span>';
 708                              $warningDone = true;
 709                          }
 710                          $startPos += 2;
 711                          continue;
 712                      }
 713                      // Recursively parse another rule
 714                      $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
 715                      break;
 716                  case '}-':
 717                      // Apply the rule
 718                      $startPos += 2;
 719                      $rule = new ConverterRule( $inner, $this );
 720                      $rule->parse( $variant );
 721                      $this->applyManualConv( $rule );
 722                      return $rule->getDisplay();
 723                  default:
 724                      throw new MWException( __METHOD__ . ': invalid regex match' );
 725              }
 726          }
 727  
 728          // Unclosed rule
 729          if ( $startPos < $length ) {
 730              $inner .= substr( $text, $startPos );
 731          }
 732          $startPos = $length;
 733          return '-{' . $this->autoConvert( $inner, $variant );
 734      }
 735  
 736      /**
 737       * If a language supports multiple variants, it is possible that
 738       * non-existing link in one variant actually exists in another variant.
 739       * This function tries to find it. See e.g. LanguageZh.php
 740       * The input parameters may be modified upon return
 741       *
 742       * @param string &$link The name of the link
 743       * @param Title &$nt The title object of the link
 744       * @param bool $ignoreOtherCond To disable other conditions when
 745       *   we need to transclude a template or update a category's link
 746       */
 747  	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
 748          # If the article has already existed, there is no need to
 749          # check it again, otherwise it may cause a fault.
 750          if ( is_object( $nt ) && $nt->exists() ) {
 751              return;
 752          }
 753  
 754          global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
 755          $isredir = $wgRequest->getText( 'redirect', 'yes' );
 756          $action = $wgRequest->getText( 'action' );
 757          if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
 758              $action = 'view';
 759          }
 760          $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
 761          $disableLinkConversion = $wgDisableLangConversion
 762              || $wgDisableTitleConversion;
 763          $linkBatch = new LinkBatch();
 764  
 765          $ns = NS_MAIN;
 766  
 767          if ( $disableLinkConversion ||
 768              ( !$ignoreOtherCond &&
 769                  ( $isredir == 'no'
 770                      || $action == 'edit'
 771                      || $action == 'submit'
 772                      || $linkconvert == 'no' ) ) ) {
 773              return;
 774          }
 775  
 776          if ( is_object( $nt ) ) {
 777              $ns = $nt->getNamespace();
 778          }
 779  
 780          $variants = $this->autoConvertToAllVariants( $link );
 781          if ( !$variants ) { // give up
 782              return;
 783          }
 784  
 785          $titles = array();
 786  
 787          foreach ( $variants as $v ) {
 788              if ( $v != $link ) {
 789                  $varnt = Title::newFromText( $v, $ns );
 790                  if ( !is_null( $varnt ) ) {
 791                      $linkBatch->addObj( $varnt );
 792                      $titles[] = $varnt;
 793                  }
 794              }
 795          }
 796  
 797          // fetch all variants in single query
 798          $linkBatch->execute();
 799  
 800          foreach ( $titles as $varnt ) {
 801              if ( $varnt->getArticleID() > 0 ) {
 802                  $nt = $varnt;
 803                  $link = $varnt->getText();
 804                  break;
 805              }
 806          }
 807      }
 808  
 809      /**
 810       * Returns language specific hash options.
 811       *
 812       * @return string
 813       */
 814  	public function getExtraHashOptions() {
 815          $variant = $this->getPreferredVariant();
 816  
 817          return '!' . $variant;
 818      }
 819  
 820      /**
 821       * Guess if a text is written in a variant. This should be implemented in subclasses.
 822       *
 823       * @param string $text The text to be checked
 824       * @param string $variant Language code of the variant to be checked for
 825       * @return bool True if $text appears to be written in $variant, false if not
 826       *
 827       * @author Nikola Smolenski <[email protected]>
 828       * @since 1.19
 829       */
 830  	public function guessVariant( $text, $variant ) {
 831          return false;
 832      }
 833  
 834      /**
 835       * Load default conversion tables.
 836       * This method must be implemented in derived class.
 837       *
 838       * @private
 839       * @throws MWException
 840       */
 841  	function loadDefaultTables() {
 842          $name = get_class( $this );
 843  
 844          throw new MWException( "Must implement loadDefaultTables() method in class $name" );
 845      }
 846  
 847      /**
 848       * Load conversion tables either from the cache or the disk.
 849       * @private
 850       * @param bool $fromCache Load from memcached? Defaults to true.
 851       */
 852  	function loadTables( $fromCache = true ) {
 853          global $wgLangConvMemc;
 854  
 855          if ( $this->mTablesLoaded ) {
 856              return;
 857          }
 858  
 859          wfProfileIn( __METHOD__ );
 860          $this->mTablesLoaded = true;
 861          $this->mTables = false;
 862          if ( $fromCache ) {
 863              wfProfileIn( __METHOD__ . '-cache' );
 864              $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
 865              wfProfileOut( __METHOD__ . '-cache' );
 866          }
 867          if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
 868              wfProfileIn( __METHOD__ . '-recache' );
 869              // not in cache, or we need a fresh reload.
 870              // We will first load the default tables
 871              // then update them using things in MediaWiki:Conversiontable/*
 872              $this->loadDefaultTables();
 873              foreach ( $this->mVariants as $var ) {
 874                  $cached = $this->parseCachedTable( $var );
 875                  $this->mTables[$var]->mergeArray( $cached );
 876              }
 877  
 878              $this->postLoadTables();
 879              $this->mTables[self::CACHE_VERSION_KEY] = true;
 880  
 881              $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
 882              wfProfileOut( __METHOD__ . '-recache' );
 883          }
 884          wfProfileOut( __METHOD__ );
 885      }
 886  
 887      /**
 888       * Hook for post processing after conversion tables are loaded.
 889       */
 890  	function postLoadTables() {
 891      }
 892  
 893      /**
 894       * Reload the conversion tables.
 895       *
 896       * @private
 897       */
 898  	function reloadTables() {
 899          if ( $this->mTables ) {
 900              unset( $this->mTables );
 901          }
 902  
 903          $this->mTablesLoaded = false;
 904          $this->loadTables( false );
 905      }
 906  
 907      /**
 908       * Parse the conversion table stored in the cache.
 909       *
 910       * The tables should be in blocks of the following form:
 911       *        -{
 912       *            word => word ;
 913       *            word => word ;
 914       *            ...
 915       *        }-
 916       *
 917       * To make the tables more manageable, subpages are allowed
 918       * and will be parsed recursively if $recursive == true.
 919       *
 920       * @param string $code Language code
 921       * @param string $subpage Subpage name
 922       * @param bool $recursive Parse subpages recursively? Defaults to true.
 923       *
 924       * @return array
 925       */
 926  	function parseCachedTable( $code, $subpage = '', $recursive = true ) {
 927          static $parsed = array();
 928  
 929          $key = 'Conversiontable/' . $code;
 930          if ( $subpage ) {
 931              $key .= '/' . $subpage;
 932          }
 933          if ( array_key_exists( $key, $parsed ) ) {
 934              return array();
 935          }
 936  
 937          $parsed[$key] = true;
 938  
 939          if ( $subpage === '' ) {
 940              $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
 941          } else {
 942              $txt = false;
 943              $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
 944              if ( $title && $title->exists() ) {
 945                  $revision = Revision::newFromTitle( $title );
 946                  if ( $revision ) {
 947                      if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
 948                          $txt = $revision->getContent( Revision::RAW )->getNativeData();
 949                      }
 950  
 951                      // @todo in the future, use a specialized content model, perhaps based on json!
 952                  }
 953              }
 954          }
 955  
 956          # Nothing to parse if there's no text
 957          if ( $txt === false || $txt === null || $txt === '' ) {
 958              return array();
 959          }
 960  
 961          // get all subpage links of the form
 962          // [[MediaWiki:Conversiontable/zh-xx/...|...]]
 963          $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
 964              ':Conversiontable';
 965          $subs = StringUtils::explode( '[[', $txt );
 966          $sublinks = array();
 967          foreach ( $subs as $sub ) {
 968              $link = explode( ']]', $sub, 2 );
 969              if ( count( $link ) != 2 ) {
 970                  continue;
 971              }
 972              $b = explode( '|', $link[0], 2 );
 973              $b = explode( '/', trim( $b[0] ), 3 );
 974              if ( count( $b ) == 3 ) {
 975                  $sublink = $b[2];
 976              } else {
 977                  $sublink = '';
 978              }
 979  
 980              if ( $b[0] == $linkhead && $b[1] == $code ) {
 981                  $sublinks[] = $sublink;
 982              }
 983          }
 984  
 985          // parse the mappings in this page
 986          $blocks = StringUtils::explode( '-{', $txt );
 987          $ret = array();
 988          $first = true;
 989          foreach ( $blocks as $block ) {
 990              if ( $first ) {
 991                  // Skip the part before the first -{
 992                  $first = false;
 993                  continue;
 994              }
 995              $mappings = explode( '}-', $block, 2 );
 996              $stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] );
 997              $table = StringUtils::explode( ';', $stripped );
 998              foreach ( $table as $t ) {
 999                  $m = explode( '=>', $t, 3 );
1000                  if ( count( $m ) != 2 ) {
1001                      continue;
1002                  }
1003                  // trim any trailling comments starting with '//'
1004                  $tt = explode( '//', $m[1], 2 );
1005                  $ret[trim( $m[0] )] = trim( $tt[0] );
1006              }
1007          }
1008  
1009          // recursively parse the subpages
1010          if ( $recursive ) {
1011              foreach ( $sublinks as $link ) {
1012                  $s = $this->parseCachedTable( $code, $link, $recursive );
1013                  $ret = array_merge( $ret, $s );
1014              }
1015          }
1016  
1017          if ( $this->mUcfirst ) {
1018              foreach ( $ret as $k => $v ) {
1019                  $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
1020              }
1021          }
1022          return $ret;
1023      }
1024  
1025      /**
1026       * Enclose a string with the "no conversion" tag. This is used by
1027       * various functions in the Parser.
1028       *
1029       * @param string $text Text to be tagged for no conversion
1030       * @param bool $noParse Unused
1031       * @return string The tagged text
1032       */
1033  	public function markNoConversion( $text, $noParse = false ) {
1034          # don't mark if already marked
1035          if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
1036              return $text;
1037          }
1038  
1039          $ret = "-{R|$text}-";
1040          return $ret;
1041      }
1042  
1043      /**
1044       * Convert the sorting key for category links. This should make different
1045       * keys that are variants of each other map to the same key.
1046       *
1047       * @param string $key
1048       *
1049       * @return string
1050       */
1051  	function convertCategoryKey( $key ) {
1052          return $key;
1053      }
1054  
1055      /**
1056       * Hook to refresh the cache of conversion tables when
1057       * MediaWiki:Conversiontable* is updated.
1058       * @private
1059       *
1060       * @param WikiPage $page
1061       * @param User $user User object for the current user
1062       * @param Content $content New page content
1063       * @param string $summary Edit summary of the edit
1064       * @param bool $isMinor Was the edit marked as minor?
1065       * @param null $isWatch Unused.
1066       * @param null $section Unused.
1067       * @param int $flags Bitfield
1068       * @param Revision|null $revision New Revision object or null
1069       * @return bool True
1070       */
1071  	function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
1072              $isWatch, $section, $flags, $revision ) {
1073          $titleobj = $page->getTitle();
1074          if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
1075              $title = $titleobj->getDBkey();
1076              $t = explode( '/', $title, 3 );
1077              $c = count( $t );
1078              if ( $c > 1 && $t[0] == 'Conversiontable' ) {
1079                  if ( $this->validateVariant( $t[1] ) ) {
1080                      $this->reloadTables();
1081                  }
1082              }
1083          }
1084          return true;
1085      }
1086  
1087      /**
1088       * Armour rendered math against conversion.
1089       * Escape special chars in parsed math text. (in most cases are img elements)
1090       *
1091       * @param string $text Text to armour against conversion
1092       * @return string Armoured text where { and } have been converted to
1093       *   &#123; and &#125;
1094       * @deprecated since 1.22; no longer used
1095       */
1096  	public function armourMath( $text ) {
1097          // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
1098          // any unwanted markup appearing in the math image tag.
1099          $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
1100          return $text;
1101      }
1102  
1103      /**
1104       * Get the cached separator pattern for ConverterRule::parseRules()
1105       * @return string
1106       */
1107  	function getVarSeparatorPattern() {
1108          if ( is_null( $this->mVarSeparatorPattern ) ) {
1109              // varsep_pattern for preg_split:
1110              // text should be splited by ";" only if a valid variant
1111              // name exist after the markup, for example:
1112              //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
1113              //    <span style="font-size:120%;">yyy</span>;}-
1114              // we should split it as:
1115              //  array(
1116              //      [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
1117              //      [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
1118              //      [2] => ''
1119              //     )
1120              $pat = '/;\s*(?=';
1121              foreach ( $this->mVariants as $variant ) {
1122                  // zh-hans:xxx;zh-hant:yyy
1123                  $pat .= $variant . '\s*:|';
1124                  // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
1125                  $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
1126              }
1127              $pat .= '\s*$)/';
1128              $this->mVarSeparatorPattern = $pat;
1129          }
1130          return $this->mVarSeparatorPattern;
1131      }
1132  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1