MediaWiki  REL1_24
ConverterRule.php
Go to the documentation of this file.
00001 <?php
/**
 * Parser for one manual conversion rule: the text found between "-{" and
 * "}-" in wikitext, e.g. "-{A|zh-hans:WordA;zh-hant:WordB}-".
 *
 * Typical use: construct with the raw rule text and a converter object, call
 * parse(), then read the results through the getters (getDisplay(),
 * getTitle(), getRulesAction(), getConvTable(), ...).
 *
 * The converter is only accessed by duck typing. This class reads its
 * mFlags, mVariants, mVariantNames, mManualLevel, mMainLanguageCode,
 * mDescCodeSep and mDescVarSep properties and calls its
 * getVarSeparatorPattern(), getVariantFallbacks(), getPreferredVariant()
 * and autoConvert() methods.
 */
class ConverterRule {
    /** @var string Original text in -{text}- */
    public $mText;

    /** @var object LanguageConverter object */
    public $mConverter;

    /** @var string|bool Text to display in place of the rule; set by parse() */
    public $mRuleDisplay = '';

    /** @var string|bool Converted page title (T flag), or false when none was set */
    public $mRuleTitle = false;

    /** @var string The text of the rules (the part after the flag separator "|") */
    public $mRules = '';

    /** @var string 'none', 'add' or 'remove'; set by parse() */
    public $mRulesAction = 'none';

    /** @var array Normalised flags, as flag => true */
    public $mFlags = array();

    /** @var array Variant codes that were used as flags, as code => index */
    public $mVariantFlags = array();

    /** @var array Conversion table, as variant => array( from => to ) */
    public $mConvTable = array();

    /** @var array Bidirectional rules: variant => text in that variant */
    public $mBidtable = array();

    /** @var array Unidirectional rules: variant => array( from => to ) */
    public $mUnidtable = array();

    /**
     * @param string $text The text between -{ and }-
     * @param object $converter LanguageConverter object
     */
    public function __construct( $text, $converter ) {
        $this->mText = $text;
        $this->mConverter = $converter;
    }

    /**
     * Look up the bidirectional table for the first of the given variants
     * that has an entry.
     *
     * @param string|array $variants One variant code, or a list of codes in
     *   order of preference
     * @return string|bool The stored text, or false when none of the variants
     *   has an entry. Compare the result with === false: a stored text
     *   such as "0" is a valid result.
     */
    public function getTextInBidtable( $variants ) {
        $variants = (array)$variants;
        if ( !$variants ) {
            return false;
        }
        foreach ( $variants as $variant ) {
            if ( isset( $this->mBidtable[$variant] ) ) {
                return $this->mBidtable[$variant];
            }
        }
        return false;
    }

    /**
     * Split the flag prefix (everything before the first "|") off mText and
     * normalise it.
     *
     * Results are stored in mFlags, mVariantFlags and mRules (the remaining
     * text). Nothing is returned.
     */
    public function parseFlags() {
        $text = $this->mText;
        $flags = array();
        $variantFlags = array();

        $sepPos = strpos( $text, '|' );
        if ( $sepPos !== false ) {
            $validFlags = $this->mConverter->mFlags;
            $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
            foreach ( $f as $ff ) {
                $ff = trim( $ff );
                // Unknown flags are silently dropped.
                if ( isset( $validFlags[$ff] ) ) {
                    $flags[$validFlags[$ff]] = true;
                }
            }
            // strval() because substr() may not return a string when "|" is
            // the last character.
            $text = strval( substr( $text, $sepPos + 1 ) );
        }

        // NOTE: parse() walks the flags in insertion order and the last
        // flag that sets the display text wins, so the order in which keys
        // are added below is significant.
        if ( !$flags ) {
            $flags['S'] = true;
        } elseif ( isset( $flags['R'] ) ) {
            $flags = array( 'R' => true ); // remove other flags
        } elseif ( isset( $flags['N'] ) ) {
            $flags = array( 'N' => true ); // remove other flags
        } elseif ( isset( $flags['-'] ) ) {
            $flags = array( '-' => true ); // remove other flags
        } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
            $flags['H'] = true;
        } elseif ( isset( $flags['H'] ) ) {
            // replace A flag, and remove other flags except T and D
            $temp = array( '+' => true, 'H' => true );
            if ( isset( $flags['T'] ) ) {
                $temp['T'] = true;
            }
            if ( isset( $flags['D'] ) ) {
                $temp['D'] = true;
            }
            $flags = $temp;
        } else {
            if ( isset( $flags['A'] ) ) {
                $flags['+'] = true;
                $flags['S'] = true;
            }
            if ( isset( $flags['D'] ) ) {
                unset( $flags['S'] );
            }
            // try to find flags like "zh-hans", "zh-hant"
            // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
            $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
            if ( $variantFlags ) {
                $variantFlags = array_flip( $variantFlags );
                $flags = array();
            }
        }
        $this->mVariantFlags = $variantFlags;
        $this->mRules = $text;
        $this->mFlags = $flags;
    }

    /**
     * Parse mRules into the bidirectional table (mBidtable, from entries of
     * the form "variant:text") and the unidirectional table (mUnidtable,
     * from entries of the form "from=>variant:to").
     *
     * If any well-formed "x:y" entry names something that is not a key of
     * the converter's mVariantNames, the whole rule is treated as a syntax
     * error and both tables end up empty.
     */
    public function parseRules() {
        $rules = $this->mRules;
        $bidtable = array();
        $unidtable = array();
        $variants = $this->mConverter->mVariants;
        $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

        // Split according to $varsep_pattern, but ignore semicolons from HTML entities
        $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
        $choice = preg_split( $varsep_pattern, $rules );
        if ( $choice === false ) {
            // The regex engine failed; treat it as "no rules" instead of
            // iterating over a non-array below.
            $choice = array();
        }
        $choice = str_replace( "\x01", ';', $choice );

        foreach ( $choice as $c ) {
            $v = explode( ':', $c, 2 );
            if ( count( $v ) != 2 ) {
                // syntax error, skip
                continue;
            }
            $to = trim( $v[1] );
            $v = trim( $v[0] );
            $u = explode( '=>', $v, 2 );
            if ( count( $u ) == 1 ) {
                // Bidirectional entry. $to is also used as a "from" string
                // when the conversion table is generated, and an empty
                // "from" makes strtr() misbehave, so empty text is rejected.
                // The check is against '' on purpose: "0" is valid text.
                if ( $to !== '' && in_array( $v, $variants, true ) ) {
                    $bidtable[$v] = $to;
                }
            } else {
                // Unidirectional entry "from=>variant:to". Only the source
                // string must be non-empty (it becomes the strtr() key);
                // an empty target simply maps the source to nothing.
                $from = trim( $u[0] );
                $v = trim( $u[1] );
                if ( $from !== '' && in_array( $v, $variants, true ) ) {
                    $unidtable[$v][$from] = $to;
                }
            }
            // syntax error, pass
            if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                $bidtable = array();
                $unidtable = array();
                break;
            }
        }
        $this->mBidtable = $bidtable;
        $this->mUnidtable = $unidtable;
    }

    /**
     * Build a human-readable description of the parsed rules, using the
     * converter's variant names and its code/variant separators.
     *
     * @return string
     */
    public function getRulesDesc() {
        $codesep = $this->mConverter->mDescCodeSep;
        $varsep = $this->mConverter->mDescVarSep;
        $text = '';
        foreach ( $this->mBidtable as $k => $v ) {
            $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
        }
        foreach ( $this->mUnidtable as $k => $a ) {
            foreach ( $a as $from => $to ) {
                $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
                    "$codesep$to$varsep";
            }
        }
        return $text;
    }

    /**
     * Pick the text to display for the given variant.
     *
     * Lookup order: the variant's own bidirectional entry, then its
     * fallbacks' bidirectional entries, then its first unidirectional
     * target, and finally (only when manual conversion is 'disable' for the
     * variant) the first text of any rule.
     *
     * @param string $variant Variant code
     * @return string|bool The text, mRules unchanged when no rules were
     *   parsed, or false when nothing applies to this variant
     */
    public function getRuleConvertedStr( $variant ) {
        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;

        if ( !$bidtable && !$unidtable ) {
            return $this->mRules;
        }

        // "Not found" is tested with === false throughout, so that texts
        // like "0" are displayed rather than replaced by a fallback.

        // display current variant in bidirectional array
        $disp = $this->getTextInBidtable( $variant );
        // or display current variant in fallbacks
        if ( $disp === false ) {
            $disp = $this->getTextInBidtable(
                $this->mConverter->getVariantFallbacks( $variant ) );
        }
        // or display current variant in unidirectional array
        if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
            $targets = array_values( $unidtable[$variant] );
            $disp = $targets[0];
        }
        // or display first text under disable manual convert
        if ( $disp === false
            && isset( $this->mConverter->mManualLevel[$variant] )
            && $this->mConverter->mManualLevel[$variant] === 'disable'
        ) {
            if ( count( $bidtable ) > 0 ) {
                $texts = array_values( $bidtable );
                $disp = $texts[0];
            } else {
                $tables = array_values( $unidtable );
                $texts = array_values( $tables[0] );
                $disp = $texts[0];
            }
        }
        return $disp;
    }

    /**
     * Pick the page title to use for the given variant.
     *
     * For the converter's main language code only a rule that targets
     * exactly that code is used; otherwise false is returned so the real
     * page name can be shown (and because variant === main, there will be
     * no further automatic conversion). For any other variant this
     * delegates to getRuleConvertedStr().
     *
     * @param string $variant Variant code
     * @return string|bool The title, or false when none applies
     */
    public function getRuleConvertedTitle( $variant ) {
        if ( $variant !== $this->mConverter->mMainLanguageCode ) {
            return $this->getRuleConvertedStr( $variant );
        }

        $disp = $this->getTextInBidtable( $variant );
        if ( $disp !== false ) {
            return $disp;
        }
        if ( array_key_exists( $variant, $this->mUnidtable ) ) {
            $targets = array_values( $this->mUnidtable[$variant] );
            $disp = $targets[0];
        }
        // Assigned above or still false.
        return $disp;
    }

    /**
     * Generate the conversion table (mConvTable) from the bidirectional and
     * unidirectional tables, honouring the converter's per-variant manual
     * conversion level.
     */
    public function generateConvTable() {
        // Special case optimisation
        if ( !$this->mBidtable && !$this->mUnidtable ) {
            $this->mConvTable = array();
            return;
        }

        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;
        $manLevel = $this->mConverter->mManualLevel;

        // Variants seen so far that have a bidirectional text.
        $vmarked = array();
        foreach ( $this->mConverter->mVariants as $v ) {
            /* for bidirectional array
                fill in the missing variants, if any,
                with fallbacks */
            if ( !isset( $bidtable[$v] ) ) {
                $variantFallbacks =
                    $this->mConverter->getVariantFallbacks( $v );
                $vf = $this->getTextInBidtable( $variantFallbacks );
                // === false, not truthiness: "0" is a valid fallback text.
                if ( $vf !== false ) {
                    $bidtable[$v] = $vf;
                }
            }

            if ( isset( $bidtable[$v] ) ) {
                foreach ( $vmarked as $vo ) {
                    // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                    // or -{H|zh:WordZh;zh-tw:WordTw}-
                    // or -{-|zh:WordZh;zh-tw:WordTw}-
                    // to introduce a custom mapping between
                    // words WordZh and WordTw in the whole text
                    if ( $manLevel[$v] == 'bidirectional' ) {
                        $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                    }
                    if ( $manLevel[$vo] == 'bidirectional' ) {
                        $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                    }
                }
                $vmarked[] = $v;
            }
            /* for unidirectional array fill to convert tables */
            if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                && isset( $unidtable[$v] )
            ) {
                if ( isset( $this->mConvTable[$v] ) ) {
                    $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
                } else {
                    $this->mConvTable[$v] = $unidtable[$v];
                }
            }
        }
    }

    /**
     * Parse the rule text and populate the display text, title, action and
     * conversion table.
     *
     * @param string|null $variant Variant code to produce output for;
     *   defaults to the converter's preferred variant
     */
    public function parse( $variant = null ) {
        if ( !$variant ) {
            $variant = $this->mConverter->getPreferredVariant();
        }

        $this->parseFlags();
        $flags = $this->mFlags;

        // convert to specified variant
        // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
        if ( $this->mVariantFlags ) {
            // check if current variant in flags
            if ( isset( $this->mVariantFlags[$variant] ) ) {
                // then convert <text to convert> to current language
                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
                    $variant );
            } else {
                // if the current variant is not in the flags,
                // then we check its fallback variants.
                $variantFallbacks =
                    $this->mConverter->getVariantFallbacks( $variant );
                if ( is_array( $variantFallbacks ) ) {
                    foreach ( $variantFallbacks as $variantFallback ) {
                        // if current variant's fallback exist in flags
                        if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
                            // then convert <text to convert> to fallback language
                            $this->mRules =
                                $this->mConverter->autoConvert( $this->mRules,
                                    $variantFallback );
                            break;
                        }
                    }
                }
            }
            $this->mFlags = $flags = array( 'R' => true );
        }

        if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
            // decode => HTML entities modified by Sanitizer::removeHTMLtags
            $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
            $this->parseRules();
        }
        $rules = $this->mRules;

        if ( !$this->mBidtable && !$this->mUnidtable ) {
            if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                // fill all variants if the text in -{A/H/-|text}- is
                // non-empty but contains no rules. Empty text is skipped:
                // it would create '' => '' pairs in the conversion table.
                if ( $rules !== '' ) {
                    foreach ( $this->mConverter->mVariants as $v ) {
                        $this->mBidtable[$v] = $rules;
                    }
                }
            } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
                $this->mFlags = $flags = array( 'R' => true );
            }
        }

        // false means "no flag produced any output"; reported as an error
        // after the loop. Flags are processed in insertion order and later
        // flags overwrite the display text set by earlier ones.
        $this->mRuleDisplay = false;
        foreach ( $flags as $flag => $unused ) {
            switch ( $flag ) {
                case 'R':
                    // if we don't do content convert, still strip the -{}- tags
                    $this->mRuleDisplay = $rules;
                    break;
                case 'N':
                    // process N flag: output current variant name
                    $ruleVar = trim( $rules );
                    if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
                        $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
                    } else {
                        $this->mRuleDisplay = '';
                    }
                    break;
                case 'D':
                    // process D flag: output rules description
                    $this->mRuleDisplay = $this->getRulesDesc();
                    break;
                case 'H':
                    // process H,- flag or T only: output nothing
                    $this->mRuleDisplay = '';
                    break;
                case '-':
                    $this->mRulesAction = 'remove';
                    $this->mRuleDisplay = '';
                    break;
                case '+':
                    $this->mRulesAction = 'add';
                    $this->mRuleDisplay = '';
                    break;
                case 'S':
                    $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
                    break;
                case 'T':
                    $this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
                    $this->mRuleDisplay = '';
                    break;
                default:
                    // ignore unknown flags (but see error case below)
            }
        }
        if ( $this->mRuleDisplay === false ) {
            $this->mRuleDisplay = '<span class="error">'
                . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
                . '</span>';
        }

        $this->generateConvTable();
    }

    /**
     * Not implemented: the body is empty, so this currently returns null
     * for every rule.
     *
     * @todo Implement
     */
    public function hasRules() {
        // TODO:
    }

    /**
     * Get display text on markup -{...}-
     * @return string|bool
     */
    public function getDisplay() {
        return $this->mRuleDisplay;
    }

    /**
     * Get converted title
     * @return string|bool
     */
    public function getTitle() {
        return $this->mRuleTitle;
    }

    /**
     * Return how to deal with the conversion rules
     * @return string 'none', 'add' or 'remove'
     */
    public function getRulesAction() {
        return $this->mRulesAction;
    }

    /**
     * Get the conversion table, as variant => array( from => to ).
     * @return array
     */
    public function getConvTable() {
        return $this->mConvTable;
    }

    /**
     * Get the conversion rules text
     * @return string
     */
    public function getRules() {
        return $this->mRules;
    }

    /**
     * Get the normalised flags
     * @return array
     */
    public function getFlags() {
        return $this->mFlags;
    }
}