MediaWiki  REL1_24
CLDRPluralRuleConverter.php
Go to the documentation of this file.
00001 <?php
/**
 * Parser that converts a CLDR plural rule string into the value held in the
 * `rpn` property of the resulting expression object.
 *
 * The rule is tokenised by nextToken() and evaluated with an operator stack
 * and an operand stack (Dijkstra's shunting yard algorithm) in doConvert().
 * Tokens are instances of CLDRPluralRuleConverterExpression (operands) and
 * CLDRPluralRuleConverterOperator (operators); both classes are defined
 * outside this file.
 */
class CLDRPluralRuleConverter {
    /**
     * The rule string being parsed, as passed to the constructor.
     *
     * @var string
     */
    public $rule;

    /**
     * Byte offset in $rule at which the next token will be read.
     *
     * @var int
     */
    public $pos;

    /**
     * Length of $rule in bytes; parsing stops when $pos reaches it.
     *
     * @var int
     */
    public $end;

    /**
     * Stack of operator tokens that have been read but not yet applied.
     *
     * @var array
     */
    public $operators = array();

    /**
     * Stack of operand expressions awaiting an operator.
     *
     * @var array
     */
    public $operands = array();

    /**
     * Operator precedence, keyed by operator name. A larger number binds more
     * tightly. Operators of equal precedence are applied left to right,
     * because doConvert() resolves the stacked operator whenever the incoming
     * operator's precedence is less than or equal to it.
     *
     * @var array
     */
    private static $precedence = array(
        'or' => 2,
        'and' => 3,
        'is' => 4,
        'is-not' => 4,
        'in' => 4,
        'not-in' => 4,
        'within' => 4,
        'not-within' => 4,
        'mod' => 5,
        ',' => 6,
        '..' => 7,
    );

    /**
     * Characters skipped as whitespace between tokens.
     */
    const WHITESPACE_CLASS = " \t\r\n";

    /**
     * Characters that may make up a number token.
     */
    const NUMBER_CLASS = '0123456789';

    /**
     * The single-letter operand symbols accepted in a rule.
     */
    const OPERAND_SYMBOLS = 'nivwft';

    /**
     * Pattern for a word token: a run of ASCII letters and "@", anchored at
     * the offset given to preg_match() by the "A" modifier.
     */
    const WORD_REGEX = '/[a-zA-Z@]+/A';

    /**
     * Convert a rule string. This is the public entry point.
     *
     * @param string $rule The rule to convert
     * @return mixed The `rpn` property of the parsed expression (presumably
     *   the rule in reverse Polish notation; confirm against
     *   CLDRPluralRuleConverterExpression)
     * @throws CLDRPluralRuleError If the rule cannot be parsed
     */
    public static function convert( $rule ) {
        $parser = new self( $rule );

        return $parser->doConvert();
    }

    /**
     * Protected constructor; use the static convert() method instead.
     *
     * @param string $rule The rule to convert
     */
    protected function __construct( $rule ) {
        $this->rule = $rule;
        $this->pos = 0;
        $this->end = strlen( $rule );
    }

    /**
     * Run the conversion on the rule given to the constructor.
     *
     * @return mixed The `rpn` property of the single remaining operand
     * @throws CLDRPluralRuleError On an empty rule, leftover operands, or a
     *   result whose type is not 'boolean'
     */
    protected function doConvert() {
        $expectOperator = true;

        // Iterate through all tokens, saving the operators and operands to a
        // stack per Dijkstra's shunting yard algorithm.
        while ( false !== ( $token = $this->nextToken() ) ) {
            // In this grammar, there are only binary operators, so every valid
            // rule string will alternate between operator and operand tokens.
            // The flag starts as true and is flipped before the check, so the
            // first token is expected to be an operand.
            $expectOperator = !$expectOperator;

            if ( $token instanceof CLDRPluralRuleConverterExpression ) {
                // Operand
                if ( $expectOperator ) {
                    $token->error( 'unexpected operand' );
                }
                $this->operands[] = $token;
                continue;
            }

            // Operator
            if ( !$expectOperator ) {
                $token->error( 'unexpected operator' );
            }
            // Apply every stacked operator whose precedence is at least that
            // of the incoming one before pushing the incoming operator.
            $lastOp = end( $this->operators );
            while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
                $this->doOperation( $lastOp );
                array_pop( $this->operators );
                $lastOp = end( $this->operators );
            }
            $this->operators[] = $token;
        }

        // Finish off the stack
        while ( $op = array_pop( $this->operators ) ) {
            $this->doOperation( $op );
        }

        // Make sure the result is sane. The first case is possible for an empty
        // string input, the second should be unreachable.
        if ( !count( $this->operands ) ) {
            $this->error( 'condition expected' );
        } elseif ( count( $this->operands ) > 1 ) {
            $this->error( 'missing operator or too many operands' );
        }

        $value = $this->operands[0];
        if ( $value->type !== 'boolean' ) {
            $this->error( 'the result must have a boolean type' );
        }

        return $value->rpn;
    }

    /**
     * Read the next token from $rule, starting at $pos, and advance $pos past
     * it.
     *
     * @return CLDRPluralRuleConverterExpression|CLDRPluralRuleConverterOperator|bool
     *   An operand or operator token, or false when the end of the rule or a
     *   sample section ("@integer" / "@decimal") is reached
     * @throws CLDRPluralRuleError On an unexpected character or unknown word
     */
    protected function nextToken() {
        if ( $this->pos >= $this->end ) {
            return false;
        }

        // Whitespace
        $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
        $this->pos += $length;

        if ( $this->pos >= $this->end ) {
            return false;
        }

        // Number
        $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
        if ( $length !== 0 ) {
            $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
            $this->pos += $length;

            return $token;
        }

        // Two-character operators.
        // NOTE(review): "!=" (and "=" / "%" below) have no entry in
        // self::$precedence; presumably CLDRPluralRuleConverterOperator maps
        // them to one of the named operators. Confirm against that class.
        $op2 = substr( $this->rule, $this->pos, 2 );
        if ( $op2 === '..' || $op2 === '!=' ) {
            $token = $this->newOperator( $op2, $this->pos, 2 );
            $this->pos += 2;

            return $token;
        }

        // Single-character operators
        $op1 = $this->rule[$this->pos];
        if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
            $token = $this->newOperator( $op1, $this->pos, 1 );
            $this->pos++;

            return $token;
        }

        // Word
        if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
            $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
        }
        $word1 = strtolower( $m[0] );
        $word2 = '';
        $nextTokenPos = $this->pos + strlen( $word1 );
        if ( $word1 === 'not' || $word1 === 'is' ) {
            // Look ahead one word
            $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
            if ( $nextTokenPos < $this->end
                && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
            ) {
                $word2 = strtolower( $m[0] );
                $nextTokenPos += strlen( $word2 );
            }
        }

        // Two-word operators like "is not" take precedence over single-word operators like "is"
        if ( $word2 !== '' ) {
            $bothWords = "{$word1}-{$word2}";
            if ( isset( self::$precedence[$bothWords] ) ) {
                $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
                $this->pos = $nextTokenPos;

                return $token;
            }
        }

        // Single-word operators
        if ( isset( self::$precedence[$word1] ) ) {
            $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
            $this->pos += strlen( $word1 );

            return $token;
        }

        // The single-character operand symbols. The length check is required
        // because strpos() is a substring search: without it a word such as
        // "ni" or "ft" would be accepted as an operand while $pos advanced by
        // only one byte, leaving the tokenizer in the middle of the word.
        if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
            $token = $this->newNumber( $word1, $this->pos );
            $this->pos++;

            return $token;
        }

        // Samples
        if ( $word1 === '@integer' || $word1 === '@decimal' ) {
            // Samples are like comments, they have no effect on rule evaluation.
            // They run from the first sample indicator to the end of the string.
            $this->pos = $this->end;

            return false;
        }

        $this->error( 'unrecognised word' );
    }

    /**
     * Apply a binary operator: pop the right and then the left operand from
     * the operand stack, call the operator's operate() method on them and
     * push the result back onto the operand stack.
     *
     * @param CLDRPluralRuleConverterOperator $op The operator to apply
     */
    protected function doOperation( $op ) {
        if ( count( $this->operands ) < 2 ) {
            $op->error( 'missing operand' );
        }
        $right = array_pop( $this->operands );
        $left = array_pop( $this->operands );
        $result = $op->operate( $left, $right );
        $this->operands[] = $result;
    }

    /**
     * Create an operand expression of type 'number' for a numeric literal or
     * an operand symbol.
     *
     * @param string $text The token text
     * @param int $pos Offset of the token in $rule
     * @return CLDRPluralRuleConverterExpression
     */
    protected function newNumber( $text, $pos ) {
        return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
    }

    /**
     * Create an operator token.
     *
     * @param string $type The operator name as read from the rule
     * @param int $pos Offset of the token in $rule
     * @param int $length Length of the token in $rule
     * @return CLDRPluralRuleConverterOperator
     */
    protected function newOperator( $type, $pos, $length ) {
        return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
    }

    /**
     * Abort parsing by throwing an error with the given message.
     *
     * @param string $message
     * @throws CLDRPluralRuleError Always
     */
    protected function error( $message ) {
        throw new CLDRPluralRuleError( $message );
    }
}