MediaWiki  REL1_23
CLDRPluralRuleConverter.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Converts a CLDR plural rule string into the "rpn" form carried by the
 * expression objects it builds.
 *
 * The rule is tokenised by nextToken() and reduced with Dijkstra's shunting
 * yard algorithm in doConvert(). Operand tokens are
 * CLDRPluralRuleConverter_Expression objects; operator tokens are
 * CLDRPluralRuleConverter_Operator objects. Both classes, and
 * CLDRPluralRuleError, are defined outside this file.
 */
class CLDRPluralRuleConverter {
    /**
     * The input rule string.
     *
     * @var string
     */
    public $rule;

    /**
     * The current byte offset of the tokeniser within $rule.
     *
     * @var int
     */
    public $pos;

    /**
     * The byte length of $rule.
     *
     * @var int
     */
    public $end;

    /**
     * The operator stack used by the shunting yard algorithm.
     *
     * @var array
     */
    public $operators = array();

    /**
     * The operand stack used by the shunting yard algorithm.
     *
     * @var array
     */
    public $operands = array();

    /**
     * Precedence levels, keyed by operator name. An incoming operator first
     * forces every stacked operator of equal or higher level to be applied,
     * so a larger number binds more tightly.
     *
     * @var array
     */
    public static $precedence = array(
        'or' => 2,
        'and' => 3,
        'is' => 4,
        'is-not' => 4,
        'in' => 4,
        'not-in' => 4,
        'within' => 4,
        'not-within' => 4,
        'mod' => 5,
        ',' => 6,
        '..' => 7,
    );

    /**
     * Characters skipped as whitespace between tokens.
     */
    const WHITESPACE_CLASS = " \t\r\n";

    /**
     * Characters that make up a number token.
     */
    const NUMBER_CLASS = '0123456789';

    /**
     * The single-letter words accepted as operands.
     */
    const OPERAND_SYMBOLS = 'nivwft';

    /**
     * Anchored regex matching one word (letters and "@") at a given offset.
     */
    const WORD_REGEX = '/[a-zA-Z@]+/A';

    /**
     * Convert a rule string.
     *
     * @param string $rule The rule to convert
     * @return mixed The "rpn" property of the resulting expression
     *   (presumably a string; the expression class is not defined here)
     * @throws CLDRPluralRuleError If the rule cannot be parsed
     */
    public static function convert( $rule ) {
        $parser = new self( $rule );
        return $parser->doConvert();
    }

    /**
     * Protected constructor; instances are created through convert().
     *
     * @param string $rule The rule to convert
     */
    protected function __construct( $rule ) {
        $this->rule = $rule;
        $this->pos = 0;
        $this->end = strlen( $rule );
    }

    /**
     * Run the conversion over the whole rule string.
     *
     * @return mixed The "rpn" property of the single remaining operand
     * @throws CLDRPluralRuleError On any syntax or type error
     */
    protected function doConvert() {
        $expectOperator = true;

        // Iterate through all tokens, saving the operators and operands to a
        // stack per Dijkstra's shunting yard algorithm.
        while ( false !== ( $token = $this->nextToken() ) ) {
            // In this grammar, there are only binary operators, so every valid
            // rule string will alternate between operator and operand tokens.
            $expectOperator = !$expectOperator;

            if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
                // Operand
                if ( $expectOperator ) {
                    $token->error( 'unexpected operand' );
                }
                $this->operands[] = $token;
                continue;
            }

            // Operator
            if ( !$expectOperator ) {
                $token->error( 'unexpected operator' );
            }
            // Resolve higher (or equal) precedence levels already on the stack
            $lastOp = end( $this->operators );
            while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
                $this->doOperation( $lastOp );
                array_pop( $this->operators );
                $lastOp = end( $this->operators );
            }
            $this->operators[] = $token;
        }

        // Finish off the stack
        while ( $op = array_pop( $this->operators ) ) {
            $this->doOperation( $op );
        }

        // Make sure the result is sane. The first case is possible for an empty
        // string input, the second should be unreachable.
        if ( !count( $this->operands ) ) {
            $this->error( 'condition expected' );
        } elseif ( count( $this->operands ) > 1 ) {
            $this->error( 'missing operator or too many operands' );
        }

        $value = $this->operands[0];
        if ( $value->type !== 'boolean' ) {
            $this->error( 'the result must have a boolean type' );
        }

        return $value->rpn;
    }

    /**
     * Fetch the next token from the input string and advance $pos past it.
     *
     * @return CLDRPluralRuleConverter_Expression|CLDRPluralRuleConverter_Operator|bool
     *   The token object, or false at the end of the input or at the start
     *   of a sample section ("@integer" / "@decimal")
     * @throws CLDRPluralRuleError On an unexpected character or unknown word
     */
    protected function nextToken() {
        if ( $this->pos >= $this->end ) {
            return false;
        }

        // Whitespace
        $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
        $this->pos += $length;

        if ( $this->pos >= $this->end ) {
            return false;
        }

        // Number
        $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
        if ( $length !== 0 ) {
            $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
            $this->pos += $length;
            return $token;
        }

        // Two-character operators
        $op2 = substr( $this->rule, $this->pos, 2 );
        if ( $op2 === '..' || $op2 === '!=' ) {
            $token = $this->newOperator( $op2, $this->pos, 2 );
            $this->pos += 2;
            return $token;
        }

        // Single-character operators
        $op1 = $this->rule[$this->pos];
        if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
            $token = $this->newOperator( $op1, $this->pos, 1 );
            $this->pos++;
            return $token;
        }

        // Word
        if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
            $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
        }
        $word1 = strtolower( $m[0] );
        $word2 = '';
        $nextTokenPos = $this->pos + strlen( $word1 );
        if ( $word1 === 'not' || $word1 === 'is' ) {
            // Look ahead one word
            $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
            if ( $nextTokenPos < $this->end
                && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
            ) {
                $word2 = strtolower( $m[0] );
                $nextTokenPos += strlen( $word2 );
            }
        }

        // Two-word operators like "is not" take precedence over single-word operators like "is"
        if ( $word2 !== '' ) {
            $bothWords = "{$word1}-{$word2}";
            if ( isset( self::$precedence[$bothWords] ) ) {
                $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
                $this->pos = $nextTokenPos;
                return $token;
            }
        }

        // Single-word operators
        if ( isset( self::$precedence[$word1] ) ) {
            $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
            $this->pos += strlen( $word1 );
            return $token;
        }

        // The single-character operand symbols. The length check matters:
        // strpos() alone would also accept multi-letter substrings of
        // OPERAND_SYMBOLS such as "ni" or "wft", while the position is only
        // advanced by one character.
        if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
            $token = $this->newNumber( $word1, $this->pos );
            $this->pos++;
            return $token;
        }

        // Samples
        if ( $word1 === '@integer' || $word1 === '@decimal' ) {
            // Samples are like comments, they have no effect on rule evaluation.
            // They run from the first sample indicator to the end of the string.
            $this->pos = $this->end;
            return false;
        }

        $this->error( 'unrecognised word' );
    }

    /**
     * Apply an operator to the two topmost operands: both are popped from the
     * operand stack and the result of $op->operate() is pushed back.
     *
     * @param CLDRPluralRuleConverter_Operator $op The operator to apply
     */
    protected function doOperation( $op ) {
        if ( count( $this->operands ) < 2 ) {
            $op->error( 'missing operand' );
        }
        // The right-hand operand was pushed last, so it comes off first.
        $right = array_pop( $this->operands );
        $left = array_pop( $this->operands );
        $result = $op->operate( $left, $right );
        $this->operands[] = $result;
    }

    /**
     * Create an expression object of type "number" for a numeric literal or
     * an operand symbol.
     *
     * @param string $text The token text
     * @param int $pos The byte offset of the token within the rule
     * @return CLDRPluralRuleConverter_Expression
     */
    protected function newNumber( $text, $pos ) {
        return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
    }

    /**
     * Create an operator token object.
     *
     * @param string $type The operator name or symbol as read from the rule
     * @param int $pos The byte offset of the token within the rule
     * @param int $length The byte length of the token
     * @return CLDRPluralRuleConverter_Operator
     */
    protected function newOperator( $type, $pos, $length ) {
        return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
    }

    /**
     * Abort the conversion with an error.
     *
     * @param string $message The error message
     * @throws CLDRPluralRuleError Always
     */
    protected function error( $message ) {
        throw new CLDRPluralRuleError( $message );
    }
}