MediaWiki
REL1_24
|
<?php

/**
 * Converts a plural rule string into the "rpn" form carried by the
 * expression objects it builds.
 *
 * The rule is tokenised by nextToken() and reduced with an operator stack and
 * an operand stack (Dijkstra's shunting yard algorithm). Use the static
 * convert() entry point; the constructor is protected.
 */
class CLDRPluralRuleConverter {
	/**
	 * The rule string being parsed.
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * Byte offset of the next unread character in $rule.
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * Past-the-end offset, i.e. strlen( $rule ).
	 *
	 * @var int
	 */
	public $end;

	/**
	 * Stack of operator tokens that have not been applied yet.
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * Stack of operand expressions.
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence level per operator name; a larger value binds more tightly.
	 *
	 * NOTE(review): nextToken() also creates operators from the symbols
	 * "=", "!=" and "%", which have no entry here. Presumably the operator
	 * class maps them to one of these names before ->name is read - confirm.
	 *
	 * @var array
	 */
	private static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * Characters skipped as whitespace between tokens.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Characters that may form a number token (digits only).
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * The single-letter operand symbols accepted in a rule.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * Anchored regex matching one word (letters and "@") at the given offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule string.
	 *
	 * @param string $rule The rule to convert
	 * @return string The "rpn" value of the resulting expression
	 * @throws CLDRPluralRuleError If the rule cannot be parsed
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Protected constructor; callers go through convert().
	 *
	 * @param string $rule The rule to convert
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Run the conversion on the rule given to the constructor.
	 *
	 * @return string The "rpn" value of the single remaining operand
	 * @throws CLDRPluralRuleError On an empty rule, leftover operands or a
	 *   non-boolean result
	 */
	protected function doConvert() {
		// Flipped at the top of every iteration, so inside the loop it says
		// whether the *current* token has to be an operator. Starting at true
		// makes the first token an operand.
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverterExpression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}

			// Apply every stacked operator that binds at least as tightly as
			// the incoming one before pushing the incoming one.
			$lastOp = end( $this->operators );
			while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Read the next token from the rule and advance $pos past it.
	 *
	 * @return CLDRPluralRuleConverterExpression|CLDRPluralRuleConverterOperator|bool
	 *   An operand or operator token, or false at the end of the rule (which
	 *   includes reaching an "@integer" / "@decimal" sample marker)
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check matters:
		// strpos() alone is a substring test, so a word such as "ni" or "wft"
		// would otherwise be accepted as one operand while only one character
		// is consumed.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * Apply a binary operator to the two topmost operands.
	 *
	 * Pops the right and then the left operand off the operand stack and
	 * pushes the value returned by $op->operate( $left, $right ).
	 *
	 * @param CLDRPluralRuleConverterOperator $op The operator to apply
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create an expression object of type "number".
	 *
	 * @param string $text The token text (digits or an operand symbol)
	 * @param int $pos Offset of the token in the rule
	 * @return CLDRPluralRuleConverterExpression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create an operator object.
	 *
	 * @param string $type The operator name or symbol as read from the rule
	 * @param int $pos Offset of the token in the rule
	 * @param int $length Length of the token in the rule
	 * @return CLDRPluralRuleConverterOperator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
	}

	/**
	 * Abort the conversion by throwing an error.
	 *
	 * @param string $message The error message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}