MediaWiki
REL1_21
|
<?php
/**
 * Parser and evaluator for CLDR-style plural rules.
 *
 * A rule is a textual condition such as "n is 1" or "n mod 10 in 2..4".
 * CLDRPluralRuleConverter turns it into a space-delimited RPN (postfix)
 * string, and CLDRPluralRuleEvaluator runs that string against a number.
 *
 * @file
 */

/**
 * Static entry points for compiling and evaluating plural rules.
 */
class CLDRPluralRuleEvaluator {
	/**
	 * Compile the given rules and evaluate a number against them.
	 *
	 * @param $number int|float The number to be evaluated against the rules
	 * @param array $rules The rule strings, in source (infix) form
	 * @return int The key of the first rule that matches, or count( $rules )
	 *   if no rule matches
	 * @throws CLDRPluralRuleError If a rule cannot be parsed
	 */
	public static function evaluate( $number, array $rules ) {
		$rules = self::compile( $rules );
		return self::evaluateCompiled( $number, $rules );
	}

	/**
	 * Convert a set of rules to their compiled (RPN) form.
	 *
	 * @param array $rules The rule strings, in source (infix) form
	 * @return array The same array with every rule replaced by its RPN string
	 * @throws CLDRPluralRuleError If a rule cannot be parsed
	 */
	public static function compile( array $rules ) {
		// We can't use array_map() for this because it generates a warning if
		// there is an exception.
		foreach ( $rules as &$rule ) {
			$rule = CLDRPluralRuleConverter::convert( $rule );
		}
		// Break the reference so the returned array does not carry a
		// reference slot in its last element.
		unset( $rule );
		return $rules;
	}

	/**
	 * Evaluate a number against a set of rules already compiled by compile().
	 *
	 * @param $number int|float The number to be evaluated against the rules
	 * @param array $rules The compiled (RPN) rules
	 * @return int The key of the first rule that matches, or count( $rules )
	 *   if no rule matches
	 * @throws CLDRPluralRuleError If a rule contains an unknown RPN token
	 */
	public static function evaluateCompiled( $number, array $rules ) {
		// Loop-invariant bounds used to recognise tokens starting with a digit.
		$zero = ord( '0' );
		$nine = ord( '9' );

		// The compiled form is RPN, with tokens strictly delimited by
		// spaces, so this is a simple RPN evaluator.
		foreach ( $rules as $i => $rule ) {
			$stack = array();
			foreach ( StringUtils::explode( ' ', $rule ) as $token ) {
				$ord = ord( $token );
				if ( $token === 'n' ) {
					// The variable: the number under test
					$stack[] = $number;
				} elseif ( $ord >= $zero && $ord <= $nine ) {
					// Integer literal
					$stack[] = intval( $token );
				} else {
					// Anything else is treated as a binary operator
					$right = array_pop( $stack );
					$left = array_pop( $stack );
					$stack[] = self::doOperation( $token, $left, $right );
				}
			}
			if ( $stack[0] ) {
				return $i;
			}
		}

		// None of the provided rules match. The number belongs to category
		// 'other' which comes last.
		return count( $rules );
	}

	/**
	 * Apply a single binary RPN operator to its two operands.
	 *
	 * @param $token string The operator name
	 * @param $left mixed The left operand
	 * @param $right mixed The right operand
	 * @return mixed A boolean for the logical and comparison operators, a
	 *   number for 'mod', or a CLDRPluralRuleEvaluator_Range for ',' and '..'
	 * @throws CLDRPluralRuleError If the operator is not recognised
	 */
	private static function doOperation( $token, $left, $right ) {
		// The membership operators need a range on the right-hand side, so a
		// bare number is promoted to a single-element range.
		$rangeOperators = array( 'in', 'not-in', 'within', 'not-within' );
		if ( in_array( $token, $rangeOperators, true )
			&& !( $right instanceof CLDRPluralRuleEvaluator_Range )
		) {
			$right = new CLDRPluralRuleEvaluator_Range( $right );
		}

		switch ( $token ) {
			case 'or':
				return $left || $right;
			case 'and':
				return $left && $right;
			case 'is':
				return $left == $right;
			case 'is-not':
				return $left != $right;
			case 'in':
				return $right->isNumberIn( $left );
			case 'not-in':
				return !$right->isNumberIn( $left );
			case 'within':
				return $right->isNumberWithin( $left );
			case 'not-within':
				return !$right->isNumberWithin( $left );
			case 'mod':
				// fmod() always returns a float; keep integer inputs integral.
				if ( is_int( $left ) ) {
					return (int) fmod( $left, $right );
				}
				return fmod( $left, $right );
			case ',':
				if ( $left instanceof CLDRPluralRuleEvaluator_Range ) {
					$range = $left;
				} else {
					$range = new CLDRPluralRuleEvaluator_Range( $left );
				}
				$range->add( $right );
				return $range;
			case '..':
				return new CLDRPluralRuleEvaluator_Range( $left, $right );
			default:
				throw new CLDRPluralRuleError( "Invalid RPN token" );
		}
	}
}

/**
 * Evaluator helper class representing a range list: a set of single numbers
 * and inclusive [start, end] intervals.
 */
class CLDRPluralRuleEvaluator_Range {
	/**
	 * @var array Each element is either a single number or a two-element
	 *   array( start, end )
	 */
	public $parts = array();

	/**
	 * Create a range holding one number, or one start..end interval.
	 *
	 * @param $start int|float The single number, or the start of the interval
	 * @param $end int|float|bool The end of the interval, or false to store
	 *   $start as a single number
	 */
	public function __construct( $start, $end = false ) {
		if ( $end === false ) {
			$this->parts[] = $start;
		} else {
			$this->parts[] = array( $start, $end );
		}
	}

	/**
	 * Determine whether the number matches any part of the range list.
	 *
	 * Single-number parts are compared with loose equality. Interval parts
	 * match when the number lies between the bounds (inclusive) and, if
	 * $integerConstraint is true, the number has no fractional part.
	 *
	 * @param $number int|float The number to look for
	 * @param $integerConstraint bool Whether interval parts only match whole numbers
	 * @return bool
	 */
	public function isNumberIn( $number, $integerConstraint = true ) {
		foreach ( $this->parts as $part ) {
			if ( is_array( $part ) ) {
				$isWhole = floor( $number ) === (float)$number;
				if ( ( !$integerConstraint || $isWhole )
					&& $number >= $part[0] && $number <= $part[1]
				) {
					return true;
				}
			} elseif ( $number == $part ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Same as isNumberIn(), but interval parts also match non-whole numbers.
	 *
	 * @param $number int|float The number to look for
	 * @return bool
	 */
	public function isNumberWithin( $number ) {
		return $this->isNumberIn( $number, false );
	}

	/**
	 * Add another part to this range list.
	 *
	 * @param $other CLDRPluralRuleEvaluator_Range|int|float A range, whose
	 *   parts are merged in, or any other value, which is appended as a
	 *   single part
	 */
	public function add( $other ) {
		if ( $other instanceof self ) {
			$this->parts = array_merge( $this->parts, $other->parts );
		} else {
			$this->parts[] = $other;
		}
	}

	/**
	 * Build a readable representation such as "Range(1, 3..5)".
	 *
	 * @return string
	 */
	public function __toString() {
		$s = 'Range(';
		foreach ( $this->parts as $i => $part ) {
			if ( $i ) {
				$s .= ', ';
			}
			if ( is_array( $part ) ) {
				$s .= $part[0] . '..' . $part[1];
			} else {
				$s .= $part;
			}
		}
		$s .= ')';
		return $s;
	}
}

/**
 * Helper class for converting rules to reverse polish notation (RPN).
 */
class CLDRPluralRuleConverter {
	/** @var string The rule text being parsed */
	public $rule;

	/** @var int The current byte offset into $rule */
	public $pos;

	/** @var int The length of $rule in bytes */
	public $end;

	/** @var array Stack of pending CLDRPluralRuleConverter_Operator objects */
	public $operators = array();

	/** @var array Stack of pending CLDRPluralRuleConverter_Expression objects */
	public $operands = array();

	/**
	 * Precedence levels. An operator with a larger value binds more tightly;
	 * operators of equal precedence are reduced left to right.
	 */
	public static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param $rule string The rule in source (infix) form
	 * @return string The rule as space-delimited RPN
	 * @throws CLDRPluralRuleError If the rule cannot be parsed
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );
		return $parser->doConvert();
	}

	/**
	 * Private constructor; use convert().
	 *
	 * @param $rule string The rule in source (infix) form
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the conversion.
	 *
	 * @return string The rule as space-delimited RPN
	 * @throws CLDRPluralRuleError On any syntax or type error
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}
			// Resolve higher precedence levels
			$lastOp = end( $this->operators );
			while ( $lastOp
				&& self::$precedence[$token->name] <= self::$precedence[$lastOp->name]
			) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string and advance the position.
	 *
	 * @return CLDRPluralRuleConverter_Fragment|bool An operator or expression
	 *   fragment, or false once the end of the input has been reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;
			return $token;
		}

		// Comma
		if ( $this->rule[$this->pos] === ',' ) {
			$token = $this->newOperator( ',', $this->pos, 1 );
			$this->pos++;
			return $token;
		}

		// Dot dot
		if ( substr( $this->rule, $this->pos, 2 ) === '..' ) {
			$token = $this->newOperator( '..', $this->pos, 2 );
			$this->pos += 2;
			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;
				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );
			return $token;
		}

		// The special numerical keyword "n"
		if ( $word1 === 'n' ) {
			$token = $this->newNumber( 'n', $this->pos );
			$this->pos++;
			return $token;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its two operands off the operand
	 * stack and push the resulting expression fragment back onto it.
	 *
	 * @param $op CLDRPluralRuleConverter_Operator
	 * @throws CLDRPluralRuleError If fewer than two operands are available
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$this->operands[] = $op->operate( $left, $right );
	}

	/**
	 * Create a numerical expression fragment.
	 *
	 * @param $text string The token text, which is also used as its RPN form
	 * @param $pos int The offset of the token in the rule
	 * @return CLDRPluralRuleConverter_Expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator fragment.
	 *
	 * @param $type string The operator name, a key of self::$precedence
	 * @param $pos int The offset of the token in the rule
	 * @param $length int The length of the token in the rule
	 * @return CLDRPluralRuleConverter_Operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error that is not tied to a particular fragment.
	 *
	 * @param $message string
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}

/**
 * Helper for CLDRPluralRuleConverter.
 * The base class for operators and expressions, describing a region of the
 * input string (a sub-expression or an operator token).
 */
class CLDRPluralRuleConverter_Fragment {
	/** @var CLDRPluralRuleConverter The parser that owns the rule text */
	public $parser;

	/** @var int The offset of the fragment in the rule */
	public $pos;

	/** @var int The length of the fragment */
	public $length;

	/** @var int The offset just past the fragment ($pos + $length) */
	public $end;

	/**
	 * @param $parser CLDRPluralRuleConverter
	 * @param $pos int
	 * @param $length int
	 */
	public function __construct( $parser, $pos, $length ) {
		$this->parser = $parser;
		$this->pos = $pos;
		$this->length = $length;
		$this->end = $pos + $length;
	}

	/**
	 * Throw an error that quotes this fragment and its 1-based position.
	 *
	 * @param $message string
	 * @throws CLDRPluralRuleError Always
	 */
	public function error( $message ) {
		$text = $this->getText();
		throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" );
	}

	/**
	 * Get the part of the rule text covered by this fragment.
	 *
	 * @return string
	 */
	public function getText() {
		return substr( $this->parser->rule, $this->pos, $this->length );
	}
}

/**
 * Helper for CLDRPluralRuleConverter.
 * An expression fragment: an operand or a reduced sub-expression, carrying
 * its result type and its RPN text.
 */
class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
	/** @var string The result type: 'boolean', 'number' or 'range' */
	public $type;

	/** @var string The RPN form of the expression */
	public $rpn;

	/**
	 * @param $parser CLDRPluralRuleConverter
	 * @param $type string
	 * @param $rpn string
	 * @param $pos int
	 * @param $length int
	 */
	public function __construct( $parser, $type, $rpn, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		$this->type = $type;
		$this->rpn = $rpn;
	}

	/**
	 * Check whether this expression can be used where $type is expected.
	 * A number is accepted wherever a range is expected.
	 *
	 * @param $type string The expected type
	 * @return bool
	 */
	public function isType( $type ) {
		if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) {
			return true;
		}
		return $type === $this->type;
	}
}

/**
 * Helper for CLDRPluralRuleConverter.
 * An operator token, which knows how to combine two expressions into one.
 */
class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
	/** @var string The operator name, a key of self::$opTypes */
	public $name;

	/**
	 * Each op type has three characters: the left operand type, the right
	 * operand type and the result type, in that order.
	 *
	 *   b = boolean
	 *   n = number
	 *   r = range
	 *
	 * A number is also accepted where a range is expected.
	 */
	public static $opTypes = array(
		'or' => 'bbb',
		'and' => 'bbb',
		'is' => 'nnb',
		'is-not' => 'nnb',
		'in' => 'nrb',
		'not-in' => 'nrb',
		'within' => 'nrb',
		'not-within' => 'nrb',
		'mod' => 'nnn',
		',' => 'rrr',
		'..' => 'nnr',
	);

	/**
	 * Map for converting the type characters used in self::$opTypes into
	 * readable type names.
	 */
	public static $typeSpecMap = array(
		'b' => 'boolean',
		'n' => 'number',
		'r' => 'range',
	);

	/**
	 * @param $parser CLDRPluralRuleConverter
	 * @param $name string
	 * @param $pos int
	 * @param $length int
	 */
	public function __construct( $parser, $name, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		$this->name = $name;
	}

	/**
	 * Combine two operands into a new expression in RPN form, checking that
	 * both operands have the types this operator expects.
	 *
	 * @param $left CLDRPluralRuleConverter_Expression
	 * @param $right CLDRPluralRuleConverter_Expression
	 * @return CLDRPluralRuleConverter_Expression
	 * @throws CLDRPluralRuleError If an operand has the wrong type
	 */
	public function operate( $left, $right ) {
		$typeSpec = self::$opTypes[$this->name];

		$leftType = self::$typeSpecMap[$typeSpec[0]];
		$rightType = self::$typeSpecMap[$typeSpec[1]];
		$resultType = self::$typeSpecMap[$typeSpec[2]];

		// The new expression spans the operator and both of its operands.
		$start = min( $this->pos, $left->pos, $right->pos );
		$end = max( $this->end, $left->end, $right->end );
		$length = $end - $start;

		// Built before the type checks so that type errors can quote the
		// whole sub-expression rather than just one token.
		$newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType,
			"{$left->rpn} {$right->rpn} {$this->name}",
			$start, $length );

		if ( !$left->isType( $leftType ) ) {
			$newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
		}

		if ( !$right->isType( $rightType ) ) {
			$newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
		}
		return $newExpr;
	}
}

/**
 * The exception class for all the classes in this file. It is thrown back to
 * the caller when an invalid rule is passed in.
 */
class CLDRPluralRuleError extends MWException {
	/**
	 * @param $message string The error detail, appended to a fixed prefix
	 */
	public function __construct( $message ) {
		parent::__construct( 'CLDR plural rule error: ' . $message );
	}
}