MediaWiki
REL1_20
|
00001 <?php 00015 class CLDRPluralRuleEvaluator { 00024 public static function evaluate( $number, array $rules ) { 00025 $rules = self::compile( $rules ); 00026 return self::evaluateCompiled( $number, $rules ); 00027 } 00028 00036 public static function compile( array $rules ) { 00037 // We can't use array_map() for this because it generates a warning if 00038 // there is an exception. 00039 foreach ( $rules as &$rule ) { 00040 $rule = CLDRPluralRuleConverter::convert( $rule ); 00041 } 00042 return $rules; 00043 } 00044 00049 public static function evaluateCompiled( $number, array $rules ) { 00050 // The compiled form is RPN, with tokens strictly delimited by 00051 // spaces, so this is a simple RPN evaluator. 00052 foreach ( $rules as $i => $rule ) { 00053 $stack = array(); 00054 $zero = ord( '0' ); 00055 $nine = ord( '9' ); 00056 foreach ( StringUtils::explode( ' ', $rule ) as $token ) { 00057 $ord = ord( $token ); 00058 if ( $token === 'n' ) { 00059 $stack[] = $number; 00060 } elseif ( $ord >= $zero && $ord <= $nine ) { 00061 $stack[] = intval( $token ); 00062 } else { 00063 $right = array_pop( $stack ); 00064 $left = array_pop( $stack ); 00065 $result = self::doOperation( $token, $left, $right ); 00066 $stack[] = $result; 00067 } 00068 } 00069 if ( $stack[0] ) { 00070 return $i; 00071 } 00072 } 00073 // None of the provided rules match. The number belongs to caregory 00074 // 'other' which comes last. 00075 return count( $rules ); 00076 } 00077 00086 private static function doOperation( $token, $left, $right ) { 00087 if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) { 00088 if ( !($right instanceof CLDRPluralRuleEvaluator_Range ) ) { 00089 $right = new CLDRPluralRuleEvaluator_Range( $right ); 00090 } 00091 } 00092 switch ( $token ) { 00093 case 'or': 00094 return $left || $right; 00095 case 'and': 00096 return $left && $right; 00097 case 'is': 00098 return $left == $right; 00099 case 'is-not': 00100 return $left != $right; 00101 case 'in': 00102 return $right->isNumberIn( $left ); 00103 case 'not-in': 00104 return !$right->isNumberIn( $left ); 00105 case 'within': 00106 return $right->isNumberWithin( $left ); 00107 case 'not-within': 00108 return !$right->isNumberWithin( $left ); 00109 case 'mod': 00110 if ( is_int( $left ) ) { 00111 return (int) fmod( $left, $right ); 00112 } 00113 return fmod( $left, $right ); 00114 case ',': 00115 if ( $left instanceof CLDRPluralRuleEvaluator_Range ) { 00116 $range = $left; 00117 } else { 00118 $range = new CLDRPluralRuleEvaluator_Range( $left ); 00119 } 00120 $range->add( $right ); 00121 return $range; 00122 case '..': 00123 return new CLDRPluralRuleEvaluator_Range( $left, $right ); 00124 default: 00125 throw new CLDRPluralRuleError( "Invalid RPN token" ); 00126 } 00127 } 00128 } 00129 00133 class CLDRPluralRuleEvaluator_Range { 00134 var $parts = array(); 00135 00136 function __construct( $start, $end = false ) { 00137 if ( $end === false ) { 00138 $this->parts[] = $start; 00139 } else { 00140 $this->parts[] = array( $start, $end ); 00141 } 00142 } 00143 00149 function isNumberIn( $number, $integerConstraint = true ) { 00150 foreach ( $this->parts as $part ) { 00151 if ( is_array( $part ) ) { 00152 if ( ( !$integerConstraint || floor( $number ) === (float)$number ) 00153 && $number >= $part[0] && $number <= $part[1] ) 00154 { 00155 return true; 00156 } 00157 } else { 00158 if ( $number == $part ) { 00159 return true; 00160 } 00161 } 00162 } 00163 return false; 00164 } 00165 00170 function isNumberWithin( $number ) { 00171 return $this->isNumberIn( $number, false ); 00172 } 00173 00178 function add( $other ) { 00179 if ( $other instanceof self ) { 00180 $this->parts = array_merge( $this->parts, $other->parts ); 00181 } else { 00182 $this->parts[] = $other; 00183 } 00184 } 00185 00189 function __toString() { 00190 $s = 'Range('; 00191 foreach ( $this->parts as $i => $part ) { 00192 if ( $i ) { 00193 $s .= ', '; 00194 } 00195 if ( is_array( $part ) ) { 00196 $s .= $part[0] . '..' . $part[1]; 00197 } else { 00198 $s .= $part; 00199 } 00200 } 00201 $s .= ')'; 00202 return $s; 00203 } 00204 00205 } 00206 00210 class CLDRPluralRuleConverter { 00211 var $rule, $pos, $end; 00212 var $operators = array(); 00213 var $operands = array(); 00214 00220 static $precedence = array( 00221 'or' => 2, 00222 'and' => 3, 00223 'is' => 4, 00224 'is-not' => 4, 00225 'in' => 4, 00226 'not-in' => 4, 00227 'within' => 4, 00228 'not-within' => 4, 00229 'mod' => 5, 00230 ',' => 6, 00231 '..' => 7, 00232 ); 00233 00237 const WHITESPACE_CLASS = " \t\r\n"; 00238 00243 const NUMBER_CLASS = '0123456789'; 00244 00248 const WORD_REGEX = '/[a-zA-Z]+/A'; 00249 00253 public static function convert( $rule ) { 00254 $parser = new self( $rule ); 00255 return $parser->doConvert(); 00256 } 00257 00261 protected function __construct( $rule ) { 00262 $this->rule = $rule; 00263 $this->pos = 0; 00264 $this->end = strlen( $rule ); 00265 } 00266 00270 protected function doConvert() { 00271 $expectOperator = true; 00272 00273 // Iterate through all tokens, saving the operators and operands to a 00274 // stack per Dijkstra's shunting yard algorithm. 00275 while ( false !== ( $token = $this->nextToken() ) ) { 00276 // In this grammar, there are only binary operators, so every valid 00277 // rule string will alternate between operator and operand tokens. 00278 $expectOperator = !$expectOperator; 00279 00280 if ( $token instanceof CLDRPluralRuleConverter_Expression ) { 00281 // Operand 00282 if ( $expectOperator ) { 00283 $token->error( 'unexpected operand' ); 00284 } 00285 $this->operands[] = $token; 00286 continue; 00287 } else { 00288 // Operator 00289 if ( !$expectOperator ) { 00290 $token->error( 'unexpected operator' ); 00291 } 00292 // Resolve higher precedence levels 00293 $lastOp = end( $this->operators ); 00294 while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) { 00295 $this->doOperation( $lastOp, $this->operands ); 00296 array_pop( $this->operators ); 00297 $lastOp = end( $this->operators ); 00298 } 00299 $this->operators[] = $token; 00300 } 00301 } 00302 00303 // Finish off the stack 00304 while ( $op = array_pop( $this->operators ) ) { 00305 $this->doOperation( $op, $this->operands ); 00306 } 00307 00308 // Make sure the result is sane. The first case is possible for an empty 00309 // string input, the second should be unreachable. 00310 if ( !count( $this->operands ) ) { 00311 $this->error( 'condition expected' ); 00312 } elseif ( count( $this->operands ) > 1 ) { 00313 $this->error( 'missing operator or too many operands' ); 00314 } 00315 00316 $value = $this->operands[0]; 00317 if ( $value->type !== 'boolean' ) { 00318 $this->error( 'the result must have a boolean type' ); 00319 } 00320 00321 return $this->operands[0]->rpn; 00322 } 00323 00328 protected function nextToken() { 00329 if ( $this->pos >= $this->end ) { 00330 return false; 00331 } 00332 00333 // Whitespace 00334 $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos ); 00335 $this->pos += $length; 00336 00337 if ( $this->pos >= $this->end ) { 00338 return false; 00339 } 00340 00341 // Number 00342 $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos ); 00343 if ( $length !== 0 ) { 00344 $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos ); 00345 $this->pos += $length; 00346 return $token; 00347 } 00348 00349 // Comma 00350 if ( $this->rule[$this->pos] === ',' ) { 00351 $token = $this->newOperator( ',', $this->pos, 1 ); 00352 $this->pos ++; 00353 return $token; 00354 } 00355 00356 // Dot dot 00357 if ( substr( $this->rule, $this->pos, 2 ) === '..' ) { 00358 $token = $this->newOperator( '..', $this->pos, 2 ); 00359 $this->pos += 2; 00360 return $token; 00361 } 00362 00363 // Word 00364 if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { 00365 $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); 00366 } 00367 $word1 = strtolower( $m[0] ); 00368 $word2 = ''; 00369 $nextTokenPos = $this->pos + strlen( $word1 ); 00370 if ( $word1 === 'not' || $word1 === 'is' ) { 00371 // Look ahead one word 00372 $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos ); 00373 if ( $nextTokenPos < $this->end 00374 && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) ) 00375 { 00376 $word2 = strtolower( $m[0] ); 00377 $nextTokenPos += strlen( $word2 ); 00378 } 00379 } 00380 00381 // Two-word operators like "is not" take precedence over single-word operators like "is" 00382 if ( $word2 !== '' ) { 00383 $bothWords = "{$word1}-{$word2}"; 00384 if ( isset( self::$precedence[$bothWords] ) ) { 00385 $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos ); 00386 $this->pos = $nextTokenPos; 00387 return $token; 00388 } 00389 } 00390 00391 // Single-word operators 00392 if ( isset( self::$precedence[$word1] ) ) { 00393 $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) ); 00394 $this->pos += strlen( $word1 ); 00395 return $token; 00396 } 00397 00398 // The special numerical keyword "n" 00399 if ( $word1 === 'n' ) { 00400 $token = $this->newNumber( 'n', $this->pos ); 00401 $this->pos ++; 00402 return $token; 00403 } 00404 00405 $this->error( 'unrecognised word' ); 00406 } 00407 00413 protected function doOperation( $op ) { 00414 if ( count( $this->operands ) < 2 ) { 00415 $op->error( 'missing operand' ); 00416 } 00417 $right = array_pop( $this->operands ); 00418 $left = array_pop( $this->operands ); 00419 $result = $op->operate( $left, $right ); 00420 $this->operands[] = $result; 00421 } 00422 00426 protected function newNumber( $text, $pos ) { 00427 return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) ); 00428 } 00429 00433 protected function newOperator( $type, $pos, $length ) { 00434 return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length ); 00435 } 00436 00440 protected function error( $message ) { 00441 throw new CLDRPluralRuleError( $message ); 00442 } 00443 } 00444 00449 class CLDRPluralRuleConverter_Fragment { 00450 var $parser, $pos, $length, $end; 00451 00452 function __construct( $parser, $pos, $length ) { 00453 $this->parser = $parser; 00454 $this->pos = $pos; 00455 $this->length = $length; 00456 $this->end = $pos + $length; 00457 } 00458 00459 public function error( $message ) { 00460 $text = $this->getText(); 00461 throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" ); 00462 } 00463 00464 public function getText() { 00465 return substr( $this->parser->rule, $this->pos, $this->length ); 00466 } 00467 } 00468 00475 class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment { 00476 var $type, $rpn; 00477 00478 function __construct( $parser, $type, $rpn, $pos, $length ) { 00479 parent::__construct( $parser, $pos, $length ); 00480 $this->type = $type; 00481 $this->rpn = $rpn; 00482 } 00483 00484 public function isType( $type ) { 00485 if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) { 00486 return true; 00487 } 00488 if ( $type === $this->type ) { 00489 return true; 00490 } 00491 return false; 00492 } 00493 } 00494 00500 class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment { 00501 var $name; 00502 00512 static $opTypes = array( 00513 'or' => 'bbb', 00514 'and' => 'bbb', 00515 'is' => 'nnb', 00516 'is-not' => 'nnb', 00517 'in' => 'nrb', 00518 'not-in' => 'nrb', 00519 'within' => 'nrb', 00520 'not-within' => 'nrb', 00521 'mod' => 'nnn', 00522 ',' => 'rrr', 00523 '..' => 'nnr', 00524 ); 00525 00529 static $typeSpecMap = array( 00530 'b' => 'boolean', 00531 'n' => 'number', 00532 'r' => 'range', 00533 ); 00534 00535 function __construct( $parser, $name, $pos, $length ) { 00536 parent::__construct( $parser, $pos, $length ); 00537 $this->name = $name; 00538 } 00539 00540 public function operate( $left, $right ) { 00541 $typeSpec = self::$opTypes[$this->name]; 00542 00543 $leftType = self::$typeSpecMap[$typeSpec[0]]; 00544 $rightType = self::$typeSpecMap[$typeSpec[1]]; 00545 $resultType = self::$typeSpecMap[$typeSpec[2]]; 00546 00547 $start = min( $this->pos, $left->pos, $right->pos ); 00548 $end = max( $this->end, $left->end, $right->end ); 00549 $length = $end - $start; 00550 00551 $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType, 00552 "{$left->rpn} {$right->rpn} {$this->name}", 00553 $start, $length ); 00554 00555 if ( !$left->isType( $leftType ) ) { 00556 $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" ); 00557 } 00558 00559 if ( !$right->isType( $rightType ) ) { 00560 $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" ); 00561 } 00562 return $newExpr; 00563 } 00564 } 00565 00570 class CLDRPluralRuleError extends MWException { 00571 function __construct( $message ) { 00572 parent::__construct( 'CLDR plural rule error: ' . $message ); 00573 } 00574 }