MediaWiki
REL1_22
|
<?php
/**
 * Parser and evaluator for CLDR-style plural rules.
 *
 * A rule is a textual condition over the operand symbols n, i, v, w, f and t
 * (for example "n is 1" or "n mod 10 in 2..4 and n mod 100 not in 12..14").
 * CLDRPluralRuleConverter translates such a rule into a space-delimited
 * reverse Polish notation (RPN) string, and CLDRPluralRuleEvaluator runs the
 * RPN against a number.
 *
 * @file
 */

/**
 * Static entry points for compiling and evaluating plural rules.
 */
class CLDRPluralRuleEvaluator {
	/**
	 * Evaluate a number against a set of plural rules. If a rule passes,
	 * return the index of that rule.
	 *
	 * @param int|float|string $number The number to be evaluated against the rules
	 * @param array $rules The associated array of plural rules in source (text) form
	 * @return int The index of the first rule that matched, or count( $rules )
	 *   if no rule matched
	 * @throws CLDRPluralRuleError If a rule cannot be converted
	 */
	public static function evaluate( $number, array $rules ) {
		$rules = self::compile( $rules );
		return self::evaluateCompiled( $number, $rules );
	}

	/**
	 * Convert a set of rules to a compiled form which is optimised for
	 * fast evaluation. The result is an array of RPN strings with the same
	 * keys as the input.
	 *
	 * @param array $rules The rules to compile
	 * @return array An array of compiled rules
	 * @throws CLDRPluralRuleError If a rule cannot be converted
	 */
	public static function compile( array $rules ) {
		// We can't use array_map() for this because it generates a warning if
		// there is an exception.
		// Assign by key rather than iterating by reference, so that no
		// dangling reference to the last element is left behind.
		foreach ( $rules as $key => $rule ) {
			$rules[$key] = CLDRPluralRuleConverter::convert( $rule );
		}
		return $rules;
	}

	/**
	 * Evaluate a compiled set of rules returned by compile(). Do not allow
	 * the user to edit the compiled form, or else PHP errors may result.
	 *
	 * @param int|float|string $number The number to be evaluated against the
	 *   rules; it is converted to a string and must look like an optionally
	 *   negative decimal number
	 * @param array $rules The compiled (RPN) rules
	 * @return int The index of the first rule that matched, or count( $rules )
	 *   if no rule matched or if the number was not valid
	 */
	public static function evaluateCompiled( $number, array $rules ) {
		// Calculate the values of the operand symbols
		$number = strval( $number );
		if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) {
			// wfDebug() writes the text verbatim, so terminate the line here.
			wfDebug( __METHOD__ . ': invalid number input, returning "other"' . "\n" );
			return count( $rules );
		}
		if ( !isset( $m[3] ) ) {
			// No fraction part: n and i are both the absolute integer value
			$operandSymbols = array(
				'n' => intval( $m[1] ),
				'i' => intval( $m[1] ),
				'v' => 0,
				'w' => 0,
				'f' => 0,
				't' => 0
			);
		} else {
			$absValStr = $m[1];
			$intStr = $m[2];
			$fracStr = $m[3];
			$operandSymbols = array(
				// Absolute value of the number
				'n' => floatval( $absValStr ),
				// Integer digits
				'i' => intval( $intStr ),
				// Number of fraction digits, with trailing zeros
				'v' => strlen( $fracStr ),
				// Number of fraction digits, without trailing zeros
				'w' => strlen( rtrim( $fracStr, '0' ) ),
				// Fraction digits as an integer, with trailing zeros
				'f' => intval( $fracStr ),
				// Fraction digits as an integer, without trailing zeros
				't' => intval( rtrim( $fracStr, '0' ) ),
			);
		}

		// The compiled form is RPN, with tokens strictly delimited by
		// spaces, so this is a simple RPN evaluator.
		$zero = ord( '0' );
		$nine = ord( '9' );
		foreach ( $rules as $i => $rule ) {
			$stack = array();
			foreach ( StringUtils::explode( ' ', $rule ) as $token ) {
				// Only the first byte is needed to recognise a numeric literal
				$ord = ord( $token );
				if ( isset( $operandSymbols[$token] ) ) {
					$stack[] = $operandSymbols[$token];
				} elseif ( $ord >= $zero && $ord <= $nine ) {
					$stack[] = intval( $token );
				} else {
					// Anything else is a binary operator
					$right = array_pop( $stack );
					$left = array_pop( $stack );
					$result = self::doOperation( $token, $left, $right );
					$stack[] = $result;
				}
			}
			if ( $stack[0] ) {
				return $i;
			}
		}
		// None of the provided rules match. The number belongs to category
		// 'other', which comes last.
		return count( $rules );
	}

	/**
	 * Do a single operation of the RPN evaluator.
	 *
	 * @param string $token The operator name
	 * @param mixed $left The left operand; a number, a boolean, or a
	 *   CLDRPluralRuleEvaluator_Range
	 * @param mixed $right The right operand, of the same kinds as $left
	 * @return mixed The operation result: a boolean for logical, comparison
	 *   and membership operators, a number for 'mod', and a
	 *   CLDRPluralRuleEvaluator_Range for ',' and '..'
	 * @throws CLDRPluralRuleError If the token is not a known operator
	 */
	private static function doOperation( $token, $left, $right ) {
		// Membership operators accept a bare number on the right hand side;
		// wrap it so that the range methods can be used uniformly.
		if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ), true ) ) {
			if ( !( $right instanceof CLDRPluralRuleEvaluator_Range ) ) {
				$right = new CLDRPluralRuleEvaluator_Range( $right );
			}
		}
		switch ( $token ) {
			case 'or':
				return $left || $right;
			case 'and':
				return $left && $right;
			case 'is':
				// Loose comparison is intended: 'n' may be a float while
				// rule literals are integers.
				return $left == $right;
			case 'is-not':
				return $left != $right;
			case 'in':
				return $right->isNumberIn( $left );
			case 'not-in':
				return !$right->isNumberIn( $left );
			case 'within':
				return $right->isNumberWithin( $left );
			case 'not-within':
				return !$right->isNumberWithin( $left );
			case 'mod':
				// Keep integer operands integral so that later comparisons
				// and range checks see an int rather than a float.
				if ( is_int( $left ) ) {
					return (int)fmod( $left, $right );
				}
				return fmod( $left, $right );
			case ',':
				if ( $left instanceof CLDRPluralRuleEvaluator_Range ) {
					$range = $left;
				} else {
					$range = new CLDRPluralRuleEvaluator_Range( $left );
				}
				$range->add( $right );
				return $range;
			case '..':
				return new CLDRPluralRuleEvaluator_Range( $left, $right );
			default:
				throw new CLDRPluralRuleError( "Invalid RPN token" );
		}
	}
}

/**
 * Evaluator helper class representing a range list: a list of parts, each of
 * which is either a single number or a two-element array( start, end ).
 */
class CLDRPluralRuleEvaluator_Range {
	/**
	 * The parts of the range list. Each element is either a single number or
	 * an array( start, end ) pair.
	 *
	 * @var array
	 */
	public $parts = array();

	/**
	 * Initialize a new instance with a single part.
	 *
	 * @param int|float $start The single value, or the start of the range
	 * @param int|float|bool $end The end of the range, or false if the part
	 *   is a single number
	 */
	public function __construct( $start, $end = false ) {
		if ( $end === false ) {
			$this->parts[] = $start;
		} else {
			$this->parts[] = array( $start, $end );
		}
	}

	/**
	 * Determine if the given number is inside the range list.
	 *
	 * @param int|float $number The number to check
	 * @param bool $integerConstraint If true (the default), a number with a
	 *   fractional part never matches an array( start, end ) part. Single
	 *   number parts are compared loosely regardless of this flag.
	 * @return bool True if the number matches any part
	 */
	public function isNumberIn( $number, $integerConstraint = true ) {
		foreach ( $this->parts as $part ) {
			if ( is_array( $part ) ) {
				if ( ( !$integerConstraint || floor( $number ) === (float)$number )
					&& $number >= $part[0] && $number <= $part[1] )
				{
					return true;
				}
			} else {
				if ( $number == $part ) {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Determine if the given number is inside the range list, without
	 * requiring the number to be an integer.
	 *
	 * @param int|float $number The number to check
	 * @return bool True if the number matches any part
	 */
	public function isNumberWithin( $number ) {
		return $this->isNumberIn( $number, false );
	}

	/**
	 * Add another part to this range list.
	 *
	 * @param CLDRPluralRuleEvaluator_Range|int|float $other Another range
	 *   list, whose parts are all appended, or a single number
	 */
	public function add( $other ) {
		if ( $other instanceof self ) {
			$this->parts = array_merge( $this->parts, $other->parts );
		} else {
			$this->parts[] = $other;
		}
	}

	/**
	 * Return a string representation of the range list, for debugging.
	 *
	 * @return string A string of the form "Range(1, 3..5)"
	 */
	public function __toString() {
		$s = 'Range(';
		foreach ( $this->parts as $i => $part ) {
			if ( $i ) {
				$s .= ', ';
			}
			if ( is_array( $part ) ) {
				$s .= $part[0] . '..' . $part[1];
			} else {
				$s .= $part;
			}
		}
		$s .= ')';
		return $s;
	}

}

/**
 * Helper class for converting rules to reverse Polish notation (RPN).
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current byte position in the input string
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The byte length of the input string
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels, keyed by operator name. An operator on the stack is
	 * applied before an incoming operator of equal or lower level is pushed,
	 * so higher numbers bind more tightly and all operators are
	 * left-associative.
	 *
	 * @var array
	 */
	public static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of the single-letter operand symbols
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule is not valid
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );
		return $parser->doConvert();
	}

	/**
	 * Private constructor; use convert().
	 *
	 * @param string $rule The rule to convert
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the conversion.
	 *
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule is empty, malformed, or does
	 *   not evaluate to a boolean
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			} else {
				// Operator
				if ( !$expectOperator ) {
					$token->error( 'unexpected operator' );
				}
				// Resolve higher precedence levels
				$lastOp = end( $this->operators );
				while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
					$this->doOperation( $lastOp );
					array_pop( $this->operators );
					$lastOp = end( $this->operators );
				}
				$this->operators[] = $token;
			}
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $this->operands[0]->rpn;
	}

	/**
	 * Fetch the next token from the input string and advance the position
	 * past it.
	 *
	 * @return CLDRPluralRuleConverter_Fragment|bool The next token (an
	 *   Expression for operands, an Operator for operators), or false at the
	 *   end of the input or at the start of a sample section
	 * @throws CLDRPluralRuleError If the input cannot be tokenised
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;
			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;
			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;
			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) )
			{
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;
				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );
			return $token;
		}

		// The single-character operand symbols. The length check matters:
		// strpos() alone would also accept multi-letter substrings of the
		// symbol list such as "ni" or "vwf", which are not operands.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;
			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;
			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its two operands off the operand
	 * stack and push back the combined expression.
	 *
	 * @param CLDRPluralRuleConverter_Operator $op The operator to apply
	 * @throws CLDRPluralRuleError If fewer than two operands are available,
	 *   or if an operand has the wrong type
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object. Operand symbols are represented
	 * this way too, with the symbol itself as the RPN text.
	 *
	 * @param string $text The RPN text of the expression
	 * @param int $pos The byte position of the text in the rule
	 * @return CLDRPluralRuleConverter_Expression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator.
	 *
	 * @param string $type The operator name or alias
	 * @param int $pos The byte position of the operator in the rule
	 * @param int $length The byte length of the operator in the rule
	 * @return CLDRPluralRuleConverter_Operator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error which is not tied to a particular token.
	 *
	 * @param string $message The error message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}

/**
 * Helper for CLDRPluralRuleConverter.
 * The base class for operators and expressions, describing a region of the
 * input string (a fragment).
 */
class CLDRPluralRuleConverter_Fragment {
	/**
	 * The parser which created the fragment, and the byte offset, byte
	 * length and end offset (pos + length) of the fragment in its rule.
	 */
	public $parser, $pos, $length, $end;

	/**
	 * @param CLDRPluralRuleConverter $parser The parser which owns the rule
	 * @param int $pos The byte position of the fragment in the rule
	 * @param int $length The byte length of the fragment
	 */
	public function __construct( $parser, $pos, $length ) {
		$this->parser = $parser;
		$this->pos = $pos;
		$this->length = $length;
		$this->end = $pos + $length;
	}

	/**
	 * Throw an error which quotes this fragment and gives its 1-based
	 * position in the rule.
	 *
	 * @param string $message The error message
	 * @throws CLDRPluralRuleError Always
	 */
	public function error( $message ) {
		$text = $this->getText();
		throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" );
	}

	/**
	 * Get the part of the rule string which this fragment covers.
	 *
	 * @return string The fragment text
	 */
	public function getText() {
		return substr( $this->parser->rule, $this->pos, $this->length );
	}
}

/**
 * Helper for CLDRPluralRuleConverter.
 * An expression object, representing a region of the input string (for error
 * messages), the RPN notation used to evaluate it, and the result type for
 * validation.
 */
class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
	/**
	 * The result type ('boolean', 'number' or 'range') and the RPN text.
	 */
	public $type, $rpn;

	/**
	 * @param CLDRPluralRuleConverter $parser The parser which owns the rule
	 * @param string $type The result type of the expression
	 * @param string $rpn The RPN text of the expression
	 * @param int $pos The byte position of the expression in the rule
	 * @param int $length The byte length of the expression
	 */
	public function __construct( $parser, $type, $rpn, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		$this->type = $type;
		$this->rpn = $rpn;
	}

	/**
	 * Check whether this expression can be used where the given type is
	 * expected. A number is acceptable wherever a range is expected.
	 *
	 * @param string $type The expected type
	 * @return bool True if the expression is compatible with the type
	 */
	public function isType( $type ) {
		if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) {
			return true;
		}
		if ( $type === $this->type ) {
			return true;
		}
		return false;
	}
}

/**
 * Helper for CLDRPluralRuleConverter.
 * An operator object, representing a region of the input string (for error
 * messages), and the binary operator at that location.
 */
class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
	/**
	 * The canonical operator name, after alias resolution
	 *
	 * @var string
	 */
	public $name;

	/**
	 * Each operator is mapped to a three-letter type specification: the
	 * first letter is the type of the left operand, the second the type of
	 * the right operand, and the third the type of the result. The letters
	 * are expanded by $typeSpecMap:
	 *
	 *   b = boolean
	 *   n = number
	 *   r = range
	 *
	 * A number is also accepted where a range is expected.
	 *
	 * @var array
	 */
	public static $opTypes = array(
		'or' => 'bbb',
		'and' => 'bbb',
		'is' => 'nnb',
		'is-not' => 'nnb',
		'in' => 'nrb',
		'not-in' => 'nrb',
		'within' => 'nrb',
		'not-within' => 'nrb',
		'mod' => 'nnn',
		',' => 'rrr',
		'..' => 'nnr',
	);

	/**
	 * Map for converting the type specification letters to full type names
	 *
	 * @var array
	 */
	public static $typeSpecMap = array(
		'b' => 'boolean',
		'n' => 'number',
		'r' => 'range',
	);

	/**
	 * Map of symbolic operator aliases to canonical operator names
	 *
	 * @var array
	 */
	public static $aliasMap = array(
		'%' => 'mod',
		'!=' => 'not-in',
		'=' => 'in'
	);

	/**
	 * Initialize a new instance of a CLDRPluralRuleConverter_Operator object.
	 *
	 * @param CLDRPluralRuleConverter $parser The parser which owns the rule
	 * @param string $name The operator name or alias
	 * @param int $pos The byte position of the operator in the rule
	 * @param int $length The byte length of the operator in the rule
	 */
	public function __construct( $parser, $name, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		if ( isset( self::$aliasMap[$name] ) ) {
			$name = self::$aliasMap[$name];
		}
		$this->name = $name;
	}

	/**
	 * Combine two operand expressions with this operator, checking the
	 * operand types against the operator's type specification.
	 *
	 * @param CLDRPluralRuleConverter_Expression $left The left operand
	 * @param CLDRPluralRuleConverter_Expression $right The right operand
	 * @return CLDRPluralRuleConverter_Expression The combined expression,
	 *   covering the operator and both operands in the rule string
	 * @throws CLDRPluralRuleError If an operand has the wrong type
	 */
	public function operate( $left, $right ) {
		$typeSpec = self::$opTypes[$this->name];

		$leftType = self::$typeSpecMap[$typeSpec[0]];
		$rightType = self::$typeSpecMap[$typeSpec[1]];
		$resultType = self::$typeSpecMap[$typeSpec[2]];

		$start = min( $this->pos, $left->pos, $right->pos );
		$end = max( $this->end, $left->end, $right->end );
		$length = $end - $start;

		// Build the result first so that type errors can quote the whole
		// sub-expression rather than just the operator.
		$newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType,
			"{$left->rpn} {$right->rpn} {$this->name}",
			$start, $length );

		if ( !$left->isType( $leftType ) ) {
			$newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
		}

		if ( !$right->isType( $rightType ) ) {
			$newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
		}
		return $newExpr;
	}
}

/**
 * The exception class for all the classes in this file. This will be thrown
 * back to the caller if there is any validation error.
 */
class CLDRPluralRuleError extends MWException {
	/**
	 * @param string $message The error detail, which is prefixed with
	 *   "CLDR plural rule error: "
	 */
	public function __construct( $message ) {
		parent::__construct( 'CLDR plural rule error: ' . $message );
	}
}