MediaWiki
REL1_23
|
<?php

/**
 * Converts a CLDR plural rule string into the "rpn" form carried by the
 * expression objects it builds.
 *
 * The rule is tokenised by nextToken() and reduced with an operator stack
 * and an operand stack (Dijkstra's shunting yard algorithm). The public
 * entry point is the static convert() method; the constructor is protected.
 */
class CLDRPluralRuleConverter {
	/**
	 * The rule string being parsed.
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current byte offset into $rule.
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The byte length of $rule; parsing stops when $pos reaches it.
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The stack of pending operator tokens.
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The stack of pending operand (expression) tokens.
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Operator precedence, keyed by operator name. An operator already on the
	 * stack is applied before an incoming operator whose precedence is lower
	 * than or equal to its own, so larger numbers bind more tightly and
	 * operators of equal precedence are applied left to right.
	 *
	 * @var array
	 */
	public static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * Characters skipped as whitespace between tokens.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Characters that may make up a number token. Only decimal digits are
	 * listed, so a number token is an unsigned run of digits.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * The single-letter operand symbols accepted in a rule.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * Anchored regex matching a word at the offset given to preg_match().
	 * "@" is included so that sample markers such as "@integer" match.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule string to its RPN form.
	 *
	 * @param string $rule The rule to convert
	 * @return mixed The "rpn" property of the expression the rule reduces to
	 * @throws CLDRPluralRuleError If the rule cannot be parsed
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Private constructor; use the static convert() method instead.
	 *
	 * @param string $rule The rule to parse
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Run the conversion over the rule given to the constructor.
	 *
	 * @return mixed The "rpn" property of the single remaining operand
	 * @throws CLDRPluralRuleError If no condition is found, operands are left
	 *   over, or the result is not of type "boolean"
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}

			// Resolve higher (or equal) precedence operators already on the
			// stack before pushing the new one.
			$lastOp = end( $this->operators );
			while ( $lastOp
				&& self::$precedence[$token->name] <= self::$precedence[$lastOp->name]
			) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the rule and advance the cursor past it.
	 *
	 * @return CLDRPluralRuleConverter_Expression|CLDRPluralRuleConverter_Operator|bool
	 *   The token, or false when the end of the rule (or the start of the
	 *   samples section) has been reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check matters:
		// strpos() alone is a substring test, so a multi-letter word such as
		// "ni" or "iv" would otherwise be accepted as one operand while the
		// cursor only advanced past its first letter.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * Apply an operator to the two operands on top of the operand stack and
	 * push the result back onto the stack.
	 *
	 * @param CLDRPluralRuleConverter_Operator $op The operator to apply
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The right-hand operand was pushed last, so it is popped first.
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create an expression token of type "number". This is also used for the
	 * single-letter operand symbols.
	 *
	 * @param string $text The token text
	 * @param int $pos The byte offset of the token within the rule
	 * @return CLDRPluralRuleConverter_Expression The new token
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create an operator token.
	 *
	 * @param string $type The operator name or symbol
	 * @param int $pos The byte offset of the token within the rule
	 * @param int $length The byte length of the token within the rule
	 * @return CLDRPluralRuleConverter_Operator The new token
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
	}

	/**
	 * Abort parsing by throwing an error.
	 *
	 * @param string $message The error message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}