[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * @author Niklas Laxström, Tim Starling
 *
 * @copyright Copyright © 2010-2012, Niklas Laxström
 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 *
 * @file
 * @since 1.20
 */

/**
 * Helper class for converting rules to reverse polish notation (RPN).
 *
 * The rule string is tokenised by nextToken() and the tokens are combined
 * by doConvert() using an operator stack and an operand stack. Use the
 * static convert() method; the constructor is not public.
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position (byte offset into $rule)
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position (strlen() of $rule)
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 *
	 * The keys double as the list of recognised word operators: nextToken()
	 * uses isset() on this table to decide whether a word is an operator.
	 */
	private static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule cannot be parsed
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Protected constructor; instances are created by convert().
	 *
	 * @param string $rule
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
	 * @throws CLDRPluralRuleError If the rule is empty, malformed, or does not
	 *   evaluate to a boolean
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverterFragment|bool $token */
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverterExpression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			} else {
				// Operator
				if ( !$expectOperator ) {
					$token->error( 'unexpected operator' );
				}
				// Resolve higher precedence levels.
				// NOTE(review): $token->name must be a key of $precedence here;
				// the symbolic operators '=', '!=' and '%' have no entry, so
				// presumably the operator class maps them to named operators.
				// Confirm in CLDRPluralRuleConverterOperator.
				$lastOp = end( $this->operators );
				while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
					$this->doOperation( $lastOp );
					array_pop( $this->operators );
					$lastOp = end( $this->operators );
				}
				$this->operators[] = $token;
			}
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * @return CLDRPluralRuleConverterFragment|bool The next token, or false
	 *   when the end of the rule (or the start of the samples) is reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check matters:
		// strpos() alone would also accept multi-letter substrings of the
		// symbol list (e.g. "ni" or "iv"), producing an operand whose text
		// is longer than the single character consumed below.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param CLDRPluralRuleConverterOperator $op
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The right-hand operand was pushed last, so it comes off first.
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text
	 * @param int $pos
	 * @return CLDRPluralRuleConverterExpression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type
	 * @param int $pos
	 * @param int $length
	 * @return CLDRPluralRuleConverterOperator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |