[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/languages/utils/ -> CLDRPluralRuleConverter.php (source)

   1  <?php
   2  /**
   3   * @author Niklas Laxström, Tim Starling
   4   *
   5   * @copyright Copyright © 2010-2012, Niklas Laxström
   6   * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
   7   *
   8   * @file
   9   * @since 1.20
  10   */
  11  
  12  /**
  13   * Helper class for converting rules to reverse polish notation (RPN).
  14   */
  15  class CLDRPluralRuleConverter {
  16      /**
  17       * The input string
  18       *
  19       * @var string
  20       */
  21      public $rule;
  22  
  23      /**
  24       * The current position
  25       *
  26       * @var int
  27       */
  28      public $pos;
  29  
  30      /**
  31       * The past-the-end position
  32       *
  33       * @var int
  34       */
  35      public $end;
  36  
  37      /**
  38       * The operator stack
  39       *
  40       * @var array
  41       */
  42      public $operators = array();
  43  
  44      /**
  45       * The operand stack
  46       *
  47       * @var array
  48       */
  49      public $operands = array();
  50  
  51      /**
  52       * Precedence levels. Note that there's no need to worry about associativity
  53       * for the level 4 operators, since they return boolean and don't accept
  54       * boolean inputs.
  55       */
  56      private static $precedence = array(
  57          'or' => 2,
  58          'and' => 3,
  59          'is' => 4,
  60          'is-not' => 4,
  61          'in' => 4,
  62          'not-in' => 4,
  63          'within' => 4,
  64          'not-within' => 4,
  65          'mod' => 5,
  66          ',' => 6,
  67          '..' => 7,
  68      );
  69  
  70      /**
  71       * A character list defining whitespace, for use in strspn() etc.
  72       */
  73      const WHITESPACE_CLASS = " \t\r\n";
  74  
  75      /**
  76       * Same for digits. Note that the grammar given in UTS #35 doesn't allow
  77       * negative numbers or decimal separators.
  78       */
  79      const NUMBER_CLASS = '0123456789';
  80  
  81      /**
  82       * A character list of symbolic operands.
  83       */
  84      const OPERAND_SYMBOLS = 'nivwft';
  85  
  86      /**
  87       * An anchored regular expression which matches a word at the current offset.
  88       */
  89      const WORD_REGEX = '/[a-zA-Z@]+/A';
  90  
  91      /**
  92       * Convert a rule to RPN. This is the only public entry point.
  93       *
  94       * @param string $rule The rule to convert
  95       * @return string The RPN representation of the rule
  96       */
  97  	public static function convert( $rule ) {
  98          $parser = new self( $rule );
  99  
 100          return $parser->doConvert();
 101      }
 102  
 103      /**
 104       * Private constructor.
 105       * @param string $rule
 106       */
 107  	protected function __construct( $rule ) {
 108          $this->rule = $rule;
 109          $this->pos = 0;
 110          $this->end = strlen( $rule );
 111      }
 112  
 113      /**
 114       * Do the operation.
 115       *
 116       * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
 117       */
 118  	protected function doConvert() {
 119          $expectOperator = true;
 120  
 121          // Iterate through all tokens, saving the operators and operands to a
 122          // stack per Dijkstra's shunting yard algorithm.
 123          /** @var CLDRPluralRuleConverterOperator $token */
 124          while ( false !== ( $token = $this->nextToken() ) ) {
 125              // In this grammar, there are only binary operators, so every valid
 126              // rule string will alternate between operator and operand tokens.
 127              $expectOperator = !$expectOperator;
 128  
 129              if ( $token instanceof CLDRPluralRuleConverterExpression ) {
 130                  // Operand
 131                  if ( $expectOperator ) {
 132                      $token->error( 'unexpected operand' );
 133                  }
 134                  $this->operands[] = $token;
 135                  continue;
 136              } else {
 137                  // Operator
 138                  if ( !$expectOperator ) {
 139                      $token->error( 'unexpected operator' );
 140                  }
 141                  // Resolve higher precedence levels
 142                  $lastOp = end( $this->operators );
 143                  while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
 144                      $this->doOperation( $lastOp, $this->operands );
 145                      array_pop( $this->operators );
 146                      $lastOp = end( $this->operators );
 147                  }
 148                  $this->operators[] = $token;
 149              }
 150          }
 151  
 152          // Finish off the stack
 153          while ( $op = array_pop( $this->operators ) ) {
 154              $this->doOperation( $op, $this->operands );
 155          }
 156  
 157          // Make sure the result is sane. The first case is possible for an empty
 158          // string input, the second should be unreachable.
 159          if ( !count( $this->operands ) ) {
 160              $this->error( 'condition expected' );
 161          } elseif ( count( $this->operands ) > 1 ) {
 162              $this->error( 'missing operator or too many operands' );
 163          }
 164  
 165          $value = $this->operands[0];
 166          if ( $value->type !== 'boolean' ) {
 167              $this->error( 'the result must have a boolean type' );
 168          }
 169  
 170          return $this->operands[0]->rpn;
 171      }
 172  
 173      /**
 174       * Fetch the next token from the input string.
 175       *
 176       * @return CLDRPluralRuleConverterFragment The next token
 177       */
 178  	protected function nextToken() {
 179          if ( $this->pos >= $this->end ) {
 180              return false;
 181          }
 182  
 183          // Whitespace
 184          $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
 185          $this->pos += $length;
 186  
 187          if ( $this->pos >= $this->end ) {
 188              return false;
 189          }
 190  
 191          // Number
 192          $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
 193          if ( $length !== 0 ) {
 194              $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
 195              $this->pos += $length;
 196  
 197              return $token;
 198          }
 199  
 200          // Two-character operators
 201          $op2 = substr( $this->rule, $this->pos, 2 );
 202          if ( $op2 === '..' || $op2 === '!=' ) {
 203              $token = $this->newOperator( $op2, $this->pos, 2 );
 204              $this->pos += 2;
 205  
 206              return $token;
 207          }
 208  
 209          // Single-character operators
 210          $op1 = $this->rule[$this->pos];
 211          if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
 212              $token = $this->newOperator( $op1, $this->pos, 1 );
 213              $this->pos++;
 214  
 215              return $token;
 216          }
 217  
 218          // Word
 219          if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
 220              $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
 221          }
 222          $word1 = strtolower( $m[0] );
 223          $word2 = '';
 224          $nextTokenPos = $this->pos + strlen( $word1 );
 225          if ( $word1 === 'not' || $word1 === 'is' ) {
 226              // Look ahead one word
 227              $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
 228              if ( $nextTokenPos < $this->end
 229                  && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
 230              ) {
 231                  $word2 = strtolower( $m[0] );
 232                  $nextTokenPos += strlen( $word2 );
 233              }
 234          }
 235  
 236          // Two-word operators like "is not" take precedence over single-word operators like "is"
 237          if ( $word2 !== '' ) {
 238              $bothWords = "{$word1}-{$word2}";
 239              if ( isset( self::$precedence[$bothWords] ) ) {
 240                  $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
 241                  $this->pos = $nextTokenPos;
 242  
 243                  return $token;
 244              }
 245          }
 246  
 247          // Single-word operators
 248          if ( isset( self::$precedence[$word1] ) ) {
 249              $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
 250              $this->pos += strlen( $word1 );
 251  
 252              return $token;
 253          }
 254  
 255          // The single-character operand symbols
 256          if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
 257              $token = $this->newNumber( $word1, $this->pos );
 258              $this->pos++;
 259  
 260              return $token;
 261          }
 262  
 263          // Samples
 264          if ( $word1 === '@integer' || $word1 === '@decimal' ) {
 265              // Samples are like comments, they have no effect on rule evaluation.
 266              // They run from the first sample indicator to the end of the string.
 267              $this->pos = $this->end;
 268  
 269              return false;
 270          }
 271  
 272          $this->error( 'unrecognised word' );
 273      }
 274  
 275      /**
 276       * For the binary operator $op, pop its operands off the stack and push
 277       * a fragment with rpn and type members describing the result of that
 278       * operation.
 279       *
 280       * @param CLDRPluralRuleConverterOperator $op
 281       */
 282  	protected function doOperation( $op ) {
 283          if ( count( $this->operands ) < 2 ) {
 284              $op->error( 'missing operand' );
 285          }
 286          $right = array_pop( $this->operands );
 287          $left = array_pop( $this->operands );
 288          $result = $op->operate( $left, $right );
 289          $this->operands[] = $result;
 290      }
 291  
 292      /**
 293       * Create a numerical expression object
 294       *
 295       * @param string $text
 296       * @param int $pos
 297       * @return CLDRPluralRuleConverterExpression The numerical expression
 298       */
 299  	protected function newNumber( $text, $pos ) {
 300          return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
 301      }
 302  
 303      /**
 304       * Create a binary operator
 305       *
 306       * @param string $type
 307       * @param int $pos
 308       * @param int $length
 309       * @return CLDRPluralRuleConverterOperator The operator
 310       */
 311  	protected function newOperator( $type, $pos, $length ) {
 312          return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
 313      }
 314  
 315      /**
 316       * Throw an error
 317       * @param string $message
 318       */
 319  	protected function error( $message ) {
 320          throw new CLDRPluralRuleError( $message );
 321      }
 322  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1