PHPXRef 0.7.1 : vtigercrm-6.1.0 : /libraries/antlr/BaseRecognizer.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  
   3  abstract class BaseRecognizer{
   4      
   5      public static $MEMO_RULE_FAILED = -2;
   6      public static $MEMO_RULE_UNKNOWN = -1;
   7      public static $INITIAL_FOLLOW_STACK_SIZE = 100;
   8  
   9      // copies from Token object for convenience in actions
  10      public static $DEFAULT_TOKEN_CHANNEL; //= TokenConst::$DEFAULT_CHANNEL;
  11      public static $HIDDEN; //= TokenConst::$HIDDEN_CHANNEL;
  12  
  13      public static $NEXT_TOKEN_RULE_NAME = "nextToken";
  14      
  15  	public function __construct($state = null) {
  16          if ( $state==null ) {
  17              $state = new RecognizerSharedState();
  18          }
  19          $this->state = $state;
  20      }
  21      
   22      /** Reset the parser's state; subclasses must rewind the input stream. */
  23  	public function reset() {
  24          // wack everything related to error recovery
  25          if ( $this->state==null ) {
  26              return; // no shared state work to do
  27          }
  28          $this->state->_fsp = -1;
  29          $this->state->errorRecovery = false;
  30          $this->state->lastErrorIndex = -1;
  31          $this->state->failed = false;
  32          $this->state->syntaxErrors = 0;
  33          // wack everything related to backtracking and memoization
  34          $this->state->backtracking = 0;
  35          for ($i = 0; $this->state->ruleMemo!=null && $i < $this->state->ruleMemo->length; $i++) { // wipe cache
  36              $this->state->ruleMemo[$i] = null;
  37          }
  38      }
  39  
  40  
  41      /** Match current input symbol against ttype.  Attempt
  42       *  single token insertion or deletion error recovery.  If
  43       *  that fails, throw MismatchedTokenException.
  44       *
  45       *  To turn off single token insertion or deletion error
  46       *  recovery, override mismatchRecover() and have it call
  47       *  plain mismatch(), which does not recover.  Then any error
  48       *  in a rule will cause an exception and immediate exit from
  49       *  rule.  Rule would recover by resynchronizing to the set of
  50       *  symbols that can follow rule ref.
  51       */
  52  	public function match($input, $ttype, $follow)
  53      {
  54          //System.out.println("match "+((TokenStream)input).LT(1));
  55          $matchedSymbol = $this->getCurrentInputSymbol($input);
  56          if ( $input->LA(1)==$ttype ) {
  57              $input->consume();
  58              $this->state->errorRecovery = false;
  59              $this->state->failed = false;
  60              return $matchedSymbol;
  61          }
  62          if ( $this->state->backtracking>0 ) {
  63              $this->state->failed = true;
  64              return $matchedSymbol;
  65          }
  66          $matchedSymbol = $this->recoverFromMismatchedToken($input, $ttype, $follow);
  67          return $matchedSymbol;
  68      }
  69  
  70      /** Match the wildcard: in a symbol */
  71  	public function matchAny($input) {
  72          $this->state->errorRecovery = false;
  73          $this->state->failed = false;
  74          $input->consume();
  75      }
  76  
  77  	public function mismatchIsUnwantedToken($input, $ttype) {
  78          return $input->LA(2)==$ttype;
  79      }
  80  
  81  	public function mismatchIsMissingToken($input, $follow) {
  82          if ( $follow==null ) {
  83              // we have no information about the follow; we can only consume
  84              // a single token and hope for the best
  85              return $false;
  86          }
  87          // compute what can follow this grammar element reference
  88          if ( $follow->member(TokenConst::$EOR_TOKEN_TYPE) ) {
  89              $viableTokensFollowingThisRule = $this->computeContextSensitiveRuleFOLLOW();
  90              $follow = $follow->union($viableTokensFollowingThisRule);
  91              if ( $this->state->_fsp>=0 ) { // remove EOR if we're not the start symbol
  92                  $follow->remove(TokenConst::$EOR_TOKEN_TYPE);
  93              }
  94          }
  95          // if current token is consistent with what could come after set
  96          // then we know we're missing a token; error recovery is free to
  97          // "insert" the missing token
  98  
  99          //System.out.println("viable tokens="+follow.toString(getTokenNames()));
 100          //System.out.println("LT(1)="+((TokenStream)input).LT(1));
 101  
 102          // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
 103          // in follow set to indicate that the fall of the start symbol is
 104          // in the set (EOF can follow).
 105          if ( $follow->member($input->LA(1)) || $follow->member(TokenConst::$EOR_TOKEN_TYPE) ) {
 106              //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
 107              return true;
 108          }
 109          return false;
 110      }
 111  
 112      /** Factor out what to do upon token mismatch so tree parsers can behave
 113       *  differently.  Override and call mismatchRecover(input, ttype, follow)
  114       *  to get single token insertion and deletion.  Use this to turn off
 115       *  single token insertion and deletion. Override mismatchRecover
 116       *  to call this instead.
 117       */
 118  	protected function mismatch($input, $ttype, $follow)
 119      {
 120          if ( $this->mismatchIsUnwantedToken($input, $ttype) ) {
 121              throw new UnwantedTokenException($ttype, $input);
 122          }
 123          else if ( $this->mismatchIsMissingToken($input, $follow) ) {
 124              throw new MissingTokenException($ttype, $input, null);
 125          }
 126          throw new MismatchedTokenException($ttype, $input);
 127      }
 128  
 129      /** Report a recognition problem.
 130       *
 131       *  This method sets errorRecovery to indicate the parser is recovering
 132       *  not parsing.  Once in recovery mode, no errors are generated.
 133       *  To get out of recovery mode, the parser must successfully match
 134       *  a token (after a resync).  So it will go:
 135       *
 136       *         1. error occurs
 137       *         2. enter recovery mode, report error
 138       *         3. consume until token found in resynch set
 139       *         4. try to resume parsing
 140       *         5. next match() will reset errorRecovery mode
 141       *
 142       *  If you override, make sure to update syntaxErrors if you care about that.
 143       */
 144  	public function reportError($e) {
 145          // if we've already reported an error and have not matched a token
 146          // yet successfully, don't report any errors.
 147          if ( $this->state->errorRecovery ) {
 148              //System.err.print("[SPURIOUS] ");
 149              return;
 150          }
 151          $this->state->syntaxErrors++; // don't count spurious
 152          $this->state->errorRecovery = true;
 153  
 154          $this->displayRecognitionError($this->getTokenNames(), $e);
 155      }
 156      
 157      
 158  	public function displayRecognitionError($tokenNames, $e){
 159          $hdr = $this->getErrorHeader($e);
 160          $msg = $this->getErrorMessage($e, $tokenNames);
 161          $this->emitErrorMessage($hdr." ".$msg);
 162      }
 163      
 164      /** What error message should be generated for the various
 165       *  exception types?
 166       *
 167       *  Not very object-oriented code, but I like having all error message
 168       *  generation within one method rather than spread among all of the
 169       *  exception classes. This also makes it much easier for the exception
 170       *  handling because the exception classes do not have to have pointers back
 171       *  to this object to access utility routines and so on. Also, changing
 172       *  the message for an exception type would be difficult because you
 173       *  would have to subclassing exception, but then somehow get ANTLR
 174       *  to make those kinds of exception objects instead of the default.
 175       *  This looks weird, but trust me--it makes the most sense in terms
 176       *  of flexibility.
 177       *
 178       *  For grammar debugging, you will want to override this to add
 179       *  more information such as the stack frame with
 180       *  getRuleInvocationStack(e, this.getClass().getName()) and,
 181       *  for no viable alts, the decision description and state etc...
 182       *
 183       *  Override this to change the message generated for one or more
 184       *  exception types.
 185       */
 186  	public function getErrorMessage($e, $tokenNames) {
 187          $msg = $e->getMessage();
 188          if ( $e instanceof UnwantedTokenException ) {
 189              $ute = $e;
 190              $tokenName="<unknown>";
 191              if ( $ute->expecting== TokenConst::$EOF ) {
 192                  $tokenName = "EOF";
 193              }
 194              else {
 195                  $tokenName = $tokenNames[$ute->expecting];
 196              }
 197              $msg = "extraneous input ".$this->getTokenErrorDisplay($ute->getUnexpectedToken()).
 198                  " expecting ".$tokenName;
 199          }
 200          else if ( $e instanceof MissingTokenException ) {
 201              $mte = $e;
 202              $tokenName="<unknown>";
 203              if ( $mte->expecting== TokenConst::$EOF ) {
 204                  $tokenName = "EOF";
 205              }
 206              else {
 207                  $tokenName = $tokenNames[$mte->expecting];
 208              }
 209              $msg = "missing ".$tokenName." at ".$this->getTokenErrorDisplay($e->token);
 210          }
 211          else if ( $e instanceof MismatchedTokenException ) {
 212              $mte = $e;
 213              $tokenName="<unknown>";
 214              if ( $mte->expecting== TokenConst::$EOF ) {
 215                  $tokenName = "EOF";
 216              }
 217              else {
 218                  $tokenName = $tokenNames[$mte->expecting];
 219              }
 220              $msg = "mismatched input ".$this->getTokenErrorDisplay($e->token).
 221                  " expecting ".$tokenName;
 222          }
 223          else if ( $e instanceof MismatchedTreeNodeException ) {
 224              $mtne = $e;
 225              $tokenName="<unknown>";
 226              if ( $mtne->expecting==TokenConst::$EOF ) {
 227                  $tokenName = "EOF";
 228              }
 229              else {
 230                  $tokenName = $tokenNames[$mtne->expecting];
 231              }
 232              $msg = "mismatched tree node: ".$mtne->node.
 233                  " expecting ".$tokenName;
 234          }
 235          else if ( $e instanceof NoViableAltException ) {
 236              $nvae = $e;
 237              // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
 238              // and "(decision="+nvae.decisionNumber+") and
 239              // "state "+nvae.stateNumber
 240              $msg = "no viable alternative at input ".$this->getTokenErrorDisplay($e->token);
 241          }
 242          else if ( $e instanceof EarlyExitException ) {
 243              $eee = $e;
 244              // for development, can add "(decision="+eee.decisionNumber+")"
 245              $msg = "required (...)+ loop did not match anything at input ".
 246                  getTokenErrorDisplay($e->token);
 247          }
 248          else if ( $e instanceof MismatchedSetException ) {
 249              $mse = $e;
 250              $msg = "mismatched input ".$this->getTokenErrorDisplay($e->token).
 251                  " expecting set ".$mse->expecting;
 252          }
 253          else if ( $e instanceof MismatchedNotSetException ) {
 254              $mse = $e;
 255              $msg = "mismatched input ".$this->getTokenErrorDisplay($e->token).
 256                  " expecting set ".$mse->expecting;
 257          }
 258          else if ( $e instanceof FailedPredicateException ) {
 259              $fpe = $e;
 260              $msg = "rule ".$fpe->ruleName." failed predicate: {".
 261                  $fpe->predicateText."}?";
 262          }
 263          return $msg;
 264      }
 265  
 266      /** Get number of recognition errors (lexer, parser, tree parser).  Each
 267       *  recognizer tracks its own number.  So parser and lexer each have
 268       *  separate count.  Does not count the spurious errors found between
 269       *  an error and next valid token match
 270       *
 271       *  See also reportError()
 272       */
 273  	public function getNumberOfSyntaxErrors() {
 274          return $state->syntaxErrors;
 275      }
 276  
 277      /** What is the error header, normally line/character position information? */
 278  	public function getErrorHeader($e) {
 279          return "line ".$e->line.":".$e->charPositionInLine;
 280      }
 281      
 282      
 283      /** How should a token be displayed in an error message? The default
 284       *  is to display just the text, but during development you might
 285       *  want to have a lot of information spit out.  Override in that case
 286       *  to use t.toString() (which, for CommonToken, dumps everything about
 287       *  the token). This is better than forcing you to override a method in
 288       *  your token objects because you don't have to go modify your lexer
 289       *  so that it creates a new Java type.
 290       */
 291  	public function getTokenErrorDisplay($t) {
 292          $s = $t->getText();
 293          if ( $s==null ) {
 294              if ( $t->getType()==TokenConst::$EOF ) {
 295                  $s = "<EOF>";
 296              }
 297              else {
 298                  $s = "<".$t->getType().">";
 299              }
 300          }
 301          $s = str_replace("\n", '\n', $s);
 302          $s = str_replace("\r",'\r', $s);
 303          $s = str_replace("\t",'\t', $s);
 304          return "'".$s."'";
 305      }
 306  
 307      /** Override this method to change where error messages go */
 308  	public function emitErrorMessage($msg) {
 309          echo $msg;
 310      }
 311  
 312      /** Recover from an error found on the input stream.  This is
 313       *  for NoViableAlt and mismatched symbol exceptions.  If you enable
 314       *  single token insertion and deletion, this will usually not
 315       *  handle mismatched symbol exceptions but there could be a mismatched
 316       *  token that the match() routine could not recover from.
 317       */
 318  	public function recover($input, $re) {
 319          if ( $this->state->lastErrorIndex==$input->index() ) {
 320              // uh oh, another error at same token index; must be a case
 321              // where LT(1) is in the recovery token set so nothing is
 322              // consumed; consume a single token so at least to prevent
 323              // an infinite loop; this is a failsafe.
 324              $input->consume();
 325          }
 326          $this->state->lastErrorIndex = $input->index();
 327          $followSet = $this->computeErrorRecoverySet();
 328          $this->beginResync();
 329          $this->consumeUntilInSet($input, $followSet);
 330          $this->endResync();
 331      }
 332  
 333      /** A hook to listen in on the token consumption during error recovery.
  334       *  The DebugParser subclasses this to fire events to the listener.
 335       */
 336  	public function beginResync() {
 337      }
 338  
 339  	public function endResync() {
 340      }
 341  
 342      /*  Compute the error recovery set for the current rule.  During
 343       *  rule invocation, the parser pushes the set of tokens that can
 344       *  follow that rule reference on the stack; this amounts to
 345       *  computing FIRST of what follows the rule reference in the
 346       *  enclosing rule. This local follow set only includes tokens
 347       *  from within the rule; i.e., the FIRST computation done by
 348       *  ANTLR stops at the end of a rule.
 349       *
 350       *  EXAMPLE
 351       *
 352       *  When you find a "no viable alt exception", the input is not
 353       *  consistent with any of the alternatives for rule r.  The best
 354       *  thing to do is to consume tokens until you see something that
 355       *  can legally follow a call to r *or* any rule that called r.
 356       *  You don't want the exact set of viable next tokens because the
 357       *  input might just be missing a token--you might consume the
 358       *  rest of the input looking for one of the missing tokens.
 359       *
 360       *  Consider grammar:
 361       *
 362       *  a : '[' b ']'
 363       *    | '(' b ')'
 364       *    ;
 365       *  b : c '^' INT ;
 366       *  c : ID
 367       *    | INT
 368       *    ;
 369       *
 370       *  At each rule invocation, the set of tokens that could follow
 371       *  that rule is pushed on a stack.  Here are the various "local"
 372       *  follow sets:
 373       *
 374       *  FOLLOW(b1_in_a) = FIRST(']') = ']'
 375       *  FOLLOW(b2_in_a) = FIRST(')') = ')'
 376       *  FOLLOW(c_in_b) = FIRST('^') = '^'
 377       *
 378       *  Upon erroneous input "[]", the call chain is
 379       *
 380       *  a -> b -> c
 381       *
 382       *  and, hence, the follow context stack is:
 383       *
 384       *  depth  local follow set     after call to rule
 385       *    0         <EOF>                    a (from main())
 386       *    1          ']'                     b
  387       *    2          '^'                     c
 388       *
 389       *  Notice that ')' is not included, because b would have to have
 390       *  been called from a different context in rule a for ')' to be
 391       *  included.
 392       *
 393       *  For error recovery, we cannot consider FOLLOW(c)
 394       *  (context-sensitive or otherwise).  We need the combined set of
 395       *  all context-sensitive FOLLOW sets--the set of all tokens that
 396       *  could follow any reference in the call chain.  We need to
 397       *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
 398       *  we resync'd to that token, we'd consume until EOF.  We need to
 399       *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 400       *  In this case, for input "[]", LA(1) is in this set so we would
 401       *  not consume anything and after printing an error rule c would
 402       *  return normally.  It would not find the required '^' though.
 403       *  At this point, it gets a mismatched token error and throws an
 404       *  exception (since LA(1) is not in the viable following token
 405       *  set).  The rule exception handler tries to recover, but finds
 406       *  the same recovery set and doesn't consume anything.  Rule b
 407       *  exits normally returning to rule a.  Now it finds the ']' (and
 408       *  with the successful match exits errorRecovery mode).
 409       *
  410       *  So, you can see that the parser walks up call chain looking
 411       *  for the token that was a member of the recovery set.
 412       *
 413       *  Errors are not generated in errorRecovery mode.
 414       *
 415       *  ANTLR's error recovery mechanism is based upon original ideas:
 416       *
 417       *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
 418       *
 419       *  and
 420       *
 421       *  "A note on error recovery in recursive descent parsers":
 422       *  http://portal.acm.org/citation.cfm?id=947902.947905
 423       *
 424       *  Later, Josef Grosch had some good ideas:
 425       *
 426       *  "Efficient and Comfortable Error Recovery in Recursive Descent
 427       *  Parsers":
 428       *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 429       *
 430       *  Like Grosch I implemented local FOLLOW sets that are combined
 431       *  at run-time upon error to avoid overhead during parsing.
 432       */
 433  	protected function computeErrorRecoverySet() {
 434          return $this->combineFollows(false);
 435      }
 436  
 437      /** Compute the context-sensitive FOLLOW set for current rule.
 438       *  This is set of token types that can follow a specific rule
 439       *  reference given a specific call chain.  You get the set of
 440       *  viable tokens that can possibly come next (lookahead depth 1)
 441       *  given the current call chain.  Contrast this with the
 442       *  definition of plain FOLLOW for rule r:
 443       *
 444       *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 445       *
 446       *  where x in T* and alpha, beta in V*; T is set of terminals and
 447       *  V is the set of terminals and nonterminals.  In other words,
 448       *  FOLLOW(r) is the set of all tokens that can possibly follow
 449       *  references to r in *any* sentential form (context).  At
 450       *  runtime, however, we know precisely which context applies as
 451       *  we have the call chain.  We may compute the exact (rather
 452       *  than covering superset) set of following tokens.
 453       *
 454       *  For example, consider grammar:
 455       *
 456       *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 457       *       | "return" expr '.'
 458       *       ;
 459       *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 460       *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 461       *       | '(' expr ')'
 462       *       ;
 463       *
 464       *  The FOLLOW sets are all inclusive whereas context-sensitive
 465       *  FOLLOW sets are precisely what could follow a rule reference.
  466       *  For input "i=(3);", here is the derivation:
 467       *
 468       *  stat => ID '=' expr ';'
 469       *       => ID '=' atom ('+' atom)* ';'
 470       *       => ID '=' '(' expr ')' ('+' atom)* ';'
 471       *       => ID '=' '(' atom ')' ('+' atom)* ';'
 472       *       => ID '=' '(' INT ')' ('+' atom)* ';'
 473       *       => ID '=' '(' INT ')' ';'
 474       *
 475       *  At the "3" token, you'd have a call chain of
 476       *
 477       *    stat -> expr -> atom -> expr -> atom
 478       *
 479       *  What can follow that specific nested ref to atom?  Exactly ')'
 480       *  as you can see by looking at the derivation of this specific
 481       *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 482       *
 483       *  You want the exact viable token set when recovering from a
 484       *  token mismatch.  Upon token mismatch, if LA(1) is member of
 485       *  the viable next token set, then you know there is most likely
 486       *  a missing token in the input stream.  "Insert" one by just not
 487       *  throwing an exception.
 488       */
 489  	protected function computeContextSensitiveRuleFOLLOW() {
 490          return $this->combineFollows(true);
 491      }
 492  
 493  	protected function combineFollows($exact) {
 494          $top = $this->state->_fsp;
 495          $followSet = new Set(array());
 496          for ($i=$top; $i>=0; $i--) {
 497              $localFollowSet = $this->state->following[$i];
 498              /*
 499              System.out.println("local follow depth "+i+"="+
 500                                 localFollowSet.toString(getTokenNames())+")");
 501               */
 502              $followSet->unionInPlace($localFollowSet);
 503              if ( $this->exact ) {
 504                  // can we see end of rule?
 505                  if ( $localFollowSet->member(TokenConst::$EOR_TOKEN_TYPE) ) {
 506                      // Only leave EOR in set if at top (start rule); this lets
 507                      // us know if have to include follow(start rule); i.e., EOF
 508                      if ( $i>0 ) {
 509                          $followSet->remove(TokenConst::$EOR_TOKEN_TYPE);
 510                      }
 511                  }
 512                  else { // can't see end of rule, quit
 513                      break;
 514                  }
 515              }
 516          }
 517          return $followSet;
 518      }
 519  
 520      /** Attempt to recover from a single missing or extra token.
 521       *
 522       *  EXTRA TOKEN
 523       *
 524       *  LA(1) is not what we are looking for.  If LA(2) has the right token,
 525       *  however, then assume LA(1) is some extra spurious token.  Delete it
 526       *  and LA(2) as if we were doing a normal match(), which advances the
 527       *  input.
 528       *
 529       *  MISSING TOKEN
 530       *
 531       *  If current token is consistent with what could come after
 532       *  ttype then it is ok to "insert" the missing token, else throw
 533       *  exception For example, Input "i=(3;" is clearly missing the
 534       *  ')'.  When the parser returns from the nested call to expr, it
 535       *  will have call chain:
 536       *
 537       *    stat -> expr -> atom
 538       *
 539       *  and it will be trying to match the ')' at this point in the
 540       *  derivation:
 541       *
 542       *       => ID '=' '(' INT ')' ('+' atom)* ';'
 543       *                          ^
 544       *  match() will see that ';' doesn't match ')' and report a
 545       *  mismatched token error.  To recover, it sees that LA(1)==';'
 546       *  is in the set of tokens that can follow the ')' token
 547       *  reference in rule atom.  It can assume that you forgot the ')'.
 548       */
 549  	protected function recoverFromMismatchedToken($input, $ttype, $follow)
 550      {
 551          $e = null;
 552          // if next token is what we are looking for then "delete" this token
 553  
 554          if ( $this->mismatchIsUnwantedToken($input, $ttype) ) {
 555              $e = new UnwantedTokenException($ttype, $input);
 556              /*
 557              System.err.println("recoverFromMismatchedToken deleting "+
 558                                 ((TokenStream)input).LT(1)+
 559                                 " since "+((TokenStream)input).LT(2)+" is what we want");
 560               */
 561              $this->beginResync();
 562              $input->consume(); // simply delete extra token
 563              $this->endResync();
 564              $this->reportError($e);  // report after consuming so AW sees the token in the exception
 565              // we want to return the token we're actually matching
 566              $matchedSymbol = $this->getCurrentInputSymbol($input);
 567              $input->consume(); // move past ttype token as if all were ok
 568              return $matchedSymbol;
 569          }
 570          // can't recover with single token deletion, try insertion
 571          if ( $this->mismatchIsMissingToken($input, $follow) ) {
 572              $inserted = $this->getMissingSymbol($input, $e, $ttype, $follow);
 573              $e = new MissingTokenException($ttype, $input, $inserted);
 574              $this->reportError($e);  // report after inserting so AW sees the token in the exception
 575              return $inserted;
 576          }
 577          // even that didn't work; must throw the exception
 578          $e = new MismatchedTokenException($ttype, $input);
 579          throw $e;
 580      }
 581  
 582      /** Not currently used */
 583  	public function recoverFromMismatchedSet($input, $e, $follow) {
 584          if ( $this->mismatchIsMissingToken($input, $follow) ) {
 585              // System.out.println("missing token");
 586              reportError($e);
 587              // we don't know how to conjure up a token for sets yet
 588              return $this->getMissingSymbol($input, $e, TokenConst::$INVALID_TOKEN_TYPE, $follow);
 589          }
 590          // TODO do single token deletion like above for Token mismatch
 591          throw $e;
 592      }
 593  
 594      /** Match needs to return the current input symbol, which gets put
 595       *  into the label for the associated token ref; e.g., x=ID.  Token
 596       *  and tree parsers need to return different objects. Rather than test
 597       *  for input stream type or change the IntStream interface, I use
 598       *  a simple method to ask the recognizer to tell me what the current
 599       *  input symbol is.
 600       * 
 601       *  This is ignored for lexers.
 602       */
 603  	protected function getCurrentInputSymbol($input) { return null; }
 604  
 605      /** Conjure up a missing token during error recovery.
 606       *
 607       *  The recognizer attempts to recover from single missing
 608       *  symbols. But, actions might refer to that missing symbol.
 609       *  For example, x=ID {f($x);}. The action clearly assumes
 610       *  that there has been an identifier matched previously and that
 611       *  $x points at that token. If that token is missing, but
 612       *  the next token in the stream is what we want we assume that
 613       *  this token is missing and we keep going. Because we
 614       *  have to return some token to replace the missing token,
 615       *  we have to conjure one up. This method gives the user control
 616       *  over the tokens returned for missing tokens. Mostly,
 617       *  you will want to create something special for identifier
 618       *  tokens. For literals such as '{' and ',', the default
 619       *  action in the parser or tree parser works. It simply creates
 620       *  a CommonToken of the appropriate type. The text will be the token.
 621       *  If you change what tokens must be created by the lexer,
 622       *  override this method to create the appropriate tokens.
 623       */
 624  	protected function getMissingSymbol($input, $e, $expectedTokenType, $follow) {
 625          return null;
 626      }
 627  
 628  	public function consumeUntilMatchesType($input, $tokenType) {
 629          //System.out.println("consumeUntil "+tokenType);
 630          $ttype = $input->LA(1);
 631          while ($ttype != TokenConst::$EOF && $ttype != $tokenType) {
 632              $input->consume();
 633              $ttype = $input->LA(1);
 634          }
 635      }
 636  
 637      /** Consume tokens until one matches the given token set */
 638  	public function consumeUntilInSet($input, $set) {
 639          //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
 640          $ttype = $input->LA(1);
 641          while ($ttype != TokenConst::$EOF && !$set->member($ttype) ) {
 642              //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
 643              $input->consume();
 644              $ttype = $input->LA(1);
 645          }
 646      }
 647  
 648      /** Push a rule's follow set using our own hardcoded stack */
 649  	protected function pushFollow($fset) {
 650          // if ( ($this->state->_fsp +1)>=sizeof($this->state->following) ) {
 651          //             $f = array();
 652          //             System.arraycopy(state.following, 0, f, 0, state.following.length-1);
 653          //             $this->state->following = f;
 654          //         }
 655           $this->state->following[++$this->state->_fsp] = $fset;
 656      }
 657  
 658      /** Return List<String> of the rules in your parser instance
 659       *  leading up to a call to this method.  You could override if
 660       *  you want more details such as the file/line info of where
 661       *  in the parser java code a rule is invoked.
 662       *
 663       *  This is very useful for error messages and for context-sensitive
 664       *  error recovery.
 665       */
 666  
 667      /** A more general version of getRuleInvocationStack where you can
 668       *  pass in, for example, a RecognitionException to get its rule
 669       *  stack trace.  This routine is shared with all recognizers, hence,
 670       *  static.
 671       *
 672       *  TODO: move to a utility class or something; weird having lexer call this
 673       */
 674  	public static function getRuleInvocationStack($e=null,
 675                                                $recognizerClassName=null)
 676      {
 677          if($e==null){
 678              $e = new Exception();
 679          }
 680          if($recognizerClassName==null){
 681              $recognizerClassName = get_class($this);
 682          }
 683          throw new Exception("Not implemented yet");
 684          // List rules = new ArrayList();
 685          //         StackTraceElement[] stack = e.getStackTrace();
 686          //         int i = 0;
 687          //         for (i=stack.length-1; i>=0; i--) {
 688          //             StackTraceElement t = stack[i];
 689          //             if ( t.getClassName().startsWith("org.antlr.runtime.") ) {
 690          //                 continue; // skip support code such as this method
 691          //             }
 692          //             if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) {
 693          //                 continue;
 694          //             }
 695          //             if ( !t.getClassName().equals(recognizerClassName) ) {
 696          //                 continue; // must not be part of this parser
 697          //             }
 698          //             rules.add(t.getMethodName());
 699          //         }
 700          //         return rules;
 701      }
 702  
 703  	public function getBacktrackingLevel() {
 704          return $this->state->backtracking;
 705      }
 706  
 707      /** Used to print out token names like ID during debugging and
 708       *  error reporting.  The generated parsers implement a method
 709       *  that overrides this to point to their String[] tokenNames.
 710       */
 711  	public function getTokenNames() {
 712          return null;
 713      }
 714  
 715      /** For debugging and other purposes, might want the grammar name.
 716       *  Have ANTLR generate an implementation for this method.
 717       */
 718  	public function getGrammarFileName() {
 719          return null;
 720      }
 721  
 722      public abstract function getSourceName();
 723  
 724      /** A convenience method for use most often with template rewrites.
 725       *  Convert a List<Token> to List<String>
 726       */
 727  	public function toStrings($tokens) {
 728          if ( $tokens==null ) return null;
 729          $strings = array();
 730          for ($i=0; $i<$tokens->size(); $i++) {
 731              $strings[] = $tokens[$i]->getText();
 732          }
 733          return $strings;
 734      }
 735  
 736      /** Given a rule number and a start token index number, return
 737       *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
 738       *  start index.  If this rule has parsed input starting from the
 739       *  start index before, then return where the rule stopped parsing.
 740       *  It returns the index of the last token matched by the rule.
 741       *
 742       *  For now we use a hashtable and just the slow Object-based one.
 743       *  Later, we can make a special one for ints and also one that
 744       *  tosses out data after we commit past input position i.
 745       */
 746  	public function getRuleMemoization($ruleIndex, $ruleStartIndex) {
 747          if ( $this->state->ruleMemo[$ruleIndex]==null ) {
 748              $this->state->ruleMemo[$ruleIndex] = array();
 749          }
 750          $stopIndexI =
 751              $this->state->ruleMemo[$ruleIndex][$ruleStartIndex];
 752          if ( $stopIndexI==null ) {
 753              return self::$MEMO_RULE_UNKNOWN;
 754          }
 755          return $stopIndexI;
 756      }
 757  
 758      /** Has this rule already parsed input at the current index in the
 759       *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
 760       *  If we attempted but failed to parse properly before, return
 761       *  MEMO_RULE_FAILED.
 762       *
 763       *  This method has a side-effect: if we have seen this input for
 764       *  this rule and successfully parsed before, then seek ahead to
 765       *  1 past the stop token matched for this rule last time.
 766       */
 767  	public function alreadyParsedRule($input, $ruleIndex) {
 768          $stopIndex = $this->getRuleMemoization($ruleIndex, $input->index());
 769          if ( $stopIndex==self::$MEMO_RULE_UNKNOWN ) {
 770              return false;
 771          }
 772          if ( $stopIndex==self::$MEMO_RULE_FAILED ) {
 773              //System.out.println("rule "+ruleIndex+" will never succeed");
 774              $this->state->failed=true;
 775          }
 776          else {
 777              //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
 778              $input->seek($stopIndex+1); // jump to one past stop token
 779          }
 780          return true;
 781      }
 782  
 783      /** Record whether or not this rule parsed the input at this position
 784       *  successfully.  Use a standard java hashtable for now.
 785       */
 786  	public function memoize($input, $ruleIndex, $ruleStartIndex){
 787          $stopTokenIndex = $this->state->failed?self::$MEMO_RULE_FAILED:$input->index()-1;
 788          if ( $this->state->ruleMemo==null ) {
 789              echo("!!!!!!!!! memo array is null for ". getGrammarFileName());
 790          }
 791          if ( $ruleIndex >= sizeof($this->state->ruleMemo) ) {
 792              echo("!!!!!!!!! memo size is ".sizeof($this->state->ruleMemo).", but rule index is ".$ruleIndex);
 793          }
 794          if ( $this->state->ruleMemo[$ruleIndex]!=null ) {
 795              $this->state->ruleMemo[$ruleIndex][$ruleStartIndex] = $stopTokenIndex;
 796          }
 797      }
 798  
 799      /** return how many rule/input-index pairs there are in total.
 800       *  TODO: this includes synpreds. :(
 801       */
 802  	public function getRuleMemoizationCacheSize() {
 803          $n = 0;
 804          for ($i = 0; $this->state->ruleMemo!=null && $i < sizeof($this->state->ruleMemo); $i++) {
 805              $ruleMap = $this->state->ruleMemo[$i];
 806              if ( $ruleMap!=null ) {
 807                  $n += sizeof($ruleMap); // how many input indexes are recorded?
 808              }
 809          }
 810          return $n;
 811      }
 812  
 813  	public function traceIn($ruleName, $ruleIndex, $inputSymbol)  {
 814          echo("enter ".$ruleName." ".$inputSymbol);
 815          if ( $this->state->failed ) {
 816              echo(" failed=".$this->state->failed);
 817          }
 818          if ( $this->state->backtracking>0 ) {
 819              echo(" backtracking=".$this->state->backtracking);
 820          }
 821          echo "\n";
 822      }
 823  
 824  	public function traceOut($ruleName, $ruleIndex, $inputSymbol) {
 825          echo("exit ".$ruleName." ".$inputSymbol);
 826          if ( $this->state->failed ) {
 827              echo(" failed=".$this->state->failed);
 828          }
 829          if ( $this->state->backtracking>0 ) {
 830              echo(" backtracking="+$this->state->backtracking);
 831          }
 832          echo "\n";
 833      }
 834  
 835  	public function getToken($name){
 836          if(preg_match("/\d+/", $name)){
 837              return (integer)$name;
 838          }else{
 839              return $this->$name;
 840          }
 841      }
 842      
 843  	public function getTokenName($tokenId){
 844          
 845      }
 846  
 847  }
 848  
 849  BaseRecognizer::$DEFAULT_TOKEN_CHANNEL = TokenConst::$DEFAULT_CHANNEL;
 850  BaseRecognizer::$HIDDEN = TokenConst::$HIDDEN_CHANNEL;
 851  ?>
PHP Cross Reference of vtigercrm-6.1.0

/libraries/antlr/ -> BaseRecognizer.php (source)