[ Index ] |
PHP Cross Reference of vtigercrm-6.1.0 |
[Summary view] [Print] [Text view]
<?php

/**
 * Shared base class of the recognizers: token matching, error reporting,
 * single-token error recovery, the follow-set stack and rule memoization.
 *
 * All mutable parsing state lives in $this->state (a RecognizerSharedState),
 * which is either handed to the constructor or created there.
 */
abstract class BaseRecognizer{

    /** Memoization marker: the rule was tried at this position and failed. */
    public static $MEMO_RULE_FAILED = -2;
    /** Memoization marker: the rule has not been tried at this position. */
    public static $MEMO_RULE_UNKNOWN = -1;
    public static $INITIAL_FOLLOW_STACK_SIZE = 100;

    // copies from Token object for convenience in actions; both are assigned
    // right after the class declaration at the bottom of this file
    public static $DEFAULT_TOKEN_CHANNEL; //= TokenConst::$DEFAULT_CHANNEL;
    public static $HIDDEN; //= TokenConst::$HIDDEN_CHANNEL;

    public static $NEXT_TOKEN_RULE_NAME = "nextToken";

    /**
     * @param RecognizerSharedState|null $state shared state to reuse; a fresh
     *        RecognizerSharedState is created when none is given
     */
    public function __construct($state = null) {
        if ( $state==null ) {
            $state = new RecognizerSharedState();
        }
        $this->state = $state;
    }

    /** Reset the parser's state; subclasses must rewind the input stream. */
    public function reset() {
        // wipe everything related to error recovery
        if ( $this->state==null ) {
            return; // no shared state work to do
        }
        $this->state->_fsp = -1;
        $this->state->errorRecovery = false;
        $this->state->lastErrorIndex = -1;
        $this->state->failed = false;
        $this->state->syntaxErrors = 0;
        // wipe everything related to backtracking and memoization
        $this->state->backtracking = 0;
        $memo = $this->state->ruleMemo;
        if ( is_array($memo) ) {
            // PHP arrays have no ->length; walk the real keys so that sparse
            // rule indexes are cleared as well
            foreach (array_keys($memo) as $ruleIndex) {
                $this->state->ruleMemo[$ruleIndex] = null;
            }
        }
        else if ( is_object($memo) && isset($memo->length) ) {
            // list-like object exposing a length property
            for ($i = 0; $i < $memo->length; $i++) {
                $this->state->ruleMemo[$i] = null;
            }
        }
    }


    /** Match current input symbol against ttype.  Attempt
     *  single token insertion or deletion error recovery.  If
     *  that fails, throw MismatchedTokenException.
     *
     *  To turn off single token insertion or deletion error
     *  recovery, override mismatchRecover() and have it call
     *  plain mismatch(), which does not recover.  Then any error
     *  in a rule will cause an exception and immediate exit from
     *  rule.  Rule would recover by resynchronizing to the set of
     *  symbols that can follow rule ref.
     *
     *  @param mixed $input  input stream (must offer LA() and consume())
     *  @param int   $ttype  expected token type
     *  @param mixed $follow follow set handed on to the recovery code
     *  @return mixed the symbol from getCurrentInputSymbol(), or whatever
     *          recoverFromMismatchedToken() produced
     */
    public function match($input, $ttype, $follow)
    {
        $matchedSymbol = $this->getCurrentInputSymbol($input);
        if ( $input->LA(1)==$ttype ) {
            $input->consume();
            $this->state->errorRecovery = false;
            $this->state->failed = false;
            return $matchedSymbol;
        }
        if ( $this->state->backtracking>0 ) {
            // while backtracking we only flag the failure; no recovery
            $this->state->failed = true;
            return $matchedSymbol;
        }
        return $this->recoverFromMismatchedToken($input, $ttype, $follow);
    }

    /** Match the wildcard: consume one symbol and clear the error flags. */
    public function matchAny($input) {
        $this->state->errorRecovery = false;
        $this->state->failed = false;
        $input->consume();
    }

    /** True when the token after the current one is the expected type. */
    public function mismatchIsUnwantedToken($input, $ttype) {
        return $input->LA(2)==$ttype;
    }

    /**
     * Decide whether the current token is consistent with what may follow
     * the expected element, i.e. whether a token is simply missing.
     *
     * @return bool
     */
    public function mismatchIsMissingToken($input, $follow) {
        if ( $follow==null ) {
            // we have no information about the follow; we can only consume
            // a single token and hope for the best
            return false;
        }
        // compute what can follow this grammar element reference
        if ( $follow->member(TokenConst::$EOR_TOKEN_TYPE) ) {
            $viableTokensFollowingThisRule = $this->computeContextSensitiveRuleFOLLOW();
            $follow = $follow->union($viableTokensFollowingThisRule);
            if ( $this->state->_fsp>=0 ) { // remove EOR if we're not the start symbol
                $follow->remove(TokenConst::$EOR_TOKEN_TYPE);
            }
        }
        // if current token is consistent with what could come after set
        // then we know we're missing a token; error recovery is free to
        // "insert" the missing token

        // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
        // in follow set to indicate that the follow of the start symbol is
        // in the set (EOF can follow).
        if ( $follow->member($input->LA(1)) || $follow->member(TokenConst::$EOR_TOKEN_TYPE) ) {
            return true;
        }
        return false;
    }

    /** Factor out what to do upon token mismatch so tree parsers can behave
     *  differently.  Override and call mismatchRecover(input, ttype, follow)
     *  to get single token insertion and deletion.  Use this to turn off
     *  single token insertion and deletion. Override mismatchRecover
     *  to call this instead.
     *
     *  Always throws: UnwantedTokenException, MissingTokenException or
     *  MismatchedTokenException.
     */
    protected function mismatch($input, $ttype, $follow)
    {
        if ( $this->mismatchIsUnwantedToken($input, $ttype) ) {
            throw new UnwantedTokenException($ttype, $input);
        }
        else if ( $this->mismatchIsMissingToken($input, $follow) ) {
            throw new MissingTokenException($ttype, $input, null);
        }
        throw new MismatchedTokenException($ttype, $input);
    }

    /** Report a recognition problem.
     *
     *  This method sets errorRecovery to indicate the parser is recovering
     *  not parsing.  Once in recovery mode, no errors are generated.
     *  To get out of recovery mode, the parser must successfully match
     *  a token (after a resync).  So it will go:
     *
     *      1. error occurs
     *      2. enter recovery mode, report error
     *      3. consume until token found in resynch set
     *      4. try to resume parsing
     *      5. next match() will reset errorRecovery mode
     *
     *  If you override, make sure to update syntaxErrors if you care about that.
     */
    public function reportError($e) {
        // if we've already reported an error and have not matched a token
        // yet successfully, don't report any errors.
        if ( $this->state->errorRecovery ) {
            return;
        }
        $this->state->syntaxErrors++; // don't count spurious
        $this->state->errorRecovery = true;

        $this->displayRecognitionError($this->getTokenNames(), $e);
    }


    /** Emit "<header> <message>" for the given exception. */
    public function displayRecognitionError($tokenNames, $e){
        $hdr = $this->getErrorHeader($e);
        $msg = $this->getErrorMessage($e, $tokenNames);
        $this->emitErrorMessage($hdr." ".$msg);
    }

    /** What error message should be generated for the various
     *  exception types?
     *
     *  Not very object-oriented code, but I like having all error message
     *  generation within one method rather than spread among all of the
     *  exception classes. This also makes it much easier for the exception
     *  handling because the exception classes do not have to have pointers back
     *  to this object to access utility routines and so on. Also, changing
     *  the message for an exception type would be difficult because you
     *  would have to subclass the exception, but then somehow get ANTLR
     *  to make those kinds of exception objects instead of the default.
     *  This looks weird, but trust me--it makes the most sense in terms
     *  of flexibility.
     *
     *  For grammar debugging, you will want to override this to add
     *  more information such as the stack frame and, for no viable alts,
     *  the decision description and state etc...
     *
     *  Override this to change the message generated for one or more
     *  exception types.
     *
     *  The instanceof chain below is order-sensitive and kept in its
     *  original order.
     */
    public function getErrorMessage($e, $tokenNames) {
        $msg = $e->getMessage();
        if ( $e instanceof UnwantedTokenException ) {
            $msg = "extraneous input ".$this->getTokenErrorDisplay($e->getUnexpectedToken()).
                " expecting ".$this->expectedTokenDisplayName($e->expecting, $tokenNames);
        }
        else if ( $e instanceof MissingTokenException ) {
            $msg = "missing ".$this->expectedTokenDisplayName($e->expecting, $tokenNames).
                " at ".$this->getTokenErrorDisplay($e->token);
        }
        else if ( $e instanceof MismatchedTokenException ) {
            $msg = "mismatched input ".$this->getTokenErrorDisplay($e->token).
                " expecting ".$this->expectedTokenDisplayName($e->expecting, $tokenNames);
        }
        else if ( $e instanceof MismatchedTreeNodeException ) {
            $msg = "mismatched tree node: ".$e->node.
                " expecting ".$this->expectedTokenDisplayName($e->expecting, $tokenNames);
        }
        else if ( $e instanceof NoViableAltException ) {
            $msg = "no viable alternative at input ".$this->getTokenErrorDisplay($e->token);
        }
        else if ( $e instanceof EarlyExitException ) {
            // must be called on $this: there is no global getTokenErrorDisplay()
            $msg = "required (...)+ loop did not match anything at input ".
                $this->getTokenErrorDisplay($e->token);
        }
        else if ( $e instanceof MismatchedSetException ) {
            $msg = "mismatched input ".$this->getTokenErrorDisplay($e->token).
                " expecting set ".$e->expecting;
        }
        else if ( $e instanceof MismatchedNotSetException ) {
            $msg = "mismatched input ".$this->getTokenErrorDisplay($e->token).
                " expecting set ".$e->expecting;
        }
        else if ( $e instanceof FailedPredicateException ) {
            $msg = "rule ".$e->ruleName." failed predicate: {".
                $e->predicateText."}?";
        }
        return $msg;
    }

    /** Name used in messages for an expected token type: "EOF" for the
     *  EOF type, otherwise the entry of $tokenNames at that index.
     */
    private function expectedTokenDisplayName($expecting, $tokenNames) {
        if ( $expecting==TokenConst::$EOF ) {
            return "EOF";
        }
        return $tokenNames[$expecting];
    }

    /** Get number of recognition errors (lexer, parser, tree parser).  Each
     *  recognizer tracks its own number.  So parser and lexer each have
     *  separate count.  Does not count the spurious errors found between
     *  an error and next valid token match
     *
     *  See also reportError()
     */
    public function getNumberOfSyntaxErrors() {
        return $this->state->syntaxErrors;
    }

    /** What is the error header, normally line/character position information? */
    public function getErrorHeader($e) {
        return "line ".$e->line.":".$e->charPositionInLine;
    }


    /** How should a token be displayed in an error message? The default
     *  is to display just the text, but during development you might
     *  want to have a lot of information spit out.  Override in that case
     *  to dump everything about the token.  This is better than forcing you
     *  to override a method in your token objects because you don't have to
     *  go modify your lexer so that it creates a new token type.
     *
     *  Newline, carriage return and tab are shown as the two-character
     *  sequences \n, \r and \t; the result is wrapped in single quotes.
     */
    public function getTokenErrorDisplay($t) {
        $s = $t->getText();
        if ( $s==null ) {
            if ( $t->getType()==TokenConst::$EOF ) {
                $s = "<EOF>";
            }
            else {
                $s = "<".$t->getType().">";
            }
        }
        $s = str_replace(array("\n", "\r", "\t"), array('\n', '\r', '\t'), $s);
        return "'".$s."'";
    }

    /** Override this method to change where error messages go */
    public function emitErrorMessage($msg) {
        echo $msg;
    }

    /** Recover from an error found on the input stream.  This is
     *  for NoViableAlt and mismatched symbol exceptions.  If you enable
     *  single token insertion and deletion, this will usually not
     *  handle mismatched symbol exceptions but there could be a mismatched
     *  token that the match() routine could not recover from.
     */
    public function recover($input, $re) {
        if ( $this->state->lastErrorIndex==$input->index() ) {
            // uh oh, another error at same token index; must be a case
            // where LT(1) is in the recovery token set so nothing is
            // consumed; consume a single token so at least to prevent
            // an infinite loop; this is a failsafe.
            $input->consume();
        }
        $this->state->lastErrorIndex = $input->index();
        $followSet = $this->computeErrorRecoverySet();
        $this->beginResync();
        $this->consumeUntilInSet($input, $followSet);
        $this->endResync();
    }

    /** A hook to listen in on the token consumption during error recovery.
     *  The DebugParser subclasses this to fire events to the listener.
     */
    public function beginResync() {
    }

    /** Counterpart of beginResync(); does nothing here. */
    public function endResync() {
    }

    /*  Compute the error recovery set for the current rule.  During
     *  rule invocation, the parser pushes the set of tokens that can
     *  follow that rule reference on the stack; this amounts to
     *  computing FIRST of what follows the rule reference in the
     *  enclosing rule. This local follow set only includes tokens
     *  from within the rule; i.e., the FIRST computation done by
     *  ANTLR stops at the end of a rule.
     *
     *  EXAMPLE
     *
     *  When you find a "no viable alt exception", the input is not
     *  consistent with any of the alternatives for rule r.  The best
     *  thing to do is to consume tokens until you see something that
     *  can legally follow a call to r *or* any rule that called r.
     *  You don't want the exact set of viable next tokens because the
     *  input might just be missing a token--you might consume the
     *  rest of the input looking for one of the missing tokens.
     *
     *  Consider grammar:
     *
     *  a : '[' b ']'
     *    | '(' b ')'
     *    ;
     *  b : c '^' INT ;
     *  c : ID
     *    | INT
     *    ;
     *
     *  At each rule invocation, the set of tokens that could follow
     *  that rule is pushed on a stack.  Here are the various "local"
     *  follow sets:
     *
     *  FOLLOW(b1_in_a) = FIRST(']') = ']'
     *  FOLLOW(b2_in_a) = FIRST(')') = ')'
     *  FOLLOW(c_in_b) = FIRST('^') = '^'
     *
     *  Upon erroneous input "[]", the call chain is
     *
     *  a -> b -> c
     *
     *  and, hence, the follow context stack is:
     *
     *  depth  local follow set     after call to rule
     *    0         <EOF>                    a (from main())
     *    1          ']'                     b
     *    2          '^'                     c
     *
     *  Notice that ')' is not included, because b would have to have
     *  been called from a different context in rule a for ')' to be
     *  included.
     *
     *  For error recovery, we cannot consider FOLLOW(c)
     *  (context-sensitive or otherwise).  We need the combined set of
     *  all context-sensitive FOLLOW sets--the set of all tokens that
     *  could follow any reference in the call chain.  We need to
     *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
     *  we resync'd to that token, we'd consume until EOF.  We need to
     *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
     *  In this case, for input "[]", LA(1) is in this set so we would
     *  not consume anything and after printing an error rule c would
     *  return normally.  It would not find the required '^' though.
     *  At this point, it gets a mismatched token error and throws an
     *  exception (since LA(1) is not in the viable following token
     *  set).  The rule exception handler tries to recover, but finds
     *  the same recovery set and doesn't consume anything.  Rule b
     *  exits normally returning to rule a.  Now it finds the ']' (and
     *  with the successful match exits errorRecovery mode).
     *
     *  So, you can see that the parser walks up call chain looking
     *  for the token that was a member of the recovery set.
     *
     *  Errors are not generated in errorRecovery mode.
     *
     *  ANTLR's error recovery mechanism is based upon original ideas:
     *
     *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
     *
     *  and
     *
     *  "A note on error recovery in recursive descent parsers":
     *  http://portal.acm.org/citation.cfm?id=947902.947905
     *
     *  Later, Josef Grosch had some good ideas:
     *
     *  "Efficient and Comfortable Error Recovery in Recursive Descent
     *  Parsers":
     *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
     *
     *  Like Grosch I implemented local FOLLOW sets that are combined
     *  at run-time upon error to avoid overhead during parsing.
     */
    protected function computeErrorRecoverySet() {
        return $this->combineFollows(false);
    }

    /** Compute the context-sensitive FOLLOW set for current rule.
     *  This is set of token types that can follow a specific rule
     *  reference given a specific call chain.  You get the set of
     *  viable tokens that can possibly come next (lookahead depth 1)
     *  given the current call chain.  Contrast this with the
     *  definition of plain FOLLOW for rule r:
     *
     *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
     *
     *  where x in T* and alpha, beta in V*; T is set of terminals and
     *  V is the set of terminals and nonterminals.  In other words,
     *  FOLLOW(r) is the set of all tokens that can possibly follow
     *  references to r in *any* sentential form (context).  At
     *  runtime, however, we know precisely which context applies as
     *  we have the call chain.  We may compute the exact (rather
     *  than covering superset) set of following tokens.
     *
     *  For example, consider grammar:
     *
     *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
     *       | "return" expr '.'
     *       ;
     *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
     *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
     *       | '(' expr ')'
     *       ;
     *
     *  The FOLLOW sets are all inclusive whereas context-sensitive
     *  FOLLOW sets are precisely what could follow a rule reference.
     *  For input "i=(3);", here is the derivation:
     *
     *  stat => ID '=' expr ';'
     *       => ID '=' atom ('+' atom)* ';'
     *       => ID '=' '(' expr ')' ('+' atom)* ';'
     *       => ID '=' '(' atom ')' ('+' atom)* ';'
     *       => ID '=' '(' INT ')' ('+' atom)* ';'
     *       => ID '=' '(' INT ')' ';'
     *
     *  At the "3" token, you'd have a call chain of
     *
     *    stat -> expr -> atom -> expr -> atom
     *
     *  What can follow that specific nested ref to atom?  Exactly ')'
     *  as you can see by looking at the derivation of this specific
     *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
     *
     *  You want the exact viable token set when recovering from a
     *  token mismatch.  Upon token mismatch, if LA(1) is member of
     *  the viable next token set, then you know there is most likely
     *  a missing token in the input stream.  "Insert" one by just not
     *  throwing an exception.
     */
    protected function computeContextSensitiveRuleFOLLOW() {
        return $this->combineFollows(true);
    }

    /**
     * Union the local follow sets on the follow stack, walking from the top
     * of the stack down to index 0.
     *
     * @param bool $exact when true, stop at the first local set that cannot
     *        see the end of its rule and keep EOR only for stack index 0;
     *        when false, union every set on the stack
     * @return Set
     */
    protected function combineFollows($exact) {
        $top = $this->state->_fsp;
        $followSet = new Set(array());
        for ($i=$top; $i>=0; $i--) {
            $localFollowSet = $this->state->following[$i];
            $followSet->unionInPlace($localFollowSet);
            // $exact is the parameter; there is no $this->exact property
            if ( $exact ) {
                // can we see end of rule?
                if ( $localFollowSet->member(TokenConst::$EOR_TOKEN_TYPE) ) {
                    // Only leave EOR in set if at top (start rule); this lets
                    // us know if have to include follow(start rule); i.e., EOF
                    if ( $i>0 ) {
                        $followSet->remove(TokenConst::$EOR_TOKEN_TYPE);
                    }
                }
                else { // can't see end of rule, quit
                    break;
                }
            }
        }
        return $followSet;
    }

    /** Attempt to recover from a single missing or extra token.
     *
     *  EXTRA TOKEN
     *
     *  LA(1) is not what we are looking for.  If LA(2) has the right token,
     *  however, then assume LA(1) is some extra spurious token.  Delete it
     *  and LA(2) as if we were doing a normal match(), which advances the
     *  input.
     *
     *  MISSING TOKEN
     *
     *  If current token is consistent with what could come after
     *  ttype then it is ok to "insert" the missing token, else throw
     *  exception.  For example, Input "i=(3;" is clearly missing the
     *  ')'.  When the parser returns from the nested call to expr, it
     *  will have call chain:
     *
     *    stat -> expr -> atom
     *
     *  and it will be trying to match the ')' at this point in the
     *  derivation:
     *
     *       => ID '=' '(' INT ')' ('+' atom)* ';'
     *                          ^
     *  match() will see that ';' doesn't match ')' and report a
     *  mismatched token error.  To recover, it sees that LA(1)==';'
     *  is in the set of tokens that can follow the ')' token
     *  reference in rule atom.  It can assume that you forgot the ')'.
     */
    protected function recoverFromMismatchedToken($input, $ttype, $follow)
    {
        $e = null;
        // if next token is what we are looking for then "delete" this token
        if ( $this->mismatchIsUnwantedToken($input, $ttype) ) {
            $e = new UnwantedTokenException($ttype, $input);
            $this->beginResync();
            $input->consume(); // simply delete extra token
            $this->endResync();
            $this->reportError($e);  // report after consuming so AW sees the token in the exception
            // we want to return the token we're actually matching
            $matchedSymbol = $this->getCurrentInputSymbol($input);
            $input->consume(); // move past ttype token as if all were ok
            return $matchedSymbol;
        }
        // can't recover with single token deletion, try insertion
        if ( $this->mismatchIsMissingToken($input, $follow) ) {
            // $e is still null here; it is passed through unchanged
            $inserted = $this->getMissingSymbol($input, $e, $ttype, $follow);
            $e = new MissingTokenException($ttype, $input, $inserted);
            $this->reportError($e);  // report after inserting so AW sees the token in the exception
            return $inserted;
        }
        // even that didn't work; must throw the exception
        throw new MismatchedTokenException($ttype, $input);
    }

    /** Not currently used */
    public function recoverFromMismatchedSet($input, $e, $follow) {
        if ( $this->mismatchIsMissingToken($input, $follow) ) {
            $this->reportError($e);
            // we don't know how to conjure up a token for sets yet
            return $this->getMissingSymbol($input, $e, TokenConst::$INVALID_TOKEN_TYPE, $follow);
        }
        // TODO do single token deletion like above for Token mismatch
        throw $e;
    }

    /** Match needs to return the current input symbol, which gets put
     *  into the label for the associated token ref; e.g., x=ID.  Token
     *  and tree parsers need to return different objects. Rather than test
     *  for input stream type or change the IntStream interface, I use
     *  a simple method to ask the recognizer to tell me what the current
     *  input symbol is.
     *
     *  This is ignored for lexers.
     */
    protected function getCurrentInputSymbol($input) { return null; }

    /** Conjure up a missing token during error recovery.
     *
     *  The recognizer attempts to recover from single missing
     *  symbols. But, actions might refer to that missing symbol.
     *  For example, x=ID {f($x);}. The action clearly assumes
     *  that there has been an identifier matched previously and that
     *  $x points at that token. If that token is missing, but
     *  the next token in the stream is what we want we assume that
     *  this token is missing and we keep going. Because we
     *  have to return some token to replace the missing token,
     *  we have to conjure one up. This method gives the user control
     *  over the tokens returned for missing tokens. Mostly,
     *  you will want to create something special for identifier
     *  tokens. For literals such as '{' and ',', the default
     *  action in the parser or tree parser works. It simply creates
     *  a CommonToken of the appropriate type. The text will be the token.
     *  If you change what tokens must be created by the lexer,
     *  override this method to create the appropriate tokens.
     *
     *  This base implementation returns null.
     */
    protected function getMissingSymbol($input, $e, $expectedTokenType, $follow) {
        return null;
    }

    /** Consume tokens until LA(1) is EOF or the given token type. */
    public function consumeUntilMatchesType($input, $tokenType) {
        $ttype = $input->LA(1);
        while ($ttype != TokenConst::$EOF && $ttype != $tokenType) {
            $input->consume();
            $ttype = $input->LA(1);
        }
    }

    /** Consume tokens until one matches the given token set (or EOF). */
    public function consumeUntilInSet($input, $set) {
        $ttype = $input->LA(1);
        while ($ttype != TokenConst::$EOF && !$set->member($ttype) ) {
            $input->consume();
            $ttype = $input->LA(1);
        }
    }

    /** Push a rule's follow set using our own hardcoded stack.
     *  The slot is simply assigned at ++_fsp; no manual resizing is done.
     */
    protected function pushFollow($fset) {
        $this->state->following[++$this->state->_fsp] = $fset;
    }

    /** A general version of getRuleInvocationStack where you can
     *  pass in, for example, a RecognitionException to get its rule
     *  stack trace.  This routine is shared with all recognizers, hence,
     *  static.
     *
     *  Not implemented in this port: after resolving its defaults it
     *  always throws Exception("Not implemented yet").
     *
     *  TODO: move to a utility class or something; weird having lexer call this
     */
    public static function getRuleInvocationStack($e=null,
                                                  $recognizerClassName=null)
    {
        if($e==null){
            $e = new Exception();
        }
        if($recognizerClassName==null){
            // static context: $this does not exist here, so resolve the
            // class the call was made on instead
            $recognizerClassName = get_called_class();
        }
        throw new Exception("Not implemented yet");
    }

    public function getBacktrackingLevel() {
        return $this->state->backtracking;
    }

    /** Used to print out token names like ID during debugging and
     *  error reporting.  The generated parsers implement a method
     *  that overrides this to point to their tokenNames array.
     */
    public function getTokenNames() {
        return null;
    }

    /** For debugging and other purposes, might want the grammar name.
     *  Have ANTLR generate an implementation for this method.
     */
    public function getGrammarFileName() {
        return null;
    }

    public abstract function getSourceName();

    /** A convenience method for use most often with template rewrites.
     *  Convert a list of tokens to a list of their texts.
     *
     *  @param array|object|null $tokens plain array, or a list object that
     *         offers size() and index access
     *  @return array|null null when $tokens is null/empty
     */
    public function toStrings($tokens) {
        if ( $tokens==null ) return null;
        // plain PHP arrays have no size() method
        $total = is_array($tokens) ? count($tokens) : $tokens->size();
        $strings = array();
        for ($i=0; $i<$total; $i++) {
            $strings[] = $tokens[$i]->getText();
        }
        return $strings;
    }

    /** Given a rule number and a start token index number, return
     *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
     *  start index.  If this rule has parsed input starting from the
     *  start index before, then return where the rule stopped parsing.
     *  It returns the index of the last token matched by the rule.
     *
     *  As a side effect, an empty map is created for $ruleIndex when
     *  none exists yet.
     */
    public function getRuleMemoization($ruleIndex, $ruleStartIndex) {
        if ( !isset($this->state->ruleMemo[$ruleIndex]) ) {
            $this->state->ruleMemo[$ruleIndex] = array();
        }
        // isset() rather than ==null: a recorded stop index of 0 is a valid
        // result and must not be mistaken for "unknown"
        if ( !isset($this->state->ruleMemo[$ruleIndex][$ruleStartIndex]) ) {
            return self::$MEMO_RULE_UNKNOWN;
        }
        return $this->state->ruleMemo[$ruleIndex][$ruleStartIndex];
    }

    /** Has this rule already parsed input at the current index in the
     *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
     *  If we attempted but failed to parse properly before, return
     *  MEMO_RULE_FAILED.
     *
     *  This method has a side-effect: if we have seen this input for
     *  this rule and successfully parsed before, then seek ahead to
     *  1 past the stop token matched for this rule last time.
     */
    public function alreadyParsedRule($input, $ruleIndex) {
        $stopIndex = $this->getRuleMemoization($ruleIndex, $input->index());
        if ( $stopIndex==self::$MEMO_RULE_UNKNOWN ) {
            return false;
        }
        if ( $stopIndex==self::$MEMO_RULE_FAILED ) {
            $this->state->failed=true;
        }
        else {
            $input->seek($stopIndex+1); // jump to one past stop token
        }
        return true;
    }

    /** Record whether or not this rule parsed the input at this position
     *  successfully.  The result is only stored when a map for $ruleIndex
     *  already exists (getRuleMemoization() creates it).
     */
    public function memoize($input, $ruleIndex, $ruleStartIndex){
        $stopTokenIndex = $this->state->failed?self::$MEMO_RULE_FAILED:$input->index()-1;
        $memoIsNull = ($this->state->ruleMemo===null);
        if ( $memoIsNull ) {
            echo("!!!!!!!!! memo array is null for ". $this->getGrammarFileName());
        }
        // never hand null to count()
        $memoSize = $memoIsNull ? 0 : count($this->state->ruleMemo);
        if ( $ruleIndex >= $memoSize ) {
            echo("!!!!!!!!! memo size is ".$memoSize.", but rule index is ".$ruleIndex);
        }
        // isset() rather than !=null: a freshly created empty map compares
        // loosely equal to null, which kept anything from ever being stored
        if ( isset($this->state->ruleMemo[$ruleIndex]) ) {
            $this->state->ruleMemo[$ruleIndex][$ruleStartIndex] = $stopTokenIndex;
        }
    }

    /** return how many rule/input-index pairs there are in total.
     *  TODO: this includes synpreds. :(
     */
    public function getRuleMemoizationCacheSize() {
        $n = 0;
        if ( is_array($this->state->ruleMemo) ) {
            // iterate the actual entries; rule indexes need not be dense
            foreach ($this->state->ruleMemo as $ruleMap) {
                if ( is_array($ruleMap) ) {
                    $n += count($ruleMap); // how many input indexes are recorded?
                }
            }
        }
        return $n;
    }

    /** Echo an "enter" trace line for a rule. */
    public function traceIn($ruleName, $ruleIndex, $inputSymbol)  {
        echo("enter ".$ruleName." ".$inputSymbol);
        if ( $this->state->failed ) {
            echo(" failed=".$this->state->failed);
        }
        if ( $this->state->backtracking>0 ) {
            echo(" backtracking=".$this->state->backtracking);
        }
        echo "\n";
    }

    /** Echo an "exit" trace line for a rule. */
    public function traceOut($ruleName, $ruleIndex, $inputSymbol) {
        echo("exit ".$ruleName." ".$inputSymbol);
        if ( $this->state->failed ) {
            echo(" failed=".$this->state->failed);
        }
        if ( $this->state->backtracking>0 ) {
            // "." concatenates; "+" would attempt numeric addition
            echo(" backtracking=".$this->state->backtracking);
        }
        echo "\n";
    }

    /** Resolve a token reference: an all-digit name is returned as an
     *  integer, anything else is read as a property of this object.
     */
    public function getToken($name){
        // anchored, so that names which merely contain digits (e.g. "T__12")
        // are looked up as properties instead of being cast to 0
        if(preg_match('/^\d+\z/', $name)){
            return (int)$name;
        }else{
            return $this->$name;
        }
    }

    /** Stub: has no body in this class and therefore returns null. */
    public function getTokenName($tokenId){

    }

}

BaseRecognizer::$DEFAULT_TOKEN_CHANNEL = TokenConst::$DEFAULT_CHANNEL;
BaseRecognizer::$HIDDEN = TokenConst::$HIDDEN_CHANNEL;
?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 20:08:37 2014 | Cross-referenced by PHPXref 0.7.1 |