MediaWiki  REL1_24
jsminplus.php
Go to the documentation of this file.
00001 <?php
00002 // @codingStandardsIgnoreFile File external to MediaWiki. Ignore coding conventions checks.
00031 /* ***** BEGIN LICENSE BLOCK *****
00032  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00033  *
00034  * The contents of this file are subject to the Mozilla Public License Version
00035  * 1.1 (the "License"); you may not use this file except in compliance with
00036  * the License. You may obtain a copy of the License at
00037  * http://www.mozilla.org/MPL/
00038  *
00039  * Software distributed under the License is distributed on an "AS IS" basis,
00040  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00041  * for the specific language governing rights and limitations under the
00042  * License.
00043  *
00044  * The Original Code is the Narcissus JavaScript engine.
00045  *
00046  * The Initial Developer of the Original Code is
00047  * Brendan Eich <[email protected]>.
00048  * Portions created by the Initial Developer are Copyright (C) 2004
00049  * the Initial Developer. All Rights Reserved.
00050  *
00051  * Contributor(s): Tino Zijdel <[email protected]>
00052  * PHP port, modifications and minifier routine are (C) 2009-2011
00053  *
00054  * Alternatively, the contents of this file may be used under the terms of
00055  * either the GNU General Public License Version 2 or later (the "GPL"), or
00056  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00057  * in which case the provisions of the GPL or the LGPL are applicable instead
00058  * of those above. If you wish to allow use of your version of this file only
00059  * under the terms of either the GPL or the LGPL, and not to allow others to
00060  * use your version of this file under the terms of the MPL, indicate your
00061  * decision by deleting the provisions above and replace them with the notice
00062  * and other provisions required by the GPL or the LGPL. If you do not delete
00063  * the provisions above, a recipient may use your version of this file under
00064  * the terms of any one of the MPL, the GPL or the LGPL.
00065  *
00066  * ***** END LICENSE BLOCK ***** */
00067 
/* Token type identifiers */
const TOKEN_END = 1;
const TOKEN_NUMBER = 2;
const TOKEN_IDENTIFIER = 3;
const TOKEN_STRING = 4;
const TOKEN_REGEXP = 5;
const TOKEN_NEWLINE = 6;
const TOKEN_CONDCOMMENT_START = 7;
const TOKEN_CONDCOMMENT_END = 8;

/* Synthetic parse-tree node types (compared against a node's ->type) */
const JS_SCRIPT = 100;
const JS_BLOCK = 101;
const JS_LABEL = 102;
const JS_FOR_IN = 103;
const JS_CALL = 104;
const JS_NEW_WITH_ARGS = 105;
const JS_INDEX = 106;
const JS_ARRAY_INIT = 107;
const JS_OBJECT_INIT = 108;
const JS_PROPERTY_INIT = 109;
const JS_GETTER = 110;
const JS_SETTER = 111;
const JS_GROUP = 112;
const JS_LIST = 113;

/* Node type of a subtree that has already been rendered to minified text */
const JS_MINIFIED = 999;

/* Function forms (compared against a function node's ->functionForm) */
const DECLARED_FORM = 0;
const EXPRESSED_FORM = 1;
const STATEMENT_FORM = 2;

/* Operators */
const OP_SEMICOLON = ';';
const OP_COMMA = ',';
const OP_HOOK = '?';
const OP_COLON = ':';
const OP_OR = '||';
const OP_AND = '&&';
const OP_BITWISE_OR = '|';
const OP_BITWISE_XOR = '^';
const OP_BITWISE_AND = '&';
const OP_STRICT_EQ = '===';
const OP_EQ = '==';
const OP_ASSIGN = '=';
const OP_STRICT_NE = '!==';
const OP_NE = '!=';
const OP_LSH = '<<';
const OP_LE = '<=';
const OP_LT = '<';
const OP_URSH = '>>>';
const OP_RSH = '>>';
const OP_GE = '>=';
const OP_GT = '>';
const OP_INCREMENT = '++';
const OP_DECREMENT = '--';
const OP_PLUS = '+';
const OP_MINUS = '-';
const OP_MUL = '*';
const OP_DIV = '/';
const OP_MOD = '%';
const OP_NOT = '!';
const OP_BITWISE_NOT = '~';
const OP_DOT = '.';
const OP_LEFT_BRACKET = '[';
const OP_RIGHT_BRACKET = ']';
const OP_LEFT_CURLY = '{';
const OP_RIGHT_CURLY = '}';
const OP_LEFT_PAREN = '(';
const OP_RIGHT_PAREN = ')';
const OP_CONDCOMMENT_END = '@*/';

/* Unary forms of + and -, kept distinct from the binary operators */
const OP_UNARY_PLUS = 'U+';
const OP_UNARY_MINUS = 'U-';

/* Keywords */
const KEYWORD_BREAK = 'break';
const KEYWORD_CASE = 'case';
const KEYWORD_CATCH = 'catch';
const KEYWORD_CONST = 'const';
const KEYWORD_CONTINUE = 'continue';
const KEYWORD_DEBUGGER = 'debugger';
const KEYWORD_DEFAULT = 'default';
const KEYWORD_DELETE = 'delete';
const KEYWORD_DO = 'do';
const KEYWORD_ELSE = 'else';
const KEYWORD_ENUM = 'enum';
const KEYWORD_FALSE = 'false';
const KEYWORD_FINALLY = 'finally';
const KEYWORD_FOR = 'for';
const KEYWORD_FUNCTION = 'function';
const KEYWORD_IF = 'if';
const KEYWORD_IN = 'in';
const KEYWORD_INSTANCEOF = 'instanceof';
const KEYWORD_NEW = 'new';
const KEYWORD_NULL = 'null';
const KEYWORD_RETURN = 'return';
const KEYWORD_SWITCH = 'switch';
const KEYWORD_THIS = 'this';
const KEYWORD_THROW = 'throw';
const KEYWORD_TRUE = 'true';
const KEYWORD_TRY = 'try';
const KEYWORD_TYPEOF = 'typeof';
const KEYWORD_VAR = 'var';
const KEYWORD_VOID = 'void';
const KEYWORD_WHILE = 'while';
const KEYWORD_WITH = 'with';
00173 
00174 
/**
 * Minifier that serialises a JavaScript parse tree back into compact source.
 *
 * The public entry point is the static JSMinPlus::minify(). It lazily creates
 * a single shared instance, which owns a JSParser; the parser is constructed
 * with a reference to this object and calls back into parseTree() to render
 * nodes.
 */
class JSMinPlus
{
    /** Parser created once in the constructor and reused by every min() call. */
    private $parser;

    /**
     * Words that isValidIdentifier() refuses, so that quoted property names
     * such as obj['class'] are never rewritten to their unquoted form.
     */
    private $reserved = array(
        'break', 'case', 'catch', 'continue', 'default', 'delete', 'do',
        'else', 'finally', 'for', 'function', 'if', 'in', 'instanceof',
        'new', 'return', 'switch', 'this', 'throw', 'try', 'typeof', 'var',
        'void', 'while', 'with',
        // Words reserved for future use
        'abstract', 'boolean', 'byte', 'char', 'class', 'const', 'debugger',
        'double', 'enum', 'export', 'extends', 'final', 'float', 'goto',
        'implements', 'import', 'int', 'interface', 'long', 'native',
        'package', 'private', 'protected', 'public', 'short', 'static',
        'super', 'synchronized', 'throws', 'transient', 'volatile',
        // These are not reserved, but should be taken into account
        // in isValidIdentifier (See jslint source code)
        'arguments', 'eval', 'true', 'false', 'Infinity', 'NaN', 'null', 'undefined'
    );

    /**
     * Private: instances are only created through minify().
     */
    private function __construct()
    {
        $this->parser = new JSParser($this);
    }

    /**
     * Minify a piece of JavaScript source.
     *
     * @param string $js JavaScript source text
     * @param string $filename name handed to the parser together with the source
     * @return string|false minified source, or false when parsing/rendering threw
     */
    public static function minify($js, $filename='')
    {
        static $instance;

        // this is a singleton
        if(!$instance)
            $instance = new JSMinPlus();

        return $instance->min($js, $filename);
    }

    /**
     * Parse $js and render the resulting tree.
     *
     * Any Exception is caught here: its message is echoed (followed by a
     * newline) and false is returned instead of propagating the error.
     *
     * @param string $js JavaScript source text
     * @param string $filename name handed to the parser
     * @return string|false
     */
    private function min($js, $filename)
    {
        try
        {
            $n = $this->parser->parse($js, $filename, 1);
            return $this->parseTree($n);
        }
        catch(Exception $e)
        {
            echo $e->getMessage() . "\n";
        }

        return false;
    }

    /**
     * Render one parse-tree node (and, recursively, its children) as
     * minified JavaScript.
     *
     * @param object $n node; ->type selects the rendering rule, the other
     *        properties read depend on that type
     * @param bool $noBlockGrouping when true, a JS_BLOCK holding more than one
     *        statement is NOT wrapped in curly braces (the caller emits its own)
     * @return string minified source for the node
     * @throws Exception for KEYWORD_DEBUGGER nodes and unknown node types
     */
    public function parseTree($n, $noBlockGrouping = false)
    {
        $s = '';

        switch ($n->type)
        {
            case JS_MINIFIED:
                // subtree was rendered earlier; its text is kept in ->value
                $s = $n->value;
            break;

            case JS_SCRIPT:
                // we do nothing yet with funDecls or varDecls
                $noBlockGrouping = true;
            // FALL THROUGH

            case JS_BLOCK:
                $childs = $n->treeNodes;
                $lastType = 0;
                // $c counts the statements that actually produced output
                for ($c = 0, $i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $type = $childs[$i]->type;
                    $t = $this->parseTree($childs[$i]);
                    if (strlen($t))
                    {
                        if ($c)
                        {
                            // Drop the previous statement's trailing terminator(s), but
                            // remember whether there were any: for an empty-bodied
                            // statement such as "while(x);" the ';' IS the body.
                            $stripped = rtrim($s, ';');
                            $hadTerminator = ($stripped !== $s);
                            $s = $stripped;

                            if ($type == KEYWORD_FUNCTION && $childs[$i]->functionForm == DECLARED_FORM)
                            {
                                // put declared functions on a new line; keep a stripped ';'
                                // so the function cannot become the body of a preceding
                                // empty-bodied loop or if-statement
                                $s .= ($hadTerminator ? ';' : '') . "\n";
                            }
                            elseif ($type == KEYWORD_VAR && $type == $lastType)
                            {
                                // multiple var-statements can go into one:
                                // replace the leading "var " (4 bytes) by a comma
                                $t = ',' . substr($t, 4);
                            }
                            else
                            {
                                // add terminator
                                $s .= ';';
                            }
                        }

                        $s .= $t;

                        $c++;
                        $lastType = $type;
                    }
                }

                if ($c > 1 && !$noBlockGrouping)
                {
                    $s = '{' . $s . '}';
                }
            break;

            case KEYWORD_FUNCTION:
                $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
                $params = $n->params;
                for ($i = 0, $j = count($params); $i < $j; $i++)
                    $s .= ($i ? ',' : '') . $params[$i];
                $s .= '){' . $this->parseTree($n->body, true) . '}';
            break;

            case KEYWORD_IF:
                $s = 'if(' . $this->parseTree($n->condition) . ')';
                $thenPart = $this->parseTree($n->thenPart);
                $elsePart = $n->elsePart ? $this->parseTree($n->elsePart) : null;

                // empty if-statement
                if ($thenPart == '')
                    $thenPart = ';';

                if ($elsePart)
                {
                    // be careful and always make a block out of the thenPart; could be more optimized but is a lot of trouble
                    if ($thenPart != ';' && $thenPart[0] != '{')
                        $thenPart = '{' . $thenPart . '}';

                    $s .= $thenPart . 'else';

                    // we could check for more, but that hardly ever applies so go for performance
                    if ($elsePart[0] != '{')
                        $s .= ' ';

                    $s .= $elsePart;
                }
                else
                {
                    $s .= $thenPart;
                }
            break;

            case KEYWORD_SWITCH:
                $s = 'switch(' . $this->parseTree($n->discriminant) . '){';
                $cases = $n->cases;
                for ($i = 0, $j = count($cases); $i < $j; $i++)
                {
                    $case = $cases[$i];
                    if ($case->type == KEYWORD_CASE)
                        // a string label can follow "case" directly, anything else gets a space
                        $s .= 'case' . ($case->caseLabel->type != TOKEN_STRING ? ' ' : '') . $this->parseTree($case->caseLabel) . ':';
                    else
                        $s .= 'default:';

                    $statement = $this->parseTree($case->statements, true);
                    if ($statement)
                    {
                        $s .= $statement;
                        // no terminator for last statement
                        if ($i + 1 < $j)
                            $s .= ';';
                    }
                }
                $s .= '}';
            break;

            case KEYWORD_FOR:
                $s = 'for(' . ($n->setup ? $this->parseTree($n->setup) : '')
                    . ';' . ($n->condition ? $this->parseTree($n->condition) : '')
                    . ';' . ($n->update ? $this->parseTree($n->update) : '') . ')';

                $body  = $this->parseTree($n->body);
                if ($body == '')
                    $body = ';';

                $s .= $body;
            break;

            case KEYWORD_WHILE:
                $s = 'while(' . $this->parseTree($n->condition) . ')';

                $body  = $this->parseTree($n->body);
                if ($body == '')
                    $body = ';';

                $s .= $body;
            break;

            case JS_FOR_IN:
                $s = 'for(' . ($n->varDecl ? $this->parseTree($n->varDecl) : $this->parseTree($n->iterator)) . ' in ' . $this->parseTree($n->object) . ')';

                $body  = $this->parseTree($n->body);
                if ($body == '')
                    $body = ';';

                $s .= $body;
            break;

            case KEYWORD_DO:
                $s = 'do{' . $this->parseTree($n->body, true) . '}while(' . $this->parseTree($n->condition) . ')';
            break;

            case KEYWORD_BREAK:
            case KEYWORD_CONTINUE:
                $s = $n->value . ($n->label ? ' ' . $n->label : '');
            break;

            case KEYWORD_TRY:
                $s = 'try{' . $this->parseTree($n->tryBlock, true) . '}';
                $catchClauses = $n->catchClauses;
                for ($i = 0, $j = count($catchClauses); $i < $j; $i++)
                {
                    $t = $catchClauses[$i];
                    $s .= 'catch(' . $t->varName . ($t->guard ? ' if ' . $this->parseTree($t->guard) : '') . '){' . $this->parseTree($t->block, true) . '}';
                }
                if ($n->finallyBlock)
                    $s .= 'finally{' . $this->parseTree($n->finallyBlock, true) . '}';
            break;

            case KEYWORD_THROW:
            case KEYWORD_RETURN:
                $s = $n->type;
                if ($n->value)
                {
                    $t = $this->parseTree($n->value);
                    if (strlen($t))
                    {
                        // a separating space is only needed when the operand would
                        // otherwise fuse with the keyword
                        if ($this->isWordChar($t[0]) || $t[0] == '\\')
                            $s .= ' ';

                        $s .= $t;
                    }
                }
            break;

            case KEYWORD_WITH:
                $s = 'with(' . $this->parseTree($n->object) . ')' . $this->parseTree($n->body);
            break;

            case KEYWORD_VAR:
            case KEYWORD_CONST:
                $s = $n->value . ' ';
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $t = $childs[$i];
                    $s .= ($i ? ',' : '') . $t->name;
                    $u = $t->initializer;
                    if ($u)
                        $s .= '=' . $this->parseTree($u);
                }
            break;

            case KEYWORD_IN:
            case KEYWORD_INSTANCEOF:
                $left = $this->parseTree($n->treeNodes[0]);
                $right = $this->parseTree($n->treeNodes[1]);

                $s = $left;

                if ($this->isWordChar(substr($left, -1)))
                    $s .= ' ';

                $s .= $n->type;

                if ($this->isWordChar($right[0]) || $right[0] == '\\')
                    $s .= ' ';

                $s .= $right;
            break;

            case KEYWORD_DELETE:
            case KEYWORD_TYPEOF:
                $right = $this->parseTree($n->treeNodes[0]);

                $s = $n->type;

                if ($this->isWordChar($right[0]) || $right[0] == '\\')
                    $s .= ' ';

                $s .= $right;
            break;

            case KEYWORD_VOID:
                $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
            break;

            case KEYWORD_DEBUGGER:
                throw new Exception('NOT IMPLEMENTED: DEBUGGER');

            case TOKEN_CONDCOMMENT_START:
            case TOKEN_CONDCOMMENT_END:
                $s = $n->value . ($n->type == TOKEN_CONDCOMMENT_START ? ' ' : '');
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                    $s .= $this->parseTree($childs[$i]);
            break;

            case OP_SEMICOLON:
                // expression statement; renders as '' when there is no expression
                if ($expression = $n->expression)
                    $s = $this->parseTree($expression);
            break;

            case JS_LABEL:
                $s = $n->label . ':' . $this->parseTree($n->statement);
            break;

            case OP_COMMA:
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                    $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
            break;

            case OP_ASSIGN:
                // ->value (not ->type) is emitted between the two operands
                $s = $this->parseTree($n->treeNodes[0]) . $n->value . $this->parseTree($n->treeNodes[1]);
            break;

            case OP_HOOK:
                $s = $this->parseTree($n->treeNodes[0]) . '?' . $this->parseTree($n->treeNodes[1]) . ':' . $this->parseTree($n->treeNodes[2]);
            break;

            case OP_OR: case OP_AND:
            case OP_BITWISE_OR: case OP_BITWISE_XOR: case OP_BITWISE_AND:
            case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE:
            case OP_LT: case OP_LE: case OP_GE: case OP_GT:
            case OP_LSH: case OP_RSH: case OP_URSH:
            case OP_MUL: case OP_DIV: case OP_MOD:
                $s = $this->parseTree($n->treeNodes[0]) . $n->type . $this->parseTree($n->treeNodes[1]);
            break;

            case OP_PLUS:
            case OP_MINUS:
                $left = $this->parseTree($n->treeNodes[0]);
                $right = $this->parseTree($n->treeNodes[1]);

                switch ($n->treeNodes[1]->type)
                {
                    case OP_PLUS:
                    case OP_MINUS:
                    case OP_INCREMENT:
                    case OP_DECREMENT:
                    case OP_UNARY_PLUS:
                    case OP_UNARY_MINUS:
                        // keep a space so "a+ +b" / "a- --b" do not collapse
                        // into a different operator
                        $s = $left . $n->type . ' ' . $right;
                    break;

                    case TOKEN_STRING:
                        //combine concatenated strings with same quote style
                        if ($n->type == OP_PLUS && $this->canMergeStrings($n->treeNodes[0], $left, $right))
                        {
                            $s = substr($left, 0, -1) . substr($right, 1);
                            break;
                        }
                    // FALL THROUGH

                    default:
                        $s = $left . $n->type . $right;
                }
            break;

            case OP_NOT:
            case OP_BITWISE_NOT:
            case OP_UNARY_PLUS:
            case OP_UNARY_MINUS:
                $s = $n->value . $this->parseTree($n->treeNodes[0]);
            break;

            case OP_INCREMENT:
            case OP_DECREMENT:
                if ($n->postfix)
                    $s = $this->parseTree($n->treeNodes[0]) . $n->value;
                else
                    $s = $n->value . $this->parseTree($n->treeNodes[0]);
            break;

            case OP_DOT:
                $s = $this->parseTree($n->treeNodes[0]) . '.' . $this->parseTree($n->treeNodes[1]);
            break;

            case JS_INDEX:
                $s = $this->parseTree($n->treeNodes[0]);
                // See if we can replace named index with a dot saving 3 bytes
                if (    $n->treeNodes[0]->type == TOKEN_IDENTIFIER &&
                    $n->treeNodes[1]->type == TOKEN_STRING &&
                    $this->isValidIdentifier(substr($n->treeNodes[1]->value, 1, -1))
                )
                    $s .= '.' . substr($n->treeNodes[1]->value, 1, -1);
                else
                    $s .= '[' . $this->parseTree($n->treeNodes[1]) . ']';
            break;

            case JS_LIST:
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                    $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
            break;

            case JS_CALL:
                $s = $this->parseTree($n->treeNodes[0]) . '(' . $this->parseTree($n->treeNodes[1]) . ')';
            break;

            case KEYWORD_NEW:
            case JS_NEW_WITH_ARGS:
                $s = 'new ' . $this->parseTree($n->treeNodes[0]) . '(' . ($n->type == JS_NEW_WITH_ARGS ? $this->parseTree($n->treeNodes[1]) : '') . ')';
            break;

            case JS_ARRAY_INIT:
                $s = '[';
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
                }
                $s .= ']';
            break;

            case JS_OBJECT_INIT:
                $s = '{';
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $t = $childs[$i];
                    if ($i)
                        $s .= ',';
                    if ($t->type == JS_PROPERTY_INIT)
                    {
                        // Ditch the quotes when the index is a valid identifier
                        if (    $t->treeNodes[0]->type == TOKEN_STRING &&
                            $this->isValidIdentifier(substr($t->treeNodes[0]->value, 1, -1))
                        )
                            $s .= substr($t->treeNodes[0]->value, 1, -1);
                        else
                            $s .= $t->treeNodes[0]->value;

                        $s .= ':' . $this->parseTree($t->treeNodes[1]);
                    }
                    else
                    {
                        $s .= $t->type == JS_GETTER ? 'get' : 'set';
                        $s .= ' ' . $t->name . '(';
                        $params = $t->params;
                        // Dedicated counters: reusing $i/$j here would overwrite the
                        // enclosing loop's state and silently drop every member that
                        // follows a getter or setter.
                        for ($p = 0, $q = count($params); $p < $q; $p++)
                            $s .= ($p ? ',' : '') . $params[$p];
                        $s .= '){' . $this->parseTree($t->body, true) . '}';
                    }
                }
                $s .= '}';
            break;

            case TOKEN_NUMBER:
                $s = $n->value;
                // digits 1-9 followed by three or more zeros: 15000 -> 15e3
                if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
                    $s = $m[1] . 'e' . strlen($m[2]);
            break;

            case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
            case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
                $s = $n->value;
            break;

            case JS_GROUP:
                // parentheses around these node types are dropped
                if (in_array(
                    $n->treeNodes[0]->type,
                    array(
                        JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP,
                        TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER,
                        KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
                    )
                ))
                {
                    $s = $this->parseTree($n->treeNodes[0]);
                }
                else
                {
                    $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
                }
            break;

            default:
                throw new Exception('UNKNOWN TOKEN TYPE: ' . $n->type);
        }

        return $s;
    }

    /**
     * Decide whether "<left> + <string literal>" may be folded into a single
     * string literal.
     *
     * Looking at the rendered text alone is not enough: "2*'3'" also ends in a
     * quote, yet 2*'3'+'4' is not 2*'34'. Folding is therefore limited to a
     * left operand that is a string literal, or a '+' whose own right operand
     * is a string literal (x+'a'+'b' is always x+'ab').
     *
     * @param object $leftNode left operand node of the '+'
     * @param string $left rendered left operand
     * @param string $right rendered right operand (a quoted string literal)
     * @return bool
     */
    private function canMergeStrings($leftNode, $left, $right)
    {
        $endsInString = $leftNode->type == TOKEN_STRING
            || ($leftNode->type == OP_PLUS && $leftNode->treeNodes[1]->type == TOKEN_STRING);

        // both literals must use the same quote character
        if (!$endsInString || substr($left, -1) != $right[0])
            return false;

        // '\0'+'1' must not become '\01': a digit appended to a trailing octal
        // escape would extend that escape. Staying unmerged is always safe.
        if (ctype_digit($right[1]) && preg_match('/\\\\[0-7]{1,2}.\z/s', $left))
            return false;

        return true;
    }

    /**
     * Whether $string may be emitted as an unquoted property name: ASCII
     * letters, digits and underscore only, not starting with a digit, and not
     * listed in $reserved.
     *
     * @param string $string candidate name, without quotes
     * @return bool
     */
    private function isValidIdentifier($string)
    {
        // D: '$' must match at the very end only, not before a trailing newline
        return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/D', $string) && !in_array($string, $this->reserved, true);
    }

    /**
     * Whether $char is '_', '$' or alphanumeric according to ctype_alnum().
     *
     * @param string $char single character
     * @return bool
     */
    private function isWordChar($char)
    {
        return $char == '_' || $char == '$' || ctype_alnum($char);
    }
}
00673 
00674 class JSParser
00675 {
00676     private $t;
00677     private $minifier;
00678 
00679     private $opPrecedence = array(
00680         ';' => 0,
00681         ',' => 1,
00682         '=' => 2, '?' => 2, ':' => 2,
00683         // The above all have to have the same precedence, see bug 330975
00684         '||' => 4,
00685         '&&' => 5,
00686         '|' => 6,
00687         '^' => 7,
00688         '&' => 8,
00689         '==' => 9, '!=' => 9, '===' => 9, '!==' => 9,
00690         '<' => 10, '<=' => 10, '>=' => 10, '>' => 10, 'in' => 10, 'instanceof' => 10,
00691         '<<' => 11, '>>' => 11, '>>>' => 11,
00692         '+' => 12, '-' => 12,
00693         '*' => 13, '/' => 13, '%' => 13,
00694         'delete' => 14, 'void' => 14, 'typeof' => 14,
00695         '!' => 14, '~' => 14, 'U+' => 14, 'U-' => 14,
00696         '++' => 15, '--' => 15,
00697         'new' => 16,
00698         '.' => 17,
00699         JS_NEW_WITH_ARGS => 0, JS_INDEX => 0, JS_CALL => 0,
00700         JS_ARRAY_INIT => 0, JS_OBJECT_INIT => 0, JS_GROUP => 0
00701     );
00702 
00703     private $opArity = array(
00704         ',' => -2,
00705         '=' => 2,
00706         '?' => 3,
00707         '||' => 2,
00708         '&&' => 2,
00709         '|' => 2,
00710         '^' => 2,
00711         '&' => 2,
00712         '==' => 2, '!=' => 2, '===' => 2, '!==' => 2,
00713         '<' => 2, '<=' => 2, '>=' => 2, '>' => 2, 'in' => 2, 'instanceof' => 2,
00714         '<<' => 2, '>>' => 2, '>>>' => 2,
00715         '+' => 2, '-' => 2,
00716         '*' => 2, '/' => 2, '%' => 2,
00717         'delete' => 1, 'void' => 1, 'typeof' => 1,
00718         '!' => 1, '~' => 1, 'U+' => 1, 'U-' => 1,
00719         '++' => 1, '--' => 1,
00720         'new' => 1,
00721         '.' => 2,
00722         JS_NEW_WITH_ARGS => 2, JS_INDEX => 2, JS_CALL => 2,
00723         JS_ARRAY_INIT => 1, JS_OBJECT_INIT => 1, JS_GROUP => 1,
00724         TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1
00725     );
00726 
00727     public function __construct($minifier=null)
00728     {
00729         $this->minifier = $minifier;
00730         $this->t = new JSTokenizer();
00731     }
00732 
00733     public function parse($s, $f, $l)
00734     {
00735         // initialize tokenizer
00736         $this->t->init($s, $f, $l);
00737 
00738         $x = new JSCompilerContext(false);
00739         $n = $this->Script($x);
00740         if (!$this->t->isDone())
00741             throw $this->t->newSyntaxError('Syntax error');
00742 
00743         return $n;
00744     }
00745 
00746     private function Script($x)
00747     {
00748         $n = $this->Statements($x);
00749         $n->type = JS_SCRIPT;
00750         $n->funDecls = $x->funDecls;
00751         $n->varDecls = $x->varDecls;
00752 
00753         // minify by scope
00754         if ($this->minifier)
00755         {
00756             $n->value = $this->minifier->parseTree($n);
00757 
00758             // clear tree from node to save memory
00759             $n->treeNodes = null;
00760             $n->funDecls = null;
00761             $n->varDecls = null;
00762 
00763             $n->type = JS_MINIFIED;
00764         }
00765 
00766         return $n;
00767     }
00768 
00769     private function Statements($x)
00770     {
00771         $n = new JSNode($this->t, JS_BLOCK);
00772         array_push($x->stmtStack, $n);
00773 
00774         while (!$this->t->isDone() && $this->t->peek() != OP_RIGHT_CURLY)
00775             $n->addNode($this->Statement($x));
00776 
00777         array_pop($x->stmtStack);
00778 
00779         return $n;
00780     }
00781 
00782     private function Block($x)
00783     {
00784         $this->t->mustMatch(OP_LEFT_CURLY);
00785         $n = $this->Statements($x);
00786         $this->t->mustMatch(OP_RIGHT_CURLY);
00787 
00788         return $n;
00789     }
00790 
00791     private function Statement($x)
00792     {
00793         $tt = $this->t->get();
00794         $n2 = null;
00795 
00796         // Cases for statements ending in a right curly return early, avoiding the
00797         // common semicolon insertion magic after this switch.
00798         switch ($tt)
00799         {
00800             case KEYWORD_FUNCTION:
00801                 return $this->FunctionDefinition(
00802                     $x,
00803                     true,
00804                     count($x->stmtStack) > 1 ? STATEMENT_FORM : DECLARED_FORM
00805                 );
00806             break;
00807 
00808             case OP_LEFT_CURLY:
00809                 $n = $this->Statements($x);
00810                 $this->t->mustMatch(OP_RIGHT_CURLY);
00811             return $n;
00812 
00813             case KEYWORD_IF:
00814                 $n = new JSNode($this->t);
00815                 $n->condition = $this->ParenExpression($x);
00816                 array_push($x->stmtStack, $n);
00817                 $n->thenPart = $this->Statement($x);
00818                 $n->elsePart = $this->t->match(KEYWORD_ELSE) ? $this->Statement($x) : null;
00819                 array_pop($x->stmtStack);
00820             return $n;
00821 
00822             case KEYWORD_SWITCH:
00823                 $n = new JSNode($this->t);
00824                 $this->t->mustMatch(OP_LEFT_PAREN);
00825                 $n->discriminant = $this->Expression($x);
00826                 $this->t->mustMatch(OP_RIGHT_PAREN);
00827                 $n->cases = array();
00828                 $n->defaultIndex = -1;
00829 
00830                 array_push($x->stmtStack, $n);
00831 
00832                 $this->t->mustMatch(OP_LEFT_CURLY);
00833 
00834                 while (($tt = $this->t->get()) != OP_RIGHT_CURLY)
00835                 {
00836                     switch ($tt)
00837                     {
00838                         case KEYWORD_DEFAULT:
00839                             if ($n->defaultIndex >= 0)
00840                                 throw $this->t->newSyntaxError('More than one switch default');
00841                             // FALL THROUGH
00842                         case KEYWORD_CASE:
00843                             $n2 = new JSNode($this->t);
00844                             if ($tt == KEYWORD_DEFAULT)
00845                                 $n->defaultIndex = count($n->cases);
00846                             else
00847                                 $n2->caseLabel = $this->Expression($x, OP_COLON);
00848                                 break;
00849                         default:
00850                             throw $this->t->newSyntaxError('Invalid switch case');
00851                     }
00852 
00853                     $this->t->mustMatch(OP_COLON);
00854                     $n2->statements = new JSNode($this->t, JS_BLOCK);
00855                     while (($tt = $this->t->peek()) != KEYWORD_CASE && $tt != KEYWORD_DEFAULT && $tt != OP_RIGHT_CURLY)
00856                         $n2->statements->addNode($this->Statement($x));
00857 
00858                     array_push($n->cases, $n2);
00859                 }
00860 
00861                 array_pop($x->stmtStack);
00862             return $n;
00863 
00864             case KEYWORD_FOR:
00865                 $n = new JSNode($this->t);
00866                 $n->isLoop = true;
00867                 $this->t->mustMatch(OP_LEFT_PAREN);
00868 
00869                 if (($tt = $this->t->peek()) != OP_SEMICOLON)
00870                 {
00871                     $x->inForLoopInit = true;
00872                     if ($tt == KEYWORD_VAR || $tt == KEYWORD_CONST)
00873                     {
00874                         $this->t->get();
00875                         $n2 = $this->Variables($x);
00876                     }
00877                     else
00878                     {
00879                         $n2 = $this->Expression($x);
00880                     }
00881                     $x->inForLoopInit = false;
00882                 }
00883 
00884                 if ($n2 && $this->t->match(KEYWORD_IN))
00885                 {
00886                     $n->type = JS_FOR_IN;
00887                     if ($n2->type == KEYWORD_VAR)
00888                     {
00889                         if (count($n2->treeNodes) != 1)
00890                         {
00891                             throw $this->t->SyntaxError(
00892                                 'Invalid for..in left-hand side',
00893                                 $this->t->filename,
00894                                 $n2->lineno
00895                             );
00896                         }
00897 
00898                         // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
00899                         $n->iterator = $n2->treeNodes[0];
00900                         $n->varDecl = $n2;
00901                     }
00902                     else
00903                     {
00904                         $n->iterator = $n2;
00905                         $n->varDecl = null;
00906                     }
00907 
00908                     $n->object = $this->Expression($x);
00909                 }
00910                 else
00911                 {
00912                     $n->setup = $n2 ? $n2 : null;
00913                     $this->t->mustMatch(OP_SEMICOLON);
00914                     $n->condition = $this->t->peek() == OP_SEMICOLON ? null : $this->Expression($x);
00915                     $this->t->mustMatch(OP_SEMICOLON);
00916                     $n->update = $this->t->peek() == OP_RIGHT_PAREN ? null : $this->Expression($x);
00917                 }
00918 
00919                 $this->t->mustMatch(OP_RIGHT_PAREN);
00920                 $n->body = $this->nest($x, $n);
00921             return $n;
00922 
00923             case KEYWORD_WHILE:
00924                     $n = new JSNode($this->t);
00925                     $n->isLoop = true;
00926                     $n->condition = $this->ParenExpression($x);
00927                     $n->body = $this->nest($x, $n);
00928             return $n;
00929 
00930             case KEYWORD_DO:
00931                 $n = new JSNode($this->t);
00932                 $n->isLoop = true;
00933                 $n->body = $this->nest($x, $n, KEYWORD_WHILE);
00934                 $n->condition = $this->ParenExpression($x);
00935                 if (!$x->ecmaStrictMode)
00936                 {
00937                     // <script language="JavaScript"> (without version hints) may need
00938                     // automatic semicolon insertion without a newline after do-while.
00939                     // See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
00940                     $this->t->match(OP_SEMICOLON);
00941                     return $n;
00942                 }
00943             break;
00944 
00945             case KEYWORD_BREAK:
00946             case KEYWORD_CONTINUE:
00947                 $n = new JSNode($this->t);
00948 
00949                 if ($this->t->peekOnSameLine() == TOKEN_IDENTIFIER)
00950                 {
00951                     $this->t->get();
00952                     $n->label = $this->t->currentToken()->value;
00953                 }
00954 
00955                 $ss = $x->stmtStack;
00956                 $i = count($ss);
00957                 $label = $n->label;
00958                 if ($label)
00959                 {
00960                     do
00961                     {
00962                         if (--$i < 0)
00963                             throw $this->t->newSyntaxError('Label not found');
00964                     }
00965                     while ($ss[$i]->label != $label);
00966                 }
00967                 else
00968                 {
00969                     do
00970                     {
00971                         if (--$i < 0)
00972                             throw $this->t->newSyntaxError('Invalid ' . $tt);
00973                     }
00974                     while (!$ss[$i]->isLoop && ($tt != KEYWORD_BREAK || $ss[$i]->type != KEYWORD_SWITCH));
00975                 }
00976 
00977                 $n->target = $ss[$i];
00978             break;
00979 
00980             case KEYWORD_TRY:
00981                 $n = new JSNode($this->t);
00982                 $n->tryBlock = $this->Block($x);
00983                 $n->catchClauses = array();
00984 
00985                 while ($this->t->match(KEYWORD_CATCH))
00986                 {
00987                     $n2 = new JSNode($this->t);
00988                     $this->t->mustMatch(OP_LEFT_PAREN);
00989                     $n2->varName = $this->t->mustMatch(TOKEN_IDENTIFIER)->value;
00990 
00991                     if ($this->t->match(KEYWORD_IF))
00992                     {
00993                         if ($x->ecmaStrictMode)
00994                             throw $this->t->newSyntaxError('Illegal catch guard');
00995 
00996                         if (count($n->catchClauses) && !end($n->catchClauses)->guard)
00997                             throw $this->t->newSyntaxError('Guarded catch after unguarded');
00998 
00999                         $n2->guard = $this->Expression($x);
01000                     }
01001                     else
01002                     {
01003                         $n2->guard = null;
01004                     }
01005 
01006                     $this->t->mustMatch(OP_RIGHT_PAREN);
01007                     $n2->block = $this->Block($x);
01008                     array_push($n->catchClauses, $n2);
01009                 }
01010 
01011                 if ($this->t->match(KEYWORD_FINALLY))
01012                     $n->finallyBlock = $this->Block($x);
01013 
01014                 if (!count($n->catchClauses) && !$n->finallyBlock)
01015                     throw $this->t->newSyntaxError('Invalid try statement');
01016             return $n;
01017 
01018             case KEYWORD_CATCH:
01019             case KEYWORD_FINALLY:
01020                 throw $this->t->newSyntaxError($tt + ' without preceding try');
01021 
01022             case KEYWORD_THROW:
01023                 $n = new JSNode($this->t);
01024                 $n->value = $this->Expression($x);
01025             break;
01026 
01027             case KEYWORD_RETURN:
01028                 if (!$x->inFunction)
01029                     throw $this->t->newSyntaxError('Invalid return');
01030 
01031                 $n = new JSNode($this->t);
01032                 $tt = $this->t->peekOnSameLine();
01033                 if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY)
01034                     $n->value = $this->Expression($x);
01035                 else
01036                     $n->value = null;
01037             break;
01038 
01039             case KEYWORD_WITH:
01040                 $n = new JSNode($this->t);
01041                 $n->object = $this->ParenExpression($x);
01042                 $n->body = $this->nest($x, $n);
01043             return $n;
01044 
01045             case KEYWORD_VAR:
01046             case KEYWORD_CONST:
01047                     $n = $this->Variables($x);
01048             break;
01049 
01050             case TOKEN_CONDCOMMENT_START:
01051             case TOKEN_CONDCOMMENT_END:
01052                 $n = new JSNode($this->t);
01053             return $n;
01054 
01055             case KEYWORD_DEBUGGER:
01056                 $n = new JSNode($this->t);
01057             break;
01058 
01059             case TOKEN_NEWLINE:
01060             case OP_SEMICOLON:
01061                 $n = new JSNode($this->t, OP_SEMICOLON);
01062                 $n->expression = null;
01063             return $n;
01064 
01065             default:
01066                 if ($tt == TOKEN_IDENTIFIER)
01067                 {
01068                     $this->t->scanOperand = false;
01069                     $tt = $this->t->peek();
01070                     $this->t->scanOperand = true;
01071                     if ($tt == OP_COLON)
01072                     {
01073                         $label = $this->t->currentToken()->value;
01074                         $ss = $x->stmtStack;
01075                         for ($i = count($ss) - 1; $i >= 0; --$i)
01076                         {
01077                             if ($ss[$i]->label == $label)
01078                                 throw $this->t->newSyntaxError('Duplicate label');
01079                         }
01080 
01081                         $this->t->get();
01082                         $n = new JSNode($this->t, JS_LABEL);
01083                         $n->label = $label;
01084                         $n->statement = $this->nest($x, $n);
01085 
01086                         return $n;
01087                     }
01088                 }
01089 
01090                 $n = new JSNode($this->t, OP_SEMICOLON);
01091                 $this->t->unget();
01092                 $n->expression = $this->Expression($x);
01093                 $n->end = $n->expression->end;
01094             break;
01095         }
01096 
01097         if ($this->t->lineno == $this->t->currentToken()->lineno)
01098         {
01099             $tt = $this->t->peekOnSameLine();
01100             if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY)
01101                 throw $this->t->newSyntaxError('Missing ; before statement');
01102         }
01103 
01104         $this->t->match(OP_SEMICOLON);
01105 
01106         return $n;
01107     }
01108 
01109     private function FunctionDefinition($x, $requireName, $functionForm)
01110     {
01111         $f = new JSNode($this->t);
01112 
01113         if ($f->type != KEYWORD_FUNCTION)
01114             $f->type = ($f->value == 'get') ? JS_GETTER : JS_SETTER;
01115 
01116         if ($this->t->match(TOKEN_IDENTIFIER))
01117             $f->name = $this->t->currentToken()->value;
01118         elseif ($requireName)
01119             throw $this->t->newSyntaxError('Missing function identifier');
01120 
01121         $this->t->mustMatch(OP_LEFT_PAREN);
01122             $f->params = array();
01123 
01124         while (($tt = $this->t->get()) != OP_RIGHT_PAREN)
01125         {
01126             if ($tt != TOKEN_IDENTIFIER)
01127                 throw $this->t->newSyntaxError('Missing formal parameter');
01128 
01129             array_push($f->params, $this->t->currentToken()->value);
01130 
01131             if ($this->t->peek() != OP_RIGHT_PAREN)
01132                 $this->t->mustMatch(OP_COMMA);
01133         }
01134 
01135         $this->t->mustMatch(OP_LEFT_CURLY);
01136 
01137         $x2 = new JSCompilerContext(true);
01138         $f->body = $this->Script($x2);
01139 
01140         $this->t->mustMatch(OP_RIGHT_CURLY);
01141         $f->end = $this->t->currentToken()->end;
01142 
01143         $f->functionForm = $functionForm;
01144         if ($functionForm == DECLARED_FORM)
01145             array_push($x->funDecls, $f);
01146 
01147         return $f;
01148     }
01149 
01150     private function Variables($x)
01151     {
01152         $n = new JSNode($this->t);
01153 
01154         do
01155         {
01156             $this->t->mustMatch(TOKEN_IDENTIFIER);
01157 
01158             $n2 = new JSNode($this->t);
01159             $n2->name = $n2->value;
01160 
01161             if ($this->t->match(OP_ASSIGN))
01162             {
01163                 if ($this->t->currentToken()->assignOp)
01164                     throw $this->t->newSyntaxError('Invalid variable initialization');
01165 
01166                 $n2->initializer = $this->Expression($x, OP_COMMA);
01167             }
01168 
01169             $n2->readOnly = $n->type == KEYWORD_CONST;
01170 
01171             $n->addNode($n2);
01172             array_push($x->varDecls, $n2);
01173         }
01174         while ($this->t->match(OP_COMMA));
01175 
01176         return $n;
01177     }
01178 
01179     private function Expression($x, $stop=false)
01180     {
01181         $operators = array();
01182         $operands = array();
01183         $n = false;
01184 
01185         $bl = $x->bracketLevel;
01186         $cl = $x->curlyLevel;
01187         $pl = $x->parenLevel;
01188         $hl = $x->hookLevel;
01189 
01190         while (($tt = $this->t->get()) != TOKEN_END)
01191         {
01192             if ($tt == $stop &&
01193                 $x->bracketLevel == $bl &&
01194                 $x->curlyLevel == $cl &&
01195                 $x->parenLevel == $pl &&
01196                 $x->hookLevel == $hl
01197             )
01198             {
01199                 // Stop only if tt matches the optional stop parameter, and that
01200                 // token is not quoted by some kind of bracket.
01201                 break;
01202             }
01203 
01204             switch ($tt)
01205             {
01206                 case OP_SEMICOLON:
01207                     // NB: cannot be empty, Statement handled that.
01208                     break 2;
01209 
01210                 case OP_HOOK:
01211                     if ($this->t->scanOperand)
01212                         break 2;
01213 
01214                     while ( !empty($operators) &&
01215                         $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
01216                     )
01217                         $this->reduce($operators, $operands);
01218 
01219                     array_push($operators, new JSNode($this->t));
01220 
01221                     ++$x->hookLevel;
01222                     $this->t->scanOperand = true;
01223                     $n = $this->Expression($x);
01224 
01225                     if (!$this->t->match(OP_COLON))
01226                         break 2;
01227 
01228                     --$x->hookLevel;
01229                     array_push($operands, $n);
01230                 break;
01231 
01232                 case OP_COLON:
01233                     if ($x->hookLevel)
01234                         break 2;
01235 
01236                     throw $this->t->newSyntaxError('Invalid label');
01237                 break;
01238 
01239                 case OP_ASSIGN:
01240                     if ($this->t->scanOperand)
01241                         break 2;
01242 
01243                     // Use >, not >=, for right-associative ASSIGN
01244                     while ( !empty($operators) &&
01245                         $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
01246                     )
01247                         $this->reduce($operators, $operands);
01248 
01249                     array_push($operators, new JSNode($this->t));
01250                     end($operands)->assignOp = $this->t->currentToken()->assignOp;
01251                     $this->t->scanOperand = true;
01252                 break;
01253 
01254                 case KEYWORD_IN:
01255                     // An in operator should not be parsed if we're parsing the head of
01256                     // a for (...) loop, unless it is in the then part of a conditional
01257                     // expression, or parenthesized somehow.
01258                     if ($x->inForLoopInit && !$x->hookLevel &&
01259                         !$x->bracketLevel && !$x->curlyLevel &&
01260                         !$x->parenLevel
01261                     )
01262                         break 2;
01263                 // FALL THROUGH
01264                 case OP_COMMA:
01265                     // A comma operator should not be parsed if we're parsing the then part
01266                     // of a conditional expression unless it's parenthesized somehow.
01267                     if ($tt == OP_COMMA && $x->hookLevel &&
01268                         !$x->bracketLevel && !$x->curlyLevel &&
01269                         !$x->parenLevel
01270                     )
01271                         break 2;
01272                 // Treat comma as left-associative so reduce can fold left-heavy
01273                 // COMMA trees into a single array.
01274                 // FALL THROUGH
01275                 case OP_OR:
01276                 case OP_AND:
01277                 case OP_BITWISE_OR:
01278                 case OP_BITWISE_XOR:
01279                 case OP_BITWISE_AND:
01280                 case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE:
01281                 case OP_LT: case OP_LE: case OP_GE: case OP_GT:
01282                 case KEYWORD_INSTANCEOF:
01283                 case OP_LSH: case OP_RSH: case OP_URSH:
01284                 case OP_PLUS: case OP_MINUS:
01285                 case OP_MUL: case OP_DIV: case OP_MOD:
01286                 case OP_DOT:
01287                     if ($this->t->scanOperand)
01288                         break 2;
01289 
01290                     while ( !empty($operators) &&
01291                         $this->opPrecedence[end($operators)->type] >= $this->opPrecedence[$tt]
01292                     )
01293                         $this->reduce($operators, $operands);
01294 
01295                     if ($tt == OP_DOT)
01296                     {
01297                         $this->t->mustMatch(TOKEN_IDENTIFIER);
01298                         array_push($operands, new JSNode($this->t, OP_DOT, array_pop($operands), new JSNode($this->t)));
01299                     }
01300                     else
01301                     {
01302                         array_push($operators, new JSNode($this->t));
01303                         $this->t->scanOperand = true;
01304                     }
01305                 break;
01306 
01307                 case KEYWORD_DELETE: case KEYWORD_VOID: case KEYWORD_TYPEOF:
01308                 case OP_NOT: case OP_BITWISE_NOT: case OP_UNARY_PLUS: case OP_UNARY_MINUS:
01309                 case KEYWORD_NEW:
01310                     if (!$this->t->scanOperand)
01311                         break 2;
01312 
01313                     array_push($operators, new JSNode($this->t));
01314                 break;
01315 
01316                 case OP_INCREMENT: case OP_DECREMENT:
01317                     if ($this->t->scanOperand)
01318                     {
01319                         array_push($operators, new JSNode($this->t));  // prefix increment or decrement
01320                     }
01321                     else
01322                     {
01323                         // Don't cross a line boundary for postfix {in,de}crement.
01324                         $t = $this->t->tokens[($this->t->tokenIndex + $this->t->lookahead - 1) & 3];
01325                         if ($t && $t->lineno != $this->t->lineno)
01326                             break 2;
01327 
01328                         if (!empty($operators))
01329                         {
01330                             // Use >, not >=, so postfix has higher precedence than prefix.
01331                             while ($this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt])
01332                                 $this->reduce($operators, $operands);
01333                         }
01334 
01335                         $n = new JSNode($this->t, $tt, array_pop($operands));
01336                         $n->postfix = true;
01337                         array_push($operands, $n);
01338                     }
01339                 break;
01340 
01341                 case KEYWORD_FUNCTION:
01342                     if (!$this->t->scanOperand)
01343                         break 2;
01344 
01345                     array_push($operands, $this->FunctionDefinition($x, false, EXPRESSED_FORM));
01346                     $this->t->scanOperand = false;
01347                 break;
01348 
01349                 case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
01350                 case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
01351                     if (!$this->t->scanOperand)
01352                         break 2;
01353 
01354                     array_push($operands, new JSNode($this->t));
01355                     $this->t->scanOperand = false;
01356                 break;
01357 
01358                 case TOKEN_CONDCOMMENT_START:
01359                 case TOKEN_CONDCOMMENT_END:
01360                     if ($this->t->scanOperand)
01361                         array_push($operators, new JSNode($this->t));
01362                     else
01363                         array_push($operands, new JSNode($this->t));
01364                 break;
01365 
01366                 case OP_LEFT_BRACKET:
01367                     if ($this->t->scanOperand)
01368                     {
01369                         // Array initialiser.  Parse using recursive descent, as the
01370                         // sub-grammar here is not an operator grammar.
01371                         $n = new JSNode($this->t, JS_ARRAY_INIT);
01372                         while (($tt = $this->t->peek()) != OP_RIGHT_BRACKET)
01373                         {
01374                             if ($tt == OP_COMMA)
01375                             {
01376                                 $this->t->get();
01377                                 $n->addNode(null);
01378                                 continue;
01379                             }
01380 
01381                             $n->addNode($this->Expression($x, OP_COMMA));
01382                             if (!$this->t->match(OP_COMMA))
01383                                 break;
01384                         }
01385 
01386                         $this->t->mustMatch(OP_RIGHT_BRACKET);
01387                         array_push($operands, $n);
01388                         $this->t->scanOperand = false;
01389                     }
01390                     else
01391                     {
01392                         // Property indexing operator.
01393                         array_push($operators, new JSNode($this->t, JS_INDEX));
01394                         $this->t->scanOperand = true;
01395                         ++$x->bracketLevel;
01396                     }
01397                 break;
01398 
01399                 case OP_RIGHT_BRACKET:
01400                     if ($this->t->scanOperand || $x->bracketLevel == $bl)
01401                         break 2;
01402 
01403                     while ($this->reduce($operators, $operands)->type != JS_INDEX)
01404                         continue;
01405 
01406                     --$x->bracketLevel;
01407                 break;
01408 
01409                 case OP_LEFT_CURLY:
01410                     if (!$this->t->scanOperand)
01411                         break 2;
01412 
01413                     // Object initialiser.  As for array initialisers (see above),
01414                     // parse using recursive descent.
01415                     ++$x->curlyLevel;
01416                     $n = new JSNode($this->t, JS_OBJECT_INIT);
01417                     while (!$this->t->match(OP_RIGHT_CURLY))
01418                     {
01419                         do
01420                         {
01421                             $tt = $this->t->get();
01422                             $tv = $this->t->currentToken()->value;
01423                             if (($tv == 'get' || $tv == 'set') && $this->t->peek() == TOKEN_IDENTIFIER)
01424                             {
01425                                 if ($x->ecmaStrictMode)
01426                                     throw $this->t->newSyntaxError('Illegal property accessor');
01427 
01428                                 $n->addNode($this->FunctionDefinition($x, true, EXPRESSED_FORM));
01429                             }
01430                             else
01431                             {
01432                                 switch ($tt)
01433                                 {
01434                                     case TOKEN_IDENTIFIER:
01435                                     case TOKEN_NUMBER:
01436                                     case TOKEN_STRING:
01437                                         $id = new JSNode($this->t);
01438                                     break;
01439 
01440                                     case OP_RIGHT_CURLY:
01441                                         if ($x->ecmaStrictMode)
01442                                             throw $this->t->newSyntaxError('Illegal trailing ,');
01443                                     break 3;
01444 
01445                                     default:
01446                                         throw $this->t->newSyntaxError('Invalid property name');
01447                                 }
01448 
01449                                 $this->t->mustMatch(OP_COLON);
01450                                 $n->addNode(new JSNode($this->t, JS_PROPERTY_INIT, $id, $this->Expression($x, OP_COMMA)));
01451                             }
01452                         }
01453                         while ($this->t->match(OP_COMMA));
01454 
01455                         $this->t->mustMatch(OP_RIGHT_CURLY);
01456                         break;
01457                     }
01458 
01459                     array_push($operands, $n);
01460                     $this->t->scanOperand = false;
01461                     --$x->curlyLevel;
01462                 break;
01463 
01464                 case OP_RIGHT_CURLY:
01465                     if (!$this->t->scanOperand && $x->curlyLevel != $cl)
01466                         throw new Exception('PANIC: right curly botch');
01467                 break 2;
01468 
01469                 case OP_LEFT_PAREN:
01470                     if ($this->t->scanOperand)
01471                     {
01472                         array_push($operators, new JSNode($this->t, JS_GROUP));
01473                     }
01474                     else
01475                     {
01476                         while ( !empty($operators) &&
01477                             $this->opPrecedence[end($operators)->type] > $this->opPrecedence[KEYWORD_NEW]
01478                         )
01479                             $this->reduce($operators, $operands);
01480 
01481                         // Handle () now, to regularize the n-ary case for n > 0.
01482                         // We must set scanOperand in case there are arguments and
01483                         // the first one is a regexp or unary+/-.
01484                         $n = end($operators);
01485                         $this->t->scanOperand = true;
01486                         if ($this->t->match(OP_RIGHT_PAREN))
01487                         {
01488                             if ($n && $n->type == KEYWORD_NEW)
01489                             {
01490                                 array_pop($operators);
01491                                 $n->addNode(array_pop($operands));
01492                             }
01493                             else
01494                             {
01495                                 $n = new JSNode($this->t, JS_CALL, array_pop($operands), new JSNode($this->t, JS_LIST));
01496                             }
01497 
01498                             array_push($operands, $n);
01499                             $this->t->scanOperand = false;
01500                             break;
01501                         }
01502 
01503                         if ($n && $n->type == KEYWORD_NEW)
01504                             $n->type = JS_NEW_WITH_ARGS;
01505                         else
01506                             array_push($operators, new JSNode($this->t, JS_CALL));
01507                     }
01508 
01509                     ++$x->parenLevel;
01510                 break;
01511 
01512                 case OP_RIGHT_PAREN:
01513                     if ($this->t->scanOperand || $x->parenLevel == $pl)
01514                         break 2;
01515 
01516                     while (($tt = $this->reduce($operators, $operands)->type) != JS_GROUP &&
01517                         $tt != JS_CALL && $tt != JS_NEW_WITH_ARGS
01518                     )
01519                     {
01520                         continue;
01521                     }
01522 
01523                     if ($tt != JS_GROUP)
01524                     {
01525                         $n = end($operands);
01526                         if ($n->treeNodes[1]->type != OP_COMMA)
01527                             $n->treeNodes[1] = new JSNode($this->t, JS_LIST, $n->treeNodes[1]);
01528                         else
01529                             $n->treeNodes[1]->type = JS_LIST;
01530                     }
01531 
01532                     --$x->parenLevel;
01533                 break;
01534 
01535                 // Automatic semicolon insertion means we may scan across a newline
01536                 // and into the beginning of another statement.  If so, break out of
01537                 // the while loop and let the t.scanOperand logic handle errors.
01538                 default:
01539                     break 2;
01540             }
01541         }
01542 
01543         if ($x->hookLevel != $hl)
01544             throw $this->t->newSyntaxError('Missing : in conditional expression');
01545 
01546         if ($x->parenLevel != $pl)
01547             throw $this->t->newSyntaxError('Missing ) in parenthetical');
01548 
01549         if ($x->bracketLevel != $bl)
01550             throw $this->t->newSyntaxError('Missing ] in index expression');
01551 
01552         if ($this->t->scanOperand)
01553             throw $this->t->newSyntaxError('Missing operand');
01554 
01555         // Resume default mode, scanning for operands, not operators.
01556         $this->t->scanOperand = true;
01557         $this->t->unget();
01558 
01559         while (count($operators))
01560             $this->reduce($operators, $operands);
01561 
01562         return array_pop($operands);
01563     }
01564 
01565     private function ParenExpression($x)
01566     {
01567         $this->t->mustMatch(OP_LEFT_PAREN);
01568         $n = $this->Expression($x);
01569         $this->t->mustMatch(OP_RIGHT_PAREN);
01570 
01571         return $n;
01572     }
01573 
01574     // Statement stack and nested statement handler.
01575     private function nest($x, $node, $end = false)
01576     {
01577         array_push($x->stmtStack, $node);
01578         $n = $this->statement($x);
01579         array_pop($x->stmtStack);
01580 
01581         if ($end)
01582             $this->t->mustMatch($end);
01583 
01584         return $n;
01585     }
01586 
01587     private function reduce(&$operators, &$operands)
01588     {
01589         $n = array_pop($operators);
01590         $op = $n->type;
01591         $arity = $this->opArity[$op];
01592         $c = count($operands);
01593         if ($arity == -2)
01594         {
01595             // Flatten left-associative trees
01596             if ($c >= 2)
01597             {
01598                 $left = $operands[$c - 2];
01599                 if ($left->type == $op)
01600                 {
01601                     $right = array_pop($operands);
01602                     $left->addNode($right);
01603                     return $left;
01604                 }
01605             }
01606             $arity = 2;
01607         }
01608 
01609         // Always use push to add operands to n, to update start and end
01610         $a = array_splice($operands, $c - $arity);
01611         for ($i = 0; $i < $arity; $i++)
01612             $n->addNode($a[$i]);
01613 
01614         // Include closing bracket or postfix operator in [start,end]
01615         $te = $this->t->currentToken()->end;
01616         if ($n->end < $te)
01617             $n->end = $te;
01618 
01619         array_push($operands, $n);
01620 
01621         return $n;
01622     }
01623 }
01624 
/**
 * Mutable state carried through one compilation scope.
 *
 * The nesting counters are compared by the expression parser against the
 * values they had on entry to detect unbalanced "?:", "()" and "[]".
 */
class JSCompilerContext
{
    /** @var mixed value given to the constructor; false by default */
    public $inFunction = false;

    /** @var bool initially false */
    public $inForLoopInit = false;

    /** @var bool initially false */
    public $ecmaStrictMode = false;

    /** @var int nesting counter for "[", starts at 0 */
    public $bracketLevel = 0;

    /** @var int nesting counter for "{", starts at 0 */
    public $curlyLevel = 0;

    /** @var int nesting counter for "(", starts at 0 */
    public $parenLevel = 0;

    /** @var int nesting counter for "?" of conditional expressions, starts at 0 */
    public $hookLevel = 0;

    /** @var array stack of enclosing statement entries, used via array_push()/array_pop() */
    public $stmtStack = array();

    /** @var array initially empty */
    public $funDecls = array();

    /** @var array initially empty */
    public $varDecls = array();

    /**
     * @param mixed $inFunction stored as-is in $this->inFunction
     */
    public function __construct($inFunction)
    {
        $enclosing = $inFunction;
        $this->inFunction = $enclosing;
    }
}
01644 
/**
 * Parse-tree node.
 *
 * The five core fields are private and reached from outside through the
 * __get()/__set() overloads; any other property name assigned to a node is
 * created on the fly by __set(). Because that is the intended storage
 * mechanism, the class is marked with AllowDynamicProperties so PHP 8.2+
 * does not emit a deprecation for every such assignment. On older PHP
 * versions the attribute line is parsed as an ordinary "#" comment.
 */
#[\AllowDynamicProperties]
class JSNode
{
    private $type;
    private $value;
    private $lineno;
    private $start;
    private $end;

    /** @var array child nodes in the order they were added (entries may be null) */
    public $treeNodes = array();
    public $funDecls = array();
    public $varDecls = array();

    /**
     * Build a node from the tokenizer's current token.
     *
     * When a current token exists, value, line number and start/end offsets
     * are copied from it, and the node type is $type unless that is falsy, in
     * which case the token's type is used. Without a current token only the
     * type and the tokenizer's line number are recorded.
     *
     * Any arguments after $type are added as child nodes via addNode().
     *
     * @param object $t    tokenizer providing currentToken() and a lineno property
     * @param mixed  $type node type, or a falsy value to inherit the token's type
     */
    public function __construct($t, $type=0)
    {
        $token = $t->currentToken();
        if ($token)
        {
            $this->type = $type ? $type : $token->type;
            $this->value = $token->value;
            $this->lineno = $token->lineno;
            $this->start = $token->start;
            $this->end = $token->end;
        }
        else
        {
            $this->type = $type;
            $this->lineno = $t->lineno;
        }

        // func_get_args() rather than a variadic keeps the file usable on the
        // old PHP versions the rest of it is written for.
        $numargs = func_num_args();
        if ($numargs > 2)
        {
            $args = func_get_args();
            for ($i = 2; $i < $numargs; $i++)
                $this->addNode($args[$i]);
        }
    }

    // we don't want to bloat our object with all kind of specific properties, so we use overloading

    /**
     * Store $value under $name; creates the property when it does not exist.
     *
     * @param string $name
     * @param mixed  $value
     */
    public function __set($name, $value)
    {
        $this->$name = $value;
    }

    /**
     * Read a private or not-yet-created property.
     *
     * @param string $name
     * @return mixed the property's value, or null when it is unset or null
     */
    public function __get($name)
    {
        if (isset($this->$name))
            return $this->$name;

        return null;
    }

    /**
     * Append a child node and widen this node's [start,end] range to cover it.
     *
     * A null child is appended as a placeholder without touching the range.
     *
     * @param JSNode|null $node
     */
    public function addNode($node)
    {
        if ($node !== null)
        {
            if ($node->start < $this->start)
                $this->start = $node->start;
            if ($this->end < $node->end)
                $this->end = $node->end;
        }

        $this->treeNodes[] = $node;
    }
}
01708 
/**
 * Tokenizer feeding the parser.
 *
 * Tokens are kept in a four-slot circular buffer ($tokens, indexed with
 * "& 3"), which allows up to three tokens to be pushed back with unget()
 * and replayed by get().
 */
class JSTokenizer
{
    /** @var int byte offset in $source of the next unread character */
    private $cursor = 0;
    /** @var string the script being tokenized */
    private $source;

    /** @var array circular buffer of JSToken objects (slots 0..3) */
    public $tokens = array();
    /** @var int slot of the current token inside $tokens */
    public $tokenIndex = 0;
    /** @var int number of tokens pushed back with unget() and not yet re-read */
    public $lookahead = 0;
    /** @var bool when true, "\n" is not skipped as whitespace but reported as TOKEN_NEWLINE */
    public $scanNewlines = false;
    /** @var bool when true, "/" may start a regexp literal and "+"/"-" are reported as unary */
    public $scanOperand = true;

    public $filename;
    public $lineno;

    private $keywords = array(
        'break',
        'case', 'catch', 'const', 'continue',
        'debugger', 'default', 'delete', 'do',
        'else', 'enum',
        'false', 'finally', 'for', 'function',
        'if', 'in', 'instanceof',
        'new', 'null',
        'return',
        'switch',
        'this', 'throw', 'true', 'try', 'typeof',
        'var', 'void',
        'while', 'with'
    );

    // Order matters: the alternation built from this list tries entries left
    // to right, so longer operators must precede their prefixes.
    private $opTypeNames = array(
        ';', ',', '?', ':', '||', '&&', '|', '^',
        '&', '===', '==', '=', '!==', '!=', '<<', '<=',
        '<', '>>>', '>>', '>=', '>', '++', '--', '+',
        '-', '*', '/', '%', '!', '~', '.', '[',
        ']', '{', '}', '(', ')', '@*/'
    );

    // Operators that form a compound assignment when directly followed by "=".
    private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
    private $opRegExp;

    /**
     * Pre-build the anchored regular expression that recognises any operator.
     */
    public function __construct()
    {
        $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
    }

    /**
     * (Re)start tokenizing $source from its beginning.
     *
     * @param string $source   script text
     * @param string $filename name used in error messages; '[inline]' when empty
     * @param int    $lineno   line number assigned to the first line
     */
    public function init($source, $filename = '', $lineno = 1)
    {
        $this->source = $source;
        $this->filename = $filename ? $filename : '[inline]';
        $this->lineno = $lineno;

        $this->cursor = 0;
        $this->tokens = array();
        $this->tokenIndex = 0;
        $this->lookahead = 0;
        $this->scanNewlines = false;
        $this->scanOperand = true;
    }

    /**
     * Return unread input starting at the cursor.
     *
     * @param int|null $chunksize maximum number of bytes, or a falsy value for everything that is left
     * @return string
     */
    public function getInput($chunksize)
    {
        if ($chunksize)
            return substr($this->source, $this->cursor, $chunksize);

        return substr($this->source, $this->cursor);
    }

    /**
     * @return bool true when the next token is TOKEN_END
     */
    public function isDone()
    {
        return $this->peek() == TOKEN_END;
    }

    /**
     * Consume the next token if it is of type $tt.
     *
     * When the token does not match it is pushed back; unget() returns
     * nothing, so the whole expression is falsy in that case.
     *
     * @param mixed $tt expected token type
     * @return bool
     */
    public function match($tt)
    {
        return $this->get() == $tt || $this->unget();
    }

    /**
     * Consume the next token, which has to be of type $tt.
     *
     * @param mixed $tt expected token type
     * @return JSToken the matched token
     * @throws Exception when the next token is of another type
     */
    public function mustMatch($tt)
    {
        if (!$this->match($tt))
            throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');

        return $this->currentToken();
    }

    /**
     * Return the type of the next token without consuming it.
     *
     * With pushed-back tokens available and $scanNewlines set, a following
     * token on a different line than the tokenizer's current line is
     * reported as TOKEN_NEWLINE.
     *
     * @return mixed token type
     */
    public function peek()
    {
        if ($this->lookahead)
        {
            $next = $this->tokens[($this->tokenIndex + $this->lookahead) & 3];
            if ($this->scanNewlines && $next->lineno != $this->lineno)
                $tt = TOKEN_NEWLINE;
            else
                $tt = $next->type;
        }
        else
        {
            $tt = $this->get();
            $this->unget();
        }

        return $tt;
    }

    /**
     * peek() with newline detection switched on for the duration of the call.
     *
     * @return mixed token type, TOKEN_NEWLINE when a line break comes first
     */
    public function peekOnSameLine()
    {
        $this->scanNewlines = true;
        $tt = $this->peek();
        $this->scanNewlines = false;

        return $tt;
    }

    /**
     * @return JSToken|null the token in the current buffer slot, null before any token was read
     */
    public function currentToken()
    {
        if (!empty($this->tokens))
            return $this->tokens[$this->tokenIndex];

        return null;
    }

    /**
     * Advance to the next token and return its type.
     *
     * Pushed-back tokens are replayed first. Otherwise whitespace and
     * comments are skipped and one token is scanned from the source. Input is
     * examined in chunks of $chunksize bytes; whenever a chunk may have cut a
     * construct short, scanning is retried on all remaining input.
     *
     * @param int|null $chunksize bytes to inspect at a time; null for all remaining input
     * @return mixed type of the new current token
     * @throws Exception on an unterminated string literal or an illegal token
     */
    public function get($chunksize = 1000)
    {
        // Replay tokens that were pushed back before touching the source again.
        while($this->lookahead)
        {
            $this->lookahead--;
            $this->tokenIndex = ($this->tokenIndex + 1) & 3;
            $token = $this->tokens[$this->tokenIndex];
            if ($token->type != TOKEN_NEWLINE || $this->scanNewlines)
                return $token->type;
        }

        $conditional_comment = false;

        // strip whitespace and comments
        while(true)
        {
            $input = $this->getInput($chunksize);

            // whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
            $re = $this->scanNewlines ? '/^[ \r\t]+/' : '/^\s+/';
            if (preg_match($re, $input, $match))
            {
                $spaces = $match[0];
                $spacelen = strlen($spaces);
                $this->cursor += $spacelen;
                if (!$this->scanNewlines)
                    $this->lineno += substr_count($spaces, "\n");

                if ($spacelen == $chunksize)
                    continue; // complete chunk contained whitespace

                $input = $this->getInput($chunksize);
                if ($input == '' || $input[0] != '/')
                    break;
            }

            // Comments
            if (!preg_match('/^\/(?:\*(@(?:cc_on|if|elif|else|end))?.*?\*\/|\/[^\n]*)/s', $input, $match))
            {
                if (!$chunksize)
                    break;

                // retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
                $chunksize = null;
                continue;
            }

            // A comment that fills the whole chunk may have been cut off by the
            // chunk boundary (a long "//" comment has no terminator inside the
            // chunk). Match again on the full input so its tail is not
            // mistaken for code.
            if ($chunksize && strlen($match[0]) == $chunksize)
            {
                $chunksize = null;
                continue;
            }

            // check if this is a conditional (JScript) comment
            if (!empty($match[1]))
            {
                $match[0] = '/*' . $match[1];
                $conditional_comment = true;
                break;
            }
            else
            {
                $this->cursor += strlen($match[0]);
                $this->lineno += substr_count($match[0], "\n");
            }
        }

        if ($input == '')
        {
            $tt = TOKEN_END;
            $match = array('');
        }
        elseif ($conditional_comment)
        {
            $tt = TOKEN_CONDCOMMENT_START;
        }
        else
        {
            switch ($input[0])
            {
                case '0':
                    // hexadecimal; isset() guards a lone "0" at the very end of the input
                    if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
                    {
                        $tt = TOKEN_NUMBER;
                        break;
                    }
                // FALL THROUGH

                case '1': case '2': case '3': case '4': case '5':
                case '6': case '7': case '8': case '9':
                    // should always match
                    preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
                    $tt = TOKEN_NUMBER;
                break;

                case "'":
                    if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
                    {
                        $tt = TOKEN_STRING;
                    }
                    else
                    {
                        if ($chunksize)
                            return $this->get(null); // retry with a full chunk fetch

                        throw $this->newSyntaxError('Unterminated string literal');
                    }
                break;

                case '"':
                    if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
                    {
                        $tt = TOKEN_STRING;
                    }
                    else
                    {
                        if ($chunksize)
                            return $this->get(null); // retry with a full chunk fetch

                        throw $this->newSyntaxError('Unterminated string literal');
                    }
                break;

                case '/':
                    if ($this->scanOperand && preg_match('/^\/((?:\\\\.|\[(?:\\\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/', $input, $match))
                    {
                        $tt = TOKEN_REGEXP;
                        break;
                    }
                // FALL THROUGH

                case '|':
                case '^':
                case '&':
                case '<':
                case '>':
                case '+':
                case '-':
                case '*':
                case '%':
                case '=':
                case '!':
                    // should always match
                    preg_match($this->opRegExp, $input, $match);
                    $op = $match[0];
                    // isset() guards an operator that is the last thing in the input
                    $after = strlen($op);
                    if (in_array($op, $this->assignOps) && isset($input[$after]) && $input[$after] == '=')
                    {
                        $tt = OP_ASSIGN;
                        $match[0] .= '=';
                    }
                    else
                    {
                        $tt = $op;
                        if ($this->scanOperand)
                        {
                            if ($op == OP_PLUS)
                                $tt = OP_UNARY_PLUS;
                            elseif ($op == OP_MINUS)
                                $tt = OP_UNARY_MINUS;
                        }
                        $op = null;
                    }
                break;

                case '.':
                    if (preg_match('/^\.\d+(?:[eE][-+]?\d+)?/', $input, $match))
                    {
                        $tt = TOKEN_NUMBER;
                        break;
                    }
                // FALL THROUGH

                case ';':
                case ',':
                case '?':
                case ':':
                case '~':
                case '[':
                case ']':
                case '{':
                case '}':
                case '(':
                case ')':
                    // these are all single
                    $match = array($input[0]);
                    $tt = $input[0];
                break;

                case '@':
                    // check end of conditional comment
                    if (substr($input, 0, 3) == '@*/')
                    {
                        $match = array('@*/');
                        $tt = TOKEN_CONDCOMMENT_END;
                    }
                    else
                        throw $this->newSyntaxError('Illegal token');
                break;

                case "\n":
                    if ($this->scanNewlines)
                    {
                        $match = array("\n");
                        $tt = TOKEN_NEWLINE;
                    }
                    else
                        throw $this->newSyntaxError('Illegal token');
                break;

                default:
                    // Fast path for identifiers: word chars followed by whitespace or various other tokens.
                    // Note we don't need to exclude digits in the first char, as they've already been found
                    // above.
                    if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
                    {
                        // Character classes per ECMA-262 edition 5.1 section 7.6
                        // Per spec, must accept Unicode 3.0, *may* accept later versions.
                        // We'll take whatever PCRE understands, which should be more recent.
                        $identifierStartChars = "\\p{L}\\p{Nl}" .  # UnicodeLetter
                                                "\$" .
                                                "_";
                        $identifierPartChars  = $identifierStartChars .
                                                "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
                                                "\\p{Nd}" .        # UnicodeDigit
                                                "\\p{Pc}";         # UnicodeConnectorPunctuation
                        // Exactly four hex digits; same class as the decoding regex below.
                        $unicodeEscape = "\\\\u[0-9A-Fa-f]{4}";
                        $identifierRegex = "/^" .
                                           "(?:[$identifierStartChars]|$unicodeEscape)" .
                                           "(?:[$identifierPartChars]|$unicodeEscape)*" .
                                           "/uS";
                        if (preg_match($identifierRegex, $input, $match))
                        {
                            if (strpos($match[0], '\\') !== false) {
                                // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
                                // the original chars, but only within the boundaries of the identifier.
                                $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
                                        array(__CLASS__, 'unicodeEscapeCallback'),
                                        $match[0]);

                                // Since our original regex didn't de-escape the originals, we need to check for validity again.
                                // No need to worry about token boundaries, as anything outside the identifier is illegal!
                                if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
                                    throw $this->newSyntaxError('Illegal token');
                                }

                                // Per spec it _ought_ to work to use these escapes for keywords words as well...
                                // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
                                // that don't match the keyword.
                                if (in_array($decoded, $this->keywords)) {
                                    throw $this->newSyntaxError('Illegal token');
                                }

                                // TODO: save the decoded form for output?
                            }
                        }
                        else
                            throw $this->newSyntaxError('Illegal token');
                    }
                    $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
            }
        }

        // Store the new token in the next slot of the circular buffer,
        // re-using the JSToken object that already lives there if any.
        $this->tokenIndex = ($this->tokenIndex + 1) & 3;

        if (!isset($this->tokens[$this->tokenIndex]))
            $this->tokens[$this->tokenIndex] = new JSToken();

        $token = $this->tokens[$this->tokenIndex];
        $token->type = $tt;

        if ($tt == OP_ASSIGN)
            $token->assignOp = $op;

        $token->start = $this->cursor;

        $token->value = $match[0];
        $this->cursor += strlen($match[0]);

        $token->end = $this->cursor;
        $token->lineno = $this->lineno;

        return $tt;
    }

    /**
     * Push the current token back so the next get() returns it again.
     *
     * Deliberately returns nothing: match() relies on the falsy result.
     *
     * @throws Exception when more tokens are pushed back than the buffer can hold
     */
    public function unget()
    {
        if (++$this->lookahead == 4)
            throw $this->newSyntaxError('PANIC: too much lookahead!');

        $this->tokenIndex = ($this->tokenIndex - 1) & 3;
    }

    /**
     * Build (but do not throw) a parse error carrying file name and current line.
     *
     * @param string $m description of the problem
     * @return Exception
     */
    public function newSyntaxError($m)
    {
        return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
    }

    /**
     * preg_replace_callback() helper turning a \uXXXX match into its UTF-8 character.
     *
     * @param array $m regex match; $m[1] holds the four hex digits
     * @return string
     */
    public static function unicodeEscapeCallback($m)
    {
        return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
    }
}
02125 
/**
 * Plain record for one scanned token; all fields start out as null and are
 * filled in by the tokenizer.
 */
class JSToken
{
    /** @var mixed token type as returned by the tokenizer's get() */
    public $type;

    /** @var string matched source text of the token */
    public $value;

    /** @var int offset in the source where the token begins */
    public $start;

    /** @var int offset in the source just past the token */
    public $end;

    /** @var int tokenizer line number at the time the token was scanned */
    public $lineno;

    /**
     * @var string|null operator part of a compound assignment (e.g. '+' for '+=');
     *                  only written when the token is an assignment
     */
    public $assignOp;
}
02134 }