MediaWiki  REL1_19
JavaScriptMinifier.php
Go to the documentation of this file.
00001 <?php
00016 class JavaScriptMinifier {
00017 
00018         /* Class constants */
00019         /* Parsing states.
00020          * The state machine is only necessary to decide whether to parse a slash as a division
00021          * operator or as a regexp literal.
00022          * States are named after the next expected item. We only distinguish states when the
00023          * distinction is relevant for our purpose.
00024          */
00025         const STATEMENT                = 0; // start of a statement; also the initial state of minify()
00026         const CONDITION                = 1; // after an if-like keyword or a statement-level `function`; an opening paren leads on
00027         const PROPERTY_ASSIGNMENT      = 2; // inside an object literal, before the colon of a property
00028         const EXPRESSION               = 3; // top-level expression; a literal moves on to EXPRESSION_OP
00029         const EXPRESSION_NO_NL         = 4; // entered after break/continue/return/throw; only relevant for semicolon insertion
00030         const EXPRESSION_OP            = 5; // top-level expression after a literal; a slash here is a division
00031         const EXPRESSION_FUNC          = 6; // after `function` within EXPRESSION; the opening brace starts statements
00032         const EXPRESSION_TERNARY       = 7; // entered after a `?`; used to determine the role of a colon
00033         const EXPRESSION_TERNARY_OP    = 8; // counterpart of EXPRESSION_OP after a `?`; a colon pops the stack
00034         const EXPRESSION_TERNARY_FUNC  = 9; // counterpart of EXPRESSION_FUNC after a `?`
00035         const PAREN_EXPRESSION         = 10; // expression which is not on the top level (after `(` or `[`)
00036         const PAREN_EXPRESSION_OP      = 11; // counterpart of EXPRESSION_OP within parens; a closing paren pops the stack
00037         const PAREN_EXPRESSION_FUNC    = 12; // counterpart of EXPRESSION_FUNC within parens
00038         const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal (after the colon)
00039         const PROPERTY_EXPRESSION_OP   = 14; // counterpart of EXPRESSION_OP within an object literal; a comma returns to PROPERTY_ASSIGNMENT
00040         const PROPERTY_EXPRESSION_FUNC = 15; // counterpart of EXPRESSION_FUNC within an object literal
00041 
00042         /* Token types */
00043         const TYPE_UN_OP       = 1; // unary operators
00044         const TYPE_INCR_OP     = 2; // ++ and --
00045         const TYPE_BIN_OP      = 3; // binary operators
00046         const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
00047         const TYPE_HOOK        = 5; // ?
00048         const TYPE_COLON       = 6; // :
00049         const TYPE_COMMA       = 7; // ,
00050         const TYPE_SEMICOLON   = 8; // ;
00051         const TYPE_BRACE_OPEN  = 9; // {
00052         const TYPE_BRACE_CLOSE = 10; // }
00053         const TYPE_PAREN_OPEN  = 11; // ( and [
00054         const TYPE_PAREN_CLOSE = 12; // ) and ]
00055         const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
00056         const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
00057         const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
00058         const TYPE_FUNC        = 16; // keywords: function
00059         const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens
00060         
00061         // Sanity limit to avoid excessive memory usage
00062         const STACK_LIMIT = 1000;
00063 
00064         /* Static functions */
00065 
00078         public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
00079                 // First we declare a few tables that contain our parsing rules
00080 
00081                 // $opChars : characters which can be combined without whitespace between them
00082                 $opChars = array(
00083                         '!' => true,
00084                         '"' => true,
00085                         '%' => true,
00086                         '&' => true,
00087                         "'" => true,
00088                         '(' => true,
00089                         ')' => true,
00090                         '*' => true,
00091                         '+' => true,
00092                         ',' => true,
00093                         '-' => true,
00094                         '.' => true,
00095                         '/' => true,
00096                         ':' => true,
00097                         ';' => true,
00098                         '<' => true,
00099                         '=' => true,
00100                         '>' => true,
00101                         '?' => true,
00102                         '[' => true,
00103                         ']' => true,
00104                         '^' => true,
00105                         '{' => true,
00106                         '|' => true,
00107                         '}' => true,
00108                         '~' => true
00109                 );
00110 
00111                 // $tokenTypes : maps keywords and operators to their corresponding token type
00112                 $tokenTypes = array(
00113                         '!'          => self::TYPE_UN_OP,
00114                         '~'          => self::TYPE_UN_OP,
00115                         'delete'     => self::TYPE_UN_OP,
00116                         'new'        => self::TYPE_UN_OP,
00117                         'typeof'     => self::TYPE_UN_OP,
00118                         'void'       => self::TYPE_UN_OP,
00119                         '++'         => self::TYPE_INCR_OP,
00120                         '--'         => self::TYPE_INCR_OP,
00121                         '!='         => self::TYPE_BIN_OP,
00122                         '!=='        => self::TYPE_BIN_OP,
00123                         '%'          => self::TYPE_BIN_OP,
00124                         '%='         => self::TYPE_BIN_OP,
00125                         '&'          => self::TYPE_BIN_OP,
00126                         '&&'         => self::TYPE_BIN_OP,
00127                         '&='         => self::TYPE_BIN_OP,
00128                         '*'          => self::TYPE_BIN_OP,
00129                         '*='         => self::TYPE_BIN_OP,
00130                         '+='         => self::TYPE_BIN_OP,
00131                         '-='         => self::TYPE_BIN_OP,
00132                         '.'          => self::TYPE_BIN_OP,
00133                         '/'          => self::TYPE_BIN_OP,
00134                         '/='         => self::TYPE_BIN_OP,
00135                         '<'          => self::TYPE_BIN_OP,
00136                         '<<'         => self::TYPE_BIN_OP,
00137                         '<<='        => self::TYPE_BIN_OP,
00138                         '<='         => self::TYPE_BIN_OP,
00139                         '='          => self::TYPE_BIN_OP,
00140                         '=='         => self::TYPE_BIN_OP,
00141                         '==='        => self::TYPE_BIN_OP,
00142                         '>'          => self::TYPE_BIN_OP,
00143                         '>='         => self::TYPE_BIN_OP,
00144                         '>>'         => self::TYPE_BIN_OP,
00145                         '>>='        => self::TYPE_BIN_OP,
00146                         '>>>'        => self::TYPE_BIN_OP,
00147                         '>>>='       => self::TYPE_BIN_OP,
00148                         '^'          => self::TYPE_BIN_OP,
00149                         '^='         => self::TYPE_BIN_OP,
00150                         '|'          => self::TYPE_BIN_OP,
00151                         '|='         => self::TYPE_BIN_OP,
00152                         '||'         => self::TYPE_BIN_OP,
00153                         'in'         => self::TYPE_BIN_OP,
00154                         'instanceof' => self::TYPE_BIN_OP,
00155                         '+'          => self::TYPE_ADD_OP,
00156                         '-'          => self::TYPE_ADD_OP,
00157                         '?'          => self::TYPE_HOOK,
00158                         ':'          => self::TYPE_COLON,
00159                         ','          => self::TYPE_COMMA,
00160                         ';'          => self::TYPE_SEMICOLON,
00161                         '{'          => self::TYPE_BRACE_OPEN,
00162                         '}'          => self::TYPE_BRACE_CLOSE,
00163                         '('          => self::TYPE_PAREN_OPEN,
00164                         '['          => self::TYPE_PAREN_OPEN,
00165                         ')'          => self::TYPE_PAREN_CLOSE,
00166                         ']'          => self::TYPE_PAREN_CLOSE,
00167                         'break'      => self::TYPE_RETURN,
00168                         'continue'   => self::TYPE_RETURN,
00169                         'return'     => self::TYPE_RETURN,
00170                         'throw'      => self::TYPE_RETURN,
00171                         'catch'      => self::TYPE_IF,
00172                         'for'        => self::TYPE_IF,
00173                         'if'         => self::TYPE_IF,
00174                         'switch'     => self::TYPE_IF,
00175                         'while'      => self::TYPE_IF,
00176                         'with'       => self::TYPE_IF,
00177                         'case'       => self::TYPE_DO,
00178                         'do'         => self::TYPE_DO,
00179                         'else'       => self::TYPE_DO,
00180                         'finally'    => self::TYPE_DO,
00181                         'try'        => self::TYPE_DO,
00182                         'var'        => self::TYPE_DO,
00183                         'function'   => self::TYPE_FUNC
00184                 );
00185 
00186                 // $goto : This is the main table for our state machine. For every state/token pair
00187                 //         the following state is defined. When no rule exists for a given pair,
00188                 //         the state is left unchanged.
00189                 $goto = array(
00190                         self::STATEMENT => array(
00191                                 self::TYPE_UN_OP      => self::EXPRESSION,
00192                                 self::TYPE_INCR_OP    => self::EXPRESSION,
00193                                 self::TYPE_ADD_OP     => self::EXPRESSION,
00194                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00195                                 self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
00196                                 self::TYPE_IF         => self::CONDITION,
00197                                 self::TYPE_FUNC       => self::CONDITION,
00198                                 self::TYPE_LITERAL    => self::EXPRESSION_OP
00199                         ),
00200                         self::CONDITION => array(
00201                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00202                         ),
00203                         self::PROPERTY_ASSIGNMENT => array(
00204                                 self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
00205                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00206                         ),
00207                         self::EXPRESSION => array(
00208                                 self::TYPE_SEMICOLON  => self::STATEMENT,
00209                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00210                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00211                                 self::TYPE_FUNC       => self::EXPRESSION_FUNC,
00212                                 self::TYPE_LITERAL    => self::EXPRESSION_OP
00213                         ),
00214                         self::EXPRESSION_NO_NL => array(
00215                                 self::TYPE_SEMICOLON  => self::STATEMENT,
00216                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00217                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00218                                 self::TYPE_FUNC       => self::EXPRESSION_FUNC,
00219                                 self::TYPE_LITERAL    => self::EXPRESSION_OP
00220                         ),
00221                         self::EXPRESSION_OP => array(
00222                                 self::TYPE_BIN_OP     => self::EXPRESSION,
00223                                 self::TYPE_ADD_OP     => self::EXPRESSION,
00224                                 self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
00225                                 self::TYPE_COLON      => self::STATEMENT,
00226                                 self::TYPE_COMMA      => self::EXPRESSION,
00227                                 self::TYPE_SEMICOLON  => self::STATEMENT,
00228                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00229                         ),
00230                         self::EXPRESSION_FUNC => array(
00231                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00232                         ),
00233                         self::EXPRESSION_TERNARY => array(
00234                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00235                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00236                                 self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
00237                                 self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
00238                         ),
00239                         self::EXPRESSION_TERNARY_OP => array(
00240                                 self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
00241                                 self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
00242                                 self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
00243                                 self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
00244                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00245                         ),
00246                         self::EXPRESSION_TERNARY_FUNC => array(
00247                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00248                         ),
00249                         self::PAREN_EXPRESSION => array(
00250                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00251                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00252                                 self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
00253                                 self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
00254                         ),
00255                         self::PAREN_EXPRESSION_OP => array(
00256                                 self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
00257                                 self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
00258                                 self::TYPE_HOOK       => self::PAREN_EXPRESSION,
00259                                 self::TYPE_COLON      => self::PAREN_EXPRESSION,
00260                                 self::TYPE_COMMA      => self::PAREN_EXPRESSION,
00261                                 self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
00262                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00263                         ),
00264                         self::PAREN_EXPRESSION_FUNC => array(
00265                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00266                         ),
00267                         self::PROPERTY_EXPRESSION => array(
00268                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00269                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00270                                 self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
00271                                 self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
00272                         ),
00273                         self::PROPERTY_EXPRESSION_OP => array(
00274                                 self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
00275                                 self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
00276                                 self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
00277                                 self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
00278                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00279                         ),
00280                         self::PROPERTY_EXPRESSION_FUNC => array(
00281                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00282                         )
00283                 );
00284 
00285                 // $push : This table contains the rules for when to push a state onto the stack.
00286                 //         The pushed state is the state to return to when the corresponding
00287                 //         closing token is found
00288                 $push = array(
00289                         self::STATEMENT => array(
00290                                 self::TYPE_BRACE_OPEN => self::STATEMENT,
00291                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00292                         ),
00293                         self::CONDITION => array(
00294                                 self::TYPE_PAREN_OPEN => self::STATEMENT
00295                         ),
00296                         self::PROPERTY_ASSIGNMENT => array(
00297                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
00298                         ),
00299                         self::EXPRESSION => array(
00300                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
00301                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00302                         ),
00303                         self::EXPRESSION_NO_NL => array(
00304                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
00305                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00306                         ),
00307                         self::EXPRESSION_OP => array(
00308                                 self::TYPE_HOOK       => self::EXPRESSION,
00309                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00310                         ),
00311                         self::EXPRESSION_FUNC => array(
00312                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
00313                         ),
00314                         self::EXPRESSION_TERNARY => array(
00315                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
00316                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
00317                         ),
00318                         self::EXPRESSION_TERNARY_OP => array(
00319                                 self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
00320                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
00321                         ),
00322                         self::EXPRESSION_TERNARY_FUNC => array(
00323                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
00324                         ),
00325                         self::PAREN_EXPRESSION => array(
00326                                 self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
00327                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
00328                         ),
00329                         self::PAREN_EXPRESSION_OP => array(
00330                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
00331                         ),
00332                         self::PAREN_EXPRESSION_FUNC => array(
00333                                 self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
00334                         ),
00335                         self::PROPERTY_EXPRESSION => array(
00336                                 self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
00337                                 self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
00338                         ),
00339                         self::PROPERTY_EXPRESSION_OP => array(
00340                                 self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
00341                         ),
00342                         self::PROPERTY_EXPRESSION_FUNC => array(
00343                                 self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
00344                         )
00345                 );
00346 
00347                 // $pop : Rules for when to pop a state from the stack
00348                 $pop = array(
00349                         self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
00350                         self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
00351                         self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
00352                         self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
00353                         self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
00354                         self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
00355                         self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
00356                         self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
00357                         self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
00358                         self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
00359                 );
00360 
00361                 // $semicolon : Rules for when a semicolon insertion is appropriate
00362                 $semicolon = array(
00363                         self::EXPRESSION_NO_NL => array(
00364                                 self::TYPE_UN_OP      => true,
00365                                 self::TYPE_INCR_OP    => true,
00366                                 self::TYPE_ADD_OP     => true,
00367                                 self::TYPE_BRACE_OPEN => true,
00368                                 self::TYPE_PAREN_OPEN => true,
00369                                 self::TYPE_RETURN     => true,
00370                                 self::TYPE_IF         => true,
00371                                 self::TYPE_DO         => true,
00372                                 self::TYPE_FUNC       => true,
00373                                 self::TYPE_LITERAL    => true
00374                         ),
00375                         self::EXPRESSION_OP => array(
00376                                 self::TYPE_UN_OP      => true,
00377                                 self::TYPE_INCR_OP    => true,
00378                                 self::TYPE_BRACE_OPEN => true,
00379                                 self::TYPE_RETURN     => true,
00380                                 self::TYPE_IF         => true,
00381                                 self::TYPE_DO         => true,
00382                                 self::TYPE_FUNC       => true,
00383                                 self::TYPE_LITERAL    => true
00384                         )
00385                 );
00386                 
00387                 // Rules for when newlines should be inserted if
00388                 // $statementsOnOwnLine is enabled.
00389                 // $newlineBefore is checked before switching state,
00390                 // $newlineAfter is checked after
00391                 $newlineBefore = array(
00392                         self::STATEMENT => array(
00393                                 self::TYPE_BRACE_CLOSE => true,
00394                         ),
00395                 );
00396                 $newlineAfter = array(
00397                         self::STATEMENT => array(
00398                                 self::TYPE_BRACE_OPEN => true,
00399                                 self::TYPE_PAREN_CLOSE => true,
00400                                 self::TYPE_SEMICOLON => true,
00401                         ),
00402                 );
00403 
00404                 // $divStates : Contains all states that can be followed by a division operator
00405                 $divStates = array(
00406                         self::EXPRESSION_OP          => true,
00407                         self::EXPRESSION_TERNARY_OP  => true,
00408                         self::PAREN_EXPRESSION_OP    => true,
00409                         self::PROPERTY_EXPRESSION_OP => true
00410                 );
00411 
00412                 // Here's where the minifying takes place: loop through the input, looking for tokens
00413                 // and appending them to $out, acting on the rules defined above when appropriate.
00414                 $out = '';
00415                 $pos = 0;
00416                 $length = strlen( $s );
00417                 $lineLength = 0;
00418                 $newlineFound = true;
00419                 $state = self::STATEMENT;
00420                 $stack = array();
00421                 $last = ';'; // Pretend that we have already seen a semicolon
00422                 while( $pos < $length ) {
00423                         // First, skip over any whitespace and multiline comments, recording whether we
00424                         // found any newline character
00425                         $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
00426                         if( !$skip ) {
00427                                 $ch = $s[$pos];
00428                                 if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
00429                                         // Multiline comment. Search for the end token or EOT.
00430                                         $end = strpos( $s, '*/', $pos + 2 );
00431                                         $skip = $end === false ? $length - $pos : $end - $pos + 2;
00432                                 }
00433                         }
00434                         if( $skip ) {
00435                                 // The semicolon insertion mechanism needs to know whether there was a newline
00436                                 // between two tokens, so record it now.
00437                                 if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
00438                                         $newlineFound = true;
00439                                 }
00440                                 $pos += $skip;
00441                                 continue;
00442                         }
00443                         // Handle C++-style comments and html comments, which are treated as single line
00444                         // comments by the browser, regardless of whether the end tag is on the same line.
00445                         // Handle --> the same way, but only if it's at the beginning of the line
00446                         if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
00447                                 || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
00448                                 || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
00449                         ) {
00450                                 $pos += strcspn( $s, "\r\n", $pos );
00451                                 continue;
00452                         }
00453 
00454                         // Find out which kind of token we're handling. $end will point past the end of it.
00455                         $end = $pos + 1;
00456                         // Handle string literals
00457                         if( $ch === "'" || $ch === '"' ) {
00458                                 // Search to the end of the string literal, skipping over backslash escapes
00459                                 $search = $ch . '\\';
00460                                 do{
00461                                         $end += strcspn( $s, $search, $end ) + 2;
00462                                 } while( $end - 2 < $length && $s[$end - 2] === '\\' );
00463                                 $end--;
00464                         // We have to distinguish between regexp literals and division operators
00465                         // A division operator is only possible in certain states
00466                         } elseif( $ch === '/' && !isset( $divStates[$state] ) ) {
00467                                 // Regexp literal, search to the end, skipping over backslash escapes and
00468                                 // character classes
00469                                 for( ; ; ) {
00470                                         do{
00471                                                 $end += strcspn( $s, '/[\\', $end ) + 2;
00472                                         } while( $end - 2 < $length && $s[$end - 2] === '\\' );
00473                                         $end--;
00474                                         if( $end - 1 >= $length || $s[$end - 1] === '/' ) {
00475                                                 break;
00476                                         }
00477                                         do{
00478                                                 $end += strcspn( $s, ']\\', $end ) + 2;
00479                                         } while( $end - 2 < $length && $s[$end - 2] === '\\' );
00480                                         $end--;
00481                                 };
00482                                 // Search past the regexp modifiers (gi)
00483                                 while( $end < $length && ctype_alpha( $s[$end] ) ) {
00484                                         $end++;
00485                                 }
00486                         } elseif(
00487                                 $ch === '0'
00488                                 && ($pos + 1 < $length) && ($s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
00489                         ) {
00490                                 // Hex numeric literal
00491                                 $end++; // x or X
00492                                 $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
00493                                 if ( !$len ) {
00494                                         return self::parseError($s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
00495                                 }
00496                                 $end += $len;
00497                         } elseif(
00498                                 ctype_digit( $ch )
00499                                 || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
00500                         ) {
00501                                 $end += strspn( $s, '0123456789', $end );
00502                                 $decimal = strspn( $s, '.', $end );
00503                                 if ($decimal) {
00504                                         if ( $decimal > 2 ) {
00505                                                 return self::parseError($s, $end, 'The number has too many decimal points' );
00506                                         }
00507                                         $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
00508                                 }
00509                                 $exponent = strspn( $s, 'eE', $end );
00510                                 if( $exponent ) {
00511                                         if ( $exponent > 1 ) {
00512                                                 return self::parseError($s, $end, 'Number with several E' );
00513                                         }
00514                                         $end++;
00515                                         
00516                                         // + sign is optional; - sign is required.
00517                                         $end += strspn( $s, '-+', $end );
00518                                         $len = strspn( $s, '0123456789', $end );
00519                                         if ( !$len ) {
00520                                                 return self::parseError($s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
00521                                         }
00522                                         $end += $len;
00523                                 }
00524                         } elseif( isset( $opChars[$ch] ) ) {
00525                                 // Punctuation character. Search for the longest matching operator.
00526                                 while(
00527                                         $end < $length
00528                                         && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
00529                                 ) {
00530                                         $end++;
00531                                 }
00532                         } else {
00533                                 // Identifier or reserved word. Search for the end by excluding whitespace and
00534                                 // punctuation.
00535                                 $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
00536                         }
00537 
00538                         // Now get the token type from our type array
00539                         $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
00540                         $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;
00541 
00542                         if( $newlineFound && isset( $semicolon[$state][$type] ) ) {
00543                                 // This token triggers the semicolon insertion mechanism of javascript. While we
00544                                 // could add the ; token here ourselves, keeping the newline has a few advantages.
00545                                 $out .= "\n";
00546                                 $state = self::STATEMENT;
00547                                 $lineLength = 0;
00548                         } elseif( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
00549                                         !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
00550                         {
00551                                 // This line would get too long if we added $token, so add a newline first.
00552                                 // Only do this if it won't trigger semicolon insertion and if it won't
00553                                 // put a postfix increment operator on its own line, which is illegal in js.
00554                                 $out .= "\n";
00555                                 $lineLength = 0;
00556                         // Check, whether we have to separate the token from the last one with whitespace
00557                         } elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
00558                                 $out .= ' ';
00559                                 $lineLength++;
00560                         // Don't accidentally create ++, -- or // tokens
00561                         } elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
00562                                 $out .= ' ';
00563                                 $lineLength++;
00564                         }
00565                         
00566                         $out .= $token;
00567                         $lineLength += $end - $pos; // += strlen( $token )
00568                         $last = $s[$end - 1];
00569                         $pos = $end;
00570                         $newlineFound = false;
00571                         
00572                         // Output a newline after the token if required
00573                         // This is checked before AND after switching state
00574                         $newlineAdded = false;
00575                         if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineBefore[$state][$type] ) ) {
00576                                 $out .= "\n";
00577                                 $lineLength = 0;
00578                                 $newlineAdded = true;
00579                         }
00580 
00581                         // Now that we have output our token, transition into the new state.
00582                         if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
00583                                 $stack[] = $push[$state][$type];
00584                         }
00585                         if( $stack && isset( $pop[$state][$type] ) ) {
00586                                 $state = array_pop( $stack );
00587                         } elseif( isset( $goto[$state][$type] ) ) {
00588                                 $state = $goto[$state][$type];
00589                         }
00590                         
00591                         // Check for newline insertion again
00592                         if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
00593                                 $out .= "\n";
00594                                 $lineLength = 0;
00595                         }
00596                 }
00597                 return $out;
00598         }
00599         
00600         static function parseError($fullJavascript, $position, $errorMsg) {
                      // Failure hook for the tokenizer in this class. Call sites use the form
                      // `return self::parseError( $s, $pos, '...' );`, so whatever this method
                      // returns becomes the result of the enclosing call.
                      //
                      // Call sites pass the source string being scanned as $fullJavascript, an
                      // offset into that string ($pos or $end) as $position, and a
                      // human-readable description of the problem as $errorMsg.
                      //
                      // None of the three arguments is used yet: the method reports nothing and
                      // only signals failure by returning false.
00601                 // TODO: Handle the error: trigger_error, throw exception, return false...
00602                 return false;
00603         }
00604 }