MediaWiki  REL1_22
JavaScriptMinifier.php
Go to the documentation of this file.
00001 <?php
/**
 * Single-pass JavaScript minifier.
 *
 * The minifier walks the input once, splitting it into tokens and writing them to the
 * output with whitespace and comments removed. A small push-down state machine tracks
 * just enough syntactic context to tell a division operator from the start of a regexp
 * literal and to know where a newline must be kept so that JavaScript's automatic
 * semicolon insertion still applies.
 */
class JavaScriptMinifier {

    /* Class constants */
    /* Parsing states.
     * The state machine is only necessary to decide whether to parse a slash as division
     * operator or as regexp literal.
     * States are named after the next expected item. We only distinguish states when the
     * distinction is relevant for our purpose.
     */
    const STATEMENT                = 0;
    const CONDITION                = 1;
    const PROPERTY_ASSIGNMENT      = 2;
    const EXPRESSION               = 3;
    const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
    const EXPRESSION_OP            = 5;
    const EXPRESSION_FUNC          = 6;
    const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
    const EXPRESSION_TERNARY_OP    = 8;
    const EXPRESSION_TERNARY_FUNC  = 9;
    const PAREN_EXPRESSION         = 10; // expression which is not on the top level
    const PAREN_EXPRESSION_OP      = 11;
    const PAREN_EXPRESSION_FUNC    = 12;
    const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
    const PROPERTY_EXPRESSION_OP   = 14;
    const PROPERTY_EXPRESSION_FUNC = 15;

    /* Token types */
    const TYPE_UN_OP       = 1; // unary operators
    const TYPE_INCR_OP     = 2; // ++ and --
    const TYPE_BIN_OP      = 3; // binary operators
    const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
    const TYPE_HOOK        = 5; // ?
    const TYPE_COLON       = 6; // :
    const TYPE_COMMA       = 7; // ,
    const TYPE_SEMICOLON   = 8; // ;
    const TYPE_BRACE_OPEN  = 9; // {
    const TYPE_BRACE_CLOSE = 10; // }
    const TYPE_PAREN_OPEN  = 11; // ( and [
    const TYPE_PAREN_CLOSE = 12; // ) and ]
    const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
    const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
    const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
    const TYPE_FUNC        = 16; // keywords: function
    const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

    // Sanity limit to avoid excessive memory usage
    const STACK_LIMIT = 1000;

    /* Static functions */

    /**
     * Returns minified JavaScript code.
     *
     * Whitespace and comments are dropped. A single space is kept only where two adjacent
     * tokens would otherwise merge (two word-like tokens, or "+ +", "- -", "/ /"). A
     * newline is kept where removing it would change how automatic semicolon insertion
     * applies.
     *
     * Unterminated string or regexp literals are copied through to the end of the input
     * as-is; they are not reported as errors.
     *
     * @param string $s JavaScript code to minify
     * @param bool $statementsOnOwnLine When true, a newline is written after a "}" seen at
     *   statement level and after any "{", closing bracket or ";" that leaves the parser at
     *   statement level
     * @param int $maxLineLength Soft limit for the length of an output line. A newline is
     *   inserted before a token that would exceed it, unless that newline could trigger
     *   semicolon insertion or would precede a "++"/"--". Values <= 0 disable wrapping.
     * @return string|bool Minified code, or the result of parseError() (currently false)
     *   when a malformed numeric literal is encountered
     */
    public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
        // First we declare a few tables that contain our parsing rules

        // $opChars : characters, which can be combined without whitespace in between them
        $opChars = array(
            '!' => true,
            '"' => true,
            '%' => true,
            '&' => true,
            "'" => true,
            '(' => true,
            ')' => true,
            '*' => true,
            '+' => true,
            ',' => true,
            '-' => true,
            '.' => true,
            '/' => true,
            ':' => true,
            ';' => true,
            '<' => true,
            '=' => true,
            '>' => true,
            '?' => true,
            '[' => true,
            ']' => true,
            '^' => true,
            '{' => true,
            '|' => true,
            '}' => true,
            '~' => true
        );

        // $tokenTypes : maps keywords and operators to their corresponding token type
        $tokenTypes = array(
            '!'          => self::TYPE_UN_OP,
            '~'          => self::TYPE_UN_OP,
            'delete'     => self::TYPE_UN_OP,
            'new'        => self::TYPE_UN_OP,
            'typeof'     => self::TYPE_UN_OP,
            'void'       => self::TYPE_UN_OP,
            '++'         => self::TYPE_INCR_OP,
            '--'         => self::TYPE_INCR_OP,
            '!='         => self::TYPE_BIN_OP,
            '!=='        => self::TYPE_BIN_OP,
            '%'          => self::TYPE_BIN_OP,
            '%='         => self::TYPE_BIN_OP,
            '&'          => self::TYPE_BIN_OP,
            '&&'         => self::TYPE_BIN_OP,
            '&='         => self::TYPE_BIN_OP,
            '*'          => self::TYPE_BIN_OP,
            '*='         => self::TYPE_BIN_OP,
            '+='         => self::TYPE_BIN_OP,
            '-='         => self::TYPE_BIN_OP,
            '.'          => self::TYPE_BIN_OP,
            '/'          => self::TYPE_BIN_OP,
            '/='         => self::TYPE_BIN_OP,
            '<'          => self::TYPE_BIN_OP,
            '<<'         => self::TYPE_BIN_OP,
            '<<='        => self::TYPE_BIN_OP,
            '<='         => self::TYPE_BIN_OP,
            '='          => self::TYPE_BIN_OP,
            '=='         => self::TYPE_BIN_OP,
            '==='        => self::TYPE_BIN_OP,
            '>'          => self::TYPE_BIN_OP,
            '>='         => self::TYPE_BIN_OP,
            '>>'         => self::TYPE_BIN_OP,
            '>>='        => self::TYPE_BIN_OP,
            '>>>'        => self::TYPE_BIN_OP,
            '>>>='       => self::TYPE_BIN_OP,
            '^'          => self::TYPE_BIN_OP,
            '^='         => self::TYPE_BIN_OP,
            '|'          => self::TYPE_BIN_OP,
            '|='         => self::TYPE_BIN_OP,
            '||'         => self::TYPE_BIN_OP,
            'in'         => self::TYPE_BIN_OP,
            'instanceof' => self::TYPE_BIN_OP,
            '+'          => self::TYPE_ADD_OP,
            '-'          => self::TYPE_ADD_OP,
            '?'          => self::TYPE_HOOK,
            ':'          => self::TYPE_COLON,
            ','          => self::TYPE_COMMA,
            ';'          => self::TYPE_SEMICOLON,
            '{'          => self::TYPE_BRACE_OPEN,
            '}'          => self::TYPE_BRACE_CLOSE,
            '('          => self::TYPE_PAREN_OPEN,
            '['          => self::TYPE_PAREN_OPEN,
            ')'          => self::TYPE_PAREN_CLOSE,
            ']'          => self::TYPE_PAREN_CLOSE,
            'break'      => self::TYPE_RETURN,
            'continue'   => self::TYPE_RETURN,
            'return'     => self::TYPE_RETURN,
            'throw'      => self::TYPE_RETURN,
            'catch'      => self::TYPE_IF,
            'for'        => self::TYPE_IF,
            'if'         => self::TYPE_IF,
            'switch'     => self::TYPE_IF,
            'while'      => self::TYPE_IF,
            'with'       => self::TYPE_IF,
            'case'       => self::TYPE_DO,
            'do'         => self::TYPE_DO,
            'else'       => self::TYPE_DO,
            'finally'    => self::TYPE_DO,
            'try'        => self::TYPE_DO,
            'var'        => self::TYPE_DO,
            'function'   => self::TYPE_FUNC
        );

        // $goto : This is the main table for our state machine. For every state/token pair
        //         the following state is defined. When no rule exists for a given pair,
        //         the state is left unchanged.
        $goto = array(
            self::STATEMENT => array(
                self::TYPE_UN_OP      => self::EXPRESSION,
                self::TYPE_INCR_OP    => self::EXPRESSION,
                self::TYPE_ADD_OP     => self::EXPRESSION,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
                self::TYPE_IF         => self::CONDITION,
                self::TYPE_FUNC       => self::CONDITION,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::CONDITION => array(
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PROPERTY_ASSIGNMENT => array(
                self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::EXPRESSION => array(
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::EXPRESSION_NO_NL => array(
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::EXPRESSION,
                self::TYPE_ADD_OP     => self::EXPRESSION,
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_COLON      => self::STATEMENT,
                self::TYPE_COMMA      => self::EXPRESSION,
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::EXPRESSION_TERNARY => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_OP => array(
                self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
                self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::EXPRESSION_TERNARY_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::PAREN_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
                self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
                self::TYPE_HOOK       => self::PAREN_EXPRESSION,
                self::TYPE_COLON      => self::PAREN_EXPRESSION,
                self::TYPE_COMMA      => self::PAREN_EXPRESSION,
                self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PAREN_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::PROPERTY_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
                self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
                self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
                self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PROPERTY_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            )
        );

        // $push : This table contains the rules for when to push a state onto the stack.
        //         The pushed state is the state to return to when the corresponding
        //         closing token is found
        $push = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::CONDITION => array(
                self::TYPE_PAREN_OPEN => self::STATEMENT
            ),
            self::PROPERTY_ASSIGNMENT => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
            ),
            self::EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_NO_NL => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_HOOK       => self::EXPRESSION,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_TERNARY => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_OP => array(
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_FUNC => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::PAREN_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_OP => array(
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_OP => array(
                self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
            )
        );

        // $pop : Rules for when to pop a state from the stack
        $pop = array(
            self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
            self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
            self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
            self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
            self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
            self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
        );

        // $semicolon : Rules for when a semicolon insertion is appropriate
        $semicolon = array(
            self::EXPRESSION_NO_NL => array(
                self::TYPE_UN_OP      => true,
                self::TYPE_INCR_OP    => true,
                self::TYPE_ADD_OP     => true,
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_PAREN_OPEN => true,
                self::TYPE_RETURN     => true,
                self::TYPE_IF         => true,
                self::TYPE_DO         => true,
                self::TYPE_FUNC       => true,
                self::TYPE_LITERAL    => true
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_UN_OP      => true,
                self::TYPE_INCR_OP    => true,
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_RETURN     => true,
                self::TYPE_IF         => true,
                self::TYPE_DO         => true,
                self::TYPE_FUNC       => true,
                self::TYPE_LITERAL    => true
            )
        );

        // Rules for when newlines should be inserted if
        // $statementsOnOwnLine is enabled.
        // $newlineBefore is checked before switching state,
        // $newlineAfter is checked after
        $newlineBefore = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_CLOSE => true,
            ),
        );
        $newlineAfter = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_PAREN_CLOSE => true,
                self::TYPE_SEMICOLON => true,
            ),
        );

        // $divStates : Contains all states that can be followed by a division operator
        $divStates = array(
            self::EXPRESSION_OP          => true,
            self::EXPRESSION_TERNARY_OP  => true,
            self::PAREN_EXPRESSION_OP    => true,
            self::PROPERTY_EXPRESSION_OP => true
        );

        // Here's where the minifying takes place: Loop through the input, looking for tokens
        // and output them to $out, taking actions to the above defined rules when appropriate.
        $out = '';
        $pos = 0;
        $length = strlen( $s );
        $lineLength = 0;
        $newlineFound = true;
        $state = self::STATEMENT;
        $stack = array();
        $last = ';'; // Pretend that we have already seen a semicolon
        while ( $pos < $length ) {
            // First, skip over any whitespace and multiline comments, recording whether we
            // found any newline character
            $skip = strspn( $s, " \t\n\r\x0B\x0C", $pos );
            if ( !$skip ) {
                $ch = $s[$pos];
                if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
                    // Multiline comment. Search for the end token or EOT.
                    $end = strpos( $s, '*/', $pos + 2 );
                    $skip = $end === false ? $length - $pos : $end - $pos + 2;
                }
            }
            if ( $skip ) {
                // The semicolon insertion mechanism needs to know whether there was a newline
                // between two tokens, so record it now.
                if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
                    $newlineFound = true;
                }
                $pos += $skip;
                continue;
            }
            // Handle C++-style comments and html comments, which are treated as single line
            // comments by the browser, regardless of whether the end tag is on the same line.
            // Handle --> the same way, but only if it's at the beginning of the line
            if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
                || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
                || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
            ) {
                $pos += strcspn( $s, "\r\n", $pos );
                continue;
            }

            // Find out which kind of token we're handling. $end will point past the end of it.
            $end = $pos + 1;
            // Handle string literals
            if ( $ch === "'" || $ch === '"' ) {
                // Search to the end of the string literal, skipping over backslash escapes.
                // Each round jumps to one past the character following the next quote or
                // backslash; for an unterminated literal that can overshoot the input, so
                // $end is clamped before it is used as a scan offset again.
                $search = $ch . '\\';
                do {
                    $end = min( $length, $end );
                    $end += strcspn( $s, $search, $end ) + 2;
                } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                // Never let the token extend past the input (unterminated literal)
                $end = min( $length, $end - 1 );
            // We have to distinguish between regexp literals and division operators
            // A division operator is only possible in certain states
            } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
                // Regexp literal, search to the end, skipping over backslash escapes and
                // character classes
                for ( ; ; ) {
                    do {
                        // Clamp: a previous round may have overshot on unterminated input
                        $end = min( $length, $end );
                        $end += strcspn( $s, '/[\\', $end ) + 2;
                    } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                    $end--;
                    if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
                        break;
                    }
                    // We stopped at "[": skip to the end of the character class, inside of
                    // which an unescaped "/" does not terminate the literal
                    do {
                        $end = min( $length, $end );
                        $end += strcspn( $s, ']\\', $end ) + 2;
                    } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                    $end--;
                }
                // Never let the token extend past the input (unterminated literal)
                $end = min( $length, $end );
                // Search past the regexp modifiers (gi)
                while ( $end < $length && ctype_alpha( $s[$end] ) ) {
                    $end++;
                }
            } elseif (
                $ch === '0'
                && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
            ) {
                // Hex numeric literal
                $end++; // x or X
                $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
                if ( !$len ) {
                    return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
                }
                $end += $len;
            } elseif (
                ctype_digit( $ch )
                || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
            ) {
                // Decimal numeric literal: digits, optional fraction, optional exponent
                $end += strspn( $s, '0123456789', $end );
                $decimal = strspn( $s, '.', $end );
                if ( $decimal ) {
                    // Two dots are tolerated (as in "1..toString()"), more are not
                    if ( $decimal > 2 ) {
                        return self::parseError( $s, $end, 'The number has too many decimal points' );
                    }
                    $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
                }
                $exponent = strspn( $s, 'eE', $end );
                if ( $exponent ) {
                    if ( $exponent > 1 ) {
                        return self::parseError( $s, $end, 'Number with several E' );
                    }
                    $end++;

                    // + sign is optional; - sign is required.
                    $end += strspn( $s, '-+', $end );
                    $len = strspn( $s, '0123456789', $end );
                    if ( !$len ) {
                        return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
                    }
                    $end += $len;
                }
            } elseif ( isset( $opChars[$ch] ) ) {
                // Punctuation character. Search for the longest matching operator.
                while (
                    $end < $length
                    && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
                ) {
                    $end++;
                }
            } else {
                // Identifier or reserved word. Search for the end by excluding whitespace and
                // punctuation.
                $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\x0B\x0C\r", $end );
            }

            // Now get the token type from our type array
            $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
            $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

            if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
                // This token triggers the semicolon insertion mechanism of javascript. While we
                // could add the ; token here ourselves, keeping the newline has a few advantages.
                $out .= "\n";
                $state = self::STATEMENT;
                $lineLength = 0;
            } elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength
                && !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP
            ) {
                // This line would get too long if we added $token, so add a newline first.
                // Only do this if it won't trigger semicolon insertion and if it won't
                // put a postfix increment operator on its own line, which is illegal in js.
                $out .= "\n";
                $lineLength = 0;
            // Check, whether we have to separate the token from the last one with whitespace
            } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
                $out .= ' ';
                $lineLength++;
            // Don't accidentally create ++, -- or // tokens
            } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
                $out .= ' ';
                $lineLength++;
            }

            $out .= $token;
            $lineLength += $end - $pos; // += strlen( $token )
            $last = $s[$end - 1];
            $pos = $end;
            $newlineFound = false;

            // Output a newline after the token if required
            // This is checked before AND after switching state
            $newlineAdded = false;
            if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
                $out .= "\n";
                $lineLength = 0;
                $newlineAdded = true;
            }

            // Now that we have output our token, transition into the new state.
            if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
                $stack[] = $push[$state][$type];
            }
            if ( $stack && isset( $pop[$state][$type] ) ) {
                $state = array_pop( $stack );
            } elseif ( isset( $goto[$state][$type] ) ) {
                $state = $goto[$state][$type];
            }

            // Check for newline insertion again
            if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
                $out .= "\n";
                $lineLength = 0;
            }
        }
        return $out;
    }

    /**
     * Hook invoked by minify() when the input cannot be tokenized.
     *
     * Currently the arguments are ignored and false is returned, which minify() passes
     * straight back to its caller.
     *
     * @param string $fullJavascript The complete input that was being minified
     * @param int $position Byte offset in $fullJavascript at which the problem was detected
     * @param string $errorMsg Human-readable description of the problem
     * @return bool Always false
     */
    public static function parseError( $fullJavascript, $position, $errorMsg ) {
        // TODO: Handle the error: trigger_error, throw exception, return false...
        return false;
    }
}