MediaWiki  REL1_24
JavaScriptMinifier.php
Go to the documentation of this file.
00001 <?php
00002 // @codingStandardsIgnoreFile File external to MediaWiki. Ignore coding conventions checks.
/**
 * Whitespace- and comment-stripping minifier for JavaScript source text.
 *
 * The minifier does not build a syntax tree. It tokenizes the input and runs a small
 * state machine whose only purposes are to decide whether a slash starts a regexp
 * literal or is a division operator, and to decide whether a newline between two
 * tokens must be kept because it would trigger JavaScript's automatic semicolon
 * insertion.
 */
class JavaScriptMinifier {

    /* Class constants */
    /* Parsing states.
     * The state machine is only necessary to decide whether to parse a slash as division
     * operator or as regexp literal.
     * States are named after the next expected item. We only distinguish states when the
     * distinction is relevant for our purpose.
     */
    const STATEMENT                = 0;
    const CONDITION                = 1;
    const PROPERTY_ASSIGNMENT      = 2;
    const EXPRESSION               = 3;
    const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
    const EXPRESSION_OP            = 5;
    const EXPRESSION_FUNC          = 6;
    const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
    const EXPRESSION_TERNARY_OP    = 8;
    const EXPRESSION_TERNARY_FUNC  = 9;
    const PAREN_EXPRESSION         = 10; // expression which is not on the top level
    const PAREN_EXPRESSION_OP      = 11;
    const PAREN_EXPRESSION_FUNC    = 12;
    const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
    const PROPERTY_EXPRESSION_OP   = 14;
    const PROPERTY_EXPRESSION_FUNC = 15;

    /* Token types */
    const TYPE_UN_OP       = 1; // unary operators
    const TYPE_INCR_OP     = 2; // ++ and --
    const TYPE_BIN_OP      = 3; // binary operators
    const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
    const TYPE_HOOK        = 5; // ?
    const TYPE_COLON       = 6; // :
    const TYPE_COMMA       = 7; // ,
    const TYPE_SEMICOLON   = 8; // ;
    const TYPE_BRACE_OPEN  = 9; // {
    const TYPE_BRACE_CLOSE = 10; // }
    const TYPE_PAREN_OPEN  = 11; // ( and [
    const TYPE_PAREN_CLOSE = 12; // ) and ]
    const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
    const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
    const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
    const TYPE_FUNC        = 16; // keywords: function
    const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

    // Sanity limit to avoid excessive memory usage
    const STACK_LIMIT = 1000;

    /* Static functions */

    /**
     * Returns minified JavaScript code.
     *
     * Whitespace and comments are removed. A newline is kept (instead of being replaced
     * by nothing) wherever dropping it could change the meaning of the program through
     * automatic semicolon insertion, and a newline is inserted before a token that would
     * make the current output line longer than $maxLineLength.
     *
     * @param string $s JavaScript code to minify
     * @param bool $statementsOnOwnLine Whether to additionally emit a newline after
     *   statement-level "{", ")" and ";" tokens and after a statement-level "}"
     * @param int $maxLineLength Maximum length of a single output line, counted in
     *   bytes; a value of 0 or less disables line breaking
     * @return string|bool Minified code, or whatever parseError() returns (currently
     *   false) when a malformed numeric literal is encountered
     */
    public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
        // First we declare a few tables that contain our parsing rules

        // $opChars : characters, which can be combined without whitespace in between them
        $opChars = array(
            '!' => true,
            '"' => true,
            '%' => true,
            '&' => true,
            "'" => true,
            '(' => true,
            ')' => true,
            '*' => true,
            '+' => true,
            ',' => true,
            '-' => true,
            '.' => true,
            '/' => true,
            ':' => true,
            ';' => true,
            '<' => true,
            '=' => true,
            '>' => true,
            '?' => true,
            '[' => true,
            ']' => true,
            '^' => true,
            '{' => true,
            '|' => true,
            '}' => true,
            '~' => true
        );

        // $tokenTypes : maps keywords and operators to their corresponding token type
        $tokenTypes = array(
            '!'          => self::TYPE_UN_OP,
            '~'          => self::TYPE_UN_OP,
            'delete'     => self::TYPE_UN_OP,
            'new'        => self::TYPE_UN_OP,
            'typeof'     => self::TYPE_UN_OP,
            'void'       => self::TYPE_UN_OP,
            '++'         => self::TYPE_INCR_OP,
            '--'         => self::TYPE_INCR_OP,
            '!='         => self::TYPE_BIN_OP,
            '!=='        => self::TYPE_BIN_OP,
            '%'          => self::TYPE_BIN_OP,
            '%='         => self::TYPE_BIN_OP,
            '&'          => self::TYPE_BIN_OP,
            '&&'         => self::TYPE_BIN_OP,
            '&='         => self::TYPE_BIN_OP,
            '*'          => self::TYPE_BIN_OP,
            '*='         => self::TYPE_BIN_OP,
            '+='         => self::TYPE_BIN_OP,
            '-='         => self::TYPE_BIN_OP,
            '.'          => self::TYPE_BIN_OP,
            '/'          => self::TYPE_BIN_OP,
            '/='         => self::TYPE_BIN_OP,
            '<'          => self::TYPE_BIN_OP,
            '<<'         => self::TYPE_BIN_OP,
            '<<='        => self::TYPE_BIN_OP,
            '<='         => self::TYPE_BIN_OP,
            '='          => self::TYPE_BIN_OP,
            '=='         => self::TYPE_BIN_OP,
            '==='        => self::TYPE_BIN_OP,
            '>'          => self::TYPE_BIN_OP,
            '>='         => self::TYPE_BIN_OP,
            '>>'         => self::TYPE_BIN_OP,
            '>>='        => self::TYPE_BIN_OP,
            '>>>'        => self::TYPE_BIN_OP,
            '>>>='       => self::TYPE_BIN_OP,
            '^'          => self::TYPE_BIN_OP,
            '^='         => self::TYPE_BIN_OP,
            '|'          => self::TYPE_BIN_OP,
            '|='         => self::TYPE_BIN_OP,
            '||'         => self::TYPE_BIN_OP,
            'in'         => self::TYPE_BIN_OP,
            'instanceof' => self::TYPE_BIN_OP,
            '+'          => self::TYPE_ADD_OP,
            '-'          => self::TYPE_ADD_OP,
            '?'          => self::TYPE_HOOK,
            ':'          => self::TYPE_COLON,
            ','          => self::TYPE_COMMA,
            ';'          => self::TYPE_SEMICOLON,
            '{'          => self::TYPE_BRACE_OPEN,
            '}'          => self::TYPE_BRACE_CLOSE,
            '('          => self::TYPE_PAREN_OPEN,
            '['          => self::TYPE_PAREN_OPEN,
            ')'          => self::TYPE_PAREN_CLOSE,
            ']'          => self::TYPE_PAREN_CLOSE,
            'break'      => self::TYPE_RETURN,
            'continue'   => self::TYPE_RETURN,
            'return'     => self::TYPE_RETURN,
            'throw'      => self::TYPE_RETURN,
            'catch'      => self::TYPE_IF,
            'for'        => self::TYPE_IF,
            'if'         => self::TYPE_IF,
            'switch'     => self::TYPE_IF,
            'while'      => self::TYPE_IF,
            'with'       => self::TYPE_IF,
            'case'       => self::TYPE_DO,
            'do'         => self::TYPE_DO,
            'else'       => self::TYPE_DO,
            'finally'    => self::TYPE_DO,
            'try'        => self::TYPE_DO,
            'var'        => self::TYPE_DO,
            'function'   => self::TYPE_FUNC
        );

        // $goto : This is the main table for our state machine. For every state/token pair
        //         the following state is defined. When no rule exists for a given pair,
        //         the state is left unchanged.
        $goto = array(
            self::STATEMENT => array(
                self::TYPE_UN_OP      => self::EXPRESSION,
                self::TYPE_INCR_OP    => self::EXPRESSION,
                self::TYPE_ADD_OP     => self::EXPRESSION,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
                self::TYPE_IF         => self::CONDITION,
                self::TYPE_FUNC       => self::CONDITION,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::CONDITION => array(
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PROPERTY_ASSIGNMENT => array(
                self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::EXPRESSION => array(
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::EXPRESSION_NO_NL => array(
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::EXPRESSION,
                self::TYPE_ADD_OP     => self::EXPRESSION,
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_COLON      => self::STATEMENT,
                self::TYPE_COMMA      => self::EXPRESSION,
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::EXPRESSION_TERNARY => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_OP => array(
                self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
                self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::EXPRESSION_TERNARY_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::PAREN_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
                self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
                self::TYPE_HOOK       => self::PAREN_EXPRESSION,
                self::TYPE_COLON      => self::PAREN_EXPRESSION,
                self::TYPE_COMMA      => self::PAREN_EXPRESSION,
                self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PAREN_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::PROPERTY_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
                self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
                self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
                self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PROPERTY_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            )
        );

        // $push : This table contains the rules for when to push a state onto the stack.
        //         The pushed state is the state to return to when the corresponding
        //         closing token is found
        $push = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::CONDITION => array(
                self::TYPE_PAREN_OPEN => self::STATEMENT
            ),
            self::PROPERTY_ASSIGNMENT => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
            ),
            self::EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_NO_NL => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_HOOK       => self::EXPRESSION,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_TERNARY => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_OP => array(
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_FUNC => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::PAREN_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_OP => array(
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_OP => array(
                self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
            )
        );

        // $pop : Rules for when to pop a state from the stack
        $pop = array(
            self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
            self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
            self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
            self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
            self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
            self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
        );

        // $semicolon : Rules for when a semicolon insertion is appropriate
        $semicolon = array(
            self::EXPRESSION_NO_NL => array(
                self::TYPE_UN_OP      => true,
                self::TYPE_INCR_OP    => true,
                self::TYPE_ADD_OP     => true,
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_PAREN_OPEN => true,
                self::TYPE_RETURN     => true,
                self::TYPE_IF         => true,
                self::TYPE_DO         => true,
                self::TYPE_FUNC       => true,
                self::TYPE_LITERAL    => true
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_UN_OP      => true,
                self::TYPE_INCR_OP    => true,
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_RETURN     => true,
                self::TYPE_IF         => true,
                self::TYPE_DO         => true,
                self::TYPE_FUNC       => true,
                self::TYPE_LITERAL    => true
            )
        );

        // Rules for when newlines should be inserted if
        // $statementsOnOwnLine is enabled.
        // $newlineBefore is checked before switching state,
        // $newlineAfter is checked after
        $newlineBefore = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_CLOSE => true,
            ),
        );
        $newlineAfter = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_PAREN_CLOSE => true,
                self::TYPE_SEMICOLON => true,
            ),
        );

        // $divStates : Contains all states that can be followed by a division operator
        $divStates = array(
            self::EXPRESSION_OP          => true,
            self::EXPRESSION_TERNARY_OP  => true,
            self::PAREN_EXPRESSION_OP    => true,
            self::PROPERTY_EXPRESSION_OP => true
        );

        // Here's where the minifying takes place: Loop through the input, looking for tokens
        // and output them to $out, taking actions to the above defined rules when appropriate.
        $out = '';
        $pos = 0;
        $length = strlen( $s );
        $lineLength = 0;
        $newlineFound = true;
        $state = self::STATEMENT;
        $stack = array();
        $last = ';'; // Pretend that we have already seen a semicolon
        while ( $pos < $length ) {
            // First, skip over any whitespace and multiline comments, recording whether we
            // found any newline character
            $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
            if ( !$skip ) {
                $ch = $s[$pos];
                if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
                    // Multiline comment. Search for the end token or EOT.
                    $end = strpos( $s, '*/', $pos + 2 );
                    $skip = $end === false ? $length - $pos : $end - $pos + 2;
                }
            }
            if ( $skip ) {
                // The semicolon insertion mechanism needs to know whether there was a newline
                // between two tokens, so record it now.
                if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
                    $newlineFound = true;
                }
                $pos += $skip;
                continue;
            }
            // Handle C++-style comments and html comments, which are treated as single line
            // comments by the browser, regardless of whether the end tag is on the same line.
            // Handle --> the same way, but only if it's at the beginning of the line
            if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
                || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
                || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
            ) {
                $pos += strcspn( $s, "\r\n", $pos );
                continue;
            }

            // Find out which kind of token we're handling. $end will point past the end of it.
            $end = $pos + 1;
            // Handle string literals
            if ( $ch === "'" || $ch === '"' ) {
                // Search to the end of the string literal, skipping over backslash escapes.
                // Each round jumps two bytes past the next quote or backslash; the loop
                // repeats while the byte that was found is a backslash.
                $search = $ch . '\\';
                do {
                    $end += strcspn( $s, $search, $end ) + 2;
                } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                $end--;
            // We have to distinguish between regexp literals and division operators
            // A division operator is only possible in certain states
            } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
                // Regexp literal, search to the end, skipping over backslash escapes and
                // character classes
                for ( ; ; ) {
                    do {
                        $end += strcspn( $s, '/[\\', $end ) + 2;
                    } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                    $end--;
                    if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
                        break;
                    }
                    // We stopped at "[": a slash inside a character class does not
                    // terminate the literal, so skip to the matching "]".
                    do {
                        $end += strcspn( $s, ']\\', $end ) + 2;
                    } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                    $end--;
                }
                // Search past the regexp modifiers (gi)
                while ( $end < $length && ctype_alpha( $s[$end] ) ) {
                    $end++;
                }
            } elseif (
                $ch === '0'
                && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
            ) {
                // Hex numeric literal
                $end++; // x or X
                $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
                if ( !$len ) {
                    return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
                }
                $end += $len;
            } elseif (
                ctype_digit( $ch )
                || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
            ) {
                $end += strspn( $s, '0123456789', $end );
                $decimal = strspn( $s, '.', $end );
                if ( $decimal ) {
                    if ( $decimal > 2 ) {
                        return self::parseError( $s, $end, 'The number has too many decimal points' );
                    }
                    $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
                }
                $exponent = strspn( $s, 'eE', $end );
                if ( $exponent ) {
                    if ( $exponent > 1 ) {
                        return self::parseError( $s, $end, 'Number with several E' );
                    }
                    $end++;

                    // An exponent sign (+ or -) may follow, but is not required here.
                    $end += strspn( $s, '-+', $end );
                    $len = strspn( $s, '0123456789', $end );
                    if ( !$len ) {
                        return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
                    }
                    $end += $len;
                }
            } elseif ( isset( $opChars[$ch] ) ) {
                // Punctuation character. Search for the longest matching operator.
                while (
                    $end < $length
                    && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
                ) {
                    $end++;
                }
            } else {
                // Identifier or reserved word. Search for the end by excluding whitespace and
                // punctuation.
                $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
            }

            // An unterminated string or regexp literal makes the scans above overshoot the
            // end of the input. Clamp $end so that the token length bookkeeping stays exact
            // and $s[$end - 1] below never reads an offset outside of the string.
            if ( $end > $length ) {
                $end = $length;
            }

            // Now get the token type from our type array
            $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
            $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

            if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
                // This token triggers the semicolon insertion mechanism of javascript. While we
                // could add the ; token here ourselves, keeping the newline has a few advantages.
                $out .= "\n";
                $state = self::STATEMENT;
                $lineLength = 0;
            } elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
                    !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
            {
                // This line would get too long if we added $token, so add a newline first.
                // Only do this if it won't trigger semicolon insertion and if it won't
                // put a postfix increment operator on its own line, which is illegal in js.
                $out .= "\n";
                $lineLength = 0;
            // Check, whether we have to separate the token from the last one with whitespace
            } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
                $out .= ' ';
                $lineLength++;
            // Don't accidentally create ++, -- or // tokens
            } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
                $out .= ' ';
                $lineLength++;
            }

            $out .= $token;
            $lineLength += $end - $pos; // += strlen( $token )
            $last = $s[$end - 1];
            $pos = $end;
            $newlineFound = false;

            // Output a newline after the token if required
            // This is checked before AND after switching state
            $newlineAdded = false;
            if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
                $out .= "\n";
                $lineLength = 0;
                $newlineAdded = true;
            }

            // Now that we have output our token, transition into the new state.
            if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
                $stack[] = $push[$state][$type];
            }
            if ( $stack && isset( $pop[$state][$type] ) ) {
                $state = array_pop( $stack );
            } elseif ( isset( $goto[$state][$type] ) ) {
                $state = $goto[$state][$type];
            }

            // Check for newline insertion again
            if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
                $out .= "\n";
                $lineLength = 0;
            }
        }
        return $out;
    }

    /**
     * Called by minify() when a malformed numeric literal is found.
     *
     * The arguments are currently unused; the method only produces the value that
     * minify() hands back to its caller.
     *
     * @param string $fullJavascript The complete input that was being minified
     * @param int $position Byte offset in $fullJavascript at which the problem was detected
     * @param string $errorMsg Human readable description of the problem
     * @return bool Always false
     */
    public static function parseError( $fullJavascript, $position, $errorMsg ) {
        // TODO: Handle the error: trigger_error, throw exception, return false...
        return false;
    }
}