MediaWiki  REL1_20
JavaScriptMinifier.php
Go to the documentation of this file.
00001 <?php
00018 class JavaScriptMinifier {
00019 
00020         /* Class constants */
00021         /* Parsing states.
00022          * The state machine is only necessary to decide whether to parse a slash as division
00023          * operator or as regexp literal.
00024          * States are named after the next expected item. We only distinguish states when the
00025          * distinction is relevant for our purpose.
00026          */
00027         const STATEMENT                = 0; // start of a statement; also the initial state of the parser
00028         const CONDITION                = 1; // after if/for/while/... or a statement-level 'function'; waiting for '('
00029         const PROPERTY_ASSIGNMENT      = 2; // after the '{' of an object literal or a ',' between properties; ':' leads to the value
00030         const EXPRESSION               = 3; // top-level expression, operand expected (a '/' here starts a regexp)
00031         const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
00032         const EXPRESSION_OP            = 5; // top-level expression, operator expected (a '/' here is division)
00033         const EXPRESSION_FUNC          = 6; // after 'function' within a top-level expression, up to the '{' of its body
00034         const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
00035         const EXPRESSION_TERNARY_OP    = 8; // operator expected after a '?'; a ':' here pops the state pushed at the '?'
00036         const EXPRESSION_TERNARY_FUNC  = 9; // after 'function' within a ternary branch, up to the '{' of its body
00037         const PAREN_EXPRESSION         = 10; // expression which is not on the top level
00038         const PAREN_EXPRESSION_OP      = 11; // operator expected inside ( ) or [ ]; the closing ')' or ']' pops the stack
00039         const PAREN_EXPRESSION_FUNC    = 12; // after 'function' inside ( ) or [ ], up to the '{' of its body
00040         const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
00041         const PROPERTY_EXPRESSION_OP   = 14; // operator expected in a property value; ',' returns to PROPERTY_ASSIGNMENT
00042         const PROPERTY_EXPRESSION_FUNC = 15; // after 'function' in a property value, up to the '{' of its body
00043 
00044         /* Token types */
00045         const TYPE_UN_OP       = 1; // unary operators
00046         const TYPE_INCR_OP     = 2; // ++ and --
00047         const TYPE_BIN_OP      = 3; // binary operators
00048         const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
00049         const TYPE_HOOK        = 5; // ?
00050         const TYPE_COLON       = 6; // :
00051         const TYPE_COMMA       = 7; // ,
00052         const TYPE_SEMICOLON   = 8; // ;
00053         const TYPE_BRACE_OPEN  = 9; // {
00054         const TYPE_BRACE_CLOSE = 10; // }
00055         const TYPE_PAREN_OPEN  = 11; // ( and [
00056         const TYPE_PAREN_CLOSE = 12; // ) and ]
00057         const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
00058         const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
00059         const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
00060         const TYPE_FUNC        = 16; // keywords: function
00061         const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens
00062         
00063         // Sanity limit to avoid excessive memory usage
00064         const STACK_LIMIT = 1000;
00065 
00066         /* Static functions */
00067 
00080         public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
00081                 // First we declare a few tables that contain our parsing rules
00082 
00083                 // $opChars : characters, which can be combined without whitespace in between them
00084                 $opChars = array(
00085                         '!' => true,
00086                         '"' => true,
00087                         '%' => true,
00088                         '&' => true,
00089                         "'" => true,
00090                         '(' => true,
00091                         ')' => true,
00092                         '*' => true,
00093                         '+' => true,
00094                         ',' => true,
00095                         '-' => true,
00096                         '.' => true,
00097                         '/' => true,
00098                         ':' => true,
00099                         ';' => true,
00100                         '<' => true,
00101                         '=' => true,
00102                         '>' => true,
00103                         '?' => true,
00104                         '[' => true,
00105                         ']' => true,
00106                         '^' => true,
00107                         '{' => true,
00108                         '|' => true,
00109                         '}' => true,
00110                         '~' => true
00111                 );
00112 
00113                 // $tokenTypes : maps keywords and operators to their corresponding token type
00114                 $tokenTypes = array(
00115                         '!'          => self::TYPE_UN_OP,
00116                         '~'          => self::TYPE_UN_OP,
00117                         'delete'     => self::TYPE_UN_OP,
00118                         'new'        => self::TYPE_UN_OP,
00119                         'typeof'     => self::TYPE_UN_OP,
00120                         'void'       => self::TYPE_UN_OP,
00121                         '++'         => self::TYPE_INCR_OP,
00122                         '--'         => self::TYPE_INCR_OP,
00123                         '!='         => self::TYPE_BIN_OP,
00124                         '!=='        => self::TYPE_BIN_OP,
00125                         '%'          => self::TYPE_BIN_OP,
00126                         '%='         => self::TYPE_BIN_OP,
00127                         '&'          => self::TYPE_BIN_OP,
00128                         '&&'         => self::TYPE_BIN_OP,
00129                         '&='         => self::TYPE_BIN_OP,
00130                         '*'          => self::TYPE_BIN_OP,
00131                         '*='         => self::TYPE_BIN_OP,
00132                         '+='         => self::TYPE_BIN_OP,
00133                         '-='         => self::TYPE_BIN_OP,
00134                         '.'          => self::TYPE_BIN_OP,
00135                         '/'          => self::TYPE_BIN_OP,
00136                         '/='         => self::TYPE_BIN_OP,
00137                         '<'          => self::TYPE_BIN_OP,
00138                         '<<'         => self::TYPE_BIN_OP,
00139                         '<<='        => self::TYPE_BIN_OP,
00140                         '<='         => self::TYPE_BIN_OP,
00141                         '='          => self::TYPE_BIN_OP,
00142                         '=='         => self::TYPE_BIN_OP,
00143                         '==='        => self::TYPE_BIN_OP,
00144                         '>'          => self::TYPE_BIN_OP,
00145                         '>='         => self::TYPE_BIN_OP,
00146                         '>>'         => self::TYPE_BIN_OP,
00147                         '>>='        => self::TYPE_BIN_OP,
00148                         '>>>'        => self::TYPE_BIN_OP,
00149                         '>>>='       => self::TYPE_BIN_OP,
00150                         '^'          => self::TYPE_BIN_OP,
00151                         '^='         => self::TYPE_BIN_OP,
00152                         '|'          => self::TYPE_BIN_OP,
00153                         '|='         => self::TYPE_BIN_OP,
00154                         '||'         => self::TYPE_BIN_OP,
00155                         'in'         => self::TYPE_BIN_OP,
00156                         'instanceof' => self::TYPE_BIN_OP,
00157                         '+'          => self::TYPE_ADD_OP,
00158                         '-'          => self::TYPE_ADD_OP,
00159                         '?'          => self::TYPE_HOOK,
00160                         ':'          => self::TYPE_COLON,
00161                         ','          => self::TYPE_COMMA,
00162                         ';'          => self::TYPE_SEMICOLON,
00163                         '{'          => self::TYPE_BRACE_OPEN,
00164                         '}'          => self::TYPE_BRACE_CLOSE,
00165                         '('          => self::TYPE_PAREN_OPEN,
00166                         '['          => self::TYPE_PAREN_OPEN,
00167                         ')'          => self::TYPE_PAREN_CLOSE,
00168                         ']'          => self::TYPE_PAREN_CLOSE,
00169                         'break'      => self::TYPE_RETURN,
00170                         'continue'   => self::TYPE_RETURN,
00171                         'return'     => self::TYPE_RETURN,
00172                         'throw'      => self::TYPE_RETURN,
00173                         'catch'      => self::TYPE_IF,
00174                         'for'        => self::TYPE_IF,
00175                         'if'         => self::TYPE_IF,
00176                         'switch'     => self::TYPE_IF,
00177                         'while'      => self::TYPE_IF,
00178                         'with'       => self::TYPE_IF,
00179                         'case'       => self::TYPE_DO,
00180                         'do'         => self::TYPE_DO,
00181                         'else'       => self::TYPE_DO,
00182                         'finally'    => self::TYPE_DO,
00183                         'try'        => self::TYPE_DO,
00184                         'var'        => self::TYPE_DO,
00185                         'function'   => self::TYPE_FUNC
00186                 );
00187 
00188                 // $goto : This is the main table for our state machine. For every state/token pair
00189                 //         the following state is defined. When no rule exists for a given pair,
00190                 //         the state is left unchanged.
00191                 $goto = array(
00192                         self::STATEMENT => array(
00193                                 self::TYPE_UN_OP      => self::EXPRESSION,
00194                                 self::TYPE_INCR_OP    => self::EXPRESSION,
00195                                 self::TYPE_ADD_OP     => self::EXPRESSION,
00196                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00197                                 self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
00198                                 self::TYPE_IF         => self::CONDITION,
00199                                 self::TYPE_FUNC       => self::CONDITION,
00200                                 self::TYPE_LITERAL    => self::EXPRESSION_OP
00201                         ),
00202                         self::CONDITION => array(
00203                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00204                         ),
00205                         self::PROPERTY_ASSIGNMENT => array(
00206                                 self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
00207                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00208                         ),
00209                         self::EXPRESSION => array(
00210                                 self::TYPE_SEMICOLON  => self::STATEMENT,
00211                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00212                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00213                                 self::TYPE_FUNC       => self::EXPRESSION_FUNC,
00214                                 self::TYPE_LITERAL    => self::EXPRESSION_OP
00215                         ),
00216                         self::EXPRESSION_NO_NL => array(
00217                                 self::TYPE_SEMICOLON  => self::STATEMENT,
00218                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00219                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00220                                 self::TYPE_FUNC       => self::EXPRESSION_FUNC,
00221                                 self::TYPE_LITERAL    => self::EXPRESSION_OP
00222                         ),
00223                         self::EXPRESSION_OP => array(
00224                                 self::TYPE_BIN_OP     => self::EXPRESSION,
00225                                 self::TYPE_ADD_OP     => self::EXPRESSION,
00226                                 self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
00227                                 self::TYPE_COLON      => self::STATEMENT,
00228                                 self::TYPE_COMMA      => self::EXPRESSION,
00229                                 self::TYPE_SEMICOLON  => self::STATEMENT,
00230                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00231                         ),
00232                         self::EXPRESSION_FUNC => array(
00233                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00234                         ),
00235                         self::EXPRESSION_TERNARY => array(
00236                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00237                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00238                                 self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
00239                                 self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
00240                         ),
00241                         self::EXPRESSION_TERNARY_OP => array(
00242                                 self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
00243                                 self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
00244                                 self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
00245                                 self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
00246                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00247                         ),
00248                         self::EXPRESSION_TERNARY_FUNC => array(
00249                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00250                         ),
00251                         self::PAREN_EXPRESSION => array(
00252                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00253                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00254                                 self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
00255                                 self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
00256                         ),
00257                         self::PAREN_EXPRESSION_OP => array(
00258                                 self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
00259                                 self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
00260                                 self::TYPE_HOOK       => self::PAREN_EXPRESSION,
00261                                 self::TYPE_COLON      => self::PAREN_EXPRESSION,
00262                                 self::TYPE_COMMA      => self::PAREN_EXPRESSION,
00263                                 self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
00264                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00265                         ),
00266                         self::PAREN_EXPRESSION_FUNC => array(
00267                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00268                         ),
00269                         self::PROPERTY_EXPRESSION => array(
00270                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
00271                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
00272                                 self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
00273                                 self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
00274                         ),
00275                         self::PROPERTY_EXPRESSION_OP => array(
00276                                 self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
00277                                 self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
00278                                 self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
00279                                 self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
00280                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
00281                         ),
00282                         self::PROPERTY_EXPRESSION_FUNC => array(
00283                                 self::TYPE_BRACE_OPEN => self::STATEMENT
00284                         )
00285                 );
00286 
00287                 // $push : This table contains the rules for when to push a state onto the stack.
00288                 //         The pushed state is the state to return to when the corresponding
00289                 //         closing token is found
00290                 $push = array(
00291                         self::STATEMENT => array(
00292                                 self::TYPE_BRACE_OPEN => self::STATEMENT,
00293                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00294                         ),
00295                         self::CONDITION => array(
00296                                 self::TYPE_PAREN_OPEN => self::STATEMENT
00297                         ),
00298                         self::PROPERTY_ASSIGNMENT => array(
00299                                 self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
00300                         ),
00301                         self::EXPRESSION => array(
00302                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
00303                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00304                         ),
00305                         self::EXPRESSION_NO_NL => array(
00306                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
00307                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00308                         ),
00309                         self::EXPRESSION_OP => array(
00310                                 self::TYPE_HOOK       => self::EXPRESSION,
00311                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
00312                         ),
00313                         self::EXPRESSION_FUNC => array(
00314                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
00315                         ),
00316                         self::EXPRESSION_TERNARY => array(
00317                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
00318                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
00319                         ),
00320                         self::EXPRESSION_TERNARY_OP => array(
00321                                 self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
00322                                 self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
00323                         ),
00324                         self::EXPRESSION_TERNARY_FUNC => array(
00325                                 self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
00326                         ),
00327                         self::PAREN_EXPRESSION => array(
00328                                 self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
00329                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
00330                         ),
00331                         self::PAREN_EXPRESSION_OP => array(
00332                                 self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
00333                         ),
00334                         self::PAREN_EXPRESSION_FUNC => array(
00335                                 self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
00336                         ),
00337                         self::PROPERTY_EXPRESSION => array(
00338                                 self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
00339                                 self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
00340                         ),
00341                         self::PROPERTY_EXPRESSION_OP => array(
00342                                 self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
00343                         ),
00344                         self::PROPERTY_EXPRESSION_FUNC => array(
00345                                 self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
00346                         )
00347                 );
00348 
00349                 // $pop : Rules for when to pop a state from the stack
00350                 $pop = array(
00351                         self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
00352                         self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
00353                         self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
00354                         self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
00355                         self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
00356                         self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
00357                         self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
00358                         self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
00359                         self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
00360                         self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
00361                 );
00362 
00363                 // $semicolon : Rules for when a semicolon insertion is appropriate
00364                 $semicolon = array(
00365                         self::EXPRESSION_NO_NL => array(
00366                                 self::TYPE_UN_OP      => true,
00367                                 self::TYPE_INCR_OP    => true,
00368                                 self::TYPE_ADD_OP     => true,
00369                                 self::TYPE_BRACE_OPEN => true,
00370                                 self::TYPE_PAREN_OPEN => true,
00371                                 self::TYPE_RETURN     => true,
00372                                 self::TYPE_IF         => true,
00373                                 self::TYPE_DO         => true,
00374                                 self::TYPE_FUNC       => true,
00375                                 self::TYPE_LITERAL    => true
00376                         ),
00377                         self::EXPRESSION_OP => array(
00378                                 self::TYPE_UN_OP      => true,
00379                                 self::TYPE_INCR_OP    => true,
00380                                 self::TYPE_BRACE_OPEN => true,
00381                                 self::TYPE_RETURN     => true,
00382                                 self::TYPE_IF         => true,
00383                                 self::TYPE_DO         => true,
00384                                 self::TYPE_FUNC       => true,
00385                                 self::TYPE_LITERAL    => true
00386                         )
00387                 );
00388                 
00389                 // Rules for when newlines should be inserted if
00390                 // $statementsOnOwnLine is enabled.
00391                 // $newlineBefore is checked before switching state,
00392                 // $newlineAfter is checked after
00393                 $newlineBefore = array(
00394                         self::STATEMENT => array(
00395                                 self::TYPE_BRACE_CLOSE => true,
00396                         ),
00397                 );
00398                 $newlineAfter = array(
00399                         self::STATEMENT => array(
00400                                 self::TYPE_BRACE_OPEN => true,
00401                                 self::TYPE_PAREN_CLOSE => true,
00402                                 self::TYPE_SEMICOLON => true,
00403                         ),
00404                 );
00405 
00406                 // $divStates : Contains all states that can be followed by a division operator
00407                 $divStates = array(
00408                         self::EXPRESSION_OP          => true,
00409                         self::EXPRESSION_TERNARY_OP  => true,
00410                         self::PAREN_EXPRESSION_OP    => true,
00411                         self::PROPERTY_EXPRESSION_OP => true
00412                 );
00413 
00414                 // Here's where the minifying takes place: loop through the input, looking for tokens
00415                 // and appending them to $out, applying the rules defined above where appropriate.
00416                 $out = '';
00417                 $pos = 0;
00418                 $length = strlen( $s );
00419                 $lineLength = 0;
00420                 $newlineFound = true;
00421                 $state = self::STATEMENT;
00422                 $stack = array();
00423                 $last = ';'; // Pretend that we have already seen a semicolon
00424                 while( $pos < $length ) {
00425                         // First, skip over any whitespace and multiline comments, recording whether we
00426                         // found any newline character
00427                         $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
00428                         if( !$skip ) {
00429                                 $ch = $s[$pos];
00430                                 if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
00431                                         // Multiline comment. Search for the end token or EOT.
00432                                         $end = strpos( $s, '*/', $pos + 2 );
00433                                         $skip = $end === false ? $length - $pos : $end - $pos + 2;
00434                                 }
00435                         }
00436                         if( $skip ) {
00437                                 // The semicolon insertion mechanism needs to know whether there was a newline
00438                                 // between two tokens, so record it now.
00439                                 if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
00440                                         $newlineFound = true;
00441                                 }
00442                                 $pos += $skip;
00443                                 continue;
00444                         }
00445                         // Handle C++-style comments and html comments, which are treated as single line
00446                         // comments by the browser, regardless of whether the end tag is on the same line.
00447                         // Handle --> the same way, but only if it's at the beginning of the line
00448                         if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
00449                                 || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
00450                                 || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
00451                         ) {
00452                                 $pos += strcspn( $s, "\r\n", $pos );
00453                                 continue;
00454                         }
00455 
00456                         // Find out which kind of token we're handling. $end will point past the end of it.
00457                         $end = $pos + 1;
00458                         // Handle string literals
00459                         if( $ch === "'" || $ch === '"' ) {
00460                                 // Search to the end of the string literal, skipping over backslash escapes
00461                                 $search = $ch . '\\';
00462                                 do{
00463                                         $end += strcspn( $s, $search, $end ) + 2;
00464                                 } while( $end - 2 < $length && $s[$end - 2] === '\\' );
00465                                 $end--;
00466                         // We have to distinguish between regexp literals and division operators
00467                         // A division operator is only possible in certain states
00468                         } elseif( $ch === '/' && !isset( $divStates[$state] ) ) {
00469                                 // Regexp literal, search to the end, skipping over backslash escapes and
00470                                 // character classes
00471                                 for( ; ; ) {
00472                                         do{
00473                                                 $end += strcspn( $s, '/[\\', $end ) + 2;
00474                                         } while( $end - 2 < $length && $s[$end - 2] === '\\' );
00475                                         $end--;
00476                                         if( $end - 1 >= $length || $s[$end - 1] === '/' ) {
00477                                                 break;
00478                                         }
00479                                         do{
00480                                                 $end += strcspn( $s, ']\\', $end ) + 2;
00481                                         } while( $end - 2 < $length && $s[$end - 2] === '\\' );
00482                                         $end--;
00483                                 };
00484                                 // Search past the regexp modifiers (gi)
00485                                 while( $end < $length && ctype_alpha( $s[$end] ) ) {
00486                                         $end++;
00487                                 }
00488                         } elseif(
00489                                 $ch === '0'
00490                                 && ($pos + 1 < $length) && ($s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
00491                         ) {
00492                                 // Hex numeric literal
00493                                 $end++; // x or X
00494                                 $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
00495                                 if ( !$len ) {
00496                                         return self::parseError($s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
00497                                 }
00498                                 $end += $len;
00499                         } elseif(
00500                                 ctype_digit( $ch )
00501                                 || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
00502                         ) {
00503                                 $end += strspn( $s, '0123456789', $end );
00504                                 $decimal = strspn( $s, '.', $end );
00505                                 if ($decimal) {
00506                                         if ( $decimal > 2 ) {
00507                                                 return self::parseError($s, $end, 'The number has too many decimal points' );
00508                                         }
00509                                         $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
00510                                 }
00511                                 $exponent = strspn( $s, 'eE', $end );
00512                                 if( $exponent ) {
00513                                         if ( $exponent > 1 ) {
00514                                                 return self::parseError($s, $end, 'Number with several E' );
00515                                         }
00516                                         $end++;
00517                                         
00518                                         // + sign is optional; - sign is required.
00519                                         $end += strspn( $s, '-+', $end );
00520                                         $len = strspn( $s, '0123456789', $end );
00521                                         if ( !$len ) {
00522                                                 return self::parseError($s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
00523                                         }
00524                                         $end += $len;
00525                                 }
00526                         } elseif( isset( $opChars[$ch] ) ) {
00527                                 // Punctuation character. Search for the longest matching operator.
00528                                 while(
00529                                         $end < $length
00530                                         && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
00531                                 ) {
00532                                         $end++;
00533                                 }
00534                         } else {
00535                                 // Identifier or reserved word. Search for the end by excluding whitespace and
00536                                 // punctuation.
00537                                 $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
00538                         }
00539 
00540                         // Now get the token type from our type array
00541                         $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
00542                         $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;
00543 
00544                         if( $newlineFound && isset( $semicolon[$state][$type] ) ) {
00545                                 // This token triggers the semicolon insertion mechanism of javascript. While we
00546                                 // could add the ; token here ourselves, keeping the newline has a few advantages.
00547                                 $out .= "\n";
00548                                 $state = self::STATEMENT;
00549                                 $lineLength = 0;
00550                         } elseif( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
00551                                         !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
00552                         {
00553                                 // This line would get too long if we added $token, so add a newline first.
00554                                 // Only do this if it won't trigger semicolon insertion and if it won't
00555                                 // put a postfix increment operator on its own line, which is illegal in js.
00556                                 $out .= "\n";
00557                                 $lineLength = 0;
00558                         // Check, whether we have to separate the token from the last one with whitespace
00559                         } elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
00560                                 $out .= ' ';
00561                                 $lineLength++;
00562                         // Don't accidentally create ++, -- or // tokens
00563                         } elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
00564                                 $out .= ' ';
00565                                 $lineLength++;
00566                         }
00567                         
00568                         $out .= $token;
00569                         $lineLength += $end - $pos; // += strlen( $token )
00570                         $last = $s[$end - 1];
00571                         $pos = $end;
00572                         $newlineFound = false;
00573                         
00574                         // Output a newline after the token if required
00575                         // This is checked before AND after switching state
00576                         $newlineAdded = false;
00577                         if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineBefore[$state][$type] ) ) {
00578                                 $out .= "\n";
00579                                 $lineLength = 0;
00580                                 $newlineAdded = true;
00581                         }
00582 
00583                         // Now that we have output our token, transition into the new state.
00584                         if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
00585                                 $stack[] = $push[$state][$type];
00586                         }
00587                         if( $stack && isset( $pop[$state][$type] ) ) {
00588                                 $state = array_pop( $stack );
00589                         } elseif( isset( $goto[$state][$type] ) ) {
00590                                 $state = $goto[$state][$type];
00591                         }
00592                         
00593                         // Check for newline insertion again
00594                         if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
00595                                 $out .= "\n";
00596                                 $lineLength = 0;
00597                         }
00598                 }
00599                 return $out;
00600         }
00601         
00602         static function parseError($fullJavascript, $position, $errorMsg) {
                      // Placeholder error hook for the tokenizer. The call sites visible in this
                      // class use it as `return self::parseError( $s, <offset>, '<message>' )` when
                      // a numeric literal is malformed (bad hex digits, too many decimal points,
                      // repeated or empty exponent), so the value returned here becomes the
                      // result those call sites hand back.
                      //
                      // $fullJavascript - the string being scanned (call sites pass $s)
                      // $position       - offset into that string (call sites pass $pos or $end)
                      // $errorMsg       - human-readable description of the problem
                      //
                      // None of the three arguments is used yet: the message and position are
                      // discarded and the failure is signalled only by the boolean false below.
00603                 // TODO: Handle the error: trigger_error, throw exception, return false...
00604                 return false;
00605         }
00606 }