MediaWiki
REL1_22
|
<?php
/**
 * Token-based JavaScript minifier.
 *
 * The input is split into tokens (no parse tree is built) and the tokens are
 * written back out with comments and unnecessary whitespace removed. A small
 * state machine tracks just enough context to decide whether a slash starts a
 * regexp literal or is a division operator, and whether a newline between two
 * tokens has to be preserved because it triggers JavaScript's automatic
 * semicolon insertion.
 */
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 1; // unary operators
	const TYPE_INCR_OP = 2; // ++ and --
	const TYPE_BIN_OP = 3; // binary operators
	const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 5; // ?
	const TYPE_COLON = 6; // :
	const TYPE_COMMA = 7; // ,
	const TYPE_SEMICOLON = 8; // ;
	const TYPE_BRACE_OPEN = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN = 13; // keywords: break, continue, return, throw
	const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 16; // keywords: function
	const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Returns minified JavaScript code.
	 *
	 * NOTE: $maxLineLength is not a strict maximum. Tokens are never split, so a
	 *       single token longer than $maxLineLength still ends up on one line, and
	 *       no line break is added where it would trigger semicolon insertion or
	 *       separate an increment/decrement operator from its operand.
	 *
	 * @param string $s JavaScript code to minify
	 * @param bool $statementsOnOwnLine Whether to emit a newline after "{", ";" and
	 *        a closing parenthesis, and around "}", when the parser is at statement
	 *        level
	 * @param int $maxLineLength Line length after which a newline is inserted before
	 *        the next token; zero or a negative value disables line wrapping
	 * @return string|bool Minified code, or the return value of parseError() (false)
	 *         when a malformed numeric literal is encountered
	 */
	public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!' => self::TYPE_UN_OP,
			'~' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'new' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'!=' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'&' => self::TYPE_BIN_OP,
			'&&' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'*' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'.' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'<' => self::TYPE_BIN_OP,
			'<<' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'=' => self::TYPE_BIN_OP,
			'==' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			',' => self::TYPE_COMMA,
			';' => self::TYPE_SEMICOLON,
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,
			'(' => self::TYPE_PAREN_OPEN,
			'[' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,
			']' => self::TYPE_PAREN_CLOSE,
			'break' => self::TYPE_RETURN,
			'continue' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,
			'catch' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'if' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'with' => self::TYPE_IF,
			'case' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'finally' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'var' => self::TYPE_DO,
			'function' => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP => self::EXPRESSION,
				self::TYPE_INCR_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN => self::EXPRESSION_NO_NL,
				self::TYPE_IF => self::CONDITION,
				self::TYPE_FUNC => self::CONDITION,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COLON => self::STATEMENT,
				self::TYPE_COMMA => self::EXPRESSION,
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
				self::TYPE_HOOK => self::PAREN_EXPRESSION,
				self::TYPE_COLON => self::PAREN_EXPRESSION,
				self::TYPE_COMMA => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP => array( self::TYPE_COLON => true ),
			self::PAREN_EXPRESSION => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			)
		);

		// Rules for when newlines should be inserted if
		// $statementsOnOwnLine is enabled.
		// $newlineBefore is checked before switching state,
		// $newlineAfter is checked after
		$newlineBefore = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_CLOSE => true,
			),
		);
		$newlineAfter = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_CLOSE => true,
				self::TYPE_SEMICOLON => true,
			),
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					$end += strcspn( $s, $search, $end ) + 2;
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				$end--;
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for ( ; ; ) {
					do {
						$end += strcspn( $s, '/[\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
						break;
					}
					do {
						$end += strcspn( $s, ']\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// An unterminated string or regexp literal makes the scanning loops above
			// overshoot the end of the input by one. Clamp $end so that the line length
			// bookkeeping is correct and $s[$end - 1] below stays within the string.
			if ( $end > $length ) {
				$end = $length;
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP
			) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $end - $pos; // += strlen( $token )
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Output a newline after the token if required
			// This is checked before AND after switching state
			$newlineAdded = false;
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineBefore[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
				$newlineAdded = true;
			}

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}

			// Check for newline insertion again
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
			}
		}
		return $out;
	}

	/**
	 * Called by minify() when a malformed numeric literal is found.
	 *
	 * Currently the arguments are ignored and false is returned, which minify()
	 * passes straight back to its caller.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript at which the problem was detected
	 * @param string $errorMsg Description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}