MediaWiki
REL1_19
|
<?php
/**
 * Whitespace- and comment-stripping JavaScript minifier.
 *
 * The minifier never renames identifiers or rewrites expressions. It tokenizes
 * the input, drops whitespace and comments, and emits the tokens again with the
 * minimum separators needed to keep the token stream unchanged. A small state
 * machine tracks the syntactic context, which is needed to tell a regexp
 * literal from a division operator and to decide where a newline must be kept
 * so that JavaScript's automatic semicolon insertion still applies.
 */
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT                = 0;
	const CONDITION                = 1;
	const PROPERTY_ASSIGNMENT      = 2;
	const EXPRESSION               = 3;
	const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP            = 5;
	const EXPRESSION_FUNC          = 6;
	const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP    = 8;
	const EXPRESSION_TERNARY_FUNC  = 9;
	const PAREN_EXPRESSION         = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP      = 11;
	const PAREN_EXPRESSION_FUNC    = 12;
	const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP   = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP       = 1; // unary operators
	const TYPE_INCR_OP     = 2; // ++ and --
	const TYPE_BIN_OP      = 3; // binary operators
	const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK        = 5; // ?
	const TYPE_COLON       = 6; // :
	const TYPE_COMMA       = 7; // ,
	const TYPE_SEMICOLON   = 8; // ;
	const TYPE_BRACE_OPEN  = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN  = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
	const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC        = 16; // keywords: function
	const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Returns minified JavaScript code.
	 *
	 * NOTE: $maxLineLength is not a strict maximum. A single token (for example a
	 *       long string literal) is never split, and no line break is inserted
	 *       where it would trigger semicolon insertion or detach a ++/-- operator.
	 *
	 * @param string $s JavaScript code to minify
	 * @param bool $statementsOnOwnLine Whether to emit a newline after statement
	 *        boundaries (see $newlineBefore / $newlineAfter below)
	 * @param int $maxLineLength Soft limit for the length of an output line; any
	 *        value <= 0 disables line wrapping
	 * @return string|false Minified code, or the result of parseError() (currently
	 *         false) when a malformed numeric literal is encountered
	 */
	public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!'          => self::TYPE_UN_OP,
			'~'          => self::TYPE_UN_OP,
			'delete'     => self::TYPE_UN_OP,
			'new'        => self::TYPE_UN_OP,
			'typeof'     => self::TYPE_UN_OP,
			'void'       => self::TYPE_UN_OP,
			'++'         => self::TYPE_INCR_OP,
			'--'         => self::TYPE_INCR_OP,
			'!='         => self::TYPE_BIN_OP,
			'!=='        => self::TYPE_BIN_OP,
			'%'          => self::TYPE_BIN_OP,
			'%='         => self::TYPE_BIN_OP,
			'&'          => self::TYPE_BIN_OP,
			'&&'         => self::TYPE_BIN_OP,
			'&='         => self::TYPE_BIN_OP,
			'*'          => self::TYPE_BIN_OP,
			'*='         => self::TYPE_BIN_OP,
			'+='         => self::TYPE_BIN_OP,
			'-='         => self::TYPE_BIN_OP,
			'.'          => self::TYPE_BIN_OP,
			'/'          => self::TYPE_BIN_OP,
			'/='         => self::TYPE_BIN_OP,
			'<'          => self::TYPE_BIN_OP,
			'<<'         => self::TYPE_BIN_OP,
			'<<='        => self::TYPE_BIN_OP,
			'<='         => self::TYPE_BIN_OP,
			'='          => self::TYPE_BIN_OP,
			'=='         => self::TYPE_BIN_OP,
			'==='        => self::TYPE_BIN_OP,
			'>'          => self::TYPE_BIN_OP,
			'>='         => self::TYPE_BIN_OP,
			'>>'         => self::TYPE_BIN_OP,
			'>>='        => self::TYPE_BIN_OP,
			'>>>'        => self::TYPE_BIN_OP,
			'>>>='       => self::TYPE_BIN_OP,
			'^'          => self::TYPE_BIN_OP,
			'^='         => self::TYPE_BIN_OP,
			'|'          => self::TYPE_BIN_OP,
			'|='         => self::TYPE_BIN_OP,
			'||'         => self::TYPE_BIN_OP,
			'in'         => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+'          => self::TYPE_ADD_OP,
			'-'          => self::TYPE_ADD_OP,
			'?'          => self::TYPE_HOOK,
			':'          => self::TYPE_COLON,
			','          => self::TYPE_COMMA,
			';'          => self::TYPE_SEMICOLON,
			'{'          => self::TYPE_BRACE_OPEN,
			'}'          => self::TYPE_BRACE_CLOSE,
			'('          => self::TYPE_PAREN_OPEN,
			'['          => self::TYPE_PAREN_OPEN,
			')'          => self::TYPE_PAREN_CLOSE,
			']'          => self::TYPE_PAREN_CLOSE,
			'break'      => self::TYPE_RETURN,
			'continue'   => self::TYPE_RETURN,
			'return'     => self::TYPE_RETURN,
			'throw'      => self::TYPE_RETURN,
			'catch'      => self::TYPE_IF,
			'for'        => self::TYPE_IF,
			'if'         => self::TYPE_IF,
			'switch'     => self::TYPE_IF,
			'while'      => self::TYPE_IF,
			'with'       => self::TYPE_IF,
			'case'       => self::TYPE_DO,
			'do'         => self::TYPE_DO,
			'else'       => self::TYPE_DO,
			'finally'    => self::TYPE_DO,
			'try'        => self::TYPE_DO,
			'var'        => self::TYPE_DO,
			'function'   => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP      => self::EXPRESSION,
				self::TYPE_INCR_OP    => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
				self::TYPE_IF         => self::CONDITION,
				self::TYPE_FUNC       => self::CONDITION,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COLON      => self::STATEMENT,
				self::TYPE_COMMA      => self::EXPRESSION,
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
				self::TYPE_HOOK       => self::PAREN_EXPRESSION,
				self::TYPE_COLON      => self::PAREN_EXPRESSION,
				self::TYPE_COMMA      => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
			self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_ADD_OP     => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			)
		);

		// Rules for when newlines should be inserted if
		// $statementsOnOwnLine is enabled.
		// $newlineBefore is checked before switching state,
		// $newlineAfter is checked after
		$newlineBefore = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_CLOSE => true,
			),
		);
		$newlineAfter = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN  => true,
				self::TYPE_PAREN_CLOSE => true,
				self::TYPE_SEMICOLON   => true,
			),
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP          => true,
			self::EXPRESSION_TERNARY_OP  => true,
			self::PAREN_EXPRESSION_OP    => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					$end += strcspn( $s, $search, $end ) + 2;
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				$end--;
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for ( ; ; ) {
					do {
						$end += strcspn( $s, '/[\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
						break;
					}
					do {
						$end += strcspn( $s, ']\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// The exponent may carry a sign; any run of + and - characters
					// directly after the e/E is consumed as part of the token.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// An unterminated string or regexp literal makes the scans above overshoot
			// the end of the input. Clamp $end so that the $s[$end - 1] lookup below
			// stays inside the string; substr() yields the same token either way.
			if ( $end > $length ) {
				$end = $length;
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
			{
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $end - $pos; // += strlen( $token )
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Output a newline after the token if required.
			// This is checked before AND after switching state; $newlineAdded makes sure
			// at most one newline is emitted per token.
			$newlineAdded = false;
			if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
				$newlineAdded = true;
			}

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}

			// Check for newline insertion again
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
			}
		}
		return $out;
	}

	/**
	 * Hook invoked by minify() when it meets a malformed numeric literal.
	 *
	 * The arguments are currently ignored; the method only signals failure.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript where the problem was detected
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}