MediaWiki
REL1_24
|
<?php
// @codingStandardsIgnoreFile File external to MediaWiki. Ignore coding conventions checks.

/**
 * Whitespace- and comment-stripping JavaScript minifier.
 *
 * The input is tokenised in a single pass. A small state machine tracks just
 * enough syntactic context to decide whether a slash starts a regexp literal
 * or is a division operator, what role a colon plays, and whether a newline
 * between two tokens has to be kept because it would trigger JavaScript's
 * automatic semicolon insertion.
 */
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT                = 0;
	const CONDITION                = 1;
	const PROPERTY_ASSIGNMENT      = 2;
	const EXPRESSION               = 3;
	const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP            = 5;
	const EXPRESSION_FUNC          = 6;
	const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP    = 8;
	const EXPRESSION_TERNARY_FUNC  = 9;
	const PAREN_EXPRESSION         = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP      = 11;
	const PAREN_EXPRESSION_FUNC    = 12;
	const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP   = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP       = 1; // unary operators
	const TYPE_INCR_OP     = 2; // ++ and --
	const TYPE_BIN_OP      = 3; // binary operators
	const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK        = 5; // ?
	const TYPE_COLON       = 6; // :
	const TYPE_COMMA       = 7; // ,
	const TYPE_SEMICOLON   = 8; // ;
	const TYPE_BRACE_OPEN  = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN  = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
	const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC        = 16; // keywords: function
	const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Minify a piece of JavaScript by removing comments and redundant whitespace.
	 *
	 * Newlines are kept (instead of being replaced by a semicolon) wherever removing
	 * them could change the meaning of the program through semicolon insertion.
	 * Unterminated string or regexp literals at the end of the input are copied
	 * through verbatim up to the end of the input.
	 *
	 * @param string $s JavaScript code to minify
	 * @param bool $statementsOnOwnLine Whether to emit a newline after statement-level
	 *  ';', '{' and ')' tokens and around a statement-level '}'
	 * @param int $maxLineLength Wrap lines that would grow beyond this many bytes;
	 *  a value <= 0 disables wrapping
	 * @return string|bool Minified code, or the result of parseError() (currently false)
	 *  when a malformed numeric literal is encountered
	 */
	public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!'          => self::TYPE_UN_OP,
			'~'          => self::TYPE_UN_OP,
			'delete'     => self::TYPE_UN_OP,
			'new'        => self::TYPE_UN_OP,
			'typeof'     => self::TYPE_UN_OP,
			'void'       => self::TYPE_UN_OP,
			'++'         => self::TYPE_INCR_OP,
			'--'         => self::TYPE_INCR_OP,
			'!='         => self::TYPE_BIN_OP,
			'!=='        => self::TYPE_BIN_OP,
			'%'          => self::TYPE_BIN_OP,
			'%='         => self::TYPE_BIN_OP,
			'&'          => self::TYPE_BIN_OP,
			'&&'         => self::TYPE_BIN_OP,
			'&='         => self::TYPE_BIN_OP,
			'*'          => self::TYPE_BIN_OP,
			'*='         => self::TYPE_BIN_OP,
			'+='         => self::TYPE_BIN_OP,
			'-='         => self::TYPE_BIN_OP,
			'.'          => self::TYPE_BIN_OP,
			'/'          => self::TYPE_BIN_OP,
			'/='         => self::TYPE_BIN_OP,
			'<'          => self::TYPE_BIN_OP,
			'<<'         => self::TYPE_BIN_OP,
			'<<='        => self::TYPE_BIN_OP,
			'<='         => self::TYPE_BIN_OP,
			'='          => self::TYPE_BIN_OP,
			'=='         => self::TYPE_BIN_OP,
			'==='        => self::TYPE_BIN_OP,
			'>'          => self::TYPE_BIN_OP,
			'>='         => self::TYPE_BIN_OP,
			'>>'         => self::TYPE_BIN_OP,
			'>>='        => self::TYPE_BIN_OP,
			'>>>'        => self::TYPE_BIN_OP,
			'>>>='       => self::TYPE_BIN_OP,
			'^'          => self::TYPE_BIN_OP,
			'^='         => self::TYPE_BIN_OP,
			'|'          => self::TYPE_BIN_OP,
			'|='         => self::TYPE_BIN_OP,
			'||'         => self::TYPE_BIN_OP,
			'in'         => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+'          => self::TYPE_ADD_OP,
			'-'          => self::TYPE_ADD_OP,
			'?'          => self::TYPE_HOOK,
			':'          => self::TYPE_COLON,
			','          => self::TYPE_COMMA,
			';'          => self::TYPE_SEMICOLON,
			'{'          => self::TYPE_BRACE_OPEN,
			'}'          => self::TYPE_BRACE_CLOSE,
			'('          => self::TYPE_PAREN_OPEN,
			'['          => self::TYPE_PAREN_OPEN,
			')'          => self::TYPE_PAREN_CLOSE,
			']'          => self::TYPE_PAREN_CLOSE,
			'break'      => self::TYPE_RETURN,
			'continue'   => self::TYPE_RETURN,
			'return'     => self::TYPE_RETURN,
			'throw'      => self::TYPE_RETURN,
			'catch'      => self::TYPE_IF,
			'for'        => self::TYPE_IF,
			'if'         => self::TYPE_IF,
			'switch'     => self::TYPE_IF,
			'while'      => self::TYPE_IF,
			'with'       => self::TYPE_IF,
			'case'       => self::TYPE_DO,
			'do'         => self::TYPE_DO,
			'else'       => self::TYPE_DO,
			'finally'    => self::TYPE_DO,
			'try'        => self::TYPE_DO,
			'var'        => self::TYPE_DO,
			'function'   => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP      => self::EXPRESSION,
				self::TYPE_INCR_OP    => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
				self::TYPE_IF         => self::CONDITION,
				self::TYPE_FUNC       => self::CONDITION,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COLON      => self::STATEMENT,
				self::TYPE_COMMA      => self::EXPRESSION,
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
				self::TYPE_HOOK       => self::PAREN_EXPRESSION,
				self::TYPE_COLON      => self::PAREN_EXPRESSION,
				self::TYPE_COMMA      => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
			self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_ADD_OP     => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			)
		);

		// Rules for when newlines should be inserted if
		// $statementsOnOwnLine is enabled.
		// $newlineBefore is checked before switching state,
		// $newlineAfter is checked after
		$newlineBefore = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_CLOSE => true,
			),
		);
		$newlineAfter = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN  => true,
				self::TYPE_PAREN_CLOSE => true,
				self::TYPE_SEMICOLON   => true,
			),
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP          => true,
			self::EXPRESSION_TERNARY_OP  => true,
			self::PAREN_EXPRESSION_OP    => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes.
				// Each round speculatively adds 2 so that, if the search stopped at a
				// backslash, the next round starts after the escaped character. The offset
				// handed to strcspn() is capped because a trailing backslash at the end of
				// the input would otherwise push it past the end of $s.
				$search = $ch . '\\';
				do {
					$end += strcspn( $s, $search, min( $end, $length ) ) + 2;
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Undo the speculative add; keep one for the closing quote
				$end--;
				if ( $end > $length ) {
					// No closing quote before the end of the input: the token simply
					// runs to the end. Without this, $s[$end - 1] below is out of range.
					$end = $length;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for ( ; ; ) {
					// Search for "/" (end of regexp), "[" (start of a character class)
					// or a backslash escape
					do {
						$end += strcspn( $s, '/[\\', min( $end, $length ) ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end > $length ) {
						// Input ended before the closing slash
						$end = $length;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// Otherwise we are at the start of a character class; a "/" inside it
					// does not end the regexp, so skip to the closing "]"
					do {
						$end += strcspn( $s, ']\\', min( $end, $length ) ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end > $length ) {
						// Input ended inside the character class; do not loop again
						$end = $length;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP
			) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $end - $pos; // += strlen( $token )
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Output a newline after the token if required
			// This is checked before AND after switching state
			$newlineAdded = false;
			if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
				$newlineAdded = true;
			}

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}

			// Check for newline insertion again
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
			}
		}
		return $out;
	}

	/**
	 * Report a parse error found while minifying.
	 *
	 * All three arguments are currently ignored; the method only returns false,
	 * which minify() passes straight back to its caller.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript that the error refers to
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}