MediaWiki
REL1_24
|
<?php
/**
 * Wrapper around PHP's JSON extension that normalises escaping and
 * pretty-printing across PHP versions, and that can report decoding
 * problems through a Status object.
 */
class FormatJson {
	/**
	 * Escaping flag: leave non-ASCII characters unescaped in the output
	 * instead of emitting \uXXXX sequences. U+2028 and U+2029 are still
	 * escaped (see self::$badChars).
	 */
	const UTF8_OK = 1;

	/**
	 * Escaping flag: do not apply JSON_HEX_TAG | JSON_HEX_AMP, i.e. leave
	 * '<', '>' and '&' unescaped in the output.
	 */
	const XMLMETA_OK = 2;

	/**
	 * Escaping flag: combination of self::UTF8_OK and self::XMLMETA_OK.
	 */
	const ALL_OK = 3;

	/**
	 * Option for self::parse(): decode JSON objects as associative arrays.
	 */
	const FORCE_ASSOC = 0x100;

	/**
	 * Option for self::parse(): on a syntax error, try to repair the input
	 * (currently by removing trailing commas) and decode it again.
	 */
	const TRY_FIXING = 0x200;

	/**
	 * Regex matching the newline and whitespace that pretty-printing leaves
	 * between the brackets of an empty array or object.
	 */
	const WS_CLEANUP_REGEX = '/(?<=[\[{])\n\s*+(?=[\]}])/';

	/**
	 * Characters that are always escaped in the output, even with
	 * self::UTF8_OK.
	 */
	private static $badChars = array(
		"\xe2\x80\xa8", // U+2028 LINE SEPARATOR
		"\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR
	);

	/**
	 * Escaped replacements for self::$badChars, in the same order.
	 */
	private static $badCharsEscaped = array(
		'\u2028', // U+2028 LINE SEPARATOR
		'\u2029', // U+2029 PARAGRAPH SEPARATOR
	);

	/**
	 * Return the JSON representation of a value.
	 *
	 * @param mixed $value Value to encode.
	 * @param string|bool $pretty False for compact output; a string to use as
	 *   the indent for each nesting level; any other truthy value selects a
	 *   four-space indent.
	 * @param int $escaping Bitfield of self::UTF8_OK / self::XMLMETA_OK.
	 * @return string|bool JSON string, or false if json_encode() failed.
	 */
	public static function encode( $value, $pretty = false, $escaping = 0 ) {
		if ( !is_string( $pretty ) ) {
			// Four spaces: the indent produced natively by JSON_PRETTY_PRINT.
			$pretty = $pretty ? '    ' : false;
		}

		if ( defined( 'JSON_UNESCAPED_UNICODE' ) ) {
			return self::encode54( $value, $pretty, $escaping );
		}

		return self::encode53( $value, $pretty, $escaping );
	}

	/**
	 * Decode a JSON string. Thin wrapper around json_decode().
	 *
	 * @param string $value JSON text.
	 * @param bool $assoc Passed to json_decode() as its second argument.
	 * @return mixed Whatever json_decode() returns.
	 */
	public static function decode( $value, $assoc = false ) {
		return json_decode( $value, $assoc );
	}

	/**
	 * Decode a JSON string and report the outcome as a Status object.
	 *
	 * @param string $value JSON text.
	 * @param int $options Bitfield of self::FORCE_ASSOC / self::TRY_FIXING.
	 * @return Status Good status wrapping the decoded value (with a warning
	 *   attached if trailing commas had to be removed), or a fatal status
	 *   describing the decoding error.
	 */
	public static function parse( $value, $options = 0 ) {
		$assoc = ( $options & self::FORCE_ASSOC ) !== 0;
		$result = json_decode( $value, $assoc );
		$code = json_last_error();

		if ( $code === JSON_ERROR_SYNTAX && ( $options & self::TRY_FIXING ) !== 0 ) {
			// The most common error is the trailing comma in a list or an object.
			// We cannot simply replace /,\s*[}\]]/ because it could be inside a string value.
			// But we could use the fact that JSON does not allow multi-line string values,
			// and remove trailing commas if they are at the end of a line.
			// JSON only allows 4 control characters: [ \t\r\n]. So we must not use '\s' for matching.
			// Regex match ,]<any non-quote chars>\n or ,\n] with optional spaces/tabs.
			$count = 0;
			$value = preg_replace(
				'/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/',
				'$1',
				$value,
				-1,
				$count
			);
			if ( $count > 0 ) {
				$result = json_decode( $value, $assoc );
				if ( JSON_ERROR_NONE === json_last_error() ) {
					// The repaired text decodes; succeed, but tell the caller about the repair.
					$st = Status::newGood( $result );
					$st->warning( wfMessage( 'json-warn-trailing-comma' )->numParams( $count ) );
					return $st;
				}
			}
			// Repair did not help: fall through and report the original syntax error.
		}

		switch ( $code ) {
			case JSON_ERROR_NONE:
				return Status::newGood( $result );
			case JSON_ERROR_DEPTH:
				$msg = 'json-error-depth';
				break;
			case JSON_ERROR_STATE_MISMATCH:
				$msg = 'json-error-state-mismatch';
				break;
			case JSON_ERROR_CTRL_CHAR:
				$msg = 'json-error-ctrl-char';
				break;
			case JSON_ERROR_SYNTAX:
				$msg = 'json-error-syntax';
				break;
			case JSON_ERROR_UTF8:
				$msg = 'json-error-utf8';
				break;
			case JSON_ERROR_RECURSION:
				$msg = 'json-error-recursion';
				break;
			case JSON_ERROR_INF_OR_NAN:
				$msg = 'json-error-inf-or-nan';
				break;
			case JSON_ERROR_UNSUPPORTED_TYPE:
				$msg = 'json-error-unsupported-type';
				break;
			default:
				return Status::newFatal( wfMessage( 'json-error-unknown' )->numParams( $code ) );
		}
		return Status::newFatal( $msg );
	}

	/**
	 * Encoder used when JSON_UNESCAPED_UNICODE is available, so that
	 * json_encode() can do the escaping and pretty-printing itself.
	 *
	 * @param mixed $value Value to encode.
	 * @param string|bool $pretty Indent string, or false for compact output.
	 * @param int $escaping Bitfield of self::UTF8_OK / self::XMLMETA_OK.
	 * @return string|bool JSON string, or false if json_encode() failed.
	 */
	private static function encode54( $value, $pretty, $escaping ) {
		// Detected once per request: does this PHP build put whitespace inside
		// the brackets of an empty array when pretty-printing?
		static $bug66021;
		if ( $pretty !== false && $bug66021 === null ) {
			$bug66021 = json_encode( array(), JSON_PRETTY_PRINT ) !== '[]';
		}

		// PHP escapes '/' to prevent breaking out of inline script blocks using '</script>',
		// which is hardly useful when '<' and '>' are escaped (and inadequate), and such
		// escaping negatively impacts the human readability of URLs and similar strings.
		$options = JSON_UNESCAPED_SLASHES;
		$options |= $pretty !== false ? JSON_PRETTY_PRINT : 0;
		$options |= ( $escaping & self::UTF8_OK ) ? JSON_UNESCAPED_UNICODE : 0;
		$options |= ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );
		$json = json_encode( $value, $options );
		if ( $json === false ) {
			return false;
		}

		if ( $pretty !== false ) {
			// Workaround for <https://bugs.php.net/bug.php?id=66021>
			if ( $bug66021 ) {
				$json = preg_replace( self::WS_CLEANUP_REGEX, '', $json );
			}
			// The output is already indented with four spaces per level;
			// it only needs rewriting when a different indent was requested.
			if ( $pretty !== '    ' ) {
				// Change the four-space indent to a tab indent. The first pass
				// converts the first level after each newline, the loop then
				// converts each following level.
				$json = str_replace( "\n    ", "\n\t", $json );
				while ( strpos( $json, "\t    " ) !== false ) {
					$json = str_replace( "\t    ", "\t\t", $json );
				}

				if ( $pretty !== "\t" ) {
					// Change the tab indent to the provided indent
					$json = str_replace( "\t", $pretty, $json );
				}
			}
		}
		if ( $escaping & self::UTF8_OK ) {
			// Re-escape the characters that must never appear literally.
			$json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
		}

		return $json;
	}

	/**
	 * Fallback encoder used when JSON_UNESCAPED_UNICODE is not defined.
	 * Emulates unescaped slashes, unescaped Unicode and pretty-printing
	 * on top of a plain json_encode() call.
	 *
	 * @param mixed $value Value to encode.
	 * @param string|bool $pretty Indent string, or false for compact output.
	 * @param int $escaping Bitfield of self::UTF8_OK / self::XMLMETA_OK.
	 * @return string|bool JSON string, or false if json_encode() failed.
	 */
	private static function encode53( $value, $pretty, $escaping ) {
		$options = ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );
		$json = json_encode( $value, $options );
		if ( $json === false ) {
			return false;
		}

		// Emulate JSON_UNESCAPED_SLASHES. Because the JSON contains no unescaped slashes
		// (only escaped slashes), a simple string replacement works fine.
		$json = str_replace( '\/', '/', $json );

		if ( $escaping & self::UTF8_OK ) {
			// JSON hex escape sequences follow the format \uDDDD, where DDDD is four hex digits
			// indicating the equivalent UTF-16 code unit's value. To most efficiently unescape
			// them, we exploit the JSON extension's built-in decoder.
			// * We escape the input a second time, so any such sequence becomes \\uDDDD.
			// * To avoid interpreting escape sequences that were in the original input,
			//   each double-escaped backslash (\\\\) is replaced with \\\u005c.
			// * We strip one of the backslashes from each of the escape sequences to unescape.
			// * Then the JSON decoder can perform the actual unescaping.
			$json = str_replace( "\\\\\\\\", "\\\\\\u005c", addcslashes( $json, '\"' ) );
			$json = json_decode( preg_replace( "/\\\\\\\\u(?!00[0-7])/", "\\\\u", "\"$json\"" ) );
			$json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
		}

		if ( $pretty !== false ) {
			return self::prettyPrint( $json, $pretty );
		}

		return $json;
	}

	/**
	 * Add line breaks and indentation to a compact JSON string.
	 *
	 * @param string $json Compact JSON text.
	 * @param string $indentString String to repeat once per nesting level.
	 * @return string Pretty-printed JSON text.
	 */
	private static function prettyPrint( $json, $indentString ) {
		$buf = '';
		$indent = 0;
		// Hide escaped quotes behind a placeholder byte so that every remaining
		// '"' delimits a string. Escaped backslashes are mapped to themselves so
		// that the backslash of '\\' is never taken as the start of '\"'.
		$json = strtr( $json, array( '\\\\' => '\\\\', '\"' => "\x01" ) );
		for ( $i = 0, $n = strlen( $json ); $i < $n; $i += $skip ) {
			$skip = 1;
			switch ( $json[$i] ) {
				case ':':
					$buf .= ': ';
					break;
				case '[':
				case '{':
					++$indent;
					// falls through
				case ',':
					$buf .= $json[$i] . "\n" . str_repeat( $indentString, $indent );
					break;
				case ']':
				case '}':
					$buf .= "\n" . str_repeat( $indentString, --$indent ) . $json[$i];
					break;
				case '"':
					// Copy the whole string literal, including both quotes, verbatim.
					$skip = strcspn( $json, '"', $i + 1 ) + 2;
					$buf .= substr( $json, $i, $skip );
					break;
				default:
					// Copy a bare token (number, true, false, null) up to the next delimiter.
					$skip = strcspn( $json, ',]}"', $i + 1 ) + 1;
					$buf .= substr( $json, $i, $skip );
			}
		}
		// Collapse the whitespace inserted between the brackets of empty arrays/objects.
		$buf = preg_replace( self::WS_CLEANUP_REGEX, '', $buf );

		// Restore the escaped quotes hidden above.
		return str_replace( "\x01", '\"', $buf );
	}
}