MediaWiki  REL1_24
FormatJson.php
Go to the documentation of this file.
00001 <?php
/**
 * Static helpers for encoding values to JSON and decoding/parsing JSON text.
 *
 * Encoding goes through encode(), which picks a native implementation when
 * the running PHP defines JSON_UNESCAPED_UNICODE (encode54) and otherwise
 * emulates the same output on top of plain json_encode() (encode53).
 */
class FormatJson {
    /**
     * Escaping flag for encode(): leave non-ASCII characters as UTF-8 instead
     * of \uXXXX escapes. U+2028 and U+2029 are still emitted as escapes
     * (see $badChars).
     */
    const UTF8_OK = 1;

    /**
     * Escaping flag for encode(): do not hex-escape '<', '>' and '&'.
     * When this flag is absent, JSON_HEX_TAG | JSON_HEX_AMP is applied.
     */
    const XMLMETA_OK = 2;

    /**
     * Escaping flag for encode(): UTF8_OK and XMLMETA_OK combined.
     */
    const ALL_OK = 3;

    /**
     * Option for parse(): decode JSON objects as associative arrays.
     */
    const FORCE_ASSOC = 0x100;

    /**
     * Option for parse(): on a syntax error, strip trailing commas and retry.
     */
    const TRY_FIXING = 0x200;

    /**
     * Regex matching the newline and whitespace that sit between the opening
     * and closing bracket of an otherwise empty array or object in
     * pretty-printed output. Replacing matches with '' collapses such
     * constructs back to "[]" / "{}".
     */
    const WS_CLEANUP_REGEX = '/(?<=[\[{])\n\s*+(?=[\]}])/';

    /**
     * Raw UTF-8 byte sequences that are always re-escaped when UTF8_OK is used.
     * Kept index-aligned with $badCharsEscaped.
     */
    private static $badChars = array(
        "\xe2\x80\xa8", // U+2028 LINE SEPARATOR
        "\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR
    );

    /**
     * JSON escape sequences substituted for the entries of $badChars.
     */
    private static $badCharsEscaped = array(
        '\u2028', // U+2028 LINE SEPARATOR
        '\u2029', // U+2029 PARAGRAPH SEPARATOR
    );

    /**
     * Return the JSON representation of a value.
     *
     * @param mixed $value Value to encode; anything json_encode() accepts.
     * @param string|bool $pretty A string is used verbatim as one level of
     *   indentation; any other truthy value selects four spaces; any other
     *   falsy value disables pretty-printing.
     * @param int $escaping Bitfield of UTF8_OK / XMLMETA_OK (or ALL_OK).
     * @return string|bool JSON text, or false if json_encode() failed.
     */
    public static function encode( $value, $pretty = false, $escaping = 0 ) {
        // Normalise $pretty to either an indent string or boolean false.
        if ( !is_string( $pretty ) ) {
            $pretty = $pretty ? '    ' : false;
        }

        // The constant doubles as a feature test for the native option flags.
        if ( defined( 'JSON_UNESCAPED_UNICODE' ) ) {
            return self::encode54( $value, $pretty, $escaping );
        }

        return self::encode53( $value, $pretty, $escaping );
    }

    /**
     * Decode a JSON string; a thin wrapper around json_decode().
     *
     * @param string $value JSON text.
     * @param bool $assoc Passed to json_decode() as its $assoc argument.
     * @return mixed Whatever json_decode() returns (null on failure).
     */
    public static function decode( $value, $assoc = false ) {
        return json_decode( $value, $assoc );
    }

    /**
     * Decode a JSON string and report the outcome as a Status object.
     *
     * @param string $value JSON text.
     * @param int $options Bitfield of FORCE_ASSOC / TRY_FIXING.
     * @return Status Good status carrying the decoded value (with a warning
     *   attached if trailing commas had to be removed), or a fatal status
     *   carrying a 'json-error-*' message.
     */
    public static function parse( $value, $options = 0 ) {
        $assoc = ( $options & self::FORCE_ASSOC ) !== 0;
        $result = json_decode( $value, $assoc );
        $code = json_last_error();

        if ( $code === JSON_ERROR_SYNTAX && ( $options & self::TRY_FIXING ) !== 0 ) {
            // The most common error is the trailing comma in a list or an object.
            // We cannot simply replace /,\s*[}\]]/ because it could be inside a string value.
            // But we could use the fact that JSON does not allow multi-line string values,
            // and remove trailing commas if they are at the end of a line.
            // JSON only allows 4 control characters: [ \t\r\n].  So we must not use '\s' for matching.
            // Regex match   ,]<any non-quote chars>\n   or   ,\n]   with optional spaces/tabs.
            $count = 0;
            $fixed = preg_replace(
                '/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/',
                '$1',
                $value,
                -1,
                $count
            );
            if ( $count > 0 ) {
                $result = json_decode( $fixed, $assoc );
                if ( JSON_ERROR_NONE === json_last_error() ) {
                    // Decoded after the fix: succeed, but tell the caller how
                    // many commas were dropped.
                    $st = Status::newGood( $result );
                    $st->warning( wfMessage( 'json-warn-trailing-comma' )->numParams( $count ) );
                    return $st;
                }
            }
            // The retry failed too; fall through and report the original error.
        }

        switch ( $code ) {
            case JSON_ERROR_NONE:
                return Status::newGood( $result );
            case JSON_ERROR_DEPTH:
                $msg = 'json-error-depth';
                break;
            case JSON_ERROR_STATE_MISMATCH:
                $msg = 'json-error-state-mismatch';
                break;
            case JSON_ERROR_CTRL_CHAR:
                $msg = 'json-error-ctrl-char';
                break;
            case JSON_ERROR_SYNTAX:
                $msg = 'json-error-syntax';
                break;
            case JSON_ERROR_UTF8:
                $msg = 'json-error-utf8';
                break;
            case JSON_ERROR_RECURSION:
                $msg = 'json-error-recursion';
                break;
            case JSON_ERROR_INF_OR_NAN:
                $msg = 'json-error-inf-or-nan';
                break;
            case JSON_ERROR_UNSUPPORTED_TYPE:
                $msg = 'json-error-unsupported-type';
                break;
            default:
                // Unrecognised error code: include the number in the message.
                return Status::newFatal( wfMessage( 'json-error-unknown' )->numParams( $code ) );
        }
        return Status::newFatal( $msg );
    }

    /**
     * Encoder used when the JSON_UNESCAPED_* / JSON_PRETTY_PRINT flags exist.
     *
     * @param mixed $value Value to encode.
     * @param string|bool $pretty Indent string, or false for compact output.
     * @param int $escaping Bitfield of UTF8_OK / XMLMETA_OK.
     * @return string|bool JSON text, or false if json_encode() failed.
     */
    private static function encode54( $value, $pretty, $escaping ) {
        // Cached across calls: true when this PHP build puts whitespace inside
        // an empty pretty-printed array (PHP bug 66021). Probed lazily, on the
        // first pretty-printing call only.
        static $bug66021;
        if ( $pretty !== false && $bug66021 === null ) {
            $bug66021 = json_encode( array(), JSON_PRETTY_PRINT ) !== '[]';
        }

        // PHP escapes '/' to prevent breaking out of inline script blocks using '</script>',
        // which is hardly useful when '<' and '>' are escaped (and inadequate), and such
        // escaping negatively impacts the human readability of URLs and similar strings.
        $options = JSON_UNESCAPED_SLASHES;
        $options |= $pretty !== false ? JSON_PRETTY_PRINT : 0;
        $options |= ( $escaping & self::UTF8_OK ) ? JSON_UNESCAPED_UNICODE : 0;
        $options |= ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );
        $json = json_encode( $value, $options );
        if ( $json === false ) {
            return false;
        }

        if ( $pretty !== false ) {
            // Workaround for <https://bugs.php.net/bug.php?id=66021>
            if ( $bug66021 ) {
                $json = preg_replace( self::WS_CLEANUP_REGEX, '', $json );
            }
            if ( $pretty !== '    ' ) {
                // Change the four-space indent to a tab indent. The first
                // replacement converts one level per line; each loop pass then
                // converts the next level, since str_replace() does not rescan
                // text it has just produced.
                $json = str_replace( "\n    ", "\n\t", $json );
                while ( strpos( $json, "\t    " ) !== false ) {
                    $json = str_replace( "\t    ", "\t\t", $json );
                }

                if ( $pretty !== "\t" ) {
                    // Change the tab indent to the provided indent
                    $json = str_replace( "\t", $pretty, $json );
                }
            }
        }
        if ( $escaping & self::UTF8_OK ) {
            // Re-escape the line/paragraph separators that were left raw.
            $json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
        }

        return $json;
    }

    /**
     * Fallback encoder that relies only on JSON_HEX_TAG / JSON_HEX_AMP and
     * emulates unescaped slashes, unescaped Unicode and pretty-printing in
     * PHP code.
     *
     * @param mixed $value Value to encode.
     * @param string|bool $pretty Indent string, or false for compact output.
     * @param int $escaping Bitfield of UTF8_OK / XMLMETA_OK.
     * @return string|bool JSON text, or false if json_encode() failed.
     */
    private static function encode53( $value, $pretty, $escaping ) {
        $options = ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );
        $json = json_encode( $value, $options );
        if ( $json === false ) {
            return false;
        }

        // Emulate JSON_UNESCAPED_SLASHES. Because the JSON contains no unescaped slashes
        // (only escaped slashes), a simple string replacement works fine.
        $json = str_replace( '\/', '/', $json );

        if ( $escaping & self::UTF8_OK ) {
            // JSON hex escape sequences follow the format \uDDDD, where DDDD is four hex digits
            // indicating the equivalent UTF-16 code unit's value. To most efficiently unescape
            // them, we exploit the JSON extension's built-in decoder.
            // * We escape the input a second time, so any such sequence becomes \\uDDDD.
            // * To avoid interpreting escape sequences that were in the original input,
            //   each double-escaped backslash (\\\\) is replaced with \\\u005c.
            // * We strip one of the backslashes from each of the escape sequences to unescape.
            //   Sequences starting with \u00 followed by a digit 0-7 are left escaped.
            // * Then the JSON decoder can perform the actual unescaping.
            $json = str_replace( "\\\\\\\\", "\\\\\\u005c", addcslashes( $json, '\"' ) );
            $json = json_decode( preg_replace( "/\\\\\\\\u(?!00[0-7])/", "\\\\u", "\"$json\"" ) );
            $json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
        }

        if ( $pretty !== false ) {
            return self::prettyPrint( $json, $pretty );
        }

        return $json;
    }

    /**
     * Add line breaks and indentation to compact JSON text.
     *
     * @param string $json Compact JSON text as produced in encode53().
     * @param string $indentString String repeated once per nesting level.
     * @return string Pretty-printed JSON text.
     */
    private static function prettyPrint( $json, $indentString ) {
        $buf = '';
        $indent = 0;
        // Hide escaped quotes behind a \x01 placeholder so that every remaining
        // '"' is a real string delimiter. Mapping '\\' to itself makes strtr()
        // consume escaped backslashes first, so the quote in '\\"' is not
        // mistaken for an escaped one.
        $json = strtr( $json, array( '\\\\' => '\\\\', '\"' => "\x01" ) );
        for ( $i = 0, $n = strlen( $json ); $i < $n; $i += $skip ) {
            $skip = 1;
            switch ( $json[$i] ) {
                case ':':
                    $buf .= ': ';
                    break;
                case '[':
                case '{':
                    ++$indent;
                    // falls through
                case ',':
                    $buf .= $json[$i] . "\n" . str_repeat( $indentString, $indent );
                    break;
                case ']':
                case '}':
                    $buf .= "\n" . str_repeat( $indentString, --$indent ) . $json[$i];
                    break;
                case '"':
                    // Copy a whole string literal, both delimiters included.
                    $skip = strcspn( $json, '"', $i + 1 ) + 2;
                    $buf .= substr( $json, $i, $skip );
                    break;
                default:
                    // Copy a bare token (number, true, false, null) up to the
                    // next structural character.
                    $skip = strcspn( $json, ',]}"', $i + 1 ) + 1;
                    $buf .= substr( $json, $i, $skip );
            }
        }
        // Empty arrays/objects were split across lines above; collapse them.
        $buf = preg_replace( self::WS_CLEANUP_REGEX, '', $buf );

        return str_replace( "\x01", '\"', $buf );
    }
}