MediaWiki  REL1_22
FormatJson.php
Go to the documentation of this file.
00001 <?php
/**
 * Static helpers for encoding and decoding JSON.
 *
 * Encoding is delegated to one of two private implementations depending on
 * whether the running PHP defines JSON_UNESCAPED_UNICODE: encode54() relies on
 * the native json_encode() options, while encode53() emulates the same output
 * with string post-processing and a hand-written pretty printer.
 */
class FormatJson {

    /**
     * Escaping flag: when set, non-ASCII characters are emitted as raw UTF-8
     * instead of \uXXXX escape sequences. U+2028 and U+2029 are still escaped
     * (see $badChars).
     */
    const UTF8_OK = 1;

    /**
     * Escaping flag: when set, the JSON_HEX_TAG and JSON_HEX_AMP options are
     * NOT applied, so '<', '>' and '&' are left unescaped in the output.
     */
    const XMLMETA_OK = 2;

    /**
     * Escaping flag: bitwise combination of UTF8_OK and XMLMETA_OK.
     */
    const ALL_OK = 3;

    /**
     * Regex matching the newline and whitespace that sit directly between an
     * opening '[' or '{' and its closing ']' or '}', i.e. the padding inside
     * an empty array/object in pretty-printed output.
     */
    const WS_CLEANUP_REGEX = '/(?<=[\[{])\n\s*+(?=[\]}])/';

    /**
     * Raw UTF-8 byte sequences that are always replaced by their escaped form
     * (the entry at the same index in $badCharsEscaped), even under UTF8_OK.
     * NOTE(review): presumably because these are line terminators in
     * JavaScript source -- confirm against upstream documentation.
     */
    private static $badChars = array(
        "\xe2\x80\xa8", // U+2028 LINE SEPARATOR
        "\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR
    );

    /**
     * JSON escape sequences substituted for the entries of $badChars,
     * index for index.
     */
    private static $badCharsEscaped = array(
        '\u2028', // U+2028 LINE SEPARATOR
        '\u2029', // U+2029 PARAGRAPH SEPARATOR
    );

    /**
     * Serialize a value as JSON.
     *
     * @param mixed $value Value to encode; passed to json_encode().
     * @param bool $pretty If true, produce indented multi-line output.
     * @param int $escaping Bitfield of UTF8_OK / XMLMETA_OK (or ALL_OK).
     * @return string|bool The JSON text, or false if json_encode() failed.
     */
    public static function encode( $value, $pretty = false, $escaping = 0 ) {
        // Without JSON_UNESCAPED_UNICODE the native options used by
        // encode54() are unavailable, so fall back to the emulation.
        if ( !defined( 'JSON_UNESCAPED_UNICODE' ) ) {
            return self::encode53( $value, $pretty, $escaping );
        }
        return self::encode54( $value, $pretty, $escaping );
    }

    /**
     * Parse a JSON string; a direct pass-through to json_decode().
     *
     * @param string $value The JSON text to decode.
     * @param bool $assoc Forwarded as json_decode()'s second argument.
     * @return mixed Whatever json_decode() returns for the input.
     */
    public static function decode( $value, $assoc = false ) {
        $decoded = json_decode( $value, $assoc );
        return $decoded;
    }

    /**
     * Encoder built on the native json_encode() option flags.
     *
     * @param mixed $value Value to encode.
     * @param bool $pretty Whether to pretty-print.
     * @param int $escaping Bitfield of UTF8_OK / XMLMETA_OK.
     * @return string|bool The JSON text, or false if json_encode() failed.
     */
    private static function encode54( $value, $pretty, $escaping ) {
        $rawUnicode = (bool)( $escaping & self::UTF8_OK );

        // Slashes are always left unescaped: escaping '/' only guards against
        // '</script>' break-outs, which is of little use once '<' and '>' are
        // escaped themselves, and it hurts the readability of URLs.
        $flags = JSON_UNESCAPED_SLASHES;
        if ( $pretty ) {
            $flags |= JSON_PRETTY_PRINT;
        }
        if ( $rawUnicode ) {
            $flags |= JSON_UNESCAPED_UNICODE;
        }
        if ( !( $escaping & self::XMLMETA_OK ) ) {
            $flags |= JSON_HEX_TAG | JSON_HEX_AMP;
        }

        $encoded = json_encode( $value, $flags );
        if ( $encoded === false ) {
            return false;
        }

        if ( $pretty ) {
            // Collapse the whitespace inside empty arrays/objects so that the
            // result does not depend on which encoder produced it.
            $encoded = preg_replace( self::WS_CLEANUP_REGEX, '', $encoded );
        }
        if ( $rawUnicode ) {
            // Raw UTF-8 was requested, but these two characters stay escaped.
            $encoded = str_replace( self::$badChars, self::$badCharsEscaped, $encoded );
        }
        return $encoded;
    }

    /**
     * Encoder for PHP builds lacking JSON_UNESCAPED_UNICODE; reproduces the
     * unescaped-slash, unescaped-Unicode and pretty-print behaviour by
     * post-processing plain json_encode() output.
     *
     * @param mixed $value Value to encode.
     * @param bool $pretty Whether to pretty-print.
     * @param int $escaping Bitfield of UTF8_OK / XMLMETA_OK.
     * @return string|bool The JSON text, or false if json_encode() failed.
     */
    private static function encode53( $value, $pretty, $escaping ) {
        $flags = 0;
        if ( !( $escaping & self::XMLMETA_OK ) ) {
            $flags = JSON_HEX_TAG | JSON_HEX_AMP;
        }

        $encoded = json_encode( $value, $flags );
        if ( $encoded === false ) {
            return false;
        }

        // Stand-in for JSON_UNESCAPED_SLASHES. Every slash in the encoder's
        // output is escaped, so a plain substring replacement is sufficient.
        $encoded = str_replace( '\/', '/', $encoded );

        if ( $escaping & self::UTF8_OK ) {
            // Turn \uDDDD sequences back into raw UTF-8 by letting the JSON
            // decoder do the work on the whole document as if it were one
            // big JSON string literal:
            // 1. Escape backslashes and quotes once more, so \uDDDD reads
            //    \\uDDDD and the document can be wrapped in quotes.
            $doubled = addcslashes( $encoded, '\"' );
            // 2. A backslash that was already escaped in the document is now
            //    four backslashes; rewrite it as \\\u005c so that it cannot
            //    be mistaken for the start of a \\u sequence in step 3.
            $guarded = str_replace( "\\\\\\\\", "\\\\\\u005c", $doubled );
            // 3. Drop one backslash from each \\uDDDD, except for 00[0-7]x
            //    code units, which must remain escaped (this also protects
            //    the \u005c inserted in step 2).
            $unescapable = preg_replace( "/\\\\\\\\u(?!00[0-7])/", "\\\\u", "\"$guarded\"" );
            // 4. Decoding the wrapped string performs the actual unescaping.
            $encoded = json_decode( $unescapable );
            $encoded = str_replace( self::$badChars, self::$badCharsEscaped, $encoded );
        }

        return $pretty ? self::prettyPrint( $encoded ) : $encoded;
    }

    /**
     * Re-indent compact JSON text with four spaces per nesting level, a
     * newline after every '[', '{' and ',', and a space after every ':'.
     *
     * @param string $json Compact JSON text.
     * @return string The indented JSON text.
     */
    private static function prettyPrint( $json ) {
        $out = '';
        $depth = 0;

        // Mask escaped quotes with \x01 so that every remaining '"' delimits
        // a string. Escaped backslashes map to themselves purely so that
        // strtr() consumes them as a pair and never pairs the second
        // backslash with a following quote.
        $masked = strtr( $json, array( '\\\\' => '\\\\', '\"' => "\x01" ) );
        $length = strlen( $masked );

        $pos = 0;
        while ( $pos < $length ) {
            $char = $masked[$pos];
            $step = 1;

            if ( $char === ':' ) {
                $out .= ': ';
            } elseif ( $char === '[' || $char === '{' || $char === ',' ) {
                // Openers deepen the indentation; a comma keeps it.
                if ( $char !== ',' ) {
                    ++$depth;
                }
                $out .= $char . "\n" . str_repeat( '    ', $depth );
            } elseif ( $char === ']' || $char === '}' ) {
                --$depth;
                $out .= "\n" . str_repeat( '    ', $depth ) . $char;
            } elseif ( $char === '"' ) {
                // Copy the whole string literal, both quotes included.
                $step = strcspn( $masked, '"', $pos + 1 ) + 2;
                $out .= substr( $masked, $pos, $step );
            } else {
                // Any other token: copy up to the next delimiter.
                $step = strcspn( $masked, ',]}"', $pos + 1 ) + 1;
                $out .= substr( $masked, $pos, $step );
            }

            $pos += $step;
        }

        // Empty containers were emitted with padding inside; collapse them.
        $out = preg_replace( self::WS_CLEANUP_REGEX, '', $out );
        // Restore the escaped quotes masked at the start.
        return str_replace( "\x01", '\"', $out );
    }
}