MediaWiki
REL1_19
|
<?php
/**
 * Helper functions for converting between Unicode code points, hexadecimal
 * code point sequences and UTF-8 encoded strings.
 */

/**
 * Return the UTF-8 encoding of a single Unicode code point.
 *
 * If the value is 0x110000 or greater, a message is echoed and the script is
 * terminated with die( -1 ); the function does not return in that case.
 *
 * @param $codepoint Integer: code point to encode
 * @return String: the 1 to 4 byte UTF-8 sequence for the code point
 */
function codepointToUtf8( $codepoint ) {
	// One byte: 0xxxxxxx
	if ( $codepoint < 0x80 ) {
		return chr( $codepoint );
	}

	// Two bytes: 110xxxxx 10xxxxxx
	if ( $codepoint < 0x800 ) {
		return chr( ( $codepoint >> 6 & 0x3f ) | 0xc0 ) .
			chr( ( $codepoint & 0x3f ) | 0x80 );
	}

	// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	if ( $codepoint < 0x10000 ) {
		return chr( ( $codepoint >> 12 & 0x0f ) | 0xe0 ) .
			chr( ( $codepoint >> 6 & 0x3f ) | 0x80 ) .
			chr( ( $codepoint & 0x3f ) | 0x80 );
	}

	// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if ( $codepoint < 0x110000 ) {
		return chr( ( $codepoint >> 18 & 0x07 ) | 0xf0 ) .
			chr( ( $codepoint >> 12 & 0x3f ) | 0x80 ) .
			chr( ( $codepoint >> 6 & 0x3f ) | 0x80 ) .
			chr( ( $codepoint & 0x3f ) | 0x80 );
	}

	echo "Asked for code outside of range ($codepoint)\n";
	die( -1 );
}

/**
 * Convert a string of space-separated hexadecimal code points into the
 * corresponding UTF-8 string.
 *
 * Every token produced by splitting on a single space is passed through
 * hexdec(), so an empty token (empty input, or two consecutive spaces)
 * is encoded as a NUL byte.
 *
 * @param $sequence String: hexadecimal code points separated by single spaces
 * @return String: the concatenated UTF-8 encodings
 */
function hexSequenceToUtf8( $sequence ) {
	$utf = '';
	foreach ( explode( ' ', $sequence ) as $hex ) {
		$utf .= codepointToUtf8( hexdec( $hex ) );
	}
	return $utf;
}

/**
 * Convert a UTF-8 string into a string of space-separated hexadecimal code
 * points, each zero-padded to at least four digits.
 *
 * The string is split with preg_split() rather than the removed /e (eval)
 * modifier of preg_replace(), so quotes, backslashes and newlines are
 * converted like any other character.
 *
 * @param $str String: UTF-8 text
 * @return String: lower-case hexadecimal code points separated by spaces;
 *   an empty string if $str is empty or is not valid UTF-8
 */
function utf8ToHexSequence( $str ) {
	// An empty pattern with /u splits between every UTF-8 character.
	$chars = preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY );
	if ( $chars === false ) {
		// The input could not be processed as UTF-8.
		return '';
	}

	$buf = '';
	foreach ( $chars as $char ) {
		$buf .= sprintf( '%04x ', utf8ToCodepoint( $char ) );
	}
	return rtrim( $buf );
}

/**
 * Return the code point encoded by a string holding exactly one UTF-8
 * character.
 *
 * The sequence length is read from the high bits of the first byte and
 * compared with the byte length of the string. Continuation bytes are not
 * otherwise validated.
 *
 * @param $char String: a single UTF-8 encoded character
 * @return Integer code point, or false if $char is empty or its byte length
 *   does not match the length announced by its first byte
 */
function utf8ToCodepoint( $char ) {
	if ( $char === '' ) {
		// Nothing to decode; also avoids reading offset 0 of an empty string.
		return false;
	}

	# Find the length
	$z = ord( $char[0] );
	if ( $z & 0x80 ) {
		# Count the leading 1 bits by shifting them past bit 7
		$length = 0;
		while ( $z & 0x80 ) {
			$length++;
			$z <<= 1;
		}
	} else {
		$length = 1;
	}

	if ( $length != strlen( $char ) ) {
		return false;
	}
	if ( $length == 1 ) {
		return ord( $char );
	}

	# Mask off the length-determining bits and shift back to the original location
	$z &= 0xff;
	$z >>= $length;

	# Add in the free bits from subsequent bytes
	for ( $i = 1; $i < $length; $i++ ) {
		$z <<= 6;
		$z |= ord( $char[$i] ) & 0x3f;
	}

	return $z;
}

/**
 * Escape a string so it can be placed inside a single-quoted PHP string
 * literal: backslashes and single quotes each get a backslash prefix.
 *
 * @param $string String: raw text
 * @return String: escaped text
 */
function escapeSingleString( $string ) {
	return strtr( $string,
		array(
			'\\' => '\\\\',
			'\'' => '\\\''
		) );
}