MediaWiki
REL1_22
|
<?php

/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * Values below 0x80 yield one byte, below 0x800 two bytes, below 0x10000
 * three bytes and below 0x110000 four bytes. No special handling is applied
 * to any sub-range (for example surrogates); they are encoded like any other
 * value of the same magnitude.
 *
 * For a negative value, or a value of 0x110000 and above, a message is
 * printed and the script is terminated with die( -1 ).
 *
 * @param int $codepoint Code point to encode
 * @return string UTF-8 byte sequence for $codepoint
 */
function codepointToUtf8( $codepoint ) {
	// Negative numbers are not code points. Without this guard they would
	// pass the "< 0x80" test below and chr() would return a single wrapped
	// (invalid) byte instead of reaching the out-of-range error.
	if ( $codepoint >= 0 ) {
		if ( $codepoint < 0x80 ) {
			return chr( $codepoint );
		}
		if ( $codepoint < 0x800 ) {
			return chr( $codepoint >> 6 & 0x3f | 0xc0 ) .
				chr( $codepoint & 0x3f | 0x80 );
		}
		if ( $codepoint < 0x10000 ) {
			return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .
				chr( $codepoint >> 6 & 0x3f | 0x80 ) .
				chr( $codepoint & 0x3f | 0x80 );
		}
		if ( $codepoint < 0x110000 ) {
			return chr( $codepoint >> 18 & 0x07 | 0xf0 ) .
				chr( $codepoint >> 12 & 0x3f | 0x80 ) .
				chr( $codepoint >> 6 & 0x3f | 0x80 ) .
				chr( $codepoint & 0x3f | 0x80 );
		}
	}

	echo "Asked for code outside of range ($codepoint)\n";
	die( -1 );
}

/**
 * Convert a space-separated list of hexadecimal code points into the
 * concatenated UTF-8 encoding of those code points.
 *
 * Empty tokens (an empty input string, or consecutive/leading/trailing
 * spaces) are skipped, so an empty sequence yields an empty string.
 *
 * @param string $sequence Hexadecimal code points separated by single spaces
 * @return string UTF-8 string
 */
function hexSequenceToUtf8( $sequence ) {
	$utf = '';
	foreach ( explode( ' ', $sequence ) as $hex ) {
		// hexdec( '' ) is 0, which would otherwise emit a stray NUL byte.
		if ( $hex === '' ) {
			continue;
		}
		$utf .= codepointToUtf8( hexdec( $hex ) );
	}
	return $utf;
}

/**
 * Produce a space-separated list of hexadecimal code points for a UTF-8
 * string. Each code point is formatted as lowercase hex, zero-padded to at
 * least four digits.
 *
 * @param string $str UTF-8 string
 * @return string Hex sequence; empty string if $str is empty or cannot be
 *   split as UTF-8
 */
function utf8ToHexSequence( $str ) {
	$chars = preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY );
	if ( $chars === false ) {
		// preg_split() fails on malformed UTF-8 because of the /u modifier.
		// Iterating over false would only raise a warning and still produce
		// an empty result, so return that result directly.
		return '';
	}

	$buf = '';
	foreach ( $chars as $cp ) {
		$buf .= sprintf( '%04x ', utf8ToCodepoint( $cp ) );
	}
	return rtrim( $buf );
}

/**
 * Decode a string holding exactly one UTF-8 encoded character into its
 * numeric code point.
 *
 * The expected sequence length is derived from the number of leading 1 bits
 * of the first byte. Continuation bytes are not validated beyond taking
 * their low six bits.
 *
 * @param string $char Single UTF-8 character
 * @return int|bool Code point, or false if $char is empty or its byte
 *   length does not match the length announced by its first byte
 */
function utf8ToCodepoint( $char ) {
	// Reading $char[0] from an empty string raises an uninitialized-offset
	// notice/warning; the outcome for that input is false either way.
	if ( $char === '' ) {
		return false;
	}

	# Find the length
	$z = ord( $char[0] );
	if ( $z & 0x80 ) {
		# Count the leading 1 bits by shifting them past bit 7 one at a time
		$length = 0;
		while ( $z & 0x80 ) {
			$length++;
			$z <<= 1;
		}
	} else {
		$length = 1;
	}

	if ( $length != strlen( $char ) ) {
		return false;
	}
	if ( $length == 1 ) {
		return ord( $char );
	}

	# Mask off the length-determining bits and shift back to the original location
	$z &= 0xff;
	$z >>= $length;

	# Add in the free bits from subsequent bytes
	for ( $i = 1; $i < $length; $i++ ) {
		$z <<= 6;
		$z |= ord( $char[$i] ) & 0x3f;
	}

	return $z;
}

/**
 * Escape backslashes and single quotes in a string with a preceding
 * backslash, as needed to embed it in a single-quoted PHP string literal.
 *
 * @param string $string Raw string
 * @return string Escaped string
 */
function escapeSingleString( $string ) {
	return strtr( $string,
		array(
			'\\' => '\\\\',
			'\'' => '\\\''
		) );
}