MediaWiki  REL1_19
UtfNormalUtil.php
Go to the documentation of this file.
00001 <?php
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * Code points below 0x80 are returned as a single byte; larger values
 * are split into a lead byte followed by one to three continuation
 * bytes carrying six payload bits each.
 *
 * If the value matches none of the ranges below 0x110000, a message is
 * printed and the script is terminated with die( -1 ).
 *
 * @param $codepoint Integer: code point to encode
 * @return String: the UTF-8 byte sequence
 */
function codepointToUtf8( $codepoint ) {
	if ( $codepoint < 0x80 ) {
		return chr( $codepoint );
	}

	# Pick the number of continuation bytes, plus the payload mask and
	# marker bits of the lead byte, from the size of the code point.
	if ( $codepoint < 0x800 ) {
		$trailing = 1;
		$leadMask = 0x3f;
		$leadMark = 0xc0;
	} elseif ( $codepoint < 0x10000 ) {
		$trailing = 2;
		$leadMask = 0x0f;
		$leadMark = 0xe0;
	} elseif ( $codepoint < 0x110000 ) {
		$trailing = 3;
		$leadMask = 0x07;
		$leadMark = 0xf0;
	} else {
		echo "Asked for code outside of range ($codepoint)\n";
		die( -1 );
	}

	# Lead byte: the highest payload bits combined with the length marker.
	$bytes = chr( ( ( $codepoint >> ( 6 * $trailing ) ) & $leadMask ) | $leadMark );

	# Continuation bytes: 10xxxxxx, most significant group first.
	for ( $shift = 6 * ( $trailing - 1 ); $shift >= 0; $shift -= 6 ) {
		$bytes .= chr( ( ( $codepoint >> $shift ) & 0x3f ) | 0x80 );
	}

	return $bytes;
}
00051 
/**
 * Convert a sequence of hexadecimal code points, separated by single
 * spaces, into the corresponding UTF-8 string.
 *
 * Each space-delimited token is parsed with hexdec() and encoded with
 * codepointToUtf8(); the encoded pieces are joined in order.
 *
 * @param $sequence String: hex code points separated by single spaces
 * @return String: concatenated UTF-8 encoding of all code points
 */
function hexSequenceToUtf8( $sequence ) {
	$encoded = array();
	foreach ( explode( ' ', $sequence ) as $token ) {
		$encoded[] = codepointToUtf8( hexdec( $token ) );
	}
	return implode( '', $encoded );
}
00069 
/**
 * Convert a UTF-8 string into a space-separated sequence of hexadecimal
 * code points, each zero-padded to at least four digits.
 *
 * Every character matched by "." in UTF-8 mode is decoded with
 * utf8ToCodepoint() and formatted with "%04x ". Newlines are not matched
 * by "." (no "s" modifier) and are left in place; trailing whitespace is
 * stripped from the result.
 *
 * Uses preg_replace_callback() rather than the "e" (eval) modifier: the
 * modifier was removed in PHP 7.0, and it applied addslashes() to the
 * match, so a single quote was passed on as the two bytes \' and
 * reported as 0000.
 *
 * @param $str String: UTF-8 input
 * @return String: hex code points separated by spaces; an empty string
 *   if the pattern engine fails (for example on invalid UTF-8 input)
 */
function utf8ToHexSequence( $str ) {
	$hex = preg_replace_callback(
		'/(.)/uS',
		function ( $match ) {
			return sprintf( "%04x ", utf8ToCodepoint( $match[1] ) );
		},
		$str
	);

	# preg_replace_callback() yields null on a PCRE error; map it to the
	# empty string that rtrim( null ) used to produce.
	if ( $hex === null ) {
		return '';
	}

	return rtrim( $hex );
}
00083 
/**
 * Decode a string holding exactly one UTF-8 encoded character into its
 * code point.
 *
 * The expected byte length is derived from the number of leading 1 bits
 * in the first byte. The payload bits of the first byte are then
 * combined with the low six bits of each following byte.
 *
 * Only the overall length is checked; the following bytes are not
 * verified to be well-formed continuation bytes.
 *
 * @param $char String: a single UTF-8 character
 * @return Integer code point, or false if the string is empty or its
 *   byte length does not match the length announced by the first byte
 */
function utf8ToCodepoint( $char ) {
	# An empty string has no first byte to inspect; reject it up front
	# instead of reading an uninitialized string offset.
	if ( $char === '' ) {
		return false;
	}

	# Find the length
	$z = ord( $char[0] );
	if ( $z & 0x80 ) {
		# Count the leading 1 bits by shifting them out past bit 7
		$length = 0;
		while ( $z & 0x80 ) {
			$length++;
			$z <<= 1;
		}
	} else {
		$length = 1;
	}

	if ( $length != strlen( $char ) ) {
		return false;
	}
	if ( $length == 1 ) {
		return ord( $char );
	}

	# Mask off the length-determining bits and shift back to the original location
	$z &= 0xff;
	$z >>= $length;

	# Add in the free bits from subsequent bytes
	for ( $i = 1; $i < $length; $i++ ) {
		$z <<= 6;
		$z |= ord( $char[$i] ) & 0x3f;
	}

	return $z;
}
00124 
/**
 * Escape a string so it can be placed inside a single-quoted PHP string
 * literal: every backslash and every single quote is prefixed with a
 * backslash.
 *
 * @param $string String: raw text
 * @return String: text with backslashes and single quotes escaped
 */
function escapeSingleString( $string ) {
	# strtr() with a map replaces in one pass, so backslashes added by
	# one rule are never escaped again by the other.
	static $escapes = array(
		"\\" => "\\\\",
		"'" => "\\'",
	);

	return strtr( $string, $escapes );
}