MediaWiki  REL1_22
UtfNormalUtil.php
Go to the documentation of this file.
00001 <?php
/**
 * Return the UTF-8 encoding of a single Unicode code point.
 *
 * The shortest form is always used: one byte below 0x80, two bytes below
 * 0x800, three bytes below 0x10000 and four bytes below 0x110000.
 * Surrogate code points (0xD800-0xDFFF) are not rejected; they are encoded
 * as three bytes like any other value in that range.
 *
 * @param int $codepoint Code point in the range 0..0x10FFFF
 * @return string One to four bytes
 * @throws \InvalidArgumentException If $codepoint is negative or above 0x10FFFF
 */
function codepointToUtf8( $codepoint ) {
    # Validate both ends of the range first. Without the lower bound a
    # negative value would satisfy the "< 0x80" test below and be handed to
    # chr(), which wraps its argument into 0..255 and returns a garbage byte.
    if ( $codepoint < 0 || $codepoint >= 0x110000 ) {
        # Thrown rather than echo + die() so callers can handle the failure;
        # left uncaught it still terminates the script with this message.
        throw new \InvalidArgumentException( "Asked for code outside of range ($codepoint)" );
    }

    if ( $codepoint < 0x80 ) {
        # 0xxxxxxx
        return chr( $codepoint );
    }

    if ( $codepoint < 0x800 ) {
        # 110xxxxx 10xxxxxx
        return chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0xc0 ) .
            chr( ( $codepoint & 0x3f ) | 0x80 );
    }

    if ( $codepoint < 0x10000 ) {
        # 1110xxxx 10xxxxxx 10xxxxxx
        return chr( ( ( $codepoint >> 12 ) & 0x0f ) | 0xe0 ) .
            chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 ) .
            chr( ( $codepoint & 0x3f ) | 0x80 );
    }

    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr( ( ( $codepoint >> 18 ) & 0x07 ) | 0xf0 ) .
        chr( ( ( $codepoint >> 12 ) & 0x3f ) | 0x80 ) .
        chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 ) .
        chr( ( $codepoint & 0x3f ) | 0x80 );
}
00051 
/**
 * Convert a space-separated list of hexadecimal code points into the
 * corresponding UTF-8 string.
 *
 * Each token is parsed with hexdec() and encoded with codepointToUtf8().
 * Empty tokens (from an empty input string, or from leading, trailing or
 * repeated spaces) are ignored.
 *
 * @param string $sequence Hex code points separated by single spaces
 * @return string Concatenated UTF-8 encoding of the listed code points
 */
function hexSequenceToUtf8( $sequence ) {
    $utf = '';
    foreach ( explode( ' ', $sequence ) as $hex ) {
        # explode() yields '' for an empty input and around stray spaces;
        # hexdec( '' ) is 0, which would otherwise inject a U+0000 byte.
        if ( $hex === '' ) {
            continue;
        }
        $utf .= codepointToUtf8( hexdec( $hex ) );
    }
    return $utf;
}
00069 
/**
 * Render a UTF-8 string as a space-separated list of code points.
 *
 * The string is split into characters using PCRE in UTF-8 mode; each
 * character is converted with utf8ToCodepoint() and printed as lowercase
 * hexadecimal, zero-padded to at least four digits (for example
 * "0041 00e9").
 *
 * NOTE(review): nothing here handles a failed split, so behavior on
 * malformed UTF-8 depends on what preg_split() returns in that case.
 *
 * @param string $str UTF-8 text
 * @return string Hex code points separated by single spaces
 */
function utf8ToHexSequence( $str ) {
    $hexCodes = array();
    $characters = preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY );
    foreach ( $characters as $character ) {
        $hexCodes[] = sprintf( '%04x', utf8ToCodepoint( $character ) );
    }
    # Joining with single spaces produces no trailing separator to trim
    return implode( ' ', $hexCodes );
}
00085 
/**
 * Determine the Unicode code point of a string holding exactly one
 * UTF-8 encoded character.
 *
 * The expected sequence length is read from the number of leading 1 bits
 * in the first byte and must equal the byte length of $char. Trailing
 * bytes are not checked for the 10xxxxxx continuation pattern, and
 * overlong or out-of-range sequences are not rejected.
 *
 * @param string $char A single UTF-8 encoded character
 * @return int|bool The code point, or false if $char is empty, starts
 *   with a continuation byte, or its byte length does not match the
 *   length announced by its first byte
 */
function utf8ToCodepoint( $char ) {
    # An empty string has no lead byte; bail out before indexing into it
    if ( $char === '' ) {
        return false;
    }

    # Find the length
    $z = ord( $char[0] );
    if ( $z & 0x80 ) {
        # Count the leading 1 bits by shifting them past bit 7 one at a time
        $length = 0;
        while ( $z & 0x80 ) {
            $length++;
            $z <<= 1;
        }
        if ( $length == 1 ) {
            # 10xxxxxx is a continuation byte and cannot start a character
            return false;
        }
    } else {
        $length = 1;
    }

    if ( $length != strlen( $char ) ) {
        return false;
    }
    if ( $length == 1 ) {
        return ord( $char );
    }

    # Mask off the length-determining bits and shift back to the original location
    $z &= 0xff;
    $z >>= $length;

    # Add in the free bits from subsequent bytes
    for ( $i = 1; $i < $length; $i++ ) {
        $z <<= 6;
        $z |= ord( $char[$i] ) & 0x3f;
    }

    return $z;
}
00126 
/**
 * Backslash-escape every backslash and single quote in a string.
 *
 * Presumably intended for embedding the result inside a single-quoted
 * PHP string literal; confirm against callers.
 *
 * @param string $string Text to escape
 * @return string $string with "\" replaced by "\\" and "'" replaced by "\'"
 */
function escapeSingleString( $string ) {
    # The character list holds exactly two characters: single quote and backslash
    $needsBackslash = '\'\\';
    return addcslashes( $string, $needsBackslash );
}