MediaWiki  REL1_21
StringUtilsTest.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Tests for StringUtils::isUtf8().
 *
 * Every dataset from provideStringsForIsUtf8Check() is checked twice: once
 * through the default call (only when mb_check_encoding() exists) and once
 * with the second argument of StringUtils::isUtf8() set to true, which the
 * assertion message describes as the pure PHP implementation.
 */
class StringUtilsTest extends MediaWikiTestCase {

	/**
	 * Check StringUtils::isUtf8() when the mbstring extension is loaded.
	 *
	 * The test is marked as skipped when mb_check_encoding() is not defined.
	 *
	 * @dataProvider provideStringsForIsUtf8Check
	 * @param bool $expected Expected return value of StringUtils::isUtf8()
	 * @param string $string Byte string handed to StringUtils::isUtf8()
	 */
	public function testIsUtf8WithMbstring( $expected, $string ) {
		if ( !function_exists( 'mb_check_encoding' ) ) {
			$this->markTestSkipped( 'Test requires the mbstring PHP extension' );
		}
		$this->assertEquals( $expected,
			StringUtils::isUtf8( $string ),
			'Testing string "' . $this->escaped( $string ) . '" with mb_check_encoding'
		);
	}

	/**
	 * Check StringUtils::isUtf8() with its second argument set to true.
	 *
	 * Unlike testIsUtf8WithMbstring(), this test never skips, so it runs
	 * whether or not mbstring is installed.
	 *
	 * @dataProvider provideStringsForIsUtf8Check
	 * @param bool $expected Expected return value of StringUtils::isUtf8()
	 * @param string $string Byte string handed to StringUtils::isUtf8()
	 */
	public function testIsUtf8WithPhpFallbackImplementation( $expected, $string ) {
		$this->assertEquals( $expected,
			StringUtils::isUtf8( $string, true ),
			'Testing string "' . $this->escaped( $string ) . '" with pure PHP implementation'
		);
	}

	/**
	 * Make a byte string readable inside an assertion message.
	 *
	 * Each byte whose value is above 127 is replaced by a literal "\x"
	 * followed by its lowercase hexadecimal value; every other byte
	 * (including control characters) is copied through unchanged.
	 *
	 * @param string $string Raw byte string
	 * @return string The string with high bytes spelled out as \xNN
	 */
	public function escaped( $string ) {
		$escaped = '';
		$length = strlen( $string );
		for ( $i = 0; $i < $length; $i++ ) {
			$char = $string[$i];
			$val = ord( $char );
			if ( $val > 127 ) {
				// Values here are 0x80..0xff, so dechex() always yields two digits
				$escaped .= '\x' . dechex( $val );
			} else {
				$escaped .= $char;
			}
		}
		return $escaped;
	}

	/**
	 * Data provider for the isUtf8 tests.
	 *
	 * Each dataset is array( expected result, byte string ), matching the
	 * ( $expected, $string ) parameters of the two test methods.
	 *
	 * NOTE(review): RFC 3629 limits UTF-8 to four-byte sequences and to code
	 * points up to U+10FFFF, yet the five-/six-byte sequences and
	 * "\xf4\x90\x80\x80" below are expected to be accepted. Presumably these
	 * rows pin what the implementation under test currently does - confirm
	 * before changing either side.
	 *
	 * @return array List of array( bool $expected, string $string )
	 */
	public function provideStringsForIsUtf8Check() {
		// Expected return values for StringUtils::isUtf8()
		$PASS = true;
		$FAIL = false;

		return array(
			array( $PASS, 'Some ASCII' ),
			array( $PASS, "Euro sign €" ),

			# First possible sequences
			array( $PASS, "\x00" ),
			array( $PASS, "\xc2\x80" ),
			array( $PASS, "\xe0\xa0\x80" ),
			array( $PASS, "\xf0\x90\x80\x80" ),
			array( $PASS, "\xf8\x88\x80\x80\x80" ),
			array( $PASS, "\xfc\x84\x80\x80\x80\x80" ),

			# Last possible sequence
			array( $PASS, "\x7f" ),
			array( $PASS, "\xdf\xbf" ),
			array( $PASS, "\xef\xbf\xbf" ),
			array( $PASS, "\xf7\xbf\xbf\xbf" ),
			array( $PASS, "\xfb\xbf\xbf\xbf\xbf" ),
			array( $FAIL, "\xfd\xbf\xbf\xbf\xbf\xbf" ),

			# boundaries:
			array( $PASS, "\xed\x9f\xbf" ),
			array( $PASS, "\xee\x80\x80" ),
			array( $PASS, "\xef\xbf\xbd" ),
			array( $PASS, "\xf4\x8f\xbf\xbf" ),
			array( $PASS, "\xf4\x90\x80\x80" ),

			# Malformed
			array( $FAIL, "\x80" ),
			array( $FAIL, "\xBF" ),
			array( $FAIL, "\x80\xbf" ),
			array( $FAIL, "\x80\xbf\x80" ),
			array( $FAIL, "\x80\xbf\x80\xbf" ),
			array( $FAIL, "\x80\xbf\x80\xbf\x80" ),
			array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf" ),
			array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf\x80" ),

			# last byte missing
			array( $FAIL, "\xc0" ),
			array( $FAIL, "\xe0\x80" ),
			array( $FAIL, "\xf0\x80\x80" ),
			array( $FAIL, "\xf8\x80\x80\x80" ),
			array( $FAIL, "\xfc\x80\x80\x80\x80" ),
			array( $FAIL, "\xdf" ),
			array( $FAIL, "\xef\xbf" ),
			array( $FAIL, "\xf7\xbf\xbf" ),
			array( $FAIL, "\xfb\xbf\xbf\xbf" ),
			array( $FAIL, "\xfd\xbf\xbf\xbf\xbf" ),

			# impossible bytes
			array( $FAIL, "\xfe" ),
			array( $FAIL, "\xff" ),
			array( $FAIL, "\xfe\xfe\xff\xff" ),

			# non characters
			array( $PASS, "\xef\xbf\xbe" ),
			// Same bytes as the three-byte row under "Last possible sequence"
			array( $PASS, "\xef\xbf\xbf" ),
		);
	}
}