MediaWiki  REL1_19
Utf8Test.php
Go to the documentation of this file.
00001 <?php
00030 require_once 'UtfNormalDefines.php';
00031 require_once 'UtfNormalUtil.php';
00032 require_once 'UtfNormal.php';
# All mb_* calls further down are meant to count UTF-8 characters.
mb_internal_encoding( "utf-8" );

# Flip to true to echo every tested line instead of only the failing ones.
$verbose = false;
#$verbose = true;

# This script is a command-line tool only.
if( 'cli' != php_sapi_name() ) {
	exit( "Run me from the command line please.\n" );
}

# The test data is read from the current working directory.
$in = fopen( "UTF-8-test.txt", "rt" );
if( $in === false ) {
	echo "Couldn't open UTF-8-test.txt -- can't run tests.\n",
		"If necessary, manually download this file. It can be obtained at\n",
		"http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt\n";
	exit(-1);
}

# Scan forward to the "Here come the tests:" marker line. The offset of
# its '|' terminator is the column every later test line is checked against.
$columns = 0;
for( $headerLine = fgets( $in ); $headerLine !== false; $headerLine = fgets( $in ) ) {
	if( preg_match( '/^(Here come the tests:\s*)\|$/', $headerLine ) ) {
		$columns = strpos( $headerLine, '|' );
		break;
	}
}

# Zero means the marker line was never found (or was malformed).
if( $columns == 0 ) {
	echo "Something seems to be wrong; couldn't extract line length.\n",
		"Check that UTF-8-test.txt was downloaded correctly from\n",
		"http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt\n";
	exit(-1);
}
00064 
00065 # print "$columns\n";
00066 
# Test ids that are skipped entirely.
$ignore = array(
	# These two lines actually seem to be corrupt
	'2.1.1', '2.2.1' );

# Test ids whose expected outcome is the opposite of their section's default
# (see testLine()).
$exceptions = array(
	# Tests that should mark invalid characters due to using long
	# sequences beyond what is now considered legal.
	'2.1.5', '2.1.6', '2.2.4', '2.2.5', '2.2.6', '2.3.5',

	# Literal 0xffff, which is illegal
	'2.2.3' );

# NOTE: the dispatch regex in the loop below only recognises three-part ids,
# so the '3.4' entry is never selected by this script as written.
$longTests = array(
	# These tests span multiple lines
	'3.1.9', '3.2.1', '3.2.2', '3.2.3', '3.2.4', '3.2.5',
	'3.4' );

# These tests are not in proper subsections
# NOTE: not read anywhere in this file; kept for reference.
$sectionTests = array( '3.4' );

$section = null;
$test = '';
$failed = 0;
$success = 0;
$total = 0;
while( false !== ( $line = fgets( $in ) ) ) {
	$matches = array();

	# Top-level section heading ("N  Title ... |"): echo it and move on.
	if( preg_match( '/^(\d+)\s+(.*?)\s*\|/', $line, $matches ) ) {
		$section = $matches[1];
		print $line;
		continue;
	}

	# Anything that does not start with an "a.b.c" test id is not a test line.
	if( !preg_match( '/^(\d+\.\d+\.\d+)\s*/', $line, $matches ) ) {
		continue;
	}
	$test = $matches[1];

	if( in_array( $test, $ignore, true ) ) {
		continue;
	}

	if( !in_array( $test, $longTests, true ) ) {
		# Ordinary test: the id and the test data share one line.
		testLine( $test, $line, $total, $success, $failed, $columns, $exceptions, $verbose );
		continue;
	}

	# Multi-line test: drop the line that follows the heading, then feed
	# every line to testLine() until a whitespace-only "|" line closes the
	# test. Also stop at end of file; fgets() returns false there and the
	# terminator pattern could then never match, which used to loop forever.
	fgets( $in );
	for( $line = fgets( $in );
		false !== $line && !preg_match( '/^\s+\|/', $line );
		$line = fgets( $in )
	) {
		testLine( $test, $line, $total, $success, $failed, $columns, $exceptions, $verbose );
	}
}

# Done reading; release the handle before reporting.
fclose( $in );

if( $failed ) {
	echo "\nFailed $failed tests.\n";
	echo "UTF-8 DECODER TEST FAILED\n";
	exit (-1);
}

echo "UTF-8 DECODER TEST SUCCESS!\n";
exit (0);
00124 
00125 
/**
 * Run one line of the test file through UtfNormal::quickisNFCVerify() and
 * record whether the outcome is the expected one.
 *
 * Expectation rules implemented here:
 *  - lines of tests in sections below 3 should come back unchanged;
 *  - lines of tests in section 3 and above should come back modified;
 *  - a test id listed in $exceptions inverts the rule for its section;
 *  - in every case the text before the first '|' must still be exactly
 *    $columns characters long after verification.
 *
 * @param $test String: dotted test id, e.g. "2.1.5"
 * @param $line String: raw line read from the test file
 * @param &$total Integer: incremented for every line tested
 * @param &$success Integer: incremented when the line behaves as expected
 * @param &$failed Integer: incremented when it does not
 * @param $columns Integer: expected character offset of the '|' terminator
 * @param $exceptions Array: test ids whose expectation is inverted
 * @param $verbose Boolean: print the processed line even when it passes
 */
function testLine( $test, $line, &$total, &$success, &$failed, $columns, $exceptions, $verbose ) {
	# Work on a copy so the result can be compared with the input;
	# the verifier is expected to rewrite its argument in place.
	$stripped = $line;
	UtfNormal::quickisNFCVerify( $stripped );

	# Byte-exact comparison: a loose == could treat two different
	# numeric-looking strings as equal.
	$same = ( $line === $stripped );

	# Measure the text in front of the '|' terminator, in characters.
	$barPos = strpos( $stripped, '|' );
	$prefix = ( $barPos === false ) ? '' : substr( $stripped, 0, $barPos );
	$len = mb_strlen( $prefix );
	if( !$len ) {
		# mb_strlen() yielded nothing usable; fall back to the byte count.
		$len = strlen( $prefix );
	}

	# Compare on the major section number only. Comparing the whole id
	# string ("3.1.9") with an integer depends on PHP's string/number
	# comparison rules, which differ between major PHP versions.
	$expectChange = ( (int)$test >= 3 );
	if( in_array( $test, $exceptions, true ) ) {
		$expectChange = !$expectChange;
	}

	# Pass when the line changed exactly if a change was expected, and the
	# terminator still sits in the expected column.
	$ok = ( $same !== $expectChange ) && ( $columns == $len );

	$total++;
	if( $ok ) {
		$success++;
	} else {
		$failed++;
	}

	if( $verbose || !$ok ) {
		# Append the measured length to the end of the line for diagnosis.
		print str_replace( "\n", "$len\n", $stripped );
	}
}