MediaWiki  REL1_19
UtfNormalBench.php
Go to the documentation of this file.
00001 <?php
# Load the php_utfnormal.so extension when "--icu" appears among the
# command-line arguments; do nothing when no argument list is available.
$benchArgs = isset( $_SERVER['argv'] ) ? $_SERVER['argv'] : array();
if( in_array( '--icu', $benchArgs, true ) ) {
    dl( 'php_utfnormal.so' );
}
00030 
00031 require_once 'UtfNormalDefines.php';
00032 require_once 'UtfNormalUtil.php';
00033 require_once 'UtfNormal.php';
00034 
# Number of timed repetitions per normalization form.
define( 'BENCH_CYCLES', 5 );

# This script is a command-line tool; refuse to run under any other SAPI.
if( php_sapi_name() !== 'cli' ) {
    exit( "Run me from the command line please.\n" );
}

# Sample texts to benchmark: file path => human-readable description.
$testfiles = array(
    'testdata/washington.txt' => 'English text',
    'testdata/berlin.txt' => 'German text',
    'testdata/bulgakov.txt' => 'Russian text',
    'testdata/tokyo.txt' => 'Japanese text',
    'testdata/young.txt' => 'Korean text'
);

$normalizer = new UtfNormal();
UtfNormal::loadData();

# Run the whole benchmark suite once per sample file, in table order.
foreach( array_keys( $testfiles ) as $file ) {
    $desc = $testfiles[$file];
    benchmarkTest( $normalizer, $file, $desc );
}
00053 
00054 # -------
00055 
/**
 * Benchmark every enabled normalization form against one sample file.
 *
 * Prints a heading for the file, reads it fully into memory and hands the
 * contents to benchmarkForm() once per entry in the form list. An entry that
 * is itself an array is treated as a pipeline: each step receives the output
 * of the previous step instead of the original file contents.
 *
 * If the file cannot be read, a message is printed and the file is skipped
 * rather than benchmarking a bogus (false) input.
 *
 * @param $u object Normalizer instance whose methods are benchmarked
 * @param $filename string Path of the sample file to load
 * @param $desc string Human-readable description shown in the heading
 */
function benchmarkTest( &$u, $filename, $desc ) {
    print "Testing $filename ($desc)...\n";

    # file_get_contents() returns false on failure; checking readability
    # first avoids a PHP warning for the common "file is missing" case.
    $data = is_readable( $filename ) ? file_get_contents( $filename ) : false;
    if( $data === false ) {
        print " Unable to read $filename, skipping.\n";
        return;
    }

    # Commented-out entries are additional forms that can be enabled by hand.
    $forms = array(
#       'placebo',
        'cleanUp',
        'toNFC',
#       'toNFKC',
#       'toNFD', 'toNFKD',
        'NFC',
#       'NFKC',
#       'NFD', 'NFKD',
        array( 'fastDecompose', 'fastCombiningSort', 'fastCompose' ),
#       'quickIsNFC', 'quickIsNFCVerify',
        );

    foreach( $forms as $form ) {
        if( is_array( $form ) ) {
            # Pipeline: feed each step's result into the next step.
            $str = $data;
            foreach( $form as $step ) {
                $str = benchmarkForm( $u, $str, $step );
            }
        } else {
            benchmarkForm( $u, $data, $form );
        }
    }
}
00082 
/**
 * Return the current Unix timestamp with sub-second precision.
 *
 * @return float Seconds since the Unix epoch, including the fractional part
 */
function benchTime(){
    # microtime( true ) returns the timestamp directly as a float, so there is
    # no need to split the "usec sec" string form and add the parts by hand.
    return microtime( true );
}
00087 
/**
 * Time one normalizer method on a string and print a one-line report.
 *
 * Invokes $u->$form( $data, UtfNormal::$utfCanonicalDecomp ) BENCH_CYCLES
 * times, timing each call separately with benchTime(). The shortest run is
 * reported, together with the throughput derived from it and whether the
 * output differs from the input.
 *
 * @param $u object Normalizer instance providing the method named by $form
 * @param $data string Input handed to the method (passed by reference)
 * @param $form string Name of the method to call on $u
 * @return mixed Result of the last invocation (null if no cycle was run)
 */
function benchmarkForm( &$u, &$data, $form ) {
    $deltas = array();
    $out = null;
    for( $i = 0; $i < BENCH_CYCLES; $i++ ) {
        $start = benchTime();
        $out = $u->$form( $data, UtfNormal::$utfCanonicalDecomp );
        $deltas[] = ( benchTime() - $start );
    }

    # Take shortest time
    $delta = $deltas ? min( $deltas ) : 0.0;

    # A run can measure as 0 seconds (tiny input, coarse clock); dividing by
    # it would raise a warning or, on PHP 8, a fatal DivisionByZeroError.
    if( $delta > 0 ) {
        $rate = number_format( intval( strlen( $data ) / $delta ) );
    } else {
        $rate = 'n/a';
    }
    $same = ( 0 == strcmp( $data, $out ) );

    printf( " %20s %6.1fms %12s bytes/s (%s)\n",
        $form,
        $delta * 1000.0,
        $rate,
        ( $same ? 'no change' : 'changed' ) );
    return $out;
}