MediaWiki
REL1_19
|
<?php
/**
 * Command-line script that feeds the revision texts of one page into a
 * history blob object, then reports the compression ratio, the time taken to
 * build and serialize the blob, and the time taken to unserialize it and read
 * every item back. Each item read back is checked against an md5 of the
 * original text.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>
 *
 *   --type   Name of the blob class to instantiate
 *            (default: ConcatenatedGzipHistoryBlob).
 *   --start  Only use revisions with a timestamp later than this date
 *            (parsed with strtotime()).
 *   --limit  Maximum number of revision rows to fetch. When omitted, up to
 *            1000 rows are fetched and the loop stops as soon as the blob's
 *            isHappy() returns false.
 */

$optionsWithArgs = array( 'start', 'limit', 'type' );
require( dirname( __FILE__ ) . '/../commandLine.inc' );

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$title = Title::newFromText( $args[0] );
if ( !$title ) {
	// An unparseable title yields no Title object; bail out before it is
	// dereferenced while building the query conditions below.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false when it cannot parse the string; do not
		// let that value be converted into a timestamp.
		echo "Unable to parse start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $wgLang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// Explicit limit: process every fetched row regardless of isHappy().
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	// No limit given: fetch up to 1000 rows and stop once the blob is no
	// longer "happy".
	$limit = 1000;
	$untilHappy = true;
}

$type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob';
if ( !class_exists( $type ) ) {
	// The class name comes from the command line; fail with a message
	// instead of a fatal error from `new $type`.
	echo "Unknown history blob class: $type\n";
	exit( 1 );
}

// NOTE(review): the query specifies no ORDER BY, so which rows satisfy the
// LIMIT and the order in which they are added to the blob are left to the
// database - confirm that this is intended.
$dbr = wfGetDB( DB_SLAVE );
$res = $dbr->select(
	array( 'page', 'revision', 'text' ),
	'*',
	array(
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'page_id=rev_page',
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
		'rev_text_id=old_id'
	), __FILE__, array( 'LIMIT' => $limit )
);

$blob = new $type;
$hashes = array(); // rev_id => md5 of the original revision text
$keys = array();   // rev_id => key returned by $blob->addItem()
$uncompressedSize = 0;

// Elapsed time is accumulated as (end - start): begin with -start here and
// add the end time after serialize().
$t = -microtime( true );
foreach ( $res as $row ) {
	$revision = new Revision( $row );
	$text = $revision->getText();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$wgLang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

// Round trip: unserialize the blob and verify every stored item against the
// hash of the text that was originally added.
$t = -microtime( true );
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with loose != two different hex digests that both
	// look like numeric strings (e.g. "0e123..." and "0e456...") compare
	// equal, which would hide a mismatch.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );