[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/storage/ -> testCompression.php (source)

   1  <?php
   2  /**
   3   * Test revision text compression and decompression.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Maintenance ExternalStorage
  22   */
  23  
  24  $optionsWithArgs = array( 'start', 'limit', 'type' );
  25  require  __DIR__ . '/../commandLine.inc';
  26  
  27  if ( !isset( $args[0] ) ) {
  28      echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
  29          "[--limit=<num-revs>] <page-title>\n";
  30      exit( 1 );
  31  }
  32  
  33  $title = Title::newFromText( $args[0] );
  34  if ( isset( $options['start'] ) ) {
  35      $start = wfTimestamp( TS_MW, strtotime( $options['start'] ) );
  36      echo "Starting from " . $wgLang->timeanddate( $start ) . "\n";
  37  } else {
  38      $start = '19700101000000';
  39  }
  40  if ( isset( $options['limit'] ) ) {
  41      $limit = $options['limit'];
  42      $untilHappy = false;
  43  } else {
  44      $limit = 1000;
  45      $untilHappy = true;
  46  }
  47  $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob';
  48  
  49  $dbr = wfGetDB( DB_SLAVE );
  50  $res = $dbr->select(
  51      array( 'page', 'revision', 'text' ),
  52      '*',
  53      array(
  54          'page_namespace' => $title->getNamespace(),
  55          'page_title' => $title->getDBkey(),
  56          'page_id=rev_page',
  57          'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
  58          'rev_text_id=old_id'
  59      ), __FILE__, array( 'LIMIT' => $limit )
  60  );
  61  
  62  $blob = new $type;
  63  $hashes = array();
  64  $keys = array();
  65  $uncompressedSize = 0;
  66  $t = -microtime( true );
  67  foreach ( $res as $row ) {
  68      $revision = new Revision( $row );
  69      $text = $revision->getSerializedData();
  70      $uncompressedSize += strlen( $text );
  71      $hashes[$row->rev_id] = md5( $text );
  72      $keys[$row->rev_id] = $blob->addItem( $text );
  73      if ( $untilHappy && !$blob->isHappy() ) {
  74          break;
  75      }
  76  }
  77  
  78  $serialized = serialize( $blob );
  79  $t += microtime( true );
  80  # print_r( $blob->mDiffMap );
  81  
  82  printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
  83      $type,
  84      count( $hashes ),
  85      $uncompressedSize / strlen( $serialized ),
  86      $wgLang->formatSize( $uncompressedSize ),
  87      strlen( $serialized )
  88  );
  89  printf( "Compression time: %5.2f ms\n", $t * 1000 );
  90  
  91  $t = -microtime( true );
  92  $blob = unserialize( $serialized );
  93  foreach ( $keys as $id => $key ) {
  94      $text = $blob->getItem( $key );
  95      if ( md5( $text ) != $hashes[$id] ) {
  96          echo "Content hash mismatch for rev_id $id\n";
  97          # var_dump( $text );
  98      }
  99  }
 100  $t += microtime( true );
 101  printf( "Decompression time: %5.2f ms\n", $t * 1000 );


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1