[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/benchmarks/ -> bench_utf8_title_check.php (source)

   1  <?php
   2  /**
   3   * Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Benchmark
  22   */
  23  
  24  require_once  __DIR__ . '/Benchmarker.php';
  25  
  26  /**
  27   * This little benchmark executes the regexp used in Language->checkTitleEncoding()
  28   * and compares its execution time against that of mb_check_encoding, if available.
  29   *
  30   * @ingroup Benchmark
  31   */
  32  class BenchUtf8TitleCheck extends Benchmarker {
  33      private $canRun;
  34  
  35      private $data;
  36  
  37  	public function __construct() {
  38          parent::__construct();
  39  
  40          // @codingStandardsIgnoreStart Ignore long line warnings.
  41          $this->data = array(
  42              "",
  43              "United States of America", // 7bit ASCII
  44              "S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
  45              "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
  46              // This comes from bug 36839
  47              "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
  48              . "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
  49              . "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
  50              . "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
  51              . "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
  52              . "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
  53              . "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
  54              . "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
  55              . "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
  56              . "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
  57              . "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
  58              . "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
  59              . "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
  60              . "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis"
  61          );
  62          // @codingStandardsIgnoreEnd
  63  
  64          $this->canRun = function_exists( 'mb_check_encoding' );
  65  
  66          if ( $this->canRun ) {
  67              $this->mDescription = "Benchmark for using a regexp vs. mb_check_encoding " .
  68                  "to check for UTF-8 encoding.";
  69              mb_internal_encoding( 'UTF-8' );
  70          } else {
  71              $this->mDescription = "CANNOT RUN benchmark using mb_check_encoding: function not available.";
  72          }
  73      }
  74  
  75  	public function execute() {
  76          if ( !$this->canRun ) {
  77              return;
  78          }
  79          $benchmarks = array();
  80          foreach ( $this->data as $val ) {
  81              $benchmarks[] = array(
  82                  'function' => array( $this, 'use_regexp' ),
  83                  'args' => array( rawurldecode( $val ) )
  84              );
  85              $benchmarks[] = array(
  86                  'function' => array( $this, 'use_regexp_non_capturing' ),
  87                  'args' => array( rawurldecode( $val ) )
  88              );
  89              $benchmarks[] = array(
  90                  'function' => array( $this, 'use_regexp_once_only' ),
  91                  'args' => array( rawurldecode( $val ) )
  92              );
  93              $benchmarks[] = array(
  94                  'function' => array( $this, 'use_mb_check_encoding' ),
  95                  'args' => array( rawurldecode( $val ) )
  96              );
  97          }
  98          $this->bench( $benchmarks );
  99          print $this->getFormattedResults();
 100      }
 101  
 102      private $isutf8;
 103  
 104  	function use_regexp( $s ) {
 105          $this->isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 106              '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 107      }
 108  
 109  	function use_regexp_non_capturing( $s ) {
 110          // Same as above with a non-capturing subgroup.
 111          $this->isutf8 = preg_match( '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 112              '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 113      }
 114  
 115  	function use_regexp_once_only( $s ) {
 116          // Same as above with a once-only subgroup.
 117          $this->isutf8 = preg_match( '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 118              '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 119      }
 120  
 121  	function use_mb_check_encoding( $s ) {
 122          $this->isutf8 = mb_check_encoding( $s, 'UTF-8' );
 123      }
 124  }
 125  
// Name the class defined in this file, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): the constant is defined outside this file; presumably it points
// at the maintenance runner that instantiates $maintClass when this script is
// the entry point - confirm against Maintenance.php.
$maintClass = 'BenchUtf8TitleCheck';
require_once RUN_MAINTENANCE_IF_MAIN;


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1