[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Benchmark
 */

require_once __DIR__ . '/Benchmarker.php';

/**
 * Times the regexp used in Language->checkTitleEncoding() (plus two variants
 * of it) against mb_check_encoding(), when that function is available.
 *
 * Every sample title is percent-decoded once and then handed to each of the
 * four checker methods below.
 *
 * @ingroup Benchmark
 */
class BenchUtf8TitleCheck extends Benchmarker {
	/** @var bool Whether mb_check_encoding() exists, i.e. whether the benchmark can run */
	private $canRun;

	/** @var string[] Percent-encoded sample titles; decoded in execute() */
	private $data;

	/**
	 * @var int|bool Result of the most recent check. Written by every
	 *   benchmarked method; never read anywhere in this file.
	 */
	private $isutf8;

	/**
	 * Set up the sample titles and the description, and detect whether
	 * mb_check_encoding() is available.
	 */
	public function __construct() {
		parent::__construct();

		// @codingStandardsIgnoreStart Ignore long line warnings.
		$this->data = array(
			"",
			"United States of America", // 7bit ASCII
			"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
			// This comes from bug 36839
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
			. "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
			. "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
			. "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
			. "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
			. "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
			. "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
			. "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
			. "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
			. "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
			. "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
			. "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
			. "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
			. "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis"
		);
		// @codingStandardsIgnoreEnd

		$this->canRun = function_exists( 'mb_check_encoding' );

		if ( !$this->canRun ) {
			// Without mbstring there is nothing to compare against.
			$this->mDescription = "CANNOT RUN benchmark using mb_check_encoding: function not available.";

			return;
		}

		$this->mDescription = "Benchmark for using a regexp vs. mb_check_encoding " .
			"to check for UTF-8 encoding.";
		mb_internal_encoding( 'UTF-8' );
	}

	/**
	 * Queue one benchmark per (sample title, checker method) pair, run them
	 * and print the formatted results. Does nothing when mb_check_encoding()
	 * is unavailable.
	 */
	public function execute() {
		if ( !$this->canRun ) {
			return;
		}

		// Order matters for the report: for each sample, the three regexp
		// variants come first and mb_check_encoding last.
		$checkers = array(
			'use_regexp',
			'use_regexp_non_capturing',
			'use_regexp_once_only',
			'use_mb_check_encoding',
		);

		$benchmarks = array();
		foreach ( $this->data as $encodedTitle ) {
			$title = rawurldecode( $encodedTitle );
			foreach ( $checkers as $method ) {
				$benchmarks[] = array(
					'function' => array( $this, $method ),
					'args' => array( $title )
				);
			}
		}

		// bench() and getFormattedResults() are not defined in this file;
		// presumably inherited from Benchmarker.
		$this->bench( $benchmarks );
		print $this->getFormattedResults();
	}

	/**
	 * Check $s with the regexp using a plain (capturing) group.
	 *
	 * @param string $s Decoded title to check
	 */
	public function use_regexp( $s ) {
		$pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Check $s with the same regexp, using a non-capturing subgroup.
	 *
	 * @param string $s Decoded title to check
	 */
	public function use_regexp_non_capturing( $s ) {
		$pattern = '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Check $s with the same regexp, using a once-only subgroup.
	 *
	 * @param string $s Decoded title to check
	 */
	public function use_regexp_once_only( $s ) {
		$pattern = '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Check $s with mb_check_encoding() against 'UTF-8'.
	 *
	 * @param string $s Decoded title to check
	 */
	public function use_mb_check_encoding( $s ) {
		$this->isutf8 = mb_check_encoding( $s, 'UTF-8' );
	}
}

$maintClass = 'BenchUtf8TitleCheck';
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |