[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * MySQL search engine
 *
 * Copyright (C) 2004 Brion Vibber <[email protected]>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Search
 */

/**
 * Search engine hook for MySQL 4+
 * @ingroup Search
 */
class SearchMySQL extends SearchDatabase {
	/** @var bool When true, terms without an explicit modifier are made mandatory ('+') */
	protected $strictMatching = true;

	/** @var int|null Cached ft_min_word_len value, filled lazily by minSearchLength() */
	private static $mMinSearchLength;

	/**
	 * Parse the user's query and transform it into an SQL fragment which will
	 * become part of a WHERE clause.
	 *
	 * As a side effect, $this->searchTerms is reset and refilled with one
	 * highlighting regex fragment per parsed term.
	 *
	 * @param string $filteredText
	 * @param string $fulltext
	 *
	 * @return string
	 */
	public function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;

		$lc = $this->legalSearchChars(); // Minus format chars
		$searchon = '';
		$this->searchTerms = array();

		# @todo FIXME: This doesn't handle parenthetical expressions.
		$m = array();
		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
				$filteredText, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $bits ) {
				// A quoted phrase does not fill the trailing capture groups, so
				// pad the match to a fixed width instead of silencing the
				// undefined-offset notices that list() would otherwise raise.
				list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
					array_pad( $bits, 5, '' );

				if ( $nonQuoted != '' ) {
					$term = $nonQuoted;
					$quote = '';
				} else {
					$term = str_replace( '"', '', $term );
					$quote = '"';
				}

				if ( $searchon !== '' ) {
					$searchon .= ' ';
				}
				if ( $this->strictMatching && ( $modifier == '' ) ) {
					// If we leave this out, boolean op defaults to OR which is rarely helpful.
					$modifier = '+';
				}

				// Some languages such as Serbian store the input form in the search index,
				// so we may need to search for matches in multiple writing system variants.
				$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
				if ( is_array( $convertedVariants ) ) {
					$variants = array_unique( array_values( $convertedVariants ) );
				} else {
					$variants = array( $term );
				}

				// The low-level search index does some processing on input to work
				// around problems with minimum lengths and encoding in MySQL's
				// fulltext engine.
				// For Chinese this also inserts spaces between adjacent Han characters.
				$strippedVariants = array_map(
					array( $wgContLang, 'normalizeForSearch' ),
					$variants );

				// Some languages such as Chinese force all variants to a canonical
				// form when stripping to the low-level search index, so to be sure
				// let's check our variants list for unique items after stripping.
				$strippedVariants = array_unique( $strippedVariants );

				// Several variants of one term are grouped in parentheses so the
				// modifier applies to the group as a whole.
				$hasMultipleVariants = count( $strippedVariants ) > 1;

				$searchon .= $modifier;
				if ( $hasMultipleVariants ) {
					$searchon .= '(';
				}
				foreach ( $strippedVariants as $stripped ) {
					$stripped = $this->normalizeText( $stripped );
					if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
						// Hack for Chinese: we need to toss in quotes for
						// multiple-character phrases since normalizeForSearch()
						// added spaces between them to make word breaks.
						$stripped = '"' . trim( $stripped ) . '"';
					}
					$searchon .= "$quote$stripped$quote$wildcard ";
				}
				if ( $hasMultipleVariants ) {
					$searchon .= ')';
				}

				// Match individual terms or quoted phrase in result highlighting...
				// Note that variants will be introduced in a later stage for highlighting!
				$regexp = $this->regexTerm( $term, $wildcard );
				$this->searchTerms[] = $regexp;
			}
			wfDebug( __METHOD__ . ": Would search with '$searchon'\n" );
			wfDebug( __METHOD__ . ': Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
		} else {
			wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
		}

		$searchon = $this->db->addQuotes( $searchon );
		$field = $this->getIndexField( $fulltext );
		return " MATCH($field) AGAINST($searchon IN BOOLEAN MODE) ";
	}

	/**
	 * Build the regex fragment used to highlight one search term in results.
	 *
	 * The term is escaped with preg_quote() for a '/'-delimited pattern. When
	 * the content language reports word breaks, \b anchors are added: on both
	 * sides normally, on the leading side only for wildcard terms.
	 *
	 * @param string $string Search term
	 * @param string $wildcard Wildcard suffix captured from the query ('' if none)
	 * @return string
	 */
	public function regexTerm( $string, $wildcard ) {
		global $wgContLang;

		$regex = preg_quote( $string, '/' );
		if ( $wgContLang->hasWordBreaks() ) {
			if ( $wildcard ) {
				// Don't cut off the final bit!
				$regex = "\b$regex";
			} else {
				$regex = "\b$regex\b";
			}
		} else {
			// For Chinese, words may legitimately abut other words in the text literal.
			// Don't add \b boundary checks... note this could cause false positives
			// for latin chars.
		}
		return $regex;
	}

	/**
	 * Characters accepted in a search term: the parent's set plus the double
	 * quote and the asterisk, which parseQuery() treats as query syntax.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "\"*" . parent::legalSearchChars();
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet
	 */
	public function searchText( $term ) {
		return $this->searchInternal( $term, true );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet
	 */
	public function searchTitle( $term ) {
		return $this->searchInternal( $term, false );
	}

	/**
	 * Run the result query and the matching count query for a search term.
	 *
	 * @param string $term Raw search term
	 * @param bool $fulltext True to search page text, false to search titles
	 * @return SqlSearchResultSet|null Null when the term is blank after trimming
	 */
	protected function searchInternal( $term, $fulltext ) {
		// This seems out of place, why is this called with empty term?
		if ( trim( $term ) === '' ) {
			return null;
		}

		$filteredTerm = $this->filter( $term );
		$query = $this->getQuery( $filteredTerm, $fulltext );
		$resultSet = $this->db->select(
			$query['tables'], $query['fields'], $query['conds'],
			__METHOD__, $query['options'], $query['joins']
		);

		// The total stays null when the count query yields no row.
		$total = null;
		$query = $this->getCountQuery( $filteredTerm, $fulltext );
		$totalResult = $this->db->select(
			$query['tables'], $query['fields'], $query['conds'],
			__METHOD__, $query['options'], $query['joins']
		);

		$row = $totalResult->fetchObject();
		if ( $row ) {
			$total = intval( $row->c );
		}
		$totalResult->free();

		return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
	}

	/**
	 * Report whether a search feature is supported. This engine adds
	 * 'title-suffix-filter'; anything else is delegated to the parent.
	 *
	 * @param string $feature
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'title-suffix-filter':
				return true;
			default:
				return parent::supports( $feature );
		}
	}

	/**
	 * Add special conditions
	 * @param array $query
	 * @since 1.18
	 */
	protected function queryFeatures( &$query ) {
		foreach ( $this->features as $feature => $value ) {
			if ( $feature === 'title-suffix-filter' && $value ) {
				$query['conds'][] = 'page_title' .
					$this->db->buildLike( $this->db->anyString(), $value );
			}
		}
	}

	/**
	 * Add namespace conditions
	 *
	 * Note that an empty namespace list is replaced, on the object itself,
	 * by a list holding only namespace '0'.
	 *
	 * @param array $query
	 * @since 1.18 (changed)
	 */
	public function queryNamespaces( &$query ) {
		if ( is_array( $this->namespaces ) ) {
			if ( count( $this->namespaces ) === 0 ) {
				$this->namespaces[] = '0';
			}
			$query['conds']['page_namespace'] = $this->namespaces;
		}
	}

	/**
	 * Add limit options
	 * @param array $query
	 * @since 1.18
	 */
	protected function limitResult( &$query ) {
		$query['options']['LIMIT'] = $this->limit;
		$query['options']['OFFSET'] = $this->offset;
	}

	/**
	 * Construct the SQL query to do the search.
	 * The guts should be constructed in queryMain()
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return array
	 * @since 1.18 (changed)
	 */
	public function getQuery( $filteredTerm, $fulltext ) {
		$query = array(
			'tables' => array(),
			'fields' => array(),
			'conds' => array(),
			'options' => array(),
			'joins' => array(),
		);

		$this->queryMain( $query, $filteredTerm, $fulltext );
		$this->queryFeatures( $query );
		$this->queryNamespaces( $query );
		$this->limitResult( $query );

		return $query;
	}

	/**
	 * Picks which field to index on, depending on what type of query.
	 * @param bool $fulltext
	 * @return string
	 */
	public function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Get the base part of the search query.
	 *
	 * @param array &$query Search query array
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @since 1.18 (changed)
	 */
	public function queryMain( &$query, $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$query['tables'][] = 'page';
		$query['tables'][] = 'searchindex';
		$query['fields'][] = 'page_id';
		$query['fields'][] = 'page_namespace';
		$query['fields'][] = 'page_title';
		$query['conds'][] = 'page_id=si_page';
		$query['conds'][] = $match;
	}

	/**
	 * Build the query that counts all matches; unlike getQuery() it applies
	 * no LIMIT/OFFSET.
	 *
	 * @since 1.18 (changed)
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return array
	 */
	public function getCountQuery( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );

		$query = array(
			'tables' => array( 'page', 'searchindex' ),
			'fields' => array( 'COUNT(*) as c' ),
			'conds' => array( 'page_id=si_page', $match ),
			'options' => array(),
			'joins' => array(),
		);

		$this->queryFeatures( $query );
		$this->queryNamespaces( $query );

		return $query;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		$dbw = wfGetDB( DB_MASTER );
		$dbw->replace( 'searchindex',
			array( 'si_page' ),
			array(
				'si_page' => $id,
				'si_title' => $this->normalizeText( $title ),
				'si_text' => $this->normalizeText( $text )
			), __METHOD__ );
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function updateTitle( $id, $title ) {
		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $this->normalizeText( $title ) ),
			array( 'si_page' => $id ),
			__METHOD__,
			array( $dbw->lowPriorityOption() ) );
	}

	/**
	 * Delete an indexed page
	 * Title should be pre-processed.
	 *
	 * @param int $id Page id that was deleted
	 * @param string $title Title of page that was deleted (not used here)
	 */
	public function delete( $id, $title ) {
		$dbw = wfGetDB( DB_MASTER );

		$dbw->delete( 'searchindex', array( 'si_page' => $id ), __METHOD__ );
	}

	/**
	 * Converts some characters for MySQL's indexing to grok it correctly,
	 * and pads short words to overcome limitations.
	 * @param string $string
	 * @return mixed|string
	 */
	public function normalizeText( $string ) {
		global $wgContLang;

		wfProfileIn( __METHOD__ );

		$out = parent::normalizeText( $string );

		// MySQL fulltext index doesn't grok utf-8, so we
		// need to fold cases and convert to hex
		$out = preg_replace_callback(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			array( $this, 'stripForSearchCallback' ),
			$wgContLang->lc( $out ) );

		// And to add insult to injury, the default indexing
		// ignores short words... Pad them so we can pass them
		// through without reconfiguring the server...
		$minLength = $this->minSearchLength();
		if ( $minLength > 1 ) {
			$n = $minLength - 1;
			$out = preg_replace(
				"/\b(\w{1,$n})\b/",
				"$1u800",
				$out );
		}

		// Periods within things like hostnames and IP addresses
		// are also important -- we want a search for "example.com"
		// or "192.168.1.1" to work sanely.
		//
		// MySQL's search seems to ignore them, so you'd match on
		// "example.wikipedia.com" and "192.168.83.1" as well.
		$out = preg_replace(
			"/(\w)\.(\w|\*)/u",
			"$1u82e$2",
			$out );

		wfProfileOut( __METHOD__ );

		return $out;
	}

	/**
	 * Armor a case-folded UTF-8 string to get through MySQL's
	 * fulltext search without being mucked up by funny charset
	 * settings or anything else of the sort.
	 * @param array $matches
	 * @return string
	 */
	protected function stripForSearchCallback( $matches ) {
		return 'u8' . bin2hex( $matches[1] );
	}

	/**
	 * Check MySQL server's ft_min_word_len setting so we know
	 * if we need to pad short words...
	 *
	 * The value is looked up once and cached in a static property; 0 is
	 * cached when the variable cannot be read.
	 *
	 * @return int
	 */
	protected function minSearchLength() {
		if ( is_null( self::$mMinSearchLength ) ) {
			$sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'";

			$dbr = wfGetDB( DB_SLAVE );
			$result = $dbr->query( $sql );
			$row = $result->fetchObject();
			$result->free();

			if ( $row && $row->Variable_name == 'ft_min_word_len' ) {
				self::$mMinSearchLength = intval( $row->Value );
			} else {
				self::$mMinSearchLength = 0;
			}
		}
		return self::$mMinSearchLength;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |