[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/search/ -> SearchSqlite.php (source)

   1  <?php
   2  /**
   3   * SQLite search backend, based upon SearchMysql
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Search
  22   */
  23  
  24  /**
  25   * Search engine hook for SQLite
  26   * @ingroup Search
  27   */
  28  class SearchSqlite extends SearchDatabase {
  29      /**
  30       * Whether fulltext search is supported by current schema
  31       * @return bool
  32       */
  33  	function fulltextSearchSupported() {
  34          return $this->db->checkForEnabledSearch();
  35      }
  36  
  37      /**
  38       * Parse the user's query and transform it into an SQL fragment which will
  39       * become part of a WHERE clause
  40       *
  41       * @param string $filteredText
  42       * @param bool $fulltext
  43       * @return string
  44       */
  45  	function parseQuery( $filteredText, $fulltext ) {
  46          global $wgContLang;
  47          $lc = $this->legalSearchChars(); // Minus format chars
  48          $searchon = '';
  49          $this->searchTerms = array();
  50  
  51          $m = array();
  52          if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
  53                  $filteredText, $m, PREG_SET_ORDER ) ) {
  54              foreach ( $m as $bits ) {
  55                  wfSuppressWarnings();
  56                  list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits;
  57                  wfRestoreWarnings();
  58  
  59                  if ( $nonQuoted != '' ) {
  60                      $term = $nonQuoted;
  61                      $quote = '';
  62                  } else {
  63                      $term = str_replace( '"', '', $term );
  64                      $quote = '"';
  65                  }
  66  
  67                  if ( $searchon !== '' ) {
  68                      $searchon .= ' ';
  69                  }
  70  
  71                  // Some languages such as Serbian store the input form in the search index,
  72                  // so we may need to search for matches in multiple writing system variants.
  73                  $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
  74                  if ( is_array( $convertedVariants ) ) {
  75                      $variants = array_unique( array_values( $convertedVariants ) );
  76                  } else {
  77                      $variants = array( $term );
  78                  }
  79  
  80                  // The low-level search index does some processing on input to work
  81                  // around problems with minimum lengths and encoding in MySQL's
  82                  // fulltext engine.
  83                  // For Chinese this also inserts spaces between adjacent Han characters.
  84                  $strippedVariants = array_map(
  85                      array( $wgContLang, 'normalizeForSearch' ),
  86                      $variants );
  87  
  88                  // Some languages such as Chinese force all variants to a canonical
  89                  // form when stripping to the low-level search index, so to be sure
  90                  // let's check our variants list for unique items after stripping.
  91                  $strippedVariants = array_unique( $strippedVariants );
  92  
  93                  $searchon .= $modifier;
  94                  if ( count( $strippedVariants ) > 1 ) {
  95                      $searchon .= '(';
  96                  }
  97                  foreach ( $strippedVariants as $stripped ) {
  98                      if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
  99                          // Hack for Chinese: we need to toss in quotes for
 100                          // multiple-character phrases since normalizeForSearch()
 101                          // added spaces between them to make word breaks.
 102                          $stripped = '"' . trim( $stripped ) . '"';
 103                      }
 104                      $searchon .= "$quote$stripped$quote$wildcard ";
 105                  }
 106                  if ( count( $strippedVariants ) > 1 ) {
 107                      $searchon .= ')';
 108                  }
 109  
 110                  // Match individual terms or quoted phrase in result highlighting...
 111                  // Note that variants will be introduced in a later stage for highlighting!
 112                  $regexp = $this->regexTerm( $term, $wildcard );
 113                  $this->searchTerms[] = $regexp;
 114              }
 115  
 116          } else {
 117              wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
 118          }
 119  
 120          $searchon = $this->db->addQuotes( $searchon );
 121          $field = $this->getIndexField( $fulltext );
 122          return " $field MATCH $searchon ";
 123      }
 124  
 125  	function regexTerm( $string, $wildcard ) {
 126          global $wgContLang;
 127  
 128          $regex = preg_quote( $string, '/' );
 129          if ( $wgContLang->hasWordBreaks() ) {
 130              if ( $wildcard ) {
 131                  // Don't cut off the final bit!
 132                  $regex = "\b$regex";
 133              } else {
 134                  $regex = "\b$regex\b";
 135              }
 136          } else {
 137              // For Chinese, words may legitimately abut other words in the text literal.
 138              // Don't add \b boundary checks... note this could cause false positives
 139              // for latin chars.
 140          }
 141          return $regex;
 142      }
 143  
 144  	public static function legalSearchChars() {
 145          return "\"*" . parent::legalSearchChars();
 146      }
 147  
 148      /**
 149       * Perform a full text search query and return a result set.
 150       *
 151       * @param string $term Raw search term
 152       * @return SqlSearchResultSet
 153       */
 154  	function searchText( $term ) {
 155          return $this->searchInternal( $term, true );
 156      }
 157  
 158      /**
 159       * Perform a title-only search query and return a result set.
 160       *
 161       * @param string $term Raw search term
 162       * @return SqlSearchResultSet
 163       */
 164  	function searchTitle( $term ) {
 165          return $this->searchInternal( $term, false );
 166      }
 167  
 168  	protected function searchInternal( $term, $fulltext ) {
 169          global $wgContLang;
 170  
 171          if ( !$this->fulltextSearchSupported() ) {
 172              return null;
 173          }
 174  
 175          $filteredTerm = $this->filter( $wgContLang->lc( $term ) );
 176          $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );
 177  
 178          $total = null;
 179          $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
 180          $row = $totalResult->fetchObject();
 181          if ( $row ) {
 182              $total = intval( $row->c );
 183          }
 184          $totalResult->free();
 185  
 186          return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
 187      }
 188  
 189      /**
 190       * Return a partial WHERE clause to limit the search to the given namespaces
 191       * @return string
 192       */
 193  	function queryNamespaces() {
 194          if ( is_null( $this->namespaces ) ) {
 195              return '';  # search all
 196          }
 197          if ( !count( $this->namespaces ) ) {
 198              $namespaces = '0';
 199          } else {
 200              $namespaces = $this->db->makeList( $this->namespaces );
 201          }
 202          return 'AND page_namespace IN (' . $namespaces . ')';
 203      }
 204  
 205      /**
 206       * Returns a query with limit for number of results set.
 207       * @param string $sql
 208       * @return string
 209       */
 210  	function limitResult( $sql ) {
 211          return $this->db->limitResult( $sql, $this->limit, $this->offset );
 212      }
 213  
 214      /**
 215       * Construct the full SQL query to do the search.
 216       * The guts shoulds be constructed in queryMain()
 217       * @param string $filteredTerm
 218       * @param bool $fulltext
 219       * @return string
 220       */
 221  	function getQuery( $filteredTerm, $fulltext ) {
 222          return $this->limitResult(
 223              $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
 224              $this->queryNamespaces()
 225          );
 226      }
 227  
 228      /**
 229       * Picks which field to index on, depending on what type of query.
 230       * @param bool $fulltext
 231       * @return string
 232       */
 233  	function getIndexField( $fulltext ) {
 234          return $fulltext ? 'si_text' : 'si_title';
 235      }
 236  
 237      /**
 238       * Get the base part of the search query.
 239       *
 240       * @param string $filteredTerm
 241       * @param bool $fulltext
 242       * @return string
 243       */
 244  	function queryMain( $filteredTerm, $fulltext ) {
 245          $match = $this->parseQuery( $filteredTerm, $fulltext );
 246          $page = $this->db->tableName( 'page' );
 247          $searchindex = $this->db->tableName( 'searchindex' );
 248          return "SELECT $searchindex.rowid, page_namespace, page_title " .
 249              "FROM $page,$searchindex " .
 250              "WHERE page_id=$searchindex.rowid AND $match";
 251      }
 252  
 253  	function getCountQuery( $filteredTerm, $fulltext ) {
 254          $match = $this->parseQuery( $filteredTerm, $fulltext );
 255          $page = $this->db->tableName( 'page' );
 256          $searchindex = $this->db->tableName( 'searchindex' );
 257          return "SELECT COUNT(*) AS c " .
 258              "FROM $page,$searchindex " .
 259              "WHERE page_id=$searchindex.rowid AND $match " .
 260              $this->queryNamespaces();
 261      }
 262  
 263      /**
 264       * Create or update the search index record for the given page.
 265       * Title and text should be pre-processed.
 266       *
 267       * @param int $id
 268       * @param string $title
 269       * @param string $text
 270       */
 271  	function update( $id, $title, $text ) {
 272          if ( !$this->fulltextSearchSupported() ) {
 273              return;
 274          }
 275          // @todo find a method to do it in a single request,
 276          // couldn't do it so far due to typelessness of FTS3 tables.
 277          $dbw = wfGetDB( DB_MASTER );
 278  
 279          $dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );
 280  
 281          $dbw->insert( 'searchindex',
 282              array(
 283                  'rowid' => $id,
 284                  'si_title' => $title,
 285                  'si_text' => $text
 286              ), __METHOD__ );
 287      }
 288  
 289      /**
 290       * Update a search index record's title only.
 291       * Title should be pre-processed.
 292       *
 293       * @param int $id
 294       * @param string $title
 295       */
 296  	function updateTitle( $id, $title ) {
 297          if ( !$this->fulltextSearchSupported() ) {
 298              return;
 299          }
 300          $dbw = wfGetDB( DB_MASTER );
 301  
 302          $dbw->update( 'searchindex',
 303              array( 'si_title' => $title ),
 304              array( 'rowid' => $id ),
 305              __METHOD__ );
 306      }
 307  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1