[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/search/ -> SearchPostgres.php (source)

   1  <?php
   2  /**
   3   * PostgreSQL search engine
   4   *
   5   * Copyright © 2006-2007 Greg Sabino Mullane <[email protected]>
   6   * https://www.mediawiki.org/
   7   *
   8   * This program is free software; you can redistribute it and/or modify
   9   * it under the terms of the GNU General Public License as published by
  10   * the Free Software Foundation; either version 2 of the License, or
  11   * (at your option) any later version.
  12   *
  13   * This program is distributed in the hope that it will be useful,
  14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16   * GNU General Public License for more details.
  17   *
  18   * You should have received a copy of the GNU General Public License along
  19   * with this program; if not, write to the Free Software Foundation, Inc.,
  20   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21   * http://www.gnu.org/copyleft/gpl.html
  22   *
  23   * @file
  24   * @ingroup Search
  25   */
  26  
  27  /**
  28   * Search engine hook base class for Postgres
  29   * @ingroup Search
  30   */
  31  class SearchPostgres extends SearchDatabase {
  32      /**
  33       * Perform a full text search query via tsearch2 and return a result set.
  34       * Currently searches a page's current title (page.page_title) and
  35       * latest revision article text (pagecontent.old_text)
  36       *
  37       * @param string $term Raw search term
  38       * @return SqlSearchResultSet
  39       */
  40  	function searchTitle( $term ) {
  41          $q = $this->searchQuery( $term, 'titlevector', 'page_title' );
  42          $olderror = error_reporting( E_ERROR );
  43          $resultSet = $this->db->query( $q, 'SearchPostgres', true );
  44          error_reporting( $olderror );
  45          return new SqlSearchResultSet( $resultSet, $this->searchTerms );
  46      }
  47  
  48  	function searchText( $term ) {
  49          $q = $this->searchQuery( $term, 'textvector', 'old_text' );
  50          $olderror = error_reporting( E_ERROR );
  51          $resultSet = $this->db->query( $q, 'SearchPostgres', true );
  52          error_reporting( $olderror );
  53          return new SqlSearchResultSet( $resultSet, $this->searchTerms );
  54      }
  55  
  56      /**
  57       * Transform the user's search string into a better form for tsearch2
  58       * Returns an SQL fragment consisting of quoted text to search for.
  59       *
  60       * @param string $term
  61       *
  62       * @return string
  63       */
  64  	function parseQuery( $term ) {
  65  
  66          wfDebug( "parseQuery received: $term \n" );
  67  
  68          ## No backslashes allowed
  69          $term = preg_replace( '/\\\/', '', $term );
  70  
  71          ## Collapse parens into nearby words:
  72          $term = preg_replace( '/\s*\(\s*/', ' (', $term );
  73          $term = preg_replace( '/\s*\)\s*/', ') ', $term );
  74  
  75          ## Treat colons as word separators:
  76          $term = preg_replace( '/:/', ' ', $term );
  77  
  78          $searchstring = '';
  79          $m = array();
  80          if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
  81              foreach ( $m as $terms ) {
  82                  if ( strlen( $terms[1] ) ) {
  83                      $searchstring .= ' & !';
  84                  }
  85                  if ( strtolower( $terms[2] ) === 'and' ) {
  86                      $searchstring .= ' & ';
  87                  }
  88                  elseif ( strtolower( $terms[2] ) === 'or' or $terms[2] === '|' ) {
  89                      $searchstring .= ' | ';
  90                  }
  91                  elseif ( strtolower( $terms[2] ) === 'not' ) {
  92                      $searchstring .= ' & !';
  93                  }
  94                  else {
  95                      $searchstring .= " & $terms[2]";
  96                  }
  97              }
  98          }
  99  
 100          ## Strip out leading junk
 101          $searchstring = preg_replace( '/^[\s\&\|]+/', '', $searchstring );
 102  
 103          ## Remove any doubled-up operators
 104          $searchstring = preg_replace( '/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring );
 105  
 106          ## Remove any non-spaced operators (e.g. "Zounds!")
 107          $searchstring = preg_replace( '/([^ ])[\!\&\|]/', "$1", $searchstring );
 108  
 109          ## Remove any trailing whitespace or operators
 110          $searchstring = preg_replace( '/[\s\!\&\|]+$/', '', $searchstring );
 111  
 112          ## Remove unnecessary quotes around everything
 113          $searchstring = preg_replace( '/^[\'"](.*)[\'"]$/', "$1", $searchstring );
 114  
 115          ## Quote the whole thing
 116          $searchstring = $this->db->addQuotes( $searchstring );
 117  
 118          wfDebug( "parseQuery returned: $searchstring \n" );
 119  
 120          return $searchstring;
 121  
 122      }
 123  
 124      /**
 125       * Construct the full SQL query to do the search.
 126       * @param string $term
 127       * @param string $fulltext
 128       * @param string $colname
 129       * @return string
 130       */
 131  	function searchQuery( $term, $fulltext, $colname ) {
 132          # Get the SQL fragment for the given term
 133          $searchstring = $this->parseQuery( $term );
 134  
 135          ## We need a separate query here so gin does not complain about empty searches
 136          $sql = "SELECT to_tsquery($searchstring)";
 137          $res = $this->db->query( $sql );
 138          if ( !$res ) {
 139              ## TODO: Better output (example to catch: one 'two)
 140              die( "Sorry, that was not a valid search string. Please go back and try again" );
 141          }
 142          $top = $res->fetchRow();
 143          $top = $top[0];
 144  
 145          $this->searchTerms = array();
 146          if ( $top === "" ) { ## e.g. if only stopwords are used XXX return something better
 147              $query = "SELECT page_id, page_namespace, page_title, 0 AS score " .
 148                  "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
 149                  "AND r.rev_text_id = c.old_id AND 1=0";
 150          }
 151          else {
 152              $m = array();
 153              if ( preg_match_all( "/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
 154                  foreach ( $m as $terms ) {
 155                      $this->searchTerms[$terms[1]] = $terms[1];
 156                  }
 157              }
 158  
 159              $query = "SELECT page_id, page_namespace, page_title, " .
 160              "ts_rank($fulltext, to_tsquery($searchstring), 5) AS score " .
 161              "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
 162              "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($searchstring)";
 163          }
 164  
 165          ## Namespaces - defaults to 0
 166          if ( !is_null( $this->namespaces ) ) { // null -> search all
 167              if ( count( $this->namespaces ) < 1 ) {
 168                  $query .= ' AND page_namespace = 0';
 169              } else {
 170                  $namespaces = $this->db->makeList( $this->namespaces );
 171                  $query .= " AND page_namespace IN ($namespaces)";
 172              }
 173          }
 174  
 175          $query .= " ORDER BY score DESC, page_id DESC";
 176  
 177          $query .= $this->db->limitResult( '', $this->limit, $this->offset );
 178  
 179          wfDebug( "searchQuery returned: $query \n" );
 180  
 181          return $query;
 182      }
 183  
 184      ## Most of the work of these two functions are done automatically via triggers
 185  
 186  	function update( $pageid, $title, $text ) {
 187          ## We don't want to index older revisions
 188          $sql = "UPDATE pagecontent SET textvector = NULL WHERE textvector IS NOT NULL and old_id IN " .
 189                  "(SELECT rev_text_id FROM revision WHERE rev_page = " . intval( $pageid ) .
 190                  " ORDER BY rev_text_id DESC OFFSET 1)";
 191          $this->db->query( $sql );
 192          return true;
 193      }
 194  
 195  	function updateTitle( $id, $title ) {
 196          return true;
 197      }
 198  
 199  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1