MediaWiki  REL1_23
SearchOracle.php
Go to the documentation of this file.
00001 <?php
/**
 * Search engine backend for Oracle databases.
 *
 * Builds CONTAINS() queries against the si_title / si_text columns of the
 * searchindex table and asks the database to sync the matching text indexes
 * after a page's index row has been rewritten.
 */
class SearchOracle extends SearchDatabase {

    /**
     * Reserved query words, keyed by their upper-case spelling.
     *
     * escapeTerm() wraps any search term found here in braces before it is
     * placed into the CONTAINS() expression. Only the keys matter; the
     * values are never read.
     *
     * @var array
     */
    private $reservedWords = array(
        'ABOUT' => 1,
        'ACCUM' => 1,
        'AND' => 1,
        'BT' => 1,
        'BTG' => 1,
        'BTI' => 1,
        'BTP' => 1,
        'FUZZY' => 1,
        'HASPATH' => 1,
        'INPATH' => 1,
        'MINUS' => 1,
        'NEAR' => 1,
        'NOT' => 1,
        'NT' => 1,
        'NTG' => 1,
        'NTI' => 1,
        'NTP' => 1,
        'OR' => 1,
        'PT' => 1,
        'RT' => 1,
        'SQE' => 1,
        'SYN' => 1,
        'TR' => 1,
        'TRSYN' => 1,
        'TT' => 1,
        'WITHIN' => 1,
    );

    /**
     * Perform a full text search (against si_text) and return a result set.
     *
     * @param string $term Raw search term
     * @return SqlSearchResultSet Built from a false result when $term is empty
     */
    public function searchText( $term ) {
        if ( $term == '' ) {
            return new SqlSearchResultSet( false, '' );
        }

        $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) );
        return new SqlSearchResultSet( $resultSet, $this->searchTerms );
    }

    /**
     * Perform a title-only search (against si_title) and return a result set.
     *
     * @param string $term Raw search term
     * @return SqlSearchResultSet Built from a false result when $term is empty
     */
    public function searchTitle( $term ) {
        if ( $term == '' ) {
            return new SqlSearchResultSet( false, '' );
        }

        $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) );
        // Use the same result set class as searchText() and as the empty-term
        // branch above, so callers always receive one consistent type.
        return new SqlSearchResultSet( $resultSet, $this->searchTerms );
    }

    /**
     * Return a partial WHERE clause restricting the search to the
     * namespaces configured on this engine.
     *
     * @return string Empty string when $this->namespaces is null (no
     *   restriction); otherwise an "AND page_namespace IN (...)" fragment.
     *   An empty namespace list is treated as namespace 0 only.
     */
    public function queryNamespaces() {
        if ( is_null( $this->namespaces ) ) {
            return '';
        }
        if ( !count( $this->namespaces ) ) {
            $namespaces = '0';
        } else {
            $namespaces = $this->db->makeList( $this->namespaces );
        }
        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * Apply this engine's limit and offset to a query.
     *
     * @param string $sql Query to be limited
     * @return string The query as rewritten by the database's limitResult()
     */
    public function queryLimit( $sql ) {
        return $this->db->limitResult( $sql, $this->limit, $this->offset );
    }

    /**
     * Return the ORDER BY clause used to rank results.
     *
     * The label 1 in score(1) matches the label passed as the third argument
     * of the CONTAINS() expression built by parseQuery().
     *
     * @param string $filteredTerm Unused
     * @param bool $fulltext Unused
     * @return string
     */
    public function queryRanking( $filteredTerm, $fulltext ) {
        return ' ORDER BY score(1)';
    }

    /**
     * Construct the complete SQL query for a search: main query, namespace
     * restriction and ranking, wrapped by queryLimit().
     *
     * @param string $filteredTerm Search term after filter()
     * @param bool $fulltext True to search page text, false to search titles
     * @return string
     */
    public function getQuery( $filteredTerm, $fulltext ) {
        return $this->queryLimit( $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryNamespaces() . ' ' .
            $this->queryRanking( $filteredTerm, $fulltext ) . ' ' );
    }

    /**
     * Pick which searchindex column to match against.
     *
     * @param bool $fulltext
     * @return string 'si_text' for full text searches, 'si_title' otherwise
     */
    public function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Build the base SELECT joining page and searchindex, including the
     * CONTAINS() match condition produced by parseQuery().
     *
     * @param string $filteredTerm Search term after filter()
     * @param bool $fulltext True to search page text, false to search titles
     * @return string SQL without namespace filter, ordering or limit
     */
    public function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        return 'SELECT page_id, page_namespace, page_title ' .
            "FROM $page,$searchindex " .
            'WHERE page_id=si_page AND ' . $match;
    }

    /**
     * Parse a user-supplied search string into a CONTAINS() condition.
     *
     * As a side effect, $this->searchTerms is reset and refilled with one
     * regular expression fragment per matched term.
     *
     * @param string $filteredText Search term after filter()
     * @param bool $fulltext True to search page text, false to search titles
     * @return string SQL condition of the form " CONTAINS(field, 'expr', 1) > 0 "
     */
    public function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;
        $lc = SearchEngine::legalSearchChars();
        $this->searchTerms = array();

        # @todo FIXME: This doesn't handle parenthetical expressions.
        $m = array();
        $searchon = '';
        // Match groups per term:
        //   [1] optional modifier prefix (one of - + < > ~)
        //   [2] the whole term: a bare word with optional trailing '*',
        //       or a double-quoted phrase
        //   [3] the bare word without the '*' (absent for quoted phrases)
        //   [4] the trailing '*', if any
        if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
                $filteredText, $m, PREG_SET_ORDER ) ) {
            foreach ( $m as $terms ) {
                // Search terms in all variant forms, only
                // apply on wiki with LanguageConverter
                $temp_terms = $wgContLang->autoConvertToAllVariants( $terms[2] );
                if ( is_array( $temp_terms ) ) {
                    $temp_terms = array_unique( array_values( $temp_terms ) );
                    foreach ( $temp_terms as $t ) {
                        // A leading '-' joins the term with '~'; every other
                        // term is joined with '&'.
                        $searchon .= ( $terms[1] == '-' ? ' ~' : ' & ' ) . $this->escapeTerm( $t );
                    }
                }
                else {
                    $searchon .= ( $terms[1] == '-' ? ' ~' : ' & ' ) . $this->escapeTerm( $terms[2] );
                }
                if ( !empty( $terms[3] ) ) {
                    $regexp = preg_quote( $terms[3], '/' );
                    if ( $terms[4] ) {
                        // Trailing '*': also match any word characters
                        // following the prefix.
                        $regexp .= "[0-9A-Za-z_]+";
                    }
                } else {
                    // Quoted phrase: record it without the double quotes.
                    $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
                }
                $this->searchTerms[] = $regexp;
            }
        }

        // Drop the leading spaces/ampersands left by the joiner before the
        // first term, then quote the expression as an SQL string literal.
        $searchon = $this->db->addQuotes( ltrim( $searchon, ' &' ) );
        $field = $this->getIndexField( $fulltext );
        return " CONTAINS($field, $searchon, 1) > 0 ";
    }

    /**
     * Prepare a single term for use inside the CONTAINS() expression.
     *
     * @param string $t Term as matched by parseQuery()
     * @return string Normalized and escaped term
     */
    private function escapeTerm( $t ) {
        global $wgContLang;
        $t = $wgContLang->normalizeForSearch( $t );
        // Wrap reserved words in braces (lookup is case-insensitive).
        $t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{' . $t . '}' : $t;
        // Turn a fully double-quoted phrase "..." into a parenthesized group.
        $t = preg_replace( '/^"(.*)"$/', '($1)', $t );
        // Prefix each '-', '&' and '|' with a backslash.
        $t = preg_replace( '/([-&|])/', '\\\\$1', $t );
        return $t;
    }

    /**
     * Create or update the search index record for the given page, then
     * sync both text indexes. Title and text should be pre-processed.
     *
     * @param int $id Page id, used as the unique si_page key
     * @param string $title
     * @param string $text
     */
    public function update( $id, $title, $text ) {
        $dbw = wfGetDB( DB_MASTER );
        $dbw->replace( 'searchindex',
            array( 'si_page' ),
            array(
                'si_page' => $id,
                'si_title' => $title,
                'si_text' => $text
            ), 'SearchOracle::update' );

        // Sync the index
        // We need to specify the DB name (i.e. user/schema) here so that
        // it can work from the installer, where
        //     ALTER SESSION SET CURRENT_SCHEMA = ...
        // was used.
        $dbw->query( "CALL ctx_ddl.sync_index(" .
            $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_text_idx', 'raw' ) ) . ")" );
        $dbw->query( "CALL ctx_ddl.sync_index(" .
            $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_title_idx', 'raw' ) ) . ")" );
    }

    /**
     * Update only the title column of a page's search index record.
     * Title should be pre-processed.
     *
     * Unlike update(), this method issues no ctx_ddl.sync_index call.
     *
     * @param int $id Page id (si_page) of the row to update
     * @param string $title
     */
    public function updateTitle( $id, $title ) {
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $title ),
            array( 'si_page' => $id ),
            'SearchOracle::updateTitle',
            array() );
    }

    /**
     * Return the parent's legal search characters with the double quote
     * prepended.
     *
     * @return string
     */
    public static function legalSearchChars() {
        return "\"" . parent::legalSearchChars();
    }
}