MediaWiki  REL1_22
SearchOracle.php
Go to the documentation of this file.
00001 <?php
/**
 * Search engine backend that queries the searchindex table through
 * CONTAINS() predicates and keeps its two text indexes in sync on update.
 */
class SearchOracle extends SearchEngine {

    /**
     * Words that escapeTerm() wraps in braces before they are placed in a
     * CONTAINS() expression. Keys are upper-case; the values are unused and
     * only exist so membership can be tested with isset().
     *
     * @var array
     */
    private $reservedWords = array(
        'ABOUT' => 1,
        'ACCUM' => 1,
        'AND' => 1,
        'BT' => 1,
        'BTG' => 1,
        'BTI' => 1,
        'BTP' => 1,
        'FUZZY' => 1,
        'HASPATH' => 1,
        'INPATH' => 1,
        'MINUS' => 1,
        'NEAR' => 1,
        'NOT' => 1,
        'NT' => 1,
        'NTG' => 1,
        'NTI' => 1,
        'NTP' => 1,
        'OR' => 1,
        'PT' => 1,
        'RT' => 1,
        'SQE' => 1,
        'SYN' => 1,
        'TR' => 1,
        'TRSYN' => 1,
        'TT' => 1,
        'WITHIN' => 1,
    );

    /**
     * Creates an instance of this class.
     *
     * @param $db Database handle, passed straight to the parent constructor
     */
    public function __construct( $db ) {
        parent::__construct( $db );
    }

    /**
     * Perform a full-text search (against si_text) and return a result set.
     *
     * @param string $term Raw search term
     * @return SqlSearchResultSet Empty result set when $term is empty
     */
    public function searchText( $term ) {
        if ( $term == '' ) {
            return new SqlSearchResultSet( false, '' );
        }

        $sql = $this->getQuery( $this->filter( $term ), true );
        $resultSet = $this->db->resultObject( $this->db->query( $sql ) );
        return new SqlSearchResultSet( $resultSet, $this->searchTerms );
    }

    /**
     * Perform a title-only search (against si_title) and return a result set.
     *
     * @param string $term Raw search term
     * @return SqlSearchResultSet Empty result set when $term is empty
     */
    public function searchTitle( $term ) {
        if ( $term == '' ) {
            return new SqlSearchResultSet( false, '' );
        }

        $sql = $this->getQuery( $this->filter( $term ), false );
        $resultSet = $this->db->resultObject( $this->db->query( $sql ) );
        // Use the same result-set class as searchText() and the empty-term
        // branch above; the MySQL-specific class does not belong in this backend.
        return new SqlSearchResultSet( $resultSet, $this->searchTerms );
    }

    /**
     * Return a partial WHERE clause that excludes redirects, unless
     * $this->showRedirects is set.
     *
     * @return string
     */
    public function queryRedirect() {
        if ( $this->showRedirects ) {
            return '';
        } else {
            return 'AND page_is_redirect=0';
        }
    }

    /**
     * Return a partial WHERE clause limiting the search to the configured
     * namespaces. A null namespace list means no restriction; an empty list
     * falls back to namespace 0.
     *
     * @return string
     */
    public function queryNamespaces() {
        if ( is_null( $this->namespaces ) ) {
            return '';
        }
        if ( !count( $this->namespaces ) ) {
            $namespaces = '0';
        } else {
            $namespaces = $this->db->makeList( $this->namespaces );
        }
        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * Apply $this->limit and $this->offset to a query via the database
     * layer's limitResult().
     *
     * @param string $sql Query to wrap
     * @return string
     */
    public function queryLimit( $sql ) {
        return $this->db->limitResult( $sql, $this->limit, $this->offset );
    }

    /**
     * Return the ORDER BY clause for the search query. Both parameters are
     * currently unused.
     *
     * @param string $filteredTerm
     * @param bool $fulltext
     * @return string
     */
    public function queryRanking( $filteredTerm, $fulltext ) {
        return ' ORDER BY score(1)';
    }

    /**
     * Assemble the complete search query: main SELECT, redirect and
     * namespace filters, ranking, and finally the limit/offset wrapper.
     *
     * @param string $filteredTerm Search term after filter()
     * @param bool $fulltext True to search si_text, false for si_title
     * @return string
     */
    public function getQuery( $filteredTerm, $fulltext ) {
        return $this->queryLimit( $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryRedirect() . ' ' .
            $this->queryNamespaces() . ' ' .
            $this->queryRanking( $filteredTerm, $fulltext ) . ' ' );
    }

    /**
     * Pick which searchindex column to match against.
     *
     * @param bool $fulltext
     * @return string 'si_text' for full-text searches, otherwise 'si_title'
     */
    public function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Build the SELECT ... FROM ... WHERE part of the query, joining page
     * with searchindex and appending the CONTAINS() condition.
     *
     * @param string $filteredTerm Search term after filter()
     * @param bool $fulltext True to search si_text, false for si_title
     * @return string
     */
    public function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        return 'SELECT page_id, page_namespace, page_title ' .
            "FROM $page,$searchindex " .
            'WHERE page_id=si_page AND ' . $match;
    }

    /**
     * Parse a user-supplied search string into a CONTAINS() condition.
     *
     * As a side effect, $this->searchTerms is reset and filled with one
     * regular-expression fragment per parsed term.
     *
     * @param string $filteredText Search term after filter()
     * @param bool $fulltext True to search si_text, false for si_title
     * @return string SQL condition of the form " CONTAINS(field, 'expr', 1) > 0 "
     */
    public function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;
        // Note: this deliberately reads the base class character set, not the
        // override below that also admits double quotes.
        $lc = SearchEngine::legalSearchChars();
        $this->searchTerms = array();

        # @todo FIXME: This doesn't handle parenthetical expressions.
        $matches = array();
        $searchon = '';
        if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
                $filteredText, $matches, PREG_SET_ORDER ) ) {
            foreach ( $matches as $terms ) {
                // A leading '-' negates the term; everything else is ANDed.
                $operator = $terms[1] == '-' ? ' ~' : ' & ';

                // Search terms in all variant forms, only
                // apply on wiki with LanguageConverter
                $variants = $wgContLang->autoConvertToAllVariants( $terms[2] );
                $variants = is_array( $variants )
                    ? array_unique( array_values( $variants ) )
                    : array( $terms[2] );
                foreach ( $variants as $variant ) {
                    $searchon .= $operator . $this->escapeTerm( $variant );
                }

                // Group 3 is only present for bare words; quoted phrases
                // match the second alternative and leave it unset.
                if ( !empty( $terms[3] ) ) {
                    $regexp = preg_quote( $terms[3], '/' );
                    if ( $terms[4] ) {
                        // Trailing '*' wildcard: let the pattern run on
                        // through further word characters.
                        $regexp .= "[0-9A-Za-z_]+";
                    }
                } else {
                    $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
                }
                $this->searchTerms[] = $regexp;
            }
        }

        // Drop the leading ' & ' left by the first term, then quote for SQL.
        $searchon = $this->db->addQuotes( ltrim( $searchon, ' &' ) );
        $field = $this->getIndexField( $fulltext );
        return " CONTAINS($field, $searchon, 1) > 0 ";
    }

    /**
     * Prepare a single term for use inside the CONTAINS() expression:
     * normalise it, brace it if it is a reserved word, turn a fully quoted
     * phrase into a parenthesised group, and backslash-escape '-', '&', '|'.
     *
     * @param string $t Term as matched by parseQuery()
     * @return string
     */
    private function escapeTerm( $t ) {
        global $wgContLang;
        $t = $wgContLang->normalizeForSearch( $t );
        $t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{' . $t . '}' : $t;
        $t = preg_replace( '/^"(.*)"$/', '($1)', $t );
        $t = preg_replace( '/([-&|])/', '\\\\$1', $t );
        return $t;
    }

    /**
     * Create or update the search index record for the given page.
     * Title and text should be pre-processed.
     *
     * @param int $id Page id (si_page)
     * @param string $title
     * @param string $text
     */
    public function update( $id, $title, $text ) {
        $dbw = wfGetDB( DB_MASTER );
        $dbw->replace( 'searchindex',
            array( 'si_page' ),
            array(
                'si_page' => $id,
                'si_title' => $title,
                'si_text' => $text
            ), __METHOD__ );

        // Sync the index
        // We need to specify the DB name (i.e. user/schema) here so that
        // it can work from the installer, where
        //     ALTER SESSION SET CURRENT_SCHEMA = ...
        // was used.
        $dbw->query( "CALL ctx_ddl.sync_index(" .
            $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_text_idx', 'raw' ) ) . ")" );
        $dbw->query( "CALL ctx_ddl.sync_index(" .
            $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_title_idx', 'raw' ) ) . ")" );
    }

    /**
     * Update a search index record's title only.
     * Title should be pre-processed.
     *
     * @param int $id Page id (si_page)
     * @param string $title
     */
    public function updateTitle( $id, $title ) {
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $title ),
            array( 'si_page' => $id ),
            __METHOD__,
            array() );
    }

    /**
     * Characters accepted in search terms: the parent's set plus the double
     * quote.
     *
     * @return string
     */
    public static function legalSearchChars() {
        return "\"" . parent::legalSearchChars();
    }
}