MediaWiki  REL1_21
SearchOracle.php
Go to the documentation of this file.
00001 <?php
/**
 * Search engine backend for Oracle databases.
 *
 * Queries are expressed with the Oracle Text CONTAINS() operator against the
 * si_title and si_text columns of the searchindex table, joined to the page
 * table through si_page.
 */
class SearchOracle extends SearchEngine {

	/**
	 * Words that must not reach the CONTAINS() query string bare; escapeTerm()
	 * wraps any term matching one of these keys (case-insensitively) in braces.
	 * Stored as a word => 1 map so that membership is a single isset() lookup.
	 *
	 * @var array
	 */
	private $reservedWords = array(
		'ABOUT' => 1,
		'ACCUM' => 1,
		'AND' => 1,
		'BT' => 1,
		'BTG' => 1,
		'BTI' => 1,
		'BTP' => 1,
		'FUZZY' => 1,
		'HASPATH' => 1,
		'INPATH' => 1,
		'MINUS' => 1,
		'NEAR' => 1,
		'NOT' => 1,
		'NT' => 1,
		'NTG' => 1,
		'NTI' => 1,
		'NTP' => 1,
		'OR' => 1,
		'PT' => 1,
		'RT' => 1,
		'SQE' => 1,
		'SYN' => 1,
		'TR' => 1,
		'TRSYN' => 1,
		'TT' => 1,
		'WITHIN' => 1,
	);

	/**
	 * Creates an instance of this class.
	 *
	 * @param $db Database object, handed unchanged to the parent constructor
	 */
	public function __construct( $db ) {
		parent::__construct( $db );
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet
	 */
	public function searchText( $term ) {
		return $this->searchIndexColumn( $term, true );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet
	 */
	public function searchTitle( $term ) {
		return $this->searchIndexColumn( $term, false );
	}

	/**
	 * Shared implementation of searchText() and searchTitle().
	 *
	 * @param string $term Raw search term
	 * @param bool $fulltext True to search si_text, false to search si_title
	 * @return SqlSearchResultSet Wraps false instead of a DB result when
	 *   there is nothing to search for
	 */
	private function searchIndexColumn( $term, $fulltext ) {
		// Loose comparison on purpose: null and '' both mean "no term".
		if ( $term == '' ) {
			return new SqlSearchResultSet( false, '' );
		}

		// Building the SQL runs parseQuery(), which fills $this->searchTerms.
		$sql = $this->getQuery( $this->filter( $term ), $fulltext );
		if ( !$this->searchTerms ) {
			// Nothing searchable survived filtering and parsing (for example
			// a term made only of punctuation); do not send an empty
			// CONTAINS() expression to the database.
			return new SqlSearchResultSet( false, '' );
		}

		$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
		return new SqlSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Return a partial WHERE clause to exclude redirects, if so set.
	 *
	 * @return string Empty when redirects are to be shown
	 */
	public function queryRedirect() {
		if ( $this->showRedirects ) {
			return '';
		}
		return 'AND page_is_redirect=0';
	}

	/**
	 * Return a partial WHERE clause to limit the search to the given
	 * namespaces.
	 *
	 * @return string Empty when $this->namespaces is null (no restriction);
	 *   an empty list restricts the search to namespace 0
	 */
	public function queryNamespaces() {
		if ( is_null( $this->namespaces ) ) {
			return '';
		}
		if ( !count( $this->namespaces ) ) {
			$namespaces = '0';
		} else {
			$namespaces = $this->db->makeList( $this->namespaces );
		}
		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Apply $this->limit and $this->offset to a query.
	 *
	 * @param string $sql SQL query to wrap
	 * @return string Whatever the database layer's limitResult() returns
	 */
	public function queryLimit( $sql ) {
		return $this->db->limitResult( $sql, $this->limit, $this->offset );
	}

	/**
	 * Return the ORDER BY clause used to rank results. Both parameters are
	 * currently unused.
	 *
	 * NOTE(review): score(1) is sorted in the default (ascending) order;
	 * confirm whether "ORDER BY score(1) DESC" was intended.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	public function queryRanking( $filteredTerm, $fulltext ) {
		return ' ORDER BY score(1)';
	}

	/**
	 * Construct the full SQL query to do the search: main query, redirect
	 * filter, namespace filter and ranking, wrapped by queryLimit().
	 *
	 * @param string $filteredTerm Search term as returned by filter()
	 * @param bool $fulltext True for si_text, false for si_title
	 * @return string
	 */
	public function getQuery( $filteredTerm, $fulltext ) {
		return $this->queryLimit( $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces() . ' ' .
			$this->queryRanking( $filteredTerm, $fulltext ) . ' ' );
	}

	/**
	 * Pick which searchindex column to search.
	 *
	 * @param bool $fulltext
	 * @return string 'si_text' when $fulltext is truthy, otherwise 'si_title'
	 */
	public function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Get the base part of the search query: a SELECT over page joined to
	 * searchindex, ending with the CONTAINS() condition from parseQuery().
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	public function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT page_id, page_namespace, page_title ' .
			"FROM $page,$searchindex " .
			'WHERE page_id=si_page AND ' . $match;
	}

	/**
	 * Parse a user search string into a CONTAINS() condition.
	 *
	 * As a side effect, $this->searchTerms is reset and then filled with one
	 * regular-expression fragment per recognised term.
	 *
	 * @param string $filteredText Search string as returned by filter()
	 * @param bool $fulltext True for si_text, false for si_title
	 * @return string SQL fragment " CONTAINS(field, 'query', 1) > 0 "
	 */
	public function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		// Deliberately the parent's character set: double quotes are matched
		// by the dedicated "..." alternative of the pattern below.
		$lc = SearchEngine::legalSearchChars();
		$this->searchTerms = array();

		# @todo FIXME: This doesn't handle parenthetical expressions.
		$m = array();
		$searchon = '';
		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
				$filteredText, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $terms ) {
				// A leading "-" negates the term; everything else is ANDed.
				$joiner = ( $terms[1] === '-' ) ? ' ~' : ' & ';

				// Search terms in all variant forms, only
				// apply on wiki with LanguageConverter
				$variants = $wgContLang->autoConvertToAllVariants( $terms[2] );
				if ( is_array( $variants ) ) {
					foreach ( array_unique( array_values( $variants ) ) as $variant ) {
						$searchon .= $joiner . $this->escapeTerm( $variant );
					}
				} else {
					$searchon .= $joiner . $this->escapeTerm( $terms[2] );
				}

				// Group 3 is only present for a bare word. Test it explicitly
				// rather than with empty(), which would misclassify the
				// word "0" as a quoted phrase.
				if ( isset( $terms[3] ) && $terms[3] !== '' ) {
					$regexp = preg_quote( $terms[3], '/' );
					if ( !empty( $terms[4] ) ) {
						// Trailing "*" wildcard: also match word characters
						// following the prefix.
						$regexp .= "[0-9A-Za-z_]+";
					}
				} else {
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
		}

		// Drop the leading " & " left over from the first term.
		$searchon = $this->db->addQuotes( ltrim( $searchon, ' &' ) );
		$field = $this->getIndexField( $fulltext );
		return " CONTAINS($field, $searchon, 1) > 0 ";
	}

	/**
	 * Prepare a single term for inclusion in the CONTAINS() query string.
	 *
	 * Steps, in order: normalise via the content language, wrap a reserved
	 * word in braces, turn a "quoted phrase" into a (parenthesised group),
	 * then backslash-escape "-", "&" and "|".
	 *
	 * @param string $t Term as matched by parseQuery(), quotes included
	 * @return string
	 */
	private function escapeTerm( $t ) {
		global $wgContLang;
		$t = $wgContLang->normalizeForSearch( $t );
		if ( isset( $this->reservedWords[strtoupper( $t )] ) ) {
			$t = '{' . $t . '}';
		}
		$t = preg_replace( '/^"(.*)"$/', '($1)', $t );
		$t = preg_replace( '/([-&|])/', '\\\\$1', $t );
		return $t;
	}

	/**
	 * Create or update the search index record for the given page, then
	 * synchronise both text indexes.
	 *
	 * @param int $id Page id, stored as si_page
	 * @param string $title Stored as si_title
	 * @param string $text Stored as si_text
	 */
	public function update( $id, $title, $text ) {
		$dbw = wfGetDB( DB_MASTER );
		$dbw->replace( 'searchindex',
			array( 'si_page' ),
			array(
				'si_page' => $id,
				'si_title' => $title,
				'si_text' => $text
			), 'SearchOracle::update' );

		// Sync the index
		// We need to specify the DB name (i.e. user/schema) here so that
		// it can work from the installer, where
		//     ALTER SESSION SET CURRENT_SCHEMA = ...
		// was used.
		foreach ( array( 'si_text_idx', 'si_title_idx' ) as $index ) {
			$dbw->query( "CALL ctx_ddl.sync_index(" .
				$dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( $index, 'raw' ) ) . ")" );
		}
	}

	/**
	 * Update a search index record's title only.
	 *
	 * NOTE(review): unlike update(), this does not call ctx_ddl.sync_index
	 * for si_title_idx; confirm whether that is intentional.
	 *
	 * @param int $id Page id (si_page) of the row to change
	 * @param string $title New value for si_title
	 */
	public function updateTitle( $id, $title ) {
		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $title ),
			array( 'si_page'  => $id ),
			'SearchOracle::updateTitle',
			array() );
	}

	/**
	 * Characters allowed in a search term: the parent's set plus the double
	 * quote, so that quoted phrases survive filtering.
	 *
	 * @return string Character list for use inside a regex character class
	 */
	public static function legalSearchChars() {
		return "\"" . parent::legalSearchChars();
	}
}