MediaWiki  REL1_20
SearchOracle.php
Go to the documentation of this file.
00001 <?php
/**
 * Search engine backend that queries the `searchindex` table through
 * CONTAINS() predicates on the `si_title` and `si_text` columns.
 */
class SearchOracle extends SearchEngine {

    /**
     * Set of words (word => 1) that escapeTerm() wraps in braces when a search
     * term, upper-cased, matches one of the keys.
     */
    private $reservedWords = array(
        'ABOUT' => 1,
        'ACCUM' => 1,
        'AND' => 1,
        'BT' => 1,
        'BTG' => 1,
        'BTI' => 1,
        'BTP' => 1,
        'FUZZY' => 1,
        'HASPATH' => 1,
        'INPATH' => 1,
        'MINUS' => 1,
        'NEAR' => 1,
        'NOT' => 1,
        'NT' => 1,
        'NTG' => 1,
        'NTI' => 1,
        'NTP' => 1,
        'OR' => 1,
        'PT' => 1,
        'RT' => 1,
        'SQE' => 1,
        'SYN' => 1,
        'TR' => 1,
        'TRSYN' => 1,
        'TT' => 1,
        'WITHIN' => 1
    );

    /**
     * Creates the search engine; the handle is forwarded to the parent constructor.
     *
     * @param $db Database handle passed on to SearchEngine::__construct()
     */
    function __construct( $db ) {
        parent::__construct( $db );
    }

    /**
     * Runs a search against the full-text column (si_text).
     *
     * @param $term String: raw search term; it is passed through filter() first
     * @return SqlSearchResultSet An empty set (constructed with false) when
     *   $term is empty, otherwise the wrapped query result
     */
    function searchText( $term ) {
        if ( $term == '' ) {
            return new SqlSearchResultSet( false, '' );
        }

        $sql = $this->getQuery( $this->filter( $term ), true );
        $resultSet = $this->db->resultObject( $this->db->query( $sql ) );
        return new SqlSearchResultSet( $resultSet, $this->searchTerms );
    }

    /**
     * Runs a search against the title column (si_title).
     *
     * @param $term String: raw search term; it is passed through filter() first
     * @return SqlSearchResultSet An empty set (constructed with false) when
     *   $term is empty, otherwise the wrapped query result
     */
    function searchTitle( $term ) {
        if ( $term == '' ) {
            return new SqlSearchResultSet( false, '' );
        }

        $sql = $this->getQuery( $this->filter( $term ), false );
        $resultSet = $this->db->resultObject( $this->db->query( $sql ) );
        // Same result class as searchText() and as the empty-term branch above,
        // rather than the MySQL backend's result set class.
        return new SqlSearchResultSet( $resultSet, $this->searchTerms );
    }

    /**
     * Builds the SQL fragment that excludes redirects.
     *
     * @return String Empty when $this->showRedirects is set, otherwise an
     *   "AND ..." condition on page_is_redirect
     */
    function queryRedirect() {
        return $this->showRedirects ? '' : 'AND page_is_redirect=0';
    }

    /**
     * Builds the SQL fragment that restricts results to the selected namespaces.
     *
     * @return String Empty when $this->namespaces is null; otherwise an
     *   "AND page_namespace IN (...)" condition. An empty namespace list
     *   is replaced by the single value 0.
     */
    function queryNamespaces() {
        if ( is_null( $this->namespaces ) ) {
            return '';
        }

        $namespaces = count( $this->namespaces )
            ? $this->db->makeList( $this->namespaces )
            : '0';

        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * Applies $this->limit and $this->offset to a query via the database layer.
     *
     * @param $sql String: query to limit
     * @return String The query as returned by limitResult()
     */
    function queryLimit( $sql ) {
        return $this->db->limitResult( $sql, $this->limit, $this->offset );
    }

    /**
     * Returns the ORDER BY clause for the search query. Neither parameter is
     * used; the clause is constant.
     *
     * The label 1 in score(1) is the same literal that parseQuery() passes as
     * the third argument of CONTAINS().
     * NOTE(review): there is no DESC here, so rows are returned in ascending
     * score order - confirm that this is intended.
     *
     * @param $filteredTerm String: unused
     * @param $fulltext Boolean: unused
     * @return String
     */
    function queryRanking( $filteredTerm, $fulltext ) {
        return ' ORDER BY score(1)';
    }

    /**
     * Assembles the complete search query: main SELECT, redirect filter,
     * namespace filter and ranking, then applies limit/offset.
     *
     * @param $filteredTerm String: search term after filter()
     * @param $fulltext Boolean: true to search si_text, false for si_title
     * @return String SQL
     */
    function getQuery( $filteredTerm, $fulltext ) {
        $sql = $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryRedirect() . ' ' .
            $this->queryNamespaces() . ' ' .
            $this->queryRanking( $filteredTerm, $fulltext ) . ' ';

        return $this->queryLimit( $sql );
    }

    /**
     * Picks the searchindex column to search.
     *
     * @param $fulltext Boolean
     * @return String 'si_text' when $fulltext is truthy, otherwise 'si_title'
     */
    function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Builds the SELECT ... FROM ... WHERE part of the query, joining page
     * and searchindex on page_id = si_page and appending the CONTAINS()
     * condition produced by parseQuery().
     *
     * @param $filteredTerm String: search term after filter()
     * @param $fulltext Boolean: true to search si_text, false for si_title
     * @return String SQL
     */
    function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );

        return 'SELECT page_id, page_namespace, page_title ' .
            "FROM $page,$searchindex " .
            'WHERE page_id=si_page AND ' . $match;
    }

    /**
     * Converts the filtered search text into a CONTAINS() condition and
     * fills $this->searchTerms with one regex fragment per matched term.
     *
     * Each term is joined with ' & ', or with ' ~' when it is prefixed by
     * '-'. When the content language returns an array of variants for a term,
     * every unique variant is added with the same operator.
     *
     * @param $filteredText String: search term after filter()
     * @param $fulltext Boolean: true to search si_text, false for si_title
     * @return String SQL condition of the form " CONTAINS(field, 'expr', 1) > 0 "
     */
    function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;
        $lc = SearchEngine::legalSearchChars();
        $this->searchTerms = array();

        # @todo FIXME: This doesn't handle parenthetical expressions.
        $m = array();
        $searchon = '';
        // Groups: 1 = optional prefix operator, 2 = whole term (word or quoted
        // phrase), 3 = bare word, 4 = optional trailing '*'.
        if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
                $filteredText, $m, PREG_SET_ORDER ) ) {
            foreach ( $m as $terms ) {
                $operator = ( $terms[1] === '-' ) ? ' ~' : ' & ';

                // Search terms in all variant forms, only
                // apply on wiki with LanguageConverter
                $variants = $wgContLang->autoConvertToAllVariants( $terms[2] );
                if ( is_array( $variants ) ) {
                    foreach ( array_unique( array_values( $variants ) ) as $variant ) {
                        $searchon .= $operator . $this->escapeTerm( $variant );
                    }
                } else {
                    $searchon .= $operator . $this->escapeTerm( $terms[2] );
                }

                // Group 3 is only present for bare words. Compare against ''
                // explicitly: empty() would also reject the word "0" and send
                // it down the quoted-phrase branch, losing the wildcard.
                if ( isset( $terms[3] ) && $terms[3] !== '' ) {
                    $regexp = preg_quote( $terms[3], '/' );
                    if ( !empty( $terms[4] ) ) {
                        // Trailing '*': match any run of word characters.
                        $regexp .= "[0-9A-Za-z_]+";
                    }
                } else {
                    // Quoted phrase: strip the quotes.
                    $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
                }
                $this->searchTerms[] = $regexp;
            }
        }

        // Drop the leading ' & ' joiner before quoting the expression.
        $searchon = $this->db->addQuotes( ltrim( $searchon, ' &' ) );
        $field = $this->getIndexField( $fulltext );
        return " CONTAINS($field, $searchon, 1) > 0 ";
    }

    /**
     * Prepares a single term for use inside the CONTAINS() expression:
     * normalizes it through the content language, wraps it in braces if it
     * is one of $reservedWords, rewrites a term enclosed in double quotes as
     * a parenthesized group, and backslash-escapes '-', '&' and '|'.
     *
     * @param $t String: term
     * @return String Escaped term
     */
    private function escapeTerm( $t ) {
        global $wgContLang;
        $t = $wgContLang->normalizeForSearch( $t );
        if ( isset( $this->reservedWords[strtoupper( $t )] ) ) {
            $t = '{' . $t . '}';
        }
        $t = preg_replace( '/^"(.*)"$/', '($1)', $t );
        $t = preg_replace( '/([-&|])/', '\\\\$1', $t );
        return $t;
    }

    /**
     * Creates or replaces the searchindex row for a page, then syncs the
     * si_text_idx and si_title_idx indexes.
     *
     * @param $id Integer: value stored in si_page
     * @param $title String: value stored in si_title
     * @param $text String: value stored in si_text
     */
    function update( $id, $title, $text ) {
        $dbw = wfGetDB( DB_MASTER );
        $dbw->replace( 'searchindex',
            array( 'si_page' ),
            array(
                'si_page' => $id,
                'si_title' => $title,
                'si_text' => $text
            ), 'SearchOracle::update' );

        // Sync the index
        // We need to specify the DB name (i.e. user/schema) here so that
        // it can work from the installer, where
        //     ALTER SESSION SET CURRENT_SCHEMA = ...
        // was used.
        foreach ( array( 'si_text_idx', 'si_title_idx' ) as $index ) {
            $dbw->query( "CALL ctx_ddl.sync_index(" .
                $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( $index, 'raw' ) ) . ")" );
        }
    }

    /**
     * Updates only the si_title column of the searchindex row for a page.
     * Unlike update(), this does not call ctx_ddl.sync_index.
     *
     * @param $id Integer: si_page value identifying the row
     * @param $title String: new si_title value
     */
    function updateTitle( $id, $title ) {
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $title ),
            array( 'si_page' => $id ),
            'SearchOracle::updateTitle',
            array() );
    }

    /**
     * Returns the parent's legal search characters with the double quote
     * character prepended.
     *
     * @return String
     */
    public static function legalSearchChars() {
        return "\"" . parent::legalSearchChars();
    }
}