MediaWiki  REL1_19
SearchOracle.php
Go to the documentation of this file.
00001 <?php
00031 class SearchOracle extends SearchEngine {
00032         
00033         private $reservedWords = array ('ABOUT' => 1, 
00034                                                                         'ACCUM' => 1, 
00035                                                                         'AND' => 1, 
00036                                                                         'BT' => 1, 
00037                                                                         'BTG' => 1, 
00038                                                                         'BTI' => 1, 
00039                                                                         'BTP' => 1,
00040                                                                         'FUZZY' => 1, 
00041                                                                         'HASPATH' => 1, 
00042                                                                         'INPATH' => 1, 
00043                                                                         'MINUS' => 1, 
00044                                                                         'NEAR' => 1, 
00045                                                                         'NOT' => 1,
00046                                                                         'NT' => 1, 
00047                                                                         'NTG' => 1, 
00048                                                                         'NTI' => 1, 
00049                                                                         'NTP' => 1, 
00050                                                                         'OR' => 1, 
00051                                                                         'PT' => 1, 
00052                                                                         'RT' => 1, 
00053                                                                         'SQE' => 1,
00054                                                                         'SYN' => 1, 
00055                                                                         'TR' => 1, 
00056                                                                         'TRSYN' => 1, 
00057                                                                         'TT' => 1, 
00058                                                                         'WITHIN' => 1);
00059 
00064         function __construct($db) {
00065                 parent::__construct( $db );
00066         }
00067 
00074         function searchText( $term ) {
00075                 if ($term == '')
00076                         return new SqlSearchResultSet(false, '');
00077 
00078                 $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), true)));
00079                 return new SqlSearchResultSet($resultSet, $this->searchTerms);
00080         }
00081 
00088         function searchTitle($term) {
00089                 if ($term == '')
00090                         return new SqlSearchResultSet(false, '');
00091 
00092                 $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false)));
00093                 return new MySQLSearchResultSet($resultSet, $this->searchTerms);
00094         }
00095 
00096 
00101         function queryRedirect() {
00102                 if ($this->showRedirects) {
00103                         return '';
00104                 } else {
00105                         return 'AND page_is_redirect=0';
00106                 }
00107         }
00108 
00113         function queryNamespaces() {
00114                 if( is_null($this->namespaces) )
00115                         return '';
00116                 if ( !count( $this->namespaces ) ) {
00117                         $namespaces = '0';
00118                 } else {
00119                         $namespaces = $this->db->makeList( $this->namespaces );
00120                 }
00121                 return 'AND page_namespace IN (' . $namespaces . ')';
00122         }
00123 
00131         function queryLimit( $sql ) {
00132                 return $this->db->limitResult($sql, $this->limit, $this->offset);
00133         }
00134 
00141         function queryRanking( $filteredTerm, $fulltext ) {
00142                 return ' ORDER BY score(1)';
00143         }
00144 
00151         function getQuery( $filteredTerm, $fulltext ) {
00152                 return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' .
00153                         $this->queryRedirect() . ' ' .
00154                         $this->queryNamespaces() . ' ' .
00155                         $this->queryRanking( $filteredTerm, $fulltext ) . ' ');
00156         }
00157 
00158 
00164         function getIndexField($fulltext) {
00165                 return $fulltext ? 'si_text' : 'si_title';
00166         }
00167 
00175         function queryMain( $filteredTerm, $fulltext ) {
00176                 $match = $this->parseQuery($filteredTerm, $fulltext);
00177                 $page        = $this->db->tableName('page');
00178                 $searchindex = $this->db->tableName('searchindex');
00179                 return 'SELECT page_id, page_namespace, page_title ' .
00180                         "FROM $page,$searchindex " .
00181                         'WHERE page_id=si_page AND ' . $match;
00182         }
00183 
00188         function parseQuery($filteredText, $fulltext) {
00189                 global $wgContLang;
00190                 $lc = SearchEngine::legalSearchChars();
00191                 $this->searchTerms = array();
00192 
00193                 # @todo FIXME: This doesn't handle parenthetical expressions.
00194                 $m = array();
00195                 $searchon = '';
00196                 if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
00197                           $filteredText, $m, PREG_SET_ORDER)) {
00198                         foreach($m as $terms) {
00199                                 // Search terms in all variant forms, only
00200                                 // apply on wiki with LanguageConverter
00201                                 $temp_terms = $wgContLang->autoConvertToAllVariants( $terms[2] );
00202                                 if( is_array( $temp_terms )) {
00203                                         $temp_terms = array_unique( array_values( $temp_terms ));
00204                                         foreach( $temp_terms as $t ) {
00205                                                 $searchon .= ($terms[1] == '-' ? ' ~' : ' & ') . $this->escapeTerm( $t );
00206                                         }
00207                                 }
00208                                 else {
00209                                         $searchon .= ($terms[1] == '-' ? ' ~' : ' & ') . $this->escapeTerm( $terms[2] );
00210                                 }
00211                                 if (!empty($terms[3])) {
00212                                         $regexp = preg_quote( $terms[3], '/' );
00213                                         if ($terms[4])
00214                                                 $regexp .= "[0-9A-Za-z_]+";
00215                                 } else {
00216                                         $regexp = preg_quote(str_replace('"', '', $terms[2]), '/');
00217                                 }
00218                                 $this->searchTerms[] = $regexp;
00219                         }
00220                 }
00221 
00222 
00223                 $searchon = $this->db->addQuotes(ltrim($searchon, ' &'));
00224                 $field = $this->getIndexField($fulltext);
00225                 return " CONTAINS($field, $searchon, 1) > 0 ";
00226         }
00227 
00228         private function escapeTerm($t) {
00229                 global $wgContLang;
00230                 $t = $wgContLang->normalizeForSearch($t);
00231                 $t = isset($this->reservedWords[strtoupper($t)]) ? '{'.$t.'}' : $t;
00232                 $t = preg_replace('/^"(.*)"$/', '($1)', $t);
00233                 $t = preg_replace('/([-&|])/', '\\\\$1', $t);
00234                 return $t;
00235         }
00244         function update($id, $title, $text) {
00245                 $dbw = wfGetDB(DB_MASTER);
00246                 $dbw->replace('searchindex',
00247                         array('si_page'),
00248                         array(
00249                                 'si_page' => $id,
00250                                 'si_title' => $title,
00251                                 'si_text' => $text
00252                         ), 'SearchOracle::update' );
00253 
00254                 // Sync the index
00255                 // We need to specify the DB name (i.e. user/schema) here so that 
00256                 // it can work from the installer, where
00257                 //     ALTER SESSION SET CURRENT_SCHEMA = ...
00258                 // was used.
00259                 $dbw->query( "CALL ctx_ddl.sync_index(" . 
00260                         $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_text_idx', 'raw' ) ) . ")" );
00261                 $dbw->query( "CALL ctx_ddl.sync_index(" . 
00262                         $dbw->addQuotes( $dbw->getDBname() . '.' . $dbw->tableName( 'si_title_idx', 'raw' ) ) . ")" );
00263         }
00264 
00272         function updateTitle($id, $title) {
00273                 $dbw = wfGetDB(DB_MASTER);
00274 
00275                 $dbw->update('searchindex',
00276                         array('si_title' => $title),
00277                         array('si_page'  => $id),
00278                         'SearchOracle::updateTitle',
00279                         array());
00280         }
00281 
00282 
00283         public static function legalSearchChars() {
00284                 return "\"" . parent::legalSearchChars();
00285         }
00286 }