MediaWiki  REL1_19
SearchSqlite.php
Go to the documentation of this file.
00001 <?php
/**
 * Search engine backend for SQLite.
 *
 * Queries are expressed as `MATCH` conditions against the `searchindex`
 * table (an FTS3 table, per the note in update()), joined to `page` through
 * `page_id = searchindex.rowid`.
 */
class SearchSqlite extends SearchEngine {

	/**
	 * Database handle used for all read queries issued by this class.
	 * NOTE(review): presumably a DatabaseSqlite, since
	 * checkForEnabledSearch() is called on it — confirm against the caller.
	 */
	protected $db;

	/**
	 * Creates the search engine; the handle is simply forwarded to the
	 * parent constructor.
	 *
	 * @param $db database handle to search with
	 */
	public function __construct( $db ) {
		parent::__construct( $db );
	}

	/**
	 * Whether searching is possible on this connection, as reported by the
	 * database handle's checkForEnabledSearch().
	 *
	 * @return bool
	 */
	public function fulltextSearchSupported() {
		return $this->db->checkForEnabledSearch();
	}

	/**
	 * Converts an already filtered user query into a SQL `MATCH` condition
	 * and records, in $this->searchTerms, one highlighting regex per term.
	 *
	 * @param $filteredText string: the filtered (lower-cased) search text
	 * @param $fulltext bool: true to match page text, false to match titles
	 * @return string SQL fragment of the form " <field> MATCH '<expr>' ",
	 *   padded with a space on both sides
	 */
	public function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;

		// Explicitly the base-class character set: the override in this class
		// adds '"' and '*', which the pattern below must handle itself.
		$lc = SearchEngine::legalSearchChars();
		$searchon = '';
		$this->searchTerms = array();

		$m = array();
		$pattern = '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/';
		if ( preg_match_all( $pattern, $filteredText, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $bits ) {
				// For a quoted phrase only groups 0-2 are returned (trailing
				// groups that did not take part in the match are dropped), so
				// pad to five entries instead of silencing the notices with @.
				list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
					array_pad( $bits, 5, '' );

				$isPhrase = ( $nonQuoted === '' );
				if ( $isPhrase ) {
					$term = str_replace( '"', '', $term );
					$quote = '"';
				} else {
					// Group 2 still carries the trailing '*'; group 3 does not.
					$term = $nonQuoted;
					$quote = '';
				}

				if ( $searchon !== '' ) {
					$searchon .= ' ';
				}

				// Some languages such as Serbian store the input form in the search index,
				// so we may need to search for matches in multiple writing system variants.
				$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
				if ( is_array( $convertedVariants ) ) {
					$variants = array_unique( array_values( $convertedVariants ) );
				} else {
					$variants = array( $term );
				}

				// The low-level search index does some processing on input to work
				// around problems with minimum lengths and encoding in MySQL's
				// fulltext engine.
				// For Chinese this also inserts spaces between adjacent Han characters.
				$strippedVariants = array_map(
					array( $wgContLang, 'normalizeForSearch' ),
					$variants
				);

				// Some languages such as Chinese force all variants to a canonical
				// form when stripping to the low-level search index, so to be sure
				// let's check our variants list for unique items after stripping.
				$strippedVariants = array_unique( $strippedVariants );
				$grouped = count( $strippedVariants ) > 1;

				$searchon .= $modifier;
				if ( $grouped ) {
					$searchon .= '(';
				}
				foreach ( $strippedVariants as $stripped ) {
					// Tested via $isPhrase rather than the truthiness of
					// $nonQuoted, so that the bare term "0" counts as unquoted.
					if ( !$isPhrase && strpos( $stripped, ' ' ) !== false ) {
						// Hack for Chinese: we need to toss in quotes for
						// multiple-character phrases since normalizeForSearch()
						// added spaces between them to make word breaks.
						$stripped = '"' . trim( $stripped ) . '"';
					}
					$searchon .= "$quote$stripped$quote$wildcard ";
				}
				if ( $grouped ) {
					$searchon .= ')';
				}

				// Match individual terms or quoted phrase in result highlighting...
				// Note that variants will be introduced in a later stage for highlighting!
				$this->searchTerms[] = $this->regexTerm( $term, $wildcard );
			}
		} else {
			wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
		}

		$searchon = $this->db->strencode( $searchon );
		$field = $this->getIndexField( $fulltext );
		return " $field MATCH '$searchon' ";
	}

	/**
	 * Builds the regex fragment used to highlight one search term.
	 *
	 * @param $string string: the literal term (quoted for use with '/' delimiters)
	 * @param $wildcard string: non-empty when the term ended in '*'
	 * @return string regex fragment without delimiters
	 */
	public function regexTerm( $string, $wildcard ) {
		global $wgContLang;

		$regex = preg_quote( $string, '/' );
		if ( !$wgContLang->hasWordBreaks() ) {
			// For Chinese, words may legitimately abut other words in the text literal.
			// Don't add \b boundary checks... note this could cause false positives
			// for latin chars.
			return $regex;
		}

		// With a wildcard, leave the end open so the rest of the word still matches.
		return $wildcard ? '\b' . $regex : '\b' . $regex . '\b';
	}

	/**
	 * Characters allowed in a search string: the parent's set plus the
	 * double quote and '*'.
	 *
	 * @return string character-class content
	 */
	public static function legalSearchChars() {
		return '"*' . parent::legalSearchChars();
	}

	/**
	 * Searches page text (the si_text field).
	 *
	 * @param $term string: raw search term
	 * @return SqliteSearchResultSet|null null when searching is unsupported
	 */
	public function searchText( $term ) {
		return $this->searchInternal( $term, true );
	}

	/**
	 * Searches page titles (the si_title field).
	 *
	 * @param $term string: raw search term
	 * @return SqliteSearchResultSet|null null when searching is unsupported
	 */
	public function searchTitle( $term ) {
		return $this->searchInternal( $term, false );
	}

	/**
	 * Runs the search query and, when $wgCountTotalSearchHits is set, a second
	 * COUNT(*) query for the total number of hits.
	 *
	 * @param $term string: raw search term; lower-cased and filtered here
	 * @param $fulltext bool: true to search text, false to search titles
	 * @return SqliteSearchResultSet|null null when searching is unsupported
	 */
	protected function searchInternal( $term, $fulltext ) {
		global $wgCountTotalSearchHits, $wgContLang;

		if ( !$this->fulltextSearchSupported() ) {
			return null;
		}

		$filteredTerm = $this->filter( $wgContLang->lc( $term ) );
		$resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );

		$total = null;
		if ( $wgCountTotalSearchHits ) {
			$totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
			$row = $totalResult->fetchObject();
			if ( $row ) {
				$total = intval( $row->c );
			}
			$totalResult->free();
		}

		return new SqliteSearchResultSet( $resultSet, $this->searchTerms, $total );
	}

	/**
	 * SQL condition that excludes redirects unless $this->showRedirects is set.
	 *
	 * @return string empty, or a condition starting with "AND"
	 */
	public function queryRedirect() {
		return $this->showRedirects ? '' : 'AND page_is_redirect=0';
	}

	/**
	 * SQL condition restricting results to $this->namespaces.
	 *
	 * @return string empty when namespaces is null (search all); otherwise a
	 *   condition starting with "AND". An empty list restricts to namespace 0.
	 */
	public function queryNamespaces() {
		if ( is_null( $this->namespaces ) ) {
			return '';  # search all
		}
		if ( !count( $this->namespaces ) ) {
			$namespaces = '0';
		} else {
			$namespaces = $this->db->makeList( $this->namespaces );
		}
		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Applies $this->limit and $this->offset to a query.
	 *
	 * @param $sql string: query to limit
	 * @return string
	 */
	public function limitResult( $sql ) {
		return $this->db->limitResult( $sql, $this->limit, $this->offset );
	}

	/**
	 * Assembles the complete, limited search query.
	 *
	 * @param $filteredTerm string: filtered search term
	 * @param $fulltext bool: true to search text, false to search titles
	 * @return string SQL
	 */
	public function getQuery( $filteredTerm, $fulltext ) {
		return $this->limitResult(
			$this->queryMain( $filteredTerm, $fulltext ) . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces()
		);
	}

	/**
	 * Picks the searchindex column to match against.
	 *
	 * @param $fulltext bool
	 * @return string 'si_text' when $fulltext is true, otherwise 'si_title'
	 */
	public function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Builds the SELECT ... WHERE part of the search query, without the
	 * redirect/namespace conditions and without a limit.
	 *
	 * @param $filteredTerm string: filtered search term
	 * @param $fulltext bool: true to search text, false to search titles
	 * @return string SQL
	 */
	public function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return "SELECT $searchindex.rowid, page_namespace, page_title " .
			"FROM $page,$searchindex " .
			"WHERE page_id=$searchindex.rowid AND $match";
	}

	/**
	 * Builds the query that counts every hit (column alias `c`), using the
	 * same conditions as getQuery() but no limit.
	 *
	 * @param $filteredTerm string: filtered search term
	 * @param $fulltext bool: true to search text, false to search titles
	 * @return string SQL
	 */
	public function getCountQuery( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );
		// Separate every clause explicitly, as getQuery() does, instead of
		// depending on parseQuery() ending its result with a space.
		return "SELECT COUNT(*) AS c " .
			"FROM $page,$searchindex " .
			"WHERE page_id=$searchindex.rowid AND $match" . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces();
	}

	/**
	 * Replaces the search index row for a page with new title and text.
	 *
	 * @param $id int: page id, stored as the searchindex rowid
	 * @param $title string: value for si_title
	 * @param $text string: value for si_text
	 */
	public function update( $id, $title, $text ) {
		if ( !$this->fulltextSearchSupported() ) {
			return;
		}
		// @todo: find a method to do it in a single request,
		// couldn't do it so far due to typelessness of FTS3 tables.
		$dbw = wfGetDB( DB_MASTER );

		$dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );

		$dbw->insert( 'searchindex',
			array(
				'rowid' => $id,
				'si_title' => $title,
				'si_text' => $text
			), __METHOD__ );
	}

	/**
	 * Updates only the title column of an existing search index row.
	 *
	 * @param $id int: page id, stored as the searchindex rowid
	 * @param $title string: new value for si_title
	 */
	public function updateTitle( $id, $title ) {
		if ( !$this->fulltextSearchSupported() ) {
			return;
		}
		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $title ),
			array( 'rowid' => $id ),
			__METHOD__ );
	}
}
00331 
/**
 * Result set for SQLite searches: the parent result set plus a
 * caller-supplied total hit count.
 */
class SqliteSearchResultSet extends SqlSearchResultSet {

	/**
	 * @param $resultSet database result handed to the parent constructor
	 * @param $terms highlighting terms handed to the parent constructor
	 * @param $totalHits int|null total number of hits, or null if not counted
	 */
	public function __construct( $resultSet, $terms, $totalHits = null ) {
		parent::__construct( $resultSet, $terms );

		// Stored as given; no counting is done here.
		$this->mTotalHits = $totalHits;
	}

	/**
	 * @return int|null the value passed to the constructor
	 */
	public function getTotalHits() {
		return $this->mTotalHits;
	}
}