MediaWiki  REL1_23
SearchSqlite.php
Go to the documentation of this file.
00001 <?php
/**
 * Search backend that builds SQL using the SQLite `MATCH` operator against
 * the `searchindex` table (columns `si_title` / `si_text`, keyed by `rowid`).
 */
class SearchSqlite extends SearchDatabase {
    /**
     * Report whether fulltext search can be used.
     *
     * Delegates entirely to the database handle's checkForEnabledSearch().
     *
     * @return mixed Whatever checkForEnabledSearch() returns (presumably bool)
     */
    public function fulltextSearchSupported() {
        return $this->db->checkForEnabledSearch();
    }

    /**
     * Parse a user's search query into a `MATCH` condition.
     *
     * Side effect: resets $this->searchTerms and fills it with one regex
     * fragment per parsed term (see regexTerm()), for result highlighting.
     *
     * @param string $filteredText Query text to parse
     * @param bool $fulltext Truthy to match the text column, falsy for the title column
     * @return string SQL fragment of the form " <field> MATCH '<expression>' "
     */
    public function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;
        $lc = SearchEngine::legalSearchChars(); // Minus format chars
        $searchon = '';
        $this->searchTerms = array();

        $m = array();
        if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
                $filteredText, $m, PREG_SET_ORDER ) ) {
            foreach ( $m as $bits ) {
                // A quoted phrase leaves capture groups 3 and 4 unset, so the
                // match array is shorter than five entries. Pad it with empty
                // strings instead of silencing the resulting notices with "@".
                list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
                    array_pad( $bits, 5, '' );

                if ( $nonQuoted != '' ) {
                    $term = $nonQuoted;
                    $quote = '';
                } else {
                    $term = str_replace( '"', '', $term );
                    $quote = '"';
                }

                if ( $searchon !== '' ) {
                    $searchon .= ' ';
                }

                // Some languages such as Serbian store the input form in the search index,
                // so we may need to search for matches in multiple writing system variants.
                $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
                if ( is_array( $convertedVariants ) ) {
                    $variants = array_unique( array_values( $convertedVariants ) );
                } else {
                    $variants = array( $term );
                }

                // The low-level search index does some processing on input; apply the
                // same normalization to every variant of the term.
                // For Chinese this also inserts spaces between adjacent Han characters.
                // NOTE(review): the original comment justified this step by limitations
                // of MySQL's fulltext engine, which looks inherited from the MySQL
                // backend -- confirm the rationale still applies to SQLite.
                $strippedVariants = array_map(
                    array( $wgContLang, 'normalizeForSearch' ),
                    $variants );

                // Some languages such as Chinese force all variants to a canonical
                // form when stripping to the low-level search index, so to be sure
                // let's check our variants list for unique items after stripping.
                $strippedVariants = array_unique( $strippedVariants );

                // Several distinct variants are grouped in parentheses.
                $groupVariants = count( $strippedVariants ) > 1;

                $searchon .= $modifier;
                if ( $groupVariants ) {
                    $searchon .= '(';
                }
                foreach ( $strippedVariants as $stripped ) {
                    if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
                        // Hack for Chinese: we need to toss in quotes for
                        // multiple-character phrases since normalizeForSearch()
                        // added spaces between them to make word breaks.
                        $stripped = '"' . trim( $stripped ) . '"';
                    }
                    $searchon .= "$quote$stripped$quote$wildcard ";
                }
                if ( $groupVariants ) {
                    $searchon .= ')';
                }

                // Match individual terms or quoted phrase in result highlighting...
                // Note that variants will be introduced in a later stage for highlighting!
                $regexp = $this->regexTerm( $term, $wildcard );
                $this->searchTerms[] = $regexp;
            }

        } else {
            wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
        }

        // Escape the assembled expression before embedding it in a quoted SQL literal.
        $searchon = $this->db->strencode( $searchon );
        $field = $this->getIndexField( $fulltext );
        return " $field MATCH '$searchon' ";
    }

    /**
     * Build the regex fragment used to highlight one search term.
     *
     * @param string $string Literal term; it is escaped with preg_quote()
     * @param string $wildcard Truthy (e.g. '*') when the term had a trailing wildcard
     * @return string Regex fragment without delimiters
     */
    public function regexTerm( $string, $wildcard ) {
        global $wgContLang;

        $regex = preg_quote( $string, '/' );
        if ( !$wgContLang->hasWordBreaks() ) {
            // For Chinese, words may legitimately abut other words in the text literal.
            // Don't add \b boundary checks... note this could cause false positives
            // for latin chars.
            return $regex;
        }

        if ( $wildcard ) {
            // Don't cut off the final bit!
            return '\b' . $regex;
        }
        return '\b' . $regex . '\b';
    }

    /**
     * Characters accepted in a search query: the parent's set plus `"` and `*`.
     *
     * @return string
     */
    public static function legalSearchChars() {
        return "\"*" . parent::legalSearchChars();
    }

    /**
     * Run a search against the text column.
     *
     * @param string $term Raw search term
     * @return SqliteSearchResultSet|null Null when fulltext search is unsupported
     */
    public function searchText( $term ) {
        return $this->searchInternal( $term, true );
    }

    /**
     * Run a search against the title column.
     *
     * @param string $term Raw search term
     * @return SqliteSearchResultSet|null Null when fulltext search is unsupported
     */
    public function searchTitle( $term ) {
        return $this->searchInternal( $term, false );
    }

    /**
     * Shared implementation behind searchText() and searchTitle().
     *
     * The term is lower-cased via the content language and passed through
     * filter() before the query is built. When $wgCountTotalSearchHits is
     * truthy a second COUNT(*) query supplies the total hit count.
     *
     * @param string $term Raw search term
     * @param bool $fulltext True for the text column, false for the title column
     * @return SqliteSearchResultSet|null Null when fulltext search is unsupported
     */
    protected function searchInternal( $term, $fulltext ) {
        global $wgCountTotalSearchHits, $wgContLang;

        if ( !$this->fulltextSearchSupported() ) {
            return null;
        }

        $filteredTerm = $this->filter( $wgContLang->lc( $term ) );
        $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );

        $total = null;
        if ( $wgCountTotalSearchHits ) {
            $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
            $row = $totalResult->fetchObject();
            if ( $row ) {
                $total = intval( $row->c );
            }
            $totalResult->free();
        }

        return new SqliteSearchResultSet( $resultSet, $this->searchTerms, $total );
    }

    /**
     * Build the namespace restriction appended to search queries.
     *
     * @return string Empty string when $this->namespaces is null (search all);
     *   otherwise an "AND page_namespace IN (...)" clause. An empty namespace
     *   list produces "IN (0)".
     */
    public function queryNamespaces() {
        if ( $this->namespaces === null ) {
            return '';  # search all
        }
        if ( !count( $this->namespaces ) ) {
            $namespaces = '0';
        } else {
            $namespaces = $this->db->makeList( $this->namespaces );
        }
        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * Apply this engine's limit and offset to a query.
     *
     * @param string $sql SQL query to restrict
     * @return string Result of the database handle's limitResult()
     */
    public function limitResult( $sql ) {
        return $this->db->limitResult( $sql, $this->limit, $this->offset );
    }

    /**
     * Assemble the complete search query: main query, namespace filter, limit.
     *
     * @param string $filteredTerm Filtered search term
     * @param bool $fulltext True for the text column, false for the title column
     * @return string SQL query
     */
    public function getQuery( $filteredTerm, $fulltext ) {
        return $this->limitResult(
            $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryNamespaces()
        );
    }

    /**
     * Pick which search index column to match against.
     *
     * @param bool $fulltext Truthy selects the text column
     * @return string 'si_text' or 'si_title'
     */
    public function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Build the SELECT that joins `page` to `searchindex` (page_id = rowid)
     * and applies the parsed MATCH condition.
     *
     * @param string $filteredTerm Filtered search term
     * @param bool $fulltext True for the text column, false for the title column
     * @return string SQL query, without namespace filter or limit
     */
    public function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        return "SELECT $searchindex.rowid, page_namespace, page_title " .
            "FROM $page,$searchindex " .
            "WHERE page_id=$searchindex.rowid AND $match";
    }

    /**
     * Build the COUNT(*) query (aliased as `c`) matching the same condition
     * and namespace filter as the main search query.
     *
     * @param string $filteredTerm Filtered search term
     * @param bool $fulltext True for the text column, false for the title column
     * @return string SQL query
     */
    public function getCountQuery( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        return "SELECT COUNT(*) AS c " .
            "FROM $page,$searchindex " .
            "WHERE page_id=$searchindex.rowid AND $match " .
            $this->queryNamespaces();
    }

    /**
     * Replace the search index row for a page with new title and text.
     *
     * Does nothing when fulltext search is unsupported.
     *
     * @param int $id Row id in `searchindex` (joined to page_id by queryMain())
     * @param string $title Value stored in si_title
     * @param string $text Value stored in si_text
     */
    public function update( $id, $title, $text ) {
        if ( !$this->fulltextSearchSupported() ) {
            return;
        }
        // @todo find a method to do it in a single request,
        // couldn't do it so far due to typelessness of FTS3 tables.
        $dbw = wfGetDB( DB_MASTER );

        // Remove any existing row first, then insert the fresh one.
        $dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );

        $dbw->insert( 'searchindex',
            array(
                'rowid' => $id,
                'si_title' => $title,
                'si_text' => $text
            ), __METHOD__ );
    }

    /**
     * Update only the title column of an existing search index row.
     *
     * Does nothing when fulltext search is unsupported.
     *
     * @param int $id Row id in `searchindex`
     * @param string $title New value for si_title
     */
    public function updateTitle( $id, $title ) {
        if ( !$this->fulltextSearchSupported() ) {
            return;
        }
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $title ),
            array( 'rowid' => $id ),
            __METHOD__ );
    }
}
00306 
/**
 * SQL search result set that additionally remembers a total hit count
 * supplied by the caller.
 */
class SqliteSearchResultSet extends SqlSearchResultSet {
    /**
     * @param mixed $resultSet Forwarded unchanged to the parent constructor
     * @param mixed $terms Forwarded unchanged to the parent constructor
     * @param int|null $totalHits Total number of hits, or null when not counted
     */
    public function __construct( $resultSet, $terms, $totalHits = null ) {
        parent::__construct( $resultSet, $terms );
        $this->mTotalHits = $totalHits;
    }

    /**
     * @return int|null The value given to the constructor as $totalHits
     */
    public function getTotalHits() {
        return $this->mTotalHits;
    }
}