MediaWiki  REL1_22
SearchSqlite.php
Go to the documentation of this file.
00001 <?php
/**
 * Search engine backend that queries the SQLite full-text `searchindex`
 * table through `MATCH` expressions on its `si_title` / `si_text` columns.
 */
class SearchSqlite extends SearchEngine {

    /**
     * Database handle used to build and run the search queries.
     * NOTE(review): presumably a DatabaseSqlite instance, since
     * checkForEnabledSearch() is called on it — confirm against the caller.
     */
    protected $db;

    /**
     * Creates an instance of this class.
     *
     * @param $db Database handle, forwarded unchanged to the parent constructor
     */
    public function __construct( $db ) {
        parent::__construct( $db );
    }

    /**
     * Whether full-text search can be used with the current database.
     *
     * @return mixed Whatever the handle's checkForEnabledSearch() reports
     */
    public function fulltextSearchSupported() {
        return $this->db->checkForEnabledSearch();
    }

    /**
     * Parses the user's query and turns it into an SQL `MATCH` fragment.
     *
     * As a side effect, $this->searchTerms is reset and filled with one
     * highlighting regex per parsed term.
     *
     * @param string $filteredText Query text, already filtered by the caller
     * @param bool $fulltext True to match against page text, false for titles
     * @return string SQL fragment of the form " <field> MATCH '<expr>' "
     *  (note the leading and trailing space)
     */
    public function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;

        // Deliberately the parent's character set: this class's own
        // legalSearchChars() additionally allows '"' and '*', which are
        // handled as syntax by the pattern below rather than as term chars.
        $lc = SearchEngine::legalSearchChars();
        $searchon = '';
        $this->searchTerms = array();

        $m = array();
        if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
                $filteredText, $m, PREG_SET_ORDER ) ) {
            foreach ( $m as $bits ) {
                // For a quoted phrase the two trailing groups do not take part
                // in the match and are left out of $bits, so pad the array
                // instead of silencing the resulting notices with '@'.
                list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
                    array_pad( $bits, 5, '' );

                $isQuoted = ( $nonQuoted === '' );
                if ( $isQuoted ) {
                    $term = str_replace( '"', '', $term );
                    $quote = '"';
                } else {
                    $term = $nonQuoted;
                    $quote = '';
                }

                if ( $searchon !== '' ) {
                    $searchon .= ' ';
                }

                // Some languages such as Serbian store the input form in the search index,
                // so we may need to search for matches in multiple writing system variants.
                $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
                if ( is_array( $convertedVariants ) ) {
                    $variants = array_unique( array_values( $convertedVariants ) );
                } else {
                    $variants = array( $term );
                }

                // The low-level search index does some processing on input to work
                // around problems with minimum lengths and encoding in MySQL's
                // fulltext engine.
                // For Chinese this also inserts spaces between adjacent Han characters.
                $strippedVariants = array_map(
                    array( $wgContLang, 'normalizeForSearch' ),
                    $variants );

                // Some languages such as Chinese force all variants to a canonical
                // form when stripping to the low-level search index, so to be sure
                // let's check our variants list for unique items after stripping.
                $strippedVariants = array_unique( $strippedVariants );

                // Several distinct variants are wrapped in parentheses as one group.
                $grouped = count( $strippedVariants ) > 1;

                $searchon .= $modifier;
                if ( $grouped ) {
                    $searchon .= '(';
                }
                foreach ( $strippedVariants as $stripped ) {
                    if ( !$isQuoted && strpos( $stripped, ' ' ) !== false ) {
                        // Hack for Chinese: we need to toss in quotes for
                        // multiple-character phrases since normalizeForSearch()
                        // added spaces between them to make word breaks.
                        $stripped = '"' . trim( $stripped ) . '"';
                    }
                    $searchon .= "$quote$stripped$quote$wildcard ";
                }
                if ( $grouped ) {
                    $searchon .= ')';
                }

                // Match individual terms or quoted phrase in result highlighting...
                // Note that variants will be introduced in a later stage for highlighting!
                $this->searchTerms[] = $this->regexTerm( $term, $wildcard );
            }
        } else {
            wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
        }

        $searchon = $this->db->strencode( $searchon );
        $field = $this->getIndexField( $fulltext );
        return " $field MATCH '$searchon' ";
    }

    /**
     * Builds the regex body used to highlight one search term.
     *
     * @param string $string Raw term; escaped here for use between '/' delimiters
     * @param string $wildcard Non-empty when the term ended in a '*' wildcard
     * @return string Regex fragment without delimiters
     */
    public function regexTerm( $string, $wildcard ) {
        global $wgContLang;

        $regex = preg_quote( $string, '/' );

        if ( !$wgContLang->hasWordBreaks() ) {
            // For Chinese, words may legitimately abut other words in the text literal.
            // Don't add \b boundary checks... note this could cause false positives
            // for latin chars.
            return $regex;
        }

        if ( $wildcard ) {
            // Don't cut off the final bit: a wildcard term is anchored at its start only.
            return '\b' . $regex;
        }

        return '\b' . $regex . '\b';
    }

    /**
     * Characters accepted in a search string: the parent's set plus the
     * double quote and the asterisk.
     *
     * @return string
     */
    public static function legalSearchChars() {
        return "\"*" . parent::legalSearchChars();
    }

    /**
     * Performs a full-text search against page text.
     *
     * @param string $term Raw search term
     * @return SqliteSearchResultSet|null Null when full-text search is unavailable
     */
    public function searchText( $term ) {
        return $this->searchInternal( $term, true );
    }

    /**
     * Performs a search against page titles only.
     *
     * @param string $term Raw search term
     * @return SqliteSearchResultSet|null Null when full-text search is unavailable
     */
    public function searchTitle( $term ) {
        return $this->searchInternal( $term, false );
    }

    /**
     * Shared implementation of searchText() and searchTitle().
     *
     * The term is lower-cased via the content language and filtered before
     * the query is built. When $wgCountTotalSearchHits is set, a second
     * COUNT(*) query supplies the total hit count; otherwise it stays null.
     *
     * @param string $term Raw search term
     * @param bool $fulltext True to search page text, false for titles
     * @return SqliteSearchResultSet|null Null when full-text search is unavailable
     */
    protected function searchInternal( $term, $fulltext ) {
        global $wgCountTotalSearchHits, $wgContLang;

        if ( !$this->fulltextSearchSupported() ) {
            return null;
        }

        $filteredTerm = $this->filter( $wgContLang->lc( $term ) );
        $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );

        $total = null;
        if ( $wgCountTotalSearchHits ) {
            $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
            $row = $totalResult->fetchObject();
            if ( $row ) {
                $total = intval( $row->c );
            }
            $totalResult->free();
        }

        return new SqliteSearchResultSet( $resultSet, $this->searchTerms, $total );
    }

    /**
     * Returns the SQL condition that excludes redirects, or an empty
     * string when $this->showRedirects is set.
     *
     * @return string
     */
    public function queryRedirect() {
        if ( $this->showRedirects ) {
            return '';
        }
        return 'AND page_is_redirect=0';
    }

    /**
     * Returns the SQL condition restricting the search to $this->namespaces.
     *
     * A null namespace list means "search everything" and yields an empty
     * string; an empty list falls back to namespace 0.
     *
     * @return string
     */
    public function queryNamespaces() {
        if ( is_null( $this->namespaces ) ) {
            return '';  # search all
        }
        if ( !count( $this->namespaces ) ) {
            $namespaces = '0';
        } else {
            $namespaces = $this->db->makeList( $this->namespaces );
        }
        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * Applies $this->limit and $this->offset to a query through the
     * database handle's limitResult().
     *
     * @param string $sql Query to limit
     * @return string
     */
    public function limitResult( $sql ) {
        return $this->db->limitResult( $sql, $this->limit, $this->offset );
    }

    /**
     * Builds the complete, limited SQL query for a search.
     *
     * @param string $filteredTerm Filtered search term
     * @param bool $fulltext True to search page text, false for titles
     * @return string
     */
    public function getQuery( $filteredTerm, $fulltext ) {
        return $this->limitResult(
            $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryRedirect() . ' ' .
            $this->queryNamespaces()
        );
    }

    /**
     * Picks which column of the search index to query.
     *
     * @param bool $fulltext
     * @return string 'si_text' for full-text searches, 'si_title' otherwise
     */
    public function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Builds the SELECT / FROM / WHERE core of the search query, joining
     * `page` to `searchindex` on page_id = rowid.
     *
     * @param string $filteredTerm Filtered search term
     * @param bool $fulltext True to search page text, false for titles
     * @return string
     */
    public function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        return "SELECT $searchindex.rowid, page_namespace, page_title " .
            "FROM $page,$searchindex " .
            "WHERE page_id=$searchindex.rowid AND $match";
    }

    /**
     * Builds the COUNT(*) query (result column `c`) matching the same rows
     * as getQuery(), without limit or offset.
     *
     * @param string $filteredTerm Filtered search term
     * @param bool $fulltext True to search page text, false for titles
     * @return string
     */
    public function getCountQuery( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        // Separate every clause explicitly, as getQuery() does, instead of
        // depending on the trailing space parseQuery() happens to emit.
        return "SELECT COUNT(*) AS c " .
            "FROM $page,$searchindex " .
            "WHERE page_id=$searchindex.rowid AND $match" . ' ' .
            $this->queryRedirect() . ' ' .
            $this->queryNamespaces();
    }

    /**
     * Creates or replaces the search index record for the given page.
     * Title and text should be pre-processed by the caller.
     *
     * @param int $id Page id, used as the index row id
     * @param string $title
     * @param string $text
     */
    public function update( $id, $title, $text ) {
        if ( !$this->fulltextSearchSupported() ) {
            return;
        }
        // @todo find a method to do it in a single request,
        // couldn't do it so far due to typelessness of FTS3 tables.
        $dbw = wfGetDB( DB_MASTER );

        $dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );

        $dbw->insert( 'searchindex',
            array(
                'rowid' => $id,
                'si_title' => $title,
                'si_text' => $text
            ), __METHOD__ );
    }

    /**
     * Updates only the title column of an existing search index record.
     *
     * @param int $id Page id, used as the index row id
     * @param string $title
     */
    public function updateTitle( $id, $title ) {
        if ( !$this->fulltextSearchSupported() ) {
            return;
        }
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $title ),
            array( 'rowid' => $id ),
            __METHOD__ );
    }
}
00334 
/**
 * Result set for SQLite searches that also carries an optional total
 * hit count supplied by the caller.
 */
class SqliteSearchResultSet extends SqlSearchResultSet {
    /**
     * @param mixed $resultSet Query result, handed to the parent constructor
     * @param array $terms Highlighting terms, handed to the parent constructor
     * @param int|null $totalHits Total number of hits, or null when not counted
     */
    public function __construct( $resultSet, $terms, $totalHits = null ) {
        // The parent is initialised first; the total is stored afterwards.
        parent::__construct( $resultSet, $terms );
        $this->mTotalHits = $totalHits;
    }

    /**
     * @return int|null The total passed to the constructor
     */
    public function getTotalHits() {
        return $this->mTotalHits;
    }
}