MediaWiki  REL1_24
SearchSqlite.php
Go to the documentation of this file.
00001 <?php
/**
 * Search backend for SQLite.
 *
 * Builds `MATCH` queries against the `searchindex` table (columns `si_title`
 * and `si_text`, keyed by `rowid` = page id) and keeps that table up to date.
 */
class SearchSqlite extends SearchDatabase {
    /**
     * Whether full-text search is available on the current database.
     *
     * @return bool Whatever the database handle's checkForEnabledSearch() reports
     */
    public function fulltextSearchSupported() {
        return $this->db->checkForEnabledSearch();
    }

    /**
     * Parse the user's query and turn it into an SQL fragment for a boolean
     * full-text search.
     *
     * Side effect: resets $this->searchTerms and fills it with one highlighting
     * regex per parsed term.
     *
     * @param string $filteredText Query text, already lower-cased and filtered
     * @param bool $fulltext True to match page text, false to match titles
     * @return string SQL fragment of the form " <field> MATCH '<expression>' "
     */
    public function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;

        // Term characters only: the override in this class additionally allows
        // '"' and '*', but those are query syntax here and must stay out of the
        // character class, otherwise the phrase alternative and the wildcard
        // group of the pattern below can never match as intended.
        $lc = parent::legalSearchChars();
        $searchon = '';
        $this->searchTerms = array();

        $m = array();
        if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
                $filteredText, $m, PREG_SET_ORDER ) ) {
            foreach ( $m as $bits ) {
                // For a quoted phrase the two trailing groups do not take part
                // in the match and are absent from $bits; pad them with ''
                // instead of silencing warnings globally.
                list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
                    array_pad( $bits, 5, '' );

                if ( $nonQuoted != '' ) {
                    $term = $nonQuoted;
                    $quote = '';
                } else {
                    $term = str_replace( '"', '', $term );
                    $quote = '"';
                }

                if ( $searchon !== '' ) {
                    $searchon .= ' ';
                }

                // Some languages such as Serbian store the input form in the search index,
                // so we may need to search for matches in multiple writing system variants.
                $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
                if ( is_array( $convertedVariants ) ) {
                    $variants = array_unique( array_values( $convertedVariants ) );
                } else {
                    $variants = array( $term );
                }

                // The low-level search index does some processing on its input;
                // run every variant through the same normalisation so the query
                // matches what was indexed.
                // For Chinese this also inserts spaces between adjacent Han characters.
                $strippedVariants = array_map(
                    array( $wgContLang, 'normalizeForSearch' ),
                    $variants );

                // Some languages such as Chinese force all variants to a canonical
                // form when stripping to the low-level search index, so to be sure
                // let's check our variants list for unique items after stripping.
                $strippedVariants = array_unique( $strippedVariants );
                $grouped = count( $strippedVariants ) > 1;

                // NOTE(review): the modifier characters and the parenthesised
                // variant group mirror MySQL boolean-mode syntax; confirm how
                // the SQLite FTS build in use interprets them.
                $searchon .= $modifier;
                if ( $grouped ) {
                    $searchon .= '(';
                }
                foreach ( $strippedVariants as $stripped ) {
                    if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
                        // Hack for Chinese: we need to toss in quotes for
                        // multiple-character phrases since normalizeForSearch()
                        // added spaces between them to make word breaks.
                        $stripped = '"' . trim( $stripped ) . '"';
                    }
                    $searchon .= "$quote$stripped$quote$wildcard ";
                }
                if ( $grouped ) {
                    $searchon .= ')';
                }

                // Match individual terms or quoted phrase in result highlighting...
                // Note that variants will be introduced in a later stage for highlighting!
                $this->searchTerms[] = $this->regexTerm( $term, $wildcard );
            }
        } else {
            wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
        }

        $searchon = $this->db->addQuotes( $searchon );
        $field = $this->getIndexField( $fulltext );
        return " $field MATCH $searchon ";
    }

    /**
     * Build the highlighting regex fragment for one search term.
     *
     * @param string $string Literal term; it is escaped for use between '/' delimiters
     * @param string $wildcard Non-empty when the term ended in a wildcard
     * @return string Regex fragment without delimiters
     */
    public function regexTerm( $string, $wildcard ) {
        global $wgContLang;

        $regex = preg_quote( $string, '/' );
        if ( !$wgContLang->hasWordBreaks() ) {
            // For Chinese, words may legitimately abut other words in the text literal.
            // Don't add \b boundary checks... note this could cause false positives
            // for latin chars.
            return $regex;
        }

        // With a wildcard the end stays open: don't cut off the final bit!
        return $wildcard ? "\b$regex" : "\b$regex\b";
    }

    /**
     * Characters accepted in a search query: the parent's set plus the two
     * syntax characters '"' (phrase) and '*' (wildcard).
     *
     * @return string Character list for use inside a regex character class
     */
    public static function legalSearchChars() {
        return "\"*" . parent::legalSearchChars();
    }

    /**
     * Perform a full-text search over page text.
     *
     * @param string $term Raw search term
     * @return SqlSearchResultSet|null Null when full-text search is unsupported
     */
    public function searchText( $term ) {
        return $this->searchInternal( $term, true );
    }

    /**
     * Perform a search restricted to page titles.
     *
     * @param string $term Raw search term
     * @return SqlSearchResultSet|null Null when full-text search is unsupported
     */
    public function searchTitle( $term ) {
        return $this->searchInternal( $term, false );
    }

    /**
     * Shared implementation of searchText() and searchTitle(): runs the result
     * query and a separate COUNT(*) query for the total number of hits.
     *
     * @param string $term Raw search term
     * @param bool $fulltext True to search page text, false to search titles
     * @return SqlSearchResultSet|null Null when full-text search is unsupported
     */
    protected function searchInternal( $term, $fulltext ) {
        global $wgContLang;

        if ( !$this->fulltextSearchSupported() ) {
            return null;
        }

        $filteredTerm = $this->filter( $wgContLang->lc( $term ) );

        // Pass the caller name so both queries are attributed to this method,
        // as the write paths of this class already do.
        $resultSet = $this->db->query(
            $this->getQuery( $filteredTerm, $fulltext ),
            __METHOD__
        );

        $total = null;
        $totalResult = $this->db->query(
            $this->getCountQuery( $filteredTerm, $fulltext ),
            __METHOD__
        );
        $row = $totalResult->fetchObject();
        if ( $row ) {
            $total = intval( $row->c );
        }
        $totalResult->free();

        return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
    }

    /**
     * Build the namespace restriction for the WHERE clause.
     *
     * @return string Empty string when $this->namespaces is null (search all),
     *  otherwise an "AND page_namespace IN (...)" clause; an empty namespace
     *  list is restricted to namespace 0
     */
    public function queryNamespaces() {
        if ( $this->namespaces === null ) {
            return '';  # search all
        }

        $namespaces = count( $this->namespaces ) === 0
            ? '0'
            : $this->db->makeList( $this->namespaces );

        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * Apply this search's limit and offset to a query.
     *
     * @param string $sql Query without limit
     * @return string Query as returned by the database handle's limitResult()
     */
    public function limitResult( $sql ) {
        return $this->db->limitResult( $sql, $this->limit, $this->offset );
    }

    /**
     * Construct the complete, limited result query: main query plus namespace
     * restriction.
     *
     * @param string $filteredTerm Lower-cased, filtered search term
     * @param bool $fulltext True to search page text, false to search titles
     * @return string SQL
     */
    public function getQuery( $filteredTerm, $fulltext ) {
        $sql = $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryNamespaces();

        return $this->limitResult( $sql );
    }

    /**
     * Pick which column of the search index to match against.
     *
     * @param bool $fulltext True for page text, false for titles
     * @return string 'si_text' or 'si_title'
     */
    public function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Build the SELECT joining `page` and `searchindex` for the given term,
     * without namespace restriction or limit.
     *
     * @param string $filteredTerm Lower-cased, filtered search term
     * @param bool $fulltext True to search page text, false to search titles
     * @return string SQL
     */
    public function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );

        return "SELECT $searchindex.rowid, page_namespace, page_title " .
            "FROM $page,$searchindex " .
            "WHERE page_id=$searchindex.rowid AND $match";
    }

    /**
     * Build the COUNT(*) query (result column `c`) for the given term,
     * including the namespace restriction but no limit.
     *
     * @param string $filteredTerm Lower-cased, filtered search term
     * @param bool $fulltext True to search page text, false to search titles
     * @return string SQL
     */
    public function getCountQuery( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );

        return "SELECT COUNT(*) AS c " .
            "FROM $page,$searchindex " .
            "WHERE page_id=$searchindex.rowid AND $match " .
            $this->queryNamespaces();
    }

    /**
     * Create or replace the search index record for a page.
     * Does nothing when full-text search is unsupported.
     *
     * @param int $id Page id, used as the index rowid
     * @param string $title Title text to index
     * @param string $text Page text to index
     */
    public function update( $id, $title, $text ) {
        if ( !$this->fulltextSearchSupported() ) {
            return;
        }
        // @todo find a method to do it in a single request,
        // couldn't do it so far due to typelessness of FTS3 tables.
        $dbw = wfGetDB( DB_MASTER );

        // Remove any existing row first, then insert the fresh one.
        $dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );

        $dbw->insert( 'searchindex',
            array(
                'rowid' => $id,
                'si_title' => $title,
                'si_text' => $text
            ), __METHOD__ );
    }

    /**
     * Update only the title column of a page's search index record.
     * Does nothing when full-text search is unsupported.
     *
     * @param int $id Page id, used as the index rowid
     * @param string $title New title text to index
     */
    public function updateTitle( $id, $title ) {
        if ( !$this->fulltextSearchSupported() ) {
            return;
        }
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $title ),
            array( 'rowid' => $id ),
            __METHOD__ );
    }
}
00307 }