MediaWiki
REL1_24
|
<?php

/**
 * SQLite full-text search backend.
 *
 * Translates a user search string into a "<column> MATCH '<expr>'" condition
 * on the searchindex table and keeps that table up to date when pages change.
 */
class SearchSqlite extends SearchDatabase {
	/**
	 * Whether full-text search is available, as reported by the database
	 * handle's checkForEnabledSearch().
	 *
	 * @return bool
	 */
	public function fulltextSearchSupported() {
		return $this->db->checkForEnabledSearch();
	}

	/**
	 * Parse the user's query and turn it into an SQL MATCH fragment.
	 *
	 * As a side effect, $this->searchTerms is reset and then filled with one
	 * highlighting regex fragment (see regexTerm()) per parsed term.
	 *
	 * @param string $filteredText Search string, already passed through filter()
	 * @param bool $fulltext True to match against si_text, false for si_title
	 * @return string SQL fragment of the form " <field> MATCH <quoted expr> "
	 */
	public function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;

		// NOTE(review): legalSearchChars() as overridden in this class prepends
		// '"' and '*', so the character class below includes those two as well.
		$lc = $this->legalSearchChars();
		$searchon = '';
		$this->searchTerms = array();

		$m = array();
		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			$filteredText, $m, PREG_SET_ORDER )
		) {
			foreach ( $m as $bits ) {
				// A quoted phrase leaves the trailing groups (unquoted term,
				// wildcard) unmatched and PCRE omits them from the match array.
				// Pad to the full five slots instead of suppressing the
				// resulting undefined-offset notices.
				list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
					array_pad( $bits, 5, '' );

				if ( $nonQuoted != '' ) {
					$term = $nonQuoted;
					$quote = '';
				} else {
					$term = str_replace( '"', '', $term );
					$quote = '"';
				}

				if ( $searchon !== '' ) {
					$searchon .= ' ';
				}

				// Some languages such as Serbian store the input form in the search index,
				// so we may need to search for matches in multiple writing system variants.
				$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
				if ( is_array( $convertedVariants ) ) {
					$variants = array_unique( array_values( $convertedVariants ) );
				} else {
					$variants = array( $term );
				}

				// The low-level search index does some processing on input to work
				// around problems with minimum lengths and encoding in MySQL's
				// fulltext engine.
				// For Chinese this also inserts spaces between adjacent Han characters.
				$strippedVariants = array_map(
					array( $wgContLang, 'normalizeForSearch' ),
					$variants );

				// Some languages such as Chinese force all variants to a canonical
				// form when stripping to the low-level search index, so to be sure
				// let's check our variants list for unique items after stripping.
				$strippedVariants = array_unique( $strippedVariants );

				// Only wrap the variants in parentheses when there are several.
				$grouped = count( $strippedVariants ) > 1;

				$searchon .= $modifier;
				if ( $grouped ) {
					$searchon .= '(';
				}
				foreach ( $strippedVariants as $stripped ) {
					// $quote is empty exactly when the term was not quoted.
					if ( $quote === '' && strpos( $stripped, ' ' ) !== false ) {
						// Hack for Chinese: we need to toss in quotes for
						// multiple-character phrases since normalizeForSearch()
						// added spaces between them to make word breaks.
						$stripped = '"' . trim( $stripped ) . '"';
					}
					$searchon .= "$quote$stripped$quote$wildcard ";
				}
				if ( $grouped ) {
					$searchon .= ')';
				}

				// Match individual terms or quoted phrase in result highlighting...
				// Note that variants will be introduced in a later stage for highlighting!
				$this->searchTerms[] = $this->regexTerm( $term, $wildcard );
			}
		} else {
			wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
		}

		$searchon = $this->db->addQuotes( $searchon );
		$field = $this->getIndexField( $fulltext );

		return " $field MATCH $searchon ";
	}

	/**
	 * Build the regex fragment used to highlight one search term.
	 *
	 * @param string $string Literal term; it is escaped with preg_quote()
	 *   using '/' as the delimiter
	 * @param string $wildcard Non-empty when the term ended in a wildcard, in
	 *   which case no trailing word boundary is added
	 * @return string Regex fragment without delimiters
	 */
	public function regexTerm( $string, $wildcard ) {
		global $wgContLang;

		$regex = preg_quote( $string, '/' );

		if ( !$wgContLang->hasWordBreaks() ) {
			// For Chinese, words may legitimately abut other words in the text literal.
			// Don't add \b boundary checks... note this could cause false positives
			// for latin chars.
			return $regex;
		}

		if ( $wildcard ) {
			// Don't cut off the final bit!
			return '\b' . $regex;
		}

		return '\b' . $regex . '\b';
	}

	/**
	 * Characters accepted in a search term: the parent class's set plus the
	 * double quote and the asterisk.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "\"*" . parent::legalSearchChars();
	}

	/**
	 * Search the si_text column of the search index.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet|null Null when full-text search is unsupported
	 */
	public function searchText( $term ) {
		return $this->searchInternal( $term, true );
	}

	/**
	 * Search the si_title column of the search index.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet|null Null when full-text search is unsupported
	 */
	public function searchTitle( $term ) {
		return $this->searchInternal( $term, false );
	}

	/**
	 * Run the result query and the matching COUNT(*) query for a term.
	 *
	 * @param string $term Raw search term; it is lowercased via the content
	 *   language and passed through filter() before use
	 * @param bool $fulltext True to search si_text, false for si_title
	 * @return SqlSearchResultSet|null Null when full-text search is unsupported
	 */
	protected function searchInternal( $term, $fulltext ) {
		global $wgContLang;

		if ( !$this->fulltextSearchSupported() ) {
			return null;
		}

		$filteredTerm = $this->filter( $wgContLang->lc( $term ) );

		// Pass the caller name, as the other database calls in this class do.
		$resultSet = $this->db->query(
			$this->getQuery( $filteredTerm, $fulltext ),
			__METHOD__
		);

		$total = null;
		$totalResult = $this->db->query(
			$this->getCountQuery( $filteredTerm, $fulltext ),
			__METHOD__
		);
		$row = $totalResult->fetchObject();
		if ( $row ) {
			$total = intval( $row->c );
		}
		$totalResult->free();

		return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
	}

	/**
	 * Build the namespace restriction for the WHERE clause.
	 *
	 * @return string Empty string when $this->namespaces is null (search all);
	 *   otherwise an "AND page_namespace IN (...)" clause. An empty namespace
	 *   list is treated as namespace 0.
	 */
	public function queryNamespaces() {
		if ( $this->namespaces === null ) {
			return ''; # search all
		}

		if ( !count( $this->namespaces ) ) {
			$namespaces = '0';
		} else {
			$namespaces = $this->db->makeList( $this->namespaces );
		}

		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Apply this search's limit and offset to a query, via the database
	 * handle's limitResult().
	 *
	 * @param string $sql
	 * @return string
	 */
	public function limitResult( $sql ) {
		return $this->db->limitResult( $sql, $this->limit, $this->offset );
	}

	/**
	 * Construct the full result query: main query, namespace restriction,
	 * then limit/offset.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	public function getQuery( $filteredTerm, $fulltext ) {
		return $this->limitResult(
			$this->queryMain( $filteredTerm, $fulltext ) . ' ' .
			$this->queryNamespaces()
		);
	}

	/**
	 * Pick which searchindex column to match against.
	 *
	 * @param bool $fulltext
	 * @return string 'si_text' when $fulltext is truthy, otherwise 'si_title'
	 */
	public function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Build the SELECT joining page and searchindex on page_id = rowid,
	 * restricted by the MATCH condition from parseQuery().
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	public function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );

		return "SELECT $searchindex.rowid, page_namespace, page_title " .
			"FROM $page,$searchindex " .
			"WHERE page_id=$searchindex.rowid AND $match";
	}

	/**
	 * Build the COUNT(*) query (aliased as "c") for the same join, MATCH
	 * condition and namespace restriction as the result query.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	public function getCountQuery( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );

		return "SELECT COUNT(*) AS c " .
			"FROM $page,$searchindex " .
			"WHERE page_id=$searchindex.rowid AND $match " .
			$this->queryNamespaces();
	}

	/**
	 * Replace a page's row in the search index. Does nothing when full-text
	 * search is unsupported.
	 *
	 * @param int $id Value stored as the searchindex rowid (joined against
	 *   page_id when searching)
	 * @param string $title Stored in si_title
	 * @param string $text Stored in si_text
	 */
	public function update( $id, $title, $text ) {
		if ( !$this->fulltextSearchSupported() ) {
			return;
		}

		// @todo find a method to do it in a single request,
		// couldn't do it so far due to typelessness of FTS3 tables.
		$dbw = wfGetDB( DB_MASTER );

		$dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );

		$dbw->insert( 'searchindex',
			array(
				'rowid' => $id,
				'si_title' => $title,
				'si_text' => $text
			), __METHOD__ );
	}

	/**
	 * Update only the si_title column of a page's search index row. Does
	 * nothing when full-text search is unsupported.
	 *
	 * @param int $id searchindex rowid of the row to update
	 * @param string $title New si_title value
	 */
	public function updateTitle( $id, $title ) {
		if ( !$this->fulltextSearchSupported() ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $title ),
			array( 'rowid' => $id ),
			__METHOD__ );
	}
}