MediaWiki  REL1_23
SearchEngine.php
Go to the documentation of this file.
00001 <?php
00032 class SearchEngine {
00033     var $limit = 10;
00034     var $offset = 0;
00035     var $prefix = '';
00036     var $searchTerms = array();
00037     var $namespaces = array( NS_MAIN );
00038     protected $showSuggestion = true;
00039 
00041     protected $features = array();
00042 
00051     function searchText( $term ) {
00052         return null;
00053     }
00054 
00063     function searchTitle( $term ) {
00064         return null;
00065     }
00066 
00072     public function supports( $feature ) {
00073         switch ( $feature ) {
00074         case 'search-update':
00075             return true;
00076         case 'title-suffix-filter':
00077         default:
00078             return false;
00079         }
00080     }
00081 
00089     public function setFeatureData( $feature, $data ) {
00090         $this->features[$feature] = $data;
00091     }
00092 
00101     public function normalizeText( $string ) {
00102         global $wgContLang;
00103 
00104         // Some languages such as Chinese require word segmentation
00105         return $wgContLang->segmentByWord( $string );
00106     }
00107 
00112     function transformSearchTerm( $term ) {
00113         return $term;
00114     }
00115 
00123     public static function getNearMatch( $searchterm ) {
00124         $title = self::getNearMatchInternal( $searchterm );
00125 
00126         wfRunHooks( 'SearchGetNearMatchComplete', array( $searchterm, &$title ) );
00127         return $title;
00128     }
00129 
00137     public static function getNearMatchResultSet( $searchterm ) {
00138         return new SearchNearMatchResultSet( self::getNearMatch( $searchterm ) );
00139     }
00140 
00145     private static function getNearMatchInternal( $searchterm ) {
00146         global $wgContLang, $wgEnableSearchContributorsByIP;
00147 
00148         $allSearchTerms = array( $searchterm );
00149 
00150         if ( $wgContLang->hasVariants() ) {
00151             $allSearchTerms = array_merge( $allSearchTerms, $wgContLang->autoConvertToAllVariants( $searchterm ) );
00152         }
00153 
00154         $titleResult = null;
00155         if ( !wfRunHooks( 'SearchGetNearMatchBefore', array( $allSearchTerms, &$titleResult ) ) ) {
00156             return $titleResult;
00157         }
00158 
00159         foreach ( $allSearchTerms as $term ) {
00160 
00161             # Exact match? No need to look further.
00162             $title = Title::newFromText( $term );
00163             if ( is_null( $title ) ) {
00164                 return null;
00165             }
00166 
00167             # Try files if searching in the Media: namespace
00168             if ( $title->getNamespace() == NS_MEDIA ) {
00169                 $title = Title::makeTitle( NS_FILE, $title->getText() );
00170             }
00171 
00172             if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
00173                 return $title;
00174             }
00175 
00176             # See if it still otherwise has content is some sane sense
00177             $page = WikiPage::factory( $title );
00178             if ( $page->hasViewableContent() ) {
00179                 return $title;
00180             }
00181 
00182             if ( !wfRunHooks( 'SearchAfterNoDirectMatch', array( $term, &$title ) ) ) {
00183                 return $title;
00184             }
00185 
00186             # Now try all lower case (i.e. first letter capitalized)
00187             $title = Title::newFromText( $wgContLang->lc( $term ) );
00188             if ( $title && $title->exists() ) {
00189                 return $title;
00190             }
00191 
00192             # Now try capitalized string
00193             $title = Title::newFromText( $wgContLang->ucwords( $term ) );
00194             if ( $title && $title->exists() ) {
00195                 return $title;
00196             }
00197 
00198             # Now try all upper case
00199             $title = Title::newFromText( $wgContLang->uc( $term ) );
00200             if ( $title && $title->exists() ) {
00201                 return $title;
00202             }
00203 
00204             # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
00205             $title = Title::newFromText( $wgContLang->ucwordbreaks( $term ) );
00206             if ( $title && $title->exists() ) {
00207                 return $title;
00208             }
00209 
00210             // Give hooks a chance at better match variants
00211             $title = null;
00212             if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
00213                 return $title;
00214             }
00215         }
00216 
00217         $title = Title::newFromText( $searchterm );
00218 
00219         # Entering an IP address goes to the contributions page
00220         if ( $wgEnableSearchContributorsByIP ) {
00221             if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
00222                 || User::isIP( trim( $searchterm ) ) ) {
00223                 return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
00224             }
00225         }
00226 
00227         # Entering a user goes to the user page whether it's there or not
00228         if ( $title->getNamespace() == NS_USER ) {
00229             return $title;
00230         }
00231 
00232         # Go to images that exist even if there's no local page.
00233         # There may have been a funny upload, or it may be on a shared
00234         # file repository such as Wikimedia Commons.
00235         if ( $title->getNamespace() == NS_FILE ) {
00236             $image = wfFindFile( $title );
00237             if ( $image ) {
00238                 return $title;
00239             }
00240         }
00241 
00242         # MediaWiki namespace? Page may be "implied" if not customized.
00243         # Just return it, with caps forced as the message system likes it.
00244         if ( $title->getNamespace() == NS_MEDIAWIKI ) {
00245             return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
00246         }
00247 
00248         # Quoted term? Try without the quotes...
00249         $matches = array();
00250         if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
00251             return SearchEngine::getNearMatch( $matches[1] );
00252         }
00253 
00254         return null;
00255     }
00256 
00257     public static function legalSearchChars() {
00258         return "A-Za-z_'.0-9\\x80-\\xFF\\-";
00259     }
00260 
00268     function setLimitOffset( $limit, $offset = 0 ) {
00269         $this->limit = intval( $limit );
00270         $this->offset = intval( $offset );
00271     }
00272 
00279     function setNamespaces( $namespaces ) {
00280         $this->namespaces = $namespaces;
00281     }
00282 
00290     function setShowSuggestion( $showSuggestion ) {
00291         $this->showSuggestion = $showSuggestion;
00292     }
00293 
00301     function replacePrefixes( $query ) {
00302         global $wgContLang;
00303 
00304         $parsed = $query;
00305         if ( strpos( $query, ':' ) === false ) { // nothing to do
00306             wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
00307             return $parsed;
00308         }
00309 
00310         $allkeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
00311         if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
00312             $this->namespaces = null;
00313             $parsed = substr( $query, strlen( $allkeyword ) );
00314         } elseif ( strpos( $query, ':' ) !== false ) {
00315             $prefix = str_replace( ' ', '_', substr( $query, 0, strpos( $query, ':' ) ) );
00316             $index = $wgContLang->getNsIndex( $prefix );
00317             if ( $index !== false ) {
00318                 $this->namespaces = array( $index );
00319                 $parsed = substr( $query, strlen( $prefix ) + 1 );
00320             }
00321         }
00322         if ( trim( $parsed ) == '' ) {
00323             $parsed = $query; // prefix was the whole query
00324         }
00325 
00326         wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
00327 
00328         return $parsed;
00329     }
00330 
00335     public static function searchableNamespaces() {
00336         global $wgContLang;
00337         $arr = array();
00338         foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
00339             if ( $ns >= NS_MAIN ) {
00340                 $arr[$ns] = $name;
00341             }
00342         }
00343 
00344         wfRunHooks( 'SearchableNamespaces', array( &$arr ) );
00345         return $arr;
00346     }
00347 
00355     public static function userNamespaces( $user ) {
00356         global $wgSearchEverythingOnlyLoggedIn;
00357 
00358         $searchableNamespaces = SearchEngine::searchableNamespaces();
00359 
00360         // get search everything preference, that can be set to be read for logged-in users
00361         // it overrides other options
00362         if ( !$wgSearchEverythingOnlyLoggedIn || $user->isLoggedIn() ) {
00363             if ( $user->getOption( 'searcheverything' ) ) {
00364                 return array_keys( $searchableNamespaces );
00365             }
00366         }
00367 
00368         $arr = array();
00369         foreach ( $searchableNamespaces as $ns => $name ) {
00370             if ( $user->getOption( 'searchNs' . $ns ) ) {
00371                 $arr[] = $ns;
00372             }
00373         }
00374 
00375         return $arr;
00376     }
00377 
00383     public static function userHighlightPrefs() {
00384         $contextlines = 2; // Hardcode this. Old defaults sucked. :)
00385         $contextchars = 75; // same as above.... :P
00386         return array( $contextlines, $contextchars );
00387     }
00388 
00394     public static function defaultNamespaces() {
00395         global $wgNamespacesToBeSearchedDefault;
00396 
00397         return array_keys( $wgNamespacesToBeSearchedDefault, true );
00398     }
00399 
00407     public static function namespacesAsText( $namespaces ) {
00408         global $wgContLang;
00409 
00410         $formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
00411         foreach ( $formatted as $key => $ns ) {
00412             if ( empty( $ns ) ) {
00413                 $formatted[$key] = wfMessage( 'blanknamespace' )->text();
00414             }
00415         }
00416         return $formatted;
00417     }
00418 
00424     public static function helpNamespaces() {
00425         global $wgNamespacesToBeSearchedHelp;
00426 
00427         return array_keys( $wgNamespacesToBeSearchedHelp, true );
00428     }
00429 
00436     function filter( $text ) {
00437         $lc = $this->legalSearchChars();
00438         return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
00439     }
00440 
00448     public static function create( $type = null ) {
00449         global $wgSearchType;
00450         $dbr = null;
00451 
00452         $alternatives = self::getSearchTypes();
00453 
00454         if ( $type && in_array( $type, $alternatives ) ) {
00455             $class = $type;
00456         } elseif ( $wgSearchType !== null ) {
00457             $class = $wgSearchType;
00458         } else {
00459             $dbr = wfGetDB( DB_SLAVE );
00460             $class = $dbr->getSearchEngine();
00461         }
00462 
00463         $search = new $class( $dbr );
00464         return $search;
00465     }
00466 
00473     public static function getSearchTypes() {
00474         global $wgSearchType, $wgSearchTypeAlternatives;
00475 
00476         $alternatives = $wgSearchTypeAlternatives ?: array();
00477         array_unshift( $alternatives, $wgSearchType );
00478 
00479         return $alternatives;
00480     }
00481 
00491     function update( $id, $title, $text ) {
00492         // no-op
00493     }
00494 
00503     function updateTitle( $id, $title ) {
00504         // no-op
00505     }
00506 
00515     function delete( $id, $title ) {
00516         // no-op
00517     }
00518 
00524     public static function getOpenSearchTemplate() {
00525         global $wgOpenSearchTemplate, $wgCanonicalServer;
00526         if ( $wgOpenSearchTemplate ) {
00527             return $wgOpenSearchTemplate;
00528         } else {
00529             $ns = implode( '|', SearchEngine::defaultNamespaces() );
00530             if ( !$ns ) {
00531                 $ns = "0";
00532             }
00533             return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
00534         }
00535     }
00536 
00547     public function getTextFromContent( Title $t, Content $c = null ) {
00548         return $c ? $c->getTextForSearchIndex() : '';
00549     }
00550 
00558     public function textAlreadyUpdatedForIndex() {
00559         return false;
00560     }
00561 }
00562 
/**
 * Empty class with no members.
 *
 * Some search engines may bail out if too many matches are found;
 * presumably this class stands for that outcome — confirm against callers.
 */
class SearchResultTooMany {
    // Deliberately has no properties or methods.
}
00569 
/**
 * SearchEngine subclass that adds and overrides nothing, so every call
 * falls through to the SearchEngine base implementations.
 */
class SearchEngineDummy extends SearchEngine {
    // Intentionally empty.
}