MediaWiki  master
SearchEngine.php
Go to the documentation of this file.
1 <?php
29 
34 abstract class SearchEngine {
/** @var string */
36  public $prefix = '';
37 
/** @var int[]|null Namespace indexes the search covers; starts as the main namespace only */
39  public $namespaces = [ NS_MAIN ];
40 
/** @var int Maximum number of results to return; written by setLimitOffset() */
42  protected $limit = 10;
43 
/** @var int Number of results to skip; written by setLimitOffset() */
45  protected $offset = 0;
46 
/** @var array|string */
48  protected $searchTerms = [];
49 
/** @var bool Whether the searcher should try to build a suggestion; written by setShowSuggestion() */
51  protected $showSuggestion = true;
/** @var string Current sort direction; read by getSort(), validated and written by setSort() */
52  private $sort = 'relevance';
53 
/** @var array Feature values, keyed by feature name; written by setFeatureData() */
55  protected $features = [];
56 
/** @const string Profile type for completionSearch */
58  const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
59 
/** @const string Profile type for query independent ranking features */
61  const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
62 
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation performs no search and always returns null;
 * engines are expected to override it.
 *
 * @param string $term Raw search term
 * @return null
 */
public function searchText( $term ) {
    return null;
}
74 
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation performs no search and always returns null;
 * engines are expected to override it.
 *
 * @param string $term Raw search term
 * @return null
 */
public function searchTitle( $term ) {
    return null;
}
86 
/**
 * Report whether this engine supports the named feature.
 *
 * The base engine only advertises 'search-update'. Every other feature
 * name, including 'title-suffix-filter', is reported as unsupported.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
    // Loose comparison on purpose: it matches how a switch/case compares.
    if ( $feature == 'search-update' ) {
        return true;
    }

    return false;
}
101 
/**
 * Way to pass custom data for engines.
 *
 * Stores $data in the $features map under the key $feature, replacing any
 * value previously stored for that key.
 *
 * @param string $feature Key in the feature map
 * @param mixed $data Value to store
 */
109  public function setFeatureData( $feature, $data ) {
110  $this->features[$feature] = $data;
111  }
112 
/**
 * When overridden in a derived class, performs database-specific
 * conversions on text to be used for searching or updating the index.
 *
 * The base implementation only runs the content language's word
 * segmentation over the input.
 *
 * @param string $string Text to normalize
 * @return string Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
    // Without this declaration $wgContLang would be a fresh, undefined local
    // and the call below would fail on null.
    global $wgContLang;

    // Some languages such as Chinese require word segmentation
    return $wgContLang->segmentByWord( $string );
}
127 
/**
 * Transform search term in cases when parts of the query came as different
 * GET params (when supported).
 *
 * The base implementation is the identity: it returns $term unchanged.
 *
 * @param string $term Search term as received
 * @return string
 */
135  public function transformSearchTerm( $term ) {
136  return $term;
137  }
138 
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher New matcher bound to $config and the content language
 */
public function getNearMatcher( Config $config ) {
    // Import the global explicitly; otherwise the matcher would receive an
    // undefined local instead of the content language.
    global $wgContLang;

    return new SearchNearMatcher( $config, $wgContLang );
}
148 
/**
 * Get near matcher for default SearchEngine.
 *
 * Builds a fresh engine through MediaWikiServices and asks it for a near
 * matcher configured with the main config.
 *
 * @return SearchNearMatcher
 */
protected static function defaultNearMatcher() {
    $mainConfig = MediaWikiServices::getInstance()->getMainConfig();
    $engine = MediaWikiServices::getInstance()->newSearchEngine();

    return $engine->getNearMatcher( $mainConfig );
}
157 
/**
 * If an exact title match can be found, or a very slightly close match,
 * return the title.
 *
 * Delegates to the default near matcher.
 *
 * @param string $searchterm Term to match
 * @return mixed Whatever the near matcher's getNearMatch() returns
 */
public static function getNearMatch( $searchterm ) {
    $matcher = static::defaultNearMatcher();

    return $matcher->getNearMatch( $searchterm );
}
168 
/**
 * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
 * SearchResultSet.
 *
 * Delegates to the default near matcher.
 *
 * @param string $searchterm Term to match
 * @return mixed Whatever the near matcher's getNearMatchResultSet() returns
 */
public static function getNearMatchResultSet( $searchterm ) {
    $matcher = static::defaultNearMatcher();

    return $matcher->getNearMatchResultSet( $searchterm );
}
179 
/**
 * Get chars legal for search.
 *
 * NOTE(review): the returned string looks like the body of a regex
 * character class (ranges plus escaped bytes) - confirm against callers.
 *
 * @return string
 */
185  public static function legalSearchChars() {
186  return "A-Za-z_'.0-9\\x80-\\xFF\\-";
187  }
188 
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are coerced to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip (defaults to 0)
 */
public function setLimitOffset( $limit, $offset = 0 ) {
    $this->limit = intval( $limit );
    $this->offset = intval( $offset );
}
200 
/**
 * Set which namespaces the search should include.
 *
 * A non-empty list is filtered down to negative indexes plus the indexes
 * present in searchableNamespaces(). Any empty value becomes an empty
 * array.
 *
 * @param int[]|null $namespaces Namespace indexes
 */
public function setNamespaces( $namespaces ) {
    if ( $namespaces ) {
        // Filter namespaces to only keep valid ones
        $validNs = $this->searchableNamespaces();
        $namespaces = array_filter( $namespaces, function( $ns ) use( $validNs ) {
            return $ns < 0 || isset( $validNs[$ns] );
        } );
    } else {
        $namespaces = [];
    }
    $this->namespaces = $namespaces;
}
219 
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * @param bool $showSuggestion Stored as-is in the showSuggestion property
 */
public function setShowSuggestion( $showSuggestion ) {
    $this->showSuggestion = $showSuggestion;
}
230 
/**
 * Get the valid sort directions.
 *
 * The base engine only knows 'relevance'. setSort() validates against
 * this list.
 *
 * @return string[]
 */
238  public function getValidSorts() {
239  return [ 'relevance' ];
240  }
241 
/**
 * Set the sort direction of the search results.
 *
 * @param string $sort One of the values returned by getValidSorts()
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
    $validSorts = $this->getValidSorts();

    // Strict membership test: a loose in_array() would accept values such as
    // true (and 0 on older PHP versions) because they compare equal to any
    // non-empty, non-numeric string.
    if ( !in_array( $sort, $validSorts, true ) ) {
        throw new InvalidArgumentException( "Invalid sort: $sort. " .
            "Must be one of: " . implode( ', ', $validSorts ) );
    }
    $this->sort = $sort;
}
257 
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), 'relevance' initially
 */
264  public function getSort() {
265  return $this->sort;
266  }
267 
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * Side effect: when a prefix is recognised, $this->namespaces is replaced.
 * It becomes null for the "all" keyword, or a single-element array holding
 * the matched namespace index.
 *
 * @param string $query Raw query, possibly starting with "prefix:"
 * @return string The query with the recognised prefix removed, or the
 *  original query when no prefix matched or the prefix was the whole query
 */
public function replacePrefixes( $query ) {
    // Needed for the namespace lookup below; without it $wgContLang is an
    // undefined local.
    global $wgContLang;

    $parsed = $query;
    $colonPos = strpos( $query, ':' );
    if ( $colonPos === false ) { // nothing to do
        return $parsed;
    }

    $allkeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
    if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
        $this->namespaces = null;
        $parsed = substr( $query, strlen( $allkeyword ) );
    } else {
        // A colon is known to be present here, so no second check is needed.
        // Spaces become underscores before the namespace lookup.
        $prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
        $index = $wgContLang->getNsIndex( $prefix );
        if ( $index !== false ) {
            $this->namespaces = [ $index ];
            $parsed = substr( $query, strlen( $prefix ) + 1 );
        }
    }
    if ( trim( $parsed ) == '' ) {
        $parsed = $query; // prefix was the whole query
    }

    return $parsed;
}
301 
/**
 * Find snippet highlight settings for all users.
 *
 * Both values are hardcoded: 2 context lines and 75 context characters.
 *
 * @return int[] Two-element list: [ context lines, context characters ]
 */
public static function userHighlightPrefs() {
    return [
        2, // context lines
        75, // context characters
    ];
}
311 
/**
 * Create or update the search index record for the given page.
 *
 * The base implementation does nothing; engines override it.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 * @param string $text Page text
 */
public function update( $id, $title, $text ) {
    // no-op
}
324 
/**
 * Update a search index record's title only.
 *
 * The base implementation does nothing; engines override it.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 */
public function updateTitle( $id, $title ) {
    // no-op
}
336 
/**
 * Hook for removing a page from the search index.
 *
 * The base implementation does nothing; engines override it.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 */
public function delete( $id, $title ) {
    // no-op
}
348 
/**
 * Get OpenSearch suggestion template.
 *
 * Emits a deprecation warning, then delegates to
 * ApiOpenSearch::getOpenSearchTemplate() for the
 * 'application/x-suggestions+json' type.
 *
 * @deprecated since 1.25 Use ApiOpenSearch::getOpenSearchTemplate() directly
 * @return mixed Whatever ApiOpenSearch::getOpenSearchTemplate() returns
 */
355  public static function getOpenSearchTemplate() {
356  wfDeprecated( __METHOD__, '1.25' );
357  return ApiOpenSearch::getOpenSearchTemplate( 'application/x-suggestions+json' );
358  }
359 
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Title the content belongs to (unused by the base engine)
 * @param Content|null $c Content to index
 * @return string The content's search-index text, or '' when no content is given
 */
public function getTextFromContent( Title $t, Content $c = null ) {
    if ( !$c ) {
        return '';
    }

    return $c->getTextForSearchIndex();
}
373 
/**
 * Tell callers whether getTextFromContent() already returns text that is
 * fully processed for the index.
 *
 * The base engine always answers false.
 * NOTE(review): presumably an engine that does all of its own text
 * processing in getTextFromContent() overrides this to return true - confirm.
 *
 * @return bool
 */
381  public function textAlreadyUpdatedForIndex() {
382  return false;
383  }
384 
/**
 * Makes search simple string if it was namespaced.
 *
 * Works out which namespaces a prefix search should cover, stores them via
 * setNamespaces(), and returns the search text without its namespace prefix.
 *
 * @param string $search Raw search string, possibly "Namespace:Text"
 * @return string Search string with any recognised namespace prefix removed
 */
391  protected function normalizeNamespaces( $search ) {
392  // Find a Title which is not an interwiki and is in NS_MAIN
393  $title = Title::newFromText( $search );
394  $ns = $this->namespaces;
395  if ( $title && !$title->isExternal() ) {
// The input parsed as a local title: search in that title's namespace,
// using the title text without its prefix.
396  $ns = [ $title->getNamespace() ];
397  $search = $title->getText();
398  if ( $ns[0] == NS_MAIN ) {
399  $ns = $this->namespaces; // no explicit prefix, use default namespaces
// The hook receives both values by reference and may rewrite them.
400  Hooks::run( 'PrefixSearchExtractNamespace', [ &$ns, &$search ] );
401  }
402  } else {
// The input did not parse as a local title. Append a dummy page name to
// test whether the input is just a namespace prefix on its own.
403  $title = Title::newFromText( $search . 'Dummy' );
404  if ( $title && $title->getText() == 'Dummy'
405  && $title->getNamespace() != NS_MAIN
406  && !$title->isExternal() )
407  {
// Bare namespace prefix: search that namespace with an empty string.
408  $ns = [ $title->getNamespace() ];
409  $search = '';
410  } else {
411  Hooks::run( 'PrefixSearchExtractNamespace', [ &$ns, &$search ] );
412  }
413  }
414 
// Replace every NS_MEDIA entry with NS_FILE before storing the list.
415  $ns = array_map( function( $space ) {
416  return $space == NS_MEDIA ? NS_FILE : $space;
417  }, $ns );
418 
419  $this->setNamespaces( $ns );
420  return $search;
421  }
422 
/**
 * Perform a completion search.
 *
 * Gives the 'PrefixSearchBackend' hook the first chance to produce results,
 * except when NS_SPECIAL is among the namespaces. Otherwise falls back to
 * simplePrefixSearch().
 *
 * @param string $search Search string; trimmed before use
 * @return SearchSuggestionSet Built from strings when the hook supplied the
 *  results, from titles when the fallback search ran
 */
430  protected function completionSearchBackend( $search ) {
431  $results = [];
432 
433  $search = trim( $search );
434 
// $results is passed to the hook by reference so handlers can fill it in.
435  if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
436  !Hooks::run( 'PrefixSearchBackend',
437  [ $this->namespaces, $search, $this->limit, &$results, $this->offset ]
438  ) ) {
439  // False means hook worked.
440  // FIXME: Yes, the API is weird. That's why it is going to be deprecated.
441 
442  return SearchSuggestionSet::fromStrings( $results );
443  } else {
444  // Hook did not do the job, use default simple search
445  $results = $this->simplePrefixSearch( $search );
446  return SearchSuggestionSet::fromTitles( $results );
447  }
448  }
449 
/**
 * Perform a completion search.
 *
 * Blank input short-circuits to an empty suggestion set. Otherwise the
 * namespace prefix is normalised away, the backend is queried, and the
 * results are post-processed.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
    if ( trim( $search ) === '' ) {
        // Nothing to complete
        return SearchSuggestionSet::emptySuggestionSet();
    }

    $normalized = $this->normalizeNamespaces( $search );
    $backendResults = $this->completionSearchBackend( $normalized );

    return $this->processCompletionResults( $normalized, $backendResults );
}
462 
/**
 * Perform a completion search with variants.
 *
 * Runs the normal completion search first. If it returns fewer results
 * than the limit, the language variants of the search string are tried in
 * turn until the remaining budget is used up.
 *
 * NOTE(review): setLimitOffset() inside the loop overwrites $this->limit
 * and resets $this->offset, and they are not restored afterwards. Confirm
 * this is intended, since the final processCompletionResults() call reads
 * $this->limit.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
    if ( trim( $search ) === '' ) {
        return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
    }
    $search = $this->normalizeNamespaces( $search );

    $results = $this->completionSearchBackend( $search );
    $fallbackLimit = $this->limit - $results->getSize();
    if ( $fallbackLimit > 0 ) {
        // Needed for the variant conversion below; without it $wgContLang
        // is an undefined local.
        global $wgContLang;

        $fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
        $fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

        foreach ( $fallbackSearches as $fbs ) {
            $this->setLimitOffset( $fallbackLimit );
            $fallbackSearchResult = $this->completionSearch( $fbs );
            $results->appendAll( $fallbackSearchResult );
            // Measure the set with getSize(), as done for $results above,
            // rather than count() on the object.
            $fallbackLimit -= $fallbackSearchResult->getSize();
            if ( $fallbackLimit <= 0 ) {
                break;
            }
        }
    }
    return $this->processCompletionResults( $search, $results );
}
494 
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults Suggestions to convert
 * @return array One entry per suggestion, each the suggestion's suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
    $toTitle = function( SearchSuggestion $suggestion ) {
        return $suggestion->getSuggestedTitle();
    };

    return $completionResults->map( $toTitle );
}
505 
/**
 * Process completion search results.
 *
 * Preloads the suggested titles with a LinkBatch, then asks
 * SearchExactMatchRescorer whether an exact title match should lead the
 * list. The suggestion set is modified in place and returned.
 *
 * @param string $search Search string; trimmed before use
 * @param SearchSuggestionSet $suggestions Suggestions from the backend
 * @return SearchSuggestionSet The same set, possibly reordered or extended
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
    $search = trim( $search );

    // preload the titles with LinkBatch
    $batch = new LinkBatch( $suggestions->map( function( SearchSuggestion $sugg ) {
        return $sugg->getSuggestedTitle();
    } ) );
    $batch->setCaller( __METHOD__ );
    $batch->execute();

    $prefixedTexts = $suggestions->map( function( SearchSuggestion $sugg ) {
        return $sugg->getSuggestedTitle()->getPrefixedText();
    } );

    // Rescore results with an exact title match
    // NOTE: in some cases like cross-namespace redirects
    // (frequently used as shortcuts e.g. WP:WP on huwiki) some
    // backends like Cirrus will return no results. We should still
    // try an exact title match to workaround this limitation
    $rescorer = new SearchExactMatchRescorer();
    $rescored = $rescorer->rescore( $search, $this->namespaces, $prefixedTexts, $this->limit );

    if ( count( $rescored ) <= 0 ) {
        return $suggestions;
    }

    $position = array_search( $rescored[0], $prefixedTexts );
    if ( $position === false ) {
        // The top rescored entry was not among the backend results, so the
        // rescorer found a new exact match: put it first and trim the set.
        $exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescored[0] ) );
        $suggestions->prepend( $exactMatch );
        $suggestions->shrink( $this->limit );
    } elseif ( $position > 0 ) {
        // The best match exists but is not first yet: move it to the top.
        $suggestions->rescore( $position );
    }

    return $suggestions;
}
552 
/**
 * Simple prefix search for subpages.
 *
 * @param string $search Search string
 * @return array Empty for blank input, otherwise the result of
 *  simplePrefixSearch() on the namespace-normalised string
 */
public function defaultPrefixSearch( $search ) {
    $isBlank = trim( $search ) === '';
    if ( $isBlank ) {
        return [];
    }

    $normalized = $this->normalizeNamespaces( $search );

    return $this->simplePrefixSearch( $normalized );
}
566 
/**
 * Call out to simple search backend.
 *
 * Uses the default database prefix search (TitlePrefixSearch) with this
 * engine's current namespaces, limit and offset.
 *
 * @param string $search Search string
 * @return array Whatever TitlePrefixSearch::defaultSearchBackend() returns
 */
protected function simplePrefixSearch( $search ) {
    $prefixSearch = new TitlePrefixSearch();

    return $prefixSearch->defaultSearchBackend(
        $this->namespaces,
        $search,
        $this->limit,
        $this->offset
    );
}
578 
/**
 * Make a list of searchable namespaces and their canonical names.
 *
 * Thin static wrapper around the SearchEngineConfig service.
 *
 * @return array
 */
public static function searchableNamespaces() {
    $searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

    return $searchConfig->searchableNamespaces();
}
587 
/**
 * Extract default namespaces to search from the given user's settings.
 *
 * Thin static wrapper around the SearchEngineConfig service.
 *
 * @param mixed $user User whose settings are read; passed through unchanged
 * @return array
 */
public static function userNamespaces( $user ) {
    $searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

    return $searchConfig->userNamespaces( $user );
}
598 
/**
 * An array of namespace indexes to be searched by default.
 *
 * Thin static wrapper around the SearchEngineConfig service.
 *
 * @return array
 */
public static function defaultNamespaces() {
    $searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

    return $searchConfig->defaultNamespaces();
}
607 
/**
 * Get a list of namespace names useful for showing in tooltips and
 * preferences.
 *
 * Thin static wrapper around the SearchEngineConfig service.
 *
 * @param array $namespaces Namespace indexes; passed through unchanged
 * @return array
 */
public static function namespacesAsText( $namespaces ) {
    $searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

    return $searchConfig->namespacesAsText( $namespaces );
}
618 
/**
 * Load up the appropriate search engine class for the currently active
 * database backend, and return a configured instance.
 *
 * Thin static wrapper around the SearchEngineFactory service.
 *
 * @param string|null $type Engine type; passed through to the factory
 * @return SearchEngine
 */
public static function create( $type = null ) {
    $factory = MediaWikiServices::getInstance()->getSearchEngineFactory();

    return $factory->create( $type );
}
629 
/**
 * Return the search engines we support.
 *
 * Thin static wrapper around the SearchEngineConfig service.
 *
 * @return array
 */
public static function getSearchTypes() {
    $searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

    return $searchConfig->getSearchTypes();
}
639 
/**
 * Get a list of supported profiles.
 *
 * The base engine defines no profiles and returns null for every type.
 * NOTE(review): $profileType is presumably one of the *_PROFILE_TYPE class
 * constants - confirm against callers.
 *
 * @param string $profileType Type of profile requested
 * @return null
 */
654  public function getProfiles( $profileType ) {
655  return null;
656  }
657 
/**
 * Create a search field definition.
 *
 * The base engine ignores both arguments and returns a NullIndexField,
 * which means the search engine does not implement the field.
 *
 * @param string $name Field name
 * @param mixed $type Field type
 * @return NullIndexField
 */
666  public function makeSearchFieldMapping( $name, $type ) {
667  return new NullIndexField();
668  }
669 
/**
 * Get fields for search index.
 *
 * Collects the field definitions from the content handler of every content
 * model. When two handlers define the same field name the definitions are
 * merged. The 'SearchIndexFields' hook runs last and can add more fields.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of a field cannot be merged
 */
public function getSearchIndexFields() {
    $fields = [];

    foreach ( ContentHandler::getContentModels() as $model ) {
        $handler = ContentHandler::getForModelID( $model );

        foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
            if ( empty( $fields[$fieldName] ) ) {
                // First definition of this field wins the slot
                $fields[$fieldName] = $fieldData;
                continue;
            }

            // TODO: do we allow some clashes with the same type or reject all of them?
            $merged = $fields[$fieldName]->merge( $fieldData );
            if ( !$merged ) {
                throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
            }
            $fields[$fieldName] = $merged;
        }
    }

    // Hook to allow extensions to produce search mapping fields
    Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );

    return $fields;
}
698 }
699 
707  // no-op
708 }
Dummy class to be used when non-supported Database engine is present.
getSort()
Get the sort direction of the search results.
replacePrefixes($query)
Parse some common prefixes: all (search everything) or namespace names.
string $prefix
static getNearMatchResultSet($searchterm)
Do a near match (see SearchEngine::getNearMatch) and wrap it into a SearchResultSet.
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2598
transformSearchTerm($term)
Transform search term in cases when parts of the query came as different GET params (when supported)...
static searchableNamespaces()
Make a list of searchable namespaces and their canonical names.
searchText($term)
Perform a full text search query and return a result set.
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1435
Search suggestion.
completionSearchBackend($search)
Perform a completion search.
static defaultNamespaces()
An array of namespace indexes to be searched by default.
static namespacesAsText($namespaces)
Get a list of namespace names useful for showing in tooltips and preferences.
const NS_MAIN
Definition: Defines.php:69
to move a page</td >< td > &*You are moving the page across namespaces
static userNamespaces($user)
Extract default namespaces to search from the given user's settings, returning a list of index number...
makeSearchFieldMapping($name, $type)
Create a search field definition.
static legalSearchChars()
Get chars legal for search.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
update($id, $title, $text)
Create or update the search index record for the given page.
setShowSuggestion($showSuggestion)
Set whether the searcher should try to build a suggestion.
getProfiles($profileType)
Get a list of supported profiles.
defaultPrefixSearch($search)
Simple prefix search for subpages.
const NS_SPECIAL
Definition: Defines.php:58
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:256
static fromStrings(array $titles)
Builds a new set of suggestions based on a string array.
Represents a title within MediaWiki.
Definition: Title.php:36
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
simplePrefixSearch($search)
Call out to simple search backend.
Null index field - means search engine does not implement this field.
supports($feature)
completionSearchWithVariants($search)
Perform a completion search with variants.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:32
normalizeNamespaces($search)
Makes search simple string if it was namespaced.
static getOpenSearchTemplate()
Get OpenSearch suggestion template.
setLimitOffset($limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first...
normalizeText($string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
Interface for configuration instances.
Definition: Config.php:28
map($callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
defaultSearchBackend($namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook...
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object Nicer search backends could possibly do...
array string $searchTerms
const NS_MEDIA
Definition: Defines.php:57
searchTitle($term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Base interface for content objects.
Definition: Content.php:34
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
static getNearMatch($searchterm)
If an exact title match can be found, or a very slightly close match, return the title.
getValidSorts()
Get the valid sort directions.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
const COMPLETION_PROFILE_TYPE
string profile type for completionSearch
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
bool $showSuggestion
Allows to change the fields on the form that will be generated are created Can be used to omit specific feeds from being outputted You must not use this hook to add use OutputPage::addFeedLink() instead.&$feedLinks conditions will AND in the final query as a Content object as a Content object $title
Definition: hooks.txt:312
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
const NS_FILE
Definition: Defines.php:75
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
static getSearchTypes()
Return the search engines we support.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Performs prefix search, returning Title objects.
updateTitle($id, $title)
Update a search index record's title only.
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:242
shrink($limit)
Remove any extra elements in the suggestions set.
const FT_QUERY_INDEP_PROFILE_TYPE
string profile type for query independent ranking features
setSort($sort)
Set the sort direction of the search results.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
int[] null $namespaces
setFeatureData($feature, $data)
Way to pass custom data for engines.
Implementation of near match title search.
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
array $features
Feature values.
setNamespaces($namespaces)
Set which namespaces the search should include.
completionSearch($search)
Perform a completion search.
Contain a class for special pages.
rescore($key)
Move the suggestion at index $key to the first position.
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
getSearchIndexFields()
Get fields for search index.
Search suggestion sets.
static create($type=null)
Load up the appropriate search engine class for the currently active database backend, and return a configured instance.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:776
getNearMatcher(Config $config)
Get the service class for finding near matches.
static fromTitle($score, Title $title)
Create suggestion from Title.
static fromTitles(array $titles)
Builds a new set of suggestions based on a title array.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2376
processCompletionResults($search, SearchSuggestionSet $suggestions)
Process completion search results.
static userHighlightPrefs()
Find snippet highlight settings for all users.
static getOpenSearchTemplate($type)
Fetch the template for a type.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:310