MediaWiki  REL1_24
SearchEngine.php
Go to the documentation of this file.
00001 <?php
/**
 * Base class for search back ends.
 *
 * Holds the per-query state shared by all engines (namespaces, limit/offset,
 * feature data) and provides the static "near match" title lookup. The
 * search and index-update methods defined here are stubs that return null or
 * do nothing; concrete engines are expected to override them.
 */
class SearchEngine {
    /** @var string */
    public $prefix = '';

    /**
     * @var int[]|null Namespace indexes to search. replacePrefixes() sets
     *  this to null when the query starts with the "searchall" keyword.
     */
    public $namespaces = array( NS_MAIN );

    /** @var int Value stored by setLimitOffset() */
    protected $limit = 10;

    /** @var int Value stored by setLimitOffset() */
    protected $offset = 0;

    /** @var array */
    protected $searchTerms = array();

    /** @var bool Value stored by setShowSuggestion() */
    protected $showSuggestion = true;

    /** @var array Feature name => data, filled by setFeatureData() */
    protected $features = array();

    /**
     * Perform a full-text search.
     *
     * Stub: the base class performs no search and always returns null.
     *
     * @param string $term Raw search term
     * @return null
     */
    public function searchText( $term ) {
        return null;
    }

    /**
     * Perform a title-only search.
     *
     * Stub: the base class performs no search and always returns null.
     *
     * @param string $term Raw search term
     * @return null
     */
    public function searchTitle( $term ) {
        return null;
    }

    /**
     * Report whether this engine supports a named feature.
     *
     * The base class answers true for 'search-update' and false for
     * everything else, including 'title-suffix-filter'.
     *
     * @param string $feature Feature name
     * @return bool
     */
    public function supports( $feature ) {
        switch ( $feature ) {
        case 'search-update':
            return true;
        case 'title-suffix-filter':
        default:
            return false;
        }
    }

    /**
     * Store data for a feature, keyed by the feature name.
     *
     * @param string $feature Feature name
     * @param mixed $data Data to remember for that feature
     */
    public function setFeatureData( $feature, $data ) {
        $this->features[$feature] = $data;
    }

    /**
     * Normalize text by passing it through the content language's
     * segmentByWord().
     *
     * @param string $string
     * @return string
     */
    public function normalizeText( $string ) {
        global $wgContLang;

        // Some languages such as Chinese require word segmentation
        return $wgContLang->segmentByWord( $string );
    }

    /**
     * Transform a search term before use. The base class returns the term
     * unchanged.
     *
     * @param string $term
     * @return string
     */
    public function transformSearchTerm( $term ) {
        return $term;
    }

    /**
     * Find a title that closely matches the search term, then run the
     * 'SearchGetNearMatchComplete' hook, whose handlers receive the match by
     * reference.
     *
     * @param string $searchterm
     * @return Title|null The match after the hook has run, or null
     */
    public static function getNearMatch( $searchterm ) {
        $title = self::getNearMatchInternal( $searchterm );

        wfRunHooks( 'SearchGetNearMatchComplete', array( $searchterm, &$title ) );
        return $title;
    }

    /**
     * Same lookup as getNearMatch(), with the result wrapped in a
     * SearchNearMatchResultSet.
     *
     * @param string $searchterm
     * @return SearchNearMatchResultSet
     */
    public static function getNearMatchResultSet( $searchterm ) {
        return new SearchNearMatchResultSet( self::getNearMatch( $searchterm ) );
    }

    /**
     * Implementation of the near-match lookup.
     *
     * Each candidate term (the search term itself plus, when the content
     * language has variants, every automatic variant conversion) is tried as
     * an exact title and then in several capitalisations. If nothing matches,
     * a few special cases are applied to the original term: IP addresses,
     * user pages, existing files, the MediaWiki namespace, and a term wrapped
     * in double quotes.
     *
     * Three hooks can short-circuit the lookup by making wfRunHooks() return
     * false: 'SearchGetNearMatchBefore', 'SearchAfterNoDirectMatch' and
     * 'SearchGetNearMatch'. In each case the title variable handed to the
     * hook by reference is returned as-is.
     *
     * @param string $searchterm
     * @return Title|null
     */
    private static function getNearMatchInternal( $searchterm ) {
        global $wgContLang, $wgEnableSearchContributorsByIP;

        $allSearchTerms = array( $searchterm );

        if ( $wgContLang->hasVariants() ) {
            $allSearchTerms = array_merge(
                $allSearchTerms,
                $wgContLang->autoConvertToAllVariants( $searchterm )
            );
        }

        $titleResult = null;
        if ( !wfRunHooks( 'SearchGetNearMatchBefore', array( $allSearchTerms, &$titleResult ) ) ) {
            return $titleResult;
        }

        foreach ( $allSearchTerms as $term ) {

            # Exact match? No need to look further.
            $title = Title::newFromText( $term );
            if ( is_null( $title ) ) {
                # A term that cannot be parsed as a title aborts the whole lookup
                return null;
            }

            # Try files if searching in the Media: namespace
            if ( $title->getNamespace() == NS_MEDIA ) {
                $title = Title::makeTitle( NS_FILE, $title->getText() );
            }

            if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
                return $title;
            }

            # See if it still otherwise has content in some sane sense
            $page = WikiPage::factory( $title );
            if ( $page->hasViewableContent() ) {
                return $title;
            }

            if ( !wfRunHooks( 'SearchAfterNoDirectMatch', array( $term, &$title ) ) ) {
                return $title;
            }

            # Now try case variants of the term, in this order:
            #  lc           - all lower case (i.e. first letter capitalized)
            #  ucwords      - capitalized string
            #  uc           - all upper case
            #  ucwordbreaks - Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
            foreach ( array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' ) as $caseMethod ) {
                $title = Title::newFromText( $wgContLang->$caseMethod( $term ) );
                if ( $title && $title->exists() ) {
                    return $title;
                }
            }

            // Give hooks a chance at better match variants
            $title = null;
            if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
                return $title;
            }
        }

        $title = Title::newFromText( $searchterm );
        if ( is_null( $title ) ) {
            # Defensive only: the loop above already returns null when
            # $searchterm cannot be parsed, so this keeps the calls below safe.
            return null;
        }

        # Entering an IP address goes to the contributions page
        if ( $wgEnableSearchContributorsByIP ) {
            if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
                || User::isIP( trim( $searchterm ) ) ) {
                return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
            }
        }

        # Entering a user goes to the user page whether it's there or not
        if ( $title->getNamespace() == NS_USER ) {
            return $title;
        }

        # Go to images that exist even if there's no local page.
        # There may have been a funny upload, or it may be on a shared
        # file repository such as Wikimedia Commons.
        if ( $title->getNamespace() == NS_FILE ) {
            $image = wfFindFile( $title );
            if ( $image ) {
                return $title;
            }
        }

        # MediaWiki namespace? Page may be "implied" if not customized.
        # Just return it, with caps forced as the message system likes it.
        if ( $title->getNamespace() == NS_MEDIAWIKI ) {
            return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
        }

        # Quoted term? Try without the quotes...
        $matches = array();
        if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
            return self::getNearMatch( $matches[1] );
        }

        return null;
    }

    /**
     * Character ranges considered legal in a search term, written as the
     * inside of a regular-expression character class.
     *
     * @return string
     */
    public static function legalSearchChars() {
        return "A-Za-z_'.0-9\\x80-\\xFF\\-";
    }

    /**
     * Set the limit and offset. Both values are coerced with intval().
     *
     * @param int $limit
     * @param int $offset
     */
    public function setLimitOffset( $limit, $offset = 0 ) {
        $this->limit = intval( $limit );
        $this->offset = intval( $offset );
    }

    /**
     * Set the namespaces to search. The value is stored as given.
     *
     * @param int[]|null $namespaces
     */
    public function setNamespaces( $namespaces ) {
        $this->namespaces = $namespaces;
    }

    /**
     * Store the "show suggestion" flag.
     *
     * @param bool $showSuggestion
     */
    public function setShowSuggestion( $showSuggestion ) {
        $this->showSuggestion = $showSuggestion;
    }

    /**
     * Strip a leading "all:" keyword or namespace prefix from a query and
     * adjust $this->namespaces to match.
     *
     * - No colon in the query: it is returned unchanged.
     * - Query starts with the content-language 'searchall' message followed
     *   by ':': $this->namespaces becomes null and the keyword is removed.
     * - Otherwise the text before the first colon (spaces turned into
     *   underscores) is looked up as a namespace name; when it resolves,
     *   $this->namespaces becomes that single namespace and the prefix is
     *   removed.
     *
     * If removing the prefix would leave only whitespace, the original query
     * is returned instead (the namespace change, if any, is kept).
     *
     * @param string $query
     * @return string The query with any recognised prefix removed
     */
    public function replacePrefixes( $query ) {
        global $wgContLang;

        $colonPos = strpos( $query, ':' );
        if ( $colonPos === false ) { // nothing to do
            return $query;
        }

        $parsed = $query;
        $allkeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
        if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
            $this->namespaces = null;
            $parsed = substr( $query, strlen( $allkeyword ) );
        } else {
            // A colon is known to be present here, so no second check is needed
            $prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
            $index = $wgContLang->getNsIndex( $prefix );
            if ( $index !== false ) {
                $this->namespaces = array( $index );
                $parsed = substr( $query, strlen( $prefix ) + 1 );
            }
        }
        if ( trim( $parsed ) == '' ) {
            $parsed = $query; // prefix was the whole query
        }

        return $parsed;
    }

    /**
     * List the namespaces that can be searched: every content-language
     * namespace with an index of NS_MAIN or higher, after the
     * 'SearchableNamespaces' hook has had a chance to modify the list.
     *
     * @return array Namespace index => namespace name
     */
    public static function searchableNamespaces() {
        global $wgContLang;
        $arr = array();
        foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
            if ( $ns >= NS_MAIN ) {
                $arr[$ns] = $name;
            }
        }

        wfRunHooks( 'SearchableNamespaces', array( &$arr ) );
        return $arr;
    }

    /**
     * List the searchable namespaces a user has enabled, i.e. those for
     * which the user's 'searchNs<index>' option is truthy.
     *
     * @param User $user
     * @return int[] Namespace indexes
     */
    public static function userNamespaces( $user ) {
        $arr = array();
        foreach ( self::searchableNamespaces() as $ns => $name ) {
            if ( $user->getOption( 'searchNs' . $ns ) ) {
                $arr[] = $ns;
            }
        }

        return $arr;
    }

    /**
     * Highlighting preferences. Both values are fixed constants here.
     *
     * @return int[] array( context lines, context characters )
     */
    public static function userHighlightPrefs() {
        $contextlines = 2;
        $contextchars = 75;
        return array( $contextlines, $contextchars );
    }

    /**
     * Namespaces searched by default: the keys of
     * $wgNamespacesToBeSearchedDefault whose value loosely equals true.
     *
     * @return int[]
     */
    public static function defaultNamespaces() {
        global $wgNamespacesToBeSearchedDefault;

        // Deliberately a loose search, so any truthy config value (true, 1, ...) counts
        return array_keys( $wgNamespacesToBeSearchedDefault, true );
    }

    /**
     * Convert namespace indexes to display names using the content
     * language; an empty name is replaced by the 'blanknamespace' message.
     *
     * @param int[] $namespaces
     * @return string[] Names, keyed like the input
     */
    public static function namespacesAsText( $namespaces ) {
        global $wgContLang;

        $formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
        foreach ( $formatted as $key => $ns ) {
            if ( empty( $ns ) ) {
                $formatted[$key] = wfMessage( 'blanknamespace' )->text();
            }
        }
        return $formatted;
    }

    /**
     * Instantiate a search engine.
     *
     * The class is chosen in this order: $type, if it is one of
     * getSearchTypes(); else $wgSearchType when that is not null; else
     * whatever the slave database connection's getSearchEngine() names.
     * The constructor receives the database handle, which is null unless
     * the third option was taken.
     *
     * @param string|null $type Requested engine class name
     * @return SearchEngine
     */
    public static function create( $type = null ) {
        global $wgSearchType;
        $dbr = null;

        $alternatives = self::getSearchTypes();

        // Strict comparison: class names are strings, and the list may hold a null $wgSearchType
        if ( $type && in_array( $type, $alternatives, true ) ) {
            $class = $type;
        } elseif ( $wgSearchType !== null ) {
            $class = $wgSearchType;
        } else {
            $dbr = wfGetDB( DB_SLAVE );
            $class = $dbr->getSearchEngine();
        }

        $search = new $class( $dbr );
        return $search;
    }

    /**
     * List the configured search engine class names: $wgSearchType first
     * (even when it is null), followed by $wgSearchTypeAlternatives.
     *
     * @return array
     */
    public static function getSearchTypes() {
        global $wgSearchType, $wgSearchTypeAlternatives;

        $alternatives = $wgSearchTypeAlternatives ?: array();
        array_unshift( $alternatives, $wgSearchType );

        return $alternatives;
    }

    /**
     * Update the index entry for a page. No-op in the base class.
     *
     * @param int $id
     * @param string $title
     * @param string $text
     */
    public function update( $id, $title, $text ) {
        // no-op
    }

    /**
     * Update only the title of an index entry. No-op in the base class.
     *
     * @param int $id
     * @param string $title
     */
    public function updateTitle( $id, $title ) {
        // no-op
    }

    /**
     * Remove a page from the index. No-op in the base class.
     *
     * @param int $id
     * @param string $title
     */
    public function delete( $id, $title ) {
        // no-op
    }

    /**
     * Get the OpenSearch URL template: $wgOpenSearchTemplate when set,
     * otherwise an api.php 'opensearch' URL on $wgCanonicalServer restricted
     * to the default namespaces.
     *
     * @return string
     */
    public static function getOpenSearchTemplate() {
        global $wgOpenSearchTemplate, $wgCanonicalServer;

        if ( $wgOpenSearchTemplate ) {
            return $wgOpenSearchTemplate;
        }

        $ns = implode( '|', self::defaultNamespaces() );
        if ( !$ns ) {
            // Covers both "no default namespaces" and the lone namespace 0
            $ns = "0";
        }

        return $wgCanonicalServer . wfScript( 'api' )
            . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
    }

    /**
     * Get the text to index for a piece of content.
     *
     * @param Title $t Not used by the base implementation
     * @param Content $c Content object, or null
     * @return string $c->getTextForSearchIndex(), or '' when $c is null
     */
    public function getTextFromContent( Title $t, Content $c = null ) {
        return $c ? $c->getTextForSearchIndex() : '';
    }

    /**
     * Whether text returned by getTextFromContent() is already prepared for
     * the index. The base class answers false.
     *
     * @return bool
     */
    public function textAlreadyUpdatedForIndex() {
        return false;
    }
}
00546 
/**
 * Search engine subclass that adds nothing of its own: every method and
 * property is inherited unchanged from SearchEngine, so searches return null
 * and index updates do nothing.
 *
 * NOTE(review): the name suggests this is a placeholder for setups without a
 * real search back end; confirm against the callers that instantiate it.
 */
class SearchEngineDummy extends SearchEngine {
    // Intentionally empty; all behaviour comes from SearchEngine.
}