[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php

/**
 * Base class for different kinds of blacklists.
 *
 * Concrete subclasses supply the type code via getBlacklistType(); this class
 * provides the type registry, per-type singleton instances and the loading
 * and caching of the regex lists from local messages, other local databases,
 * HTTP sources and plain files.
 */
abstract class BaseBlacklist {
	/**
	 * Array of blacklist sources
	 *
	 * @var array
	 */
	public $files = array();

	/**
	 * Array containing regexes to test against, or false when not loaded yet
	 *
	 * @var bool|array
	 */
	protected $regexes = false;

	/**
	 * Chance of receiving a warning when the filter is hit
	 *
	 * @var int
	 */
	public $warningChance = 100;

	/**
	 * Expiry (passed to the cache) of the "warning" key used by getHttpText()
	 *
	 * @var int
	 */
	public $warningTime = 600;

	/**
	 * Expiry (passed to the cache) of cached regexes and HTTP text
	 *
	 * @var int
	 */
	public $expiryTime = 900;

	/**
	 * Array containing blacklists that extend BaseBlacklist
	 *
	 * @var array
	 */
	private static $blacklistTypes = array(
		'spam' => 'SpamBlacklist',
		'email' => 'EmailBlacklist',
	);

	/**
	 * Array of blacklist instances
	 *
	 * @var array
	 */
	private static $instances = array();

	/**
	 * Constructor
	 *
	 * Every key of $settings is copied onto the instance as a property.
	 *
	 * @param array $settings
	 */
	public function __construct( $settings = array() ) {
		foreach ( $settings as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * Adds a blacklist class to the registry
	 *
	 * @param $type string
	 * @param $class string
	 */
	public static function addBlacklistType( $type, $class ) {
		self::$blacklistTypes[$type] = $class;
	}

	/**
	 * Return the array of blacklist types currently defined
	 *
	 * @return array
	 */
	public static function getBlacklistTypes() {
		return self::$blacklistTypes;
	}

	/**
	 * Returns an instance of the given blacklist
	 *
	 * Instances are created on first use from $wgBlacklistSettings[$type]
	 * and then reused for later calls with the same type.
	 *
	 * @param $type string Code for the blacklist
	 * @return BaseBlacklist
	 * @throws MWException
	 */
	public static function getInstance( $type ) {
		if ( !isset( self::$blacklistTypes[$type] ) ) {
			throw new MWException( "Invalid blacklist type '$type' passed to " . __METHOD__ );
		}

		if ( !isset( self::$instances[$type] ) ) {
			global $wgBlacklistSettings;

			// Prevent notices
			if ( !isset( $wgBlacklistSettings[$type] ) ) {
				$wgBlacklistSettings[$type] = array();
			}

			$class = self::$blacklistTypes[$type];
			self::$instances[$type] = new $class( $wgBlacklistSettings[$type] );
		}

		return self::$instances[$type];
	}

	/**
	 * Returns the code for the blacklist implementation
	 *
	 * @return string
	 */
	abstract protected function getBlacklistType();

	/**
	 * Check if the given local page title is a spam regex source.
	 *
	 * A title counts as a source when it is one of the "<Type>-blacklist" /
	 * "<Type>-whitelist" pages in the MediaWiki namespace, or when one of the
	 * configured 'files' entries refers to it (either as "DB: <db> <title>"
	 * for the current database or as the action=raw URL of the page).
	 *
	 * @param Title $title
	 * @return bool
	 */
	public static function isLocalSource( Title $title ) {
		global $wgDBname, $wgBlacklistSettings;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$sources = array();
			foreach ( self::$blacklistTypes as $type => $class ) {
				$type = ucfirst( $type );
				// Append rather than "+=": the union operator keeps existing
				// numeric keys, which silently dropped every type but the first.
				$sources[] = "$type-blacklist";
				$sources[] = "$type-whitelist";
			}

			if ( in_array( $title->getDBkey(), $sources ) ) {
				return true;
			}
		}

		$thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';

		$files = array();
		foreach ( self::$blacklistTypes as $type => $class ) {
			if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
				// Same reasoning as above: concatenate the lists instead of
				// taking their key-wise union.
				$files = array_merge(
					$files,
					array_values( (array)$wgBlacklistSettings[$type]['files'] )
				);
			}
		}

		foreach ( $files as $fileName ) {
			$matches = array();
			// Accept the same database-name characters as buildSharedBlacklists()
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				if ( $wgDBname == $matches[1] ) {
					if ( $matches[2] == $title->getPrefixedDbKey() ) {
						// Local DB fetch of this page...
						return true;
					}
				}
			} elseif ( preg_match( $thisHttpRegex, $fileName ) ) {
				// Raw view of this page
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns the type of blacklist from the given title
	 *
	 * The match is case-insensitive so that the page names checked by
	 * isLocalSource() ("Spam-blacklist", "Email-whitelist", ...) resolve to
	 * their lower-cased type code.
	 *
	 * @param Title $title
	 * @return bool|string Lower-cased type code, or false if the title does
	 *  not name a blacklist/whitelist page of a registered type
	 */
	public static function getTypeFromTitle( Title $title ) {
		$types = array();
		foreach ( array_keys( self::$blacklistTypes ) as $type ) {
			// Quote every type with the regex delimiter
			$types[] = preg_quote( $type, '/' );
		}
		$regex = '/(' . implode( '|', $types ) . ')-(?:blacklist|whitelist)/i';

		if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
			return strtolower( $m[1] );
		}

		return false;
	}

	/**
	 * Fetch local and (possibly cached) remote blacklists.
	 * Will be cached locally across multiple invocations.
	 * @return array set of regular expressions, potentially empty.
	 */
	public function getBlacklists() {
		if ( $this->regexes === false ) {
			$this->regexes = array_merge(
				$this->getLocalBlacklists(),
				$this->getSharedBlacklists() );
		}
		return $this->regexes;
	}

	/**
	 * Returns the local blacklist
	 *
	 * @return array Regular expressions
	 */
	public function getLocalBlacklists() {
		return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist", $this );
	}

	/**
	 * Returns the (local) whitelist
	 *
	 * @return array Regular expressions
	 */
	public function getWhitelists() {
		return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist", $this );
	}

	/**
	 * Fetch (possibly cached) remote blacklists.
	 *
	 * Returns an empty array when no files are configured; otherwise the
	 * regexes come from $wgMemc when present there, and are rebuilt and
	 * stored for $this->expiryTime when not.
	 *
	 * @return array
	 */
	public function getSharedBlacklists() {
		global $wgMemc, $wgDBname;
		$listType = $this->getBlacklistType();
		$fname = 'SpamBlacklist::getRegex';
		wfProfileIn( $fname );

		wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );

		if ( count( $this->files ) == 0 ) {
			# No lists
			wfDebugLog( 'SpamBlacklist', "no files specified\n" );
			wfProfileOut( $fname );
			return array();
		}

		// This used to be cached per-site, but that could be bad on a shared
		// server where not all wikis have the same configuration.
		$cacheKey = "$wgDBname:{$listType}_blacklist_regexes";
		$cachedRegexes = $wgMemc->get( $cacheKey );
		if ( is_array( $cachedRegexes ) ) {
			wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
			wfProfileOut( $fname );
			return $cachedRegexes;
		}

		$regexes = $this->buildSharedBlacklists();
		$wgMemc->set( $cacheKey, $regexes, $this->expiryTime );

		// Close the profiling section on the cache-miss path as well
		wfProfileOut( $fname );
		return $regexes;
	}

	/**
	 * Remove this blacklist's shared regexes from $wgMemc.
	 */
	public function clearCache() {
		global $wgMemc, $wgDBname;
		$listType = $this->getBlacklistType();

		$wgMemc->delete( "$wgDBname:{$listType}_blacklist_regexes" );
		wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
	}

	/**
	 * Load every configured source in $this->files and turn it into regexes.
	 *
	 * Entries of the form "DB: <db> <title>" are read via getArticleText(),
	 * entries starting with "http://" via getHttpText(), anything else is
	 * read with file_get_contents().
	 *
	 * @return array Regular expressions
	 */
	public function buildSharedBlacklists() {
		$regexes = array();
		$listType = $this->getBlacklistType();
		# Load lists
		wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
		foreach ( $this->files as $fileName ) {
			$matches = array();
			if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
				$text = $this->getArticleText( $matches[1], $matches[2] );
			} elseif ( preg_match( '/^http:\/\//', $fileName ) ) {
				$text = $this->getHttpText( $fileName );
			} else {
				$text = file_get_contents( $fileName );
				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
			}

			// Build a separate batch of regexes from each source.
			// While in theory we could squeeze a little efficiency
			// out of combining multiple sources in one regex, if
			// there's a bad line in one of them we'll gain more
			// from only having to break that set into smaller pieces.
			$regexes = array_merge( $regexes,
				SpamRegexBatch::regexesFromText( $text, $this, $fileName ) );
		}

		return $regexes;
	}

	/**
	 * Fetch the text of an HTTP blacklist source, using $messageMemc as cache.
	 *
	 * @param string $fileName URL of the source
	 * @return string|bool Text of the source; whatever Http::get() returned
	 *  (false on failure) when a fresh request was made
	 */
	public function getHttpText( $fileName ) {
		global $wgDBname, $messageMemc;
		$listType = $this->getBlacklistType();

		# HTTP request
		# To keep requests to a minimum, we save results into $messageMemc, which is
		# similar to $wgMemc except almost certain to exist. By default, it is stored
		# in the database
		#
		# There are two keys, when the warning key expires, a random thread will refresh
		# the real key. This reduces the chance of multiple requests under high traffic
		# conditions.
		$key = "{$listType}_blacklist_file:$fileName";
		$warningKey = "$wgDBname:{$listType}filewarning:$fileName";
		$httpText = $messageMemc->get( $key );
		$warning = $messageMemc->get( $warningKey );

		// Refetch when nothing usable is cached, or - once the warning key is
		// gone - only in the requests where mt_rand() happens to return 0.
		if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
			wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
			$httpText = Http::get( $fileName );
			if ( $httpText === false ) {
				wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
			}
			$messageMemc->set( $warningKey, 1, $this->warningTime );
			$messageMemc->set( $key, $httpText, $this->expiryTime );
		} else {
			wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
		}
		return $httpText;
	}

	/**
	 * Fetch an article from this or another local MediaWiki database.
	 * This is probably *very* fragile, and shouldn't be used perhaps.
	 *
	 * The connection is switched to $db for the lookup and switched back to
	 * $wgDBname before returning.
	 *
	 * @param string $db
	 * @param string $article
	 * @return string
	 */
	public function getArticleText( $db, $article ) {
		wfDebugLog( 'SpamBlacklist', "Fetching {$this->getBlacklistType()} spam blacklist from '$article' on '$db'...\n" );
		global $wgDBname;
		// NOTE(review): DB_READ does not look like one of the connection-index
		// constants core defines (DB_SLAVE / DB_MASTER) - confirm before relying on it.
		$dbr = wfGetDB( DB_READ );
		$dbr->selectDB( $db );
		$text = false;
		if ( $dbr->tableExists( 'page' ) ) {
			// 1.5 schema
			$dbw = wfGetDB( DB_READ );
			$dbw->selectDB( $db );
			// NOTE(review): Title::newFromText() result is used unchecked here
			// and in the 1.4 branch below - verify it cannot be null for
			// configured titles.
			$revision = Revision::newFromTitle( Title::newFromText( $article ) );
			if ( $revision ) {
				$text = $revision->getText();
			}
			$dbw->selectDB( $wgDBname );
		} else {
			// 1.4 schema
			$title = Title::newFromText( $article );
			$text = $dbr->selectField( 'cur', 'cur_text', array( 'cur_namespace' => $title->getNamespace(),
				'cur_title' => $title->getDBkey() ), __METHOD__ );
		}
		$dbr->selectDB( $wgDBname );
		return strval( $text );
	}

	/**
	 * Returns the start of the regex for matches
	 *
	 * @return string
	 */
	public function getRegexStart() {
		return '/[a-z0-9_\-.]*';
	}

	/**
	 * Returns the end of the regex for matches
	 *
	 * @param $batchSize
	 * @return string '/Sim' for a positive batch size, '/im' otherwise
	 */
	public function getRegexEnd( $batchSize ) {
		return ( $batchSize > 0 ) ? '/Sim' : '/im';
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |