[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/extensions/SpamBlacklist/ -> BaseBlacklist.php (source)

   1  <?php
   2  
   3  /**
   4   * Base class for different kinds of blacklists
   5   */
   6  abstract class BaseBlacklist {
   7      /**
   8       * Array of blacklist sources
   9       *
  10       * @var array
  11       */
  12      public $files = array();
  13  
  14      /**
  15       * Array containing regexes to test against
  16       *
  17       * @var bool|array
  18       */
  19      protected $regexes = false;
  20  
  21      /**
  22       * Chance of receiving a warning when the filter is hit
  23       *
  24       * @var int
  25       */
  26      public $warningChance = 100;
  27  
  28      /**
  29       * @var int
  30       */
  31      public $warningTime = 600;
  32  
  33      /**
  34       * @var int
  35       */
  36      public $expiryTime = 900;
  37  
  38      /**
  39       * Array containing blacklists that extend BaseBlacklist
  40       *
  41       * @var array
  42       */
  43      private static $blacklistTypes = array(
  44          'spam' => 'SpamBlacklist',
  45          'email' => 'EmailBlacklist',
  46      );
  47  
  48      /**
  49       * Array of blacklist instances
  50       *
  51       * @var array
  52       */
  53      private static $instances = array();
  54  
  55      /**
  56       * Constructor
  57       *
  58       * @param array $settings
  59       */
  60  	function __construct( $settings = array() ) {
  61          foreach ( $settings as $name => $value ) {
  62              $this->$name = $value;
  63          }
  64      }
  65  
  66      /**
  67       * Adds a blacklist class to the registry
  68       *
  69       * @param $type string
  70       * @param $class string
  71       */
  72  	public static function addBlacklistType( $type, $class ) {
  73          self::$blacklistTypes[$type] = $class;
  74      }
  75  
  76      /**
  77       * Return the array of blacklist types currently defined
  78       *
  79       * @return array
  80       */
  81  	public static function getBlacklistTypes() {
  82          return self::$blacklistTypes;
  83      }
  84  
  85      /**
  86       * Returns an instance of the given blacklist
  87       *
  88       * @param $type string Code for the blacklist
  89       * @return BaseBlacklist
  90       * @throws MWException
  91       */
  92  	public static function getInstance( $type ) {
  93          if ( !isset( self::$blacklistTypes[$type] ) ) {
  94              throw new MWException( "Invalid blacklist type '$type' passed to " . __METHOD__ );
  95          }
  96  
  97          if ( !isset( self::$instances[$type] ) ) {
  98              global $wgBlacklistSettings;
  99  
 100              // Prevent notices
 101              if ( !isset( $wgBlacklistSettings[$type] ) ) {
 102                  $wgBlacklistSettings[$type] = array();
 103              }
 104  
 105              self::$instances[$type] = new self::$blacklistTypes[$type]( $wgBlacklistSettings[$type] );
 106          }
 107  
 108          return self::$instances[$type];
 109      }
 110  
 111      /**
 112       * Returns the code for the blacklist implementation
 113       *
 114       * @return string
 115       */
 116      abstract protected function getBlacklistType();
 117  
 118      /**
 119       * Check if the given local page title is a spam regex source.
 120       *
 121       * @param Title $title
 122       * @return bool
 123       */
 124  	public static function isLocalSource( Title $title ) {
 125          global $wgDBname, $wgBlacklistSettings;
 126  
 127          if( $title->getNamespace() == NS_MEDIAWIKI ) {
 128              $sources = array();
 129              foreach ( self::$blacklistTypes as $type => $class ) {
 130                  $type = ucfirst( $type );
 131                  $sources += array(
 132                      "$type-blacklist",
 133                      "$type-whitelist"
 134                  );
 135              }
 136  
 137              if( in_array( $title->getDBkey(), $sources ) ) {
 138                  return true;
 139              }
 140          }
 141  
 142          $thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
 143          $thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';
 144  
 145          $files = array();
 146          foreach ( self::$blacklistTypes as $type => $class ) {
 147              if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
 148                  $files += $wgBlacklistSettings[$type]['files'];
 149              }
 150          }
 151  
 152          foreach( $files as $fileName ) {
 153              $matches = array();
 154              if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
 155                  if ( $wgDBname == $matches[1] ) {
 156                      if( $matches[2] == $title->getPrefixedDbKey() ) {
 157                          // Local DB fetch of this page...
 158                          return true;
 159                      }
 160                  }
 161              } elseif( preg_match( $thisHttpRegex, $fileName ) ) {
 162                  // Raw view of this page
 163                  return true;
 164              }
 165          }
 166  
 167          return false;
 168      }
 169  
 170      /**
 171       * Returns the type of blacklist from the given title
 172       *
 173       * @param Title $title
 174       * @return bool|string
 175       */
 176  	public static function getTypeFromTitle( Title $title ) {
 177          $types = array_map( 'preg_quote', array_keys( self::$blacklistTypes ), array( '/' ) );
 178          $regex = '/(' . implode( '|', $types ).  ')-(?:Blacklist|Whitelist)/';
 179  
 180          if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
 181              return strtolower( $m[1] );
 182          }
 183  
 184          return false;
 185      }
 186  
 187      /**
 188       * Fetch local and (possibly cached) remote blacklists.
 189       * Will be cached locally across multiple invocations.
 190       * @return array set of regular expressions, potentially empty.
 191       */
 192  	function getBlacklists() {
 193          if( $this->regexes === false ) {
 194              $this->regexes = array_merge(
 195                  $this->getLocalBlacklists(),
 196                  $this->getSharedBlacklists() );
 197          }
 198          return $this->regexes;
 199      }
 200  
 201      /**
 202       * Returns the local blacklist
 203       *
 204       * @return array Regular expressions
 205       */
 206  	public function getLocalBlacklists() {
 207          return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist", $this );
 208      }
 209  
 210      /**
 211       * Returns the (local) whitelist
 212       *
 213       * @return array Regular expressions
 214       */
 215  	public function getWhitelists() {
 216          return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist", $this );
 217      }
 218  
 219      /**
 220       * Fetch (possibly cached) remote blacklists.
 221       * @return array
 222       */
 223  	function getSharedBlacklists() {
 224          global $wgMemc, $wgDBname;
 225          $listType = $this->getBlacklistType();
 226          $fname = 'SpamBlacklist::getRegex';
 227          wfProfileIn( $fname );
 228  
 229          wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );
 230  
 231          if ( count( $this->files ) == 0 ){
 232              # No lists
 233              wfDebugLog( 'SpamBlacklist', "no files specified\n" );
 234              wfProfileOut( $fname );
 235              return array();
 236          }
 237  
 238          // This used to be cached per-site, but that could be bad on a shared
 239          // server where not all wikis have the same configuration.
 240          $cachedRegexes = $wgMemc->get( "$wgDBname:{$listType}_blacklist_regexes" );
 241          if( is_array( $cachedRegexes ) ) {
 242              wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
 243              wfProfileOut( $fname );
 244              return $cachedRegexes;
 245          }
 246  
 247          $regexes = $this->buildSharedBlacklists();
 248          $wgMemc->set( "$wgDBname:{$listType}_blacklist_regexes", $regexes, $this->expiryTime );
 249  
 250          return $regexes;
 251      }
 252  
 253  	function clearCache() {
 254          global $wgMemc, $wgDBname;
 255          $listType = $this->getBlacklistType();
 256  
 257          $wgMemc->delete( "$wgDBname:{$listType}_blacklist_regexes" );
 258          wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
 259      }
 260  
 261  	function buildSharedBlacklists() {
 262          $regexes = array();
 263          $listType = $this->getBlacklistType();
 264          # Load lists
 265          wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
 266          foreach ( $this->files as $fileName ) {
 267              $matches = array();
 268              if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
 269                  $text = $this->getArticleText( $matches[1], $matches[2] );
 270              } elseif ( preg_match( '/^http:\/\//', $fileName ) ) {
 271                  $text = $this->getHttpText( $fileName );
 272              } else {
 273                  $text = file_get_contents( $fileName );
 274                  wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
 275              }
 276  
 277              // Build a separate batch of regexes from each source.
 278              // While in theory we could squeeze a little efficiency
 279              // out of combining multiple sources in one regex, if
 280              // there's a bad line in one of them we'll gain more
 281              // from only having to break that set into smaller pieces.
 282              $regexes = array_merge( $regexes,
 283                  SpamRegexBatch::regexesFromText( $text, $this, $fileName ) );
 284          }
 285  
 286          return $regexes;
 287      }
 288  
 289  	function getHttpText( $fileName ) {
 290          global $wgDBname, $messageMemc;
 291          $listType = $this->getBlacklistType();
 292  
 293          # HTTP request
 294          # To keep requests to a minimum, we save results into $messageMemc, which is
 295          # similar to $wgMemc except almost certain to exist. By default, it is stored
 296          # in the database
 297          #
 298          # There are two keys, when the warning key expires, a random thread will refresh
 299          # the real key. This reduces the chance of multiple requests under high traffic
 300          # conditions.
 301          $key = "{$listType}_blacklist_file:$fileName";
 302          $warningKey = "$wgDBname:{$listType}filewarning:$fileName";
 303          $httpText = $messageMemc->get( $key );
 304          $warning = $messageMemc->get( $warningKey );
 305  
 306          if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
 307              wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
 308              $httpText = Http::get( $fileName );
 309              if( $httpText === false ) {
 310                  wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
 311              }
 312              $messageMemc->set( $warningKey, 1, $this->warningTime );
 313              $messageMemc->set( $key, $httpText, $this->expiryTime );
 314          } else {
 315              wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
 316          }
 317          return $httpText;
 318      }
 319  
 320      /**
 321       * Fetch an article from this or another local MediaWiki database.
 322       * This is probably *very* fragile, and shouldn't be used perhaps.
 323       *
 324       * @param string $db
 325       * @param string $article
 326       * @return string
 327       */
 328  	function getArticleText( $db, $article ) {
 329          wfDebugLog( 'SpamBlacklist', "Fetching {$this->getBlacklistType()} spam blacklist from '$article' on '$db'...\n" );
 330          global $wgDBname;
 331          $dbr = wfGetDB( DB_READ );
 332          $dbr->selectDB( $db );
 333          $text = false;
 334          if ( $dbr->tableExists( 'page' ) ) {
 335              // 1.5 schema
 336              $dbw = wfGetDB( DB_READ );
 337              $dbw->selectDB( $db );
 338              $revision = Revision::newFromTitle( Title::newFromText( $article ) );
 339              if ( $revision ) {
 340                  $text = $revision->getText();
 341              }
 342              $dbw->selectDB( $wgDBname );
 343          } else {
 344              // 1.4 schema
 345              $title = Title::newFromText( $article );
 346              $text = $dbr->selectField( 'cur', 'cur_text', array( 'cur_namespace' => $title->getNamespace(),
 347                  'cur_title' => $title->getDBkey() ), __METHOD__ );
 348          }
 349          $dbr->selectDB( $wgDBname );
 350          return strval( $text );
 351      }
 352  
 353      /**
 354       * Returns the start of the regex for matches
 355       *
 356       * @return string
 357       */
 358  	public function getRegexStart() {
 359          return '/[a-z0-9_\-.]*';
 360      }
 361  
 362      /**
 363       * Returns the end of the regex for matches
 364       *
 365       * @param $batchSize
 366       * @return string
 367       */
 368  	public function getRegexEnd( $batchSize ) {
 369          return ($batchSize > 0 ) ? '/Sim' : '/im';
 370      }
 371  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1