[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/extensions/SpamBlacklist/ -> SpamBlacklist_body.php (source)

   1  <?php
   2  
   3  if ( !defined( 'MEDIAWIKI' ) ) {
   4      exit;
   5  }
   6  
   7  class SpamBlacklist extends BaseBlacklist {
   8  
   9      /**
  10       * Returns the code for the blacklist implementation
  11       *
  12       * @return string
  13       */
  14  	protected function getBlacklistType() {
  15          return 'spam';
  16      }
  17  
  18      /**
  19       * Apply some basic anti-spoofing to the links before they get filtered,
  20       * see @bug 12896
  21       *
  22       * @param string $text
  23       *
  24       * @return string
  25       */
  26  	protected function antiSpoof( $text ) {
  27          $text = str_replace( '.', '.', $text );
  28          return $text;
  29      }
  30  
  31      /**
  32       * @param string[] $links An array of links to check against the blacklist
  33       * @param Title  $title The title of the page to which the filter shall be applied.
  34       *               This is used to load the old links already on the page, so
  35       *               the filter is only applied to links that got added. If not given,
  36       *               the filter is applied to all $links.
  37       * @param boolean $preventLog Whether to prevent logging of hits. Set to true when
  38       *               the action is testing the links rather than attempting to save them
  39       *               (e.g. the API spamblacklist action)
  40       *
  41       * @return Array Matched text(s) if the edit should not be allowed, false otherwise
  42       */
  43  	function filter( array $links, Title $title = null, $preventLog = false ) {
  44          $fname = 'wfSpamBlacklistFilter';
  45          wfProfileIn( $fname );
  46  
  47          $blacklists = $this->getBlacklists();
  48          $whitelists = $this->getWhitelists();
  49  
  50          if ( count( $blacklists ) ) {
  51              // poor man's anti-spoof, see bug 12896
  52              $newLinks = array_map( array( $this, 'antiSpoof' ), $links );
  53  
  54              $oldLinks = array();
  55              if ( $title !== null ) {
  56                  $oldLinks = $this->getCurrentLinks( $title );
  57                  $addedLinks = array_diff( $newLinks, $oldLinks );
  58              } else {
  59                  // can't load old links, so treat all links as added.
  60                  $addedLinks = $newLinks;
  61              }
  62  
  63              wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
  64              wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
  65              wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );
  66  
  67              $links = implode( "\n", $addedLinks );
  68  
  69              # Strip whitelisted URLs from the match
  70              if( is_array( $whitelists ) ) {
  71                  wfDebugLog( 'SpamBlacklist', "Excluding whitelisted URLs from " . count( $whitelists ) .
  72                      " regexes: " . implode( ', ', $whitelists ) . "\n" );
  73                  foreach( $whitelists as $regex ) {
  74                      wfSuppressWarnings();
  75                      $newLinks = preg_replace( $regex, '', $links );
  76                      wfRestoreWarnings();
  77                      if( is_string( $newLinks ) ) {
  78                          // If there wasn't a regex error, strip the matching URLs
  79                          $links = $newLinks;
  80                      }
  81                  }
  82              }
  83  
  84              # Do the match
  85              wfDebugLog( 'SpamBlacklist', "Checking text against " . count( $blacklists ) .
  86                  " regexes: " . implode( ', ', $blacklists ) . "\n" );
  87              $retVal = false;
  88              foreach( $blacklists as $regex ) {
  89                  wfSuppressWarnings();
  90                  $matches = array();
  91                  $check = ( preg_match_all( $regex, $links, $matches ) > 0 );
  92                  wfRestoreWarnings();
  93                  if( $check ) {
  94                      wfDebugLog( 'SpamBlacklist', "Match!\n" );
  95                      global $wgRequest;
  96                      $ip = $wgRequest->getIP();
  97                      $imploded = implode( ' ', $matches[0] );
  98                      wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: $imploded\n" );
  99                      if( !$preventLog ) {
 100                          $this->logFilterHit( $title, $imploded ); // Log it
 101                      }
 102                      if( $retVal === false ){
 103                          $retVal = array();
 104                      }
 105                      $retVal = array_merge( $retVal, $matches[1] );
 106                  }
 107              }
 108              if ( is_array( $retVal ) ) {
 109                  $retVal = array_unique( $retVal );
 110              }
 111          } else {
 112              $retVal = false;
 113          }
 114          wfProfileOut( $fname );
 115          return $retVal;
 116      }
 117  
 118      /**
 119       * Look up the links currently in the article, so we can
 120       * ignore them on a second run.
 121       *
 122       * WARNING: I can add more *of the same link* with no problem here.
 123       * @param $title Title
 124       * @return array
 125       */
 126  	function getCurrentLinks( $title ) {
 127          $dbr = wfGetDB( DB_SLAVE );
 128          $id = $title->getArticleID(); // should be zero queries
 129          $res = $dbr->select( 'externallinks', array( 'el_to' ),
 130              array( 'el_from' => $id ), __METHOD__ );
 131          $links = array();
 132          foreach ( $res as $row ) {
 133              $links[] = $row->el_to;
 134          }
 135          return $links;
 136      }
 137  
 138      /**
 139       * Returns the start of the regex for matches
 140       *
 141       * @return string
 142       */
 143  	public function getRegexStart() {
 144          return '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
 145      }
 146  
 147      /**
 148       * Returns the end of the regex for matches
 149       *
 150       * @param $batchSize
 151       * @return string
 152       */
 153  	public function getRegexEnd( $batchSize ) {
 154          return ')' . parent::getRegexEnd( $batchSize );
 155      }
 156      /**
 157       * Logs the filter hit to Special:Log if
 158       * $wgLogSpamBlacklistHits is enabled.
 159       *
 160       * @param Title $title
 161       * @param string $url URL that the user attempted to add
 162       */
 163  	public function logFilterHit( $title, $url ) {
 164          global $wgUser, $wgLogSpamBlacklistHits;
 165          if ( $wgLogSpamBlacklistHits ) {
 166              $logEntry = new ManualLogEntry( 'spamblacklist', 'hit' );
 167              $logEntry->setPerformer( $wgUser );
 168              $logEntry->setTarget( $title );
 169              $logEntry->setParameters( array(
 170                  '4::url' => $url,
 171              ) );
 172              $logid = $logEntry->insert();
 173              $logEntry->publish( $logid, "rc" );
 174          }
 175      }
 176  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1