[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/ -> LinkFilter.php (source)

   1  <?php
   2  /**
   3   * Functions to help implement an external link filter for spam control.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   */
  22  
  23  /**
  24   * Some functions to help implement an external link filter for spam control.
  25   *
  26   * @todo implement the filter. Currently these are just some functions to help
  27   * maintenance/cleanupSpam.php remove links to a single specified domain. The
  28   * next thing is to implement functions for checking a given page against a big
  29   * list of domains.
  30   *
  31   * Another cool thing to do would be a web interface for fast spam removal.
  32   */
  33  class LinkFilter {
  34  
  35      /**
  36       * Check whether $content contains a link to $filterEntry
  37       *
  38       * @param Content $content Content to check
  39       * @param string $filterEntry Domainparts, see makeRegex() for more details
  40       * @return int 0 if no match or 1 if there's at least one match
  41       */
  42  	static function matchEntry( Content $content, $filterEntry ) {
  43          if ( !( $content instanceof TextContent ) ) {
  44              //TODO: handle other types of content too.
  45              //      Maybe create ContentHandler::matchFilter( LinkFilter ).
  46              //      Think about a common base class for LinkFilter and MagicWord.
  47              return 0;
  48          }
  49  
  50          $text = $content->getNativeData();
  51  
  52          $regex = LinkFilter::makeRegex( $filterEntry );
  53          return preg_match( $regex, $text );
  54      }
  55  
  56      /**
  57       * Builds a regex pattern for $filterEntry.
  58       *
  59       * @param string $filterEntry URL, if it begins with "*.", it'll be
  60       *        replaced to match any subdomain
  61       * @return string Regex pattern, for preg_match()
  62       */
  63  	private static function makeRegex( $filterEntry ) {
  64          $regex = '!http://';
  65          if ( substr( $filterEntry, 0, 2 ) == '*.' ) {
  66              $regex .= '(?:[A-Za-z0-9.-]+\.|)';
  67              $filterEntry = substr( $filterEntry, 2 );
  68          }
  69          $regex .= preg_quote( $filterEntry, '!' ) . '!Si';
  70          return $regex;
  71      }
  72  
  73      /**
  74       * Make an array to be used for calls to DatabaseBase::buildLike(), which
  75       * will match the specified string. There are several kinds of filter entry:
  76       *     *.domain.com    -  Produces http://com.domain.%, matches domain.com
  77       *                        and www.domain.com
  78       *     domain.com      -  Produces http://com.domain./%, matches domain.com
  79       *                        or domain.com/ but not www.domain.com
  80       *     *.domain.com/x  -  Produces http://com.domain.%/x%, matches
  81       *                        www.domain.com/xy
  82       *     domain.com/x    -  Produces http://com.domain./x%, matches
  83       *                        domain.com/xy but not www.domain.com/xy
  84       *
  85       * Asterisks in any other location are considered invalid.
  86       *
  87       * This function does the same as wfMakeUrlIndexes(), except it also takes care
  88       * of adding wildcards
  89       *
  90       * @param string $filterEntry Domainparts
  91       * @param string $protocol Protocol (default http://)
  92       * @return array Array to be passed to DatabaseBase::buildLike() or false on error
  93       */
  94  	public static function makeLikeArray( $filterEntry, $protocol = 'http://' ) {
  95          $db = wfGetDB( DB_MASTER );
  96  
  97          $target = $protocol . $filterEntry;
  98          $bits = wfParseUrl( $target );
  99  
 100          if ( $bits == false ) {
 101              // Unknown protocol?
 102              return false;
 103          }
 104  
 105          if ( substr( $bits['host'], 0, 2 ) == '*.' ) {
 106              $subdomains = true;
 107              $bits['host'] = substr( $bits['host'], 2 );
 108              if ( $bits['host'] == '' ) {
 109                  // We don't want to make a clause that will match everything,
 110                  // that could be dangerous
 111                  return false;
 112              }
 113          } else {
 114              $subdomains = false;
 115          }
 116  
 117          // Reverse the labels in the hostname, convert to lower case
 118          // For emails reverse domainpart only
 119          if ( $bits['scheme'] === 'mailto' && strpos( $bits['host'], '@' ) ) {
 120              // complete email address
 121              $mailparts = explode( '@', $bits['host'] );
 122              $domainpart = strtolower( implode( '.', array_reverse( explode( '.', $mailparts[1] ) ) ) );
 123              $bits['host'] = $domainpart . '@' . $mailparts[0];
 124          } elseif ( $bits['scheme'] === 'mailto' ) {
 125              // domainpart of email address only, do not add '.'
 126              $bits['host'] = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
 127          } else {
 128              $bits['host'] = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
 129              if ( substr( $bits['host'], -1, 1 ) !== '.' ) {
 130                  $bits['host'] .= '.';
 131              }
 132          }
 133  
 134          $like[] = $bits['scheme'] . $bits['delimiter'] . $bits['host'];
 135  
 136          if ( $subdomains ) {
 137              $like[] = $db->anyString();
 138          }
 139  
 140          if ( isset( $bits['port'] ) ) {
 141              $like[] = ':' . $bits['port'];
 142          }
 143          if ( isset( $bits['path'] ) ) {
 144              $like[] = $bits['path'];
 145          } elseif ( !$subdomains ) {
 146              $like[] = '/';
 147          }
 148          if ( isset( $bits['query'] ) ) {
 149              $like[] = '?' . $bits['query'];
 150          }
 151          if ( isset( $bits['fragment'] ) ) {
 152              $like[] = '#' . $bits['fragment'];
 153          }
 154  
 155          // Check for stray asterisks: asterisk only allowed at the start of the domain
 156          foreach ( $like as $likepart ) {
 157              if ( !( $likepart instanceof LikeMatch ) && strpos( $likepart, '*' ) !== false ) {
 158                  return false;
 159              }
 160          }
 161  
 162          if ( !( $like[count( $like ) - 1] instanceof LikeMatch ) ) {
 163              // Add wildcard at the end if there isn't one already
 164              $like[] = $db->anyString();
 165          }
 166  
 167          return $like;
 168      }
 169  
 170      /**
 171       * Filters an array returned by makeLikeArray(), removing everything past first
 172       * pattern placeholder.
 173       *
 174       * @param array $arr Array to filter
 175       * @return array Filtered array
 176       */
 177  	public static function keepOneWildcard( $arr ) {
 178          if ( !is_array( $arr ) ) {
 179              return $arr;
 180          }
 181  
 182          foreach ( $arr as $key => $value ) {
 183              if ( $value instanceof LikeMatch ) {
 184                  return array_slice( $arr, 0, $key + 1 );
 185              }
 186          }
 187  
 188          return $arr;
 189      }
 190  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1