[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Functions to help implement an external link filter for spam control.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Some functions to help implement an external link filter for spam control.
 *
 * @todo implement the filter. Currently these are just some functions to help
 * maintenance/cleanupSpam.php remove links to a single specified domain. The
 * next thing is to implement functions for checking a given page against a big
 * list of domains.
 *
 * Another cool thing to do would be a web interface for fast spam removal.
 */
class LinkFilter {

	/**
	 * Check whether $content contains a link to $filterEntry
	 *
	 * Only TextContent is inspected; any other content type is reported as
	 * "no match".
	 *
	 * @param Content $content Content to check
	 * @param string $filterEntry Domainparts, see makeRegex() for more details
	 * @param string $protocol Protocol prefix the link must start with
	 *   (default http://)
	 * @return int|bool 0 if no match, 1 if there's at least one match, or
	 *   false if preg_match() reports an error
	 */
	public static function matchEntry( Content $content, $filterEntry, $protocol = 'http://' ) {
		if ( !( $content instanceof TextContent ) ) {
			// TODO: handle other types of content too.
			//      Maybe create ContentHandler::matchFilter( LinkFilter ).
			//      Think about a common base class for LinkFilter and MagicWord.
			return 0;
		}

		$text = $content->getNativeData();

		$regex = self::makeRegex( $filterEntry, $protocol );
		return preg_match( $regex, $text );
	}

	/**
	 * Builds a regex pattern for $filterEntry.
	 *
	 * The pattern uses "!" as delimiter and is case-insensitive. Both the
	 * protocol and the filter entry are passed through preg_quote(), so they
	 * are always matched literally.
	 *
	 * @param string $filterEntry URL, if it begins with "*.", it'll be
	 *   replaced to match any subdomain
	 * @param string $protocol Protocol prefix to match (default http://)
	 * @return string Regex pattern, for preg_match()
	 */
	private static function makeRegex( $filterEntry, $protocol = 'http://' ) {
		$regex = '!' . preg_quote( $protocol, '!' );
		if ( substr( $filterEntry, 0, 2 ) === '*.' ) {
			// Leading "*." means: the bare domain or any chain of subdomains.
			// The empty alternative is what lets the bare domain match.
			$regex .= '(?:[A-Za-z0-9.-]+\.|)';
			$filterEntry = substr( $filterEntry, 2 );
		}
		$regex .= preg_quote( $filterEntry, '!' ) . '!Si';
		return $regex;
	}

	/**
	 * Make an array to be used for calls to DatabaseBase::buildLike(), which
	 * will match the specified string. There are several kinds of filter entry:
	 *     *.domain.com    -  Produces http://com.domain.%, matches domain.com
	 *                        and www.domain.com
	 *     domain.com      -  Produces http://com.domain./%, matches domain.com
	 *                        or domain.com/ but not www.domain.com
	 *     *.domain.com/x  -  Produces http://com.domain.%/x%, matches
	 *                        www.domain.com/xy
	 *     domain.com/x    -  Produces http://com.domain./x%, matches
	 *                        domain.com/xy but not www.domain.com/xy
	 *
	 * Asterisks in any other location are considered invalid.
	 *
	 * This function does the same as wfMakeUrlIndexes(), except it also takes care
	 * of adding wildcards
	 *
	 * @param string $filterEntry Domainparts
	 * @param string $protocol Protocol (default http://)
	 * @return array|bool Array to be passed to DatabaseBase::buildLike() or
	 *   false on error
	 */
	public static function makeLikeArray( $filterEntry, $protocol = 'http://' ) {
		// NOTE(review): within this method the handle is only used to obtain
		// wildcard tokens via anyString(); no query is issued here.
		$db = wfGetDB( DB_MASTER );

		$target = $protocol . $filterEntry;
		$bits = wfParseUrl( $target );

		if ( !$bits ) {
			// Unknown protocol?
			return false;
		}

		$subdomains = false;
		if ( substr( $bits['host'], 0, 2 ) === '*.' ) {
			$subdomains = true;
			$bits['host'] = substr( $bits['host'], 2 );
			if ( $bits['host'] == '' ) {
				// We don't want to make a clause that will match everything,
				// that could be dangerous
				return false;
			}
		}

		// Reverse the labels in the hostname, convert to lower case
		// For emails reverse domainpart only
		if ( $bits['scheme'] === 'mailto' && strpos( $bits['host'], '@' ) ) {
			// complete email address
			// (strpos() is tested for truthiness, so an "@" at offset 0 is
			// handled by the next branch instead)
			$mailparts = explode( '@', $bits['host'] );
			$domainpart = self::reverseHostLabels( $mailparts[1] );
			$bits['host'] = $domainpart . '@' . $mailparts[0];
		} elseif ( $bits['scheme'] === 'mailto' ) {
			// domainpart of email address only, do not add '.'
			$bits['host'] = self::reverseHostLabels( $bits['host'] );
		} else {
			$bits['host'] = self::reverseHostLabels( $bits['host'] );
			if ( substr( $bits['host'], -1, 1 ) !== '.' ) {
				$bits['host'] .= '.';
			}
		}

		$like = array();
		$like[] = $bits['scheme'] . $bits['delimiter'] . $bits['host'];

		if ( $subdomains ) {
			$like[] = $db->anyString();
		}

		if ( isset( $bits['port'] ) ) {
			$like[] = ':' . $bits['port'];
		}
		if ( isset( $bits['path'] ) ) {
			$like[] = $bits['path'];
		} elseif ( !$subdomains ) {
			$like[] = '/';
		}
		if ( isset( $bits['query'] ) ) {
			$like[] = '?' . $bits['query'];
		}
		if ( isset( $bits['fragment'] ) ) {
			$like[] = '#' . $bits['fragment'];
		}

		// Check for stray asterisks: asterisk only allowed at the start of the domain
		foreach ( $like as $likepart ) {
			if ( !( $likepart instanceof LikeMatch ) && strpos( $likepart, '*' ) !== false ) {
				return false;
			}
		}

		if ( !( $like[count( $like ) - 1] instanceof LikeMatch ) ) {
			// Add wildcard at the end if there isn't one already
			$like[] = $db->anyString();
		}

		return $like;
	}

	/**
	 * Reverse the dot-separated labels of a host name and lower-case the
	 * result, e.g. "www.Example.com" becomes "com.example.www".
	 *
	 * @param string $host Host name
	 * @return string Host name with its labels in reverse order, lower case
	 */
	private static function reverseHostLabels( $host ) {
		return strtolower( implode( '.', array_reverse( explode( '.', $host ) ) ) );
	}

	/**
	 * Filters an array returned by makeLikeArray(), removing everything past first
	 * pattern placeholder.
	 *
	 * A non-array argument (such as the false that makeLikeArray() returns on
	 * error) is handed back unchanged.
	 *
	 * @param array $arr Array to filter
	 * @return array Filtered array
	 */
	public static function keepOneWildcard( $arr ) {
		if ( !is_array( $arr ) ) {
			return $arr;
		}

		foreach ( $arr as $key => $value ) {
			if ( $value instanceof LikeMatch ) {
				// Keep everything up to and including the first placeholder
				return array_slice( $arr, 0, $key + 1 );
			}
		}

		return $arr;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |