MediaWiki
REL1_19
|
<?php
require_once( dirname( __FILE__ ) . '/Maintenance.php' );

/**
 * Maintenance script that cleans up links to a given hostname.
 *
 * Pages whose external links match the hostname are reverted to the newest
 * revision whose text does not match it, or blanked when no such revision
 * exists. With --all, the script re-invokes itself once per wiki listed in
 * $wgLocalDatabases that has at least one matching external link.
 */
class CleanupSpam extends Maintenance {
	/**
	 * Register the script description, the --all option and the
	 * hostname argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Cleanup all spam from a given hostname";
		$this->addOption( 'all', 'Check all wikis in $wgLocalDatabases' );
		$this->addArg( 'hostname', 'Hostname that was spamming' );
	}

	/**
	 * Entry point: set up the acting user, validate the hostname
	 * specification, then clean either every wiki (--all) or the current one.
	 */
	public function execute() {
		global $wgLocalDatabases, $wgUser;

		// All edits made by this script are attributed to this account.
		$username = wfMsg( 'spambot_username' );
		$wgUser = User::newFromName( $username );
		if ( !$wgUser ) {
			$this->error( "Invalid username", true );
		}
		// Create the user if necessary
		if ( !$wgUser->getId() ) {
			$wgUser->addToDatabase();
		}

		$spec = $this->getArg();
		$like = LinkFilter::makeLikeArray( $spec );
		if ( !$like ) {
			$this->error( "Not a valid hostname specification: $spec", true );
		}

		if ( $this->hasOption( 'all' ) ) {
			// Clean up spam on all wikis
			$this->output( "Finding spam on " . count( $wgLocalDatabases ) . " wikis\n" );
			$found = false;
			foreach ( $wgLocalDatabases as $wikiID ) {
				$dbr = wfGetDB( DB_SLAVE, array(), $wikiID );

				$count = $dbr->selectField( 'externallinks', 'COUNT(*)',
					array( 'el_index' . $dbr->buildLike( $like ) ), __METHOD__ );
				if ( $count ) {
					$found = true;
					// Every interpolated value is shell-escaped: the hostname
					// spec may contain wildcards (e.g. "*.example.com") or
					// other characters the shell would otherwise interpret.
					$cmd = 'php cleanupSpam.php --wiki=' . escapeshellarg( $wikiID ) .
						' ' . escapeshellarg( $spec ) .
						' | sed ' . escapeshellarg( "s/^/$wikiID: /" );
					passthru( $cmd );
				}
			}
			if ( $found ) {
				$this->output( "All done\n" );
			} else {
				$this->output( "None found\n" );
			}
		} else {
			// Clean up spam on this wiki

			$dbr = wfGetDB( DB_SLAVE );
			$res = $dbr->select( 'externallinks', array( 'DISTINCT el_from' ),
				array( 'el_index' . $dbr->buildLike( $like ) ), __METHOD__ );
			$count = $dbr->numRows( $res );
			$this->output( "Found $count articles containing $spec\n" );
			foreach ( $res as $row ) {
				$this->cleanupArticle( $row->el_from, $spec );
			}
			if ( $count ) {
				$this->output( "Done\n" );
			}
		}
	}

	/**
	 * Remove links to $domain from a single page.
	 *
	 * Walks back from the current revision until it finds one whose text is
	 * not hidden and does not match $domain, then reverts the page to it.
	 * If every revision matches (or is hidden), the page is blanked. If the
	 * current revision itself does not match, nothing is edited.
	 *
	 * @param $id int Page ID taken from externallinks.el_from
	 * @param $domain string Hostname specification being cleaned up
	 */
	private function cleanupArticle( $id, $domain ) {
		$title = Title::newFromID( $id );
		if ( !$title ) {
			$this->error( "Internal error: no page for ID $id" );
			return;
		}

		$this->output( $title->getPrefixedDBkey() . " ..." );
		$rev = Revision::newFromTitle( $title );
		if ( !$rev ) {
			// No current revision could be loaded; calling getId() on it
			// would be a fatal error, so report and skip this page.
			$this->output( "no current revision, skipping\n" );
			return;
		}
		$currentRevId = $rev->getId();

		// isDeleted() is checked first so getText() is never evaluated for
		// a revision whose text is hidden.
		while ( $rev && ( $rev->isDeleted( Revision::DELETED_TEXT )
			|| LinkFilter::matchEntry( $rev->getText(), $domain ) )
		) {
			$rev = $rev->getPrevious();
		}

		if ( $rev && $rev->getId() == $currentRevId ) {
			// The regex didn't match the current article text
			// This happens e.g. when a link comes from a template rather than the page itself
			$this->output( "False match\n" );
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin();
		$page = WikiPage::factory( $title );
		if ( !$rev ) {
			// Didn't find a non-spammy revision, blank the page
			$this->output( "blanking\n" );
			$page->doEdit( '', wfMsgForContent( 'spam_blanking', $domain ) );
		} else {
			// Revert to this revision
			$this->output( "reverting\n" );
			$page->doEdit( $rev->getText(), wfMsgForContent( 'spam_reverting', $domain ),
				EDIT_UPDATE, $rev->getId() );
		}
		$dbw->commit();
	}
}

$maintClass = "CleanupSpam";
require_once( RUN_MAINTENANCE_IF_MAIN );