[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php

/**
 * An aggressive spam cleanup script.
 * Searches the database for matching pages, and reverts them to the last non-spammed revision.
 * If all revisions contain spam, the page is blanked (deleting it was judged too risky).
 *
 * When the 'n' option is set, the script only reports the pages it would have
 * cleaned and does not edit anything.
 */

require_once ( '../../maintenance/commandLine.inc' );
require_once ( 'SpamBlacklist_body.php' );

/**
 * Find the latest revision of the article that does not contain spam and revert to it.
 *
 * Walks backwards from $rev through the page history until it reaches a revision
 * whose text matches none of $regexes, then saves that text as a new edit. If
 * every revision matches, the page is saved with empty text instead.
 *
 * @param Revision $rev Revision to start from (the caller passes the current one)
 * @param array $regexes Blacklist patterns, each usable as a preg_match() pattern
 * @param string $match The matched link text, used only in the edit summary
 */
function cleanupArticle( Revision $rev, $regexes, $match ) {
	$title = $rev->getTitle();
	$revId = $rev->getId();
	while ( $rev ) {
		// Fetch the text once per revision rather than once per regex
		$revText = $rev->getText();
		$isSpam = false;
		foreach ( $regexes as $regex ) {
			if ( preg_match( $regex, $revText ) ) {
				$isSpam = true;
				break;
			}
		}
		if ( !$isSpam ) {
			// Didn't find any spam
			break;
		}
		# Revision::getPrevious can't be used in this way before MW 1.6 (Revision.php 1.26)
		#$rev = $rev->getPrevious();
		$revId = $title->getPreviousRevisionID( $revId );
		// A falsy previous ID means the history is exhausted; end the walk
		$rev = $revId ? Revision::newFromTitle( $title, $revId ) : false;
	}

	$dbw = wfGetDB( DB_MASTER );
	$dbw->begin();
	if ( !$rev ) {
		// Didn't find a non-spammy revision. Deleting the page was considered
		// too scary, so blank it instead.
		print "All revisions are spam, blanking...\n";
		$text = '';
		$comment = "All revisions matched the spam blacklist ($match), blanking";
	} else {
		// Revert to this revision
		$text = $rev->getText();
		$comment = "Cleaning up links to $match";
	}
	$wikiPage = new WikiPage( $title );
	$wikiPage->doEdit( $text, $comment );
	$dbw->commit();
}

//------------------------------------------------------------------------------

$username = 'Spam cleanup script';
$wgUser = User::newFromName( $username );
if ( $wgUser->idForName() == 0 ) {
	// Create the user
	$status = $wgUser->addToDatabase();
	if ( $status === null || $status->isOK() ) {
		// Store a placeholder password value for the script account
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'user', array( 'user_password' => 'nologin' ),
			array( 'user_name' => $username ), $username );
	}
}

// 'n' selects a dry run: report only, never edit
$dryRun = isset( $options['n'] );

$sb = new SpamBlacklist( $wgSpamBlacklistSettings );
if ( $wgSpamBlacklistFiles ) {
	$sb->files = $wgSpamBlacklistFiles;
}
$regexes = $sb->getBlacklists();
if ( !$regexes ) {
	print "Invalid regex, can't clean up spam\n";
	exit( 1 );
}

$dbr = wfGetDB( DB_SLAVE );
// Cast so that an empty page table gives 0 instead of a non-numeric value
$maxID = (int)$dbr->selectField( 'page', 'MAX(page_id)' );
$reportingInterval = 100;

// Each regex is a pattern string, so its size in bytes is strlen(), not count()
print "Regexes are " . implode( ', ', array_map( 'strlen', $regexes ) ) . " bytes\n";
print "Searching for spam in $maxID pages...\n";
if ( $dryRun ) {
	print "Dry run only\n";
}

for ( $id = 1; $id <= $maxID; $id++ ) {
	if ( $id % $reportingInterval == 0 ) {
		printf( "%-8d %-5.2f%%\r", $id, $id / $maxID * 100 );
	}
	$revision = Revision::loadFromPageId( $dbr, $id );
	if ( !$revision ) {
		continue;
	}
	$text = $revision->getText();
	if ( !$text ) {
		continue;
	}
	foreach ( $regexes as $regex ) {
		if ( !preg_match( $regex, $text, $matches ) ) {
			continue;
		}
		$title = $revision->getTitle();
		$titleText = $title->getPrefixedText();
		if ( $dryRun ) {
			print "\nFound spam in [[$titleText]]\n";
		} else {
			print "\nCleaning up links to {$matches[0]} in [[$titleText]]\n";
			$match = str_replace( 'http://', '', $matches[0] );
			cleanupArticle( $revision, $regexes, $match );
		}
		// cleanupArticle() tests every regex itself, so one pass per page is
		// enough; carrying on would re-process the page from a stale $revision.
		break;
	}
}
// Just for satisfaction
$scanned = $id - 1;
// With no pages there is nothing left to scan; avoid dividing by zero
$percentDone = $maxID > 0 ? $scanned / $maxID * 100 : 100;
printf( "%-8d %-5.2f%%\n", $scanned, $percentDone );
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |