MediaWiki REL1_22
SquidUpdate.php
<?php

class SquidUpdate {

    protected $urlArr;

    public function __construct( $urlArr = array(), $maxTitles = false ) {
        global $wgMaxSquidPurgeTitles;
        if ( $maxTitles === false ) {
            $maxTitles = $wgMaxSquidPurgeTitles;
        }

        // Remove duplicate URLs from list
        $urlArr = array_unique( $urlArr );
        if ( count( $urlArr ) > $maxTitles ) {
            // Truncate to desired maximum URL count
            $urlArr = array_slice( $urlArr, 0, $maxTitles );
        }
        $this->urlArr = $urlArr;
    }
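
    /*
     * Illustrative usage (not part of the original file): duplicate URLs are
     * dropped and the list is capped at $maxTitles, which defaults to
     * $wgMaxSquidPurgeTitles. The URLs below are placeholders.
     *
     *     $update = new SquidUpdate( array(
     *         'http://example.org/wiki/Foo',
     *         'http://example.org/wiki/Foo', // duplicate, removed
     *         'http://example.org/wiki/Bar',
     *     ) );
     *     $update->doUpdate(); // purges the two remaining URLs
     */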

    public static function newFromLinksTo( Title $title ) {
        global $wgMaxSquidPurgeTitles;
        wfProfileIn( __METHOD__ );

        # Get a list of URLs linking to this page
        $dbr = wfGetDB( DB_SLAVE );
        $res = $dbr->select( array( 'links', 'page' ),
            array( 'page_namespace', 'page_title' ),
            array(
                'pl_namespace' => $title->getNamespace(),
                'pl_title' => $title->getDBkey(),
                'pl_from=page_id' ),
            __METHOD__ );
        $blurlArr = $title->getSquidURLs();
        if ( $res->numRows() <= $wgMaxSquidPurgeTitles ) {
            foreach ( $res as $BL ) {
                $tobj = Title::makeTitle( $BL->page_namespace, $BL->page_title );
                $blurlArr[] = $tobj->getInternalURL();
            }
        }

        wfProfileOut( __METHOD__ );
        return new SquidUpdate( $blurlArr );
    }
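
    /*
     * Illustrative usage (not part of the original file): purge a page's own
     * Squid URLs together with the internal URLs of the pages that link to it.
     * 'Main Page' is just a placeholder title.
     *
     *     $title = Title::newFromText( 'Main Page' );
     *     SquidUpdate::newFromLinksTo( $title )->doUpdate();
     */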

    public static function newFromTitles( $titles, $urlArr = array() ) {
        global $wgMaxSquidPurgeTitles;
        $i = 0;
        foreach ( $titles as $title ) {
            $urlArr[] = $title->getInternalURL();
            if ( $i++ > $wgMaxSquidPurgeTitles ) {
                break;
            }
        }
        return new SquidUpdate( $urlArr );
    }
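
    /*
     * Illustrative usage (not part of the original file): build one update
     * from an array of Title objects, optionally seeded with extra URLs
     * (the history URL below is a placeholder).
     *
     *     $update = SquidUpdate::newFromTitles( $titles, array(
     *         'http://example.org/w/index.php?title=Foo&action=history',
     *     ) );
     *     $update->doUpdate();
     */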

    public static function newSimplePurge( Title $title ) {
        $urlArr = $title->getSquidURLs();
        return new SquidUpdate( $urlArr );
    }
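
    /*
     * Illustrative usage (not part of the original file): the common case of
     * purging a single page's own cache URLs, e.g. after an edit. $wikiPage
     * stands in for whatever WikiPage object the caller already has.
     *
     *     SquidUpdate::newSimplePurge( $wikiPage->getTitle() )->doUpdate();
     */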

    public function doUpdate() {
        self::purge( $this->urlArr );
    }

    public static function purge( $urlArr ) {
        global $wgSquidServers, $wgHTCPRouting;

        if ( !$urlArr ) {
            return;
        }

        wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urlArr ) . "\n" );

        if ( $wgHTCPRouting ) {
            self::HTCPPurge( $urlArr );
        }

        wfProfileIn( __METHOD__ );

        // Remove duplicate URLs
        $urlArr = array_unique( $urlArr );
        // Maximum number of parallel connections per squid
        $maxSocketsPerSquid = 8;
        // Number of requests to send per socket
        // 400 seems to be a good tradeoff, opening a socket takes a while
        $urlsPerSocket = 400;
        $socketsPerSquid = ceil( count( $urlArr ) / $urlsPerSocket );
        if ( $socketsPerSquid > $maxSocketsPerSquid ) {
            $socketsPerSquid = $maxSocketsPerSquid;
        }

        $pool = new SquidPurgeClientPool;
        $chunks = array_chunk( $urlArr, ceil( count( $urlArr ) / $socketsPerSquid ) );
        foreach ( $wgSquidServers as $server ) {
            foreach ( $chunks as $chunk ) {
                $client = new SquidPurgeClient( $server );
                foreach ( $chunk as $url ) {
                    $client->queuePurge( $url );
                }
                $pool->addClient( $client );
            }
        }
        $pool->run();

        wfProfileOut( __METHOD__ );
    }
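
    /*
     * Illustrative usage (not part of the original file): purge arbitrary URLs
     * without going through a Title. With $wgSquidServers configured, the
     * deduplicated list is spread over up to 8 sockets per server, aiming for
     * roughly 400 URLs per socket. The URLs below are placeholders.
     *
     *     SquidUpdate::purge( array(
     *         'http://example.org/wiki/Foo',
     *         'http://example.org/wiki/Talk:Foo',
     *     ) );
     */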

    public static function HTCPPurge( $urlArr ) {
        global $wgHTCPRouting, $wgHTCPMulticastTTL;
        wfProfileIn( __METHOD__ );

        // HTCP CLR operation
        $htcpOpCLR = 4;

        // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
        if ( !defined( "IPPROTO_IP" ) ) {
            define( "IPPROTO_IP", 0 );
            define( "IP_MULTICAST_LOOP", 34 );
            define( "IP_MULTICAST_TTL", 33 );
        }

        // pfsockopen doesn't work because we need set_sock_opt
        $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
        if ( !$conn ) {
            $errstr = socket_strerror( socket_last_error() );
            wfDebugLog( 'squid', __METHOD__ .
                ": Error opening UDP socket: $errstr\n" );
            wfProfileOut( __METHOD__ );
            return;
        }

        // Set socket options
        socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
        if ( $wgHTCPMulticastTTL != 1 ) {
            // Set multicast time to live (hop count) option on socket
            socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
                $wgHTCPMulticastTTL );
        }

        // Remove duplicate URLs from collection
        $urlArr = array_unique( $urlArr );
        foreach ( $urlArr as $url ) {
            if ( !is_string( $url ) ) {
                wfProfileOut( __METHOD__ );
                throw new MWException( 'Bad purge URL' );
            }
            $url = self::expand( $url );
            $conf = self::getRuleForURL( $url, $wgHTCPRouting );
            if ( !$conf ) {
                wfDebugLog( 'squid', __METHOD__ .
                    ": No HTCP rule configured for URL {$url}, skipping\n" );
                continue;
            }

            if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
                // Normalize single entries
                $conf = array( $conf );
            }
            foreach ( $conf as $subconf ) {
                if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
                    wfProfileOut( __METHOD__ );
                    throw new MWException( "Invalid HTCP rule for URL $url\n" );
                }
            }

            // Construct a minimal HTCP request datagram
            // as per RFC 2756
            // Opcode 'CLR', no response desired, no auth
            $htcpTransID = rand();

            $htcpSpecifier = pack( 'na4na*na8n',
                4, 'HEAD', strlen( $url ), $url,
                8, 'HTTP/1.0', 0 );

            $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
            $htcpLen = 4 + $htcpDataLen + 2;

            // Note! Squid gets the bit order of the first
            // word wrong, wrt the RFC. Apparently no other
            // implementation exists, so adapt to Squid
            $htcpPacket = pack( 'nxxnCxNxxa*n',
                $htcpLen, $htcpDataLen, $htcpOpCLR,
                $htcpTransID, $htcpSpecifier, 2 );

            wfDebugLog( 'squid', __METHOD__ .
                ": Purging URL $url via HTCP\n" );
            foreach ( $conf as $subconf ) {
                socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
                    $subconf['host'], $subconf['port'] );
            }
        }
        wfProfileOut( __METHOD__ );
    }
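
    /*
     * Illustrative configuration (not part of the original file): HTCPPurge()
     * routes each expanded URL through $wgHTCPRouting, whose keys are regexes
     * (an empty key matches everything) and whose values are either a single
     * host/port pair or a list of pairs. Hosts and ports below are placeholders;
     * 4827 is the conventional HTCP port.
     *
     *     $wgHTCPRouting = array(
     *         '|^https?://upload\.example\.org/|' => array(
     *             'host' => '239.128.0.113', 'port' => 4827 ),
     *         '' => array(
     *             array( 'host' => '239.128.0.112', 'port' => 4827 ),
     *             array( 'host' => '10.0.0.5', 'port' => 4827 ),
     *         ),
     *     );
     */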

    public static function expand( $url ) {
        return wfExpandUrl( $url, PROTO_INTERNAL );
    }
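
    /*
     * Illustrative example (not part of the original file): expand() makes
     * protocol-relative or server-relative URLs absolute using the wiki's
     * internal protocol, so assuming an HTTP-internal setup:
     *
     *     SquidUpdate::expand( '//example.org/wiki/Foo' );
     *     // => 'http://example.org/wiki/Foo'
     */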

    private static function getRuleForURL( $url, $rules ) {
        foreach ( $rules as $regex => $routing ) {
            if ( $regex === '' || preg_match( $regex, $url ) ) {
                return $routing;
            }
        }
        return false;
    }
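
    /*
     * Illustrative note (not part of the original file): rules are tried in
     * array order and the first match wins, so a catch-all '' key should come
     * last. With the sample $wgHTCPRouting sketched above:
     *
     *     self::getRuleForURL( 'http://upload.example.org/a.png', $wgHTCPRouting );
     *     // => array( 'host' => '239.128.0.113', 'port' => 4827 )
     */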
}