[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/libs/ -> MultiHttpClient.php (source)

   1  <?php
   2  /**
   3   * HTTP service client
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   */
  22  
  23  /**
  24   * Class to handle concurrent HTTP requests
  25   *
  26   * HTTP request maps are arrays that use the following format:
  27   *   - method   : GET/HEAD/PUT/POST/DELETE
  28   *   - url      : HTTP/HTTPS URL
  29   *   - query    : <query parameter field/value associative array> (uses RFC 3986)
  30   *   - headers  : <header name/value associative array>
  31   *   - body     : source to get the HTTP request body from;
  32   *                this can simply be a string (always), a resource for
  33   *                PUT requests, and a field/value array for POST request;
  34   *                array bodies are encoded as multipart/form-data and strings
  35   *                use application/x-www-form-urlencoded (headers sent automatically)
  36   *   - stream   : resource to stream the HTTP response body to
  37   * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
  38   *
  39   * @author Aaron Schulz
  40   * @since 1.23
  41   */
  42  class MultiHttpClient {
  43      /** @var resource */
  44      protected $multiHandle = null; // curl_multi handle
  45      /** @var string|null SSL certificates path  */
  46      protected $caBundlePath;
  47      /** @var integer */
  48      protected $connTimeout = 10;
  49      /** @var integer */
  50      protected $reqTimeout = 300;
  51      /** @var bool */
  52      protected $usePipelining = false;
  53      /** @var integer */
  54      protected $maxConnsPerHost = 50;
  55  
  56      /**
  57       * @param array $options
  58       *   - connTimeout     : default connection timeout
  59       *   - reqTimeout      : default request timeout
  60       *   - usePipelining   : whether to use HTTP pipelining if possible (for all hosts)
  61       *   - maxConnsPerHost : maximum number of concurrent connections (per host)
  62       */
  63  	public function __construct( array $options ) {
  64          if ( isset( $options['caBundlePath'] ) ) {
  65              $this->caBundlePath = $options['caBundlePath'];
  66              if ( !file_exists( $this->caBundlePath ) ) {
  67                  throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath );
  68              }
  69          }
  70          static $opts = array( 'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost' );
  71          foreach ( $opts as $key ) {
  72              if ( isset( $options[$key] ) ) {
  73                  $this->$key = $options[$key];
  74              }
  75          }
  76      }
  77  
  78      /**
  79       * Execute an HTTP(S) request
  80       *
  81       * This method returns a response map of:
  82        *   - code    : HTTP response code or 0 if there was a serious cURL error
  83        *   - reason  : HTTP response reason (empty if there was a serious cURL error)
  84        *   - headers : <header name/value associative array>
  85        *   - body    : HTTP response body or resource (if "stream" was set)
  86        *   - err     : Any cURL error string
  87        * The map also stores integer-indexed copies of these values. This lets callers do:
  88       *    <code>
  89       *        list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req );
  90       *  </code>
  91       * @param array $req HTTP request array
  92       * @param array $opts
  93       *   - connTimeout    : connection timeout per request
  94       *   - reqTimeout     : post-connection timeout per request
  95       * @return array Response array for request
  96       */
  97  	final public function run( array $req, array $opts = array() ) {
  98          $req = $this->runMulti( array( $req ), $opts );
  99          return $req[0]['response'];
 100      }
 101  
 102      /**
 103       * Execute a set of HTTP(S) requests concurrently
 104       *
 105       * The maps are returned by this method with the 'response' field set to a map of:
 106        *   - code    : HTTP response code or 0 if there was a serious cURL error
 107        *   - reason  : HTTP response reason (empty if there was a serious cURL error)
 108        *   - headers : <header name/value associative array>
 109        *   - body    : HTTP response body or resource (if "stream" was set)
 110        *   - err     : Any cURL error string
 111        * The map also stores integer-indexed copies of these values. This lets callers do:
 112       *    <code>
 113       *        list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response'];
 114       *  </code>
 115       * All headers in the 'headers' field are normalized to use lower case names.
 116       * This is true for the request headers and the response headers. Integer-indexed
 117       * method/URL entries will also be changed to use the corresponding string keys.
 118       *
 119       * @param array $reqs Map of HTTP request arrays
 120       * @param array $opts
 121       *   - connTimeout     : connection timeout per request
 122       *   - reqTimeout      : post-connection timeout per request
 123       *   - usePipelining   : whether to use HTTP pipelining if possible
 124       *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 125       * @return array $reqs With response array populated for each
 126       */
 127  	public function runMulti( array $reqs, array $opts = array() ) {
 128          $chm = $this->getCurlMulti();
 129  
 130          // Normalize $reqs and add all of the required cURL handles...
 131          $handles = array();
 132          foreach ( $reqs as $index => &$req ) {
 133              $req['response'] = array(
 134                  'code'     => 0,
 135                  'reason'   => '',
 136                  'headers'  => array(),
 137                  'body'     => '',
 138                  'error'    => ''
 139              );
 140              if ( isset( $req[0] ) ) {
 141                  $req['method'] = $req[0]; // short-form
 142                  unset( $req[0] );
 143              }
 144              if ( isset( $req[1] ) ) {
 145                  $req['url'] = $req[1]; // short-form
 146                  unset( $req[1] );
 147              }
 148              if ( !isset( $req['method'] ) ) {
 149                  throw new Exception( "Request has no 'method' field set." );
 150              } elseif ( !isset( $req['url'] ) ) {
 151                  throw new Exception( "Request has no 'url' field set." );
 152              }
 153              $req['query'] = isset( $req['query'] ) ? $req['query'] : array();
 154              $headers = array(); // normalized headers
 155              if ( isset( $req['headers'] ) ) {
 156                  foreach ( $req['headers'] as $name => $value ) {
 157                      $headers[strtolower( $name )] = $value;
 158                  }
 159              }
 160              $req['headers'] = $headers;
 161              if ( !isset( $req['body'] ) ) {
 162                  $req['body'] = '';
 163                  $req['headers']['content-length'] = 0;
 164              }
 165              $handles[$index] = $this->getCurlHandle( $req, $opts );
 166              if ( count( $reqs ) > 1 ) {
 167                  // https://github.com/guzzle/guzzle/issues/349
 168                  curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
 169              }
 170          }
 171          unset( $req ); // don't assign over this by accident
 172  
 173          $indexes = array_keys( $reqs );
 174          if ( function_exists( 'curl_multi_setopt' ) ) { // PHP 5.5
 175              if ( isset( $opts['usePipelining'] ) ) {
 176                  curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
 177              }
 178              if ( isset( $opts['maxConnsPerHost'] ) ) {
 179                  // Keep these sockets around as they may be needed later in the request
 180                  curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
 181              }
 182          }
 183  
 184          // @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
 185          $batches = array_chunk( $indexes, $this->maxConnsPerHost );
 186  
 187          foreach ( $batches as $batch ) {
 188              // Attach all cURL handles for this batch
 189              foreach ( $batch as $index ) {
 190                  curl_multi_add_handle( $chm, $handles[$index] );
 191              }
 192              // Execute the cURL handles concurrently...
 193              $active = null; // handles still being processed
 194              do {
 195                  // Do any available work...
 196                  do {
 197                      $mrc = curl_multi_exec( $chm, $active );
 198                  } while ( $mrc == CURLM_CALL_MULTI_PERFORM );
 199                  // Wait (if possible) for available work...
 200                  if ( $active > 0 && $mrc == CURLM_OK ) {
 201                      if ( curl_multi_select( $chm, 10 ) == -1 ) {
 202                          // PHP bug 63411; http://curl.haxx.se/libcurl/c/curl_multi_fdset.html
 203                          usleep( 5000 ); // 5ms
 204                      }
 205                  }
 206              } while ( $active > 0 && $mrc == CURLM_OK );
 207          }
 208  
 209          // Remove all of the added cURL handles and check for errors...
 210          foreach ( $reqs as $index => &$req ) {
 211              $ch = $handles[$index];
 212              curl_multi_remove_handle( $chm, $ch );
 213              if ( curl_errno( $ch ) !== 0 ) {
 214                  $req['response']['error'] = "(curl error: " .
 215                      curl_errno( $ch ) . ") " . curl_error( $ch );
 216              }
 217              // For convenience with the list() operator
 218              $req['response'][0] = $req['response']['code'];
 219              $req['response'][1] = $req['response']['reason'];
 220              $req['response'][2] = $req['response']['headers'];
 221              $req['response'][3] = $req['response']['body'];
 222              $req['response'][4] = $req['response']['error'];
 223              curl_close( $ch );
 224              // Close any string wrapper file handles
 225              if ( isset( $req['_closeHandle'] ) ) {
 226                  fclose( $req['_closeHandle'] );
 227                  unset( $req['_closeHandle'] );
 228              }
 229          }
 230          unset( $req ); // don't assign over this by accident
 231  
 232          // Restore the default settings
 233          if ( function_exists( 'curl_multi_setopt' ) ) { // PHP 5.5
 234              curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
 235              curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
 236          }
 237  
 238          return $reqs;
 239      }
 240  
 241      /**
 242       * @param array $req HTTP request map
 243       * @param array $opts
 244       *   - connTimeout    : default connection timeout
 245       *   - reqTimeout     : default request timeout
 246       * @return resource
 247       */
 248  	protected function getCurlHandle( array &$req, array $opts = array() ) {
 249          $ch = curl_init();
 250  
 251          curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
 252              isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
 253          curl_setopt( $ch, CURLOPT_TIMEOUT,
 254              isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
 255          curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
 256          curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
 257          curl_setopt( $ch, CURLOPT_HEADER, 0 );
 258          if ( !is_null( $this->caBundlePath ) ) {
 259              curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
 260              curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
 261          }
 262          curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
 263  
 264          $url = $req['url'];
 265          // PHP_QUERY_RFC3986 is PHP 5.4+ only
 266          $query = str_replace(
 267              array( '+', '%7E' ),
 268              array( '%20', '~' ),
 269              http_build_query( $req['query'], '', '&' )
 270          );
 271          if ( $query != '' ) {
 272              $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
 273          }
 274          curl_setopt( $ch, CURLOPT_URL, $url );
 275  
 276          curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
 277          if ( $req['method'] === 'HEAD' ) {
 278              curl_setopt( $ch, CURLOPT_NOBODY, 1 );
 279          }
 280  
 281          if ( $req['method'] === 'PUT' ) {
 282              curl_setopt( $ch, CURLOPT_PUT, 1 );
 283              if ( is_resource( $req['body'] ) ) {
 284                  curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
 285                  if ( isset( $req['headers']['content-length'] ) ) {
 286                      curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
 287                  } elseif ( isset( $req['headers']['transfer-encoding'] ) &&
 288                      $req['headers']['transfer-encoding'] === 'chunks'
 289                  ) {
 290                      curl_setopt( $ch, CURLOPT_UPLOAD, true );
 291                  } else {
 292                      throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
 293                  }
 294              } elseif ( $req['body'] !== '' ) {
 295                  $fp = fopen( "php://temp", "wb+" );
 296                  fwrite( $fp, $req['body'], strlen( $req['body'] ) );
 297                  rewind( $fp );
 298                  curl_setopt( $ch, CURLOPT_INFILE, $fp );
 299                  curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
 300                  $req['_closeHandle'] = $fp; // remember to close this later
 301              } else {
 302                  curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
 303              }
 304              curl_setopt( $ch, CURLOPT_READFUNCTION,
 305                  function ( $ch, $fd, $length ) {
 306                      $data = fread( $fd, $length );
 307                      $len = strlen( $data );
 308                      return $data;
 309                  }
 310              );
 311          } elseif ( $req['method'] === 'POST' ) {
 312              curl_setopt( $ch, CURLOPT_POST, 1 );
 313              curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
 314          } else {
 315              if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
 316                  throw new Exception( "HTTP body specified for a non PUT/POST request." );
 317              }
 318              $req['headers']['content-length'] = 0;
 319          }
 320  
 321          $headers = array();
 322          foreach ( $req['headers'] as $name => $value ) {
 323              if ( strpos( $name, ': ' ) ) {
 324                  throw new Exception( "Headers cannot have ':' in the name." );
 325              }
 326              $headers[] = $name . ': ' . trim( $value );
 327          }
 328          curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );
 329  
 330          curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
 331              function ( $ch, $header ) use ( &$req ) {
 332                  $length = strlen( $header );
 333                  $matches = array();
 334                  if ( preg_match( "/^(HTTP\/1\.[01]) (\d{3}) (.*)/", $header, $matches ) ) {
 335                      $req['response']['code'] = (int)$matches[2];
 336                      $req['response']['reason'] = trim( $matches[3] );
 337                      return $length;
 338                  }
 339                  if ( strpos( $header, ":" ) === false ) {
 340                      return $length;
 341                  }
 342                  list( $name, $value ) = explode( ":", $header, 2 );
 343                  $req['response']['headers'][strtolower( $name )] = trim( $value );
 344                  return $length;
 345              }
 346          );
 347  
 348          if ( isset( $req['stream'] ) ) {
 349              // Don't just use CURLOPT_FILE as that might give:
 350              // curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
 351              // The callback here handles both normal files and php://temp handles.
 352              curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
 353                  function ( $ch, $data ) use ( &$req ) {
 354                      return fwrite( $req['stream'], $data );
 355                  }
 356              );
 357          } else {
 358              curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
 359                  function ( $ch, $data ) use ( &$req ) {
 360                      $req['response']['body'] .= $data;
 361                      return strlen( $data );
 362                  }
 363              );
 364          }
 365  
 366          return $ch;
 367      }
 368  
 369      /**
 370       * @return resource
 371       */
 372  	protected function getCurlMulti() {
 373          if ( !$this->multiHandle ) {
 374              $cmh = curl_multi_init();
 375              if ( function_exists( 'curl_multi_setopt' ) ) { // PHP 5.5
 376                  curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining );
 377                  curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
 378              }
 379              $this->multiHandle = $cmh;
 380          }
 381          return $this->multiHandle;
 382      }
 383  
 384  	function __destruct() {
 385          if ( $this->multiHandle ) {
 386              curl_multi_close( $this->multiHandle );
 387          }
 388      }
 389  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1