[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/libs/ -> RunningStat.php (source)

   1  <?php
   2  /**
   3   * Compute running mean, variance, and extrema of a stream of numbers.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Profiler
  22   */
  23  
  24  // Needed due to PHP non-bug <https://bugs.php.net/bug.php?id=49828>.
  25  define( 'NEGATIVE_INF', -INF );
  26  
  27  /**
  28   * Represents a running summary of a stream of numbers.
  29   *
  30   * RunningStat instances are accumulator-like objects that provide a set of
  31   * continuously-updated summary statistics for a stream of numbers, without
  32   * requiring that each value be stored. The measures it provides are the
  33   * arithmetic mean, variance, standard deviation, and extrema (min and max);
  34   * together they describe the central tendency and statistical dispersion of a
  35   * set of values.
  36   *
  37   * One RunningStat instance can be merged into another; the resultant
  38   * RunningStat has the state it would have had if it had accumulated each
  39   * individual point. This allows data to be summarized in parallel and in
  40   * stages without loss of fidelity.
  41   *
  42   * Based on a C++ implementation by John D. Cook:
  43   *  <http://www.johndcook.com/standard_deviation.html>
  44   *  <http://www.johndcook.com/skewness_kurtosis.html>
  45   *
  46   * The in-line documentation for this class incorporates content from the
  47   * English Wikipedia articles "Variance", "Algorithms for calculating
  48   * variance", and "Standard deviation".
  49   *
  50   * @since 1.23
  51   */
  52  class RunningStat implements Countable {
  53  
  54      /** @var int Number of samples. **/
  55      public $n = 0;
  56  
  57      /** @var float The first moment (or mean, or expected value). **/
  58      public $m1 = 0.0;
  59  
  60      /** @var float The second central moment (or variance). **/
  61      public $m2 = 0.0;
  62  
  63      /** @var float The least value in the the set. **/
  64      public $min = INF;
  65  
  66      /** @var float The most value in the set. **/
  67      public $max = NEGATIVE_INF;
  68  
  69      /**
  70       * Count the number of accumulated values.
  71       * @return int Number of values
  72       */
  73  	public function count() {
  74          return $this->n;
  75      }
  76  
  77      /**
  78       * Add a number to the data set.
  79       * @param int|float $x Value to add
  80       */
  81  	public function push( $x ) {
  82          $x = (float) $x;
  83  
  84          $this->min = min( $this->min, $x );
  85          $this->max = max( $this->max, $x );
  86  
  87          $n1 = $this->n;
  88          $this->n += 1;
  89          $delta = $x - $this->m1;
  90          $delta_n = $delta / $this->n;
  91          $this->m1 += $delta_n;
  92          $this->m2 += $delta * $delta_n * $n1;
  93      }
  94  
  95      /**
  96       * Get the mean, or expected value.
  97       *
  98       * The arithmetic mean is the sum of all measurements divided by the number
  99       * of observations in the data set.
 100       *
 101       * @return float Mean
 102       */
 103  	public function getMean() {
 104          return $this->m1;
 105      }
 106  
 107      /**
 108       * Get the estimated variance.
 109       *
 110       * Variance measures how far a set of numbers is spread out. A small
 111       * variance indicates that the data points tend to be very close to the
 112       * mean (and hence to each other), while a high variance indicates that the
 113       * data points are very spread out from the mean and from each other.
 114       *
 115       * @return float Estimated variance
 116       */
 117  	public function getVariance() {
 118          if ( $this->n === 0 ) {
 119              // The variance of the empty set is undefined.
 120              return NAN;
 121          } elseif ( $this->n === 1 ) {
 122              return 0.0;
 123          } else {
 124              return $this->m2 / ( $this->n - 1.0 );
 125          }
 126      }
 127  
 128      /**
 129       * Get the estimated stanard deviation.
 130       *
 131       * The standard deviation of a statistical population is the square root of
 132       * its variance. It shows shows how much variation from the mean exists. In
 133       * addition to expressing the variability of a population, the standard
 134       * deviation is commonly used to measure confidence in statistical conclusions.
 135       *
 136       * @return float Estimated standard deviation
 137       */
 138  	public function getStdDev() {
 139          return sqrt( $this->getVariance() );
 140      }
 141  
 142      /**
 143       * Merge another RunningStat instance into this instance.
 144       *
 145       * This instance then has the state it would have had if all the data had
 146       * been accumulated by it alone.
 147       *
 148       * @param RunningStat RunningStat instance to merge into this one
 149       */
 150  	public function merge( RunningStat $other ) {
 151          // If the other RunningStat is empty, there's nothing to do.
 152          if ( $other->n === 0 ) {
 153              return;
 154          }
 155  
 156          // If this RunningStat is empty, copy values from other RunningStat.
 157          if ( $this->n === 0 ) {
 158              $this->n = $other->n;
 159              $this->m1 = $other->m1;
 160              $this->m2 = $other->m2;
 161              $this->min = $other->min;
 162              $this->max = $other->max;
 163              return;
 164          }
 165  
 166          $n = $this->n + $other->n;
 167          $delta = $other->m1 - $this->m1;
 168          $delta2 = $delta * $delta;
 169  
 170          $this->m1 = ( ( $this->n * $this->m1 ) + ( $other->n * $other->m1 ) ) / $n;
 171          $this->m2 = $this->m2 + $other->m2 + ( $delta2 * $this->n * $other->n / $n );
 172          $this->min = min( $this->min, $other->min );
 173          $this->max = max( $this->max, $other->max );
 174          $this->n = $n;
 175      }
 176  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1