[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Compute running mean, variance, and extrema of a stream of numbers.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Profiler
 */

// Needed due to PHP non-bug <https://bugs.php.net/bug.php?id=49828>.
// The named constant is what RunningStat::$max uses as its default value.
define( 'NEGATIVE_INF', -INF );

/**
 * Represents a running summary of a stream of numbers.
 *
 * RunningStat instances are accumulator-like objects that provide a set of
 * continuously-updated summary statistics for a stream of numbers, without
 * requiring that each value be stored. The measures it provides are the
 * arithmetic mean, variance, standard deviation, and extrema (min and max);
 * together they describe the central tendency and statistical dispersion of a
 * set of values.
 *
 * One RunningStat instance can be merged into another; the resultant
 * RunningStat has the state it would have had if it had accumulated each
 * individual point. This allows data to be summarized in parallel and in
 * stages without loss of fidelity.
 *
 * Based on a C++ implementation by John D. Cook:
 *  <http://www.johndcook.com/standard_deviation.html>
 *  <http://www.johndcook.com/skewness_kurtosis.html>
 *
 * The in-line documentation for this class incorporates content from the
 * English Wikipedia articles "Variance", "Algorithms for calculating
 * variance", and "Standard deviation".
 *
 * @since 1.23
 */
class RunningStat implements Countable {

	/** @var int Number of samples. */
	public $n = 0;

	/** @var float The first moment (or mean, or expected value). */
	public $m1 = 0.0;

	/**
	 * @var float Running sum of squared deviations from the mean. This is
	 *   not the variance itself: getVariance() divides it by (n - 1).
	 */
	public $m2 = 0.0;

	/** @var float The least value in the set (INF while the set is empty). */
	public $min = INF;

	/** @var float The greatest value in the set (-INF while the set is empty). */
	public $max = NEGATIVE_INF;

	/**
	 * Count the number of accumulated values.
	 *
	 * The attribute line below is parsed as an ordinary '#' comment before
	 * PHP 8; on PHP 8.1+ it suppresses the deprecation notice raised because
	 * this method does not declare the 'int' return type of Countable::count().
	 *
	 * @return int Number of values
	 */
	#[\ReturnTypeWillChange]
	public function count() {
		return $this->n;
	}

	/**
	 * Add a number to the data set.
	 *
	 * The value is cast to float, then the extrema, the mean and the sum of
	 * squared deviations are updated incrementally.
	 *
	 * @param int|float $x Value to add
	 */
	public function push( $x ) {
		$x = (float)$x;

		$this->min = min( $this->min, $x );
		$this->max = max( $this->max, $x );

		// Sample count before this value was added.
		$n1 = $this->n;
		$this->n += 1;
		// Distance of the new value from the previous mean.
		$delta = $x - $this->m1;
		$delta_n = $delta / $this->n;
		$this->m1 += $delta_n;
		// Equals delta^2 * n1 / n, which is never negative; for the first
		// sample n1 is 0, so m2 stays at 0.
		$this->m2 += $delta * $delta_n * $n1;
	}

	/**
	 * Get the mean, or expected value.
	 *
	 * The arithmetic mean is the sum of all measurements divided by the number
	 * of observations in the data set.
	 *
	 * @return float Mean (0.0 if no value has been accumulated)
	 */
	public function getMean() {
		return $this->m1;
	}

	/**
	 * Get the estimated variance.
	 *
	 * Variance measures how far a set of numbers is spread out. A small
	 * variance indicates that the data points tend to be very close to the
	 * mean (and hence to each other), while a high variance indicates that the
	 * data points are very spread out from the mean and from each other.
	 *
	 * @return float Estimated variance: NAN for an empty set, 0.0 for a
	 *   single value, otherwise m2 / (n - 1)
	 */
	public function getVariance() {
		if ( $this->n === 0 ) {
			// The variance of the empty set is undefined.
			return NAN;
		} elseif ( $this->n === 1 ) {
			// Handled separately so the (n - 1) divisor is never zero.
			return 0.0;
		} else {
			return $this->m2 / ( $this->n - 1.0 );
		}
	}

	/**
	 * Get the estimated standard deviation.
	 *
	 * The standard deviation of a statistical population is the square root of
	 * its variance. It shows how much variation from the mean exists. In
	 * addition to expressing the variability of a population, the standard
	 * deviation is commonly used to measure confidence in statistical conclusions.
	 *
	 * @return float Estimated standard deviation (NAN for an empty set)
	 */
	public function getStdDev() {
		return sqrt( $this->getVariance() );
	}

	/**
	 * Merge another RunningStat instance into this instance.
	 *
	 * This instance then has the state it would have had if all the data had
	 * been accumulated by it alone. The other instance is not modified.
	 *
	 * @param RunningStat $other RunningStat instance to merge into this one
	 */
	public function merge( RunningStat $other ) {
		// If the other RunningStat is empty, there's nothing to do.
		if ( $other->n === 0 ) {
			return;
		}

		// If this RunningStat is empty, copy values from other RunningStat.
		if ( $this->n === 0 ) {
			$this->n = $other->n;
			$this->m1 = $other->m1;
			$this->m2 = $other->m2;
			$this->min = $other->min;
			$this->max = $other->max;
			return;
		}

		$n = $this->n + $other->n;
		$delta = $other->m1 - $this->m1;
		$delta2 = $delta * $delta;

		// Combined mean is the sample-count-weighted average of both means.
		$this->m1 = ( ( $this->n * $this->m1 ) + ( $other->n * $other->m1 ) ) / $n;
		// Both partial sums of squares, plus a correction term for the
		// distance between the two means. $this->n must still hold the old
		// count here, so it is only overwritten on the last line.
		$this->m2 = $this->m2 + $other->m2 + ( $delta2 * $this->n * $other->n / $n );
		$this->min = min( $this->min, $other->min );
		$this->max = max( $this->max, $other->max );
		$this->n = $n;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |