GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
random.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 
24 #ifndef GRAPHLAB_RANDOM_HPP
25 #define GRAPHLAB_RANDOM_HPP
26 
27 #include <cstdlib>
28 #include <stdint.h>
29 
30 
31 #include <vector>
32 #include <limits>
33 #include <algorithm>
34 
35 #include <boost/random.hpp>
36 #include <graphlab/util/timer.hpp>
37 #include <graphlab/parallel/pthread_tools.hpp>
38 
39 namespace graphlab {
40 
41  /**
42  * \ingroup random
43  * A collection of thread safe random number routines. Each thread
44  * is assigned its own generator however assigning a seed affects
45  * all current and future generators.
46  */
47  namespace random {
48 
49 
50  ///////////////////////////////////////////////////////////////////////
51  //// Underlying generator definition
52 
53 
54 
55  namespace distributions {
56  /**
57  * The uniform distribution struct is used for partial function
58  * specialization. Generating uniform random real numbers is
59  * accomplished slightly differently than for integers.
60  * Therefore the base case is for integers and we then
61  * specialize the two real number types (floats and doubles).
62  */
63  template<typename IntType>
64  struct uniform {
65  typedef boost::uniform_int<IntType> distribution_type;
66  template<typename RealRNG, typename DiscreteRNG>
67  static inline IntType sample(RealRNG& real_rng,
68  DiscreteRNG& discrete_rng,
69  const IntType& min, const IntType& max) {
70  return distribution_type(min, max)(discrete_rng);
71  }
72  };
73  template<>
74  struct uniform<double> {
75  typedef boost::uniform_real<double> distribution_type;
76  template<typename RealRNG, typename DiscreteRNG>
77  static inline double sample(RealRNG& real_rng,
78  DiscreteRNG& discrete_rng,
79  const double& min, const double& max) {
80  return distribution_type(min, max)(real_rng);
81  }
82  };
83  template<>
84  struct uniform<float> {
85  typedef boost::uniform_real<float> distribution_type;
86  template<typename RealRNG, typename DiscreteRNG>
87  static inline float sample(RealRNG& real_rng,
88  DiscreteRNG& discrete_rng,
89  const float& min, const float& max) {
90  return distribution_type(min, max)(real_rng);
91  }
92  };
93  }; // end of namespace distributions
94 
95  /**
96  * The generator class is the base underlying type used to
97  * generate random numbers. User threads should use the functions
98  * provided in the random namespace.
99  */
100  class generator {
101  public:
102  // base Generator types
103  typedef boost::lagged_fibonacci607 real_rng_type;
104  typedef boost::mt11213b discrete_rng_type;
105  typedef boost::rand48 fast_discrete_rng_type;
106 
107  generator() {
108  time_seed();
109  }
110 
111  //! Seed the generator using the default seed
112  inline void seed() {
113  mut.lock();
114  real_rng.seed();
115  discrete_rng.seed();
116  fast_discrete_rng.seed();
117  mut.unlock();
118  }
119 
120  //! Seed the generator nondeterministically
121  void nondet_seed();
122 
123 
124  //! Seed the generator using the current time in microseconds
125  inline void time_seed() {
127  }
128 
129  //! Seed the random number generator based on a number
130  void seed(size_t number) {
131  mut.lock();
132  fast_discrete_rng.seed(number);
133  real_rng.seed(fast_discrete_rng);
134  discrete_rng.seed(fast_discrete_rng);
135  mut.unlock();
136  }
137 
138  //! Seed the generator using another generator
139  void seed(generator& other){
140  mut.lock();
141  real_rng.seed(other.real_rng);
142  discrete_rng.seed(other.discrete_rng);
143  fast_discrete_rng.seed(other.fast_discrete_rng());
144  mut.unlock();
145  }
146 
147  /**
148  * Generate a random number in the uniform real with range [min,
149  * max) or [min, max] if the number type is discrete.
150  */
151  template<typename NumType>
152  inline NumType uniform(const NumType min, const NumType max) {
153  mut.lock();
154  const NumType result = distributions::uniform<NumType>::
155  sample(real_rng, discrete_rng, min, max);
156  mut.unlock();
157  return result;
158  } // end of uniform
159 
160  /**
161  * Generate a random number in the uniform real with range [min,
162  * max) or [min, max] if the number type is discrete.
163  */
164  template<typename NumType>
165  inline NumType fast_uniform(const NumType min, const NumType max) {
166  mut.lock();
167  const NumType result = distributions::uniform<NumType>::
168  sample(real_rng, fast_discrete_rng, min, max);
169  mut.unlock();
170  return result;
171  } // end of fast_uniform
172 
173 
174  /**
175  * Generate a random number in the uniform real with range [min,
176  * max);
177  */
178  inline double gamma(const double alpha = double(1)) {
179  boost::gamma_distribution<double> gamma_dist(alpha);
180  mut.lock();
181  const double result = gamma_dist(real_rng);
182  mut.unlock();
183  return result;
184  } // end of gamma
185 
186 
187  /**
188  * Generate a gaussian random variable with zero mean and unit
189  * variance.
190  */
191  inline double gaussian(const double mean = double(0),
192  const double stdev = double(1)) {
193  boost::normal_distribution<double> normal_dist(mean,stdev);
194  mut.lock();
195  const double result = normal_dist(real_rng);
196  mut.unlock();
197  return result;
198  } // end of gaussian
199 
200  /**
201  * Generate a gaussian random variable with zero mean and unit
202  * variance.
203  */
204  inline double normal(const double mean = double(0),
205  const double stdev = double(1)) {
206  return gaussian(mean, stdev);
207  } // end of normal
208 
209 
210  inline bool bernoulli(const double p = double(0.5)) {
211  boost::bernoulli_distribution<double> dist(p);
212  mut.lock();
213  const double result(dist(discrete_rng));
214  mut.unlock();
215  return result;
216  } // end of bernoulli
217 
218  inline bool fast_bernoulli(const double p = double(0.5)) {
219  boost::bernoulli_distribution<double> dist(p);
220  mut.lock();
221  const double result(dist(fast_discrete_rng));
222  mut.unlock();
223  return result;
224  } // end of bernoulli
225 
226 
227  /**
228  * Draw a random number from a multinomial
229  */
230  template<typename Double>
231  size_t multinomial(const std::vector<Double>& prb) {
232  ASSERT_GT(prb.size(),0);
233  if (prb.size() == 1) { return 0; }
234  Double sum(0);
235  for(size_t i = 0; i < prb.size(); ++i) {
236  ASSERT_GE(prb[i], 0); // Each entry must be P[i] >= 0
237  sum += prb[i];
238  }
239  ASSERT_GT(sum, 0); // Normalizer must be positive
240  // actually draw the random number
241  const Double rnd(uniform<Double>(0,1));
242  size_t ind = 0;
243  for(Double cumsum(prb[ind]/sum);
244  rnd > cumsum && (ind+1) < prb.size();
245  cumsum += (prb[++ind]/sum));
246  return ind;
247  } // end of multinomial
248 
249 
250  /**
251  * Generate a draw from a multinomial using a CDF. This is
252  * slightly more efficient since normalization is not required
253  * and a binary search can be used.
254  */
255  template<typename Double>
256  inline size_t multinomial_cdf(const std::vector<Double>& cdf) {
257  return std::upper_bound(cdf.begin(), cdf.end(),
258  uniform<Double>(0,1)) - cdf.begin();
259 
260  } // end of multinomial_cdf
261 
262 
263  /**
264  * Construct a random permutation
265  */
266  template<typename T>
267  inline std::vector<T> permutation(const size_t nelems) {
268  std::vector<T> perm(nelems);
269  for(T i = 0; i < nelems; ++i) perm[i] = i;
270  shuffle(perm);
271  return perm;
272  } // end of construct a permutation
273 
274  /**
275  * Shuffle a standard vector
276  */
277  template<typename T>
278  void shuffle(std::vector<T>& vec) { shuffle(vec.begin(), vec.end()); }
279 
280  /**
281  * Shuffle a range using the begin and end iterators
282  */
283  template<typename Iterator>
284  void shuffle(Iterator begin, Iterator end) {
285  mut.lock();
286  shuffle_functor functor(*this);
287  std::random_shuffle(begin, end, functor);
288  mut.unlock();
289  } // end of shuffle
290 
291  private:
292  //////////////////////////////////////////////////////
293  /// Data members
294  struct shuffle_functor {
295  generator& gen;
296  inline shuffle_functor(generator& gen) : gen(gen) { }
297  inline std::ptrdiff_t operator()(std::ptrdiff_t end) {
298  return distributions::uniform<ptrdiff_t>::
299  sample(gen.real_rng, gen.fast_discrete_rng, 0, end-1);
300  }
301  };
302 
303 
304  //! The real random number generator
305  real_rng_type real_rng;
306  //! The discrete random number generator
307  discrete_rng_type discrete_rng;
308  //! The fast discrete random number generator
309  fast_discrete_rng_type fast_discrete_rng;
310  //! lock used to access local members
311  mutex mut;
312  }; // end of class generator
313 
314 
315 
316 
317 
318 
319 
320 
321 
322 
323 
324 
325 
326 
327 
328  /**
329  * \ingroup random
330  * Seed all generators using the default seed
331  */
332  void seed();
333 
334  /**
335  * \ingroup random
336  * Seed all generators using an integer
337  */
338  void seed(size_t seed_value);
339 
340  /**
341  * \ingroup random
342  * Seed all generators using a nondeterministic source
343  */
344  void nondet_seed();
345 
346  /**
347  * \ingroup random
348  * Seed all generators using the current time in microseconds
349  */
350  void time_seed();
351 
352 
353  /**
354  * \ingroup random
355  * Get the local generator
356  */
357  generator& get_source();
358 
359  /**
360  * \ingroup random
361  * Generate a random number in the uniform real with range [min,
362  * max) or [min, max] if the number type is discrete.
363  */
364  template<typename NumType>
365  inline NumType uniform(const NumType min, const NumType max) {
366  if (min == max) return min;
367  return get_source().uniform<NumType>(min, max);
368  } // end of uniform
369 
370  /**
371  * \ingroup random
372  * Generate a random number in the uniform real with range [min,
373  * max) or [min, max] if the number type is discrete.
374  */
375  template<typename NumType>
376  inline NumType fast_uniform(const NumType min, const NumType max) {
377  if (min == max) return min;
378  return get_source().fast_uniform<NumType>(min, max);
379  } // end of fast_uniform
380 
381  /**
382  * \ingroup random
383  * Generate a random number between 0 and 1
384  */
385  inline double rand01() { return uniform<double>(0, 1); }
386 
387  /**
388  * \ingroup random
389  * Simulates the standard rand function as defined in cstdlib
390  */
391  inline int rand() { return fast_uniform(0, RAND_MAX); }
392 
393 
394  /**
395  * \ingroup random
396  * Generate a random number from a gamma distribution.
397  */
398  inline double gamma(const double alpha = double(1)) {
399  return get_source().gamma(alpha);
400  }
401 
402 
403 
404  /**
405  * \ingroup random
406  * Generate a gaussian random variable with zero mean and unit
407  * standard deviation.
408  */
409  inline double gaussian(const double mean = double(0),
410  const double stdev = double(1)) {
411  return get_source().gaussian(mean, stdev);
412  }
413 
414  /**
415  * \ingroup random
416  * Generate a gaussian random variable with zero mean and unit
417  * standard deviation.
418  */
419  inline double normal(const double mean = double(0),
420  const double stdev = double(1)) {
421  return get_source().normal(mean, stdev);
422  }
423 
424  /**
425  * \ingroup random
426  * Draw a sample from a bernoulli distribution
427  */
428  inline bool bernoulli(const double p = double(0.5)) {
429  return get_source().bernoulli(p);
430  }
431 
432  /**
433  * \ingroup random
434  * Draw a sample form a bernoulli distribution using the faster generator
435  */
436  inline bool fast_bernoulli(const double p = double(0.5)) {
437  return get_source().fast_bernoulli(p);
438  }
439 
440  /**
441  * \ingroup random
442  * Generate a draw from a multinomial. This function
443  * automatically normalizes as well.
444  */
445  template<typename Double>
446  inline size_t multinomial(const std::vector<Double>& prb) {
447  return get_source().multinomial(prb);
448  }
449 
450 
451  /**
452  * \ingroup random
453  * Generate a draw from a cdf;
454  */
455  template<typename Double>
456  inline size_t multinomial_cdf(const std::vector<Double>& cdf) {
457  return get_source().multinomial_cdf(cdf);
458  }
459 
460 
461 
462  /**
463  * \ingroup random
464  * Construct a random permutation
465  */
466  template<typename T>
467  inline std::vector<T> permutation(const size_t nelems) {
468  return get_source().permutation<T>(nelems);
469  }
470 
471 
472  /**
473  * \ingroup random
474  * Shuffle a standard vector
475  */
476  template<typename T>
477  inline void shuffle(std::vector<T>& vec) {
478  get_source().shuffle(vec);
479  }
480 
481  /**
482  * \ingroup random
483  * Shuffle a range using the begin and end iterators
484  */
485  template<typename Iterator>
486  inline void shuffle(Iterator begin, Iterator end) {
487  get_source().shuffle(begin, end);
488  }
489 
490  /**
491  * Converts a discrete PDF into a CDF
492  */
493  void pdf2cdf(std::vector<double>& pdf);
494 
495 
496 
497  }; // end of random
498 }; // end of graphlab
499 
500 
501 #endif
502