GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
fast_multinomial.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 
24 #ifndef GRAPHLAB_FAST_MULTINOMIAL_HPP
25 #define GRAPHLAB_FAST_MULTINOMIAL_HPP
26 
27 #include <vector>
28 #include <algorithm>
29 
30 
31 #include <boost/integer.hpp>
32 #include <boost/random.hpp>
33 
34 #include <graphlab.hpp>
35 
36 #include <graphlab/parallel/pthread_tools.hpp>
37 #include <graphlab/parallel/atomic.hpp>
38 
39 #include <graphlab/util/generics/float_selector.hpp>
40 
41 
42 #include <graphlab/macros_def.hpp>
43 
44 
45 
46 namespace graphlab {
47 
48 
49  /// \ingroup util_internal
50  class fast_multinomial {
51  // system word length float
52  typedef float_selector<sizeof(size_t)>::float_type float_t;
53 
54  //! First leaf index
55  size_t first_leaf_index;
56 
57  //! The number of assignments to the multinomial
58  size_t num_asg;
59 
60  //! The tree datastructure
61  std::vector<float_t> tree;
62 
63  //! the number of positive probability elements
64  atomic<size_t> num_support;
65 
66 
67 
68 
69  // Helper routines
70  // ========================================================>
71 
72  //! Compute the next power of 2
73  // size_t next_powerof2(size_t val) {
74  // size_t powof2 = 1;
75  // while(powof2 < val) powof2 = powof2 * 2;
76  // return powof2;
77  // }
78 
79  //! Clever next power of two bit magic (by ylow)
80  uint64_t next_powerof2(uint64_t val) {
81  --val;
82  val = val | (val >> 1);
83  val = val | (val >> 2);
84  val = val | (val >> 4);
85  val = val | (val >> 8);
86  val = val | (val >> 16);
87  val = val | (val >> 32);
88  return val + 1;
89  }
90 
91 
92 
93 
94  //! Returns the index of the left child of the supplied index.
95  size_t left_child(size_t i) const { return 2 * i + 1; }
96 
97  //! Returns the index of the right child of the supplied index.
98  size_t right_child(size_t i) const { return 2 * i + 2; }
99 
100  //! Returns the index of the parent of the supplied index.
101  size_t parent(size_t i) const { return (i-1) / 2; }
102 
103  //! returns the sibling of
104  size_t sibling(size_t i) const {
105  // the binary here is equivalent to (+1) if leaf is odd and (-1) if
106  // leaf is even
107  return i + (i & 1)*2 - 1;
108  }
109 
110  //! get the tree location of the assignment
111  size_t tree_loc_from_asg(size_t asg) const {
112  size_t loc = asg + first_leaf_index;
113  assert(loc < tree.size());
114  assert(is_leaf(loc));
115  return loc;
116  }
117 
118  //! determine the assignment from the location in the tree
119  size_t asg_from_tree_loc(size_t i) const {
120  assert(is_leaf(i));
121  size_t asg = i - first_leaf_index;
122  assert(asg < num_asg);
123  return asg;
124  }
125 
126 
127  //! Returns true if the index corresponds to a leaf
128  bool is_leaf(size_t i) const {
129  return i >= first_leaf_index;
130  // return left_child(i) > tree.size();
131  }
132 
133  //! Returns true if the location is the root
134  bool is_root(size_t i) const { return i == 0; }
135 
136 
137  /// Returns the index of a leaf sampled proportionate to its
138  /// priority. Returns false on failure
139  bool try_sample(size_t& asg, size_t cpuid) {
140  size_t loc = 0;
141  while ( !is_leaf(loc) ) {
142  // get the left and right priorities
143  float_t left_p = tree.at(left_child(loc));
144  float_t right_p = tree.at(right_child(loc));
145  // if both are zero, the sample has failed. Return
146  if (left_p + right_p == 0) return false;
147  else if (right_p == 0) loc = left_child(loc);
148  else if (left_p == 0) loc = right_child(loc);
149  else {
150  // pick from a bernoulli trial
151  float_t childsum = left_p + right_p;
152  float_t rndnumber = graphlab::random::uniform<float_t>(0,1);
153  if((childsum * rndnumber) < left_p)
154  loc = left_child(loc);
155  else
156  loc = right_child(loc);
157  }
158  }
159  assert(is_leaf(loc));
160  asg = asg_from_tree_loc(loc);
161  assert(asg < num_asg);
162  return true;
163  } // end of sample index
164 
165 
166  /// Propagates a cumulative sum update up the tree.
167  void propagate_change(size_t loc) {
168  // Loop while the location is not the root each time moving up
169  // the tree
170  for( ; !is_root(loc); loc = parent(loc) ) {
171  // Get the sibbling of this location
172  size_t sibling_loc = sibling(loc);
173  assert(sibling_loc < tree.size());
174  // Get the parent
175  size_t parent_loc = parent(loc);
176  assert(parent_loc < tree.size());
177  // Assert that the sibling is infact the sibling
178  assert(parent_loc == parent(sibling_loc));
179  // Get the priority of this location and the sibling
180  // and the parent
181  volatile float_t* sibling1 = &(tree[loc]);
182  volatile float_t* sibling2 = &(tree[sibling_loc]);
183  volatile float_t* parent = &(tree[parent_loc]);
184 
185  // write to the parent. Use a concurrent write mechanism
186  // size_t spin_count = 0;
187  //float_t old_value = *parent;
188  // float_t new_value = *sibling1 + *sibling2;
189  // while(!atomic_compare_and_swap(tree[parent_loc], old_value, new_value)) {
190  // old_value = *parent;
191  // new_value = *sibling1 + *sibling2;
192  // if(++spin_count % 10 == 0) {
193  // std::cout << "Propagate_change: " << spin_count << std::endl;
194  // }
195  // }
196  while(true) {
197  float_t sum = (*sibling1) + (*sibling2);
198  (*parent) = sum;
199  __asm("mfence");
200  float_t sum2 = (*sibling1) + (*sibling2);
201  float_t parentval = (*parent);
202  if (sum2 == parentval) break;
203  }
204  // If the update was successful accomplished by anothe thread
205  // than return
206  //if(old_value == new_value) return;
207  } // end of for loop
208  } // end of propagate change
209 
210 
211  public:
212 
213  /** initialize a fast multinomail */
214  fast_multinomial(size_t num_asg,
215  size_t ncpus) :
216  first_leaf_index(0),
217  num_asg(num_asg) {
218  // // initialize the generators
219  // for(size_t i = 0; i < rngs.size(); ++i) {
220  // rngs[i].seed(rand());
221  // distributions.push_back(distribution_type(rngs[i]));
222  // }
223  // Determine the size of the tree
224  first_leaf_index = next_powerof2(num_asg) - 1;
225  size_t tree_size = first_leaf_index + next_powerof2(num_asg);
226  tree.resize(tree_size, 0.0);
227  }
228 
229  void zero(size_t asg) {
230  assert(asg < num_asg);
231  size_t loc = tree_loc_from_asg(asg);
232  // Use CAS
233  float_t old_value = tree[loc];
234  float_t new_value = 0;
235  while(!atomic_compare_and_swap(tree[loc], old_value, new_value)){
236  old_value = tree[loc];
237  }
238  propagate_change(loc);
239  if(old_value > 0) {
240  num_support.dec();
241  }
242  }
243 
244  //! Set a leaf value
245  void set(size_t asg, float_t value) {
246  assert(asg < num_asg);
247  assert(value >= 0);
248  size_t loc = tree_loc_from_asg(asg);
249  // Use atomic compare and swap to update the value
250  float_t old_value = tree[loc];
251  float_t new_value = value;
252  while(!atomic_compare_and_swap(tree[loc], old_value, new_value)){
253  old_value = tree[loc];
254  }
255  if(old_value == 0 && new_value > 0) {
256  num_support.inc();
257  }
258  propagate_change(loc);
259  // Update support count
260  if(old_value > 0 && new_value == 0) {
261  num_support.dec();
262  }
263  } // end of set
264 
265  //! Set a leaf value
266  void add(size_t asg, float_t value) {
267  assert(asg < num_asg);
268  assert(value >= 0);
269  size_t loc = tree_loc_from_asg(asg);
270  // Use atomic compare and swap to update the value
271  float_t old_value = tree[loc];
272  float_t new_value = value + old_value;
273  while(!atomic_compare_and_swap(tree[loc], old_value, new_value)){
274  old_value = tree[loc];
275  new_value = value + old_value;
276  }
277  // Update support count
278  if(old_value == 0 && new_value > 0) {
279  num_support.inc();
280  }
281  propagate_change(loc);
282  } // end of add
283 
284  //! Set a leaf value
285  void max(size_t asg, float_t value) {
286  assert(asg < num_asg);
287  assert(value >= 0);
288  size_t loc = tree_loc_from_asg(asg);
289  // Use atomic compare and swap to update the value
290  float_t old_value = tree[loc];
291  float_t new_value = std::max(value, old_value);
292  while(!atomic_compare_and_swap(tree[loc], old_value, new_value)){
293  old_value = tree[loc];
294  new_value = std::max(value, old_value);
295  }
296  if(old_value == 0 && new_value > 0) {
297  num_support.inc();
298  }
299  propagate_change(loc);
300  // Update support count
301  if(old_value > 0 && new_value == 0) {
302  num_support.dec();
303  }
304  } // end of set
305 
306  /**
307  * Try to draw a sample from the multinomial. If everything has
308  * probability zero then return false.
309  */
310  bool sample(size_t& ret_asg, size_t cpuid) {
311  // While there is positive support for some assignment
312  // size_t spin_count = 0;
313  volatile float_t *root = &(tree[0]);
314  while(num_support.value > 0 || (*root) > 0) {
315  // Try and get a sample
316  if(try_sample(ret_asg, cpuid)) {
317  assert(ret_asg < num_asg);
318  return true;
319  }
320 
321  // if(++spin_count % 10000 == 0) {
322  // std::cout // << THREAD_ID() << ": "
323  // << " Sample: " << spin_count
324  // << ", " << tree[0]
325  // << ", " << num_support.value
326  // << std::endl;
327  // float_t sum = 0;
328  // for(size_t i = first_leaf_index; i < tree.size(); ++i) {
329  // sum += tree[i];
330  // }
331  // std::cout << "Tree Sum: " << sum << std::endl;
332  // std::getchar();
333  // }
334 
335  } // End of While loop
336 
337  // if(spin_count >= 10){
338  // std::cout // << THREAD_ID() << ": "
339  // << " Sample_recover: " << spin_count << std::endl;
340  // }
341 
342  return false;
343  } // end of sample
344 
345 
346  /**
347  * Try to draw a sample from the multinomial and zero out the
348  * probability of the element that was drawn. If everything has
349  * probability zero then return false.
350  */
351  bool pop(size_t& ret_asg, size_t cpuid) {
352  if(tree.empty()) return false;
353  // While there is positive support for some assignment
354  while(num_support.value > 0 || tree[0] > 0) {
355  // Try and get a sample
356  if(try_sample(ret_asg, cpuid)) {
357  assert(ret_asg < num_asg);
358  // We have a sample but it is possible that another thread
359  // also go this sample so we will use CAS to see who "wins"
360  // and gets to keep the sample and who has to try again
361  size_t loc = tree_loc_from_asg(ret_asg);
362  // Use CAS
363  float_t old_value = tree[loc];
364  float_t new_value = 0;
365  while(!atomic_compare_and_swap(tree[loc], old_value, new_value)){
366  old_value = tree[loc];
367  }
368  // Figure out if we won and get to keep the sample or if
369  // some other thread won and zeroed out the sample before we
370  // got it.
371  if(old_value > 0) {
372  // We win!!! and keep the sample :-)
373  propagate_change(loc);
374  num_support.dec();
375  return true;
376  }
377  // The other thread wins and we have to try agian :-(.
378  }
379  }
380 
381  std::cout << "Queue emptied!: " << tree[0]
382  << ", " << num_support.value << std::endl;
383  print_tree();
384  return false;
385  } // end of pop
386 
387  /** Get the number of assignments with positive support */
388  size_t positive_support() {
389  return num_support.value;
390  }
391 
392  /** print the tree */
393  void print_tree() {
394  for (size_t i = 0; i < std::min(tree.size(), size_t(1000)); ++i) {
395  if(is_leaf(i)) {
396  std::cout << "Leaf(" << asg_from_tree_loc(i)
397  << ", [" << parent(i) << "], "
398  << tree[i] << ") ";
399  } else {
400  std::cout << "Node(" << i << ", "
401  << "[" << left_child(i) << ", "
402  << right_child(i) << "], "
403  << tree[i] << ") ";
404  }
405  }
406  std::cout << std::endl;
407  }
408 
409  float_t get_weight(size_t asg) const {
410  size_t loc = tree_loc_from_asg(asg);
411  return tree[loc];
412  }
413 
414  bool has_support(size_t asg) const {
415  size_t loc = tree_loc_from_asg(asg);
416  return tree[loc] > 0;
417  }
418 
419  void clear() {
420  // not thread safe
421  std::fill(tree.begin(), tree.end(), 0.0);
422  num_support.value = 0;
423  }
424  }; // end of fast_multinomial
425 
426 } // end of namespace
427 
428 #undef float_t
429 
430 #include <graphlab/macros_undef.hpp>
431 #endif
432