GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
small_set.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 
24 #ifndef GRAPHLAB_SMALL_SET_HPP
25 #define GRAPHLAB_SMALL_SET_HPP
26 
27 
28 #include <iostream>
29 #include <set>
30 #include <iterator>
31 #include <algorithm>
32 
33 #include <graphlab/serialization/iarchive.hpp>
34 #include <graphlab/serialization/oarchive.hpp>
35 
36 
37 
38 #include <graphlab/macros_def.hpp>
39 namespace graphlab {
40 
41 
42  /**
43  * This class implements a dense set of fixed maximum size which
44  * support quick operations with stack allocation.
45  */
46  template<size_t MAX_DIM, typename T, typename Less = std::less<T> >
47  class small_set {
48  public: // typedefs
49  typedef T value_type;
50  typedef T& reference;
51  typedef const T& const_reference;
52  typedef ptrdiff_t difference_type;
53  typedef size_t size_type;
54  typedef T* iterator;
55  typedef const T* const_iterator;
56  enum sizes {max_dim_type = MAX_DIM };
57 
58 
59  template< size_t T1, size_t T2 >
60  struct max_type { enum max_value { value = T1 < T2? T2 : T1 }; };
61 
62  struct Equals {
63  inline bool operator()(const T& a, const T& b) const {
64  return !Less()(a,b) && !Less()(b,a);
65  }
66  }; // end of equals
67 
68  public:
69  //! Construct an empty set
70  small_set() : nelems(0) { }
71 
72  //! Create a stack set with just one element
73  small_set(const T& elem) : nelems(1) { values[0] = elem; }
74 
75  /**
76  * Create a stack from an std set by adding each element one at a
77  * time
78  */
79  template<typename OtherT>
80  small_set(const std::set<OtherT>& other) : nelems(other.size()) {
81  ASSERT_LE(nelems, MAX_DIM);
82  size_t index = 0;
83  foreach(const OtherT& elem, other) values[index++] = elem;
84  }
85 
86 
87  /**
88  * Create a stack from an std set by adding each element one at a
89  * time
90  */
91  template<size_t OtherSize>
93  nelems(other.size()) {
94  ASSERT_LE(nelems, MAX_DIM);
95  size_t index = 0;
96  for(const T* elem = other.begin(); elem != other.end(); ++elem)
97  values[index++] = *elem;
98  }
99 
100 
101  //! Get the begin iterator
102  inline T* begin() { return values; }
103 
104  //! get the end iterator
105  inline T* end() { return values + nelems; }
106 
107 
108  //! Get the begin iterator
109  inline const T* begin() const { return values; }
110 
111  //! Get the end iterator
112  inline const T* end() const { return values + nelems; }
113 
114  //! get the size of the set
115  inline size_t size() const { return nelems; }
116 
117  //! determine if there are any elements in the set
118  inline bool empty() const { return size() == 0; }
119 
120 
121  //! test whether the set contains the given element
122  bool contains(const T& elem) const {
123  return std::binary_search(begin(), end(), elem, Less());
124  }
125 
126  //! test whether the set contains the given set of element
127  template<size_t OtherDim>
128  bool contains(const small_set<OtherDim, T, Less>& other) const {
129  return std::includes(begin(), end(),
130  other.begin(), other.end(), Less());
131  }
132 
133 
134  /**
135  * Test if this set is contained in other. If so this returns
136  * true.
137  */
138  template<size_t OtherDim>
139  bool operator<=(const small_set<OtherDim, T, Less>& other) const {
140  return other.contains(*this);
141  }
142 
143 
144  /**
145  * Test if this set is contained in other. If so this returns
146  * true.
147  */
148  template<size_t OtherDim>
149  bool operator<(const small_set<OtherDim, T, Less>& other) const {
150  return other.contains(*this) && size() < other.size();
151  }
152 
153  template<size_t OtherDim>
154  bool operator==(const small_set<OtherDim, T, Less>& other) const {
155  if(size() != other.size()) return false;
156  return std::equal(begin(), end(), other.begin(), Equals());
157  }
158 
159 
160 
161  //! insert an element into this set
162  inline void insert(const T& elem) {
163  *this += elem;
164  }
165 
166 
167  //! insert a range of elements into this set
168  inline void insert(const T* begin, const T* end) {
169  // Ensure that other size is not negative
170  ASSERT_LE(begin, end);
171  // Ensure that the other set has an appropriate size
172  const size_t other_size = end - begin;
173  ASSERT_LE(other_size, MAX_DIM);
174  // Construct a temporary small set representing the range
175  small_set other;
176  for(size_t i = 0; i < other_size; ++i) {
177  other[i] = begin[i];
178  // Ensure that the other set is sorted
179  if(i+1 < other_size) ASSERT_LT(begin[i], begin[i+1]);
180  }
181  // Insert it into this small set using the + operation
182  *this += other;
183  }
184 
185 
186  //! remove an element from the set
187  void erase(const T& elem) { *this -= elem; }
188 
189 
190  //! get the element at a particular location
191  virtual const T& operator[](size_t index) const {
192  ASSERT_LT(index, nelems);
193  return values[index];
194  }
195 
196 
197 
198 
199  // //! Take the union of two sets
200  // inline small_set operator+(const small_set& other) const {
201  // small_set result;
202  // size_t i = 0, j = 0;
203  // while(i < size() && j < other.size()) {
204  // assert(result.nelems < MAX_DIM);
205  // if(values[i] < other.values[j]) // This comes first
206  // result.values[result.nelems++] = values[i++];
207  // else if (values[i] > other.values[j]) // other comes first
208  // result.values[result.nelems++] = other.values[j++];
209  // else { // both are same
210  // result.values[result.nelems++] = values[i++]; j++;
211  // }
212  // }
213  // // finish writing this
214  // while(i < size()) {
215  // assert(result.nelems < MAX_DIM);
216  // result.values[result.nelems++] = values[i++];
217  // }
218  // // finish writing other
219  // while(j < other.size()) {
220  // assert(result.nelems < MAX_DIM);
221  // result.values[result.nelems++] = other.values[j++];
222  // }
223  // return result;
224  // }
225 
226 
227  //! Take the union of two sets
228  inline small_set operator+(const T& elem) const {
229  small_set result(*this);
230  return result += elem;
231  }
232 
233 
234  //! Take the union of two sets
235  template<size_t OtherDim>
239  result_type;
240  result_type result;
241  const T* new_end =
242  std::set_union(begin(), end(),
243  other.begin(), other.end(),
244  safe_iterator(result.begin(),
245  result.absolute_end()),
246  Less()).begin;
247  result.nelems = new_end - result.begin();
248  ASSERT_LE(result.nelems, result_type::max_dim_type);
249  return result;
250  }
251 
252 
253  //! Add the other set to this set
254  template<size_t OtherDim>
256  *this = *this + other;
257  return *this;
258  }
259 
260 
261  //! Add an element to this set. This is "optimized" since it is
262  //! used frequently
263  inline small_set& operator+=(const T& elem) {
264  // // Find where elem should be inserted
265  // size_t index = 0;
266  // for(; index < nelems && values[index] < elem; ++index);
267  T* ptr(std::lower_bound(begin(), end(), elem, Less()));
268  // if the element already exists return
269  if(ptr != end() && !(elem < *ptr) ) return *this;
270  // otherwise the element does not exist so add it at the current
271  // location and increment the number of elements
272  T tmp(elem); nelems++; // advances end
273  ASSERT_LE(nelems, MAX_DIM);
274  // Insert the element at index swapping out the rest of the
275  // array
276  for(; ptr < end(); ++ptr) std::swap(*ptr, tmp);
277  // Finished return
278  return *this;
279  }
280 
281 
282 
283  //! Remove the other set from this set
284  template<size_t OtherDim>
286  // if(other.size() == 0) return *this;
287  // // Backup the old nelems and reset nelems
288  // size_t old_nelems = size(); nelems = 0;
289  // for(size_t i = 0, j = 0; i < old_nelems; ++i ) {
290  // // advance the other index
291  // for( ; j < other.size() && values[i] > other.values[j]; ++j);
292  // // otherwise check equality or move forward
293  // if(j >= other.size() || (values[i] != other.values[j]))
294  // values[nelems++] = values[i];
295  // }
296  // ASSERT_LE(nelems, MAX_DIM);
297  *this = *this - other;
298  return *this;
299  }
300 
301  //! subtract the right set form the left set
302  template<size_t OtherDim>
304  // small_set result = *this;
305  // result -= other;
306  small_set result;
307  T* const new_end =
308  std::set_difference(begin(), end(),
309  other.begin(), other.end(),
310  safe_iterator(result.begin(),
311  result.absolute_end()),
312  Less()).begin;
313  result.nelems = new_end - result.begin();
314  ASSERT_LE(result.nelems, MAX_DIM);
315  return result;
316  }
317 
318  //! Take the intersection of two sets
319  template<size_t OtherDim>
321  small_set result;
322  const T* new_end =
323  std::set_intersection(begin(), end(),
324  other.begin(), other.end(),
325  safe_iterator(result.begin(),
326  result.absolute_end()),
327  Less()).begin;
328  result.nelems = new_end - result.end();
329  ASSERT_LE(result.nelems, MAX_DIM);
330  return result;
331  }
332 
333  //! Take the intersection of two sets
334  template<size_t OtherDim>
336  *this = *this * other;
337  return *this;
338  }
339 
340  //! Load the set form file
341  void load(iarchive& arc) {
342  arc >> nelems;
343  assert(nelems <= MAX_DIM);
344  for(size_t i = 0; i < nelems; ++i) {
345  arc >> values[i];
346  if( i > 0 ) assert(values[i] > values[i-1]);
347  }
348  }
349 
350  //! Save the set to file
351  void save(oarchive& arc) const {
352  arc << nelems;
353  for(size_t i = 0; i < nelems; ++i) arc << values[i];
354  }
355  private:
356  size_t nelems;
357  T values[MAX_DIM];
358 
359 
360  //! get the end iterator to the complete range
361  inline T* absolute_end() { return values + MAX_DIM; }
362 
363 
364  struct safe_iterator :
365  public std::iterator<std::input_iterator_tag, T> {
366  T* begin;
367  const T* end;
368  safe_iterator(const safe_iterator& other) :
369  begin(other.begin), end(other.end) { }
370  safe_iterator(T* begin, const T* end) :
371  begin(begin), end(end) {
372  ASSERT_NE(begin, NULL);
373  ASSERT_NE(end, NULL);
374  ASSERT_LE(begin, end);
375  }
376  inline safe_iterator& operator++() { ++begin; return *this; }
377  inline safe_iterator& operator++(int) {
378  safe_iterator tmp(*this); operator++(); return tmp;
379  }
380  inline bool operator==(const safe_iterator& other) {
381  ASSERT_EQ(end, other.end);
382  return begin == other.begin;
383  }
384  inline bool operator!=(const safe_iterator& other) {
385  return !operator==(other);
386  }
387  T& operator*() { ASSERT_LT(begin, end); return *begin; }
388  };
389  }; // end of small_set
390 
391  template<size_t MAX_DIM, typename T>
392  std::ostream&
393  operator<<(std::ostream& out, const graphlab::small_set<MAX_DIM, T>& set) {
394  out << "{";
395  for(size_t i = 0; i < set.size(); ++i) {
396  out << set[i];
397  if(i + 1 < set.size()) out << ", ";
398  }
399  out << "}";
400  return out;
401  }
402 }; // end of graphlab namespace
403 
404 
405 
406 
407 #include <graphlab/macros_undef.hpp>
408 #endif
409