GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
sharding_constraint.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 #ifndef GRAPHLAB_DISTRIBUTED_SHARDING_CONSTRAINT_HPP
24 #define GRAPHLAB_DISTRIBUTED_SHARDING_CONSTRAINT_HPP
25 
26 #include <graphlab/graph/graph_basic_types.hpp>
27 #include <graphlab/util/generate_pds.hpp>
28 #include <algorithm>
29 #include <vector>
30 
31 
32 /**
33  * This class defines the dependencies among the shards when using
34  * a constrained partitioning algorithm.
35  *
36  * In constrained partitioning, vertices are assgined to a master shard
37  * using hash function on the vids. Each shard S masters a partition of
38  * vertices: V_s.
39  *
40  * Let Ai be the set of shards that Shard i depends on. Then the partitioning
41  * algorithm can only put edges with either ends in V_si into Ai. For example,
42  * Shard i is the master of vertex u, and Shard j is the master of vertex v,
43  * then edge u->v must be placed into Ai \intersect Aj.
44  *
45  * This class currently has two implementations of the shard constraints. One
46  * construction is based on a grid, and the other is based on perfect difference set.
47  * Both algorithms guarentees that Ai \intersect Aj is non-empty.
48  *
49  * \note: grid methods requires the number of shards to be a perfect square number. pds
50  * requires the number of shards to be p^2 + p + 1 where p is a prime number.
51  *
52  */
53 namespace graphlab {
54  class sharding_constraint {
55  size_t nshards;
56  std::vector<std::vector<procid_t> > constraint_graph;
57  public:
58  sharding_constraint(size_t num_shards, std::string method) {
59  nshards = num_shards;
60  // ignore the method input for now, only construct grid graph.
61  // assuming nshards is perfect square
62  if (method == "grid") {
63  make_grid_constraint();
64  } else if (method == "pds") {
65  make_pds_constraint();
66  } else {
67  logstream(LOG_FATAL) << "Unknown sharding constraint method: " << method << std::endl;
68  }
69  check();
70  }
71 
72  bool get_neighbors (procid_t shard, std::vector<procid_t>& neighbors) {
73  ASSERT_LT(shard, nshards);
74  neighbors.clear();
75  std::vector<procid_t>& ls = constraint_graph[shard];
76  for (size_t i = 0; i < ls.size(); ++i)
77  neighbors.push_back(ls[i]);
78  return true;
79  }
80 
81  bool get_joint_neighbors (procid_t shardi, procid_t shardj, std::vector<procid_t>& neighbors) {
82  ASSERT_EQ(neighbors.size(), 0);
83  ASSERT_LT(shardi, nshards);
84  ASSERT_LT(shardj, nshards);
85  // if (shardi == shardj) {
86  // neighbors.push_back(shardi);
87  // return true;
88  // }
89 
90  std::vector<procid_t>& ls1 = constraint_graph[shardi];
91  std::vector<procid_t>& ls2 = constraint_graph[shardj];
92  neighbors.clear();
93  size_t i = 0;
94  size_t j = 0;
95  while (i < ls1.size() && j < ls2.size()) {
96  if (ls1[i] == ls2[j]) {
97  neighbors.push_back(ls1[i]);
98  ++i; ++j;
99  } else if (ls1[i] < ls2[j]) {
100  ++i;
101  } else {
102  ++j;
103  }
104  }
105  return true;
106  }
107 
108  private:
109  void make_grid_constraint() {
110  size_t ncols, nrows;
111  ncols = nrows = (size_t)sqrt(nshards);
112  ASSERT_EQ(ncols*nrows, nshards);
113 
114  for (size_t i = 0; i < nshards; i++) {
115  std::vector<procid_t> adjlist;
116  // add self
117  adjlist.push_back(i);
118 
119  // add the row of i
120  size_t rowbegin = (i/ncols) * ncols;
121  for (size_t j = rowbegin; j < rowbegin + ncols; ++j)
122  if (i != j) adjlist.push_back(j);
123 
124  // add the col of i
125  for (size_t j = i % ncols; j < nshards; j+=ncols)
126  if (i != j) adjlist.push_back(j);
127 
128  std::sort(adjlist.begin(), adjlist.end());
129  constraint_graph.push_back(adjlist);
130  }
131  }
132 
133  void make_pds_constraint() {
134  int p = floor(sqrt(nshards-1));
135  ASSERT_EQ((p*p+p+1), nshards);
136  pds pds_generator;
137  std::vector<size_t> results = pds_generator.get_pds(p);
138  for (size_t i = 0; i < nshards; i++) {
139  std::vector<procid_t> adjlist;
140  for (size_t j = 0; j < results.size(); j++) {
141  adjlist.push_back( (results[j] + i) % nshards);
142  }
143  std::sort(adjlist.begin(), adjlist.end());
144  constraint_graph.push_back(adjlist);
145  }
146  }
147 
148  void check() {
149  // debug
150  // for (size_t i = 0; i < constraint_graph.size(); ++i) {
151  // std::vector<procid_t> adjlist = constraint_graph[i];
152  // std::cout << i << ": [";
153  // for (size_t j = 0; j < adjlist.size(); j++)
154  // std::cout << adjlist[j] << " ";
155  // std::cout << "]" << std::endl;
156  // }
157  for (size_t i = 0; i < nshards; ++i) {
158  for (size_t j = i+1; j < nshards; ++j) {
159  std::vector<procid_t> ls;
160  get_joint_neighbors(i, j, ls);
161  ASSERT_GT(ls.size(), 0);
162  }
163  }
164  }
165  }; // end of sharding_constraint
166 }; // end of namespace graphlab
167 #endif