GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
local_graph.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 #ifndef GRAPHLAB_LOCAL_GRAPH_HPP
24 #define GRAPHLAB_LOCAL_GRAPH_HPP
25 
26 
27 #include <cmath>
28 
29 #include <string>
30 #include <list>
31 #include <vector>
32 #include <set>
33 #include <map>
34 
35 #include <queue>
36 #include <algorithm>
37 #include <functional>
38 #include <fstream>
39 
40 #include <boost/bind.hpp>
41 #include <boost/unordered_set.hpp>
42 #include <boost/type_traits.hpp>
43 #include <boost/typeof/typeof.hpp>
44 #include <boost/iterator/transform_iterator.hpp>
45 
46 #include <graphlab/graph/graph_basic_types.hpp>
47 
49 #include <graphlab/logger/assertions.hpp>
50 
51 #include <graphlab/serialization/iarchive.hpp>
52 #include <graphlab/serialization/oarchive.hpp>
53 
54 #include <graphlab/util/random.hpp>
55 #include <graphlab/graph/graph_storage.hpp>
56 #include <graphlab/macros_def.hpp>
57 
58 
59 
60 namespace graphlab {
61 
62 
63  template<typename VertexData, typename EdgeData>
64  class json_parser;
65 
66  template<typename VertexData, typename EdgeData>
67  class local_graph {
68 
69 
70  /** \internal
71  * \brief The type of the graph structure storage of the local_graph. */
72  typedef graph_storage<VertexData, EdgeData> gstore_type;
73  public:
74 
75  /** The type of the vertex data stored in the local_graph. */
76  typedef VertexData vertex_data_type;
77 
78  /** The type of the edge data stored in the local_graph. */
79  typedef EdgeData edge_data_type;
80 
81  typedef typename gstore_type::edge_info edge_info;
82 
85 
86  friend class json_parser<VertexData, EdgeData>;
87 
88 
89  struct edge_type;
90  struct vertex_type;
91  struct edge_list_type;
92 
93  /** Edge object which provides access to the edge data
94  * and information about it.
95  */
96  struct edge_type {
97  local_graph& lgraph_ref;
98  typename gstore_type::edge_type e;
99  edge_type(local_graph& lgraph_ref,
100  typename gstore_type::edge_type e) :
101  lgraph_ref(lgraph_ref),e(e) { }
102 
103  /// \brief Returns a constant reference to the data on the edge.
104  const edge_data_type& data() const {
105  return lgraph_ref.gstore.edge_data(e);
106  }
107  /// \brief Returns a reference to the data on the edge.
108  edge_data_type& data() {
109  return lgraph_ref.gstore.edge_data(e);
110  }
111  /// \brief Returns the source vertex of the edge.
112  vertex_type source() const {
113  return vertex_type(lgraph_ref, e.source());
114  }
115  /// \brief Returns the target vertex of the edge.
116  vertex_type target() const {
117  return vertex_type(lgraph_ref, e.target());
118  }
119  /**
120  * \brief Returns the id of the edge.*/
121  edge_id_type id() const {
122  return lgraph_ref.gstore.edge_id(e);
123  }
124  };
125 
126  /** Vertex object which provides access to the vertex data
127  * and information about it.
128  */
129  struct vertex_type {
130  local_graph& lgraph_ref;
131  lvid_type vid;
132  vertex_type(local_graph& lgraph_ref, lvid_type vid):lgraph_ref(lgraph_ref),vid(vid) { }
133 
134  /// \brief Returns a constant reference to the data on the vertex.
135  const vertex_data_type& data() const {
136  return lgraph_ref.vertex_data(vid);
137  }
138  /// \brief Returns a reference to the data on the vertex.
139  vertex_data_type& data() {
140  return lgraph_ref.vertex_data(vid);
141  }
142  /// \brief Returns the number of in edges of the vertex.
143  size_t num_in_edges() const {
144  return lgraph_ref.num_in_edges(vid);
145  }
146  /// \brief Returns the number of out edges of the vertex.
147  size_t num_out_edges() const {
148  return lgraph_ref.num_out_edges(vid);
149  }
150  /// \brief Returns the ID of the vertex.
151  lvid_type id() const {
152  return vid;
153  }
154  /// \brief Returns a list of in edges.
156  return edge_list_type(lgraph_ref, lgraph_ref.gstore.in_edges(vid));
157  }
158  /// \brief Returns a list of out edges.
160  return edge_list_type(lgraph_ref, lgraph_ref.gstore.out_edges(vid));
161  }
162  };
163 
164  struct make_edge_type_functor {
165  typedef typename gstore_type::edge_type argument_type;
166  typedef edge_type result_type;
167  local_graph& lgraph_ref;
168  make_edge_type_functor(local_graph& lgraph_ref):lgraph_ref(lgraph_ref) { }
169  result_type operator() (const argument_type et) const {
170  return edge_type(lgraph_ref, et);
171  }
172  };
173 
174  /** \brief Represents an iteratable list of edge_types. */
175  struct edge_list_type {
176  make_edge_type_functor me_functor;
177  typename gstore_type::edge_list elist;
178  typedef boost::transform_iterator<make_edge_type_functor, typename gstore_type::edge_list::iterator> iterator;
179  typedef iterator const_iterator;
180 
181  edge_list_type(local_graph& lgraph_ref, typename gstore_type::edge_list elist): me_functor(lgraph_ref), elist(elist) { }
182  /// \brief Returns the size of the edge list.
183  size_t size() const { return elist.size(); }
184  /// \brief Random access to the list elements.
185  edge_type operator[](size_t i) const {return me_functor(elist[i]);}
186  /// \brief Returns an iterator to the beginning of the list.
187  iterator begin() const { return
188  boost::make_transform_iterator(elist.begin(), me_functor); }
189  /// \brief Returns an iterator to the end of the list.
190  iterator end() const { return
191  boost::make_transform_iterator(elist.end(), me_functor); }
192  bool empty() const { return elist.empty(); }
193  }; // end of class edge_list.
194 
195 
196 
197 
198  public:
199 
200  // CONSTRUCTORS ============================================================>
201 
202  /** Create an empty local_graph. */
203  local_graph() : finalized(false) { }
204 
205  /** Create a local_graph with nverts vertices. */
206  local_graph(size_t nverts) :
207  vertices(nverts),
208  finalized(false) { }
209 
210  // METHODS =================================================================>
211 
212  /**
213  * \brief Resets the local_graph state.
214  */
215  void clear() {
216  finalized = false;
217  gstore.clear();
218  vertices.clear();
219  edges_tmp.clear();
220  }
221 
222  /**
223  * \brief Reset the local_graph state and free up the reserved memory.
224  */
225  void clear_reserve() {
226  clear();
227  edges_tmp.clear();
228  std::vector<VertexData>().swap(vertices);
229  gstore.clear_reserve();
230  }
231 
232 
233  /**
234  * \brief Finalize the local_graph data structure by
235  * sorting edges to maximize the efficiency of graphlab.
236  * This function takes O(|V|log(degree)) time and will
237  * fail if there are any duplicate edges.
238  * Detail implementation depends on the type of graph_storage.
239  * This is also automatically invoked by the engine at start.
240  */
241  void finalize() {
242  if(finalized) return;
243  graphlab::timer mytimer; mytimer.start();
244  gstore.finalize(vertices.size(), edges_tmp);
245  logstream(LOG_INFO) << "Graph finalized in " << mytimer.current_time()
246  << " secs" << std::endl;
247  finalized = true;
248  } // End of finalize
249 
250  /** \brief Get the number of vertices */
251  size_t num_vertices() const {
252  return vertices.size();
253  } // end of num vertices
254 
255  /** \brief Get the number of vertices local to this machine */
256  size_t local_vertices() const {
257  return vertices.size();
258  } // end of num vertices
259 
260  /** \brief Get the number of edges */
261  size_t num_edges() const {
262  if (finalized) {
263  return gstore.edge_size();
264  } else {
265  return edges_tmp.size();
266  }
267  } // end of num edges
268 
269  /** \brief Finds an edge. Returns an empty edge if not exists. */
270  edge_type find(const lvid_type source,
271  const lvid_type target) const {
272  return gstore.find(source, target);
273  } // end of find
274 
275  /** \brief Finds the reverse of an edge. Returns an empty edge if not exists. */
276  edge_type reverse_edge(const edge_type& edge) const {
277  return gstore.find(edge.target(), edge.source());
278  }
279 
280 
281  /**
282  * \brief Creates a vertex containing the vertex data and returns the id
283  * of the new vertex id. Vertex ids are assigned in increasing order with
284  * the first vertex having id 0.
285  */
286  void add_vertex(lvid_type vid,
287  const VertexData& vdata = VertexData() ) {
288  // logstream(LOG_INFO)
289  // << "Attempting add vertex to a finalized local_graph." << std::endl;
290  // // ASSERT_MSG(false, "Add vertex to a finalized local_graph.");
291  if(vid >= vertices.size()) {
292  // Enable capacity doubling if resizing beyond capacity
293  if(vid >= vertices.capacity()) {
294  const size_t new_size = std::max(2 * vertices.capacity(),
295  size_t(vid));
296  vertices.reserve(new_size);
297  }
298  vertices.resize(vid+1);
299  }
300  vertices[vid] = vdata;
301  } // End of add vertex;
302 
303  void reserve(size_t num_vertices) {
304  ASSERT_GE(num_vertices, vertices.size());
305  vertices.reserve(num_vertices);
306  }
307 
308  /**
309  * \brief Add additional vertices up to provided num_vertices. This will
310  * fail if resizing down.
311  */
312  void resize(size_t num_vertices ) {
313  ASSERT_GE(num_vertices, vertices.size());
314  vertices.resize(num_vertices);
315  } // End of resize
316 
317  void reserve_edge_space(size_t n) {
318  edges_tmp.reserve_edge_space(n);
319  }
320  /**
321  * \brief Creates an edge connecting vertex source to vertex target. Any
322  * existing data will be cleared. Should not be called after finalization.
323  */
324  edge_id_type add_edge(lvid_type source, lvid_type target,
325  const EdgeData& edata = EdgeData()) {
326  if (finalized) {
327  logstream(LOG_FATAL)
328  << "Attempting add edge to a finalized local_graph." << std::endl;
329  ASSERT_MSG(false, "Add edge to a finalized local_graph.");
330  }
331 
332  if(source == target) {
333  logstream(LOG_FATAL)
334  << "Attempting to add self edge (" << source << " -> " << target << "). "
335  << "This operation is not permitted in GraphLab!" << std::endl;
336  ASSERT_MSG(source != target, "Attempting to add self edge!");
337  }
338 
339  if(source >= vertices.size() || target >= vertices.size())
340  add_vertex(std::max(source, target));
341 
342  // Add the edge to the set of edge data (this copies the edata)
343  edges_tmp.add_edge(source, target, edata);
344 
345  // This is not the final edge_id, so we always return 0.
346  return 0;
347  } // End of add edge
348 
349  /**
350  * \brief Add edges in block.
351  */
352  void add_edges(const std::vector<lvid_type>& src_arr,
353  const std::vector<lvid_type>& dst_arr,
354  const std::vector<EdgeData>& edata_arr) {
355  ASSERT_TRUE((src_arr.size() == dst_arr.size())
356  && (src_arr.size() == edata_arr.size()));
357  if (finalized) {
358  logstream(LOG_FATAL)
359  << "Attempting add edges to a finalized local_graph." << std::endl;
360  }
361 
362  for (size_t i = 0; i < src_arr.size(); ++i) {
363  lvid_type source = src_arr[i];
364  lvid_type target = dst_arr[i];
365  if ( source >= vertices.size()
366  || target >= vertices.size() ) {
367  logstream(LOG_FATAL)
368  << "Attempting add_edge (" << source
369  << " -> " << target
370  << ") when there are only " << vertices.size()
371  << " vertices" << std::endl;
372  ASSERT_MSG(source < vertices.size(), "Invalid source vertex!");
373  ASSERT_MSG(target < vertices.size(), "Invalid target vertex!");
374  }
375 
376  if(source == target) {
377  logstream(LOG_FATAL)
378  << "Attempting to add self edge (" << source << " -> " << target << "). "
379  << "This operation is not permitted in GraphLab!" << std::endl;
380  ASSERT_MSG(source != target, "Attempting to add self edge!");
381  }
382  }
383  edges_tmp.add_block_edges(src_arr, dst_arr, edata_arr);
384  } // End of add block edges
385 
386 
387  /** \brief Returns a vertex of given ID. */
388  vertex_type vertex(lvid_type vid) {
389  ASSERT_LT(vid, vertices.size());
390  return vertex_type(*this, vid);
391  }
392 
393  /** \brief Returns a vertex of given ID. */
394  const vertex_type vertex(lvid_type vid) const {
395  ASSERT_LT(vid, vertices.size());
396  return vertex_type(*this, vid);
397  }
398 
399  /** \brief Returns a reference to the data stored on the vertex v. */
400  VertexData& vertex_data(lvid_type v) {
401  ASSERT_LT(v, vertices.size());
402  return vertices[v];
403  } // end of data(v)
404 
405  /** \brief Returns a constant reference to the data stored on the vertex v. */
406  const VertexData& vertex_data(lvid_type v) const {
407  ASSERT_LT(v, vertices.size());
408  return vertices[v];
409  } // end of data(v)
410 
411  /** \brief Returns a reference to the data stored on the edge source->target. */
412  EdgeData& edge_data(lvid_type source, lvid_type target) {
413  ASSERT_TRUE(finalized);
414  return gstore.edge_data(source, target);
415  } // end of edge_data(u,v)
416 
417  /** \brief Returns a constant reference to the data stored on the
418  edge source->target. */
419  const EdgeData& edge_data(lvid_type source, lvid_type target) const {
420  ASSERT_TRUE(finalized);
421  return gstore.edge_data(source, target);
422  } // end of edge_data(u,v)
423 
424  /** \brief Returns a reference to the data stored on the edge. */
425  EdgeData& edge_data(const edge_type& edge) {
426  ASSERT_TRUE(finalized);
427  return gstore.edge_data(edge.e);
428  }
429  /** \brief Returns a constant reference to the data stored on the edge. */
430  const EdgeData& edge_data(const edge_type& edge) const {
431  ASSERT_TRUE(finalized);
432  return gstore.edge_data(edge.e);
433  }
434 
435 
436 
437  /** \brief Load the local_graph from an archive */
438  void load(iarchive& arc) {
439  clear();
440  // read the vertices
441  arc >> vertices
442  >> gstore
443  >> finalized;
444  } // end of load
445 
446  /** \brief Save the local_graph to an archive */
447  void save(oarchive& arc) const {
448  // Write the number of edges and vertices
449  arc << vertices
450  << gstore
451  << finalized;
452  } // end of save
453 
454  /** swap two graphs */
455  void swap(local_graph& other) {
456  std::swap(vertices, other.vertices);
457  std::swap(gstore, other.gstore);
458  std::swap(finalized, other.finalized);
459  } // end of swap
460 
461 
462  /** \brief Load the local_graph from a file */
463  void load(const std::string& filename) {
464  std::ifstream fin(filename.c_str());
465  iarchive iarc(fin);
466  iarc >> *this;
467  fin.close();
468  } // end of load
469 
470 
471 
472  /**
473  * \brief save the local_graph to the file given by the filename
474  */
475  void save(const std::string& filename) const {
476  std::ofstream fout(filename.c_str());
477  oarchive oarc(fout);
478  oarc << *this;
479  fout.close();
480  } // end of save
481 
482 
483 
484  /**
485  * \brief save the adjacency structure to a text file.
486  *
487  * Save the adjacency structure as a text file in:
488  * src_Id, dest_Id \n
489  * src_Id, dest_Id \n
490  * format.
491  */
492  void save_adjacency(const std::string& filename) const {
493  std::ofstream fout(filename.c_str());
494  ASSERT_TRUE(fout.good());
495  for(size_t i = 0; i < num_edges(); ++i) {
496  fout << gstore.source(i) << ", " << gstore.target(i) << "\n";
497  ASSERT_TRUE(fout.good());
498  }
499  fout.close();
500  }
501 
502  /****************************************************************************
503  * Internal Functions *
504  * ---------------------- *
505  * These functions functions and types provide internal access to the *
506  * underlying local_graph representation. They should not be used unless you *
507  * *really* know what you are doing. *
508  ****************************************************************************/
509  /** \internal
510  * \brief Returns the internal id of the edge*/
511  edge_id_type edge_id(const edge_type& edge) const {
512  return gstore.edge_id(edge.e);
513  }
514 
515  /**
516  * \internal
517  * \brief Returns the number of in edges of the vertex with the given id. */
518  size_t num_in_edges(const lvid_type v) const {
519  ASSERT_TRUE(finalized);
520  return gstore.num_in_edges(v);
521  }
522 
523  /**
524  * \internal
525  * \brief Returns the number of in edges of the vertex with the given id. */
526  size_t num_out_edges(const lvid_type v) const {
527  ASSERT_TRUE(finalized);
528  return gstore.num_out_edges(v);
529  }
530 
531  /**
532  * \internal
533  * \brief Returns a list of in edges of the vertex with the given id. */
534  edge_list_type in_edges(lvid_type v) {
535  return edge_list_type(*this, gstore.in_edges(v));
536  }
537 
538  /**
539  * \internal
540  * \brief Returns a list of out edges of the vertex with the given id. */
541  edge_list_type out_edges(lvid_type v) {
542  return edge_list_type(*this, gstore.out_edges(v));
543  }
544 
545  /**
546  * \internal
547  * \brief Returns the estimated memory footprint of the local_graph. */
548  size_t estimate_sizeof() const {
549  const size_t vlist_size = sizeof(vertices) +
550  sizeof(VertexData) * vertices.capacity();
551  size_t elist_size = edges_tmp.estimate_sizeof();
552  size_t store_size = gstore.estimate_sizeof();
553  // std::cout << "local_graph: tmplist size: " << (double)elist_size/(1024*1024)
554  // << " gstoreage size: " << (double)store_size/(1024*1024)
555  // << " vdata list size: " << (double)vlist_size/(1024*1024)
556  // << std::endl;
557  return store_size + vlist_size + elist_size;
558  }
559  /** \internal
560  * \brief Returns the column index of CSR stored in the
561  * internal local_graph storage.
562  */
563  const std::vector<lvid_type>& get_out_index_storage() const {
564  return gstore.get_csr_src();
565  }
566  /** \internal
567  * \brief Returns the row index of CSC stored in the
568  * internal local_graph storage.
569  */
570  const std::vector<lvid_type>& get_in_index_storage() const {
571  return gstore.get_csc_dst();
572  }
573  /** \internal
574  * \brief Returns the row pointer of CSR stored in the
575  * internal local_graph storage.
576  */
577  const std::vector<lvid_type>& get_out_edge_storage() const {
578  return gstore.get_csr_dst();
579  }
580 
581  /** \internal
582  * \brief Returns the column pointer of CSC stored in the
583  * internal local_graph storage.
584  */
585  const std::vector<lvid_type>& get_in_edge_storage() const {
586  return gstore.get_csc_src();
587  }
588  /** \internal
589  * \brief Returns the reference of edge data list stored in the
590  * internal local_graph storage.
591  */
592  const std::vector<EdgeData> & get_edge_data_storage() const {
593  return gstore.get_edge_data();
594  }
595 
596  /** \internal
597  * \brief For debug purpose, returns the largest vertex id in the edges_tmp
598  */
599  const lvid_type maxlvid() const {
600  if (edges_tmp.size()) {
601  lvid_type max(0);
602  foreach(lvid_type i, edges_tmp.source_arr)
603  max = std::max(max, i);
604  foreach(lvid_type i, edges_tmp.target_arr)
605  max = std::max(max, i);
606  return max;
607  } else {
608  return lvid_type(-1);
609  }
610  }
611 
612  private:
613  /** Internal edge class */
614 
615 
616  // PRIVATE DATA MEMBERS ===================================================>
617  /** The vertex data is simply a vector of vertex data */
618  std::vector<VertexData> vertices;
619 
620  /** Stores the edge data and edge relationships. */
621  gstore_type gstore;
622 
623  /** The edge data is a vector of edges where each edge stores its
624  source, destination, and data. Used for temporary storage. The
625  data is transferred into CSR+CSC representation in
626  Finalize. This will be cleared after finalized.*/
627  edge_info edges_tmp;
628 
629  /** Mark whether the local_graph is finalized. Graph finalization is a
630  costly procedure but it can also dramatically improve
631  performance. */
632  bool finalized;
633 
634  }; // End of class local_graph
635 
636 
637  template<typename VertexData, typename EdgeData>
638  std::ostream& operator<<(std::ostream& out,
639  const local_graph<VertexData, EdgeData>& local_graph) {
640  for(lvid_type vid = 0; vid < local_graph.num_vertices(); ++vid) {
641  foreach(edge_id_type eid, local_graph.out_edge_ids(vid))
642  out << vid << ", " << local_graph.target(eid) << '\n';
643  }
644  return out;
645  }
646 } // end of namespace graphlab
647 
648 
649 namespace std {
650  /**
651  * Swap two graphs
652  */
653  template<typename VertexData, typename EdgeData>
654  inline void swap(graphlab::local_graph<VertexData,EdgeData>& a,
655  graphlab::local_graph<VertexData,EdgeData>& b) {
656  a.swap(b);
657  } // end of swap
658 
659 }; // end of namespace std
660 
661 #include <graphlab/macros_undef.hpp>
662 #endif
663