GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dynamic_local_graph.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 #ifndef GRAPHLAB_LOCAL_GRAPH_HPP
24 #define GRAPHLAB_LOCAL_GRAPH_HPP
25 
26 
27 #include <cmath>
28 
29 #include <string>
30 #include <list>
31 #include <vector>
32 #include <set>
33 #include <map>
34 
35 #include <queue>
36 #include <algorithm>
37 #include <functional>
38 #include <fstream>
39 
40 #include <boost/bind.hpp>
41 #include <boost/unordered_set.hpp>
42 #include <boost/type_traits.hpp>
43 #include <boost/typeof/typeof.hpp>
44 #include <boost/iterator/transform_iterator.hpp>
45 
46 #include <graphlab/graph/graph_basic_types.hpp>
47 
49 #include <graphlab/logger/assertions.hpp>
50 
51 #include <graphlab/serialization/iarchive.hpp>
52 #include <graphlab/serialization/oarchive.hpp>
53 
54 #include <graphlab/util/random.hpp>
55 #include <graphlab/graph/graph_storage.hpp>
56 #include <graphlab/macros_def.hpp>
57 
58 
59 
60 namespace graphlab {
61 
62 
63  template<typename VertexData, typename EdgeData>
64  class json_parser;
65 
66  template<typename VertexData, typename EdgeData>
67  class local_graph {
68 
69 
70  /** \internal
71  * \brief The type of the graph structure storage of the local_graph. */
72  typedef graph_storage<VertexData, EdgeData> gstore_type;
73  public:
74 
75  /** The type of the vertex data stored in the local_graph. */
76  typedef VertexData vertex_data_type;
77 
78  /** The type of the edge data stored in the local_graph. */
79  typedef EdgeData edge_data_type;
80 
81  typedef typename gstore_type::edge_info edge_info;
82 
85 
86  friend class json_parser<VertexData, EdgeData>;
87 
88 
89  struct edge_type;
90  struct vertex_type;
91  struct edge_list_type;
92 
93  /** Edge object which provides access to the edge data
94  * and information about it.
95  */
96  struct edge_type {
97  local_graph& lgraph_ref;
98  typename gstore_type::edge_type e;
99  edge_type(local_graph& lgraph_ref,
100  typename gstore_type::edge_type e) :
101  lgraph_ref(lgraph_ref),e(e) { }
102 
103  /// \brief Returns a constant reference to the data on the edge.
104  const edge_data_type& data() const {
105  return lgraph_ref.gstore.edge_data(e);
106  }
107  /// \brief Returns a reference to the data on the edge.
108  edge_data_type& data() {
109  return lgraph_ref.gstore.edge_data(e);
110  }
111  /// \brief Returns the source vertex of the edge.
112  vertex_type source() const {
113  return vertex_type(lgraph_ref, e.source());
114  }
115  /// \brief Returns the target vertex of the edge.
116  vertex_type target() const {
117  return vertex_type(lgraph_ref, e.target());
118  }
119  /**
120  * \brief Returns the id of the edge.*/
121  edge_id_type id() const {
122  return lgraph_ref.gstore.edge_id(e);
123  }
124  };
125 
126  /** Vertex object which provides access to the vertex data
127  * and information about it.
128  */
129  struct vertex_type {
130  local_graph& lgraph_ref;
131  lvid_type vid;
132  vertex_type(local_graph& lgraph_ref, lvid_type vid):lgraph_ref(lgraph_ref),vid(vid) { }
133 
134  /// \brief Returns a constant reference to the data on the vertex.
135  const vertex_data_type& data() const {
136  return lgraph_ref.vertex_data(vid);
137  }
138  /// \brief Returns a reference to the data on the vertex.
139  vertex_data_type& data() {
140  return lgraph_ref.vertex_data(vid);
141  }
142  /// \brief Returns the number of in edges of the vertex.
143  size_t num_in_edges() const {
144  return lgraph_ref.num_in_edges(vid);
145  }
146  /// \brief Returns the number of out edges of the vertex.
147  size_t num_out_edges() const {
148  return lgraph_ref.num_out_edges(vid);
149  }
150  /// \brief Returns the ID of the vertex.
151  lvid_type id() const {
152  return vid;
153  }
154  /// \brief Returns a list of in edges.
156  return edge_list_type(lgraph_ref, lgraph_ref.gstore.in_edges(vid));
157  }
158  /// \brief Returns a list of out edges.
160  return edge_list_type(lgraph_ref, lgraph_ref.gstore.out_edges(vid));
161  }
162  };
163 
164  struct make_edge_type_functor {
165  typedef typename gstore_type::edge_type argument_type;
166  typedef edge_type result_type;
167  local_graph& lgraph_ref;
168  make_edge_type_functor(local_graph& lgraph_ref):lgraph_ref(lgraph_ref) { }
169  result_type operator() (const argument_type et) const {
170  return edge_type(lgraph_ref, et);
171  }
172  };
173 
174  /** \brief Represents an iteratable list of edge_types. */
175  struct edge_list_type {
176  make_edge_type_functor me_functor;
177  typename gstore_type::edge_list elist;
178  typedef boost::transform_iterator<make_edge_type_functor, typename gstore_type::edge_list::iterator> iterator;
179  typedef iterator const_iterator;
180 
181  edge_list_type(local_graph& lgraph_ref, typename gstore_type::edge_list elist): me_functor(lgraph_ref), elist(elist) { }
182  /// \brief Returns the size of the edge list.
183  size_t size() const { return elist.size(); }
184  /// \brief Random access to the list elements.
185  edge_type operator[](size_t i) const {return me_functor(elist[i]);}
186  /// \brief Returns an iterator to the beginning of the list.
187  iterator begin() const { return
188  boost::make_transform_iterator(elist.begin(), me_functor); }
189  /// \brief Returns an iterator to the end of the list.
190  iterator end() const { return
191  boost::make_transform_iterator(elist.end(), me_functor); }
192  bool empty() const { return elist.empty(); }
193  }; // end of class edge_list.
194 
195 
196 
197 
198  public:
199 
200  // CONSTRUCTORS ============================================================>
201 
202  /** Create an empty local_graph. */
203  local_graph() : finalized(false) { }
204 
205  /** Create a local_graph with nverts vertices. */
206  local_graph(size_t nverts) :
207  vertices(nverts),
208  finalized(false) { }
209 
210  // METHODS =================================================================>
211 
212  /**
213  * \brief Resets the local_graph state.
214  */
215  void clear() {
216  finalized = false;
217  gstore.clear();
218  vertices.clear();
219  edges_tmp.clear();
220  }
221 
222  /**
223  * \brief Reset the local_graph state and free up the reserved memory.
224  */
225  void clear_reserve() {
226  clear();
227  edges_tmp.clear();
228  std::vector<VertexData>().swap(vertices);
229  gstore.clear_reserve();
230  }
231 
232 
233  /**
234  * \brief Finalize the local_graph data structure by
235  * sorting edges to maximize the efficiency of graphlab.
236  * This function takes O(|V|log(degree)) time and will
237  * fail if there are any duplicate edges.
238  * Detail implementation depends on the type of graph_storage.
239  * This is also automatically invoked by the engine at start.
240  */
241  void finalize() {
242  if(finalized) return;
243  graphlab::timer mytimer; mytimer.start();
244  gstore.finalize(vertices.size(), edges_tmp);
245  logstream(LOG_INFO) << "Graph finalized in " << mytimer.current_time()
246  << " secs" << std::endl;
247  finalized = true;
248  } // End of finalize
249 
250  /** \brief Get the number of vertices */
251  size_t num_vertices() const {
252  return vertices.size();
253  } // end of num vertices
254 
255  /** \brief Get the number of vertices local to this machine */
256  size_t local_vertices() const {
257  return vertices.size();
258  } // end of num vertices
259 
260  /** \brief Get the number of edges */
261  size_t num_edges() const {
262  if (finalized) {
263  return gstore.edge_size();
264  } else {
265  return edges_tmp.size();
266  }
267  } // end of num edges
268 
269  /** \brief Finds an edge. Returns an empty edge if not exists. */
270  edge_type find(const lvid_type source,
271  const lvid_type target) const {
272  return gstore.find(source, target);
273  } // end of find
274 
275  /** \brief Finds the reverse of an edge. Returns an empty edge if not exists. */
276  edge_type reverse_edge(const edge_type& edge) const {
277  return gstore.find(edge.target(), edge.source());
278  }
279 
280 
281  /**
282  * \brief Creates a vertex containing the vertex data and returns the id
283  * of the new vertex id. Vertex ids are assigned in increasing order with
284  * the first vertex having id 0.
285  */
286  void add_vertex(lvid_type vid,
287  const VertexData& vdata = VertexData() ) {
288  // logstream(LOG_INFO)
289  // << "Attempting add vertex to a finalized local_graph." << std::endl;
290  // // ASSERT_MSG(false, "Add vertex to a finalized local_graph.");
291  if(vid >= vertices.size()) {
292  // Enable capacity doubling if resizing beyond capacity
293  if(vid >= vertices.capacity()) {
294  const size_t new_size = std::max(2 * vertices.capacity(),
295  size_t(vid));
296  vertices.reserve(new_size);
297  }
298  vertices.resize(vid+1);
299  }
300  vertices[vid] = vdata;
301  } // End of add vertex;
302 
303  void reserve(size_t num_vertices) {
304  ASSERT_GE(num_vertices, vertices.size());
305  vertices.reserve(num_vertices);
306  }
307 
308  /**
309  * \brief Add additional vertices up to provided num_vertices. This will
310  * fail if resizing down.
311  */
312  void resize(size_t num_vertices ) {
313  ASSERT_GE(num_vertices, vertices.size());
314  vertices.resize(num_vertices);
315  } // End of resize
316 
317  void reserve_edge_space(size_t n) {
318  edges_tmp.reserve_edge_space(n);
319  }
320  /**
321  * \brief Creates an edge connecting vertex source to vertex target. Any
322  * existing data will be cleared. Should not be called after finalization.
323  */
324  edge_id_type add_edge(lvid_type source, lvid_type target,
325  const EdgeData& edata = EdgeData()) {
326  if (finalized) {
327  logstream(LOG_FATAL)
328  << "Attempting add edge to a finalized local_graph." << std::endl;
329  ASSERT_MSG(false, "Add edge to a finalized local_graph.");
330  }
331 
332  if(source == target) {
333  logstream(LOG_FATAL)
334  << "Attempting to add self edge (" << source << " -> " << target << "). "
335  << "This operation is not permitted in GraphLab!" << std::endl;
336  ASSERT_MSG(source != target, "Attempting to add self edge!");
337  }
338  if(source >= vertices.size() || target >= vertices.size())
339  add_vertex(std::max(source, target));
340 
341  // Add the edge to the set of edge data (this copies the edata)
342  edges_tmp.add_edge(source, target, edata);
343 
344  // This is not the final edge_id, so we always return 0.
345  return 0;
346  } // End of add edge
347 
348  /**
349  * \brief Add edges in block.
350  */
351  void add_edges(const std::vector<lvid_type>& src_arr,
352  const std::vector<lvid_type>& dst_arr,
353  const std::vector<EdgeData>& edata_arr) {
354  ASSERT_TRUE((src_arr.size() == dst_arr.size())
355  && (src_arr.size() == edata_arr.size()));
356  if (finalized) {
357  logstream(LOG_FATAL)
358  << "Attempting add edges to a finalized local_graph." << std::endl;
359  }
360 
361  for (size_t i = 0; i < src_arr.size(); ++i) {
362  lvid_type source = src_arr[i];
363  lvid_type target = dst_arr[i];
364  if ( source >= vertices.size()
365  || target >= vertices.size() ) {
366  logstream(LOG_FATAL)
367  << "Attempting add_edge (" << source
368  << " -> " << target
369  << ") when there are only " << vertices.size()
370  << " vertices" << std::endl;
371  ASSERT_MSG(source < vertices.size(), "Invalid source vertex!");
372  ASSERT_MSG(target < vertices.size(), "Invalid target vertex!");
373  }
374 
375  if(source == target) {
376  logstream(LOG_FATAL)
377  << "Attempting to add self edge (" << source << " -> " << target << "). "
378  << "This operation is not permitted in GraphLab!" << std::endl;
379  ASSERT_MSG(source != target, "Attempting to add self edge!");
380  }
381  }
382  edges_tmp.add_block_edges(src_arr, dst_arr, edata_arr);
383  } // End of add block edges
384 
385 
386  /** \brief Returns a vertex of given ID. */
387  vertex_type vertex(lvid_type vid) {
388  ASSERT_LT(vid, vertices.size());
389  return vertex_type(*this, vid);
390  }
391 
392  /** \brief Returns a vertex of given ID. */
393  const vertex_type vertex(lvid_type vid) const {
394  ASSERT_LT(vid, vertices.size());
395  return vertex_type(*this, vid);
396  }
397 
398  /** \brief Returns a reference to the data stored on the vertex v. */
399  VertexData& vertex_data(lvid_type v) {
400  ASSERT_LT(v, vertices.size());
401  return vertices[v];
402  } // end of data(v)
403 
404  /** \brief Returns a constant reference to the data stored on the vertex v. */
405  const VertexData& vertex_data(lvid_type v) const {
406  ASSERT_LT(v, vertices.size());
407  return vertices[v];
408  } // end of data(v)
409 
410  /** \brief Returns a reference to the data stored on the edge source->target. */
411  EdgeData& edge_data(lvid_type source, lvid_type target) {
412  ASSERT_TRUE(finalized);
413  return gstore.edge_data(source, target);
414  } // end of edge_data(u,v)
415 
416  /** \brief Returns a constant reference to the data stored on the
417  edge source->target. */
418  const EdgeData& edge_data(lvid_type source, lvid_type target) const {
419  ASSERT_TRUE(finalized);
420  return gstore.edge_data(source, target);
421  } // end of edge_data(u,v)
422 
423  /** \brief Returns a reference to the data stored on the edge. */
424  EdgeData& edge_data(const edge_type& edge) {
425  ASSERT_TRUE(finalized);
426  return gstore.edge_data(edge.e);
427  }
428  /** \brief Returns a constant reference to the data stored on the edge. */
429  const EdgeData& edge_data(const edge_type& edge) const {
430  ASSERT_TRUE(finalized);
431  return gstore.edge_data(edge.e);
432  }
433 
434 
435 
436  /** \brief Load the local_graph from an archive */
437  void load(iarchive& arc) {
438  clear();
439  // read the vertices
440  arc >> vertices
441  >> gstore
442  >> finalized;
443  } // end of load
444 
445  /** \brief Save the local_graph to an archive */
446  void save(oarchive& arc) const {
447  // Write the number of edges and vertices
448  arc << vertices
449  << gstore
450  << finalized;
451  } // end of save
452 
453  /** swap two graphs */
454  void swap(local_graph& other) {
455  std::swap(vertices, other.vertices);
456  std::swap(gstore, other.gstore);
457  std::swap(finalized, other.finalized);
458  } // end of swap
459 
460 
461  /** \brief Load the local_graph from a file */
462  void load(const std::string& filename) {
463  std::ifstream fin(filename.c_str());
464  iarchive iarc(fin);
465  iarc >> *this;
466  fin.close();
467  } // end of load
468 
469 
470 
471  /**
472  * \brief save the local_graph to the file given by the filename
473  */
474  void save(const std::string& filename) const {
475  std::ofstream fout(filename.c_str());
476  oarchive oarc(fout);
477  oarc << *this;
478  fout.close();
479  } // end of save
480 
481 
482 
483  /**
484  * \brief save the adjacency structure to a text file.
485  *
486  * Save the adjacency structure as a text file in:
487  * src_Id, dest_Id \n
488  * src_Id, dest_Id \n
489  * format.
490  */
491  void save_adjacency(const std::string& filename) const {
492  std::ofstream fout(filename.c_str());
493  ASSERT_TRUE(fout.good());
494  for(size_t i = 0; i < num_edges(); ++i) {
495  fout << gstore.source(i) << ", " << gstore.target(i) << "\n";
496  ASSERT_TRUE(fout.good());
497  }
498  fout.close();
499  }
500 
501  /****************************************************************************
502  * Internal Functions *
503  * ---------------------- *
504  * These functions functions and types provide internal access to the *
505  * underlying local_graph representation. They should not be used unless you *
506  * *really* know what you are doing. *
507  ****************************************************************************/
508  /** \internal
509  * \brief Returns the internal id of the edge*/
510  edge_id_type edge_id(const edge_type& edge) const {
511  return gstore.edge_id(edge.e);
512  }
513 
514  /**
515  * \internal
516  * \brief Returns the number of in edges of the vertex with the given id. */
517  size_t num_in_edges(const lvid_type v) const {
518  ASSERT_TRUE(finalized);
519  return gstore.num_in_edges(v);
520  }
521 
522  /**
523  * \internal
524  * \brief Returns the number of in edges of the vertex with the given id. */
525  size_t num_out_edges(const lvid_type v) const {
526  ASSERT_TRUE(finalized);
527  return gstore.num_out_edges(v);
528  }
529 
530  /**
531  * \internal
532  * \brief Returns a list of in edges of the vertex with the given id. */
533  edge_list_type in_edges(lvid_type v) {
534  return edge_list_type(*this, gstore.in_edges(v));
535  }
536 
537  /**
538  * \internal
539  * \brief Returns a list of out edges of the vertex with the given id. */
540  edge_list_type out_edges(lvid_type v) {
541  return edge_list_type(*this, gstore.out_edges(v));
542  }
543 
544  /**
545  * \internal
546  * \brief Returns the estimated memory footprint of the local_graph. */
547  size_t estimate_sizeof() const {
548  const size_t vlist_size = sizeof(vertices) +
549  sizeof(VertexData) * vertices.capacity();
550  size_t elist_size = edges_tmp.estimate_sizeof();
551  size_t store_size = gstore.estimate_sizeof();
552  // std::cout << "local_graph: tmplist size: " << (double)elist_size/(1024*1024)
553  // << " gstoreage size: " << (double)store_size/(1024*1024)
554  // << " vdata list size: " << (double)vlist_size/(1024*1024)
555  // << std::endl;
556  return store_size + vlist_size + elist_size;
557  }
558  /** \internal
559  * \brief Returns the column index of CSR stored in the
560  * internal local_graph storage.
561  */
562  const std::vector<lvid_type>& get_out_index_storage() const {
563  return gstore.get_csr_src();
564  }
565  /** \internal
566  * \brief Returns the row index of CSC stored in the
567  * internal local_graph storage.
568  */
569  const std::vector<lvid_type>& get_in_index_storage() const {
570  return gstore.get_csc_dst();
571  }
572  /** \internal
573  * \brief Returns the row pointer of CSR stored in the
574  * internal local_graph storage.
575  */
576  const std::vector<lvid_type>& get_out_edge_storage() const {
577  return gstore.get_csr_dst();
578  }
579 
580  /** \internal
581  * \brief Returns the column pointer of CSC stored in the
582  * internal local_graph storage.
583  */
584  const std::vector<lvid_type>& get_in_edge_storage() const {
585  return gstore.get_csc_src();
586  }
587  /** \internal
588  * \brief Returns the reference of edge data list stored in the
589  * internal local_graph storage.
590  */
591  const std::vector<EdgeData> & get_edge_data_storage() const {
592  return gstore.get_edge_data();
593  }
594 
595  /** \internal
596  * \brief For debug purpose, returns the largest vertex id in the edges_tmp
597  */
598  const lvid_type maxlvid() const {
599  if (edges_tmp.size()) {
600  lvid_type max(0);
601  foreach(lvid_type i, edges_tmp.source_arr)
602  max = std::max(max, i);
603  foreach(lvid_type i, edges_tmp.target_arr)
604  max = std::max(max, i);
605  return max;
606  } else {
607  return lvid_type(-1);
608  }
609  }
610 
611  private:
612  /** Internal edge class */
613 
614 
615  // PRIVATE DATA MEMBERS ===================================================>
616  /** The vertex data is simply a vector of vertex data */
617  std::vector<VertexData> vertices;
618 
619  /** Stores the edge data and edge relationships. */
620  gstore_type gstore;
621 
622  /** The edge data is a vector of edges where each edge stores its
623  source, destination, and data. Used for temporary storage. The
624  data is transferred into CSR+CSC representation in
625  Finalize. This will be cleared after finalized.*/
626  edge_info edges_tmp;
627 
628  /** Mark whether the local_graph is finalized. Graph finalization is a
629  costly procedure but it can also dramatically improve
630  performance. */
631  bool finalized;
632 
633  }; // End of class local_graph
634 
635 
636  template<typename VertexData, typename EdgeData>
637  std::ostream& operator<<(std::ostream& out,
638  const local_graph<VertexData, EdgeData>& local_graph) {
639  for(lvid_type vid = 0; vid < local_graph.num_vertices(); ++vid) {
640  foreach(edge_id_type eid, local_graph.out_edge_ids(vid))
641  out << vid << ", " << local_graph.target(eid) << '\n';
642  }
643  return out;
644  }
645 } // end of namespace graphlab
646 
647 
648 namespace std {
649  /**
650  * Swap two graphs
651  */
652  template<typename VertexData, typename EdgeData>
653  inline void swap(graphlab::local_graph<VertexData,EdgeData>& a,
654  graphlab::local_graph<VertexData,EdgeData>& b) {
655  a.swap(b);
656  } // end of swap
657 
658 }; // end of namespace std
659 
660 #include <graphlab/macros_undef.hpp>
661 #endif
662