GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
simple_pagerank.cpp
1 /*
2  * Copyright (c) 2009 Carnegie Mellon University.
3  * All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an "AS
13  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
14  * express or implied. See the License for the specific language
15  * governing permissions and limitations under the License.
16  *
17  * For more about this software visit:
18  *
19  * http://www.graphlab.ml.cmu.edu
20  *
21  */
22 
23 #include <vector>
24 #include <string>
25 #include <fstream>
26 
27 #include <graphlab.hpp>
28 // #include <graphlab/macros_def.hpp>
29 
30 // Global random reset probability
31 float RESET_PROB = 0.15;
32 
33 float TOLERANCE = 1.0E-2;
34 
35 // The vertex data is just the pagerank value (a float)
36 typedef float vertex_data_type;
37 
38 // There is no edge data in the pagerank application
39 typedef graphlab::empty edge_data_type;
40 
41 // The graph type is determined by the vertex and edge data types
43 
44 /*
45  * A simple function used by graph.transform_vertices(init_vertex);
46  * to initialize the vertes data.
47  */
48 void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; }
49 
50 
51 
52 /*
53  * The factorized page rank update function extends ivertex_program
54  * specifying the:
55  *
56  * 1) graph_type
57  * 2) gather_type: float (returned by the gather function). Note
58  * that the gather type is not strictly needed here since it is
59  * assumed to be the same as the vertex_data_type unless
60  * otherwise specified
61  *
62  * In addition ivertex program also takes a message type which is
63  * assumed to be empty. Since we do not need messages no message type
64  * is provided.
65  *
66  * pagerank also extends graphlab::IS_POD_TYPE (is plain old data type)
67  * which tells graphlab that the pagerank program can be serialized
68  * (converted to a byte stream) by directly reading its in memory
69  * representation. If a vertex program does not exted
70  * graphlab::IS_POD_TYPE it must implement load and save functions.
71  */
72 class pagerank :
73  public graphlab::ivertex_program<graph_type, float>,
74  public graphlab::IS_POD_TYPE {
75  float last_change;
76 public:
77  /* Gather the weighted rank of the adjacent page */
78  float gather(icontext_type& context, const vertex_type& vertex,
79  edge_type& edge) const {
80  return ((1.0 - RESET_PROB) / edge.source().num_out_edges()) *
81  edge.source().data();
82  }
83 
84  /* Use the total rank of adjacent pages to update this page */
85  void apply(icontext_type& context, vertex_type& vertex,
86  const gather_type& total) {
87  const double newval = total + RESET_PROB;
88  last_change = std::fabs(newval - vertex.data());
89  vertex.data() = newval;
90  }
91 
92  /* The scatter edges depend on whether the pagerank has converged */
93  edge_dir_type scatter_edges(icontext_type& context,
94  const vertex_type& vertex) const {
95  if (last_change > TOLERANCE) return graphlab::OUT_EDGES;
96  else return graphlab::NO_EDGES;
97  }
98 
99  /* The scatter function just signal adjacent pages */
100  void scatter(icontext_type& context, const vertex_type& vertex,
101  edge_type& edge) const {
102  context.signal(edge.target());
103  }
104 }; // end of factorized_pagerank update functor
105 
106 
107 /*
108  * We want to save the final graph so we define a write which will be
109  * used in graph.save("path/prefix", pagerank_writer()) to save the graph.
110  */
111 struct pagerank_writer {
112  std::string save_vertex(graph_type::vertex_type v) {
113  std::stringstream strm;
114  strm << v.id() << "\t" << v.data() << "\n";
115  return strm.str();
116  }
117  std::string save_edge(graph_type::edge_type e) { return ""; }
118 }; // end of pagerank writer
119 
120 
121 
122 int main(int argc, char** argv) {
123  // Initialize control plain using mpi
124  graphlab::mpi_tools::init(argc, argv);
126  global_logger().set_log_level(LOG_INFO);
127 
128  // Parse command line options -----------------------------------------------
129  graphlab::command_line_options clopts("PageRank algorithm.");
130  std::string graph_dir;
131  std::string format = "adj";
132  std::string exec_type = "synchronous";
133  clopts.attach_option("graph", graph_dir,
134  "The graph file. Required ");
135  clopts.add_positional("graph");
136  clopts.attach_option("format", format,
137  "The graph file format");
138  clopts.attach_option("engine", exec_type,
139  "The engine type synchronous or asynchronous");
140  clopts.attach_option("tol", TOLERANCE,
141  "The permissible change at convergence.");
142  std::string saveprefix;
143  clopts.attach_option("saveprefix", saveprefix,
144  "If set, will save the resultant pagerank to a "
145  "sequence of files with prefix saveprefix");
146 
147  if(!clopts.parse(argc, argv)) {
148  dc.cout() << "Error in parsing command line arguments." << std::endl;
149  return EXIT_FAILURE;
150  }
151 
152  if (graph_dir == "") {
153  dc.cout() << "Graph not specified. Cannot continue";
154  return EXIT_FAILURE;
155  }
156 
157  // Build the graph ----------------------------------------------------------
158  graph_type graph(dc, clopts);
159  dc.cout() << "Loading graph in format: "<< format << std::endl;
160  graph.load_format(graph_dir, format);
161  // must call finalize before querying the graph
162  graph.finalize();
163  dc.cout() << "#vertices: " << graph.num_vertices()
164  << " #edges:" << graph.num_edges() << std::endl;
165 
166  // Initialize the vertex data
167  graph.transform_vertices(init_vertex);
168 
169  // Running The Engine -------------------------------------------------------
170  graphlab::omni_engine<pagerank> engine(dc, graph, exec_type, clopts);
171  engine.signal_all();
172  engine.start();
173  const float runtime = engine.elapsed_seconds();
174  dc.cout() << "Finished Running engine in " << runtime
175  << " seconds." << std::endl;
176 
177  // Save the final graph -----------------------------------------------------
178  if (saveprefix != "") {
179  graph.save(saveprefix, pagerank_writer(),
180  false, // do not gzip
181  true, // save vertices
182  false); // do not save edges
183  }
184 
185  // Tear-down communication layer and quit -----------------------------------
186  graphlab::mpi_tools::finalize();
187  return EXIT_SUCCESS;
188 } // End of main
189 
190 
191