Caffe2 - C++ API
A deep learning, cross platform ML framework
convert_encoded_to_raw_leveldb.cc
// This script converts a leveldb of encoded images into a leveldb of raw
// (decoded and resized) images.
//
// caffe2::FLAGS_input_db_name is the input leveldb. Each value is a serialized
// TensorProtos whose first proto holds the encoded image bytes as string_data(0)
// and whose second proto holds the label. caffe2::FLAGS_output_db_name is the
// leveldb to create; it must not already exist.
8 
9 #include <opencv2/opencv.hpp>
10 
11 #include <fstream> // NOLINT(readability/streams)
12 #include <memory>
13 #include <random>
14 #include <string>
15 
16 #include "caffe2/core/init.h"
17 #include "caffe2/proto/caffe2.pb.h"
18 #include "caffe2/core/logging.h"
19 #include "leveldb/db.h"
20 #include "leveldb/write_batch.h"
21 
22 CAFFE2_DEFINE_string(input_db_name, "", "The input image file name.");
23 CAFFE2_DEFINE_string(output_db_name, "", "The output training leveldb name.");
24 CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
25 CAFFE2_DEFINE_int(scale, 256,
26  "If caffe2::FLAGS_raw is set, scale all the images' shorter edge to the given "
27  "value.");
28 CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
29 
30 
31 namespace caffe2 {
32 
33 using std::string;
34 using std::unique_ptr;
35 
36 void ConvertToRawDataset(
37  const string& input_db_name, const string& output_db_name) {
38  // input leveldb
39  std::unique_ptr<leveldb::DB> input_db;
40  LOG(INFO) << "Opening input leveldb " << input_db_name;
41  {
42  leveldb::Options options;
43  options.create_if_missing = false;
44  leveldb::DB* db_temp;
45  leveldb::Status status = leveldb::DB::Open(
46  options, input_db_name, &db_temp);
47  CAFFE_ENFORCE(status.ok(), "Failed to open leveldb ", input_db_name, ".");
48  input_db.reset(db_temp);
49  }
50 
51  // output leveldb
52  std::unique_ptr<leveldb::DB> output_db;
53  std::unique_ptr<leveldb::WriteBatch> batch;
54  LOG(INFO) << "Opening leveldb " << output_db_name;
55  {
56  leveldb::Options options;
57  options.error_if_exists = true;
58  options.create_if_missing = true;
59  options.write_buffer_size = 268435456;
60  leveldb::DB* db_temp;
61  leveldb::Status status = leveldb::DB::Open(
62  options, output_db_name, &db_temp);
63  CAFFE_ENFORCE(
64  status.ok(),
65  "Failed to open leveldb ",
66  output_db_name,
67  ". Is it already existing?");
68  output_db.reset(db_temp);
69  }
70  batch.reset(new leveldb::WriteBatch());
71 
72  TensorProtos input_protos;
73  TensorProtos output_protos;
74  TensorProto* data = output_protos.add_protos();
75  TensorProto* label = output_protos.add_protos();
76  data->set_data_type(TensorProto::BYTE);
77  data->add_dims(0);
78  data->add_dims(0);
79  if (caffe2::FLAGS_color) {
80  data->add_dims(3);
81  }
82  string value;
83 
84  unique_ptr<leveldb::Iterator> iter;
85  iter.reset(input_db->NewIterator(leveldb::ReadOptions()));
86  iter->SeekToFirst();
87  int count = 0;
88  for (; iter->Valid(); iter->Next()) {
89  CAFFE_ENFORCE(input_protos.ParseFromString(iter->value().ToString()));
90  label->CopyFrom(input_protos.protos(1));
91  const string& encoded_image = input_protos.protos(0).string_data(0);
92  int encoded_size = encoded_image.size();
93  cv::Mat img = cv::imdecode(
94  cv::Mat(1, &encoded_size, CV_8UC1,
95  const_cast<char*>(encoded_image.data())),
96  caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
97  cv::Mat resized_img;
98  int scaled_width, scaled_height;
99  if (caffe2::FLAGS_warp) {
100  scaled_width = caffe2::FLAGS_scale;
101  scaled_height = caffe2::FLAGS_scale;
102  } else if (img.rows > img.cols) {
103  scaled_width = caffe2::FLAGS_scale;
104  scaled_height = static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
105  } else {
106  scaled_height = caffe2::FLAGS_scale;
107  scaled_width = static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
108  }
109  cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
110  cv::INTER_LINEAR);
111  data->set_dims(0, scaled_height);
112  data->set_dims(1, scaled_width);
113  DCHECK(resized_img.isContinuous());
114  data->set_byte_data(resized_img.ptr(),
115  scaled_height * scaled_width * (caffe2::FLAGS_color ? 3 : 1));
116  output_protos.SerializeToString(&value);
117  // Put in db
118  batch->Put(iter->key(), value);
119  if (++count % 1000 == 0) {
120  output_db->Write(leveldb::WriteOptions(), batch.get());
121  batch.reset(new leveldb::WriteBatch());
122  LOG(INFO) << "Processed " << count << " files.";
123  }
124  }
125  // write the last batch
126  if (count % 1000 != 0) {
127  output_db->Write(leveldb::WriteOptions(), batch.get());
128  }
129  LOG(INFO) << "Processed a total of " << count << " files.";
130 }
131 
132 } // namespace caffe2
133 
134 
135 int main(int argc, char** argv) {
136  caffe2::GlobalInit(&argc, &argv);
137  caffe2::ConvertToRawDataset(
138  caffe2::FLAGS_input_db_name, caffe2::FLAGS_output_db_name);
139  return 0;
140 }
bool GlobalInit(int *pargc, char ***pargv)
Initialize the global environment of caffe2.
Definition: init.cc:15
Simple registry implementation in Caffe2 that uses static variables to register object creators durin...