Caffe2 - C++ API
A deep learning, cross-platform ML framework
math.h
#ifndef CAFFE2_UTILS_MATH_H_
#define CAFFE2_UTILS_MATH_H_
// This is a simple translation from the old Caffe math interfaces. We aim to
// keep it simple, so that all platforms can support it fairly easily.

// We include the cblas header here so that we can obtain the macros from cblas.
extern "C" {
#include "caffe2/utils/cblas.h"
}

#ifdef CAFFE2_USE_ACCELERATE
#include <Accelerate/Accelerate.h>
#endif // CAFFE2_USE_ACCELERATE

#include "caffe2/core/common.h"
#include "caffe2/core/types.h"

#ifndef __CUDACC__
#include "Eigen/Core"
#include "Eigen/Dense"
#endif

namespace caffe2 {

// An empty class as a placeholder for a math function that has no specific
// engine specified.
class DefaultEngine {};

#ifndef __CUDACC__
// Common Eigen types that we will often use
template <typename T>
using EigenMatrixMap =
    Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using EigenArrayMap =
    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >;
template <typename T>
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1> >;
template <typename T>
using ConstEigenMatrixMap =
    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using ConstEigenArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using ConstEigenVectorMap =
    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >;
template <typename T>
using ConstEigenVectorArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1> >;
#endif

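// Usage sketch (illustrative, not part of the original header): these aliases
// wrap existing buffers in Eigen views without copying. For example, assuming
// a float buffer `y` of length N:
//   EigenVectorArrayMap<float>(y, N) += 1.0f;                     // in-place elementwise add
//   float m = ConstEigenVectorArrayMap<float>(y, N).maxCoeff();   // read-only reduction
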
namespace math {

template <typename T, class Context>
void Exp(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Log(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Sqr(const int N, const T* x, T* y, Context* context);

template <typename T, class Context>
void Not(const int N, const T* x, T* y, Context* context);

template <typename T, class Context>
void Powx(const int N, const T* a, const T b, T* y, Context* context);

#define CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(name)                         \
  template <typename T, class Context>                                       \
  void name(const int N, const T* a, const T* b, bool* y, Context* context); \
  template <typename T, class Context>                                       \
  void name##ToRow(                                                          \
      const int M,                                                           \
      const int N,                                                           \
      const T* a,                                                            \
      const T* b,                                                            \
      bool* y,                                                               \
      Context* context);

CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LE);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GE);

CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(And);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Or);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Xor);

#undef CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT

#define CAFFE2_DECLARE_BINARY_OP(name)                                     \
  template <typename T, class Context>                                     \
  void name(const int N, const T* a, const T* b, T* y, Context* context);  \
  template <typename T, class Context>                                     \
  void name##ToRow(                                                        \
      const int M,                                                         \
      const int N,                                                         \
      const T* a,                                                          \
      const T* b,                                                          \
      T* y,                                                                \
      Context* context);                                                   \
  template <typename T, class Context>                                     \
  void name##ToRow(                                                        \
      const int M, const int N, const T* x, T* y, Context* context);       \
  template <typename T, class Context>                                     \
  void name##ToCol(                                                        \
      const int M, const int N, const T* x, T* y, Context* context);

CAFFE2_DECLARE_BINARY_OP(Add);
CAFFE2_DECLARE_BINARY_OP(Sub);
CAFFE2_DECLARE_BINARY_OP(Mul);
CAFFE2_DECLARE_BINARY_OP(Div);

#undef CAFFE2_DECLARE_BINARY_OP

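// Usage sketch (illustrative, not part of the original header; assumes a
// caffe2::CPUContext `ctx` and float buffers of matching sizes). The macros
// above declare elementwise binary ops plus row/column variants:
//   math::Add<float, CPUContext>(N, a, b, y, &ctx);  // y[i] = a[i] + b[i]
//   math::LT<float, CPUContext>(N, a, b, lt, &ctx);  // lt[i] = (a[i] < b[i]), bool output
//   // The ToRow / ToCol variants presumably broadcast a vector over the rows
//   // or columns of an M x N matrix; exact semantics are defined per Context.
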
// Adds batch sub-tensors elementwise to output. Stripe is the stripe length
// and N is the number of elements to add (size of Y).
template <typename T, class Context>
void AddStripedBatch(
    const int N,
    const T* first,
    T* y,
    const int stripe,
    const int batch,
    Context* context);

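// Usage sketch (illustrative, not part of the original header; my reading of
// the comment above, assuming a CPUContext `ctx`): accumulate `batch` slices
// of length N, spaced `stripe` elements apart, into y:
//   // y[j] += first[i * stripe + j] for i in [0, batch), j in [0, N)
//   math::AddStripedBatch<float, CPUContext>(N, first, y, stripe, batch, &ctx);
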
// Computes the row-wise max of an N*D matrix X, and writes it to an
// N-dimensional vector y.
template <typename T, class Context>
void RowwiseMax(const int N, const int D, const T* x, T* y, Context* context);

// Computes the column-wise max of an N*D matrix X, and writes it to a
// D-dimensional vector y.
template <typename T, class Context>
void ColwiseMax(const int N, const int D, const T* x, T* y, Context* context);

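// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx` and a row-major N x D float matrix X):
//   math::RowwiseMax<float, CPUContext>(N, D, X, row_max, &ctx); // row_max holds N values
//   math::ColwiseMax<float, CPUContext>(N, D, X, col_max, &ctx); // col_max holds D values
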
// Decaf gemm provides a simpler interface to the gemm functions, with the
// limitation that the data has to be contiguous in memory.
template <typename T, class Context, class Engine = DefaultEngine>
void Gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB,
    const int M, const int N, const int K, const T alpha, const T* A,
    const T* B, const T beta, T* C, Context* context);

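// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx` and contiguous float buffers):
//   // C (M x N) = 1.0 * A (M x K) * B (K x N) + 0.0 * C
//   math::Gemm<float, CPUContext>(
//       CblasNoTrans, CblasNoTrans, M, N, K, 1.0f, A, B, 0.0f, C, &ctx);
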
// We also provide a gemm that has explicit lda, ldb and ldc specified.
// In most cases you probably want to use the function above, though.
template <typename T, class Context, class Engine = DefaultEngine>
void GemmEx(
    const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB,
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* A,
    const int lda,
    const T* B,
    const int ldb,
    const T beta,
    T* C,
    const int ldc,
    Context* context);

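// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx` and row-major storage): the explicit leading dimensions
// let A, B and C be sub-blocks of larger matrices, where lda/ldb/ldc are the
// row strides of the enclosing buffers:
//   math::GemmEx<float, CPUContext>(
//       CblasNoTrans, CblasNoTrans, M, N, K, 1.0f,
//       A, lda, B, ldb, 0.0f, C, ldc, &ctx);
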
// Gemv always takes in an M*N matrix A, and depending on whether we set TransA
// to Trans, the output is:
// CblasNoTrans: x is an N-dimensional vector and y is an M-dimensional vector.
// CblasTrans:   x is an M-dimensional vector and y is an N-dimensional vector.
template <typename T, class Context, class Engine = DefaultEngine>
void Gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
    const T alpha, const T* A, const T* x, const T beta,
    T* y, Context* context);

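// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx`):
//   // y (M) = 1.0 * A (M x N) * x (N) + 0.0 * y
//   math::Gemv<float, CPUContext>(CblasNoTrans, M, N, 1.0f, A, x, 0.0f, y, &ctx);
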
template <typename T, class Context>
void Set(const TIndex N, const T alpha, T* X, Context* context);

template <typename T, class Context>
void RandUniform(const int n, const T a, const T b, T* r, Context* context);

template <typename T, class Context>
void RandUniformUnique(
    const size_t n,
    const T a,
    const T b,
    T* r,
    const size_t m,
    const T* avoid,
    Context* context);

template <typename T, class Context>
void RandGaussian(
    const int n,
    const T mean,
    const T std,
    T* r,
    Context* context);

// Computes the dot product of vectors a and b, and writes the result to a
// single value y.
template <typename T, class Context>
void Dot(const int N, const T* a, const T* b, T* y, Context* context);

// Computes the sum of vector x, and writes the result to a single value y.
template <typename T, class Context>
void Sum(const int N, const T* x, T* y, Context* context);

// Computes the sum of squares of vector x, and writes the result to a single
// value y.
template <typename T, class Context>
void SumSqr(const int N, const T* x, T* y, Context* context);

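// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx`, where the single-value output can be a host pointer):
//   float dot, total;
//   math::Dot<float, CPUContext>(N, a, b, &dot, &ctx);  // dot = sum_i a[i] * b[i]
//   math::Sum<float, CPUContext>(N, x, &total, &ctx);   // total = sum_i x[i]
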
// Select does index selection of the rows of an N*D matrix x, and gives the
// N-dimensional vector y that contains the selected data.
template <typename T, class Context>
void Select(const int N, const int D, const T* x, const int* idx, T* y,
    Context* context);

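// Usage sketch (illustrative, not part of the original header; my reading of
// the comment above): for each row i of the N x D matrix x, pick the element
// at column idx[i]:
//   // y[i] = x[i * D + idx[i]]
//   math::Select<float, CPUContext>(N, D, x, idx, y, &ctx);
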
template <typename T, class Context>
void Scale(const int N, const T alpha, const T* x, T* y, Context* context);

// Different from the Scale function above: if alpha is passed in as a
// pointer, we will assume that it lives on the Context device, for example
// on the GPU.
template <typename T, class Context>
void Scale(const int N, const T* alpha, const T* x, T* y, Context* context);

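// Usage sketch (illustrative, not part of the original header; the first call
// assumes a CPUContext `ctx`, the second a CUDA build with a CUDAContext
// `gpu_ctx` and a device pointer `d_alpha`):
//   math::Scale<float, CPUContext>(N, 2.0f, x, y, &ctx);          // y = 2 * x, alpha on the host
//   math::Scale<float, CUDAContext>(N, d_alpha, x, y, &gpu_ctx);  // alpha read from device memory
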
template <typename T, class Context>
void Axpy(const int N, const T alpha, const T* x, T* y, Context* context);

// Different from the Axpy function above: if alpha is passed in as a
// pointer, we will assume that it lives on the Context device, for example
// on the GPU.
template <typename T, class Context>
void Axpy(const int N, const T* alpha, const T* x, T* y, Context* context);

template <typename T, class Context>
void Axpby(const int N, const T alpha, const T* x, const T b, T* y,
    Context* context);

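// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx`):
//   math::Axpy<float, CPUContext>(N, 2.0f, x, y, &ctx);         // y += 2 * x
//   math::Axpby<float, CPUContext>(N, 2.0f, x, 0.5f, y, &ctx);  // y = 2 * x + 0.5 * y
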
template <typename T, class Context, int order>
void Im2colNd(
    const T* data_img,
    const int* im_shape,
    const int* col_shape,
    const int img_size,
    const int col_size,
    const int* kernel_shape,
    const int* stride,
    const int* dilation,
    const int* pad,
    const int N,
    T* data_col,
    Context* context,
    bool accumulate_output = false);

template <typename T, class Context, int order>
void Col2imNd(
    const T* data_col,
    const int* img_shape,
    const int* col_shape,
    const int img_size,
    const int col_size,
    const int* kernel_shape,
    const int* stride,
    const int* dilation,
    const int* pad,
    const int N,
    T* data_img,
    Context* context);

template <typename T, class Context, int order>
void Im2col(
    const T* data_im,
    const int channels,
    const int height,
    const int width,
    const int kernel_h,
    const int kernel_w,
    const int dilation_h,
    const int dilation_w,
    const int pad_t,
    const int pad_l,
    const int pad_b,
    const int pad_r,
    const int stride_h,
    const int stride_w,
    T* data_col,
    Context* context);

template <typename T, class Context, int order>
void Col2im(
    const T* data_col,
    const int channels,
    const int height,
    const int width,
    const int patch_h,
    const int patch_w,
    const int dilation_h,
    const int dilation_w,
    const int pad_t,
    const int pad_l,
    const int pad_b,
    const int pad_r,
    const int stride_h,
    const int stride_w,
    T* data_im,
    Context* context);

// Applies a per-channel bias value to each channel of the input image.
// image_size is H * W.
template <typename T, class Context>
void BiasCHW(
    const T* bias,
    const int bias_channels,
    const int image_size,
    T* image,
    Context* context);

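// Usage sketch (illustrative, not part of the original header; assumes a
// CPUContext `ctx` and an image stored in CHW order):
//   // adds bias[c] to every one of the H * W pixels of channel c
//   math::BiasCHW<float, CPUContext>(bias, channels, H * W, image, &ctx);
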
template <class Context>
void CopyMatrix(const size_t item_size, const int M, const int N, const void* A,
    const int lda, void* B, const int ldb, Context* context);


uint32_t randomNumberSeed();

// Uses a cast from int to unsigned to check whether the value of parameter a
// is greater than or equal to zero and lower than the value of parameter b.
// The b parameter is signed and always positive, so its value is always
// below 0x800...; casting a negative a converts it to a value above 0x800...
// The cast therefore allows us to use one condition instead of two.
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
  return static_cast<unsigned>(a) < static_cast<unsigned>(b);
}

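// Worked example (illustrative, not part of the original header): with a = -1
// and b = 5, static_cast<unsigned>(-1) is 0xFFFFFFFF for a 32-bit int, which
// is not < 5u, so the function returns false, matching (a >= 0 && a < b).
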
// Calculates ceil(a / b). User must be careful to ensure that there
// is no overflow or underflow in the calculation.
template <typename T>
inline T divUp(T a, T b) {
  return (a + b - (T) 1) / b;
}

// Rounds a up to the next highest multiple of b. User must be careful
// to ensure that there is no overflow or underflow in the calculation
// of divUp.
template <typename T>
inline T roundUp(T a, T b) {
  return divUp<T>(a, b) * b;
}

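// Worked example (illustrative, not part of the original header):
//   divUp(10, 4)   == 3;  // (10 + 4 - 1) / 4 == 13 / 4 == 3 == ceil(10 / 4)
//   roundUp(10, 4) == 12; // divUp(10, 4) * 4, the smallest multiple of 4 >= 10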

} // namespace math
} // namespace caffe2

#include "caffe2/utils/math-detail.h"
#endif // CAFFE2_UTILS_MATH_H_