Eigen  3.2.7
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Redux.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <[email protected]>
5 // Copyright (C) 2006-2008 Benoit Jacob <[email protected]>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 // * implement other kind of vectorization
20 // * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
26 template<typename Func, typename Derived>
27 struct redux_traits
28 {
29 public:
30  enum {
31  PacketSize = packet_traits<typename Derived::Scalar>::size,
32  InnerMaxSize = int(Derived::IsRowMajor)
33  ? Derived::MaxColsAtCompileTime
34  : Derived::MaxRowsAtCompileTime
35  };
36 
37  enum {
38  MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39  && (functor_traits<Func>::PacketAccess),
40  MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41  MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42  };
43 
44 public:
45  enum {
46  Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47  : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
48  : int(DefaultTraversal)
49  };
50 
51 public:
52  enum {
53  Cost = ( Derived::SizeAtCompileTime == Dynamic
54  || Derived::CoeffReadCost == Dynamic
55  || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
56  ) ? Dynamic
57  : Derived::SizeAtCompileTime * Derived::CoeffReadCost
58  + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
59  UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
60  };
61 
62 public:
63  enum {
64  Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
65  ? CompleteUnrolling
66  : NoUnrolling
67  };
68 };
69 
70 /***************************************************************************
71 * Part 2 : unrollers
72 ***************************************************************************/
73 
74 /*** no vectorization ***/
75 
76 template<typename Func, typename Derived, int Start, int Length>
77 struct redux_novec_unroller
78 {
79  enum {
80  HalfLength = Length/2
81  };
82 
83  typedef typename Derived::Scalar Scalar;
84 
85  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
86  {
87  return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
88  redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
89  }
90 };
91 
92 template<typename Func, typename Derived, int Start>
93 struct redux_novec_unroller<Func, Derived, Start, 1>
94 {
95  enum {
96  outer = Start / Derived::InnerSizeAtCompileTime,
97  inner = Start % Derived::InnerSizeAtCompileTime
98  };
99 
100  typedef typename Derived::Scalar Scalar;
101 
102  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
103  {
104  return mat.coeffByOuterInner(outer, inner);
105  }
106 };
107 
108 // This is actually dead code and will never be called. It is required
109 // to prevent false warnings regarding failed inlining though
110 // for 0 length run() will never be called at all.
111 template<typename Func, typename Derived, int Start>
112 struct redux_novec_unroller<Func, Derived, Start, 0>
113 {
114  typedef typename Derived::Scalar Scalar;
115  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
116 };
117 
118 /*** vectorization ***/
119 
120 template<typename Func, typename Derived, int Start, int Length>
121 struct redux_vec_unroller
122 {
123  enum {
124  PacketSize = packet_traits<typename Derived::Scalar>::size,
125  HalfLength = Length/2
126  };
127 
128  typedef typename Derived::Scalar Scalar;
129  typedef typename packet_traits<Scalar>::type PacketScalar;
130 
131  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
132  {
133  return func.packetOp(
134  redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
135  redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
136  }
137 };
138 
139 template<typename Func, typename Derived, int Start>
140 struct redux_vec_unroller<Func, Derived, Start, 1>
141 {
142  enum {
143  index = Start * packet_traits<typename Derived::Scalar>::size,
144  outer = index / int(Derived::InnerSizeAtCompileTime),
145  inner = index % int(Derived::InnerSizeAtCompileTime),
146  alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
147  };
148 
149  typedef typename Derived::Scalar Scalar;
150  typedef typename packet_traits<Scalar>::type PacketScalar;
151 
152  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
153  {
154  return mat.template packetByOuterInner<alignment>(outer, inner);
155  }
156 };
157 
158 /***************************************************************************
159 * Part 3 : implementation of all cases
160 ***************************************************************************/
161 
162 template<typename Func, typename Derived,
163  int Traversal = redux_traits<Func, Derived>::Traversal,
164  int Unrolling = redux_traits<Func, Derived>::Unrolling
165 >
166 struct redux_impl;
167 
168 template<typename Func, typename Derived>
169 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
170 {
171  typedef typename Derived::Scalar Scalar;
172  typedef typename Derived::Index Index;
173  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
174  {
175  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
176  Scalar res;
177  res = mat.coeffByOuterInner(0, 0);
178  for(Index i = 1; i < mat.innerSize(); ++i)
179  res = func(res, mat.coeffByOuterInner(0, i));
180  for(Index i = 1; i < mat.outerSize(); ++i)
181  for(Index j = 0; j < mat.innerSize(); ++j)
182  res = func(res, mat.coeffByOuterInner(i, j));
183  return res;
184  }
185 };
186 
187 template<typename Func, typename Derived>
188 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
189  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
190 {};
191 
192 template<typename Func, typename Derived>
193 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
194 {
195  typedef typename Derived::Scalar Scalar;
196  typedef typename packet_traits<Scalar>::type PacketScalar;
197  typedef typename Derived::Index Index;
198 
199  static Scalar run(const Derived& mat, const Func& func)
200  {
201  const Index size = mat.size();
202  eigen_assert(size && "you are using an empty matrix");
203  const Index packetSize = packet_traits<Scalar>::size;
204  const Index alignedStart = internal::first_aligned(mat);
205  enum {
206  alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
207  ? Aligned : Unaligned
208  };
209  const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
210  const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
211  const Index alignedEnd2 = alignedStart + alignedSize2;
212  const Index alignedEnd = alignedStart + alignedSize;
213  Scalar res;
214  if(alignedSize)
215  {
216  PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
217  if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
218  {
219  PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
220  for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
221  {
222  packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
223  packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
224  }
225 
226  packet_res0 = func.packetOp(packet_res0,packet_res1);
227  if(alignedEnd>alignedEnd2)
228  packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
229  }
230  res = func.predux(packet_res0);
231 
232  for(Index index = 0; index < alignedStart; ++index)
233  res = func(res,mat.coeff(index));
234 
235  for(Index index = alignedEnd; index < size; ++index)
236  res = func(res,mat.coeff(index));
237  }
238  else // too small to vectorize anything.
239  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
240  {
241  res = mat.coeff(0);
242  for(Index index = 1; index < size; ++index)
243  res = func(res,mat.coeff(index));
244  }
245 
246  return res;
247  }
248 };
249 
250 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
251 template<typename Func, typename Derived, int Unrolling>
252 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
253 {
254  typedef typename Derived::Scalar Scalar;
255  typedef typename packet_traits<Scalar>::type PacketScalar;
256  typedef typename Derived::Index Index;
257 
258  static Scalar run(const Derived& mat, const Func& func)
259  {
260  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
261  const Index innerSize = mat.innerSize();
262  const Index outerSize = mat.outerSize();
263  enum {
264  packetSize = packet_traits<Scalar>::size
265  };
266  const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
267  Scalar res;
268  if(packetedInnerSize)
269  {
270  PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
271  for(Index j=0; j<outerSize; ++j)
272  for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
273  packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
274 
275  res = func.predux(packet_res);
276  for(Index j=0; j<outerSize; ++j)
277  for(Index i=packetedInnerSize; i<innerSize; ++i)
278  res = func(res, mat.coeffByOuterInner(j,i));
279  }
280  else // too small to vectorize anything.
281  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
282  {
283  res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
284  }
285 
286  return res;
287  }
288 };
289 
290 template<typename Func, typename Derived>
291 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
292 {
293  typedef typename Derived::Scalar Scalar;
294  typedef typename packet_traits<Scalar>::type PacketScalar;
295  enum {
296  PacketSize = packet_traits<Scalar>::size,
297  Size = Derived::SizeAtCompileTime,
298  VectorizedSize = (Size / PacketSize) * PacketSize
299  };
300  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
301  {
302  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
303  Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
304  if (VectorizedSize != Size)
305  res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
306  return res;
307  }
308 };
309 
310 } // end namespace internal
311 
312 /***************************************************************************
313 * Part 4 : public API
314 ***************************************************************************/
315 
316 
324 template<typename Derived>
325 template<typename Func>
326 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
327 DenseBase<Derived>::redux(const Func& func) const
328 {
329  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
330  return internal::redux_impl<Func, ThisNested>
331  ::run(derived(), func);
332 }
333 
337 template<typename Derived>
338 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
340 {
341  return this->redux(Eigen::internal::scalar_min_op<Scalar>());
342 }
343 
347 template<typename Derived>
348 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
350 {
351  return this->redux(Eigen::internal::scalar_max_op<Scalar>());
352 }
353 
358 template<typename Derived>
359 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
361 {
362  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
363  return Scalar(0);
364  return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
365 }
366 
371 template<typename Derived>
372 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
374 {
375  return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
376 }
377 
385 template<typename Derived>
386 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
388 {
389  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
390  return Scalar(1);
391  return this->redux(Eigen::internal::scalar_product_op<Scalar>());
392 }
393 
400 template<typename Derived>
401 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
403 {
404  return derived().diagonal().sum();
405 }
406 
407 } // end namespace Eigen
408 
409 #endif // EIGEN_REDUX_H
Scalar mean() const
Definition: Redux.h:373
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:339
Scalar prod() const
Definition: Redux.h:387
const int Dynamic
Definition: Constants.h:21
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:349
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:41
Definition: Constants.h:194
Scalar trace() const
Definition: Redux.h:402
const unsigned int ActualPacketAccessBit
Definition: Constants.h:92
Scalar sum() const
Definition: Redux.h:360
const unsigned int LinearAccessBit
Definition: Constants.h:117
Definition: Constants.h:192
const unsigned int DirectAccessBit
Definition: Constants.h:142
const unsigned int AlignedBit
Definition: Constants.h:147