Eigen  3.2.7
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
CoeffBasedProduct.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <[email protected]>
5 // Copyright (C) 2008-2010 Gael Guennebaud <[email protected]>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_COEFFBASED_PRODUCT_H
12 #define EIGEN_COEFFBASED_PRODUCT_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 /*********************************************************************************
19 * Coefficient based product implementation.
20 * It is designed for the following use cases:
21 * - small fixed sizes
22 * - lazy products
23 *********************************************************************************/
24 
25 /* Since the all the dimensions of the product are small, here we can rely
26  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
27  *
28  * Note that here the inner-loops should always be unrolled.
29  */
30 
// Forward declaration: kernel computing ONE scalar coefficient of the product.
// Traversal selects the scalar vs. inner-vectorized path; UnrollingIndex drives
// compile-time unrolling over the inner dimension (Dynamic => runtime loop).
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl;

// Forward declaration: kernel computing ONE packet of coefficients of the
// product. StorageOrder is the result's storage order; LoadMode is the packet
// load alignment mode forwarded to the nested rhs/lhs expressions.
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl;
36 
// Compile-time traits of a coefficient-based product expression.
// Everything below is computed from the (reference/const-stripped) nested
// operand types; note the comment at CanVectorizeInner about ABI stability.
template<typename LhsNested, typename RhsNested, int NestingFlags>
struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{
  typedef MatrixXpr XprKind;
  // Strip references and const to reach the actual operand expression types.
  typedef typename remove_all<LhsNested>::type _LhsNested;
  typedef typename remove_all<RhsNested>::type _RhsNested;
  // Result scalar of lhs_scalar * rhs_scalar (allows mixing T and complex<T>).
  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
                                        typename traits<_RhsNested>::StorageKind>::ret StorageKind;
  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
                                      typename traits<_RhsNested>::Index>::type Index;

  enum {
    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
    LhsFlags = _LhsNested::Flags,
    RhsFlags = _RhsNested::Flags,

    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
    // Depth of the product; a fixed size wins over Dynamic when they differ.
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),

    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,

    // The product itself can be packet-evaluated along rhs rows when rhs is
    // row-major with packet access, and the result's column count is either
    // runtime or a multiple of the packet size with aligned rhs storage.
    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
                    && (ColsAtCompileTime == Dynamic
                        || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
                             && (RhsFlags&AlignedBit)
                           )
                       ),

    // Symmetric condition along lhs columns (lhs column-major, aligned,
    // row count a multiple of the packet size or runtime).
    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
                    && (RowsAtCompileTime == Dynamic
                        || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
                             && (LhsFlags&AlignedBit)
                           )
                       ),

    // Row-vector results evaluate row-major, column-vector results
    // column-major; otherwise follow rhs's order unless the lhs side can be
    // vectorized column-wise.
    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
                   : (RhsRowMajor && !CanVectorizeLhs),

    // Keep the hereditary bits common to both operands, recompute the
    // RowMajorBit, propagate the requested NestingFlags and the common
    // AlignedBit, and grant PacketAccessBit only for same-scalar products.
    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          | NestingFlags
          | (LhsFlags & RhsFlags & AlignedBit)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),

    // Cost of computing one coefficient: InnerSize multiplies plus operand
    // reads, plus InnerSize-1 additions; Dynamic when the depth is unknown.
    CoeffReadCost = InnerSize == Dynamic ? Dynamic
                  : InnerSize == 0 ? 0
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
     */
    CanVectorizeInner =    SameType
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
                        && (LhsFlags & RhsFlags & AlignedBit)
                        && (InnerSize % packet_traits<Scalar>::size == 0)
  };
};
110 
111 } // end namespace internal
112 
// Expression type for products evaluated coefficient-by-coefficient (small
// fixed sizes and lazy products).  The per-coefficient and per-packet kernels
// live in internal::product_coeff_impl / internal::product_packet_impl below.
template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
  : internal::no_assignment_operator,  // a product expression is not assignable
    public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{
  public:

    typedef MatrixBase<CoeffBasedProduct> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
    typedef typename Base::PlainObject PlainObject;

  private:

    typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
    typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;

    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
      InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
      // Unroll the inner loop only when its compile-time cost is known and
      // does not exceed the global unrolling limit.
      Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
      CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
    };

    // Kernel computing a single scalar coefficient of the product.
    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
                                         Unroll ? InnerSize : Dynamic,
                                         _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

    // Same product type, but flagged for by-reference nesting; used by the
    // diagonal() views below to avoid triggering an evaluation.
    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;

  public:

    // Copy constructor: copies the operands but NOT m_result -- the cache is
    // default-constructed and only filled on conversion to PlainObject.
    inline CoeffBasedProduct(const CoeffBasedProduct& other)
      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
    {}

    template<typename Lhs, typename Rhs>
    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
      : m_lhs(lhs), m_rhs(rhs)
    {
      // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
      // We still allow to mix T and complex<T>.
      EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
      eigen_assert(lhs.cols() == rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }

    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }

    // Computes coefficient (row,col) on demand via the scalar kernel.
    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
    {
      Scalar res;  // written by run() below
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
     * which is why we don't set the LinearAccessBit.
     */
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      Scalar res;
      // Map the linear index onto a row vector (RowsAtCompileTime==1) or a
      // column vector otherwise.
      const Index row = RowsAtCompileTime == 1 ? 0 : index;
      const Index col = RowsAtCompileTime == 1 ? index : 0;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    // Computes one packet of result coefficients starting at (row,col).
    template<int LoadMode>
    EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
    {
      PacketScalar res;  // written by run() below
      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
                                    Unroll ? InnerSize : Dynamic,
                                    _LhsNested, _RhsNested, PacketScalar, LoadMode>
        ::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    // Implicit conversion to the nested type (trigger the evaluation of the product)
    EIGEN_STRONG_INLINE operator const PlainObject& () const
    {
      m_result.lazyAssign(*this);
      return m_result;
    }

    const _LhsNested& lhs() const { return m_lhs; }
    const _RhsNested& rhs() const { return m_rhs; }

    // The diagonal views below reinterpret *this as the lazily-nested product
    // type so that taking a diagonal does not evaluate the whole product.
    // NOTE(review): this relies on the two instantiations differing only in
    // the NestingFlags template argument and thus sharing the same layout.
    const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    template<int DiagonalIndex>
    const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }

  protected:
    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;

    // Cache for the evaluated product, filled lazily by the conversion
    // operator above; mutable because evaluation happens in const context.
    mutable PlainObject m_result;
};
220 
221 namespace internal {
222 
// here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix:
// an EvalBefore* product nests as its evaluated PlainObject rather than as
// the product expression itself.
template<typename Lhs, typename Rhs, int N, typename PlainObject>
struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{
  typedef PlainObject const& type;
};
230 
231 /***************************************************************************
232 * Normal product .coeff() implementation (with meta-unrolling)
233 ***************************************************************************/
234 
235 /**************************************
236 *** Scalar path - no vectorization ***
237 **************************************/
238 
239 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
240 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
241 {
242  typedef typename Lhs::Index Index;
243  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
244  {
245  product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
246  res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
247  }
248 };
249 
250 template<typename Lhs, typename Rhs, typename RetScalar>
251 struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
252 {
253  typedef typename Lhs::Index Index;
254  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
255  {
256  res = lhs.coeff(row, 0) * rhs.coeff(0, col);
257  }
258 };
259 
260 template<typename Lhs, typename Rhs, typename RetScalar>
261 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
262 {
263  typedef typename Lhs::Index Index;
264  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
265  {
266  res = RetScalar(0);
267  }
268 };
269 
270 template<typename Lhs, typename Rhs, typename RetScalar>
271 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
272 {
273  typedef typename Lhs::Index Index;
274  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
275  {
276  res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
277  }
278 };
279 
280 /*******************************************
281 *** Scalar path with inner vectorization ***
282 *******************************************/
283 
// Meta-unroller for the inner-vectorized dot product: walks the inner
// dimension backwards in strides of PacketSize, accumulating packet
// multiply-adds into `pres`.  Terminates at the <0,...> specialization below,
// which initializes the accumulator with the first packet term.
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller
{
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    // Aligned loads are safe here: the traits' CanVectorizeInner requires
    // AlignedBit on both operands before this path is selected.
    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
  }
};
295 
296 template<typename Lhs, typename Rhs, typename Packet>
297 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
298 {
299  typedef typename Lhs::Index Index;
300  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
301  {
302  pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
303  }
304 };
305 
306 template<typename Lhs, typename Rhs, typename RetScalar>
307 struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
308 {
309  typedef typename Lhs::Index Index;
310  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
311  {
312  res = 0;
313  }
314 };
315 
// Entry point of the inner-vectorized scalar path: computes the whole dot
// product in packets via the unroller, then horizontally reduces the packet
// accumulator to a single scalar with predux.
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::PacketScalar Packet;
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    Packet pres;  // deliberately uninitialized: the unroller's base case assigns it
    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    res = predux(pres);
  }
};
329 
330 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
331 struct product_coeff_vectorized_dyn_selector
332 {
333  typedef typename Lhs::Index Index;
334  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
335  {
336  res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
337  }
338 };
339 
340 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
341 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
342 template<typename Lhs, typename Rhs, int RhsCols>
343 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
344 {
345  typedef typename Lhs::Index Index;
346  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
347  {
348  res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
349  }
350 };
351 
352 template<typename Lhs, typename Rhs, int LhsRows>
353 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
354 {
355  typedef typename Lhs::Index Index;
356  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
357  {
358  res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
359  }
360 };
361 
362 template<typename Lhs, typename Rhs>
363 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
364 {
365  typedef typename Lhs::Index Index;
366  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
367  {
368  res = lhs.transpose().cwiseProduct(rhs).sum();
369  }
370 };
371 
372 template<typename Lhs, typename Rhs, typename RetScalar>
373 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
374 {
375  typedef typename Lhs::Index Index;
376  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
377  {
378  product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
379  }
380 };
381 
382 /*******************
383 *** Packet path ***
384 *******************/
385 
386 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
387 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
388 {
389  typedef typename Lhs::Index Index;
390  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
391  {
392  product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
393  res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
394  }
395 };
396 
397 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
398 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
399 {
400  typedef typename Lhs::Index Index;
401  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
402  {
403  product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
404  res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
405  }
406 };
407 
408 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
409 struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
410 {
411  typedef typename Lhs::Index Index;
412  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
413  {
414  res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
415  }
416 };
417 
418 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
419 struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
420 {
421  typedef typename Lhs::Index Index;
422  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
423  {
424  res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
425  }
426 };
427 
428 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
429 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
430 {
431  typedef typename Lhs::Index Index;
432  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
433  {
434  res = pset1<Packet>(0);
435  }
436 };
437 
438 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
439 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
440 {
441  typedef typename Lhs::Index Index;
442  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
443  {
444  res = pset1<Packet>(0);
445  }
446 };
447 
448 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
449 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
450 {
451  typedef typename Lhs::Index Index;
452  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
453  {
454  res = pset1<Packet>(0);
455  for(Index i = 0; i < lhs.cols(); ++i)
456  res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
457  }
458 };
459 
460 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
461 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
462 {
463  typedef typename Lhs::Index Index;
464  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
465  {
466  res = pset1<Packet>(0);
467  for(Index i = 0; i < lhs.cols(); ++i)
468  res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
469  }
470 };
471 
472 } // end namespace internal
473 
474 } // end namespace Eigen
475 
476 #endif // EIGEN_COEFFBASED_PRODUCT_H
const unsigned int EvalBeforeNestingBit
Definition: Constants.h:58
const int Dynamic
Definition: Constants.h:21
Definition: Constants.h:264
const unsigned int PacketAccessBit
Definition: Constants.h:81
const unsigned int ActualPacketAccessBit
Definition: Constants.h:92
const unsigned int EvalBeforeAssigningBit
Definition: Constants.h:63
Definition: Constants.h:266
const unsigned int RowMajorBit
Definition: Constants.h:53
const unsigned int AlignedBit
Definition: Constants.h:147