33 #ifndef EIGEN_ASSIGN_VML_H
34 #define EIGEN_ASSIGN_VML_H
// Trait declaring whether an Intel MKL VML kernel exists for a given unary
// functor Op.  The primary template says "no"; explicit specializations
// (generated by the EIGEN_MKL_VML_DECLARE_* macros below) set
// IsSupported = 1 and provide a run() that forwards to the VML entry point.
template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };
43 template<
typename Dst,
typename Src,
typename UnaryOp>
44 class vml_assign_traits
51 StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
52 InnerSize =
int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
53 : int(Dst::Flags)&
RowMajorBit ? int(Dst::ColsAtCompileTime)
54 : int(Dst::RowsAtCompileTime),
55 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
56 : int(Dst::Flags)&
RowMajorBit ? int(Dst::MaxColsAtCompileTime)
57 : int(Dst::MaxRowsAtCompileTime),
58 MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
60 MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
61 && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
62 MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) &
LinearAccessBit),
63 VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
64 LargeEnough = VmlSize==
Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
65 MayEnableVml = MightEnableVml && LargeEnough,
66 MayLinearize = MayEnableVml && MightLinearize
70 Traversal = MayLinearize ? LinearVectorizedTraversal
71 : MayEnableVml ? InnerVectorizedTraversal
76 template<
typename Derived1,
typename Derived2,
typename UnaryOp,
int Traversal,
int Unrolling,
77 int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
78 struct vml_assign_impl
79 : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
83 template<
typename Derived1,
typename Derived2,
typename UnaryOp,
int Traversal,
int Unrolling>
84 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
86 typedef typename Derived1::Scalar Scalar;
87 typedef typename Derived1::Index Index;
88 static inline void run(Derived1& dst,
const CwiseUnaryOp<UnaryOp, Derived2>& src)
92 const Index innerSize = dst.innerSize();
93 const Index outerSize = dst.outerSize();
94 for(Index outer = 0; outer < outerSize; ++outer) {
95 const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
96 &(src.nestedExpression().coeffRef(0, outer));
97 Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
98 vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
103 template<
typename Derived1,
typename Derived2,
typename UnaryOp,
int Traversal,
int Unrolling>
104 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
106 static inline void run(Derived1& dst,
const CwiseUnaryOp<UnaryOp, Derived2>& src)
110 vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
// Intercept assign_impl<..., Specialized> for a given traversal/unrolling
// pair and route it through vml_assign_impl (which dispatches to VML when
// profitable, or falls back to the built-in implementation).
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING)                                                \
  template<typename Derived1, typename Derived2, typename UnaryOp>                                          \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) {              \
      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src);                        \
    }                                                                                                       \
  };
124 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
125 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
126 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
127 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
128 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
129 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
130 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
131 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
132 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
133 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
134 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
// Select the VML accuracy mode: high accuracy (VML_HA) by default, lower
// accuracy (VML_LA) when Eigen's fast-math mode is enabled.
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#endif
// Specialize vml_call to bind Eigen's scalar_<EIGENOP>_op<EIGENTYPE> functor
// to the VML function VMLOP (plain variant, no accuracy-mode argument).
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& ,            \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
    }                                                                            \
  };
// Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but through the vm?* entry points
// that take an explicit accuracy-mode argument (EIGEN_MKL_VML_MODE).
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& ,            \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
    }                                                                            \
  };
// Dedicated variant for pow: the functor carries the exponent (m_exponent),
// which is forwarded to the v?powx entry point along with the accuracy mode.
// Note this entry point takes its size/exponent/mode arguments by pointer.
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)       \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      EIGENTYPE exponent = func.m_exponent;                                      \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,               \
            (VMLTYPE*)dst, &vmlMode);                                            \
    }                                                                            \
  };
// Convenience dispatchers instantiating the vml_call specializations for the
// four scalar types.  The VML function name is derived by prefixing VMLOP
// with the type code (vs/vd for float/double, vc/vz for the complex types;
// vm* for the mode-taking variants).  The casts to MKL_Complex8/16 assume
// layout compatibility with scomplex/dcomplex — standard for MKL interop.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)               \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                          \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)

// Accuracy-mode-taking (vm*) counterparts of the above.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)               \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                       \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
200 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
201 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
202 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
203 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
204 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
206 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
207 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
208 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
210 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
214 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_,
float,
float)
215 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_,
double,
double)
216 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
217 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
224 #endif // EIGEN_ASSIGN_VML_H
// Cross-reference residue from the documentation generator, preserved as a
// comment so it cannot be parsed as code:
//   Dynamic         — Eigen/src/Core/util/Constants.h:21
//   LinearAccessBit — Eigen/src/Core/util/Constants.h:117
//   RowMajorBit     — Eigen/src/Core/util/Constants.h:53
//   DirectAccessBit — Eigen/src/Core/util/Constants.h:142