10 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
// Packer for a selfadjoint left-hand side: copies coefficients into the
// contiguous buffer blockA. Pack1 and Pack2 are the two row-group sizes tried
// by operator(); StorageOrder selects the const_blas_data_mapper layout.
// NOTE(review): the declaration line that follows this parameter list is not
// part of this listing; the five parameters match the symm_pack_lhs
// instantiation used later in this file -- confirm against the full header.
18 template<
typename Scalar,
typename Index,
int Pack1,
int Pack2,
int StorageOrder>
// pack<BlockRows>: appends to blockA, starting at index `count`, the
// coefficients of rows i .. i+BlockRows-1 for columns 0 .. cols-1.
// `count` is taken by reference and advanced once per stored coefficient.
21 template<
int BlockRows>
inline
22 void pack(Scalar* blockA,
const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
// Columns left of the block (k < i): entries are read as stored, lhs(i+w,k).
25 for(Index k=0; k<i; k++)
26 for(Index w=0; w<BlockRows; w++)
27 blockA[count++] = lhs(i+w,k);
// Columns crossing the diagonal (i <= k < i+BlockRows).
// NOTE(review): `h` is neither declared nor updated in the visible lines;
// presumably it is the offset k-i inside the block -- confirm.
30 for(Index k=i; k<i+BlockRows; k++)
// Entries with w < h come from the mirrored position lhs(k,i+w), conjugated.
32 for(Index w=0; w<h; w++)
33 blockA[count++] = numext::conj(lhs(k, i+w));
// The diagonal entry is stored through numext::real.
35 blockA[count++] = numext::real(lhs(k,k));
// Entries with w > h are read as stored.
37 for(Index w=h+1; w<BlockRows; w++)
38 blockA[count++] = lhs(i+w, k);
// Columns right of the block (k >= i+BlockRows): every entry comes from the
// mirrored position lhs(k,i+w) and is conjugated.
42 for(Index k=i+BlockRows; k<cols; k++)
43 for(Index w=0; w<BlockRows; w++)
44 blockA[count++] = numext::conj(lhs(k, i+w));
// operator(): packs `rows` rows by `cols` columns of the matrix addressed by
// _lhs (with stride lhsStride) into blockA.
// NOTE(review): the declaration of `count`, and any adjustment of peeled_mc
// after the Pack2 group, are not visible in this listing -- confirm.
46 void operator()(Scalar* blockA,
const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
48 const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
// peeled_mc is `rows` rounded down to a multiple of Pack1; those rows are
// packed in groups of Pack1.
50 Index peeled_mc = (rows/Pack1)*Pack1;
51 for(Index i=0; i<peeled_mc; i+=Pack1)
53 pack<Pack1>(blockA, lhs, cols, i, count);
// If at least Pack2 rows are left, one group of Pack2 rows is packed.
56 if(rows-peeled_mc>=Pack2)
58 pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
// Remaining rows are packed one at a time: stored entries for k < i, the
// numext::real of the diagonal, then conjugated mirrored entries lhs(k,i)
// for k > i.
63 for(Index i=peeled_mc; i<rows; i++)
65 for(Index k=0; k<i; k++)
66 blockA[count++] = lhs(i, k);
68 blockA[count++] = numext::real(lhs(i, i));
70 for(Index k=i+1; k<cols; k++)
71 blockA[count++] = numext::conj(lhs(k, i));
// Packer for a selfadjoint right-hand side: copies the rows k2 .. k2+rows-1
// of all `cols` columns into blockB, nr columns at a time.
// NOTE(review): the declaration line that follows this parameter list is not
// part of this listing; the four parameters match the symm_pack_rhs
// instantiation used later in this file -- confirm against the full header.
76 template<
typename Scalar,
typename Index,
int nr,
int StorageOrder>
79 enum { PacketSize = packet_traits<Scalar>::size };
// operator(): _rhs/rhsStride address the whole matrix; k2 is the first row of
// the packed panel and `rows` its height.
// NOTE(review): the declaration and the increments of `count`, the
// declaration/update of `h`, and whatever sits in the line-number gaps between
// the "+1" and "+2" stores are not visible in this listing -- confirm.
80 void operator()(Scalar* blockB,
const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
82 Index end_k = k2 + rows;
84 const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
// packet_cols is `cols` rounded down to a multiple of nr.
85 Index packet_cols = (cols/nr)*nr;
// Column groups with j2 < k2: entries are read as stored, rhs(k,j2+c).
88 for(Index j2=0; j2<k2; j2+=nr)
90 for(Index k=k2; k<end_k; k++)
92 blockB[count+0] = rhs(k,j2+0);
93 blockB[count+1] = rhs(k,j2+1);
96 blockB[count+2] = rhs(k,j2+2);
97 blockB[count+3] = rhs(k,j2+3);
// Column groups starting inside [k2, min(k2+rows, packet_cols)): each group is
// handled in three row ranges.
104 for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
// Rows k < j2: mirrored position rhs(j2+c,k), conjugated.
108 for(Index k=k2; k<j2; k++)
110 blockB[count+0] = numext::conj(rhs(j2+0,k));
111 blockB[count+1] = numext::conj(rhs(j2+1,k));
114 blockB[count+2] = numext::conj(rhs(j2+2,k));
115 blockB[count+3] = numext::conj(rhs(j2+3,k));
// Rows j2 <= k < j2+nr: entries with w < h are read as stored, the entry at
// offset h is numext::real(rhs(k,k)), entries with w > h are read from the
// mirrored position and conjugated.
121 for(Index k=j2; k<j2+nr; k++)
124 for (Index w=0 ; w<h; ++w)
125 blockB[count+w] = rhs(k,j2+w);
127 blockB[count+h] = numext::real(rhs(k,k));
130 for (Index w=h+1 ; w<nr; ++w)
131 blockB[count+w] = numext::conj(rhs(j2+w,k));
// Rows k >= j2+nr: entries are read as stored.
136 for(Index k=j2+nr; k<end_k; k++)
138 blockB[count+0] = rhs(k,j2+0);
139 blockB[count+1] = rhs(k,j2+1);
142 blockB[count+2] = rhs(k,j2+2);
143 blockB[count+3] = rhs(k,j2+3);
// Column groups with j2 >= k2+rows: every entry comes from the mirrored
// position rhs(j2+c,k) and is conjugated.
150 for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
152 for(Index k=k2; k<end_k; k++)
154 blockB[count+0] = numext::conj(rhs(j2+0,k));
155 blockB[count+1] = numext::conj(rhs(j2+1,k));
158 blockB[count+2] = numext::conj(rhs(j2+2,k));
159 blockB[count+3] = numext::conj(rhs(j2+3,k));
// Columns past packet_cols are packed one at a time.
166 for(Index j2=packet_cols; j2<cols; ++j2)
// Rows below `half` = min(end_k, j2) use the conjugated mirrored entry.
169 Index half = (std::min)(end_k,j2);
170 for(Index k=k2; k<half; k++)
172 blockB[count] = numext::conj(rhs(j2,k));
// When the diagonal entry (j2,j2) lies inside the panel it is stored through
// numext::real.
176 if(half==j2 && half<k2+rows)
178 blockB[count] = numext::real(rhs(j2,j2));
// Rows after `half` are read as stored.
185 for(Index k=half+1; k<k2+rows; k++)
187 blockB[count] = rhs(k,j2);
// Primary template of product_selfadjoint_matrix: declaration only, the
// behavior is supplied by the specializations that follow.
// NOTE(review): the parameter list ends with a trailing comma in this listing,
// so at least one final parameter is not visible; the specializations below
// pass RowMajor / ColMajor as a ninth argument -- confirm against the full
// header.
197 template <
typename Scalar,
typename Index,
198 int LhsStorageOrder,
bool LhsSelfAdjoint,
bool ConjugateLhs,
199 int RhsStorageOrder,
bool RhsSelfAdjoint,
bool ConjugateRhs,
201 struct product_selfadjoint_matrix;
// Specialization selected when the last template argument is RowMajor.
// run() does no computation itself: it forwards to another
// product_selfadjoint_matrix instantiation with the two operands exchanged
// (rhs first, then lhs) and rows/cols exchanged.
// NOTE(review): presumably this computes the transposed product so that a
// column-major implementation can be reused -- confirm. The storage-order
// arguments of the forwarded instantiation and the `alpha` parameter of run()
// fall in line-number gaps of this listing and are not visible.
203 template <
typename Scalar,
typename Index,
204 int LhsStorageOrder,
bool LhsSelfAdjoint,
bool ConjugateLhs,
205 int RhsStorageOrder,
bool RhsSelfAdjoint,
bool ConjugateRhs>
206 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,
RowMajor>
209 static EIGEN_STRONG_INLINE
void run(
210 Index rows, Index cols,
211 const Scalar* lhs, Index lhsStride,
212 const Scalar* rhs, Index rhsStride,
213 Scalar* res, Index resStride,
216 product_selfadjoint_matrix<Scalar, Index,
// For each operand the conjugation flag passed on is
// IsComplex && (SelfAdjoint XOR Conjugate); the rhs flags come first because
// the operands are exchanged.
218 RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
220 LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
222 ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
// Specialization for LhsSelfAdjoint == true, RhsSelfAdjoint == false and a
// ColMajor last argument. Only declares the static, non-inlined run(); its
// definition is given out of class.
226 template <
typename Scalar,
typename Index,
227 int LhsStorageOrder,
bool ConjugateLhs,
228 int RhsStorageOrder,
bool ConjugateRhs>
229 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,
ColMajor>
232 static EIGEN_DONT_INLINE
void run(
233 Index rows, Index cols,
234 const Scalar* _lhs, Index lhsStride,
235 const Scalar* _rhs, Index rhsStride,
236 Scalar* res, Index resStride,
237 const Scalar& alpha);
// Out-of-class definition of run() for the selfadjoint-lhs specialization.
// The lhs is walked in panels of kc columns; for every panel the matching rhs
// rows are packed once into blockB and then combined, through gebp_kernel,
// with three kinds of packed lhs blocks (before, on and after the diagonal).
// NOTE(review): the `alpha` parameter and the declarations of `size`, `kc`,
// `mc` and `nc` fall in line-number gaps of this listing -- confirm against the
// full header. gebp_kernel presumably accumulates alpha * blockA * blockB into
// `res` -- confirm.
240 template <
typename Scalar,
typename Index,
241 int LhsStorageOrder,
bool ConjugateLhs,
242 int RhsStorageOrder,
bool ConjugateRhs>
243 EIGEN_DONT_INLINE
void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
244 Index rows, Index cols,
245 const Scalar* _lhs, Index lhsStride,
246 const Scalar* _rhs, Index rhsStride,
247 Scalar* res, Index resStride,
252 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
253 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
255 typedef gebp_traits<Scalar,Scalar> Traits;
// Blocking sizes come from computeProductBlockingSizes; kc is then clamped so
// that it never exceeds mc.
260 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
262 kc = (std::min)(kc,mc);
// blockA holds kc*mc scalars. allocatedBlockB holds sizeW + kc*cols scalars and
// blockB points sizeW = kc*Traits::WorkSpaceFactor elements past its start.
264 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
265 std::size_t sizeB = sizeW + kc*cols;
266 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
267 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
268 Scalar* blockB = allocatedBlockB + sizeW;
270 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
271 symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
272 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
// Same lhs packer but with the opposite storage order and a trailing `true`
// flag (presumably requesting conjugation -- confirm); used for the blocks
// that are read from their mirrored position.
273 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
275 for(Index k2=0; k2<size; k2+=kc)
// Width of the current panel; shorter than kc only for the last one.
277 const Index actual_kc = (std::min)(k2+kc,size)-k2;
// Pack the rhs rows k2 .. k2+actual_kc-1, all columns.
282 pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// Result rows before k2: the lhs block is fetched from (k2,i2), i.e. with row
// and column indices exchanged, through the transposed packer.
288 for(Index i2=0; i2<k2; i2+=mc)
290 const Index actual_mc = (std::min)(i2+mc,k2)-i2;
292 pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
294 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
// Diagonal block at (k2,k2): packed with the selfadjoint-aware packer.
298 const Index actual_mc = (std::min)(k2+kc,size)-k2;
300 pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
302 gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
// Result rows from k2+kc on: the lhs block at (i2,k2) is packed as stored with
// a plain gemm_pack_lhs.
305 for(Index i2=k2+kc; i2<size; i2+=mc)
307 const Index actual_mc = (std::min)(i2+mc,size)-i2;
308 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
309 (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
311 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
// Specialization for LhsSelfAdjoint == false, RhsSelfAdjoint == true and a
// ColMajor last argument. Only declares the static, non-inlined run(); its
// definition is given out of class.
317 template <
typename Scalar,
typename Index,
318 int LhsStorageOrder,
bool ConjugateLhs,
319 int RhsStorageOrder,
bool ConjugateRhs>
320 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,
ColMajor>
323 static EIGEN_DONT_INLINE
void run(
324 Index rows, Index cols,
325 const Scalar* _lhs, Index lhsStride,
326 const Scalar* _rhs, Index rhsStride,
327 Scalar* res, Index resStride,
328 const Scalar& alpha);
// Out-of-class definition of run() for the selfadjoint-rhs specialization.
// For every panel of kc rows of the rhs, the panel is packed once with
// symm_pack_rhs, then each block of mc lhs rows is packed with a plain
// gemm_pack_lhs and combined with it through gebp_kernel.
// NOTE(review): the `alpha` parameter and the declarations of `size`, `kc`,
// `mc` and `nc` fall in line-number gaps of this listing -- confirm against the
// full header. gebp_kernel presumably accumulates alpha * blockA * blockB into
// `res` -- confirm.
331 template <
typename Scalar,
typename Index,
332 int LhsStorageOrder,
bool ConjugateLhs,
333 int RhsStorageOrder,
bool ConjugateRhs>
334 EIGEN_DONT_INLINE
void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
335 Index rows, Index cols,
336 const Scalar* _lhs, Index lhsStride,
337 const Scalar* _rhs, Index rhsStride,
338 Scalar* res, Index resStride,
343 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
345 typedef gebp_traits<Scalar,Scalar> Traits;
350 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// blockA holds kc*mc scalars. allocatedBlockB holds sizeW + kc*cols scalars and
// blockB points sizeW = kc*Traits::WorkSpaceFactor elements past its start.
351 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
352 std::size_t sizeB = sizeW + kc*cols;
353 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
354 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
355 Scalar* blockB = allocatedBlockB + sizeW;
357 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
358 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
359 symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
361 for(Index k2=0; k2<size; k2+=kc)
// Height of the current rhs panel; shorter than kc only for the last one.
363 const Index actual_kc = (std::min)(k2+kc,size)-k2;
// The packer receives the base pointer of the rhs together with the panel
// offset k2 rather than a pointer to the panel.
365 pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// Sweep the lhs rows in blocks of at most mc; each block starts at (i2,k2).
368 for(Index i2=0; i2<rows; i2+=mc)
370 const Index actual_mc = (std::min)(i2+mc,rows)-i2;
371 pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
373 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
// traits specialization for the matrix-matrix form of SelfadjointProductMatrix
// (both boolean template arguments false): it inherits everything from the
// traits of the corresponding ProductBase.
// NOTE(review): the body of the specialization is not part of this listing.
385 template<
typename Lhs,
int LhsMode,
typename Rhs,
int RhsMode>
386 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
387 : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
// Product expression for the case where both boolean template arguments of
// SelfadjointProductMatrix are false. It derives from ProductBase and
// evaluates through internal::product_selfadjoint_matrix.
// NOTE(review): several lines are missing from this listing (the remaining
// enumerators such as LhsIsUpper / RhsIsUpper, and parts of the template
// argument list in scaleAndAddTo) -- confirm against the full header.
391 template<
typename Lhs,
int LhsMode,
typename Rhs,
int RhsMode>
392 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
393 :
public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
395 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
// The constructor only forwards both operands to the base class.
397 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
// True when RhsMode has every bit of SelfAdjoint set.
403 RhsIsSelfAdjoint = (RhsMode&
SelfAdjoint)==SelfAdjoint
// scaleAndAddTo: hands the product, scaled by alpha, to the low-level kernel
// with dst as the result buffer (presumably dst += alpha * lhs * rhs, judging
// by the name -- confirm).
406 template<
typename Dest>
void scaleAndAddTo(Dest& dst,
const Scalar& alpha)
const
// dst must already have the dimensions of the product.
408 eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
// Extract the underlying operands from the nested expressions.
410 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
411 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
// Fold the scalar factors extracted from both operands into alpha.
413 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
414 * RhsBlasTraits::extractScalarFactor(m_rhs);
// Kernel selection: the template arguments combine the Upper flags with the
// NeedToConjugate flags of each operand; conjugation is only enabled for
// complex scalars.
416 internal::product_selfadjoint_matrix<Scalar, Index,
417 EIGEN_LOGICAL_XOR(LhsIsUpper,
419 NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,
bool(LhsBlasTraits::NeedToConjugate)),
420 EIGEN_LOGICAL_XOR(RhsIsUpper,
422 NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,
bool(RhsBlasTraits::NeedToConjugate)),
// Runtime arguments: product dimensions, then data pointer and outer stride of
// lhs, rhs and dst.
425 lhs.rows(), rhs.cols(),
426 &lhs.coeffRef(0,0), lhs.outerStride(),
427 &rhs.coeffRef(0,0), rhs.outerStride(),
428 &dst.coeffRef(0,0), dst.outerStride(),
436 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
Definition: Constants.h:167
Definition: Constants.h:264
Definition: Constants.h:169
Definition: Constants.h:183
Definition: Constants.h:266
const unsigned int RowMajorBit
Definition: Constants.h:53