subroutine chb2st_kernels	(	character	UPLO,
		logical	WANTZ,
		integer	TTYPE,
		integer	ST,
		integer	ED,
		integer	SWEEP,
		integer	N,
		integer	NB,
		integer	IB,
		complex, dimension( lda, * )	A,
		integer	LDA,
		complex, dimension( * )	V,
		complex, dimension( * )	TAU,
		integer	LDVT,
		complex, dimension( * )	WORK
	)

CHB2ST_KERNELS

Download CHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:

 CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
 subroutine.

Parameters

[in]	n	The order of the matrix A.
[in]	nb	The size of the band.
[in,out]	A	A pointer to the matrix A.
[in]	lda	The leading dimension of the matrix A.
[out]	V	COMPLEX array, dimension 2*n if only eigenvalues are requested; if eigenvectors are also wanted, the required dimension is to be queried.
[out]	TAU	COMPLEX array, dimension (2*n). The scalar factors of the Householder reflectors are stored in this array.
[in]	st	internal parameter for indices.
[in]	ed	internal parameter for indices.
[in]	sweep	internal parameter for indices.
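[in]	Vblksiz	internal parameter for indices. (Note: no argument named Vblksiz appears in the argument list of this routine; the arguments UPLO, TTYPE, IB and LDVT of that list are not described here.)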
[in]	wantz	Logical flag indicating whether only eigenvalues are requested or both eigenvalues and eigenvectors.
[in]	work	Workspace of size nb.

Further Details:

  Implemented by Azzam Haidar.

  All details are available in the technical report and in the SC11 and SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8, 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 128 of file chb2st_kernels.f.

 *
 *     Body of CHB2ST_KERNELS. Depending on uplo (upper or lower
 *     branch) and on ttype (1, 2 or 3), the code below builds a
 *     Householder vector in v / tau from entries of a through clarfg
 *     and/or applies one to a through clarfy or clarfx. Any other
 *     value of ttype performs no work.
 *
 *     Arguments as they are used below:
 *       uplo     - compared with 'U' through lsame and forwarded to
 *                  clarfy.
 *       wantz    - tested, but both outcomes compute the same indices.
 *       ttype    - selects the kernel: 1, 2 or 3.
 *       st, ed   - start and end index of the current window; its
 *                  length is taken as ed-st+1.
 *       sweep    - only its parity is used, to select one of two
 *                  n-long halves of v and tau.
 *       n, nb    - n clips the column range j1..j2 and sizes the
 *                  halves of v / tau; nb offsets positions in a.
 *       ib, ldvt - referenced only in the ajeter assignment.
 *       a, lda   - matrix storage; sub-blocks are handed to clarfy and
 *                  clarfx with leading dimension lda-1.
 *       v, tau   - Householder vectors and their scalar factors, read
 *                  and written.
 *       work     - scratch array handed to clarfy and clarfx.
 *
 *     NOTE(review): passing lda-1 as leading dimension presumably lets
 *     a band-stored block be addressed like a dense one - confirm
 *     against the storage layout used by the caller.
 *
       IMPLICIT NONE
 *
 *  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          uplo
       LOGICAL            wantz
       INTEGER            ttype, st, ed, sweep, n, nb, ib, lda, ldvt
 *     ..
 *     .. Array Arguments ..
       COMPLEX            a( lda, * ), v( * ), 
      $                   tau( * ), work( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
 *     Complex constants 0 and 1; one seeds the leading element of
 *     each Householder vector, zero clears the entries moved into v.
       COMPLEX            zero, one
       parameter                ( zero = ( 0.0e+0, 0.0e+0 ),
      $                   one = ( 1.0e+0, 0.0e+0 ) )
 *     ..
 *     .. Local Scalars ..
 *     upper        - .TRUE. when uplo matches 'U'.
 *     i            - loop index.
 *     j1, j2       - first and last column of the neighbouring block
 *                    (ttype 2).
 *     lm, ln       - block / vector lengths passed to the kernels.
 *     vpos, taupos - positions of the current reflector in v and tau.
 *     dpos, ofdpos - row offsets into a (see below).
 *     ajeter       - written once, never read.
       LOGICAL            upper
       INTEGER            i, j1, j2, lm, ln, vpos, taupos,
      $                   dpos, ofdpos, ajeter 
 *     ctmp holds the conjugated scalar handed to clarfg in the upper
 *     branch.
       COMPLEX            ctmp 
 *     ..
 *     .. External Subroutines ..
 *     LAPACK kernels: clarfg generates a reflector, clarfx and clarfy
 *     apply one (see the LAPACK reference documentation).
       EXTERNAL           clarfg, clarfx, clarfy
 *     ..
 *     .. Intrinsic Functions ..
 *     min is used below as well, although it is not listed here.
       INTRINSIC          conjg, mod
 *     .. External Functions ..
       LOGICAL            lsame
       EXTERNAL           lsame
 *     ..
 *     ..
 *     .. Executable Statements ..
 *      
 *     ib and ldvt are referenced nowhere else in this routine, and
 *     ajeter is never read afterwards.
 *     NOTE(review): presumably a dummy use that keeps compilers from
 *     flagging ib and ldvt as unused arguments - confirm.
       ajeter = ib + ldvt
       upper = lsame( uplo, 'U' )
 
 *     Row offsets into a for the two layouts.
 *     NOTE(review): the names suggest that dpos is the row holding the
 *     diagonal and ofdpos the row holding the first off-diagonal -
 *     confirm against the caller.
       IF( upper ) THEN
           dpos    = 2 * nb + 1
           ofdpos  = 2 * nb
       ELSE
           dpos    = 1
           ofdpos  = 2
       ENDIF
 
 *
 *     Upper case
 *
       IF( upper ) THEN
 *
 *         Slot of the current reflector in v and tau: the parity of
 *         sweep selects one of two n-long halves, st is the offset
 *         inside it. Both wantz branches are identical.
           IF( wantz ) THEN
               vpos   = mod( sweep-1, 2 ) * n + st
               taupos = mod( sweep-1, 2 ) * n + st
           ELSE
               vpos   = mod( sweep-1, 2 ) * n + st
               taupos = mod( sweep-1, 2 ) * n + st
           ENDIF
 *
 *         ttype 1: build a reflector of length lm from entries of a,
 *         then apply it with clarfy.
           IF( ttype.EQ.1 ) THEN
               lm = ed - st + 1
 *
 *             v(vpos) is the unit leading element. The entries
 *             a(ofdpos-i, st+i) are moved, conjugated, into the tail
 *             of v and cleared in a.
               v( vpos ) = one
               DO 10 i = 1, lm-1
                   v( vpos+i )         = conjg( a( ofdpos-i, st+i ) )
                   a( ofdpos-i, st+i ) = zero  
    10         CONTINUE
 *             clarfg works on a conjugated copy of a(ofdpos, st); the
 *             value it leaves in ctmp is written back to a, and its
 *             scalar factor is stored in tau(taupos).
               ctmp = conjg( a( ofdpos, st ) )
               CALL clarfg( lm, ctmp, v( vpos+1 ), 1, 
      $                                       tau( taupos ) )
               a( ofdpos, st ) = ctmp
 *
 *             Apply the reflector, with conjugated tau, to the block
 *             of order lm that starts at a(dpos, st).
               lm = ed - st + 1
               CALL clarfy( uplo, lm, v( vpos ), 1,
      $                     conjg( tau( taupos ) ),
      $                     a( dpos, st ), lda-1, work)
           ENDIF
 *
 *         ttype 3: apply the reflector already stored at vpos/taupos
 *         to the block of order lm that starts at a(dpos, st); no new
 *         reflector is generated.
           IF( ttype.EQ.3 ) THEN
 *
               lm = ed - st + 1
               CALL clarfy( uplo, lm, v( vpos ), 1,
      $                     conjg( tau( taupos ) ),
      $                     a( dpos, st ), lda-1, work)
           ENDIF
 *
 *         ttype 2: apply the stored reflector to the neighbouring
 *         block, then create a new reflector from that block and
 *         apply it as well.
           IF( ttype.EQ.2 ) THEN
 *             Columns j1..j2 follow the window: at most nb of them,
 *             clipped at n. ln is the window length, lm the number of
 *             columns in the neighbouring block.
               j1 = ed+1
               j2 = min( ed+nb, n )
               ln = ed-st+1
               lm = j2-j1+1
 *             Nothing to do when the window already ends at column n.
               IF( lm.GT.0) THEN
 *                 Apply the stored reflector (conjugated tau) from the
 *                 left to the ln-by-lm block at a(dpos-nb, j1).
                   CALL clarfx( 'Left', ln, lm, v( vpos ),
      $                         conjg( tau( taupos ) ),
      $                         a( dpos-nb, j1 ), lda-1, work)
 *
 *                 The new reflector goes to the slot that starts at
 *                 j1 (same half of v / tau). Both wantz branches are
 *                 identical.
                   IF( wantz ) THEN
                       vpos   = mod( sweep-1, 2 ) * n + j1
                       taupos = mod( sweep-1, 2 ) * n + j1
                   ELSE
                       vpos   = mod( sweep-1, 2 ) * n + j1
                       taupos = mod( sweep-1, 2 ) * n + j1
                   ENDIF
 *
 *                 Move the conjugated entries a(dpos-nb-i, j1+i) into
 *                 the tail of v and clear them in a.
                   v( vpos ) = one
                   DO 30 i = 1, lm-1
                       v( vpos+i )          = 
      $                                    conjg( a( dpos-nb-i, j1+i ) )
                       a( dpos-nb-i, j1+i ) = zero
    30             CONTINUE
 *                 Generate the reflector from a conjugated copy of
 *                 a(dpos-nb, j1) and write the resulting scalar back.
                   ctmp = conjg( a( dpos-nb, j1 ) )
                   CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
                   a( dpos-nb, j1 ) = ctmp
 *                 
 *                 Apply the new reflector from the right to the
 *                 (ln-1)-by-lm block that starts one row further
 *                 down, at a(dpos-nb+1, j1).
                   CALL clarfx( 'Right', ln-1, lm, v( vpos ),
      $                         tau( taupos ),
      $                         a( dpos-nb+1, j1 ), lda-1, work)
               ENDIF
           ENDIF
 *
 *     Lower case
 *  
       ELSE
 *      
 *         Slot of the current reflector in v and tau, computed exactly
 *         as in the upper branch. Both wantz branches are identical.
           IF( wantz ) THEN
               vpos   = mod( sweep-1, 2 ) * n + st
               taupos = mod( sweep-1, 2 ) * n + st
           ELSE
               vpos   = mod( sweep-1, 2 ) * n + st
               taupos = mod( sweep-1, 2 ) * n + st
           ENDIF
 *
 *         ttype 1: build a reflector of length lm from entries of
 *         column st-1 of a, then apply it with clarfy.
           IF( ttype.EQ.1 ) THEN
               lm = ed - st + 1
 *
 *             The entries a(ofdpos+i, st-1) are moved (without
 *             conjugation) into the tail of v and cleared in a.
               v( vpos ) = one
               DO 20 i = 1, lm-1
                   v( vpos+i )         = a( ofdpos+i, st-1 )
                   a( ofdpos+i, st-1 ) = zero  
    20         CONTINUE
 *             clarfg is handed a(ofdpos, st-1) directly, so that
 *             element is updated in place.
               CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1, 
      $                                       tau( taupos ) )
 *
               lm = ed - st + 1
 *
 *             Apply the reflector, with conjugated tau, to the block
 *             of order lm that starts at a(dpos, st).
               CALL clarfy( uplo, lm, v( vpos ), 1,
      $                     conjg( tau( taupos ) ),
      $                     a( dpos, st ), lda-1, work)
 
           ENDIF
 *
 *         ttype 3: apply the reflector already stored at vpos/taupos
 *         to the block of order lm that starts at a(dpos, st).
           IF( ttype.EQ.3 ) THEN
               lm = ed - st + 1
 *
               CALL clarfy( uplo, lm, v( vpos ), 1,
      $                     conjg( tau( taupos ) ),
      $                     a( dpos, st ), lda-1, work)
 
           ENDIF
 *
 *         ttype 2: apply the stored reflector to the neighbouring
 *         block, then create a new reflector from that block and
 *         apply it as well.
           IF( ttype.EQ.2 ) THEN
 *             Same index range as in the upper branch: j1..j2 holds
 *             at most nb indices after ed, clipped at n.
               j1 = ed+1
               j2 = min( ed+nb, n )
               ln = ed-st+1
               lm = j2-j1+1
 *
               IF( lm.GT.0) THEN
 *                 Apply the stored reflector from the right to the
 *                 lm-by-ln block at a(dpos+nb, st).
                   CALL clarfx( 'Right', lm, ln, v( vpos ), 
      $                         tau( taupos ), a( dpos+nb, st ),
      $                         lda-1, work)
 *
 *                 The new reflector goes to the slot that starts at
 *                 j1. Both wantz branches are identical.
                   IF( wantz ) THEN
                       vpos   = mod( sweep-1, 2 ) * n + j1
                       taupos = mod( sweep-1, 2 ) * n + j1
                   ELSE
                       vpos   = mod( sweep-1, 2 ) * n + j1
                       taupos = mod( sweep-1, 2 ) * n + j1
                   ENDIF
 *
 *                 Move the entries a(dpos+nb+i, st) into the tail of
 *                 v and clear them in a.
                   v( vpos ) = one
                   DO 40 i = 1, lm-1
                       v( vpos+i )        = a( dpos+nb+i, st )
                       a( dpos+nb+i, st ) = zero
    40             CONTINUE
 *                 a(dpos+nb, st) is passed to clarfg directly and is
 *                 therefore updated in place.
                   CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1, 
      $                                        tau( taupos ) )
 *
 *                 Apply the new reflector (conjugated tau) from the
 *                 left to the lm-by-(ln-1) block that starts at
 *                 a(dpos+nb-1, st+1).
                   CALL clarfx( 'Left', lm, ln-1, v( vpos ), 
      $                         conjg( tau( taupos ) ),
      $                         a( dpos+nb-1, st+1 ), lda-1, work)
              
               ENDIF
           ENDIF
       ENDIF
 *
       RETURN
 *
 *     END OF CHB2ST_KERNELS
 *

Here is the call graph for this function: