d6/da2/zhetrd__he2hb_8f_source.html

 *> \brief \b ZHETRD_HE2HB

 *

 *  @precisions fortran z -> s d c

 *

 *  =========== DOCUMENTATION ===========

 *

 * Online html documentation available at

 *            http://www.netlib.org/lapack/explore-html/

 *

 *> \htmlonly

 *> Download ZHETRD_HE2HB + dependencies

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f">

 *> [TGZ]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f">

 *> [ZIP]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f">

 *> [TXT]</a>

 *> \endhtmlonly

 *

 *  Definition:

 *  ===========

 *

 *       SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,

 *                              WORK, LWORK, INFO )

 *

 *       IMPLICIT NONE

 *

 *       .. Scalar Arguments ..

 *       CHARACTER          UPLO

 *       INTEGER            INFO, LDA, LDAB, LWORK, N, KD

 *       ..

 *       .. Array Arguments ..

 *       COMPLEX*16         A( LDA, * ), AB( LDAB, * ),

 *                          TAU( * ), WORK( * )

 *       ..

 *

 *

 *> \par Purpose:

 *  =============

 *>

 *> \verbatim

 *>

 *> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian

 *> band-diagonal form AB by a unitary similarity transformation:

 *> Q**H * A * Q = AB.

 *> \endverbatim

 *

 *  Arguments:

 *  ==========

 *

 *> \param[in] UPLO

 *> \verbatim

 *>          UPLO is CHARACTER*1

 *>          = 'U':  Upper triangle of A is stored;

 *>          = 'L':  Lower triangle of A is stored.

 *> \endverbatim

 *>

 *> \param[in] N

 *> \verbatim

 *>          N is INTEGER

 *>          The order of the matrix A.  N >= 0.

 *> \endverbatim

 *>

 *> \param[in] KD

 *> \verbatim

 *>          KD is INTEGER

 *>          The number of superdiagonals of the reduced matrix if UPLO = 'U',

 *>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.

 *>          The reduced matrix is stored in the array AB.

 *> \endverbatim

 *>

 *> \param[in,out] A

 *> \verbatim

 *>          A is COMPLEX*16 array, dimension (LDA,N)

 *>          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading

 *>          N-by-N upper triangular part of A contains the upper

 *>          triangular part of the matrix A, and the strictly lower

 *>          triangular part of A is not referenced.  If UPLO = 'L', the

 *>          leading N-by-N lower triangular part of A contains the lower

 *>          triangular part of the matrix A, and the strictly upper

 *>          triangular part of A is not referenced.

 *>          On exit, if UPLO = 'U', the diagonal and first superdiagonal

 *>          of A are overwritten by the corresponding elements of the

 *>          tridiagonal matrix T, and the elements above the first

 *>          superdiagonal, with the array TAU, represent the unitary

 *>          matrix Q as a product of elementary reflectors; if UPLO

 *>          = 'L', the diagonal and first subdiagonal of A are over-

 *>          written by the corresponding elements of the tridiagonal

 *>          matrix T, and the elements below the first subdiagonal, with

 *>          the array TAU, represent the unitary matrix Q as a product

 *>          of elementary reflectors. See Further Details.

 *> \endverbatim

 *>

 *> \param[in] LDA

 *> \verbatim

 *>          LDA is INTEGER

 *>          The leading dimension of the array A.  LDA >= max(1,N).

 *> \endverbatim

 *>

 *> \param[out] AB

 *> \verbatim

 *>          AB is COMPLEX*16 array, dimension (LDAB,N)

 *>          On exit, the upper or lower triangle of the Hermitian band

 *>          matrix A, stored in the first KD+1 rows of the array.  The

 *>          j-th column of A is stored in the j-th column of the array AB

 *>          as follows:

 *>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;

 *>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).

 *> \endverbatim

 *>

 *> \param[in] LDAB

 *> \verbatim

 *>          LDAB is INTEGER

 *>          The leading dimension of the array AB.  LDAB >= KD+1.

 *> \endverbatim

 *>

 *> \param[out] TAU

 *> \verbatim

 *>          TAU is COMPLEX*16 array, dimension (N-KD)

 *>          The scalar factors of the elementary reflectors (see Further

 *>          Details).

 *> \endverbatim

 *>

 *> \param[out] WORK

 *> \verbatim

 *>          WORK is COMPLEX*16 array, dimension LWORK.

 *>          On exit, if INFO = 0, or if LWORK=-1,

 *>          WORK(1) returns the size of LWORK.

 *> \endverbatim

 *>

 *> \param[in] LWORK

 *> \verbatim

 *>          LWORK is INTEGER

 *>          The dimension of the array WORK which should be calculated

 *>          by a workspace query. LWORK = MAX(1, LWORK_QUERY).

 *>          If LWORK = -1, then a workspace query is assumed; the routine

 *>          only calculates the optimal size of the WORK array, returns

 *>          this value as the first entry of the WORK array, and no error

 *>          message related to LWORK is issued by XERBLA.

 *>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD

 *>          where FACTOPTNB is the blocking used by the QR or LQ

 *>          algorithm, usually FACTOPTNB=128 is a good choice otherwise

 *>          putting LWORK=-1 will provide the size of WORK.

 *> \endverbatim

 *>

 *> \param[out] INFO

 *> \verbatim

 *>          INFO is INTEGER

 *>          = 0:  successful exit

 *>          < 0:  if INFO = -i, the i-th argument had an illegal value

 *> \endverbatim

 *

 *  Authors:

 *  ========

 *

 *> \author Univ. of Tennessee

 *> \author Univ. of California Berkeley

 *> \author Univ. of Colorado Denver

 *> \author NAG Ltd.

 *

 *> \date December 2016

 *

 *> \ingroup complex16HEcomputational

 *

 *> \par Further Details:

 *  =====================

 *>

 *> \verbatim

 *>

 *>  Implemented by Azzam Haidar.

 *>

 *>  All details are available on technical report, SC11, SC13 papers.

 *>

 *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.

 *>  Parallel reduction to condensed forms for symmetric eigenvalue problems

 *>  using aggregated fine-grained and memory-aware kernels. In Proceedings

 *>  of 2011 International Conference for High Performance Computing,

 *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,

 *>  Article 8 , 11 pages.

 *>  http://doi.acm.org/10.1145/2063384.2063394

 *>

 *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.

 *>  An improved parallel singular value algorithm and its implementation

 *>  for multicore hardware, In Proceedings of 2013 International Conference

 *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).

 *>  Denver, Colorado, USA, 2013.

 *>  Article 90, 12 pages.

 *>  http://doi.acm.org/10.1145/2503210.2503292

 *>

 *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.

 *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure

 *>  calculations based on fine-grained memory aware tasks.

 *>  International Journal of High Performance Computing Applications.

 *>  Volume 28 Issue 2, Pages 196-209, May 2014.

 *>  http://hpc.sagepub.com/content/28/2/196

 *>

 *> \endverbatim

 *>

 *> \verbatim

 *>

 *>  If UPLO = 'U', the matrix Q is represented as a product of elementary

 *>  reflectors

 *>

 *>     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.

 *>

 *>  Each H(i) has the form

 *>

 *>     H(i) = I - tau * v * v**H

 *>

 *>  where tau is a complex scalar, and v is a complex vector with

 *>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in

 *>  A(i,i+kd+1:n), and tau in TAU(i).

 *>

 *>  If UPLO = 'L', the matrix Q is represented as a product of elementary

 *>  reflectors

 *>

 *>     Q = H(1) H(2) . . . H(k), where k = n-kd.

 *>

 *>  Each H(i) has the form

 *>

 *>     H(i) = I - tau * v * v**H

 *>

 *>  where tau is a complex scalar, and v is a complex vector with

 *>  v(1:i+kd-1) = 0 and v(i+kd) = 1; v(i+kd+1:n) is stored on exit in

 *>  A(i+kd+1:n,i), and tau in TAU(i).

 *>

 *>  The contents of A on exit are illustrated by the following examples

 *>  with n = 5:

 *>

 *>  if UPLO = 'U':                       if UPLO = 'L':

 *>

 *>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )

 *>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )

 *>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )

 *>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )

 *>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )

 *>

 *>  where ab denotes an element of the reduced band matrix, and vi

 *>  denotes an element of the vector defining H(i).

 *> \endverbatim

 *>

 *  =====================================================================

       SUBROUTINE zhetrd_he2hb( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
      $                         work, lwork, info )
 *
       IMPLICIT NONE
 *
 *  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
 *  Overview of the implementation
 *  ------------------------------
 *  The matrix is processed in panels of KD rows (UPLO = 'U', LQ
 *  factorization via ZGELQF) or KD columns (UPLO = 'L', QR
 *  factorization via ZGEQRF).  For every panel the routine
 *    1. factors the off-diagonal block of the panel,
 *    2. copies the finished band columns of A into AB,
 *    3. builds the triangular factor T of the block reflector (ZLARFT),
 *    4. forms the auxiliary matrix W and applies a Hermitian rank-2k
 *       update (ZHER2K) to the trailing submatrix A(i+kd:n,i+kd:n).
 *  The last KD columns are copied into AB after the panel loop.
 *
 *  INFO values set here: -1 (UPLO), -2 (N), -3 (KD), -5 (LDA),
 *  -7 (LDAB), -10 (LWORK too small and not a workspace query).
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
 *     ..
 *     .. Array Arguments ..
       COMPLEX*16         A( lda, * ), AB( ldab, * ),
      $                   tau( * ), work( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
       DOUBLE PRECISION   RONE
       COMPLEX*16         ZERO, ONE, HALF
       parameter                ( rone = 1.0d+0,
      $                   zero = ( 0.0d+0, 0.0d+0 ),
      $                   one = ( 1.0d+0, 0.0d+0 ),
      $                   half = ( 0.5d+0, 0.0d+0 ) )
 *     ..
 *     .. Local Scalars ..
       LOGICAL            LQUERY, UPPER
       INTEGER            I, J, IINFO, LWMIN, PN, PK, LK,
      $                   ldt, ldw, lds2, lds1,
      $                   ls2, ls1, lw, lt,
      $                   tpos, wpos, s2pos, s1pos
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           xerbla, zher2k, zhemm, zgemm, zcopy,
      $                   zlarft, zgelqf, zgeqrf, zlaset
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          min, max
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       INTEGER            ILAENV
       EXTERNAL           lsame, ilaenv
 *     ..
 *     .. Executable Statements ..
 *
 *     Determine the minimal workspace size required
 *     and test the input parameters
 *
       info   = 0
       upper  = lsame( uplo, 'U' )
       lquery = ( lwork.EQ.-1 )
 *
 *     When N <= KD+1 the routine only copies A into AB (see the quick
 *     return below) and touches no workspace beyond WORK(1), so the
 *     minimal workspace is 1.  This keeps the workspace query and the
 *     LWORK check consistent with the value stored in WORK(1) on exit
 *     from the quick-return path.
 *
       IF( n.LE.kd+1 ) THEN
          lwmin = 1
       ELSE
          lwmin = ilaenv( 20, 'ZHETRD_HE2HB', '', n, kd, -1, -1 )
       END IF

       IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
          info = -1
       ELSE IF( n.LT.0 ) THEN
          info = -2
       ELSE IF( kd.LT.0 ) THEN
          info = -3
       ELSE IF( lda.LT.max( 1, n ) ) THEN
          info = -5
       ELSE IF( ldab.LT.max( 1, kd+1 ) ) THEN
          info = -7
       ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
          info = -10
       END IF
 *
       IF( info.NE.0 ) THEN
          CALL xerbla( 'ZHETRD_HE2HB', -info )
          RETURN
       ELSE IF( lquery ) THEN
 *
 *        Workspace query: report the required size and do no work
 *
          work( 1 ) = lwmin
          RETURN
       END IF
 *
 *     Quick return if possible:
 *     the matrix already fits inside the requested band, so just
 *     copy the upper/lower portion of A into AB
 *
       IF( n.LE.kd+1 ) THEN
           IF( upper ) THEN
               DO 100 i = 1, n
                   lk = min( kd+1, i )
                   CALL zcopy( lk, a( i-lk+1, i ), 1,
      $                            ab( kd+1-lk+1, i ), 1 )
   100         CONTINUE
           ELSE
               DO 110 i = 1, n
                   lk = min( kd+1, n-i+1 )
                   CALL zcopy( lk, a( i, i ), 1, ab( 1, i ), 1 )
   110         CONTINUE
           ENDIF
           work( 1 ) = 1
           RETURN
       END IF
 *
 *     Determine the pointer position for the workspace.
 *     WORK is partitioned, in this order, into
 *        T  : LDT*KD  entries starting at TPOS
 *        W  : N*KD    entries starting at WPOS
 *        S1 : LDS1*KD entries starting at S1POS
 *        S2 : the remaining LWMIN-LT-LW-LS1 entries starting at S2POS
 *     S2 is also passed as the work array of ZGELQF/ZGEQRF.
 *
       ldt    = kd
       lds1   = kd
       lt     = ldt*kd
       lw     = n*kd
       ls1    = lds1*kd
       ls2    = lwmin - lt - lw - ls1
 *      LS2 = N*MAX(KD,FACTOPTNB)
       tpos   = 1
       wpos   = tpos  + lt
       s1pos  = wpos  + lw
       s2pos  = s1pos + ls1
 *
 *     W and S2 are stored with KD rows in the upper case and with
 *     N rows in the lower case
 *
       IF( upper ) THEN
           ldw    = kd
           lds2   = kd
       ELSE
           ldw    = n
           lds2   = n
       ENDIF
 *
 *
 *     Set the workspace of the triangular matrix T to zero once such a
 *     way everytime T is generated the upper/lower portion will be always zero
 *
       CALL zlaset( "A", ldt, kd, zero, zero, work( tpos ), ldt )
 *
       IF( upper ) THEN
 *
 *         Reduce the upper triangle of A to upper band matrix
 *
           DO 10 i = 1, n - kd, kd
 *
 *            PN = number of columns to the right of the band in this
 *            panel, PK = number of reflectors in this panel
 *
              pn = n-i-kd+1
              pk = min( n-i-kd+1, kd )
 *
 *            Compute the LQ factorization of the current block
 *
              CALL zgelqf( kd, pn, a( i, i+kd ), lda,
      $                    tau( i ), work( s2pos ), ls2, iinfo )
 *
 *            Copy the upper portion of A into AB
 *            (done before the block below is overwritten by ZLASET)
 *
              DO 20 j = i, i+pk-1
                 lk = min( kd, n-j ) + 1
                 CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
    20        CONTINUE
 *
 *            Overwrite the lower triangle of the leading PK-by-PK part
 *            of the block with zeros and its diagonal with ones
 *
              CALL zlaset( 'Lower', pk, pk, zero, one,
      $                    a( i, i+kd ), lda )
 *
 *            Form the matrix T
 *
              CALL zlarft( 'Forward', 'Rowwise', pn, pk,
      $                    a( i, i+kd ), lda, tau( i ),
      $                    work( tpos ), ldt )
 *
 *            Compute W:
 *
 *            S2 := T**H * V
 *
              CALL zgemm( 'Conjugate', 'No transpose', pk, pn, pk,
      $                   one,  work( tpos ), ldt,
      $                         a( i, i+kd ), lda,
      $                   zero, work( s2pos ), lds2 )
 *
 *            W := S2 * A(i+kd:n,i+kd:n)
 *
              CALL zhemm( 'Right', uplo, pk, pn,
      $                   one,  a( i+kd, i+kd ), lda,
      $                         work( s2pos ), lds2,
      $                   zero, work( wpos ), ldw )
 *
 *            S1 := W * S2**H
 *
              CALL zgemm( 'No transpose', 'Conjugate', pk, pk, pn,
      $                   one,  work( wpos ), ldw,
      $                         work( s2pos ), lds2,
      $                   zero, work( s1pos ), lds1 )
 *
 *            W := W - (1/2) * S1 * V
 *
              CALL zgemm( 'No transpose', 'No transpose', pk, pn, pk,
      $                   -half, work( s1pos ), lds1,
      $                          a( i, i+kd ), lda,
      $                   one,   work( wpos ), ldw )
 *
 *
 *            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
 *            an update of the form:  A := A - V'*W - W'*V
 *
              CALL zher2k( uplo, 'Conjugate', pn, pk,
      $                    -one, a( i, i+kd ), lda,
      $                          work( wpos ), ldw,
      $                    rone, a( i+kd, i+kd ), lda )
    10     CONTINUE
 *
 *        Copy the upper band to AB which is the band storage matrix
 *        (last KD columns, not handled inside the panel loop)
 *
          DO 30 j = n-kd+1, n
             lk = min(kd, n-j) + 1
             CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
    30    CONTINUE
 *
       ELSE
 *
 *         Reduce the lower triangle of A to lower band matrix
 *
           DO 40 i = 1, n - kd, kd
 *
 *            PN = number of rows below the band in this panel,
 *            PK = number of reflectors in this panel
 *
              pn = n-i-kd+1
              pk = min( n-i-kd+1, kd )
 *
 *            Compute the QR factorization of the current block
 *
              CALL zgeqrf( pn, kd, a( i+kd, i ), lda,
      $                    tau( i ), work( s2pos ), ls2, iinfo )
 *
 *            Copy the lower portion of A into AB
 *            (done before the block below is overwritten by ZLASET)
 *
              DO 50 j = i, i+pk-1
                 lk = min( kd, n-j ) + 1
                 CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
    50        CONTINUE
 *
 *            Overwrite the upper triangle of the leading PK-by-PK part
 *            of the block with zeros and its diagonal with ones
 *
              CALL zlaset( 'Upper', pk, pk, zero, one,
      $                    a( i+kd, i ), lda )
 *
 *            Form the matrix T
 *
              CALL zlarft( 'Forward', 'Columnwise', pn, pk,
      $                    a( i+kd, i ), lda, tau( i ),
      $                    work( tpos ), ldt )
 *
 *            Compute W:
 *
 *            S2 := V * T
 *
              CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
      $                   one, a( i+kd, i ), lda,
      $                         work( tpos ), ldt,
      $                   zero, work( s2pos ), lds2 )
 *
 *            W := A(i+kd:n,i+kd:n) * S2
 *
              CALL zhemm( 'Left', uplo, pn, pk,
      $                   one, a( i+kd, i+kd ), lda,
      $                         work( s2pos ), lds2,
      $                   zero, work( wpos ), ldw )
 *
 *            S1 := S2**H * W
 *
              CALL zgemm( 'Conjugate', 'No transpose', pk, pk, pn,
      $                   one, work( s2pos ), lds2,
      $                         work( wpos ), ldw,
      $                   zero, work( s1pos ), lds1 )
 *
 *            W := W - (1/2) * V * S1
 *
              CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
      $                   -half, a( i+kd, i ), lda,
      $                         work( s1pos ), lds1,
      $                   one, work( wpos ), ldw )
 *
 *
 *            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
 *            an update of the form:  A := A - V*W' - W*V'
 *
              CALL zher2k( uplo, 'No transpose', pn, pk,
      $                    -one, a( i+kd, i ), lda,
      $                           work( wpos ), ldw,
      $                    rone, a( i+kd, i+kd ), lda )
    40     CONTINUE
 *
 *        Copy the lower band to AB which is the band storage matrix
 *        (last KD columns, not handled inside the panel loop)
 *
          DO 60 j = n-kd+1, n
             lk = min(kd, n-j) + 1
             CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
    60    CONTINUE

       END IF
 *
 *     Report the workspace size that was required
 *
       work( 1 ) = lwmin
       RETURN
 *
 *     End of ZHETRD_HE2HB
 *
       END

zcopy
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:52

zlarft
subroutine zlarft(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
ZLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition: zlarft.f:165

zgemm
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189

zgeqrf
subroutine zgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition: zgeqrf.f:151

zhetrd_he2hb
subroutine zhetrd_he2hb(UPLO, N, KD, A, LDA, AB, LDAB, TAU,                                                                                                                       WORK, LWORK, INFO)
ZHETRD_HE2HB
Definition: zhetrd_he2hb.f:245

zhemm
subroutine zhemm(SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZHEMM
Definition: zhemm.f:193

xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62

zlaset
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: zlaset.f:108

zgelqf
subroutine zgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGELQF
Definition: zgelqf.f:137

zher2k
subroutine zher2k(UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZHER2K
Definition: zher2k.f:200