d2/dbd/chetrf__aa_8f_source.html

 *> \brief \b CHETRF_AA

 *

 *  =========== DOCUMENTATION ===========

 *

 * Online html documentation available at

 *            http://www.netlib.org/lapack/explore-html/

 *

 *> \htmlonly

 *> Download CHETRF_AA + dependencies

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aa.f">

 *> [TGZ]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aa.f">

 *> [ZIP]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aa.f">

 *> [TXT]</a>

 *> \endhtmlonly

 *

 *  Definition:

 *  ===========

 *

 *       SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )

 *

 *       .. Scalar Arguments ..

 *       CHARACTER    UPLO

 *       INTEGER      N, LDA, LWORK, INFO

 *       ..

 *       .. Array Arguments ..

 *       INTEGER      IPIV( * )

 *       COMPLEX      A( LDA, * ), WORK( * )

 *       ..

 *

 *> \par Purpose:

 *  =============

 *>

 *> \verbatim

 *>

 *> CHETRF_AA computes the factorization of a complex hermitian matrix A

 *> using Aasen's algorithm.  The form of the factorization is

 *>

 *>    A = U**H*T*U  or  A = L*T*L**H

 *>

 *> where U (or L) is a product of permutation and unit upper (lower)

 *> triangular matrices, and T is a hermitian tridiagonal matrix.

 *>

 *> This is the blocked version of the algorithm, calling Level 3 BLAS.

 *> \endverbatim

 *

 *  Arguments:

 *  ==========

 *

 *> \param[in] UPLO

 *> \verbatim

 *>          UPLO is CHARACTER*1

 *>          = 'U':  Upper triangle of A is stored;

 *>          = 'L':  Lower triangle of A is stored.

 *> \endverbatim

 *>

 *> \param[in] N

 *> \verbatim

 *>          N is INTEGER

 *>          The order of the matrix A.  N >= 0.

 *> \endverbatim

 *>

 *> \param[in,out] A

 *> \verbatim

 *>          A is COMPLEX array, dimension (LDA,N)

 *>          On entry, the hermitian matrix A.  If UPLO = 'U', the leading

 *>          N-by-N upper triangular part of A contains the upper

 *>          triangular part of the matrix A, and the strictly lower

 *>          triangular part of A is not referenced.  If UPLO = 'L', the

 *>          leading N-by-N lower triangular part of A contains the lower

 *>          triangular part of the matrix A, and the strictly upper

 *>          triangular part of A is not referenced.

 *>

 *>          On exit, the tridiagonal matrix is stored in the diagonals

 *>          and the subdiagonals of A just below (or above) the diagonals,

 *>          and L is stored below (or above) the subdiagonals, when UPLO

 *>          is 'L' (or 'U').

 *> \endverbatim

 *>

 *> \param[in] LDA

 *> \verbatim

 *>          LDA is INTEGER

 *>          The leading dimension of the array A.  LDA >= max(1,N).

 *> \endverbatim

 *>

 *> \param[out] IPIV

 *> \verbatim

 *>          IPIV is INTEGER array, dimension (N)

 *>          On exit, it contains the details of the interchanges, i.e.,

 *>          the row and column k of A were interchanged with the

 *>          row and column IPIV(k).

 *> \endverbatim

 *>

 *> \param[out] WORK

 *> \verbatim

 *>          WORK is COMPLEX array, dimension (MAX(1,LWORK))

 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

 *> \endverbatim

 *>

 *> \param[in] LWORK

 *> \verbatim

 *>          LWORK is INTEGER

 *>          The length of WORK.  LWORK >= 2*N. For optimum performance

 *>          LWORK >= N*(1+NB), where NB is the optimal blocksize.

 *>

 *>          If LWORK = -1, then a workspace query is assumed; the routine

 *>          only calculates the optimal size of the WORK array, returns

 *>          this value as the first entry of the WORK array, and no error

 *>          message related to LWORK is issued by XERBLA.

 *> \endverbatim

 *>

 *> \param[out] INFO

 *> \verbatim

 *>          INFO is INTEGER

 *>          = 0:  successful exit

 *>          < 0:  if INFO = -i, the i-th argument had an illegal value

 *>          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization

 *>                has been completed, but the block diagonal matrix D is

 *>                exactly singular, and division by zero will occur if it

 *>                is used to solve a system of equations.

 *> \endverbatim

 *

 *  Authors:

 *  ========

 *

 *> \author Univ. of Tennessee

 *> \author Univ. of California Berkeley

 *> \author Univ. of Colorado Denver

 *> \author NAG Ltd.

 *

 *> \date December 2016

 *

 *> \ingroup complexHEcomputational

 *

 *  =====================================================================

       SUBROUTINE chetrf_aa( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
 *
 *  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
 *  Blocked Aasen factorization of a complex Hermitian matrix.
 *
 *  Outline:
 *    1. validate the arguments and answer workspace queries;
 *    2. quick return for N = 0 and N = 1;
 *    3. shrink the block size NB if LWORK is below (1+NB)*N;
 *    4. loop over panels: factor a panel with CLAHEF_AA, shift the
 *       returned pivots to global indices and apply them to the
 *       already computed columns (rows), then update the trailing
 *       submatrix with CGEMM;
 *    5. on exit store the optimal workspace size in WORK(1).
 *
 *  WORK layout used below: WORK(1:N) holds the first column of the
 *  auxiliary matrix H for the current panel, and WORK(N*NB+1:) is
 *  passed to CLAHEF_AA as its own workspace.
 *
       IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
       CHARACTER    UPLO
       INTEGER      N, LDA, LWORK, INFO
 *     ..
 *     .. Array Arguments ..
       INTEGER      IPIV( * )
       COMPLEX      A( lda, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *     .. Parameters ..
       COMPLEX      ZERO, ONE
       parameter    ( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
 *
 *     .. Local Scalars ..
       LOGICAL      LQUERY, UPPER
       INTEGER      J, LWKOPT, IINFO
       INTEGER      NB, MJ, NJ, K1, K2, J1, J2, J3, JB
       COMPLEX      ALPHA
 *     ..
 *     .. External Functions ..
       LOGICAL      LSAME
       INTEGER      ILAENV
       EXTERNAL     lsame, ilaenv
 *     ..
 *     .. External Subroutines ..
       EXTERNAL     ccopy, cgemm, clahef_aa, cscal, cswap, xerbla
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC    REAL, CONJG, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Determine the block size
 *
       nb = ilaenv( 1, 'CHETRF', uplo, n, -1, -1, -1 )
 *
 *     Test the input parameters.
 *
       info = 0
       upper = lsame( uplo, 'U' )
       lquery = ( lwork.EQ.-1 )
       IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
          info = -1
       ELSE IF( n.LT.0 ) THEN
          info = -2
       ELSE IF( lda.LT.max( 1, n ) ) THEN
          info = -4
       ELSE IF( lwork.LT.( 2*n ) .AND. .NOT.lquery ) THEN
          info = -7
       END IF
 *
 *     Optimal workspace size; also returned for a workspace query
 *
       IF( info.EQ.0 ) THEN
          lwkopt = (nb+1)*n
          work( 1 ) = lwkopt
       END IF
 *
       IF( info.NE.0 ) THEN
          CALL xerbla( 'CHETRF_AA', -info )
          RETURN
       ELSE IF( lquery ) THEN
          RETURN
       END IF
 *
 *     Quick return
 *
       IF ( n.EQ.0 ) THEN
           RETURN
       ENDIF
       ipiv( 1 ) = 1
       IF ( n.EQ.1 ) THEN
 *
 *        1-by-1 case: drop the imaginary part of the diagonal entry
 *        and flag an exactly zero entry with INFO = 1
 *
          a( 1, 1 ) = REAL( A( 1, 1 ) )
          IF ( a( 1, 1 ).EQ.zero ) THEN
             info = 1
          END IF
          RETURN
       END IF
 *
 *     Adjust block size based on the workspace size: when LWORK is
 *     below (1+NB)*N, use the largest NB with (1+NB)*N <= LWORK
 *
       IF( lwork.LT.((1+nb)*n) ) THEN
          nb = ( lwork-n ) / n
       END IF
 *
       IF( upper ) THEN
 *
 *        .....................................................
 *        Factorize A as U**H*T*U using the upper triangle of A
 *        .....................................................
 *
 *        copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
 *
          CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
 *
 *        J is the main loop index, increasing from 1 to N in steps of
 *        JB, where JB is the number of columns factorized by CLAHEF_AA;
 *        JB is either NB, or N-J+1 for the last block
 *
          j = 0
  10      CONTINUE
          IF( j.GE.n )
      $      GO TO 20
 *
 *        each step of the main loop
 *         J is the last column of the previous panel
 *         J1 is the first column of the current panel
 *         K1 identifies if the previous column of the panel has been
 *          explicitly stored, e.g., K1=1 for the first panel, and
 *          K1=0 for the rest
 *
          j1 = j + 1
          jb = min( n-j1+1, nb )
          k1 = max(1, j)-j
 *
 *        Panel factorization
 *
          CALL clahef_aa( uplo, 2-k1, n-j, jb,
      $                      a( max(1, j), j+1 ), lda,
      $                      ipiv( j+1 ), work, n, work( n*nb+1 ),
      $                      iinfo )
 *
 *        IINFO is offset by J; only the first positive value is kept
 *
          IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
              info = iinfo+j
          ENDIF
 *
 *        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
 *
          DO j2 = j+2, min(n, j+jb+1)
             ipiv( j2 ) = ipiv( j2 ) + j
             IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
                CALL cswap( j1-k1-2, a( 1, j2 ), 1,
      $                              a( 1, ipiv(j2) ), 1 )
             END IF
          END DO
          j = j + jb
 *
 *        Trailing submatrix update, where
 *         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
 *         WORK stores the current block of the auxiliary matrix H
 *
          IF( j.LT.n ) THEN
 *
 *          if the first panel and JB=1 (NB=1), then nothing to do
 *
             IF( j1.GT.1 .OR. jb.GT.1 ) THEN
 *
 *              Merge rank-1 update with BLAS-3 update:
 *              A(J, J+1) is saved in ALPHA (conjugated) and
 *              temporarily replaced by ONE; it is restored below
 *
                alpha = conjg( a( j, j+1 ) )
                a( j, j+1 ) = one
                CALL ccopy( n-j, a( j-1, j+1 ), lda,
      $                          work( (j+1-j1+1)+jb*n ), 1 )
                CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
 *
 *              K1 identifies if the previous column of the panel has been
 *               explicitly stored, e.g., K1=1 and K2=0 for the first panel,
 *               and K1=0 and K2=1 for the rest
 *
                IF( j1.GT.1 ) THEN
 *
 *                 Not first panel
 *
                   k2 = 1
                ELSE
 *
 *                 First panel
 *
                   k2 = 0
 *
 *                 First update skips the first column
 *
                   jb = jb - 1
                END IF
 *
                DO j2 = j+1, n, nb
                   nj = min( nb, n-j2+1 )
 *
 *                 Update (J2, J2) diagonal block one row at a time
 *                 (CGEMM with M=1)
 *
                   j3 = j2
                   DO mj = nj-1, 1, -1
                      CALL cgemm( 'Conjugate transpose', 'Transpose',
      $                            1, mj, jb+1,
      $                           -one, a( j1-k2, j3 ), lda,
      $                                 work( (j3-j1+1)+k1*n ), n,
      $                            one, a( j3, j3 ), lda )
                      j3 = j3 + 1
                   END DO
 *
 *                 Update off-diagonal block of J2-th block row with CGEMM
 *
                   CALL cgemm( 'Conjugate transpose', 'Transpose',
      $                        nj, n-j3+1, jb+1,
      $                       -one, a( j1-k2, j2 ), lda,
      $                             work( (j3-j1+1)+k1*n ), n,
      $                        one, a( j2, j3 ), lda )
                END DO
 *
 *              Recover T( J, J+1 )
 *
                a( j, j+1 ) = conjg( alpha )
             END IF
 *
 *           WORK(J+1, 1) stores H(J+1, 1)
 *
             CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
          END IF
          GO TO 10
       ELSE
 *
 *        .....................................................
 *        Factorize A as L*T*L**H using the lower triangle of A
 *        .....................................................
 *
 *        copy first column A(1:N, 1) into H(1:N, 1)
 *         (stored in WORK(1:N))
 *
          CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
 *
 *        J is the main loop index, increasing from 1 to N in steps of
 *        JB, where JB is the number of columns factorized by CLAHEF_AA;
 *        JB is either NB, or N-J+1 for the last block
 *
          j = 0
  11      CONTINUE
          IF( j.GE.n )
      $      GO TO 20
 *
 *        each step of the main loop
 *         J is the last column of the previous panel
 *         J1 is the first column of the current panel
 *         K1 identifies if the previous column of the panel has been
 *          explicitly stored, e.g., K1=1 for the first panel, and
 *          K1=0 for the rest
 *
          j1 = j+1
          jb = min( n-j1+1, nb )
          k1 = max(1, j)-j
 *
 *        Panel factorization
 *
          CALL clahef_aa( uplo, 2-k1, n-j, jb,
      $                      a( j+1, max(1, j) ), lda,
      $                      ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
 *
 *        IINFO is offset by J; only the first positive value is kept
 *
          IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
             info = iinfo+j
          ENDIF
 *
 *        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
 *
          DO j2 = j+2, min(n, j+jb+1)
             ipiv( j2 ) = ipiv( j2 ) + j
             IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
                CALL cswap( j1-k1-2, a( j2, 1 ), lda,
      $                              a( ipiv(j2), 1 ), lda )
             END IF
          END DO
          j = j + jb
 *
 *        Trailing submatrix update, where
 *          A(J2+1, J1-1) stores L(J2+1, J1) and
 *          WORK(J2+1, 1) stores H(J2+1, 1)
 *
          IF( j.LT.n ) THEN
 *
 *          if the first panel and JB=1 (NB=1), then nothing to do
 *
             IF( j1.GT.1 .OR. jb.GT.1 ) THEN
 *
 *              Merge rank-1 update with BLAS-3 update:
 *              A(J+1, J) is saved in ALPHA (conjugated) and
 *              temporarily replaced by ONE; it is restored below
 *
                alpha = conjg( a( j+1, j ) )
                a( j+1, j ) = one
                CALL ccopy( n-j, a( j+1, j-1 ), 1,
      $                          work( (j+1-j1+1)+jb*n ), 1 )
                CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
 *
 *              K1 identifies if the previous column of the panel has been
 *               explicitly stored, e.g., K1=1 and K2=0 for the first panel,
 *               and K1=0 and K2=1 for the rest
 *
                IF( j1.GT.1 ) THEN
 *
 *                 Not first panel
 *
                   k2 = 1
                ELSE
 *
 *                 First panel
 *
                   k2 = 0
 *
 *                 First update skips the first column
 *
                   jb = jb - 1
                END IF
 *
                DO j2 = j+1, n, nb
                   nj = min( nb, n-j2+1 )
 *
 *                 Update (J2, J2) diagonal block one column at a time
 *                 (CGEMM with N=1)
 *
                   j3 = j2
                   DO mj = nj-1, 1, -1
                      CALL cgemm( 'No transpose', 'Conjugate transpose',
      $                           mj, 1, jb+1,
      $                          -one, work( (j3-j1+1)+k1*n ), n,
      $                                a( j3, j1-k2 ), lda,
      $                           one, a( j3, j3 ), lda )
                      j3 = j3 + 1
                   END DO
 *
 *                 Update off-diagonal block of J2-th block column with CGEMM
 *
                   CALL cgemm( 'No transpose', 'Conjugate transpose',
      $                        n-j3+1, nj, jb+1,
      $                       -one, work( (j3-j1+1)+k1*n ), n,
      $                             a( j2, j1-k2 ), lda,
      $                        one, a( j3, j2 ), lda )
                END DO
 *
 *              Recover T( J+1, J )
 *
                a( j+1, j ) = conjg( alpha )
             END IF
 *
 *           WORK(J+1, 1) stores H(J+1, 1)
 *
             CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
          END IF
          GO TO 11
       END IF
 *
    20 CONTINUE
 *
 *     WORK(1:N) was overwritten with H during the factorization, so
 *     store the optimal workspace size again before returning
 *
       work( 1 ) = lwkopt
       RETURN
 *
 *     End of CHETRF_AA
 *
       END

cscal
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:54

clahef_aa
subroutine clahef_aa(UPLO, J1, M, NB, A, LDA, IPIV,                                                                                                       H, LDH, WORK, INFO)
CLAHEF_AA
Definition: clahef_aa.f:156

xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62

ccopy
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:52

cswap
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:52

chetrf_aa
subroutine chetrf_aa(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
CHETRF_AA
Definition: chetrf_aa.f:138

cgemm
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189