d7/dbf/zsytrf__aa_8f_source.html

 *> \brief \b ZSYTRF_AA

 *

 *  =========== DOCUMENTATION ===========

 *

 * Online html documentation available at

 *            http://www.netlib.org/lapack/explore-html/

 *

 *> \htmlonly

 *> Download ZSYTRF_AA + dependencies

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsytrf_aa.f">

 *> [TGZ]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsytrf_aa.f">

 *> [ZIP]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsytrf_aa.f">

 *> [TXT]</a>

 *> \endhtmlonly

 *

 *  Definition:

 *  ===========

 *

 *       SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )

 *

 *       .. Scalar Arguments ..

 *       CHARACTER          UPLO

 *       INTEGER            N, LDA, LWORK, INFO

 *       ..

 *       .. Array Arguments ..

 *       INTEGER            IPIV( * )

 *       COMPLEX*16         A( LDA, * ), WORK( * )

 *       ..

 *

 *> \par Purpose:

 *  =============

 *>

 *> \verbatim

 *>

 *> ZSYTRF_AA computes the factorization of a complex symmetric matrix A

 *> using Aasen's algorithm.  The form of the factorization is

 *>

 *>    A = U*T*U**T  or  A = L*T*L**T

 *>

 *> where U (or L) is a product of permutation and unit upper (lower)

 *> triangular matrices, and T is a complex symmetric tridiagonal matrix.

 *>

 *> This is the blocked version of the algorithm, calling Level 3 BLAS.

 *> \endverbatim

 *

 *  Arguments:

 *  ==========

 *

 *> \param[in] UPLO

 *> \verbatim

 *>          UPLO is CHARACTER*1

 *>          = 'U':  Upper triangle of A is stored;

 *>          = 'L':  Lower triangle of A is stored.

 *> \endverbatim

 *>

 *> \param[in] N

 *> \verbatim

 *>          N is INTEGER

 *>          The order of the matrix A.  N >= 0.

 *> \endverbatim

 *>

 *> \param[in,out] A

 *> \verbatim

 *>          A is COMPLEX*16 array, dimension (LDA,N)

 *>          On entry, the symmetric matrix A.  If UPLO = 'U', the leading

 *>          N-by-N upper triangular part of A contains the upper

 *>          triangular part of the matrix A, and the strictly lower

 *>          triangular part of A is not referenced.  If UPLO = 'L', the

 *>          leading N-by-N lower triangular part of A contains the lower

 *>          triangular part of the matrix A, and the strictly upper

 *>          triangular part of A is not referenced.

 *>

 *>          On exit, the tridiagonal matrix is stored in the diagonals

 *>          and the subdiagonals of A just below (or above) the diagonals,

 *>          and L is stored below (or above) the subdiagonals, when UPLO

 *>          is 'L' (or 'U').

 *> \endverbatim

 *>

 *> \param[in] LDA

 *> \verbatim

 *>          LDA is INTEGER

 *>          The leading dimension of the array A.  LDA >= max(1,N).

 *> \endverbatim

 *>

 *> \param[out] IPIV

 *> \verbatim

 *>          IPIV is INTEGER array, dimension (N)

 *>          On exit, it contains the details of the interchanges, i.e.,

 *>          the row and column k of A were interchanged with the

 *>          row and column IPIV(k).

 *> \endverbatim

 *>

 *> \param[out] WORK

 *> \verbatim

 *>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))

 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

 *> \endverbatim

 *>

 *> \param[in] LWORK

 *> \verbatim

 *>          LWORK is INTEGER

 *>          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance

 *>          LWORK >= N*(1+NB), where NB is the optimal blocksize.

 *>

 *>          If LWORK = -1, then a workspace query is assumed; the routine

 *>          only calculates the optimal size of the WORK array, returns

 *>          this value as the first entry of the WORK array, and no error

 *>          message related to LWORK is issued by XERBLA.

 *> \endverbatim

 *>

 *> \param[out] INFO

 *> \verbatim

 *>          INFO is INTEGER

 *>          = 0:  successful exit

 *>          < 0:  if INFO = -i, the i-th argument had an illegal value

 *>          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization

 *>                has been completed, but the block diagonal matrix D is

 *>                exactly singular, and division by zero will occur if it

 *>                is used to solve a system of equations.

 *> \endverbatim

 *

 *  Authors:

 *  ========

 *

 *> \author Univ. of Tennessee

 *> \author Univ. of California Berkeley

 *> \author Univ. of Colorado Denver

 *> \author NAG Ltd.

 *

 *> \date December 2016

 *

 *> \ingroup complex16SYcomputational

 *

 *  =====================================================================

       SUBROUTINE zsytrf_aa( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
 *
 *  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
 *  Purpose
 *  =======
 *
 *  Blocked Aasen factorization of a complex symmetric matrix A,
 *
 *     A = U*T*U**T  (UPLO = 'U')   or   A = L*T*L**T  (UPLO = 'L'),
 *
 *  with T complex symmetric tridiagonal.  Each panel is factorized by
 *  ZLASYF_AA and the trailing submatrix is updated with ZGEMV/ZGEMM.
 *
 *  Arguments
 *  =========
 *
 *  UPLO   (in)     'U': upper triangle of A is used;
 *                  'L': lower triangle of A is used.
 *  N      (in)     Order of A.  N >= 0.
 *  A      (in/out) COMPLEX*16 array (LDA,*), overwritten by the
 *                  factorization.
 *  LDA    (in)     Leading dimension of A.  LDA >= max(1,N).
 *  IPIV   (out)    Pivot indices; IPIV(1) is always set to 1.
 *  WORK   (out)    Workspace.  On every successful return
 *                  (INFO >= 0) WORK(1) holds the optimal LWORK.
 *  LWORK  (in)     Length of WORK, LWORK >= max(1,2*N); LWORK = -1
 *                  requests a workspace query only.
 *  INFO   (out)    = 0: successful exit
 *                  < 0: if INFO = -i, the i-th argument was illegal
 *                  > 0: first positive IINFO returned by ZLASYF_AA,
 *                       offset by the index J of the last column of
 *                       the preceding panel; for N = 1, INFO = 1 when
 *                       A(1,1) is exactly zero.
 *
       IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
       INTEGER            N, LDA, LWORK, INFO
 *     ..
 *     .. Array Arguments ..
       INTEGER            IPIV( * )
       COMPLEX*16         A( LDA, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *     .. Parameters ..
       COMPLEX*16         ZERO, ONE
       PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
      $                     ONE = ( 1.0D+0, 0.0D+0 ) )
 *
 *     .. Local Scalars ..
       LOGICAL            LQUERY, UPPER
       INTEGER            J, LWKOPT, IINFO
       INTEGER            NB, MJ, NJ, K1, K2, J1, J2, J3, JB
       COMPLEX*16         ALPHA
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       INTEGER            ILAENV
       EXTERNAL           LSAME, ILAENV
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           XERBLA, ZCOPY, ZGEMM, ZGEMV, ZLASYF_AA,
      $                   ZSCAL, ZSWAP
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Determine the block size
 *
       nb = ilaenv( 1, 'ZSYTRF', uplo, n, -1, -1, -1 )
 *
 *     Test the input parameters.
 *
       info = 0
       upper = lsame( uplo, 'U' )
       lquery = ( lwork.EQ.-1 )
       IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
          info = -1
       ELSE IF( n.LT.0 ) THEN
          info = -2
       ELSE IF( lda.LT.max( 1, n ) ) THEN
          info = -4
       ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
          info = -7
       END IF
 *
       IF( info.NE.0 ) THEN
          CALL xerbla( 'ZSYTRF_AA', -info )
          RETURN
       END IF
 *
 *     Optimal workspace size.  It is clamped to 1 so that the value
 *     reported for N = 0 still satisfies the LWORK >= max(1,2*N) test
 *     above when the caller passes it back in.
 *
       lwkopt = max( 1, (nb+1)*n )
       work( 1 ) = lwkopt
       IF( lquery ) RETURN
 *
 *     Quick return
 *
       IF( n.EQ.0 ) RETURN
       ipiv( 1 ) = 1
       IF( n.EQ.1 ) THEN
          IF( a( 1, 1 ).EQ.zero ) info = 1
          RETURN
       END IF
 *
 *     Adjust block size based on the workspace size
 *
       IF( lwork.LT.((1+nb)*n) ) nb = ( lwork-n ) / n
 *
       IF( upper ) THEN
 *
 *        .....................................................
 *        Factorize A as U*T*U**T using the upper triangle of A
 *        .....................................................
 *
 *        Copy first row A(1, 1:N) into H(1:N) (stored in WORK(1:N))
 *
          CALL zcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
 *
 *        J is the main loop index, increasing from 0 to N in steps
 *        of JB, where JB is the number of columns factorized by
 *        ZLASYF_AA; JB is either NB, or N-J for the last block
 *
          j = 0
          DO WHILE( j.LT.n )
 *
 *           each step of the main loop
 *            J is the last column of the previous panel
 *            J1 is the first column of the current panel
 *            K1 identifies if the previous column of the panel has
 *             been explicitly stored, e.g., K1=1 for the first
 *             panel, and K1=0 for the rest
 *
             j1 = j + 1
             jb = min( n-j1+1, nb )
             k1 = max(1, j)-j
 *
 *           Panel factorization
 *
             CALL zlasyf_aa( uplo, 2-k1, n-j, jb,
      $                      a( max(1, j), j+1 ), lda,
      $                      ipiv( j+1 ), work, n, work( n*nb+1 ),
      $                      iinfo )
 *
 *           Keep only the first positive status, shifted by J
 *
             IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
                info = iinfo+j
             END IF
 *
 *           Adjust IPIV by the offset J and apply the interchanges
 *           back to the leading rows of the affected columns
 *           (J-th step picks (J+1)-th pivot)
 *
             DO j2 = j+2, min(n, j+jb+1)
                ipiv( j2 ) = ipiv( j2 ) + j
                IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
                   CALL zswap( j1-k1-2, a( 1, j2 ), 1,
      $                                 a( 1, ipiv(j2) ), 1 )
                END IF
             END DO
             j = j + jb
 *
 *           Trailing submatrix update, where
 *            the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
 *            WORK stores the current block of the auxiliary matrix H
 *
             IF( j.LT.n ) THEN
 *
 *              If first panel and JB=1 (NB=1), then nothing to do
 *
                IF( j1.GT.1 .OR. jb.GT.1 ) THEN
 *
 *                 Merge rank-1 update with BLAS-3 update:
 *                 A(J, J+1) is saved in ALPHA and temporarily set
 *                 to one; it is restored after the update
 *
                   alpha = a( j, j+1 )
                   a( j, j+1 ) = one
                   CALL zcopy( n-j, a( j-1, j+1 ), lda,
      $                        work( (j+1-j1+1)+jb*n ), 1 )
                   CALL zscal( n-j, alpha,
      $                        work( (j+1-j1+1)+jb*n ), 1 )
 *
 *                 K1 identifies if the previous column of the panel
 *                  has been explicitly stored, e.g., K1=1 and K2=0
 *                  for the first panel, while K1=0 and K2=1 for the
 *                  rest
 *
                   IF( j1.GT.1 ) THEN
 *
 *                    Not first panel
 *
                      k2 = 1
                   ELSE
 *
 *                    First panel
 *
                      k2 = 0
 *
 *                    First update skips the first column
 *
                      jb = jb - 1
                   END IF
 *
                   DO j2 = j+1, n, nb
                      nj = min( nb, n-j2+1 )
 *
 *                    Update (J2, J2) diagonal block with ZGEMV
 *
                      j3 = j2
                      DO mj = nj-1, 1, -1
                         CALL zgemv( 'No transpose', mj, jb+1,
      $                             -one, work( j3-j1+1+k1*n ), n,
      $                                   a( j1-k2, j3 ), 1,
      $                              one, a( j3, j3 ), lda )
                         j3 = j3 + 1
                      END DO
 *
 *                    Update off-diagonal block of J2-th block row
 *                    with ZGEMM
 *
                      CALL zgemm( 'Transpose', 'Transpose',
      $                           nj, n-j3+1, jb+1,
      $                          -one, a( j1-k2, j2 ), lda,
      $                                work( j3-j1+1+k1*n ), n,
      $                           one, a( j2, j3 ), lda )
                   END DO
 *
 *                 Recover T( J, J+1 )
 *
                   a( j, j+1 ) = alpha
                END IF
 *
 *              WORK(J+1, 1) stores H(J+1, 1)
 *
                CALL zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
             END IF
          END DO
       ELSE
 *
 *        .....................................................
 *        Factorize A as L*T*L**T using the lower triangle of A
 *        .....................................................
 *
 *        copy first column A(1:N, 1) into H(1:N, 1)
 *         (stored in WORK(1:N))
 *
          CALL zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
 *
 *        J is the main loop index, increasing from 0 to N in steps
 *        of JB, where JB is the number of columns factorized by
 *        ZLASYF_AA; JB is either NB, or N-J for the last block
 *
          j = 0
          DO WHILE( j.LT.n )
 *
 *           each step of the main loop
 *            J is the last column of the previous panel
 *            J1 is the first column of the current panel
 *            K1 identifies if the previous column of the panel has
 *             been explicitly stored, e.g., K1=1 for the first
 *             panel, and K1=0 for the rest
 *
             j1 = j + 1
             jb = min( n-j1+1, nb )
             k1 = max(1, j)-j
 *
 *           Panel factorization
 *
             CALL zlasyf_aa( uplo, 2-k1, n-j, jb,
      $                      a( j+1, max(1, j) ), lda,
      $                      ipiv( j+1 ), work, n, work( n*nb+1 ),
      $                      iinfo )
 *
 *           Keep only the first positive status, shifted by J
 *
             IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
                info = iinfo+j
             END IF
 *
 *           Adjust IPIV by the offset J and apply the interchanges
 *           back to the leading columns of the affected rows
 *           (J-th step picks (J+1)-th pivot)
 *
             DO j2 = j+2, min(n, j+jb+1)
                ipiv( j2 ) = ipiv( j2 ) + j
                IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
                   CALL zswap( j1-k1-2, a( j2, 1 ), lda,
      $                                 a( ipiv(j2), 1 ), lda )
                END IF
             END DO
             j = j + jb
 *
 *           Trailing submatrix update, where
 *             A(J2+1, J1-1) stores L(J2+1, J1) and
 *             WORK(J2+1, 1) stores H(J2+1, 1)
 *
             IF( j.LT.n ) THEN
 *
 *              if first panel and JB=1 (NB=1), then nothing to do
 *
                IF( j1.GT.1 .OR. jb.GT.1 ) THEN
 *
 *                 Merge rank-1 update with BLAS-3 update:
 *                 A(J+1, J) is saved in ALPHA and temporarily set
 *                 to one; it is restored after the update
 *
                   alpha = a( j+1, j )
                   a( j+1, j ) = one
                   CALL zcopy( n-j, a( j+1, j-1 ), 1,
      $                        work( (j+1-j1+1)+jb*n ), 1 )
                   CALL zscal( n-j, alpha,
      $                        work( (j+1-j1+1)+jb*n ), 1 )
 *
 *                 K1 identifies if the previous column of the panel
 *                  has been explicitly stored, e.g., K1=1 and K2=0
 *                  for the first panel, while K1=0 and K2=1 for the
 *                  rest
 *
                   IF( j1.GT.1 ) THEN
 *
 *                    Not first panel
 *
                      k2 = 1
                   ELSE
 *
 *                    First panel
 *
                      k2 = 0
 *
 *                    First update skips the first column
 *
                      jb = jb - 1
                   END IF
 *
                   DO j2 = j+1, n, nb
                      nj = min( nb, n-j2+1 )
 *
 *                    Update (J2, J2) diagonal block with ZGEMV
 *
                      j3 = j2
                      DO mj = nj-1, 1, -1
                         CALL zgemv( 'No transpose', mj, jb+1,
      $                             -one, work( j3-j1+1+k1*n ), n,
      $                                   a( j3, j1-k2 ), lda,
      $                              one, a( j3, j3 ), 1 )
                         j3 = j3 + 1
                      END DO
 *
 *                    Update off-diagonal block in J2-th block column
 *                    with ZGEMM
 *
                      CALL zgemm( 'No transpose', 'Transpose',
      $                           n-j3+1, nj, jb+1,
      $                          -one, work( j3-j1+1+k1*n ), n,
      $                                a( j2, j1-k2 ), lda,
      $                           one, a( j3, j2 ), lda )
                   END DO
 *
 *                 Recover T( J+1, J )
 *
                   a( j+1, j ) = alpha
                END IF
 *
 *              WORK(J+1, 1) stores H(J+1, 1)
 *
                CALL zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
             END IF
          END DO
       END IF
 *
 *     WORK(1) was used as scratch during the factorization; restore
 *     the optimal workspace size so the documented exit contract of
 *     WORK(1) holds.
 *
       work( 1 ) = lwkopt
       RETURN
 *
 *     End of ZSYTRF_AA
 *
       END

zcopy
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:52

zsytrf_aa
subroutine zsytrf_aa(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
ZSYTRF_AA
Definition: zsytrf_aa.f:138

zgemv
subroutine zgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
ZGEMV
Definition: zgemv.f:160

zlasyf_aa
subroutine zlasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV,                                                                                                                   H, LDH, WORK, INFO)
ZLASYF_AA
Definition: zlasyf_aa.f:156

zswap
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:52

zgemm
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189

xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62

zscal
subroutine zscal(N, ZA, ZX, INCX)
ZSCAL
Definition: zscal.f:54