subroutine dsytrf_aa	(	character	UPLO,
		integer	N,
		double precision, dimension( lda, * )	A,
		integer	LDA,
		integer, dimension( * )	IPIV,
		double precision, dimension( * )	WORK,
		integer	LWORK,
		integer	INFO
	)

DSYTRF_AA

Download DSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:

 DSYTRF_AA computes the factorization of a real symmetric matrix A
 using Aasen's algorithm.  The form of the factorization is

    A = U*T*U**T  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	UPLO	UPLO is CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored.
[in]	N	N is INTEGER The order of the matrix A. N >= 0.
[in,out]	A	A is DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the tridiagonal matrix is stored in the diagonals and the subdiagonals of A just below (or above) the diagonals, and L (or U) is stored below (or above) the subdiagonals, when UPLO is 'L' (or 'U').
[in]	LDA	LDA is INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	IPIV is INTEGER array, dimension (N) On exit, it contains the details of the interchanges, i.e., the row and column k of A were interchanged with the row and column IPIV(k).
[out]	WORK	WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]	LWORK	LWORK is INTEGER The length of WORK. LWORK >= MAX(1,2*N). For optimum performance LWORK >= N*(1+NB), where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]	INFO	INFO is INTEGER = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value; > 0: if INFO = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, and division by zero will occur if it is used to solve a system of equations.

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Date: December 2016

Definition at line 138 of file dsytrf_aa.f.

 *
 *  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
       IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
       CHARACTER          uplo
       INTEGER            n, lda, lwork, info
 *     ..
 *     .. Array Arguments ..
       INTEGER            ipiv( * )
       DOUBLE PRECISION   a( lda, * ), work( * )
 *     ..
 *
 *  =====================================================================
 *     .. Parameters ..
       DOUBLE PRECISION   zero, one
       parameter                ( zero = 0.0d+0, one = 1.0d+0 )
 *
 *     .. Local Scalars ..
       LOGICAL            lquery, upper
       INTEGER            j, lwkopt, iinfo
       INTEGER            nb, mj, nj, k1, k2, j1, j2, j3, jb
       DOUBLE PRECISION   alpha
 *     ..
 *     .. External Functions ..
       LOGICAL            lsame
       INTEGER            ilaenv
       EXTERNAL           lsame, ilaenv
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           xerbla
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          max
 *     ..
 *     .. Executable Statements ..
 *
 *     Determine the block size
 *
       nb = ilaenv( 1, 'DSYTRF', uplo, n, -1, -1, -1 )
 *
 *     Test the input parameters.
 *
       info = 0
       upper = lsame( uplo, 'U' )
       lquery = ( lwork.EQ.-1 )
       IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
          info = -1
       ELSE IF( n.LT.0 ) THEN
          info = -2
       ELSE IF( lda.LT.max( 1, n ) ) THEN
          info = -4
       ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
          info = -7
       END IF
 *
       IF( info.EQ.0 ) THEN
          lwkopt = (nb+1)*n
          work( 1 ) = lwkopt
       END IF
 *
       IF( info.NE.0 ) THEN
          CALL xerbla( 'DSYTRF_AA', -info )
          RETURN
       ELSE IF( lquery ) THEN
          RETURN
       END IF
 *
 *     Quick return
 *
       IF ( n.EQ.0 ) THEN
           RETURN
       ENDIF
       ipiv( 1 ) = 1
       IF ( n.EQ.1 ) THEN
          IF ( a( 1, 1 ).EQ.zero ) THEN
             info = 1
          END IF
          RETURN
       END IF
 *
 *     Adjubst block size based on the workspace size
 *
       IF( lwork.LT.((1+nb)*n) ) THEN
          nb = ( lwork-n ) / n
       END IF
 *
       IF( upper ) THEN
 *
 *        .....................................................
 *        Factorize A as L*D*L**T using the upper triangle of A
 *        .....................................................
 *
 *        Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
 *
          CALL dcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
 *
 *        J is the main loop index, increasing from 1 to N in steps of
 *        JB, where JB is the number of columns factorized by DLASYF;
 *        JB is either NB, or N-J+1 for the last block
 *
          j = 0
  10      CONTINUE
          IF( j.GE.n )
      $      GO TO 20
 *
 *        each step of the main loop
 *         J is the last column of the previous panel
 *         J1 is the first column of the current panel
 *         K1 identifies if the previous column of the panel has been
 *          explicitly stored, e.g., K1=1 for the first panel, and
 *          K1=0 for the rest
 *
          j1 = j + 1
          jb = min( n-j1+1, nb )
          k1 = max(1, j)-j
 *
 *        Panel factorization
 *
          CALL dlasyf_aa( uplo, 2-k1, n-j, jb,
      $                      a( max(1, j), j+1 ), lda,
      $                      ipiv( j+1 ), work, n, work( n*nb+1 ),
      $                      iinfo )
          IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
              info = iinfo+j
          ENDIF
 *
 *        Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
 *
          DO j2 = j+2, min(n, j+jb+1)
             ipiv( j2 ) = ipiv( j2 ) + j
             IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
                CALL dswap( j1-k1-2, a( 1, j2 ), 1,
      $                              a( 1, ipiv(j2) ), 1 )
             END IF
          END DO
          j = j + jb
 *
 *        Trailing submatrix update, where
 *         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
 *         WORK stores the current block of the auxiriarly matrix H
 *
          IF( j.LT.n ) THEN
 *
 *           If first panel and JB=1 (NB=1), then nothing to do
 *
             IF( j1.GT.1 .OR. jb.GT.1 ) THEN
 *
 *              Merge rank-1 update with BLAS-3 update
 *
                alpha = a( j, j+1 )
                a( j, j+1 ) = one
                CALL dcopy( n-j, a( j-1, j+1 ), lda,
      $                          work( (j+1-j1+1)+jb*n ), 1 )
                CALL dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
 *
 *              K1 identifies if the previous column of the panel has been
 *               explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
 *               while K1=0 and K2=1 for the rest
 *
                IF( j1.GT.1 ) THEN
 *
 *                 Not first panel
 *
                   k2 = 1
                ELSE
 *
 *                 First panel
 *
                   k2 = 0
 *
 *                 First update skips the first column
 *
                   jb = jb - 1
                END IF
 *
                DO j2 = j+1, n, nb
                   nj = min( nb, n-j2+1 )
 *
 *                 Update (J2, J2) diagonal block with DGEMV
 *
                   j3 = j2
                   DO mj = nj-1, 1, -1
                      CALL dgemv( 'No transpose', mj, jb+1,
      $                          -one, work( j3-j1+1+k1*n ), n,
      $                                a( j1-k2, j3 ), 1,
      $                           one, a( j3, j3 ), lda )
                      j3 = j3 + 1
                   END DO
 *
 *                 Update off-diagonal block of J2-th block row with DGEMM
 *
                   CALL dgemm( 'Transpose', 'Transpose',
      $                        nj, n-j3+1, jb+1,
      $                       -one, a( j1-k2, j2 ), lda,
      $                             work( j3-j1+1+k1*n ), n,
      $                        one, a( j2, j3 ), lda )
                END DO
 *
 *              Recover T( J, J+1 )
 *
                a( j, j+1 ) = alpha
             END IF
 *
 *           WORK(J+1, 1) stores H(J+1, 1)
 *
             CALL dcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
          END IF
          GO TO 10
       ELSE
 *
 *        .....................................................
 *        Factorize A as L*D*L**T using the lower triangle of A
 *        .....................................................
 *
 *        copy first column A(1:N, 1) into H(1:N, 1)
 *         (stored in WORK(1:N))
 *
          CALL dcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
 *
 *        J is the main loop index, increasing from 1 to N in steps of
 *        JB, where JB is the number of columns factorized by DLASYF;
 *        JB is either NB, or N-J+1 for the last block
 *
          j = 0
  11      CONTINUE
          IF( j.GE.n )
      $      GO TO 20
 *
 *        each step of the main loop
 *         J is the last column of the previous panel
 *         J1 is the first column of the current panel
 *         K1 identifies if the previous column of the panel has been
 *          explicitly stored, e.g., K1=1 for the first panel, and
 *          K1=0 for the rest
 *
          j1 = j+1
          jb = min( n-j1+1, nb )
          k1 = max(1, j)-j
 *
 *        Panel factorization
 *
          CALL dlasyf_aa( uplo, 2-k1, n-j, jb,
      $                      a( j+1, max(1, j) ), lda,
      $                      ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
          IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
             info = iinfo+j
          ENDIF
 *
 *        Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
 *
          DO j2 = j+2, min(n, j+jb+1)
             ipiv( j2 ) = ipiv( j2 ) + j
             IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
                CALL dswap( j1-k1-2, a( j2, 1 ), lda,
      $                              a( ipiv(j2), 1 ), lda )
             END IF
          END DO
          j = j + jb
 *
 *        Trailing submatrix update, where
 *          A(J2+1, J1-1) stores L(J2+1, J1) and
 *          WORK(J2+1, 1) stores H(J2+1, 1)
 *
          IF( j.LT.n ) THEN
 *
 *           if first panel and JB=1 (NB=1), then nothing to do
 *
             IF( j1.GT.1 .OR. jb.GT.1 ) THEN
 *
 *              Merge rank-1 update with BLAS-3 update
 *
                alpha = a( j+1, j )
                a( j+1, j ) = one
                CALL dcopy( n-j, a( j+1, j-1 ), 1,
      $                          work( (j+1-j1+1)+jb*n ), 1 )
                CALL dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
 *
 *              K1 identifies if the previous column of the panel has been
 *               explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
 *               while K1=0 and K2=1 for the rest
 *
                IF( j1.GT.1 ) THEN
 *
 *                 Not first panel
 *
                   k2 = 1
                ELSE
 *
 *                 First panel
 *
                   k2 = 0
 *
 *                 First update skips the first column
 *
                   jb = jb - 1
                END IF
 *
                DO j2 = j+1, n, nb
                   nj = min( nb, n-j2+1 )
 *
 *                 Update (J2, J2) diagonal block with DGEMV
 *
                   j3 = j2
                   DO mj = nj-1, 1, -1
                      CALL dgemv( 'No transpose', mj, jb+1,
      $                          -one, work( j3-j1+1+k1*n ), n,
      $                                a( j3, j1-k2 ), lda,
      $                           one, a( j3, j3 ), 1 )
                      j3 = j3 + 1
                   END DO
 *
 *                 Update off-diagonal block in J2-th block column with DGEMM
 *
                   CALL dgemm( 'No transpose', 'Transpose',
      $                        n-j3+1, nj, jb+1,
      $                       -one, work( j3-j1+1+k1*n ), n,
      $                             a( j2, j1-k2 ), lda,
      $                        one, a( j3, j2 ), lda )
                END DO
 *
 *              Recover T( J+1, J )
 *
                a( j+1, j ) = alpha
             END IF
 *
 *           WORK(J+1, 1) stores H(J+1, 1)
 *
             CALL dcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
          END IF
          GO TO 11
       END IF
 *
    20 CONTINUE
       RETURN
 *
 *     End of DSYTRF_AA
 *

Here is the call graph for this function:

Here is the caller graph for this function: