LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
subroutine dsytrf_aa ( character  UPLO,
integer  N,
double precision, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
double precision, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

DSYTRF_AA

Download DSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DSYTRF_AA computes the factorization of a real symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U*T*U**T  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is DOUBLE PRECISION array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
                has been completed, but the block diagonal matrix D is
                exactly singular, and division by zero will occur if it
                is used to solve a system of equations.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
December 2016

Definition at line 138 of file dsytrf_aa.f.

138 *
139 * -- LAPACK computational routine (version 3.7.0) --
140 * -- LAPACK is a software package provided by Univ. of Tennessee, --
141 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
142 * December 2016
143 *
144  IMPLICIT NONE
145 *
146 * .. Scalar Arguments ..
147  CHARACTER uplo
148  INTEGER n, lda, lwork, info
149 * ..
150 * .. Array Arguments ..
151  INTEGER ipiv( * )
152  DOUBLE PRECISION a( lda, * ), work( * )
153 * ..
154 *
155 * =====================================================================
156 * .. Parameters ..
157  DOUBLE PRECISION zero, one
158  parameter ( zero = 0.0d+0, one = 1.0d+0 )
159 *
160 * .. Local Scalars ..
161  LOGICAL lquery, upper
162  INTEGER j, lwkopt, iinfo
163  INTEGER nb, mj, nj, k1, k2, j1, j2, j3, jb
164  DOUBLE PRECISION alpha
165 * ..
166 * .. External Functions ..
167  LOGICAL lsame
168  INTEGER ilaenv
169  EXTERNAL lsame, ilaenv
170 * ..
171 * .. External Subroutines ..
172  EXTERNAL xerbla
173 * ..
174 * .. Intrinsic Functions ..
175  INTRINSIC max
176 * ..
177 * .. Executable Statements ..
178 *
179 * Determine the block size
180 *
181  nb = ilaenv( 1, 'DSYTRF', uplo, n, -1, -1, -1 )
182 *
183 * Test the input parameters.
184 *
185  info = 0
186  upper = lsame( uplo, 'U' )
187  lquery = ( lwork.EQ.-1 )
188  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
189  info = -1
190  ELSE IF( n.LT.0 ) THEN
191  info = -2
192  ELSE IF( lda.LT.max( 1, n ) ) THEN
193  info = -4
194  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
195  info = -7
196  END IF
197 *
198  IF( info.EQ.0 ) THEN
199  lwkopt = (nb+1)*n
200  work( 1 ) = lwkopt
201  END IF
202 *
203  IF( info.NE.0 ) THEN
204  CALL xerbla( 'DSYTRF_AA', -info )
205  RETURN
206  ELSE IF( lquery ) THEN
207  RETURN
208  END IF
209 *
210 * Quick return
211 *
212  IF ( n.EQ.0 ) THEN
213  RETURN
214  ENDIF
215  ipiv( 1 ) = 1
216  IF ( n.EQ.1 ) THEN
217  IF ( a( 1, 1 ).EQ.zero ) THEN
218  info = 1
219  END IF
220  RETURN
221  END IF
222 *
223 * Adjubst block size based on the workspace size
224 *
225  IF( lwork.LT.((1+nb)*n) ) THEN
226  nb = ( lwork-n ) / n
227  END IF
228 *
229  IF( upper ) THEN
230 *
231 * .....................................................
232 * Factorize A as L*D*L**T using the upper triangle of A
233 * .....................................................
234 *
235 * Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
236 *
237  CALL dcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
238 *
239 * J is the main loop index, increasing from 1 to N in steps of
240 * JB, where JB is the number of columns factorized by DLASYF;
241 * JB is either NB, or N-J+1 for the last block
242 *
243  j = 0
244  10 CONTINUE
245  IF( j.GE.n )
246  $ GO TO 20
247 *
248 * each step of the main loop
249 * J is the last column of the previous panel
250 * J1 is the first column of the current panel
251 * K1 identifies if the previous column of the panel has been
252 * explicitly stored, e.g., K1=1 for the first panel, and
253 * K1=0 for the rest
254 *
255  j1 = j + 1
256  jb = min( n-j1+1, nb )
257  k1 = max(1, j)-j
258 *
259 * Panel factorization
260 *
261  CALL dlasyf_aa( uplo, 2-k1, n-j, jb,
262  $ a( max(1, j), j+1 ), lda,
263  $ ipiv( j+1 ), work, n, work( n*nb+1 ),
264  $ iinfo )
265  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
266  info = iinfo+j
267  ENDIF
268 *
269 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
270 *
271  DO j2 = j+2, min(n, j+jb+1)
272  ipiv( j2 ) = ipiv( j2 ) + j
273  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
274  CALL dswap( j1-k1-2, a( 1, j2 ), 1,
275  $ a( 1, ipiv(j2) ), 1 )
276  END IF
277  END DO
278  j = j + jb
279 *
280 * Trailing submatrix update, where
281 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
282 * WORK stores the current block of the auxiriarly matrix H
283 *
284  IF( j.LT.n ) THEN
285 *
286 * If first panel and JB=1 (NB=1), then nothing to do
287 *
288  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
289 *
290 * Merge rank-1 update with BLAS-3 update
291 *
292  alpha = a( j, j+1 )
293  a( j, j+1 ) = one
294  CALL dcopy( n-j, a( j-1, j+1 ), lda,
295  $ work( (j+1-j1+1)+jb*n ), 1 )
296  CALL dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
297 *
298 * K1 identifies if the previous column of the panel has been
299 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
300 * while K1=0 and K2=1 for the rest
301 *
302  IF( j1.GT.1 ) THEN
303 *
304 * Not first panel
305 *
306  k2 = 1
307  ELSE
308 *
309 * First panel
310 *
311  k2 = 0
312 *
313 * First update skips the first column
314 *
315  jb = jb - 1
316  END IF
317 *
318  DO j2 = j+1, n, nb
319  nj = min( nb, n-j2+1 )
320 *
321 * Update (J2, J2) diagonal block with DGEMV
322 *
323  j3 = j2
324  DO mj = nj-1, 1, -1
325  CALL dgemv( 'No transpose', mj, jb+1,
326  $ -one, work( j3-j1+1+k1*n ), n,
327  $ a( j1-k2, j3 ), 1,
328  $ one, a( j3, j3 ), lda )
329  j3 = j3 + 1
330  END DO
331 *
332 * Update off-diagonal block of J2-th block row with DGEMM
333 *
334  CALL dgemm( 'Transpose', 'Transpose',
335  $ nj, n-j3+1, jb+1,
336  $ -one, a( j1-k2, j2 ), lda,
337  $ work( j3-j1+1+k1*n ), n,
338  $ one, a( j2, j3 ), lda )
339  END DO
340 *
341 * Recover T( J, J+1 )
342 *
343  a( j, j+1 ) = alpha
344  END IF
345 *
346 * WORK(J+1, 1) stores H(J+1, 1)
347 *
348  CALL dcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
349  END IF
350  GO TO 10
351  ELSE
352 *
353 * .....................................................
354 * Factorize A as L*D*L**T using the lower triangle of A
355 * .....................................................
356 *
357 * copy first column A(1:N, 1) into H(1:N, 1)
358 * (stored in WORK(1:N))
359 *
360  CALL dcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
361 *
362 * J is the main loop index, increasing from 1 to N in steps of
363 * JB, where JB is the number of columns factorized by DLASYF;
364 * JB is either NB, or N-J+1 for the last block
365 *
366  j = 0
367  11 CONTINUE
368  IF( j.GE.n )
369  $ GO TO 20
370 *
371 * each step of the main loop
372 * J is the last column of the previous panel
373 * J1 is the first column of the current panel
374 * K1 identifies if the previous column of the panel has been
375 * explicitly stored, e.g., K1=1 for the first panel, and
376 * K1=0 for the rest
377 *
378  j1 = j+1
379  jb = min( n-j1+1, nb )
380  k1 = max(1, j)-j
381 *
382 * Panel factorization
383 *
384  CALL dlasyf_aa( uplo, 2-k1, n-j, jb,
385  $ a( j+1, max(1, j) ), lda,
386  $ ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
387  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
388  info = iinfo+j
389  ENDIF
390 *
391 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
392 *
393  DO j2 = j+2, min(n, j+jb+1)
394  ipiv( j2 ) = ipiv( j2 ) + j
395  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
396  CALL dswap( j1-k1-2, a( j2, 1 ), lda,
397  $ a( ipiv(j2), 1 ), lda )
398  END IF
399  END DO
400  j = j + jb
401 *
402 * Trailing submatrix update, where
403 * A(J2+1, J1-1) stores L(J2+1, J1) and
404 * WORK(J2+1, 1) stores H(J2+1, 1)
405 *
406  IF( j.LT.n ) THEN
407 *
408 * if first panel and JB=1 (NB=1), then nothing to do
409 *
410  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
411 *
412 * Merge rank-1 update with BLAS-3 update
413 *
414  alpha = a( j+1, j )
415  a( j+1, j ) = one
416  CALL dcopy( n-j, a( j+1, j-1 ), 1,
417  $ work( (j+1-j1+1)+jb*n ), 1 )
418  CALL dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
419 *
420 * K1 identifies if the previous column of the panel has been
421 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
422 * while K1=0 and K2=1 for the rest
423 *
424  IF( j1.GT.1 ) THEN
425 *
426 * Not first panel
427 *
428  k2 = 1
429  ELSE
430 *
431 * First panel
432 *
433  k2 = 0
434 *
435 * First update skips the first column
436 *
437  jb = jb - 1
438  END IF
439 *
440  DO j2 = j+1, n, nb
441  nj = min( nb, n-j2+1 )
442 *
443 * Update (J2, J2) diagonal block with DGEMV
444 *
445  j3 = j2
446  DO mj = nj-1, 1, -1
447  CALL dgemv( 'No transpose', mj, jb+1,
448  $ -one, work( j3-j1+1+k1*n ), n,
449  $ a( j3, j1-k2 ), lda,
450  $ one, a( j3, j3 ), 1 )
451  j3 = j3 + 1
452  END DO
453 *
454 * Update off-diagonal block in J2-th block column with DGEMM
455 *
456  CALL dgemm( 'No transpose', 'Transpose',
457  $ n-j3+1, nj, jb+1,
458  $ -one, work( j3-j1+1+k1*n ), n,
459  $ a( j2, j1-k2 ), lda,
460  $ one, a( j3, j2 ), lda )
461  END DO
462 *
463 * Recover T( J+1, J )
464 *
465  a( j+1, j ) = alpha
466  END IF
467 *
468 * WORK(J+1, 1) stores H(J+1, 1)
469 *
470  CALL dcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
471  END IF
472  GO TO 11
473  END IF
474 *
475  20 CONTINUE
476  RETURN
477 *
478 * End of DSYTRF_AA
479 *
subroutine dcopy(N, DX, INCX, DY, INCY)
DCOPY
Definition: dcopy.f:53
subroutine dgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
DGEMV
Definition: dgemv.f:158
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: tstiee.f:83
subroutine dswap(N, DX, INCX, DY, INCY)
DSWAP
Definition: dswap.f:53
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine dscal(N, DA, DX, INCX)
DSCAL
Definition: dscal.f:55
subroutine dlasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK, INFO)
DLASYF_AA
Definition: dlasyf_aa.f:156

Here is the call graph for this function:

Here is the caller graph for this function: