LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
subroutine chetrf_aa ( character  UPLO,
integer  N,
complex, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

CHETRF_AA

Download CHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CHETRF_AA computes the factorization of a complex hermitian matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U*T*U**H  or  A = L*T*L**H

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a hermitian tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= 2*N. For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
                has been completed, but the block diagonal matrix D is
                exactly singular, and division by zero will occur if it
                is used to solve a system of equations.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
December 2016

Definition at line 138 of file chetrf_aa.f.

138 *
139 * -- LAPACK computational routine (version 3.7.0) --
140 * -- LAPACK is a software package provided by Univ. of Tennessee, --
141 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
142 * December 2016
143 *
144  IMPLICIT NONE
145 *
146 * .. Scalar Arguments ..
147  CHARACTER uplo
148  INTEGER n, lda, lwork, info
149 * ..
150 * .. Array Arguments ..
151  INTEGER ipiv( * )
152  COMPLEX a( lda, * ), work( * )
153 * ..
154 *
155 * =====================================================================
156 * .. Parameters ..
157  COMPLEX zero, one
158  parameter ( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
159 *
160 * .. Local Scalars ..
161  LOGICAL lquery, upper
162  INTEGER j, lwkopt, iinfo
163  INTEGER nb, mj, nj, k1, k2, j1, j2, j3, jb
164  COMPLEX alpha
165 * ..
166 * .. External Functions ..
167  LOGICAL lsame
168  INTEGER ilaenv
169  EXTERNAL lsame, ilaenv
170 * ..
171 * .. External Subroutines ..
172  EXTERNAL xerbla
173 * ..
174 * .. Intrinsic Functions ..
175  INTRINSIC REAL, conjg, max
176 * ..
177 * .. Executable Statements ..
178 *
179 * Determine the block size
180 *
181  nb = ilaenv( 1, 'CHETRF', uplo, n, -1, -1, -1 )
182 *
183 * Test the input parameters.
184 *
185  info = 0
186  upper = lsame( uplo, 'U' )
187  lquery = ( lwork.EQ.-1 )
188  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
189  info = -1
190  ELSE IF( n.LT.0 ) THEN
191  info = -2
192  ELSE IF( lda.LT.max( 1, n ) ) THEN
193  info = -4
194  ELSE IF( lwork.LT.( 2*n ) .AND. .NOT.lquery ) THEN
195  info = -7
196  END IF
197 *
198  IF( info.EQ.0 ) THEN
199  lwkopt = (nb+1)*n
200  work( 1 ) = lwkopt
201  END IF
202 *
203  IF( info.NE.0 ) THEN
204  CALL xerbla( 'CHETRF_AA', -info )
205  RETURN
206  ELSE IF( lquery ) THEN
207  RETURN
208  END IF
209 *
210 * Quick return
211 *
212  IF ( n.EQ.0 ) THEN
213  RETURN
214  ENDIF
215  ipiv( 1 ) = 1
216  IF ( n.EQ.1 ) THEN
217  a( 1, 1 ) = REAL( A( 1, 1 ) )
218  IF ( a( 1, 1 ).EQ.zero ) THEN
219  info = 1
220  END IF
221  RETURN
222  END IF
223 *
224 * Adjubst block size based on the workspace size
225 *
226  IF( lwork.LT.((1+nb)*n) ) THEN
227  nb = ( lwork-n ) / n
228  END IF
229 *
230  IF( upper ) THEN
231 *
232 * .....................................................
233 * Factorize A as L*D*L**H using the upper triangle of A
234 * .....................................................
235 *
236 * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
237 *
238  CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
239 *
240 * J is the main loop index, increasing from 1 to N in steps of
241 * JB, where JB is the number of columns factorized by CLAHEF;
242 * JB is either NB, or N-J+1 for the last block
243 *
244  j = 0
245  10 CONTINUE
246  IF( j.GE.n )
247  $ GO TO 20
248 *
249 * each step of the main loop
250 * J is the last column of the previous panel
251 * J1 is the first column of the current panel
252 * K1 identifies if the previous column of the panel has been
253 * explicitly stored, e.g., K1=1 for the first panel, and
254 * K1=0 for the rest
255 *
256  j1 = j + 1
257  jb = min( n-j1+1, nb )
258  k1 = max(1, j)-j
259 *
260 * Panel factorization
261 *
262  CALL clahef_aa( uplo, 2-k1, n-j, jb,
263  $ a( max(1, j), j+1 ), lda,
264  $ ipiv( j+1 ), work, n, work( n*nb+1 ),
265  $ iinfo )
266  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
267  info = iinfo+j
268  ENDIF
269 *
270 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
271 *
272  DO j2 = j+2, min(n, j+jb+1)
273  ipiv( j2 ) = ipiv( j2 ) + j
274  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
275  CALL cswap( j1-k1-2, a( 1, j2 ), 1,
276  $ a( 1, ipiv(j2) ), 1 )
277  END IF
278  END DO
279  j = j + jb
280 *
281 * Trailing submatrix update, where
282 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
283 * WORK stores the current block of the auxiriarly matrix H
284 *
285  IF( j.LT.n ) THEN
286 *
287 * if the first panel and JB=1 (NB=1), then nothing to do
288 *
289  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
290 *
291 * Merge rank-1 update with BLAS-3 update
292 *
293  alpha = conjg( a( j, j+1 ) )
294  a( j, j+1 ) = one
295  CALL ccopy( n-j, a( j-1, j+1 ), lda,
296  $ work( (j+1-j1+1)+jb*n ), 1 )
297  CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
298 *
299 * K1 identifies if the previous column of the panel has been
300 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
301 * and K1=1 and K2=0 for the rest
302 *
303  IF( j1.GT.1 ) THEN
304 *
305 * Not first panel
306 *
307  k2 = 1
308  ELSE
309 *
310 * First panel
311 *
312  k2 = 0
313 *
314 * First update skips the first column
315 *
316  jb = jb - 1
317  END IF
318 *
319  DO j2 = j+1, n, nb
320  nj = min( nb, n-j2+1 )
321 *
322 * Update (J2, J2) diagonal block with CGEMV
323 *
324  j3 = j2
325  DO mj = nj-1, 1, -1
326  CALL cgemm( 'Conjugate transpose', 'Transpose',
327  $ 1, mj, jb+1,
328  $ -one, a( j1-k2, j3 ), lda,
329  $ work( (j3-j1+1)+k1*n ), n,
330  $ one, a( j3, j3 ), lda )
331  j3 = j3 + 1
332  END DO
333 *
334 * Update off-diagonal block of J2-th block row with CGEMM
335 *
336  CALL cgemm( 'Conjugate transpose', 'Transpose',
337  $ nj, n-j3+1, jb+1,
338  $ -one, a( j1-k2, j2 ), lda,
339  $ work( (j3-j1+1)+k1*n ), n,
340  $ one, a( j2, j3 ), lda )
341  END DO
342 *
343 * Recover T( J, J+1 )
344 *
345  a( j, j+1 ) = conjg( alpha )
346  END IF
347 *
348 * WORK(J+1, 1) stores H(J+1, 1)
349 *
350  CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
351  END IF
352  GO TO 10
353  ELSE
354 *
355 * .....................................................
356 * Factorize A as L*D*L**H using the lower triangle of A
357 * .....................................................
358 *
359 * copy first column A(1:N, 1) into H(1:N, 1)
360 * (stored in WORK(1:N))
361 *
362  CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
363 *
364 * J is the main loop index, increasing from 1 to N in steps of
365 * JB, where JB is the number of columns factorized by CLAHEF;
366 * JB is either NB, or N-J+1 for the last block
367 *
368  j = 0
369  11 CONTINUE
370  IF( j.GE.n )
371  $ GO TO 20
372 *
373 * each step of the main loop
374 * J is the last column of the previous panel
375 * J1 is the first column of the current panel
376 * K1 identifies if the previous column of the panel has been
377 * explicitly stored, e.g., K1=1 for the first panel, and
378 * K1=0 for the rest
379 *
380  j1 = j+1
381  jb = min( n-j1+1, nb )
382  k1 = max(1, j)-j
383 *
384 * Panel factorization
385 *
386  CALL clahef_aa( uplo, 2-k1, n-j, jb,
387  $ a( j+1, max(1, j) ), lda,
388  $ ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
389  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
390  info = iinfo+j
391  ENDIF
392 *
393 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
394 *
395  DO j2 = j+2, min(n, j+jb+1)
396  ipiv( j2 ) = ipiv( j2 ) + j
397  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
398  CALL cswap( j1-k1-2, a( j2, 1 ), lda,
399  $ a( ipiv(j2), 1 ), lda )
400  END IF
401  END DO
402  j = j + jb
403 *
404 * Trailing submatrix update, where
405 * A(J2+1, J1-1) stores L(J2+1, J1) and
406 * WORK(J2+1, 1) stores H(J2+1, 1)
407 *
408  IF( j.LT.n ) THEN
409 *
410 * if the first panel and JB=1 (NB=1), then nothing to do
411 *
412  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
413 *
414 * Merge rank-1 update with BLAS-3 update
415 *
416  alpha = conjg( a( j+1, j ) )
417  a( j+1, j ) = one
418  CALL ccopy( n-j, a( j+1, j-1 ), 1,
419  $ work( (j+1-j1+1)+jb*n ), 1 )
420  CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
421 *
422 * K1 identifies if the previous column of the panel has been
423 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
424 * and K1=1 and K2=0 for the rest
425 *
426  IF( j1.GT.1 ) THEN
427 *
428 * Not first panel
429 *
430  k2 = 1
431  ELSE
432 *
433 * First panel
434 *
435  k2 = 0
436 *
437 * First update skips the first column
438 *
439  jb = jb - 1
440  END IF
441 *
442  DO j2 = j+1, n, nb
443  nj = min( nb, n-j2+1 )
444 *
445 * Update (J2, J2) diagonal block with CGEMV
446 *
447  j3 = j2
448  DO mj = nj-1, 1, -1
449  CALL cgemm( 'No transpose', 'Conjugate transpose',
450  $ mj, 1, jb+1,
451  $ -one, work( (j3-j1+1)+k1*n ), n,
452  $ a( j3, j1-k2 ), lda,
453  $ one, a( j3, j3 ), lda )
454  j3 = j3 + 1
455  END DO
456 *
457 * Update off-diagonal block of J2-th block column with CGEMM
458 *
459  CALL cgemm( 'No transpose', 'Conjugate transpose',
460  $ n-j3+1, nj, jb+1,
461  $ -one, work( (j3-j1+1)+k1*n ), n,
462  $ a( j2, j1-k2 ), lda,
463  $ one, a( j3, j2 ), lda )
464  END DO
465 *
466 * Recover T( J+1, J )
467 *
468  a( j+1, j ) = conjg( alpha )
469  END IF
470 *
471 * WORK(J+1, 1) stores H(J+1, 1)
472 *
473  CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
474  END IF
475  GO TO 11
476  END IF
477 *
478  20 CONTINUE
479  RETURN
480 *
481 * End of CHETRF_AA
482 *
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: tstiee.f:83
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:54
subroutine clahef_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK, INFO)
CLAHEF_AA
Definition: clahef_aa.f:156
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:52
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:52
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189

Here is the call graph for this function:

Here is the caller graph for this function: