LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
chetrf_aa.f
Go to the documentation of this file.
1 *> \brief \b CHETRF_AA
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CHETRF_AA + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aa.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aa.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aa.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER N, LDA, LWORK, INFO
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * COMPLEX A( LDA, * ), WORK( * )
30 * ..
31 *
32 *> \par Purpose:
33 * =============
34 *>
35 *> \verbatim
36 *>
37 *> CHETRF_AA computes the factorization of a complex hermitian matrix A
38 *> using the Aasen's algorithm. The form of the factorization is
39 *>
40 *> A = U*T*U**H or A = L*T*L**H
41 *>
42 *> where U (or L) is a product of permutation and unit upper (lower)
43 *> triangular matrices, and T is a hermitian tridiagonal matrix.
44 *>
45 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
46 *> \endverbatim
47 *
48 * Arguments:
49 * ==========
50 *
51 *> \param[in] UPLO
52 *> \verbatim
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
56 *> \endverbatim
57 *>
58 *> \param[in] N
59 *> \verbatim
60 *> N is INTEGER
61 *> The order of the matrix A. N >= 0.
62 *> \endverbatim
63 *>
64 *> \param[in,out] A
65 *> \verbatim
66 *> A is COMPLEX array, dimension (LDA,N)
67 *> On entry, the hermitian matrix A. If UPLO = 'U', the leading
68 *> N-by-N upper triangular part of A contains the upper
69 *> triangular part of the matrix A, and the strictly lower
70 *> triangular part of A is not referenced. If UPLO = 'L', the
71 *> leading N-by-N lower triangular part of A contains the lower
72 *> triangular part of the matrix A, and the strictly upper
73 *> triangular part of A is not referenced.
74 *>
75 *> On exit, the tridiagonal matrix is stored in the diagonals
76 *> and the subdiagonals of A just below (or above) the diagonals,
77 *> and L is stored below (or above) the subdiaonals, when UPLO
78 *> is 'L' (or 'U').
79 *> \endverbatim
80 *>
81 *> \param[in] LDA
82 *> \verbatim
83 *> LDA is INTEGER
84 *> The leading dimension of the array A. LDA >= max(1,N).
85 *> \endverbatim
86 *>
87 *> \param[out] IPIV
88 *> \verbatim
89 *> IPIV is INTEGER array, dimension (N)
90 *> On exit, it contains the details of the interchanges, i.e.,
91 *> the row and column k of A were interchanged with the
92 *> row and column IPIV(k).
93 *> \endverbatim
94 *>
95 *> \param[out] WORK
96 *> \verbatim
97 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
98 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99 *> \endverbatim
100 *>
101 *> \param[in] LWORK
102 *> \verbatim
103 *> LWORK is INTEGER
104 *> The length of WORK. LWORK >= 2*N. For optimum performance
105 *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
106 *>
107 *> If LWORK = -1, then a workspace query is assumed; the routine
108 *> only calculates the optimal size of the WORK array, returns
109 *> this value as the first entry of the WORK array, and no error
110 *> message related to LWORK is issued by XERBLA.
111 *> \endverbatim
112 *>
113 *> \param[out] INFO
114 *> \verbatim
115 *> INFO is INTEGER
116 *> = 0: successful exit
117 *> < 0: if INFO = -i, the i-th argument had an illegal value
118 *> > 0: if INFO = i, D(i,i) is exactly zero. The factorization
119 *> has been completed, but the block diagonal matrix D is
120 *> exactly singular, and division by zero will occur if it
121 *> is used to solve a system of equations.
122 *> \endverbatim
123 *
124 * Authors:
125 * ========
126 *
127 *> \author Univ. of Tennessee
128 *> \author Univ. of California Berkeley
129 *> \author Univ. of Colorado Denver
130 *> \author NAG Ltd.
131 *
132 *> \date December 2016
133 *
134 *> \ingroup complexHEcomputational
135 *
136 * =====================================================================
137  SUBROUTINE chetrf_aa( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
138 *
139 * -- LAPACK computational routine (version 3.7.0) --
140 * -- LAPACK is a software package provided by Univ. of Tennessee, --
141 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
142 * December 2016
143 *
144  IMPLICIT NONE
145 *
146 * .. Scalar Arguments ..
147  CHARACTER UPLO
148  INTEGER N, LDA, LWORK, INFO
149 * ..
150 * .. Array Arguments ..
151  INTEGER IPIV( * )
152  COMPLEX A( lda, * ), WORK( * )
153 * ..
154 *
155 * =====================================================================
156 * .. Parameters ..
157  COMPLEX ZERO, ONE
158  parameter ( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
159 *
160 * .. Local Scalars ..
161  LOGICAL LQUERY, UPPER
162  INTEGER J, LWKOPT, IINFO
163  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
164  COMPLEX ALPHA
165 * ..
166 * .. External Functions ..
167  LOGICAL LSAME
168  INTEGER ILAENV
169  EXTERNAL lsame, ilaenv
170 * ..
171 * .. External Subroutines ..
172  EXTERNAL xerbla
173 * ..
174 * .. Intrinsic Functions ..
175  INTRINSIC REAL, CONJG, MAX
176 * ..
177 * .. Executable Statements ..
178 *
179 * Determine the block size
180 *
181  nb = ilaenv( 1, 'CHETRF', uplo, n, -1, -1, -1 )
182 *
183 * Test the input parameters.
184 *
185  info = 0
186  upper = lsame( uplo, 'U' )
187  lquery = ( lwork.EQ.-1 )
188  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
189  info = -1
190  ELSE IF( n.LT.0 ) THEN
191  info = -2
192  ELSE IF( lda.LT.max( 1, n ) ) THEN
193  info = -4
194  ELSE IF( lwork.LT.( 2*n ) .AND. .NOT.lquery ) THEN
195  info = -7
196  END IF
197 *
198  IF( info.EQ.0 ) THEN
199  lwkopt = (nb+1)*n
200  work( 1 ) = lwkopt
201  END IF
202 *
203  IF( info.NE.0 ) THEN
204  CALL xerbla( 'CHETRF_AA', -info )
205  RETURN
206  ELSE IF( lquery ) THEN
207  RETURN
208  END IF
209 *
210 * Quick return
211 *
212  IF ( n.EQ.0 ) THEN
213  RETURN
214  ENDIF
215  ipiv( 1 ) = 1
216  IF ( n.EQ.1 ) THEN
217  a( 1, 1 ) = REAL( A( 1, 1 ) )
218  IF ( a( 1, 1 ).EQ.zero ) THEN
219  info = 1
220  END IF
221  RETURN
222  END IF
223 *
224 * Adjubst block size based on the workspace size
225 *
226  IF( lwork.LT.((1+nb)*n) ) THEN
227  nb = ( lwork-n ) / n
228  END IF
229 *
230  IF( upper ) THEN
231 *
232 * .....................................................
233 * Factorize A as L*D*L**H using the upper triangle of A
234 * .....................................................
235 *
236 * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
237 *
238  CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
239 *
240 * J is the main loop index, increasing from 1 to N in steps of
241 * JB, where JB is the number of columns factorized by CLAHEF;
242 * JB is either NB, or N-J+1 for the last block
243 *
244  j = 0
245  10 CONTINUE
246  IF( j.GE.n )
247  $ GO TO 20
248 *
249 * each step of the main loop
250 * J is the last column of the previous panel
251 * J1 is the first column of the current panel
252 * K1 identifies if the previous column of the panel has been
253 * explicitly stored, e.g., K1=1 for the first panel, and
254 * K1=0 for the rest
255 *
256  j1 = j + 1
257  jb = min( n-j1+1, nb )
258  k1 = max(1, j)-j
259 *
260 * Panel factorization
261 *
262  CALL clahef_aa( uplo, 2-k1, n-j, jb,
263  $ a( max(1, j), j+1 ), lda,
264  $ ipiv( j+1 ), work, n, work( n*nb+1 ),
265  $ iinfo )
266  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
267  info = iinfo+j
268  ENDIF
269 *
270 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
271 *
272  DO j2 = j+2, min(n, j+jb+1)
273  ipiv( j2 ) = ipiv( j2 ) + j
274  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
275  CALL cswap( j1-k1-2, a( 1, j2 ), 1,
276  $ a( 1, ipiv(j2) ), 1 )
277  END IF
278  END DO
279  j = j + jb
280 *
281 * Trailing submatrix update, where
282 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
283 * WORK stores the current block of the auxiriarly matrix H
284 *
285  IF( j.LT.n ) THEN
286 *
287 * if the first panel and JB=1 (NB=1), then nothing to do
288 *
289  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
290 *
291 * Merge rank-1 update with BLAS-3 update
292 *
293  alpha = conjg( a( j, j+1 ) )
294  a( j, j+1 ) = one
295  CALL ccopy( n-j, a( j-1, j+1 ), lda,
296  $ work( (j+1-j1+1)+jb*n ), 1 )
297  CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
298 *
299 * K1 identifies if the previous column of the panel has been
300 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
301 * and K1=1 and K2=0 for the rest
302 *
303  IF( j1.GT.1 ) THEN
304 *
305 * Not first panel
306 *
307  k2 = 1
308  ELSE
309 *
310 * First panel
311 *
312  k2 = 0
313 *
314 * First update skips the first column
315 *
316  jb = jb - 1
317  END IF
318 *
319  DO j2 = j+1, n, nb
320  nj = min( nb, n-j2+1 )
321 *
322 * Update (J2, J2) diagonal block with CGEMV
323 *
324  j3 = j2
325  DO mj = nj-1, 1, -1
326  CALL cgemm( 'Conjugate transpose', 'Transpose',
327  $ 1, mj, jb+1,
328  $ -one, a( j1-k2, j3 ), lda,
329  $ work( (j3-j1+1)+k1*n ), n,
330  $ one, a( j3, j3 ), lda )
331  j3 = j3 + 1
332  END DO
333 *
334 * Update off-diagonal block of J2-th block row with CGEMM
335 *
336  CALL cgemm( 'Conjugate transpose', 'Transpose',
337  $ nj, n-j3+1, jb+1,
338  $ -one, a( j1-k2, j2 ), lda,
339  $ work( (j3-j1+1)+k1*n ), n,
340  $ one, a( j2, j3 ), lda )
341  END DO
342 *
343 * Recover T( J, J+1 )
344 *
345  a( j, j+1 ) = conjg( alpha )
346  END IF
347 *
348 * WORK(J+1, 1) stores H(J+1, 1)
349 *
350  CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
351  END IF
352  GO TO 10
353  ELSE
354 *
355 * .....................................................
356 * Factorize A as L*D*L**H using the lower triangle of A
357 * .....................................................
358 *
359 * copy first column A(1:N, 1) into H(1:N, 1)
360 * (stored in WORK(1:N))
361 *
362  CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
363 *
364 * J is the main loop index, increasing from 1 to N in steps of
365 * JB, where JB is the number of columns factorized by CLAHEF;
366 * JB is either NB, or N-J+1 for the last block
367 *
368  j = 0
369  11 CONTINUE
370  IF( j.GE.n )
371  $ GO TO 20
372 *
373 * each step of the main loop
374 * J is the last column of the previous panel
375 * J1 is the first column of the current panel
376 * K1 identifies if the previous column of the panel has been
377 * explicitly stored, e.g., K1=1 for the first panel, and
378 * K1=0 for the rest
379 *
380  j1 = j+1
381  jb = min( n-j1+1, nb )
382  k1 = max(1, j)-j
383 *
384 * Panel factorization
385 *
386  CALL clahef_aa( uplo, 2-k1, n-j, jb,
387  $ a( j+1, max(1, j) ), lda,
388  $ ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
389  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
390  info = iinfo+j
391  ENDIF
392 *
393 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
394 *
395  DO j2 = j+2, min(n, j+jb+1)
396  ipiv( j2 ) = ipiv( j2 ) + j
397  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
398  CALL cswap( j1-k1-2, a( j2, 1 ), lda,
399  $ a( ipiv(j2), 1 ), lda )
400  END IF
401  END DO
402  j = j + jb
403 *
404 * Trailing submatrix update, where
405 * A(J2+1, J1-1) stores L(J2+1, J1) and
406 * WORK(J2+1, 1) stores H(J2+1, 1)
407 *
408  IF( j.LT.n ) THEN
409 *
410 * if the first panel and JB=1 (NB=1), then nothing to do
411 *
412  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
413 *
414 * Merge rank-1 update with BLAS-3 update
415 *
416  alpha = conjg( a( j+1, j ) )
417  a( j+1, j ) = one
418  CALL ccopy( n-j, a( j+1, j-1 ), 1,
419  $ work( (j+1-j1+1)+jb*n ), 1 )
420  CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
421 *
422 * K1 identifies if the previous column of the panel has been
423 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
424 * and K1=1 and K2=0 for the rest
425 *
426  IF( j1.GT.1 ) THEN
427 *
428 * Not first panel
429 *
430  k2 = 1
431  ELSE
432 *
433 * First panel
434 *
435  k2 = 0
436 *
437 * First update skips the first column
438 *
439  jb = jb - 1
440  END IF
441 *
442  DO j2 = j+1, n, nb
443  nj = min( nb, n-j2+1 )
444 *
445 * Update (J2, J2) diagonal block with CGEMV
446 *
447  j3 = j2
448  DO mj = nj-1, 1, -1
449  CALL cgemm( 'No transpose', 'Conjugate transpose',
450  $ mj, 1, jb+1,
451  $ -one, work( (j3-j1+1)+k1*n ), n,
452  $ a( j3, j1-k2 ), lda,
453  $ one, a( j3, j3 ), lda )
454  j3 = j3 + 1
455  END DO
456 *
457 * Update off-diagonal block of J2-th block column with CGEMM
458 *
459  CALL cgemm( 'No transpose', 'Conjugate transpose',
460  $ n-j3+1, nj, jb+1,
461  $ -one, work( (j3-j1+1)+k1*n ), n,
462  $ a( j2, j1-k2 ), lda,
463  $ one, a( j3, j2 ), lda )
464  END DO
465 *
466 * Recover T( J+1, J )
467 *
468  a( j+1, j ) = conjg( alpha )
469  END IF
470 *
471 * WORK(J+1, 1) stores H(J+1, 1)
472 *
473  CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
474  END IF
475  GO TO 11
476  END IF
477 *
478  20 CONTINUE
479  RETURN
480 *
481 * End of CHETRF_AA
482 *
483  END
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:54
subroutine clahef_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK, INFO)
CLAHEF_AA
Definition: clahef_aa.f:156
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:52
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:52
subroutine chetrf_aa(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
CHETRF_AA
Definition: chetrf_aa.f:138
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189