LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
zsytrf_aa.f
Go to the documentation of this file.
1 *> \brief \b ZSYTRF_AA
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZSYTRF_AA + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsytrf_aa.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsytrf_aa.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsytrf_aa.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER N, LDA, LWORK, INFO
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * COMPLEX*16 A( LDA, * ), WORK( * )
30 * ..
31 *
32 *> \par Purpose:
33 * =============
34 *>
35 *> \verbatim
36 *>
37 *> ZSYTRF_AA computes the factorization of a complex symmetric matrix A
38 *> using the Aasen's algorithm. The form of the factorization is
39 *>
40 *> A = U*T*U**T or A = L*T*L**T
41 *>
42 *> where U (or L) is a product of permutation and unit upper (lower)
43 *> triangular matrices, and T is a complex symmetric tridiagonal matrix.
44 *>
45 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
46 *> \endverbatim
47 *
48 * Arguments:
49 * ==========
50 *
51 *> \param[in] UPLO
52 *> \verbatim
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
56 *> \endverbatim
57 *>
58 *> \param[in] N
59 *> \verbatim
60 *> N is INTEGER
61 *> The order of the matrix A. N >= 0.
62 *> \endverbatim
63 *>
64 *> \param[in,out] A
65 *> \verbatim
66 *> A is COMPLEX*16 array, dimension (LDA,N)
67 *> On entry, the symmetric matrix A. If UPLO = 'U', the leading
68 *> N-by-N upper triangular part of A contains the upper
69 *> triangular part of the matrix A, and the strictly lower
70 *> triangular part of A is not referenced. If UPLO = 'L', the
71 *> leading N-by-N lower triangular part of A contains the lower
72 *> triangular part of the matrix A, and the strictly upper
73 *> triangular part of A is not referenced.
74 *>
75 *> On exit, the tridiagonal matrix is stored in the diagonals
76 *> and the subdiagonals of A just below (or above) the diagonals,
77 *> and L is stored below (or above) the subdiaonals, when UPLO
78 *> is 'L' (or 'U').
79 *> \endverbatim
80 *>
81 *> \param[in] LDA
82 *> \verbatim
83 *> LDA is INTEGER
84 *> The leading dimension of the array A. LDA >= max(1,N).
85 *> \endverbatim
86 *>
87 *> \param[out] IPIV
88 *> \verbatim
89 *> IPIV is INTEGER array, dimension (N)
90 *> On exit, it contains the details of the interchanges, i.e.,
91 *> the row and column k of A were interchanged with the
92 *> row and column IPIV(k).
93 *> \endverbatim
94 *>
95 *> \param[out] WORK
96 *> \verbatim
97 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
98 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99 *> \endverbatim
100 *>
101 *> \param[in] LWORK
102 *> \verbatim
103 *> LWORK is INTEGER
104 *> The length of WORK. LWORK >=MAX(1,2*N). For optimum performance
105 *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
106 *>
107 *> If LWORK = -1, then a workspace query is assumed; the routine
108 *> only calculates the optimal size of the WORK array, returns
109 *> this value as the first entry of the WORK array, and no error
110 *> message related to LWORK is issued by XERBLA.
111 *> \endverbatim
112 *>
113 *> \param[out] INFO
114 *> \verbatim
115 *> INFO is INTEGER
116 *> = 0: successful exit
117 *> < 0: if INFO = -i, the i-th argument had an illegal value
118 *> > 0: if INFO = i, D(i,i) is exactly zero. The factorization
119 *> has been completed, but the block diagonal matrix D is
120 *> exactly singular, and division by zero will occur if it
121 *> is used to solve a system of equations.
122 *> \endverbatim
123 *
124 * Authors:
125 * ========
126 *
127 *> \author Univ. of Tennessee
128 *> \author Univ. of California Berkeley
129 *> \author Univ. of Colorado Denver
130 *> \author NAG Ltd.
131 *
132 *> \date December 2016
133 *
134 *> \ingroup complex16SYcomputational
135 *
136 * =====================================================================
137  SUBROUTINE zsytrf_aa( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
138 *
139 * -- LAPACK computational routine (version 3.7.0) --
140 * -- LAPACK is a software package provided by Univ. of Tennessee, --
141 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
142 * December 2016
143 *
144  IMPLICIT NONE
145 *
146 * .. Scalar Arguments ..
147  CHARACTER UPLO
148  INTEGER N, LDA, LWORK, INFO
149 * ..
150 * .. Array Arguments ..
151  INTEGER IPIV( * )
152  COMPLEX*16 A( lda, * ), WORK( * )
153 * ..
154 *
155 * =====================================================================
156 * .. Parameters ..
157  COMPLEX*16 ZERO, ONE
158  parameter ( zero = 0.0d+0, one = 1.0d+0 )
159 *
160 * .. Local Scalars ..
161  LOGICAL LQUERY, UPPER
162  INTEGER J, LWKOPT, IINFO
163  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
164  COMPLEX*16 ALPHA
165 * ..
166 * .. External Functions ..
167  LOGICAL LSAME
168  INTEGER ILAENV
169  EXTERNAL lsame, ilaenv
170 * ..
171 * .. External Subroutines ..
172  EXTERNAL xerbla
173 * ..
174 * .. Intrinsic Functions ..
175  INTRINSIC max
176 * ..
177 * .. Executable Statements ..
178 *
179 * Determine the block size
180 *
181  nb = ilaenv( 1, 'ZSYTRF', uplo, n, -1, -1, -1 )
182 *
183 * Test the input parameters.
184 *
185  info = 0
186  upper = lsame( uplo, 'U' )
187  lquery = ( lwork.EQ.-1 )
188  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
189  info = -1
190  ELSE IF( n.LT.0 ) THEN
191  info = -2
192  ELSE IF( lda.LT.max( 1, n ) ) THEN
193  info = -4
194  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
195  info = -7
196  END IF
197 *
198  IF( info.EQ.0 ) THEN
199  lwkopt = (nb+1)*n
200  work( 1 ) = lwkopt
201  END IF
202 *
203  IF( info.NE.0 ) THEN
204  CALL xerbla( 'ZSYTRF_AA', -info )
205  RETURN
206  ELSE IF( lquery ) THEN
207  RETURN
208  END IF
209 *
210 * Quick return
211 *
212  IF ( n.EQ.0 ) THEN
213  RETURN
214  ENDIF
215  ipiv( 1 ) = 1
216  IF ( n.EQ.1 ) THEN
217  IF ( a( 1, 1 ).EQ.zero ) THEN
218  info = 1
219  END IF
220  RETURN
221  END IF
222 *
223 * Adjubst block size based on the workspace size
224 *
225  IF( lwork.LT.((1+nb)*n) ) THEN
226  nb = ( lwork-n ) / n
227  END IF
228 *
229  IF( upper ) THEN
230 *
231 * .....................................................
232 * Factorize A as L*D*L**T using the upper triangle of A
233 * .....................................................
234 *
235 * Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
236 *
237  CALL zcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
238 *
239 * J is the main loop index, increasing from 1 to N in steps of
240 * JB, where JB is the number of columns factorized by ZLASYF;
241 * JB is either NB, or N-J+1 for the last block
242 *
243  j = 0
244  10 CONTINUE
245  IF( j.GE.n )
246  $ GO TO 20
247 *
248 * each step of the main loop
249 * J is the last column of the previous panel
250 * J1 is the first column of the current panel
251 * K1 identifies if the previous column of the panel has been
252 * explicitly stored, e.g., K1=1 for the first panel, and
253 * K1=0 for the rest
254 *
255  j1 = j + 1
256  jb = min( n-j1+1, nb )
257  k1 = max(1, j)-j
258 *
259 * Panel factorization
260 *
261  CALL zlasyf_aa( uplo, 2-k1, n-j, jb,
262  $ a( max(1, j), j+1 ), lda,
263  $ ipiv( j+1 ), work, n, work( n*nb+1 ),
264  $ iinfo )
265  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
266  info = iinfo+j
267  ENDIF
268 *
269 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
270 *
271  DO j2 = j+2, min(n, j+jb+1)
272  ipiv( j2 ) = ipiv( j2 ) + j
273  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
274  CALL zswap( j1-k1-2, a( 1, j2 ), 1,
275  $ a( 1, ipiv(j2) ), 1 )
276  END IF
277  END DO
278  j = j + jb
279 *
280 * Trailing submatrix update, where
281 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
282 * WORK stores the current block of the auxiriarly matrix H
283 *
284  IF( j.LT.n ) THEN
285 *
286 * If first panel and JB=1 (NB=1), then nothing to do
287 *
288  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
289 *
290 * Merge rank-1 update with BLAS-3 update
291 *
292  alpha = a( j, j+1 )
293  a( j, j+1 ) = one
294  CALL zcopy( n-j, a( j-1, j+1 ), lda,
295  $ work( (j+1-j1+1)+jb*n ), 1 )
296  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
297 *
298 * K1 identifies if the previous column of the panel has been
299 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
300 * while K1=0 and K2=1 for the rest
301 *
302  IF( j1.GT.1 ) THEN
303 *
304 * Not first panel
305 *
306  k2 = 1
307  ELSE
308 *
309 * First panel
310 *
311  k2 = 0
312 *
313 * First update skips the first column
314 *
315  jb = jb - 1
316  END IF
317 *
318  DO j2 = j+1, n, nb
319  nj = min( nb, n-j2+1 )
320 *
321 * Update (J2, J2) diagonal block with ZGEMV
322 *
323  j3 = j2
324  DO mj = nj-1, 1, -1
325  CALL zgemv( 'No transpose', mj, jb+1,
326  $ -one, work( j3-j1+1+k1*n ), n,
327  $ a( j1-k2, j3 ), 1,
328  $ one, a( j3, j3 ), lda )
329  j3 = j3 + 1
330  END DO
331 *
332 * Update off-diagonal block of J2-th block row with ZGEMM
333 *
334  CALL zgemm( 'Transpose', 'Transpose',
335  $ nj, n-j3+1, jb+1,
336  $ -one, a( j1-k2, j2 ), lda,
337  $ work( j3-j1+1+k1*n ), n,
338  $ one, a( j2, j3 ), lda )
339  END DO
340 *
341 * Recover T( J, J+1 )
342 *
343  a( j, j+1 ) = alpha
344  END IF
345 *
346 * WORK(J+1, 1) stores H(J+1, 1)
347 *
348  CALL zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
349  END IF
350  GO TO 10
351  ELSE
352 *
353 * .....................................................
354 * Factorize A as L*D*L**T using the lower triangle of A
355 * .....................................................
356 *
357 * copy first column A(1:N, 1) into H(1:N, 1)
358 * (stored in WORK(1:N))
359 *
360  CALL zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
361 *
362 * J is the main loop index, increasing from 1 to N in steps of
363 * JB, where JB is the number of columns factorized by ZLASYF;
364 * JB is either NB, or N-J+1 for the last block
365 *
366  j = 0
367  11 CONTINUE
368  IF( j.GE.n )
369  $ GO TO 20
370 *
371 * each step of the main loop
372 * J is the last column of the previous panel
373 * J1 is the first column of the current panel
374 * K1 identifies if the previous column of the panel has been
375 * explicitly stored, e.g., K1=1 for the first panel, and
376 * K1=0 for the rest
377 *
378  j1 = j+1
379  jb = min( n-j1+1, nb )
380  k1 = max(1, j)-j
381 *
382 * Panel factorization
383 *
384  CALL zlasyf_aa( uplo, 2-k1, n-j, jb,
385  $ a( j+1, max(1, j) ), lda,
386  $ ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
387  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
388  info = iinfo+j
389  ENDIF
390 *
391 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
392 *
393  DO j2 = j+2, min(n, j+jb+1)
394  ipiv( j2 ) = ipiv( j2 ) + j
395  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
396  CALL zswap( j1-k1-2, a( j2, 1 ), lda,
397  $ a( ipiv(j2), 1 ), lda )
398  END IF
399  END DO
400  j = j + jb
401 *
402 * Trailing submatrix update, where
403 * A(J2+1, J1-1) stores L(J2+1, J1) and
404 * WORK(J2+1, 1) stores H(J2+1, 1)
405 *
406  IF( j.LT.n ) THEN
407 *
408 * if first panel and JB=1 (NB=1), then nothing to do
409 *
410  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
411 *
412 * Merge rank-1 update with BLAS-3 update
413 *
414  alpha = a( j+1, j )
415  a( j+1, j ) = one
416  CALL zcopy( n-j, a( j+1, j-1 ), 1,
417  $ work( (j+1-j1+1)+jb*n ), 1 )
418  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
419 *
420 * K1 identifies if the previous column of the panel has been
421 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
422 * while K1=0 and K2=1 for the rest
423 *
424  IF( j1.GT.1 ) THEN
425 *
426 * Not first panel
427 *
428  k2 = 1
429  ELSE
430 *
431 * First panel
432 *
433  k2 = 0
434 *
435 * First update skips the first column
436 *
437  jb = jb - 1
438  END IF
439 *
440  DO j2 = j+1, n, nb
441  nj = min( nb, n-j2+1 )
442 *
443 * Update (J2, J2) diagonal block with ZGEMV
444 *
445  j3 = j2
446  DO mj = nj-1, 1, -1
447  CALL zgemv( 'No transpose', mj, jb+1,
448  $ -one, work( j3-j1+1+k1*n ), n,
449  $ a( j3, j1-k2 ), lda,
450  $ one, a( j3, j3 ), 1 )
451  j3 = j3 + 1
452  END DO
453 *
454 * Update off-diagonal block in J2-th block column with ZGEMM
455 *
456  CALL zgemm( 'No transpose', 'Transpose',
457  $ n-j3+1, nj, jb+1,
458  $ -one, work( j3-j1+1+k1*n ), n,
459  $ a( j2, j1-k2 ), lda,
460  $ one, a( j3, j2 ), lda )
461  END DO
462 *
463 * Recover T( J+1, J )
464 *
465  a( j+1, j ) = alpha
466  END IF
467 *
468 * WORK(J+1, 1) stores H(J+1, 1)
469 *
470  CALL zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
471  END IF
472  GO TO 11
473  END IF
474 *
475  20 CONTINUE
476  RETURN
477 *
478 * End of ZSYTRF_AA
479 *
480  END
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:52
subroutine zsytrf_aa(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
ZSYTRF_AA
Definition: zsytrf_aa.f:138
subroutine zgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
ZGEMV
Definition: zgemv.f:160
subroutine zlasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK, INFO)
ZLASYF_AA
Definition: zlasyf_aa.f:156
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:52
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine zscal(N, ZA, ZX, INCX)
ZSCAL
Definition: zscal.f:54