LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
csytri_3x.f
Go to the documentation of this file.
1 *> \brief \b CSYTRI_3X
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CSYTRI_3X + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/csytri_3x.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/csytri_3x.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/csytri_3x.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER INFO, LDA, N, NB
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
30 * ..
31 *
32 *
33 *> \par Purpose:
34 * =============
35 *>
36 *> \verbatim
37 *> CSYTRI_3X computes the inverse of a complex symmetric indefinite
38 *> matrix A using the factorization computed by CSYTRF_RK or CSYTRF_BK:
39 *>
40 *> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41 *>
42 *> where U (or L) is unit upper (or lower) triangular matrix,
43 *> U**T (or L**T) is the transpose of U (or L), P is a permutation
44 *> matrix, P**T is the transpose of P, and D is symmetric and block
45 *> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46 *>
47 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
48 *> \endverbatim
49 *
50 * Arguments:
51 * ==========
52 *
53 *> \param[in] UPLO
54 *> \verbatim
55 *> UPLO is CHARACTER*1
56 *> Specifies whether the details of the factorization are
57 *> stored as an upper or lower triangular matrix.
58 *> = 'U': Upper triangle of A is stored;
59 *> = 'L': Lower triangle of A is stored.
60 *> \endverbatim
61 *>
62 *> \param[in] N
63 *> \verbatim
64 *> N is INTEGER
65 *> The order of the matrix A. N >= 0.
66 *> \endverbatim
67 *>
68 *> \param[in,out] A
69 *> \verbatim
70 *> A is COMPLEX array, dimension (LDA,N)
71 *> On entry, diagonal of the block diagonal matrix D and
72 *> factors U or L as computed by CSYTRF_RK and CSYTRF_BK:
73 *> a) ONLY diagonal elements of the symmetric block diagonal
74 *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75 *> (superdiagonal (or subdiagonal) elements of D
76 *> should be provided on entry in array E), and
77 *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78 *> If UPLO = 'L': factor L in the subdiagonal part of A.
79 *>
80 *> On exit, if INFO = 0, the symmetric inverse of the original
81 *> matrix.
82 *> If UPLO = 'U': the upper triangular part of the inverse
83 *> is formed and the part of A below the diagonal is not
84 *> referenced;
85 *> If UPLO = 'L': the lower triangular part of the inverse
86 *> is formed and the part of A above the diagonal is not
87 *> referenced.
88 *> \endverbatim
89 *>
90 *> \param[in] LDA
91 *> \verbatim
92 *> LDA is INTEGER
93 *> The leading dimension of the array A. LDA >= max(1,N).
94 *> \endverbatim
95 *>
96 *> \param[in] E
97 *> \verbatim
98 *> E is COMPLEX array, dimension (N)
99 *> On entry, contains the superdiagonal (or subdiagonal)
100 *> elements of the symmetric block diagonal matrix D
101 *> with 1-by-1 or 2-by-2 diagonal blocks, where
102 *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not refernced;
103 *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104 *>
105 *> NOTE: For 1-by-1 diagonal block D(k), where
106 *> 1 <= k <= N, the element E(k) is not referenced in both
107 *> UPLO = 'U' or UPLO = 'L' cases.
108 *> \endverbatim
109 *>
110 *> \param[in] IPIV
111 *> \verbatim
112 *> IPIV is INTEGER array, dimension (N)
113 *> Details of the interchanges and the block structure of D
114 *> as determined by CSYTRF_RK or CSYTRF_BK.
115 *> \endverbatim
116 *>
117 *> \param[out] WORK
118 *> \verbatim
119 *> WORK is COMPLEX array, dimension (N+NB+1,NB+3).
120 *> \endverbatim
121 *>
122 *> \param[in] NB
123 *> \verbatim
124 *> NB is INTEGER
125 *> Block size.
126 *> \endverbatim
127 *>
128 *> \param[out] INFO
129 *> \verbatim
130 *> INFO is INTEGER
131 *> = 0: successful exit
132 *> < 0: if INFO = -i, the i-th argument had an illegal value
133 *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134 *> inverse could not be computed.
135 *> \endverbatim
136 *
137 * Authors:
138 * ========
139 *
140 *> \author Univ. of Tennessee
141 *> \author Univ. of California Berkeley
142 *> \author Univ. of Colorado Denver
143 *> \author NAG Ltd.
144 *
145 *> \date December 2016
146 *
147 *> \ingroup complexSYcomputational
148 *
149 *> \par Contributors:
150 * ==================
151 *> \verbatim
152 *>
153 *> December 2016, Igor Kozachenko,
154 *> Computer Science Division,
155 *> University of California, Berkeley
156 *>
157 *> \endverbatim
158 *
159 * =====================================================================
160  SUBROUTINE csytri_3x( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
161 *
162 * -- LAPACK computational routine (version 3.7.0) --
163 * -- LAPACK is a software package provided by Univ. of Tennessee, --
164 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
165 * December 2016
166 *
167 * .. Scalar Arguments ..
168  CHARACTER UPLO
169  INTEGER INFO, LDA, N, NB
170 * ..
171 * .. Array Arguments ..
172  INTEGER IPIV( * )
173  COMPLEX A( lda, * ), E( * ), WORK( n+nb+1, * )
174 * ..
175 *
176 * =====================================================================
177 *
178 * .. Parameters ..
179  COMPLEX CONE, CZERO
180  parameter ( cone = ( 1.0e+0, 0.0e+0 ),
181  $ czero = ( 0.0e+0, 0.0e+0 ) )
182 * ..
183 * .. Local Scalars ..
184  LOGICAL UPPER
185  INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
186  COMPLEX AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
187  $ u11_i_j, u11_ip1_j
188 * ..
189 * .. External Functions ..
190  LOGICAL LSAME
191  EXTERNAL lsame
192 * ..
193 * .. External Subroutines ..
194  EXTERNAL cgemm, csyswapr, ctrtri, ctrmm, xerbla
195 * ..
196 * .. Intrinsic Functions ..
197  INTRINSIC abs, max, mod
198 * ..
199 * .. Executable Statements ..
200 *
201 * Test the input parameters.
202 *
203  info = 0
204  upper = lsame( uplo, 'U' )
205  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
206  info = -1
207  ELSE IF( n.LT.0 ) THEN
208  info = -2
209  ELSE IF( lda.LT.max( 1, n ) ) THEN
210  info = -4
211  END IF
212 *
213 * Quick return if possible
214 *
215  IF( info.NE.0 ) THEN
216  CALL xerbla( 'CSYTRI_3X', -info )
217  RETURN
218  END IF
219  IF( n.EQ.0 )
220  $ RETURN
221 *
222 * Workspace got Non-diag elements of D
223 *
224  DO k = 1, n
225  work( k, 1 ) = e( k )
226  END DO
227 *
228 * Check that the diagonal matrix D is nonsingular.
229 *
230  IF( upper ) THEN
231 *
232 * Upper triangular storage: examine D from bottom to top
233 *
234  DO info = n, 1, -1
235  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
236  $ RETURN
237  END DO
238  ELSE
239 *
240 * Lower triangular storage: examine D from top to bottom.
241 *
242  DO info = 1, n
243  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
244  $ RETURN
245  END DO
246  END IF
247 *
248  info = 0
249 *
250 * Splitting Workspace
251 * U01 is a block ( N, NB+1 )
252 * The first element of U01 is in WORK( 1, 1 )
253 * U11 is a block ( NB+1, NB+1 )
254 * The first element of U11 is in WORK( N+1, 1 )
255 *
256  u11 = n
257 *
258 * INVD is a block ( N, 2 )
259 * The first element of INVD is in WORK( 1, INVD )
260 *
261  invd = nb + 2
262 
263  IF( upper ) THEN
264 *
265 * Begin Upper
266 *
267 * invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
268 *
269  CALL ctrtri( uplo, 'U', n, a, lda, info )
270 *
271 * inv(D) and inv(D) * inv(U)
272 *
273  k = 1
274  DO WHILE( k.LE.n )
275  IF( ipiv( k ).GT.0 ) THEN
276 * 1 x 1 diagonal NNB
277  work( k, invd ) = cone / a( k, k )
278  work( k, invd+1 ) = czero
279  ELSE
280 * 2 x 2 diagonal NNB
281  t = work( k+1, 1 )
282  ak = a( k, k ) / t
283  akp1 = a( k+1, k+1 ) / t
284  akkp1 = work( k+1, 1 ) / t
285  d = t*( ak*akp1-cone )
286  work( k, invd ) = akp1 / d
287  work( k+1, invd+1 ) = ak / d
288  work( k, invd+1 ) = -akkp1 / d
289  work( k+1, invd ) = work( k, invd+1 )
290  k = k + 1
291  END IF
292  k = k + 1
293  END DO
294 *
295 * inv(U**T) = (inv(U))**T
296 *
297 * inv(U**T) * inv(D) * inv(U)
298 *
299  cut = n
300  DO WHILE( cut.GT.0 )
301  nnb = nb
302  IF( cut.LE.nnb ) THEN
303  nnb = cut
304  ELSE
305  icount = 0
306 * count negative elements,
307  DO i = cut+1-nnb, cut
308  IF( ipiv( i ).LT.0 ) icount = icount + 1
309  END DO
310 * need a even number for a clear cut
311  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
312  END IF
313 
314  cut = cut - nnb
315 *
316 * U01 Block
317 *
318  DO i = 1, cut
319  DO j = 1, nnb
320  work( i, j ) = a( i, cut+j )
321  END DO
322  END DO
323 *
324 * U11 Block
325 *
326  DO i = 1, nnb
327  work( u11+i, i ) = cone
328  DO j = 1, i-1
329  work( u11+i, j ) = czero
330  END DO
331  DO j = i+1, nnb
332  work( u11+i, j ) = a( cut+i, cut+j )
333  END DO
334  END DO
335 *
336 * invD * U01
337 *
338  i = 1
339  DO WHILE( i.LE.cut )
340  IF( ipiv( i ).GT.0 ) THEN
341  DO j = 1, nnb
342  work( i, j ) = work( i, invd ) * work( i, j )
343  END DO
344  ELSE
345  DO j = 1, nnb
346  u01_i_j = work( i, j )
347  u01_ip1_j = work( i+1, j )
348  work( i, j ) = work( i, invd ) * u01_i_j
349  $ + work( i, invd+1 ) * u01_ip1_j
350  work( i+1, j ) = work( i+1, invd ) * u01_i_j
351  $ + work( i+1, invd+1 ) * u01_ip1_j
352  END DO
353  i = i + 1
354  END IF
355  i = i + 1
356  END DO
357 *
358 * invD1 * U11
359 *
360  i = 1
361  DO WHILE ( i.LE.nnb )
362  IF( ipiv( cut+i ).GT.0 ) THEN
363  DO j = i, nnb
364  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
365  END DO
366  ELSE
367  DO j = i, nnb
368  u11_i_j = work(u11+i,j)
369  u11_ip1_j = work(u11+i+1,j)
370  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
371  $ + work(cut+i,invd+1) * work(u11+i+1,j)
372  work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
373  $ + work(cut+i+1,invd+1) * u11_ip1_j
374  END DO
375  i = i + 1
376  END IF
377  i = i + 1
378  END DO
379 *
380 * U11**T * invD1 * U11 -> U11
381 *
382  CALL ctrmm( 'L', 'U', 'T', 'U', nnb, nnb,
383  $ cone, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
384  $ n+nb+1 )
385 *
386  DO i = 1, nnb
387  DO j = i, nnb
388  a( cut+i, cut+j ) = work( u11+i, j )
389  END DO
390  END DO
391 *
392 * U01**T * invD * U01 -> A( CUT+I, CUT+J )
393 *
394  CALL cgemm( 'T', 'N', nnb, nnb, cut, cone, a( 1, cut+1 ),
395  $ lda, work, n+nb+1, czero, work(u11+1,1),
396  $ n+nb+1 )
397 
398 *
399 * U11 = U11**T * invD1 * U11 + U01**T * invD * U01
400 *
401  DO i = 1, nnb
402  DO j = i, nnb
403  a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
404  END DO
405  END DO
406 *
407 * U01 = U00**T * invD0 * U01
408 *
409  CALL ctrmm( 'L', uplo, 'T', 'U', cut, nnb,
410  $ cone, a, lda, work, n+nb+1 )
411 
412 *
413 * Update U01
414 *
415  DO i = 1, cut
416  DO j = 1, nnb
417  a( i, cut+j ) = work( i, j )
418  END DO
419  END DO
420 *
421 * Next Block
422 *
423  END DO
424 *
425 * Apply PERMUTATIONS P and P**T:
426 * P * inv(U**T) * inv(D) * inv(U) * P**T.
427 * Interchange rows and columns I and IPIV(I) in reverse order
428 * from the formation order of IPIV vector for Upper case.
429 *
430 * ( We can use a loop over IPIV with increment 1,
431 * since the ABS value of IPIV(I) represents the row (column)
432 * index of the interchange with row (column) i in both 1x1
433 * and 2x2 pivot cases, i.e. we don't need separate code branches
434 * for 1x1 and 2x2 pivot cases )
435 *
436  DO i = 1, n
437  ip = abs( ipiv( i ) )
438  IF( ip.NE.i ) THEN
439  IF (i .LT. ip) CALL csyswapr( uplo, n, a, lda, i ,ip )
440  IF (i .GT. ip) CALL csyswapr( uplo, n, a, lda, ip ,i )
441  END IF
442  END DO
443 *
444  ELSE
445 *
446 * Begin Lower
447 *
448 * inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
449 *
450  CALL ctrtri( uplo, 'U', n, a, lda, info )
451 *
452 * inv(D) and inv(D) * inv(L)
453 *
454  k = n
455  DO WHILE ( k .GE. 1 )
456  IF( ipiv( k ).GT.0 ) THEN
457 * 1 x 1 diagonal NNB
458  work( k, invd ) = cone / a( k, k )
459  work( k, invd+1 ) = czero
460  ELSE
461 * 2 x 2 diagonal NNB
462  t = work( k-1, 1 )
463  ak = a( k-1, k-1 ) / t
464  akp1 = a( k, k ) / t
465  akkp1 = work( k-1, 1 ) / t
466  d = t*( ak*akp1-cone )
467  work( k-1, invd ) = akp1 / d
468  work( k, invd ) = ak / d
469  work( k, invd+1 ) = -akkp1 / d
470  work( k-1, invd+1 ) = work( k, invd+1 )
471  k = k - 1
472  END IF
473  k = k - 1
474  END DO
475 *
476 * inv(L**T) = (inv(L))**T
477 *
478 * inv(L**T) * inv(D) * inv(L)
479 *
480  cut = 0
481  DO WHILE( cut.LT.n )
482  nnb = nb
483  IF( (cut + nnb).GT.n ) THEN
484  nnb = n - cut
485  ELSE
486  icount = 0
487 * count negative elements,
488  DO i = cut + 1, cut+nnb
489  IF ( ipiv( i ).LT.0 ) icount = icount + 1
490  END DO
491 * need a even number for a clear cut
492  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
493  END IF
494 *
495 * L21 Block
496 *
497  DO i = 1, n-cut-nnb
498  DO j = 1, nnb
499  work( i, j ) = a( cut+nnb+i, cut+j )
500  END DO
501  END DO
502 *
503 * L11 Block
504 *
505  DO i = 1, nnb
506  work( u11+i, i) = cone
507  DO j = i+1, nnb
508  work( u11+i, j ) = czero
509  END DO
510  DO j = 1, i-1
511  work( u11+i, j ) = a( cut+i, cut+j )
512  END DO
513  END DO
514 *
515 * invD*L21
516 *
517  i = n-cut-nnb
518  DO WHILE( i.GE.1 )
519  IF( ipiv( cut+nnb+i ).GT.0 ) THEN
520  DO j = 1, nnb
521  work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
522  END DO
523  ELSE
524  DO j = 1, nnb
525  u01_i_j = work(i,j)
526  u01_ip1_j = work(i-1,j)
527  work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
528  $ work(cut+nnb+i,invd+1)*u01_ip1_j
529  work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
530  $ work(cut+nnb+i-1,invd)*u01_ip1_j
531  END DO
532  i = i - 1
533  END IF
534  i = i - 1
535  END DO
536 *
537 * invD1*L11
538 *
539  i = nnb
540  DO WHILE( i.GE.1 )
541  IF( ipiv( cut+i ).GT.0 ) THEN
542  DO j = 1, nnb
543  work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
544  END DO
545 
546  ELSE
547  DO j = 1, nnb
548  u11_i_j = work( u11+i, j )
549  u11_ip1_j = work( u11+i-1, j )
550  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
551  $ + work(cut+i,invd+1) * u11_ip1_j
552  work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
553  $ + work(cut+i-1,invd) * u11_ip1_j
554  END DO
555  i = i - 1
556  END IF
557  i = i - 1
558  END DO
559 *
560 * L11**T * invD1 * L11 -> L11
561 *
562  CALL ctrmm( 'L', uplo, 'T', 'U', nnb, nnb, cone,
563  $ a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
564  $ n+nb+1 )
565 
566 *
567  DO i = 1, nnb
568  DO j = 1, i
569  a( cut+i, cut+j ) = work( u11+i, j )
570  END DO
571  END DO
572 *
573  IF( (cut+nnb).LT.n ) THEN
574 *
575 * L21**T * invD2*L21 -> A( CUT+I, CUT+J )
576 *
577  CALL cgemm( 'T', 'N', nnb, nnb, n-nnb-cut, cone,
578  $ a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
579  $ czero, work( u11+1, 1 ), n+nb+1 )
580 
581 *
582 * L11 = L11**T * invD1 * L11 + U01**T * invD * U01
583 *
584  DO i = 1, nnb
585  DO j = 1, i
586  a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
587  END DO
588  END DO
589 *
590 * L01 = L22**T * invD2 * L21
591 *
592  CALL ctrmm( 'L', uplo, 'T', 'U', n-nnb-cut, nnb, cone,
593  $ a( cut+nnb+1, cut+nnb+1 ), lda, work,
594  $ n+nb+1 )
595 *
596 * Update L21
597 *
598  DO i = 1, n-cut-nnb
599  DO j = 1, nnb
600  a( cut+nnb+i, cut+j ) = work( i, j )
601  END DO
602  END DO
603 *
604  ELSE
605 *
606 * L11 = L11**T * invD1 * L11
607 *
608  DO i = 1, nnb
609  DO j = 1, i
610  a( cut+i, cut+j ) = work( u11+i, j )
611  END DO
612  END DO
613  END IF
614 *
615 * Next Block
616 *
617  cut = cut + nnb
618 *
619  END DO
620 *
621 * Apply PERMUTATIONS P and P**T:
622 * P * inv(L**T) * inv(D) * inv(L) * P**T.
623 * Interchange rows and columns I and IPIV(I) in reverse order
624 * from the formation order of IPIV vector for Lower case.
625 *
626 * ( We can use a loop over IPIV with increment -1,
627 * since the ABS value of IPIV(I) represents the row (column)
628 * index of the interchange with row (column) i in both 1x1
629 * and 2x2 pivot cases, i.e. we don't need separate code branches
630 * for 1x1 and 2x2 pivot cases )
631 *
632  DO i = n, 1, -1
633  ip = abs( ipiv( i ) )
634  IF( ip.NE.i ) THEN
635  IF (i .LT. ip) CALL csyswapr( uplo, n, a, lda, i ,ip )
636  IF (i .GT. ip) CALL csyswapr( uplo, n, a, lda, ip ,i )
637  END IF
638  END DO
639 *
640  END IF
641 *
642  RETURN
643 *
644 * End of CSYTRI_3X
645 *
646  END
647 
subroutine csytri_3x(UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO)
CSYTRI_3X
Definition: csytri_3x.f:161
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine ctrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRMM
Definition: ctrmm.f:179
subroutine ctrtri(UPLO, DIAG, N, A, LDA, INFO)
CTRTRI
Definition: ctrtri.f:111
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189
subroutine csyswapr(UPLO, N, A, LDA, I1, I2)
CSYSWAPR
Definition: csyswapr.f:104