LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
chetri_3x.f
Go to the documentation of this file.
1 *> \brief \b CHETRI_3X
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CHETRI_3X + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetri_3x.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetri_3x.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetri_3x.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER INFO, LDA, N, NB
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * )
30 * ..
31 *
32 *
33 *> \par Purpose:
34 * =============
35 *>
36 *> \verbatim
37 *> CHETRI_3X computes the inverse of a complex Hermitian indefinite
38 *> matrix A using the factorization computed by CHETRF_RK or CHETRF_BK:
39 *>
40 *> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T),
41 *>
42 *> where U (or L) is unit upper (or lower) triangular matrix,
43 *> U**H (or L**H) is the conjugate of U (or L), P is a permutation
44 *> matrix, P**T is the transpose of P, and D is Hermitian and block
45 *> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46 *>
47 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
48 *> \endverbatim
49 *
50 * Arguments:
51 * ==========
52 *
53 *> \param[in] UPLO
54 *> \verbatim
55 *> UPLO is CHARACTER*1
56 *> Specifies whether the details of the factorization are
57 *> stored as an upper or lower triangular matrix.
58 *> = 'U': Upper triangle of A is stored;
59 *> = 'L': Lower triangle of A is stored.
60 *> \endverbatim
61 *>
62 *> \param[in] N
63 *> \verbatim
64 *> N is INTEGER
65 *> The order of the matrix A. N >= 0.
66 *> \endverbatim
67 *>
68 *> \param[in,out] A
69 *> \verbatim
70 *> A is COMPLEX array, dimension (LDA,N)
71 *> On entry, diagonal of the block diagonal matrix D and
72 *> factors U or L as computed by CHETRF_RK and CHETRF_BK:
73 *> a) ONLY diagonal elements of the Hermitian block diagonal
74 *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75 *> (superdiagonal (or subdiagonal) elements of D
76 *> should be provided on entry in array E), and
77 *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78 *> If UPLO = 'L': factor L in the subdiagonal part of A.
79 *>
80 *> On exit, if INFO = 0, the Hermitian inverse of the original
81 *> matrix.
82 *> If UPLO = 'U': the upper triangular part of the inverse
83 *> is formed and the part of A below the diagonal is not
84 *> referenced;
85 *> If UPLO = 'L': the lower triangular part of the inverse
86 *> is formed and the part of A above the diagonal is not
87 *> referenced.
88 *> \endverbatim
89 *>
90 *> \param[in] LDA
91 *> \verbatim
92 *> LDA is INTEGER
93 *> The leading dimension of the array A. LDA >= max(1,N).
94 *> \endverbatim
95 *>
96 *> \param[in] E
97 *> \verbatim
98 *> E is COMPLEX array, dimension (N)
99 *> On entry, contains the superdiagonal (or subdiagonal)
100 *> elements of the Hermitian block diagonal matrix D
101 *> with 1-by-1 or 2-by-2 diagonal blocks, where
102 *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not refernced;
103 *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104 *>
105 *> NOTE: For 1-by-1 diagonal block D(k), where
106 *> 1 <= k <= N, the element E(k) is not referenced in both
107 *> UPLO = 'U' or UPLO = 'L' cases.
108 *> \endverbatim
109 *>
110 *> \param[in] IPIV
111 *> \verbatim
112 *> IPIV is INTEGER array, dimension (N)
113 *> Details of the interchanges and the block structure of D
114 *> as determined by CHETRF_RK or CHETRF_BK.
115 *> \endverbatim
116 *>
117 *> \param[out] WORK
118 *> \verbatim
119 *> WORK is COMPLEX array, dimension (N+NB+1,NB+3).
120 *> \endverbatim
121 *>
122 *> \param[in] NB
123 *> \verbatim
124 *> NB is INTEGER
125 *> Block size.
126 *> \endverbatim
127 *>
128 *> \param[out] INFO
129 *> \verbatim
130 *> INFO is INTEGER
131 *> = 0: successful exit
132 *> < 0: if INFO = -i, the i-th argument had an illegal value
133 *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134 *> inverse could not be computed.
135 *> \endverbatim
136 *
137 * Authors:
138 * ========
139 *
140 *> \author Univ. of Tennessee
141 *> \author Univ. of California Berkeley
142 *> \author Univ. of Colorado Denver
143 *> \author NAG Ltd.
144 *
145 *> \date December 2016
146 *
147 *> \ingroup complexHEcomputational
148 *
149 *> \par Contributors:
150 * ==================
151 *> \verbatim
152 *>
153 *> December 2016, Igor Kozachenko,
154 *> Computer Science Division,
155 *> University of California, Berkeley
156 *>
157 *> \endverbatim
158 *
159 * =====================================================================
160  SUBROUTINE chetri_3x( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
161 *
162 * -- LAPACK computational routine (version 3.7.0) --
163 * -- LAPACK is a software package provided by Univ. of Tennessee, --
164 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
165 * December 2016
166 *
167 * .. Scalar Arguments ..
168  CHARACTER UPLO
169  INTEGER INFO, LDA, N, NB
170 * ..
171 * .. Array Arguments ..
172  INTEGER IPIV( * )
173  COMPLEX A( lda, * ), E( * ), WORK( n+nb+1, * )
174 * ..
175 *
176 * =====================================================================
177 *
178 * .. Parameters ..
179  REAL ONE
180  parameter ( one = 1.0e+0 )
181  COMPLEX CONE, CZERO
182  parameter ( cone = ( 1.0e+0, 0.0e+0 ),
183  $ czero = ( 0.0e+0, 0.0e+0 ) )
184 * ..
185 * .. Local Scalars ..
186  LOGICAL UPPER
187  INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
188  REAL AK, AKP1, T
189  COMPLEX AKKP1, D, U01_I_J, U01_IP1_J, U11_I_J,
190  $ u11_ip1_j
191 * ..
192 * .. External Functions ..
193  LOGICAL LSAME
194  EXTERNAL lsame
195 * ..
196 * .. External Subroutines ..
197  EXTERNAL cgemm, cheswapr, ctrtri, ctrmm, xerbla
198 * ..
199 * .. Intrinsic Functions ..
200  INTRINSIC abs, conjg, max, real
201 * ..
202 * .. Executable Statements ..
203 *
204 * Test the input parameters.
205 *
206  info = 0
207  upper = lsame( uplo, 'U' )
208  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
209  info = -1
210  ELSE IF( n.LT.0 ) THEN
211  info = -2
212  ELSE IF( lda.LT.max( 1, n ) ) THEN
213  info = -4
214  END IF
215 *
216 * Quick return if possible
217 *
218  IF( info.NE.0 ) THEN
219  CALL xerbla( 'CHETRI_3X', -info )
220  RETURN
221  END IF
222  IF( n.EQ.0 )
223  $ RETURN
224 *
225 * Workspace got Non-diag elements of D
226 *
227  DO k = 1, n
228  work( k, 1 ) = e( k )
229  END DO
230 *
231 * Check that the diagonal matrix D is nonsingular.
232 *
233  IF( upper ) THEN
234 *
235 * Upper triangular storage: examine D from bottom to top
236 *
237  DO info = n, 1, -1
238  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
239  $ RETURN
240  END DO
241  ELSE
242 *
243 * Lower triangular storage: examine D from top to bottom.
244 *
245  DO info = 1, n
246  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
247  $ RETURN
248  END DO
249  END IF
250 *
251  info = 0
252 *
253 * Splitting Workspace
254 * U01 is a block ( N, NB+1 )
255 * The first element of U01 is in WORK( 1, 1 )
256 * U11 is a block ( NB+1, NB+1 )
257 * The first element of U11 is in WORK( N+1, 1 )
258 *
259  u11 = n
260 *
261 * INVD is a block ( N, 2 )
262 * The first element of INVD is in WORK( 1, INVD )
263 *
264  invd = nb + 2
265 
266  IF( upper ) THEN
267 *
268 * Begin Upper
269 *
270 * invA = P * inv(U**H) * inv(D) * inv(U) * P**T.
271 *
272  CALL ctrtri( uplo, 'U', n, a, lda, info )
273 *
274 * inv(D) and inv(D) * inv(U)
275 *
276  k = 1
277  DO WHILE( k.LE.n )
278  IF( ipiv( k ).GT.0 ) THEN
279 * 1 x 1 diagonal NNB
280  work( k, invd ) = one / REAL( A( K, K ) )
281  work( k, invd+1 ) = czero
282  ELSE
283 * 2 x 2 diagonal NNB
284  t = abs( work( k+1, 1 ) )
285  ak = REAL( A( K, K ) ) / T
286  akp1 = REAL( A( K+1, K+1 ) ) / T
287  akkp1 = work( k+1, 1 ) / t
288  d = t*( ak*akp1-cone )
289  work( k, invd ) = akp1 / d
290  work( k+1, invd+1 ) = ak / d
291  work( k, invd+1 ) = -akkp1 / d
292  work( k+1, invd ) = conjg( work( k, invd+1 ) )
293  k = k + 1
294  END IF
295  k = k + 1
296  END DO
297 *
298 * inv(U**H) = (inv(U))**H
299 *
300 * inv(U**H) * inv(D) * inv(U)
301 *
302  cut = n
303  DO WHILE( cut.GT.0 )
304  nnb = nb
305  IF( cut.LE.nnb ) THEN
306  nnb = cut
307  ELSE
308  icount = 0
309 * count negative elements,
310  DO i = cut+1-nnb, cut
311  IF( ipiv( i ).LT.0 ) icount = icount + 1
312  END DO
313 * need a even number for a clear cut
314  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
315  END IF
316 
317  cut = cut - nnb
318 *
319 * U01 Block
320 *
321  DO i = 1, cut
322  DO j = 1, nnb
323  work( i, j ) = a( i, cut+j )
324  END DO
325  END DO
326 *
327 * U11 Block
328 *
329  DO i = 1, nnb
330  work( u11+i, i ) = cone
331  DO j = 1, i-1
332  work( u11+i, j ) = czero
333  END DO
334  DO j = i+1, nnb
335  work( u11+i, j ) = a( cut+i, cut+j )
336  END DO
337  END DO
338 *
339 * invD * U01
340 *
341  i = 1
342  DO WHILE( i.LE.cut )
343  IF( ipiv( i ).GT.0 ) THEN
344  DO j = 1, nnb
345  work( i, j ) = work( i, invd ) * work( i, j )
346  END DO
347  ELSE
348  DO j = 1, nnb
349  u01_i_j = work( i, j )
350  u01_ip1_j = work( i+1, j )
351  work( i, j ) = work( i, invd ) * u01_i_j
352  $ + work( i, invd+1 ) * u01_ip1_j
353  work( i+1, j ) = work( i+1, invd ) * u01_i_j
354  $ + work( i+1, invd+1 ) * u01_ip1_j
355  END DO
356  i = i + 1
357  END IF
358  i = i + 1
359  END DO
360 *
361 * invD1 * U11
362 *
363  i = 1
364  DO WHILE ( i.LE.nnb )
365  IF( ipiv( cut+i ).GT.0 ) THEN
366  DO j = i, nnb
367  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
368  END DO
369  ELSE
370  DO j = i, nnb
371  u11_i_j = work(u11+i,j)
372  u11_ip1_j = work(u11+i+1,j)
373  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
374  $ + work(cut+i,invd+1) * work(u11+i+1,j)
375  work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
376  $ + work(cut+i+1,invd+1) * u11_ip1_j
377  END DO
378  i = i + 1
379  END IF
380  i = i + 1
381  END DO
382 *
383 * U11**H * invD1 * U11 -> U11
384 *
385  CALL ctrmm( 'L', 'U', 'C', 'U', nnb, nnb,
386  $ cone, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
387  $ n+nb+1 )
388 *
389  DO i = 1, nnb
390  DO j = i, nnb
391  a( cut+i, cut+j ) = work( u11+i, j )
392  END DO
393  END DO
394 *
395 * U01**H * invD * U01 -> A( CUT+I, CUT+J )
396 *
397  CALL cgemm( 'C', 'N', nnb, nnb, cut, cone, a( 1, cut+1 ),
398  $ lda, work, n+nb+1, czero, work(u11+1,1),
399  $ n+nb+1 )
400 
401 *
402 * U11 = U11**H * invD1 * U11 + U01**H * invD * U01
403 *
404  DO i = 1, nnb
405  DO j = i, nnb
406  a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
407  END DO
408  END DO
409 *
410 * U01 = U00**H * invD0 * U01
411 *
412  CALL ctrmm( 'L', uplo, 'C', 'U', cut, nnb,
413  $ cone, a, lda, work, n+nb+1 )
414 
415 *
416 * Update U01
417 *
418  DO i = 1, cut
419  DO j = 1, nnb
420  a( i, cut+j ) = work( i, j )
421  END DO
422  END DO
423 *
424 * Next Block
425 *
426  END DO
427 *
428 * Apply PERMUTATIONS P and P**T:
429 * P * inv(U**H) * inv(D) * inv(U) * P**T.
430 * Interchange rows and columns I and IPIV(I) in reverse order
431 * from the formation order of IPIV vector for Upper case.
432 *
433 * ( We can use a loop over IPIV with increment 1,
434 * since the ABS value of IPIV(I) represents the row (column)
435 * index of the interchange with row (column) i in both 1x1
436 * and 2x2 pivot cases, i.e. we don't need separate code branches
437 * for 1x1 and 2x2 pivot cases )
438 *
439  DO i = 1, n
440  ip = abs( ipiv( i ) )
441  IF( ip.NE.i ) THEN
442  IF (i .LT. ip) CALL cheswapr( uplo, n, a, lda, i ,ip )
443  IF (i .GT. ip) CALL cheswapr( uplo, n, a, lda, ip ,i )
444  END IF
445  END DO
446 *
447  ELSE
448 *
449 * Begin Lower
450 *
451 * inv A = P * inv(L**H) * inv(D) * inv(L) * P**T.
452 *
453  CALL ctrtri( uplo, 'U', n, a, lda, info )
454 *
455 * inv(D) and inv(D) * inv(L)
456 *
457  k = n
458  DO WHILE ( k .GE. 1 )
459  IF( ipiv( k ).GT.0 ) THEN
460 * 1 x 1 diagonal NNB
461  work( k, invd ) = one / REAL( A( K, K ) )
462  work( k, invd+1 ) = czero
463  ELSE
464 * 2 x 2 diagonal NNB
465  t = abs( work( k-1, 1 ) )
466  ak = REAL( A( K-1, K-1 ) ) / T
467  akp1 = REAL( A( K, K ) ) / T
468  akkp1 = work( k-1, 1 ) / t
469  d = t*( ak*akp1-cone )
470  work( k-1, invd ) = akp1 / d
471  work( k, invd ) = ak / d
472  work( k, invd+1 ) = -akkp1 / d
473  work( k-1, invd+1 ) = conjg( work( k, invd+1 ) )
474  k = k - 1
475  END IF
476  k = k - 1
477  END DO
478 *
479 * inv(L**H) = (inv(L))**H
480 *
481 * inv(L**H) * inv(D) * inv(L)
482 *
483  cut = 0
484  DO WHILE( cut.LT.n )
485  nnb = nb
486  IF( (cut + nnb).GT.n ) THEN
487  nnb = n - cut
488  ELSE
489  icount = 0
490 * count negative elements,
491  DO i = cut + 1, cut+nnb
492  IF ( ipiv( i ).LT.0 ) icount = icount + 1
493  END DO
494 * need a even number for a clear cut
495  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
496  END IF
497 *
498 * L21 Block
499 *
500  DO i = 1, n-cut-nnb
501  DO j = 1, nnb
502  work( i, j ) = a( cut+nnb+i, cut+j )
503  END DO
504  END DO
505 *
506 * L11 Block
507 *
508  DO i = 1, nnb
509  work( u11+i, i) = cone
510  DO j = i+1, nnb
511  work( u11+i, j ) = czero
512  END DO
513  DO j = 1, i-1
514  work( u11+i, j ) = a( cut+i, cut+j )
515  END DO
516  END DO
517 *
518 * invD*L21
519 *
520  i = n-cut-nnb
521  DO WHILE( i.GE.1 )
522  IF( ipiv( cut+nnb+i ).GT.0 ) THEN
523  DO j = 1, nnb
524  work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
525  END DO
526  ELSE
527  DO j = 1, nnb
528  u01_i_j = work(i,j)
529  u01_ip1_j = work(i-1,j)
530  work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
531  $ work(cut+nnb+i,invd+1)*u01_ip1_j
532  work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
533  $ work(cut+nnb+i-1,invd)*u01_ip1_j
534  END DO
535  i = i - 1
536  END IF
537  i = i - 1
538  END DO
539 *
540 * invD1*L11
541 *
542  i = nnb
543  DO WHILE( i.GE.1 )
544  IF( ipiv( cut+i ).GT.0 ) THEN
545  DO j = 1, nnb
546  work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
547  END DO
548 
549  ELSE
550  DO j = 1, nnb
551  u11_i_j = work( u11+i, j )
552  u11_ip1_j = work( u11+i-1, j )
553  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
554  $ + work(cut+i,invd+1) * u11_ip1_j
555  work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
556  $ + work(cut+i-1,invd) * u11_ip1_j
557  END DO
558  i = i - 1
559  END IF
560  i = i - 1
561  END DO
562 *
563 * L11**H * invD1 * L11 -> L11
564 *
565  CALL ctrmm( 'L', uplo, 'C', 'U', nnb, nnb, cone,
566  $ a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
567  $ n+nb+1 )
568 
569 *
570  DO i = 1, nnb
571  DO j = 1, i
572  a( cut+i, cut+j ) = work( u11+i, j )
573  END DO
574  END DO
575 *
576  IF( (cut+nnb).LT.n ) THEN
577 *
578 * L21**H * invD2*L21 -> A( CUT+I, CUT+J )
579 *
580  CALL cgemm( 'C', 'N', nnb, nnb, n-nnb-cut, cone,
581  $ a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
582  $ czero, work( u11+1, 1 ), n+nb+1 )
583 
584 *
585 * L11 = L11**H * invD1 * L11 + U01**H * invD * U01
586 *
587  DO i = 1, nnb
588  DO j = 1, i
589  a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
590  END DO
591  END DO
592 *
593 * L01 = L22**H * invD2 * L21
594 *
595  CALL ctrmm( 'L', uplo, 'C', 'U', n-nnb-cut, nnb, cone,
596  $ a( cut+nnb+1, cut+nnb+1 ), lda, work,
597  $ n+nb+1 )
598 *
599 * Update L21
600 *
601  DO i = 1, n-cut-nnb
602  DO j = 1, nnb
603  a( cut+nnb+i, cut+j ) = work( i, j )
604  END DO
605  END DO
606 *
607  ELSE
608 *
609 * L11 = L11**H * invD1 * L11
610 *
611  DO i = 1, nnb
612  DO j = 1, i
613  a( cut+i, cut+j ) = work( u11+i, j )
614  END DO
615  END DO
616  END IF
617 *
618 * Next Block
619 *
620  cut = cut + nnb
621 *
622  END DO
623 *
624 * Apply PERMUTATIONS P and P**T:
625 * P * inv(L**H) * inv(D) * inv(L) * P**T.
626 * Interchange rows and columns I and IPIV(I) in reverse order
627 * from the formation order of IPIV vector for Lower case.
628 *
629 * ( We can use a loop over IPIV with increment -1,
630 * since the ABS value of IPIV(I) represents the row (column)
631 * index of the interchange with row (column) i in both 1x1
632 * and 2x2 pivot cases, i.e. we don't need separate code branches
633 * for 1x1 and 2x2 pivot cases )
634 *
635  DO i = n, 1, -1
636  ip = abs( ipiv( i ) )
637  IF( ip.NE.i ) THEN
638  IF (i .LT. ip) CALL cheswapr( uplo, n, a, lda, i ,ip )
639  IF (i .GT. ip) CALL cheswapr( uplo, n, a, lda, ip ,i )
640  END IF
641  END DO
642 *
643  END IF
644 *
645  RETURN
646 *
647 * End of CHETRI_3X
648 *
649  END
subroutine cheswapr(UPLO, N, A, LDA, I1, I2)
CHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix...
Definition: cheswapr.f:104
subroutine chetri_3x(UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO)
CHETRI_3X
Definition: chetri_3x.f:161
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine ctrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRMM
Definition: ctrmm.f:179
subroutine ctrtri(UPLO, DIAG, N, A, LDA, INFO)
CTRTRI
Definition: ctrtri.f:111
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189