LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
zhetrd_he2hb.f
Go to the documentation of this file.
1 *> \brief \b ZHETRD_HE2HB
2 *
3 * @precisions fortran z -> s d c
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download ZHETRD_HE2HB + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
24 * WORK, LWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER UPLO
30 * INTEGER INFO, LDA, LDAB, LWORK, N, KD
31 * ..
32 * .. Array Arguments ..
33 * COMPLEX*16 A( LDA, * ), AB( LDAB, * ),
34 * TAU( * ), WORK( * )
35 * ..
36 *
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
44 *> band-diagonal form AB by a unitary similarity transformation:
45 *> Q**H * A * Q = AB.
46 *> \endverbatim
47 *
48 * Arguments:
49 * ==========
50 *
51 *> \param[in] UPLO
52 *> \verbatim
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
56 *> \endverbatim
57 *>
58 *> \param[in] N
59 *> \verbatim
60 *> N is INTEGER
61 *> The order of the matrix A. N >= 0.
62 *> \endverbatim
63 *>
64 *> \param[in] KD
65 *> \verbatim
66 *> KD is INTEGER
67 *> The number of superdiagonals of the reduced matrix if UPLO = 'U',
68 *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
69 *> The reduced matrix is stored in the array AB.
70 *> \endverbatim
71 *>
72 *> \param[in,out] A
73 *> \verbatim
74 *> A is COMPLEX*16 array, dimension (LDA,N)
75 *> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
76 *> N-by-N upper triangular part of A contains the upper
77 *> triangular part of the matrix A, and the strictly lower
78 *> triangular part of A is not referenced. If UPLO = 'L', the
79 *> leading N-by-N lower triangular part of A contains the lower
80 *> triangular part of the matrix A, and the strictly upper
81 *> triangular part of A is not referenced.
82 *> On exit, if UPLO = 'U', the diagonal and first superdiagonal
83 *> of A are overwritten by the corresponding elements of the
84 *> tridiagonal matrix T, and the elements above the first
85 *> superdiagonal, with the array TAU, represent the unitary
86 *> matrix Q as a product of elementary reflectors; if UPLO
87 *> = 'L', the diagonal and first subdiagonal of A are over-
88 *> written by the corresponding elements of the tridiagonal
89 *> matrix T, and the elements below the first subdiagonal, with
90 *> the array TAU, represent the unitary matrix Q as a product
91 *> of elementary reflectors. See Further Details.
92 *> \endverbatim
93 *>
94 *> \param[in] LDA
95 *> \verbatim
96 *> LDA is INTEGER
97 *> The leading dimension of the array A. LDA >= max(1,N).
98 *> \endverbatim
99 *>
100 *> \param[out] AB
101 *> \verbatim
102 *> AB is COMPLEX*16 array, dimension (LDAB,N)
103 *> On exit, the upper or lower triangle of the Hermitian band
104 *> matrix A, stored in the first KD+1 rows of the array. The
105 *> j-th column of A is stored in the j-th column of the array AB
106 *> as follows:
107 *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
108 *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
109 *> \endverbatim
110 *>
111 *> \param[in] LDAB
112 *> \verbatim
113 *> LDAB is INTEGER
114 *> The leading dimension of the array AB. LDAB >= KD+1.
115 *> \endverbatim
116 *>
117 *> \param[out] TAU
118 *> \verbatim
119 *> TAU is COMPLEX*16 array, dimension (N-KD)
120 *> The scalar factors of the elementary reflectors (see Further
121 *> Details).
122 *> \endverbatim
123 *>
124 *> \param[out] WORK
125 *> \verbatim
126 *> WORK is COMPLEX*16 array, dimension LWORK.
127 *> On exit, if INFO = 0, or if LWORK=-1,
128 *> WORK(1) returns the size of LWORK.
129 *> \endverbatim
130 *>
131 *> \param[in] LWORK
132 *> \verbatim
133 *> LWORK is INTEGER
134 *> The dimension of the array WORK which should be calculated
135 * by a workspace query. LWORK = MAX(1, LWORK_QUERY)
136 *> If LWORK = -1, then a workspace query is assumed; the routine
137 *> only calculates the optimal size of the WORK array, returns
138 *> this value as the first entry of the WORK array, and no error
139 *> message related to LWORK is issued by XERBLA.
140 *> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
141 *> where FACTOPTNB is the blocking used by the QR or LQ
142 *> algorithm, usually FACTOPTNB=128 is a good choice otherwise
143 *> putting LWORK=-1 will provide the size of WORK.
144 *> \endverbatim
145 *>
146 *> \param[out] INFO
147 *> \verbatim
148 *> INFO is INTEGER
149 *> = 0: successful exit
150 *> < 0: if INFO = -i, the i-th argument had an illegal value
151 *> \endverbatim
152 *
153 * Authors:
154 * ========
155 *
156 *> \author Univ. of Tennessee
157 *> \author Univ. of California Berkeley
158 *> \author Univ. of Colorado Denver
159 *> \author NAG Ltd.
160 *
161 *> \date December 2016
162 *
163 *> \ingroup complex16HEcomputational
164 *
165 *> \par Further Details:
166 * =====================
167 *>
168 *> \verbatim
169 *>
170 *> Implemented by Azzam Haidar.
171 *>
172 *> All details are available on technical report, SC11, SC13 papers.
173 *>
174 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
175 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
176 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
177 *> of 2011 International Conference for High Performance Computing,
178 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
179 *> Article 8 , 11 pages.
180 *> http://doi.acm.org/10.1145/2063384.2063394
181 *>
182 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
183 *> An improved parallel singular value algorithm and its implementation
184 *> for multicore hardware, In Proceedings of 2013 International Conference
185 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
186 *> Denver, Colorado, USA, 2013.
187 *> Article 90, 12 pages.
188 *> http://doi.acm.org/10.1145/2503210.2503292
189 *>
190 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
191 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
192 *> calculations based on fine-grained memory aware tasks.
193 *> International Journal of High Performance Computing Applications.
194 *> Volume 28 Issue 2, Pages 196-209, May 2014.
195 *> http://hpc.sagepub.com/content/28/2/196
196 *>
197 *> \endverbatim
198 *>
199 *> \verbatim
200 *>
201 *> If UPLO = 'U', the matrix Q is represented as a product of elementary
202 *> reflectors
203 *>
204 *> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
205 *>
206 *> Each H(i) has the form
207 *>
208 *> H(i) = I - tau * v * v**H
209 *>
210 *> where tau is a complex scalar, and v is a complex vector with
211 *> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
212 *> A(i,i+kd+1:n), and tau in TAU(i).
213 *>
214 *> If UPLO = 'L', the matrix Q is represented as a product of elementary
215 *> reflectors
216 *>
217 *> Q = H(1) H(2) . . . H(k), where k = n-kd.
218 *>
219 *> Each H(i) has the form
220 *>
221 *> H(i) = I - tau * v * v**H
222 *>
223 *> where tau is a complex scalar, and v is a complex vector with
224 *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
225 * A(i+kd+2:n,i), and tau in TAU(i).
226 *>
227 *> The contents of A on exit are illustrated by the following examples
228 *> with n = 5:
229 *>
230 *> if UPLO = 'U': if UPLO = 'L':
231 *>
232 *> ( ab ab/v1 v1 v1 v1 ) ( ab )
233 *> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
234 *> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
235 *> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
236 *> ( ab ) ( v1 v2 v3 ab/v4 ab )
237 *>
238 *> where d and e denote diagonal and off-diagonal elements of T, and vi
239 *> denotes an element of the vector defining H(i).
240 *> \endverbatim
241 *>
242 * =====================================================================
243  SUBROUTINE zhetrd_he2hb( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
244  $ work, lwork, info )
245 *
246  IMPLICIT NONE
247 *
248 * -- LAPACK computational routine (version 3.7.0) --
249 * -- LAPACK is a software package provided by Univ. of Tennessee, --
250 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
251 * December 2016
252 *
253 * .. Scalar Arguments ..
254  CHARACTER UPLO
255  INTEGER INFO, LDA, LDAB, LWORK, N, KD
256 * ..
257 * .. Array Arguments ..
258  COMPLEX*16 A( lda, * ), AB( ldab, * ),
259  $ tau( * ), work( * )
260 * ..
261 *
262 * =====================================================================
263 *
264 * .. Parameters ..
265  DOUBLE PRECISION RONE
266  COMPLEX*16 ZERO, ONE, HALF
267  parameter ( rone = 1.0d+0,
268  $ zero = ( 0.0d+0, 0.0d+0 ),
269  $ one = ( 1.0d+0, 0.0d+0 ),
270  $ half = ( 0.5d+0, 0.0d+0 ) )
271 * ..
272 * .. Local Scalars ..
273  LOGICAL LQUERY, UPPER
274  INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
275  $ ldt, ldw, lds2, lds1,
276  $ ls2, ls1, lw, lt,
277  $ tpos, wpos, s2pos, s1pos
278 * ..
279 * .. External Subroutines ..
280  EXTERNAL xerbla, zher2k, zhemm, zgemm,
282 * ..
283 * .. Intrinsic Functions ..
284  INTRINSIC min, max
285 * ..
286 * .. External Functions ..
287  LOGICAL LSAME
288  INTEGER ILAENV
289  EXTERNAL lsame, ilaenv
290 * ..
291 * .. Executable Statements ..
292 *
293 * Determine the minimal workspace size required
294 * and test the input parameters
295 *
296  info = 0
297  upper = lsame( uplo, 'U' )
298  lquery = ( lwork.EQ.-1 )
299  lwmin = ilaenv( 20, 'ZHETRD_HE2HB', '', n, kd, -1, -1 )
300 
301  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
302  info = -1
303  ELSE IF( n.LT.0 ) THEN
304  info = -2
305  ELSE IF( kd.LT.0 ) THEN
306  info = -3
307  ELSE IF( lda.LT.max( 1, n ) ) THEN
308  info = -5
309  ELSE IF( ldab.LT.max( 1, kd+1 ) ) THEN
310  info = -7
311  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
312  info = -10
313  END IF
314 *
315  IF( info.NE.0 ) THEN
316  CALL xerbla( 'ZHETRD_HE2HB', -info )
317  RETURN
318  ELSE IF( lquery ) THEN
319  work( 1 ) = lwmin
320  RETURN
321  END IF
322 *
323 * Quick return if possible
324 * Copy the upper/lower portion of A into AB
325 *
326  IF( n.LE.kd+1 ) THEN
327  IF( upper ) THEN
328  DO 100 i = 1, n
329  lk = min( kd+1, i )
330  CALL zcopy( lk, a( i-lk+1, i ), 1,
331  $ ab( kd+1-lk+1, i ), 1 )
332  100 CONTINUE
333  ELSE
334  DO 110 i = 1, n
335  lk = min( kd+1, n-i+1 )
336  CALL zcopy( lk, a( i, i ), 1, ab( 1, i ), 1 )
337  110 CONTINUE
338  ENDIF
339  work( 1 ) = 1
340  RETURN
341  END IF
342 *
343 * Determine the pointer position for the workspace
344 *
345  ldt = kd
346  lds1 = kd
347  lt = ldt*kd
348  lw = n*kd
349  ls1 = lds1*kd
350  ls2 = lwmin - lt - lw - ls1
351 * LS2 = N*MAX(KD,FACTOPTNB)
352  tpos = 1
353  wpos = tpos + lt
354  s1pos = wpos + lw
355  s2pos = s1pos + ls1
356  IF( upper ) THEN
357  ldw = kd
358  lds2 = kd
359  ELSE
360  ldw = n
361  lds2 = n
362  ENDIF
363 *
364 *
365 * Set the workspace of the triangular matrix T to zero once such a
366 * way everytime T is generated the upper/lower portion will be always zero
367 *
368  CALL zlaset( "A", ldt, kd, zero, zero, work( tpos ), ldt )
369 *
370  IF( upper ) THEN
371  DO 10 i = 1, n - kd, kd
372  pn = n-i-kd+1
373  pk = min( n-i-kd+1, kd )
374 *
375 * Compute the LQ factorization of the current block
376 *
377  CALL zgelqf( kd, pn, a( i, i+kd ), lda,
378  $ tau( i ), work( s2pos ), ls2, iinfo )
379 *
380 * Copy the upper portion of A into AB
381 *
382  DO 20 j = i, i+pk-1
383  lk = min( kd, n-j ) + 1
384  CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
385  20 CONTINUE
386 *
387  CALL zlaset( 'Lower', pk, pk, zero, one,
388  $ a( i, i+kd ), lda )
389 *
390 * Form the matrix T
391 *
392  CALL zlarft( 'Forward', 'Rowwise', pn, pk,
393  $ a( i, i+kd ), lda, tau( i ),
394  $ work( tpos ), ldt )
395 *
396 * Compute W:
397 *
398  CALL zgemm( 'Conjugate', 'No transpose', pk, pn, pk,
399  $ one, work( tpos ), ldt,
400  $ a( i, i+kd ), lda,
401  $ zero, work( s2pos ), lds2 )
402 *
403  CALL zhemm( 'Right', uplo, pk, pn,
404  $ one, a( i+kd, i+kd ), lda,
405  $ work( s2pos ), lds2,
406  $ zero, work( wpos ), ldw )
407 *
408  CALL zgemm( 'No transpose', 'Conjugate', pk, pk, pn,
409  $ one, work( wpos ), ldw,
410  $ work( s2pos ), lds2,
411  $ zero, work( s1pos ), lds1 )
412 *
413  CALL zgemm( 'No transpose', 'No transpose', pk, pn, pk,
414  $ -half, work( s1pos ), lds1,
415  $ a( i, i+kd ), lda,
416  $ one, work( wpos ), ldw )
417 *
418 *
419 * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
420 * an update of the form: A := A - V'*W - W'*V
421 *
422  CALL zher2k( uplo, 'Conjugate', pn, pk,
423  $ -one, a( i, i+kd ), lda,
424  $ work( wpos ), ldw,
425  $ rone, a( i+kd, i+kd ), lda )
426  10 CONTINUE
427 *
428 * Copy the upper band to AB which is the band storage matrix
429 *
430  DO 30 j = n-kd+1, n
431  lk = min(kd, n-j) + 1
432  CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
433  30 CONTINUE
434 *
435  ELSE
436 *
437 * Reduce the lower triangle of A to lower band matrix
438 *
439  DO 40 i = 1, n - kd, kd
440  pn = n-i-kd+1
441  pk = min( n-i-kd+1, kd )
442 *
443 * Compute the QR factorization of the current block
444 *
445  CALL zgeqrf( pn, kd, a( i+kd, i ), lda,
446  $ tau( i ), work( s2pos ), ls2, iinfo )
447 *
448 * Copy the upper portion of A into AB
449 *
450  DO 50 j = i, i+pk-1
451  lk = min( kd, n-j ) + 1
452  CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
453  50 CONTINUE
454 *
455  CALL zlaset( 'Upper', pk, pk, zero, one,
456  $ a( i+kd, i ), lda )
457 *
458 * Form the matrix T
459 *
460  CALL zlarft( 'Forward', 'Columnwise', pn, pk,
461  $ a( i+kd, i ), lda, tau( i ),
462  $ work( tpos ), ldt )
463 *
464 * Compute W:
465 *
466  CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
467  $ one, a( i+kd, i ), lda,
468  $ work( tpos ), ldt,
469  $ zero, work( s2pos ), lds2 )
470 *
471  CALL zhemm( 'Left', uplo, pn, pk,
472  $ one, a( i+kd, i+kd ), lda,
473  $ work( s2pos ), lds2,
474  $ zero, work( wpos ), ldw )
475 *
476  CALL zgemm( 'Conjugate', 'No transpose', pk, pk, pn,
477  $ one, work( s2pos ), lds2,
478  $ work( wpos ), ldw,
479  $ zero, work( s1pos ), lds1 )
480 *
481  CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
482  $ -half, a( i+kd, i ), lda,
483  $ work( s1pos ), lds1,
484  $ one, work( wpos ), ldw )
485 *
486 *
487 * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
488 * an update of the form: A := A - V*W' - W*V'
489 *
490  CALL zher2k( uplo, 'No transpose', pn, pk,
491  $ -one, a( i+kd, i ), lda,
492  $ work( wpos ), ldw,
493  $ rone, a( i+kd, i+kd ), lda )
494 * ==================================================================
495 * RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
496 * DO 45 J = I, I+PK-1
497 * LK = MIN( KD, N-J ) + 1
498 * CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
499 * 45 CONTINUE
500 * ==================================================================
501  40 CONTINUE
502 *
503 * Copy the lower band to AB which is the band storage matrix
504 *
505  DO 60 j = n-kd+1, n
506  lk = min(kd, n-j) + 1
507  CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
508  60 CONTINUE
509 
510  END IF
511 *
512  work( 1 ) = lwmin
513  RETURN
514 *
515 * End of ZHETRD_HE2HB
516 *
517  END
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:52
subroutine zlarft(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
ZLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition: zlarft.f:165
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189
subroutine zgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition: zgeqrf.f:151
subroutine zhetrd_he2hb(UPLO, N, KD, A, LDA, AB, LDAB, TAU, WORK, LWORK, INFO)
ZHETRD_HE2HB
Definition: zhetrd_he2hb.f:245
subroutine zhemm(SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZHEMM
Definition: zhemm.f:193
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: zlaset.f:108
subroutine zgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGELQF
Definition: zgelqf.f:137
subroutine zher2k(UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZHER2K
Definition: zher2k.f:200