LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
zheevd_2stage.f
Go to the documentation of this file.
1 *> \brief <b> ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2 *
3 * @precisions fortran z -> s d c
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download ZHEEVD_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd_2stage.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd_2stage.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd_2stage.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24 * RWORK, LRWORK, IWORK, LIWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER JOBZ, UPLO
30 * INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
31 * ..
32 * .. Array Arguments ..
33 * INTEGER IWORK( * )
34 * DOUBLE PRECISION RWORK( * ), W( * )
35 * COMPLEX*16 A( LDA, * ), WORK( * )
36 * ..
37 *
38 *
39 *> \par Purpose:
40 * =============
41 *>
42 *> \verbatim
43 *>
44 *> ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
45 *> complex Hermitian matrix A using the 2stage technique for
46 *> the reduction to tridiagonal. If eigenvectors are desired, it uses a
47 *> divide and conquer algorithm.
48 *>
49 *> The divide and conquer algorithm makes very mild assumptions about
50 *> floating point arithmetic. It will work on machines with a guard
51 *> digit in add/subtract, or on those binary machines without guard
52 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
53 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
54 *> without guard digits, but we know of none.
55 *> \endverbatim
56 *
57 * Arguments:
58 * ==========
59 *
60 *> \param[in] JOBZ
61 *> \verbatim
62 *> JOBZ is CHARACTER*1
63 *> = 'N': Compute eigenvalues only;
64 *> = 'V': Compute eigenvalues and eigenvectors.
65 *> Not available in this release.
66 *> \endverbatim
67 *>
68 *> \param[in] UPLO
69 *> \verbatim
70 *> UPLO is CHARACTER*1
71 *> = 'U': Upper triangle of A is stored;
72 *> = 'L': Lower triangle of A is stored.
73 *> \endverbatim
74 *>
75 *> \param[in] N
76 *> \verbatim
77 *> N is INTEGER
78 *> The order of the matrix A. N >= 0.
79 *> \endverbatim
80 *>
81 *> \param[in,out] A
82 *> \verbatim
83 *> A is COMPLEX*16 array, dimension (LDA, N)
84 *> On entry, the Hermitian matrix A. If UPLO = 'U', the
85 *> leading N-by-N upper triangular part of A contains the
86 *> upper triangular part of the matrix A. If UPLO = 'L',
87 *> the leading N-by-N lower triangular part of A contains
88 *> the lower triangular part of the matrix A.
89 *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
90 *> orthonormal eigenvectors of the matrix A.
91 *> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
92 *> or the upper triangle (if UPLO='U') of A, including the
93 *> diagonal, is destroyed.
94 *> \endverbatim
95 *>
96 *> \param[in] LDA
97 *> \verbatim
98 *> LDA is INTEGER
99 *> The leading dimension of the array A. LDA >= max(1,N).
100 *> \endverbatim
101 *>
102 *> \param[out] W
103 *> \verbatim
104 *> W is DOUBLE PRECISION array, dimension (N)
105 *> If INFO = 0, the eigenvalues in ascending order.
106 *> \endverbatim
107 *>
108 *> \param[out] WORK
109 *> \verbatim
110 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
111 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
112 *> \endverbatim
113 *>
114 *> \param[in] LWORK
115 *> \verbatim
116 *> LWORK is INTEGER
117 *> The dimension of the array WORK.
118 *> If N <= 1, LWORK must be at least 1.
119 *> If JOBZ = 'N' and N > 1, LWORK must be queried.
120 *> LWORK = MAX(1, dimension) where
121 *> dimension = max(stage1,stage2) + (KD+1)*N + N+1
122 *> = N*KD + N*max(KD+1,FACTOPTNB)
123 *> + max(2*KD*KD, KD*NTHREADS)
124 *> + (KD+1)*N + N+1
125 *> where KD is the blocking size of the reduction,
126 *> FACTOPTNB is the blocking used by the QR or LQ
127 *> algorithm, usually FACTOPTNB=128 is a good choice
128 *> NTHREADS is the number of threads used when
129 *> openMP compilation is enabled, otherwise =1.
130 *> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2
131 *>
132 *> If LWORK = -1, then a workspace query is assumed; the routine
133 *> only calculates the optimal sizes of the WORK, RWORK and
134 *> IWORK arrays, returns these values as the first entries of
135 *> the WORK, RWORK and IWORK arrays, and no error message
136 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
137 *> \endverbatim
138 *>
139 *> \param[out] RWORK
140 *> \verbatim
141 *> RWORK is DOUBLE PRECISION array,
142 *> dimension (LRWORK)
143 *> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
144 *> \endverbatim
145 *>
146 *> \param[in] LRWORK
147 *> \verbatim
148 *> LRWORK is INTEGER
149 *> The dimension of the array RWORK.
150 *> If N <= 1, LRWORK must be at least 1.
151 *> If JOBZ = 'N' and N > 1, LRWORK must be at least N.
152 *> If JOBZ = 'V' and N > 1, LRWORK must be at least
153 *> 1 + 5*N + 2*N**2.
154 *>
155 *> If LRWORK = -1, then a workspace query is assumed; the
156 *> routine only calculates the optimal sizes of the WORK, RWORK
157 *> and IWORK arrays, returns these values as the first entries
158 *> of the WORK, RWORK and IWORK arrays, and no error message
159 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
160 *> \endverbatim
161 *>
162 *> \param[out] IWORK
163 *> \verbatim
164 *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
165 *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
166 *> \endverbatim
167 *>
168 *> \param[in] LIWORK
169 *> \verbatim
170 *> LIWORK is INTEGER
171 *> The dimension of the array IWORK.
172 *> If N <= 1, LIWORK must be at least 1.
173 *> If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
174 *> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
175 *>
176 *> If LIWORK = -1, then a workspace query is assumed; the
177 *> routine only calculates the optimal sizes of the WORK, RWORK
178 *> and IWORK arrays, returns these values as the first entries
179 *> of the WORK, RWORK and IWORK arrays, and no error message
180 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
181 *> \endverbatim
182 *>
183 *> \param[out] INFO
184 *> \verbatim
185 *> INFO is INTEGER
186 *> = 0: successful exit
187 *> < 0: if INFO = -i, the i-th argument had an illegal value
188 *> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
189 *> to converge; i off-diagonal elements of an intermediate
190 *> tridiagonal form did not converge to zero;
191 *> if INFO = i and JOBZ = 'V', then the algorithm failed
192 *> to compute an eigenvalue while working on the submatrix
193 *> lying in rows and columns INFO/(N+1) through
194 *> mod(INFO,N+1).
195 *> \endverbatim
196 *
197 * Authors:
198 * ========
199 *
200 *> \author Univ. of Tennessee
201 *> \author Univ. of California Berkeley
202 *> \author Univ. of Colorado Denver
203 *> \author NAG Ltd.
204 *
205 *> \date December 2016
206 *
207 *> \ingroup complex16HEeigen
208 *
209 *> \par Further Details:
210 * =====================
211 *>
212 *> Modified description of INFO. Sven, 16 Feb 05.
213 *
214 *> \par Contributors:
215 * ==================
216 *>
217 *> Jeff Rutter, Computer Science Division, University of California
218 *> at Berkeley, USA
219 *>
220 *> \par Further Details:
221 * =====================
222 *>
223 *> \verbatim
224 *>
225 *> All details about the 2stage techniques are available in:
226 *>
227 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
228 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
229 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
230 *> of 2011 International Conference for High Performance Computing,
231 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
232 *> Article 8 , 11 pages.
233 *> http://doi.acm.org/10.1145/2063384.2063394
234 *>
235 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
236 *> An improved parallel singular value algorithm and its implementation
237 *> for multicore hardware, In Proceedings of 2013 International Conference
238 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
239 *> Denver, Colorado, USA, 2013.
240 *> Article 90, 12 pages.
241 *> http://doi.acm.org/10.1145/2503210.2503292
242 *>
243 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
244 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
245 *> calculations based on fine-grained memory aware tasks.
246 *> International Journal of High Performance Computing Applications.
247 *> Volume 28 Issue 2, Pages 196-209, May 2014.
248 *> http://hpc.sagepub.com/content/28/2/196
249 *>
250 *> \endverbatim
251 *
252 * =====================================================================
253  SUBROUTINE zheevd_2stage( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
254  $ rwork, lrwork, iwork, liwork, info )
255 *
256  IMPLICIT NONE
257 *
258 * -- LAPACK driver routine (version 3.7.0) --
259 * -- LAPACK is a software package provided by Univ. of Tennessee, --
260 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
261 * December 2016
262 *
263 * .. Scalar Arguments ..
264  CHARACTER JOBZ, UPLO
265  INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
266 * ..
267 * .. Array Arguments ..
268  INTEGER IWORK( * )
269  DOUBLE PRECISION RWORK( * ), W( * )
270  COMPLEX*16 A( lda, * ), WORK( * )
271 * ..
272 *
273 * =====================================================================
274 *
275 * .. Parameters ..
276  DOUBLE PRECISION ZERO, ONE
277  parameter ( zero = 0.0d0, one = 1.0d0 )
278  COMPLEX*16 CONE
279  parameter ( cone = ( 1.0d0, 0.0d0 ) )
280 * ..
281 * .. Local Scalars ..
282  LOGICAL LOWER, LQUERY, WANTZ
283  INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
284  $ indwrk, iscale, liwmin, llrwk, llwork,
285  $ llwrk2, lrwmin, lwmin,
286  $ lhtrd, lwtrd, kd, ib, indhous
287 
288 
289  DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
290  $ smlnum
291 * ..
292 * .. External Functions ..
293  LOGICAL LSAME
294  INTEGER ILAENV
295  DOUBLE PRECISION DLAMCH, ZLANHE
296  EXTERNAL lsame, ilaenv, dlamch, zlanhe
297 * ..
298 * .. External Subroutines ..
299  EXTERNAL dscal, dsterf, xerbla, zlacpy, zlascl,
301 * ..
302 * .. Intrinsic Functions ..
303  INTRINSIC dble, max, sqrt
304 * ..
305 * .. Executable Statements ..
306 *
307 * Test the input parameters.
308 *
309  wantz = lsame( jobz, 'V' )
310  lower = lsame( uplo, 'L' )
311  lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 .OR. liwork.EQ.-1 )
312 *
313  info = 0
314  IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
315  info = -1
316  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
317  info = -2
318  ELSE IF( n.LT.0 ) THEN
319  info = -3
320  ELSE IF( lda.LT.max( 1, n ) ) THEN
321  info = -5
322  END IF
323 *
324  IF( info.EQ.0 ) THEN
325  IF( n.LE.1 ) THEN
326  lwmin = 1
327  lrwmin = 1
328  liwmin = 1
329  ELSE
330  kd = ilaenv( 17, 'ZHETRD_2STAGE', jobz, n, -1, -1, -1 )
331  ib = ilaenv( 18, 'ZHETRD_2STAGE', jobz, n, kd, -1, -1 )
332  lhtrd = ilaenv( 19, 'ZHETRD_2STAGE', jobz, n, kd, ib, -1 )
333  lwtrd = ilaenv( 20, 'ZHETRD_2STAGE', jobz, n, kd, ib, -1 )
334  IF( wantz ) THEN
335  lwmin = 2*n + n*n
336  lrwmin = 1 + 5*n + 2*n**2
337  liwmin = 3 + 5*n
338  ELSE
339  lwmin = n + 1 + lhtrd + lwtrd
340  lrwmin = n
341  liwmin = 1
342  END IF
343  END IF
344  work( 1 ) = lwmin
345  rwork( 1 ) = lrwmin
346  iwork( 1 ) = liwmin
347 *
348  IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
349  info = -8
350  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
351  info = -10
352  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
353  info = -12
354  END IF
355  END IF
356 *
357  IF( info.NE.0 ) THEN
358  CALL xerbla( 'ZHEEVD_2STAGE', -info )
359  RETURN
360  ELSE IF( lquery ) THEN
361  RETURN
362  END IF
363 *
364 * Quick return if possible
365 *
366  IF( n.EQ.0 )
367  $ RETURN
368 *
369  IF( n.EQ.1 ) THEN
370  w( 1 ) = dble( a( 1, 1 ) )
371  IF( wantz )
372  $ a( 1, 1 ) = cone
373  RETURN
374  END IF
375 *
376 * Get machine constants.
377 *
378  safmin = dlamch( 'Safe minimum' )
379  eps = dlamch( 'Precision' )
380  smlnum = safmin / eps
381  bignum = one / smlnum
382  rmin = sqrt( smlnum )
383  rmax = sqrt( bignum )
384 *
385 * Scale matrix to allowable range, if necessary.
386 *
387  anrm = zlanhe( 'M', uplo, n, a, lda, rwork )
388  iscale = 0
389  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
390  iscale = 1
391  sigma = rmin / anrm
392  ELSE IF( anrm.GT.rmax ) THEN
393  iscale = 1
394  sigma = rmax / anrm
395  END IF
396  IF( iscale.EQ.1 )
397  $ CALL zlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info )
398 *
399 * Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
400 *
401  inde = 1
402  indrwk = inde + n
403  llrwk = lrwork - indrwk + 1
404  indtau = 1
405  indhous = indtau + n
406  indwrk = indhous + lhtrd
407  llwork = lwork - indwrk + 1
408  indwk2 = indwrk + n*n
409  llwrk2 = lwork - indwk2 + 1
410 *
411  CALL zhetrd_2stage( jobz, uplo, n, a, lda, w, rwork( inde ),
412  $ work( indtau ), work( indhous ), lhtrd,
413  $ work( indwrk ), llwork, iinfo )
414 *
415 * For eigenvalues only, call DSTERF. For eigenvectors, first call
416 * ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
417 * tridiagonal matrix, then call ZUNMTR to multiply it to the
418 * Householder transformations represented as Householder vectors in
419 * A.
420 *
421  IF( .NOT.wantz ) THEN
422  CALL dsterf( n, w, rwork( inde ), info )
423  ELSE
424  CALL zstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,
425  $ work( indwk2 ), llwrk2, rwork( indrwk ), llrwk,
426  $ iwork, liwork, info )
427  CALL zunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),
428  $ work( indwrk ), n, work( indwk2 ), llwrk2, iinfo )
429  CALL zlacpy( 'A', n, n, work( indwrk ), n, a, lda )
430  END IF
431 *
432 * If matrix was scaled, then rescale eigenvalues appropriately.
433 *
434  IF( iscale.EQ.1 ) THEN
435  IF( info.EQ.0 ) THEN
436  imax = n
437  ELSE
438  imax = info - 1
439  END IF
440  CALL dscal( imax, one / sigma, w, 1 )
441  END IF
442 *
443  work( 1 ) = lwmin
444  rwork( 1 ) = lrwmin
445  iwork( 1 ) = liwmin
446 *
447  RETURN
448 *
449 * End of ZHEEVD_2STAGE
450 *
451  END
subroutine zhetrd_2stage(VECT, UPLO, N, A, LDA, D, E, TAU, HOUS2, LHOUS2, WORK, LWORK, INFO)
ZHETRD_2STAGE
subroutine zheevd_2stage(JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE ma...
subroutine zstedc(COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
ZSTEDC
Definition: zstedc.f:215
subroutine zlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: zlascl.f:145
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:105
subroutine dsterf(N, D, E, INFO)
DSTERF
Definition: dsterf.f:88
subroutine zunmtr(SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
ZUNMTR
Definition: zunmtr.f:173
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine dscal(N, DA, DX, INCX)
DSCAL
Definition: dscal.f:55