LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
chb2st_kernels.f
Go to the documentation of this file.
1 *> \brief \b CHB2ST_KERNELS
2 *
3 * @generated from zhb2st_kernels.f, fortran z -> c, Wed Dec 7 08:22:40 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download CHB2ST_KERNELS + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chb2st_kernels.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chb2st_kernels.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chb2st_kernels.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24 * ST, ED, SWEEP, N, NB, IB,
25 * A, LDA, V, TAU, LDVT, WORK)
26 *
27 * IMPLICIT NONE
28 *
29 * .. Scalar Arguments ..
30 * CHARACTER UPLO
31 * LOGICAL WANTZ
32 * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33 * ..
34 * .. Array Arguments ..
35 * COMPLEX A( LDA, * ), V( * ),
36 * TAU( * ), WORK( * )
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
44 *> subroutine.
45 *> \endverbatim
46 *
47 * Arguments:
48 * ==========
49 *
50 *> @param[in] n
51 *> The order of the matrix A.
52 *>
53 *> @param[in] nb
54 *> The size of the band.
55 *>
56 *> @param[in, out] A
57 *> A pointer to the matrix A.
58 *>
59 *> @param[in] lda
60 *> The leading dimension of the matrix A.
61 *>
62 *> @param[out] V
63 *> COMPLEX array, dimension 2*n if eigenvalues only are
64 *> requested or to be queried for vectors.
65 *>
66 *> @param[out] TAU
67 *> COMPLEX array, dimension (2*n).
68 *> The scalar factors of the Householder reflectors are stored
69 *> in this array.
70 *>
71 *> @param[in] st
72 *> internal parameter for indices.
73 *>
74 *> @param[in] ed
75 *> internal parameter for indices.
76 *>
77 *> @param[in] sweep
78 *> internal parameter for indices.
79 *>
80 *> @param[in] Vblksiz
81 *> internal parameter for indices.
82 *>
83 *> @param[in] wantz
84 *> logical which indicate if Eigenvalue are requested or both
85 *> Eigenvalue/Eigenvectors.
86 *>
87 *> @param[in] work
88 *> Workspace of size nb.
89 *>
90 *> \par Further Details:
91 * =====================
92 *>
93 *> \verbatim
94 *>
95 *> Implemented by Azzam Haidar.
96 *>
97 *> All details are available on technical report, SC11, SC13 papers.
98 *>
99 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
100 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
101 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
102 *> of 2011 International Conference for High Performance Computing,
103 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
104 *> Article 8 , 11 pages.
105 *> http://doi.acm.org/10.1145/2063384.2063394
106 *>
107 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
108 *> An improved parallel singular value algorithm and its implementation
109 *> for multicore hardware, In Proceedings of 2013 International Conference
110 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
111 *> Denver, Colorado, USA, 2013.
112 *> Article 90, 12 pages.
113 *> http://doi.acm.org/10.1145/2503210.2503292
114 *>
115 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
116 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
117 *> calculations based on fine-grained memory aware tasks.
118 *> International Journal of High Performance Computing Applications.
119 *> Volume 28 Issue 2, Pages 196-209, May 2014.
120 *> http://hpc.sagepub.com/content/28/2/196
121 *>
122 *> \endverbatim
123 *>
124 * =====================================================================
125  SUBROUTINE chb2st_kernels( UPLO, WANTZ, TTYPE,
126  $ st, ed, sweep, n, nb, ib,
127  $ a, lda, v, tau, ldvt, work)
128 *
129  IMPLICIT NONE
130 *
131 * -- LAPACK computational routine (version 3.7.0) --
132 * -- LAPACK is a software package provided by Univ. of Tennessee, --
133 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
134 * December 2016
135 *
136 * .. Scalar Arguments ..
137  CHARACTER UPLO
138  LOGICAL WANTZ
139  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
140 * ..
141 * .. Array Arguments ..
142  COMPLEX A( lda, * ), V( * ),
143  $ tau( * ), work( * )
144 * ..
145 *
146 * =====================================================================
147 *
148 * .. Parameters ..
149  COMPLEX ZERO, ONE
150  parameter ( zero = ( 0.0e+0, 0.0e+0 ),
151  $ one = ( 1.0e+0, 0.0e+0 ) )
152 * ..
153 * .. Local Scalars ..
154  LOGICAL UPPER
155  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
156  $ dpos, ofdpos, ajeter
157  COMPLEX CTMP
158 * ..
159 * .. External Subroutines ..
160  EXTERNAL clarfg, clarfx, clarfy
161 * ..
162 * .. Intrinsic Functions ..
163  INTRINSIC conjg, mod
164 * .. External Functions ..
165  LOGICAL LSAME
166  EXTERNAL lsame
167 * ..
168 * ..
169 * .. Executable Statements ..
170 *
171  ajeter = ib + ldvt
172  upper = lsame( uplo, 'U' )
173 
174  IF( upper ) THEN
175  dpos = 2 * nb + 1
176  ofdpos = 2 * nb
177  ELSE
178  dpos = 1
179  ofdpos = 2
180  ENDIF
181 
182 *
183 * Upper case
184 *
185  IF( upper ) THEN
186 *
187  IF( wantz ) THEN
188  vpos = mod( sweep-1, 2 ) * n + st
189  taupos = mod( sweep-1, 2 ) * n + st
190  ELSE
191  vpos = mod( sweep-1, 2 ) * n + st
192  taupos = mod( sweep-1, 2 ) * n + st
193  ENDIF
194 *
195  IF( ttype.EQ.1 ) THEN
196  lm = ed - st + 1
197 *
198  v( vpos ) = one
199  DO 10 i = 1, lm-1
200  v( vpos+i ) = conjg( a( ofdpos-i, st+i ) )
201  a( ofdpos-i, st+i ) = zero
202  10 CONTINUE
203  ctmp = conjg( a( ofdpos, st ) )
204  CALL clarfg( lm, ctmp, v( vpos+1 ), 1,
205  $ tau( taupos ) )
206  a( ofdpos, st ) = ctmp
207 *
208  lm = ed - st + 1
209  CALL clarfy( uplo, lm, v( vpos ), 1,
210  $ conjg( tau( taupos ) ),
211  $ a( dpos, st ), lda-1, work)
212  ENDIF
213 *
214  IF( ttype.EQ.3 ) THEN
215 *
216  lm = ed - st + 1
217  CALL clarfy( uplo, lm, v( vpos ), 1,
218  $ conjg( tau( taupos ) ),
219  $ a( dpos, st ), lda-1, work)
220  ENDIF
221 *
222  IF( ttype.EQ.2 ) THEN
223  j1 = ed+1
224  j2 = min( ed+nb, n )
225  ln = ed-st+1
226  lm = j2-j1+1
227  IF( lm.GT.0) THEN
228  CALL clarfx( 'Left', ln, lm, v( vpos ),
229  $ conjg( tau( taupos ) ),
230  $ a( dpos-nb, j1 ), lda-1, work)
231 *
232  IF( wantz ) THEN
233  vpos = mod( sweep-1, 2 ) * n + j1
234  taupos = mod( sweep-1, 2 ) * n + j1
235  ELSE
236  vpos = mod( sweep-1, 2 ) * n + j1
237  taupos = mod( sweep-1, 2 ) * n + j1
238  ENDIF
239 *
240  v( vpos ) = one
241  DO 30 i = 1, lm-1
242  v( vpos+i ) =
243  $ conjg( a( dpos-nb-i, j1+i ) )
244  a( dpos-nb-i, j1+i ) = zero
245  30 CONTINUE
246  ctmp = conjg( a( dpos-nb, j1 ) )
247  CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
248  a( dpos-nb, j1 ) = ctmp
249 *
250  CALL clarfx( 'Right', ln-1, lm, v( vpos ),
251  $ tau( taupos ),
252  $ a( dpos-nb+1, j1 ), lda-1, work)
253  ENDIF
254  ENDIF
255 *
256 * Lower case
257 *
258  ELSE
259 *
260  IF( wantz ) THEN
261  vpos = mod( sweep-1, 2 ) * n + st
262  taupos = mod( sweep-1, 2 ) * n + st
263  ELSE
264  vpos = mod( sweep-1, 2 ) * n + st
265  taupos = mod( sweep-1, 2 ) * n + st
266  ENDIF
267 *
268  IF( ttype.EQ.1 ) THEN
269  lm = ed - st + 1
270 *
271  v( vpos ) = one
272  DO 20 i = 1, lm-1
273  v( vpos+i ) = a( ofdpos+i, st-1 )
274  a( ofdpos+i, st-1 ) = zero
275  20 CONTINUE
276  CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
277  $ tau( taupos ) )
278 *
279  lm = ed - st + 1
280 *
281  CALL clarfy( uplo, lm, v( vpos ), 1,
282  $ conjg( tau( taupos ) ),
283  $ a( dpos, st ), lda-1, work)
284 
285  ENDIF
286 *
287  IF( ttype.EQ.3 ) THEN
288  lm = ed - st + 1
289 *
290  CALL clarfy( uplo, lm, v( vpos ), 1,
291  $ conjg( tau( taupos ) ),
292  $ a( dpos, st ), lda-1, work)
293 
294  ENDIF
295 *
296  IF( ttype.EQ.2 ) THEN
297  j1 = ed+1
298  j2 = min( ed+nb, n )
299  ln = ed-st+1
300  lm = j2-j1+1
301 *
302  IF( lm.GT.0) THEN
303  CALL clarfx( 'Right', lm, ln, v( vpos ),
304  $ tau( taupos ), a( dpos+nb, st ),
305  $ lda-1, work)
306 *
307  IF( wantz ) THEN
308  vpos = mod( sweep-1, 2 ) * n + j1
309  taupos = mod( sweep-1, 2 ) * n + j1
310  ELSE
311  vpos = mod( sweep-1, 2 ) * n + j1
312  taupos = mod( sweep-1, 2 ) * n + j1
313  ENDIF
314 *
315  v( vpos ) = one
316  DO 40 i = 1, lm-1
317  v( vpos+i ) = a( dpos+nb+i, st )
318  a( dpos+nb+i, st ) = zero
319  40 CONTINUE
320  CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
321  $ tau( taupos ) )
322 *
323  CALL clarfx( 'Left', lm, ln-1, v( vpos ),
324  $ conjg( tau( taupos ) ),
325  $ a( dpos+nb-1, st+1 ), lda-1, work)
326 
327  ENDIF
328  ENDIF
329  ENDIF
330 *
331  RETURN
332 *
333 * END OF CHB2ST_KERNELS
334 *
335  END
subroutine clarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: clarfx.f:121
subroutine clarfg(N, ALPHA, X, INCX, TAU)
CLARFG generates an elementary reflector (Householder matrix).
Definition: clarfg.f:108
subroutine chb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
CHB2ST_KERNELS
subroutine clarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
CLARFY
Definition: clarfy.f:110