LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
subroutine chb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
complex, dimension( lda, * )  A,
integer  LDA,
complex, dimension( * )  V,
complex, dimension( * )  TAU,
integer  LDVT,
complex, dimension( * )  WORK 
)

CHB2ST_KERNELS

Download CHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
 subroutine.
Parameters
[in]nThe order of the matrix A.
[in]nbThe size of the band.
[in,out]AA pointer to the matrix A.
[in]ldaThe leading dimension of the matrix A.
[out]VCOMPLEX array, dimension 2*n if eigenvalues only are requested or to be queried for vectors.
[out]TAUCOMPLEX array, dimension (2*n). The scalar factors of the Householder reflectors are stored in this array.
[in]stinternal parameter for indices.
[in]edinternal parameter for indices.
[in]sweepinternal parameter for indices.
[in]Vblksizinternal parameter for indices.
[in]wantzlogical which indicate if Eigenvalue are requested or both Eigenvalue/Eigenvectors.
[in]workWorkspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 128 of file chb2st_kernels.f.

128 *
129  IMPLICIT NONE
130 *
131 * -- LAPACK computational routine (version 3.7.0) --
132 * -- LAPACK is a software package provided by Univ. of Tennessee, --
133 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
134 * December 2016
135 *
136 * .. Scalar Arguments ..
137  CHARACTER uplo
138  LOGICAL wantz
139  INTEGER ttype, st, ed, sweep, n, nb, ib, lda, ldvt
140 * ..
141 * .. Array Arguments ..
142  COMPLEX a( lda, * ), v( * ),
143  $ tau( * ), work( * )
144 * ..
145 *
146 * =====================================================================
147 *
148 * .. Parameters ..
149  COMPLEX zero, one
150  parameter ( zero = ( 0.0e+0, 0.0e+0 ),
151  $ one = ( 1.0e+0, 0.0e+0 ) )
152 * ..
153 * .. Local Scalars ..
154  LOGICAL upper
155  INTEGER i, j1, j2, lm, ln, vpos, taupos,
156  $ dpos, ofdpos, ajeter
157  COMPLEX ctmp
158 * ..
159 * .. External Subroutines ..
160  EXTERNAL clarfg, clarfx, clarfy
161 * ..
162 * .. Intrinsic Functions ..
163  INTRINSIC conjg, mod
164 * .. External Functions ..
165  LOGICAL lsame
166  EXTERNAL lsame
167 * ..
168 * ..
169 * .. Executable Statements ..
170 *
171  ajeter = ib + ldvt
172  upper = lsame( uplo, 'U' )
173 
174  IF( upper ) THEN
175  dpos = 2 * nb + 1
176  ofdpos = 2 * nb
177  ELSE
178  dpos = 1
179  ofdpos = 2
180  ENDIF
181 
182 *
183 * Upper case
184 *
185  IF( upper ) THEN
186 *
187  IF( wantz ) THEN
188  vpos = mod( sweep-1, 2 ) * n + st
189  taupos = mod( sweep-1, 2 ) * n + st
190  ELSE
191  vpos = mod( sweep-1, 2 ) * n + st
192  taupos = mod( sweep-1, 2 ) * n + st
193  ENDIF
194 *
195  IF( ttype.EQ.1 ) THEN
196  lm = ed - st + 1
197 *
198  v( vpos ) = one
199  DO 10 i = 1, lm-1
200  v( vpos+i ) = conjg( a( ofdpos-i, st+i ) )
201  a( ofdpos-i, st+i ) = zero
202  10 CONTINUE
203  ctmp = conjg( a( ofdpos, st ) )
204  CALL clarfg( lm, ctmp, v( vpos+1 ), 1,
205  $ tau( taupos ) )
206  a( ofdpos, st ) = ctmp
207 *
208  lm = ed - st + 1
209  CALL clarfy( uplo, lm, v( vpos ), 1,
210  $ conjg( tau( taupos ) ),
211  $ a( dpos, st ), lda-1, work)
212  ENDIF
213 *
214  IF( ttype.EQ.3 ) THEN
215 *
216  lm = ed - st + 1
217  CALL clarfy( uplo, lm, v( vpos ), 1,
218  $ conjg( tau( taupos ) ),
219  $ a( dpos, st ), lda-1, work)
220  ENDIF
221 *
222  IF( ttype.EQ.2 ) THEN
223  j1 = ed+1
224  j2 = min( ed+nb, n )
225  ln = ed-st+1
226  lm = j2-j1+1
227  IF( lm.GT.0) THEN
228  CALL clarfx( 'Left', ln, lm, v( vpos ),
229  $ conjg( tau( taupos ) ),
230  $ a( dpos-nb, j1 ), lda-1, work)
231 *
232  IF( wantz ) THEN
233  vpos = mod( sweep-1, 2 ) * n + j1
234  taupos = mod( sweep-1, 2 ) * n + j1
235  ELSE
236  vpos = mod( sweep-1, 2 ) * n + j1
237  taupos = mod( sweep-1, 2 ) * n + j1
238  ENDIF
239 *
240  v( vpos ) = one
241  DO 30 i = 1, lm-1
242  v( vpos+i ) =
243  $ conjg( a( dpos-nb-i, j1+i ) )
244  a( dpos-nb-i, j1+i ) = zero
245  30 CONTINUE
246  ctmp = conjg( a( dpos-nb, j1 ) )
247  CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
248  a( dpos-nb, j1 ) = ctmp
249 *
250  CALL clarfx( 'Right', ln-1, lm, v( vpos ),
251  $ tau( taupos ),
252  $ a( dpos-nb+1, j1 ), lda-1, work)
253  ENDIF
254  ENDIF
255 *
256 * Lower case
257 *
258  ELSE
259 *
260  IF( wantz ) THEN
261  vpos = mod( sweep-1, 2 ) * n + st
262  taupos = mod( sweep-1, 2 ) * n + st
263  ELSE
264  vpos = mod( sweep-1, 2 ) * n + st
265  taupos = mod( sweep-1, 2 ) * n + st
266  ENDIF
267 *
268  IF( ttype.EQ.1 ) THEN
269  lm = ed - st + 1
270 *
271  v( vpos ) = one
272  DO 20 i = 1, lm-1
273  v( vpos+i ) = a( ofdpos+i, st-1 )
274  a( ofdpos+i, st-1 ) = zero
275  20 CONTINUE
276  CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
277  $ tau( taupos ) )
278 *
279  lm = ed - st + 1
280 *
281  CALL clarfy( uplo, lm, v( vpos ), 1,
282  $ conjg( tau( taupos ) ),
283  $ a( dpos, st ), lda-1, work)
284 
285  ENDIF
286 *
287  IF( ttype.EQ.3 ) THEN
288  lm = ed - st + 1
289 *
290  CALL clarfy( uplo, lm, v( vpos ), 1,
291  $ conjg( tau( taupos ) ),
292  $ a( dpos, st ), lda-1, work)
293 
294  ENDIF
295 *
296  IF( ttype.EQ.2 ) THEN
297  j1 = ed+1
298  j2 = min( ed+nb, n )
299  ln = ed-st+1
300  lm = j2-j1+1
301 *
302  IF( lm.GT.0) THEN
303  CALL clarfx( 'Right', lm, ln, v( vpos ),
304  $ tau( taupos ), a( dpos+nb, st ),
305  $ lda-1, work)
306 *
307  IF( wantz ) THEN
308  vpos = mod( sweep-1, 2 ) * n + j1
309  taupos = mod( sweep-1, 2 ) * n + j1
310  ELSE
311  vpos = mod( sweep-1, 2 ) * n + j1
312  taupos = mod( sweep-1, 2 ) * n + j1
313  ENDIF
314 *
315  v( vpos ) = one
316  DO 40 i = 1, lm-1
317  v( vpos+i ) = a( dpos+nb+i, st )
318  a( dpos+nb+i, st ) = zero
319  40 CONTINUE
320  CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
321  $ tau( taupos ) )
322 *
323  CALL clarfx( 'Left', lm, ln-1, v( vpos ),
324  $ conjg( tau( taupos ) ),
325  $ a( dpos+nb-1, st+1 ), lda-1, work)
326 
327  ENDIF
328  ENDIF
329  ENDIF
330 *
331  RETURN
332 *
333 * END OF CHB2ST_KERNELS
334 *
subroutine clarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: clarfx.f:121
subroutine clarfg(N, ALPHA, X, INCX, TAU)
CLARFG generates an elementary reflector (Householder matrix).
Definition: clarfg.f:108
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine clarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
CLARFY
Definition: clarfy.f:110

Here is the call graph for this function: