LAPACK  3.7.0
LAPACK: Linear Algebra PACKage
subroutine ssytrf_aa ( character  UPLO,
integer  N,
real, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
real, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

SSYTRF_AA

Download SSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSYTRF_AA computes the factorization of a real symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U*T*U**T  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
                has been completed, but the block diagonal matrix D is
                exactly singular, and division by zero will occur if it
                is used to solve a system of equations.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
December 2016

Definition at line 138 of file ssytrf_aa.f.

138 *
139 * -- LAPACK computational routine (version 3.7.0) --
140 * -- LAPACK is a software package provided by Univ. of Tennessee, --
141 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
142 * December 2016
143 *
144  IMPLICIT NONE
145 *
146 * .. Scalar Arguments ..
147  CHARACTER uplo
148  INTEGER n, lda, lwork, info
149 * ..
150 * .. Array Arguments ..
151  INTEGER ipiv( * )
152  REAL a( lda, * ), work( * )
153 * ..
154 *
155 * =====================================================================
156 * .. Parameters ..
157  REAL zero, one
158  parameter ( zero = 0.0e+0, one = 1.0e+0 )
159 *
160 * .. Local Scalars ..
161  LOGICAL lquery, upper
162  INTEGER j, lwkopt, iinfo
163  INTEGER nb, mj, nj, k1, k2, j1, j2, j3, jb
164  REAL alpha
165 * ..
166 * .. External Functions ..
167  LOGICAL lsame
168  INTEGER ilaenv
169  EXTERNAL lsame, ilaenv
170 * ..
171 * .. External Subroutines ..
172  EXTERNAL xerbla
173 * ..
174 * .. Intrinsic Functions ..
175  INTRINSIC max
176 * ..
177 * .. Executable Statements ..
178 *
179 * Determine the block size
180 *
181  nb = ilaenv( 1, 'SSYTRF', uplo, n, -1, -1, -1 )
182 *
183 * Test the input parameters.
184 *
185  info = 0
186  upper = lsame( uplo, 'U' )
187  lquery = ( lwork.EQ.-1 )
188  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
189  info = -1
190  ELSE IF( n.LT.0 ) THEN
191  info = -2
192  ELSE IF( lda.LT.max( 1, n ) ) THEN
193  info = -4
194  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
195  info = -7
196  END IF
197 *
198  IF( info.EQ.0 ) THEN
199  lwkopt = (nb+1)*n
200  work( 1 ) = lwkopt
201  END IF
202 *
203  IF( info.NE.0 ) THEN
204  CALL xerbla( 'SSYTRF_AA', -info )
205  RETURN
206  ELSE IF( lquery ) THEN
207  RETURN
208  END IF
209 *
210 * Quick return
211 *
212  IF ( n.EQ.0 ) THEN
213  RETURN
214  ENDIF
215  ipiv( 1 ) = 1
216  IF ( n.EQ.1 ) THEN
217  IF ( a( 1, 1 ).EQ.zero ) THEN
218  info = 1
219  END IF
220  RETURN
221  END IF
222 *
223 * Adjubst block size based on the workspace size
224 *
225  IF( lwork.LT.((1+nb)*n) ) THEN
226  nb = ( lwork-n ) / n
227  END IF
228 *
229  IF( upper ) THEN
230 *
231 * .....................................................
232 * Factorize A as L*D*L**T using the upper triangle of A
233 * .....................................................
234 *
235 * Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
236 *
237  CALL scopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
238 *
239 * J is the main loop index, increasing from 1 to N in steps of
240 * JB, where JB is the number of columns factorized by SLASYF;
241 * JB is either NB, or N-J+1 for the last block
242 *
243  j = 0
244  10 CONTINUE
245  IF( j.GE.n )
246  $ GO TO 20
247 *
248 * each step of the main loop
249 * J is the last column of the previous panel
250 * J1 is the first column of the current panel
251 * K1 identifies if the previous column of the panel has been
252 * explicitly stored, e.g., K1=1 for the first panel, and
253 * K1=0 for the rest
254 *
255  j1 = j + 1
256  jb = min( n-j1+1, nb )
257  k1 = max(1, j)-j
258 *
259 * Panel factorization
260 *
261  CALL slasyf_aa( uplo, 2-k1, n-j, jb,
262  $ a( max(1, j), j+1 ), lda,
263  $ ipiv( j+1 ), work, n, work( n*nb+1 ),
264  $ iinfo )
265  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
266  info = iinfo+j
267  ENDIF
268 *
269 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
270 *
271  DO j2 = j+2, min(n, j+jb+1)
272  ipiv( j2 ) = ipiv( j2 ) + j
273  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
274  CALL sswap( j1-k1-2, a( 1, j2 ), 1,
275  $ a( 1, ipiv(j2) ), 1 )
276  END IF
277  END DO
278  j = j + jb
279 *
280 * Trailing submatrix update, where
281 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
282 * WORK stores the current block of the auxiriarly matrix H
283 *
284  IF( j.LT.n ) THEN
285 *
286 * If first panel and JB=1 (NB=1), then nothing to do
287 *
288  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
289 *
290 * Merge rank-1 update with BLAS-3 update
291 *
292  alpha = a( j, j+1 )
293  a( j, j+1 ) = one
294  CALL scopy( n-j, a( j-1, j+1 ), lda,
295  $ work( (j+1-j1+1)+jb*n ), 1 )
296  CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
297 *
298 * K1 identifies if the previous column of the panel has been
299 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
300 * while K1=0 and K2=1 for the rest
301 *
302  IF( j1.GT.1 ) THEN
303 *
304 * Not first panel
305 *
306  k2 = 1
307  ELSE
308 *
309 * First panel
310 *
311  k2 = 0
312 *
313 * First update skips the first column
314 *
315  jb = jb - 1
316  END IF
317 *
318  DO j2 = j+1, n, nb
319  nj = min( nb, n-j2+1 )
320 *
321 * Update (J2, J2) diagonal block with SGEMV
322 *
323  j3 = j2
324  DO mj = nj-1, 1, -1
325  CALL sgemv( 'No transpose', mj, jb+1,
326  $ -one, work( j3-j1+1+k1*n ), n,
327  $ a( j1-k2, j3 ), 1,
328  $ one, a( j3, j3 ), lda )
329  j3 = j3 + 1
330  END DO
331 *
332 * Update off-diagonal block of J2-th block row with SGEMM
333 *
334  CALL sgemm( 'Transpose', 'Transpose',
335  $ nj, n-j3+1, jb+1,
336  $ -one, a( j1-k2, j2 ), lda,
337  $ work( j3-j1+1+k1*n ), n,
338  $ one, a( j2, j3 ), lda )
339  END DO
340 *
341 * Recover T( J, J+1 )
342 *
343  a( j, j+1 ) = alpha
344  END IF
345 *
346 * WORK(J+1, 1) stores H(J+1, 1)
347 *
348  CALL scopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
349  END IF
350  GO TO 10
351  ELSE
352 *
353 * .....................................................
354 * Factorize A as L*D*L**T using the lower triangle of A
355 * .....................................................
356 *
357 * copy first column A(1:N, 1) into H(1:N, 1)
358 * (stored in WORK(1:N))
359 *
360  CALL scopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
361 *
362 * J is the main loop index, increasing from 1 to N in steps of
363 * JB, where JB is the number of columns factorized by SLASYF;
364 * JB is either NB, or N-J+1 for the last block
365 *
366  j = 0
367  11 CONTINUE
368  IF( j.GE.n )
369  $ GO TO 20
370 *
371 * each step of the main loop
372 * J is the last column of the previous panel
373 * J1 is the first column of the current panel
374 * K1 identifies if the previous column of the panel has been
375 * explicitly stored, e.g., K1=1 for the first panel, and
376 * K1=0 for the rest
377 *
378  j1 = j+1
379  jb = min( n-j1+1, nb )
380  k1 = max(1, j)-j
381 *
382 * Panel factorization
383 *
384  CALL slasyf_aa( uplo, 2-k1, n-j, jb,
385  $ a( j+1, max(1, j) ), lda,
386  $ ipiv( j+1 ), work, n, work( n*nb+1 ), iinfo)
387  IF( (iinfo.GT.0) .AND. (info.EQ.0) ) THEN
388  info = iinfo+j
389  ENDIF
390 *
391 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
392 *
393  DO j2 = j+2, min(n, j+jb+1)
394  ipiv( j2 ) = ipiv( j2 ) + j
395  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
396  CALL sswap( j1-k1-2, a( j2, 1 ), lda,
397  $ a( ipiv(j2), 1 ), lda )
398  END IF
399  END DO
400  j = j + jb
401 *
402 * Trailing submatrix update, where
403 * A(J2+1, J1-1) stores L(J2+1, J1) and
404 * WORK(J2+1, 1) stores H(J2+1, 1)
405 *
406  IF( j.LT.n ) THEN
407 *
408 * if first panel and JB=1 (NB=1), then nothing to do
409 *
410  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
411 *
412 * Merge rank-1 update with BLAS-3 update
413 *
414  alpha = a( j+1, j )
415  a( j+1, j ) = one
416  CALL scopy( n-j, a( j+1, j-1 ), 1,
417  $ work( (j+1-j1+1)+jb*n ), 1 )
418  CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
419 *
420 * K1 identifies if the previous column of the panel has been
421 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
422 * while K1=0 and K2=1 for the rest
423 *
424  IF( j1.GT.1 ) THEN
425 *
426 * Not first panel
427 *
428  k2 = 1
429  ELSE
430 *
431 * First panel
432 *
433  k2 = 0
434 *
435 * First update skips the first column
436 *
437  jb = jb - 1
438  END IF
439 *
440  DO j2 = j+1, n, nb
441  nj = min( nb, n-j2+1 )
442 *
443 * Update (J2, J2) diagonal block with SGEMV
444 *
445  j3 = j2
446  DO mj = nj-1, 1, -1
447  CALL sgemv( 'No transpose', mj, jb+1,
448  $ -one, work( j3-j1+1+k1*n ), n,
449  $ a( j3, j1-k2 ), lda,
450  $ one, a( j3, j3 ), 1 )
451  j3 = j3 + 1
452  END DO
453 *
454 * Update off-diagonal block in J2-th block column with SGEMM
455 *
456  CALL sgemm( 'No transpose', 'Transpose',
457  $ n-j3+1, nj, jb+1,
458  $ -one, work( j3-j1+1+k1*n ), n,
459  $ a( j2, j1-k2 ), lda,
460  $ one, a( j3, j2 ), lda )
461  END DO
462 *
463 * Recover T( J+1, J )
464 *
465  a( j+1, j ) = alpha
466  END IF
467 *
468 * WORK(J+1, 1) stores H(J+1, 1)
469 *
470  CALL scopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
471  END IF
472  GO TO 11
473  END IF
474 *
475  20 CONTINUE
476  RETURN
477 *
478 * End of SSYTRF_AA
479 *
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: tstiee.f:83
subroutine sgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
SGEMV
Definition: sgemv.f:158
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:55
subroutine sswap(N, SX, INCX, SY, INCY)
SSWAP
Definition: sswap.f:53
subroutine scopy(N, SX, INCX, SY, INCY)
SCOPY
Definition: scopy.f:53
subroutine slasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK, INFO)
SLASYF_AA
Definition: slasyf_aa.f:156

Here is the call graph for this function:

Here is the caller graph for this function: