GNU Octave  4.0.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
__ilu__.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2014-2015 Eduardo Ramos Fernández <[email protected]>
4 Copyright (C) 2013-2015 Kai T. Ohlhus <[email protected]>
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include "oct-locbuf.h"
29 
30 #include "defun.h"
31 #include "error.h"
32 #include "parse.h"
33 
34 // That function implements the IKJ and JKI variants of Gaussian elimination to
35 // perform the ILUTP decomposition. The behaviour is controlled by milu
36 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
37 // advantage of CCS format of the input matrix. If milu = 'row' the input
38 // matrix has to be transposed to obtain the equivalent CRS structure so we can
39 // work efficiently with rows. In this case IKJ version is used.
40 template <typename octave_matrix_t, typename T>
41 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
42 {
43 
44  const octave_idx_type n = sm.cols ();
47  octave_idx_type j1, j2, jrow, jw, i, k, jj;
48  T tl, r;
49 
50  enum {OFF, ROW, COL};
51  char opt;
52  if (milu == "row")
53  {
54  opt = ROW;
55  sm = sm.transpose ();
56  }
57  else if (milu == "col")
58  opt = COL;
59  else
60  opt = OFF;
61 
62  octave_idx_type* cidx = sm.cidx ();
63  octave_idx_type* ridx = sm.ridx ();
64  T* data = sm.data ();
65  for (i = 0; i < n; i++)
66  iw[i] = -1;
67  for (k = 0; k < n; k++)
68  {
69  j1 = cidx[k];
70  j2 = cidx[k+1] - 1;
72  for (j = j1; j <= j2; j++)
73  {
74  iw[ridx[j]] = j;
75  }
76  r = 0;
77  j = j1;
78  jrow = ridx[j];
79  while ((jrow < k) && (j <= j2))
80  {
81  if (opt == ROW)
82  {
83  tl = data[j] / data[uptr[jrow]];
84  data[j] = tl;
85  }
86  for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
87  {
88  jw = iw[ridx[jj]];
89  if (jw != -1)
90  if (opt == ROW)
91  data[jw] -= tl * data[jj];
92  else
93  data[jw] -= data[j] * data[jj];
94 
95  else
96  // That is for the milu='row'
97  if (opt == ROW)
98  r += tl * data[jj];
99  else if (opt == COL)
100  r += data[j] * data[jj];
101  }
102  j++;
103  jrow = ridx[j];
104  }
105  uptr[k] = j;
106  if (opt != OFF)
107  data[uptr[k]] -= r;
108  if (opt != ROW)
109  for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
110  data[jj] /= data[uptr[k]];
111  if (k != jrow)
112  {
113  error ("ilu: A has a zero on the diagonal");
114  break;
115  }
116 
117  if (data[j] == T(0))
118  {
119  error ("ilu: encountered a pivot equal to 0");
120  break;
121  }
122  for (i = j1; i <= j2; i++)
123  iw[ridx[i]] = -1;
124  }
125  if (opt == ROW)
126  sm = sm.transpose ();
127 }
128 
129 DEFUN (__ilu0__, args, nargout,
130  "-*- texinfo -*-\n\
131 @deftypefn {Built-in Function} {[@var{L}, @var{U}] =} __ilu0__ (@var{A})\n\
132 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})\n\
133 @deftypefnx {Built-in Function} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @dots{})\n\
134 Undocumented internal function.\n\
135 @end deftypefn")
136 {
137  octave_value_list retval;
138 
139  int nargin = args.length ();
140  std::string milu;
141 
142  if (nargout > 2 || nargin < 1 || nargin > 2)
143  {
144  print_usage ();
145  return retval;
146  }
147 
148  // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
149  // it's structure does not change during the algorithm. The same input
150  // matrix is used to build the output matrix due to that fact.
151  octave_value_list param_list;
152  if (! args(0).is_complex_type ())
153  {
154  SparseMatrix sm = args(0).sparse_matrix_value ();
155  ilu_0 <SparseMatrix, double> (sm, milu);
156  if (!error_state)
157  {
158  param_list.append (sm);
159  retval(1) = feval ("triu", param_list)(0).sparse_matrix_value ();
160  SparseMatrix eye =
161  feval ("speye", octave_value_list (
162  octave_value (sm.cols ())))(0).sparse_matrix_value ();
163  param_list.append (-1);
164  retval(0) = eye +
165  feval ("tril", param_list)(0).sparse_matrix_value ();
166  }
167  }
168  else
169  {
170  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
171  ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
172  if (! error_state)
173  {
174  param_list.append (sm);
175  retval(1) =
176  feval ("triu", param_list)(0).sparse_complex_matrix_value ();
177  SparseComplexMatrix eye =
178  feval ("speye", octave_value_list (
179  octave_value (sm.cols ())))(0).sparse_complex_matrix_value ();
180  param_list.append (-1);
181  retval(0) =
182  eye + feval ("tril", param_list)(0).sparse_complex_matrix_value ();
183  }
184  }
185 
186  return retval;
187 }
188 
189 template <typename octave_matrix_t, typename T>
190 void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
191  octave_matrix_t& L, octave_matrix_t& U, T* cols_norm,
192  T* rows_norm, const T droptol = 0,
193  const std::string milu = "off")
194 {
195 
196  // Map the strings into chars for faster comparing inside loops
197  char opt;
198  enum {OFF, ROW, COL};
199  if (milu == "row")
200  opt = ROW;
201  else if (milu == "col")
202  opt = COL;
203  else
204  opt = OFF;
205 
206  octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
207  max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
208 
209  const octave_idx_type n = sm_u.cols ();
210  sm_u = sm_u.transpose ();
211 
212  max_len_u = sm_u.nnz ();
213  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
214  max_len_l = sm_l.nnz ();
215  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
216  // Extract pointers to the arrays for faster access inside loops
217  octave_idx_type* cidx_in_u = sm_u.cidx ();
218  octave_idx_type* ridx_in_u = sm_u.ridx ();
219  T* data_in_u = sm_u.data ();
220  octave_idx_type* cidx_in_l = sm_l.cidx ();
221  octave_idx_type* ridx_in_l = sm_l.ridx ();
222  T* data_in_l = sm_l.data ();
223 
224  // L output arrays
225  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
226  octave_idx_type* ridx_l = ridx_out_l.fortran_vec ();
227  Array <T> data_out_l (dim_vector (max_len_l, 1));
228  T* data_l = data_out_l.fortran_vec ();
229 
230  // U output arrays
231  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
232  octave_idx_type* ridx_u = ridx_out_u.fortran_vec ();
233  Array <T> data_out_u (dim_vector (max_len_u, 1));
234  T* data_u = data_out_u.fortran_vec ();
235 
236  // Working arrays
237  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
238  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
239  OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
240  OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
241  OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
242  OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
245  OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
246 
247  T zero = T (0);
248 
249  cidx_u[0] = cidx_in_u[0];
250  cidx_l[0] = cidx_in_l[0];
251  for (i = 0; i < n; i++)
252  {
253  w_data_u[i] = zero;
254  w_data_l[i] = zero;
255  cr_sum[i] = zero;
256  }
257 
258  total_len_u = 0;
259  total_len_l = 0;
260  cols_list_len = 0;
261  rows_list_len = 0;
262 
263  for (k = 0; k < n; k++)
264  {
265  // Load the working column and working row
266  for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
267  w_data_l[ridx_in_l[i]] = data_in_l[i];
268 
269  for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
270  w_data_u[ridx_in_u[i]] = data_in_u[i];
271 
272  // Update U working row
273  for (j = 0; j < rows_list_len; j++)
274  {
275  if ((Ufirst[rows_list[j]] != -1))
276  for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
277  {
278  jrow = ridx_u[jj];
279  w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
280  }
281  }
282  // Update L working column
283  for (j = 0; j < cols_list_len; j++)
284  {
285  if (Lfirst[cols_list[j]] != -1)
286  for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
287  {
288  jrow = ridx_l[jj];
289  w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
290  }
291  }
292 
293  if ((max_len_u - total_len_u) < n)
294  {
295  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
296  data_out_u.resize (dim_vector (max_len_u, 1));
297  data_u = data_out_u.fortran_vec ();
298  ridx_out_u.resize (dim_vector (max_len_u, 1));
299  ridx_u = ridx_out_u.fortran_vec ();
300  }
301 
302  if ((max_len_l - total_len_l) < n)
303  {
304  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
305  data_out_l.resize (dim_vector (max_len_l, 1));
306  data_l = data_out_l.fortran_vec ();
307  ridx_out_l.resize (dim_vector (max_len_l, 1));
308  ridx_l = ridx_out_l.fortran_vec ();
309  }
310 
311  // Expand the working row into the U output data structures
312  w_len_l = 0;
313  data_u[total_len_u] = w_data_u[k];
314  ridx_u[total_len_u] = k;
315  w_len_u = 1;
316  for (i = k + 1; i < n; i++)
317  {
318  if (w_data_u[i] != zero)
319  {
320  if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
321  {
322  if (opt == ROW)
323  cr_sum[k] += w_data_u[i];
324  else if (opt == COL)
325  cr_sum[i] += w_data_u[i];
326  }
327  else
328  {
329  data_u[total_len_u + w_len_u] = w_data_u[i];
330  ridx_u[total_len_u + w_len_u] = i;
331  w_len_u++;
332  }
333  }
334 
335  // Expand the working column into the L output data structures
336  if (w_data_l[i] != zero)
337  {
338  if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
339  {
340  if (opt == COL)
341  cr_sum[k] += w_data_l[i];
342  else if (opt == ROW)
343  cr_sum[i] += w_data_l[i];
344  }
345  else
346  {
347  data_l[total_len_l + w_len_l] = w_data_l[i];
348  ridx_l[total_len_l + w_len_l] = i;
349  w_len_l++;
350  }
351  }
352  w_data_u[i] = zero;
353  w_data_l[i] = zero;
354  }
355 
356  // Compensate row and column sums --> milu option
357  if (opt == COL || opt == ROW)
358  data_u[total_len_u] += cr_sum[k];
359 
360  // Check if the pivot is zero
361  if (data_u[total_len_u] == zero)
362  {
363  error ("ilu: encountered a pivot equal to 0");
364  break;
365  }
366 
367  // Scale the elements in L by the pivot
368  for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
369  data_l[i] /= data_u[total_len_u];
370 
371 
372  total_len_u += w_len_u;
373  total_len_l += w_len_l;
374  // Check if there are too many elements to be indexed with
375  // octave_idx_type type due to fill-in during the process.
376  if (total_len_l < 0 || total_len_u < 0)
377  {
378  error ("ilu: integer overflow. Too many fill-in elements in L or U");
379  break;
380  }
381  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
382  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
383 
384  // The tricky part of the algorithm. The arrays pointing to the first
385  // working element of each column in the next iteration (Lfirst) or
386  // the first working element of each row (Ufirst) are updated. Also the
387  // arrays working as lists cols_list and rows_list are filled with
388  // indices pointing to Ufirst and Lfirst respectively.
389  // TODO: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
390  // have to be considered in a certain column or row in next iteration,
391  // can be removed. It feels safer to me using such an indicator.
392  if (k < (n - 1))
393  {
394  if (w_len_u > 0)
395  Ufirst[k] = cidx_u[k];
396  else
397  Ufirst[k] = -1;
398  if (w_len_l > 0)
399  Lfirst[k] = cidx_l[k];
400  else
401  Lfirst[k] = -1;
402  cols_list_len = 0;
403  rows_list_len = 0;
404  for (i = 0; i <= k; i++)
405  {
406  if (Ufirst[i] != -1)
407  {
408  jj = ridx_u[Ufirst[i]];
409  if (jj < (k + 1))
410  {
411  if (Ufirst[i] < (cidx_u[i+1]))
412  {
413  Ufirst[i]++;
414  if (Ufirst[i] == cidx_u[i+1])
415  Ufirst[i] = -1;
416  else
417  jj = ridx_u[Ufirst[i]];
418  }
419  }
420  if (jj == (k + 1))
421  {
422  cols_list[cols_list_len] = i;
423  cols_list_len++;
424  }
425  }
426 
427  if (Lfirst[i] != -1)
428  {
429  jj = ridx_l[Lfirst[i]];
430  if (jj < (k + 1))
431  if (Lfirst[i] < (cidx_l[i+1]))
432  {
433  Lfirst[i]++;
434  if (Lfirst[i] == cidx_l[i+1])
435  Lfirst[i] = -1;
436  else
437  jj = ridx_l[Lfirst[i]];
438  }
439  if (jj == (k + 1))
440  {
441  rows_list[rows_list_len] = i;
442  rows_list_len++;
443  }
444  }
445  }
446  }
447  }
448 
449  if (! error_state)
450  {
451  // Build the output matrices
452  L = octave_matrix_t (n, n, total_len_l);
453  U = octave_matrix_t (n, n, total_len_u);
454  for (i = 0; i <= n; i++)
455  L.cidx (i) = cidx_l[i];
456  for (i = 0; i < total_len_l; i++)
457  {
458  L.ridx (i) = ridx_l[i];
459  L.data (i) = data_l[i];
460  }
461  for (i = 0; i <= n; i++)
462  U.cidx (i) = cidx_u[i];
463  for (i = 0; i < total_len_u; i++)
464  {
465  U.ridx (i) = ridx_u[i];
466  U.data (i) = data_u[i];
467  }
468  U = U.transpose ();
469  }
470 }
471 
472 DEFUN (__iluc__, args, nargout,
473  "-*- texinfo -*-\n\
474 @deftypefn {Built-in Function} {[@var{L}, @var{U}] =} __iluc__ (@var{A})\n\
475 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol})\n\
476 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})\n\
477 @deftypefnx {Built-in Function} {[@var{L}, @var{U}, @var{P}] =} __iluc__ (@var{A}, @dots{})\n\
478 Undocumented internal function.\n\
479 @end deftypefn")
480 {
481  octave_value_list retval;
482  int nargin = args.length ();
483  std::string milu = "off";
484  double droptol = 0;
485 
486  if (nargout != 2 || nargin < 1 || nargin > 3)
487  {
488  print_usage ();
489  return retval;
490  }
491 
492  // Don't repeat input validation of arguments done in ilu.m
493  if (nargin >= 2)
494  droptol = args(1).double_value ();
495 
496  if (nargin == 3)
497  milu = args(2).string_value ();
498 
499  octave_value_list param_list;
500  if (! args(0).is_complex_type ())
501  {
502  Array<double> cols_norm, rows_norm;
503  param_list.append (args(0).sparse_matrix_value ());
504  SparseMatrix sm_u = feval ("triu", param_list)(0).sparse_matrix_value ();
505  param_list.append (-1);
506  SparseMatrix sm_l = feval ("tril", param_list)(0).sparse_matrix_value ();
507  param_list(1) = "rows";
508  rows_norm = feval ("norm", param_list)(0).vector_value ();
509  param_list(1) = "cols";
510  cols_norm = feval ("norm", param_list)(0).vector_value ();
511  param_list.clear ();
512  SparseMatrix U;
513  SparseMatrix L;
514  ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
515  cols_norm.fortran_vec (),
516  rows_norm.fortran_vec (),
517  droptol, milu);
518  if (! error_state)
519  {
520  param_list.append (octave_value (L.cols ()));
521  SparseMatrix eye =
522  feval ("speye", param_list)(0).sparse_matrix_value ();
523  retval(1) = U;
524  retval(0) = L + eye;
525  }
526  }
527  else
528  {
529  Array<Complex> cols_norm, rows_norm;
530  param_list.append (args(0).sparse_complex_matrix_value ());
531  SparseComplexMatrix sm_u =
532  feval("triu", param_list)(0).sparse_complex_matrix_value ();
533  param_list.append (-1);
534  SparseComplexMatrix sm_l =
535  feval("tril", param_list)(0).sparse_complex_matrix_value ();
536  param_list(1) = "rows";
537  rows_norm = feval ("norm", param_list)(0).complex_vector_value ();
538  param_list(1) = "cols";
539  cols_norm = feval ("norm", param_list)(0).complex_vector_value ();
540  param_list.clear ();
543  ilu_crout < SparseComplexMatrix, Complex >
544  (sm_l, sm_u, L, U, cols_norm.fortran_vec () ,
545  rows_norm.fortran_vec (), Complex (droptol), milu);
546  if (! error_state)
547  {
548  param_list.append (octave_value (L.cols ()));
549  SparseComplexMatrix eye =
550  feval ("speye", param_list)(0).sparse_complex_matrix_value ();
551  retval(1) = U;
552  retval(0) = L + eye;
553  }
554  }
555 
556  return retval;
557 }
558 
559 // That function implements the IKJ and JKI variants of gaussian elimination
560 // to perform the ILUTP decomposition. The behaviour is controlled by milu
561 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
562 // advantage of CCS format of the input matrix. Row pivoting is performed.
563 // If milu = 'row' the input matrix has to be transposed to obtain the
564 // equivalent CRS structure so we can work efficiently with rows. In that
565 // case IKJ version is used and column pivoting is performed.
566 
567 template <typename octave_matrix_t, typename T>
568 void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
569  octave_idx_type nnz_u, octave_idx_type nnz_l, T* cols_norm,
570  Array <octave_idx_type>& perm_vec, const T droptol = T(0),
571  const T thresh = T(0), const std::string milu = "off",
572  const double udiag = 0)
573 {
574  char opt;
575  enum {OFF, ROW, COL};
576  if (milu == "row")
577  opt = ROW;
578  else if (milu == "col")
579  opt = COL;
580  else
581  opt = OFF;
582 
583  const octave_idx_type n = sm.cols ();
584 
585  // That is necessary for the JKI (milu = "row") variant.
586  if (opt == ROW)
587  sm = sm.transpose();
588 
589  // Extract pointers to the arrays for faster access inside loops
590  octave_idx_type* cidx_in = sm.cidx ();
591  octave_idx_type* ridx_in = sm.ridx ();
592  T* data_in = sm.data ();
593  octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
594  max_ind, max_len_l, max_len_u;
595  T zero = T(0);
596 
597  T tl = zero, aux, maximum;
598 
599  max_len_u = nnz_u;
600  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
601  max_len_l = nnz_l;
602  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
603 
604  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
605  octave_idx_type* cidx_l = cidx_out_l.fortran_vec ();
606  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
607  octave_idx_type* ridx_l = ridx_out_l.fortran_vec ();
608  Array <T> data_out_l (dim_vector (max_len_l ,1));
609  T* data_l = data_out_l.fortran_vec ();
610  // Data for U
611  Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
612  octave_idx_type* cidx_u = cidx_out_u.fortran_vec ();
613  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
614  octave_idx_type* ridx_u = ridx_out_u.fortran_vec ();
615  Array <T> data_out_u (dim_vector (max_len_u, 1));
616  T* data_u = data_out_u.fortran_vec();
617 
618  // Working arrays and permutation arrays
619  octave_idx_type w_len_u, w_len_l;
620  T total_sum, partial_col_sum = zero, partial_row_sum = zero;
621  std::set <octave_idx_type> iw_l;
622  std::set <octave_idx_type> iw_u;
623  std::set <octave_idx_type>::iterator it, it2;
624  OCTAVE_LOCAL_BUFFER (T, w_data, n);
626  octave_idx_type* perm = perm_vec.fortran_vec ();
628 
629 
630  cidx_l[0] = cidx_in[0];
631  cidx_u[0] = cidx_in[0];
632  for (i = 0; i < n; i++)
633  {
634  w_data[i] = 0;
635  perm[i] = i;
636  iperm[i] = i;
637  }
638  total_len_u = 0;
639  total_len_l = 0;
640 
641  for (k = 0; k < n; k++)
642  {
643 
644  for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
645  {
646  p_perm = iperm[ridx_in[j]];
647  w_data[iperm[ridx_in[j]]] = data_in[j];
648  if (p_perm > k)
649  iw_l.insert (ridx_in[j]);
650  else
651  iw_u.insert (p_perm);
652  }
653 
654  it = iw_u.begin ();
655  jrow = *it;
656  total_sum = zero;
657  while ((jrow < k) && (it != iw_u.end ()))
658  {
659  if (opt == COL)
660  partial_col_sum = w_data[jrow];
661  if (w_data[jrow] != zero)
662  {
663  if (opt == ROW)
664  {
665  partial_row_sum = w_data[jrow];
666  tl = w_data[jrow] / data_u[uptr[jrow]];
667  }
668  for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
669  {
670  p_perm = iperm[ridx_l[jj]];
671  aux = w_data[p_perm];
672  if (opt == ROW)
673  {
674  w_data[p_perm] -= tl * data_l[jj];
675  partial_row_sum += tl * data_l[jj];
676  }
677  else
678  {
679  tl = data_l[jj] * w_data[jrow];
680  w_data[p_perm] -= tl;
681  if (opt == COL)
682  partial_col_sum += tl;
683  }
684 
685  if ((aux == zero) && (w_data[p_perm] != zero))
686  {
687  if (p_perm > k)
688  iw_l.insert (ridx_l[jj]);
689  else
690  iw_u.insert (p_perm);
691  }
692  }
693 
694  // Drop element from the U part in IKJ and L part in JKI
695  // variant (milu = [col|off])
696  if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
697  && (w_data[jrow] != zero))
698  {
699  if (opt == COL)
700  total_sum += partial_col_sum;
701  else if (opt == ROW)
702  total_sum += partial_row_sum;
703  w_data[jrow] = zero;
704  it2 = it;
705  it++;
706  iw_u.erase (it2);
707  jrow = *it;
708  continue;
709  }
710  else
711  // This is the element scaled by the pivot
712  // in the actual iteration
713  if (opt == ROW)
714  w_data[jrow] = tl;
715  }
716  jrow = *(++it);
717  }
718 
719  // Search for the pivot and update iw_l and iw_u if the pivot is not the
720  // diagonal element
721  if ((thresh > zero) && (k < (n - 1)))
722  {
723  maximum = std::abs (w_data[k]) / thresh;
724  max_ind = perm[k];
725  for (it = iw_l.begin (); it != iw_l.end (); ++it)
726  {
727  p_perm = iperm[*it];
728  if (std::abs (w_data[p_perm]) > maximum)
729  {
730  maximum = std::abs (w_data[p_perm]);
731  max_ind = *it;
732  it2 = it;
733  }
734  }
735  // If the pivot is not the diagonal element update all.
736  p_perm = iperm[max_ind];
737  if (max_ind != perm[k])
738  {
739  iw_l.erase (it2);
740  if (w_data[k] != zero)
741  iw_l.insert (perm[k]);
742  else
743  iw_u.insert (k);
744  // Swap data and update permutation vectors
745  aux = w_data[k];
746  iperm[perm[p_perm]] = k;
747  iperm[perm[k]] = p_perm;
748  c = perm[k];
749  perm[k] = perm[p_perm];
750  perm[p_perm] = c;
751  w_data[k] = w_data[p_perm];
752  w_data[p_perm] = aux;
753  }
754 
755  }
756 
757  // Drop elements in the L part in the IKJ and from the U part in the JKI
758  // version.
759  it = iw_l.begin ();
760  while (it != iw_l.end ())
761  {
762  p_perm = iperm[*it];
763  if (droptol > zero)
764  if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
765  {
766  if (opt != OFF)
767  total_sum += w_data[p_perm];
768  w_data[p_perm] = zero;
769  it2 = it;
770  it++;
771  iw_l.erase (it2);
772  continue;
773  }
774 
775  it++;
776  }
777 
778  // If milu == [row|col] summation is preserved.
779  // Compensate diagonal element.
780  if (opt != OFF)
781  {
782  if ((total_sum > zero) && (w_data[k] == zero))
783  iw_u.insert (k);
784  w_data[k] += total_sum;
785  }
786 
787 
788 
789  // Check if the pivot is zero and if udiag is active.
790  // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
791  // will not preserve the row sum for that column/row.
792  if (w_data[k] == zero)
793  {
794  if (udiag == 1)
795  {
796  w_data[k] = droptol;
797  iw_u.insert (k);
798  }
799  else
800  {
801  error ("ilu: encountered a pivot equal to 0");
802  break;
803  }
804  }
805 
806  // Scale the elements on the L part for IKJ version (milu = [col|off])
807  if (opt != ROW)
808  for (it = iw_l.begin (); it != iw_l.end (); ++it)
809  {
810  p_perm = iperm[*it];
811  w_data[p_perm] = w_data[p_perm] / w_data[k];
812  }
813 
814 
815  if ((max_len_u - total_len_u) < n)
816  {
817  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
818  data_out_u.resize (dim_vector (max_len_u, 1));
819  data_u = data_out_u.fortran_vec ();
820  ridx_out_u.resize (dim_vector (max_len_u, 1));
821  ridx_u = ridx_out_u.fortran_vec ();
822  }
823 
824  if ((max_len_l - total_len_l) < n)
825  {
826  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
827  data_out_l.resize (dim_vector (max_len_l, 1));
828  data_l = data_out_l.fortran_vec ();
829  ridx_out_l.resize (dim_vector (max_len_l, 1));
830  ridx_l = ridx_out_l.fortran_vec ();
831  }
832 
833  // Expand working vector into U.
834  w_len_u = 0;
835  for (it = iw_u.begin (); it != iw_u.end (); ++it)
836  {
837  if (w_data[*it] != zero)
838  {
839  data_u[total_len_u + w_len_u] = w_data[*it];
840  ridx_u[total_len_u + w_len_u] = *it;
841  w_len_u++;
842  }
843  w_data[*it] = 0;
844  }
845  // Expand working vector into L.
846  w_len_l = 0;
847  for (it = iw_l.begin (); it != iw_l.end (); ++it)
848  {
849  p_perm = iperm[*it];
850  if (w_data[p_perm] != zero)
851  {
852  data_l[total_len_l + w_len_l] = w_data[p_perm];
853  ridx_l[total_len_l + w_len_l] = *it;
854  w_len_l++;
855  }
856  w_data[p_perm] = 0;
857  }
858  total_len_u += w_len_u;
859  total_len_l += w_len_l;
860  // Check if there are too many elements to be indexed with
861  // octave_idx_type type due to fill-in during the process.
862  if (total_len_l < 0 || total_len_u < 0)
863  {
864  error ("ilu: Integer overflow. Too many fill-in elements in L or U");
865  break;
866  }
867  if (opt == ROW)
868  uptr[k] = total_len_u - 1;
869  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
870  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
871 
872  iw_l.clear ();
873  iw_u.clear ();
874  }
875 
876  if (! error_state)
877  {
878  octave_matrix_t *L_ptr;
879  octave_matrix_t *U_ptr;
880  octave_matrix_t diag (n, n, n);
881 
882  // L and U are interchanged if milu = 'row'. It is a matter
883  // of nomenclature to re-use code with both IKJ and JKI
884  // versions of the algorithm.
885  if (opt == ROW)
886  {
887  L_ptr = &U;
888  U_ptr = &L;
889  L = octave_matrix_t (n, n, total_len_u - n);
890  U = octave_matrix_t (n, n, total_len_l);
891  }
892  else
893  {
894  L_ptr = &L;
895  U_ptr = &U;
896  L = octave_matrix_t (n, n, total_len_l);
897  U = octave_matrix_t (n, n, total_len_u);
898  }
899 
900  for (i = 0; i <= n; i++)
901  {
902  L_ptr->cidx (i) = cidx_l[i];
903  U_ptr->cidx (i) = cidx_u[i];
904  if (opt == ROW)
905  U_ptr->cidx (i) -= i;
906  }
907 
908  for (i = 0; i < n; i++)
909  {
910  if (opt == ROW)
911  diag.elem (i,i) = data_u[uptr[i]];
912  j = cidx_l[i];
913 
914  while (j < cidx_l[i+1])
915  {
916  L_ptr->ridx (j) = ridx_l[j];
917  L_ptr->data (j) = data_l[j];
918  j++;
919  }
920  j = cidx_u[i];
921 
922  while (j < cidx_u[i+1])
923  {
924  c = j;
925  if (opt == ROW)
926  {
927  // The diagonal is removed from L if milu = 'row'.
928  // That is because is convenient to have it inside
929  // the L part to carry out the process.
930  if (ridx_u[j] == i)
931  {
932  j++;
933  continue;
934  }
935  else
936  c -= i;
937  }
938  U_ptr->data (c) = data_u[j];
939  U_ptr->ridx (c) = ridx_u[j];
940  j++;
941  }
942  }
943 
944  if (opt == ROW)
945  {
946  U = U.transpose ();
947  // The diagonal, conveniently permuted is added to U
948  U += diag.index (idx_vector::colon, perm_vec);
949  L = L.transpose ();
950  }
951  }
952 }
953 
954 DEFUN (__ilutp__, args, nargout,
955  "-*- texinfo -*-\n\
956 @deftypefn {Built-in Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A})\n\
957 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol})\n\
958 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh})\n\
959 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu})\n\
960 @deftypefnx {Built-in Function} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})\n\
961 @deftypefnx {Built-in Function} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@var{A}, @dots{})\n\
962 Undocumented internal function.\n\
963 @end deftypefn")
964 {
965  octave_value_list retval;
966 
967  int nargin = args.length ();
968  std::string milu = "";
969  double droptol = 0, thresh = 1;
970  double udiag = 0;
971 
972  if (nargout < 2 || nargout > 3 || nargin < 1 || nargin > 5)
973  {
974  print_usage ();
975  return retval;
976  }
977 
978  // Don't repeat input validation of arguments done in ilu.m
979  if (nargin >= 2)
980  droptol = args(1).double_value ();
981 
982  if (nargin >= 3)
983  thresh = args(2).double_value ();
984 
985  if (nargin >= 4)
986  milu = args(3).string_value ();
987 
988  if (nargin == 5)
989  udiag = args(4).double_value ();
990 
991  octave_value_list param_list;
992  octave_idx_type nnz_u, nnz_l;
993  if (! args(0).is_complex_type ())
994  {
995  Array <double> rc_norm;
996  SparseMatrix sm = args(0).sparse_matrix_value ();
997  param_list.append (sm);
998  nnz_u = (feval ("triu", param_list)(0).sparse_matrix_value ()).nnz ();
999  param_list.append (-1);
1000  nnz_l = (feval ("tril", param_list)(0).sparse_matrix_value ()).nnz ();
1001  if (milu == "row")
1002  param_list (1) = "rows";
1003  else
1004  param_list (1) = "cols";
1005  rc_norm = feval ("norm", param_list)(0).vector_value ();
1006  param_list.clear ();
1007  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
1008  SparseMatrix U;
1009  SparseMatrix L;
1010  ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
1011  rc_norm.fortran_vec (),
1012  perm, droptol, thresh, milu, udiag);
1013  if (! error_state)
1014  {
1015  param_list.append (octave_value (L.cols ()));
1016  SparseMatrix eye =
1017  feval ("speye", param_list)(0).sparse_matrix_value ();
1018  if (milu == "row")
1019  {
1020  if (nargout == 3)
1021  {
1022  retval(2) = eye.index (idx_vector::colon, perm);
1023  retval(1) = U.index (idx_vector::colon, perm);
1024  }
1025  else if (nargout == 2)
1026  retval(1) = U;
1027  retval(0) = L + eye;
1028  }
1029  else
1030  {
1031  if (nargout == 3)
1032  {
1033  retval(2) = eye.index (perm, idx_vector::colon);
1034  retval(1) = U;
1035  retval(0) = L.index (perm, idx_vector::colon) + eye;
1036  }
1037  else
1038  {
1039  retval(1) = U;
1040  retval(0) = L + eye.index (perm, idx_vector::colon);
1041  }
1042  }
1043  }
1044  }
1045  else
1046  {
1047  Array <Complex> rc_norm;
1048  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
1049  param_list.append (sm);
1050  nnz_u =
1051  feval ("triu", param_list)(0).sparse_complex_matrix_value ().nnz ();
1052  param_list.append (-1);
1053  nnz_l =
1054  feval ("tril", param_list)(0).sparse_complex_matrix_value ().nnz ();
1055  if (milu == "row")
1056  param_list(1) = "rows";
1057  else
1058  param_list(1) = "cols";
1059  rc_norm = feval ("norm", param_list)(0).complex_vector_value ();
1060  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
1061  param_list.clear ();
1064  ilu_tp < SparseComplexMatrix, Complex>
1065  (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
1066  Complex (droptol), Complex (thresh), milu, udiag);
1067 
1068  if (! error_state)
1069  {
1070  param_list.append (octave_value (L.cols ()));
1071  SparseComplexMatrix eye =
1072  feval ("speye", param_list)(0).sparse_complex_matrix_value ();
1073  if (milu == "row")
1074  {
1075  if (nargout == 3)
1076  {
1077  retval(2) = eye.index (idx_vector::colon, perm);
1078  retval(1) = U.index (idx_vector::colon, perm);
1079  }
1080  else if (nargout == 2)
1081  retval(1) = U;
1082  retval(0) = L + eye;
1083  }
1084  else
1085  {
1086  if (nargout == 3)
1087  {
1088  retval(2) = eye.index (perm, idx_vector::colon);
1089  retval(1) = U;
1090  retval(0) = L.index (perm, idx_vector::colon) + eye;
1091  }
1092  else
1093  {
1094  retval(1) = U;
1095  retval(0) = L + eye.index (perm, idx_vector::colon);
1096  }
1097  }
1098  }
1099  }
1100 
1101  return retval;
1102 }
1103 
1104 /*
1105 ## No test needed for internal helper function.
1106 %!assert (1)
1107 */
1108 
octave_idx_type cols(void) const
Definition: Sparse.h:264
void clear(void)
Definition: oct-obj.h:148
static const idx_vector colon
Definition: idx-vector.h:492
OCTINTERP_API void print_usage(void)
Definition: defun.cc:51
octave_idx_type length(void) const
Definition: oct-obj.h:89
octave_value_list & append(const octave_value &val)
Definition: oct-obj.cc:85
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:44
void error(const char *fmt,...)
Definition: error.cc:476
Sparse< T > index(const idx_vector &i, bool resize_ok=false) const
Definition: Sparse.cc:1387
octave_value_list feval(const std::string &name, const octave_value_list &args, int nargout)
Definition: oct-parse.cc:8625
int error_state
Definition: error.cc:101
void resize(const dim_vector &dv, const T &rfv)
Definition: Array.cc:1033
void ilu_tp(octave_matrix_t &sm, octave_matrix_t &L, octave_matrix_t &U, octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm, Array< octave_idx_type > &perm_vec, const T droptol=T(0), const T thresh=T(0), const std::string milu="off", const double udiag=0)
Definition: __ilu__.cc:568
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:41
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:197
std::complex< double > Complex
Definition: oct-cmplx.h:29
const T * fortran_vec(void) const
Definition: Array.h:481
void ilu_crout(octave_matrix_t &sm_l, octave_matrix_t &sm_u, octave_matrix_t &L, octave_matrix_t &U, T *cols_norm, T *rows_norm, const T droptol=0, const std::string milu="off")
Definition: __ilu__.cc:190
T abs(T x)
Definition: pr-output.cc:3062