GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
lookup.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2008-2013 VZLU Prague a.s., Czech Republic
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 // Author: Jaroslav Hajek <[email protected]>
24 
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28 
29 #include <cctype>
30 #include <functional>
31 #include <algorithm>
32 
33 #include "dNDArray.h"
34 #include "CNDArray.h"
35 
36 #include "Cell.h"
37 #include "defun.h"
38 #include "error.h"
39 #include "gripes.h"
40 #include "oct-obj.h"
41 #include "ov.h"
42 
43 static
44 bool
45 contains_char (const std::string& str, char c)
46 {
47  return (str.find (c) != std::string::npos
48  || str.find (std::toupper (c)) != std::string::npos);
49 }
50 
51 // case-insensitive character comparison functors
52 struct icmp_char_lt : public std::binary_function<char, char, bool>
53 {
54  bool operator () (char x, char y) const
55  { return std::toupper (x) < std::toupper (y); }
56 };
57 
58 struct icmp_char_gt : public std::binary_function<char, char, bool>
59 {
60  bool operator () (char x, char y) const
61  { return std::toupper (x) > std::toupper (y); }
62 };
63 
64 // FIXME: maybe these should go elsewhere?
65 // FIXME: are they even needed now?
66 // case-insensitive ascending comparator
67 #if 0
68 static bool
69 stri_comp_lt (const std::string& a, const std::string& b)
70 {
71  return std::lexicographical_compare (a.begin (), a.end (),
72  b.begin (), b.end (),
73  icmp_char_lt ());
74 }
75 
76 // case-insensitive descending comparator
77 static bool
78 stri_comp_gt (const std::string& a, const std::string& b)
79 {
80  return std::lexicographical_compare (a.begin (), a.end (),
81  b.begin (), b.end (),
82  icmp_char_gt ());
83 }
84 #endif
85 
86 template <class T>
87 inline sortmode
88 get_sort_mode (const Array<T>& array,
89  typename octave_sort<T>::compare_fcn_type desc_comp
91 {
92  octave_idx_type n = array.numel ();
93  if (n > 1 && desc_comp (array (0), array (n-1)))
94  return DESCENDING;
95  else
96  return ASCENDING;
97 }
98 
99 // FIXME: perhaps there should be octave_value::lookup?
100 // The question is, how should it behave w.r.t. the second argument's type.
101 // We'd need a dispatch on two arguments. Hmmm...
102 
103 #define INT_ARRAY_LOOKUP(TYPE) \
104  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
105  retval = do_numeric_lookup (table.TYPE ## _array_value (), \
106  y.TYPE ## _array_value (), \
107  left_inf, right_inf, \
108  match_idx, match_bool);
109 template <class ArrayT>
110 static octave_value
111 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
112  bool left_inf, bool right_inf,
113  bool match_idx, bool match_bool)
114 {
115  octave_value retval;
116 
117  Array<octave_idx_type> idx = array.lookup (values);
118  octave_idx_type n = array.numel (), nval = values.numel ();
119 
120  // Post-process.
121  if (match_bool)
122  {
123  boolNDArray match (idx.dims ());
124  for (octave_idx_type i = 0; i < nval; i++)
125  {
126  octave_idx_type j = idx.xelem (i);
127  match.xelem (i) = j != 0 && values(i) == array(j-1);
128  }
129 
130  retval = match;
131  }
132  else if (match_idx || left_inf || right_inf)
133  {
134  if (match_idx)
135  {
136  NDArray ridx (idx.dims ());
137 
138  for (octave_idx_type i = 0; i < nval; i++)
139  {
140  octave_idx_type j = idx.xelem (i);
141  ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
142  }
143 
144  retval = ridx;
145  }
146  else if (left_inf && right_inf)
147  {
148  // Results in valid indices. Optimize using lazy index.
149  octave_idx_type zero = 0;
150  for (octave_idx_type i = 0; i < nval; i++)
151  {
152  octave_idx_type j = idx.xelem (i) - 1;
153  idx.xelem (i) = std::max (zero, std::min (j, n-2));
154  }
155 
156  retval = idx_vector (idx);
157  }
158  else if (left_inf)
159  {
160  // Results in valid indices. Optimize using lazy index.
161  octave_idx_type zero = 0;
162  for (octave_idx_type i = 0; i < nval; i++)
163  {
164  octave_idx_type j = idx.xelem (i) - 1;
165  idx.xelem (i) = std::max (zero, j);
166  }
167 
168  retval = idx_vector (idx);
169  }
170  else if (right_inf)
171  {
172  NDArray ridx (idx.dims ());
173 
174  for (octave_idx_type i = 0; i < nval; i++)
175  {
176  octave_idx_type j = idx.xelem (i);
177  ridx.xelem (i) = std::min (j, n-1);
178  }
179 
180  retval = ridx;
181  }
182  }
183  else
184  retval = idx;
185 
186  return retval;
187 }
188 
189 DEFUN (lookup, args, ,
190  "-*- texinfo -*-\n\
191 @deftypefn {Built-in Function} {@var{idx} =} lookup (@var{table}, @var{y})\n\
192 @deftypefnx {Built-in Function} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})\n\
193 Lookup values in a sorted table. Usually used as a prelude to\n\
194 interpolation.\n\
195 \n\
196 If table is increasing and @code{idx = lookup (table, y)}, then\n\
197 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}\n\
198 within the table. If @code{y(i) < table(1)} then\n\
199 @code{idx(i)} is 0. If @code{y(i) >= table(end)} or @code{isnan (y(i))} then\n\
200 @code{idx(i)} is @code{n}.\n\
201 \n\
202 If the table is decreasing, then the tests are reversed.\n\
203 For non-strictly monotonic tables, empty intervals are always skipped.\n\
204 The result is undefined if @var{table} is not monotonic, or if\n\
205 @var{table} contains a NaN.\n\
206 \n\
207 The complexity of the lookup is O(M*log(N)) where N is the size of\n\
208 @var{table} and M is the size of @var{y}. In the special case when @var{y}\n\
209 is also sorted, the complexity is O(min(M*log(N),M+N)).\n\
210 \n\
211 @var{table} and @var{y} can also be cell arrays of strings\n\
212 (or @var{y} can be a single string). In this case, string lookup\n\
213 is performed using lexicographical comparison.\n\
214 \n\
215 If @var{opts} is specified, it must be a string with letters indicating\n\
216 additional options.\n\
217 \n\
218 @table @code\n\
219 @item m\n\
220 @code{table(idx(i)) == val(i)} if @code{val(i)}\n\
221 occurs in table; otherwise, @code{idx(i)} is zero.\n\
222 \n\
223 @item b\n\
224 @code{idx(i)} is a logical 1 or 0, indicating whether\n\
225 @code{val(i)} is contained in table or not.\n\
226 \n\
227 @item l\n\
228 For numeric lookups\n\
229 the leftmost subinterval shall be extended to infinity (i.e., all indices\n\
230 at least 1)\n\
231 \n\
232 @item r\n\
233 For numeric lookups\n\
234 the rightmost subinterval shall be extended to infinity (i.e., all indices\n\
235 at most n-1).\n\
236 @end table\n\
237 @end deftypefn")
238 {
239  octave_value retval;
240 
241  int nargin = args.length ();
242 
243  if (nargin < 2 || nargin > 3 || (nargin == 3 && ! args(2).is_string ()))
244  {
245  print_usage ();
246  return retval;
247  }
248 
249  octave_value table = args(0), y = args(1);
250  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
251  warning ("lookup: table is not a vector");
252 
253  bool num_case = ((table.is_numeric_type () && y.is_numeric_type ())
254  || (table.is_char_matrix () && y.is_char_matrix ()));
255  bool str_case = table.is_cellstr () && (y.is_string () || y.is_cellstr ());
256  bool left_inf = false;
257  bool right_inf = false;
258  bool match_idx = false;
259  bool match_bool = false;
260 
261  if (nargin == 3)
262  {
263  std::string opt = args(2).string_value ();
264  left_inf = contains_char (opt, 'l');
265  right_inf = contains_char (opt, 'r');
266  match_idx = contains_char (opt, 'm');
267  match_bool = contains_char (opt, 'b');
268  if (opt.find_first_not_of ("lrmb") != std::string::npos)
269  {
270  error ("lookup: unrecognized option: %c",
271  opt[opt.find_first_not_of ("lrmb")]);
272  return retval;
273  }
274  }
275 
276  if ((match_idx || match_bool) && (left_inf || right_inf))
277  error ("lookup: m, b cannot be specified with l or r");
278  else if (match_idx && match_bool)
279  error ("lookup: only one of m or b can be specified");
280  else if (str_case && (left_inf || right_inf))
281  error ("lookup: l, r are not recognized for string lookups");
282 
283  if (error_state)
284  return retval;
285 
286  if (num_case)
287  {
288 
289  // In the case of a complex array, absolute values will be used for
290  // compatibility (though it's not too meaningful).
291 
292  if (table.is_complex_type ())
293  table = table.abs ();
294 
295  if (y.is_complex_type ())
296  y = y.abs ();
297 
299 
300  // PS: I learned this from data.cc
301  if INT_ARRAY_LOOKUP (int8)
302  else if INT_ARRAY_LOOKUP (int16)
303  else if INT_ARRAY_LOOKUP (int32)
304  else if INT_ARRAY_LOOKUP (int64)
305  else if INT_ARRAY_LOOKUP (uint8)
306  else if INT_ARRAY_LOOKUP (uint16)
307  else if INT_ARRAY_LOOKUP (uint32)
308  else if INT_ARRAY_LOOKUP (uint64)
309  else if (table.is_char_matrix () && y.is_char_matrix ())
310  retval = do_numeric_lookup (table.char_array_value (),
311  y.char_array_value (),
312  left_inf, right_inf,
313  match_idx, match_bool);
314  else if (table.is_single_type () || y.is_single_type ())
315  retval = do_numeric_lookup (table.float_array_value (),
316  y.float_array_value (),
317  left_inf, right_inf,
318  match_idx, match_bool);
319  else
320  retval = do_numeric_lookup (table.array_value (),
321  y.array_value (),
322  left_inf, right_inf,
323  match_idx, match_bool);
324 
325  }
326  else if (str_case)
327  {
328  Array<std::string> str_table = table.cellstr_value ();
329  Array<std::string> str_y (dim_vector (1, 1));
330 
331  if (y.is_cellstr ())
332  str_y = y.cellstr_value ();
333  else
334  str_y(0) = y.string_value ();
335 
336  Array<octave_idx_type> idx = str_table.lookup (str_y);
337  octave_idx_type nval = str_y.numel ();
338 
339  // Post-process.
340  if (match_bool)
341  {
342  boolNDArray match (idx.dims ());
343  for (octave_idx_type i = 0; i < nval; i++)
344  {
345  octave_idx_type j = idx.xelem (i);
346  match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
347  }
348 
349  retval = match;
350  }
351  else if (match_idx)
352  {
353  NDArray ridx (idx.dims ());
354  if (match_idx)
355  {
356  for (octave_idx_type i = 0; i < nval; i++)
357  {
358  octave_idx_type j = idx.xelem (i);
359  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1)) ? j
360  : 0;
361  }
362  }
363 
364  retval = ridx;
365  }
366  else
367  retval = idx;
368  }
369  else
370  print_usage ();
371 
372  return retval;
373 
374 }
375 
376 /*
377 %!assert (lookup (1:3, 0.5), 0) # value before table
378 %!assert (lookup (1:3, 3.5), 3) # value after table error
379 %!assert (lookup (1:3, 1.5), 1) # value within table error
380 %!assert (lookup (1:3, [3,2,1]), [3,2,1])
381 %!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
382 %!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
383 %!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
384 %!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
385 %!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
386 %!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
387 %!assert (isempty (lookup ([1:3], [])))
388 %!assert (isempty (lookup ([1:3]', [])))
389 %!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
390 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
391 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
392 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
393 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
394 %!
395 %!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
396 %!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
397 %!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
398 */