59 size_t len = s.length ();
65 if (s[j] ==
'\\' && j+1 < len)
115 size_t len = s.length ();
121 if (s[j] ==
'\\' && j+1 < len)
184 const std::string& who,
int skip,
bool& extra_args)
186 int nargin = args.
length ();
190 for (
int i = skip; i < nargin; i++)
194 if (args(i).is_string ())
195 str = args(i).string_value ();
198 error (
"%s: optional arguments must be strings", who.c_str ());
204 if (str.find (
"once", 0) == 0)
206 else if (str.find (
"matchcase", 0) == 0)
208 else if (str.find (
"ignorecase", 0) == 0)
210 else if (str.find (
"dotall", 0) == 0)
212 else if (str.find (
"stringanchors", 0) == 0)
214 else if (str.find (
"literalspacing", 0) == 0)
216 else if (str.find (
"noemptymatch", 0) == 0)
218 else if (str.find (
"dotexceptnewline", 0) == 0)
220 else if (str.find (
"lineanchors", 0) == 0)
222 else if (str.find (
"freespacing", 0) == 0)
224 else if (str.find (
"emptymatch", 0) == 0)
226 else if (str.find (
"start", 0) == 0
227 || str.find (
"end", 0) == 0
228 || str.find (
"tokenextents", 0) == 0
229 || str.find (
"match", 0) == 0
230 || str.find (
"tokens", 0) == 0
231 || str.find (
"names", 0) == 0
232 || str.find (
"split", 0) == 0)
235 error (
"%s: unrecognized option", who.c_str ());
241 const std::string &who,
bool case_insensitive =
false)
245 int nargin = args.
length ();
248 const std::string buffer = args(0).string_value ();
252 std::string pattern = args(1).string_value ();
256 if (args(1).is_sq_string ())
261 bool extra_options =
false;
270 size_t sz = rx_lst.
size ();
285 for (
int j = 0; j < named_pats.
length (); j++)
286 nmap.
assign (named_pats(j), named_tokens(j));
292 for (
int j = 0; j < named_pats.
length (); j++)
298 p != rx_lst.
end (); p++)
302 tmp(i++) = named_tokens(j);
315 retval(4) = sz ? p->tokens () :
Cell ();
316 retval(3) = sz ? p->match_string () : std::string ();
317 retval(2) = sz ? p->token_extents () :
Matrix ();
321 double start = p->start ();
322 double end = p->end ();
325 split(0) = buffer.substr (0, start-1);
326 split(1) = buffer.substr (end);
351 p != rx_lst.
end (); p++)
353 double s = p->start ();
354 double e = p->end ();
358 match_string(i) = p->match_string ();
359 token_extents(i) = p->token_extents ();
362 split(i) = buffer.substr (sp_start, s-sp_start-1);
367 split(i) = buffer.substr (sp_start);
371 retval(3) = match_string;
372 retval(2) = token_extents;
383 new_retval.
resize (nargout);
386 for (
int j = 0; j < 6; j++)
389 for (
int j = 2; j < nargin; j++)
392 std::string str = args(j).string_value ();
395 if (str.find (
"once", 0) == 0
396 || str.find (
"stringanchors", 0) == 0
397 || str.find (
"lineanchors", 0) == 0
398 || str.find (
"matchcase", 0) == 0
399 || str.find (
"ignorecase", 0) == 0
400 || str.find (
"dotall", 0) == 0
401 || str.find (
"dotexceptnewline", 0) == 0
402 || str.find (
"literalspacing", 0) == 0
403 || str.find (
"freespacing", 0) == 0
404 || str.find (
"noemptymatch", 0) == 0
405 || str.find (
"emptymatch", 0) == 0)
407 else if (str.find (
"start", 0) == 0)
409 else if (str.find (
"end", 0) == 0)
411 else if (str.find (
"tokenextents", 0) == 0)
413 else if (str.find (
"match", 0) == 0)
415 else if (str.find (
"tokens", 0) == 0)
417 else if (str.find (
"names", 0) == 0)
419 else if (str.find (
"split", 0) == 0)
422 new_retval(n++) = retval(k);
432 for (
int j = 0; j < 6; j++)
435 new_retval(n++) = retval(j);
448 const std::string &who,
bool case_insensitive =
false)
452 if (args(0).is_cell ())
457 if (args(1).is_cell ())
461 if (cellpat.
numel () == 1)
463 for (
int j = 0; j < nargout; j++)
464 newretval[j].resize (cellstr.
dims ());
466 new_args(1) = cellpat(0);
470 new_args(0) = cellstr(i);
477 for (
int j = 0; j < nargout; j++)
478 newretval[j](i) = tmp(j);
481 else if (cellstr.
numel () == 1)
483 for (
int j = 0; j < nargout; j++)
484 newretval[j].resize (cellpat.
dims ());
486 new_args(0) = cellstr(0);
490 new_args(1) = cellpat(i);
497 for (
int j = 0; j < nargout; j++)
498 newretval[j](i) = tmp(j);
501 else if (cellstr.
numel () == cellpat.
numel ())
504 if (cellstr.
dims () != cellpat.
dims ())
505 error (
"%s: inconsistent cell array dimensions", who.c_str ());
508 for (
int j = 0; j < nargout; j++)
509 newretval[j].resize (cellstr.
dims ());
513 new_args(0) = cellstr(i);
514 new_args(1) = cellpat(i);
522 for (
int j = 0; j < nargout; j++)
523 newretval[j](i) = tmp(j);
528 error (
"regexp: cell array arguments must be scalar or equal size");
532 for (
int j = 0; j < nargout; j++)
533 newretval[j].resize (cellstr.
dims ());
537 new_args(0) = cellstr(i);
544 for (
int j = 0; j < nargout; j++)
545 newretval[j](i) = tmp(j);
550 for (
int j = 0; j < nargout; j++)
553 else if (args(1).is_cell ())
559 for (
int j = 0; j < nargout; j++)
560 newretval[j].resize (cellpat.
dims ());
564 new_args(1) = cellpat(i);
571 for (
int j = 0; j < nargout; j++)
572 newretval[j](i) = tmp(j);
577 for (
int j = 0; j < nargout; j++)
582 retval =
octregexp (args, nargout, who, case_insensitive);
590 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexp (@var{str}, @var{pat})\n\
591 @deftypefnx {Built-in Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
592 Regular expression string matching.\n\
594 Search for @var{pat} in @var{str} and return the positions and substrings of\n\
595 any matches, or empty values if there are none.\n\
597 The matched pattern @var{pat} can include any of the standard regex\n\
598 operators, including:\n\
602 Match any character\n\
605 Repetition operators, representing\n\
609 Match zero or more times\n\
612 Match one or more times\n\
615 Match zero or one times\n\
618 Match exactly @var{n} times\n\
620 @item @{@var{n},@}\n\
621 Match @var{n} or more times\n\
623 @item @{@var{m},@var{n}@}\n\
624 Match between @var{m} and @var{n} times\n\
627 @item [@dots{}] [^@dots{}]\n\
629 List operators. The pattern will match any character listed between \"[\"\n\
630 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\
631 any character except those listed between brackets will match.\n\
633 Escape sequences defined below can also be used inside list operators. For\n\
634 example, a template for a floating point number might be @code{[-+.\\d]+}.\n\
637 Grouping operator. The first form, parentheses only, also creates a token.\n\
640 Alternation operator. Match one of a choice of regular expressions. The\n\
641 alternatives must be delimited by the grouping operator @code{()} above.\n\
644 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\
645 end (@code{$}) of the string.\n\
648 In addition, the following escaped characters have special meaning.\n\
656 Match any non-digit\n\
659 Match any whitespace character\n\
662 Match any non-whitespace character\n\
665 Match any word character\n\
668 Match any non-word character\n\
671 Match the beginning of a word\n\
674 Match the end of a word\n\
677 Match within a word\n\
680 Implementation Note: For compatibility with @sc{matlab}, escape sequences\n\
681 in @var{pat} (e.g., @qcode{\"@xbackslashchar{}n\"} => newline) are expanded\n\
682 even when @var{pat} has been defined with single quotes. To disable\n\
683 expansion use a second backslash before the escape sequence (e.g.,\n\
684 \"@xbackslashchar{}@xbackslashchar{}n\") or use the @code{regexptranslate}\n\
687 The outputs of @code{regexp} default to the order given below\n\
691 The start indices of each matching substring\n\
694 The end indices of each matching substring\n\
697 The extents of each matched token surrounded by @code{(@dots{})} in\n\
701 A cell array of the text of each match\n\
704 A cell array of the text of each token matched\n\
707 A structure containing the text of each matched named token, with the name\n\
708 being used as the fieldname. A named token is denoted by\n\
709 @code{(?<name>@dots{})}.\n\
712 A cell array of the text not returned by match, i.e., what remains if you\n\
713 split the string based on @var{pat}.\n\
716 Particular output arguments, or the order of the output arguments, can be\n\
717 selected by additional @var{opt} arguments. These are strings and the\n\
718 correspondence between the output arguments and the optional argument\n\
721 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\
722 @item @tab @qcode{'start'} @tab @var{s} @tab\n\
723 @item @tab @qcode{'end'} @tab @var{e} @tab\n\
724 @item @tab @qcode{'tokenExtents'} @tab @var{te} @tab\n\
725 @item @tab @qcode{'match'} @tab @var{m} @tab\n\
726 @item @tab @qcode{'tokens'} @tab @var{t} @tab\n\
727 @item @tab @qcode{'names'} @tab @var{nm} @tab\n\
728 @item @tab @qcode{'split'} @tab @var{sp} @tab\n\
731 Additional arguments are summarized below.\n\
735 Return only the first occurrence of the pattern.\n\
738 Make the matching case sensitive. (default)\n\
740 Alternatively, use (?-i) in the pattern.\n\
743 Ignore case when matching the pattern to the string.\n\
745 Alternatively, use (?i) in the pattern.\n\
747 @item stringanchors\n\
748 Match the anchor characters at the beginning and end of the string.\n\
751 Alternatively, use (?-m) in the pattern.\n\
754 Match the anchor characters at the beginning and end of the line.\n\
756 Alternatively, use (?m) in the pattern.\n\
759 The pattern @code{.} matches all characters including the newline character.\n\
762 Alternatively, use (?s) in the pattern.\n\
764 @item dotexceptnewline\n\
765 The pattern @code{.} matches all characters except the newline character.\n\
767 Alternatively, use (?-s) in the pattern.\n\
769 @item literalspacing\n\
770 All characters in the pattern, including whitespace, are significant and are\n\
771 used in pattern matching. (default)\n\
773 Alternatively, use (?-x) in the pattern.\n\
776 The pattern may include arbitrary whitespace and also comments beginning with\n\
777 the character @samp{#}.\n\
779 Alternatively, use (?x) in the pattern.\n\
781 @item noemptymatch\n\
782 Zero-length matches are not returned. (default)\n\
785 Return zero-length matches.\n\
787 @code{regexp ('a', 'b*', 'emptymatch')} returns @code{[1 2]} because there\n\
788 are zero or more @qcode{'b'} characters at positions 1 and end-of-string.\n\
791 @seealso{regexpi, strfind, regexprep}\n\
796 int nargin = args.
length ();
800 else if (args(0).is_cell () || args(1).is_cell ())
801 retval =
octcellregexp (args, (nargout > 0 ? nargout : 1),
"regexp");
803 retval =
octregexp (args, nargout,
"regexp");
1070 DEFUN (regexpi, args, nargout,
1072 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexpi (@var{str}, @var{pat})\n\
1073 @deftypefnx {Built-in Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
1075 Case insensitive regular expression string matching.\n\
1077 Search for @var{pat} in @var{str} and return the positions and substrings of\n\
1078 any matches, or empty values if there are none. @xref{XREFregexp,,regexp},\n\
1079 for details on the syntax of the search pattern.\n\
1085 int nargin = args.
length ();
1089 else if (args(0).is_cell () || args(1).is_cell ())
1090 retval =
octcellregexp (args, (nargout > 0 ? nargout : 1),
"regexpi",
true);
1092 retval =
octregexp (args, nargout,
"regexpi",
true);
1236 int nargin = args.
length ();
1239 const std::string buffer = args(0).string_value ();
1243 std::string pattern = args(1).string_value ();
1247 if (args(1).is_sq_string ())
1250 std::string replacement = args(2).string_value ();
1254 if (args(2).is_sq_string ())
1262 for (
int i = 3; i < nargin; i++)
1264 const std::string opt = args(i).string_value ();
1265 if (opt !=
"tokenize" && opt !=
"start" && opt !=
"end"
1266 && opt !=
"tokenextents" && opt !=
"match" && opt !=
"tokens"
1267 && opt !=
"names" && opt !=
"split" && opt !=
"warnings")
1269 regexpargs(len++) = args(i);
1275 bool extra_args =
false;
1280 return regexp_replace (pattern, buffer, replacement, options, who);
1283 DEFUN (regexprep, args, ,
1285 @deftypefn {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\
1286 @deftypefnx {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\
1287 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\
1289 The pattern is a regular expression as documented for @code{regexp}.\n\
1290 @xref{XREFregexp,,regexp}.\n\
1292 The replacement string may contain @code{$i}, which substitutes for the ith\n\
1293 set of parentheses in the match string. For example,\n\
1296 regexprep (\"Bill Dunn\", '(\\w+) (\\w+)', '$2, $1')\n\
1300 returns \"Dunn, Bill\"\n\
1302 Options in addition to those of @code{regexp} are\n\
1307 Replace only the first occurrence of @var{pat} in the result.\n\
1310 This option is present for compatibility but is ignored.\n\
1314 Implementation Note: For compatibility with @sc{matlab}, escape sequences\n\
1315 in @var{pat} (e.g., @qcode{\"@xbackslashchar{}n\"} => newline) are expanded\n\
1316 even when @var{pat} has been defined with single quotes. To disable\n\
1317 expansion use a second backslash before the escape sequence (e.g.,\n\
1318 \"@xbackslashchar{}@xbackslashchar{}n\") or use the @code{regexptranslate}\n\
1320 @seealso{regexp, regexpi, strrep}\n\
1324 int nargin = args.
length ();
1332 if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
1340 if (args(0).is_cell ())
1341 str = args(0).cell_value ();
1343 str =
Cell (args(0));
1345 if (args(1).is_cell ())
1346 pat = args(1).cell_value ();
1348 pat =
Cell (args(1));
1350 if (args(2).is_cell ())
1351 rep = args(2).cell_value ();
1353 rep =
Cell (args(2));
1356 if (pat.
numel () != 1)
1359 if (rep.
numel () != 1 && dv1 != rep.
dims ())
1360 error (
"regexprep: inconsistent cell array dimensions");
1362 else if (rep.
numel () != 1)
1372 new_args(0) = str(i);
1373 if (pat.
numel () == 1)
1374 new_args(1) = pat(0);
1375 if (rep.
numel () == 1)
1376 new_args(2) = rep(0);
1380 if (pat.
numel () != 1)
1381 new_args(1) = pat(j);
1382 if (rep.
numel () != 1)
1383 new_args(2) = rep(j);
1393 ret(i) = new_args(0);
void emptymatch(bool val)
OCTINTERP_API void print_usage(void)
octave_idx_type numel(void) const
Number of elements in the array.
octave_idx_type length(void) const
regexp::match_data regexp_match(const std::string &pat, const std::string &buffer, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
string_vector named_patterns(void)
void dotexceptnewline(bool val)
#define DEFUN(name, args_name, nargout_name, doc)
void error(const char *fmt,...)
void freespacing(bool val)
static octave_value_list octcellregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
octave_idx_type numel(int n=0) const
Number of elements that a matrix with these dimensions would have.
static std::string do_regexp_rep_string_escapes(const std::string &s)
Cell cell_value(void) const
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
std::string regexp_replace(const std::string &pat, const std::string &buffer, const std::string &replacement, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
static octave_value_list octregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
static std::string do_regexp_ptn_string_escapes(const std::string &s)
std::list< match_element >::const_iterator const_iterator
static void parse_options(regexp::opts &options, const octave_value_list &args, const std::string &who, int skip, bool &extra_args)
octave_idx_type length(void) const
Number of elements in the array.
void assign(const std::string &k, const octave_value &val)
void case_insensitive(bool val)
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
void lineanchors(bool val)
void resize(octave_idx_type n, const octave_value &rfv=octave_value())
ColumnVector transform(const Matrix &m, double x, double y, double z)
static octave_value octregexprep(const octave_value_list &args, const std::string &who)
return octave_value(v1.char_array_value().concat(v2.char_array_value(), ra_idx),((a1.is_sq_string()||a2.is_sq_string())? '\'': '"'))