/* * call-seq: * str.split(pattern=$;, [limit]) => anArray * * Divides <i>str</i> into substrings based on a delimiter, returning an array * of these substrings. * * If <i>pattern</i> is a <code>String</code>, then its contents are used as * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single * space, <i>str</i> is split on whitespace, with leading whitespace and runs * of contiguous whitespace characters ignored. * * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the * pattern matches. Whenever the pattern matches a zero-length string, * <i>str</i> is split into individual characters. * * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is * split on whitespace as if ` ' were specified. * * If the <i>limit</i> parameter is omitted, trailing null fields are * suppressed. If <i>limit</i> is a positive number, at most that number of * fields will be returned (if <i>limit</i> is <code>1</code>, the entire * string is returned as the only entry in an array). If negative, there is no * limit to the number of fields returned, and trailing null fields are not * suppressed. * * " now's the time".split #=> ["now's", "the", "time"] * " now's the time".split(' ') #=> ["now's", "the", "time"] * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"] * "hello".split(//) #=> ["h", "e", "l", "l", "o"] * "hello".split(//, 3) #=> ["h", "e", "llo"] * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"] * * "mellow yellow".split("ello") #=> ["m", "w y", "w"] * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] */ static VALUE rb_str_split_m(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE spat; VALUE limit; int awk_split = Qfalse; long beg, end, i = 0; int lim = 0; VALUE result, tmp; if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) { lim = NUM2INT(limit); if (lim <= 0) limit = Qnil; else if (lim == 1) { if (RSTRING(str)->len == 0) return rb_ary_new2(0); return rb_ary_new3(1, str); } i = 1; } if (NIL_P(spat)) { if (!NIL_P(rb_fs)) { spat = rb_fs; goto fs_set; } awk_split = Qtrue; } else { fs_set: if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) { if (RSTRING(spat)->ptr[0] == ' ') { awk_split = Qtrue; } else { spat = rb_reg_regcomp(rb_reg_quote(spat)); } } else { spat = get_pat(spat, 1); } } result = rb_ary_new(); beg = 0; if (awk_split) { char *ptr = RSTRING(str)->ptr; long len = RSTRING(str)->len; char *eptr = ptr + len; int skip = 1; for (end = beg = 0; ptr<eptr; ptr++) { if (skip) { if (ISSPACE(*ptr)) { beg++; } else { end = beg+1; skip = 0; if (!NIL_P(limit) && lim <= i) break; } } else { if (ISSPACE(*ptr)) { rb_ary_push(result, rb_str_substr(str, beg, end-beg)); skip = 1; beg = end + 1; if (!NIL_P(limit)) ++i; } else { end++; } } } } else { long start = beg; long idx; int last_null = 0; struct re_registers *regs; while ((end = rb_reg_search(spat, str, start, 0)) >= 0) { regs = RMATCH(rb_backref_get())->regs; if (start == end && BEG(0) == END(0)) { if (!RSTRING(str)->ptr) { rb_ary_push(result, rb_str_new("", 0)); break; } else if (last_null == 1) { rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat))); beg = start; } else { start += mbclen2(RSTRING(str)->ptr[start],spat); last_null = 1; continue; } } else { rb_ary_push(result, rb_str_substr(str, beg, end-beg)); beg = start = END(0); } last_null = 0; for (idx=1; idx < regs->num_regs; idx++) { if (BEG(idx) == -1) continue; if (BEG(idx) == END(idx)) tmp = rb_str_new5(str, 0, 0); else tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx)); rb_ary_push(result, tmp); } if (!NIL_P(limit) && lim <= ++i) break; } } if (RSTRING(str)->len > 0 && (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0)) { if (RSTRING(str)->len == beg) tmp = rb_str_new5(str, 0, 0); else tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg); rb_ary_push(result, tmp); } if (NIL_P(limit) && lim == 0) { while (RARRAY(result)->len > 0 && RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0) rb_ary_pop(result); } return result; }