/*
* call-seq:
* str.split(pattern=$;, [limit]) => anArray
*
* Divides <i>str</i> into substrings based on a delimiter, returning an array
* of these substrings.
*
* If <i>pattern</i> is a <code>String</code>, then its contents are used as
* the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
* space, <i>str</i> is split on whitespace, with leading whitespace and runs
* of contiguous whitespace characters ignored.
*
* If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
* pattern matches. Whenever the pattern matches a zero-length string,
* <i>str</i> is split into individual characters.
*
* If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
* <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
* split on whitespace as if <code>' '</code> were specified.
*
* If the <i>limit</i> parameter is omitted, trailing null fields are
* suppressed. If <i>limit</i> is a positive number, at most that number of
* fields will be returned (if <i>limit</i> is <code>1</code>, the entire
* string is returned as the only entry in an array). If negative, there is no
* limit to the number of fields returned, and trailing null fields are not
* suppressed.
*
* " now's  the time".split #=> ["now's", "the", "time"]
* " now's  the time".split(' ') #=> ["now's", "the", "time"]
* " now's  the time".split(/ /) #=> ["", "now's", "", "the", "time"]
* "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
* "hello".split(//) #=> ["h", "e", "l", "l", "o"]
* "hello".split(//, 3) #=> ["h", "e", "llo"]
* "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
*
* "mellow yellow".split("ello") #=> ["m", "w y", "w"]
* "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
* "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
* "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
*/
static VALUE
rb_str_split_m(argc, argv, str)
int argc;
VALUE *argv;
VALUE str;
{
/*
 * Implementation of String#split.
 *
 * argc/argv hold up to two optional arguments (pattern, limit); str is
 * the receiver.  Returns an array of the pieces of str.
 *
 * Locals:
 *   spat      - the pattern; replaced below by rb_fs or a compiled regexp
 *   limit     - the raw limit argument; reset to Qnil when it is <= 0, so
 *               "!NIL_P(limit)" means "a positive field cap is in effect"
 *   lim       - integer value of limit (0 when limit was not passed)
 *   awk_split - true when splitting on runs of whitespace
 *   beg       - byte offset where the field being collected starts
 *   end       - awk mode: offset one past the field's last byte;
 *               regexp mode: match position returned by rb_reg_search
 *   i         - fields counted against lim
 */
VALUE spat;
VALUE limit;
int awk_split = Qfalse;
long beg, end, i = 0;
int lim = 0;
VALUE result, tmp;
/* Only look at limit when both arguments were actually supplied. */
if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
lim = NUM2INT(limit);
/* Zero or negative: no cap on the field count.  lim keeps the sign so
 * the tail of the function can still tell 0 from negative. */
if (lim <= 0) limit = Qnil;
else if (lim == 1) {
/* A cap of one field: nothing to split.  An empty receiver gives an
 * empty array, otherwise a one-element array holding str itself. */
if (RSTRING(str)->len == 0)
return rb_ary_new2(0);
return rb_ary_new3(1, str);
}
/* Start at 1: the remainder pushed after the loops counts as a field,
 * so the loops may emit at most lim-1 fields themselves. */
i = 1;
}
/* Resolve the pattern into either awk mode or a regexp in spat. */
if (NIL_P(spat)) {
if (!NIL_P(rb_fs)) {
/* No explicit pattern: fall back to rb_fs (the $; default named in the
 * method documentation) and normalise it exactly like an explicit
 * pattern by jumping into the else branch below. */
spat = rb_fs;
goto fs_set;
}
/* Neither a pattern nor rb_fs: split on whitespace. */
awk_split = Qtrue;
}
else {
fs_set:
if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) {
if (RSTRING(spat)->ptr[0] == ' ') {
/* A single space selects whitespace splitting. */
awk_split = Qtrue;
}
else {
/* Any other one-byte string: quote it and compile it to a regexp. */
spat = rb_reg_regcomp(rb_reg_quote(spat));
}
}
else {
/* get_pat is defined elsewhere; presumably it turns a String or Regexp
 * argument into a Regexp, quoting strings -- TODO confirm. */
spat = get_pat(spat, 1);
}
}
result = rb_ary_new();
beg = 0;
if (awk_split) {
/*
 * Whitespace mode: a two-state scan over the bytes of str.
 *   skip == 1: between fields, skipping whitespace
 *   skip == 0: inside a field
 * beg and end are maintained as offsets in step with ptr.
 */
char *ptr = RSTRING(str)->ptr;
long len = RSTRING(str)->len;
char *eptr = ptr + len;
int skip = 1;
for (end = beg = 0; ptr<eptr; ptr++) {
if (skip) {
if (ISSPACE(*ptr)) {
/* Still between fields: move the field start past this byte. */
beg++;
}
else {
/* First byte of a new field. */
end = beg+1;
skip = 0;
/* Cap reached: stop here so the code after the loop pushes everything
 * from beg to the end of str as the final field. */
if (!NIL_P(limit) && lim <= i) break;
}
}
else {
if (ISSPACE(*ptr)) {
/* Whitespace ends the current field: emit it and resume skipping.
 * end is the offset of this whitespace byte, so the next candidate
 * field start is end + 1. */
rb_ary_push(result, rb_str_substr(str, beg, end-beg));
skip = 1;
beg = end + 1;
if (!NIL_P(limit)) ++i;
}
else {
end++;
}
}
}
}
else {
/*
 * Regexp mode.  start is where the next search begins; beg is where the
 * pending field begins.  They differ only while a zero-length match is
 * being retried (see last_null).
 *
 * NOTE(review): BEG()/END() are macros defined outside this function;
 * they presumably read the local variable named regs, so it must keep
 * that name -- confirm before renaming.
 */
long start = beg;
long idx;
int last_null = 0;
struct re_registers *regs;
while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
regs = RMATCH(rb_backref_get())->regs;
if (start == end && BEG(0) == END(0)) {
/* Zero-length match exactly at the search position. */
if (!RSTRING(str)->ptr) {
/* Receiver has no buffer: emit one empty string and stop. */
rb_ary_push(result, rb_str_new("", 0));
break;
}
else if (last_null == 1) {
/* Second consecutive empty match: emit the single character at beg
 * (mbclen2 gives its byte length) and move beg up to start. */
rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat)));
beg = start;
}
else {
/* First empty match: step the search position over one character and
 * search again without emitting anything; beg is left unchanged. */
start += mbclen2(RSTRING(str)->ptr[start],spat);
last_null = 1;
continue;
}
}
else {
/* Ordinary match: emit the text before it and resume after it. */
rb_ary_push(result, rb_str_substr(str, beg, end-beg));
beg = start = END(0);
}
last_null = 0;
/* Append the text of each capture group after the field.  A group whose
 * start is -1 is skipped (presumably it did not take part in the match).
 * Empty groups go through rb_str_new5(str, 0, 0), which presumably
 * builds an empty string derived from str -- TODO confirm. */
for (idx=1; idx < regs->num_regs; idx++) {
if (BEG(idx) == -1) continue;
if (BEG(idx) == END(idx))
tmp = rb_str_new5(str, 0, 0);
else
tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
rb_ary_push(result, tmp);
}
/* Count the field just emitted; stop once only the remainder is left. */
if (!NIL_P(limit) && lim <= ++i) break;
}
}
/* Push what is left from beg to the end of str.  This happens when a
 * positive cap is in effect, when unconsumed text remains, or when lim is
 * negative (limit was reset to Qnil above, hence the separate lim test).
 * In the first and last cases the remainder may be an empty string. */
if (RSTRING(str)->len > 0 && (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0)) {
if (RSTRING(str)->len == beg)
tmp = rb_str_new5(str, 0, 0);
else
tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg);
rb_ary_push(result, tmp);
}
/* With no limit, or an explicit limit of 0, drop trailing empty strings. */
if (NIL_P(limit) && lim == 0) {
while (RARRAY(result)->len > 0 &&
RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0)
rb_ary_pop(result);
}
return result;
}