#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "split.ih"
5*e3bc7f4dSEnji Cooper
/*
 - split - divide a string into fields, like awk split()
 == int split(char *string, char *fields[], int nfields, char *sep);
 - string: split in place; separators ending a stored field become '\0'
 - fields: receives pointers into string; list is not NULL-terminated
 - nfields: number of entries available in fields[]; if <= 0 nothing is
 -	stored, string is left unmodified, and the fields are only counted
 - sep: "" white, "c" single char, "ab" [ab]+
 -	""	fields are separated by runs of blanks and tabs; leading and
 -		trailing white space is ignored
 -	"c"	every occurrence of c ends a field, so adjacent separators
 -		produce empty fields
 -	"ab"	(two or more characters) any run of the listed characters
 -		separates two fields
 - returns the number of fields found, which may exceed nfields; in that
 -	case the last stored field holds the unsplit remainder of string
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* counts down the free slots */
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;	/* last slot keeps the remainder */
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: the field starting at p is number one */
			fn = 1;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* counts down the free slots */
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* empty last field is trailing white space */
						if (trimtrail && **(fp-1) == '\0')
							fn++;
						return(nfields - fn);
					}
				if (fn == 0)
					break;	/* last slot keeps the remainder */
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: scan past the first field without storing */
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0')
					return(1);
			fn = 1;
		}
		/* here c is the separator that ended the last field scanned */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The final "field" counted was only trailing
				 * white space.  If it was the sole overflow,
				 * the white space hangs off the last stored
				 * field, so cut it there.
				 */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* scan to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		/* only terminate while there is still a free slot after this one */
		if (fn < nfields)
			*(p-1) = '\0';
		/* skip the rest of the separator run */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
1408ca5c256SDaniel C. Sobral
1418ca5c256SDaniel C. Sobral #ifdef TEST_SPLIT
1428ca5c256SDaniel C. Sobral
1438ca5c256SDaniel C. Sobral
/*
 * test program
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *	pgm str sep n x	copies str n times without splitting it
 *			(presumably a baseline for timing the case above)
 *
 * Returns 0 normally, 2 if str does not fit the internal buffer.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
	size_t len;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf; refuse what cannot fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (limit %lu)\n",
		    argv[0], (unsigned long)(sizeof(buf) - 1));
		return(2);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() modifies its input, so start from a fresh copy */
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline, but only if there is one: an
			 * overlong line or a final line without a newline
			 * must not lose its last character.
			 */
			len = strlen(buf);
			if (len > 0 && buf[len-1] == '\n')
				buf[len-1] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	return(0);
}
1808ca5c256SDaniel C. Sobral
/*
 - dosplit - split one string into at most NF stored fields and print them
 - string: text to split; modified in place by split()
 - seps: separator specification handed to split() unchanged
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int count = split(string, fields, NF, seps);

	print(count, NF, fields);
}
1918ca5c256SDaniel C. Sobral
/*
 - print - show a field count and the stored fields on one output line
 - nf: number of fields reported by split(), possibly more than nfp
 - nfp: number of entries actually available in fields[]
 - fields: the stored field pointers; only the first min(nf, nfp) are read
 *
 * Output is `N:<tab>"f0", "f1", ...` followed by a newline.  When nf
 * exceeds nfp the last stored field is still followed by ", " to show that
 * more fields existed than could be stored.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s",
		    (fields[fn] != NULL) ? fields[fn] : "(NULL)",
		    (fn+1 < nf) ? ", " : "");
	/* always finish the line, even for zero fields or on overflow */
	printf("\n");
}
2038ca5c256SDaniel C. Sobral
/*
 * Regression table: each row is an input string, a separator
 * specification, the field count split() must return when given RNF
 * slots, and the expected contents of the stored fields.  A row with a
 * NULL str ends the table.
 */
#define	RNF	5		/* some table entries know this */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator specification */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {
	/* single separator: every occurrence ends a field */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/*
	 * Two-character separator made of the same character twice: same
	 * inputs as the first group, but runs of blanks now collapse.
	 */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* white space: leading and trailing blanks/tabs are ignored */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def g ",		"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,			NULL,	0,	{ NULL } },
};
2728ca5c256SDaniel C. Sobral
2738eba7ea3SEnji Cooper void
regress(void)2748eba7ea3SEnji Cooper regress(void)
2758ca5c256SDaniel C. Sobral {
2768ca5c256SDaniel C. Sobral char buf[512];
2778fb3f3f6SDavid E. O'Brien int n;
2788ca5c256SDaniel C. Sobral char *fields[RNF+1];
2798fb3f3f6SDavid E. O'Brien int nf;
2808fb3f3f6SDavid E. O'Brien int i;
2818fb3f3f6SDavid E. O'Brien int printit;
2828fb3f3f6SDavid E. O'Brien char *f;
2838ca5c256SDaniel C. Sobral
2848ca5c256SDaniel C. Sobral for (n = 0; tests[n].str != NULL; n++) {
2858ca5c256SDaniel C. Sobral (void) strcpy(buf, tests[n].str);
2868ca5c256SDaniel C. Sobral fields[RNF] = NULL;
2878ca5c256SDaniel C. Sobral nf = split(buf, fields, RNF, tests[n].seps);
2888ca5c256SDaniel C. Sobral printit = 0;
2898ca5c256SDaniel C. Sobral if (nf != tests[n].nf) {
2908ca5c256SDaniel C. Sobral printf("split `%s' by `%s' gave %d fields, not %d\n",
2918ca5c256SDaniel C. Sobral tests[n].str, tests[n].seps, nf, tests[n].nf);
2928ca5c256SDaniel C. Sobral printit = 1;
2938ca5c256SDaniel C. Sobral } else if (fields[RNF] != NULL) {
2948ca5c256SDaniel C. Sobral printf("split() went beyond array end\n");
2958ca5c256SDaniel C. Sobral printit = 1;
2968ca5c256SDaniel C. Sobral } else {
2978ca5c256SDaniel C. Sobral for (i = 0; i < nf && i < RNF; i++) {
2988ca5c256SDaniel C. Sobral f = fields[i];
2998ca5c256SDaniel C. Sobral if (f == NULL)
3008ca5c256SDaniel C. Sobral f = "(NULL)";
3018ca5c256SDaniel C. Sobral if (strcmp(f, tests[n].fi[i]) != 0) {
3028ca5c256SDaniel C. Sobral printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
3038ca5c256SDaniel C. Sobral tests[n].str, tests[n].seps,
3048ca5c256SDaniel C. Sobral i, fields[i], tests[n].fi[i]);
3058ca5c256SDaniel C. Sobral printit = 1;
3068ca5c256SDaniel C. Sobral }
3078ca5c256SDaniel C. Sobral }
3088ca5c256SDaniel C. Sobral }
3098ca5c256SDaniel C. Sobral if (printit)
3108ca5c256SDaniel C. Sobral print(nf, RNF, fields);
3118ca5c256SDaniel C. Sobral }
3128ca5c256SDaniel C. Sobral }
3138ca5c256SDaniel C. Sobral #endif
314