1333fc21eSDavid E. O'Brien #include <sys/cdefs.h> 2333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 3333fc21eSDavid E. O'Brien 48ca5c256SDaniel C. Sobral #include <stdio.h> 58ca5c256SDaniel C. Sobral #include <string.h> 68ca5c256SDaniel C. Sobral 7*e3bc7f4dSEnji Cooper #include "split.ih" 8*e3bc7f4dSEnji Cooper 98ca5c256SDaniel C. Sobral /* 108ca5c256SDaniel C. Sobral - split - divide a string into fields, like awk split() 1116c284ecSEnji Cooper == int split(char *string, char *fields[], int nfields, char *sep); 128eba7ea3SEnji Cooper - fields: list is not NULL-terminated 138eba7ea3SEnji Cooper - nfields: number of entries available in fields[] 148eba7ea3SEnji Cooper - sep: "" white, "c" single char, "ab" [ab]+ 158ca5c256SDaniel C. Sobral */ 168ca5c256SDaniel C. Sobral int /* number of fields, including overflow */ 178eba7ea3SEnji Cooper split(char *string, char *fields[], int nfields, char *sep) 188ca5c256SDaniel C. Sobral { 198fb3f3f6SDavid E. O'Brien char *p = string; 208fb3f3f6SDavid E. O'Brien char c; /* latest character */ 218fb3f3f6SDavid E. O'Brien char sepc = sep[0]; 228fb3f3f6SDavid E. O'Brien char sepc2; 238fb3f3f6SDavid E. O'Brien int fn; 248fb3f3f6SDavid E. O'Brien char **fp = fields; 258fb3f3f6SDavid E. O'Brien char *sepp; 268fb3f3f6SDavid E. O'Brien int trimtrail; 278ca5c256SDaniel C. Sobral 288ca5c256SDaniel C. Sobral /* white space */ 298ca5c256SDaniel C. Sobral if (sepc == '\0') { 308ca5c256SDaniel C. Sobral while ((c = *p++) == ' ' || c == '\t') 318ca5c256SDaniel C. Sobral continue; 328ca5c256SDaniel C. Sobral p--; 338ca5c256SDaniel C. Sobral trimtrail = 1; 348ca5c256SDaniel C. Sobral sep = " \t"; /* note, code below knows this is 2 long */ 358ca5c256SDaniel C. Sobral sepc = ' '; 368ca5c256SDaniel C. Sobral } else 378ca5c256SDaniel C. Sobral trimtrail = 0; 388ca5c256SDaniel C. Sobral sepc2 = sep[1]; /* now we can safely pick this up */ 398ca5c256SDaniel C. Sobral 408ca5c256SDaniel C. Sobral /* catch empties */ 418ca5c256SDaniel C. Sobral if (*p == '\0') 428ca5c256SDaniel C. Sobral return(0); 438ca5c256SDaniel C. Sobral 448ca5c256SDaniel C. Sobral /* single separator */ 458ca5c256SDaniel C. Sobral if (sepc2 == '\0') { 468ca5c256SDaniel C. Sobral fn = nfields; 478ca5c256SDaniel C. Sobral for (;;) { 488ca5c256SDaniel C. Sobral *fp++ = p; 498ca5c256SDaniel C. Sobral fn--; 508ca5c256SDaniel C. Sobral if (fn == 0) 518ca5c256SDaniel C. Sobral break; 528ca5c256SDaniel C. Sobral while ((c = *p++) != sepc) 538ca5c256SDaniel C. Sobral if (c == '\0') 548ca5c256SDaniel C. Sobral return(nfields - fn); 558ca5c256SDaniel C. Sobral *(p-1) = '\0'; 568ca5c256SDaniel C. Sobral } 578ca5c256SDaniel C. Sobral /* we have overflowed the fields vector -- just count them */ 588ca5c256SDaniel C. Sobral fn = nfields; 598ca5c256SDaniel C. Sobral for (;;) { 608ca5c256SDaniel C. Sobral while ((c = *p++) != sepc) 618ca5c256SDaniel C. Sobral if (c == '\0') 628ca5c256SDaniel C. Sobral return(fn); 638ca5c256SDaniel C. Sobral fn++; 648ca5c256SDaniel C. Sobral } 658ca5c256SDaniel C. Sobral /* not reached */ 668ca5c256SDaniel C. Sobral } 678ca5c256SDaniel C. Sobral 688ca5c256SDaniel C. Sobral /* two separators */ 698ca5c256SDaniel C. Sobral if (sep[2] == '\0') { 708ca5c256SDaniel C. Sobral fn = nfields; 718ca5c256SDaniel C. Sobral for (;;) { 728ca5c256SDaniel C. Sobral *fp++ = p; 738ca5c256SDaniel C. Sobral fn--; 748ca5c256SDaniel C. Sobral while ((c = *p++) != sepc && c != sepc2) 758ca5c256SDaniel C. Sobral if (c == '\0') { 768ca5c256SDaniel C. Sobral if (trimtrail && **(fp-1) == '\0') 778ca5c256SDaniel C. Sobral fn++; 788ca5c256SDaniel C. Sobral return(nfields - fn); 798ca5c256SDaniel C. Sobral } 808ca5c256SDaniel C. Sobral if (fn == 0) 818ca5c256SDaniel C. Sobral break; 828ca5c256SDaniel C. Sobral *(p-1) = '\0'; 838ca5c256SDaniel C. Sobral while ((c = *p++) == sepc || c == sepc2) 848ca5c256SDaniel C. Sobral continue; 858ca5c256SDaniel C. Sobral p--; 868ca5c256SDaniel C. Sobral } 878ca5c256SDaniel C. Sobral /* we have overflowed the fields vector -- just count them */ 888ca5c256SDaniel C. Sobral fn = nfields; 898ca5c256SDaniel C. Sobral while (c != '\0') { 908ca5c256SDaniel C. Sobral while ((c = *p++) == sepc || c == sepc2) 918ca5c256SDaniel C. Sobral continue; 928ca5c256SDaniel C. Sobral p--; 938ca5c256SDaniel C. Sobral fn++; 948ca5c256SDaniel C. Sobral while ((c = *p++) != '\0' && c != sepc && c != sepc2) 958ca5c256SDaniel C. Sobral continue; 968ca5c256SDaniel C. Sobral } 978ca5c256SDaniel C. Sobral /* might have to trim trailing white space */ 988ca5c256SDaniel C. Sobral if (trimtrail) { 998ca5c256SDaniel C. Sobral p--; 1008ca5c256SDaniel C. Sobral while ((c = *--p) == sepc || c == sepc2) 1018ca5c256SDaniel C. Sobral continue; 1028ca5c256SDaniel C. Sobral p++; 1038ca5c256SDaniel C. Sobral if (*p != '\0') { 1048ca5c256SDaniel C. Sobral if (fn == nfields+1) 1058ca5c256SDaniel C. Sobral *p = '\0'; 1068ca5c256SDaniel C. Sobral fn--; 1078ca5c256SDaniel C. Sobral } 1088ca5c256SDaniel C. Sobral } 1098ca5c256SDaniel C. Sobral return(fn); 1108ca5c256SDaniel C. Sobral } 1118ca5c256SDaniel C. Sobral 1128ca5c256SDaniel C. Sobral /* n separators */ 1138ca5c256SDaniel C. Sobral fn = 0; 1148ca5c256SDaniel C. Sobral for (;;) { 1158ca5c256SDaniel C. Sobral if (fn < nfields) 1168ca5c256SDaniel C. Sobral *fp++ = p; 1178ca5c256SDaniel C. Sobral fn++; 1188ca5c256SDaniel C. Sobral for (;;) { 1198ca5c256SDaniel C. Sobral c = *p++; 1208ca5c256SDaniel C. Sobral if (c == '\0') 1218ca5c256SDaniel C. Sobral return(fn); 1228ca5c256SDaniel C. Sobral sepp = sep; 1238ca5c256SDaniel C. Sobral while ((sepc = *sepp++) != '\0' && sepc != c) 1248ca5c256SDaniel C. Sobral continue; 1258ca5c256SDaniel C. Sobral if (sepc != '\0') /* it was a separator */ 1268ca5c256SDaniel C. Sobral break; 1278ca5c256SDaniel C. Sobral } 1288ca5c256SDaniel C. Sobral if (fn < nfields) 1298ca5c256SDaniel C. Sobral *(p-1) = '\0'; 1308ca5c256SDaniel C. Sobral for (;;) { 1318ca5c256SDaniel C. Sobral c = *p++; 1328ca5c256SDaniel C. Sobral sepp = sep; 1338ca5c256SDaniel C. Sobral while ((sepc = *sepp++) != '\0' && sepc != c) 1348ca5c256SDaniel C. Sobral continue; 1358ca5c256SDaniel C. Sobral if (sepc == '\0') /* it wasn't a separator */ 1368ca5c256SDaniel C. Sobral break; 1378ca5c256SDaniel C. Sobral } 1388ca5c256SDaniel C. Sobral p--; 1398ca5c256SDaniel C. Sobral } 1408ca5c256SDaniel C. Sobral 1418ca5c256SDaniel C. Sobral /* not reached */ 1428ca5c256SDaniel C. Sobral } 1438ca5c256SDaniel C. Sobral 1448ca5c256SDaniel C. Sobral #ifdef TEST_SPLIT 1458ca5c256SDaniel C. Sobral 1468ca5c256SDaniel C. Sobral 1478ca5c256SDaniel C. Sobral /* 1488ca5c256SDaniel C. Sobral * test program 1498ca5c256SDaniel C. Sobral * pgm runs regression 1508ca5c256SDaniel C. Sobral * pgm sep splits stdin lines by sep 1518ca5c256SDaniel C. Sobral * pgm str sep splits str by sep 1528ca5c256SDaniel C. Sobral * pgm str sep n splits str by sep n times 1538ca5c256SDaniel C. Sobral */ 1548ca5c256SDaniel C. Sobral int 1558eba7ea3SEnji Cooper main(int argc, char *argv[]) 1568ca5c256SDaniel C. Sobral { 1578ca5c256SDaniel C. Sobral char buf[512]; 1588fb3f3f6SDavid E. O'Brien int n; 1598ca5c256SDaniel C. Sobral # define MNF 10 1608ca5c256SDaniel C. Sobral char *fields[MNF]; 1618ca5c256SDaniel C. Sobral 1628ca5c256SDaniel C. Sobral if (argc > 4) 1638ca5c256SDaniel C. Sobral for (n = atoi(argv[3]); n > 0; n--) { 1648ca5c256SDaniel C. Sobral (void) strcpy(buf, argv[1]); 1658ca5c256SDaniel C. Sobral } 1668ca5c256SDaniel C. Sobral else if (argc > 3) 1678ca5c256SDaniel C. Sobral for (n = atoi(argv[3]); n > 0; n--) { 1688ca5c256SDaniel C. Sobral (void) strcpy(buf, argv[1]); 1698ca5c256SDaniel C. Sobral (void) split(buf, fields, MNF, argv[2]); 1708ca5c256SDaniel C. Sobral } 1718ca5c256SDaniel C. Sobral else if (argc > 2) 1728ca5c256SDaniel C. Sobral dosplit(argv[1], argv[2]); 1738ca5c256SDaniel C. Sobral else if (argc > 1) 1748ca5c256SDaniel C. Sobral while (fgets(buf, sizeof(buf), stdin) != NULL) { 1758ca5c256SDaniel C. Sobral buf[strlen(buf)-1] = '\0'; /* stomp newline */ 1768ca5c256SDaniel C. Sobral dosplit(buf, argv[1]); 1778ca5c256SDaniel C. Sobral } 1788ca5c256SDaniel C. Sobral else 1798ca5c256SDaniel C. Sobral regress(); 1808ca5c256SDaniel C. Sobral 1818ca5c256SDaniel C. Sobral exit(0); 1828ca5c256SDaniel C. Sobral } 1838ca5c256SDaniel C. Sobral 1848eba7ea3SEnji Cooper void 1858eba7ea3SEnji Cooper dosplit(char *string, char *seps) 1868ca5c256SDaniel C. Sobral { 1878ca5c256SDaniel C. Sobral # define NF 5 1888ca5c256SDaniel C. Sobral char *fields[NF]; 1898fb3f3f6SDavid E. O'Brien int nf; 1908ca5c256SDaniel C. Sobral 1918ca5c256SDaniel C. Sobral nf = split(string, fields, NF, seps); 1928ca5c256SDaniel C. Sobral print(nf, NF, fields); 1938ca5c256SDaniel C. Sobral } 1948ca5c256SDaniel C. Sobral 1958eba7ea3SEnji Cooper void 1968eba7ea3SEnji Cooper print(int nf, int nfp, char *fields[]) 1978ca5c256SDaniel C. Sobral { 1988fb3f3f6SDavid E. O'Brien int fn; 1998fb3f3f6SDavid E. O'Brien int bound; 2008ca5c256SDaniel C. Sobral 2018ca5c256SDaniel C. Sobral bound = (nf > nfp) ? nfp : nf; 2028ca5c256SDaniel C. Sobral printf("%d:\t", nf); 2038ca5c256SDaniel C. Sobral for (fn = 0; fn < bound; fn++) 2048ca5c256SDaniel C. Sobral printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 2058ca5c256SDaniel C. Sobral } 2068ca5c256SDaniel C. Sobral 2078ca5c256SDaniel C. Sobral #define RNF 5 /* some table entries know this */ 2088ca5c256SDaniel C. Sobral struct { 2098ca5c256SDaniel C. Sobral char *str; 2108ca5c256SDaniel C. Sobral char *seps; 2118ca5c256SDaniel C. Sobral int nf; 2128ca5c256SDaniel C. Sobral char *fi[RNF]; 2138ca5c256SDaniel C. Sobral } tests[] = { 2148ca5c256SDaniel C. Sobral "", " ", 0, { "" }, 2158ca5c256SDaniel C. Sobral " ", " ", 2, { "", "" }, 2168ca5c256SDaniel C. Sobral "x", " ", 1, { "x" }, 2178ca5c256SDaniel C. Sobral "xy", " ", 1, { "xy" }, 2188ca5c256SDaniel C. Sobral "x y", " ", 2, { "x", "y" }, 2198ca5c256SDaniel C. Sobral "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 2208ca5c256SDaniel C. Sobral " a bcd", " ", 4, { "", "", "a", "bcd" }, 2218ca5c256SDaniel C. Sobral "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 2228ca5c256SDaniel C. Sobral " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 2238ca5c256SDaniel C. Sobral 2248ca5c256SDaniel C. Sobral "", " _", 0, { "" }, 2258ca5c256SDaniel C. Sobral " ", " _", 2, { "", "" }, 2268ca5c256SDaniel C. Sobral "x", " _", 1, { "x" }, 2278ca5c256SDaniel C. Sobral "x y", " _", 2, { "x", "y" }, 2288ca5c256SDaniel C. Sobral "ab _ cd", " _", 2, { "ab", "cd" }, 2298ca5c256SDaniel C. Sobral " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 2308ca5c256SDaniel C. Sobral "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 2318ca5c256SDaniel C. Sobral " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 2328ca5c256SDaniel C. Sobral 2338ca5c256SDaniel C. Sobral "", " _~", 0, { "" }, 2348ca5c256SDaniel C. Sobral " ", " _~", 2, { "", "" }, 2358ca5c256SDaniel C. Sobral "x", " _~", 1, { "x" }, 2368ca5c256SDaniel C. Sobral "x y", " _~", 2, { "x", "y" }, 2378ca5c256SDaniel C. Sobral "ab _~ cd", " _~", 2, { "ab", "cd" }, 2388ca5c256SDaniel C. Sobral " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 2398ca5c256SDaniel C. Sobral "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 2408ca5c256SDaniel C. Sobral "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 2418ca5c256SDaniel C. Sobral 2428ca5c256SDaniel C. Sobral "", " _~-", 0, { "" }, 2438ca5c256SDaniel C. Sobral " ", " _~-", 2, { "", "" }, 2448ca5c256SDaniel C. Sobral "x", " _~-", 1, { "x" }, 2458ca5c256SDaniel C. Sobral "x y", " _~-", 2, { "x", "y" }, 2468ca5c256SDaniel C. Sobral "ab _~- cd", " _~-", 2, { "ab", "cd" }, 2478ca5c256SDaniel C. Sobral " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 2488ca5c256SDaniel C. Sobral "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 2498ca5c256SDaniel C. Sobral "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 2508ca5c256SDaniel C. Sobral 2518ca5c256SDaniel C. Sobral "", " ", 0, { "" }, 2528ca5c256SDaniel C. Sobral " ", " ", 2, { "", "" }, 2538ca5c256SDaniel C. Sobral "x", " ", 1, { "x" }, 2548ca5c256SDaniel C. Sobral "xy", " ", 1, { "xy" }, 2558ca5c256SDaniel C. Sobral "x y", " ", 2, { "x", "y" }, 2568ca5c256SDaniel C. Sobral "abc def g ", " ", 4, { "abc", "def", "g", "" }, 2578ca5c256SDaniel C. Sobral " a bcd", " ", 3, { "", "a", "bcd" }, 2588ca5c256SDaniel C. Sobral "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 2598ca5c256SDaniel C. Sobral " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 2608ca5c256SDaniel C. Sobral 2618ca5c256SDaniel C. Sobral "", "", 0, { "" }, 2628ca5c256SDaniel C. Sobral " ", "", 0, { "" }, 2638ca5c256SDaniel C. Sobral "x", "", 1, { "x" }, 2648ca5c256SDaniel C. Sobral "xy", "", 1, { "xy" }, 2658ca5c256SDaniel C. Sobral "x y", "", 2, { "x", "y" }, 2668ca5c256SDaniel C. Sobral "abc def g ", "", 3, { "abc", "def", "g" }, 2678ca5c256SDaniel C. Sobral "\t a bcd", "", 2, { "a", "bcd" }, 2688ca5c256SDaniel C. Sobral " a \tb\t c ", "", 3, { "a", "b", "c" }, 2698ca5c256SDaniel C. Sobral "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 2708ca5c256SDaniel C. Sobral "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 2718ca5c256SDaniel C. Sobral " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 2728ca5c256SDaniel C. Sobral 2738ca5c256SDaniel C. Sobral NULL, NULL, 0, { NULL }, 2748ca5c256SDaniel C. Sobral }; 2758ca5c256SDaniel C. Sobral 2768eba7ea3SEnji Cooper void 2778eba7ea3SEnji Cooper regress(void) 2788ca5c256SDaniel C. Sobral { 2798ca5c256SDaniel C. Sobral char buf[512]; 2808fb3f3f6SDavid E. O'Brien int n; 2818ca5c256SDaniel C. Sobral char *fields[RNF+1]; 2828fb3f3f6SDavid E. O'Brien int nf; 2838fb3f3f6SDavid E. O'Brien int i; 2848fb3f3f6SDavid E. O'Brien int printit; 2858fb3f3f6SDavid E. O'Brien char *f; 2868ca5c256SDaniel C. Sobral 2878ca5c256SDaniel C. Sobral for (n = 0; tests[n].str != NULL; n++) { 2888ca5c256SDaniel C. Sobral (void) strcpy(buf, tests[n].str); 2898ca5c256SDaniel C. Sobral fields[RNF] = NULL; 2908ca5c256SDaniel C. Sobral nf = split(buf, fields, RNF, tests[n].seps); 2918ca5c256SDaniel C. Sobral printit = 0; 2928ca5c256SDaniel C. Sobral if (nf != tests[n].nf) { 2938ca5c256SDaniel C. Sobral printf("split `%s' by `%s' gave %d fields, not %d\n", 2948ca5c256SDaniel C. Sobral tests[n].str, tests[n].seps, nf, tests[n].nf); 2958ca5c256SDaniel C. Sobral printit = 1; 2968ca5c256SDaniel C. Sobral } else if (fields[RNF] != NULL) { 2978ca5c256SDaniel C. Sobral printf("split() went beyond array end\n"); 2988ca5c256SDaniel C. Sobral printit = 1; 2998ca5c256SDaniel C. Sobral } else { 3008ca5c256SDaniel C. Sobral for (i = 0; i < nf && i < RNF; i++) { 3018ca5c256SDaniel C. Sobral f = fields[i]; 3028ca5c256SDaniel C. Sobral if (f == NULL) 3038ca5c256SDaniel C. Sobral f = "(NULL)"; 3048ca5c256SDaniel C. Sobral if (strcmp(f, tests[n].fi[i]) != 0) { 3058ca5c256SDaniel C. Sobral printf("split `%s' by `%s' field %d is `%s', not `%s'\n", 3068ca5c256SDaniel C. Sobral tests[n].str, tests[n].seps, 3078ca5c256SDaniel C. Sobral i, fields[i], tests[n].fi[i]); 3088ca5c256SDaniel C. Sobral printit = 1; 3098ca5c256SDaniel C. Sobral } 3108ca5c256SDaniel C. Sobral } 3118ca5c256SDaniel C. Sobral } 3128ca5c256SDaniel C. Sobral if (printit) 3138ca5c256SDaniel C. Sobral print(nf, RNF, fields); 3148ca5c256SDaniel C. Sobral } 3158ca5c256SDaniel C. Sobral } 3168ca5c256SDaniel C. Sobral #endif 317