1 #include <sys/cdefs.h> 2 __FBSDID("$FreeBSD$"); 3 4 #include <stdio.h> 5 #include <string.h> 6 7 #include "split.ih" 8 9 /* 10 - split - divide a string into fields, like awk split() 11 == int split(char *string, char *fields[], int nfields, char *sep); 12 - fields: list is not NULL-terminated 13 - nfields: number of entries available in fields[] 14 - sep: "" white, "c" single char, "ab" [ab]+ 15 */ 16 int /* number of fields, including overflow */ 17 split(char *string, char *fields[], int nfields, char *sep) 18 { 19 char *p = string; 20 char c; /* latest character */ 21 char sepc = sep[0]; 22 char sepc2; 23 int fn; 24 char **fp = fields; 25 char *sepp; 26 int trimtrail; 27 28 /* white space */ 29 if (sepc == '\0') { 30 while ((c = *p++) == ' ' || c == '\t') 31 continue; 32 p--; 33 trimtrail = 1; 34 sep = " \t"; /* note, code below knows this is 2 long */ 35 sepc = ' '; 36 } else 37 trimtrail = 0; 38 sepc2 = sep[1]; /* now we can safely pick this up */ 39 40 /* catch empties */ 41 if (*p == '\0') 42 return(0); 43 44 /* single separator */ 45 if (sepc2 == '\0') { 46 fn = nfields; 47 for (;;) { 48 *fp++ = p; 49 fn--; 50 if (fn == 0) 51 break; 52 while ((c = *p++) != sepc) 53 if (c == '\0') 54 return(nfields - fn); 55 *(p-1) = '\0'; 56 } 57 /* we have overflowed the fields vector -- just count them */ 58 fn = nfields; 59 for (;;) { 60 while ((c = *p++) != sepc) 61 if (c == '\0') 62 return(fn); 63 fn++; 64 } 65 /* not reached */ 66 } 67 68 /* two separators */ 69 if (sep[2] == '\0') { 70 fn = nfields; 71 for (;;) { 72 *fp++ = p; 73 fn--; 74 while ((c = *p++) != sepc && c != sepc2) 75 if (c == '\0') { 76 if (trimtrail && **(fp-1) == '\0') 77 fn++; 78 return(nfields - fn); 79 } 80 if (fn == 0) 81 break; 82 *(p-1) = '\0'; 83 while ((c = *p++) == sepc || c == sepc2) 84 continue; 85 p--; 86 } 87 /* we have overflowed the fields vector -- just count them */ 88 fn = nfields; 89 while (c != '\0') { 90 while ((c = *p++) == sepc || c == sepc2) 91 continue; 92 p--; 93 fn++; 94 while ((c = *p++) != '\0' && c != sepc && c != sepc2) 95 continue; 96 } 97 /* might have to trim trailing white space */ 98 if (trimtrail) { 99 p--; 100 while ((c = *--p) == sepc || c == sepc2) 101 continue; 102 p++; 103 if (*p != '\0') { 104 if (fn == nfields+1) 105 *p = '\0'; 106 fn--; 107 } 108 } 109 return(fn); 110 } 111 112 /* n separators */ 113 fn = 0; 114 for (;;) { 115 if (fn < nfields) 116 *fp++ = p; 117 fn++; 118 for (;;) { 119 c = *p++; 120 if (c == '\0') 121 return(fn); 122 sepp = sep; 123 while ((sepc = *sepp++) != '\0' && sepc != c) 124 continue; 125 if (sepc != '\0') /* it was a separator */ 126 break; 127 } 128 if (fn < nfields) 129 *(p-1) = '\0'; 130 for (;;) { 131 c = *p++; 132 sepp = sep; 133 while ((sepc = *sepp++) != '\0' && sepc != c) 134 continue; 135 if (sepc == '\0') /* it wasn't a separator */ 136 break; 137 } 138 p--; 139 } 140 141 /* not reached */ 142 } 143 144 #ifdef TEST_SPLIT 145 146 147 /* 148 * test program 149 * pgm runs regression 150 * pgm sep splits stdin lines by sep 151 * pgm str sep splits str by sep 152 * pgm str sep n splits str by sep n times 153 */ 154 int 155 main(int argc, char *argv[]) 156 { 157 char buf[512]; 158 int n; 159 # define MNF 10 160 char *fields[MNF]; 161 162 if (argc > 4) 163 for (n = atoi(argv[3]); n > 0; n--) { 164 (void) strcpy(buf, argv[1]); 165 } 166 else if (argc > 3) 167 for (n = atoi(argv[3]); n > 0; n--) { 168 (void) strcpy(buf, argv[1]); 169 (void) split(buf, fields, MNF, argv[2]); 170 } 171 else if (argc > 2) 172 dosplit(argv[1], argv[2]); 173 else if (argc > 1) 174 while (fgets(buf, sizeof(buf), stdin) != NULL) { 175 buf[strlen(buf)-1] = '\0'; /* stomp newline */ 176 dosplit(buf, argv[1]); 177 } 178 else 179 regress(); 180 181 exit(0); 182 } 183 184 void 185 dosplit(char *string, char *seps) 186 { 187 # define NF 5 188 char *fields[NF]; 189 int nf; 190 191 nf = split(string, fields, NF, seps); 192 print(nf, NF, fields); 193 } 194 195 void 196 print(int nf, int nfp, char *fields[]) 197 { 198 int fn; 199 int bound; 200 201 bound = (nf > nfp) ? nfp : nf; 202 printf("%d:\t", nf); 203 for (fn = 0; fn < bound; fn++) 204 printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 205 } 206 207 #define RNF 5 /* some table entries know this */ 208 struct { 209 char *str; 210 char *seps; 211 int nf; 212 char *fi[RNF]; 213 } tests[] = { 214 "", " ", 0, { "" }, 215 " ", " ", 2, { "", "" }, 216 "x", " ", 1, { "x" }, 217 "xy", " ", 1, { "xy" }, 218 "x y", " ", 2, { "x", "y" }, 219 "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 220 " a bcd", " ", 4, { "", "", "a", "bcd" }, 221 "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 222 " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 223 224 "", " _", 0, { "" }, 225 " ", " _", 2, { "", "" }, 226 "x", " _", 1, { "x" }, 227 "x y", " _", 2, { "x", "y" }, 228 "ab _ cd", " _", 2, { "ab", "cd" }, 229 " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 230 "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 231 " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 232 233 "", " _~", 0, { "" }, 234 " ", " _~", 2, { "", "" }, 235 "x", " _~", 1, { "x" }, 236 "x y", " _~", 2, { "x", "y" }, 237 "ab _~ cd", " _~", 2, { "ab", "cd" }, 238 " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 239 "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 240 "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 241 242 "", " _~-", 0, { "" }, 243 " ", " _~-", 2, { "", "" }, 244 "x", " _~-", 1, { "x" }, 245 "x y", " _~-", 2, { "x", "y" }, 246 "ab _~- cd", " _~-", 2, { "ab", "cd" }, 247 " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 248 "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 249 "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 250 251 "", " ", 0, { "" }, 252 " ", " ", 2, { "", "" }, 253 "x", " ", 1, { "x" }, 254 "xy", " ", 1, { "xy" }, 255 "x y", " ", 2, { "x", "y" }, 256 "abc def g ", " ", 4, { "abc", "def", "g", "" }, 257 " a bcd", " ", 3, { "", "a", "bcd" }, 258 "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 259 " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 260 261 "", "", 0, { "" }, 262 " ", "", 0, { "" }, 263 "x", "", 1, { "x" }, 264 "xy", "", 1, { "xy" }, 265 "x y", "", 2, { "x", "y" }, 266 "abc def g ", "", 3, { "abc", "def", "g" }, 267 "\t a bcd", "", 2, { "a", "bcd" }, 268 " a \tb\t c ", "", 3, { "a", "b", "c" }, 269 "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 270 "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 271 " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 272 273 NULL, NULL, 0, { NULL }, 274 }; 275 276 void 277 regress(void) 278 { 279 char buf[512]; 280 int n; 281 char *fields[RNF+1]; 282 int nf; 283 int i; 284 int printit; 285 char *f; 286 287 for (n = 0; tests[n].str != NULL; n++) { 288 (void) strcpy(buf, tests[n].str); 289 fields[RNF] = NULL; 290 nf = split(buf, fields, RNF, tests[n].seps); 291 printit = 0; 292 if (nf != tests[n].nf) { 293 printf("split `%s' by `%s' gave %d fields, not %d\n", 294 tests[n].str, tests[n].seps, nf, tests[n].nf); 295 printit = 1; 296 } else if (fields[RNF] != NULL) { 297 printf("split() went beyond array end\n"); 298 printit = 1; 299 } else { 300 for (i = 0; i < nf && i < RNF; i++) { 301 f = fields[i]; 302 if (f == NULL) 303 f = "(NULL)"; 304 if (strcmp(f, tests[n].fi[i]) != 0) { 305 printf("split `%s' by `%s' field %d is `%s', not `%s'\n", 306 tests[n].str, tests[n].seps, 307 i, fields[i], tests[n].fi[i]); 308 printit = 1; 309 } 310 } 311 } 312 if (printit) 313 print(nf, RNF, fields); 314 } 315 } 316 #endif 317