/* $FreeBSD$
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split in place: separators that end a stored field are
 * overwritten with NULs and fields[] receives pointers into string.
 *
 * sep selects the splitting rule:
 *	""	white space: any run of blanks and tabs separates fields, and
 *		leading and trailing white space is ignored
 *	"c"	every occurrence of c separates two fields, so adjacent
 *		separators yield empty fields
 *	"ab.."	any run of the listed characters is one separator; a leading
 *		or trailing run still yields an empty first or last field
 *
 * At most nfields pointers are stored and the list is not NULL-terminated.
 * If the string holds more fields than that, the last stored field keeps the
 * unsplit remainder of the string and the return value still counts every
 * field.  (In white-space mode the remainder has its trailing white space
 * removed only when nothing but white space follows the last stored field.)
 *
 * If nfields is zero or negative nothing is stored, the string is left
 * unmodified, and only the count is returned.
 *
 * Returns the number of fields found, including any that did not fit;
 * 0 for an empty string (or, in white-space mode, an all-white one).
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* counts down the free slots */
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;	/* last slot keeps the remainder */
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			fn = nfields;
		} else
			fn = 1;		/* no room at all: first field only counted */
		/* we have overflowed the fields vector -- just count them */
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* counts down the free slots */
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* trailing white space made an empty field */
						if (trimtrail && **(fp-1) == '\0')
							fn++;
						return(nfields - fn);
					}
				if (fn == 0)
					break;	/* last slot keeps the remainder */
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			fn = nfields;
		} else {
			/* no room at all: scan the first field without storing it */
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0')
					return(1);
			fn = 1;
		}
		/* we have overflowed the fields vector -- just count them */
		/* (c is the separator that ended the last field scanned) */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;		/* back onto the terminating NUL */
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;		/* first char of the trailing run, or the NUL */
			if (*p != '\0') {
				/* the empty field after the run was counted; drop it */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';	/* run directly follows last stored field */
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT

static void dosplit(char *string, char *seps);
static void print(int nf, int nfp, char *fields[]);
static void regress(void);

/*
 * test program
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *
 * With a further argument only the copy of str is repeated n times and
 * split() is not called (presumably to measure the loop overhead).
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy str into buf, so it has to fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "string too long (limit %lu)\n",
					(unsigned long)(sizeof(buf) - 1));
		exit(2);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/* stomp newline, if there is one */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}

/*
 * dosplit - split string by seps into at most NF fields and print the result
 */
static void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}

/*
 * print - show a split() result: the count, then the stored fields
 *
 * nf is the count split() returned and nfp the capacity of fields[]; only
 * the first min(nf, nfp) entries are printed.  When fields overflowed the
 * vector the list ends with a dangling ", ".  The line is always terminated.
 */
static void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "");
	printf("\n");
}

#define	RNF	5		/* some table entries know this */
/*
 * regression table: string, separators, expected count, and the expected
 * contents of the first RNF fields
 */
static struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",	" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _",	0,	{ "" } },
	{ " ",		" _",	2,	{ "", "" } },
	{ "x",		" _",	1,	{ "x" } },
	{ "x y",	" _",	2,	{ "x", "y" } },
	{ "ab _ cd",	" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",	" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _~",	0,	{ "" } },
	{ " ",		" _~",	2,	{ "", "" } },
	{ "x",		" _~",	1,	{ "x" } },
	{ "x y",	" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",	" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _~-",	0,	{ "" } },
	{ " ",		" _~-",	2,	{ "", "" } },
	{ "x",		" _~-",	1,	{ "x" } },
	{ "x y",	" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",	" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def g ",	" ",	4,	{ "abc", "def", "g", "" } },
	{ " a bcd",	" ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		"",	0,	{ "" } },
	{ " ",		"",	0,	{ "" } },
	{ "x",		"",	1,	{ "x" } },
	{ "xy",		"",	1,	{ "xy" } },
	{ "x y",	"",	2,	{ "x", "y" } },
	{ "abc def g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",	"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,		NULL,	0,	{ NULL } },
};

/*
 * regress - run every table entry through split() and report mismatches
 *
 * Each entry is split twice: once into an RNF-entry vector (checking the
 * count, the stored fields, and that the slot past the end stays untouched)
 * and once with a zero-length vector (checking that the count is the same
 * and that neither the vector nor the string is written to).
 */
static void
regress(void)
{
	char buf[512];
	int n;
	char *fields[RNF+1];
	int nf;
	int i;
	int printit;
	char *f;

	for (n = 0; tests[n].str != NULL; n++) {
		(void) strcpy(buf, tests[n].str);
		fields[RNF] = NULL;	/* sentinel just past the usable slots */
		nf = split(buf, fields, RNF, tests[n].seps);
		printit = 0;
		if (nf != tests[n].nf) {
			printf("split `%s' by `%s' gave %d fields, not %d\n",
				tests[n].str, tests[n].seps, nf, tests[n].nf);
			printit = 1;
		} else if (fields[RNF] != NULL) {
			printf("split() went beyond array end\n");
			printit = 1;
		} else {
			for (i = 0; i < nf && i < RNF; i++) {
				f = fields[i];
				if (f == NULL)
					f = "(NULL)";
				if (strcmp(f, tests[n].fi[i]) != 0) {
					printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
						tests[n].str, tests[n].seps,
						i, f, tests[n].fi[i]);
					printit = 1;
				}
			}
		}
		if (printit)
			print(nf, RNF, fields);

		/* with no room at all, split() must count and touch nothing */
		(void) strcpy(buf, tests[n].str);
		fields[0] = NULL;
		nf = split(buf, fields, 0, tests[n].seps);
		if (nf != tests[n].nf)
			printf("split `%s' by `%s' with no room gave %d fields, not %d\n",
				tests[n].str, tests[n].seps, nf, tests[n].nf);
		else if (fields[0] != NULL || strcmp(buf, tests[n].str) != 0)
			printf("split `%s' by `%s' with no room wrote to its arguments\n",
				tests[n].str, tests[n].seps);
	}
}
#endif