#ifdef __FreeBSD__
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#endif

#include <stdio.h>
#include <string.h>

/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split in place:  the separator that ends each stored
 * field is overwritten with a NUL and fields[] receives pointers into
 * string.  How the splitting is done depends on sep:
 *
 *	""	fields are separated by runs of blanks and tabs; leading
 *		and trailing white space is ignored
 *	"c"	every occurrence of c separates two fields, so adjacent
 *		separators yield empty fields
 *	"ab..."	fields are separated by runs of any of the listed characters
 *
 * At most nfields pointers are stored.  If the string holds more fields
 * than that, the last stored field is left unsplit (it runs to the end of
 * the string) and the remaining fields are only counted.  If nfields is
 * zero or negative, nothing is stored, the string is left unmodified and
 * the fields are merely counted.
 *
 * Returns the number of fields found, including those that did not fit.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;			/* number of fields seen so far */
	char **fp = fields;	/* next free slot; list is not NULL-terminated */
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = 0;
		for (;;) {
			if (fn < nfields)
				*fp++ = p;
			fn++;
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			/*
			 * Only fields before the last slot get terminated;
			 * the last slot keeps the rest of the string.
			 */
			if (fn < nfields)
				*(p-1) = '\0';
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		char *start;		/* start of the current field */
		char *gap = NULL;	/* first separator after previous field */

		fn = 0;
		for (;;) {
			start = p;
			if (fn < nfields)
				*fp++ = p;
			fn++;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					if (trimtrail && *start == '\0') {
						/*
						 * Trailing white space, not a
						 * field.  If it directly
						 * follows the unsplit last
						 * slot, trim it off that slot.
						 */
						fn--;
						if (fn == nfields && gap != NULL)
							*gap = '\0';
					}
					return(fn);
				}
			gap = p - 1;
			if (fn < nfields)
				*gap = '\0';
			/* a run of separators counts as one */
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* not reached */
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT

#include <stdlib.h>

static void dosplit(char *string, char *seps);
static void print(int nf, int nfp, char *fields[]);
static void regress(void);

/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	copies str n times without splitting
 *			(presumably a timing baseline -- TODO confirm)
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	if (argc > 4) {
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
		}
	} else if (argc > 3) {
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() scribbles on its input, so recopy each time */
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	} else if (argc > 2) {
		dosplit(argv[1], argv[2]);
	} else if (argc > 1) {
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/* stomp newline, if there is one */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	} else {
		regress();
	}

	exit(0);
}

/*
 * dosplit - split a string into at most NF fields and print the result
 */
static void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}

/*
 * print - show the field count and the fields that were actually stored
 *
 * nf is the count returned by split(), nfp the capacity of fields[].
 * When nf exceeds nfp the list ends with a dangling ", " to show that
 * more fields existed than could be stored.
 */
static void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "");
	printf("\n");
}

#define	RNF	5		/* some table entries know this */
struct {
	char *str;		/* string to split */
	char *seps;		/* separators to split it by */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected contents of the stored fields */
} tests[] = {
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",	" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _",	0,	{ "" } },
	{ " ",		" _",	2,	{ "", "" } },
	{ "x",		" _",	1,	{ "x" } },
	{ "x y",	" _",	2,	{ "x", "y" } },
	{ "ab _ cd",	" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",	" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _~",	0,	{ "" } },
	{ " ",		" _~",	2,	{ "", "" } },
	{ "x",		" _~",	1,	{ "x" } },
	{ "x y",	" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",	" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _~-",	0,	{ "" } },
	{ " ",		" _~-",	2,	{ "", "" } },
	{ "x",		" _~-",	1,	{ "x" } },
	{ "x y",	" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",	" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def g ",	" ",	4,	{ "abc", "def", "g", "" } },
	{ " a bcd",	" ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		"",	0,	{ "" } },
	{ " ",		"",	0,	{ "" } },
	{ "x",		"",	1,	{ "x" } },
	{ "xy",		"",	1,	{ "xy" } },
	{ "x y",	"",	2,	{ "x", "y" } },
	{ "abc def g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",	"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,		NULL,	0,	{ NULL } },
};

/*
 * regress - run split() over the tests[] table and report any mismatch
 *
 * Each string is copied first because split() modifies its input.  An
 * extra slot past the RNF usable ones is set to NULL beforehand so that
 * a store beyond the end of the array can be detected.
 */
static void
regress(void)
{
	char buf[512];
	int n;
	char *fields[RNF+1];
	int nf;
	int i;
	int printit;
	char *f;

	for (n = 0; tests[n].str != NULL; n++) {
		(void) snprintf(buf, sizeof(buf), "%s", tests[n].str);
		fields[RNF] = NULL;
		nf = split(buf, fields, RNF, tests[n].seps);
		printit = 0;
		if (nf != tests[n].nf) {
			printf("split `%s' by `%s' gave %d fields, not %d\n",
				tests[n].str, tests[n].seps, nf, tests[n].nf);
			printit = 1;
		} else if (fields[RNF] != NULL) {
			printf("split() went beyond array end\n");
			printit = 1;
		} else {
			for (i = 0; i < nf && i < RNF; i++) {
				f = fields[i];
				if (f == NULL)
					f = "(NULL)";
				if (strcmp(f, tests[n].fi[i]) != 0) {
					printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
						tests[n].str, tests[n].seps,
						i, f, tests[n].fi[i]);
					printit = 1;
				}
			}
		}
		if (printit)
			print(nf, RNF, fields);
	}
}
#endif