1 #include <stdio.h> 2 #include <string.h> 3 4 #include "split.ih" 5 6 /* 7 - split - divide a string into fields, like awk split() 8 == int split(char *string, char *fields[], int nfields, char *sep); 9 - fields: list is not NULL-terminated 10 - nfields: number of entries available in fields[] 11 - sep: "" white, "c" single char, "ab" [ab]+ 12 */ 13 int /* number of fields, including overflow */ 14 split(char *string, char *fields[], int nfields, char *sep) 15 { 16 char *p = string; 17 char c; /* latest character */ 18 char sepc = sep[0]; 19 char sepc2; 20 int fn; 21 char **fp = fields; 22 char *sepp; 23 int trimtrail; 24 25 /* white space */ 26 if (sepc == '\0') { 27 while ((c = *p++) == ' ' || c == '\t') 28 continue; 29 p--; 30 trimtrail = 1; 31 sep = " \t"; /* note, code below knows this is 2 long */ 32 sepc = ' '; 33 } else 34 trimtrail = 0; 35 sepc2 = sep[1]; /* now we can safely pick this up */ 36 37 /* catch empties */ 38 if (*p == '\0') 39 return(0); 40 41 /* single separator */ 42 if (sepc2 == '\0') { 43 fn = nfields; 44 for (;;) { 45 *fp++ = p; 46 fn--; 47 if (fn == 0) 48 break; 49 while ((c = *p++) != sepc) 50 if (c == '\0') 51 return(nfields - fn); 52 *(p-1) = '\0'; 53 } 54 /* we have overflowed the fields vector -- just count them */ 55 fn = nfields; 56 for (;;) { 57 while ((c = *p++) != sepc) 58 if (c == '\0') 59 return(fn); 60 fn++; 61 } 62 /* not reached */ 63 } 64 65 /* two separators */ 66 if (sep[2] == '\0') { 67 fn = nfields; 68 for (;;) { 69 *fp++ = p; 70 fn--; 71 while ((c = *p++) != sepc && c != sepc2) 72 if (c == '\0') { 73 if (trimtrail && **(fp-1) == '\0') 74 fn++; 75 return(nfields - fn); 76 } 77 if (fn == 0) 78 break; 79 *(p-1) = '\0'; 80 while ((c = *p++) == sepc || c == sepc2) 81 continue; 82 p--; 83 } 84 /* we have overflowed the fields vector -- just count them */ 85 fn = nfields; 86 while (c != '\0') { 87 while ((c = *p++) == sepc || c == sepc2) 88 continue; 89 p--; 90 fn++; 91 while ((c = *p++) != '\0' && c != sepc && c != sepc2) 92 continue; 93 } 94 /* might have to trim trailing white space */ 95 if (trimtrail) { 96 p--; 97 while ((c = *--p) == sepc || c == sepc2) 98 continue; 99 p++; 100 if (*p != '\0') { 101 if (fn == nfields+1) 102 *p = '\0'; 103 fn--; 104 } 105 } 106 return(fn); 107 } 108 109 /* n separators */ 110 fn = 0; 111 for (;;) { 112 if (fn < nfields) 113 *fp++ = p; 114 fn++; 115 for (;;) { 116 c = *p++; 117 if (c == '\0') 118 return(fn); 119 sepp = sep; 120 while ((sepc = *sepp++) != '\0' && sepc != c) 121 continue; 122 if (sepc != '\0') /* it was a separator */ 123 break; 124 } 125 if (fn < nfields) 126 *(p-1) = '\0'; 127 for (;;) { 128 c = *p++; 129 sepp = sep; 130 while ((sepc = *sepp++) != '\0' && sepc != c) 131 continue; 132 if (sepc == '\0') /* it wasn't a separator */ 133 break; 134 } 135 p--; 136 } 137 138 /* not reached */ 139 } 140 141 #ifdef TEST_SPLIT 142 143 144 /* 145 * test program 146 * pgm runs regression 147 * pgm sep splits stdin lines by sep 148 * pgm str sep splits str by sep 149 * pgm str sep n splits str by sep n times 150 */ 151 int 152 main(int argc, char *argv[]) 153 { 154 char buf[512]; 155 int n; 156 # define MNF 10 157 char *fields[MNF]; 158 159 if (argc > 4) 160 for (n = atoi(argv[3]); n > 0; n--) { 161 (void) strcpy(buf, argv[1]); 162 } 163 else if (argc > 3) 164 for (n = atoi(argv[3]); n > 0; n--) { 165 (void) strcpy(buf, argv[1]); 166 (void) split(buf, fields, MNF, argv[2]); 167 } 168 else if (argc > 2) 169 dosplit(argv[1], argv[2]); 170 else if (argc > 1) 171 while (fgets(buf, sizeof(buf), stdin) != NULL) { 172 buf[strlen(buf)-1] = '\0'; /* stomp newline */ 173 dosplit(buf, argv[1]); 174 } 175 else 176 regress(); 177 178 exit(0); 179 } 180 181 void 182 dosplit(char *string, char *seps) 183 { 184 # define NF 5 185 char *fields[NF]; 186 int nf; 187 188 nf = split(string, fields, NF, seps); 189 print(nf, NF, fields); 190 } 191 192 void 193 print(int nf, int nfp, char *fields[]) 194 { 195 int fn; 196 int bound; 197 198 bound = (nf > nfp) ? nfp : nf; 199 printf("%d:\t", nf); 200 for (fn = 0; fn < bound; fn++) 201 printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 202 } 203 204 #define RNF 5 /* some table entries know this */ 205 struct { 206 char *str; 207 char *seps; 208 int nf; 209 char *fi[RNF]; 210 } tests[] = { 211 "", " ", 0, { "" }, 212 " ", " ", 2, { "", "" }, 213 "x", " ", 1, { "x" }, 214 "xy", " ", 1, { "xy" }, 215 "x y", " ", 2, { "x", "y" }, 216 "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 217 " a bcd", " ", 4, { "", "", "a", "bcd" }, 218 "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 219 " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 220 221 "", " _", 0, { "" }, 222 " ", " _", 2, { "", "" }, 223 "x", " _", 1, { "x" }, 224 "x y", " _", 2, { "x", "y" }, 225 "ab _ cd", " _", 2, { "ab", "cd" }, 226 " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 227 "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 228 " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 229 230 "", " _~", 0, { "" }, 231 " ", " _~", 2, { "", "" }, 232 "x", " _~", 1, { "x" }, 233 "x y", " _~", 2, { "x", "y" }, 234 "ab _~ cd", " _~", 2, { "ab", "cd" }, 235 " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 236 "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 237 "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 238 239 "", " _~-", 0, { "" }, 240 " ", " _~-", 2, { "", "" }, 241 "x", " _~-", 1, { "x" }, 242 "x y", " _~-", 2, { "x", "y" }, 243 "ab _~- cd", " _~-", 2, { "ab", "cd" }, 244 " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 245 "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 246 "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 247 248 "", " ", 0, { "" }, 249 " ", " ", 2, { "", "" }, 250 "x", " ", 1, { "x" }, 251 "xy", " ", 1, { "xy" }, 252 "x y", " ", 2, { "x", "y" }, 253 "abc def g ", " ", 4, { "abc", "def", "g", "" }, 254 " a bcd", " ", 3, { "", "a", "bcd" }, 255 "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 256 " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 257 258 "", "", 0, { "" }, 259 " ", "", 0, { "" }, 260 "x", "", 1, { "x" }, 261 "xy", "", 1, { "xy" }, 262 "x y", "", 2, { "x", "y" }, 263 "abc def g ", "", 3, { "abc", "def", "g" }, 264 "\t a bcd", "", 2, { "a", "bcd" }, 265 " a \tb\t c ", "", 3, { "a", "b", "c" }, 266 "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 267 "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 268 " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 269 270 NULL, NULL, 0, { NULL }, 271 }; 272 273 void 274 regress(void) 275 { 276 char buf[512]; 277 int n; 278 char *fields[RNF+1]; 279 int nf; 280 int i; 281 int printit; 282 char *f; 283 284 for (n = 0; tests[n].str != NULL; n++) { 285 (void) strcpy(buf, tests[n].str); 286 fields[RNF] = NULL; 287 nf = split(buf, fields, RNF, tests[n].seps); 288 printit = 0; 289 if (nf != tests[n].nf) { 290 printf("split `%s' by `%s' gave %d fields, not %d\n", 291 tests[n].str, tests[n].seps, nf, tests[n].nf); 292 printit = 1; 293 } else if (fields[RNF] != NULL) { 294 printf("split() went beyond array end\n"); 295 printit = 1; 296 } else { 297 for (i = 0; i < nf && i < RNF; i++) { 298 f = fields[i]; 299 if (f == NULL) 300 f = "(NULL)"; 301 if (strcmp(f, tests[n].fi[i]) != 0) { 302 printf("split `%s' by `%s' field %d is `%s', not `%s'\n", 303 tests[n].str, tests[n].seps, 304 i, fields[i], tests[n].fi[i]); 305 printit = 1; 306 } 307 } 308 } 309 if (printit) 310 print(nf, RNF, fields); 311 } 312 } 313 #endif 314