/*	$NetBSD: split.c,v 1.1 2011/01/08 18:10:31 pgoyette Exp $	*/

/*-
 * Copyright (c) 1993 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * The project header is still included whenever it can be found.  The
 * guard only allows this file to be compiled on its own (for example
 * "cc -DTEST_SPLIT split.c") when that header is not on the include path.
 */
#if defined(__has_include)
# if __has_include("test_regex.h")
#  include "test_regex.h"
# endif
#else
# include "test_regex.h"
#endif

/*
 * split - divide a string into fields, like awk split()
 *
 * string	text to split; it is modified in place: the separator that
 *		ends each stored field is overwritten with '\0'
 * fields[]	receives a pointer to the start of each field; the list is
 *		not NULL-terminated
 * nfields	number of entries available in fields[]; must be at least 1,
 *		because the one- and two-separator paths store the first
 *		field before looking at the count
 * sep		""   white space (blank/tab): leading white space is skipped
 *		     and runs of white space separate fields
 *		"c"  single char: every occurrence ends a field, so adjacent
 *		     separators produce empty fields
 *		"ab" any run of the listed characters ([ab]+) is one separator
 *
 * Returns the number of fields found, including any that did not fit in
 * fields[].  When there are more fields than nfields, the last stored
 * entry points at the unsplit remainder of the string.  An empty string
 * (or, in white-space mode, one holding only white space) yields 0.
 */
int
split(char *string, char *fields[], int nfields, const char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = *sep;
	char sepc2;
	int fn;
	char **fp = fields;
	const char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;	/* counts down the free slots in fields[] */
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;	/* last slot keeps the rest unsplit */
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;	/* counts down the free slots in fields[] */
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/*
					 * In white-space mode an empty final
					 * field is only trailing white space,
					 * so it is not counted.
					 */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			/* swallow the rest of this run of separators */
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The string ended in white space, so the
				 * last "field" counted above was empty.  If
				 * it was the only overflow, the final stored
				 * field is complete and can be terminated.
				 */
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;			/* here fn counts up the fields seen */
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* scan to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		/* skip the rest of this run of separators */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT

#define MNF	10	/* field slots used by main() */
#define NF	5	/* field slots used by dosplit() */
#define RNF	5	/* field slots used by regress(); some table entries know this */

void dosplit(char *string, char *seps);
void print(int nf, int nfp, char *fields[]);
void regress(void);

/*
 * test program
 * pgm			runs regression
 * pgm sep		splits stdin lines by sep
 * pgm str sep		splits str by sep
 * pgm str sep n	splits str by sep n times
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
	char *fields[MNF];

	if (argc > 4) {
		/*
		 * Same loop as the next case but without calling split();
		 * presumably a baseline for timing comparisons.
		 */
		for (n = (int)strtol(argv[3], NULL, 10); n > 0; n--) {
			/* bounded copy: over-long arguments are truncated */
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
		}
	} else if (argc > 3) {
		for (n = (int)strtol(argv[3], NULL, 10); n > 0; n--) {
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	} else if (argc > 2) {
		dosplit(argv[1], argv[2]);
	} else if (argc > 1) {
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline only if one is present; a final
			 * line without '\n' must keep its last character.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	} else {
		regress();
	}

	return 0;
}

/*
 * dosplit - split string by seps into at most NF stored fields and
 * print the result.  string is modified by split().
 */
void
dosplit(char *string, char *seps)
{
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}

/*
 * print - show the result of a split
 *
 * nf		field count returned by split() (may exceed nfp)
 * nfp		number of entries actually stored in fields[]
 * fields	the stored field pointers
 *
 * Prints the count, then the stored fields quoted and separated by ", ".
 * The line is always newline-terminated, including when nf is 0 or when
 * more fields were found than stored.
 */
void
print(int nf, int nfp, char *fields[])
{
	int bound = (nf > nfp) ? nfp : nf;
	int fn;

	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
	printf("\n");
}

/*
 * Regression table: input string, separator set, expected return value
 * of split() with RNF slots, and the expected contents of the stored
 * fields.  A NULL str ends the table.
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* single separator: adjacent separators give empty fields */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/*
	 * NOTE(review): this group uses the same single-' ' separator as
	 * the first group; confirm whether a two-character separator was
	 * intended here.
	 */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def g ",		" ",	4,	{ "abc", "def", "g", "" } },
	{ " a bcd",		" ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* white-space mode */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def g ",		"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,			NULL,	0,	{ NULL } },
};

/*
 * regress - run split() over every entry of tests[] and report any
 * mismatch in field count or field contents, and any write past the
 * RNF slots handed to split().
 */
void
regress(void)
{
	char buf[512];
	int n;
	char *fields[RNF+1];
	int nf;
	int i;
	int printit;
	char *f;

	for (n = 0; tests[n].str != NULL; n++) {
		(void) snprintf(buf, sizeof(buf), "%s", tests[n].str);
		fields[RNF] = NULL;	/* sentinel just past the usable slots */
		nf = split(buf, fields, RNF, tests[n].seps);
		printit = 0;
		if (nf != tests[n].nf) {
			printf("split `%s' by `%s' gave %d fields, not %d\n",
			    tests[n].str, tests[n].seps, nf, tests[n].nf);
			printit = 1;
		} else if (fields[RNF] != NULL) {
			printf("split() went beyond array end\n");
			printit = 1;
		} else {
			for (i = 0; i < nf && i < RNF; i++) {
				f = fields[i];
				if (f == NULL)
					f = "(NULL)";
				if (strcmp(f, tests[n].fi[i]) != 0) {
					/* print f, never a raw NULL pointer */
					printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
					    tests[n].str, tests[n].seps,
					    i, f, tests[n].fi[i]);
					printit = 1;
				}
			}
		}
		if (printit)
			print(nf, RNF, fields);
	}
}
#endif