1*57718be8SEnji Cooper /* $NetBSD: split.c,v 1.1 2011/01/08 18:10:31 pgoyette Exp $ */ 2*57718be8SEnji Cooper 3*57718be8SEnji Cooper /*- 4*57718be8SEnji Cooper * Copyright (c) 1993 The NetBSD Foundation, Inc. 5*57718be8SEnji Cooper * All rights reserved. 6*57718be8SEnji Cooper * 7*57718be8SEnji Cooper * Redistribution and use in source and binary forms, with or without 8*57718be8SEnji Cooper * modification, are permitted provided that the following conditions 9*57718be8SEnji Cooper * are met: 10*57718be8SEnji Cooper * 1. Redistributions of source code must retain the above copyright 11*57718be8SEnji Cooper * notice, this list of conditions and the following disclaimer. 12*57718be8SEnji Cooper * 2. Redistributions in binary form must reproduce the above copyright 13*57718be8SEnji Cooper * notice, this list of conditions and the following disclaimer in the 14*57718be8SEnji Cooper * documentation and/or other materials provided with the distribution. 15*57718be8SEnji Cooper * 16*57718be8SEnji Cooper * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17*57718be8SEnji Cooper * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18*57718be8SEnji Cooper * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19*57718be8SEnji Cooper * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20*57718be8SEnji Cooper * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21*57718be8SEnji Cooper * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22*57718be8SEnji Cooper * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23*57718be8SEnji Cooper * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24*57718be8SEnji Cooper * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25*57718be8SEnji Cooper * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26*57718be8SEnji Cooper * POSSIBILITY OF SUCH DAMAGE. 27*57718be8SEnji Cooper */ 28*57718be8SEnji Cooper 29*57718be8SEnji Cooper #include <regex.h> 30*57718be8SEnji Cooper #include <stdio.h> 31*57718be8SEnji Cooper #include <string.h> 32*57718be8SEnji Cooper 33*57718be8SEnji Cooper #include "test_regex.h" 34*57718be8SEnji Cooper 35*57718be8SEnji Cooper /* 36*57718be8SEnji Cooper * split - divide a string into fields, like awk split() 37*57718be8SEnji Cooper * 38*57718be8SEnji Cooper * returns number of fields, including overflow 39*57718be8SEnji Cooper * 40*57718be8SEnji Cooper * fields[] list is not NULL-terminated 41*57718be8SEnji Cooper * nfields number of entries available in fields[] 42*57718be8SEnji Cooper * sep "" white, "c" single char, "ab" [ab]+ 43*57718be8SEnji Cooper */ 44*57718be8SEnji Cooper int 45*57718be8SEnji Cooper split(char *string, char *fields[], int nfields, const char *sep) 46*57718be8SEnji Cooper { 47*57718be8SEnji Cooper char *p = string; 48*57718be8SEnji Cooper char c; /* latest character */ 49*57718be8SEnji Cooper char sepc = *sep; 50*57718be8SEnji Cooper char sepc2; 51*57718be8SEnji Cooper int fn; 52*57718be8SEnji Cooper char **fp = fields; 53*57718be8SEnji Cooper const char *sepp; 54*57718be8SEnji Cooper int trimtrail; 55*57718be8SEnji Cooper 56*57718be8SEnji Cooper /* white space */ 57*57718be8SEnji Cooper if (sepc == '\0') { 58*57718be8SEnji Cooper while ((c = *p++) == ' ' || c == '\t') 59*57718be8SEnji Cooper continue; 60*57718be8SEnji Cooper p--; 61*57718be8SEnji Cooper trimtrail = 1; 62*57718be8SEnji Cooper sep = " \t"; /* note, code below knows this is 2 long */ 63*57718be8SEnji Cooper sepc = ' '; 64*57718be8SEnji Cooper } else 65*57718be8SEnji Cooper trimtrail = 0; 66*57718be8SEnji Cooper sepc2 = sep[1]; /* now we can safely pick this up */ 67*57718be8SEnji Cooper 68*57718be8SEnji Cooper /* catch empties */ 69*57718be8SEnji Cooper if (*p == '\0') 70*57718be8SEnji Cooper return(0); 71*57718be8SEnji Cooper 72*57718be8SEnji Cooper /* single separator */ 73*57718be8SEnji Cooper if (sepc2 == '\0') { 74*57718be8SEnji Cooper fn = nfields; 75*57718be8SEnji Cooper for (;;) { 76*57718be8SEnji Cooper *fp++ = p; 77*57718be8SEnji Cooper fn--; 78*57718be8SEnji Cooper if (fn == 0) 79*57718be8SEnji Cooper break; 80*57718be8SEnji Cooper while ((c = *p++) != sepc) 81*57718be8SEnji Cooper if (c == '\0') 82*57718be8SEnji Cooper return(nfields - fn); 83*57718be8SEnji Cooper *(p-1) = '\0'; 84*57718be8SEnji Cooper } 85*57718be8SEnji Cooper /* we have overflowed the fields vector -- just count them */ 86*57718be8SEnji Cooper fn = nfields; 87*57718be8SEnji Cooper for (;;) { 88*57718be8SEnji Cooper while ((c = *p++) != sepc) 89*57718be8SEnji Cooper if (c == '\0') 90*57718be8SEnji Cooper return(fn); 91*57718be8SEnji Cooper fn++; 92*57718be8SEnji Cooper } 93*57718be8SEnji Cooper /* not reached */ 94*57718be8SEnji Cooper } 95*57718be8SEnji Cooper 96*57718be8SEnji Cooper /* two separators */ 97*57718be8SEnji Cooper if (sep[2] == '\0') { 98*57718be8SEnji Cooper fn = nfields; 99*57718be8SEnji Cooper for (;;) { 100*57718be8SEnji Cooper *fp++ = p; 101*57718be8SEnji Cooper fn--; 102*57718be8SEnji Cooper while ((c = *p++) != sepc && c != sepc2) 103*57718be8SEnji Cooper if (c == '\0') { 104*57718be8SEnji Cooper if (trimtrail && **(fp-1) == '\0') 105*57718be8SEnji Cooper fn++; 106*57718be8SEnji Cooper return(nfields - fn); 107*57718be8SEnji Cooper } 108*57718be8SEnji Cooper if (fn == 0) 109*57718be8SEnji Cooper break; 110*57718be8SEnji Cooper *(p-1) = '\0'; 111*57718be8SEnji Cooper while ((c = *p++) == sepc || c == sepc2) 112*57718be8SEnji Cooper continue; 113*57718be8SEnji Cooper p--; 114*57718be8SEnji Cooper } 115*57718be8SEnji Cooper /* we have overflowed the fields vector -- just count them */ 116*57718be8SEnji Cooper fn = nfields; 117*57718be8SEnji Cooper while (c != '\0') { 118*57718be8SEnji Cooper while ((c = *p++) == sepc || c == sepc2) 119*57718be8SEnji Cooper continue; 120*57718be8SEnji Cooper p--; 121*57718be8SEnji Cooper fn++; 122*57718be8SEnji Cooper while ((c = *p++) != '\0' && c != sepc && c != sepc2) 123*57718be8SEnji Cooper continue; 124*57718be8SEnji Cooper } 125*57718be8SEnji Cooper /* might have to trim trailing white space */ 126*57718be8SEnji Cooper if (trimtrail) { 127*57718be8SEnji Cooper p--; 128*57718be8SEnji Cooper while ((c = *--p) == sepc || c == sepc2) 129*57718be8SEnji Cooper continue; 130*57718be8SEnji Cooper p++; 131*57718be8SEnji Cooper if (*p != '\0') { 132*57718be8SEnji Cooper if (fn == nfields+1) 133*57718be8SEnji Cooper *p = '\0'; 134*57718be8SEnji Cooper fn--; 135*57718be8SEnji Cooper } 136*57718be8SEnji Cooper } 137*57718be8SEnji Cooper return(fn); 138*57718be8SEnji Cooper } 139*57718be8SEnji Cooper 140*57718be8SEnji Cooper /* n separators */ 141*57718be8SEnji Cooper fn = 0; 142*57718be8SEnji Cooper for (;;) { 143*57718be8SEnji Cooper if (fn < nfields) 144*57718be8SEnji Cooper *fp++ = p; 145*57718be8SEnji Cooper fn++; 146*57718be8SEnji Cooper for (;;) { 147*57718be8SEnji Cooper c = *p++; 148*57718be8SEnji Cooper if (c == '\0') 149*57718be8SEnji Cooper return(fn); 150*57718be8SEnji Cooper sepp = sep; 151*57718be8SEnji Cooper while ((sepc = *sepp++) != '\0' && sepc != c) 152*57718be8SEnji Cooper continue; 153*57718be8SEnji Cooper if (sepc != '\0') /* it was a separator */ 154*57718be8SEnji Cooper break; 155*57718be8SEnji Cooper } 156*57718be8SEnji Cooper if (fn < nfields) 157*57718be8SEnji Cooper *(p-1) = '\0'; 158*57718be8SEnji Cooper for (;;) { 159*57718be8SEnji Cooper c = *p++; 160*57718be8SEnji Cooper sepp = sep; 161*57718be8SEnji Cooper while ((sepc = *sepp++) != '\0' && sepc != c) 162*57718be8SEnji Cooper continue; 163*57718be8SEnji Cooper if (sepc == '\0') /* it wasn't a separator */ 164*57718be8SEnji Cooper break; 165*57718be8SEnji Cooper } 166*57718be8SEnji Cooper p--; 167*57718be8SEnji Cooper } 168*57718be8SEnji Cooper 169*57718be8SEnji Cooper /* not reached */ 170*57718be8SEnji Cooper } 171*57718be8SEnji Cooper 172*57718be8SEnji Cooper #ifdef TEST_SPLIT 173*57718be8SEnji Cooper 174*57718be8SEnji Cooper 175*57718be8SEnji Cooper /* 176*57718be8SEnji Cooper * test program 177*57718be8SEnji Cooper * pgm runs regression 178*57718be8SEnji Cooper * pgm sep splits stdin lines by sep 179*57718be8SEnji Cooper * pgm str sep splits str by sep 180*57718be8SEnji Cooper * pgm str sep n splits str by sep n times 181*57718be8SEnji Cooper */ 182*57718be8SEnji Cooper int 183*57718be8SEnji Cooper main(int argc, char *argv[]) 184*57718be8SEnji Cooper { 185*57718be8SEnji Cooper char buf[512]; 186*57718be8SEnji Cooper int n; 187*57718be8SEnji Cooper # define MNF 10 188*57718be8SEnji Cooper char *fields[MNF]; 189*57718be8SEnji Cooper 190*57718be8SEnji Cooper if (argc > 4) 191*57718be8SEnji Cooper for (n = atoi(argv[3]); n > 0; n--) { 192*57718be8SEnji Cooper (void) strcpy(buf, argv[1]); 193*57718be8SEnji Cooper } 194*57718be8SEnji Cooper else if (argc > 3) 195*57718be8SEnji Cooper for (n = atoi(argv[3]); n > 0; n--) { 196*57718be8SEnji Cooper (void) strcpy(buf, argv[1]); 197*57718be8SEnji Cooper (void) split(buf, fields, MNF, argv[2]); 198*57718be8SEnji Cooper } 199*57718be8SEnji Cooper else if (argc > 2) 200*57718be8SEnji Cooper dosplit(argv[1], argv[2]); 201*57718be8SEnji Cooper else if (argc > 1) 202*57718be8SEnji Cooper while (fgets(buf, sizeof(buf), stdin) != NULL) { 203*57718be8SEnji Cooper buf[strlen(buf)-1] = '\0'; /* stomp newline */ 204*57718be8SEnji Cooper dosplit(buf, argv[1]); 205*57718be8SEnji Cooper } 206*57718be8SEnji Cooper else 207*57718be8SEnji Cooper regress(); 208*57718be8SEnji Cooper 209*57718be8SEnji Cooper exit(0); 210*57718be8SEnji Cooper } 211*57718be8SEnji Cooper 212*57718be8SEnji Cooper void 213*57718be8SEnji Cooper dosplit(char *string, char *seps) 214*57718be8SEnji Cooper { 215*57718be8SEnji Cooper # define NF 5 216*57718be8SEnji Cooper char *fields[NF]; 217*57718be8SEnji Cooper int nf; 218*57718be8SEnji Cooper 219*57718be8SEnji Cooper nf = split(string, fields, NF, seps); 220*57718be8SEnji Cooper print(nf, NF, fields); 221*57718be8SEnji Cooper } 222*57718be8SEnji Cooper 223*57718be8SEnji Cooper void 224*57718be8SEnji Cooper print(int nf, int nfp, char *fields) 225*57718be8SEnji Cooper { 226*57718be8SEnji Cooper int fn; 227*57718be8SEnji Cooper int bound; 228*57718be8SEnji Cooper 229*57718be8SEnji Cooper bound = (nf > nfp) ? nfp : nf; 230*57718be8SEnji Cooper printf("%d:\t", nf); 231*57718be8SEnji Cooper for (fn = 0; fn < bound; fn++) 232*57718be8SEnji Cooper printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 233*57718be8SEnji Cooper } 234*57718be8SEnji Cooper 235*57718be8SEnji Cooper #define RNF 5 /* some table entries know this */ 236*57718be8SEnji Cooper struct { 237*57718be8SEnji Cooper char *str; 238*57718be8SEnji Cooper char *seps; 239*57718be8SEnji Cooper int nf; 240*57718be8SEnji Cooper char *fi[RNF]; 241*57718be8SEnji Cooper } tests[] = { 242*57718be8SEnji Cooper "", " ", 0, { "" }, 243*57718be8SEnji Cooper " ", " ", 2, { "", "" }, 244*57718be8SEnji Cooper "x", " ", 1, { "x" }, 245*57718be8SEnji Cooper "xy", " ", 1, { "xy" }, 246*57718be8SEnji Cooper "x y", " ", 2, { "x", "y" }, 247*57718be8SEnji Cooper "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 248*57718be8SEnji Cooper " a bcd", " ", 4, { "", "", "a", "bcd" }, 249*57718be8SEnji Cooper "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 250*57718be8SEnji Cooper " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 251*57718be8SEnji Cooper 252*57718be8SEnji Cooper "", " _", 0, { "" }, 253*57718be8SEnji Cooper " ", " _", 2, { "", "" }, 254*57718be8SEnji Cooper "x", " _", 1, { "x" }, 255*57718be8SEnji Cooper "x y", " _", 2, { "x", "y" }, 256*57718be8SEnji Cooper "ab _ cd", " _", 2, { "ab", "cd" }, 257*57718be8SEnji Cooper " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 258*57718be8SEnji Cooper "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 259*57718be8SEnji Cooper " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 260*57718be8SEnji Cooper 261*57718be8SEnji Cooper "", " _~", 0, { "" }, 262*57718be8SEnji Cooper " ", " _~", 2, { "", "" }, 263*57718be8SEnji Cooper "x", " _~", 1, { "x" }, 264*57718be8SEnji Cooper "x y", " _~", 2, { "x", "y" }, 265*57718be8SEnji Cooper "ab _~ cd", " _~", 2, { "ab", "cd" }, 266*57718be8SEnji Cooper " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 267*57718be8SEnji Cooper "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 268*57718be8SEnji Cooper "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 269*57718be8SEnji Cooper 270*57718be8SEnji Cooper "", " _~-", 0, { "" }, 271*57718be8SEnji Cooper " ", " _~-", 2, { "", "" }, 272*57718be8SEnji Cooper "x", " _~-", 1, { "x" }, 273*57718be8SEnji Cooper "x y", " _~-", 2, { "x", "y" }, 274*57718be8SEnji Cooper "ab _~- cd", " _~-", 2, { "ab", "cd" }, 275*57718be8SEnji Cooper " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 276*57718be8SEnji Cooper "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 277*57718be8SEnji Cooper "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 278*57718be8SEnji Cooper 279*57718be8SEnji Cooper "", " ", 0, { "" }, 280*57718be8SEnji Cooper " ", " ", 2, { "", "" }, 281*57718be8SEnji Cooper "x", " ", 1, { "x" }, 282*57718be8SEnji Cooper "xy", " ", 1, { "xy" }, 283*57718be8SEnji Cooper "x y", " ", 2, { "x", "y" }, 284*57718be8SEnji Cooper "abc def g ", " ", 4, { "abc", "def", "g", "" }, 285*57718be8SEnji Cooper " a bcd", " ", 3, { "", "a", "bcd" }, 286*57718be8SEnji Cooper "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 287*57718be8SEnji Cooper " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 288*57718be8SEnji Cooper 289*57718be8SEnji Cooper "", "", 0, { "" }, 290*57718be8SEnji Cooper " ", "", 0, { "" }, 291*57718be8SEnji Cooper "x", "", 1, { "x" }, 292*57718be8SEnji Cooper "xy", "", 1, { "xy" }, 293*57718be8SEnji Cooper "x y", "", 2, { "x", "y" }, 294*57718be8SEnji Cooper "abc def g ", "", 3, { "abc", "def", "g" }, 295*57718be8SEnji Cooper "\t a bcd", "", 2, { "a", "bcd" }, 296*57718be8SEnji Cooper " a \tb\t c ", "", 3, { "a", "b", "c" }, 297*57718be8SEnji Cooper "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 298*57718be8SEnji Cooper "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 299*57718be8SEnji Cooper " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 300*57718be8SEnji Cooper 301*57718be8SEnji Cooper NULL, NULL, 0, { NULL }, 302*57718be8SEnji Cooper }; 303*57718be8SEnji Cooper 304*57718be8SEnji Cooper void 305*57718be8SEnji Cooper regress(void) 306*57718be8SEnji Cooper { 307*57718be8SEnji Cooper char buf[512]; 308*57718be8SEnji Cooper int n; 309*57718be8SEnji Cooper char *fields[RNF+1]; 310*57718be8SEnji Cooper int nf; 311*57718be8SEnji Cooper int i; 312*57718be8SEnji Cooper int printit; 313*57718be8SEnji Cooper char *f; 314*57718be8SEnji Cooper 315*57718be8SEnji Cooper for (n = 0; tests[n].str != NULL; n++) { 316*57718be8SEnji Cooper (void) strcpy(buf, tests[n].str); 317*57718be8SEnji Cooper fields[RNF] = NULL; 318*57718be8SEnji Cooper nf = split(buf, fields, RNF, tests[n].seps); 319*57718be8SEnji Cooper printit = 0; 320*57718be8SEnji Cooper if (nf != tests[n].nf) { 321*57718be8SEnji Cooper printf("split `%s' by `%s' gave %d fields, not %d\n", 322*57718be8SEnji Cooper tests[n].str, tests[n].seps, nf, tests[n].nf); 323*57718be8SEnji Cooper printit = 1; 324*57718be8SEnji Cooper } else if (fields[RNF] != NULL) { 325*57718be8SEnji Cooper printf("split() went beyond array end\n"); 326*57718be8SEnji Cooper printit = 1; 327*57718be8SEnji Cooper } else { 328*57718be8SEnji Cooper for (i = 0; i < nf && i < RNF; i++) { 329*57718be8SEnji Cooper f = fields[i]; 330*57718be8SEnji Cooper if (f == NULL) 331*57718be8SEnji Cooper f = "(NULL)"; 332*57718be8SEnji Cooper if (strcmp(f, tests[n].fi[i]) != 0) { 333*57718be8SEnji Cooper printf("split `%s' by `%s', field %d is `%s', not `%s'\n", 334*57718be8SEnji Cooper tests[n].str, tests[n].seps, 335*57718be8SEnji Cooper i, fields[i], tests[n].fi[i]); 336*57718be8SEnji Cooper printit = 1; 337*57718be8SEnji Cooper } 338*57718be8SEnji Cooper } 339*57718be8SEnji Cooper } 340*57718be8SEnji Cooper if (printit) 341*57718be8SEnji Cooper print(nf, RNF, fields); 342*57718be8SEnji Cooper } 343*57718be8SEnji Cooper } 344*57718be8SEnji Cooper #endif 345