xref: /freebsd/lib/libc/regex/grot/split.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 #include <sys/cdefs.h>
2 #include <stdio.h>
3 #include <string.h>
4 
5 #include "split.ih"
6 
/*
 - split - divide a string into fields, like awk split()
 == int split(char *string, char *fields[], int nfields, char *sep);
 - fields: list is not NULL-terminated
 - nfields: number of entries available in fields[]
 - sep: "" white, "c" single char, "ab" [ab]+
 *
 * The string is split in place: a separator that ends a stored field is
 * overwritten with '\0', and fields[] receives pointers into string.
 * At most nfields pointers are stored.  When there are more fields than
 * that, the last stored entry keeps the unsplit remainder of the string
 * and the return value still counts every field.  With nfields <= 0
 * nothing is stored and the string is not modified; the fields are only
 * counted.
 *
 * With sep "" (white space: blanks and tabs) leading and trailing white
 * space do not produce empty fields.  With a one-character sep every
 * occurrence delimits a field, so adjacent separators yield empty fields.
 * With two or more characters in sep, a run of separators counts as one.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;			/* number of fields found so far */
	char **fp = fields;
	char *sepp;
	char *field;		/* start of the field being scanned */
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = 0;
		for (;;) {
			/* only store while there is room in fields[] */
			if (fn < nfields)
				*fp++ = p;
			fn++;
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			/*
			 * Terminate the field only if another one can still
			 * be stored; the last stored field keeps the rest.
			 */
			if (fn < nfields)
				*(p-1) = '\0';
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = 0;
		for (;;) {
			field = p;
			if (fn < nfields)
				*fp++ = field;
			fn++;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* empty field after trailing white */
					if (trimtrail && *field == '\0')
						fn--;
					return(fn);
				}
			if (fn >= nfields)
				break;
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/*
		 * No room left in the fields vector -- just count the rest.
		 * c is the separator that ended the field scanned above.
		 */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * Only when the sole overflow "field" was the
				 * empty one after the trailing white space does
				 * that white space belong to the last stored
				 * field, so cut it off there.
				 */
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
141 
142 #ifdef TEST_SPLIT
143 
144 
/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	copies str n times without splitting
 *			(presumably a baseline for timing the previous mode)
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf, so it has to fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long\n", argv[0]);
		exit(1);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline if there is one.  A final line
			 * without a newline, or an empty read, is left as is
			 * rather than losing its last character or indexing
			 * buf[-1].
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}
181 
/*
 * dosplit - split one string by seps and print the result
 *
 * Room is provided for NF fields; split() modifies string in place.
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];

	/* split() runs to completion before print() looks at fields[] */
	print(split(string, fields, NF, seps), NF, fields);
}
192 
/*
 * print - show the result of a split on stdout
 *
 * nf is the field count returned by split(), nfp the capacity of fields[].
 * Prints nf, a tab, then the stored fields (at most nfp of them) quoted
 * and separated by ", ".  The line is always newline-terminated, including
 * when nf is 0 or when there were more fields than fields[] could hold.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	/* only the first nfp entries of fields[] were ever filled in */
	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "",
		    (fields[fn] != NULL) ? fields[fn] : "(NULL)");
	printf("\n");
}
204 
#define	RNF	5		/* some table entries know this */

/*
 * Regression table: split str by seps into an RNF-entry vector and expect
 * a return value of nf, with the stored fields equal to fi[].  A NULL str
 * ends the table.
 */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator specification */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {
	/* one separator character */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separator characters */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separator characters */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separator characters */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* the same separator character given twice */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator string: white space */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
273 
274 void
275 regress(void)
276 {
277 	char buf[512];
278 	int n;
279 	char *fields[RNF+1];
280 	int nf;
281 	int i;
282 	int printit;
283 	char *f;
284 
285 	for (n = 0; tests[n].str != NULL; n++) {
286 		(void) strcpy(buf, tests[n].str);
287 		fields[RNF] = NULL;
288 		nf = split(buf, fields, RNF, tests[n].seps);
289 		printit = 0;
290 		if (nf != tests[n].nf) {
291 			printf("split `%s' by `%s' gave %d fields, not %d\n",
292 				tests[n].str, tests[n].seps, nf, tests[n].nf);
293 			printit = 1;
294 		} else if (fields[RNF] != NULL) {
295 			printf("split() went beyond array end\n");
296 			printit = 1;
297 		} else {
298 			for (i = 0; i < nf && i < RNF; i++) {
299 				f = fields[i];
300 				if (f == NULL)
301 					f = "(NULL)";
302 				if (strcmp(f, tests[n].fi[i]) != 0) {
303 					printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
304 						tests[n].str, tests[n].seps,
305 						i, fields[i], tests[n].fi[i]);
306 					printit = 1;
307 				}
308 			}
309 		}
310 		if (printit)
311 			print(nf, RNF, fields);
312 	}
313 }
314 #endif
315