xref: /titanic_44/usr/src/cmd/fgrep/fgrep.c (revision b7f45089ccbe01bab3d7c7377b49d80d2ae18a69)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 /*
36  * fgrep -- print all lines containing any of a set of keywords
37  *
38  *	status returns:
39  *		0 - ok, and some matches
40  *		1 - ok, but no matches
41  *		2 - some error
42  */
43 
44 #include <stdio.h>
45 #include <ctype.h>
46 #include <sys/types.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <locale.h>
50 #include <libintl.h>
51 #include <euc.h>
52 
53 #include <getwidth.h>
54 
55 eucwidth_t WW;
56 #define	WIDTH1	WW._eucw1
57 #define	WIDTH2	WW._eucw2
58 #define	WIDTH3	WW._eucw3
59 #define	MULTI_BYTE	WW._multibyte
60 #define	GETONE(lc, p) \
61 	cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
62 		(ISSET2(lc) ? WIDTH2 :                       \
63 		(ISSET3(lc) ? WIDTH3 : WIDTH1));             \
64 	if (--cw > --ccount) {                           \
65 		cw -= ccount;                                \
66 		while (ccount--)                             \
67 			lc = (lc << 7) | ((*p++) & 0177);        \
68 			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
69 			if (nlp == buf) {                        \
70 				/* Increase the buffer size */       \
71 				fw_lBufsiz += BUFSIZ;                \
72 				if ((buf = realloc(buf,              \
73 					fw_lBufsiz + BUFSIZ)) == NULL) { \
74 					exit(2); /* out of memory */     \
75 				}                                    \
76 				nlp = buf;                           \
77 				p = &buf[fw_lBufsiz];                \
78 			} else {                                 \
79 				/* shift the buffer contents down */ \
80 				(void) memmove(buf, nlp,             \
81 					&buf[fw_lBufsiz + BUFSIZ] - nlp);\
82 				p -= nlp - buf;                      \
83 				nlp = buf;                           \
84 			}                                        \
85 		}                                            \
86 		if (p > &buf[fw_lBufsiz]) {                  \
87 			if ((ccount = fread(p, sizeof (char),    \
88 			    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
89 				<= 0) break;                         \
90 		} else if ((ccount = fread(p,                \
91 			sizeof (char),  BUFSIZ, fptr)) <= 0)     \
92 			break;                                   \
93 		blkno += (long long)ccount;                  \
94 	}                                                \
95 	ccount -= cw;                                    \
96 	while (cw--)                                     \
97 		lc = (lc << 7) | ((*p++) & 0177)
98 
/*
 * The same() macro and letter() function were inserted to allow the
 * -i option to work in a multi-byte environment.
 *
 * same(a, b) is true when a and b are equal.  It is also true when -i is
 * in effect, the character is eligible for case folding (the locale is
 * not multi-byte, or a is ASCII), the two values differ in exactly the
 * 0x20 bit, and both fold to the same letter.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
wchar_t letter(wchar_t);
#define	same(a, b) \
	((a) == (b) || (iflag && (!MULTI_BYTE || ISASCII(a)) && \
	((a) ^ (b)) == ' ' && letter(a) == letter(b)))
107 
/* Number of nodes in the statically allocated automaton table w[]. */
#define	MAXSIZ 6000

/* Initial length, and growth increment, of the work queue in cfail(). */
#define	QSIZE 400

/*
 * One node of the keyword-matching automaton.  A state is a chain of
 * nodes joined through "link"; each node on the chain is one candidate
 * input character for that state.
 */
struct words {
	wchar_t inp;		/* character this node accepts; 0 if unused */
	char	out;		/* non-zero: a keyword ends on reaching here */
	struct	words *nst;	/* where to go when inp matches */
	struct	words *link;	/* next candidate to try for this state */
	struct	words *fail;	/* where to resume when no candidate matches */
};

struct words w[MAXSIZ];	/* the automaton; w[0] heads the start state */
struct words *smax;	/* last node of w[] handed out so far */
struct words *q;	/* scratch cursor used by cfail() */
118 
/* Input being scanned and position bookkeeping; reset by execute(). */
FILE	*fptr;			/* stream currently being searched */
long long lnum;			/* number of the line being examined */
long long blkno;		/* running total of bytes read from fptr */
long long tln;			/* lines selected in this file (for -c) */

/* Command-line option counters; non-zero once the option has been seen. */
int	bflag;
int	cflag;
int	lflag;
int	fflag;
int	nflag;
int	vflag;
int	xflag;
int	eflag;
int	sflag;
int	hflag;
int	iflag;

/* Run-wide results and operands. */
int	retcode = 0;		/* set to 2 when a file cannot be opened */
int	nfile;			/* number of file operands */
int	nsucc;			/* set to 1 once any line has been selected */

/* Where the keywords come from. */
FILE	*wordf;			/* keyword file named by -f, if any */
char	*argptr;		/* keyword string from -e or first operand */

void	execute(char *);
void	cgotofn(void);
void	overflo(void);
void	cfail(void);

/*
 * Base size of execute()'s input buffer; the buffer itself is always
 * allocated BUFSIZ bytes larger than this.
 */
static long fw_lBufsiz = 0;
137 
138 int
139 main(int argc, char **argv)
140 {
141 	int c;
142 	int errflg = 0;
143 
144 	(void) setlocale(LC_ALL, "");
145 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
146 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
147 #endif
148 	(void) textdomain(TEXT_DOMAIN);
149 
150 	while ((c = getopt(argc, argv, "hybcie:f:lnvxs")) != EOF)
151 		switch (c) {
152 
153 		case 's':
154 			sflag++;
155 			continue;
156 		case 'h':
157 			hflag++;
158 			continue;
159 		case 'b':
160 			bflag++;
161 			continue;
162 
163 		case 'i':
164 		case 'y':
165 			iflag++;
166 			continue;
167 
168 		case 'c':
169 			cflag++;
170 			continue;
171 
172 		case 'e':
173 			eflag++;
174 			argptr = optarg;
175 			continue;
176 
177 		case 'f':
178 			fflag++;
179 			wordf = fopen(optarg, "r");
180 			if (wordf == NULL) {
181 				(void) fprintf(stderr,
182 					gettext("fgrep: can't open %s\n"),
183 					optarg);
184 				exit(2);
185 			}
186 			continue;
187 
188 		case 'l':
189 			lflag++;
190 			continue;
191 
192 		case 'n':
193 			nflag++;
194 			continue;
195 
196 		case 'v':
197 			vflag++;
198 			continue;
199 
200 		case 'x':
201 			xflag++;
202 			continue;
203 
204 		case '?':
205 			errflg++;
206 	}
207 
208 	argc -= optind;
209 	if (errflg || ((argc <= 0) && !fflag && !eflag)) {
210 		(void) printf(gettext("usage: fgrep [ -bchilnsvx ] "
211 			"[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
212 		exit(2);
213 	}
214 	if (!eflag && !fflag) {
215 		argptr = argv[optind];
216 		optind++;
217 		argc--;
218 	}
219 
220 	getwidth(&WW);
221 	if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
222 		(WIDTH3 == 0)) {
223 		/*
224 		 * If non EUC-based locale,
225 		 * assume WIDTH1 is 1.
226 		 */
227 		WIDTH1 = 1;
228 	}
229 	WIDTH2++;
230 	WIDTH3++;
231 
232 	cgotofn();
233 	cfail();
234 	nfile = argc;
235 	argv = &argv[optind];
236 	if (argc <= 0) {
237 		execute((char *)NULL);
238 	} else
239 		while (--argc >= 0) {
240 			execute(*argv);
241 			argv++;
242 		}
243 	return (retcode != 0 ? retcode : nsucc == 0);
244 }
245 
246 void
247 execute(char *file)
248 {
249 	char *p;
250 	struct words *c;
251 	int ccount;
252 	static char *buf = NULL;
253 	int failed;
254 	char *nlp;
255 	wchar_t lc;
256 	int cw;
257 
258 	if (buf == NULL) {
259 		fw_lBufsiz = BUFSIZ;
260 		if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
261 			exit(2); /* out of memory */
262 		}
263 	}
264 
265 	if (file) {
266 		if ((fptr = fopen(file, "r")) == NULL) {
267 			(void) fprintf(stderr,
268 				gettext("fgrep: can't open %s\n"), file);
269 			retcode = 2;
270 			return;
271 		}
272 	} else {
273 		file = "<stdin>";
274 		fptr = stdin;
275 	}
276 	ccount = 0;
277 	failed = 0;
278 	lnum = 1;
279 	tln = 0;
280 	blkno = 0;
281 	p = buf;
282 	nlp = p;
283 	c = w;
284 	for (;;) {
285 		if (c == 0)
286 			break;
287 		if (ccount <= 0) {
288 			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
289 				if (nlp == buf) {
290 					/* increase the buffer size */
291 					fw_lBufsiz += BUFSIZ;
292 					if ((buf = realloc(buf,
293 						fw_lBufsiz + BUFSIZ)) == NULL) {
294 						exit(2); /* out of memory */
295 					}
296 					nlp = buf;
297 					p = &buf[fw_lBufsiz];
298 				} else {
299 					/* shift the buffer down */
300 					(void) memmove(buf, nlp,
301 						&buf[fw_lBufsiz + BUFSIZ]
302 						- nlp);
303 					p -= nlp - buf;
304 					nlp = buf;
305 				}
306 
307 			}
308 			if (p > &buf[fw_lBufsiz]) {
309 				if ((ccount = fread(p, sizeof (char),
310 					&buf[fw_lBufsiz + BUFSIZ] - p, fptr))
311 					<= 0)
312 					break;
313 			} else if ((ccount = fread(p, sizeof (char),
314 				BUFSIZ, fptr)) <= 0)
315 				break;
316 			blkno += (long long)ccount;
317 		}
318 		GETONE(lc, p);
319 nstate:
320 		if (same(c->inp, lc)) {
321 			c = c->nst;
322 		} else if (c->link != 0) {
323 			c = c->link;
324 			goto nstate;
325 		} else {
326 			c = c->fail;
327 			failed = 1;
328 			if (c == 0) {
329 				c = w;
330 istate:
331 				if (same(c->inp, lc)) {
332 					c = c->nst;
333 				} else if (c->link != 0) {
334 					c = c->link;
335 					goto istate;
336 				}
337 			} else
338 				goto nstate;
339 		}
340 
341 		if (c == 0)
342 			break;
343 
344 		if (c->out) {
345 			while (lc != '\n') {
346 				if (ccount <= 0) {
347 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
348 	if (nlp == buf) {
349 		/* increase buffer size */
350 		fw_lBufsiz += BUFSIZ;
351 		if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
352 			exit(2); /* out of memory */
353 		}
354 		nlp = buf;
355 		p = &buf[fw_lBufsiz];
356 	} else {
357 		/* shift buffer down */
358 		(void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
359 		p -= nlp - buf;
360 		nlp = buf;
361 	}
362 }
363 if (p > &buf[fw_lBufsiz]) {
364 	if ((ccount = fread(p, sizeof (char),
365 		&buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
366 	} else if ((ccount = fread(p, sizeof (char), BUFSIZ,
367 		fptr)) <= 0) break;
368 		blkno += (long long)ccount;
369 	}
370 	GETONE(lc, p);
371 }
372 			if ((vflag && (failed == 0 || xflag == 0)) ||
373 				(vflag == 0 && xflag && failed))
374 				goto nomatch;
375 succeed:
376 			nsucc = 1;
377 			if (cflag)
378 				tln++;
379 			else if (lflag && !sflag) {
380 				(void) printf("%s\n", file);
381 				(void) fclose(fptr);
382 				return;
383 			} else if (!sflag) {
384 				if (nfile > 1 && !hflag)
385 					(void) printf("%s:", file);
386 				if (bflag)
387 					(void) printf("%lld:",
388 						(blkno - (long long)(ccount-1))
389 						/ BUFSIZ);
390 				if (nflag)
391 					(void) printf("%lld:", lnum);
392 				if (p <= nlp) {
393 					while (nlp < &buf[fw_lBufsiz + BUFSIZ])
394 						(void) putchar(*nlp++);
395 					nlp = buf;
396 				}
397 				while (nlp < p)
398 					(void) putchar(*nlp++);
399 			}
400 nomatch:
401 			lnum++;
402 			nlp = p;
403 			c = w;
404 			failed = 0;
405 			continue;
406 		}
407 		if (lc == '\n')
408 			if (vflag)
409 				goto succeed;
410 			else {
411 				lnum++;
412 				nlp = p;
413 				c = w;
414 				failed = 0;
415 			}
416 	}
417 	(void) fclose(fptr);
418 	if (cflag) {
419 		if ((nfile > 1) && !hflag)
420 			(void) printf("%s:", file);
421 		(void) printf("%lld\n", tln);
422 	}
423 }
424 
425 
426 wchar_t
427 getargc(void)
428 {
429 	/* appends a newline to shell quoted argument list so */
430 	/* the list looks like it came from an ed style file  */
431 	wchar_t c;
432 	int cw;
433 	int b;
434 	static int endflg;
435 
436 
437 	if (wordf) {
438 		if ((b = getc(wordf)) == EOF)
439 			return (EOF);
440 		cw = ISASCII(c = (wchar_t)b) ? 1 :
441 			(ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
442 		while (--cw) {
443 			if ((b = getc(wordf)) == EOF)
444 				return (EOF);
445 			c = (c << 7) | (b & 0177);
446 		}
447 		return (iflag ? letter(c) : c);
448 	}
449 
450 	if (endflg)
451 		return (EOF);
452 
453 	{
454 		cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
455 			(ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
456 
457 		while (--cw)
458 			c = (c << 7) | ((*argptr++) & 0177);
459 		if (c == '\0') {
460 			endflg++;
461 			return ('\n');
462 		}
463 	}
464 	return (iflag ? letter(c) : c);
465 
466 
467 }
468 
469 void
470 cgotofn(void)
471 {
472 	int c;
473 	struct words *s;
474 
475 	s = smax = w;
476 nword:
477 	for (;;) {
478 		c = getargc();
479 		if (c == EOF)
480 			return;
481 		if (c == 0)
482 			goto enter;
483 		if (c == '\n') {
484 			if (xflag) {
485 				for (;;) {
486 					if (s->inp == c) {
487 						s = s->nst;
488 						break;
489 					}
490 					if (s->inp == 0)
491 						goto nenter;
492 					if (s->link == 0) {
493 						if (smax >= &w[MAXSIZ -1])
494 							overflo();
495 						s->link = ++smax;
496 						s = smax;
497 						goto nenter;
498 					}
499 					s = s->link;
500 				}
501 			}
502 			s->out = 1;
503 			s = w;
504 		} else {
505 loop:
506 			if (s->inp == c) {
507 				s = s->nst;
508 				continue;
509 			}
510 			if (s->inp == 0)
511 				goto enter;
512 			if (s->link == 0) {
513 				if (smax >= &w[MAXSIZ - 1])
514 					overflo();
515 				s->link = ++smax;
516 				s = smax;
517 				goto enter;
518 			}
519 			s = s->link;
520 			goto loop;
521 		}
522 	}
523 
524 enter:
525 	do {
526 		s->inp = c;
527 		if (smax >= &w[MAXSIZ - 1])
528 			overflo();
529 		s->nst = ++smax;
530 		s = smax;
531 	} while ((c = getargc()) != '\n' && c != EOF);
532 	if (xflag) {
533 nenter:
534 		s->inp = '\n';
535 		if (smax >= &w[MAXSIZ -1])
536 			overflo();
537 		s->nst = ++smax;
538 	}
539 	smax->out = 1;
540 	s = w;
541 	if (c != EOF)
542 		goto nword;
543 }
544 
/*
 * Report that the keyword list does not fit in the automaton table and
 * exit with status 2.  Does not return.
 *
 * The translated message is written with fputs() rather than being used
 * as a printf format, so a '%' in a message catalog cannot be
 * misinterpreted as a conversion.
 */
void
overflo(void)
{
	(void) fputs(gettext("wordlist too large\n"), stderr);
	exit(2);
}
551 
552 void
553 cfail(void)
554 {
555 	int qsize = QSIZE;
556 	struct words **queue = NULL;
557 
558 	/*
559 	 * front and rear are pointers used to traverse the global words
560 	 * structure "w" which contains the data of input pattern file
561 	 */
562 	struct words **front, **rear;
563 	struct words *state;
564 	unsigned long frontoffset = 0, rearoffset = 0;
565 	char c;
566 	struct words *s;
567 	s = w;
568 	if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
569 				== NULL) {
570 		perror("fgrep");
571 		exit(2);
572 	}
573 	front = rear = queue;
574 init:
575 	if ((s->inp) != 0) {
576 		*rear++ = s->nst;
577 	/*
578 	 * Reallocates the queue if the number of distinct starting
579 	 * character of patterns exceeds the qsize value
580 	 */
581 		if (rear >= &queue[qsize - 1]) {
582 			frontoffset = front - queue;
583 			rearoffset = rear - queue;
584 			qsize += QSIZE;
585 			if ((queue = (struct words **)realloc(queue,
586 				qsize * sizeof (struct words *))) == NULL) {
587 				perror("fgrep");
588 				exit(2);
589 			}
590 			front = queue + frontoffset;
591 			rear = queue + rearoffset;
592 		}
593 	}
594 	if ((s = s->link) != 0) {
595 		goto init;
596 	}
597 
598 	while (rear != front) {
599 		s = *front++;
600 cloop:
601 		if ((c = s->inp) != 0) {
602 			*rear++ = (q = s->nst);
603 		/*
604 		 * Reallocate the queue if the rear pointer reaches the end
605 		 * queue
606 		 */
607 			if (rear >= &queue[qsize - 1]) {
608 				frontoffset = front - queue;
609 				rearoffset = rear - queue;
610 				qsize += QSIZE;
611 				if ((queue = (struct words **)realloc(queue,
612 				    qsize * sizeof (struct words *))) == NULL) {
613 					perror("fgrep");
614 					exit(2);
615 				}
616 				front = queue + frontoffset;
617 				rear = queue + rearoffset;
618 			}
619 			state = s->fail;
620 floop:
621 			if (state == 0)
622 				state = w;
623 			if (state->inp == c) {
624 qloop:
625 				q->fail = state->nst;
626 				if ((state->nst)->out == 1)
627 					q->out = 1;
628 				if ((q = q->link) != 0)
629 					goto qloop;
630 			} else if ((state = state->link) != 0)
631 				goto floop;
632 		}
633 		if ((s = s->link) != 0)
634 			goto cloop;
635 	}
636 }
637 
/*
 * Fold an ASCII upper-case letter to lower case; any other value,
 * including lower-case letters and non-ASCII characters, is returned
 * unchanged.
 */
wchar_t
letter(wchar_t c)
{
	int is_upper = (c >= 'A' && c <= 'Z');

	return (is_upper ? c + 'a' - 'A' : c);
}
647