xref: /titanic_51/usr/src/cmd/fgrep/fgrep.c (revision 1901763758d1e410cf3b2040acae1a01781765ce)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 /*
36  * fgrep -- print all lines containing any of a set of keywords
37  *
38  *	status returns:
39  *		0 - ok, and some matches
40  *		1 - ok, but no matches
41  *		2 - some error
42  */
43 
44 #include <stdio.h>
45 #include <ctype.h>
46 #include <sys/types.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <locale.h>
50 #include <libintl.h>
51 #include <euc.h>
52 #include <sys/stat.h>
53 #include <fcntl.h>
54 
55 #include <getwidth.h>
56 
57 eucwidth_t WW;
58 #define	WIDTH1	WW._eucw1
59 #define	WIDTH2	WW._eucw2
60 #define	WIDTH3	WW._eucw3
61 #define	MULTI_BYTE	WW._multibyte
62 #define	GETONE(lc, p) \
63 	cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
64 		(ISSET2(lc) ? WIDTH2 :                       \
65 		(ISSET3(lc) ? WIDTH3 : WIDTH1));             \
66 	if (--cw > --ccount) {                           \
67 		cw -= ccount;                                \
68 		while (ccount--)                             \
69 			lc = (lc << 7) | ((*p++) & 0177);        \
70 			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
71 			if (nlp == buf) {                        \
72 				/* Increase the buffer size */       \
73 				fw_lBufsiz += BUFSIZ;                \
74 				if ((buf = realloc(buf,              \
75 					fw_lBufsiz + BUFSIZ)) == NULL) { \
76 					exit(2); /* out of memory */     \
77 				}                                    \
78 				nlp = buf;                           \
79 				p = &buf[fw_lBufsiz];                \
80 			} else {                                 \
81 				/* shift the buffer contents down */ \
82 				(void) memmove(buf, nlp,             \
83 					&buf[fw_lBufsiz + BUFSIZ] - nlp);\
84 				p -= nlp - buf;                      \
85 				nlp = buf;                           \
86 			}                                        \
87 		}                                            \
88 		if (p > &buf[fw_lBufsiz]) {                  \
89 			if ((ccount = fread(p, sizeof (char),    \
90 			    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
91 				<= 0) break;                         \
92 		} else if ((ccount = fread(p,                \
93 			sizeof (char),  BUFSIZ, fptr)) <= 0)     \
94 			break;                                   \
95 		blkno += (long long)ccount;                  \
96 	}                                                \
97 	ccount -= cw;                                    \
98 	while (cw--)                                     \
99 		lc = (lc << 7) | ((*p++) & 0177)
100 
/*
 * The same() macro and the letter() function were added so that the
 * -i option also works in a multibyte environment.
 *
 * same(a, b) is true when the two character values are equal, or when
 * -i is in effect, the comparison is between single-byte/ASCII values,
 * the two values differ exactly in the 0x20 (' ') bit, and they fold to
 * the same lower-case letter.
 *
 * Both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 */
wchar_t letter(wchar_t);
#define	same(a, b) \
	((a) == (b) || (iflag && (!MULTI_BYTE || ISASCII(a)) && \
	(((a) ^ (b)) == ' ') && letter(a) == letter(b)))
109 
110 
/* Initial length of the work queue in cfail(), and the step it grows by. */
#define	QSIZE 400

/*
 * One state of the pattern-matching automaton.  The states live in a
 * single array allocated by main().
 */
struct words {
	wchar_t		inp;	/* character that takes this state to nst */
	char		out;	/* non-zero: a pattern ends at this state */
	struct words	*nst;	/* state entered when inp is matched */
	struct words	*link;	/* next alternative to try when inp differs */
	struct words	*fail;	/* state to resume from when nothing matches */
};

struct words *w = NULL;		/* base of the state array (start state) */
struct words *smax;		/* last state handed out by cgotofn() */
struct words *q;		/* scratch state pointer used by cfail() */
119 
/* Input stream currently being searched (a named file or stdin). */
FILE *fptr;

/* Line number within the current input; printed for -n. */
long long lnum;

/* Option counters; main() increments one each time its flag is seen. */
int	bflag;
int	cflag;
int	lflag;
int	fflag;
int	nflag;
int	vflag;
int	xflag;
int	eflag;
int	sflag;
int	hflag;
int	iflag;

/* Set to 2 by execute() when an input file cannot be opened. */
int	retcode = 0;

/* Number of file operands; more than one turns on "name:" prefixes. */
int	nfile;

/* Running count of bytes read from the current input (used for -b). */
long long blkno;

/* Becomes 1 once any line has been selected. */
int	nsucc;

/* Count of selected lines in the current input; printed for -c. */
long long tln;

/* Pattern file opened for -f, or NULL. */
FILE	*wordf;

/* Pattern string taken from -e or from the first operand. */
char	*argptr;

/* Number of struct words entries main() allocates for the automaton. */
off_t input_size = 0;

void	execute(char *);
void	cgotofn(void);
void	overflo(void);
void	cfail(void);

/* Size bookkeeping for execute()'s buffer, which is fw_lBufsiz + BUFSIZ. */
static long fw_lBufsiz = 0;
139 
140 int
141 main(int argc, char **argv)
142 {
143 	int c;
144 	int errflg = 0;
145 	struct stat file_stat;
146 
147 	(void) setlocale(LC_ALL, "");
148 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
149 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
150 #endif
151 	(void) textdomain(TEXT_DOMAIN);
152 
153 	while ((c = getopt(argc, argv, "hybcie:f:lnvxs")) != EOF)
154 		switch (c) {
155 
156 		case 's':
157 			sflag++;
158 			continue;
159 		case 'h':
160 			hflag++;
161 			continue;
162 		case 'b':
163 			bflag++;
164 			continue;
165 
166 		case 'i':
167 		case 'y':
168 			iflag++;
169 			continue;
170 
171 		case 'c':
172 			cflag++;
173 			continue;
174 
175 		case 'e':
176 			eflag++;
177 			argptr = optarg;
178 			input_size = strlen(argptr);
179 			continue;
180 
181 		case 'f':
182 			fflag++;
183 			wordf = fopen(optarg, "r");
184 			if (wordf == NULL) {
185 				(void) fprintf(stderr,
186 					gettext("fgrep: can't open %s\n"),
187 					optarg);
188 				exit(2);
189 			}
190 
191 			if (fstat(fileno(wordf), &file_stat) == 0) {
192 			    input_size = file_stat.st_size;
193 			} else {
194 				(void) fprintf(stderr,
195 					gettext("fgrep: can't fstat %s\n"),
196 					optarg);
197 				exit(2);
198 			}
199 
200 			continue;
201 
202 		case 'l':
203 			lflag++;
204 			continue;
205 
206 		case 'n':
207 			nflag++;
208 			continue;
209 
210 		case 'v':
211 			vflag++;
212 			continue;
213 
214 		case 'x':
215 			xflag++;
216 			continue;
217 
218 		case '?':
219 			errflg++;
220 	}
221 
222 	argc -= optind;
223 	if (errflg || ((argc <= 0) && !fflag && !eflag)) {
224 		(void) printf(gettext("usage: fgrep [ -bchilnsvx ] "
225 			"[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
226 		exit(2);
227 	}
228 	if (!eflag && !fflag) {
229 		argptr = argv[optind];
230 		input_size = strlen(argptr);
231 		input_size++;
232 		optind++;
233 		argc--;
234 	}
235 
236 /*
237  * Normally we need one struct words for each letter in the pattern
238  * plus one terminating struct words with outp = 1, but when -x option
239  * is specified we require one more struct words for `\n` character so we
240  * calculate the input_size as below. We add extra 1 because
241  * (input_size/2) rounds off odd numbers
242  */
243 
244 	if (xflag) {
245 		input_size = input_size + (input_size/2) + 1;
246 	}
247 
248 	input_size++;
249 
250 	w = (struct words *)calloc(input_size, sizeof (struct words));
251 	if (w == NULL) {
252 		(void) fprintf(stderr,
253 			gettext("fgrep: could not allocate "
254 				"memory for wordlist\n"));
255 		exit(2);
256 	}
257 
258 	getwidth(&WW);
259 	if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
260 		(WIDTH3 == 0)) {
261 		/*
262 		 * If non EUC-based locale,
263 		 * assume WIDTH1 is 1.
264 		 */
265 		WIDTH1 = 1;
266 	}
267 	WIDTH2++;
268 	WIDTH3++;
269 
270 	cgotofn();
271 	cfail();
272 	nfile = argc;
273 	argv = &argv[optind];
274 	if (argc <= 0) {
275 		execute((char *)NULL);
276 	} else
277 		while (--argc >= 0) {
278 			execute(*argv);
279 			argv++;
280 		}
281 
282 	if (w != NULL) {
283 		free(w);
284 	}
285 
286 	return (retcode != 0 ? retcode : nsucc == 0);
287 }
288 
289 void
290 execute(char *file)
291 {
292 	char *p;
293 	struct words *c;
294 	int ccount;
295 	static char *buf = NULL;
296 	int failed;
297 	char *nlp;
298 	wchar_t lc;
299 	int cw;
300 
301 	if (buf == NULL) {
302 		fw_lBufsiz = BUFSIZ;
303 		if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
304 			exit(2); /* out of memory */
305 		}
306 	}
307 
308 	if (file) {
309 		if ((fptr = fopen(file, "r")) == NULL) {
310 			(void) fprintf(stderr,
311 				gettext("fgrep: can't open %s\n"), file);
312 			retcode = 2;
313 			return;
314 		}
315 	} else {
316 		file = "<stdin>";
317 		fptr = stdin;
318 	}
319 	ccount = 0;
320 	failed = 0;
321 	lnum = 1;
322 	tln = 0;
323 	blkno = 0;
324 	p = buf;
325 	nlp = p;
326 	c = w;
327 	for (;;) {
328 		if (c == 0)
329 			break;
330 		if (ccount <= 0) {
331 			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
332 				if (nlp == buf) {
333 					/* increase the buffer size */
334 					fw_lBufsiz += BUFSIZ;
335 					if ((buf = realloc(buf,
336 						fw_lBufsiz + BUFSIZ)) == NULL) {
337 						exit(2); /* out of memory */
338 					}
339 					nlp = buf;
340 					p = &buf[fw_lBufsiz];
341 				} else {
342 					/* shift the buffer down */
343 					(void) memmove(buf, nlp,
344 						&buf[fw_lBufsiz + BUFSIZ]
345 						- nlp);
346 					p -= nlp - buf;
347 					nlp = buf;
348 				}
349 
350 			}
351 			if (p > &buf[fw_lBufsiz]) {
352 				if ((ccount = fread(p, sizeof (char),
353 					&buf[fw_lBufsiz + BUFSIZ] - p, fptr))
354 					<= 0)
355 					break;
356 			} else if ((ccount = fread(p, sizeof (char),
357 				BUFSIZ, fptr)) <= 0)
358 				break;
359 			blkno += (long long)ccount;
360 		}
361 		GETONE(lc, p);
362 nstate:
363 		if (same(c->inp, lc)) {
364 			c = c->nst;
365 		} else if (c->link != 0) {
366 			c = c->link;
367 			goto nstate;
368 		} else {
369 			c = c->fail;
370 			failed = 1;
371 			if (c == 0) {
372 				c = w;
373 istate:
374 				if (same(c->inp, lc)) {
375 					c = c->nst;
376 				} else if (c->link != 0) {
377 					c = c->link;
378 					goto istate;
379 				}
380 			} else
381 				goto nstate;
382 		}
383 
384 		if (c == 0)
385 			break;
386 
387 		if (c->out) {
388 			while (lc != '\n') {
389 				if (ccount <= 0) {
390 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
391 	if (nlp == buf) {
392 		/* increase buffer size */
393 		fw_lBufsiz += BUFSIZ;
394 		if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
395 			exit(2); /* out of memory */
396 		}
397 		nlp = buf;
398 		p = &buf[fw_lBufsiz];
399 	} else {
400 		/* shift buffer down */
401 		(void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
402 		p -= nlp - buf;
403 		nlp = buf;
404 	}
405 }
406 if (p > &buf[fw_lBufsiz]) {
407 	if ((ccount = fread(p, sizeof (char),
408 		&buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
409 	} else if ((ccount = fread(p, sizeof (char), BUFSIZ,
410 		fptr)) <= 0) break;
411 		blkno += (long long)ccount;
412 	}
413 	GETONE(lc, p);
414 }
415 			if ((vflag && (failed == 0 || xflag == 0)) ||
416 				(vflag == 0 && xflag && failed))
417 				goto nomatch;
418 succeed:
419 			nsucc = 1;
420 			if (cflag)
421 				tln++;
422 			else if (lflag && !sflag) {
423 				(void) printf("%s\n", file);
424 				(void) fclose(fptr);
425 				return;
426 			} else if (!sflag) {
427 				if (nfile > 1 && !hflag)
428 					(void) printf("%s:", file);
429 				if (bflag)
430 					(void) printf("%lld:",
431 						(blkno - (long long)(ccount-1))
432 						/ BUFSIZ);
433 				if (nflag)
434 					(void) printf("%lld:", lnum);
435 				if (p <= nlp) {
436 					while (nlp < &buf[fw_lBufsiz + BUFSIZ])
437 						(void) putchar(*nlp++);
438 					nlp = buf;
439 				}
440 				while (nlp < p)
441 					(void) putchar(*nlp++);
442 			}
443 nomatch:
444 			lnum++;
445 			nlp = p;
446 			c = w;
447 			failed = 0;
448 			continue;
449 		}
450 		if (lc == '\n')
451 			if (vflag)
452 				goto succeed;
453 			else {
454 				lnum++;
455 				nlp = p;
456 				c = w;
457 				failed = 0;
458 			}
459 	}
460 	(void) fclose(fptr);
461 	if (cflag) {
462 		if ((nfile > 1) && !hflag)
463 			(void) printf("%s:", file);
464 		(void) printf("%lld\n", tln);
465 	}
466 }
467 
468 
469 wchar_t
470 getargc(void)
471 {
472 	/* appends a newline to shell quoted argument list so */
473 	/* the list looks like it came from an ed style file  */
474 	wchar_t c;
475 	int cw;
476 	int b;
477 	static int endflg;
478 
479 
480 	if (wordf) {
481 		if ((b = getc(wordf)) == EOF)
482 			return (EOF);
483 		cw = ISASCII(c = (wchar_t)b) ? 1 :
484 			(ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
485 		while (--cw) {
486 			if ((b = getc(wordf)) == EOF)
487 				return (EOF);
488 			c = (c << 7) | (b & 0177);
489 		}
490 		return (iflag ? letter(c) : c);
491 	}
492 
493 	if (endflg)
494 		return (EOF);
495 
496 	{
497 		cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
498 			(ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
499 
500 		while (--cw)
501 			c = (c << 7) | ((*argptr++) & 0177);
502 		if (c == '\0') {
503 			endflg++;
504 			return ('\n');
505 		}
506 	}
507 	return (iflag ? letter(c) : c);
508 
509 
510 }
511 
512 void
513 cgotofn(void)
514 {
515 	int c;
516 	struct words *s;
517 
518 	s = smax = w;
519 nword:
520 	for (;;) {
521 		c = getargc();
522 		if (c == EOF)
523 			return;
524 		if (c == 0)
525 			goto enter;
526 		if (c == '\n') {
527 			if (xflag) {
528 				for (;;) {
529 					if (s->inp == c) {
530 						s = s->nst;
531 						break;
532 					}
533 					if (s->inp == 0)
534 						goto nenter;
535 					if (s->link == 0) {
536 						if (smax >= &w[input_size -1])
537 							overflo();
538 						s->link = ++smax;
539 						s = smax;
540 						goto nenter;
541 					}
542 					s = s->link;
543 				}
544 			}
545 			s->out = 1;
546 			s = w;
547 		} else {
548 loop:
549 			if (s->inp == c) {
550 				s = s->nst;
551 				continue;
552 			}
553 			if (s->inp == 0)
554 				goto enter;
555 			if (s->link == 0) {
556 				if (smax >= &w[input_size -1])
557 					overflo();
558 				s->link = ++smax;
559 				s = smax;
560 				goto enter;
561 			}
562 			s = s->link;
563 			goto loop;
564 		}
565 	}
566 
567 enter:
568 	do {
569 		s->inp = c;
570 		if (smax >= &w[input_size -1])
571 			overflo();
572 		s->nst = ++smax;
573 		s = smax;
574 	} while ((c = getargc()) != '\n' && c != EOF);
575 	if (xflag) {
576 nenter:
577 		s->inp = '\n';
578 		if (smax >= &w[input_size -1])
579 			overflo();
580 		s->nst = ++smax;
581 	}
582 	smax->out = 1;
583 	s = w;
584 	if (c != EOF)
585 		goto nword;
586 }
587 
/*
 * Report that the state array is full and exit with status 2.
 * Reaching this function is unexpected, since input_size should have
 * been calculated correctly beforehand.
 */

void
overflo(void)
{
	const char *msg = gettext("fgrep: wordlist too large\n");

	(void) fprintf(stderr, msg);
	exit(2);
}
599 
600 void
601 cfail(void)
602 {
603 	int qsize = QSIZE;
604 	struct words **queue = NULL;
605 
606 	/*
607 	 * front and rear are pointers used to traverse the global words
608 	 * structure "w" which contains the data of input pattern file
609 	 */
610 	struct words **front, **rear;
611 	struct words *state;
612 	unsigned long frontoffset = 0, rearoffset = 0;
613 	char c;
614 	struct words *s;
615 	s = w;
616 	if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
617 				== NULL) {
618 		perror("fgrep");
619 		exit(2);
620 	}
621 	front = rear = queue;
622 init:
623 	if ((s->inp) != 0) {
624 		*rear++ = s->nst;
625 	/*
626 	 * Reallocates the queue if the number of distinct starting
627 	 * character of patterns exceeds the qsize value
628 	 */
629 		if (rear >= &queue[qsize - 1]) {
630 			frontoffset = front - queue;
631 			rearoffset = rear - queue;
632 			qsize += QSIZE;
633 			if ((queue = (struct words **)realloc(queue,
634 				qsize * sizeof (struct words *))) == NULL) {
635 				perror("fgrep");
636 				exit(2);
637 			}
638 			front = queue + frontoffset;
639 			rear = queue + rearoffset;
640 		}
641 	}
642 	if ((s = s->link) != 0) {
643 		goto init;
644 	}
645 
646 	while (rear != front) {
647 		s = *front++;
648 cloop:
649 		if ((c = s->inp) != 0) {
650 			*rear++ = (q = s->nst);
651 		/*
652 		 * Reallocate the queue if the rear pointer reaches the end
653 		 * queue
654 		 */
655 			if (rear >= &queue[qsize - 1]) {
656 				frontoffset = front - queue;
657 				rearoffset = rear - queue;
658 				qsize += QSIZE;
659 				if ((queue = (struct words **)realloc(queue,
660 				    qsize * sizeof (struct words *))) == NULL) {
661 					perror("fgrep");
662 					exit(2);
663 				}
664 				front = queue + frontoffset;
665 				rear = queue + rearoffset;
666 			}
667 			state = s->fail;
668 floop:
669 			if (state == 0)
670 				state = w;
671 			if (state->inp == c) {
672 qloop:
673 				q->fail = state->nst;
674 				if ((state->nst)->out == 1)
675 					q->out = 1;
676 				if ((q = q->link) != 0)
677 					goto qloop;
678 			} else if ((state = state->link) != 0)
679 				goto floop;
680 		}
681 		if ((s = s->link) != 0)
682 			goto cloop;
683 	}
684 }
685 
/*
 * Fold an ASCII upper-case letter to lower case; every other value,
 * including lower-case letters and multibyte values, is returned as is.
 */
wchar_t
letter(wchar_t c)
{
	int is_upper = (c >= 'A' && c <= 'Z');

	return (is_upper ? c + 'a' - 'A' : c);
}
695