xref: /titanic_52/usr/src/cmd/fgrep/fgrep.c (revision 10d63b7db37a83b39c7f511cf9426c9d03ea0760)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 /*
34  * Copyright 2013 Damian Bogel. All rights reserved.
35  */
36 
37 /*
38  * fgrep -- print all lines containing any of a set of keywords
39  *
40  *	status returns:
41  *		0 - ok, and some matches
42  *		1 - ok, but no matches
43  *		2 - some error
44  */
45 
46 #include <stdio.h>
47 #include <ctype.h>
48 #include <sys/types.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <locale.h>
52 #include <libintl.h>
53 #include <euc.h>
54 #include <sys/stat.h>
55 #include <fcntl.h>
56 
57 #include <getwidth.h>
58 
59 eucwidth_t WW;
60 #define	WIDTH1	WW._eucw1
61 #define	WIDTH2	WW._eucw2
62 #define	WIDTH3	WW._eucw3
63 #define	MULTI_BYTE	WW._multibyte
64 #define	GETONE(lc, p) \
65 	cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
66 		(ISSET2(lc) ? WIDTH2 :                       \
67 		(ISSET3(lc) ? WIDTH3 : WIDTH1));             \
68 	if (--cw > --ccount) {                           \
69 		cw -= ccount;                                \
70 		while (ccount--)                             \
71 			lc = (lc << 7) | ((*p++) & 0177);        \
72 			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
73 			if (nlp == buf) {                        \
74 				/* Increase the buffer size */       \
75 				fw_lBufsiz += BUFSIZ;                \
76 				if ((buf = realloc(buf,              \
77 					fw_lBufsiz + BUFSIZ)) == NULL) { \
78 					exit(2); /* out of memory */     \
79 				}                                    \
80 				nlp = buf;                           \
81 				p = &buf[fw_lBufsiz];                \
82 			} else {                                 \
83 				/* shift the buffer contents down */ \
84 				(void) memmove(buf, nlp,             \
85 					&buf[fw_lBufsiz + BUFSIZ] - nlp);\
86 				p -= nlp - buf;                      \
87 				nlp = buf;                           \
88 			}                                        \
89 		}                                            \
90 		if (p > &buf[fw_lBufsiz]) {                  \
91 			if ((ccount = fread(p, sizeof (char),    \
92 			    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
93 				<= 0) break;                         \
94 		} else if ((ccount = fread(p,                \
95 			sizeof (char),  BUFSIZ, fptr)) <= 0)     \
96 			break;                                   \
97 		blkno += (long long)ccount;                  \
98 	}                                                \
99 	ccount -= cw;                                    \
100 	while (cw--)                                     \
101 		lc = (lc << 7) | ((*p++) & 0177)
102 
103 /*
104  * The same() macro and letter() function were inserted to allow for
105  * the -i option work for the multi-byte environment.
106  */
107 wchar_t letter();
108 #define	same(a, b) \
109 	(a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
110 	letter(a) == letter(b))
111 
112 #define	STDIN_FILENAME gettext("(standard input)")
113 
114 #define	QSIZE 400
115 struct words {
116 	wchar_t inp;
117 	char	out;
118 	struct	words *nst;
119 	struct	words *link;
120 	struct	words *fail;
121 } *w = NULL, *smax, *q;
122 
123 FILE *fptr;
124 long long lnum;
125 int	bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, qflag;
126 int	Hflag, hflag, iflag;
127 int	retcode = 0;
128 int	nfile;
129 long long blkno;
130 int	nsucc;
131 long long tln;
132 FILE	*wordf;
133 char	*argptr;
134 off_t input_size = 0;
135 
136 void	execute(char *);
137 void	cgotofn(void);
138 void	overflo(void);
139 void	cfail(void);
140 
141 static long fw_lBufsiz = 0;
142 
143 int
144 main(int argc, char **argv)
145 {
146 	int c;
147 	int errflg = 0;
148 	struct stat file_stat;
149 
150 	(void) setlocale(LC_ALL, "");
151 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
152 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
153 #endif
154 	(void) textdomain(TEXT_DOMAIN);
155 
156 	while ((c = getopt(argc, argv, "Hhybcie:f:lnvxqs")) != EOF)
157 		switch (c) {
158 
159 		case 'q':
160 		case 's': /* Solaris: legacy option */
161 			qflag++;
162 			continue;
163 		case 'H':
164 			Hflag++;
165 			hflag = 0;
166 			continue;
167 		case 'h':
168 			hflag++;
169 			Hflag = 0;
170 			continue;
171 		case 'b':
172 			bflag++;
173 			continue;
174 
175 		case 'i':
176 		case 'y':
177 			iflag++;
178 			continue;
179 
180 		case 'c':
181 			cflag++;
182 			continue;
183 
184 		case 'e':
185 			eflag++;
186 			argptr = optarg;
187 			input_size = strlen(argptr);
188 			continue;
189 
190 		case 'f':
191 			fflag++;
192 			wordf = fopen(optarg, "r");
193 			if (wordf == NULL) {
194 				(void) fprintf(stderr,
195 				    gettext("fgrep: can't open %s\n"),
196 				    optarg);
197 				exit(2);
198 			}
199 
200 			if (fstat(fileno(wordf), &file_stat) == 0) {
201 				input_size = file_stat.st_size;
202 			} else {
203 				(void) fprintf(stderr,
204 				    gettext("fgrep: can't fstat %s\n"),
205 				    optarg);
206 				exit(2);
207 			}
208 
209 			continue;
210 
211 		case 'l':
212 			lflag++;
213 			continue;
214 
215 		case 'n':
216 			nflag++;
217 			continue;
218 
219 		case 'v':
220 			vflag++;
221 			continue;
222 
223 		case 'x':
224 			xflag++;
225 			continue;
226 
227 		case '?':
228 			errflg++;
229 	}
230 
231 	argc -= optind;
232 	if (errflg || ((argc <= 0) && !fflag && !eflag)) {
233 		(void) printf(gettext("usage: fgrep [ -bcHhilnqsvx ] "
234 		    "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
235 		exit(2);
236 	}
237 	if (!eflag && !fflag) {
238 		argptr = argv[optind];
239 		input_size = strlen(argptr);
240 		input_size++;
241 		optind++;
242 		argc--;
243 	}
244 
245 /*
246  * Normally we need one struct words for each letter in the pattern
247  * plus one terminating struct words with outp = 1, but when -x option
248  * is specified we require one more struct words for `\n` character so we
249  * calculate the input_size as below. We add extra 1 because
250  * (input_size/2) rounds off odd numbers
251  */
252 
253 	if (xflag) {
254 		input_size = input_size + (input_size/2) + 1;
255 	}
256 
257 	input_size++;
258 
259 	w = (struct words *)calloc(input_size, sizeof (struct words));
260 	if (w == NULL) {
261 		(void) fprintf(stderr,
262 		    gettext("fgrep: could not allocate "
263 		    "memory for wordlist\n"));
264 		exit(2);
265 	}
266 
267 	getwidth(&WW);
268 	if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
269 	    (WIDTH3 == 0)) {
270 		/*
271 		 * If non EUC-based locale,
272 		 * assume WIDTH1 is 1.
273 		 */
274 		WIDTH1 = 1;
275 	}
276 	WIDTH2++;
277 	WIDTH3++;
278 
279 	cgotofn();
280 	cfail();
281 	nfile = argc;
282 	argv = &argv[optind];
283 	if (argc <= 0) {
284 		execute((char *)NULL);
285 	} else
286 		while (--argc >= 0) {
287 			execute(*argv);
288 			argv++;
289 		}
290 
291 	if (w != NULL) {
292 		free(w);
293 	}
294 
295 	return (retcode != 0 ? retcode : nsucc == 0);
296 }
297 
298 void
299 execute(char *file)
300 {
301 	char *p;
302 	struct words *c;
303 	int ccount;
304 	static char *buf = NULL;
305 	int failed;
306 	char *nlp;
307 	wchar_t lc;
308 	int cw;
309 
310 	if (buf == NULL) {
311 		fw_lBufsiz = BUFSIZ;
312 		if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
313 			exit(2); /* out of memory */
314 		}
315 	}
316 
317 	if (file) {
318 		if ((fptr = fopen(file, "r")) == NULL) {
319 			(void) fprintf(stderr,
320 			    gettext("fgrep: can't open %s\n"), file);
321 			retcode = 2;
322 			return;
323 		}
324 	} else {
325 		fptr = stdin;
326 		file = STDIN_FILENAME;
327 	}
328 	ccount = 0;
329 	failed = 0;
330 	lnum = 1;
331 	tln = 0;
332 	blkno = 0;
333 	p = buf;
334 	nlp = p;
335 	c = w;
336 	for (;;) {
337 		if (c == 0)
338 			break;
339 		if (ccount <= 0) {
340 			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
341 				if (nlp == buf) {
342 					/* increase the buffer size */
343 					fw_lBufsiz += BUFSIZ;
344 					if ((buf = realloc(buf,
345 					    fw_lBufsiz + BUFSIZ)) == NULL) {
346 						exit(2); /* out of memory */
347 					}
348 					nlp = buf;
349 					p = &buf[fw_lBufsiz];
350 				} else {
351 					/* shift the buffer down */
352 					(void) memmove(buf, nlp,
353 					    &buf[fw_lBufsiz + BUFSIZ]
354 					    - nlp);
355 					p -= nlp - buf;
356 					nlp = buf;
357 				}
358 
359 			}
360 			if (p > &buf[fw_lBufsiz]) {
361 				if ((ccount = fread(p, sizeof (char),
362 				    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
363 				    <= 0)
364 					break;
365 			} else if ((ccount = fread(p, sizeof (char),
366 			    BUFSIZ, fptr)) <= 0)
367 				break;
368 			blkno += (long long)ccount;
369 		}
370 		GETONE(lc, p);
371 nstate:
372 		if (same(c->inp, lc)) {
373 			c = c->nst;
374 		} else if (c->link != 0) {
375 			c = c->link;
376 			goto nstate;
377 		} else {
378 			c = c->fail;
379 			failed = 1;
380 			if (c == 0) {
381 				c = w;
382 istate:
383 				if (same(c->inp, lc)) {
384 					c = c->nst;
385 				} else if (c->link != 0) {
386 					c = c->link;
387 					goto istate;
388 				}
389 			} else
390 				goto nstate;
391 		}
392 
393 		if (c == 0)
394 			break;
395 
396 		if (c->out) {
397 			while (lc != '\n') {
398 				if (ccount <= 0) {
399 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
400 	if (nlp == buf) {
401 		/* increase buffer size */
402 		fw_lBufsiz += BUFSIZ;
403 		if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
404 			exit(2); /* out of memory */
405 		}
406 		nlp = buf;
407 		p = &buf[fw_lBufsiz];
408 	} else {
409 		/* shift buffer down */
410 		(void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
411 		p -= nlp - buf;
412 		nlp = buf;
413 	}
414 }
415 if (p > &buf[fw_lBufsiz]) {
416 	if ((ccount = fread(p, sizeof (char),
417 		&buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
418 	} else if ((ccount = fread(p, sizeof (char), BUFSIZ,
419 		fptr)) <= 0) break;
420 		blkno += (long long)ccount;
421 	}
422 	GETONE(lc, p);
423 }
424 			if ((vflag && (failed == 0 || xflag == 0)) ||
425 				(vflag == 0 && xflag && failed))
426 				goto nomatch;
427 succeed:
428 			nsucc = 1;
429 			if (lflag || qflag) {
430 				if (!qflag)
431 					(void) printf("%s\n", file);
432 				(void) fclose(fptr);
433 				return;
434 			}
435 			if (cflag) {
436 				tln++;
437 			} else {
438 				if (Hflag || (nfile > 1 && !hflag))
439 					(void) printf("%s:", file);
440 				if (bflag)
441 					(void) printf("%lld:",
442 						(blkno - (long long)(ccount-1))
443 						/ BUFSIZ);
444 				if (nflag)
445 					(void) printf("%lld:", lnum);
446 				if (p <= nlp) {
447 					while (nlp < &buf[fw_lBufsiz + BUFSIZ])
448 						(void) putchar(*nlp++);
449 					nlp = buf;
450 				}
451 				while (nlp < p)
452 					(void) putchar(*nlp++);
453 			}
454 nomatch:
455 			lnum++;
456 			nlp = p;
457 			c = w;
458 			failed = 0;
459 			continue;
460 		}
461 		if (lc == '\n')
462 			if (vflag)
463 				goto succeed;
464 			else {
465 				lnum++;
466 				nlp = p;
467 				c = w;
468 				failed = 0;
469 			}
470 	}
471 	(void) fclose(fptr);
472 	if (cflag && !qflag) {
473 		if (Hflag || (nfile > 1 && !hflag))
474 			(void) printf("%s:", file);
475 		(void) printf("%lld\n", tln);
476 	}
477 }
478 
479 
480 wchar_t
481 getargc(void)
482 {
483 	/* appends a newline to shell quoted argument list so */
484 	/* the list looks like it came from an ed style file  */
485 	wchar_t c;
486 	int cw;
487 	int b;
488 	static int endflg;
489 
490 
491 	if (wordf) {
492 		if ((b = getc(wordf)) == EOF)
493 			return (EOF);
494 		cw = ISASCII(c = (wchar_t)b) ? 1 :
495 		    (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
496 		while (--cw) {
497 			if ((b = getc(wordf)) == EOF)
498 				return (EOF);
499 			c = (c << 7) | (b & 0177);
500 		}
501 		return (iflag ? letter(c) : c);
502 	}
503 
504 	if (endflg)
505 		return (EOF);
506 
507 	{
508 		cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
509 		    (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
510 
511 		while (--cw)
512 			c = (c << 7) | ((*argptr++) & 0177);
513 		if (c == '\0') {
514 			endflg++;
515 			return ('\n');
516 		}
517 	}
518 	return (iflag ? letter(c) : c);
519 
520 
521 }
522 
523 void
524 cgotofn(void)
525 {
526 	int c;
527 	struct words *s;
528 
529 	s = smax = w;
530 nword:
531 	for (;;) {
532 		c = getargc();
533 		if (c == EOF)
534 			return;
535 		if (c == 0)
536 			goto enter;
537 		if (c == '\n') {
538 			if (xflag) {
539 				for (;;) {
540 					if (s->inp == c) {
541 						s = s->nst;
542 						break;
543 					}
544 					if (s->inp == 0)
545 						goto nenter;
546 					if (s->link == 0) {
547 						if (smax >= &w[input_size -1])
548 							overflo();
549 						s->link = ++smax;
550 						s = smax;
551 						goto nenter;
552 					}
553 					s = s->link;
554 				}
555 			}
556 			s->out = 1;
557 			s = w;
558 		} else {
559 loop:
560 			if (s->inp == c) {
561 				s = s->nst;
562 				continue;
563 			}
564 			if (s->inp == 0)
565 				goto enter;
566 			if (s->link == 0) {
567 				if (smax >= &w[input_size -1])
568 					overflo();
569 				s->link = ++smax;
570 				s = smax;
571 				goto enter;
572 			}
573 			s = s->link;
574 			goto loop;
575 		}
576 	}
577 
578 enter:
579 	do {
580 		s->inp = c;
581 		if (smax >= &w[input_size -1])
582 			overflo();
583 		s->nst = ++smax;
584 		s = smax;
585 	} while ((c = getargc()) != '\n' && c != EOF);
586 	if (xflag) {
587 nenter:
588 		s->inp = '\n';
589 		if (smax >= &w[input_size -1])
590 			overflo();
591 		s->nst = ++smax;
592 	}
593 	smax->out = 1;
594 	s = w;
595 	if (c != EOF)
596 		goto nword;
597 }
598 
599 /*
600  * This function is an unexpected condition, since input_size should have been
601  * calculated correctly before hand.
602  */
603 
604 void
605 overflo(void)
606 {
607 	(void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
608 	exit(2);
609 }
610 
611 void
612 cfail(void)
613 {
614 	int qsize = QSIZE;
615 	struct words **queue = NULL;
616 
617 	/*
618 	 * front and rear are pointers used to traverse the global words
619 	 * structure "w" which contains the data of input pattern file
620 	 */
621 	struct words **front, **rear;
622 	struct words *state;
623 	unsigned long frontoffset = 0, rearoffset = 0;
624 	char c;
625 	struct words *s;
626 	s = w;
627 	if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
628 	    == NULL) {
629 		perror("fgrep");
630 		exit(2);
631 	}
632 	front = rear = queue;
633 init:
634 	if ((s->inp) != 0) {
635 		*rear++ = s->nst;
636 	/*
637 	 * Reallocates the queue if the number of distinct starting
638 	 * character of patterns exceeds the qsize value
639 	 */
640 		if (rear >= &queue[qsize - 1]) {
641 			frontoffset = front - queue;
642 			rearoffset = rear - queue;
643 			qsize += QSIZE;
644 			if ((queue = (struct words **)realloc(queue,
645 				qsize * sizeof (struct words *))) == NULL) {
646 				perror("fgrep");
647 				exit(2);
648 			}
649 			front = queue + frontoffset;
650 			rear = queue + rearoffset;
651 		}
652 	}
653 	if ((s = s->link) != 0) {
654 		goto init;
655 	}
656 
657 	while (rear != front) {
658 		s = *front++;
659 cloop:
660 		if ((c = s->inp) != 0) {
661 			*rear++ = (q = s->nst);
662 		/*
663 		 * Reallocate the queue if the rear pointer reaches the end
664 		 * queue
665 		 */
666 			if (rear >= &queue[qsize - 1]) {
667 				frontoffset = front - queue;
668 				rearoffset = rear - queue;
669 				qsize += QSIZE;
670 				if ((queue = (struct words **)realloc(queue,
671 				    qsize * sizeof (struct words *))) == NULL) {
672 					perror("fgrep");
673 					exit(2);
674 				}
675 				front = queue + frontoffset;
676 				rear = queue + rearoffset;
677 			}
678 			state = s->fail;
679 floop:
680 			if (state == 0)
681 				state = w;
682 			if (state->inp == c) {
683 qloop:
684 				q->fail = state->nst;
685 				if ((state->nst)->out == 1)
686 					q->out = 1;
687 				if ((q = q->link) != 0)
688 					goto qloop;
689 			} else if ((state = state->link) != 0)
690 				goto floop;
691 		}
692 		if ((s = s->link) != 0)
693 			goto cloop;
694 	}
695 }
696 
697 wchar_t
698 letter(wchar_t c)
699 {
700 	if (c >= 'a' && c <= 'z')
701 		return (c);
702 	if (c >= 'A' && c <= 'Z')
703 		return (c + 'a' - 'A');
704 	return (c);
705 }
706