xref: /freebsd/usr.bin/ident/ident.c (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
1 /*-
2  * Copyright (c) 2015-2021 Baptiste Daroussin <bapt@FreeBSD.org>
3  * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org>
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/capsicum.h>
28 #include <sys/types.h>
29 #include <sys/sbuf.h>
30 
31 #include <capsicum_helpers.h>
32 #include <ctype.h>
33 #include <err.h>
34 #include <errno.h>
35 #include <stdbool.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <xlocale.h>
41 
/*
 * States of the keyword recognizer used by scan().  The note on each
 * state names the input that lets the recognizer advance; the order of
 * the enumerators is the order in which a well-formed keyword visits them.
 */
typedef enum {
	/* Idle: waiting for the opening '$'. */
	INIT,
	/* Opening '$' consumed: a letter starts the keyword name. */
	DELIM_SEEN,
	/* Inside the keyword name: ':' ends it. */
	KEYWORD,
	/* One ':' consumed: a second ':' selects PUNC_SEEN_SVN, ' ' -> TEXT. */
	PUNC_SEEN,
	/* "::" consumed (Subversion form): ' ' -> TEXT. */
	PUNC_SEEN_SVN,
	/* Inside the keyword value, up to the closing '$'. */
	TEXT
} analyzer_states;
51 
/*
 * Scan a stream for ident keywords and print every one found.
 *
 * A keyword has the form "$Name: text $".  In the Subversion fixed-width
 * form "$Name:: text $" the text may also end in '#', which marks an
 * expansion that did not fit.  Each match is written to stdout on its own
 * line, indented by five spaces.
 *
 * Parameters:
 *   fp     stream to read; consumed until EOF and left open for the caller.
 *   name   label printed as "name:" before scanning.  NULL suppresses the
 *          label and makes the warning refer to "standard input".
 *   quiet  when true, no warning is printed if nothing was found.
 *
 * Returns EXIT_SUCCESS if at least one keyword was printed, EXIT_FAILURE
 * otherwise.  Terminates the program through err(3) if the "C" locale or
 * the scratch memory stream cannot be set up or flushed.
 */
static int
scan(FILE *fp, const char *name, bool quiet)
{
	int c;
	char last;
	bool hasid = false;
	bool subversion = false;
	analyzer_states state = INIT;
	FILE *buffp;
	char *buf;
	size_t sz;
	locale_t l;

	/* Classify characters in the "C" locale, whatever the environment. */
	l = newlocale(LC_ALL_MASK, "C", NULL);
	if (l == NULL)
		err(EXIT_FAILURE, "newlocale()");

	/* Scratch stream that accumulates the current keyword candidate. */
	sz = 0;
	buf = NULL;
	buffp = open_memstream(&buf, &sz);
	if (buffp == NULL)
		err(EXIT_FAILURE, "open_memstream()");

	if (name != NULL)
		printf("%s:\n", name);

	while ((c = fgetc(fp)) != EOF) {
		switch (state) {
		case INIT:
			/* Only '$' can open a keyword; skip everything else. */
			if (c == '$')
				state = DELIM_SEEN;
			break;
		case DELIM_SEEN:
			if (isalpha_l(c, l)) {
				/*
				 * First letter of a candidate: restart the scratch
				 * stream and record "$<letter>".  Bytes left over
				 * from a longer, abandoned candidate may remain
				 * past the new position; they are harmless because
				 * the candidate's length is later taken from sz,
				 * never from a terminating NUL.
				 */
				rewind(buffp);
				fputc('$', buffp);
				fputc(c, buffp);
				state = KEYWORD;
			} else if (c != '$') {
				/* Neither a letter nor another '$': give up. */
				state = INIT;
			}
			/* A repeated '$' keeps us in DELIM_SEEN. */
			break;
		case KEYWORD:
			fputc(c, buffp);

			if (isalpha_l(c, l)) {
				/* Still inside the keyword name. */
				break;
			}
			if (c == ':') {
				/* First ':'; the Subversion form is not yet known. */
				state = PUNC_SEEN;
				subversion = false;
			} else if (c == '$') {
				/*
				 * Incomplete keyword, but this '$' may open the
				 * next one.
				 */
				state = DELIM_SEEN;
			} else {
				state = INIT;
			}
			break;
		case PUNC_SEEN:
		case PUNC_SEEN_SVN:
			fputc(c, buffp);

			switch (c) {
			case ':':
				/*
				 * "::" selects the Subversion form.  A third ':'
				 * makes the candidate invalid.
				 */
				if (state == PUNC_SEEN) {
					state = PUNC_SEEN_SVN;
					subversion = true;
				} else {
					state = INIT;
				}
				break;
			case ' ':
				/* The separator is complete; the value follows. */
				state = TEXT;
				break;
			default:
				/* Anything else after ':' or '::' is invalid. */
				state = INIT;
				break;
			}
			break;
		case TEXT:
			fputc(c, buffp);

			if (iscntrl_l(c, l)) {
				/* Control characters are not allowed in the value. */
				state = INIT;
			} else if (c == '$') {
				/*
				 * Closing '$'.  After a successful fflush() buf and
				 * sz describe exactly the bytes written since the
				 * last rewind(), so sz is the candidate's length.
				 */
				if (fflush(buffp) != 0)
					err(EXIT_FAILURE, "fflush()");
				/*
				 * A valid keyword ends with a space before the '$'.
				 *
				 * The Subversion extension uses '#' to show that the
				 * expansion exceeded the fixed width, so '#' is also
				 * accepted in Subversion mode.  No length check is
				 * enforced because GNU RCS ident(1) does not do it
				 * either.
				 */
				last = sz >= 2 ? buf[sz - 2] : '\0';
				if (last == ' ' || (subversion && last == '#')) {
					fputs("     ", stdout);
					fwrite(buf, 1, sz, stdout);
					fputc('\n', stdout);
					hasid = true;
				}
				state = INIT;
			}
			/* Any other character: stay in TEXT. */
			break;
		}
	}
	fclose(buffp);
	free(buf);
	freelocale(l);

	if (!hasid) {
		if (!quiet)
			fprintf(stderr, "%s warning: no id keywords in %s\n",
			    getprogname(), name ? name : "standard input");

		return (EXIT_FAILURE);
	}

	return (EXIT_SUCCESS);
}
210 
211 int
212 main(int argc, char **argv)
213 {
214 	bool quiet = false;
215 	int ch, i, *fds, fd;
216 	int ret = EXIT_SUCCESS;
217 	size_t nfds;
218 	FILE *fp;
219 
220 	while ((ch = getopt(argc, argv, "qV")) != -1) {
221 		switch (ch) {
222 		case 'q':
223 			quiet = true;
224 			break;
225 		case 'V':
226 			/* Do nothing, compat with GNU rcs's ident */
227 			return (EXIT_SUCCESS);
228 		default:
229 			errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]",
230 			    getprogname());
231 		}
232 	}
233 
234 	argc -= optind;
235 	argv += optind;
236 
237 	if (caph_limit_stdio() < 0)
238 		err(EXIT_FAILURE, "unable to limit stdio");
239 
240 	if (argc == 0) {
241 		nfds = 1;
242 		fds = malloc(sizeof(*fds));
243 		if (fds == NULL)
244 			err(EXIT_FAILURE, "unable to allocate fds array");
245 		fds[0] = STDIN_FILENO;
246 	} else {
247 		nfds = argc;
248 		fds = malloc(sizeof(*fds) * nfds);
249 		if (fds == NULL)
250 			err(EXIT_FAILURE, "unable to allocate fds array");
251 
252 		for (i = 0; i < argc; i++) {
253 			fds[i] = fd = open(argv[i], O_RDONLY);
254 			if (fd < 0) {
255 				warn("%s", argv[i]);
256 				ret = EXIT_FAILURE;
257 				continue;
258 			}
259 			if (caph_limit_stream(fd, CAPH_READ) < 0)
260 				err(EXIT_FAILURE,
261 				    "unable to limit fcntls/rights for %s",
262 				    argv[i]);
263 		}
264 	}
265 
266 	/* Enter Capsicum sandbox. */
267 	if (caph_enter() < 0)
268 		err(EXIT_FAILURE, "unable to enter capability mode");
269 
270 	for (i = 0; i < (int)nfds; i++) {
271 		if (fds[i] < 0)
272 			continue;
273 
274 		fp = fdopen(fds[i], "r");
275 		if (fp == NULL) {
276 			warn("%s", argv[i]);
277 			ret = EXIT_FAILURE;
278 			continue;
279 		}
280 		if (scan(fp, argc == 0 ? NULL : argv[i], quiet) != EXIT_SUCCESS)
281 			ret = EXIT_FAILURE;
282 		fclose(fp);
283 	}
284 
285 	return (ret);
286 }
287