xref: /freebsd/usr.bin/ident/ident.c (revision 361e428888e630eb708c72cf31579a25ba5d4f03)
1 /*-
2  * Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org>
3  * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer
11  *    in this position and unchanged.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/types.h>
32 #include <sys/sbuf.h>
33 
34 #include <ctype.h>
35 #include <err.h>
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <xlocale.h>
41 
/*
 * States of the keyword scanner.  Each entry notes the input character
 * that advances the scanner out of that state.
 */
typedef enum {
	INIT = 0,		/* waiting for '$' */
	DELIM_SEEN = 1,		/* '$' seen; a letter starts the keyword */
	KEYWORD = 2,		/* in the keyword; ':' ends it */
	PUNC_SEEN = 3,		/* ':' seen; ':' -> _SVN, space -> TEXT */
	PUNC_SEEN_SVN = 4,	/* '::' seen; space -> TEXT */
	TEXT = 5		/* in the value; '$' terminates it */
} analyzer_states;
51 
/*
 * Scan a stream for keyword strings and print each one found.
 *
 * A keyword string has the form "$Keyword: text $".  The Subversion
 * fixed-width form "$Keyword:: text $" is recognized as well; in that form
 * the text may end in '#' instead of a space.
 *
 * fp    - stream to read; it is read up to EOF and is not closed here.
 * name  - printed as a "name:" header before scanning and used in the
 *         warning message; NULL means standard input (no header printed).
 * quiet - when true, suppress the "no id keywords" warning.
 *
 * Returns EXIT_SUCCESS if at least one keyword string was printed and
 * EXIT_FAILURE otherwise.  Exits through err(3) if the string buffer or
 * the "C" locale cannot be set up, or if the buffer cannot be finished.
 */
static int
scan(FILE *fp, const char *name, bool quiet)
{
	int c;
	char last;
	bool hasid = false;
	bool subversion = false;
	analyzer_states state = INIT;
	struct sbuf *id;
	locale_t l;

	id = sbuf_new_auto();
	if (id == NULL)
		err(EXIT_FAILURE, "sbuf_new_auto()");

	/* Classify characters in the "C" locale, whatever the environment. */
	l = newlocale(LC_ALL_MASK, "C", NULL);
	if (l == (locale_t)0)
		err(EXIT_FAILURE, "newlocale()");

	if (name != NULL)
		printf("%s:\n", name);

	while ((c = fgetc(fp)) != EOF) {
		switch (state) {
		case INIT:
			/* Transit to DELIM_SEEN on '$', otherwise stay put. */
			if (c == '$')
				state = DELIM_SEEN;
			break;
		case DELIM_SEEN:
			if (isalpha_l(c, l)) {
				/* A letter starts a new candidate keyword. */
				sbuf_clear(id);
				sbuf_putc(id, '$');
				sbuf_putc(id, c);
				state = KEYWORD;
			} else if (c != '$') {
				/* Anything but a further '$' aborts. */
				state = INIT;
			}
			break;
		case KEYWORD:
			sbuf_putc(id, c);

			if (isalpha_l(c, l)) {
				/* Additional letter: stay in KEYWORD. */
			} else if (c == ':') {
				/* First ':' ends the keyword name. */
				state = PUNC_SEEN;
				subversion = false;
			} else if (c == '$') {
				/*
				 * Incomplete ident, but this '$' could be
				 * the beginning of another keyword.
				 */
				state = DELIM_SEEN;
			} else {
				state = INIT;
			}
			break;
		case PUNC_SEEN:
		case PUNC_SEEN_SVN:
			sbuf_putc(id, c);

			switch (c) {
			case ':':
				/*
				 * A second ':' selects the subversion form.
				 * A third one makes the ident invalid.
				 */
				if (state == PUNC_SEEN) {
					state = PUNC_SEEN_SVN;
					subversion = true;
				} else {
					state = INIT;
				}
				break;
			case ' ':
				/*
				 * A space after ':' or '::' begins the last
				 * component of a potential ident.
				 */
				state = TEXT;
				break;
			case '$':
				/*
				 * Invalid here, but as in the KEYWORD state
				 * this '$' may begin another keyword, so do
				 * not throw it away.
				 */
				state = DELIM_SEEN;
				break;
			default:
				/* All other characters are invalid. */
				state = INIT;
				break;
			}
			break;
		case TEXT:
			sbuf_putc(id, c);

			if (iscntrl_l(c, l)) {
				/* Control characters are not allowed here. */
				state = INIT;
			} else if (c == '$') {
				/*
				 * sbuf_len() reports -1 for a buffer in error
				 * state, so make sure finishing succeeded
				 * before indexing relative to the length.
				 */
				if (sbuf_finish(id) != 0)
					err(EXIT_FAILURE, "sbuf_finish()");
				/*
				 * A valid ident ends with a space before the
				 * closing '$'.
				 *
				 * The subversion extension uses '#' to
				 * indicate that the keyword expansion has
				 * exceeded the fixed width, so it is also
				 * permitted in subversion mode.  No length
				 * check is enforced because GNU RCS ident(1)
				 * does not do it either.
				 */
				last = sbuf_data(id)[sbuf_len(id) - 2];
				if (last == ' ' || (subversion && last == '#')) {
					printf("     %s\n", sbuf_data(id));
					hasid = true;
					state = INIT;
				} else {
					/*
					 * Rejected candidate: the '$' just
					 * read may open the next keyword.
					 */
					state = DELIM_SEEN;
				}
			}
			/* Other characters: stay in the state. */
			break;
		}
	}
	sbuf_delete(id);
	freelocale(l);

	if (!hasid) {
		if (!quiet)
			fprintf(stderr, "%s warning: no id keywords in %s\n",
			    getprogname(), name ? name : "standard input");

		return (EXIT_FAILURE);
	}

	return (EXIT_SUCCESS);
}
200 
201 int
202 main(int argc, char **argv)
203 {
204 	bool quiet = false;
205 	int ch, i;
206 	int ret = EXIT_SUCCESS;
207 	FILE *fp;
208 
209 	while ((ch = getopt(argc, argv, "qV")) != -1) {
210 		switch (ch) {
211 		case 'q':
212 			quiet = true;
213 			break;
214 		case 'V':
215 			/* Do nothing, compat with GNU rcs's ident */
216 			return (EXIT_SUCCESS);
217 		default:
218 			errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]",
219 			    getprogname());
220 		}
221 	}
222 
223 	argc -= optind;
224 	argv += optind;
225 
226 	if (argc == 0)
227 		return (scan(stdin, NULL, quiet));
228 
229 	for (i = 0; i < argc; i++) {
230 		fp = fopen(argv[i], "r");
231 		if (fp == NULL) {
232 			warn("%s", argv[i]);
233 			ret = EXIT_FAILURE;
234 			continue;
235 		}
236 		if (scan(fp, argv[i], quiet) != EXIT_SUCCESS)
237 			ret = EXIT_FAILURE;
238 		fclose(fp);
239 	}
240 
241 	return (ret);
242 }
243