xref: /illumos-gate/usr/src/cmd/filesync/ignore.c (revision 948f2876ce2a3010558f4f6937e16086ebcd36f2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
24  *
25  * module:
26  *	ignore.c
27  *
28  * purpose:
29  *	routines to manage the ignore lists and test names against them.
30  *
31  * contents:
32  *	ignore_check ... is a particular file covered by an ignore rule
33  *	ignore_file .... add a specific file name to be ignored
34  *	ignore_expr .... add a regular expression for files to be ignored
35  *	ignore_pgm ..... add a rule to run a program to generate a list
36  *	ignore_reset ... flush the internal optimization data structures
37  *
38  *	static
39  *	    ign_hash ... maintain a hash table of ignored names
40  *	    cheap_check. build up a table of safe suffixes
41  *
42  * notes:
43  *	a much simpler implementation could have been provided, but
44  *	this test (every file tested against every rule) has the
45  *	potential to be EXTREMELY expensive.  This module implements
46  *	an engine that attempts to optimize the process of determining
47  *	that a file has not been ignored.
48  *
49  *	the usage scenario is
50  *	    per base
51  *		call ignore_{file,expr,pgm} for each ignore rule
52  *		call ignore_check for every file under the base
53  *		call ignore_reset when you are done
54  */
55 #ident	"%W%	%E% SMI"
56 
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <libgen.h>
61 
62 #include "filesync.h"
63 #include "messages.h"
64 
65 /*
66  * routines:
67  */
68 static struct list *ign_hash(const char *, int);
69 static void cheap_check(const char *);
70 
71 /*
72  * globals
73  */
74 struct list {
75 	char *l_value;			/* the actual string		*/
76 	struct list *l_next;		/* pointer to next element	*/
77 };
78 
79 static struct list *expr_list;		/* list of regular expressions	*/
80 static struct list *file_list[ HASH_SIZE ]; /* hash table of literal names */
81 
82 static char cheap_last[256];		/* cheap test: last char	*/
83 static char cheap_penu[256];		/* cheap test: penultimate char	*/
84 
85 /*
86  * routine:
87  *	ignore_check
88  *
89  * purpose:
90  *	determine whether or not a particular name matches an ignore pattern.
91  *
92  * parameters:
93  *	file name
94  *
95  * returns:
96  *	true/false
97  *
98  * note:
99  *	becuse this routine is called on every single file in
100  *	every single sub-directory, it is critical that we make
101  *	it fail quickly for most files.  The purpose of the cheap_last
102  *	and cheap_penu arrays is to quickly determine there is no chance
103  *	that a name will match any expression.  Most expressions have
104  *	wildcards near the front and constant suffixes, so our cheap
105  *	test is to look at the last two bytes.
106  */
107 bool_t
108 ignore_check(const char *name)
109 {	struct list *lp;
110 	const char *s;
111 
112 	/*
113 	 * start with the cheap test
114 	 */
115 	for (s = name; *s; s++);
116 	if (cheap_last[ (unsigned char) s[-1] ] == 0 ||
117 	    cheap_penu[ (unsigned char) s[-2] ] == 0)
118 		return (FALSE);
119 
120 	/* check the literal names in the hash table		*/
121 	if (ign_hash(name, 0)) {
122 		if (opt_debug & DBG_IGNORE)
123 			fprintf(stderr, "IGNO: match %s\n", name);
124 		return (TRUE);
125 	}
126 
127 	/* check all the regular expressions			*/
128 	for (lp = expr_list; lp; lp = lp->l_next) {
129 		if (gmatch(name, lp->l_value) == 0)
130 			continue;
131 
132 		if (opt_debug & DBG_IGNORE)
133 			fprintf(stderr, "IGNO: regex %s : %s\n",
134 				lp->l_value, name);
135 		return (TRUE);
136 	}
137 
138 	return (FALSE);
139 }
140 
141 /*
142  * routine:
143  *	ignore_file
144  *
145  * purpose:
146  *	to add a specific file to an ignore list
147  *
148  * parameters:
149  *	command to run
150  */
151 void
152 ignore_file(const char *name)
153 {
154 	cheap_check(name);
155 
156 	(void) ign_hash(name, 1);
157 
158 	if (opt_debug & DBG_IGNORE)
159 		fprintf(stderr, "IGNO: add file %s\n", name);
160 }
161 
162 /*
163  * routine:
164  *	ignore_expr
165  *
166  * purpose:
167  *	to add a regular expression to an ignore list
168  *
169  * parameters:
170  *	command to run
171  */
172 void
173 ignore_expr(const char *expr)
174 {	struct list *lp;
175 
176 	cheap_check(expr);
177 
178 	/* allocate a new node and stick it on the front of the list	*/
179 	lp = malloc(sizeof (*lp));
180 	if (lp == 0)
181 		nomem("ignore list");
182 	lp->l_value = strdup(expr);
183 	lp->l_next = expr_list;
184 	expr_list = lp;
185 
186 	if (opt_debug & DBG_IGNORE)
187 		fprintf(stderr, "IGNO: add expr %s\n", expr);
188 }
189 
190 /*
191  * routine:
192  *	ignore_pgm
193  *
194  * purpose:
195  *	to run a program and gather up the ignore list it produces
196  *
197  * parameters:
198  *	command to run
199  */
200 void
201 ignore_pgm(const char *cmd)
202 {	char *s;
203 	FILE *fp;
204 	char inbuf[ MAX_LINE ];
205 
206 	if (opt_debug & DBG_IGNORE)
207 		fprintf(stderr, "IGNO: add pgm %s\n", cmd);
208 
209 	/* run the command and collect its ouput	*/
210 	fp = popen(cmd, "r");
211 	if (fp == NULL) {
212 		fprintf(stderr, gettext(ERR_badrun), cmd);
213 		return;
214 	}
215 
216 	/*
217 	 * read each line, strip off the newline and add it to the list
218 	 */
219 	while (fgets(inbuf, sizeof (inbuf), fp) != 0) {
220 		/* strip off any trailing newline	*/
221 		for (s = inbuf; *s && *s != '\n'; s++);
222 		*s = 0;
223 
224 		/* skip any leading white space		*/
225 		for (s = inbuf; *s == ' ' || *s == '\t'; s++);
226 
227 		/* add this file to the list		*/
228 		if (*s) {
229 			cheap_check(s);
230 			(void) ign_hash(s, 1);
231 
232 			if (opt_debug & DBG_IGNORE)
233 				fprintf(stderr, "IGNO: ... %s\n", s);
234 		}
235 	}
236 
237 	pclose(fp);
238 }
239 
240 /*
241  * routine:
242  *	ign_hash
243  *
244  * purpose:
245  *	to find an entry in the hash list
246  *
247  * parameters:
248  *	name
249  *	allocate flag
250  *
251  * returns:
252  *	pointer to new list entry or 0
253  */
254 static struct list *
255 ign_hash(const char *name, int alloc)
256 {	const unsigned char *s;
257 	int i;
258 	struct list *lp;
259 	struct list **pp;
260 
261 	/* perform the hash and find the chain	*/
262 	for (s = (const unsigned char *) name, i = 0; *s; s++)
263 		i += *s;
264 	pp = &file_list[i % HASH_SIZE ];
265 
266 	/* search for the specified entry	*/
267 	for (lp = *pp; lp; lp = *pp) {
268 		if (strcmp(name, lp->l_value) == 0)
269 			return (lp);
270 		pp = &(lp->l_next);
271 	}
272 
273 	/* if caller said alloc, buy a new node and chain it in	*/
274 	if (alloc) {
275 		lp = malloc(sizeof (*lp));
276 		if (lp == 0)
277 			nomem("ignore list");
278 		lp->l_value = strdup(name);
279 		lp->l_next = 0;
280 		*pp = lp;
281 	}
282 
283 	return (lp);
284 }
285 
286 /*
287  * routine:
288  *	cheap_check
289  *
290  * purpose:
291  *	to update the cheap-check arrays for an ignore expression
292  *
293  * parameters:
294  *	name/expression
295  */
296 static void
297 cheap_check(const char *name)
298 {	const char *s;
299 	unsigned char c;
300 	int i;
301 
302 	for (s = name; *s; s++);
303 	s--;
304 
305 	/* if expr ends in a wild card, we are undone		*/
306 	c = *s;
307 	if (c == '*' || c == '?' || c == ']' || c == '}') {
308 		for (i = 0; i < 256; i++) {
309 			cheap_last[i] = 1;
310 			cheap_penu[i] = 1;
311 		}
312 		return;
313 	} else
314 		cheap_last[c] = 1;
315 
316 	if (s <= name)
317 		return;
318 
319 	/* check the next to last character too		*/
320 	c = s[-1];
321 	if (c == '*' || c == '?' || c == ']' || c == '}') {
322 		for (i = 0; i < 256; i++)
323 			cheap_penu[i] = 1;
324 	} else
325 		cheap_penu[c] = 1;
326 }
327 
328 /*
329  * routine:
330  *	ignore_reset
331  *
332  * purpose:
333  *	to free up all the ignore entries so we can start anew
334  */
335 void
336 ignore_reset(void)
337 {	int i;
338 	struct list *np = 0;	/* for LINT */
339 	struct list *lp;
340 
341 	/* clear the cheap check arrays */
342 	for (i = 0; i < 255; i++) {
343 		cheap_last[i] = 0;
344 		cheap_penu[i] = 0;
345 	}
346 
347 	/* free all of the literal hash chains	*/
348 	for (i = 0; i < HASH_SIZE; i++) {
349 		for (lp = file_list[i]; lp; lp = np) {
350 			np = lp->l_next;
351 			free(lp->l_value);
352 			free(lp);
353 		}
354 		file_list[i] = 0;
355 	}
356 
357 	/* free all of the expressions on the chain	*/
358 	for (lp = expr_list; lp; lp = np) {
359 		np = lp->l_next;
360 		free(lp->l_value);
361 		free(lp);
362 	}
363 	expr_list = 0;
364 }
365