xref: /illumos-gate/usr/src/cmd/logadm/glob.c (revision 4de2612967d06c4fdbf524a62556a1e8118a006f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2001 by Sun Microsystems, Inc.
24  * All rights reserved.
25  *
26  * logadm/glob.c -- globbing routines
27  *
28  * these routines support two kinds of globs.  first, the
29  * usual kind of filename globbing, like:
30  *
31  * 	*.c
32  * 	/var/log/syslog.?
33  * 	log[0-9]*file
34  * 	/var/apache/logs/x*{access,error}_log
35  *
36  * this is basically the same syntax that csh supports for globs and
37  * is provided by the routine glob_glob() which takes a filename and
38  * returns a list of filenames that match the glob.
39  *
40  * the second type is something called a "reglob" which is a pathname
41  * where the components are regular expressions as described in regex(3c).
42  * some examples:
43  *
44  * 	.*\.c
45  * 	/var/log/syslog\..
46  * 	log[0-9].*file
47  * 	/var/log/syslog\.([0-9]+)$0
48  *
49  * the last example uses the ()$n form to assign a numeric extension
50  * on a filename to the "n" value kept by the fn routines with each
51  * filename (see fn_setn() in fn.c).  logadm uses this mechanism to
52  * correctly sort lognames when templates containing $n are used.
53  *
54  * the routine glob_reglob() is used to expand reglobs.  glob_glob()
55  * is implemented by expanding the curly braces, converting the globs
56  * to reglobs, and then passing the work to glob_reglob().
57  *
58  * finally, since expanding globs and reglobs requires doing a stat(2)
59  * on the files, we store the resulting stat information in the filename
60  * struct (see fn_setstat() in fn.c).
61  *
62  * the glob(3c) routines are not used here since they don't support
63  * braces, and don't support the more powerful reglobs required by logadm.
64  */
65 
66 #pragma ident	"%Z%%M%	%I%	%E% SMI"
67 
68 #include <stdio.h>
69 #include <libintl.h>
70 #include <stdlib.h>
71 #include <libgen.h>
72 #include <strings.h>
73 #include <sys/types.h>
74 #include <sys/param.h>
75 #include <sys/stat.h>
76 #include <dirent.h>
77 #include "err.h"
78 #include "fn.h"
79 #include "glob.h"
80 
81 /* forward declarations for functions used internally by this module */
82 static struct fn_list *glob_debrace(struct fn *fnp);
83 static struct fn_list *glob_reglob_list(struct fn_list *fnlp);
84 static boolean_t glob_magic(struct fn *fnp);
85 
86 /* expand curly braces (like file{one,two,three}name) */
87 static struct fn_list *
88 glob_debrace(struct fn *fnp)
89 {
90 	struct fn_list *ret = fn_list_new(NULL);
91 	struct fn_list *newret;
92 	char *sp = fn_s(fnp);
93 	char *left;
94 	char *right;
95 	char *comma;
96 
97 	/* start with an empty string in the list */
98 	fn_list_adds(ret, "");
99 
100 	/* while braces remain... */
101 	while ((left = strchr(sp, '{')) != NULL)
102 		if ((right = strchr(left, '}')) == NULL) {
103 			err(EF_FILE|EF_JMP, "Missing }");
104 			fn_list_free(ret);
105 			return (NULL);
106 		} else {
107 			/* stuff before "left" is finished */
108 			fn_list_appendrange(ret, sp, left);
109 
110 			/* stuff after "right" still need processing */
111 			sp = right + 1;
112 
113 			if (left + 1 == right)
114 				continue;	/* just an empty {} */
115 
116 			/* stuff between "left" and "right" is comma-sep list */
117 			left++;
118 			newret = fn_list_new(NULL);
119 			while ((comma = strchr(left, ',')) != NULL) {
120 				struct fn_list *dup = fn_list_dup(ret);
121 
122 				/* stuff from left to comma is one variant */
123 				fn_list_appendrange(dup, left, comma);
124 				fn_list_addfn_list(newret, dup);
125 				left = comma + 1;
126 			}
127 			/* what's left is the last item in the list */
128 			fn_list_appendrange(ret, left, right);
129 			fn_list_addfn_list(newret, ret);
130 			ret = newret;
131 		}
132 
133 	/* anything remaining in "s" is finished */
134 	fn_list_appendrange(ret, sp, &sp[strlen(sp)]);
135 	return (ret);
136 }
137 
138 /* return true if filename contains any "magic" characters (*,?,[) */
139 static boolean_t
140 glob_magic(struct fn *fnp)
141 {
142 	char *s = fn_s(fnp);
143 
144 	for (; *s; s++)
145 		if (*s == '*' ||
146 		    *s == '?' ||
147 		    *s == '[')
148 			return (B_TRUE);
149 
150 	return (B_FALSE);
151 }
152 
/*
 * glob_glob -- given a filename glob, return the list of matching filenames
 *
 * The glob is first debraced.  If none of the resulting names contains
 * a magic character, the debraced list is returned as is and nothing
 * further is done to it here.  Otherwise every debraced name is
 * converted to a reglob and the converted list is expanded by
 * glob_reglob_list(), which is where fn_setn() and fn_setstat() end up
 * being called for the resulting filenames.
 *
 * Returns NULL if debracing produced no list.
 */
struct fn_list *
glob_glob(struct fn *fnp)
{
	struct fn_list *braced;
	struct fn_list *reglobs;
	struct fn_list *matches;
	struct fn *cur;
	int has_magic = 0;

	braced = glob_debrace(fnp);
	if (braced == NULL) {
		/* debracing produced NULL list */
		return (NULL);
	}

	/* stop at the first name carrying a magic character */
	fn_list_rewind(braced);
	for (cur = fn_list_next(braced); cur != NULL;
	    cur = fn_list_next(braced)) {
		if (glob_magic(cur)) {
			has_magic = 1;
			break;
		}
	}

	if (has_magic == 0) {
		/* no globs to expand */
		return (braced);
	}

	/* translate each debraced glob into its reglob equivalent */
	fn_list_rewind(braced);
	reglobs = fn_list_new(NULL);
	for (cur = fn_list_next(braced); cur != NULL;
	    cur = fn_list_next(braced)) {
		fn_list_addfn(reglobs, glob_to_reglob(cur));
	}
	fn_list_free(braced);

	/* expand the reglobs; the intermediate list is no longer needed */
	matches = glob_reglob_list(reglobs);
	fn_list_free(reglobs);

	return (matches);
}
197 
/*
 * glob_glob_list -- given a list of filename globs, return all matches
 *
 * Each entry of fnlp is expanded with glob_glob() and the results are
 * gathered, in order, into one newly created list which is returned.
 * An entry for which glob_glob() yields NULL (it does so when
 * debracing fails) contributes nothing to the result.
 */
struct fn_list *
glob_glob_list(struct fn_list *fnlp)
{
	struct fn_list *ret = fn_list_new(NULL);
	struct fn_list *expanded;
	struct fn *fnp;

	fn_list_rewind(fnlp);
	while ((fnp = fn_list_next(fnlp)) != NULL) {
		/*
		 * glob_glob() can return NULL; do not hand that to
		 * fn_list_addfn_list(), which is not known to accept it.
		 */
		if ((expanded = glob_glob(fnp)) == NULL)
			continue;
		fn_list_addfn_list(ret, expanded);
	}
	return (ret);
}
212 
213 /*
214  * glob_reglob -- given a filename reglob, return a list of matching filenames
215  *
216  * this routine does all the hard work in this module.
217  */
218 struct fn_list *
219 glob_reglob(struct fn *fnp)
220 {
221 	struct fn_list *ret = fn_list_new(NULL);
222 	struct fn_list *newret;
223 	struct fn *nextfnp;
224 	char *mys = STRDUP(fn_s(fnp));
225 	char *sp = mys;
226 	char *slash;
227 	int skipdotfiles;
228 	char *re;
229 	char ret0[MAXPATHLEN];
230 
231 	/* start with the initial directory in the list */
232 	if (*sp == '/') {
233 		fn_list_adds(ret, "/");
234 		while (*sp == '/')
235 			sp++;
236 	} else
237 		fn_list_adds(ret, "./");
238 
239 	/* while components remain... */
240 	do {
241 		if ((slash = strchr(sp, '/')) != NULL) {
242 			*slash++ = '\0';
243 			/* skip superfluous slashes */
244 			while (*slash == '/')
245 				slash++;
246 		}
247 
248 		/* dot files are skipped unless a dot was specifically given */
249 		if (sp[0] == '\\' && sp[1] == '.')
250 			skipdotfiles = 0;
251 		else
252 			skipdotfiles = 1;
253 
254 		/* compile the regex */
255 		if ((re = regcmp("^", sp, "$", (char *)0)) == NULL)
256 			err(EF_FILE|EF_JMP, "regcmp failed on <%s>", sp);
257 
258 		/* apply regex to every filename we've matched so far */
259 		newret = fn_list_new(NULL);
260 		fn_list_rewind(ret);
261 		while ((nextfnp = fn_list_next(ret)) != NULL) {
262 			DIR *dirp;
263 			struct dirent *dp;
264 
265 			/* go through directory looking for matches */
266 			if ((dirp = opendir(fn_s(nextfnp))) == NULL)
267 				continue;
268 
269 			while ((dp = readdir(dirp)) != NULL) {
270 				if (skipdotfiles && dp->d_name[0] == '.')
271 					continue;
272 				*ret0 = '\0';
273 				if (regex(re, dp->d_name, ret0)) {
274 					struct fn *matchfnp = fn_dup(nextfnp);
275 					struct stat stbuf;
276 					int n;
277 
278 					fn_puts(matchfnp, dp->d_name);
279 
280 					if (stat(fn_s(matchfnp), &stbuf) < 0) {
281 						fn_free(matchfnp);
282 						continue;
283 					}
284 
285 					/* skip non-dirs if more components */
286 					if (slash &&
287 					    (stbuf.st_mode & S_IFMT) !=
288 					    S_IFDIR) {
289 						fn_free(matchfnp);
290 						continue;
291 					}
292 
293 					/*
294 					 * component matched, fill in "n"
295 					 * value, stat information, and
296 					 * append component to directory
297 					 * name just searched.
298 					 */
299 
300 					if (*ret0)
301 						n = atoi(ret0);
302 					else
303 						n = -1;
304 					fn_setn(matchfnp, n);
305 					fn_setstat(matchfnp, &stbuf);
306 
307 					if (slash)
308 						fn_putc(matchfnp, '/');
309 
310 					fn_list_addfn(newret, matchfnp);
311 				}
312 			}
313 			(void) closedir(dirp);
314 		}
315 		fn_list_free(ret);
316 		ret = newret;
317 		sp = slash;
318 	} while (slash);
319 
320 	FREE(mys);
321 
322 	return (ret);
323 }
324 
/*
 * glob_reglob_list -- reglob a list of filenames
 *
 * Runs glob_reglob() on each entry of fnlp, in order, and collects
 * everything it produces into a single new list, which is returned.
 */
static struct fn_list *
glob_reglob_list(struct fn_list *fnlp)
{
	struct fn_list *all = fn_list_new(NULL);
	struct fn *cur;

	fn_list_rewind(fnlp);
	for (cur = fn_list_next(fnlp); cur != NULL;
	    cur = fn_list_next(fnlp)) {
		struct fn_list *matches = glob_reglob(cur);

		fn_list_addfn_list(all, matches);
	}

	return (all);
}
337 
/*
 * glob_to_reglob -- convert a glob (*, ?, etc) to a reglob (.*, ., etc.)
 *
 * Reads fnp from its beginning, one character at a time, and builds a
 * new filename in which:
 *
 *	?		becomes	.
 *	*		becomes	.*
 *	. ( ) ^ + { } $	are preceded by a backslash
 *
 * and every other character is copied unchanged.  The new filename is
 * returned; fnp itself is only rewound and read.
 */
struct fn *
glob_to_reglob(struct fn *fnp)
{
	struct fn *reglob = fn_new(NULL);
	int ch;

	fn_rewind(fnp);
	for (ch = fn_getc(fnp); ch != '\0'; ch = fn_getc(fnp)) {
		if (ch == '?') {
			/* any single character */
			fn_putc(reglob, '.');
		} else if (ch == '*') {
			/* any run of characters */
			fn_putc(reglob, '.');
			fn_putc(reglob, '*');
		} else {
			/* regex metacharacters are taken literally */
			if (ch == '.' || ch == '(' || ch == ')' ||
			    ch == '^' || ch == '+' || ch == '{' ||
			    ch == '}' || ch == '$')
				fn_putc(reglob, '\\');
			fn_putc(reglob, ch);
		}
	}

	return (reglob);
}
377 
#ifdef	TESTMODULE

/*
 * test main for glob module, usage: a.out [-r] [pattern...]
 *	-r means the patterns are reglobs instead of globs
 *
 * For each pattern the matching names are printed, followed by the
 * value of fn_list_totalsize() and then the names again in the order
 * fn_list_popoldest() hands them back.  A pattern whose expansion
 * fails is reported as skipped.
 */
int
main(int argc, char *argv[])
{
	int i;
	int reglobs = 0;
	struct fn *argfnp = fn_new(NULL);
	struct fn *fnp;
	struct fn_list *fnlp;

	err_init(argv[0]);
	setbuf(stdout, NULL);

	for (i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-r") == 0) {
			reglobs = 1;
			continue;
		}

		/* errors raised with EF_JMP during expansion land here */
		if (SETJMP) {
			printf("    skipped due to errors\n");
			continue;
		} else {
			printf("<%s>:\n", argv[i]);
			fn_renew(argfnp, argv[i]);
			if (reglobs)
				fnlp = glob_reglob(argfnp);
			else
				fnlp = glob_glob(argfnp);
		}

		/* glob_glob() returns NULL when debracing fails */
		if (fnlp == NULL) {
			printf("    skipped due to errors\n");
			continue;
		}

		fn_list_rewind(fnlp);
		while ((fnp = fn_list_next(fnlp)) != NULL)
			printf("    <%s>\n", fn_s(fnp));

		/*
		 * NOTE(review): "%d" assumes fn_list_totalsize() returns
		 * an int -- confirm against fn.h.
		 */
		printf("total size: %d\n", fn_list_totalsize(fnlp));

		while ((fnp = fn_list_popoldest(fnlp)) != NULL) {
			printf("    oldest <%s>\n", fn_s(fnp));
			fn_free(fnp);
		}

		fn_list_free(fnlp);
	}
	fn_free(argfnp);

	err_done(0);

	/* in case err_done() returns */
	return (0);
}

#endif	/* TESTMODULE */
432