xref: /illumos-gate/usr/src/cmd/logadm/glob.c (revision 4283d10e18fc3904736c7c067fb29de9bb67d25d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * logadm/glob.c -- globbing routines
26  *
27  * these routines support two kinds of globs.  first, the
28  * usual kind of filename globbing, like:
29  *
30  * 	*.c
31  * 	/var/log/syslog.?
32  * 	log[0-9]*file
33  * 	/var/apache/logs/x*{access,error}_log
34  *
35  * this is basically the same syntax that csh supports for globs and
36  * is provided by the routine glob_glob() which takes a filename and
37  * returns a list of filenames that match the glob.
38  *
39  * the second type is something called a "reglob" which is a pathname
40  * where the components are regular expressions as described in regex(3c).
41  * some examples:
42  *
43  * 	.*\.c
44  * 	/var/log/syslog\..
45  * 	log[0-9].*file
46  * 	/var/log/syslog\.([0-9]+)$0
47  *
48  * the last example uses the ()$n form to assign a numeric extension
49  * on a filename to the "n" value kept by the fn routines with each
50  * filename (see fn_setn() in fn.c).  logadm uses this mechanism to
51  * correctly sort lognames when templates containing $n are used.
52  *
53  * the routine glob_reglob() is used to expand reglobs.  glob_glob()
54  * is implemented by expanding the curly braces, converting the globs
55  * to reglobs, and then passing the work to glob_reglob().
56  *
57  * finally, since expanding globs and reglobs requires doing a stat(2)
58  * on the files, we store the resulting stat information in the filename
59  * struct (see fn_setstat() in fn.c).
60  *
61  * the glob(3c) routines are not used here since they don't support
62  * braces, and don't support the more powerful reglobs required by logadm.
63  */
64 
65 #include <stdio.h>
66 #include <libintl.h>
67 #include <stdlib.h>
68 #include <libgen.h>
69 #include <strings.h>
70 #include <sys/types.h>
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <dirent.h>
74 #include "err.h"
75 #include "fn.h"
76 #include "glob.h"
77 
78 /* forward declarations for functions used internally by this module */
79 static struct fn_list *glob_debrace(struct fn *fnp);
80 static struct fn_list *glob_reglob_list(struct fn_list *fnlp);
81 static boolean_t glob_magic(struct fn *fnp);
82 
83 /* expand curly braces (like file{one,two,three}name) */
84 static struct fn_list *
85 glob_debrace(struct fn *fnp)
86 {
87 	struct fn_list *ret = fn_list_new(NULL);
88 	struct fn_list *newret;
89 	char *sp = fn_s(fnp);
90 	char *left;
91 	char *right;
92 	char *comma;
93 
94 	/* start with an empty string in the list */
95 	fn_list_adds(ret, "");
96 
97 	/* while braces remain... */
98 	while (sp != NULL && (left = strchr(sp, '{')) != NULL)
99 		if ((right = strchr(left, '}')) == NULL) {
100 			err(EF_FILE|EF_JMP, "Missing }");
101 			fn_list_free(ret);
102 			return (NULL);
103 		} else {
104 			/* stuff before "left" is finished */
105 			fn_list_appendrange(ret, sp, left);
106 
107 			/* stuff after "right" still need processing */
108 			sp = right + 1;
109 
110 			if (left + 1 == right)
111 				continue;	/* just an empty {} */
112 
113 			/* stuff between "left" and "right" is comma-sep list */
114 			left++;
115 			newret = fn_list_new(NULL);
116 			while ((comma = strchr(left, ',')) != NULL) {
117 				struct fn_list *dup = fn_list_dup(ret);
118 
119 				/* stuff from left to comma is one variant */
120 				fn_list_appendrange(dup, left, comma);
121 				fn_list_addfn_list(newret, dup);
122 				left = comma + 1;
123 			}
124 			/* what's left is the last item in the list */
125 			fn_list_appendrange(ret, left, right);
126 			fn_list_addfn_list(newret, ret);
127 			ret = newret;
128 		}
129 
130 	/* anything remaining in "s" is finished */
131 	fn_list_appendrange(ret, sp, &sp[strlen(sp)]);
132 	return (ret);
133 }
134 
135 /* return true if filename contains any "magic" characters (*,?,[) */
136 static boolean_t
137 glob_magic(struct fn *fnp)
138 {
139 	char *s = fn_s(fnp);
140 
141 	for (; s != NULL && *s; s++)
142 		if (*s == '*' ||
143 		    *s == '?' ||
144 		    *s == '[')
145 			return (B_TRUE);
146 
147 	return (B_FALSE);
148 }
149 
/*
 * glob_glob -- given a filename glob, return the list of matching filenames
 *
 * The glob is debraced first.  If none of the debraced names contains a
 * magic character (see glob_magic()), that debraced list is returned
 * directly and no directory is searched.  Otherwise each name is converted
 * with glob_to_reglob() and the converted list is expanded by
 * glob_reglob_list(); glob_reglob() is where fn_setn() and fn_setstat()
 * get called for the names that are found.
 *
 * Returns NULL if debracing produced a NULL list.
 */
struct fn_list *
glob_glob(struct fn *fnp)
{
	struct fn_list *braced = glob_debrace(fnp);
	struct fn_list *reglobs;
	struct fn_list *matches;
	struct fn *cur;

	/* debracing produced NULL list? */
	if (braced == NULL)
		return (NULL);

	/* walk the list until some name turns out to contain magic */
	fn_list_rewind(braced);
	for (;;) {
		cur = fn_list_next(braced);
		if (cur == NULL)
			return (braced);	/* no globs to expand */
		if (glob_magic(cur))
			break;
	}

	/* convert every debraced name from glob syntax to reglob syntax */
	reglobs = fn_list_new(NULL);
	fn_list_rewind(braced);
	while ((cur = fn_list_next(braced)) != NULL)
		fn_list_addfn(reglobs, glob_to_reglob(cur));
	fn_list_free(braced);

	/* let the reglob code do the actual matching */
	matches = glob_reglob_list(reglobs);
	fn_list_free(reglobs);

	return (matches);
}
194 
/*
 * glob_glob_list -- given a list of filename globs, return all matches
 *
 * Each entry of fnlp is expanded with glob_glob() and the results are
 * gathered, in list order, into one newly created list that is returned.
 */
struct fn_list *
glob_glob_list(struct fn_list *fnlp)
{
	struct fn_list *all = fn_list_new(NULL);
	struct fn *pattern;

	for (fn_list_rewind(fnlp); (pattern = fn_list_next(fnlp)) != NULL; ) {
		struct fn_list *expanded = glob_glob(pattern);

		fn_list_addfn_list(all, expanded);
	}

	return (all);
}
209 
210 /*
211  * glob_reglob -- given a filename reglob, return a list of matching filenames
212  *
213  * this routine does all the hard work in this module.
214  */
215 struct fn_list *
216 glob_reglob(struct fn *fnp)
217 {
218 	struct fn_list *ret = fn_list_new(NULL);
219 	struct fn_list *newret;
220 	struct fn *nextfnp;
221 	char *mys = STRDUP(fn_s(fnp));
222 	char *sp = mys;
223 	char *slash;
224 	int skipdotfiles;
225 	char *re;
226 	char ret0[MAXPATHLEN];
227 
228 
229 	/* start with the initial directory in the list */
230 	if (*sp == '/') {
231 		fn_list_adds(ret, "/");
232 		while (*sp == '/')
233 			sp++;
234 	} else
235 		fn_list_adds(ret, "./");
236 
237 	/* while components remain... */
238 	do {
239 		if ((slash = strchr(sp, '/')) != NULL) {
240 			*slash++ = '\0';
241 			/* skip superfluous slashes */
242 			while (*slash == '/')
243 				slash++;
244 		}
245 
246 		/* dot files are skipped unless a dot was specifically given */
247 		if (sp[0] == '\\' && sp[1] == '.')
248 			skipdotfiles = 0;
249 		else
250 			skipdotfiles = 1;
251 
252 		/* compile the regex */
253 		if ((re = regcmp("^", sp, "$", (char *)0)) == NULL)
254 			err(EF_FILE|EF_JMP, "regcmp failed on <%s>", sp);
255 
256 		/* apply regex to every filename we've matched so far */
257 		newret = fn_list_new(NULL);
258 		fn_list_rewind(ret);
259 		while ((nextfnp = fn_list_next(ret)) != NULL) {
260 			DIR *dirp;
261 			struct dirent *dp;
262 
263 			/* go through directory looking for matches */
264 			if ((dirp = opendir(fn_s(nextfnp))) == NULL)
265 				continue;
266 
267 			while ((dp = readdir(dirp)) != NULL) {
268 				if (skipdotfiles && dp->d_name[0] == '.')
269 					continue;
270 				*ret0 = '\0';
271 				if (regex(re, dp->d_name, ret0)) {
272 					struct fn *matchfnp = fn_dup(nextfnp);
273 					struct stat stbuf;
274 					int n;
275 
276 					fn_puts(matchfnp, dp->d_name);
277 
278 					if (stat(fn_s(matchfnp), &stbuf) < 0) {
279 						fn_free(matchfnp);
280 						continue;
281 					}
282 
283 					/* skip non-dirs if more components */
284 					if (slash &&
285 					    (stbuf.st_mode & S_IFMT) !=
286 					    S_IFDIR) {
287 						fn_free(matchfnp);
288 						continue;
289 					}
290 
291 					/*
292 					 * component matched, fill in "n"
293 					 * value, stat information, and
294 					 * append component to directory
295 					 * name just searched.
296 					 */
297 
298 					if (*ret0)
299 						n = atoi(ret0);
300 					else
301 						n = -1;
302 					fn_setn(matchfnp, n);
303 					fn_setstat(matchfnp, &stbuf);
304 
305 					if (slash)
306 						fn_putc(matchfnp, '/');
307 
308 					fn_list_addfn(newret, matchfnp);
309 				}
310 			}
311 			(void) closedir(dirp);
312 		}
313 		fn_list_free(ret);
314 		ret = newret;
315 		sp = slash;
316 	} while (slash);
317 
318 	FREE(mys);
319 
320 	return (ret);
321 }
322 
/*
 * glob_reglob_list -- reglob a list of filenames
 *
 * Each entry of fnlp is expanded with glob_reglob() and the results are
 * gathered, in list order, into one newly created list that is returned.
 */
static struct fn_list *
glob_reglob_list(struct fn_list *fnlp)
{
	struct fn_list *all = fn_list_new(NULL);
	struct fn *pattern;

	for (fn_list_rewind(fnlp); (pattern = fn_list_next(fnlp)) != NULL; ) {
		struct fn_list *expanded = glob_reglob(pattern);

		fn_list_addfn_list(all, expanded);
	}

	return (all);
}
335 
/*
 * glob_to_reglob -- convert a glob (*, ?, etc) to a reglob (.*, ., etc.)
 *
 * Reads fnp from its start, one character at a time, and builds a new
 * filename, which is returned:
 *
 *	?			becomes  .
 *	*			becomes  .*
 *	. ( ) ^ + { } $		are preceded by a backslash
 *	anything else		is copied unchanged
 */
struct fn *
glob_to_reglob(struct fn *fnp)
{
	struct fn *out = fn_new(NULL);
	int ch;

	fn_rewind(fnp);
	for (ch = fn_getc(fnp); ch != '\0'; ch = fn_getc(fnp)) {
		switch (ch) {
		case '?':
			/* any single character */
			fn_putc(out, '.');
			continue;
		case '*':
			/* any run of characters */
			fn_putc(out, '.');
			fn_putc(out, '*');
			continue;
		case '.':
		case '(':
		case ')':
		case '^':
		case '+':
		case '{':
		case '}':
		case '$':
			/* magic in a reglob, so it needs a backslash first */
			fn_putc(out, '\\');
			break;
		default:
			break;
		}

		/* the character itself, escaped above if necessary */
		fn_putc(out, ch);
	}

	return (out);
}
375 
#ifdef	TESTMODULE

/*
 * test main for glob module, usage: a.out [-r] [pattern...]
 *	-r means the patterns are reglobs instead of globs
 *
 * For every pattern the matches are printed, followed by the total size
 * reported by fn_list_totalsize(), followed by the names again in the
 * order fn_list_popoldest() hands them back.  A "-r" only affects the
 * patterns that come after it on the command line.
 */
int
main(int argc, char *argv[])
{
	struct fn *argfnp = fn_new(NULL);
	struct fn_list *fnlp;
	struct fn *fnp;
	int reglobs = 0;
	int i;

	err_init(argv[0]);
	setbuf(stdout, NULL);

	for (i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-r") == 0) {
			reglobs = 1;
			continue;
		}

		/* errors raised during expansion jump back to here */
		if (SETJMP) {
			printf("    skipped due to errors\n");
			continue;
		}

		printf("<%s>:\n", argv[i]);
		fn_renew(argfnp, argv[i]);
		if (reglobs)
			fnlp = glob_reglob(argfnp);
		else
			fnlp = glob_glob(argfnp);

		/* list the matches */
		for (fn_list_rewind(fnlp);
		    (fnp = fn_list_next(fnlp)) != NULL; )
			printf("    <%s>\n", fn_s(fnp));

		printf("total size: %lld\n", fn_list_totalsize(fnlp));

		/* drain the list, oldest entry first */
		for (fnp = fn_list_popoldest(fnlp); fnp != NULL;
		    fnp = fn_list_popoldest(fnlp)) {
			printf("    oldest <%s>\n", fn_s(fnp));
			fn_free(fnp);
		}

		fn_list_free(fnlp);
	}
	fn_free(argfnp);

	err_done(0);
	/* NOTREACHED */
	return (0);
}

#endif	/* TESTMODULE */
433