1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * logadm/glob.c -- globbing routines
26 *
27 * these routines support two kinds of globs. first, the
28 * usual kind of filename globbing, like:
29 *
30 * *.c
31 * /var/log/syslog.?
32 * log[0-9]*file
33 * /var/apache/logs/x*{access,error}_log
34 *
35 * this is basically the same syntax that csh supports for globs and
36 * is provided by the routine glob_glob() which takes a filename and
37 * returns a list of filenames that match the glob.
38 *
39 * the second type is something called a "reglob" which is a pathname
40 * where the components are regular expressions as described in regex(3c).
41 * some examples:
42 *
43 * .*\.c
44 * /var/log/syslog\..
45 * log[0-9].*file
46 * /var/log/syslog\.([0-9]+)$0
47 *
48 * the last example uses the ()$n form to assign a numeric extension
49 * on a filename to the "n" value kept by the fn routines with each
50 * filename (see fn_setn() in fn.c). logadm uses this mechanism to
51 * correctly sort lognames when templates containing $n are used.
52 *
53 * the routine glob_reglob() is used to expand reglobs. glob_glob()
54 * is implemented by expanding the curly braces, converting the globs
55 * to reglobs, and then passing the work to glob_reglob().
56 *
57 * finally, since expanding globs and reglobs requires doing a stat(2)
58 * on the files, we store the resulting stat information in the filename
59 * struct (see fn_setstat() in fn.c).
60 *
61 * the glob(3c) routines are not used here since they don't support
62 * braces, and don't support the more powerful reglobs required by logadm.
63 */
64
65 #include <stdio.h>
66 #include <libintl.h>
67 #include <stdlib.h>
68 #include <libgen.h>
69 #include <strings.h>
70 #include <sys/types.h>
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <dirent.h>
74 #include "err.h"
75 #include "fn.h"
76 #include "glob.h"
77
78 /* forward declarations for functions used internally by this module */
79 static struct fn_list *glob_debrace(struct fn *fnp);
80 static struct fn_list *glob_reglob_list(struct fn_list *fnlp);
81 static boolean_t glob_magic(struct fn *fnp);
82
83 /* expand curly braces (like file{one,two,three}name) */
84 static struct fn_list *
glob_debrace(struct fn * fnp)85 glob_debrace(struct fn *fnp)
86 {
87 struct fn_list *ret = fn_list_new(NULL);
88 struct fn_list *newret;
89 char *sp = fn_s(fnp);
90 char *left;
91 char *right;
92 char *comma;
93
94 /* start with an empty string in the list */
95 fn_list_adds(ret, "");
96
97 /* while braces remain... */
98 while (sp != NULL && (left = strchr(sp, '{')) != NULL)
99 if ((right = strchr(left, '}')) == NULL) {
100 err(EF_FILE|EF_JMP, "Missing }");
101 fn_list_free(ret);
102 return (NULL);
103 } else {
104 /* stuff before "left" is finished */
105 fn_list_appendrange(ret, sp, left);
106
107 /* stuff after "right" still need processing */
108 sp = right + 1;
109
110 if (left + 1 == right)
111 continue; /* just an empty {} */
112
113 /* stuff between "left" and "right" is comma-sep list */
114 left++;
115 newret = fn_list_new(NULL);
116 while ((comma = strchr(left, ',')) != NULL) {
117 struct fn_list *dup = fn_list_dup(ret);
118
119 /* stuff from left to comma is one variant */
120 fn_list_appendrange(dup, left, comma);
121 fn_list_addfn_list(newret, dup);
122 left = comma + 1;
123 }
124 /* what's left is the last item in the list */
125 fn_list_appendrange(ret, left, right);
126 fn_list_addfn_list(newret, ret);
127 ret = newret;
128 }
129
130 /* anything remaining in "s" is finished */
131 fn_list_appendrange(ret, sp, &sp[strlen(sp)]);
132 return (ret);
133 }
134
135 /* return true if filename contains any "magic" characters (*,?,[) */
136 static boolean_t
glob_magic(struct fn * fnp)137 glob_magic(struct fn *fnp)
138 {
139 char *s = fn_s(fnp);
140
141 for (; s != NULL && *s; s++)
142 if (*s == '*' ||
143 *s == '?' ||
144 *s == '[')
145 return (B_TRUE);
146
147 return (B_FALSE);
148 }
149
/*
 * glob_glob -- given a filename glob, return the list of matching filenames
 *
 * braces are expanded first with glob_debrace().  if none of the
 * resulting names contains a glob character (see glob_magic()), that
 * brace-expanded list is returned untouched.  otherwise each name is
 * converted with glob_to_reglob() and the converted list is handed to
 * glob_reglob_list(); it is on that path that fn_setn() and fn_setstat()
 * get called for the resulting filenames.
 *
 * returns NULL when glob_debrace() returned NULL.
 */
struct fn_list *
glob_glob(struct fn *fnp)
{
	struct fn_list *names = glob_debrace(fnp);
	struct fn_list *reglobs;
	struct fn_list *matches;
	struct fn *cur;

	/* nothing to work with if debracing gave up */
	if (names == NULL)
		return (NULL);

	/* advance to the first name that needs globbing, if there is one */
	fn_list_rewind(names);
	do {
		cur = fn_list_next(names);
	} while (cur != NULL && !glob_magic(cur));

	/* ran off the end: no globs to expand */
	if (cur == NULL)
		return (names);

	/* turn every name into its reglob equivalent */
	reglobs = fn_list_new(NULL);
	for (fn_list_rewind(names); (cur = fn_list_next(names)) != NULL; )
		fn_list_addfn(reglobs, glob_to_reglob(cur));
	fn_list_free(names);

	/* and let the reglob code find the matching files */
	matches = glob_reglob_list(reglobs);
	fn_list_free(reglobs);

	return (matches);
}
194
/*
 * glob_glob_list -- given a list of filename globs, return all matches
 *
 * each entry of fnlp is expanded with glob_glob() and the results are
 * gathered, in order, into one newly created list which is returned.
 * fnlp itself is only iterated over (it is rewound first).
 */
struct fn_list *
glob_glob_list(struct fn_list *fnlp)
{
	struct fn_list *ret = fn_list_new(NULL);
	struct fn *fnp;

	fn_list_rewind(fnlp);
	while ((fnp = fn_list_next(fnlp)) != NULL) {
		struct fn_list *matches = glob_glob(fnp);

		/*
		 * glob_glob() returns NULL when brace expansion failed;
		 * there is nothing to merge in that case.
		 */
		if (matches != NULL)
			fn_list_addfn_list(ret, matches);
	}

	return (ret);
}
209
210 /*
211 * glob_reglob -- given a filename reglob, return a list of matching filenames
212 *
213 * this routine does all the hard work in this module.
214 */
215 struct fn_list *
glob_reglob(struct fn * fnp)216 glob_reglob(struct fn *fnp)
217 {
218 struct fn_list *ret = fn_list_new(NULL);
219 struct fn_list *newret;
220 struct fn *nextfnp;
221 char *mys = STRDUP(fn_s(fnp));
222 char *sp = mys;
223 char *slash;
224 int skipdotfiles;
225 char *re;
226 char ret0[MAXPATHLEN];
227
228
229 /* start with the initial directory in the list */
230 if (*sp == '/') {
231 fn_list_adds(ret, "/");
232 while (*sp == '/')
233 sp++;
234 } else
235 fn_list_adds(ret, "./");
236
237 /* while components remain... */
238 do {
239 if ((slash = strchr(sp, '/')) != NULL) {
240 *slash++ = '\0';
241 /* skip superfluous slashes */
242 while (*slash == '/')
243 slash++;
244 }
245
246 /* dot files are skipped unless a dot was specifically given */
247 if (sp[0] == '\\' && sp[1] == '.')
248 skipdotfiles = 0;
249 else
250 skipdotfiles = 1;
251
252 /* compile the regex */
253 if ((re = regcmp("^", sp, "$", (char *)0)) == NULL)
254 err(EF_FILE|EF_JMP, "regcmp failed on <%s>", sp);
255
256 /* apply regex to every filename we've matched so far */
257 newret = fn_list_new(NULL);
258 fn_list_rewind(ret);
259 while ((nextfnp = fn_list_next(ret)) != NULL) {
260 DIR *dirp;
261 struct dirent *dp;
262
263 /* go through directory looking for matches */
264 if ((dirp = opendir(fn_s(nextfnp))) == NULL)
265 continue;
266
267 while ((dp = readdir(dirp)) != NULL) {
268 if (skipdotfiles && dp->d_name[0] == '.')
269 continue;
270 *ret0 = '\0';
271 if (regex(re, dp->d_name, ret0)) {
272 struct fn *matchfnp = fn_dup(nextfnp);
273 struct stat stbuf;
274 int n;
275
276 fn_puts(matchfnp, dp->d_name);
277
278 if (stat(fn_s(matchfnp), &stbuf) < 0) {
279 fn_free(matchfnp);
280 continue;
281 }
282
283 /* skip non-dirs if more components */
284 if (slash &&
285 (stbuf.st_mode & S_IFMT) !=
286 S_IFDIR) {
287 fn_free(matchfnp);
288 continue;
289 }
290
291 /*
292 * component matched, fill in "n"
293 * value, stat information, and
294 * append component to directory
295 * name just searched.
296 */
297
298 if (*ret0)
299 n = atoi(ret0);
300 else
301 n = -1;
302 fn_setn(matchfnp, n);
303 fn_setstat(matchfnp, &stbuf);
304
305 if (slash)
306 fn_putc(matchfnp, '/');
307
308 fn_list_addfn(newret, matchfnp);
309 }
310 }
311 (void) closedir(dirp);
312 }
313 fn_list_free(ret);
314 ret = newret;
315 sp = slash;
316 } while (slash);
317
318 FREE(mys);
319
320 return (ret);
321 }
322
/*
 * glob_reglob_list -- reglob a list of filenames
 *
 * runs glob_reglob() on each entry of fnlp, in order, and merges the
 * results into a single newly created list, which is returned.
 */
static struct fn_list *
glob_reglob_list(struct fn_list *fnlp)
{
	struct fn_list *all = fn_list_new(NULL);
	struct fn *reglob;

	for (fn_list_rewind(fnlp); (reglob = fn_list_next(fnlp)) != NULL; ) {
		struct fn_list *expanded = glob_reglob(reglob);

		fn_list_addfn_list(all, expanded);
	}

	return (all);
}
335
/*
 * glob_to_reglob -- convert a glob (*, ?, etc) to a reglob (.*, ., etc.)
 *
 * fnp is rewound and read a character at a time into a new struct fn,
 * which is returned:
 *
 *	*			becomes .*
 *	?			becomes .
 *	. ( ) ^ + { } $		get a backslash put in front of them
 *	anything else		is copied through as is
 */
struct fn *
glob_to_reglob(struct fn *fnp)
{
	struct fn *reglob = fn_new(NULL);
	int ch;

	fn_rewind(fnp);
	for (;;) {
		ch = fn_getc(fnp);
		if (ch == '\0')
			break;

		/* the two glob wildcards are replaced outright */
		if (ch == '?') {
			fn_putc(reglob, '.');
			continue;
		}
		if (ch == '*') {
			fn_putc(reglob, '.');
			fn_putc(reglob, '*');
			continue;
		}

		/* everything else is kept, escaped first if regex-special */
		switch (ch) {
		case '.':
		case '(':
		case ')':
		case '^':
		case '+':
		case '{':
		case '}':
		case '$':
			fn_putc(reglob, '\\');
			break;
		default:
			break;
		}
		fn_putc(reglob, ch);
	}

	return (reglob);
}
375
#ifdef TESTMODULE

/*
 * test main for glob module, usage: a.out [-r] [pattern...]
 *	-r means the patterns are reglobs instead of globs
 *
 * -r applies to the patterns that follow it on the command line.  for
 * each pattern the matching names are printed, then the value of
 * fn_list_totalsize() for them, then the names again in the order
 * fn_list_popoldest() hands them back.  a pattern that provokes an
 * error is reported as skipped and the run moves on to the next one.
 */
int
main(int argc, char *argv[])
{
	int i;
	int reglobs = 0;
	struct fn *argfnp = fn_new(NULL);
	struct fn *fnp;
	struct fn_list *fnlp;

	err_init(argv[0]);
	setbuf(stdout, NULL);

	for (i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-r") == 0) {
			reglobs = 1;
			continue;
		}

		/* errors raised with EF_JMP while expanding come back here */
		if (SETJMP) {
			printf(" skipped due to errors\n");
			continue;
		}

		printf("<%s>:\n", argv[i]);
		fn_renew(argfnp, argv[i]);
		if (reglobs)
			fnlp = glob_reglob(argfnp);
		else
			fnlp = glob_glob(argfnp);

		/* glob_glob() hands back NULL if brace expansion failed */
		if (fnlp == NULL) {
			printf(" skipped due to errors\n");
			continue;
		}

		fn_list_rewind(fnlp);
		while ((fnp = fn_list_next(fnlp)) != NULL)
			printf(" <%s>\n", fn_s(fnp));

		printf("total size: %lld\n", fn_list_totalsize(fnlp));

		while ((fnp = fn_list_popoldest(fnlp)) != NULL) {
			printf(" oldest <%s>\n", fn_s(fnp));
			fn_free(fnp);
		}

		fn_list_free(fnlp);
	}
	fn_free(argfnp);

	err_done(0);
	/* NOTREACHED */
	return (0);
}

#endif /* TESTMODULE */
433