xref: /illumos-gate/usr/src/cmd/filesync/files.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
24  *
25  * module:
26  *	files.c
27  *
28  * purpose:
29  *	routines to examine and manipulate file names
30  *
31  * contents:
32  *	qualify ... ensure that a name is fully qualified
33  *	expand  ... expand env variables within a string or file name
 *	noblanks .. ensure that a name contains no embedded unescaped blanks
35  *	lex ....... a lexer that can handle escaped/embedded blanks
36  *	wildcards . see whether or not a name contains wild cards
37  *	prefix .... does one string begin with another
38  *	suffix .... does one string end with another
39  *	contains .. does one string contain another
40  *
41  *	cannonize (static) ...	compress redundant "." and ".." out of name
42  *
43  * notes:
44  *	we are interested in embedded blanks because international
45  *	character sets and non-unix file systems can both contain
 *	the byte 0x20.  Thus, whenever we record a filename in a
 *	file, we must be careful to escape any embedded blanks that
48  *	cause trouble when we re-lex that file later.
49  */
50 #ident	"%W%	%E% SMI"
51 
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <ctype.h>
56 #include <unistd.h>
57 
58 #include "filesync.h"
59 #include "messages.h"
60 
61 static void cannonize(char *name);
62 
63 /*
64  * routine:
65  *	qualify
66  *
67  * purpose:
68  *	to fully qualify a name
69  *
70  * parameters:
71  *	name to be qualified
72  *
73  * returns:
74  *	either original pointer or copy to a new (malloced) buffer
75  *
76  * notes:
77  *	someday I may conclude that I should always make a copy
78  *	so that the caller can know that it is safe to free the parm
79  *
80  *	I thought about this and concluded that there is never a need
81  *	to fully qualify a string containing variables.  If the string
82  *	came from the command line, the variables were already expanded
83  *	and if it came from the rules data base it is required to already
84  *	be fully qualified.
85  */
86 char *
87 qualify(char *name)
88 {
89 	char namebuf[ MAX_PATH ];
90 
91 	/* in the simple case, the parameter is already there */
92 	if (*name == '/') {
93 		cannonize(name);
94 		return (name);
95 	}
96 
97 	/* things that begin with variables get the benefit of the doubt */
98 	if (*name == '$') {
99 		cannonize(name);
100 		return (name);
101 	}
102 
103 	/* start with the current working directory	*/
104 	if (getcwd(namebuf, sizeof (namebuf)) == 0) {
105 		fprintf(stderr, gettext(ERR_nocwd), name);
106 		exit(ERR_OTHER);
107 	}
108 
109 	/* make sure we have room for our file name	*/
110 	if ((strlen(namebuf) + strlen(name) + 2) >= sizeof (namebuf)) {
111 		fprintf(stderr, gettext(ERR_longname), name);
112 		exit(ERR_OTHER);
113 	}
114 
115 	/* append the specified file name to it	*/
116 	strcat(namebuf, "/");
117 	strcat(namebuf, name);
118 
119 	/* filter out redundant dots	*/
120 	cannonize(namebuf);
121 
122 	if (opt_debug & DBG_VARS)
123 		fprintf(stderr, "VARS: QUALIFY %s to %s\n", name, namebuf);
124 
125 	/* and return a newly malloc'd copy	*/
126 	return (strdup(namebuf));
127 }
128 
129 /*
130  * routine:
131  *	expand
132  *
133  * purpose:
134  *	to expand variable names within a string
135  *
136  * parameters:
137  *	string to be expanded.  Variable references always begin
138  *	with a $ and are delimited by parens or curleys.
139  *
140  * returns:
141  *	either original pointer or a copy to a new (malloced) buffer
142  *
143  * notes:
144  *	someday I may conclude that I should always make a copy
145  *	so that the caller can know that it is safe to free the parm
146  *
147  *	someday I may decide to support escape conventions for embedding
148  *	$(){} in file names, but I suspec that day will never come.
149  *
150  *	I thought about this and concluded there was no reason to
151  *	fully qualify these names, because the only names that should
152  *	need qualification are src/dst lines from the command line,
153  *	and the shell should have handled those for me.  Once something
154  *	makes it into the database, it is expected to be fully qualified
155  *	already.
156  *
157  *	We are limited to producing strings of length MAX_PATH or less
158  *	and variable names of length MAX_NAME or less.  In practice,
159  *	these limitations should not be a problem.
160  */
161 char *
162 expand(char *name)
163 {	const char *s;
164 	char *p, *v;
165 	char delim;
166 	char namebuf[ MAX_PATH ];
167 	char varbuf[ MAX_NAME ];
168 
169 	/* first see if there are no variables to be bound */
170 	for (s = name; *s && *s != '$'; s++);
171 	if (*s == 0)
172 		return (name);
173 
174 	/* move through the string, copying and expanding	*/
175 	for (s = name, p = namebuf; *s; s++) {
176 
177 		/* check for overflow	*/
178 		if (p >= &namebuf[ MAX_PATH ]) {
179 			fprintf(stderr, gettext(ERR_longname), name);
180 			exit(ERR_OTHER);
181 		}
182 
183 		/* normal characters, we just copy		*/
184 		if (*s != '$') {
185 			*p++ = *s;
186 			continue;
187 		}
188 
189 		/* figure out how the variable name is delimited */
190 		delim = *++s;
191 		if (delim == '(') {
192 			delim = ')';
193 			s++;
194 		} else if (delim == '{') {
195 			delim = '}';
196 			s++;
197 		} else
198 			delim = 0;
199 
200 		/* copy the variable name up to the closing delimiter */
201 		for (v = varbuf; *s; s++) {
202 			if (isalnum(*s) || (*s == '_') ||
203 				(delim && *s != delim))
204 				*v++ = *s;
205 			else
206 				break;
207 
208 			/* make sure we don't overflow var name buffer	*/
209 			if (v >= &varbuf[MAX_NAME - 1]) {
210 				*v = 0;
211 				fprintf(stderr, gettext(ERR_longname), varbuf);
212 				exit(ERR_OTHER);
213 			}
214 		}
215 
216 		*v = 0;
217 
218 		/* FIX THIS ... there must be a more elegant way */
219 		/* we may have to back up because s will be bumped */
220 		if (delim == 0 || *s != delim)
221 			s--;
222 
223 		/* look up the variable 			*/
224 		v = getenv(varbuf);
225 		if (v == 0 || *v == 0) {
226 			fprintf(stderr, gettext(ERR_undef), varbuf);
227 			return (0);
228 		}
229 
230 		/* copy the variable into the buffer		*/
231 		while (*v)
232 			*p++ = *v++;
233 	}
234 
235 	/* null terminate the copy	*/
236 	*p = 0;
237 
238 	/* compress out any redundant dots and dot-dots	*/
239 	cannonize(namebuf);
240 
241 	if (opt_debug & DBG_VARS)
242 		fprintf(stderr, "VARS: EXPAND %s to %s\n", name, namebuf);
243 
244 	/* and return a newly malloc'd copy	*/
245 	return (strdup(namebuf));
246 }
247 
/*
 * routine:
 *	noblanks
 *
 * purpose:
 *	to ensure that a name contains no unescaped embedded blanks
 *
 * parameters:
 *	pointer to name
 *
 * returns:
 *	the original pointer if the name contains no blanks, otherwise
 *	a pointer to a static buffer holding the name wrapped in double
 *	quotes, with every embedded '"' and '\' preceded by a '\'
 *
 * notes:
 *	this routine can be called on full file names, and so can
 *	conceivably require an arbitrarily large buffer.  The buffer
 *	is kept between calls and only ever grows, so the returned
 *	string is overwritten (and may move) on the next call that
 *	has to escape a name.
 *
 *	if the buffer cannot be grown we print a diagnostic and abort;
 *	there is no way to report the failure to the caller and an
 *	unescaped name must never be written out.
 */
const char *
noblanks(const char *name)
{
	const char *s;
	char *p;
	static char *namebuf = NULL;
	static size_t buflen = 0;
	size_t need;

	/* first see if there are no embedded blanks	*/
	if (strchr(name, ' ') == NULL)
		return (name);

	/*
	 * worst case: every character needs an escape, plus the
	 * two enclosing quotes and the terminating null
	 */
	need = (2 * strlen(name)) + 3;
	if (need > buflen) {
		/* keep the old pointer until we know realloc succeeded */
		p = realloc(namebuf, need);
		if (p == NULL) {
			perror("noblanks");
			abort();
		}
		namebuf = p;
		buflen = need;
	}

	/* quote the name, and copy it, escaping quotes	*/
	p = namebuf;
	*p++ = '"';

	for (s = name; *s; s++) {
		if (*s == '"' || *s == '\\')
			*p++ = '\\';
		*p++ = *s;
	}

	*p++ = '"';
	*p = 0;

	return (namebuf);
}
299 
300 /*
301  * routine:
302  *	lex
303  *
304  * purpose:
305  *	my own version of strtok that handles quoting and escaping
306  *
307  * parameters:
308  *	FILE structure for file to read (0 for same string, same file)
309  *
310  * returns:
311  *	pointer to next token
312  *
313  * notes:
314  *	this routine makes no changes to the string it is passed,
315  *	copying tokens into a static buffer.
316  *
317  *	this routine handles continuation lines after reading and
318  *	before the lexing even starts.  This limits continued lines
319  *	to a length of MAX_LINE, but keeps everything else very simple.
320  *	We also, therefore, limit tokens to a maximum length of MAX_LINE.
321  */
322 int lex_linenum;		/* line number in current input file	*/
323 
324 char *
325 lex(FILE *file)
326 {	char c, delim;
327 	char *p;
328 	char *s;
329 	static char *savep;
330 	static char namebuf[ MAX_LINE ];
331 	static char inbuf[ MAX_LINE ];
332 
333 	if (file) {			/* read a new line		*/
334 		p = inbuf + sizeof (inbuf);
335 
336 		/* read the next input line, with all continuations	*/
337 		for (s = inbuf; savep = fgets(s, p - s, file); ) {
338 			lex_linenum++;
339 
340 			/* go find the last character of the input line	*/
341 			while (*s && s[1])
342 				s++;
343 			if (*s == '\n')
344 				s--;
345 
346 			/* see whether or not we need a continuation	*/
347 			if (s < inbuf || *s != '\\')
348 				break;
349 
350 			continue;
351 		}
352 
353 		if (savep == 0)
354 			return (0);
355 
356 		s = inbuf;
357 	} else {			/* continue with old line	*/
358 		if (savep == 0)
359 			return (0);
360 		s = savep;
361 	}
362 	savep = 0;
363 
364 	/* skip over leading white space	*/
365 	while (isspace(*s))
366 		s++;
367 	if (*s == 0)
368 		return (0);
369 
370 	/* see if this is a quoted string	*/
371 	c = *s;
372 	if (c == '\'' || c == '"') {
373 		delim = c;
374 		s++;
375 	} else
376 		delim = 0;
377 
378 	/* copy the token into the buffer	*/
379 	for (p = namebuf; (c = *s) != 0; s++) {
380 		/* literal escape		*/
381 		if (c == '\\') {
382 			s++;
383 			*p++ = *s;
384 			continue;
385 		}
386 
387 		/* closing delimiter		*/
388 		if (c == delim) {
389 			s++;
390 			break;
391 		}
392 
393 		/* delimiting white space	*/
394 		if (delim == 0 && isspace(c))
395 			break;
396 
397 		/* ordinary characters		*/
398 		*p++ = *s;
399 	}
400 
401 
402 	/* remember where we left off		*/
403 	savep = *s ? s : 0;
404 
405 	/* null terminate and return the buffer	*/
406 	*p = 0;
407 	return (namebuf);
408 }
409 
410 /*
411  * routine:
412  *	wildcards
413  *
414  * purpose:
415  *	determine whether or not there are any wild cards in a name
416  *
417  * parameters:
418  *	name to be checked
419  *
420  * returns:
421  *	true/false
422  *
423  * notes:
424  *	we use this to take shortcuts
425  */
426 bool_t
427 wildcards(const char *name)
428 {	const char *s;
429 	int literal = 0;
430 
431 	for (s = name; *s; s++)
432 		if (literal)
433 			switch (*s) {
434 				case '\'':	/* end of literal string */
435 					literal = 0;
436 					continue;
437 				case '\\':	/* escape next character */
438 					s++;
439 					continue;
440 			}
441 		else
442 			switch (*s) {
443 				case '\'':	/* literal string	*/
444 					literal = 1;
445 					continue;
446 				case '\\':	/* escape next character */
447 					s++;
448 					continue;
449 				case '*':
450 				case '[':
451 				case '{':
452 				case '?':
453 					/* any of these is a wild card	*/
454 					return (TRUE);
455 			}
456 
457 	return (FALSE);
458 }
459 
/*
 * routine:
 *	cannonize
 *
 * purpose:
 *	to compress redundant dots out of a path
 *
 * parameters:
 *	file name in an editable buffer
 *
 * returns:
 *	void
 *
 * notes:
 *	because we compress the string in place, there is no danger
 *	of our overflowing any fixed sized buffer.
 *
 *	the pieces being moved overlap, so all of the shuffling is
 *	done with memmove (strcpy is undefined for overlapping copies).
 *
 *	what gets compressed:
 *		leading "./"			is removed
 *		"/./"				becomes "/"
 *		"<dir>/<comp>/../<rest>"	becomes "<dir>/<rest>"
 *		"/../<rest>"			becomes "/<rest>"
 *		"<comp>/../<rest>"		becomes "<rest>"
 *
 *	what is deliberately left alone:
 *		a ".." that follows another ".." (there is nothing
 *		left for it to cancel), and a ".." that follows a
 *		component containing a '$' (an unexpanded variable
 *		may stand for more than one directory).
 *
 *	after each compression we rescan from the point of the edit,
 *	so that runs like "a/b/../../c" are fully compressed.
 *	A trailing "/." or "/.." (with no following slash) and
 *	repeated slashes are not touched.
 */
static void
cannonize(char *name)
{	char *s;		/* current scan position	*/
	char *comp;		/* start of preceding component	*/
	char *tail;		/* what follows the "/.."	*/
	size_t len;		/* length of that component	*/

	/* leading dot-slashes	*/
	while (name[0] == '.' && name[1] == '/')
		memmove(name, &name[2], strlen(&name[2]) + 1);

	s = name;
	while (*s) {
		/* interesting things happen after slashes	*/
		if (*s != '/') {
			s++;
			continue;
		}

		/* embedded dot-slashes */
		while (s[1] == '.' && s[2] == '/')
			memmove(&s[1], &s[3], strlen(&s[3]) + 1);

		/* embedded slash-dot-dot-slash	*/
		if (strncmp(s, "/../", 4) != 0) {
			s++;
			continue;
		}

		/* scan backwards to find the component before the slash */
		for (comp = s; comp > name && comp[-1] != '/'; comp--)
			;
		len = (size_t)(s - comp);

		/* some components cannot be cancelled by a dot-dot	*/
		if ((len == 2 && comp[0] == '.' && comp[1] == '.') ||
		    memchr(comp, '$', len) != NULL) {
			s++;
			continue;
		}

		tail = &s[3];		/* begins with a slash	*/

		if (comp > name) {
			/* replace "/<comp>/.." with nothing	*/
			s = comp - 1;
			memmove(s, tail, strlen(tail) + 1);
		} else if (len == 0) {
			/* dot-dot at the root stays at the root	*/
			memmove(name, tail, strlen(tail) + 1);
			s = name;
		} else {
			/*
			 * first component of a relative name: drop it,
			 * the dot-dot and the separators that follow so
			 * that the name stays relative.
			 */
			for (;;) {
				if (tail[0] == '/')
					tail++;
				else if (tail[0] == '.' && tail[1] == '/')
					tail += 2;
				else
					break;
			}
			memmove(name, tail, strlen(tail) + 1);
			s = name;
		}

		/* s now sits at the edit and is examined again	*/
	}
}
506 
/*
 * routine:
 *	prefix
 *
 * purpose:
 *	determine whether or not one string begins with another
 *
 * parameters:
 *	string to be tested
 *	suspected prefix
 *
 * returns:
 *	no	0
 *	yes	pointer to the character (in the tested string)
 *		that follows the prefix
 *
 * notes:
 *	an empty prefix matches anything, yielding the string itself
 */
const char *
prefix(const char *s, const char *p)
{
	size_t plen = strlen(p);

	/* the comparison stops by itself if s is shorter than p	*/
	if (strncmp(s, p, plen) != 0)
		return (0);

	return (s + plen);
}
531 
532 /*
533  * routine:
534  *	suffix
535  *
536  * purpose:
537  *	determine whether or not one string ends with another
538  *
539  * parameters:
540  *	string to be tested
541  *	suspected suffix
542  *
543  * returns:
544  *	true/false
545  */
546 bool_t
547 suffix(const char *str, const char *suf)
548 {	const char *s;
549 
550 	/* go to where the alleged suffix would start */
551 	for (s = str; *s; s++);
552 	s -= strlen(suf);
553 	if (s < str)
554 		return (FALSE);
555 
556 	/* see if the string ends with the suffix */
557 	while (*suf)
558 		if (*suf++ != *s++)
559 			return (FALSE);
560 
561 	return (TRUE);
562 }
563 
564 /*
565  * routine:
566  *	contains
567  *
568  * purpose:
569  *	determine whether or not one string contains another
570  *
571  * parameters:
572  *	string to be checked
573  *	pattern we are seeking
574  *
575  * returns:
576  *	true/false
577  */
578 bool_t
579 contains(const char *str, const char *pat)
580 {	const char *s, *p;
581 
582 	while (*str) {
583 		if (*str++ == *pat) {
584 			for (s = str, p = &pat[1]; *s == *p; s++, p++)
585 				if (p[1] == 0)
586 					return (TRUE);
587 		}
588 	}
589 
590 	return (FALSE);
591 }
592