xref: /illumos-gate/usr/src/cmd/bart/create.c (revision 3e95bd4ab92abca814bd28e854607d1975c7dc88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <signal.h>
26 #include <unistd.h>
27 #include <sys/acl.h>
28 #include <sys/statvfs.h>
29 #include <sys/wait.h>
30 #include "bart.h"
31 #include <aclutils.h>
32 
/*
 * Forward declarations for the file-local helpers defined below.
 */
static int	sanitize_reloc_root(char *root, size_t bufsize);
static int	create_manifest_filelist(char **argv, char *reloc_root);
static int	create_manifest_rule(char *reloc_root, FILE *rule_fp);
static void	output_manifest(void);
static int	eval_file(const char *fname, const struct stat64 *statb);
static char	*sanitized_fname(const char *, boolean_t);
static char	*get_acl_string(const char *fname, const struct stat64 *statb,
    int *err_code);
static int	generate_hash(int fdin, char *hash_str);
static int	read_filelist(char *reloc_root, char **argv, char *buf,
    size_t bufsize);
static int	walker(const char *name, const struct stat64 *sp,
    int type, struct FTW *ftwx);

/*
 * The following globals are necessary due to the "walker" function
 * provided by nftw().  Since there is no way to pass them through to the
 * walker function, they must be global.
 *
 *	compute_chksum	non-zero when eval_file() should checksum regular
 *			files; cleared by the -n option and switched per
 *			file by walker() according to the matching rule.
 *	eval_err	set to WARNING_EXIT by walker() and read back by
 *			create_manifest_rule() after each tree walk.
 *	subtree_root	the rule for the subtree currently being walked;
 *			eval_file() passes it to exclude_fname().
 *	reloc_root	the relocatable root given with -R (empty when not
 *			used); sanitized_fname() strips it from pathnames.
 *	parent_vfs	statvfs64() data for the root of the current subtree;
 *			walker() compares directories against it.
 */
static int		compute_chksum = 1, eval_err = 0;
static struct rule	*subtree_root;
static char		reloc_root[PATH_MAX];
static struct statvfs64	parent_vfs;
56 
57 int
58 bart_create(int argc, char **argv)
59 {
60 	boolean_t	filelist_input;
61 	int		ret, c, output_pipe[2];
62 	FILE 		*rules_fd = NULL;
63 	pid_t		pid;
64 
65 	filelist_input = B_FALSE;
66 	reloc_root[0] = '\0';
67 
68 	while ((c = getopt(argc, argv, "Inr:R:")) != EOF) {
69 		switch (c) {
70 		case 'I':
71 			if (rules_fd != NULL) {
72 				(void) fprintf(stderr, "%s", INPUT_ERR);
73 				usage();
74 			}
75 			filelist_input = B_TRUE;
76 			break;
77 
78 		case 'n':
79 			compute_chksum = 0;
80 			break;
81 
82 		case 'r':
83 			if (strcmp(optarg, "-") == 0)
84 				rules_fd = stdin;
85 			else
86 				rules_fd = fopen(optarg, "r");
87 			if (rules_fd == NULL) {
88 				perror(optarg);
89 				usage();
90 			}
91 			break;
92 
93 		case 'R':
94 			(void) strlcpy(reloc_root, optarg, sizeof (reloc_root));
95 			ret = sanitize_reloc_root(reloc_root,
96 			    sizeof (reloc_root));
97 			if (ret == 0)
98 				usage();
99 			break;
100 
101 		case '?':
102 		default :
103 			usage();
104 		}
105 	}
106 	argv += optind;
107 
108 	if (pipe(output_pipe) < 0) {
109 		perror("");
110 		exit(FATAL_EXIT);
111 	}
112 
113 	pid = fork();
114 	if (pid < 0) {
115 		perror(NULL);
116 		exit(FATAL_EXIT);
117 	}
118 
119 	/*
120 	 * Break the creation of a manifest into two parts: the parent process
121 	 * generated the data whereas the child process sorts the data.
122 	 *
123 	 * The processes communicate through the pipe.
124 	 */
125 	if (pid > 0) {
126 		/*
127 		 * Redirect the stdout of this process so it goes into
128 		 * output_pipe[0].  The output of this process will be read
129 		 * by the child, which will sort the output.
130 		 */
131 		if (dup2(output_pipe[0], STDOUT_FILENO) != STDOUT_FILENO) {
132 			perror(NULL);
133 			exit(FATAL_EXIT);
134 		}
135 		(void) close(output_pipe[0]);
136 		(void) close(output_pipe[1]);
137 
138 		if (filelist_input == B_TRUE) {
139 			ret = create_manifest_filelist(argv, reloc_root);
140 		} else {
141 			ret = create_manifest_rule(reloc_root, rules_fd);
142 		}
143 
144 		/* Close stdout so the sort in the child proc will complete */
145 		(void) fclose(stdout);
146 	} else {
147 		/*
148 		 * Redirect the stdin of this process so its read in from
149 		 * the pipe, which is the parent process in this case.
150 		 */
151 		if (dup2(output_pipe[1], STDIN_FILENO) != STDIN_FILENO) {
152 			perror(NULL);
153 			exit(FATAL_EXIT);
154 		}
155 		(void) close(output_pipe[0]);
156 
157 		output_manifest();
158 	}
159 
160 	/* Wait for the child proc (the sort) to complete */
161 	(void) wait(0);
162 
163 	return (ret);
164 }
165 
/*
 * Handle the -R option and set 'root' to be the absolute path of the
 * relocatable root.  This is useful when the user specifies '-R ../../foo'.
 *
 * 'root' holds the user-supplied path on entry and is overwritten, in a
 * buffer of 'bufsize' bytes, with the absolute path.  A root of "/" is
 * returned as the empty string so that it never carries a trailing slash.
 *
 * Returns 1 on success.  Returns 0 when the current directory cannot be
 * determined, when 'root' cannot be entered, when its absolute path does
 * not fit in 'bufsize' bytes, or when the original directory cannot be
 * restored.  The contents of 'root' are not meaningful after a failure.
 */
static int
sanitize_reloc_root(char *root, size_t bufsize)
{
	char		pwd[PATH_MAX];
	int		resolved;

	/*
	 * First, save the current directory and go to the location
	 * specified with the -R option.  Without a saved directory there
	 * would be no way back, so give up before moving anywhere.
	 */
	if (getcwd(pwd, sizeof (pwd)) == NULL) {
		perror("getcwd");
		return (0);
	}
	if (chdir(root) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(root);
		return (0);
	}

	/*
	 * Save the absolute path of the relocatable root directory.  On
	 * failure 'root' is undefined, so it must not be printed or used.
	 */
	resolved = (getcwd(root, bufsize) != NULL);
	if (!resolved)
		perror("getcwd");

	/*
	 * Now, go back to where we started, necessary for picking up a rules
	 * file.  This is attempted even when the lookup above failed.
	 */
	if (chdir(pwd) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(pwd);
		return (0);
	}

	if (!resolved)
		return (0);

	/*
	 * Make sure the path returned does not have a trailing /. This
	 * can only happen when the entire pathname is "/".
	 */
	if (strcmp(root, "/") == 0)
		root[0] = '\0';

	return (1);
}
216 
217 /*
218  * This is the worker bee which creates the manifest based upon the command
219  * line options supplied by the user.
220  *
221  * NOTE: create_manifest() eventually outputs data to a pipe, which is read in
222  * by the child process.  The child process is running output_manifest(), which
223  * is responsible for generating sorted output.
224  */
225 static int
226 create_manifest_rule(char *reloc_root, FILE *rule_fp)
227 {
228 	struct rule	*root;
229 	int		ret_status = EXIT;
230 	uint_t		flags;
231 
232 	if (compute_chksum)
233 		flags = ATTR_CONTENTS;
234 	else
235 		flags = 0;
236 	ret_status = read_rules(rule_fp, reloc_root, flags, 1);
237 
238 	/* Loop through every single subtree */
239 	for (root = get_first_subtree(); root != NULL;
240 	    root = get_next_subtree(root)) {
241 
242 		/*
243 		 * Check to see if this subtree should have contents
244 		 * checking turned on or off.
245 		 *
246 		 * NOTE: The 'compute_chksum' and 'parent_vfs'
247 		 * are a necessary hack: the variables are used in
248 		 * walker(), both directly and indirectly.  Since
249 		 * the parameters to walker() are defined by nftw(),
250 		 * the globals are really a backdoor mechanism.
251 		 */
252 		ret_status = statvfs64(root->subtree, &parent_vfs);
253 		if (ret_status < 0) {
254 			perror(root->subtree);
255 			continue;
256 		}
257 
258 		/*
259 		 * Walk the subtree and invoke the callback function walker()
260 		 * Use FTW_ANYERR to get FTW_NS and FTW_DNR entries *and*
261 		 * to continue past those errors.
262 		 */
263 		subtree_root = root;
264 		(void) nftw64(root->subtree, &walker, 20, FTW_PHYS|FTW_ANYERR);
265 
266 		/*
267 		 * Ugly but necessary:
268 		 *
269 		 * walker() must return 0, or the tree walk will stop,
270 		 * so warning flags must be set through a global.
271 		 */
272 		if (eval_err == WARNING_EXIT)
273 			ret_status = WARNING_EXIT;
274 
275 	}
276 	return (ret_status);
277 }
278 
279 static int
280 create_manifest_filelist(char **argv, char *reloc_root)
281 {
282 	int	ret_status = EXIT;
283 	char	input_fname[PATH_MAX];
284 
285 	while (read_filelist(reloc_root, argv,
286 	    input_fname, sizeof (input_fname)) != -1) {
287 
288 		struct stat64	stat_buf;
289 		int		ret;
290 
291 		ret = lstat64(input_fname, &stat_buf);
292 		if (ret < 0) {
293 			ret_status = WARNING_EXIT;
294 			perror(input_fname);
295 		} else {
296 			ret = eval_file(input_fname, &stat_buf);
297 
298 			if (ret == WARNING_EXIT)
299 				ret_status = WARNING_EXIT;
300 		}
301 	}
302 
303 	return (ret_status);
304 }
305 
306 /*
307  * output_manifest() the child process.  It reads in the output from
308  * create_manifest() and sorts it.
309  */
310 static void
311 output_manifest(void)
312 {
313 	char	*env[] = {"LC_CTYPE=C", "LC_COLLATE=C", "LC_NUMERIC=C", NULL};
314 	time_t		time_val;
315 	struct tm	*tm;
316 	char		time_buf[1024];
317 
318 	(void) printf("%s", MANIFEST_VER);
319 	time_val = time((time_t)0);
320 	tm = localtime(&time_val);
321 	(void) strftime(time_buf, sizeof (time_buf), "%A, %B %d, %Y (%T)", tm);
322 	(void) printf("! %s\n", time_buf);
323 	(void) printf("%s", FORMAT_STR);
324 	(void) fflush(stdout);
325 	/*
326 	 * Simply run sort and read from the the current stdin, which is really
327 	 * the output of create_manifest().
328 	 * Also, make sure the output is unique, since a given file may be
329 	 * included by several stanzas.
330 	 */
331 	if (execle("/usr/bin/sort", "sort", "-u", NULL, env) < 0) {
332 		perror("");
333 		exit(FATAL_EXIT);
334 	}
335 
336 	/*NOTREACHED*/
337 }
338 
339 /*
340  * Callback function for nftw()
341  */
342 static int
343 walker(const char *name, const struct stat64 *sp, int type, struct FTW *ftwx)
344 {
345 	int			ret;
346 	struct statvfs64	path_vfs;
347 	boolean_t		dir_flag = B_FALSE;
348 	struct rule		*rule;
349 
350 	switch (type) {
351 	case FTW_F:	/* file 		*/
352 		rule = check_rules(name, 'F');
353 		if (rule != NULL) {
354 			if (rule->attr_list & ATTR_CONTENTS)
355 				compute_chksum = 1;
356 			else
357 				compute_chksum = 0;
358 		}
359 		break;
360 	case FTW_SL:	/* symbolic link, FTW_PHYS	*/
361 	case FTW_SLN:	/* symbolic link, ~FTW_PHYS	*/
362 		break;
363 	case FTW_DP:	/* end of directory, FTW_DEPTH	*/
364 	case FTW_D:	/* enter directory, ~FTW_DEPTH	*/
365 		dir_flag = B_TRUE;
366 		ret = statvfs64(name, &path_vfs);
367 		if (ret < 0)
368 			eval_err = WARNING_EXIT;
369 		break;
370 	case FTW_NS:	/* unstatable file	*/
371 		(void) fprintf(stderr, UNKNOWN_FILE, name);
372 		eval_err = WARNING_EXIT;
373 		return (0);
374 	case FTW_DNR:	/* unreadable directory	*/
375 		(void) fprintf(stderr, CANTLIST_DIR, name);
376 		eval_err = WARNING_EXIT;
377 		return (0);
378 	default:
379 		(void) fprintf(stderr, INTERNAL_ERR, name);
380 		eval_err = WARNING_EXIT;
381 		return (0);
382 	}
383 
384 	/* This is the function which really processes the file */
385 	ret = eval_file(name, sp);
386 
387 	/*
388 	 * Since the parameters to walker() are constrained by nftw(),
389 	 * need to use a global to reflect a WARNING.  Sigh.
390 	 */
391 	if (ret == WARNING_EXIT)
392 		eval_err = WARNING_EXIT;
393 
394 	/*
395 	 * This is a case of a directory which crosses into a mounted
396 	 * filesystem of a different type, e.g., UFS -> NFS.
397 	 * BART should not walk the new filesystem (by specification), so
398 	 * set this consolidation-private flag so the rest of the subtree
399 	 * under this directory is not waled.
400 	 */
401 	if (dir_flag &&
402 	    (strcmp(parent_vfs.f_basetype, path_vfs.f_basetype) != 0))
403 		ftwx->quit = FTW_PRUNE;
404 
405 	return (0);
406 }
407 
408 /*
409  * This file does the per-file evaluation and is run to generate every entry
410  * in the manifest.
411  *
412  * All output is written to a pipe which is read by the child process,
413  * which is running output_manifest().
414  */
415 static int
416 eval_file(const char *fname, const struct stat64 *statb)
417 {
418 	int	fd, ret, err_code, i;
419 	char	last_field[PATH_MAX], ftype, *acl_str;
420 	char	*quoted_name;
421 
422 	err_code = EXIT;
423 
424 	switch (statb->st_mode & S_IFMT) {
425 	/* Regular file */
426 	case S_IFREG: ftype = 'F'; break;
427 
428 	/* Directory */
429 	case S_IFDIR: ftype = 'D'; break;
430 
431 	/* Block Device */
432 	case S_IFBLK: ftype = 'B'; break;
433 
434 	/* Character Device */
435 	case S_IFCHR: ftype = 'C'; break;
436 
437 	/* Named Pipe */
438 	case S_IFIFO: ftype = 'P'; break;
439 
440 	/* Socket */
441 	case S_IFSOCK: ftype = 'S'; break;
442 
443 	/* Door */
444 	case S_IFDOOR: ftype = 'O'; break;
445 
446 	/* Symbolic link */
447 	case S_IFLNK: ftype = 'L'; break;
448 
449 	default: ftype = '-'; break;
450 	}
451 
452 	/* First, make sure this file should be cataloged */
453 
454 	if ((subtree_root != NULL) &&
455 	    (exclude_fname(fname, ftype, subtree_root)))
456 		return (err_code);
457 
458 	for (i = 0; i < PATH_MAX; i++)
459 		last_field[i] = '\0';
460 
461 	/*
462 	 * Regular files, compute the MD5 checksum and put it into 'last_field'
463 	 * UNLESS instructed to ignore the checksums.
464 	 */
465 	if (ftype == 'F') {
466 		if (compute_chksum) {
467 			fd = open(fname, O_RDONLY|O_LARGEFILE);
468 			if (fd < 0) {
469 				err_code = WARNING_EXIT;
470 				perror(fname);
471 
472 				/* default value since the computution failed */
473 				(void) strcpy(last_field, "-");
474 			} else {
475 				if (generate_hash(fd, last_field) != 0) {
476 					err_code = WARNING_EXIT;
477 					(void) fprintf(stderr, CONTENTS_WARN,
478 					    fname);
479 					(void) strcpy(last_field, "-");
480 				}
481 			}
482 			(void) close(fd);
483 		}
484 		/* Instructed to ignore checksums, just put in a '-' */
485 		else
486 			(void) strcpy(last_field, "-");
487 	}
488 
489 	/*
490 	 * For symbolic links, put the destination of the symbolic link into
491 	 * 'last_field'
492 	 */
493 	if (ftype == 'L') {
494 		ret = readlink(fname, last_field, sizeof (last_field));
495 		if (ret < 0) {
496 			err_code = WARNING_EXIT;
497 			perror(fname);
498 
499 			/* default value since the computation failed */
500 			(void) strcpy(last_field, "-");
501 		}
502 		else
503 			(void) strlcpy(last_field,
504 			    sanitized_fname(last_field, B_FALSE),
505 			    sizeof (last_field));
506 
507 		/*
508 		 * Boundary condition: possible for a symlink to point to
509 		 * nothing [ ln -s '' link_name ].  For this case, set the
510 		 * destination to "\000".
511 		 */
512 		if (strlen(last_field) == 0)
513 			(void) strcpy(last_field, "\\000");
514 	}
515 
516 	acl_str = get_acl_string(fname, statb, &err_code);
517 
518 	/* Sanitize 'fname', so its in the proper format for the manifest */
519 	quoted_name = sanitized_fname(fname, B_TRUE);
520 
521 	/* Start to build the entry.... */
522 	(void) printf("%s %c %d %o %s %x %d %d", quoted_name, ftype,
523 	    (int)statb->st_size, (int)statb->st_mode, acl_str,
524 	    (int)statb->st_mtime, (int)statb->st_uid, (int)statb->st_gid);
525 
526 	/* Finish it off based upon whether or not it's a device node */
527 	if ((ftype == 'B') || (ftype == 'C'))
528 		(void) printf(" %x\n", (int)statb->st_rdev);
529 	else if (strlen(last_field) > 0)
530 		(void) printf(" %s\n", last_field);
531 	else
532 		(void) printf("\n");
533 
534 	/* free the memory consumed */
535 	free(acl_str);
536 	free(quoted_name);
537 
538 	return (err_code);
539 }
540 
541 /*
542  * When creating a manifest, make sure all '?', tabs, space, newline, '/'
543  * and '[' are all properly quoted.  Convert them to a "\ooo" where the 'ooo'
544  * represents their octal value. For filesystem objects, as opposed to symlink
545  * targets, also canonicalize the pathname.
546  */
547 static char *
548 sanitized_fname(const char *fname, boolean_t canon_path)
549 {
550 	const char *ip;
551 	unsigned char ch;
552 	char *op, *quoted_name;
553 
554 	/* Initialize everything */
555 	quoted_name = safe_calloc((4 * PATH_MAX) + 1);
556 	ip = fname;
557 	op = quoted_name;
558 
559 	if (canon_path) {
560 		/*
561 		 * In the case when a relocatable root was used, the relocatable
562 		 * root should *not* be part of the manifest.
563 		 */
564 		ip += strlen(reloc_root);
565 
566 		/*
567 		 * In the case when the '-I' option was used, make sure
568 		 * the quoted_name starts with a '/'.
569 		 */
570 		if (*ip != '/')
571 			*op++ = '/';
572 	}
573 
574 	/* Now walk through 'fname' and build the quoted string */
575 	while ((ch = *ip++) != 0) {
576 		switch (ch) {
577 		/* Quote the following characters */
578 		case ' ':
579 		case '*':
580 		case '\n':
581 		case '?':
582 		case '[':
583 		case '\\':
584 		case '\t':
585 			op += sprintf(op, "\\%.3o", (unsigned char)ch);
586 			break;
587 
588 		/* Otherwise, simply append them */
589 		default:
590 			*op++ = ch;
591 			break;
592 		}
593 	}
594 
595 	*op = 0;
596 
597 	return (quoted_name);
598 }
599 
600 /*
601  * Function responsible for generating the ACL information for a given
602  * file.  Note, the string is put into buffer malloc'd by this function.
603  * It's the responsibility of the caller to free the buffer.  This function
604  * should never return a NULL pointer.
605  */
606 static char *
607 get_acl_string(const char *fname, const struct stat64 *statb, int *err_code)
608 {
609 	acl_t		*aclp;
610 	char		*acltext;
611 	int		error;
612 
613 	if (S_ISLNK(statb->st_mode)) {
614 		return (safe_strdup("-"));
615 	}
616 
617 	/*
618 	 *  Include trivial acl's
619 	 */
620 	error = acl_get(fname, 0, &aclp);
621 
622 	if (error != 0) {
623 		*err_code = WARNING_EXIT;
624 		(void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error));
625 		return (safe_strdup("-"));
626 	} else {
627 		acltext = acl_totext(aclp, 0);
628 		acl_free(aclp);
629 		if (acltext == NULL)
630 			return (safe_strdup("-"));
631 		else
632 			return (acltext);
633 	}
634 }
635 
636 
637 /*
638  *
639  * description:	This routine reads stdin in BUF_SIZE chunks, uses the bits
640  *		to update the md5 hash buffer, and outputs the chunks
641  *		to stdout.  When stdin is exhausted, the hash is computed,
642  *		converted to a hexadecimal string, and returned.
643  *
644  * returns:	The md5 hash of stdin, or NULL if unsuccessful for any reason.
645  */
646 static int
647 generate_hash(int fdin, char *hash_str)
648 {
649 	unsigned char buf[BUF_SIZE];
650 	unsigned char hash[MD5_DIGEST_LENGTH];
651 	int i, amtread;
652 	MD5_CTX ctx;
653 
654 	MD5Init(&ctx);
655 
656 	for (;;) {
657 		amtread = read(fdin, buf, sizeof (buf));
658 		if (amtread == 0)
659 			break;
660 		if (amtread <  0)
661 			return (1);
662 
663 		/* got some data.  Now update hash */
664 		MD5Update(&ctx, buf, amtread);
665 	}
666 
667 	/* done passing through data, calculate hash */
668 	MD5Final(hash, &ctx);
669 
670 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
671 		(void) sprintf(hash_str + (i*2), "%2.2x", hash[i]);
672 
673 	return (0);
674 }
675 
/*
 * Used by 'bart create' with the '-I' option.  Each call places the next
 * pathname into 'buf' (at most 'bufsize' bytes, prefixed with 'reloc_root')
 * and returns 0; -1 is returned once the list is exhausted.
 *
 * The names come from 'argv' when it holds any entries, otherwise from
 * stdin, one per line.  Blank input lines are skipped.  The choice of the
 * source and the position in 'argv' are kept in static variables, so the
 * function walks a single list per process.
 */
static int
read_filelist(char *reloc_root, char **argv, char *buf, size_t bufsize)
{
	static int	argv_index = -1;
	static int	read_stdinput = 0;
	char		temp_buf[PATH_MAX];
	char		*cp;

	/*
	 * INITIALIZATION:
	 * Setup this code so it knows whether or not to read stdin.
	 * Also, if reading from argv, setup the index, "argv_index"
	 */
	if (argv_index == -1) {
		argv_index = 0;

		/* In this case, no args after '-I', so read stdin */
		if (argv[0] == NULL)
			read_stdinput = 1;
	}

	buf[0] = '\0';

	if (read_stdinput) {
		/*
		 * A blank line is not a pathname and must not be mistaken
		 * for the end of the list, so keep reading past it.
		 */
		do {
			if (fgets(temp_buf, sizeof (temp_buf), stdin) == NULL)
				return (-1);
			temp_buf[strcspn(temp_buf, "\n")] = '\0';
		} while (temp_buf[0] == '\0');
		cp = temp_buf;
	} else {
		/*
		 * Only step forward while there is an entry, so repeated
		 * calls at the end of the list never index past the
		 * terminating NULL.
		 */
		cp = argv[argv_index];
		if (cp == NULL)
			return (-1);
		argv_index++;
	}

	/*
	 * Unlike similar code elsewhere, avoid adding a leading
	 * slash for relative pathnames.
	 */
	if (reloc_root[0] == '\0' || cp[0] == '/')
		(void) snprintf(buf, bufsize, "%s%s", reloc_root, cp);
	else
		(void) snprintf(buf, bufsize, "%s/%s", reloc_root, cp);

	return (0);
}
724