xref: /illumos-gate/usr/src/cmd/varpd/varpd.c (revision 1e56f352c1c208679012bca47d552e127f5b1072)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright (c) 2021 Joyent, Inc.
14  */
15 
16 /*
17  * virtual arp daemon -- varpd
18  *
19  * The virtual arp daemon is the user land counterpart to the overlay driver. To
20  * truly understand its purpose and how it fits into things, you should read the
21  * overlay big theory statement in uts/common/io/overlay/overlay.c.
22  *
23  * varpd's purpose is to provide a means for looking up the destination on the
24  * underlay network for a host on an overlay network and to also be a door
25  * server such that dladm(8) via libdladm can configure and get useful status
26  * information. The heavy lifting is all done by libvarpd and the various lookup
27  * plugins.
28  *
29  * When varpd first starts up, we take care of chdiring into /var/run/varpd,
30  * which is also where we create /var/run/varpd/varpd.door, our door server.
31  * After that we daemonize and only after we daemonize do we go ahead and load
32  * plugins. The reason that we don't load plugins before daemonizing is that
33  * they could very well be creating threads and thus lose them all. In general,
34  * we want to make things easier on our children and not require them to be
35  * fork safe.
36  *
37  * Once it's spun up, the main varpd thread sits in sigsuspend and really just
38  * hangs out waiting for something, libvarpd handles everything else.
39  */
40 
41 #include <libvarpd.h>
42 #include <stdio.h>
43 #include <unistd.h>
44 #include <string.h>
45 #include <signal.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <fcntl.h>
49 #include <errno.h>
50 #include <libgen.h>
51 #include <stdarg.h>
52 #include <stdlib.h>
53 #include <paths.h>
54 #include <limits.h>
55 #include <sys/corectl.h>
56 #include <signal.h>
57 #include <strings.h>
58 #include <sys/wait.h>
59 #include <unistd.h>
60 #include <thread.h>
61 #include <priv.h>
62 #include <libscf.h>
63 
64 #define	VARPD_EXIT_REQUESTED	SMF_EXIT_OK
65 #define	VARPD_EXIT_FATAL	SMF_EXIT_ERR_FATAL
66 #define	VARPD_EXIT_USAGE	SMF_EXIT_ERR_CONFIG
67 
68 #define	VARPD_RUNDIR	"/var/run/varpd"
69 #define	VARPD_DEFAULT_DOOR	"/var/run/varpd/varpd.door"
70 
71 #define	VARPD_PG	"varpd"
72 #define	VARPD_PROP_INC	"include_path"
73 
74 static varpd_handle_t *varpd_handle;
75 static const char *varpd_pname;
76 static volatile boolean_t varpd_exit = B_FALSE;
77 
78 /*
79  * Debug builds are automatically wired up for umem debugging.
80  */
81 #ifdef	DEBUG
/*
 * Supplies the umem debugging option string used by DEBUG builds.
 *
 * Returns a pointer to a constant string; the caller must not modify or free
 * it.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose");
}
87 
/*
 * Supplies the umem logging option string used by DEBUG builds.
 *
 * Returns a pointer to a constant string; the caller must not modify or free
 * it.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_opts[] = "fail,contents";

	return (umem_logging_opts);
}
93 #endif	/* DEBUG */
94 
95 static void
96 varpd_vwarn(FILE *out, const char *fmt, va_list ap)
97 {
98 	int error = errno;
99 
100 	(void) fprintf(out, "%s: ", varpd_pname);
101 	(void) vfprintf(out, fmt, ap);
102 
103 	if (fmt[strlen(fmt) - 1] != '\n')
104 		(void) fprintf(out, ": %s\n", strerror(error));
105 }
106 
107 static void
108 varpd_fatal(const char *fmt, ...)
109 {
110 	va_list ap;
111 
112 	va_start(ap, fmt);
113 	varpd_vwarn(stderr, fmt, ap);
114 	va_end(ap);
115 
116 	exit(VARPD_EXIT_FATAL);
117 }
118 
119 static void
120 varpd_dfatal(int dfd, const char *fmt, ...)
121 {
122 	int status = VARPD_EXIT_FATAL;
123 	va_list ap;
124 
125 	va_start(ap, fmt);
126 	varpd_vwarn(stdout, fmt, ap);
127 	va_end(ap);
128 
129 	/* Take a single shot at this */
130 	(void) write(dfd, &status, sizeof (status));
131 	exit(status);
132 }
133 
134 /* ARGSUSED */
135 static int
136 varpd_plugin_walk_cb(varpd_handle_t *vph, const char *name, void *unused)
137 {
138 	(void) printf("loaded %s!\n", name);
139 	return (0);
140 }
141 
142 static int
143 varpd_dir_setup(void)
144 {
145 	int fd;
146 
147 	if (mkdir(VARPD_RUNDIR, 0700) != 0) {
148 		if (errno != EEXIST)
149 			varpd_fatal("failed to create %s: %s", VARPD_RUNDIR,
150 			    strerror(errno));
151 	}
152 
153 	fd = open(VARPD_RUNDIR, O_RDONLY);
154 	if (fd < 0)
155 		varpd_fatal("failed to open %s: %s", VARPD_RUNDIR,
156 		    strerror(errno));
157 
158 	if (fchown(fd, UID_NETADM, GID_NETADM) != 0)
159 		varpd_fatal("failed to chown %s: %s\n", VARPD_RUNDIR,
160 		    strerror(errno));
161 
162 	return (fd);
163 }
164 
/*
 * Put our file descriptors into a known state: close everything above
 * stderr and point stdin at /dev/null.
 *
 * Because varpd is generally run under SMF, stdout and stderr are left as
 * whatever our parent set them up to be.
 *
 * Any failure is fatal. The messages rely on varpd_vwarn() to append the
 * errno description, so they do not pass strerror() themselves.
 */
static void
varpd_fd_setup(void)
{
	int nullfd;

	closefrom(STDERR_FILENO + 1);

	nullfd = open(_PATH_DEVNULL, O_RDONLY);
	if (nullfd < 0)
		varpd_fatal("failed to open %s", _PATH_DEVNULL);

	if (dup2(nullfd, STDIN_FILENO) == -1)
		varpd_fatal("failed to dup out stdin");

	/*
	 * stdin now has its own reference to /dev/null, so the temporary
	 * descriptor is no longer needed. Only close it when it is not one
	 * of the standard descriptors: if one of those was closed on entry,
	 * open() handed us its slot and closing it again would leave that
	 * slot free for an unrelated file to land in later.
	 */
	if (nullfd > STDERR_FILENO)
		(void) close(nullfd);
}
182 
183 /*
184  * We borrow fmd's daemonization style. Basically, the parent waits for the
185  * child to successfully set up a door and recover all of the old configurations
186  * before we say that we're good to go.
187  */
188 static int
189 varpd_daemonize(int dirfd)
190 {
191 	char path[PATH_MAX];
192 	struct rlimit rlim;
193 	sigset_t set, oset;
194 	int estatus, pfds[2];
195 	pid_t child;
196 	priv_set_t *pset;
197 
198 	/*
199 	 * Set a per-process core path to be inside of /var/run/varpd. Make sure
200 	 * that we aren't limited in our dump size.
201 	 */
202 	(void) snprintf(path, sizeof (path),
203 	    "/var/run/varpd/core.%s.%%p", varpd_pname);
204 	(void) core_set_process_path(path, strlen(path) + 1, getpid());
205 
206 	rlim.rlim_cur = RLIM_INFINITY;
207 	rlim.rlim_max = RLIM_INFINITY;
208 	(void) setrlimit(RLIMIT_CORE, &rlim);
209 
210 	/*
211 	 * Claim as many file descriptors as the system will let us.
212 	 */
213 	if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
214 		rlim.rlim_cur = rlim.rlim_max;
215 		(void) setrlimit(RLIMIT_NOFILE, &rlim);
216 	}
217 
218 	/*
219 	 * chdir /var/run/varpd
220 	 */
221 	if (fchdir(dirfd) != 0)
222 		varpd_fatal("failed to chdir to %s", VARPD_RUNDIR);
223 
224 
225 	/*
226 	 * At this point block all signals going in so we don't have the parent
227 	 * mistakingly exit when the child is running, but never block SIGABRT.
228 	 */
229 	if (sigfillset(&set) != 0)
230 		abort();
231 	if (sigdelset(&set, SIGABRT) != 0)
232 		abort();
233 	if (sigprocmask(SIG_BLOCK, &set, &oset) != 0)
234 		abort();
235 
236 	/*
237 	 * Do the fork+setsid dance.
238 	 */
239 	if (pipe(pfds) != 0)
240 		varpd_fatal("failed to create pipe for daemonizing");
241 
242 	if ((child = fork()) == -1)
243 		varpd_fatal("failed to fork for daemonizing");
244 
245 	if (child != 0) {
246 		/* We'll be exiting shortly, so allow for silent failure */
247 		(void) close(pfds[1]);
248 		if (read(pfds[0], &estatus, sizeof (estatus)) ==
249 		    sizeof (estatus))
250 			_exit(estatus);
251 
252 		if (waitpid(child, &estatus, 0) == child && WIFEXITED(estatus))
253 			_exit(WEXITSTATUS(estatus));
254 
255 		_exit(VARPD_EXIT_FATAL);
256 	}
257 
258 	/*
259 	 * Drop privileges here.
260 	 *
261 	 * We should make sure we keep around PRIV_NET_PRIVADDR and
262 	 * PRIV_SYS_DLCONFIG, but drop everything else; however, keep basic
263 	 * privs and have our child drop them.
264 	 *
265 	 * We should also run as netadm:netadm and drop all of our groups.
266 	 */
267 	if (setgroups(0, NULL) != 0)
268 		abort();
269 	if (setgid(GID_NETADM) == -1 || seteuid(UID_NETADM) == -1)
270 		abort();
271 	if ((pset = priv_allocset()) == NULL)
272 		abort();
273 	priv_basicset(pset);
274 	if (priv_delset(pset, PRIV_PROC_EXEC) == -1 ||
275 	    priv_delset(pset, PRIV_PROC_INFO) == -1 ||
276 	    priv_delset(pset, PRIV_PROC_FORK) == -1 ||
277 	    priv_delset(pset, PRIV_PROC_SESSION) == -1 ||
278 	    priv_delset(pset, PRIV_FILE_LINK_ANY) == -1 ||
279 	    priv_addset(pset, PRIV_SYS_DL_CONFIG) == -1 ||
280 	    priv_addset(pset, PRIV_NET_PRIVADDR) == -1) {
281 		abort();
282 	}
283 	/*
284 	 * Remove privs from the permitted set. That will cause them to be
285 	 * removed from the effective set. We want to make sure that in the case
286 	 * of a vulnerability, something can't get back in here and wreak more
287 	 * havoc. But if we want non-basic privs in the effective set, we have
288 	 * to request them explicitly.
289 	 */
290 	if (setppriv(PRIV_SET, PRIV_PERMITTED, pset) == -1)
291 		abort();
292 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pset) == -1)
293 		abort();
294 
295 	priv_freeset(pset);
296 
297 	if (close(pfds[0]) != 0)
298 		abort();
299 	if (setsid() == -1)
300 		abort();
301 	if (sigprocmask(SIG_SETMASK, &oset, NULL) != 0)
302 		abort();
303 	(void) umask(0022);
304 
305 	return (pfds[1]);
306 }
307 
308 static int
309 varpd_setup_lookup_threads(void)
310 {
311 	int ret;
312 	long i, ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1;
313 
314 	if (ncpus <= 0)
315 		abort();
316 	for (i = 0; i < ncpus; i++) {
317 		thread_t thr;
318 
319 		ret = thr_create(NULL, 0, libvarpd_overlay_lookup_run,
320 		    varpd_handle, THR_DETACHED | THR_DAEMON, &thr);
321 		if (ret != 0)
322 			return (ret);
323 	}
324 
325 	return (0);
326 }
327 
328 static void
329 varpd_cleanup(void)
330 {
331 	varpd_exit = B_TRUE;
332 }
333 
334 /*
335  * Load default information from SMF and apply any of if necessary. We recognize
336  * the following properties:
337  *
338  *	varpd/include_path		Treat these as a series of -i options.
339  *
340  * If we're not under SMF, just move on.
341  */
342 static void
343 varpd_load_smf(int dfd)
344 {
345 	char *fmri, *inc;
346 	scf_simple_prop_t *prop;
347 
348 	if ((fmri = getenv("SMF_FMRI")) == NULL)
349 		return;
350 
351 	if ((prop = scf_simple_prop_get(NULL, fmri, VARPD_PG,
352 	    VARPD_PROP_INC)) == NULL)
353 		return;
354 
355 	while ((inc = scf_simple_prop_next_astring(prop)) != NULL) {
356 		int err = libvarpd_plugin_load(varpd_handle, inc);
357 		if (err != 0) {
358 			varpd_dfatal(dfd, "failed to load from %s: %s\n",
359 			    inc, strerror(err));
360 		}
361 	}
362 
363 	scf_simple_prop_free(prop);
364 }
365 
366 /*
367  * There are a bunch of things we need to do to be a proper daemon here.
368  *
369  *   o Ensure that /var/run/varpd exists or create it
370  *   o make stdin /dev/null (stdout?)
371  *   o Ensure any other fds that we somehow inherited are closed, eg.
372  *     closefrom()
373  *   o Properly daemonize
374  *   o Mask all signals except sigabrt before creating our first door -- all
375  *     other doors will inherit from that.
376  *   o Have the main thread sigsuspend looking for most things that are
377  *     actionable...
378  */
379 int
380 main(int argc, char *argv[])
381 {
382 	int err, c, dirfd, dfd, i;
383 	const char *doorpath = VARPD_DEFAULT_DOOR;
384 	sigset_t set;
385 	struct sigaction act;
386 	int nincpath = 0, nextincpath = 0;
387 	char **incpath = NULL;
388 
389 	varpd_pname = basename(argv[0]);
390 
391 	/*
392 	 * We want to clean up our file descriptors before we do anything else
393 	 * as we can't assume that libvarpd won't open file descriptors, etc.
394 	 */
395 	varpd_fd_setup();
396 
397 	if ((err = libvarpd_create(&varpd_handle)) != 0) {
398 		varpd_fatal("failed to open a libvarpd handle");
399 		return (1);
400 	}
401 
402 	while ((c = getopt(argc, argv, ":i:d:")) != -1) {
403 		switch (c) {
404 		case 'i':
405 			if (nextincpath == nincpath) {
406 				if (nincpath == 0)
407 					nincpath = 16;
408 				else
409 					nincpath *= 2;
410 				incpath = realloc(incpath, sizeof (char *) *
411 				    nincpath);
412 				if (incpath == NULL) {
413 					(void) fprintf(stderr, "failed to "
414 					    "allocate memory for the %dth "
415 					    "-I option: %s\n", nextincpath + 1,
416 					    strerror(errno));
417 				}
418 
419 			}
420 			incpath[nextincpath] = optarg;
421 			nextincpath++;
422 			break;
423 		case 'd':
424 			doorpath = optarg;
425 			break;
426 		default:
427 			(void) fprintf(stderr, "unknown option: %c\n", c);
428 			return (1);
429 		}
430 	}
431 
432 	dirfd = varpd_dir_setup();
433 
434 	(void) libvarpd_plugin_walk(varpd_handle, varpd_plugin_walk_cb, NULL);
435 
436 	dfd = varpd_daemonize(dirfd);
437 
438 	/*
439 	 * Now that we're in the child, go ahead and load all of our plug-ins.
440 	 * We do this, in part, because these plug-ins may need threads of their
441 	 * own and fork won't preserve those and we'd rather the plug-ins don't
442 	 * have to learn about fork-handlers.
443 	 */
444 	for (i = 0; i < nextincpath; i++) {
445 		err = libvarpd_plugin_load(varpd_handle, incpath[i]);
446 		if (err != 0) {
447 			varpd_dfatal(dfd, "failed to load from %s: %s\n",
448 			    incpath[i], strerror(err));
449 		}
450 	}
451 
452 	varpd_load_smf(dfd);
453 
454 	if ((err = libvarpd_persist_enable(varpd_handle, VARPD_RUNDIR)) != 0)
455 		varpd_dfatal(dfd, "failed to enable varpd persistence: %s\n",
456 		    strerror(err));
457 
458 	if ((err = libvarpd_persist_restore(varpd_handle)) != 0)
459 		varpd_dfatal(dfd, "failed to enable varpd persistence: %s\n",
460 		    strerror(err));
461 
462 	/*
463 	 * The ur-door thread will inherit from this signal mask. So set it to
464 	 * what we want before doing anything else. In addition, so will our
465 	 * threads that handle varpd lookups.
466 	 */
467 	if (sigfillset(&set) != 0)
468 		varpd_dfatal(dfd, "failed to fill a signal set...");
469 
470 	if (sigdelset(&set, SIGABRT) != 0)
471 		varpd_dfatal(dfd, "failed to unmask SIGABRT");
472 
473 	if (sigprocmask(SIG_BLOCK, &set, NULL) != 0)
474 		varpd_dfatal(dfd, "failed to set our door signal mask");
475 
476 	if ((err = varpd_setup_lookup_threads()) != 0)
477 		varpd_dfatal(dfd, "failed to create lookup threads: %s\n",
478 		    strerror(err));
479 
480 	if ((err = libvarpd_door_server_create(varpd_handle, doorpath)) != 0)
481 		varpd_dfatal(dfd, "failed to create door server at %s: %s\n",
482 		    doorpath, strerror(err));
483 
484 	/*
485 	 * At this point, finish up signal initialization and finally go ahead,
486 	 * notify the parent that we're okay, and enter the sigsuspend loop.
487 	 */
488 	bzero(&act, sizeof (struct sigaction));
489 	act.sa_handler = varpd_cleanup;
490 	if (sigfillset(&act.sa_mask) != 0)
491 		varpd_dfatal(dfd, "failed to fill sigaction mask");
492 	act.sa_flags = 0;
493 	if (sigaction(SIGHUP, &act, NULL) != 0)
494 		varpd_dfatal(dfd, "failed to register HUP handler");
495 	if (sigdelset(&set, SIGHUP) != 0)
496 		varpd_dfatal(dfd, "failed to remove HUP from mask");
497 	if (sigaction(SIGQUIT, &act, NULL) != 0)
498 		varpd_dfatal(dfd, "failed to register QUIT handler");
499 	if (sigdelset(&set, SIGQUIT) != 0)
500 		varpd_dfatal(dfd, "failed to remove QUIT from mask");
501 	if (sigaction(SIGINT, &act, NULL) != 0)
502 		varpd_dfatal(dfd, "failed to register INT handler");
503 	if (sigdelset(&set, SIGINT) != 0)
504 		varpd_dfatal(dfd, "failed to remove INT from mask");
505 	if (sigaction(SIGTERM, &act, NULL) != 0)
506 		varpd_dfatal(dfd, "failed to register TERM handler");
507 	if (sigdelset(&set, SIGTERM) != 0)
508 		varpd_dfatal(dfd, "failed to remove TERM from mask");
509 
510 	err = 0;
511 	(void) write(dfd, &err, sizeof (err));
512 	(void) close(dfd);
513 
514 	for (;;) {
515 		if (sigsuspend(&set) == -1)
516 			if (errno == EFAULT)
517 				abort();
518 		if (varpd_exit == B_TRUE)
519 			break;
520 	}
521 
522 	libvarpd_door_server_destroy(varpd_handle);
523 	libvarpd_destroy(varpd_handle);
524 
525 	return (VARPD_EXIT_REQUESTED);
526 }
527