xref: /freebsd/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c (revision a3266ba2697a383d2ede56803320d941866c7e76)
1 /*
2  * Copyright (c) 2017 Antonio Russo <antonio.e.russo@gmail.com>
3  * Copyright (c) 2020 InsanePrawn <insane.prawny@gmail.com>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be
14  * included in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 
26 #include <sys/resource.h>
27 #include <sys/types.h>
28 #include <sys/time.h>
29 #include <sys/stat.h>
30 #include <sys/wait.h>
31 #include <sys/mman.h>
32 #include <semaphore.h>
33 #include <stdbool.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <time.h>
38 #include <regex.h>
39 #include <search.h>
40 #include <dirent.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <errno.h>
45 #include <libzfs.h>
46 
/*
 * tsearch()/tfind() and qsort()/bsearch() take comparators of type
 * int (*)(const void *, const void *); these macros present the typed
 * comparators below (and strcmp) under that pointer type.
 */
#define	STRCMP ((int(*)(const void *, const void *))&strcmp)
#define	PID_T_CMP ((int(*)(const void *, const void *))&pid_t_cmp)

/*
 * Three-way comparison of two process IDs.
 *
 * Returns a negative value, zero, or a positive value when *lhs is less
 * than, equal to, or greater than *rhs.
 *
 * sys_types.h(7posix) only promises that pid_t is a signed integer type;
 * it says nothing about its width or the range of its values. Comparing
 * instead of subtracting therefore avoids both signed overflow and
 * truncation of the difference to int.
 */
static int
pid_t_cmp(const pid_t *lhs, const pid_t *rhs)
{
	return ((*lhs > *rhs) - (*lhs < *rhs));
}
59 
60 #define	EXIT_ENOMEM() \
61 	do { \
62 		fprintf(stderr, PROGNAME "[%d]: " \
63 		    "not enough memory (L%d)!\n", getpid(), __LINE__); \
64 		_exit(1); \
65 	} while (0)
66 
67 
68 #define	PROGNAME "zfs-mount-generator"
69 #define	FSLIST SYSCONFDIR "/zfs/zfs-list.cache"
70 #define	ZFS SBINDIR "/zfs"
71 
72 #define	OUTPUT_HEADER \
73 	"# Automatically generated by " PROGNAME "\n" \
74 	"\n"
75 
76 /*
77  * Starts like the one in libzfs_util.c but also matches "//"
78  * and captures until the end, since we actually use it for path extraxion
79  */
80 #define	URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$"
81 static regex_t uri_regex;
82 
83 static char *argv0;
84 
85 static const char *destdir = "/tmp";
86 static int destdir_fd = -1;
87 
88 static void *known_pools = NULL; /* tsearch() of C strings */
89 static struct {
90 	sem_t noauto_not_on_sem;
91 
92 	sem_t noauto_names_sem;
93 	size_t noauto_names_len;
94 	size_t noauto_names_max;
95 	char noauto_names[][NAME_MAX];
96 } *noauto_files;
97 
98 
/*
 * Build a systemd unit name: prepend + escaped(input) + append.
 *
 * Escaping rules applied to input:
 *  - a leading '.' becomes "\x2e",
 *  - '/' becomes '-',
 *  - ASCII letters, digits, ':', '_' and '.' are copied verbatim,
 *  - every other byte becomes "\xXX" (two lower-case hex digits).
 *
 * prepend and append are copied unmodified.
 *
 * Returns a malloc()ed string the caller owns; exits the process through
 * EXIT_ENOMEM() if the allocation fails, so it never returns NULL.
 */
static char *
systemd_escape(const char *input, const char *prepend, const char *append)
{
	static const char hexdigits[] = "0123456789abcdef";

	size_t len = strlen(input);
	size_t applen = strlen(append);
	size_t prelen = strlen(prepend);
	/* Worst case: every input byte expands to the 4-byte "\xXX" form */
	char *ret = malloc(4 * len + prelen + applen + 1);
	if (!ret)
		EXIT_ENOMEM();

	memcpy(ret, prepend, prelen);
	char *out = ret + prelen;

	const char *cur = input;
	if (*cur == '.') {
		memcpy(out, "\\x2e", 4);
		out += 4;
		++cur;
	}
	for (; *cur; ++cur) {
		if (*cur == '/')
			*(out++) = '-';
		else if (strchr(
		    "0123456789"
		    "abcdefghijklmnopqrstuvwxyz"
		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		    ":_.", *cur))
			*(out++) = *cur;
		else {
			/*
			 * Go through unsigned char: where plain char is
			 * signed, bytes >= 0x80 (e.g. UTF-8) would otherwise
			 * sign-extend and format as eight hex digits,
			 * producing a wrong name and writing past the four
			 * bytes reserved for this character.
			 */
			unsigned char byte = (unsigned char)*cur;
			*(out++) = '\\';
			*(out++) = 'x';
			*(out++) = hexdigits[byte >> 4];
			*(out++) = hexdigits[byte & 0x0f];
		}
	}

	/* applen + 1 also copies append's terminating NUL */
	memcpy(out, append, applen + 1);
	return (ret);
}
136 
/*
 * Collapse every run of consecutive '/' in path into a single '/',
 * rewriting the string in place. Nothing else is changed: "." and ".."
 * components and a trailing slash are kept.
 */
static void
simplify_path(char *path)
{
	const char *rd = path;
	char *wr = path;

	while (*rd != '\0') {
		char c = *rd++;

		/* wr never overtakes rd, so unread input is never clobbered */
		*wr++ = c;
		if (c == '/') {
			/* Drop the remainder of this run of slashes */
			while (*rd == '/')
				rd++;
		}
	}

	*wr = '\0';
}
152 
/*
 * Returns true when the string what ends with the string suff.
 * An empty suff matches every string.
 */
static bool
strendswith(const char *what, const char *suff)
{
	const size_t whole_len = strlen(what);
	const size_t tail_len = strlen(suff);

	if (tail_len > whole_len)
		return (false);

	/* Compare only the last tail_len bytes of what against suff */
	return (memcmp(what + (whole_len - tail_len), suff, tail_len) == 0);
}
162 
/*
 * Path flavour of systemd_escape(): builds prepend + escaped(path) + append.
 *
 * Assumes an already-simplified path (see simplify_path()). The input is
 * left unchanged on return, but a trailing '/' is temporarily overwritten
 * while escaping, so the buffer must be writable.
 *
 * The root directory, and likewise the empty path, are represented by a
 * lone "-". For any other path one leading and one trailing '/' are
 * dropped before escaping.
 *
 * Returns a malloc()ed string owned by the caller, or NULL if the path
 * contains a "." or ".." component after a '/' (not normalized). Exits
 * through EXIT_ENOMEM() when out of memory.
 */
static char *
systemd_escape_path(char *input, const char *prepend, const char *append)
{
	/*
	 * An empty path must not reach the trailing-slash handling below:
	 * it would index input[-1]. Treat it as the root directory.
	 */
	if (input[0] == '\0' || strcmp(input, "/") == 0) {
		char *ret;
		if (asprintf(&ret, "%s-%s", prepend, append) == -1)
			EXIT_ENOMEM();
		return (ret);
	} else {
		/*
		 * path_is_normalized() (flattened for absolute paths here),
		 * required for proper escaping
		 */
		if (strstr(input, "/./") || strstr(input, "/../") ||
		    strendswith(input, "/.") || strendswith(input, "/.."))
			return (NULL);


		if (input[0] == '/')
			++input;

		/* input is non-empty here: it was neither "" nor "/" */
		char *back = &input[strlen(input) - 1];
		bool deslash = *back == '/';
		if (deslash)
			*back = '\0';

		char *ret = systemd_escape(input, prepend, append);

		/* Restore the caller's buffer */
		if (deslash)
			*back = '/';
		return (ret);
	}
}
197 
/*
 * openat(2) followed by fdopen(3): open pathname relative to dirfd with
 * the given open flags and creation mode, and wrap the descriptor in a
 * stdio stream opened with stream_mode.
 *
 * Returns the stream, or NULL with errno set by whichever call failed.
 * No descriptor is left open on failure.
 */
static FILE *
fopenat(int dirfd, const char *pathname, int flags,
    const char *stream_mode, mode_t mode)
{
	int fd = openat(dirfd, pathname, flags, mode);
	if (fd < 0)
		return (NULL);

	FILE *stream = fdopen(fd, stream_mode);
	if (stream == NULL) {
		/* fdopen() leaves fd open on failure; keep its errno */
		int saved_errno = errno;
		(void) close(fd);
		errno = saved_errno;
	}

	return (stream);
}
208 
209 static int
210 line_worker(char *line, const char *cachefile)
211 {
212 	char *toktmp;
213 	/* BEGIN CSTYLED */
214 	const char *dataset                     = strtok_r(line, "\t", &toktmp);
215 	      char *p_mountpoint                = strtok_r(NULL, "\t", &toktmp);
216 	const char *p_canmount                  = strtok_r(NULL, "\t", &toktmp);
217 	const char *p_atime                     = strtok_r(NULL, "\t", &toktmp);
218 	const char *p_relatime                  = strtok_r(NULL, "\t", &toktmp);
219 	const char *p_devices                   = strtok_r(NULL, "\t", &toktmp);
220 	const char *p_exec                      = strtok_r(NULL, "\t", &toktmp);
221 	const char *p_readonly                  = strtok_r(NULL, "\t", &toktmp);
222 	const char *p_setuid                    = strtok_r(NULL, "\t", &toktmp);
223 	const char *p_nbmand                    = strtok_r(NULL, "\t", &toktmp);
224 	const char *p_encroot                   = strtok_r(NULL, "\t", &toktmp) ?: "-";
225 	      char *p_keyloc                    = strtok_r(NULL, "\t", &toktmp) ?: strdupa("none");
226 	const char *p_systemd_requires          = strtok_r(NULL, "\t", &toktmp) ?: "-";
227 	const char *p_systemd_requiresmountsfor = strtok_r(NULL, "\t", &toktmp) ?: "-";
228 	const char *p_systemd_before            = strtok_r(NULL, "\t", &toktmp) ?: "-";
229 	const char *p_systemd_after             = strtok_r(NULL, "\t", &toktmp) ?: "-";
230 	      char *p_systemd_wantedby          = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
231 	      char *p_systemd_requiredby        = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
232 	const char *p_systemd_nofail            = strtok_r(NULL, "\t", &toktmp) ?: "-";
233 	const char *p_systemd_ignore            = strtok_r(NULL, "\t", &toktmp) ?: "-";
234 	/* END CSTYLED */
235 
236 	const char *pool = dataset;
237 	if ((toktmp = strchr(pool, '/')) != NULL)
238 		pool = strndupa(pool, toktmp - pool);
239 
240 	if (p_nbmand == NULL) {
241 		fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n",
242 		    getpid(), dataset);
243 		return (1);
244 	}
245 
246 	strncpy(argv0, dataset, strlen(argv0));
247 
248 	/* Minimal pre-requisites to mount a ZFS dataset */
249 	const char *after = "zfs-import.target";
250 	const char *wants = "zfs-import.target";
251 	const char *bindsto = NULL;
252 	char *wantedby = NULL;
253 	char *requiredby = NULL;
254 	bool noauto = false;
255 	bool wantedby_append = true;
256 
257 	/*
258 	 * zfs-import.target is not needed if the pool is already imported.
259 	 * This avoids a dependency loop on root-on-ZFS systems:
260 	 *   systemd-random-seed.service After (via RequiresMountsFor)
261 	 *   var-lib.mount After
262 	 *   zfs-import.target After
263 	 *   zfs-import-{cache,scan}.service After
264 	 *   cryptsetup.service After
265 	 *   systemd-random-seed.service
266 	 */
267 	if (tfind(pool, &known_pools, STRCMP)) {
268 		after = "";
269 		wants = "";
270 	}
271 
272 	if (strcmp(p_systemd_after, "-") == 0)
273 		p_systemd_after = NULL;
274 	if (strcmp(p_systemd_before, "-") == 0)
275 		p_systemd_before = NULL;
276 	if (strcmp(p_systemd_requires, "-") == 0)
277 		p_systemd_requires = NULL;
278 	if (strcmp(p_systemd_requiresmountsfor, "-") == 0)
279 		p_systemd_requiresmountsfor = NULL;
280 
281 
282 	if (strcmp(p_encroot, "-") != 0) {
283 		char *keyloadunit =
284 		    systemd_escape(p_encroot, "zfs-load-key@", ".service");
285 
286 		if (strcmp(dataset, p_encroot) == 0) {
287 			const char *keymountdep = NULL;
288 			bool is_prompt = false;
289 
290 			regmatch_t uri_matches[3];
291 			if (regexec(&uri_regex, p_keyloc,
292 			    sizeof (uri_matches) / sizeof (*uri_matches),
293 			    uri_matches, 0) == 0) {
294 				p_keyloc[uri_matches[2].rm_eo] = '\0';
295 				const char *path =
296 				    &p_keyloc[uri_matches[2].rm_so];
297 
298 				/*
299 				 * Assumes all URI keylocations need
300 				 * the mount for their path;
301 				 * http://, for example, wouldn't
302 				 * (but it'd need network-online.target et al.)
303 				 */
304 				keymountdep = path;
305 			} else {
306 				if (strcmp(p_keyloc, "prompt") != 0)
307 					fprintf(stderr, PROGNAME "[%d]: %s: "
308 					    "unknown non-URI keylocation=%s\n",
309 					    getpid(), dataset, p_keyloc);
310 
311 				is_prompt = true;
312 			}
313 
314 
315 			/* Generate the key-load .service unit */
316 			FILE *keyloadunit_f = fopenat(destdir_fd, keyloadunit,
317 			    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w",
318 			    0644);
319 			if (!keyloadunit_f) {
320 				fprintf(stderr, PROGNAME "[%d]: %s: "
321 				    "couldn't open %s under %s: %s\n",
322 				    getpid(), dataset, keyloadunit, destdir,
323 				    strerror(errno));
324 				return (1);
325 			}
326 
327 			fprintf(keyloadunit_f,
328 			    OUTPUT_HEADER
329 			    "[Unit]\n"
330 			    "Description=Load ZFS key for %s\n"
331 			    "SourcePath=" FSLIST "/%s\n"
332 			    "Documentation=man:zfs-mount-generator(8)\n"
333 			    "DefaultDependencies=no\n"
334 			    "Wants=%s\n"
335 			    "After=%s\n",
336 			    dataset, cachefile, wants, after);
337 
338 			if (p_systemd_requires)
339 				fprintf(keyloadunit_f,
340 				    "Requires=%s\n", p_systemd_requires);
341 
342 			if (p_systemd_requiresmountsfor || keymountdep) {
343 				fprintf(keyloadunit_f, "RequiresMountsFor=");
344 				if (p_systemd_requiresmountsfor)
345 					fprintf(keyloadunit_f,
346 					    "%s ", p_systemd_requiresmountsfor);
347 				if (keymountdep)
348 					fprintf(keyloadunit_f,
349 					    "'%s'", keymountdep);
350 				fprintf(keyloadunit_f, "\n");
351 			}
352 
353 			/* BEGIN CSTYLED */
354 			fprintf(keyloadunit_f,
355 			    "\n"
356 			    "[Service]\n"
357 			    "Type=oneshot\n"
358 			    "RemainAfterExit=yes\n"
359 			    "# This avoids a dependency loop involving systemd-journald.socket if this\n"
360 			    "# dataset is a parent of the root filesystem.\n"
361 			    "StandardOutput=null\n"
362 			    "StandardError=null\n"
363 			    "ExecStart=/bin/sh -euc '"
364 			        "[ \"$$(" ZFS " get -H -o value keystatus \"%s\")\" = \"unavailable\" ] || exit 0;",
365 			    dataset);
366 			if (is_prompt)
367 				fprintf(keyloadunit_f,
368 				    "for i in 1 2 3; do "
369 				        "systemd-ask-password --id=\"zfs:%s\" \"Enter passphrase for %s:\" |"
370 				        "" ZFS " load-key \"%s\" && exit 0;"
371 				    "done;"
372 				    "exit 1",
373 				    dataset, dataset, dataset);
374 			else
375 				fprintf(keyloadunit_f,
376 				    "exec " ZFS " load-key \"%s\"",
377 				    dataset);
378 
379 			fprintf(keyloadunit_f,
380 				"'\n"
381 				"ExecStop=/bin/sh -euc '"
382 				    "[ \"$$(" ZFS " get -H -o value keystatus \"%s\")\" = \"available\" ] || exit 0;"
383 				    "exec " ZFS " unload-key \"%s\""
384 				"'\n",
385 				dataset, dataset);
386 			/* END CSTYLED */
387 
388 			(void) fclose(keyloadunit_f);
389 		}
390 
391 		/* Update dependencies for the mount file to want this */
392 		bindsto = keyloadunit;
393 		if (after[0] == '\0')
394 			after = keyloadunit;
395 		else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1)
396 			after = toktmp;
397 		else
398 			EXIT_ENOMEM();
399 	}
400 
401 
402 	/* Skip generation of the mount unit if org.openzfs.systemd:ignore=on */
403 	if (strcmp(p_systemd_ignore, "-") == 0 ||
404 	    strcmp(p_systemd_ignore, "off") == 0) {
405 		/* ok */
406 	} else if (strcmp(p_systemd_ignore, "on") == 0)
407 		return (0);
408 	else {
409 		fprintf(stderr, PROGNAME "[%d]: %s: "
410 		    "invalid org.openzfs.systemd:ignore=%s\n",
411 		    getpid(), dataset, p_systemd_ignore);
412 		return (1);
413 	}
414 
415 	/* Check for canmount */
416 	if (strcmp(p_canmount, "on") == 0) {
417 		/* ok */
418 	} else if (strcmp(p_canmount, "noauto") == 0)
419 		noauto = true;
420 	else if (strcmp(p_canmount, "off") == 0)
421 		return (0);
422 	else {
423 		fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n",
424 		    getpid(), dataset, p_canmount);
425 		return (1);
426 	}
427 
428 	/* Check for legacy and blank mountpoints */
429 	if (strcmp(p_mountpoint, "legacy") == 0 ||
430 	    strcmp(p_mountpoint, "none") == 0)
431 		return (0);
432 	else if (p_mountpoint[0] != '/') {
433 		fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n",
434 		    getpid(), dataset, p_mountpoint);
435 		return (1);
436 	}
437 
438 	/* Escape the mountpoint per systemd policy */
439 	simplify_path(p_mountpoint);
440 	const char *mountfile = systemd_escape_path(p_mountpoint, "", ".mount");
441 	if (mountfile == NULL) {
442 		fprintf(stderr,
443 		    PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n",
444 		    getpid(), dataset, p_mountpoint);
445 		return (1);
446 	}
447 
448 
449 	/*
450 	 * Parse options, cf. lib/libzfs/libzfs_mount.c:zfs_add_options
451 	 *
452 	 * The longest string achievable here is
453 	 * ",atime,strictatime,nodev,noexec,rw,nosuid,nomand".
454 	 */
455 	char opts[64] = "";
456 
457 	/* atime */
458 	if (strcmp(p_atime, "on") == 0) {
459 		/* relatime */
460 		if (strcmp(p_relatime, "on") == 0)
461 			strcat(opts, ",atime,relatime");
462 		else if (strcmp(p_relatime, "off") == 0)
463 			strcat(opts, ",atime,strictatime");
464 		else
465 			fprintf(stderr,
466 			    PROGNAME "[%d]: %s: invalid relatime=%s\n",
467 			    getpid(), dataset, p_relatime);
468 	} else if (strcmp(p_atime, "off") == 0) {
469 		strcat(opts, ",noatime");
470 	} else
471 		fprintf(stderr, PROGNAME "[%d]: %s: invalid atime=%s\n",
472 		    getpid(), dataset, p_atime);
473 
474 	/* devices */
475 	if (strcmp(p_devices, "on") == 0)
476 		strcat(opts, ",dev");
477 	else if (strcmp(p_devices, "off") == 0)
478 		strcat(opts, ",nodev");
479 	else
480 		fprintf(stderr, PROGNAME "[%d]: %s: invalid devices=%s\n",
481 		    getpid(), dataset, p_devices);
482 
483 	/* exec */
484 	if (strcmp(p_exec, "on") == 0)
485 		strcat(opts, ",exec");
486 	else if (strcmp(p_exec, "off") == 0)
487 		strcat(opts, ",noexec");
488 	else
489 		fprintf(stderr, PROGNAME "[%d]: %s: invalid exec=%s\n",
490 		    getpid(), dataset, p_exec);
491 
492 	/* readonly */
493 	if (strcmp(p_readonly, "on") == 0)
494 		strcat(opts, ",ro");
495 	else if (strcmp(p_readonly, "off") == 0)
496 		strcat(opts, ",rw");
497 	else
498 		fprintf(stderr, PROGNAME "[%d]: %s: invalid readonly=%s\n",
499 		    getpid(), dataset, p_readonly);
500 
501 	/* setuid */
502 	if (strcmp(p_setuid, "on") == 0)
503 		strcat(opts, ",suid");
504 	else if (strcmp(p_setuid, "off") == 0)
505 		strcat(opts, ",nosuid");
506 	else
507 		fprintf(stderr, PROGNAME "[%d]: %s: invalid setuid=%s\n",
508 		    getpid(), dataset, p_setuid);
509 
510 	/* nbmand */
511 	if (strcmp(p_nbmand, "on") == 0)
512 		strcat(opts, ",mand");
513 	else if (strcmp(p_nbmand, "off") == 0)
514 		strcat(opts, ",nomand");
515 	else
516 		fprintf(stderr, PROGNAME "[%d]: %s: invalid nbmand=%s\n",
517 		    getpid(), dataset, p_setuid);
518 
519 	if (strcmp(p_systemd_wantedby, "-") != 0) {
520 		noauto = true;
521 
522 		if (strcmp(p_systemd_wantedby, "none") != 0)
523 			wantedby = p_systemd_wantedby;
524 	}
525 
526 	if (strcmp(p_systemd_requiredby, "-") != 0) {
527 		noauto = true;
528 
529 		if (strcmp(p_systemd_requiredby, "none") != 0)
530 			requiredby = p_systemd_requiredby;
531 	}
532 
533 	/*
534 	 * For datasets with canmount=on, a dependency is created for
535 	 * local-fs.target by default. To avoid regressions, this dependency
536 	 * is reduced to "wants" rather than "requires" when nofail!=off.
537 	 * **THIS MAY CHANGE**
538 	 * noauto=on disables this behavior completely.
539 	 */
540 	if (!noauto) {
541 		if (strcmp(p_systemd_nofail, "off") == 0)
542 			requiredby = strdupa("local-fs.target");
543 		else {
544 			wantedby = strdupa("local-fs.target");
545 			wantedby_append = strcmp(p_systemd_nofail, "on") != 0;
546 		}
547 	}
548 
549 	/*
550 	 * Handle existing files:
551 	 * 1.	We never overwrite existing files, although we may delete
552 	 * 	files if we're sure they were created by us. (see 5.)
553 	 * 2.	We handle files differently based on canmount.
554 	 * 	Units with canmount=on always have precedence over noauto.
555 	 * 	This is enforced by the noauto_not_on_sem semaphore,
556 	 * 	which is only unlocked when the last canmount=on process exits.
557 	 * 	It is important to use p_canmount and not noauto here,
558 	 * 	since we categorise by canmount while other properties,
559 	 * 	e.g. org.openzfs.systemd:wanted-by, also modify noauto.
560 	 * 3.	If no unit file exists for a noauto dataset, we create one.
561 	 * 	Additionally, we use noauto_files to track the unit file names
562 	 * 	(which are the systemd-escaped mountpoints) of all (exclusively)
563 	 * 	noauto datasets that had a file created.
564 	 * 4.	If the file to be created is found in the tracking array,
565 	 * 	we do NOT create it.
566 	 * 5.	If a file exists for a noauto dataset,
567 	 * 	we check whether the file name is in the array.
568 	 * 	If it is, we have multiple noauto datasets for the same
569 	 * 	mountpoint. In such cases, we remove the file for safety.
570 	 * 	We leave the file name in the tracking array to avoid
571 	 * 	further noauto datasets creating a file for this path again.
572 	 */
573 
574 	{
575 		sem_t *our_sem = (strcmp(p_canmount, "on") == 0) ?
576 		    &noauto_files->noauto_names_sem :
577 		    &noauto_files->noauto_not_on_sem;
578 		while (sem_wait(our_sem) == -1 && errno == EINTR)
579 			;
580 	}
581 
582 	struct stat stbuf;
583 	bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0;
584 
585 	bool is_known = false;
586 	for (size_t i = 0; i < noauto_files->noauto_names_len; ++i) {
587 		if (strncmp(
588 		    noauto_files->noauto_names[i], mountfile, NAME_MAX) == 0) {
589 			is_known = true;
590 			break;
591 		}
592 	}
593 
594 	if (already_exists) {
595 		if (is_known) {
596 			/* If it's in $noauto_files, we must be noauto too */
597 
598 			/* See 5 */
599 			errno = 0;
600 			(void) unlinkat(destdir_fd, mountfile, 0);
601 
602 			/* See 2 */
603 			fprintf(stderr, PROGNAME "[%d]: %s: "
604 			    "removing duplicate noauto unit %s%s%s\n",
605 			    getpid(), dataset, mountfile,
606 			    errno ? "" : " failed: ",
607 			    errno ? "" : strerror(errno));
608 		} else {
609 			/* Don't log for canmount=noauto */
610 			if (strcmp(p_canmount, "on") == 0)
611 				fprintf(stderr, PROGNAME "[%d]: %s: "
612 				    "%s already exists. Skipping.\n",
613 				    getpid(), dataset, mountfile);
614 		}
615 
616 		/* File exists: skip current dataset */
617 		if (strcmp(p_canmount, "on") == 0)
618 			sem_post(&noauto_files->noauto_names_sem);
619 		return (0);
620 	} else {
621 		if (is_known) {
622 			/* See 4 */
623 			if (strcmp(p_canmount, "on") == 0)
624 				sem_post(&noauto_files->noauto_names_sem);
625 			return (0);
626 		} else if (strcmp(p_canmount, "noauto") == 0) {
627 			if (noauto_files->noauto_names_len ==
628 			    noauto_files->noauto_names_max)
629 				fprintf(stderr, PROGNAME "[%d]: %s: "
630 				    "noauto dataset limit (%zu) reached! "
631 				    "Not tracking %s. Please report this to "
632 				    "https://github.com/openzfs/zfs\n",
633 				    getpid(), dataset,
634 				    noauto_files->noauto_names_max, mountfile);
635 			else {
636 				strncpy(noauto_files->noauto_names[
637 				    noauto_files->noauto_names_len],
638 				    mountfile, NAME_MAX);
639 				++noauto_files->noauto_names_len;
640 			}
641 		}
642 	}
643 
644 
645 	FILE *mountfile_f = fopenat(destdir_fd, mountfile,
646 	    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644);
647 	if (strcmp(p_canmount, "on") == 0)
648 		sem_post(&noauto_files->noauto_names_sem);
649 	if (!mountfile_f) {
650 		fprintf(stderr,
651 		    PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n",
652 		    getpid(), dataset, mountfile, destdir, strerror(errno));
653 		return (1);
654 	}
655 
656 	fprintf(mountfile_f,
657 	    OUTPUT_HEADER
658 	    "[Unit]\n"
659 	    "SourcePath=" FSLIST "/%s\n"
660 	    "Documentation=man:zfs-mount-generator(8)\n"
661 	    "\n"
662 	    "Before=",
663 	    cachefile);
664 
665 	if (p_systemd_before)
666 		fprintf(mountfile_f, "%s ", p_systemd_before);
667 	fprintf(mountfile_f, "zfs-mount.service"); /* Ensures we don't race */
668 	if (requiredby)
669 		fprintf(mountfile_f, " %s", requiredby);
670 	if (wantedby && wantedby_append)
671 		fprintf(mountfile_f, " %s", wantedby);
672 
673 	fprintf(mountfile_f,
674 	    "\n"
675 	    "After=");
676 	if (p_systemd_after)
677 		fprintf(mountfile_f, "%s ", p_systemd_after);
678 	fprintf(mountfile_f, "%s\n", after);
679 
680 	fprintf(mountfile_f, "Wants=%s\n", wants);
681 
682 	if (bindsto)
683 		fprintf(mountfile_f, "BindsTo=%s\n", bindsto);
684 	if (p_systemd_requires)
685 		fprintf(mountfile_f, "Requires=%s\n", p_systemd_requires);
686 	if (p_systemd_requiresmountsfor)
687 		fprintf(mountfile_f,
688 		    "RequiresMountsFor=%s\n", p_systemd_requiresmountsfor);
689 
690 	fprintf(mountfile_f,
691 	    "\n"
692 	    "[Mount]\n"
693 	    "Where=%s\n"
694 	    "What=%s\n"
695 	    "Type=zfs\n"
696 	    "Options=defaults%s,zfsutil\n",
697 	    p_mountpoint, dataset, opts);
698 
699 	(void) fclose(mountfile_f);
700 
701 	if (!requiredby && !wantedby)
702 		return (0);
703 
704 	/* Finally, create the appropriate dependencies */
705 	char *linktgt;
706 	if (asprintf(&linktgt, "../%s", mountfile) == -1)
707 		EXIT_ENOMEM();
708 
709 	char *dependencies[][2] = {
710 		{"wants", wantedby},
711 		{"requires", requiredby},
712 		{}
713 	};
714 	for (__typeof__(&*dependencies) dep = &*dependencies; **dep; ++dep) {
715 		if (!(*dep)[1])
716 			continue;
717 
718 		for (char *reqby = strtok_r((*dep)[1], " ", &toktmp);
719 		    reqby;
720 		    reqby = strtok_r(NULL, " ", &toktmp)) {
721 			char *depdir;
722 			if (asprintf(&depdir, "%s.%s", reqby, (*dep)[0]) == -1)
723 				EXIT_ENOMEM();
724 
725 			(void) mkdirat(destdir_fd, depdir, 0755);
726 			int depdir_fd = openat(destdir_fd, depdir,
727 			    O_PATH | O_DIRECTORY | O_CLOEXEC);
728 			if (depdir_fd < 0) {
729 				fprintf(stderr, PROGNAME "[%d]: %s: "
730 				    "couldn't open %s under %s: %s\n",
731 				    getpid(), dataset, depdir, destdir,
732 				    strerror(errno));
733 				free(depdir);
734 				continue;
735 			}
736 
737 			if (symlinkat(linktgt, depdir_fd, mountfile) == -1)
738 				fprintf(stderr, PROGNAME "[%d]: %s: "
739 				    "couldn't symlink at "
740 				    "%s under %s under %s: %s\n",
741 				    getpid(), dataset, mountfile,
742 				    depdir, destdir, strerror(errno));
743 
744 			(void) close(depdir_fd);
745 			free(depdir);
746 		}
747 	}
748 
749 	return (0);
750 }
751 
752 
753 static int
754 pool_enumerator(zpool_handle_t *pool, void *data __attribute__((unused)))
755 {
756 	int ret = 0;
757 
758 	/*
759 	 * Pools are guaranteed-unique by the kernel,
760 	 * no risk of leaking dupes here
761 	 */
762 	char *name = strdup(zpool_get_name(pool));
763 	if (!name || !tsearch(name, &known_pools, STRCMP)) {
764 		free(name);
765 		ret = ENOMEM;
766 	}
767 
768 	zpool_close(pool);
769 	return (ret);
770 }
771 
772 int
773 main(int argc, char **argv)
774 {
775 	struct timespec time_init = {};
776 	clock_gettime(CLOCK_MONOTONIC_RAW, &time_init);
777 
778 	{
779 		int kmfd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
780 		if (kmfd >= 0) {
781 			(void) dup2(kmfd, STDERR_FILENO);
782 			(void) close(kmfd);
783 		}
784 	}
785 
786 	uint8_t debug = 0;
787 
788 	argv0 = argv[0];
789 	switch (argc) {
790 	case 1:
791 		/* Use default */
792 		break;
793 	case 2:
794 	case 4:
795 		destdir = argv[1];
796 		break;
797 	default:
798 		fprintf(stderr,
799 		    PROGNAME "[%d]: wrong argument count: %d\n",
800 		    getpid(), argc - 1);
801 		_exit(1);
802 	}
803 
804 	{
805 		destdir_fd = open(destdir, O_PATH | O_DIRECTORY | O_CLOEXEC);
806 		if (destdir_fd < 0) {
807 			fprintf(stderr, PROGNAME "[%d]: "
808 			    "can't open destination directory %s: %s\n",
809 			    getpid(), destdir, strerror(errno));
810 			_exit(1);
811 		}
812 	}
813 
814 	DIR *fslist_dir = opendir(FSLIST);
815 	if (!fslist_dir) {
816 		if (errno != ENOENT)
817 			fprintf(stderr,
818 			    PROGNAME "[%d]: couldn't open " FSLIST ": %s\n",
819 			    getpid(), strerror(errno));
820 		_exit(0);
821 	}
822 
823 	{
824 		libzfs_handle_t *libzfs = libzfs_init();
825 		if (libzfs) {
826 			if (zpool_iter(libzfs, pool_enumerator, NULL) != 0)
827 				fprintf(stderr, PROGNAME "[%d]: "
828 				    "error listing pools, ignoring\n",
829 				    getpid());
830 			libzfs_fini(libzfs);
831 		} else
832 			fprintf(stderr, PROGNAME "[%d]: "
833 			    "couldn't start libzfs, ignoring\n",
834 			    getpid());
835 	}
836 
837 	{
838 		int regerr = regcomp(&uri_regex, URI_REGEX_S, 0);
839 		if (regerr != 0) {
840 			fprintf(stderr,
841 			    PROGNAME "[%d]: invalid regex: %d\n",
842 			    getpid(), regerr);
843 			_exit(1);
844 		}
845 	}
846 
847 	{
848 		/*
849 		 * We could just get a gigabyte here and Not Care,
850 		 * but if vm.overcommit_memory=2, then MAP_NORESERVE is ignored
851 		 * and we'd try (and likely fail) to rip it out of swap
852 		 */
853 		noauto_files = mmap(NULL, 4 * 1024 * 1024,
854 		    PROT_READ | PROT_WRITE,
855 		    MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
856 		if (noauto_files == MAP_FAILED) {
857 			fprintf(stderr,
858 			    PROGNAME "[%d]: couldn't allocate IPC region: %s\n",
859 			    getpid(), strerror(errno));
860 			_exit(1);
861 		}
862 
863 		sem_init(&noauto_files->noauto_not_on_sem, true, 0);
864 		sem_init(&noauto_files->noauto_names_sem, true, 1);
865 		noauto_files->noauto_names_len = 0;
866 		/* Works out to 16447ish, *well* enough */
867 		noauto_files->noauto_names_max =
868 		    (4 * 1024 * 1024 - sizeof (*noauto_files)) / NAME_MAX;
869 	}
870 
871 	char *line = NULL;
872 	size_t linelen = 0;
873 	struct timespec time_start = {};
874 	{
875 		const char *dbgenv = getenv("ZFS_DEBUG");
876 		if (dbgenv)
877 			debug = atoi(dbgenv);
878 		else {
879 			FILE *cmdline = fopen("/proc/cmdline", "re");
880 			if (cmdline != NULL) {
881 				if (getline(&line, &linelen, cmdline) >= 0)
882 					debug = strstr(line, "debug") ? 2 : 0;
883 				(void) fclose(cmdline);
884 			}
885 		}
886 
887 		if (debug && !isatty(STDOUT_FILENO))
888 			dup2(STDERR_FILENO, STDOUT_FILENO);
889 	}
890 
891 	size_t forked_canmount_on = 0;
892 	size_t forked_canmount_not_on = 0;
893 	size_t canmount_on_pids_len = 128;
894 	pid_t *canmount_on_pids =
895 	    malloc(canmount_on_pids_len * sizeof (*canmount_on_pids));
896 	if (canmount_on_pids == NULL)
897 		canmount_on_pids_len = 0;
898 
899 	if (debug)
900 		clock_gettime(CLOCK_MONOTONIC_RAW, &time_start);
901 
902 	ssize_t read;
903 	pid_t pid;
904 	struct dirent *cachent;
905 	while ((cachent = readdir(fslist_dir)) != NULL) {
906 		if (strcmp(cachent->d_name, ".") == 0 ||
907 		    strcmp(cachent->d_name, "..") == 0)
908 			continue;
909 
910 		FILE *cachefile = fopenat(dirfd(fslist_dir), cachent->d_name,
911 		    O_RDONLY | O_CLOEXEC, "r", 0);
912 		if (!cachefile) {
913 			fprintf(stderr, PROGNAME "[%d]: "
914 			    "couldn't open %s under " FSLIST ": %s\n",
915 			    getpid(), cachent->d_name, strerror(errno));
916 			continue;
917 		}
918 
919 		while ((read = getline(&line, &linelen, cachefile)) >= 0) {
920 			line[read - 1] = '\0'; /* newline */
921 
922 			switch (pid = fork()) {
923 			case -1:
924 				fprintf(stderr,
925 				    PROGNAME "[%d]: couldn't fork for %s: %s\n",
926 				    getpid(), line, strerror(errno));
927 				break;
928 			case 0: /* child */
929 				_exit(line_worker(line, cachent->d_name));
930 			default: { /* parent */
931 				char *tmp;
932 				char *dset = strtok_r(line, "\t", &tmp);
933 				strtok_r(NULL, "\t", &tmp);
934 				char *canmount = strtok_r(NULL, "\t", &tmp);
935 				bool canmount_on =
936 				    canmount && strncmp(canmount, "on", 2) == 0;
937 
938 				if (debug >= 2)
939 					printf(PROGNAME ": forked %d, "
940 					    "canmount_on=%d, dataset=%s\n",
941 					    (int)pid, canmount_on, dset);
942 
943 				if (canmount_on &&
944 				    forked_canmount_on ==
945 				    canmount_on_pids_len) {
946 					size_t new_len =
947 					    (canmount_on_pids_len ?: 16) * 2;
948 					void *new_pidlist =
949 					    realloc(canmount_on_pids,
950 					    new_len *
951 					    sizeof (*canmount_on_pids));
952 					if (!new_pidlist) {
953 						fprintf(stderr,
954 						    PROGNAME "[%d]: "
955 						    "out of memory! "
956 						    "Mount ordering may be "
957 						    "affected.\n", getpid());
958 						continue;
959 					}
960 
961 					canmount_on_pids = new_pidlist;
962 					canmount_on_pids_len = new_len;
963 				}
964 
965 				if (canmount_on) {
966 					canmount_on_pids[forked_canmount_on] =
967 					    pid;
968 					++forked_canmount_on;
969 				} else
970 					++forked_canmount_not_on;
971 				break;
972 			}
973 			}
974 		}
975 
976 		(void) fclose(cachefile);
977 	}
978 	free(line);
979 
980 	if (forked_canmount_on == 0) {
981 		/* No canmount=on processes to finish, so don't deadlock here */
982 		for (size_t i = 0; i < forked_canmount_not_on; ++i)
983 			sem_post(&noauto_files->noauto_not_on_sem);
984 	} else {
985 		/* Likely a no-op, since we got these from a narrow fork loop */
986 		qsort(canmount_on_pids, forked_canmount_on,
987 		    sizeof (*canmount_on_pids), PID_T_CMP);
988 	}
989 
990 	int status, ret = 0;
991 	struct rusage usage;
992 	size_t forked_canmount_on_max = forked_canmount_on;
993 	while ((pid = wait4(-1, &status, 0, &usage)) != -1) {
994 		ret |= WEXITSTATUS(status) | WTERMSIG(status);
995 
996 		if (forked_canmount_on != 0) {
997 			if (bsearch(&pid, canmount_on_pids,
998 			    forked_canmount_on_max, sizeof (*canmount_on_pids),
999 			    PID_T_CMP))
1000 				--forked_canmount_on;
1001 
1002 			if (forked_canmount_on == 0) {
1003 				/*
1004 				 * All canmount=on processes have finished,
1005 				 * let all the lower-priority ones finish now
1006 				 */
1007 				for (size_t i = 0;
1008 				    i < forked_canmount_not_on; ++i)
1009 					sem_post(
1010 					    &noauto_files->noauto_not_on_sem);
1011 			}
1012 		}
1013 
1014 		if (debug >= 2)
1015 			printf(PROGNAME ": %d done, user=%llu.%06us, "
1016 			    "system=%llu.%06us, maxrss=%ldB, ex=0x%x\n",
1017 			    (int)pid,
1018 			    (unsigned long long) usage.ru_utime.tv_sec,
1019 			    (unsigned int) usage.ru_utime.tv_usec,
1020 			    (unsigned long long) usage.ru_stime.tv_sec,
1021 			    (unsigned int) usage.ru_stime.tv_usec,
1022 			    usage.ru_maxrss * 1024, status);
1023 	}
1024 
1025 	if (debug) {
1026 		struct timespec time_end = {};
1027 		clock_gettime(CLOCK_MONOTONIC_RAW, &time_end);
1028 
1029 		getrusage(RUSAGE_SELF, &usage);
1030 		printf(
1031 		    "\n"
1032 		    PROGNAME ": self    : "
1033 		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
1034 		    (unsigned long long) usage.ru_utime.tv_sec,
1035 		    (unsigned int) usage.ru_utime.tv_usec,
1036 		    (unsigned long long) usage.ru_stime.tv_sec,
1037 		    (unsigned int) usage.ru_stime.tv_usec,
1038 		    usage.ru_maxrss * 1024);
1039 
1040 		getrusage(RUSAGE_CHILDREN, &usage);
1041 		printf(PROGNAME ": children: "
1042 		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
1043 		    (unsigned long long) usage.ru_utime.tv_sec,
1044 		    (unsigned int) usage.ru_utime.tv_usec,
1045 		    (unsigned long long) usage.ru_stime.tv_sec,
1046 		    (unsigned int) usage.ru_stime.tv_usec,
1047 		    usage.ru_maxrss * 1024);
1048 
1049 		if (time_start.tv_nsec > time_end.tv_nsec) {
1050 			time_end.tv_nsec =
1051 			    1000000000 + time_end.tv_nsec - time_start.tv_nsec;
1052 			time_end.tv_sec -= 1;
1053 		} else
1054 			time_end.tv_nsec -= time_start.tv_nsec;
1055 		time_end.tv_sec -= time_start.tv_sec;
1056 
1057 		if (time_init.tv_nsec > time_start.tv_nsec) {
1058 			time_start.tv_nsec =
1059 			    1000000000 + time_start.tv_nsec - time_init.tv_nsec;
1060 			time_start.tv_sec -= 1;
1061 		} else
1062 			time_start.tv_nsec -= time_init.tv_nsec;
1063 		time_start.tv_sec -= time_init.tv_sec;
1064 
1065 		time_init.tv_nsec = time_start.tv_nsec + time_end.tv_nsec;
1066 		time_init.tv_sec =
1067 		    time_start.tv_sec + time_end.tv_sec +
1068 		    time_init.tv_nsec / 1000000000;
1069 		time_init.tv_nsec %= 1000000000;
1070 
1071 		printf(PROGNAME ": wall    : "
1072 		    "total=%llu.%09llus = "
1073 		    "init=%llu.%09llus + real=%llu.%09llus\n",
1074 		    (unsigned long long) time_init.tv_sec,
1075 		    (unsigned long long) time_init.tv_nsec,
1076 		    (unsigned long long) time_start.tv_sec,
1077 		    (unsigned long long) time_start.tv_nsec,
1078 		    (unsigned long long) time_end.tv_sec,
1079 		    (unsigned long long) time_end.tv_nsec);
1080 	}
1081 
1082 	_exit(ret);
1083 }
1084