xref: /freebsd/sys/compat/linux/linux_mib.c (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/sdt.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 #include <sys/proc.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/jail.h>
41 #include <sys/lock.h>
42 #include <sys/sx.h>
43 
44 #include <compat/linux/linux_mib.h>
45 #include <compat/linux/linux_misc.h>
46 
/*
 * Linux emulation identity kept per prison: the OS name and release
 * strings, the OSS version, and the release encoded as an integer
 * (filled in by linux_map_osrel()).
 */
struct linux_prison {
	char	pr_osname[LINUX_MAX_UTSNAME];	/* OS name string */
	char	pr_osrelease[LINUX_MAX_UTSNAME];	/* OS release string */
	int	pr_oss_version;			/* OSS version */
	int	pr_osrel;			/* pr_osrelease as an integer */
};
53 
/*
 * Linux info for prison0.  linux_find_prison() returns this record when
 * its walk up the prison hierarchy reaches &prison0.
 */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};
60 
/* Jail OSD slot holding struct linux_prison; set by linux_osd_jail_register(). */
static unsigned linux_osd_jail_slot;
62 
/*
 * The compat.linux sysctl node and the global knobs that live under it.
 * Each knob is a plain variable exported through SYSCTL_INT/SYSCTL_BOOL;
 * its description string states what it controls.
 */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
    &linux_default_stacksize, 0,
    "Default soft stack size resource limit, or -1 for unlimited");

int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
    &linux_dummy_rlimits, 0,
    "Return dummy values for unsupported Linux-specific rlimits");

int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

int linux_preserve_vstatus = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/* Unlike the other knobs here, this one is declared CTLFLAG_RDTUN. */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");

int linux_use_emul_path = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
    &linux_use_emul_path, 0, "Use linux.compat.emul_path");

/* File-local; read through linux_setid_allowed_query(). */
static bool linux_setid_allowed = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
    &linux_setid_allowed, 0,
    "Allow setuid/setgid on execve of Linux binary");
106 
107 int
108 linux_setid_allowed_query(struct thread *td __unused,
109     struct image_params *imgp __unused)
110 {
111 	return (linux_setid_allowed);
112 }
113 
/*
 * Forward declarations of the setters defined near the end of this file;
 * the sysctl handlers below call them.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
117 
118 static int
119 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
120 {
121 	char osname[LINUX_MAX_UTSNAME];
122 	int error;
123 
124 	linux_get_osname(req->td, osname);
125 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
126 	if (error != 0 || req->newptr == NULL)
127 		return (error);
128 	error = linux_set_osname(req->td, osname);
129 
130 	return (error);
131 }
132 
133 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
134 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
135 	    0, 0, linux_sysctl_osname, "A",
136 	    "Linux kernel OS name");
137 
138 static int
139 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
140 {
141 	char osrelease[LINUX_MAX_UTSNAME];
142 	int error;
143 
144 	linux_get_osrelease(req->td, osrelease);
145 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
146 	if (error != 0 || req->newptr == NULL)
147 		return (error);
148 	error = linux_set_osrelease(req->td, osrelease);
149 
150 	return (error);
151 }
152 
153 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
154 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
155 	    0, 0, linux_sysctl_osrelease, "A",
156 	    "Linux kernel OS release");
157 
158 static int
159 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
160 {
161 	int oss_version;
162 	int error;
163 
164 	oss_version = linux_get_oss_version(req->td);
165 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
166 	if (error != 0 || req->newptr == NULL)
167 		return (error);
168 	error = linux_set_oss_version(req->td, oss_version);
169 
170 	return (error);
171 }
172 
173 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
174 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
175 	    0, 0, linux_sysctl_oss_version, "I",
176 	    "Linux OSS version");
177 
/*
 * Map the osrelease string into its integer form.
 *
 * The accepted shape is "<v0>.<v1>.<v2>", optionally followed by '-' and
 * at least one further character (e.g. "a.b.c-suffix").  Each component is
 * parsed with strtol() in base 10 and must not be negative.  The combined
 * value LINUX_KERNVER(v0, v1, v2) must be at least LINUX_KERNVER(1, 0, 0).
 *
 * osrelease:	NUL-terminated release string; it is not modified.
 * osrel:	if non-NULL, receives the integer form on success.
 *
 * Returns 0 on success, or EINVAL if the string does not have the shape
 * above; *osrel is left untouched on failure.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int len, v0, v1, v2, v;

	len = strlen(osrelease);
	eosrelease = osrelease + len;

	/* First component: digits followed by '.' and more input. */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Second component: same rules as the first. */
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/*
	 * Third component: may end the string, or be followed by '-' plus
	 * at least one more character.
	 */
	osrelease = sep + 1;
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a leading sign, so something like "3.-1.5" would
	 * otherwise get this far.  A negative component is never a valid
	 * version number.
	 */
	if (v0 < 0 || v1 < 0 || v2 < 0)
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
211 
212 /*
213  * Find a prison with Linux info.
214  * Return the Linux info and the (locked) prison.
215  */
216 static struct linux_prison *
217 linux_find_prison(struct prison *spr, struct prison **prp)
218 {
219 	struct prison *pr;
220 	struct linux_prison *lpr;
221 
222 	for (pr = spr;; pr = pr->pr_parent) {
223 		mtx_lock(&pr->pr_mtx);
224 		lpr = (pr == &prison0)
225 		    ? &lprison0
226 		    : osd_jail_get(pr, linux_osd_jail_slot);
227 		if (lpr != NULL)
228 			break;
229 		mtx_unlock(&pr->pr_mtx);
230 	}
231 	*prp = pr;
232 
233 	return (lpr);
234 }
235 
/*
 * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
 * the Linux info and lock the prison.
 *
 * On return pr's mutex is held only when lprp is non-null; with a null lprp
 * the mutex is dropped before returning.
 */
static void
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
	struct prison *ppr;
	struct linux_prison *lpr, *nlpr;
	void **rsv;

	/* If this prison already has Linux info, return that. */
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr)
		goto done;
	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.  The ancestor's mutex is dropped
	 * before the M_WAITOK allocation and the OSD reservation; the second
	 * linux_find_prison() call takes a prison mutex again.
	 */
	mtx_unlock(&ppr->pr_mtx);
	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
	rsv = osd_reserve(linux_osd_jail_slot);
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr) {
		/* pr gained its own record in the meantime; discard ours. */
		free(nlpr, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/*
	 * Inherit the initial values from the ancestor.  The ancestor (ppr)
	 * is still locked from linux_find_prison() and stays locked until its
	 * record has been copied; pr is locked too so the new record can be
	 * attached to it using the reservation made above.
	 */
	mtx_lock(&pr->pr_mtx);
	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
	bcopy(lpr, nlpr, sizeof(*lpr));
	lpr = nlpr;
	mtx_unlock(&ppr->pr_mtx);
 done:
	/* On every path reaching here, pr is locked and lpr is its own info. */
	if (lprp != NULL)
		*lprp = lpr;
	else
		mtx_unlock(&pr->pr_mtx);
}
276 
277 /*
278  * Jail OSD methods for Linux prison data.
279  */
280 static int
281 linux_prison_create(void *obj, void *data)
282 {
283 	struct prison *pr = obj;
284 	struct vfsoptlist *opts = data;
285 	int jsys;
286 
287 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
288 	    jsys == JAIL_SYS_INHERIT)
289 		return (0);
290 	/*
291 	 * Inherit a prison's initial values from its parent
292 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
293 	 */
294 	linux_alloc_prison(pr, NULL);
295 	return (0);
296 }
297 
298 static int
299 linux_prison_check(void *obj __unused, void *data)
300 {
301 	struct vfsoptlist *opts = data;
302 	char *osname, *osrelease;
303 	int error, jsys, len, oss_version;
304 
305 	/* Check that the parameters are correct. */
306 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
307 	if (error != ENOENT) {
308 		if (error != 0)
309 			return (error);
310 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
311 			return (EINVAL);
312 	}
313 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
314 	if (error != ENOENT) {
315 		if (error != 0)
316 			return (error);
317 		if (len == 0 || osname[len - 1] != '\0')
318 			return (EINVAL);
319 		if (len > LINUX_MAX_UTSNAME) {
320 			vfs_opterror(opts, "linux.osname too long");
321 			return (ENAMETOOLONG);
322 		}
323 	}
324 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
325 	if (error != ENOENT) {
326 		if (error != 0)
327 			return (error);
328 		if (len == 0 || osrelease[len - 1] != '\0')
329 			return (EINVAL);
330 		if (len > LINUX_MAX_UTSNAME) {
331 			vfs_opterror(opts, "linux.osrelease too long");
332 			return (ENAMETOOLONG);
333 		}
334 		error = linux_map_osrel(osrelease, NULL);
335 		if (error != 0) {
336 			vfs_opterror(opts, "linux.osrelease format error");
337 			return (error);
338 		}
339 	}
340 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
341 	    sizeof(oss_version));
342 
343 	if (error == ENOENT)
344 		error = 0;
345 	return (error);
346 }
347 
348 static int
349 linux_prison_set(void *obj, void *data)
350 {
351 	struct linux_prison *lpr;
352 	struct prison *pr = obj;
353 	struct vfsoptlist *opts = data;
354 	char *osname, *osrelease;
355 	int error, gotversion, jsys, len, oss_version;
356 
357 	/* Set the parameters, which should be correct. */
358 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
359 	if (error == ENOENT)
360 		jsys = -1;
361 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
362 	if (error == ENOENT)
363 		osname = NULL;
364 	else
365 		jsys = JAIL_SYS_NEW;
366 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
367 	if (error == ENOENT)
368 		osrelease = NULL;
369 	else
370 		jsys = JAIL_SYS_NEW;
371 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
372 	    sizeof(oss_version));
373 	if (error == ENOENT)
374 		gotversion = 0;
375 	else {
376 		gotversion = 1;
377 		jsys = JAIL_SYS_NEW;
378 	}
379 	switch (jsys) {
380 	case JAIL_SYS_INHERIT:
381 		/* "linux=inherit": inherit the parent's Linux info. */
382 		mtx_lock(&pr->pr_mtx);
383 		osd_jail_del(pr, linux_osd_jail_slot);
384 		mtx_unlock(&pr->pr_mtx);
385 		break;
386 	case JAIL_SYS_NEW:
387 		/*
388 		 * "linux=new" or "linux.*":
389 		 * the prison gets its own Linux info.
390 		 */
391 		linux_alloc_prison(pr, &lpr);
392 		if (osrelease) {
393 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
394 			strlcpy(lpr->pr_osrelease, osrelease,
395 			    LINUX_MAX_UTSNAME);
396 		}
397 		if (osname)
398 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
399 		if (gotversion)
400 			lpr->pr_oss_version = oss_version;
401 		mtx_unlock(&pr->pr_mtx);
402 	}
403 
404 	return (0);
405 }
406 
/*
 * Declarations of the Linux jail parameters: the "linux" node plus the
 * osname, osrelease and oss_version parameters beneath it.  These are the
 * names handled by the linux_prison_check/set/get methods in this file as
 * "linux", "linux.osname", "linux.osrelease" and "linux.oss_version".
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
414 
415 static int
416 linux_prison_get(void *obj, void *data)
417 {
418 	struct linux_prison *lpr;
419 	struct prison *ppr;
420 	struct prison *pr = obj;
421 	struct vfsoptlist *opts = data;
422 	int error, i;
423 
424 	static int version0;
425 
426 	/* See if this prison is the one with the Linux info. */
427 	lpr = linux_find_prison(pr, &ppr);
428 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
429 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
430 	if (error != 0 && error != ENOENT)
431 		goto done;
432 	if (i) {
433 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
434 		if (error != 0 && error != ENOENT)
435 			goto done;
436 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
437 		if (error != 0 && error != ENOENT)
438 			goto done;
439 		error = vfs_setopt(opts, "linux.oss_version",
440 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
441 		if (error != 0 && error != ENOENT)
442 			goto done;
443 	} else {
444 		/*
445 		 * If this prison is inheriting its Linux info, report
446 		 * empty/zero parameters.
447 		 */
448 		error = vfs_setopts(opts, "linux.osname", "");
449 		if (error != 0 && error != ENOENT)
450 			goto done;
451 		error = vfs_setopts(opts, "linux.osrelease", "");
452 		if (error != 0 && error != ENOENT)
453 			goto done;
454 		error = vfs_setopt(opts, "linux.oss_version", &version0,
455 		    sizeof(lpr->pr_oss_version));
456 		if (error != 0 && error != ENOENT)
457 			goto done;
458 	}
459 	error = 0;
460 
461  done:
462 	mtx_unlock(&ppr->pr_mtx);
463 
464 	return (error);
465 }
466 
467 static void
468 linux_prison_destructor(void *data)
469 {
470 
471 	free(data, M_PRISON);
472 }
473 
474 void
475 linux_osd_jail_register(void)
476 {
477 	struct prison *pr;
478 	osd_method_t methods[PR_MAXMETHOD] = {
479 	    [PR_METHOD_CREATE] =	linux_prison_create,
480 	    [PR_METHOD_GET] =		linux_prison_get,
481 	    [PR_METHOD_SET] =		linux_prison_set,
482 	    [PR_METHOD_CHECK] =		linux_prison_check
483 	};
484 
485 	linux_osd_jail_slot =
486 	    osd_jail_register(linux_prison_destructor, methods);
487 	/* Copy the system Linux info to any current prisons. */
488 	sx_slock(&allprison_lock);
489 	TAILQ_FOREACH(pr, &allprison, pr_list)
490 		linux_alloc_prison(pr, NULL);
491 	sx_sunlock(&allprison_lock);
492 }
493 
494 void
495 linux_osd_jail_deregister(void)
496 {
497 
498 	osd_jail_deregister(linux_osd_jail_slot);
499 }
500 
501 void
502 linux_get_osname(struct thread *td, char *dst)
503 {
504 	struct prison *pr;
505 	struct linux_prison *lpr;
506 
507 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
508 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
509 	mtx_unlock(&pr->pr_mtx);
510 }
511 
512 static int
513 linux_set_osname(struct thread *td, char *osname)
514 {
515 	struct prison *pr;
516 	struct linux_prison *lpr;
517 
518 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
519 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
520 	mtx_unlock(&pr->pr_mtx);
521 
522 	return (0);
523 }
524 
525 void
526 linux_get_osrelease(struct thread *td, char *dst)
527 {
528 	struct prison *pr;
529 	struct linux_prison *lpr;
530 
531 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
532 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
533 	mtx_unlock(&pr->pr_mtx);
534 }
535 
536 int
537 linux_kernver(struct thread *td)
538 {
539 	struct prison *pr;
540 	struct linux_prison *lpr;
541 	int osrel;
542 
543 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
544 	osrel = lpr->pr_osrel;
545 	mtx_unlock(&pr->pr_mtx);
546 
547 	return (osrel);
548 }
549 
550 static int
551 linux_set_osrelease(struct thread *td, char *osrelease)
552 {
553 	struct prison *pr;
554 	struct linux_prison *lpr;
555 	int error;
556 
557 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
558 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
559 	if (error == 0)
560 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
561 	mtx_unlock(&pr->pr_mtx);
562 
563 	return (error);
564 }
565 
566 int
567 linux_get_oss_version(struct thread *td)
568 {
569 	struct prison *pr;
570 	struct linux_prison *lpr;
571 	int version;
572 
573 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
574 	version = lpr->pr_oss_version;
575 	mtx_unlock(&pr->pr_mtx);
576 
577 	return (version);
578 }
579 
580 static int
581 linux_set_oss_version(struct thread *td, int oss_version)
582 {
583 	struct prison *pr;
584 	struct linux_prison *lpr;
585 
586 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
587 	lpr->pr_oss_version = oss_version;
588 	mtx_unlock(&pr->pr_mtx);
589 
590 	return (0);
591 }
592