xref: /freebsd/sys/compat/linux/linux_mib.c (revision 6fa42b91ca3f481912af98c4d49c44507eb1b8e1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/lock.h>
31 #include <sys/malloc.h>
32 #include <sys/mount.h>
33 #include <sys/jail.h>
34 #include <sys/proc.h>
35 #include <sys/sx.h>
36 
37 #include <compat/linux/linux_mib.h>
38 #include <compat/linux/linux_misc.h>
39 
/*
 * Linux identity data kept per prison: the OS name and release strings,
 * the OSS version, and the release string mapped to an integer.
 */
struct linux_prison {
	char	pr_osname[LINUX_MAX_UTSNAME];
	char	pr_osrelease[LINUX_MAX_UTSNAME];
	int	pr_oss_version;
	int	pr_osrel;	/* pr_osrelease as mapped by linux_map_osrel() */
};
46 
/*
 * Linux info for prison0.  linux_find_prison() returns this record when its
 * walk up the prison hierarchy reaches prison0.
 */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};
53 
/*
 * Jail OSD slot under which a prison's struct linux_prison is stored;
 * assigned by osd_jail_register() in linux_osd_jail_register().
 */
static unsigned linux_osd_jail_slot;
55 
/* Root of the compat.linux sysctl tree. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

/* Warning logging for linux(4); 0 disables it.  Defaults to 3. */
int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

/* Default soft openfiles resource limit; -1 means unlimited. */
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* Default soft stack size resource limit; -1 means unlimited. */
int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
    &linux_default_stacksize, 0,
    "Default soft stack size resource limit, or -1 for unlimited");

/* Return dummy values for unsupported Linux-specific rlimits.  Defaults to 0. */
int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
    &linux_dummy_rlimits, 0,
    "Return dummy values for unsupported Linux-specific rlimits");

/* Ignore enabling IP_RECVERR.  Defaults to 1. */
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

/* Preserve the VSTATUS termios(4) flag.  Defaults to 1. */
int linux_preserve_vstatus = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/*
 * Map scheduler priorities to Linux priorities (not POSIX compliant).
 * Unlike the other knobs here, this one is declared with CTLFLAG_RDTUN.
 */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");

/*
 * Allow setuid/setgid on execve of a Linux binary.  File-local; exposed to
 * other code through linux_setid_allowed_query().
 */
static bool linux_setid_allowed = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
    &linux_setid_allowed, 0,
    "Allow setuid/setgid on execve of Linux binary");
95 
96 int
97 linux_setid_allowed_query(struct thread *td __unused,
98     struct image_params *imgp __unused)
99 {
100 	return (linux_setid_allowed);
101 }
102 
/*
 * Forward declarations of the file-local setters; they are called by the
 * sysctl handlers that follow but defined later in the file.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
106 
107 static int
108 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
109 {
110 	char osname[LINUX_MAX_UTSNAME];
111 	int error;
112 
113 	linux_get_osname(req->td, osname);
114 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
115 	if (error != 0 || req->newptr == NULL)
116 		return (error);
117 	error = linux_set_osname(req->td, osname);
118 
119 	return (error);
120 }
121 
122 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
123 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
124 	    0, 0, linux_sysctl_osname, "A",
125 	    "Linux kernel OS name");
126 
127 static int
128 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
129 {
130 	char osrelease[LINUX_MAX_UTSNAME];
131 	int error;
132 
133 	linux_get_osrelease(req->td, osrelease);
134 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
135 	if (error != 0 || req->newptr == NULL)
136 		return (error);
137 	error = linux_set_osrelease(req->td, osrelease);
138 
139 	return (error);
140 }
141 
142 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
143 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
144 	    0, 0, linux_sysctl_osrelease, "A",
145 	    "Linux kernel OS release");
146 
147 static int
148 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
149 {
150 	int oss_version;
151 	int error;
152 
153 	oss_version = linux_get_oss_version(req->td);
154 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
155 	if (error != 0 || req->newptr == NULL)
156 		return (error);
157 	error = linux_set_oss_version(req->td, oss_version);
158 
159 	return (error);
160 }
161 
162 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
163 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
164 	    0, 0, linux_sysctl_oss_version, "I",
165 	    "Linux OSS version");
166 
/*
 * Map a Linux release string of the form "major.minor.patch[-suffix]" into
 * the integer produced by LINUX_KERNVER().
 *
 * Each of the three components must begin with a decimal digit.  strtol()
 * on its own would also accept a sign or leading white space, which let
 * strings such as "5.-4.0" through and fed a negative component into
 * LINUX_KERNVER(); those are now rejected.  The major and minor components
 * must each be followed by '.' and at least one more character.  The patch
 * component must either end the string or be followed by '-' and at least
 * one more character.  Versions that map below LINUX_KERNVER(1, 0, 0) are
 * refused.
 *
 * Returns 0 on success, storing the mapped value through osrel when osrel
 * is non-NULL.  Returns EINVAL on any format error, in which case *osrel is
 * not written.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int len, v0, v1, v2, v;

	len = strlen(osrelease);
	eosrelease = osrelease + len;

	/* Major version. */
	if (*osrelease < '0' || *osrelease > '9')
		return (EINVAL);
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Minor version; sep + 1 is known to be inside the string here. */
	osrelease = sep + 1;
	if (*osrelease < '0' || *osrelease > '9')
		return (EINVAL);
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Patch level, optionally followed by "-<suffix>". */
	osrelease = sep + 1;
	if (*osrelease < '0' || *osrelease > '9')
		return (EINVAL);
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
200 
201 /*
202  * Find a prison with Linux info.
203  * Return the Linux info and the (locked) prison.
204  */
205 static struct linux_prison *
206 linux_find_prison(struct prison *spr, struct prison **prp)
207 {
208 	struct prison *pr;
209 	struct linux_prison *lpr;
210 
211 	for (pr = spr;; pr = pr->pr_parent) {
212 		mtx_lock(&pr->pr_mtx);
213 		lpr = (pr == &prison0)
214 		    ? &lprison0
215 		    : osd_jail_get(pr, linux_osd_jail_slot);
216 		if (lpr != NULL)
217 			break;
218 		mtx_unlock(&pr->pr_mtx);
219 	}
220 	*prp = pr;
221 
222 	return (lpr);
223 }
224 
225 /*
226  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
227  * the Linux info and lock the prison.
228  */
229 static void
230 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
231 {
232 	struct prison *ppr;
233 	struct linux_prison *lpr, *nlpr;
234 	void **rsv;
235 
236 	/* If this prison already has Linux info, return that. */
237 	lpr = linux_find_prison(pr, &ppr);
238 	if (ppr == pr)
239 		goto done;
240 	/*
241 	 * Allocate a new info record.  Then check again, in case something
242 	 * changed during the allocation.
243 	 */
244 	mtx_unlock(&ppr->pr_mtx);
245 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
246 	rsv = osd_reserve(linux_osd_jail_slot);
247 	lpr = linux_find_prison(pr, &ppr);
248 	if (ppr == pr) {
249 		free(nlpr, M_PRISON);
250 		osd_free_reserved(rsv);
251 		goto done;
252 	}
253 	/* Inherit the initial values from the ancestor. */
254 	mtx_lock(&pr->pr_mtx);
255 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
256 	bcopy(lpr, nlpr, sizeof(*lpr));
257 	lpr = nlpr;
258 	mtx_unlock(&ppr->pr_mtx);
259  done:
260 	if (lprp != NULL)
261 		*lprp = lpr;
262 	else
263 		mtx_unlock(&pr->pr_mtx);
264 }
265 
266 /*
267  * Jail OSD methods for Linux prison data.
268  */
269 static int
270 linux_prison_create(void *obj, void *data)
271 {
272 	struct prison *pr = obj;
273 	struct vfsoptlist *opts = data;
274 	int jsys;
275 
276 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
277 	    jsys == JAIL_SYS_INHERIT)
278 		return (0);
279 	/*
280 	 * Inherit a prison's initial values from its parent
281 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
282 	 */
283 	linux_alloc_prison(pr, NULL);
284 	return (0);
285 }
286 
287 static int
288 linux_prison_check(void *obj __unused, void *data)
289 {
290 	struct vfsoptlist *opts = data;
291 	char *osname, *osrelease;
292 	int error, jsys, len, oss_version;
293 
294 	/* Check that the parameters are correct. */
295 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
296 	if (error != ENOENT) {
297 		if (error != 0)
298 			return (error);
299 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
300 			return (EINVAL);
301 	}
302 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
303 	if (error != ENOENT) {
304 		if (error != 0)
305 			return (error);
306 		if (len == 0 || osname[len - 1] != '\0')
307 			return (EINVAL);
308 		if (len > LINUX_MAX_UTSNAME) {
309 			vfs_opterror(opts, "linux.osname too long");
310 			return (ENAMETOOLONG);
311 		}
312 	}
313 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
314 	if (error != ENOENT) {
315 		if (error != 0)
316 			return (error);
317 		if (len == 0 || osrelease[len - 1] != '\0')
318 			return (EINVAL);
319 		if (len > LINUX_MAX_UTSNAME) {
320 			vfs_opterror(opts, "linux.osrelease too long");
321 			return (ENAMETOOLONG);
322 		}
323 		error = linux_map_osrel(osrelease, NULL);
324 		if (error != 0) {
325 			vfs_opterror(opts, "linux.osrelease format error");
326 			return (error);
327 		}
328 	}
329 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
330 	    sizeof(oss_version));
331 
332 	if (error == ENOENT)
333 		error = 0;
334 	return (error);
335 }
336 
337 static int
338 linux_prison_set(void *obj, void *data)
339 {
340 	struct linux_prison *lpr;
341 	struct prison *pr = obj;
342 	struct vfsoptlist *opts = data;
343 	char *osname, *osrelease;
344 	int error, gotversion, jsys, len, oss_version;
345 
346 	/* Set the parameters, which should be correct. */
347 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
348 	if (error == ENOENT)
349 		jsys = -1;
350 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
351 	if (error == ENOENT)
352 		osname = NULL;
353 	else
354 		jsys = JAIL_SYS_NEW;
355 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
356 	if (error == ENOENT)
357 		osrelease = NULL;
358 	else
359 		jsys = JAIL_SYS_NEW;
360 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
361 	    sizeof(oss_version));
362 	if (error == ENOENT)
363 		gotversion = 0;
364 	else {
365 		gotversion = 1;
366 		jsys = JAIL_SYS_NEW;
367 	}
368 	switch (jsys) {
369 	case JAIL_SYS_INHERIT:
370 		/* "linux=inherit": inherit the parent's Linux info. */
371 		mtx_lock(&pr->pr_mtx);
372 		osd_jail_del(pr, linux_osd_jail_slot);
373 		mtx_unlock(&pr->pr_mtx);
374 		break;
375 	case JAIL_SYS_NEW:
376 		/*
377 		 * "linux=new" or "linux.*":
378 		 * the prison gets its own Linux info.
379 		 */
380 		linux_alloc_prison(pr, &lpr);
381 		if (osrelease) {
382 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
383 			strlcpy(lpr->pr_osrelease, osrelease,
384 			    LINUX_MAX_UTSNAME);
385 		}
386 		if (osname)
387 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
388 		if (gotversion)
389 			lpr->pr_oss_version = oss_version;
390 		mtx_unlock(&pr->pr_mtx);
391 	}
392 
393 	return (0);
394 }
395 
/*
 * Jail parameter declarations for the "linux" node and its osname,
 * osrelease and oss_version members; these are the option names handled by
 * the linux_prison_check/set/get methods in this file.
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
403 
404 static int
405 linux_prison_get(void *obj, void *data)
406 {
407 	struct linux_prison *lpr;
408 	struct prison *ppr;
409 	struct prison *pr = obj;
410 	struct vfsoptlist *opts = data;
411 	int error, i;
412 
413 	static int version0;
414 
415 	/* See if this prison is the one with the Linux info. */
416 	lpr = linux_find_prison(pr, &ppr);
417 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
418 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
419 	if (error != 0 && error != ENOENT)
420 		goto done;
421 	if (i) {
422 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
423 		if (error != 0 && error != ENOENT)
424 			goto done;
425 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
426 		if (error != 0 && error != ENOENT)
427 			goto done;
428 		error = vfs_setopt(opts, "linux.oss_version",
429 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
430 		if (error != 0 && error != ENOENT)
431 			goto done;
432 	} else {
433 		/*
434 		 * If this prison is inheriting its Linux info, report
435 		 * empty/zero parameters.
436 		 */
437 		error = vfs_setopts(opts, "linux.osname", "");
438 		if (error != 0 && error != ENOENT)
439 			goto done;
440 		error = vfs_setopts(opts, "linux.osrelease", "");
441 		if (error != 0 && error != ENOENT)
442 			goto done;
443 		error = vfs_setopt(opts, "linux.oss_version", &version0,
444 		    sizeof(lpr->pr_oss_version));
445 		if (error != 0 && error != ENOENT)
446 			goto done;
447 	}
448 	error = 0;
449 
450  done:
451 	mtx_unlock(&ppr->pr_mtx);
452 
453 	return (error);
454 }
455 
456 static void
457 linux_prison_destructor(void *data)
458 {
459 
460 	free(data, M_PRISON);
461 }
462 
463 void
464 linux_osd_jail_register(void)
465 {
466 	struct prison *pr;
467 	osd_method_t methods[PR_MAXMETHOD] = {
468 	    [PR_METHOD_CREATE] =	linux_prison_create,
469 	    [PR_METHOD_GET] =		linux_prison_get,
470 	    [PR_METHOD_SET] =		linux_prison_set,
471 	    [PR_METHOD_CHECK] =		linux_prison_check
472 	};
473 
474 	linux_osd_jail_slot =
475 	    osd_jail_register(linux_prison_destructor, methods);
476 	/* Copy the system Linux info to any current prisons. */
477 	sx_slock(&allprison_lock);
478 	TAILQ_FOREACH(pr, &allprison, pr_list)
479 		linux_alloc_prison(pr, NULL);
480 	sx_sunlock(&allprison_lock);
481 }
482 
483 void
484 linux_osd_jail_deregister(void)
485 {
486 
487 	osd_jail_deregister(linux_osd_jail_slot);
488 }
489 
490 void
491 linux_get_osname(struct thread *td, char *dst)
492 {
493 	struct prison *pr;
494 	struct linux_prison *lpr;
495 
496 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
497 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
498 	mtx_unlock(&pr->pr_mtx);
499 }
500 
501 static int
502 linux_set_osname(struct thread *td, char *osname)
503 {
504 	struct prison *pr;
505 	struct linux_prison *lpr;
506 
507 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
508 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
509 	mtx_unlock(&pr->pr_mtx);
510 
511 	return (0);
512 }
513 
514 void
515 linux_get_osrelease(struct thread *td, char *dst)
516 {
517 	struct prison *pr;
518 	struct linux_prison *lpr;
519 
520 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
521 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
522 	mtx_unlock(&pr->pr_mtx);
523 }
524 
525 int
526 linux_kernver(struct thread *td)
527 {
528 	struct prison *pr;
529 	struct linux_prison *lpr;
530 	int osrel;
531 
532 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
533 	osrel = lpr->pr_osrel;
534 	mtx_unlock(&pr->pr_mtx);
535 
536 	return (osrel);
537 }
538 
539 static int
540 linux_set_osrelease(struct thread *td, char *osrelease)
541 {
542 	struct prison *pr;
543 	struct linux_prison *lpr;
544 	int error;
545 
546 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
547 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
548 	if (error == 0)
549 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
550 	mtx_unlock(&pr->pr_mtx);
551 
552 	return (error);
553 }
554 
555 int
556 linux_get_oss_version(struct thread *td)
557 {
558 	struct prison *pr;
559 	struct linux_prison *lpr;
560 	int version;
561 
562 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
563 	version = lpr->pr_oss_version;
564 	mtx_unlock(&pr->pr_mtx);
565 
566 	return (version);
567 }
568 
569 static int
570 linux_set_oss_version(struct thread *td, int oss_version)
571 {
572 	struct prison *pr;
573 	struct linux_prison *lpr;
574 
575 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
576 	lpr->pr_oss_version = oss_version;
577 	mtx_unlock(&pr->pr_mtx);
578 
579 	return (0);
580 }
581