xref: /freebsd/sys/compat/linux/linux_mib.c (revision 66fd12cf4896eb08ad8e7a2627537f84ead84dd3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/jail.h>
37 #include <sys/proc.h>
38 #include <sys/sx.h>
39 
40 #include <compat/linux/linux_mib.h>
41 #include <compat/linux/linux_misc.h>
42 
43 struct linux_prison {
44 	char	pr_osname[LINUX_MAX_UTSNAME];
45 	char	pr_osrelease[LINUX_MAX_UTSNAME];
46 	int	pr_oss_version;
47 	int	pr_osrel;
48 };
49 
50 static struct linux_prison lprison0 = {
51 	.pr_osname =		"Linux",
52 	.pr_osrelease =		LINUX_VERSION_STR,
53 	.pr_oss_version =	0x030600,
54 	.pr_osrel =		LINUX_VERSION_CODE
55 };
56 
57 static unsigned linux_osd_jail_slot;
58 
59 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
60     "Linux mode");
61 
62 int linux_debug = 3;
63 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
64     &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
65 
66 int linux_default_openfiles = 1024;
67 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
68     &linux_default_openfiles, 0,
69     "Default soft openfiles resource limit, or -1 for unlimited");
70 
71 int linux_default_stacksize = 8 * 1024 * 1024;
72 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
73     &linux_default_stacksize, 0,
74     "Default soft stack size resource limit, or -1 for unlimited");
75 
76 int linux_dummy_rlimits = 0;
77 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
78     &linux_dummy_rlimits, 0,
79     "Return dummy values for unsupported Linux-specific rlimits");
80 
81 int linux_ignore_ip_recverr = 1;
82 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
83     &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
84 
85 int linux_preserve_vstatus = 1;
86 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
87     &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
88 
89 bool linux_map_sched_prio = true;
90 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
91     &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
92     "(not POSIX compliant)");
93 
94 static bool linux_setid_allowed = true;
95 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
96     &linux_setid_allowed, 0,
97     "Allow setuid/setgid on execve of Linux binary");
98 
99 int
100 linux_setid_allowed_query(struct thread *td __unused,
101     struct image_params *imgp __unused)
102 {
103 	return (linux_setid_allowed);
104 }
105 
/* Setters used by the sysctl handlers below; defined at the end of the file. */
static int linux_set_osname(struct thread *td, char *osname);
static int linux_set_osrelease(struct thread *td, char *osrelease);
static int linux_set_oss_version(struct thread *td, int oss_version);
109 
110 static int
111 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
112 {
113 	char osname[LINUX_MAX_UTSNAME];
114 	int error;
115 
116 	linux_get_osname(req->td, osname);
117 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
118 	if (error != 0 || req->newptr == NULL)
119 		return (error);
120 	error = linux_set_osname(req->td, osname);
121 
122 	return (error);
123 }
124 
125 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
126 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
127 	    0, 0, linux_sysctl_osname, "A",
128 	    "Linux kernel OS name");
129 
130 static int
131 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
132 {
133 	char osrelease[LINUX_MAX_UTSNAME];
134 	int error;
135 
136 	linux_get_osrelease(req->td, osrelease);
137 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
138 	if (error != 0 || req->newptr == NULL)
139 		return (error);
140 	error = linux_set_osrelease(req->td, osrelease);
141 
142 	return (error);
143 }
144 
145 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
146 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
147 	    0, 0, linux_sysctl_osrelease, "A",
148 	    "Linux kernel OS release");
149 
150 static int
151 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
152 {
153 	int oss_version;
154 	int error;
155 
156 	oss_version = linux_get_oss_version(req->td);
157 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
158 	if (error != 0 || req->newptr == NULL)
159 		return (error);
160 	error = linux_set_oss_version(req->td, oss_version);
161 
162 	return (error);
163 }
164 
165 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
166 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
167 	    0, 0, linux_sysctl_oss_version, "I",
168 	    "Linux OSS version");
169 
/*
 * Parse one numeric component of a release string.
 *
 * The component must start with a decimal digit, so the leading white
 * space and '+'/'-' signs that strtol() would otherwise accept are
 * rejected, and its value must fit in an int.  On success the value is
 * stored in *valp, *endp points just past the digits and 0 is returned;
 * otherwise EINVAL is returned.
 */
static int
linux_osrel_component(char *str, char **endp, int *valp)
{
	long val;

	if (*str < '0' || *str > '9')
		return (EINVAL);
	val = strtol(str, endp, 10);
	/* Reject values that would be silently truncated when narrowed. */
	if (*endp == str || val != (int)val)
		return (EINVAL);
	*valp = (int)val;

	return (0);
}

/*
 * Map the osrelease string into an integer.
 *
 * The accepted form is "major.minor.patch", optionally followed by '-' and
 * at least one more character.  The three components are combined with
 * LINUX_KERNVER(); results below LINUX_KERNVER(1, 0, 0) are rejected.
 *
 * If osrel is not NULL the encoded version is stored there, and only on
 * success.  Returns 0 on success or EINVAL if the string is malformed.
 *
 * NOTE(review): LINUX_KERNVER() is defined elsewhere, so very large but
 * int-sized components are not checked against overflow of its encoding.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int v0, v1, v2, v;

	eosrelease = osrelease + strlen(osrelease);

	/* Major: must be followed by '.' and at least one more character. */
	if (linux_osrel_component(osrelease, &sep, &v0) != 0 ||
	    sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Minor: same rule as the major component. */
	osrelease = sep + 1;
	if (linux_osrel_component(osrelease, &sep, &v1) != 0 ||
	    sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Patch: ends the string or is followed by a non-empty "-suffix". */
	osrelease = sep + 1;
	if (linux_osrel_component(osrelease, &sep, &v2) != 0 ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
203 
204 /*
205  * Find a prison with Linux info.
206  * Return the Linux info and the (locked) prison.
207  */
208 static struct linux_prison *
209 linux_find_prison(struct prison *spr, struct prison **prp)
210 {
211 	struct prison *pr;
212 	struct linux_prison *lpr;
213 
214 	for (pr = spr;; pr = pr->pr_parent) {
215 		mtx_lock(&pr->pr_mtx);
216 		lpr = (pr == &prison0)
217 		    ? &lprison0
218 		    : osd_jail_get(pr, linux_osd_jail_slot);
219 		if (lpr != NULL)
220 			break;
221 		mtx_unlock(&pr->pr_mtx);
222 	}
223 	*prp = pr;
224 
225 	return (lpr);
226 }
227 
228 /*
229  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
230  * the Linux info and lock the prison.
231  */
232 static void
233 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
234 {
235 	struct prison *ppr;
236 	struct linux_prison *lpr, *nlpr;
237 	void **rsv;
238 
239 	/* If this prison already has Linux info, return that. */
240 	lpr = linux_find_prison(pr, &ppr);
241 	if (ppr == pr)
242 		goto done;
243 	/*
244 	 * Allocate a new info record.  Then check again, in case something
245 	 * changed during the allocation.
246 	 */
247 	mtx_unlock(&ppr->pr_mtx);
248 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
249 	rsv = osd_reserve(linux_osd_jail_slot);
250 	lpr = linux_find_prison(pr, &ppr);
251 	if (ppr == pr) {
252 		free(nlpr, M_PRISON);
253 		osd_free_reserved(rsv);
254 		goto done;
255 	}
256 	/* Inherit the initial values from the ancestor. */
257 	mtx_lock(&pr->pr_mtx);
258 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
259 	bcopy(lpr, nlpr, sizeof(*lpr));
260 	lpr = nlpr;
261 	mtx_unlock(&ppr->pr_mtx);
262  done:
263 	if (lprp != NULL)
264 		*lprp = lpr;
265 	else
266 		mtx_unlock(&pr->pr_mtx);
267 }
268 
269 /*
270  * Jail OSD methods for Linux prison data.
271  */
272 static int
273 linux_prison_create(void *obj, void *data)
274 {
275 	struct prison *pr = obj;
276 	struct vfsoptlist *opts = data;
277 	int jsys;
278 
279 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
280 	    jsys == JAIL_SYS_INHERIT)
281 		return (0);
282 	/*
283 	 * Inherit a prison's initial values from its parent
284 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
285 	 */
286 	linux_alloc_prison(pr, NULL);
287 	return (0);
288 }
289 
290 static int
291 linux_prison_check(void *obj __unused, void *data)
292 {
293 	struct vfsoptlist *opts = data;
294 	char *osname, *osrelease;
295 	int error, jsys, len, oss_version;
296 
297 	/* Check that the parameters are correct. */
298 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
299 	if (error != ENOENT) {
300 		if (error != 0)
301 			return (error);
302 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
303 			return (EINVAL);
304 	}
305 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
306 	if (error != ENOENT) {
307 		if (error != 0)
308 			return (error);
309 		if (len == 0 || osname[len - 1] != '\0')
310 			return (EINVAL);
311 		if (len > LINUX_MAX_UTSNAME) {
312 			vfs_opterror(opts, "linux.osname too long");
313 			return (ENAMETOOLONG);
314 		}
315 	}
316 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
317 	if (error != ENOENT) {
318 		if (error != 0)
319 			return (error);
320 		if (len == 0 || osrelease[len - 1] != '\0')
321 			return (EINVAL);
322 		if (len > LINUX_MAX_UTSNAME) {
323 			vfs_opterror(opts, "linux.osrelease too long");
324 			return (ENAMETOOLONG);
325 		}
326 		error = linux_map_osrel(osrelease, NULL);
327 		if (error != 0) {
328 			vfs_opterror(opts, "linux.osrelease format error");
329 			return (error);
330 		}
331 	}
332 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
333 	    sizeof(oss_version));
334 
335 	if (error == ENOENT)
336 		error = 0;
337 	return (error);
338 }
339 
340 static int
341 linux_prison_set(void *obj, void *data)
342 {
343 	struct linux_prison *lpr;
344 	struct prison *pr = obj;
345 	struct vfsoptlist *opts = data;
346 	char *osname, *osrelease;
347 	int error, gotversion, jsys, len, oss_version;
348 
349 	/* Set the parameters, which should be correct. */
350 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
351 	if (error == ENOENT)
352 		jsys = -1;
353 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
354 	if (error == ENOENT)
355 		osname = NULL;
356 	else
357 		jsys = JAIL_SYS_NEW;
358 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
359 	if (error == ENOENT)
360 		osrelease = NULL;
361 	else
362 		jsys = JAIL_SYS_NEW;
363 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
364 	    sizeof(oss_version));
365 	if (error == ENOENT)
366 		gotversion = 0;
367 	else {
368 		gotversion = 1;
369 		jsys = JAIL_SYS_NEW;
370 	}
371 	switch (jsys) {
372 	case JAIL_SYS_INHERIT:
373 		/* "linux=inherit": inherit the parent's Linux info. */
374 		mtx_lock(&pr->pr_mtx);
375 		osd_jail_del(pr, linux_osd_jail_slot);
376 		mtx_unlock(&pr->pr_mtx);
377 		break;
378 	case JAIL_SYS_NEW:
379 		/*
380 		 * "linux=new" or "linux.*":
381 		 * the prison gets its own Linux info.
382 		 */
383 		linux_alloc_prison(pr, &lpr);
384 		if (osrelease) {
385 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
386 			strlcpy(lpr->pr_osrelease, osrelease,
387 			    LINUX_MAX_UTSNAME);
388 		}
389 		if (osname)
390 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
391 		if (gotversion)
392 			lpr->pr_oss_version = oss_version;
393 		mtx_unlock(&pr->pr_mtx);
394 	}
395 
396 	return (0);
397 }
398 
399 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
400 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
401     "Jail Linux kernel OS name");
402 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
403     "Jail Linux kernel OS release");
404 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
405     "I", "Jail Linux OSS version");
406 
407 static int
408 linux_prison_get(void *obj, void *data)
409 {
410 	struct linux_prison *lpr;
411 	struct prison *ppr;
412 	struct prison *pr = obj;
413 	struct vfsoptlist *opts = data;
414 	int error, i;
415 
416 	static int version0;
417 
418 	/* See if this prison is the one with the Linux info. */
419 	lpr = linux_find_prison(pr, &ppr);
420 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
421 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
422 	if (error != 0 && error != ENOENT)
423 		goto done;
424 	if (i) {
425 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
426 		if (error != 0 && error != ENOENT)
427 			goto done;
428 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
429 		if (error != 0 && error != ENOENT)
430 			goto done;
431 		error = vfs_setopt(opts, "linux.oss_version",
432 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
433 		if (error != 0 && error != ENOENT)
434 			goto done;
435 	} else {
436 		/*
437 		 * If this prison is inheriting its Linux info, report
438 		 * empty/zero parameters.
439 		 */
440 		error = vfs_setopts(opts, "linux.osname", "");
441 		if (error != 0 && error != ENOENT)
442 			goto done;
443 		error = vfs_setopts(opts, "linux.osrelease", "");
444 		if (error != 0 && error != ENOENT)
445 			goto done;
446 		error = vfs_setopt(opts, "linux.oss_version", &version0,
447 		    sizeof(lpr->pr_oss_version));
448 		if (error != 0 && error != ENOENT)
449 			goto done;
450 	}
451 	error = 0;
452 
453  done:
454 	mtx_unlock(&ppr->pr_mtx);
455 
456 	return (error);
457 }
458 
459 static void
460 linux_prison_destructor(void *data)
461 {
462 
463 	free(data, M_PRISON);
464 }
465 
466 void
467 linux_osd_jail_register(void)
468 {
469 	struct prison *pr;
470 	osd_method_t methods[PR_MAXMETHOD] = {
471 	    [PR_METHOD_CREATE] =	linux_prison_create,
472 	    [PR_METHOD_GET] =		linux_prison_get,
473 	    [PR_METHOD_SET] =		linux_prison_set,
474 	    [PR_METHOD_CHECK] =		linux_prison_check
475 	};
476 
477 	linux_osd_jail_slot =
478 	    osd_jail_register(linux_prison_destructor, methods);
479 	/* Copy the system Linux info to any current prisons. */
480 	sx_slock(&allprison_lock);
481 	TAILQ_FOREACH(pr, &allprison, pr_list)
482 		linux_alloc_prison(pr, NULL);
483 	sx_sunlock(&allprison_lock);
484 }
485 
486 void
487 linux_osd_jail_deregister(void)
488 {
489 
490 	osd_jail_deregister(linux_osd_jail_slot);
491 }
492 
493 void
494 linux_get_osname(struct thread *td, char *dst)
495 {
496 	struct prison *pr;
497 	struct linux_prison *lpr;
498 
499 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
500 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
501 	mtx_unlock(&pr->pr_mtx);
502 }
503 
504 static int
505 linux_set_osname(struct thread *td, char *osname)
506 {
507 	struct prison *pr;
508 	struct linux_prison *lpr;
509 
510 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
511 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
512 	mtx_unlock(&pr->pr_mtx);
513 
514 	return (0);
515 }
516 
517 void
518 linux_get_osrelease(struct thread *td, char *dst)
519 {
520 	struct prison *pr;
521 	struct linux_prison *lpr;
522 
523 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
524 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
525 	mtx_unlock(&pr->pr_mtx);
526 }
527 
528 int
529 linux_kernver(struct thread *td)
530 {
531 	struct prison *pr;
532 	struct linux_prison *lpr;
533 	int osrel;
534 
535 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
536 	osrel = lpr->pr_osrel;
537 	mtx_unlock(&pr->pr_mtx);
538 
539 	return (osrel);
540 }
541 
542 static int
543 linux_set_osrelease(struct thread *td, char *osrelease)
544 {
545 	struct prison *pr;
546 	struct linux_prison *lpr;
547 	int error;
548 
549 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
550 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
551 	if (error == 0)
552 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
553 	mtx_unlock(&pr->pr_mtx);
554 
555 	return (error);
556 }
557 
558 int
559 linux_get_oss_version(struct thread *td)
560 {
561 	struct prison *pr;
562 	struct linux_prison *lpr;
563 	int version;
564 
565 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
566 	version = lpr->pr_oss_version;
567 	mtx_unlock(&pr->pr_mtx);
568 
569 	return (version);
570 }
571 
572 static int
573 linux_set_oss_version(struct thread *td, int oss_version)
574 {
575 	struct prison *pr;
576 	struct linux_prison *lpr;
577 
578 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
579 	lpr->pr_oss_version = oss_version;
580 	mtx_unlock(&pr->pr_mtx);
581 
582 	return (0);
583 }
584