xref: /freebsd/sys/compat/linux/linux_mib.c (revision 1d386b48a555f61cb7325543adbbb5c3f3407a66)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/lock.h>
32 #include <sys/malloc.h>
33 #include <sys/mount.h>
34 #include <sys/jail.h>
35 #include <sys/proc.h>
36 #include <sys/sx.h>
37 
38 #include <compat/linux/linux_mib.h>
39 #include <compat/linux/linux_misc.h>
40 
41 struct linux_prison {
42 	char	pr_osname[LINUX_MAX_UTSNAME];
43 	char	pr_osrelease[LINUX_MAX_UTSNAME];
44 	int	pr_oss_version;
45 	int	pr_osrel;
46 };
47 
48 static struct linux_prison lprison0 = {
49 	.pr_osname =		"Linux",
50 	.pr_osrelease =		LINUX_VERSION_STR,
51 	.pr_oss_version =	0x030600,
52 	.pr_osrel =		LINUX_VERSION_CODE
53 };
54 
55 static unsigned linux_osd_jail_slot;
56 
57 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
58     "Linux mode");
59 
60 int linux_debug = 3;
61 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
62     &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
63 
64 int linux_default_openfiles = 1024;
65 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
66     &linux_default_openfiles, 0,
67     "Default soft openfiles resource limit, or -1 for unlimited");
68 
69 int linux_default_stacksize = 8 * 1024 * 1024;
70 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
71     &linux_default_stacksize, 0,
72     "Default soft stack size resource limit, or -1 for unlimited");
73 
74 int linux_dummy_rlimits = 0;
75 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
76     &linux_dummy_rlimits, 0,
77     "Return dummy values for unsupported Linux-specific rlimits");
78 
79 int linux_ignore_ip_recverr = 1;
80 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
81     &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
82 
83 int linux_preserve_vstatus = 1;
84 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
85     &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
86 
87 bool linux_map_sched_prio = true;
88 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
89     &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
90     "(not POSIX compliant)");
91 
92 static bool linux_setid_allowed = true;
93 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
94     &linux_setid_allowed, 0,
95     "Allow setuid/setgid on execve of Linux binary");
96 
97 int
98 linux_setid_allowed_query(struct thread *td __unused,
99     struct image_params *imgp __unused)
100 {
101 	return (linux_setid_allowed);
102 }
103 
/* Setters used by the sysctl handlers below; defined later in this file. */
static int linux_set_osname(struct thread *td, char *osname);
static int linux_set_osrelease(struct thread *td, char *osrelease);
static int linux_set_oss_version(struct thread *td, int oss_version);
107 
108 static int
109 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
110 {
111 	char osname[LINUX_MAX_UTSNAME];
112 	int error;
113 
114 	linux_get_osname(req->td, osname);
115 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
116 	if (error != 0 || req->newptr == NULL)
117 		return (error);
118 	error = linux_set_osname(req->td, osname);
119 
120 	return (error);
121 }
122 
123 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
124 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
125 	    0, 0, linux_sysctl_osname, "A",
126 	    "Linux kernel OS name");
127 
128 static int
129 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
130 {
131 	char osrelease[LINUX_MAX_UTSNAME];
132 	int error;
133 
134 	linux_get_osrelease(req->td, osrelease);
135 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
136 	if (error != 0 || req->newptr == NULL)
137 		return (error);
138 	error = linux_set_osrelease(req->td, osrelease);
139 
140 	return (error);
141 }
142 
143 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
144 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
145 	    0, 0, linux_sysctl_osrelease, "A",
146 	    "Linux kernel OS release");
147 
148 static int
149 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
150 {
151 	int oss_version;
152 	int error;
153 
154 	oss_version = linux_get_oss_version(req->td);
155 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
156 	if (error != 0 || req->newptr == NULL)
157 		return (error);
158 	error = linux_set_oss_version(req->td, oss_version);
159 
160 	return (error);
161 }
162 
163 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
164 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
165 	    0, 0, linux_sysctl_oss_version, "I",
166 	    "Linux OSS version");
167 
/*
 * Parse one numeric component of a release string.
 *
 * The component must begin with a decimal digit, which rejects the leading
 * white space and sign that strtol() would otherwise accept, and its value
 * must survive conversion to int unchanged.  On success the value is stored
 * in *valp, *endp is set to the first character after the digits and 0 is
 * returned.  On failure EINVAL is returned and *valp is left untouched.
 */
static int
linux_osrel_component(char *s, char **endp, int *valp)
{
	long n;

	if (*s < '0' || *s > '9')
		return (EINVAL);
	n = strtol(s, endp, 10);
	if (n != (int)n)
		return (EINVAL);
	*valp = (int)n;

	return (0);
}

/*
 * Map the osrelease into integer
 *
 * The string must have the form "A.B.C", optionally followed by '-' and at
 * least one more character.  The three numbers are combined with
 * LINUX_KERNVER(); results below LINUX_KERNVER(1, 0, 0) are rejected.
 *
 * osrelease	NUL-terminated release string to parse.
 * osrel	if not NULL, receives the combined value on success; it is
 *		not written on failure.
 *
 * Returns 0 on success and EINVAL if the string is malformed.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int v0, v1, v2, v;

	eosrelease = osrelease + strlen(osrelease);

	/* First and second numbers: each followed by '.' and more text. */
	if (linux_osrel_component(osrelease, &sep, &v0) != 0 ||
	    sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;
	if (linux_osrel_component(osrelease, &sep, &v1) != 0 ||
	    sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;

	/* Third number: ends the string or is followed by "-<text>". */
	if (linux_osrel_component(osrelease, &sep, &v2) != 0 ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
201 
202 /*
203  * Find a prison with Linux info.
204  * Return the Linux info and the (locked) prison.
205  */
206 static struct linux_prison *
207 linux_find_prison(struct prison *spr, struct prison **prp)
208 {
209 	struct prison *pr;
210 	struct linux_prison *lpr;
211 
212 	for (pr = spr;; pr = pr->pr_parent) {
213 		mtx_lock(&pr->pr_mtx);
214 		lpr = (pr == &prison0)
215 		    ? &lprison0
216 		    : osd_jail_get(pr, linux_osd_jail_slot);
217 		if (lpr != NULL)
218 			break;
219 		mtx_unlock(&pr->pr_mtx);
220 	}
221 	*prp = pr;
222 
223 	return (lpr);
224 }
225 
226 /*
227  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
228  * the Linux info and lock the prison.
229  */
230 static void
231 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
232 {
233 	struct prison *ppr;
234 	struct linux_prison *lpr, *nlpr;
235 	void **rsv;
236 
237 	/* If this prison already has Linux info, return that. */
238 	lpr = linux_find_prison(pr, &ppr);
239 	if (ppr == pr)
240 		goto done;
241 	/*
242 	 * Allocate a new info record.  Then check again, in case something
243 	 * changed during the allocation.
244 	 */
245 	mtx_unlock(&ppr->pr_mtx);
246 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
247 	rsv = osd_reserve(linux_osd_jail_slot);
248 	lpr = linux_find_prison(pr, &ppr);
249 	if (ppr == pr) {
250 		free(nlpr, M_PRISON);
251 		osd_free_reserved(rsv);
252 		goto done;
253 	}
254 	/* Inherit the initial values from the ancestor. */
255 	mtx_lock(&pr->pr_mtx);
256 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
257 	bcopy(lpr, nlpr, sizeof(*lpr));
258 	lpr = nlpr;
259 	mtx_unlock(&ppr->pr_mtx);
260  done:
261 	if (lprp != NULL)
262 		*lprp = lpr;
263 	else
264 		mtx_unlock(&pr->pr_mtx);
265 }
266 
267 /*
268  * Jail OSD methods for Linux prison data.
269  */
270 static int
271 linux_prison_create(void *obj, void *data)
272 {
273 	struct prison *pr = obj;
274 	struct vfsoptlist *opts = data;
275 	int jsys;
276 
277 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
278 	    jsys == JAIL_SYS_INHERIT)
279 		return (0);
280 	/*
281 	 * Inherit a prison's initial values from its parent
282 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
283 	 */
284 	linux_alloc_prison(pr, NULL);
285 	return (0);
286 }
287 
288 static int
289 linux_prison_check(void *obj __unused, void *data)
290 {
291 	struct vfsoptlist *opts = data;
292 	char *osname, *osrelease;
293 	int error, jsys, len, oss_version;
294 
295 	/* Check that the parameters are correct. */
296 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
297 	if (error != ENOENT) {
298 		if (error != 0)
299 			return (error);
300 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
301 			return (EINVAL);
302 	}
303 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
304 	if (error != ENOENT) {
305 		if (error != 0)
306 			return (error);
307 		if (len == 0 || osname[len - 1] != '\0')
308 			return (EINVAL);
309 		if (len > LINUX_MAX_UTSNAME) {
310 			vfs_opterror(opts, "linux.osname too long");
311 			return (ENAMETOOLONG);
312 		}
313 	}
314 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
315 	if (error != ENOENT) {
316 		if (error != 0)
317 			return (error);
318 		if (len == 0 || osrelease[len - 1] != '\0')
319 			return (EINVAL);
320 		if (len > LINUX_MAX_UTSNAME) {
321 			vfs_opterror(opts, "linux.osrelease too long");
322 			return (ENAMETOOLONG);
323 		}
324 		error = linux_map_osrel(osrelease, NULL);
325 		if (error != 0) {
326 			vfs_opterror(opts, "linux.osrelease format error");
327 			return (error);
328 		}
329 	}
330 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
331 	    sizeof(oss_version));
332 
333 	if (error == ENOENT)
334 		error = 0;
335 	return (error);
336 }
337 
338 static int
339 linux_prison_set(void *obj, void *data)
340 {
341 	struct linux_prison *lpr;
342 	struct prison *pr = obj;
343 	struct vfsoptlist *opts = data;
344 	char *osname, *osrelease;
345 	int error, gotversion, jsys, len, oss_version;
346 
347 	/* Set the parameters, which should be correct. */
348 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
349 	if (error == ENOENT)
350 		jsys = -1;
351 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
352 	if (error == ENOENT)
353 		osname = NULL;
354 	else
355 		jsys = JAIL_SYS_NEW;
356 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
357 	if (error == ENOENT)
358 		osrelease = NULL;
359 	else
360 		jsys = JAIL_SYS_NEW;
361 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
362 	    sizeof(oss_version));
363 	if (error == ENOENT)
364 		gotversion = 0;
365 	else {
366 		gotversion = 1;
367 		jsys = JAIL_SYS_NEW;
368 	}
369 	switch (jsys) {
370 	case JAIL_SYS_INHERIT:
371 		/* "linux=inherit": inherit the parent's Linux info. */
372 		mtx_lock(&pr->pr_mtx);
373 		osd_jail_del(pr, linux_osd_jail_slot);
374 		mtx_unlock(&pr->pr_mtx);
375 		break;
376 	case JAIL_SYS_NEW:
377 		/*
378 		 * "linux=new" or "linux.*":
379 		 * the prison gets its own Linux info.
380 		 */
381 		linux_alloc_prison(pr, &lpr);
382 		if (osrelease) {
383 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
384 			strlcpy(lpr->pr_osrelease, osrelease,
385 			    LINUX_MAX_UTSNAME);
386 		}
387 		if (osname)
388 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
389 		if (gotversion)
390 			lpr->pr_oss_version = oss_version;
391 		mtx_unlock(&pr->pr_mtx);
392 	}
393 
394 	return (0);
395 }
396 
397 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
398 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
399     "Jail Linux kernel OS name");
400 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
401     "Jail Linux kernel OS release");
402 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
403     "I", "Jail Linux OSS version");
404 
405 static int
406 linux_prison_get(void *obj, void *data)
407 {
408 	struct linux_prison *lpr;
409 	struct prison *ppr;
410 	struct prison *pr = obj;
411 	struct vfsoptlist *opts = data;
412 	int error, i;
413 
414 	static int version0;
415 
416 	/* See if this prison is the one with the Linux info. */
417 	lpr = linux_find_prison(pr, &ppr);
418 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
419 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
420 	if (error != 0 && error != ENOENT)
421 		goto done;
422 	if (i) {
423 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
424 		if (error != 0 && error != ENOENT)
425 			goto done;
426 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
427 		if (error != 0 && error != ENOENT)
428 			goto done;
429 		error = vfs_setopt(opts, "linux.oss_version",
430 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
431 		if (error != 0 && error != ENOENT)
432 			goto done;
433 	} else {
434 		/*
435 		 * If this prison is inheriting its Linux info, report
436 		 * empty/zero parameters.
437 		 */
438 		error = vfs_setopts(opts, "linux.osname", "");
439 		if (error != 0 && error != ENOENT)
440 			goto done;
441 		error = vfs_setopts(opts, "linux.osrelease", "");
442 		if (error != 0 && error != ENOENT)
443 			goto done;
444 		error = vfs_setopt(opts, "linux.oss_version", &version0,
445 		    sizeof(lpr->pr_oss_version));
446 		if (error != 0 && error != ENOENT)
447 			goto done;
448 	}
449 	error = 0;
450 
451  done:
452 	mtx_unlock(&ppr->pr_mtx);
453 
454 	return (error);
455 }
456 
457 static void
458 linux_prison_destructor(void *data)
459 {
460 
461 	free(data, M_PRISON);
462 }
463 
464 void
465 linux_osd_jail_register(void)
466 {
467 	struct prison *pr;
468 	osd_method_t methods[PR_MAXMETHOD] = {
469 	    [PR_METHOD_CREATE] =	linux_prison_create,
470 	    [PR_METHOD_GET] =		linux_prison_get,
471 	    [PR_METHOD_SET] =		linux_prison_set,
472 	    [PR_METHOD_CHECK] =		linux_prison_check
473 	};
474 
475 	linux_osd_jail_slot =
476 	    osd_jail_register(linux_prison_destructor, methods);
477 	/* Copy the system Linux info to any current prisons. */
478 	sx_slock(&allprison_lock);
479 	TAILQ_FOREACH(pr, &allprison, pr_list)
480 		linux_alloc_prison(pr, NULL);
481 	sx_sunlock(&allprison_lock);
482 }
483 
484 void
485 linux_osd_jail_deregister(void)
486 {
487 
488 	osd_jail_deregister(linux_osd_jail_slot);
489 }
490 
491 void
492 linux_get_osname(struct thread *td, char *dst)
493 {
494 	struct prison *pr;
495 	struct linux_prison *lpr;
496 
497 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
498 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
499 	mtx_unlock(&pr->pr_mtx);
500 }
501 
502 static int
503 linux_set_osname(struct thread *td, char *osname)
504 {
505 	struct prison *pr;
506 	struct linux_prison *lpr;
507 
508 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
509 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
510 	mtx_unlock(&pr->pr_mtx);
511 
512 	return (0);
513 }
514 
515 void
516 linux_get_osrelease(struct thread *td, char *dst)
517 {
518 	struct prison *pr;
519 	struct linux_prison *lpr;
520 
521 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
522 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
523 	mtx_unlock(&pr->pr_mtx);
524 }
525 
526 int
527 linux_kernver(struct thread *td)
528 {
529 	struct prison *pr;
530 	struct linux_prison *lpr;
531 	int osrel;
532 
533 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
534 	osrel = lpr->pr_osrel;
535 	mtx_unlock(&pr->pr_mtx);
536 
537 	return (osrel);
538 }
539 
540 static int
541 linux_set_osrelease(struct thread *td, char *osrelease)
542 {
543 	struct prison *pr;
544 	struct linux_prison *lpr;
545 	int error;
546 
547 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
548 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
549 	if (error == 0)
550 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
551 	mtx_unlock(&pr->pr_mtx);
552 
553 	return (error);
554 }
555 
556 int
557 linux_get_oss_version(struct thread *td)
558 {
559 	struct prison *pr;
560 	struct linux_prison *lpr;
561 	int version;
562 
563 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
564 	version = lpr->pr_oss_version;
565 	mtx_unlock(&pr->pr_mtx);
566 
567 	return (version);
568 }
569 
570 static int
571 linux_set_oss_version(struct thread *td, int oss_version)
572 {
573 	struct prison *pr;
574 	struct linux_prison *lpr;
575 
576 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
577 	lpr->pr_oss_version = oss_version;
578 	mtx_unlock(&pr->pr_mtx);
579 
580 	return (0);
581 }
582