xref: /freebsd/sys/compat/linux/linux_mib.c (revision 36d6566e5985030fd2f1100bd9c1387bbe0bd290)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/sdt.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 #include <sys/proc.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/jail.h>
41 #include <sys/lock.h>
42 #include <sys/sx.h>
43 
44 #include <compat/linux/linux_mib.h>
45 #include <compat/linux/linux_misc.h>
46 
/*
 * Linux identity reported on behalf of a prison.  prison0 always uses the
 * static lprison0 record; any other prison that owns a record keeps it in
 * its jail OSD slot (linux_osd_jail_slot), and a prison without one uses
 * the record of its nearest ancestor that has one (see linux_find_prison()).
 */
struct linux_prison {
	char	pr_osname[LINUX_MAX_UTSNAME];	/* OS name string */
	char	pr_osrelease[LINUX_MAX_UTSNAME];	/* release string, "a.b.c[-x]" */
	int	pr_oss_version;			/* OSS version number */
	int	pr_osrel;	/* pr_osrelease as mapped by linux_map_osrel() */
};
53 
/*
 * Linux info belonging to prison0.  linux_find_prison() stops here when no
 * prison on the way up owns a record, so these are the system defaults.
 * NOTE(review): 0x030600 presumably encodes OSS version 3.6.0 - confirm.
 */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};

/* Jail OSD slot number, assigned in linux_osd_jail_register(). */
static unsigned linux_osd_jail_slot;
62 
/* Root of the compat.linux sysctl tree; every OID below hangs off it. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

/*
 * Global knobs.  The variables have external linkage; NOTE(review): they
 * are presumably consumed by other linux(4) source files - none of them is
 * read in this file.
 */

/* Warning verbosity; the description documents 0 as "disabled". */
int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

/* Default soft RLIMIT for open files; -1 is documented as unlimited. */
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* Default soft stack limit in bytes (8 MiB); -1 is documented as unlimited. */
int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
    &linux_default_stacksize, 0,
    "Default soft stack size resource limit, or -1 for unlimited");

/* Off by default. */
int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
    &linux_dummy_rlimits, 0,
    "Return dummy values for unsupported Linux-specific rlimits");

/* On by default. */
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

/* Off by default. */
int linux_preserve_vstatus = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/*
 * Unlike the knobs above this one is CTLFLAG_RDTUN: it is exported
 * read-only and can only be changed as a tunable.
 */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");
97 
98 int linux_use_emul_path = 1;
99 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
100     &linux_use_emul_path, 0, "Use linux.compat.emul_path");
101 
/*
 * Forward declarations: the sysctl handlers that follow call these setters,
 * which are defined near the end of the file.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
105 
106 static int
107 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
108 {
109 	char osname[LINUX_MAX_UTSNAME];
110 	int error;
111 
112 	linux_get_osname(req->td, osname);
113 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
114 	if (error != 0 || req->newptr == NULL)
115 		return (error);
116 	error = linux_set_osname(req->td, osname);
117 
118 	return (error);
119 }
120 
121 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
122 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
123 	    0, 0, linux_sysctl_osname, "A",
124 	    "Linux kernel OS name");
125 
126 static int
127 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
128 {
129 	char osrelease[LINUX_MAX_UTSNAME];
130 	int error;
131 
132 	linux_get_osrelease(req->td, osrelease);
133 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
134 	if (error != 0 || req->newptr == NULL)
135 		return (error);
136 	error = linux_set_osrelease(req->td, osrelease);
137 
138 	return (error);
139 }
140 
141 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
142 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
143 	    0, 0, linux_sysctl_osrelease, "A",
144 	    "Linux kernel OS release");
145 
146 static int
147 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
148 {
149 	int oss_version;
150 	int error;
151 
152 	oss_version = linux_get_oss_version(req->td);
153 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
154 	if (error != 0 || req->newptr == NULL)
155 		return (error);
156 	error = linux_set_oss_version(req->td, oss_version);
157 
158 	return (error);
159 }
160 
161 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
162 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
163 	    0, 0, linux_sysctl_oss_version, "I",
164 	    "Linux OSS version");
165 
/*
 * Map a Linux release string onto its integer version code.
 *
 * The string has to look like "major.minor.patch", optionally followed by
 * a '-' and at least one more character (e.g. "5.15.0-custom").  Each
 * component is parsed with strtol() in base 10.
 *
 * osrelease	NUL-terminated release string; not modified.
 * osrel	if not NULL, receives LINUX_KERNVER(major, minor, patch) on
 *		success; left untouched on failure.
 *
 * Returns 0 on success, or EINVAL when the string is malformed, when any
 * component is negative, or when the version is below 1.0.0.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int len, v0, v1, v2, v;

	len = strlen(osrelease);
	eosrelease = osrelease + len;
	/* Major: must be followed by '.' and at least one more character. */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	/* Minor: same rule. */
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	/* Patch: may end the string, or be followed by "-<something>". */
	osrelease = sep + 1;
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a sign, so "3.-1.0" gets this far.  A negative
	 * component would be shifted inside LINUX_KERNVER() and could fold
	 * into a plausible-looking but wrong version code, so refuse it.
	 */
	if (v0 < 0 || v1 < 0 || v2 < 0)
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
199 
200 /*
201  * Find a prison with Linux info.
202  * Return the Linux info and the (locked) prison.
203  */
204 static struct linux_prison *
205 linux_find_prison(struct prison *spr, struct prison **prp)
206 {
207 	struct prison *pr;
208 	struct linux_prison *lpr;
209 
210 	for (pr = spr;; pr = pr->pr_parent) {
211 		mtx_lock(&pr->pr_mtx);
212 		lpr = (pr == &prison0)
213 		    ? &lprison0
214 		    : osd_jail_get(pr, linux_osd_jail_slot);
215 		if (lpr != NULL)
216 			break;
217 		mtx_unlock(&pr->pr_mtx);
218 	}
219 	*prp = pr;
220 
221 	return (lpr);
222 }
223 
224 /*
225  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
226  * the Linux info and lock the prison.
227  */
228 static void
229 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
230 {
231 	struct prison *ppr;
232 	struct linux_prison *lpr, *nlpr;
233 	void **rsv;
234 
235 	/* If this prison already has Linux info, return that. */
236 	lpr = linux_find_prison(pr, &ppr);
237 	if (ppr == pr)
238 		goto done;
239 	/*
240 	 * Allocate a new info record.  Then check again, in case something
241 	 * changed during the allocation.
242 	 */
243 	mtx_unlock(&ppr->pr_mtx);
244 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
245 	rsv = osd_reserve(linux_osd_jail_slot);
246 	lpr = linux_find_prison(pr, &ppr);
247 	if (ppr == pr) {
248 		free(nlpr, M_PRISON);
249 		osd_free_reserved(rsv);
250 		goto done;
251 	}
252 	/* Inherit the initial values from the ancestor. */
253 	mtx_lock(&pr->pr_mtx);
254 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
255 	bcopy(lpr, nlpr, sizeof(*lpr));
256 	lpr = nlpr;
257 	mtx_unlock(&ppr->pr_mtx);
258  done:
259 	if (lprp != NULL)
260 		*lprp = lpr;
261 	else
262 		mtx_unlock(&pr->pr_mtx);
263 }
264 
265 /*
266  * Jail OSD methods for Linux prison data.
267  */
268 static int
269 linux_prison_create(void *obj, void *data)
270 {
271 	struct prison *pr = obj;
272 	struct vfsoptlist *opts = data;
273 	int jsys;
274 
275 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
276 	    jsys == JAIL_SYS_INHERIT)
277 		return (0);
278 	/*
279 	 * Inherit a prison's initial values from its parent
280 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
281 	 */
282 	linux_alloc_prison(pr, NULL);
283 	return (0);
284 }
285 
286 static int
287 linux_prison_check(void *obj __unused, void *data)
288 {
289 	struct vfsoptlist *opts = data;
290 	char *osname, *osrelease;
291 	int error, jsys, len, oss_version;
292 
293 	/* Check that the parameters are correct. */
294 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
295 	if (error != ENOENT) {
296 		if (error != 0)
297 			return (error);
298 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
299 			return (EINVAL);
300 	}
301 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
302 	if (error != ENOENT) {
303 		if (error != 0)
304 			return (error);
305 		if (len == 0 || osname[len - 1] != '\0')
306 			return (EINVAL);
307 		if (len > LINUX_MAX_UTSNAME) {
308 			vfs_opterror(opts, "linux.osname too long");
309 			return (ENAMETOOLONG);
310 		}
311 	}
312 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
313 	if (error != ENOENT) {
314 		if (error != 0)
315 			return (error);
316 		if (len == 0 || osrelease[len - 1] != '\0')
317 			return (EINVAL);
318 		if (len > LINUX_MAX_UTSNAME) {
319 			vfs_opterror(opts, "linux.osrelease too long");
320 			return (ENAMETOOLONG);
321 		}
322 		error = linux_map_osrel(osrelease, NULL);
323 		if (error != 0) {
324 			vfs_opterror(opts, "linux.osrelease format error");
325 			return (error);
326 		}
327 	}
328 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
329 	    sizeof(oss_version));
330 
331 	if (error == ENOENT)
332 		error = 0;
333 	return (error);
334 }
335 
336 static int
337 linux_prison_set(void *obj, void *data)
338 {
339 	struct linux_prison *lpr;
340 	struct prison *pr = obj;
341 	struct vfsoptlist *opts = data;
342 	char *osname, *osrelease;
343 	int error, gotversion, jsys, len, oss_version;
344 
345 	/* Set the parameters, which should be correct. */
346 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
347 	if (error == ENOENT)
348 		jsys = -1;
349 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
350 	if (error == ENOENT)
351 		osname = NULL;
352 	else
353 		jsys = JAIL_SYS_NEW;
354 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
355 	if (error == ENOENT)
356 		osrelease = NULL;
357 	else
358 		jsys = JAIL_SYS_NEW;
359 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
360 	    sizeof(oss_version));
361 	if (error == ENOENT)
362 		gotversion = 0;
363 	else {
364 		gotversion = 1;
365 		jsys = JAIL_SYS_NEW;
366 	}
367 	switch (jsys) {
368 	case JAIL_SYS_INHERIT:
369 		/* "linux=inherit": inherit the parent's Linux info. */
370 		mtx_lock(&pr->pr_mtx);
371 		osd_jail_del(pr, linux_osd_jail_slot);
372 		mtx_unlock(&pr->pr_mtx);
373 		break;
374 	case JAIL_SYS_NEW:
375 		/*
376 		 * "linux=new" or "linux.*":
377 		 * the prison gets its own Linux info.
378 		 */
379 		linux_alloc_prison(pr, &lpr);
380 		if (osrelease) {
381 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
382 			strlcpy(lpr->pr_osrelease, osrelease,
383 			    LINUX_MAX_UTSNAME);
384 		}
385 		if (osname)
386 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
387 		if (gotversion)
388 			lpr->pr_oss_version = oss_version;
389 		mtx_unlock(&pr->pr_mtx);
390 	}
391 
392 	return (0);
393 }
394 
/*
 * Jail parameter declarations for "linux" and "linux.*".  The names match
 * the options that linux_prison_check(), linux_prison_set() and
 * linux_prison_get() look up in the option list; both strings are limited
 * to LINUX_MAX_UTSNAME bytes.
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
402 
403 static int
404 linux_prison_get(void *obj, void *data)
405 {
406 	struct linux_prison *lpr;
407 	struct prison *ppr;
408 	struct prison *pr = obj;
409 	struct vfsoptlist *opts = data;
410 	int error, i;
411 
412 	static int version0;
413 
414 	/* See if this prison is the one with the Linux info. */
415 	lpr = linux_find_prison(pr, &ppr);
416 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
417 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
418 	if (error != 0 && error != ENOENT)
419 		goto done;
420 	if (i) {
421 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
422 		if (error != 0 && error != ENOENT)
423 			goto done;
424 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
425 		if (error != 0 && error != ENOENT)
426 			goto done;
427 		error = vfs_setopt(opts, "linux.oss_version",
428 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
429 		if (error != 0 && error != ENOENT)
430 			goto done;
431 	} else {
432 		/*
433 		 * If this prison is inheriting its Linux info, report
434 		 * empty/zero parameters.
435 		 */
436 		error = vfs_setopts(opts, "linux.osname", "");
437 		if (error != 0 && error != ENOENT)
438 			goto done;
439 		error = vfs_setopts(opts, "linux.osrelease", "");
440 		if (error != 0 && error != ENOENT)
441 			goto done;
442 		error = vfs_setopt(opts, "linux.oss_version", &version0,
443 		    sizeof(lpr->pr_oss_version));
444 		if (error != 0 && error != ENOENT)
445 			goto done;
446 	}
447 	error = 0;
448 
449  done:
450 	mtx_unlock(&ppr->pr_mtx);
451 
452 	return (error);
453 }
454 
455 static void
456 linux_prison_destructor(void *data)
457 {
458 
459 	free(data, M_PRISON);
460 }
461 
462 void
463 linux_osd_jail_register(void)
464 {
465 	struct prison *pr;
466 	osd_method_t methods[PR_MAXMETHOD] = {
467 	    [PR_METHOD_CREATE] =	linux_prison_create,
468 	    [PR_METHOD_GET] =		linux_prison_get,
469 	    [PR_METHOD_SET] =		linux_prison_set,
470 	    [PR_METHOD_CHECK] =		linux_prison_check
471 	};
472 
473 	linux_osd_jail_slot =
474 	    osd_jail_register(linux_prison_destructor, methods);
475 	/* Copy the system Linux info to any current prisons. */
476 	sx_slock(&allprison_lock);
477 	TAILQ_FOREACH(pr, &allprison, pr_list)
478 		linux_alloc_prison(pr, NULL);
479 	sx_sunlock(&allprison_lock);
480 }
481 
482 void
483 linux_osd_jail_deregister(void)
484 {
485 
486 	osd_jail_deregister(linux_osd_jail_slot);
487 }
488 
489 void
490 linux_get_osname(struct thread *td, char *dst)
491 {
492 	struct prison *pr;
493 	struct linux_prison *lpr;
494 
495 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
496 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
497 	mtx_unlock(&pr->pr_mtx);
498 }
499 
500 static int
501 linux_set_osname(struct thread *td, char *osname)
502 {
503 	struct prison *pr;
504 	struct linux_prison *lpr;
505 
506 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
507 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
508 	mtx_unlock(&pr->pr_mtx);
509 
510 	return (0);
511 }
512 
513 void
514 linux_get_osrelease(struct thread *td, char *dst)
515 {
516 	struct prison *pr;
517 	struct linux_prison *lpr;
518 
519 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
520 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
521 	mtx_unlock(&pr->pr_mtx);
522 }
523 
524 int
525 linux_kernver(struct thread *td)
526 {
527 	struct prison *pr;
528 	struct linux_prison *lpr;
529 	int osrel;
530 
531 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
532 	osrel = lpr->pr_osrel;
533 	mtx_unlock(&pr->pr_mtx);
534 
535 	return (osrel);
536 }
537 
538 static int
539 linux_set_osrelease(struct thread *td, char *osrelease)
540 {
541 	struct prison *pr;
542 	struct linux_prison *lpr;
543 	int error;
544 
545 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
546 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
547 	if (error == 0)
548 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
549 	mtx_unlock(&pr->pr_mtx);
550 
551 	return (error);
552 }
553 
554 int
555 linux_get_oss_version(struct thread *td)
556 {
557 	struct prison *pr;
558 	struct linux_prison *lpr;
559 	int version;
560 
561 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
562 	version = lpr->pr_oss_version;
563 	mtx_unlock(&pr->pr_mtx);
564 
565 	return (version);
566 }
567 
568 static int
569 linux_set_oss_version(struct thread *td, int oss_version)
570 {
571 	struct prison *pr;
572 	struct linux_prison *lpr;
573 
574 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
575 	lpr->pr_oss_version = oss_version;
576 	mtx_unlock(&pr->pr_mtx);
577 
578 	return (0);
579 }
580