xref: /freebsd/sys/compat/linux/linux_mib.c (revision fa42a0bfa40342531df64873dcef74593702f4b3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/sdt.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 #include <sys/proc.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/jail.h>
41 #include <sys/lock.h>
42 #include <sys/sx.h>
43 
44 #include <compat/linux/linux_mib.h>
45 #include <compat/linux/linux_misc.h>
46 
47 struct linux_prison {
48 	char	pr_osname[LINUX_MAX_UTSNAME];
49 	char	pr_osrelease[LINUX_MAX_UTSNAME];
50 	int	pr_oss_version;
51 	int	pr_osrel;
52 };
53 
54 static struct linux_prison lprison0 = {
55 	.pr_osname =		"Linux",
56 	.pr_osrelease =		LINUX_VERSION_STR,
57 	.pr_oss_version =	0x030600,
58 	.pr_osrel =		LINUX_VERSION_CODE
59 };
60 
61 static unsigned linux_osd_jail_slot;
62 
63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
64     "Linux mode");
65 
66 int linux_debug = 1;
67 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
68     &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
69 
70 int linux_default_openfiles = 1024;
71 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
72     &linux_default_openfiles, 0,
73     "Default soft openfiles resource limit, or -1 for unlimited");
74 
75 int linux_default_stacksize = 8 * 1024 * 1024;
76 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
77     &linux_default_stacksize, 0,
78     "Default soft stack size resource limit, or -1 for unlimited");
79 
80 int linux_ignore_ip_recverr = 1;
81 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
82     &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
83 
84 int linux_preserve_vstatus = 0;
85 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
86     &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
87 
88 bool linux_map_sched_prio = true;
89 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
90     &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
91     "(not POSIX compliant)");
92 
93 int linux_use_emul_path = 1;
94 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
95     &linux_use_emul_path, 0, "Use linux.compat.emul_path");
96 
97 static int	linux_set_osname(struct thread *td, char *osname);
98 static int	linux_set_osrelease(struct thread *td, char *osrelease);
99 static int	linux_set_oss_version(struct thread *td, int oss_version);
100 
101 static int
102 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
103 {
104 	char osname[LINUX_MAX_UTSNAME];
105 	int error;
106 
107 	linux_get_osname(req->td, osname);
108 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
109 	if (error != 0 || req->newptr == NULL)
110 		return (error);
111 	error = linux_set_osname(req->td, osname);
112 
113 	return (error);
114 }
115 
116 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
117 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
118 	    0, 0, linux_sysctl_osname, "A",
119 	    "Linux kernel OS name");
120 
121 static int
122 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
123 {
124 	char osrelease[LINUX_MAX_UTSNAME];
125 	int error;
126 
127 	linux_get_osrelease(req->td, osrelease);
128 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
129 	if (error != 0 || req->newptr == NULL)
130 		return (error);
131 	error = linux_set_osrelease(req->td, osrelease);
132 
133 	return (error);
134 }
135 
136 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
137 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
138 	    0, 0, linux_sysctl_osrelease, "A",
139 	    "Linux kernel OS release");
140 
141 static int
142 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
143 {
144 	int oss_version;
145 	int error;
146 
147 	oss_version = linux_get_oss_version(req->td);
148 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
149 	if (error != 0 || req->newptr == NULL)
150 		return (error);
151 	error = linux_set_oss_version(req->td, oss_version);
152 
153 	return (error);
154 }
155 
156 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
157 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
158 	    0, 0, linux_sysctl_oss_version, "I",
159 	    "Linux OSS version");
160 
/*
 * Map the osrelease into integer
 *
 * The string must look like "<v0>.<v1>.<v2>", optionally followed by '-'
 * and at least one more character.  The three numbers are combined with
 * LINUX_KERNVER(); results below LINUX_KERNVER(1, 0, 0) are rejected.
 *
 * osrelease: NUL-terminated release string to parse.
 * osrel:     if non-NULL, receives the integer form on success; it is left
 *            untouched on failure.
 *
 * Returns 0 on success or EINVAL if the string is malformed.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int v0, v1, v2, v;

	eosrelease = osrelease + strlen(osrelease);

	/* Each of the first two numbers must be followed by ".<something>". */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;

	/* The third number either ends the string or precedes "-<something>". */
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a leading sign, so "5.-1.2" passes the syntax
	 * checks above.  A negative component is never a valid version and
	 * must not be fed to LINUX_KERNVER().
	 */
	if (v0 < 0 || v1 < 0 || v2 < 0)
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
194 
195 /*
196  * Find a prison with Linux info.
197  * Return the Linux info and the (locked) prison.
198  */
199 static struct linux_prison *
200 linux_find_prison(struct prison *spr, struct prison **prp)
201 {
202 	struct prison *pr;
203 	struct linux_prison *lpr;
204 
205 	for (pr = spr;; pr = pr->pr_parent) {
206 		mtx_lock(&pr->pr_mtx);
207 		lpr = (pr == &prison0)
208 		    ? &lprison0
209 		    : osd_jail_get(pr, linux_osd_jail_slot);
210 		if (lpr != NULL)
211 			break;
212 		mtx_unlock(&pr->pr_mtx);
213 	}
214 	*prp = pr;
215 
216 	return (lpr);
217 }
218 
219 /*
220  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
221  * the Linux info and lock the prison.
222  */
223 static void
224 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
225 {
226 	struct prison *ppr;
227 	struct linux_prison *lpr, *nlpr;
228 	void **rsv;
229 
230 	/* If this prison already has Linux info, return that. */
231 	lpr = linux_find_prison(pr, &ppr);
232 	if (ppr == pr)
233 		goto done;
234 	/*
235 	 * Allocate a new info record.  Then check again, in case something
236 	 * changed during the allocation.
237 	 */
238 	mtx_unlock(&ppr->pr_mtx);
239 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
240 	rsv = osd_reserve(linux_osd_jail_slot);
241 	lpr = linux_find_prison(pr, &ppr);
242 	if (ppr == pr) {
243 		free(nlpr, M_PRISON);
244 		osd_free_reserved(rsv);
245 		goto done;
246 	}
247 	/* Inherit the initial values from the ancestor. */
248 	mtx_lock(&pr->pr_mtx);
249 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
250 	bcopy(lpr, nlpr, sizeof(*lpr));
251 	lpr = nlpr;
252 	mtx_unlock(&ppr->pr_mtx);
253  done:
254 	if (lprp != NULL)
255 		*lprp = lpr;
256 	else
257 		mtx_unlock(&pr->pr_mtx);
258 }
259 
260 /*
261  * Jail OSD methods for Linux prison data.
262  */
263 static int
264 linux_prison_create(void *obj, void *data)
265 {
266 	struct prison *pr = obj;
267 	struct vfsoptlist *opts = data;
268 	int jsys;
269 
270 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
271 	    jsys == JAIL_SYS_INHERIT)
272 		return (0);
273 	/*
274 	 * Inherit a prison's initial values from its parent
275 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
276 	 */
277 	linux_alloc_prison(pr, NULL);
278 	return (0);
279 }
280 
281 static int
282 linux_prison_check(void *obj __unused, void *data)
283 {
284 	struct vfsoptlist *opts = data;
285 	char *osname, *osrelease;
286 	int error, jsys, len, oss_version;
287 
288 	/* Check that the parameters are correct. */
289 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
290 	if (error != ENOENT) {
291 		if (error != 0)
292 			return (error);
293 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
294 			return (EINVAL);
295 	}
296 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
297 	if (error != ENOENT) {
298 		if (error != 0)
299 			return (error);
300 		if (len == 0 || osname[len - 1] != '\0')
301 			return (EINVAL);
302 		if (len > LINUX_MAX_UTSNAME) {
303 			vfs_opterror(opts, "linux.osname too long");
304 			return (ENAMETOOLONG);
305 		}
306 	}
307 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
308 	if (error != ENOENT) {
309 		if (error != 0)
310 			return (error);
311 		if (len == 0 || osrelease[len - 1] != '\0')
312 			return (EINVAL);
313 		if (len > LINUX_MAX_UTSNAME) {
314 			vfs_opterror(opts, "linux.osrelease too long");
315 			return (ENAMETOOLONG);
316 		}
317 		error = linux_map_osrel(osrelease, NULL);
318 		if (error != 0) {
319 			vfs_opterror(opts, "linux.osrelease format error");
320 			return (error);
321 		}
322 	}
323 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
324 	    sizeof(oss_version));
325 
326 	if (error == ENOENT)
327 		error = 0;
328 	return (error);
329 }
330 
331 static int
332 linux_prison_set(void *obj, void *data)
333 {
334 	struct linux_prison *lpr;
335 	struct prison *pr = obj;
336 	struct vfsoptlist *opts = data;
337 	char *osname, *osrelease;
338 	int error, gotversion, jsys, len, oss_version;
339 
340 	/* Set the parameters, which should be correct. */
341 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
342 	if (error == ENOENT)
343 		jsys = -1;
344 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
345 	if (error == ENOENT)
346 		osname = NULL;
347 	else
348 		jsys = JAIL_SYS_NEW;
349 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
350 	if (error == ENOENT)
351 		osrelease = NULL;
352 	else
353 		jsys = JAIL_SYS_NEW;
354 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
355 	    sizeof(oss_version));
356 	if (error == ENOENT)
357 		gotversion = 0;
358 	else {
359 		gotversion = 1;
360 		jsys = JAIL_SYS_NEW;
361 	}
362 	switch (jsys) {
363 	case JAIL_SYS_INHERIT:
364 		/* "linux=inherit": inherit the parent's Linux info. */
365 		mtx_lock(&pr->pr_mtx);
366 		osd_jail_del(pr, linux_osd_jail_slot);
367 		mtx_unlock(&pr->pr_mtx);
368 		break;
369 	case JAIL_SYS_NEW:
370 		/*
371 		 * "linux=new" or "linux.*":
372 		 * the prison gets its own Linux info.
373 		 */
374 		linux_alloc_prison(pr, &lpr);
375 		if (osrelease) {
376 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
377 			strlcpy(lpr->pr_osrelease, osrelease,
378 			    LINUX_MAX_UTSNAME);
379 		}
380 		if (osname)
381 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
382 		if (gotversion)
383 			lpr->pr_oss_version = oss_version;
384 		mtx_unlock(&pr->pr_mtx);
385 	}
386 
387 	return (0);
388 }
389 
390 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
391 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
392     "Jail Linux kernel OS name");
393 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
394     "Jail Linux kernel OS release");
395 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
396     "I", "Jail Linux OSS version");
397 
398 static int
399 linux_prison_get(void *obj, void *data)
400 {
401 	struct linux_prison *lpr;
402 	struct prison *ppr;
403 	struct prison *pr = obj;
404 	struct vfsoptlist *opts = data;
405 	int error, i;
406 
407 	static int version0;
408 
409 	/* See if this prison is the one with the Linux info. */
410 	lpr = linux_find_prison(pr, &ppr);
411 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
412 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
413 	if (error != 0 && error != ENOENT)
414 		goto done;
415 	if (i) {
416 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
417 		if (error != 0 && error != ENOENT)
418 			goto done;
419 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
420 		if (error != 0 && error != ENOENT)
421 			goto done;
422 		error = vfs_setopt(opts, "linux.oss_version",
423 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
424 		if (error != 0 && error != ENOENT)
425 			goto done;
426 	} else {
427 		/*
428 		 * If this prison is inheriting its Linux info, report
429 		 * empty/zero parameters.
430 		 */
431 		error = vfs_setopts(opts, "linux.osname", "");
432 		if (error != 0 && error != ENOENT)
433 			goto done;
434 		error = vfs_setopts(opts, "linux.osrelease", "");
435 		if (error != 0 && error != ENOENT)
436 			goto done;
437 		error = vfs_setopt(opts, "linux.oss_version", &version0,
438 		    sizeof(lpr->pr_oss_version));
439 		if (error != 0 && error != ENOENT)
440 			goto done;
441 	}
442 	error = 0;
443 
444  done:
445 	mtx_unlock(&ppr->pr_mtx);
446 
447 	return (error);
448 }
449 
450 static void
451 linux_prison_destructor(void *data)
452 {
453 
454 	free(data, M_PRISON);
455 }
456 
457 void
458 linux_osd_jail_register(void)
459 {
460 	struct prison *pr;
461 	osd_method_t methods[PR_MAXMETHOD] = {
462 	    [PR_METHOD_CREATE] =	linux_prison_create,
463 	    [PR_METHOD_GET] =		linux_prison_get,
464 	    [PR_METHOD_SET] =		linux_prison_set,
465 	    [PR_METHOD_CHECK] =		linux_prison_check
466 	};
467 
468 	linux_osd_jail_slot =
469 	    osd_jail_register(linux_prison_destructor, methods);
470 	/* Copy the system Linux info to any current prisons. */
471 	sx_slock(&allprison_lock);
472 	TAILQ_FOREACH(pr, &allprison, pr_list)
473 		linux_alloc_prison(pr, NULL);
474 	sx_sunlock(&allprison_lock);
475 }
476 
477 void
478 linux_osd_jail_deregister(void)
479 {
480 
481 	osd_jail_deregister(linux_osd_jail_slot);
482 }
483 
484 void
485 linux_get_osname(struct thread *td, char *dst)
486 {
487 	struct prison *pr;
488 	struct linux_prison *lpr;
489 
490 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
491 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
492 	mtx_unlock(&pr->pr_mtx);
493 }
494 
495 static int
496 linux_set_osname(struct thread *td, char *osname)
497 {
498 	struct prison *pr;
499 	struct linux_prison *lpr;
500 
501 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
502 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
503 	mtx_unlock(&pr->pr_mtx);
504 
505 	return (0);
506 }
507 
508 void
509 linux_get_osrelease(struct thread *td, char *dst)
510 {
511 	struct prison *pr;
512 	struct linux_prison *lpr;
513 
514 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
515 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
516 	mtx_unlock(&pr->pr_mtx);
517 }
518 
519 int
520 linux_kernver(struct thread *td)
521 {
522 	struct prison *pr;
523 	struct linux_prison *lpr;
524 	int osrel;
525 
526 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
527 	osrel = lpr->pr_osrel;
528 	mtx_unlock(&pr->pr_mtx);
529 
530 	return (osrel);
531 }
532 
533 static int
534 linux_set_osrelease(struct thread *td, char *osrelease)
535 {
536 	struct prison *pr;
537 	struct linux_prison *lpr;
538 	int error;
539 
540 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
541 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
542 	if (error == 0)
543 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
544 	mtx_unlock(&pr->pr_mtx);
545 
546 	return (error);
547 }
548 
549 int
550 linux_get_oss_version(struct thread *td)
551 {
552 	struct prison *pr;
553 	struct linux_prison *lpr;
554 	int version;
555 
556 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
557 	version = lpr->pr_oss_version;
558 	mtx_unlock(&pr->pr_mtx);
559 
560 	return (version);
561 }
562 
563 static int
564 linux_set_oss_version(struct thread *td, int oss_version)
565 {
566 	struct prison *pr;
567 	struct linux_prison *lpr;
568 
569 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
570 	lpr->pr_oss_version = oss_version;
571 	mtx_unlock(&pr->pr_mtx);
572 
573 	return (0);
574 }
575