xref: /freebsd/sys/compat/linux/linux_mib.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/sdt.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 #include <sys/proc.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/jail.h>
41 #include <sys/lock.h>
42 #include <sys/sx.h>
43 
44 #include <compat/linux/linux_mib.h>
45 #include <compat/linux/linux_misc.h>
46 
/*
 * Linux identity information kept per prison.  prison0 uses the static
 * lprison0 instance; any other prison that has its own record stores it in
 * the jail OSD slot linux_osd_jail_slot.
 */
struct linux_prison {
	char	pr_osname[LINUX_MAX_UTSNAME];	/* Linux OS name string */
	char	pr_osrelease[LINUX_MAX_UTSNAME]; /* Linux release string */
	int	pr_oss_version;			/* Linux OSS version */
	int	pr_osrel;	/* pr_osrelease as mapped by linux_map_osrel() */
};
53 
/*
 * Linux info belonging to prison0.  linux_find_prison() returns this record
 * when its walk up the prison hierarchy reaches prison0, so a lookup always
 * finds a record.
 */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};
60 
/*
 * Jail OSD slot holding each prison's struct linux_prison; assigned from
 * osd_jail_register() in linux_osd_jail_register().
 */
static unsigned linux_osd_jail_slot;
62 
/* Parent node for all compat.linux.* sysctl variables. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

/* compat.linux.debug: warning logging switch; enabled (1) by default. */
int linux_debug = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

/* compat.linux.default_openfiles: defaults to 1024; -1 means unlimited. */
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* compat.linux.ignore_ip_recverr: enabled (1) by default. */
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

/* compat.linux.preserve_vstatus: disabled (0) by default. */
int linux_preserve_vstatus = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/*
 * compat.linux.map_sched_prio: enabled by default.  Unlike the knobs above
 * it is declared CTLFLAG_RDTUN rather than CTLFLAG_RWTUN.
 */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");
87 
88 int linux_use_emul_path = 1;
89 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
90     &linux_use_emul_path, 0, "Use linux.compat.emul_path");
91 
/*
 * Forward declarations: the sysctl handlers below call these setters, which
 * are defined near the end of the file.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
95 
96 static int
97 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
98 {
99 	char osname[LINUX_MAX_UTSNAME];
100 	int error;
101 
102 	linux_get_osname(req->td, osname);
103 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
104 	if (error != 0 || req->newptr == NULL)
105 		return (error);
106 	error = linux_set_osname(req->td, osname);
107 
108 	return (error);
109 }
110 
111 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
112 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
113 	    0, 0, linux_sysctl_osname, "A",
114 	    "Linux kernel OS name");
115 
116 static int
117 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
118 {
119 	char osrelease[LINUX_MAX_UTSNAME];
120 	int error;
121 
122 	linux_get_osrelease(req->td, osrelease);
123 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
124 	if (error != 0 || req->newptr == NULL)
125 		return (error);
126 	error = linux_set_osrelease(req->td, osrelease);
127 
128 	return (error);
129 }
130 
131 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
132 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
133 	    0, 0, linux_sysctl_osrelease, "A",
134 	    "Linux kernel OS release");
135 
136 static int
137 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
138 {
139 	int oss_version;
140 	int error;
141 
142 	oss_version = linux_get_oss_version(req->td);
143 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
144 	if (error != 0 || req->newptr == NULL)
145 		return (error);
146 	error = linux_set_oss_version(req->td, oss_version);
147 
148 	return (error);
149 }
150 
151 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
152 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
153 	    0, 0, linux_sysctl_oss_version, "I",
154 	    "Linux OSS version");
155 
/*
 * Largest value accepted for a single component of a release string.  It is
 * well above any component of a released Linux kernel version and keeps the
 * parsed value representable as an int.
 * NOTE(review): assumes LINUX_KERNVER() combines its arguments with small
 * shifts or multipliers so three such components cannot overflow an int --
 * confirm against linux_mib.h.
 */
#define	LINUX_OSREL_COMPONENT_MAX	999

/*
 * Parse one decimal component of a release string.
 *
 * *strp must point at the first character of the component.  On success the
 * value is stored in *valp, *strp is advanced past the digits and 0 is
 * returned.  EINVAL is returned, with *strp and *valp untouched, when the
 * component does not start with a digit or exceeds
 * LINUX_OSREL_COMPONENT_MAX.
 */
static int
linux_osrel_component(char **strp, int *valp)
{
	char *end;
	long val;

	/*
	 * strtol() on its own would skip leading white space and accept a
	 * sign, letting strings such as "2.-6.3" or " 2.6.3" through.
	 */
	if (**strp < '0' || **strp > '9')
		return (EINVAL);
	val = strtol(*strp, &end, 10);
	/* Also catches values that would be mangled by narrowing to int. */
	if (val > LINUX_OSREL_COMPONENT_MAX)
		return (EINVAL);

	*valp = (int)val;
	*strp = end;

	return (0);
}

/*
 * Map the osrelease into integer
 *
 * The accepted format is "<major>.<minor>.<patch>", optionally followed by
 * '-' and at least one more character.  The three numbers are combined with
 * LINUX_KERNVER(); results below LINUX_KERNVER(1, 0, 0) are rejected.
 *
 * Returns 0 and stores the mapped value in *osrel (when osrel is not NULL),
 * or EINVAL if the string is malformed; *osrel is left untouched on error.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *p;
	int v0, v1, v2, v;

	p = osrelease;
	if (linux_osrel_component(&p, &v0) != 0 || *p != '.')
		return (EINVAL);
	p++;
	if (linux_osrel_component(&p, &v1) != 0 || *p != '.')
		return (EINVAL);
	p++;
	if (linux_osrel_component(&p, &v2) != 0)
		return (EINVAL);
	/* Anything after the patch level must be '-' plus a non-empty tail. */
	if (*p != '\0' && (*p != '-' || p[1] == '\0'))
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
189 
190 /*
191  * Find a prison with Linux info.
192  * Return the Linux info and the (locked) prison.
193  */
194 static struct linux_prison *
195 linux_find_prison(struct prison *spr, struct prison **prp)
196 {
197 	struct prison *pr;
198 	struct linux_prison *lpr;
199 
200 	for (pr = spr;; pr = pr->pr_parent) {
201 		mtx_lock(&pr->pr_mtx);
202 		lpr = (pr == &prison0)
203 		    ? &lprison0
204 		    : osd_jail_get(pr, linux_osd_jail_slot);
205 		if (lpr != NULL)
206 			break;
207 		mtx_unlock(&pr->pr_mtx);
208 	}
209 	*prp = pr;
210 
211 	return (lpr);
212 }
213 
/*
 * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
 * the Linux info and lock the prison.
 *
 * Every path reaches "done" with pr's mutex held.  When lprp is non-null the
 * mutex stays held on return and the caller must release it; when lprp is
 * NULL it is released here.
 */
static void
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
	struct prison *ppr;
	struct linux_prison *lpr, *nlpr;
	void **rsv;

	/* If this prison already has Linux info, return that. */
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr)
		goto done;
	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.
	 */
	/* The ancestor's mutex is dropped before the M_WAITOK allocation. */
	mtx_unlock(&ppr->pr_mtx);
	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
	rsv = osd_reserve(linux_osd_jail_slot);
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr) {
		/* pr gained a record meanwhile; discard what we prepared. */
		free(nlpr, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/* Inherit the initial values from the ancestor. */
	/* Here the ancestor (ppr) is still locked and pr is locked as well. */
	mtx_lock(&pr->pr_mtx);
	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
	bcopy(lpr, nlpr, sizeof(*lpr));
	lpr = nlpr;
	mtx_unlock(&ppr->pr_mtx);
 done:
	if (lprp != NULL)
		*lprp = lpr;
	else
		mtx_unlock(&pr->pr_mtx);
}
254 
255 /*
256  * Jail OSD methods for Linux prison data.
257  */
258 static int
259 linux_prison_create(void *obj, void *data)
260 {
261 	struct prison *pr = obj;
262 	struct vfsoptlist *opts = data;
263 	int jsys;
264 
265 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
266 	    jsys == JAIL_SYS_INHERIT)
267 		return (0);
268 	/*
269 	 * Inherit a prison's initial values from its parent
270 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
271 	 */
272 	linux_alloc_prison(pr, NULL);
273 	return (0);
274 }
275 
276 static int
277 linux_prison_check(void *obj __unused, void *data)
278 {
279 	struct vfsoptlist *opts = data;
280 	char *osname, *osrelease;
281 	int error, jsys, len, oss_version;
282 
283 	/* Check that the parameters are correct. */
284 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
285 	if (error != ENOENT) {
286 		if (error != 0)
287 			return (error);
288 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
289 			return (EINVAL);
290 	}
291 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
292 	if (error != ENOENT) {
293 		if (error != 0)
294 			return (error);
295 		if (len == 0 || osname[len - 1] != '\0')
296 			return (EINVAL);
297 		if (len > LINUX_MAX_UTSNAME) {
298 			vfs_opterror(opts, "linux.osname too long");
299 			return (ENAMETOOLONG);
300 		}
301 	}
302 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
303 	if (error != ENOENT) {
304 		if (error != 0)
305 			return (error);
306 		if (len == 0 || osrelease[len - 1] != '\0')
307 			return (EINVAL);
308 		if (len > LINUX_MAX_UTSNAME) {
309 			vfs_opterror(opts, "linux.osrelease too long");
310 			return (ENAMETOOLONG);
311 		}
312 		error = linux_map_osrel(osrelease, NULL);
313 		if (error != 0) {
314 			vfs_opterror(opts, "linux.osrelease format error");
315 			return (error);
316 		}
317 	}
318 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
319 	    sizeof(oss_version));
320 
321 	if (error == ENOENT)
322 		error = 0;
323 	return (error);
324 }
325 
326 static int
327 linux_prison_set(void *obj, void *data)
328 {
329 	struct linux_prison *lpr;
330 	struct prison *pr = obj;
331 	struct vfsoptlist *opts = data;
332 	char *osname, *osrelease;
333 	int error, gotversion, jsys, len, oss_version;
334 
335 	/* Set the parameters, which should be correct. */
336 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
337 	if (error == ENOENT)
338 		jsys = -1;
339 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
340 	if (error == ENOENT)
341 		osname = NULL;
342 	else
343 		jsys = JAIL_SYS_NEW;
344 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
345 	if (error == ENOENT)
346 		osrelease = NULL;
347 	else
348 		jsys = JAIL_SYS_NEW;
349 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
350 	    sizeof(oss_version));
351 	if (error == ENOENT)
352 		gotversion = 0;
353 	else {
354 		gotversion = 1;
355 		jsys = JAIL_SYS_NEW;
356 	}
357 	switch (jsys) {
358 	case JAIL_SYS_INHERIT:
359 		/* "linux=inherit": inherit the parent's Linux info. */
360 		mtx_lock(&pr->pr_mtx);
361 		osd_jail_del(pr, linux_osd_jail_slot);
362 		mtx_unlock(&pr->pr_mtx);
363 		break;
364 	case JAIL_SYS_NEW:
365 		/*
366 		 * "linux=new" or "linux.*":
367 		 * the prison gets its own Linux info.
368 		 */
369 		linux_alloc_prison(pr, &lpr);
370 		if (osrelease) {
371 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
372 			strlcpy(lpr->pr_osrelease, osrelease,
373 			    LINUX_MAX_UTSNAME);
374 		}
375 		if (osname)
376 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
377 		if (gotversion)
378 			lpr->pr_oss_version = oss_version;
379 		mtx_unlock(&pr->pr_mtx);
380 	}
381 
382 	return (0);
383 }
384 
/*
 * Declarations of the jail parameters served by the OSD methods in this
 * file: "linux", "linux.osname", "linux.osrelease" and "linux.oss_version".
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
392 
393 static int
394 linux_prison_get(void *obj, void *data)
395 {
396 	struct linux_prison *lpr;
397 	struct prison *ppr;
398 	struct prison *pr = obj;
399 	struct vfsoptlist *opts = data;
400 	int error, i;
401 
402 	static int version0;
403 
404 	/* See if this prison is the one with the Linux info. */
405 	lpr = linux_find_prison(pr, &ppr);
406 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
407 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
408 	if (error != 0 && error != ENOENT)
409 		goto done;
410 	if (i) {
411 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
412 		if (error != 0 && error != ENOENT)
413 			goto done;
414 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
415 		if (error != 0 && error != ENOENT)
416 			goto done;
417 		error = vfs_setopt(opts, "linux.oss_version",
418 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
419 		if (error != 0 && error != ENOENT)
420 			goto done;
421 	} else {
422 		/*
423 		 * If this prison is inheriting its Linux info, report
424 		 * empty/zero parameters.
425 		 */
426 		error = vfs_setopts(opts, "linux.osname", "");
427 		if (error != 0 && error != ENOENT)
428 			goto done;
429 		error = vfs_setopts(opts, "linux.osrelease", "");
430 		if (error != 0 && error != ENOENT)
431 			goto done;
432 		error = vfs_setopt(opts, "linux.oss_version", &version0,
433 		    sizeof(lpr->pr_oss_version));
434 		if (error != 0 && error != ENOENT)
435 			goto done;
436 	}
437 	error = 0;
438 
439  done:
440 	mtx_unlock(&ppr->pr_mtx);
441 
442 	return (error);
443 }
444 
/*
 * Destructor registered with osd_jail_register() for the Linux OSD slot:
 * releases a record that linux_alloc_prison() allocated from M_PRISON.
 */
static void
linux_prison_destructor(void *data)
{

	free(data, M_PRISON);
}
451 
452 void
453 linux_osd_jail_register(void)
454 {
455 	struct prison *pr;
456 	osd_method_t methods[PR_MAXMETHOD] = {
457 	    [PR_METHOD_CREATE] =	linux_prison_create,
458 	    [PR_METHOD_GET] =		linux_prison_get,
459 	    [PR_METHOD_SET] =		linux_prison_set,
460 	    [PR_METHOD_CHECK] =		linux_prison_check
461 	};
462 
463 	linux_osd_jail_slot =
464 	    osd_jail_register(linux_prison_destructor, methods);
465 	/* Copy the system Linux info to any current prisons. */
466 	sx_slock(&allprison_lock);
467 	TAILQ_FOREACH(pr, &allprison, pr_list)
468 		linux_alloc_prison(pr, NULL);
469 	sx_sunlock(&allprison_lock);
470 }
471 
/*
 * Release the jail OSD slot obtained by linux_osd_jail_register().
 */
void
linux_osd_jail_deregister(void)
{

	osd_jail_deregister(linux_osd_jail_slot);
}
478 
479 void
480 linux_get_osname(struct thread *td, char *dst)
481 {
482 	struct prison *pr;
483 	struct linux_prison *lpr;
484 
485 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
486 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
487 	mtx_unlock(&pr->pr_mtx);
488 }
489 
490 static int
491 linux_set_osname(struct thread *td, char *osname)
492 {
493 	struct prison *pr;
494 	struct linux_prison *lpr;
495 
496 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
497 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
498 	mtx_unlock(&pr->pr_mtx);
499 
500 	return (0);
501 }
502 
503 void
504 linux_get_osrelease(struct thread *td, char *dst)
505 {
506 	struct prison *pr;
507 	struct linux_prison *lpr;
508 
509 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
510 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
511 	mtx_unlock(&pr->pr_mtx);
512 }
513 
514 int
515 linux_kernver(struct thread *td)
516 {
517 	struct prison *pr;
518 	struct linux_prison *lpr;
519 	int osrel;
520 
521 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
522 	osrel = lpr->pr_osrel;
523 	mtx_unlock(&pr->pr_mtx);
524 
525 	return (osrel);
526 }
527 
528 static int
529 linux_set_osrelease(struct thread *td, char *osrelease)
530 {
531 	struct prison *pr;
532 	struct linux_prison *lpr;
533 	int error;
534 
535 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
536 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
537 	if (error == 0)
538 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
539 	mtx_unlock(&pr->pr_mtx);
540 
541 	return (error);
542 }
543 
544 int
545 linux_get_oss_version(struct thread *td)
546 {
547 	struct prison *pr;
548 	struct linux_prison *lpr;
549 	int version;
550 
551 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
552 	version = lpr->pr_oss_version;
553 	mtx_unlock(&pr->pr_mtx);
554 
555 	return (version);
556 }
557 
558 static int
559 linux_set_oss_version(struct thread *td, int oss_version)
560 {
561 	struct prison *pr;
562 	struct linux_prison *lpr;
563 
564 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
565 	lpr->pr_oss_version = oss_version;
566 	mtx_unlock(&pr->pr_mtx);
567 
568 	return (0);
569 }
570