xref: /freebsd/sys/kern/kern_jaildesc.c (revision 66d8ffe3046ded1eb3f78599c6af8eb965482ef5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2025 James Gritton.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/fcntl.h>
31 #include <sys/file.h>
32 #include <sys/filedesc.h>
33 #include <sys/kernel.h>
34 #include <sys/jail.h>
35 #include <sys/jaildesc.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 #include <sys/poll.h>
40 #include <sys/priv.h>
41 #include <sys/stat.h>
42 #include <sys/sysproto.h>
43 #include <sys/systm.h>
44 #include <sys/ucred.h>
45 #include <sys/user.h>
46 #include <sys/vnode.h>
47 
48 MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
49 
50 static fo_poll_t	jaildesc_poll;
51 static fo_kqfilter_t	jaildesc_kqfilter;
52 static fo_stat_t	jaildesc_stat;
53 static fo_close_t	jaildesc_close;
54 static fo_fill_kinfo_t	jaildesc_fill_kinfo;
55 static fo_cmp_t		jaildesc_cmp;
56 
57 static struct fileops jaildesc_ops = {
58 	.fo_read = invfo_rdwr,
59 	.fo_write = invfo_rdwr,
60 	.fo_truncate = invfo_truncate,
61 	.fo_ioctl = invfo_ioctl,
62 	.fo_poll = jaildesc_poll,
63 	.fo_kqfilter = jaildesc_kqfilter,
64 	.fo_stat = jaildesc_stat,
65 	.fo_close = jaildesc_close,
66 	.fo_chmod = invfo_chmod,
67 	.fo_chown = invfo_chown,
68 	.fo_sendfile = invfo_sendfile,
69 	.fo_fill_kinfo = jaildesc_fill_kinfo,
70 	.fo_cmp = jaildesc_cmp,
71 	.fo_flags = DFLAG_PASSABLE,
72 };
73 
74 /*
75  * Given a jail descriptor number, return its prison and/or its
76  * credential.  They are returned held, and will need to be released
77  * by the caller.
78  */
79 int
jaildesc_find(struct thread * td,int fd,struct prison ** prp,struct ucred ** ucredp)80 jaildesc_find(struct thread *td, int fd, struct prison **prp,
81     struct ucred **ucredp)
82 {
83 	struct file *fp;
84 	struct jaildesc *jd;
85 	struct prison *pr;
86 	int error;
87 
88 	error = fget(td, fd, &cap_no_rights, &fp);
89 	if (error != 0)
90 		return (error);
91 	if (fp->f_type != DTYPE_JAILDESC) {
92 		error = EINVAL;
93 		goto out;
94 	}
95 	jd = fp->f_data;
96 	JAILDESC_LOCK(jd);
97 	pr = jd->jd_prison;
98 	if (pr == NULL || !prison_isvalid(pr)) {
99 		error = ENOENT;
100 		JAILDESC_UNLOCK(jd);
101 		goto out;
102 	}
103 	if (prp != NULL) {
104 		prison_hold(pr);
105 		*prp = pr;
106 	}
107 	JAILDESC_UNLOCK(jd);
108 	if (ucredp != NULL)
109 		*ucredp = crhold(fp->f_cred);
110  out:
111 	fdrop(fp, td);
112 	return (error);
113 }
114 
115 /*
116  * Allocate a new jail decriptor, not yet associated with a prison.
117  * Return the file pointer (with a reference held) and the descriptor
118  * number.
119  */
120 int
jaildesc_alloc(struct thread * td,struct file ** fpp,int * fdp,int owning)121 jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
122 {
123 	struct file *fp;
124 	struct jaildesc *jd;
125 	int error;
126 
127 	if (owning) {
128 		error = priv_check(td, PRIV_JAIL_REMOVE);
129 		if (error != 0)
130 			return (error);
131 	}
132 	jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
133 	error = falloc_caps(td, &fp, fdp, 0, NULL);
134 	if (error != 0) {
135 		free(jd, M_JAILDESC);
136 		return (error);
137 	}
138 	finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
139 	    FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
140 	JAILDESC_LOCK_INIT(jd);
141 	knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
142 	if (owning)
143 		jd->jd_flags |= JDF_OWNING;
144 	*fpp = fp;
145 	return (0);
146 }
147 
148 /*
149  * Assocate a jail descriptor with its prison.
150  */
151 void
jaildesc_set_prison(struct file * fp,struct prison * pr)152 jaildesc_set_prison(struct file *fp, struct prison *pr)
153 {
154 	struct jaildesc *jd;
155 
156 	mtx_assert(&pr->pr_mtx, MA_OWNED);
157 	jd = fp->f_data;
158 	JAILDESC_LOCK(jd);
159 	jd->jd_prison = pr;
160 	LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
161 	prison_hold(pr);
162 	JAILDESC_UNLOCK(jd);
163 }
164 
165 /*
166  * Detach all the jail descriptors from a prison.
167  */
168 void
jaildesc_prison_cleanup(struct prison * pr)169 jaildesc_prison_cleanup(struct prison *pr)
170 {
171 	struct jaildesc *jd;
172 
173 	mtx_assert(&pr->pr_mtx, MA_OWNED);
174 	while ((jd = LIST_FIRST(&pr->pr_descs))) {
175 		JAILDESC_LOCK(jd);
176 		LIST_REMOVE(jd, jd_list);
177 		jd->jd_prison = NULL;
178 		JAILDESC_UNLOCK(jd);
179 		prison_free(pr);
180 	}
181 }
182 
183 /*
184  * Pass a note to all listening kqueues.
185  */
186 void
jaildesc_knote(struct prison * pr,long hint)187 jaildesc_knote(struct prison *pr, long hint)
188 {
189 	struct jaildesc *jd;
190 	int prison_locked;
191 
192 	if (!LIST_EMPTY(&pr->pr_descs)) {
193 		prison_locked = mtx_owned(&pr->pr_mtx);
194 		if (!prison_locked)
195 			prison_lock(pr);
196 		LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
197 			JAILDESC_LOCK(jd);
198 			if (hint == NOTE_JAIL_REMOVE) {
199 				jd->jd_flags |= JDF_REMOVED;
200 				if (jd->jd_flags & JDF_SELECTED) {
201 					jd->jd_flags &= ~JDF_SELECTED;
202 					selwakeup(&jd->jd_selinfo);
203 				}
204 			}
205 			KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
206 			JAILDESC_UNLOCK(jd);
207 		}
208 		if (!prison_locked)
209 			prison_unlock(pr);
210 	}
211 }
212 
/*
 * fo_close handler: tear down a jail descriptor.
 *
 * The jaildesc is detached from the file, unlinked from its prison (if
 * it still has one) and freed.  For a JDF_OWNING descriptor that is
 * still attached, the prison itself is removed via prison_remove();
 * otherwise only the descriptor's prison reference is dropped.
 * Always returns 0.
 */
static int
jaildesc_close(struct file *fp, struct thread *td)
{
	struct jaildesc *jd;
	struct prison *pr;

	jd = fp->f_data;
	fp->f_data = NULL;
	if (jd != NULL) {
		JAILDESC_LOCK(jd);
		pr = jd->jd_prison;
		if (pr != NULL) {
			/*
			 * Free or remove the associated prison.
			 * This requires a second check after re-
			 * ordering locks.  This jaildesc can remain
			 * unlocked once we have a prison reference,
			 * because that prison is the only place that
			 * still points back to it.
			 */
			prison_hold(pr);
			JAILDESC_UNLOCK(jd);
			if (jd->jd_flags & JDF_OWNING) {
				sx_xlock(&allprison_lock);
				prison_lock(pr);
				/*
				 * Re-check: the descriptor may have been
				 * detached while no lock was held.
				 */
				if (jd->jd_prison != NULL) {
					/*
					 * Unlink the prison, but don't free
					 * it; that will be done as part of
					 * prison_remove.
					 *
					 * NOTE(review): presumably
					 * prison_remove() also drops the
					 * prison lock and allprison_lock,
					 * since only the else branch unlocks
					 * them here -- confirm.
					 */
					LIST_REMOVE(jd, jd_list);
					prison_remove(pr);
				} else {
					prison_unlock(pr);
					sx_xunlock(&allprison_lock);
				}
			} else {
				prison_lock(pr);
				/* Same re-check as in the owning case. */
				if (jd->jd_prison != NULL) {
					/*
					 * Unlink and drop the reference that
					 * was taken when the descriptor was
					 * attached to the prison.
					 */
					LIST_REMOVE(jd, jd_list);
					prison_free(pr);
				}
				prison_unlock(pr);
			}
			/* Drop the temporary reference taken above. */
			prison_free(pr);
		}
		/*
		 * When no prison was attached, the jaildesc lock taken
		 * above is still held at this point.
		 * NOTE(review): mutex(9) permits destroying a mutex with a
		 * single hold; confirm JAILDESC_LOCK_DESTROY maps to
		 * mtx_destroy().
		 */
		knlist_destroy(&jd->jd_selinfo.si_note);
		JAILDESC_LOCK_DESTROY(jd);
		free(jd, M_JAILDESC);
	}
	return (0);
}
266 
267 static int
jaildesc_poll(struct file * fp,int events,struct ucred * active_cred,struct thread * td)268 jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
269     struct thread *td)
270 {
271 	struct jaildesc *jd;
272 	int revents;
273 
274 	revents = 0;
275 	jd = fp->f_data;
276 	JAILDESC_LOCK(jd);
277 	if (jd->jd_flags & JDF_REMOVED)
278 		revents |= POLLHUP;
279 	if (revents == 0) {
280 		selrecord(td, &jd->jd_selinfo);
281 		jd->jd_flags |= JDF_SELECTED;
282 	}
283 	JAILDESC_UNLOCK(jd);
284 	return (revents);
285 }
286 
287 static void
jaildesc_kqops_detach(struct knote * kn)288 jaildesc_kqops_detach(struct knote *kn)
289 {
290 	struct jaildesc *jd;
291 
292 	jd = kn->kn_fp->f_data;
293 	knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
294 }
295 
296 static int
jaildesc_kqops_event(struct knote * kn,long hint)297 jaildesc_kqops_event(struct knote *kn, long hint)
298 {
299 	struct jaildesc *jd;
300 	u_int event;
301 
302 	jd = kn->kn_fp->f_data;
303 	if (hint == 0) {
304 		/*
305 		 * Initial test after registration. Generate a
306 		 * NOTE_JAIL_REMOVE in case the prison already died
307 		 * before registration.
308 		 */
309 		event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
310 	} else {
311 		/*
312 		 * Mask off extra data.  In the NOTE_JAIL_CHILD case,
313 		 * that's everything except the NOTE_JAIL_CHILD bit
314 		 * itself, since a JID is any positive integer.
315 		 */
316 		event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
317 		    (u_int)hint & NOTE_JAIL_CTRLMASK;
318 	}
319 
320 	/* If the user is interested in this event, record it. */
321 	if (kn->kn_sfflags & event) {
322 		kn->kn_fflags |= event;
323 		/* Report the created jail id or attached process id. */
324 		if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
325 			if (kn->kn_data != 0)
326 				kn->kn_fflags |= NOTE_JAIL_MULTI;
327 			kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
328 			    (u_int)hint & ~event;
329 		}
330 	}
331 
332 	/* Prison is gone, so flag the event as finished. */
333 	if (event == NOTE_JAIL_REMOVE) {
334 		kn->kn_flags |= EV_EOF | EV_ONESHOT;
335 		if (kn->kn_fflags == 0)
336 			kn->kn_flags |= EV_DROP;
337 		return (1);
338 	}
339 
340 	return (kn->kn_fflags != 0);
341 }
342 
343 static const struct filterops jaildesc_kqops = {
344 	.f_isfd = 1,
345 	.f_detach = jaildesc_kqops_detach,
346 	.f_event = jaildesc_kqops_event,
347 };
348 
349 static int
jaildesc_kqfilter(struct file * fp,struct knote * kn)350 jaildesc_kqfilter(struct file *fp, struct knote *kn)
351 {
352 	struct jaildesc *jd;
353 
354 	jd = fp->f_data;
355 	switch (kn->kn_filter) {
356 	case EVFILT_JAILDESC:
357 		kn->kn_fop = &jaildesc_kqops;
358 		kn->kn_flags |= EV_CLEAR;
359 		knlist_add(&jd->jd_selinfo.si_note, kn, 0);
360 		return (0);
361 	default:
362 		return (EINVAL);
363 	}
364 }
365 
366 static int
jaildesc_stat(struct file * fp,struct stat * sb,struct ucred * active_cred)367 jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
368 {
369 	struct jaildesc *jd;
370 
371 	bzero(sb, sizeof(struct stat));
372 	jd = fp->f_data;
373 	JAILDESC_LOCK(jd);
374 	if (jd->jd_prison != NULL) {
375 		sb->st_ino = jd->jd_prison->pr_id;
376 		sb->st_mode = S_IFREG | S_IRWXU;
377 	} else
378 		sb->st_mode = S_IFREG;
379 	JAILDESC_UNLOCK(jd);
380 	return (0);
381 }
382 
383 static int
jaildesc_fill_kinfo(struct file * fp,struct kinfo_file * kif,struct filedesc * fdp)384 jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
385     struct filedesc *fdp)
386 {
387 	struct jaildesc *jd;
388 
389 	jd = fp->f_data;
390 	kif->kf_type = KF_TYPE_JAILDESC;
391 	kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0;
392 	return (0);
393 }
394 
395 static int
jaildesc_cmp(struct file * fp1,struct file * fp2,struct thread * td)396 jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
397 {
398 	struct jaildesc *jd1, *jd2;
399 	int jid1, jid2;
400 
401 	if (fp2->f_type != DTYPE_JAILDESC)
402 		return (3);
403 	jd1 = fp1->f_data;
404 	JAILDESC_LOCK(jd1);
405 	jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
406 	JAILDESC_UNLOCK(jd1);
407 	jd2 = fp2->f_data;
408 	JAILDESC_LOCK(jd2);
409 	jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
410 	JAILDESC_UNLOCK(jd2);
411 	return (kcmp_cmp(jid1, jid2));
412 }
413