1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2025 James Gritton.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/fcntl.h>
31 #include <sys/file.h>
32 #include <sys/filedesc.h>
33 #include <sys/kernel.h>
34 #include <sys/jail.h>
35 #include <sys/jaildesc.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 #include <sys/poll.h>
40 #include <sys/priv.h>
41 #include <sys/stat.h>
42 #include <sys/sysproto.h>
43 #include <sys/systm.h>
44 #include <sys/ucred.h>
45 #include <sys/user.h>
46 #include <sys/vnode.h>
47
48 MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
49
50 static fo_poll_t jaildesc_poll;
51 static fo_kqfilter_t jaildesc_kqfilter;
52 static fo_stat_t jaildesc_stat;
53 static fo_close_t jaildesc_close;
54 static fo_fill_kinfo_t jaildesc_fill_kinfo;
55 static fo_cmp_t jaildesc_cmp;
56
57 static struct fileops jaildesc_ops = {
58 .fo_read = invfo_rdwr,
59 .fo_write = invfo_rdwr,
60 .fo_truncate = invfo_truncate,
61 .fo_ioctl = invfo_ioctl,
62 .fo_poll = jaildesc_poll,
63 .fo_kqfilter = jaildesc_kqfilter,
64 .fo_stat = jaildesc_stat,
65 .fo_close = jaildesc_close,
66 .fo_chmod = invfo_chmod,
67 .fo_chown = invfo_chown,
68 .fo_sendfile = invfo_sendfile,
69 .fo_fill_kinfo = jaildesc_fill_kinfo,
70 .fo_cmp = jaildesc_cmp,
71 .fo_flags = DFLAG_PASSABLE,
72 };
73
74 /*
75 * Given a jail descriptor number, return its prison and/or its
76 * credential. They are returned held, and will need to be released
77 * by the caller.
78 */
79 int
jaildesc_find(struct thread * td,int fd,struct prison ** prp,struct ucred ** ucredp)80 jaildesc_find(struct thread *td, int fd, struct prison **prp,
81 struct ucred **ucredp)
82 {
83 struct file *fp;
84 struct jaildesc *jd;
85 struct prison *pr;
86 int error;
87
88 error = fget(td, fd, &cap_no_rights, &fp);
89 if (error != 0)
90 return (error);
91 if (fp->f_type != DTYPE_JAILDESC) {
92 error = EINVAL;
93 goto out;
94 }
95 jd = fp->f_data;
96 JAILDESC_LOCK(jd);
97 pr = jd->jd_prison;
98 if (pr == NULL || !prison_isvalid(pr)) {
99 error = ENOENT;
100 JAILDESC_UNLOCK(jd);
101 goto out;
102 }
103 if (prp != NULL) {
104 prison_hold(pr);
105 *prp = pr;
106 }
107 JAILDESC_UNLOCK(jd);
108 if (ucredp != NULL)
109 *ucredp = crhold(fp->f_cred);
110 out:
111 fdrop(fp, td);
112 return (error);
113 }
114
115 /*
116 * Allocate a new jail decriptor, not yet associated with a prison.
117 * Return the file pointer (with a reference held) and the descriptor
118 * number.
119 */
120 int
jaildesc_alloc(struct thread * td,struct file ** fpp,int * fdp,int owning)121 jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
122 {
123 struct file *fp;
124 struct jaildesc *jd;
125 int error;
126
127 if (owning) {
128 error = priv_check(td, PRIV_JAIL_REMOVE);
129 if (error != 0)
130 return (error);
131 }
132 jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
133 error = falloc_caps(td, &fp, fdp, 0, NULL);
134 if (error != 0) {
135 free(jd, M_JAILDESC);
136 return (error);
137 }
138 finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
139 FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
140 JAILDESC_LOCK_INIT(jd);
141 knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
142 if (owning)
143 jd->jd_flags |= JDF_OWNING;
144 *fpp = fp;
145 return (0);
146 }
147
148 /*
149 * Assocate a jail descriptor with its prison.
150 */
151 void
jaildesc_set_prison(struct file * fp,struct prison * pr)152 jaildesc_set_prison(struct file *fp, struct prison *pr)
153 {
154 struct jaildesc *jd;
155
156 mtx_assert(&pr->pr_mtx, MA_OWNED);
157 jd = fp->f_data;
158 JAILDESC_LOCK(jd);
159 jd->jd_prison = pr;
160 LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
161 prison_hold(pr);
162 JAILDESC_UNLOCK(jd);
163 }
164
165 /*
166 * Detach all the jail descriptors from a prison.
167 */
168 void
jaildesc_prison_cleanup(struct prison * pr)169 jaildesc_prison_cleanup(struct prison *pr)
170 {
171 struct jaildesc *jd;
172
173 mtx_assert(&pr->pr_mtx, MA_OWNED);
174 while ((jd = LIST_FIRST(&pr->pr_descs))) {
175 JAILDESC_LOCK(jd);
176 LIST_REMOVE(jd, jd_list);
177 jd->jd_prison = NULL;
178 JAILDESC_UNLOCK(jd);
179 prison_free(pr);
180 }
181 }
182
183 /*
184 * Pass a note to all listening kqueues.
185 */
186 void
jaildesc_knote(struct prison * pr,long hint)187 jaildesc_knote(struct prison *pr, long hint)
188 {
189 struct jaildesc *jd;
190 int prison_locked;
191
192 if (!LIST_EMPTY(&pr->pr_descs)) {
193 prison_locked = mtx_owned(&pr->pr_mtx);
194 if (!prison_locked)
195 prison_lock(pr);
196 LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
197 JAILDESC_LOCK(jd);
198 if (hint == NOTE_JAIL_REMOVE) {
199 jd->jd_flags |= JDF_REMOVED;
200 if (jd->jd_flags & JDF_SELECTED) {
201 jd->jd_flags &= ~JDF_SELECTED;
202 selwakeup(&jd->jd_selinfo);
203 }
204 }
205 KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
206 JAILDESC_UNLOCK(jd);
207 }
208 if (!prison_locked)
209 prison_unlock(pr);
210 }
211 }
212
213 static int
jaildesc_close(struct file * fp,struct thread * td)214 jaildesc_close(struct file *fp, struct thread *td)
215 {
216 struct jaildesc *jd;
217 struct prison *pr;
218
219 jd = fp->f_data;
220 fp->f_data = NULL;
221 if (jd != NULL) {
222 JAILDESC_LOCK(jd);
223 pr = jd->jd_prison;
224 if (pr != NULL) {
225 /*
226 * Free or remove the associated prison.
227 * This requires a second check after re-
228 * ordering locks. This jaildesc can remain
229 * unlocked once we have a prison reference,
230 * because that prison is the only place that
231 * still points back to it.
232 */
233 prison_hold(pr);
234 JAILDESC_UNLOCK(jd);
235 if (jd->jd_flags & JDF_OWNING) {
236 sx_xlock(&allprison_lock);
237 prison_lock(pr);
238 if (jd->jd_prison != NULL) {
239 /*
240 * Unlink the prison, but don't free
241 * it; that will be done as part of
242 * of prison_remove.
243 */
244 LIST_REMOVE(jd, jd_list);
245 prison_remove(pr);
246 } else {
247 prison_unlock(pr);
248 sx_xunlock(&allprison_lock);
249 }
250 } else {
251 prison_lock(pr);
252 if (jd->jd_prison != NULL) {
253 LIST_REMOVE(jd, jd_list);
254 prison_free(pr);
255 }
256 prison_unlock(pr);
257 }
258 prison_free(pr);
259 }
260 knlist_destroy(&jd->jd_selinfo.si_note);
261 JAILDESC_LOCK_DESTROY(jd);
262 free(jd, M_JAILDESC);
263 }
264 return (0);
265 }
266
267 static int
jaildesc_poll(struct file * fp,int events,struct ucred * active_cred,struct thread * td)268 jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
269 struct thread *td)
270 {
271 struct jaildesc *jd;
272 int revents;
273
274 revents = 0;
275 jd = fp->f_data;
276 JAILDESC_LOCK(jd);
277 if (jd->jd_flags & JDF_REMOVED)
278 revents |= POLLHUP;
279 if (revents == 0) {
280 selrecord(td, &jd->jd_selinfo);
281 jd->jd_flags |= JDF_SELECTED;
282 }
283 JAILDESC_UNLOCK(jd);
284 return (revents);
285 }
286
287 static void
jaildesc_kqops_detach(struct knote * kn)288 jaildesc_kqops_detach(struct knote *kn)
289 {
290 struct jaildesc *jd;
291
292 jd = kn->kn_fp->f_data;
293 knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
294 }
295
296 static int
jaildesc_kqops_event(struct knote * kn,long hint)297 jaildesc_kqops_event(struct knote *kn, long hint)
298 {
299 struct jaildesc *jd;
300 u_int event;
301
302 jd = kn->kn_fp->f_data;
303 if (hint == 0) {
304 /*
305 * Initial test after registration. Generate a
306 * NOTE_JAIL_REMOVE in case the prison already died
307 * before registration.
308 */
309 event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
310 } else {
311 /*
312 * Mask off extra data. In the NOTE_JAIL_CHILD case,
313 * that's everything except the NOTE_JAIL_CHILD bit
314 * itself, since a JID is any positive integer.
315 */
316 event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
317 (u_int)hint & NOTE_JAIL_CTRLMASK;
318 }
319
320 /* If the user is interested in this event, record it. */
321 if (kn->kn_sfflags & event) {
322 kn->kn_fflags |= event;
323 /* Report the created jail id or attached process id. */
324 if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
325 if (kn->kn_data != 0)
326 kn->kn_fflags |= NOTE_JAIL_MULTI;
327 kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
328 (u_int)hint & ~event;
329 }
330 }
331
332 /* Prison is gone, so flag the event as finished. */
333 if (event == NOTE_JAIL_REMOVE) {
334 kn->kn_flags |= EV_EOF | EV_ONESHOT;
335 if (kn->kn_fflags == 0)
336 kn->kn_flags |= EV_DROP;
337 return (1);
338 }
339
340 return (kn->kn_fflags != 0);
341 }
342
343 static const struct filterops jaildesc_kqops = {
344 .f_isfd = 1,
345 .f_detach = jaildesc_kqops_detach,
346 .f_event = jaildesc_kqops_event,
347 };
348
349 static int
jaildesc_kqfilter(struct file * fp,struct knote * kn)350 jaildesc_kqfilter(struct file *fp, struct knote *kn)
351 {
352 struct jaildesc *jd;
353
354 jd = fp->f_data;
355 switch (kn->kn_filter) {
356 case EVFILT_JAILDESC:
357 kn->kn_fop = &jaildesc_kqops;
358 kn->kn_flags |= EV_CLEAR;
359 knlist_add(&jd->jd_selinfo.si_note, kn, 0);
360 return (0);
361 default:
362 return (EINVAL);
363 }
364 }
365
366 static int
jaildesc_stat(struct file * fp,struct stat * sb,struct ucred * active_cred)367 jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
368 {
369 struct jaildesc *jd;
370
371 bzero(sb, sizeof(struct stat));
372 jd = fp->f_data;
373 JAILDESC_LOCK(jd);
374 if (jd->jd_prison != NULL) {
375 sb->st_ino = jd->jd_prison->pr_id;
376 sb->st_mode = S_IFREG | S_IRWXU;
377 } else
378 sb->st_mode = S_IFREG;
379 JAILDESC_UNLOCK(jd);
380 return (0);
381 }
382
383 static int
jaildesc_fill_kinfo(struct file * fp,struct kinfo_file * kif,struct filedesc * fdp)384 jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
385 struct filedesc *fdp)
386 {
387 struct jaildesc *jd;
388
389 jd = fp->f_data;
390 kif->kf_type = KF_TYPE_JAILDESC;
391 kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0;
392 return (0);
393 }
394
395 static int
jaildesc_cmp(struct file * fp1,struct file * fp2,struct thread * td)396 jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
397 {
398 struct jaildesc *jd1, *jd2;
399 int jid1, jid2;
400
401 if (fp2->f_type != DTYPE_JAILDESC)
402 return (3);
403 jd1 = fp1->f_data;
404 JAILDESC_LOCK(jd1);
405 jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
406 JAILDESC_UNLOCK(jd1);
407 jd2 = fp2->f_data;
408 JAILDESC_LOCK(jd2);
409 jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
410 JAILDESC_UNLOCK(jd2);
411 return (kcmp_cmp(jid1, jid2));
412 }
413