1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2025 James Gritton. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/fcntl.h> 31 #include <sys/file.h> 32 #include <sys/filedesc.h> 33 #include <sys/kernel.h> 34 #include <sys/jail.h> 35 #include <sys/jaildesc.h> 36 #include <sys/lock.h> 37 #include <sys/malloc.h> 38 #include <sys/mutex.h> 39 #include <sys/poll.h> 40 #include <sys/priv.h> 41 #include <sys/stat.h> 42 #include <sys/sysproto.h> 43 #include <sys/systm.h> 44 #include <sys/ucred.h> 45 #include <sys/user.h> 46 #include <sys/vnode.h> 47 48 MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors"); 49 50 static fo_poll_t jaildesc_poll; 51 static fo_kqfilter_t jaildesc_kqfilter; 52 static fo_stat_t jaildesc_stat; 53 static fo_close_t jaildesc_close; 54 static fo_fill_kinfo_t jaildesc_fill_kinfo; 55 static fo_cmp_t jaildesc_cmp; 56 57 static struct fileops jaildesc_ops = { 58 .fo_read = invfo_rdwr, 59 .fo_write = invfo_rdwr, 60 .fo_truncate = invfo_truncate, 61 .fo_ioctl = invfo_ioctl, 62 .fo_poll = jaildesc_poll, 63 .fo_kqfilter = jaildesc_kqfilter, 64 .fo_stat = jaildesc_stat, 65 .fo_close = jaildesc_close, 66 .fo_chmod = invfo_chmod, 67 .fo_chown = invfo_chown, 68 .fo_sendfile = invfo_sendfile, 69 .fo_fill_kinfo = jaildesc_fill_kinfo, 70 .fo_cmp = jaildesc_cmp, 71 .fo_flags = DFLAG_PASSABLE, 72 }; 73 74 /* 75 * Given a jail descriptor number, return its prison and/or its 76 * credential. They are returned held, and will need to be released 77 * by the caller. 78 */ 79 int 80 jaildesc_find(struct thread *td, int fd, struct prison **prp, 81 struct ucred **ucredp) 82 { 83 struct file *fp; 84 struct jaildesc *jd; 85 struct prison *pr; 86 int error; 87 88 error = fget(td, fd, &cap_no_rights, &fp); 89 if (error != 0) 90 return (error); 91 if (fp->f_type != DTYPE_JAILDESC) { 92 error = EINVAL; 93 goto out; 94 } 95 jd = fp->f_data; 96 JAILDESC_LOCK(jd); 97 pr = jd->jd_prison; 98 if (pr == NULL || !prison_isvalid(pr)) { 99 error = ENOENT; 100 JAILDESC_UNLOCK(jd); 101 goto out; 102 } 103 if (prp != NULL) { 104 prison_hold(pr); 105 *prp = pr; 106 } 107 JAILDESC_UNLOCK(jd); 108 if (ucredp != NULL) 109 *ucredp = crhold(fp->f_cred); 110 out: 111 fdrop(fp, td); 112 return (error); 113 } 114 115 /* 116 * Allocate a new jail decriptor, not yet associated with a prison. 117 * Return the file pointer (with a reference held) and the descriptor 118 * number. 119 */ 120 int 121 jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning) 122 { 123 struct file *fp; 124 struct jaildesc *jd; 125 int error; 126 127 if (owning) { 128 error = priv_check(td, PRIV_JAIL_REMOVE); 129 if (error != 0) 130 return (error); 131 } 132 jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO); 133 error = falloc_caps(td, &fp, fdp, 0, NULL); 134 if (error != 0) { 135 free(jd, M_JAILDESC); 136 return (error); 137 } 138 finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ? 139 FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops); 140 JAILDESC_LOCK_INIT(jd); 141 knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock); 142 if (owning) 143 jd->jd_flags |= JDF_OWNING; 144 *fpp = fp; 145 return (0); 146 } 147 148 /* 149 * Assocate a jail descriptor with its prison. 150 */ 151 void 152 jaildesc_set_prison(struct file *fp, struct prison *pr) 153 { 154 struct jaildesc *jd; 155 156 mtx_assert(&pr->pr_mtx, MA_OWNED); 157 jd = fp->f_data; 158 JAILDESC_LOCK(jd); 159 jd->jd_prison = pr; 160 LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list); 161 prison_hold(pr); 162 JAILDESC_UNLOCK(jd); 163 } 164 165 /* 166 * Detach all the jail descriptors from a prison. 167 */ 168 void 169 jaildesc_prison_cleanup(struct prison *pr) 170 { 171 struct jaildesc *jd; 172 173 mtx_assert(&pr->pr_mtx, MA_OWNED); 174 while ((jd = LIST_FIRST(&pr->pr_descs))) { 175 JAILDESC_LOCK(jd); 176 LIST_REMOVE(jd, jd_list); 177 jd->jd_prison = NULL; 178 JAILDESC_UNLOCK(jd); 179 prison_free(pr); 180 } 181 } 182 183 /* 184 * Pass a note to all listening kqueues. 185 */ 186 void 187 jaildesc_knote(struct prison *pr, long hint) 188 { 189 struct jaildesc *jd; 190 int prison_locked; 191 192 if (!LIST_EMPTY(&pr->pr_descs)) { 193 prison_locked = mtx_owned(&pr->pr_mtx); 194 if (!prison_locked) 195 prison_lock(pr); 196 LIST_FOREACH(jd, &pr->pr_descs, jd_list) { 197 JAILDESC_LOCK(jd); 198 if (hint == NOTE_JAIL_REMOVE) { 199 jd->jd_flags |= JDF_REMOVED; 200 if (jd->jd_flags & JDF_SELECTED) { 201 jd->jd_flags &= ~JDF_SELECTED; 202 selwakeup(&jd->jd_selinfo); 203 } 204 } 205 KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint); 206 JAILDESC_UNLOCK(jd); 207 } 208 if (!prison_locked) 209 prison_unlock(pr); 210 } 211 } 212 213 static int 214 jaildesc_close(struct file *fp, struct thread *td) 215 { 216 struct jaildesc *jd; 217 struct prison *pr; 218 219 jd = fp->f_data; 220 fp->f_data = NULL; 221 if (jd != NULL) { 222 JAILDESC_LOCK(jd); 223 pr = jd->jd_prison; 224 if (pr != NULL) { 225 /* 226 * Free or remove the associated prison. 227 * This requires a second check after re- 228 * ordering locks. This jaildesc can remain 229 * unlocked once we have a prison reference, 230 * because that prison is the only place that 231 * still points back to it. 232 */ 233 prison_hold(pr); 234 JAILDESC_UNLOCK(jd); 235 if (jd->jd_flags & JDF_OWNING) { 236 sx_xlock(&allprison_lock); 237 prison_lock(pr); 238 if (jd->jd_prison != NULL) { 239 /* 240 * Unlink the prison, but don't free 241 * it; that will be done as part of 242 * of prison_remove. 243 */ 244 LIST_REMOVE(jd, jd_list); 245 prison_remove(pr); 246 } else { 247 prison_unlock(pr); 248 sx_xunlock(&allprison_lock); 249 } 250 } else { 251 prison_lock(pr); 252 if (jd->jd_prison != NULL) { 253 LIST_REMOVE(jd, jd_list); 254 prison_free(pr); 255 } 256 prison_unlock(pr); 257 } 258 prison_free(pr); 259 } 260 knlist_destroy(&jd->jd_selinfo.si_note); 261 JAILDESC_LOCK_DESTROY(jd); 262 free(jd, M_JAILDESC); 263 } 264 return (0); 265 } 266 267 static int 268 jaildesc_poll(struct file *fp, int events, struct ucred *active_cred, 269 struct thread *td) 270 { 271 struct jaildesc *jd; 272 int revents; 273 274 revents = 0; 275 jd = fp->f_data; 276 JAILDESC_LOCK(jd); 277 if (jd->jd_flags & JDF_REMOVED) 278 revents |= POLLHUP; 279 if (revents == 0) { 280 selrecord(td, &jd->jd_selinfo); 281 jd->jd_flags |= JDF_SELECTED; 282 } 283 JAILDESC_UNLOCK(jd); 284 return (revents); 285 } 286 287 static void 288 jaildesc_kqops_detach(struct knote *kn) 289 { 290 struct jaildesc *jd; 291 292 jd = kn->kn_fp->f_data; 293 knlist_remove(&jd->jd_selinfo.si_note, kn, 0); 294 } 295 296 static int 297 jaildesc_kqops_event(struct knote *kn, long hint) 298 { 299 struct jaildesc *jd; 300 u_int event; 301 302 jd = kn->kn_fp->f_data; 303 if (hint == 0) { 304 /* 305 * Initial test after registration. Generate a 306 * NOTE_JAIL_REMOVE in case the prison already died 307 * before registration. 308 */ 309 event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0; 310 } else { 311 /* 312 * Mask off extra data. In the NOTE_JAIL_CHILD case, 313 * that's everything except the NOTE_JAIL_CHILD bit 314 * itself, since a JID is any positive integer. 315 */ 316 event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD : 317 (u_int)hint & NOTE_JAIL_CTRLMASK; 318 } 319 320 /* If the user is interested in this event, record it. */ 321 if (kn->kn_sfflags & event) { 322 kn->kn_fflags |= event; 323 /* Report the created jail id or attached process id. */ 324 if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) { 325 if (kn->kn_data != 0) 326 kn->kn_fflags |= NOTE_JAIL_MULTI; 327 kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U : 328 (u_int)hint & ~event; 329 } 330 } 331 332 /* Prison is gone, so flag the event as finished. */ 333 if (event == NOTE_JAIL_REMOVE) { 334 kn->kn_flags |= EV_EOF | EV_ONESHOT; 335 if (kn->kn_fflags == 0) 336 kn->kn_flags |= EV_DROP; 337 return (1); 338 } 339 340 return (kn->kn_fflags != 0); 341 } 342 343 static const struct filterops jaildesc_kqops = { 344 .f_isfd = 1, 345 .f_detach = jaildesc_kqops_detach, 346 .f_event = jaildesc_kqops_event, 347 }; 348 349 static int 350 jaildesc_kqfilter(struct file *fp, struct knote *kn) 351 { 352 struct jaildesc *jd; 353 354 jd = fp->f_data; 355 switch (kn->kn_filter) { 356 case EVFILT_JAILDESC: 357 kn->kn_fop = &jaildesc_kqops; 358 kn->kn_flags |= EV_CLEAR; 359 knlist_add(&jd->jd_selinfo.si_note, kn, 0); 360 return (0); 361 default: 362 return (EINVAL); 363 } 364 } 365 366 static int 367 jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) 368 { 369 struct jaildesc *jd; 370 371 bzero(sb, sizeof(struct stat)); 372 jd = fp->f_data; 373 JAILDESC_LOCK(jd); 374 if (jd->jd_prison != NULL) { 375 sb->st_ino = jd->jd_prison->pr_id; 376 sb->st_mode = S_IFREG | S_IRWXU; 377 } else 378 sb->st_mode = S_IFREG; 379 JAILDESC_UNLOCK(jd); 380 return (0); 381 } 382 383 static int 384 jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif, 385 struct filedesc *fdp) 386 { 387 struct jaildesc *jd; 388 389 jd = fp->f_data; 390 kif->kf_type = KF_TYPE_JAILDESC; 391 kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0; 392 return (0); 393 } 394 395 static int 396 jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td) 397 { 398 struct jaildesc *jd1, *jd2; 399 int jid1, jid2; 400 401 if (fp2->f_type != DTYPE_JAILDESC) 402 return (3); 403 jd1 = fp1->f_data; 404 JAILDESC_LOCK(jd1); 405 jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0; 406 JAILDESC_UNLOCK(jd1); 407 jd2 = fp2->f_data; 408 JAILDESC_LOCK(jd2); 409 jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0; 410 JAILDESC_UNLOCK(jd2); 411 return (kcmp_cmp(jid1, jid2)); 412 } 413