1 /*- 2 * Implementation of SVID messages 3 * 4 * Author: Daniel Boulet 5 * 6 * Copyright 1993 Daniel Boulet and RTMX Inc. 7 * 8 * This system call was implemented by Daniel Boulet under contract from RTMX. 9 * 10 * Redistribution and use in source forms, with and without modification, 11 * are permitted provided that this entire comment appears intact. 12 * 13 * Redistribution in binary form may occur without any restrictions. 14 * Obviously, it would be nice if you gave credit where credit is due 15 * but requiring it would be too onerous. 16 * 17 * This software is provided ``AS IS'' without any warranties of any kind. 18 */ 19 /*- 20 * Copyright (c) 2003-2005 McAfee, Inc. 21 * All rights reserved. 22 * 23 * This software was developed for the FreeBSD Project in part by McAfee 24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR 25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research 26 * program. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 40 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 47 * SUCH DAMAGE. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include "opt_compat.h" 54 #include "opt_sysvipc.h" 55 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/sysproto.h> 59 #include <sys/kernel.h> 60 #include <sys/priv.h> 61 #include <sys/proc.h> 62 #include <sys/lock.h> 63 #include <sys/mutex.h> 64 #include <sys/module.h> 65 #include <sys/mount.h> 66 #include <sys/msg.h> 67 #include <sys/racct.h> 68 #include <sys/sx.h> 69 #include <sys/syscall.h> 70 #include <sys/syscallsubr.h> 71 #include <sys/sysent.h> 72 #include <sys/sysctl.h> 73 #include <sys/malloc.h> 74 #include <sys/jail.h> 75 76 #include <security/mac/mac_framework.h> 77 78 FEATURE(sysv_msg, "System V message queues support"); 79 80 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues"); 81 82 static int msginit(void); 83 static int msgunload(void); 84 static int sysvmsg_modload(struct module *, int, void *); 85 static void msq_remove(struct msqid_kernel *); 86 static struct prison *msg_find_prison(struct ucred *); 87 static int msq_prison_cansee(struct prison *, struct msqid_kernel *); 88 static int msg_prison_check(void *, void *); 89 static int msg_prison_set(void *, void *); 90 static int msg_prison_get(void *, void *); 91 static int msg_prison_remove(void *, void *); 92 static void msg_prison_cleanup(struct prison *); 93 94 95 #ifdef MSG_DEBUG 96 #define DPRINTF(a) printf a 97 #else 98 #define DPRINTF(a) (void)0 99 #endif 100 101 static void msg_freehdr(struct msg *msghdr); 102 103 #ifndef MSGSSZ 104 #define MSGSSZ 8 /* Each segment must be 2^N long */ 105 #endif 106 #ifndef MSGSEG 107 #define MSGSEG 2048 /* must be less than 32767 */ 108 #endif 109 #define MSGMAX (MSGSSZ*MSGSEG) 110 #ifndef MSGMNB 111 #define MSGMNB 2048 /* max # of bytes in a queue */ 112 #endif 113 #ifndef MSGMNI 114 #define MSGMNI 40 115 #endif 116 #ifndef MSGTQL 117 #define MSGTQL 40 118 #endif 119 120 /* 121 * Based on the configuration parameters described in an SVR2 (yes, two) 122 * config(1m) man page. 123 * 124 * Each message is broken up and stored in segments that are msgssz bytes 125 * long. For efficiency reasons, this should be a power of two. Also, 126 * it doesn't make sense if it is less than 8 or greater than about 256. 127 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of 128 * two between 8 and 1024 inclusive (and panic's if it isn't). 129 */ 130 struct msginfo msginfo = { 131 MSGMAX, /* max chars in a message */ 132 MSGMNI, /* # of message queue identifiers */ 133 MSGMNB, /* max chars in a queue */ 134 MSGTQL, /* max messages in system */ 135 MSGSSZ, /* size of a message segment */ 136 /* (must be small power of 2 greater than 4) */ 137 MSGSEG /* number of message segments */ 138 }; 139 140 /* 141 * macros to convert between msqid_ds's and msqid's. 142 * (specific to this implementation) 143 */ 144 #define MSQID(ix,ds) ((ix) & 0xffff | (((ds).msg_perm.seq << 16) & 0xffff0000)) 145 #define MSQID_IX(id) ((id) & 0xffff) 146 #define MSQID_SEQ(id) (((id) >> 16) & 0xffff) 147 148 /* 149 * The rest of this file is specific to this particular implementation. 150 */ 151 152 struct msgmap { 153 short next; /* next segment in buffer */ 154 /* -1 -> available */ 155 /* 0..(MSGSEG-1) -> index of next segment */ 156 }; 157 158 #define MSG_LOCKED 01000 /* Is this msqid_ds locked? */ 159 160 static int nfree_msgmaps; /* # of free map entries */ 161 static short free_msgmaps; /* head of linked list of free map entries */ 162 static struct msg *free_msghdrs;/* list of free msg headers */ 163 static char *msgpool; /* MSGMAX byte long msg buffer pool */ 164 static struct msgmap *msgmaps; /* MSGSEG msgmap structures */ 165 static struct msg *msghdrs; /* MSGTQL msg headers */ 166 static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel struct's */ 167 static struct mtx msq_mtx; /* global mutex for message queues. */ 168 static unsigned msg_prison_slot;/* prison OSD slot */ 169 170 static struct syscall_helper_data msg_syscalls[] = { 171 SYSCALL_INIT_HELPER(msgctl), 172 SYSCALL_INIT_HELPER(msgget), 173 SYSCALL_INIT_HELPER(msgsnd), 174 SYSCALL_INIT_HELPER(msgrcv), 175 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 176 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 177 SYSCALL_INIT_HELPER(msgsys), 178 SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl), 179 #endif 180 SYSCALL_INIT_LAST 181 }; 182 183 #ifdef COMPAT_FREEBSD32 184 #include <compat/freebsd32/freebsd32.h> 185 #include <compat/freebsd32/freebsd32_ipc.h> 186 #include <compat/freebsd32/freebsd32_proto.h> 187 #include <compat/freebsd32/freebsd32_signal.h> 188 #include <compat/freebsd32/freebsd32_syscall.h> 189 #include <compat/freebsd32/freebsd32_util.h> 190 191 static struct syscall_helper_data msg32_syscalls[] = { 192 SYSCALL32_INIT_HELPER(freebsd32_msgctl), 193 SYSCALL32_INIT_HELPER(freebsd32_msgsnd), 194 SYSCALL32_INIT_HELPER(freebsd32_msgrcv), 195 SYSCALL32_INIT_HELPER_COMPAT(msgget), 196 SYSCALL32_INIT_HELPER(freebsd32_msgsys), 197 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 198 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 199 SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl), 200 #endif 201 SYSCALL_INIT_LAST 202 }; 203 #endif 204 205 static int 206 msginit() 207 { 208 struct prison *pr; 209 void **rsv; 210 int i, error; 211 osd_method_t methods[PR_MAXMETHOD] = { 212 [PR_METHOD_CHECK] = msg_prison_check, 213 [PR_METHOD_SET] = msg_prison_set, 214 [PR_METHOD_GET] = msg_prison_get, 215 [PR_METHOD_REMOVE] = msg_prison_remove, 216 }; 217 218 msginfo.msgmax = msginfo.msgseg * msginfo.msgssz; 219 msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK); 220 msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK); 221 msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK); 222 msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG, 223 M_WAITOK); 224 225 /* 226 * msginfo.msgssz should be a power of two for efficiency reasons. 227 * It is also pretty silly if msginfo.msgssz is less than 8 228 * or greater than about 256 so ... 229 */ 230 231 i = 8; 232 while (i < 1024 && i != msginfo.msgssz) 233 i <<= 1; 234 if (i != msginfo.msgssz) { 235 DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz, 236 msginfo.msgssz)); 237 panic("msginfo.msgssz not a small power of 2"); 238 } 239 240 if (msginfo.msgseg > 32767) { 241 DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg)); 242 panic("msginfo.msgseg > 32767"); 243 } 244 245 for (i = 0; i < msginfo.msgseg; i++) { 246 if (i > 0) 247 msgmaps[i-1].next = i; 248 msgmaps[i].next = -1; /* implies entry is available */ 249 } 250 free_msgmaps = 0; 251 nfree_msgmaps = msginfo.msgseg; 252 253 for (i = 0; i < msginfo.msgtql; i++) { 254 msghdrs[i].msg_type = 0; 255 if (i > 0) 256 msghdrs[i-1].msg_next = &msghdrs[i]; 257 msghdrs[i].msg_next = NULL; 258 #ifdef MAC 259 mac_sysvmsg_init(&msghdrs[i]); 260 #endif 261 } 262 free_msghdrs = &msghdrs[0]; 263 264 for (i = 0; i < msginfo.msgmni; i++) { 265 msqids[i].u.msg_qbytes = 0; /* implies entry is available */ 266 msqids[i].u.msg_perm.seq = 0; /* reset to a known value */ 267 msqids[i].u.msg_perm.mode = 0; 268 #ifdef MAC 269 mac_sysvmsq_init(&msqids[i]); 270 #endif 271 } 272 mtx_init(&msq_mtx, "msq", NULL, MTX_DEF); 273 274 /* Set current prisons according to their allow.sysvipc. */ 275 msg_prison_slot = osd_jail_register(NULL, methods); 276 rsv = osd_reserve(msg_prison_slot); 277 prison_lock(&prison0); 278 (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0); 279 prison_unlock(&prison0); 280 rsv = NULL; 281 sx_slock(&allprison_lock); 282 TAILQ_FOREACH(pr, &allprison, pr_list) { 283 if (rsv == NULL) 284 rsv = osd_reserve(msg_prison_slot); 285 prison_lock(pr); 286 if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { 287 (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, 288 &prison0); 289 rsv = NULL; 290 } 291 prison_unlock(pr); 292 } 293 if (rsv != NULL) 294 osd_free_reserved(rsv); 295 sx_sunlock(&allprison_lock); 296 297 error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD); 298 if (error != 0) 299 return (error); 300 #ifdef COMPAT_FREEBSD32 301 error = syscall32_helper_register(msg32_syscalls, SY_THR_STATIC_KLD); 302 if (error != 0) 303 return (error); 304 #endif 305 return (0); 306 } 307 308 static int 309 msgunload() 310 { 311 struct msqid_kernel *msqkptr; 312 int msqid; 313 #ifdef MAC 314 int i; 315 #endif 316 317 syscall_helper_unregister(msg_syscalls); 318 #ifdef COMPAT_FREEBSD32 319 syscall32_helper_unregister(msg32_syscalls); 320 #endif 321 322 for (msqid = 0; msqid < msginfo.msgmni; msqid++) { 323 /* 324 * Look for an unallocated and unlocked msqid_ds. 325 * msqid_ds's can be locked by msgsnd or msgrcv while 326 * they are copying the message in/out. We can't 327 * re-use the entry until they release it. 328 */ 329 msqkptr = &msqids[msqid]; 330 if (msqkptr->u.msg_qbytes != 0 || 331 (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) 332 break; 333 } 334 if (msqid != msginfo.msgmni) 335 return (EBUSY); 336 337 if (msg_prison_slot != 0) 338 osd_jail_deregister(msg_prison_slot); 339 #ifdef MAC 340 for (i = 0; i < msginfo.msgtql; i++) 341 mac_sysvmsg_destroy(&msghdrs[i]); 342 for (msqid = 0; msqid < msginfo.msgmni; msqid++) 343 mac_sysvmsq_destroy(&msqids[msqid]); 344 #endif 345 free(msgpool, M_MSG); 346 free(msgmaps, M_MSG); 347 free(msghdrs, M_MSG); 348 free(msqids, M_MSG); 349 mtx_destroy(&msq_mtx); 350 return (0); 351 } 352 353 354 static int 355 sysvmsg_modload(struct module *module, int cmd, void *arg) 356 { 357 int error = 0; 358 359 switch (cmd) { 360 case MOD_LOAD: 361 error = msginit(); 362 if (error != 0) 363 msgunload(); 364 break; 365 case MOD_UNLOAD: 366 error = msgunload(); 367 break; 368 case MOD_SHUTDOWN: 369 break; 370 default: 371 error = EINVAL; 372 break; 373 } 374 return (error); 375 } 376 377 static moduledata_t sysvmsg_mod = { 378 "sysvmsg", 379 &sysvmsg_modload, 380 NULL 381 }; 382 383 DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST); 384 MODULE_VERSION(sysvmsg, 1); 385 386 static void 387 msg_freehdr(msghdr) 388 struct msg *msghdr; 389 { 390 while (msghdr->msg_ts > 0) { 391 short next; 392 if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg) 393 panic("msghdr->msg_spot out of range"); 394 next = msgmaps[msghdr->msg_spot].next; 395 msgmaps[msghdr->msg_spot].next = free_msgmaps; 396 free_msgmaps = msghdr->msg_spot; 397 nfree_msgmaps++; 398 msghdr->msg_spot = next; 399 if (msghdr->msg_ts >= msginfo.msgssz) 400 msghdr->msg_ts -= msginfo.msgssz; 401 else 402 msghdr->msg_ts = 0; 403 } 404 if (msghdr->msg_spot != -1) 405 panic("msghdr->msg_spot != -1"); 406 msghdr->msg_next = free_msghdrs; 407 free_msghdrs = msghdr; 408 #ifdef MAC 409 mac_sysvmsg_cleanup(msghdr); 410 #endif 411 } 412 413 static void 414 msq_remove(struct msqid_kernel *msqkptr) 415 { 416 struct msg *msghdr; 417 418 racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); 419 racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum); 420 racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes); 421 crfree(msqkptr->cred); 422 msqkptr->cred = NULL; 423 424 /* Free the message headers */ 425 msghdr = msqkptr->u.msg_first; 426 while (msghdr != NULL) { 427 struct msg *msghdr_tmp; 428 429 /* Free the segments of each message */ 430 msqkptr->u.msg_cbytes -= msghdr->msg_ts; 431 msqkptr->u.msg_qnum--; 432 msghdr_tmp = msghdr; 433 msghdr = msghdr->msg_next; 434 msg_freehdr(msghdr_tmp); 435 } 436 437 if (msqkptr->u.msg_cbytes != 0) 438 panic("msg_cbytes is screwed up"); 439 if (msqkptr->u.msg_qnum != 0) 440 panic("msg_qnum is screwed up"); 441 442 msqkptr->u.msg_qbytes = 0; /* Mark it as free */ 443 444 #ifdef MAC 445 mac_sysvmsq_cleanup(msqkptr); 446 #endif 447 448 wakeup(msqkptr); 449 } 450 451 static struct prison * 452 msg_find_prison(struct ucred *cred) 453 { 454 struct prison *pr, *rpr; 455 456 pr = cred->cr_prison; 457 prison_lock(pr); 458 rpr = osd_jail_get(pr, msg_prison_slot); 459 prison_unlock(pr); 460 return rpr; 461 } 462 463 static int 464 msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr) 465 { 466 467 if (msqkptr->cred == NULL || 468 !(rpr == msqkptr->cred->cr_prison || 469 prison_ischild(rpr, msqkptr->cred->cr_prison))) 470 return (EINVAL); 471 return (0); 472 } 473 474 #ifndef _SYS_SYSPROTO_H_ 475 struct msgctl_args { 476 int msqid; 477 int cmd; 478 struct msqid_ds *buf; 479 }; 480 #endif 481 int 482 sys_msgctl(td, uap) 483 struct thread *td; 484 register struct msgctl_args *uap; 485 { 486 int msqid = uap->msqid; 487 int cmd = uap->cmd; 488 struct msqid_ds msqbuf; 489 int error; 490 491 DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf)); 492 if (cmd == IPC_SET && 493 (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0) 494 return (error); 495 error = kern_msgctl(td, msqid, cmd, &msqbuf); 496 if (cmd == IPC_STAT && error == 0) 497 error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds)); 498 return (error); 499 } 500 501 int 502 kern_msgctl(td, msqid, cmd, msqbuf) 503 struct thread *td; 504 int msqid; 505 int cmd; 506 struct msqid_ds *msqbuf; 507 { 508 int rval, error, msqix; 509 register struct msqid_kernel *msqkptr; 510 struct prison *rpr; 511 512 rpr = msg_find_prison(td->td_ucred); 513 if (rpr == NULL) 514 return (ENOSYS); 515 516 msqix = IPCID_TO_IX(msqid); 517 518 if (msqix < 0 || msqix >= msginfo.msgmni) { 519 DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, 520 msginfo.msgmni)); 521 return (EINVAL); 522 } 523 524 msqkptr = &msqids[msqix]; 525 526 mtx_lock(&msq_mtx); 527 if (msqkptr->u.msg_qbytes == 0) { 528 DPRINTF(("no such msqid\n")); 529 error = EINVAL; 530 goto done2; 531 } 532 if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { 533 DPRINTF(("wrong sequence number\n")); 534 error = EINVAL; 535 goto done2; 536 } 537 538 error = msq_prison_cansee(rpr, msqkptr); 539 if (error != 0) { 540 DPRINTF(("requester can't see prison\n")); 541 goto done2; 542 } 543 544 #ifdef MAC 545 error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd); 546 if (error != 0) 547 goto done2; 548 #endif 549 550 error = 0; 551 rval = 0; 552 553 switch (cmd) { 554 555 case IPC_RMID: 556 { 557 #ifdef MAC 558 struct msg *msghdr; 559 #endif 560 if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) 561 goto done2; 562 563 #ifdef MAC 564 /* 565 * Check that the thread has MAC access permissions to 566 * individual msghdrs. Note: We need to do this in a 567 * separate loop because the actual loop alters the 568 * msq/msghdr info as it progresses, and there is no going 569 * back if half the way through we discover that the 570 * thread cannot free a certain msghdr. The msq will get 571 * into an inconsistent state. 572 */ 573 for (msghdr = msqkptr->u.msg_first; msghdr != NULL; 574 msghdr = msghdr->msg_next) { 575 error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr); 576 if (error != 0) 577 goto done2; 578 } 579 #endif 580 581 msq_remove(msqkptr); 582 } 583 584 break; 585 586 case IPC_SET: 587 if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) 588 goto done2; 589 if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) { 590 error = priv_check(td, PRIV_IPC_MSGSIZE); 591 if (error) 592 goto done2; 593 } 594 if (msqbuf->msg_qbytes > msginfo.msgmnb) { 595 DPRINTF(("can't increase msg_qbytes beyond %d" 596 "(truncating)\n", msginfo.msgmnb)); 597 msqbuf->msg_qbytes = msginfo.msgmnb; /* silently restrict qbytes to system limit */ 598 } 599 if (msqbuf->msg_qbytes == 0) { 600 DPRINTF(("can't reduce msg_qbytes to 0\n")); 601 error = EINVAL; /* non-standard errno! */ 602 goto done2; 603 } 604 msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid; /* change the owner */ 605 msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid; /* change the owner */ 606 msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) | 607 (msqbuf->msg_perm.mode & 0777); 608 msqkptr->u.msg_qbytes = msqbuf->msg_qbytes; 609 msqkptr->u.msg_ctime = time_second; 610 break; 611 612 case IPC_STAT: 613 if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { 614 DPRINTF(("requester doesn't have read access\n")); 615 goto done2; 616 } 617 *msqbuf = msqkptr->u; 618 if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison) 619 msqbuf->msg_perm.key = IPC_PRIVATE; 620 break; 621 622 default: 623 DPRINTF(("invalid command %d\n", cmd)); 624 error = EINVAL; 625 goto done2; 626 } 627 628 if (error == 0) 629 td->td_retval[0] = rval; 630 done2: 631 mtx_unlock(&msq_mtx); 632 return (error); 633 } 634 635 #ifndef _SYS_SYSPROTO_H_ 636 struct msgget_args { 637 key_t key; 638 int msgflg; 639 }; 640 #endif 641 642 int 643 sys_msgget(td, uap) 644 struct thread *td; 645 register struct msgget_args *uap; 646 { 647 int msqid, error = 0; 648 int key = uap->key; 649 int msgflg = uap->msgflg; 650 struct ucred *cred = td->td_ucred; 651 register struct msqid_kernel *msqkptr = NULL; 652 653 DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg)); 654 655 if (msg_find_prison(cred) == NULL) 656 return (ENOSYS); 657 658 mtx_lock(&msq_mtx); 659 if (key != IPC_PRIVATE) { 660 for (msqid = 0; msqid < msginfo.msgmni; msqid++) { 661 msqkptr = &msqids[msqid]; 662 if (msqkptr->u.msg_qbytes != 0 && 663 msqkptr->cred != NULL && 664 msqkptr->cred->cr_prison == cred->cr_prison && 665 msqkptr->u.msg_perm.key == key) 666 break; 667 } 668 if (msqid < msginfo.msgmni) { 669 DPRINTF(("found public key\n")); 670 if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) { 671 DPRINTF(("not exclusive\n")); 672 error = EEXIST; 673 goto done2; 674 } 675 if ((error = ipcperm(td, &msqkptr->u.msg_perm, 676 msgflg & 0700))) { 677 DPRINTF(("requester doesn't have 0%o access\n", 678 msgflg & 0700)); 679 goto done2; 680 } 681 #ifdef MAC 682 error = mac_sysvmsq_check_msqget(cred, msqkptr); 683 if (error != 0) 684 goto done2; 685 #endif 686 goto found; 687 } 688 } 689 690 DPRINTF(("need to allocate the msqid_ds\n")); 691 if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) { 692 for (msqid = 0; msqid < msginfo.msgmni; msqid++) { 693 /* 694 * Look for an unallocated and unlocked msqid_ds. 695 * msqid_ds's can be locked by msgsnd or msgrcv while 696 * they are copying the message in/out. We can't 697 * re-use the entry until they release it. 698 */ 699 msqkptr = &msqids[msqid]; 700 if (msqkptr->u.msg_qbytes == 0 && 701 (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0) 702 break; 703 } 704 if (msqid == msginfo.msgmni) { 705 DPRINTF(("no more msqid_ds's available\n")); 706 error = ENOSPC; 707 goto done2; 708 } 709 #ifdef RACCT 710 if (racct_enable) { 711 PROC_LOCK(td->td_proc); 712 error = racct_add(td->td_proc, RACCT_NMSGQ, 1); 713 PROC_UNLOCK(td->td_proc); 714 if (error != 0) { 715 error = ENOSPC; 716 goto done2; 717 } 718 } 719 #endif 720 DPRINTF(("msqid %d is available\n", msqid)); 721 msqkptr->u.msg_perm.key = key; 722 msqkptr->u.msg_perm.cuid = cred->cr_uid; 723 msqkptr->u.msg_perm.uid = cred->cr_uid; 724 msqkptr->u.msg_perm.cgid = cred->cr_gid; 725 msqkptr->u.msg_perm.gid = cred->cr_gid; 726 msqkptr->u.msg_perm.mode = (msgflg & 0777); 727 msqkptr->cred = crhold(cred); 728 /* Make sure that the returned msqid is unique */ 729 msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff; 730 msqkptr->u.msg_first = NULL; 731 msqkptr->u.msg_last = NULL; 732 msqkptr->u.msg_cbytes = 0; 733 msqkptr->u.msg_qnum = 0; 734 msqkptr->u.msg_qbytes = msginfo.msgmnb; 735 msqkptr->u.msg_lspid = 0; 736 msqkptr->u.msg_lrpid = 0; 737 msqkptr->u.msg_stime = 0; 738 msqkptr->u.msg_rtime = 0; 739 msqkptr->u.msg_ctime = time_second; 740 #ifdef MAC 741 mac_sysvmsq_create(cred, msqkptr); 742 #endif 743 } else { 744 DPRINTF(("didn't find it and wasn't asked to create it\n")); 745 error = ENOENT; 746 goto done2; 747 } 748 749 found: 750 /* Construct the unique msqid */ 751 td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm); 752 done2: 753 mtx_unlock(&msq_mtx); 754 return (error); 755 } 756 757 #ifndef _SYS_SYSPROTO_H_ 758 struct msgsnd_args { 759 int msqid; 760 const void *msgp; 761 size_t msgsz; 762 int msgflg; 763 }; 764 #endif 765 int 766 kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype) 767 struct thread *td; 768 int msqid; 769 const void *msgp; /* XXX msgp is actually mtext. */ 770 size_t msgsz; 771 int msgflg; 772 long mtype; 773 { 774 int msqix, segs_needed, error = 0; 775 register struct msqid_kernel *msqkptr; 776 register struct msg *msghdr; 777 struct prison *rpr; 778 short next; 779 #ifdef RACCT 780 size_t saved_msgsz; 781 #endif 782 783 rpr = msg_find_prison(td->td_ucred); 784 if (rpr == NULL) 785 return (ENOSYS); 786 787 mtx_lock(&msq_mtx); 788 msqix = IPCID_TO_IX(msqid); 789 790 if (msqix < 0 || msqix >= msginfo.msgmni) { 791 DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, 792 msginfo.msgmni)); 793 error = EINVAL; 794 goto done2; 795 } 796 797 msqkptr = &msqids[msqix]; 798 if (msqkptr->u.msg_qbytes == 0) { 799 DPRINTF(("no such message queue id\n")); 800 error = EINVAL; 801 goto done2; 802 } 803 if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { 804 DPRINTF(("wrong sequence number\n")); 805 error = EINVAL; 806 goto done2; 807 } 808 809 if ((error = msq_prison_cansee(rpr, msqkptr))) { 810 DPRINTF(("requester can't see prison\n")); 811 goto done2; 812 } 813 814 if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) { 815 DPRINTF(("requester doesn't have write access\n")); 816 goto done2; 817 } 818 819 #ifdef MAC 820 error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr); 821 if (error != 0) 822 goto done2; 823 #endif 824 825 #ifdef RACCT 826 if (racct_enable) { 827 PROC_LOCK(td->td_proc); 828 if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) { 829 PROC_UNLOCK(td->td_proc); 830 error = EAGAIN; 831 goto done2; 832 } 833 saved_msgsz = msgsz; 834 if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) { 835 racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); 836 PROC_UNLOCK(td->td_proc); 837 error = EAGAIN; 838 goto done2; 839 } 840 PROC_UNLOCK(td->td_proc); 841 } 842 #endif 843 844 segs_needed = howmany(msgsz, msginfo.msgssz); 845 DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz, 846 msginfo.msgssz, segs_needed)); 847 for (;;) { 848 int need_more_resources = 0; 849 850 /* 851 * check msgsz 852 * (inside this loop in case msg_qbytes changes while we sleep) 853 */ 854 855 if (msgsz > msqkptr->u.msg_qbytes) { 856 DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n")); 857 error = EINVAL; 858 goto done3; 859 } 860 861 if (msqkptr->u.msg_perm.mode & MSG_LOCKED) { 862 DPRINTF(("msqid is locked\n")); 863 need_more_resources = 1; 864 } 865 if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) { 866 DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n")); 867 need_more_resources = 1; 868 } 869 if (segs_needed > nfree_msgmaps) { 870 DPRINTF(("segs_needed > nfree_msgmaps\n")); 871 need_more_resources = 1; 872 } 873 if (free_msghdrs == NULL) { 874 DPRINTF(("no more msghdrs\n")); 875 need_more_resources = 1; 876 } 877 878 if (need_more_resources) { 879 int we_own_it; 880 881 if ((msgflg & IPC_NOWAIT) != 0) { 882 DPRINTF(("need more resources but caller " 883 "doesn't want to wait\n")); 884 error = EAGAIN; 885 goto done3; 886 } 887 888 if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) { 889 DPRINTF(("we don't own the msqid_ds\n")); 890 we_own_it = 0; 891 } else { 892 /* Force later arrivals to wait for our 893 request */ 894 DPRINTF(("we own the msqid_ds\n")); 895 msqkptr->u.msg_perm.mode |= MSG_LOCKED; 896 we_own_it = 1; 897 } 898 DPRINTF(("msgsnd: goodnight\n")); 899 error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH, 900 "msgsnd", hz); 901 DPRINTF(("msgsnd: good morning, error=%d\n", error)); 902 if (we_own_it) 903 msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; 904 if (error == EWOULDBLOCK) { 905 DPRINTF(("msgsnd: timed out\n")); 906 continue; 907 } 908 if (error != 0) { 909 DPRINTF(("msgsnd: interrupted system call\n")); 910 error = EINTR; 911 goto done3; 912 } 913 914 /* 915 * Make sure that the msq queue still exists 916 */ 917 918 if (msqkptr->u.msg_qbytes == 0) { 919 DPRINTF(("msqid deleted\n")); 920 error = EIDRM; 921 goto done3; 922 } 923 924 } else { 925 DPRINTF(("got all the resources that we need\n")); 926 break; 927 } 928 } 929 930 /* 931 * We have the resources that we need. 932 * Make sure! 933 */ 934 935 if (msqkptr->u.msg_perm.mode & MSG_LOCKED) 936 panic("msg_perm.mode & MSG_LOCKED"); 937 if (segs_needed > nfree_msgmaps) 938 panic("segs_needed > nfree_msgmaps"); 939 if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) 940 panic("msgsz + msg_cbytes > msg_qbytes"); 941 if (free_msghdrs == NULL) 942 panic("no more msghdrs"); 943 944 /* 945 * Re-lock the msqid_ds in case we page-fault when copying in the 946 * message 947 */ 948 949 if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) 950 panic("msqid_ds is already locked"); 951 msqkptr->u.msg_perm.mode |= MSG_LOCKED; 952 953 /* 954 * Allocate a message header 955 */ 956 957 msghdr = free_msghdrs; 958 free_msghdrs = msghdr->msg_next; 959 msghdr->msg_spot = -1; 960 msghdr->msg_ts = msgsz; 961 msghdr->msg_type = mtype; 962 #ifdef MAC 963 /* 964 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here 965 * immediately? Or, should it be checked just before the msg is 966 * enqueued in the msgq (as it is done now)? 967 */ 968 mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr); 969 #endif 970 971 /* 972 * Allocate space for the message 973 */ 974 975 while (segs_needed > 0) { 976 if (nfree_msgmaps <= 0) 977 panic("not enough msgmaps"); 978 if (free_msgmaps == -1) 979 panic("nil free_msgmaps"); 980 next = free_msgmaps; 981 if (next <= -1) 982 panic("next too low #1"); 983 if (next >= msginfo.msgseg) 984 panic("next out of range #1"); 985 DPRINTF(("allocating segment %d to message\n", next)); 986 free_msgmaps = msgmaps[next].next; 987 nfree_msgmaps--; 988 msgmaps[next].next = msghdr->msg_spot; 989 msghdr->msg_spot = next; 990 segs_needed--; 991 } 992 993 /* 994 * Validate the message type 995 */ 996 997 if (msghdr->msg_type < 1) { 998 msg_freehdr(msghdr); 999 msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; 1000 wakeup(msqkptr); 1001 DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type)); 1002 error = EINVAL; 1003 goto done3; 1004 } 1005 1006 /* 1007 * Copy in the message body 1008 */ 1009 1010 next = msghdr->msg_spot; 1011 while (msgsz > 0) { 1012 size_t tlen; 1013 if (msgsz > msginfo.msgssz) 1014 tlen = msginfo.msgssz; 1015 else 1016 tlen = msgsz; 1017 if (next <= -1) 1018 panic("next too low #2"); 1019 if (next >= msginfo.msgseg) 1020 panic("next out of range #2"); 1021 mtx_unlock(&msq_mtx); 1022 if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz], 1023 tlen)) != 0) { 1024 mtx_lock(&msq_mtx); 1025 DPRINTF(("error %d copying in message segment\n", 1026 error)); 1027 msg_freehdr(msghdr); 1028 msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; 1029 wakeup(msqkptr); 1030 goto done3; 1031 } 1032 mtx_lock(&msq_mtx); 1033 msgsz -= tlen; 1034 msgp = (const char *)msgp + tlen; 1035 next = msgmaps[next].next; 1036 } 1037 if (next != -1) 1038 panic("didn't use all the msg segments"); 1039 1040 /* 1041 * We've got the message. Unlock the msqid_ds. 1042 */ 1043 1044 msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; 1045 1046 /* 1047 * Make sure that the msqid_ds is still allocated. 1048 */ 1049 1050 if (msqkptr->u.msg_qbytes == 0) { 1051 msg_freehdr(msghdr); 1052 wakeup(msqkptr); 1053 error = EIDRM; 1054 goto done3; 1055 } 1056 1057 #ifdef MAC 1058 /* 1059 * Note: Since the task/thread allocates the msghdr and usually 1060 * primes it with its own MAC label, for a majority of policies, it 1061 * won't be necessary to check whether the msghdr has access 1062 * permissions to the msgq. The mac_sysvmsq_check_msqsnd check would 1063 * suffice in that case. However, this hook may be required where 1064 * individual policies derive a non-identical label for the msghdr 1065 * from the current thread label and may want to check the msghdr 1066 * enqueue permissions, along with read/write permissions to the 1067 * msgq. 1068 */ 1069 error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr); 1070 if (error != 0) { 1071 msg_freehdr(msghdr); 1072 wakeup(msqkptr); 1073 goto done3; 1074 } 1075 #endif 1076 1077 /* 1078 * Put the message into the queue 1079 */ 1080 if (msqkptr->u.msg_first == NULL) { 1081 msqkptr->u.msg_first = msghdr; 1082 msqkptr->u.msg_last = msghdr; 1083 } else { 1084 msqkptr->u.msg_last->msg_next = msghdr; 1085 msqkptr->u.msg_last = msghdr; 1086 } 1087 msqkptr->u.msg_last->msg_next = NULL; 1088 1089 msqkptr->u.msg_cbytes += msghdr->msg_ts; 1090 msqkptr->u.msg_qnum++; 1091 msqkptr->u.msg_lspid = td->td_proc->p_pid; 1092 msqkptr->u.msg_stime = time_second; 1093 1094 wakeup(msqkptr); 1095 td->td_retval[0] = 0; 1096 done3: 1097 #ifdef RACCT 1098 if (racct_enable && error != 0) { 1099 PROC_LOCK(td->td_proc); 1100 racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); 1101 racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz); 1102 PROC_UNLOCK(td->td_proc); 1103 } 1104 #endif 1105 done2: 1106 mtx_unlock(&msq_mtx); 1107 return (error); 1108 } 1109 1110 int 1111 sys_msgsnd(td, uap) 1112 struct thread *td; 1113 register struct msgsnd_args *uap; 1114 { 1115 int error; 1116 long mtype; 1117 1118 DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp, 1119 uap->msgsz, uap->msgflg)); 1120 1121 if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) { 1122 DPRINTF(("error %d copying the message type\n", error)); 1123 return (error); 1124 } 1125 return (kern_msgsnd(td, uap->msqid, 1126 (const char *)uap->msgp + sizeof(mtype), 1127 uap->msgsz, uap->msgflg, mtype)); 1128 } 1129 1130 #ifndef _SYS_SYSPROTO_H_ 1131 struct msgrcv_args { 1132 int msqid; 1133 void *msgp; 1134 size_t msgsz; 1135 long msgtyp; 1136 int msgflg; 1137 }; 1138 #endif 1139 int 1140 kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype) 1141 struct thread *td; 1142 int msqid; 1143 void *msgp; /* XXX msgp is actually mtext. */ 1144 size_t msgsz; 1145 long msgtyp; 1146 int msgflg; 1147 long *mtype; 1148 { 1149 size_t len; 1150 register struct msqid_kernel *msqkptr; 1151 register struct msg *msghdr; 1152 struct prison *rpr; 1153 int msqix, error = 0; 1154 short next; 1155 1156 rpr = msg_find_prison(td->td_ucred); 1157 if (rpr == NULL) 1158 return (ENOSYS); 1159 1160 msqix = IPCID_TO_IX(msqid); 1161 1162 if (msqix < 0 || msqix >= msginfo.msgmni) { 1163 DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, 1164 msginfo.msgmni)); 1165 return (EINVAL); 1166 } 1167 1168 msqkptr = &msqids[msqix]; 1169 mtx_lock(&msq_mtx); 1170 if (msqkptr->u.msg_qbytes == 0) { 1171 DPRINTF(("no such message queue id\n")); 1172 error = EINVAL; 1173 goto done2; 1174 } 1175 if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { 1176 DPRINTF(("wrong sequence number\n")); 1177 error = EINVAL; 1178 goto done2; 1179 } 1180 1181 if ((error = msq_prison_cansee(rpr, msqkptr))) { 1182 DPRINTF(("requester can't see prison\n")); 1183 goto done2; 1184 } 1185 1186 if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { 1187 DPRINTF(("requester doesn't have read access\n")); 1188 goto done2; 1189 } 1190 1191 #ifdef MAC 1192 error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr); 1193 if (error != 0) 1194 goto done2; 1195 #endif 1196 1197 msghdr = NULL; 1198 while (msghdr == NULL) { 1199 if (msgtyp == 0) { 1200 msghdr = msqkptr->u.msg_first; 1201 if (msghdr != NULL) { 1202 if (msgsz < msghdr->msg_ts && 1203 (msgflg & MSG_NOERROR) == 0) { 1204 DPRINTF(("first message on the queue " 1205 "is too big (want %zu, got %d)\n", 1206 msgsz, msghdr->msg_ts)); 1207 error = E2BIG; 1208 goto done2; 1209 } 1210 #ifdef MAC 1211 error = mac_sysvmsq_check_msgrcv(td->td_ucred, 1212 msghdr); 1213 if (error != 0) 1214 goto done2; 1215 #endif 1216 if (msqkptr->u.msg_first == msqkptr->u.msg_last) { 1217 msqkptr->u.msg_first = NULL; 1218 msqkptr->u.msg_last = NULL; 1219 } else { 1220 msqkptr->u.msg_first = msghdr->msg_next; 1221 if (msqkptr->u.msg_first == NULL) 1222 panic("msg_first/last screwed up #1"); 1223 } 1224 } 1225 } else { 1226 struct msg *previous; 1227 struct msg **prev; 1228 1229 previous = NULL; 1230 prev = &(msqkptr->u.msg_first); 1231 while ((msghdr = *prev) != NULL) { 1232 /* 1233 * Is this message's type an exact match or is 1234 * this message's type less than or equal to 1235 * the absolute value of a negative msgtyp? 1236 * Note that the second half of this test can 1237 * NEVER be true if msgtyp is positive since 1238 * msg_type is always positive! 1239 */ 1240 1241 if (msgtyp == msghdr->msg_type || 1242 msghdr->msg_type <= -msgtyp) { 1243 DPRINTF(("found message type %ld, " 1244 "requested %ld\n", 1245 msghdr->msg_type, msgtyp)); 1246 if (msgsz < msghdr->msg_ts && 1247 (msgflg & MSG_NOERROR) == 0) { 1248 DPRINTF(("requested message " 1249 "on the queue is too big " 1250 "(want %zu, got %hu)\n", 1251 msgsz, msghdr->msg_ts)); 1252 error = E2BIG; 1253 goto done2; 1254 } 1255 #ifdef MAC 1256 error = mac_sysvmsq_check_msgrcv( 1257 td->td_ucred, msghdr); 1258 if (error != 0) 1259 goto done2; 1260 #endif 1261 *prev = msghdr->msg_next; 1262 if (msghdr == msqkptr->u.msg_last) { 1263 if (previous == NULL) { 1264 if (prev != 1265 &msqkptr->u.msg_first) 1266 panic("msg_first/last screwed up #2"); 1267 msqkptr->u.msg_first = 1268 NULL; 1269 msqkptr->u.msg_last = 1270 NULL; 1271 } else { 1272 if (prev == 1273 &msqkptr->u.msg_first) 1274 panic("msg_first/last screwed up #3"); 1275 msqkptr->u.msg_last = 1276 previous; 1277 } 1278 } 1279 break; 1280 } 1281 previous = msghdr; 1282 prev = &(msghdr->msg_next); 1283 } 1284 } 1285 1286 /* 1287 * We've either extracted the msghdr for the appropriate 1288 * message or there isn't one. 1289 * If there is one then bail out of this loop. 1290 */ 1291 1292 if (msghdr != NULL) 1293 break; 1294 1295 /* 1296 * Hmph! No message found. Does the user want to wait? 1297 */ 1298 1299 if ((msgflg & IPC_NOWAIT) != 0) { 1300 DPRINTF(("no appropriate message found (msgtyp=%ld)\n", 1301 msgtyp)); 1302 /* The SVID says to return ENOMSG. */ 1303 error = ENOMSG; 1304 goto done2; 1305 } 1306 1307 /* 1308 * Wait for something to happen 1309 */ 1310 1311 DPRINTF(("msgrcv: goodnight\n")); 1312 error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH, 1313 "msgrcv", 0); 1314 DPRINTF(("msgrcv: good morning (error=%d)\n", error)); 1315 1316 if (error != 0) { 1317 DPRINTF(("msgrcv: interrupted system call\n")); 1318 error = EINTR; 1319 goto done2; 1320 } 1321 1322 /* 1323 * Make sure that the msq queue still exists 1324 */ 1325 1326 if (msqkptr->u.msg_qbytes == 0 || 1327 msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { 1328 DPRINTF(("msqid deleted\n")); 1329 error = EIDRM; 1330 goto done2; 1331 } 1332 } 1333 1334 /* 1335 * Return the message to the user. 1336 * 1337 * First, do the bookkeeping (before we risk being interrupted). 1338 */ 1339 1340 msqkptr->u.msg_cbytes -= msghdr->msg_ts; 1341 msqkptr->u.msg_qnum--; 1342 msqkptr->u.msg_lrpid = td->td_proc->p_pid; 1343 msqkptr->u.msg_rtime = time_second; 1344 1345 racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1); 1346 racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts); 1347 1348 /* 1349 * Make msgsz the actual amount that we'll be returning. 1350 * Note that this effectively truncates the message if it is too long 1351 * (since msgsz is never increased). 1352 */ 1353 1354 DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz, 1355 msghdr->msg_ts)); 1356 if (msgsz > msghdr->msg_ts) 1357 msgsz = msghdr->msg_ts; 1358 *mtype = msghdr->msg_type; 1359 1360 /* 1361 * Return the segments to the user 1362 */ 1363 1364 next = msghdr->msg_spot; 1365 for (len = 0; len < msgsz; len += msginfo.msgssz) { 1366 size_t tlen; 1367 1368 if (msgsz - len > msginfo.msgssz) 1369 tlen = msginfo.msgssz; 1370 else 1371 tlen = msgsz - len; 1372 if (next <= -1) 1373 panic("next too low #3"); 1374 if (next >= msginfo.msgseg) 1375 panic("next out of range #3"); 1376 mtx_unlock(&msq_mtx); 1377 error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen); 1378 mtx_lock(&msq_mtx); 1379 if (error != 0) { 1380 DPRINTF(("error (%d) copying out message segment\n", 1381 error)); 1382 msg_freehdr(msghdr); 1383 wakeup(msqkptr); 1384 goto done2; 1385 } 1386 msgp = (char *)msgp + tlen; 1387 next = msgmaps[next].next; 1388 } 1389 1390 /* 1391 * Done, return the actual number of bytes copied out. 1392 */ 1393 1394 msg_freehdr(msghdr); 1395 wakeup(msqkptr); 1396 td->td_retval[0] = msgsz; 1397 done2: 1398 mtx_unlock(&msq_mtx); 1399 return (error); 1400 } 1401 1402 int 1403 sys_msgrcv(td, uap) 1404 struct thread *td; 1405 register struct msgrcv_args *uap; 1406 { 1407 int error; 1408 long mtype; 1409 1410 DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid, 1411 uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg)); 1412 1413 if ((error = kern_msgrcv(td, uap->msqid, 1414 (char *)uap->msgp + sizeof(mtype), uap->msgsz, 1415 uap->msgtyp, uap->msgflg, &mtype)) != 0) 1416 return (error); 1417 if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0) 1418 DPRINTF(("error %d copying the message type\n", error)); 1419 return (error); 1420 } 1421 1422 static int 1423 sysctl_msqids(SYSCTL_HANDLER_ARGS) 1424 { 1425 struct msqid_kernel tmsqk; 1426 struct prison *pr, *rpr; 1427 int error, i; 1428 1429 pr = req->td->td_ucred->cr_prison; 1430 rpr = msg_find_prison(req->td->td_ucred); 1431 error = 0; 1432 for (i = 0; i < msginfo.msgmni; i++) { 1433 mtx_lock(&msq_mtx); 1434 if (msqids[i].u.msg_qbytes == 0 || rpr == NULL || 1435 msq_prison_cansee(rpr, &msqids[i]) != 0) 1436 bzero(&tmsqk, sizeof(tmsqk)); 1437 else { 1438 tmsqk = msqids[i]; 1439 if (tmsqk.cred->cr_prison != pr) 1440 tmsqk.u.msg_perm.key = IPC_PRIVATE; 1441 } 1442 mtx_unlock(&msq_mtx); 1443 error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk)); 1444 if (error != 0) 1445 break; 1446 } 1447 return (error); 1448 } 1449 1450 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, 1451 "Maximum message size"); 1452 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, 1453 "Number of message queue identifiers"); 1454 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0, 1455 "Maximum number of bytes in a queue"); 1456 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0, 1457 "Maximum number of messages in the system"); 1458 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, 1459 "Size of a message segment"); 1460 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, 1461 "Number of message segments"); 1462 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, 1463 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 1464 NULL, 0, sysctl_msqids, "", "Message queue IDs"); 1465 1466 static int 1467 msg_prison_check(void *obj, void *data) 1468 { 1469 struct prison *pr = obj; 1470 struct prison *prpr; 1471 struct vfsoptlist *opts = data; 1472 int error, jsys; 1473 1474 /* 1475 * sysvmsg is a jailsys integer. 1476 * It must be "disable" if the parent jail is disabled. 1477 */ 1478 error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)); 1479 if (error != ENOENT) { 1480 if (error != 0) 1481 return (error); 1482 switch (jsys) { 1483 case JAIL_SYS_DISABLE: 1484 break; 1485 case JAIL_SYS_NEW: 1486 case JAIL_SYS_INHERIT: 1487 prison_lock(pr->pr_parent); 1488 prpr = osd_jail_get(pr->pr_parent, msg_prison_slot); 1489 prison_unlock(pr->pr_parent); 1490 if (prpr == NULL) 1491 return (EPERM); 1492 break; 1493 default: 1494 return (EINVAL); 1495 } 1496 } 1497 1498 return (0); 1499 } 1500 1501 static int 1502 msg_prison_set(void *obj, void *data) 1503 { 1504 struct prison *pr = obj; 1505 struct prison *tpr, *orpr, *nrpr, *trpr; 1506 struct vfsoptlist *opts = data; 1507 void *rsv; 1508 int jsys, descend; 1509 1510 /* 1511 * sysvmsg controls which jail is the root of the associated msgs (this 1512 * jail or same as the parent), or if the feature is available at all. 1513 */ 1514 if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT) 1515 jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) 1516 ? JAIL_SYS_INHERIT 1517 : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) 1518 ? JAIL_SYS_DISABLE 1519 : -1; 1520 if (jsys == JAIL_SYS_DISABLE) { 1521 prison_lock(pr); 1522 orpr = osd_jail_get(pr, msg_prison_slot); 1523 if (orpr != NULL) 1524 osd_jail_del(pr, msg_prison_slot); 1525 prison_unlock(pr); 1526 if (orpr != NULL) { 1527 if (orpr == pr) 1528 msg_prison_cleanup(pr); 1529 /* Disable all child jails as well. */ 1530 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1531 prison_lock(tpr); 1532 trpr = osd_jail_get(tpr, msg_prison_slot); 1533 if (trpr != NULL) { 1534 osd_jail_del(tpr, msg_prison_slot); 1535 prison_unlock(tpr); 1536 if (trpr == tpr) 1537 msg_prison_cleanup(tpr); 1538 } else { 1539 prison_unlock(tpr); 1540 descend = 0; 1541 } 1542 } 1543 } 1544 } else if (jsys != -1) { 1545 if (jsys == JAIL_SYS_NEW) 1546 nrpr = pr; 1547 else { 1548 prison_lock(pr->pr_parent); 1549 nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot); 1550 prison_unlock(pr->pr_parent); 1551 } 1552 rsv = osd_reserve(msg_prison_slot); 1553 prison_lock(pr); 1554 orpr = osd_jail_get(pr, msg_prison_slot); 1555 if (orpr != nrpr) 1556 (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, 1557 nrpr); 1558 else 1559 osd_free_reserved(rsv); 1560 prison_unlock(pr); 1561 if (orpr != nrpr) { 1562 if (orpr == pr) 1563 msg_prison_cleanup(pr); 1564 if (orpr != NULL) { 1565 /* Change child jails matching the old root, */ 1566 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1567 prison_lock(tpr); 1568 trpr = osd_jail_get(tpr, 1569 msg_prison_slot); 1570 if (trpr == orpr) { 1571 (void)osd_jail_set(tpr, 1572 msg_prison_slot, nrpr); 1573 prison_unlock(tpr); 1574 if (trpr == tpr) 1575 msg_prison_cleanup(tpr); 1576 } else { 1577 prison_unlock(tpr); 1578 descend = 0; 1579 } 1580 } 1581 } 1582 } 1583 } 1584 1585 return (0); 1586 } 1587 1588 static int 1589 msg_prison_get(void *obj, void *data) 1590 { 1591 struct prison *pr = obj; 1592 struct prison *rpr; 1593 struct vfsoptlist *opts = data; 1594 int error, jsys; 1595 1596 /* Set sysvmsg based on the jail's root prison. */ 1597 prison_lock(pr); 1598 rpr = osd_jail_get(pr, msg_prison_slot); 1599 prison_unlock(pr); 1600 jsys = rpr == NULL ? JAIL_SYS_DISABLE 1601 : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 1602 error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys)); 1603 if (error == ENOENT) 1604 error = 0; 1605 return (error); 1606 } 1607 1608 static int 1609 msg_prison_remove(void *obj, void *data __unused) 1610 { 1611 struct prison *pr = obj; 1612 struct prison *rpr; 1613 1614 prison_lock(pr); 1615 rpr = osd_jail_get(pr, msg_prison_slot); 1616 prison_unlock(pr); 1617 if (rpr == pr) 1618 msg_prison_cleanup(pr); 1619 return (0); 1620 } 1621 1622 static void 1623 msg_prison_cleanup(struct prison *pr) 1624 { 1625 struct msqid_kernel *msqkptr; 1626 int i; 1627 1628 /* Remove any msqs that belong to this jail. */ 1629 mtx_lock(&msq_mtx); 1630 for (i = 0; i < msginfo.msgmni; i++) { 1631 msqkptr = &msqids[i]; 1632 if (msqkptr->u.msg_qbytes != 0 && 1633 msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr) 1634 msq_remove(msqkptr); 1635 } 1636 mtx_unlock(&msq_mtx); 1637 } 1638 1639 SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues"); 1640 1641 #ifdef COMPAT_FREEBSD32 1642 int 1643 freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap) 1644 { 1645 1646 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1647 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1648 switch (uap->which) { 1649 case 0: 1650 return (freebsd7_freebsd32_msgctl(td, 1651 (struct freebsd7_freebsd32_msgctl_args *)&uap->a2)); 1652 case 2: 1653 return (freebsd32_msgsnd(td, 1654 (struct freebsd32_msgsnd_args *)&uap->a2)); 1655 case 3: 1656 return (freebsd32_msgrcv(td, 1657 (struct freebsd32_msgrcv_args *)&uap->a2)); 1658 default: 1659 return (sys_msgsys(td, (struct msgsys_args *)uap)); 1660 } 1661 #else 1662 return (nosys(td, NULL)); 1663 #endif 1664 } 1665 1666 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1667 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1668 int 1669 freebsd7_freebsd32_msgctl(struct thread *td, 1670 struct freebsd7_freebsd32_msgctl_args *uap) 1671 { 1672 struct msqid_ds msqbuf; 1673 struct msqid_ds32_old msqbuf32; 1674 int error; 1675 1676 if (uap->cmd == IPC_SET) { 1677 error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32)); 1678 if (error) 1679 return (error); 1680 freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm); 1681 PTRIN_CP(msqbuf32, msqbuf, msg_first); 1682 PTRIN_CP(msqbuf32, msqbuf, msg_last); 1683 CP(msqbuf32, msqbuf, msg_cbytes); 1684 CP(msqbuf32, msqbuf, msg_qnum); 1685 CP(msqbuf32, msqbuf, msg_qbytes); 1686 CP(msqbuf32, msqbuf, msg_lspid); 1687 CP(msqbuf32, msqbuf, msg_lrpid); 1688 CP(msqbuf32, msqbuf, msg_stime); 1689 CP(msqbuf32, msqbuf, msg_rtime); 1690 CP(msqbuf32, msqbuf, msg_ctime); 1691 } 1692 error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); 1693 if (error) 1694 return (error); 1695 if (uap->cmd == IPC_STAT) { 1696 bzero(&msqbuf32, sizeof(msqbuf32)); 1697 freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm); 1698 PTROUT_CP(msqbuf, msqbuf32, msg_first); 1699 PTROUT_CP(msqbuf, msqbuf32, msg_last); 1700 CP(msqbuf, msqbuf32, msg_cbytes); 1701 CP(msqbuf, msqbuf32, msg_qnum); 1702 CP(msqbuf, msqbuf32, msg_qbytes); 1703 CP(msqbuf, msqbuf32, msg_lspid); 1704 CP(msqbuf, msqbuf32, msg_lrpid); 1705 CP(msqbuf, msqbuf32, msg_stime); 1706 CP(msqbuf, msqbuf32, msg_rtime); 1707 CP(msqbuf, msqbuf32, msg_ctime); 1708 error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32)); 1709 } 1710 return (error); 1711 } 1712 #endif 1713 1714 int 1715 freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap) 1716 { 1717 struct msqid_ds msqbuf; 1718 struct msqid_ds32 msqbuf32; 1719 int error; 1720 1721 if (uap->cmd == IPC_SET) { 1722 error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32)); 1723 if (error) 1724 return (error); 1725 freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm); 1726 PTRIN_CP(msqbuf32, msqbuf, msg_first); 1727 PTRIN_CP(msqbuf32, msqbuf, msg_last); 1728 CP(msqbuf32, msqbuf, msg_cbytes); 1729 CP(msqbuf32, msqbuf, msg_qnum); 1730 CP(msqbuf32, msqbuf, msg_qbytes); 1731 CP(msqbuf32, msqbuf, msg_lspid); 1732 CP(msqbuf32, msqbuf, msg_lrpid); 1733 CP(msqbuf32, msqbuf, msg_stime); 1734 CP(msqbuf32, msqbuf, msg_rtime); 1735 CP(msqbuf32, msqbuf, msg_ctime); 1736 } 1737 error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); 1738 if (error) 1739 return (error); 1740 if (uap->cmd == IPC_STAT) { 1741 freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm); 1742 PTROUT_CP(msqbuf, msqbuf32, msg_first); 1743 PTROUT_CP(msqbuf, msqbuf32, msg_last); 1744 CP(msqbuf, msqbuf32, msg_cbytes); 1745 CP(msqbuf, msqbuf32, msg_qnum); 1746 CP(msqbuf, msqbuf32, msg_qbytes); 1747 CP(msqbuf, msqbuf32, msg_lspid); 1748 CP(msqbuf, msqbuf32, msg_lrpid); 1749 CP(msqbuf, msqbuf32, msg_stime); 1750 CP(msqbuf, msqbuf32, msg_rtime); 1751 CP(msqbuf, msqbuf32, msg_ctime); 1752 error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32)); 1753 } 1754 return (error); 1755 } 1756 1757 int 1758 freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap) 1759 { 1760 const void *msgp; 1761 long mtype; 1762 int32_t mtype32; 1763 int error; 1764 1765 msgp = PTRIN(uap->msgp); 1766 if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0) 1767 return (error); 1768 mtype = mtype32; 1769 return (kern_msgsnd(td, uap->msqid, 1770 (const char *)msgp + sizeof(mtype32), 1771 uap->msgsz, uap->msgflg, mtype)); 1772 } 1773 1774 int 1775 freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap) 1776 { 1777 void *msgp; 1778 long mtype; 1779 int32_t mtype32; 1780 int error; 1781 1782 msgp = PTRIN(uap->msgp); 1783 if ((error = kern_msgrcv(td, uap->msqid, 1784 (char *)msgp + sizeof(mtype32), uap->msgsz, 1785 uap->msgtyp, uap->msgflg, &mtype)) != 0) 1786 return (error); 1787 mtype32 = (int32_t)mtype; 1788 return (copyout(&mtype32, msgp, sizeof(mtype32))); 1789 } 1790 #endif 1791 1792 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1793 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1794 1795 /* XXX casting to (sy_call_t *) is bogus, as usual. */ 1796 static sy_call_t *msgcalls[] = { 1797 (sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget, 1798 (sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv 1799 }; 1800 1801 /* 1802 * Entry point for all MSG calls. 1803 */ 1804 int 1805 sys_msgsys(td, uap) 1806 struct thread *td; 1807 /* XXX actually varargs. */ 1808 struct msgsys_args /* { 1809 int which; 1810 int a2; 1811 int a3; 1812 int a4; 1813 int a5; 1814 int a6; 1815 } */ *uap; 1816 { 1817 int error; 1818 1819 if (uap->which < 0 || uap->which >= nitems(msgcalls)) 1820 return (EINVAL); 1821 error = (*msgcalls[uap->which])(td, &uap->a2); 1822 return (error); 1823 } 1824 1825 #ifndef CP 1826 #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) 1827 #endif 1828 1829 #ifndef _SYS_SYSPROTO_H_ 1830 struct freebsd7_msgctl_args { 1831 int msqid; 1832 int cmd; 1833 struct msqid_ds_old *buf; 1834 }; 1835 #endif 1836 int 1837 freebsd7_msgctl(td, uap) 1838 struct thread *td; 1839 struct freebsd7_msgctl_args *uap; 1840 { 1841 struct msqid_ds_old msqold; 1842 struct msqid_ds msqbuf; 1843 int error; 1844 1845 DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd, 1846 uap->buf)); 1847 if (uap->cmd == IPC_SET) { 1848 error = copyin(uap->buf, &msqold, sizeof(msqold)); 1849 if (error) 1850 return (error); 1851 ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm); 1852 CP(msqold, msqbuf, msg_first); 1853 CP(msqold, msqbuf, msg_last); 1854 CP(msqold, msqbuf, msg_cbytes); 1855 CP(msqold, msqbuf, msg_qnum); 1856 CP(msqold, msqbuf, msg_qbytes); 1857 CP(msqold, msqbuf, msg_lspid); 1858 CP(msqold, msqbuf, msg_lrpid); 1859 CP(msqold, msqbuf, msg_stime); 1860 CP(msqold, msqbuf, msg_rtime); 1861 CP(msqold, msqbuf, msg_ctime); 1862 } 1863 error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); 1864 if (error) 1865 return (error); 1866 if (uap->cmd == IPC_STAT) { 1867 bzero(&msqold, sizeof(msqold)); 1868 ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm); 1869 CP(msqbuf, msqold, msg_first); 1870 CP(msqbuf, msqold, msg_last); 1871 CP(msqbuf, msqold, msg_cbytes); 1872 CP(msqbuf, msqold, msg_qnum); 1873 CP(msqbuf, msqold, msg_qbytes); 1874 CP(msqbuf, msqold, msg_lspid); 1875 CP(msqbuf, msqold, msg_lrpid); 1876 CP(msqbuf, msqold, msg_stime); 1877 CP(msqbuf, msqold, msg_rtime); 1878 CP(msqbuf, msqold, msg_ctime); 1879 error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old)); 1880 } 1881 return (error); 1882 } 1883 1884 #undef CP 1885 1886 #endif /* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 || 1887 COMPAT_FREEBSD7 */ 1888