/*
 * linux/ipc/msg.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * MSGMAX limit removed, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/capability.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>

#include <asm/current.h>
#include <linux/uaccess.h>
#include "util.h"

/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
	struct list_head	r_list;		/* link in msq->q_receivers */
	struct task_struct	*r_tsk;		/* the sleeping task */

	int			r_mode;		/* one of the SEARCH_* modes */
	long			r_msgtype;	/* type argument for testmsg() */
	long			r_maxsize;	/* largest m_ts accepted */

	/*
	 * Result slot: ERR_PTR(-EAGAIN) while waiting; overwritten by
	 * pipelined_send() (message or -E2BIG) or expunge_all() (error).
	 */
	struct msg_msg		*r_msg;
};

/* one msg_sender for each sleeping sender */
struct msg_sender {
	struct list_head	list;	/* link in msq->q_senders */
	struct task_struct	*tsk;	/* the sleeping task */
	size_t			msgsz;	/* size of the message it wants to send */
};

/* Receive matching modes, produced by convert_mode(), used by testmsg(). */
#define SEARCH_ANY		1	/* any message matches */
#define SEARCH_EQUAL		2	/* m_type == type */
#define SEARCH_NOTEQUAL		3	/* m_type != type (MSG_EXCEPT) */
#define SEARCH_LESSEQUAL	4	/* m_type <= type */
#define SEARCH_NUMBER		5	/* MSG_COPY: select by position, see find_msg() */

#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])

/*
 * Look up a queue through ipc_obtain_object_idr() and convert the returned
 * kern_ipc_perm into its enclosing msg_queue. An ERR_PTR from the lookup is
 * passed through unchanged. Used for MSG_STAT, where the caller's msqid is
 * an index rather than a queue id.
 */
static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct msg_queue, q_perm);
}

/*
 * Same as msq_obtain_object(), but the lookup goes through
 * ipc_obtain_object_check(). NOTE(review): the exact extra validation is
 * implemented in the ipc utility code and is not visible in this file.
 */
static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct msg_queue, q_perm);
}

/* Remove queue @s from the message-queue id set of namespace @ns. */
static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
{
	ipc_rmid(&msg_ids(ns), &s->q_perm);
}

/*
 * Release callback handed to ipc_rcu_putref(): drops the queue's security
 * state via security_msg_queue_free() and then frees the object via
 * ipc_rcu_free().
 */
static void msg_rcu_free(struct rcu_head *head)
{
	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
	struct msg_queue *msq = ipc_rcu_to_struct(p);

	security_msg_queue_free(msq);
	ipc_rcu_free(head);
}

/**
 * newque - Create a new msg queue
 * @ns: namespace
 * @params: ptr to the structure that contains the key and msgflg
 *
 * Called with msg_ids.rwsem held (writer)
 *
 * Returns the id of the new queue, -ENOMEM if the allocation fails, or the
 * error reported by security_msg_queue_alloc() / ipc_addid().
 */
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
{
	struct msg_queue *msq;
	int id, retval;
	key_t key = params->key;
	int msgflg = params->flg;

	msq = ipc_rcu_alloc(sizeof(*msq));
	if (!msq)
		return -ENOMEM;

	msq->q_perm.mode = msgflg & S_IRWXUGO;
	msq->q_perm.key = key;

	msq->q_perm.security = NULL;
	retval = security_msg_queue_alloc(msq);
	if (retval) {
		/*
		 * Plain ipc_rcu_free here, not msg_rcu_free: the latter would
		 * also call security_msg_queue_free(), presumably unwanted
		 * because the security allocation just failed.
		 */
		ipc_rcu_putref(msq, ipc_rcu_free);
		return retval;
	}

	msq->q_stime = msq->q_rtime = 0;
	msq->q_ctime = get_seconds();
	msq->q_cbytes = msq->q_qnum = 0;
	msq->q_qbytes = ns->msg_ctlmnb;
	msq->q_lspid = msq->q_lrpid = 0;
	INIT_LIST_HEAD(&msq->q_messages);
	INIT_LIST_HEAD(&msq->q_receivers);
	INIT_LIST_HEAD(&msq->q_senders);

	/* ipc_addid() locks msq upon success. */
	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
	if (id < 0) {
		ipc_rcu_putref(msq, msg_rcu_free);
		return id;
	}

	/*
	 * NOTE(review): the rcu_read_lock() matching this unlock is not taken
	 * in this function — presumably ipc_addid() returns inside an RCU
	 * read-side section on success; confirm against util.c.
	 */
	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();

	return msq->q_perm.id;
}

/*
 * Check whether a message of @msgsz bytes can be queued on @msq right now.
 * Both the byte total and the message count (after adding this message)
 * are compared against q_qbytes.
 */
static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
{
	return msgsz + msq->q_cbytes <= msq->q_qbytes &&
		1 + msq->q_qnum <= msq->q_qbytes;
}

/*
 * Register the current task as a sleeping sender on @msq: fill in @mss,
 * mark the task TASK_INTERRUPTIBLE and append @mss to q_senders. The caller
 * (do_msgsnd()) holds the queue lock and calls schedule() afterwards.
 */
static inline void ss_add(struct msg_queue *msq,
			  struct msg_sender *mss, size_t msgsz)
{
	mss->tsk = current;
	mss->msgsz = msgsz;
	__set_current_state(TASK_INTERRUPTIBLE);
	list_add_tail(&mss->list, &msq->q_senders);
}

/*
 * Unlink @mss from the sender list. A NULL list.next means
 * ss_wakeup(kill == true) already detached the entry, so nothing is done.
 */
static inline void ss_del(struct msg_sender *mss)
{
	if (mss->list.next)
		list_del(&mss->list);
}

/*
 * Queue sleeping senders of @msq on @wake_q for a later wake_up_q().
 *
 * @kill == true:  every sender is marked as detached (list.next = NULL) and
 *                 queued for wakeup; used by freeque().
 * @kill == false: only senders whose message now fits are queued for
 *                 wakeup; the others are rotated to the tail of q_senders.
 */
static void ss_wakeup(struct msg_queue *msq,
		      struct wake_q_head *wake_q, bool kill)
{
	struct msg_sender *mss, *t;
	struct task_struct *stop_tsk = NULL;
	struct list_head *h = &msq->q_senders;

	list_for_each_entry_safe(mss, t, h, list) {
		if (kill)
			mss->list.next = NULL;

		/*
		 * Stop at the first task we don't wakeup,
		 * we've already iterated the original
		 * sender queue.
		 */
		else if (stop_tsk == mss->tsk)
			break;
		/*
		 * We are not in an EIDRM scenario here, therefore
		 * verify that we really need to wakeup the task.
		 * To maintain current semantics and wakeup order,
		 * move the sender to the tail on behalf of the
		 * blocked task.
		 */
		else if (!msg_fits_inqueue(msq, mss->msgsz)) {
			/* remember the first rotated task as loop sentinel */
			if (!stop_tsk)
				stop_tsk = mss->tsk;

			list_move_tail(&mss->list, &msq->q_senders);
			continue;
		}

		wake_q_add(wake_q, mss->tsk);
	}
}

/*
 * Hand error @res to every sleeping receiver of @msq: each receiver task is
 * added to @wake_q and its r_msg slot is set to ERR_PTR(@res). The entries
 * are not unlinked from q_receivers here.
 */
static void expunge_all(struct msg_queue *msq, int res,
			struct wake_q_head *wake_q)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		wake_q_add(wake_q, msr->r_tsk);
		WRITE_ONCE(msr->r_msg, ERR_PTR(res));
	}
}

/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
 *
 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct msg_msg *msg, *t;
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
	DEFINE_WAKE_Q(wake_q);

	expunge_all(msq, -EIDRM, &wake_q);
	ss_wakeup(msq, &wake_q, true);
	msg_rmid(ns, msq);
	/* the actual wakeups are issued only after the queue lock is dropped */
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
	rcu_read_unlock();

	/* free the queued messages and fix up the per-namespace counters */
	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
		atomic_dec(&ns->msg_hdrs);
		free_msg(msg);
	}
	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
	ipc_rcu_putref(msq, msg_rcu_free);
}

/*
 * Called with msg_ids.rwsem and ipcp locked.
 *
 * ->associate hook for ipcget(): forwards to security_msg_queue_associate().
 */
static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
{
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	return security_msg_queue_associate(msq, msgflg);
}

/*
 * msgget(2): packs @key and @msgflg into an ipc_params and lets ipcget()
 * do the work, with newque() as the creation hook and msg_security() as
 * the associate hook.
 */
SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops msg_ops = {
		.getnew = newque,
		.associate = msg_security,
	};
	struct ipc_params msg_params;

	ns = current->nsproxy->ipc_ns;

	msg_params.key = key;
	msg_params.flg = msgflg;

	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}

/*
 * Copy queue status @in to user buffer @buf in the layout selected by
 * @version.
 *
 * IPC_64:  @in is copied verbatim.
 * IPC_OLD: @in is converted to a zeroed struct msqid_ds; the short counters
 *          saturate at USHRT_MAX while the msg_l* fields carry the full
 *          values.
 *
 * Returns 0 on success and a non-zero value otherwise (copy_to_user()'s
 * result, or -EINVAL converted to unsigned long for an unknown version);
 * the caller in this file only tests for non-zero.
 */
static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct msqid_ds out;

		/* zero everything first so no stack bytes reach user space */
		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);

		out.msg_stime		= in->msg_stime;
		out.msg_rtime		= in->msg_rtime;
		out.msg_ctime		= in->msg_ctime;

		if (in->msg_cbytes > USHRT_MAX)
			out.msg_cbytes	= USHRT_MAX;
		else
			out.msg_cbytes	= in->msg_cbytes;
		out.msg_lcbytes		= in->msg_cbytes;

		if (in->msg_qnum > USHRT_MAX)
			out.msg_qnum	= USHRT_MAX;
		else
			out.msg_qnum	= in->msg_qnum;

		if (in->msg_qbytes > USHRT_MAX)
			out.msg_qbytes	= USHRT_MAX;
		else
			out.msg_qbytes	= in->msg_qbytes;
		out.msg_lqbytes		= in->msg_qbytes;

		out.msg_lspid		= in->msg_lspid;
		out.msg_lrpid		= in->msg_lrpid;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

/*
 * Read an IPC_SET argument from user buffer @buf into @out.
 *
 * IPC_64:  the whole structure is copied.
 * IPC_OLD: only msg_perm.uid/gid/mode and msg_qbytes of @out are filled in;
 *          a zero msg_qbytes selects msg_lqbytes instead. All other fields
 *          of @out are left untouched.
 *
 * Returns 0 on success, -EFAULT or -EINVAL (as unsigned long) otherwise.
 */
static inline unsigned long
copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct msqid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->msg_perm.uid	= tbuf_old.msg_perm.uid;
		out->msg_perm.gid	= tbuf_old.msg_perm.gid;
		out->msg_perm.mode	= tbuf_old.msg_perm.mode;

		if (tbuf_old.msg_qbytes == 0)
			out->msg_qbytes	= tbuf_old.msg_lqbytes;
		else
			out->msg_qbytes	= tbuf_old.msg_qbytes;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some msgctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 *
 * Handles IPC_RMID and IPC_SET. Returns 0 on success or a negative errno.
 *
 * Exit labels:
 *   out_unlock0 - drop the queue lock, RCU and the rwsem
 *   out_unlock1 - drop RCU and the rwsem
 *   out_up      - drop only the rwsem (freeque() already dropped the rest)
 */
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
		       struct msqid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct msqid64_ds uninitialized_var(msqid64);
	struct msg_queue *msq;
	int err;

	/* msqid64 is only filled in for IPC_SET */
	if (cmd == IPC_SET) {
		if (copy_msqid_from_user(&msqid64, buf, version))
			return -EFAULT;
	}

	down_write(&msg_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
				      &msqid64.msg_perm, msqid64.msg_qbytes);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	msq = container_of(ipcp, struct msg_queue, q_perm);

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&msq->q_perm);
		/* freeque unlocks the ipc object and rcu */
		freeque(ns, ipcp);
		goto out_up;
	case IPC_SET:
	{
		DEFINE_WAKE_Q(wake_q);

		/* raising the limit above msg_ctlmnb needs CAP_SYS_RESOURCE */
		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EPERM;
			goto out_unlock1;
		}

		ipc_lock_object(&msq->q_perm);
		err = ipc_update_perm(&msqid64.msg_perm, ipcp);
		if (err)
			goto out_unlock0;

		msq->q_qbytes = msqid64.msg_qbytes;

		msq->q_ctime = get_seconds();
		/*
		 * Sleeping receivers might be excluded by
		 * stricter permissions.
		 */
		expunge_all(msq, -EAGAIN, &wake_q);
		/*
		 * Sleeping senders might be able to send
		 * due to a larger queue size.
		 */
		ss_wakeup(msq, &wake_q, false);
		ipc_unlock_object(&msq->q_perm);
		wake_up_q(&wake_q);

		/* err is 0 here (ipc_update_perm() succeeded) */
		goto out_unlock1;
	}
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&msg_ids(ns).rwsem);
	return err;
}

/*
 * Handle the msgctl commands that only read state: IPC_INFO, MSG_INFO,
 * MSG_STAT and IPC_STAT. The queue spinlock is never taken here; the *_STAT
 * commands run under rcu_read_lock() only, the *_INFO commands take the ids
 * rwsem for reading.
 *
 * Returns:
 *   IPC_INFO/MSG_INFO - the value of ipc_get_maxid(), or 0 if that is
 *                       negative
 *   MSG_STAT          - the id of the queue found at index @msqid
 *   IPC_STAT          - 0
 *   or a negative errno.
 */
static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
			 int cmd, int version, void __user *buf)
{
	int err;
	struct msg_queue *msq;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	{
		struct msginfo msginfo;
		int max_id;

		if (!buf)
			return -EFAULT;

		/*
		 * We must not return kernel stack data.
		 * due to padding, it's not enough
		 * to set all member fields.
		 */
		err = security_msg_queue_msgctl(NULL, cmd);
		if (err)
			return err;

		memset(&msginfo, 0, sizeof(msginfo));
		msginfo.msgmni = ns->msg_ctlmni;
		msginfo.msgmax = ns->msg_ctlmax;
		msginfo.msgmnb = ns->msg_ctlmnb;
		msginfo.msgssz = MSGSSZ;
		msginfo.msgseg = MSGSEG;
		down_read(&msg_ids(ns).rwsem);
		if (cmd == MSG_INFO) {
			/* MSG_INFO reports live usage counters ... */
			msginfo.msgpool = msg_ids(ns).in_use;
			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
			msginfo.msgtql = atomic_read(&ns->msg_bytes);
		} else {
			/* ... IPC_INFO reports the compile-time constants */
			msginfo.msgmap = MSGMAP;
			msginfo.msgpool = MSGPOOL;
			msginfo.msgtql = MSGTQL;
		}
		max_id = ipc_get_maxid(&msg_ids(ns));
		up_read(&msg_ids(ns).rwsem);
		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}

	case MSG_STAT:
	case IPC_STAT:
	{
		struct msqid64_ds tbuf;
		int success_return;

		if (!buf)
			return -EFAULT;

		memset(&tbuf, 0, sizeof(tbuf));

		rcu_read_lock();
		if (cmd == MSG_STAT) {
			/* msqid is an index here, see sys_msgctl() */
			msq = msq_obtain_object(ns, msqid);
			if (IS_ERR(msq)) {
				err = PTR_ERR(msq);
				goto out_unlock;
			}
			success_return = msq->q_perm.id;
		} else {
			msq = msq_obtain_object_check(ns, msqid);
			if (IS_ERR(msq)) {
				err = PTR_ERR(msq);
				goto out_unlock;
			}
			success_return = 0;
		}

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock;

		err = security_msg_queue_msgctl(msq, cmd);
		if (err)
			goto out_unlock;

		/* snapshot the queue state, then copy out after leaving RCU */
		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
		tbuf.msg_stime  = msq->q_stime;
		tbuf.msg_rtime  = msq->q_rtime;
		tbuf.msg_ctime  = msq->q_ctime;
		tbuf.msg_cbytes = msq->q_cbytes;
		tbuf.msg_qnum   = msq->q_qnum;
		tbuf.msg_qbytes = msq->q_qbytes;
		tbuf.msg_lspid  = msq->q_lspid;
		tbuf.msg_lrpid  = msq->q_lrpid;
		rcu_read_unlock();

		if (copy_msqid_to_user(buf, &tbuf, version))
			return -EFAULT;
		return success_return;
	}

	default:
		return -EINVAL;
	}

	/* not reached: every case above returns or jumps to out_unlock */
	return err;
out_unlock:
	rcu_read_unlock();
	return err;
}

/*
 * msgctl(2): rejects negative arguments, splits the version out of @cmd via
 * ipc_parse_version() and dispatches to msgctl_nolock() for the read-only
 * commands or msgctl_down() for IPC_SET / IPC_RMID.
 */
SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
	int version;
	struct ipc_namespace *ns;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
	case IPC_STAT:
		return msgctl_nolock(ns, msqid, cmd, version, buf);
	case IPC_SET:
	case IPC_RMID:
		return msgctl_down(ns, msqid, cmd, buf, version);
	default:
		return -EINVAL;
	}
}

/*
 * Test whether @msg satisfies a receive request of the given @type and
 * @mode (one of the SEARCH_* values). Returns 1 on a match, 0 otherwise.
 * SEARCH_NUMBER always matches here; the position is checked in find_msg().
 */
static int testmsg(struct msg_msg *msg, long type, int mode)
{
	switch (mode) {
	case SEARCH_ANY:
	case SEARCH_NUMBER:
		return 1;
	case SEARCH_LESSEQUAL:
		if (msg->m_type <= type)
			return 1;
		break;
	case SEARCH_EQUAL:
		if (msg->m_type == type)
			return 1;
		break;
	case SEARCH_NOTEQUAL:
		if (msg->m_type != type)
			return 1;
		break;
	}
	return 0;
}

/*
 * Try to hand @msg directly to a sleeping receiver of @msq instead of
 * queueing it.
 *
 * Every receiver that matches (testmsg() and the security hook) is removed
 * from q_receivers and queued on @wake_q. If its buffer is too small it
 * gets -E2BIG and the search continues; otherwise it gets @msg and the
 * function returns 1. Returns 0 if nobody took the message.
 */
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
				 struct wake_q_head *wake_q)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

			list_del(&msr->r_list);
			if (msr->r_maxsize < msg->m_ts) {
				wake_q_add(wake_q, msr->r_tsk);
				WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
			} else {
				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
				msq->q_rtime = get_seconds();

				wake_q_add(wake_q, msr->r_tsk);
				WRITE_ONCE(msr->r_msg, msg);
				return 1;
			}
		}
	}

	return 0;
}

/*
 * Send a message of type @mtype with @msgsz bytes of payload taken from
 * user buffer @mtext to queue @msqid.
 *
 * If the queue is full the caller sleeps (TASK_INTERRUPTIBLE) until there is
 * room, unless IPC_NOWAIT is set in @msgflg.
 *
 * Returns 0 on success, or:
 *   -EINVAL          bad size, id or type
 *   -EACCES          no write permission
 *   -EIDRM           queue was removed
 *   -EAGAIN          queue full and IPC_NOWAIT given
 *   -ERESTARTNOHAND  interrupted by a signal while waiting
 *   or the error from load_msg(), the lookup or the security hook.
 */
long do_msgsnd(int msqid, long mtype, void __user *mtext,
	       size_t msgsz, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;
	struct ipc_namespace *ns;
	DEFINE_WAKE_Q(wake_q);

	ns = current->nsproxy->ipc_ns;

	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
		return -EINVAL;
	if (mtype < 1)
		return -EINVAL;

	/* build the kernel message from user space before taking any lock */
	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	rcu_read_lock();
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		err = PTR_ERR(msq);
		goto out_unlock1;
	}

	ipc_lock_object(&msq->q_perm);

	/* each iteration starts with the queue lock held */
	for (;;) {
		struct msg_sender s;

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
			goto out_unlock0;

		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			err = -EIDRM;
			goto out_unlock0;
		}

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock0;

		if (msg_fits_inqueue(msq, msgsz))
			break;

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock0;
		}

		/* enqueue the sender and prepare to block */
		ss_add(msq, &s, msgsz);

		/* hold a reference on msq across the sleep */
		if (!ipc_rcu_getref(msq)) {
			err = -EIDRM;
			goto out_unlock0;
		}

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule();

		rcu_read_lock();
		ipc_lock_object(&msq->q_perm);

		ipc_rcu_putref(msq, msg_rcu_free);
		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			err = -EIDRM;
			goto out_unlock0;
		}
		ss_del(&s);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock0;
		}

	}

	msq->q_lspid = task_tgid_vnr(current);
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg, &wake_q)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &ns->msg_bytes);
		atomic_inc(&ns->msg_hdrs);
	}

	err = 0;
	/* the message was handed over; do not free it below */
	msg = NULL;

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
out_unlock1:
	rcu_read_unlock();
	if (msg != NULL)
		free_msg(msg);
	return err;
}

/*
 * msgsnd(2): fetches the message type from the user's struct msgbuf and
 * passes the payload pointer on to do_msgsnd().
 */
SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		int, msgflg)
{
	long mtype;

	if (get_user(mtype, &msgp->mtype))
		return -EFAULT;
	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
}

/*
 * Translate the user's msgtyp/msgflg pair into a SEARCH_* mode.
 * A negative *msgtyp is replaced by its absolute value (LONG_MAX for
 * LONG_MIN) so that testmsg() can compare against it directly.
 */
static inline int convert_mode(long *msgtyp, int msgflg)
{
	if (msgflg & MSG_COPY)
		return SEARCH_NUMBER;
	/*
	 *  find message of correct type.
	 *  msgtyp = 0 => get first.
	 *  msgtyp > 0 => get first message of matching type.
	 *  msgtyp < 0 => get message with least type must be < abs(msgtype).
	 */
	if (*msgtyp == 0)
		return SEARCH_ANY;
	if (*msgtyp < 0) {
		if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
			*msgtyp = LONG_MAX;
		else
			*msgtyp = -*msgtyp;
		return SEARCH_LESSEQUAL;
	}
	if (msgflg & MSG_EXCEPT)
		return SEARCH_NOTEQUAL;
	return SEARCH_EQUAL;
}

/*
 * Default msg_handler for do_msgrcv(): writes the type and at most @bufsz
 * payload bytes of @msg into the user's struct msgbuf at @dest.
 * Returns the number of payload bytes stored, or -EFAULT.
 */
static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
{
	struct msgbuf __user *msgp = dest;
	size_t msgsz;

	if (put_user(msg->m_type, &msgp->mtype))
		return -EFAULT;

	/* truncate to the caller's buffer size */
	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
	if (store_msg(msgp->mtext, msg, msgsz))
		return -EFAULT;
	return msgsz;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * This function creates new kernel message structure, large enough to store
 * bufsz message bytes.
 */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	struct msg_msg *copy;

	/*
	 * Create dummy message to copy real message to.
	 */
	copy = load_msg(buf, bufsz);
	if (!IS_ERR(copy))
		copy->m_ts = bufsz;
	return copy;
}

/* Free the dummy message allocated by prepare_copy(); NULL is ignored. */
static inline void free_copy(struct msg_msg *copy)
{
	if (copy)
		free_msg(copy);
}
#else
/* Without CONFIG_CHECKPOINT_RESTORE, MSG_COPY fails with -ENOSYS. */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	return ERR_PTR(-ENOSYS);
}

static inline void free_copy(struct msg_msg *copy)
{
}
#endif

/*
 * Search q_messages of @msq for a message matching *@msgtyp / @mode that the
 * security hook also allows for the current task.
 *
 * SEARCH_LESSEQUAL: a match whose type is not 1 is remembered and *@msgtyp
 *                   is lowered to (type - 1) so that only smaller types
 *                   match afterwards; a match of type 1 is returned at once.
 * SEARCH_NUMBER:    returns the match whose position among the accepted
 *                   messages equals *@msgtyp.
 * other modes:      returns the first match.
 *
 * Returns the message, or ERR_PTR(-EAGAIN) if nothing was found.
 */
static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{
	struct msg_msg *msg, *found = NULL;
	long count = 0;

	list_for_each_entry(msg, &msq->q_messages, m_list) {
		if (testmsg(msg, *msgtyp, mode) &&
		    !security_msg_queue_msgrcv(msq, msg, current,
					       *msgtyp, mode)) {
			if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
				*msgtyp = msg->m_type - 1;
				found = msg;
			} else if (mode == SEARCH_NUMBER) {
				if (*msgtyp == count)
					return msg;
			} else
				return msg;
			count++;
		}
	}

	return found ?: ERR_PTR(-EAGAIN);
}

/*
 * Receive a message from queue @msqid and pass it to @msg_handler, which
 * stores it at @buf (at most @bufsz payload bytes).
 *
 * @msgtyp and @msgflg select the message, see convert_mode(). MSG_COPY
 * requires IPC_NOWAIT and excludes MSG_EXCEPT; it leaves the queue
 * unchanged. Without IPC_NOWAIT the caller sleeps (TASK_INTERRUPTIBLE)
 * until a message is handed over by pipelined_send() or an error is posted
 * by expunge_all().
 *
 * Returns the value of @msg_handler on success, or a negative errno
 * (-EINVAL, -EACCES, -EIDRM, -E2BIG, -ENOMSG, -ERESTARTNOHAND, or whatever
 * was posted into r_msg / returned by the lookup or prepare_copy()).
 *
 * Exit labels: out_unlock0 drops the queue lock first, out_unlock1 only
 * leaves the RCU read-side section.
 */
long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
{
	int mode;
	struct msg_queue *msq;
	struct ipc_namespace *ns;
	struct msg_msg *msg, *copy = NULL;
	DEFINE_WAKE_Q(wake_q);

	ns = current->nsproxy->ipc_ns;

	if (msqid < 0 || (long) bufsz < 0)
		return -EINVAL;

	if (msgflg & MSG_COPY) {
		if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
			return -EINVAL;
		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
		if (IS_ERR(copy))
			return PTR_ERR(copy);
	}
	mode = convert_mode(&msgtyp, msgflg);

	rcu_read_lock();
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		rcu_read_unlock();
		free_copy(copy);
		return PTR_ERR(msq);
	}

	/* each iteration starts inside RCU but without the queue lock */
	for (;;) {
		struct msg_receiver msr_d;

		msg = ERR_PTR(-EACCES);
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock1;

		ipc_lock_object(&msq->q_perm);

		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			msg = ERR_PTR(-EIDRM);
			goto out_unlock0;
		}

		msg = find_msg(msq, &msgtyp, mode);
		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock0;
			}
			/*
			 * If we are copying, then do not unlink message and do
			 * not update queue parameters.
			 */
			if (msgflg & MSG_COPY) {
				/*
				 * NOTE(review): msg now holds copy_msg()'s
				 * result — presumably @copy or an ERR_PTR;
				 * confirm in msgutil.c.
				 */
				msg = copy_msg(msg, copy);
				goto out_unlock0;
			}

			list_del(&msg->m_list);
			msq->q_qnum--;
			msq->q_rtime = get_seconds();
			msq->q_lrpid = task_tgid_vnr(current);
			msq->q_cbytes -= msg->m_ts;
			atomic_sub(msg->m_ts, &ns->msg_bytes);
			atomic_dec(&ns->msg_hdrs);
			/* room was freed: sleeping senders may fit now */
			ss_wakeup(msq, &wake_q, false);

			goto out_unlock0;
		}

		/* No message waiting. Wait for a message */
		if (msgflg & IPC_NOWAIT) {
			msg = ERR_PTR(-ENOMSG);
			goto out_unlock0;
		}

		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		if (msgflg & MSG_NOERROR)
			msr_d.r_maxsize = INT_MAX;
		else
			msr_d.r_maxsize = bufsz;
		/* -EAGAIN marks "nothing delivered yet" */
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		__set_current_state(TASK_INTERRUPTIBLE);

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule();

		/*
		 * Lockless receive, part 1:
		 * We don't hold a reference to the queue and getting a
		 * reference would defeat the idea of a lockless operation,
		 * thus the code relies on rcu to guarantee the existence of
		 * msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, then the queue is not yet
		 * destroyed.
		 */
		rcu_read_lock();

		/*
		 * Lockless receive, part 2:
		 * The work in pipelined_send() and expunge_all():
		 * - Set pointer to message
		 * - Queue the receiver task for later wakeup
		 * - Wake up the process after the lock is dropped.
		 *
		 * Should the process wake up before this wakeup (due to a
		 * signal) it will either see the message and continue ...
		 */
		msg = READ_ONCE(msr_d.r_msg);
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock1;

		 /*
		  * ... or see -EAGAIN, acquire the lock to check the message
		  * again.
		  */
		ipc_lock_object(&msq->q_perm);

		msg = msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock0;

		/* still nothing delivered: we unlink ourselves */
		list_del(&msr_d.r_list);
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
			goto out_unlock0;
		}

		ipc_unlock_object(&msq->q_perm);
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
out_unlock1:
	rcu_read_unlock();
	if (IS_ERR(msg)) {
		free_copy(copy);
		return PTR_ERR(msg);
	}

	/* copy to user space outside of all locks, then release the message */
	bufsz = msg_handler(buf, msg, bufsz);
	free_msg(msg);

	return bufsz;
}

/* msgrcv(2): do_msgrcv() with do_msg_fill() as the copy-out handler. */
SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		long, msgtyp, int, msgflg)
{
	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}


/*
 * Initialise the message-queue part of namespace @ns: default limits
 * (MSGMAX, MSGMNB, MSGMNI), zeroed usage counters and an empty id set.
 */
void msg_init_ns(struct ipc_namespace *ns)
{
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;
	ns->msg_ctlmni = MSGMNI;

	atomic_set(&ns->msg_bytes, 0);
	atomic_set(&ns->msg_hdrs, 0);
	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}

#ifdef CONFIG_IPC_NS
/*
 * Tear down the message queues of namespace @ns: free_ipcs() is given
 * freeque() as the per-object destructor, then the IDR is destroyed.
 */
void msg_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &msg_ids(ns), freeque);
	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif

#ifdef CONFIG_PROC_FS
/*
 * seq_file show callback registered by msg_init(): prints one line for the
 * queue passed in @it, with uids/gids translated into the reader's user
 * namespace. The column order matches the header string in msg_init().
 */
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct msg_queue *msq = it;

	seq_printf(s,
		   "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
		   msq->q_perm.key,
		   msq->q_perm.id,
		   msq->q_perm.mode,
		   msq->q_cbytes,
		   msq->q_qnum,
		   msq->q_lspid,
		   msq->q_lrpid,
		   from_kuid_munged(user_ns, msq->q_perm.uid),
		   from_kgid_munged(user_ns, msq->q_perm.gid),
		   from_kuid_munged(user_ns, msq->q_perm.cuid),
		   from_kgid_munged(user_ns, msq->q_perm.cgid),
		   msq->q_stime,
		   msq->q_rtime,
		   msq->q_ctime);

	return 0;
}
#endif

/*
 * Boot-time initialisation: set up the initial IPC namespace and register
 * the "sysvipc/msg" proc interface with its header line.
 */
void __init msg_init(void)
{
	msg_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/msg",
				" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
}