1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched/wake_q.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <linux/uaccess.h> 43 #include "util.h" 44 45 /* one msg_receiver structure for each sleeping receiver */ 46 struct msg_receiver { 47 struct list_head r_list; 48 struct task_struct *r_tsk; 49 50 int r_mode; 51 long r_msgtype; 52 long r_maxsize; 53 54 struct msg_msg *r_msg; 55 }; 56 57 /* one msg_sender for each sleeping sender */ 58 struct msg_sender { 59 struct list_head list; 60 struct task_struct *tsk; 61 size_t msgsz; 62 }; 63 64 #define SEARCH_ANY 1 65 #define SEARCH_EQUAL 2 66 #define SEARCH_NOTEQUAL 3 67 #define SEARCH_LESSEQUAL 4 68 #define SEARCH_NUMBER 5 69 70 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 71 72 static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) 73 { 74 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id); 75 76 if (IS_ERR(ipcp)) 77 return ERR_CAST(ipcp); 78 79 return container_of(ipcp, struct msg_queue, q_perm); 80 } 81 82 static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, 83 int id) 84 { 85 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); 86 87 if (IS_ERR(ipcp)) 88 return ERR_CAST(ipcp); 89 90 return container_of(ipcp, struct msg_queue, q_perm); 91 } 92 93 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 94 { 95 ipc_rmid(&msg_ids(ns), &s->q_perm); 96 } 97 98 static void msg_rcu_free(struct rcu_head *head) 99 { 100 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); 101 struct msg_queue *msq = container_of(p, struct msg_queue, q_perm); 102 103 security_msg_queue_free(msq); 104 kvfree(msq); 105 } 106 107 /** 108 * newque - Create a new msg queue 109 * @ns: namespace 110 * @params: ptr to the structure that contains the key and msgflg 111 * 112 * Called with msg_ids.rwsem held (writer) 113 */ 114 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 115 { 116 struct msg_queue *msq; 117 int retval; 118 key_t key = params->key; 119 int msgflg = params->flg; 120 121 msq = kvmalloc(sizeof(*msq), GFP_KERNEL); 122 if (unlikely(!msq)) 123 return -ENOMEM; 124 125 msq->q_perm.mode = msgflg & S_IRWXUGO; 126 msq->q_perm.key = key; 127 128 msq->q_perm.security = NULL; 129 retval = security_msg_queue_alloc(msq); 130 if (retval) { 131 kvfree(msq); 132 return retval; 133 } 134 135 msq->q_stime = msq->q_rtime = 0; 136 msq->q_ctime = ktime_get_real_seconds(); 137 msq->q_cbytes = msq->q_qnum = 0; 138 msq->q_qbytes = ns->msg_ctlmnb; 139 msq->q_lspid = msq->q_lrpid = 0; 140 INIT_LIST_HEAD(&msq->q_messages); 141 INIT_LIST_HEAD(&msq->q_receivers); 142 INIT_LIST_HEAD(&msq->q_senders); 143 144 /* ipc_addid() locks msq upon success. */ 145 retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 146 if (retval < 0) { 147 call_rcu(&msq->q_perm.rcu, msg_rcu_free); 148 return retval; 149 } 150 151 ipc_unlock_object(&msq->q_perm); 152 rcu_read_unlock(); 153 154 return msq->q_perm.id; 155 } 156 157 static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz) 158 { 159 return msgsz + msq->q_cbytes <= msq->q_qbytes && 160 1 + msq->q_qnum <= msq->q_qbytes; 161 } 162 163 static inline void ss_add(struct msg_queue *msq, 164 struct msg_sender *mss, size_t msgsz) 165 { 166 mss->tsk = current; 167 mss->msgsz = msgsz; 168 __set_current_state(TASK_INTERRUPTIBLE); 169 list_add_tail(&mss->list, &msq->q_senders); 170 } 171 172 static inline void ss_del(struct msg_sender *mss) 173 { 174 if (mss->list.next) 175 list_del(&mss->list); 176 } 177 178 static void ss_wakeup(struct msg_queue *msq, 179 struct wake_q_head *wake_q, bool kill) 180 { 181 struct msg_sender *mss, *t; 182 struct task_struct *stop_tsk = NULL; 183 struct list_head *h = &msq->q_senders; 184 185 list_for_each_entry_safe(mss, t, h, list) { 186 if (kill) 187 mss->list.next = NULL; 188 189 /* 190 * Stop at the first task we don't wakeup, 191 * we've already iterated the original 192 * sender queue. 193 */ 194 else if (stop_tsk == mss->tsk) 195 break; 196 /* 197 * We are not in an EIDRM scenario here, therefore 198 * verify that we really need to wakeup the task. 199 * To maintain current semantics and wakeup order, 200 * move the sender to the tail on behalf of the 201 * blocked task. 202 */ 203 else if (!msg_fits_inqueue(msq, mss->msgsz)) { 204 if (!stop_tsk) 205 stop_tsk = mss->tsk; 206 207 list_move_tail(&mss->list, &msq->q_senders); 208 continue; 209 } 210 211 wake_q_add(wake_q, mss->tsk); 212 } 213 } 214 215 static void expunge_all(struct msg_queue *msq, int res, 216 struct wake_q_head *wake_q) 217 { 218 struct msg_receiver *msr, *t; 219 220 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 221 wake_q_add(wake_q, msr->r_tsk); 222 WRITE_ONCE(msr->r_msg, ERR_PTR(res)); 223 } 224 } 225 226 /* 227 * freeque() wakes up waiters on the sender and receiver waiting queue, 228 * removes the message queue from message queue ID IDR, and cleans up all the 229 * messages associated with this queue. 230 * 231 * msg_ids.rwsem (writer) and the spinlock for this message queue are held 232 * before freeque() is called. msg_ids.rwsem remains locked on exit. 233 */ 234 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 235 { 236 struct msg_msg *msg, *t; 237 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 238 DEFINE_WAKE_Q(wake_q); 239 240 expunge_all(msq, -EIDRM, &wake_q); 241 ss_wakeup(msq, &wake_q, true); 242 msg_rmid(ns, msq); 243 ipc_unlock_object(&msq->q_perm); 244 wake_up_q(&wake_q); 245 rcu_read_unlock(); 246 247 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 248 atomic_dec(&ns->msg_hdrs); 249 free_msg(msg); 250 } 251 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 252 ipc_rcu_putref(&msq->q_perm, msg_rcu_free); 253 } 254 255 /* 256 * Called with msg_ids.rwsem and ipcp locked. 257 */ 258 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 259 { 260 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 261 262 return security_msg_queue_associate(msq, msgflg); 263 } 264 265 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 266 { 267 struct ipc_namespace *ns; 268 static const struct ipc_ops msg_ops = { 269 .getnew = newque, 270 .associate = msg_security, 271 }; 272 struct ipc_params msg_params; 273 274 ns = current->nsproxy->ipc_ns; 275 276 msg_params.key = key; 277 msg_params.flg = msgflg; 278 279 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 280 } 281 282 static inline unsigned long 283 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 284 { 285 switch (version) { 286 case IPC_64: 287 return copy_to_user(buf, in, sizeof(*in)); 288 case IPC_OLD: 289 { 290 struct msqid_ds out; 291 292 memset(&out, 0, sizeof(out)); 293 294 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 295 296 out.msg_stime = in->msg_stime; 297 out.msg_rtime = in->msg_rtime; 298 out.msg_ctime = in->msg_ctime; 299 300 if (in->msg_cbytes > USHRT_MAX) 301 out.msg_cbytes = USHRT_MAX; 302 else 303 out.msg_cbytes = in->msg_cbytes; 304 out.msg_lcbytes = in->msg_cbytes; 305 306 if (in->msg_qnum > USHRT_MAX) 307 out.msg_qnum = USHRT_MAX; 308 else 309 out.msg_qnum = in->msg_qnum; 310 311 if (in->msg_qbytes > USHRT_MAX) 312 out.msg_qbytes = USHRT_MAX; 313 else 314 out.msg_qbytes = in->msg_qbytes; 315 out.msg_lqbytes = in->msg_qbytes; 316 317 out.msg_lspid = in->msg_lspid; 318 out.msg_lrpid = in->msg_lrpid; 319 320 return copy_to_user(buf, &out, sizeof(out)); 321 } 322 default: 323 return -EINVAL; 324 } 325 } 326 327 static inline unsigned long 328 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 329 { 330 switch (version) { 331 case IPC_64: 332 if (copy_from_user(out, buf, sizeof(*out))) 333 return -EFAULT; 334 return 0; 335 case IPC_OLD: 336 { 337 struct msqid_ds tbuf_old; 338 339 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 340 return -EFAULT; 341 342 out->msg_perm.uid = tbuf_old.msg_perm.uid; 343 out->msg_perm.gid = tbuf_old.msg_perm.gid; 344 out->msg_perm.mode = tbuf_old.msg_perm.mode; 345 346 if (tbuf_old.msg_qbytes == 0) 347 out->msg_qbytes = tbuf_old.msg_lqbytes; 348 else 349 out->msg_qbytes = tbuf_old.msg_qbytes; 350 351 return 0; 352 } 353 default: 354 return -EINVAL; 355 } 356 } 357 358 /* 359 * This function handles some msgctl commands which require the rwsem 360 * to be held in write mode. 361 * NOTE: no locks must be held, the rwsem is taken inside this function. 362 */ 363 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 364 struct msqid64_ds *msqid64) 365 { 366 struct kern_ipc_perm *ipcp; 367 struct msg_queue *msq; 368 int err; 369 370 down_write(&msg_ids(ns).rwsem); 371 rcu_read_lock(); 372 373 ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, 374 &msqid64->msg_perm, msqid64->msg_qbytes); 375 if (IS_ERR(ipcp)) { 376 err = PTR_ERR(ipcp); 377 goto out_unlock1; 378 } 379 380 msq = container_of(ipcp, struct msg_queue, q_perm); 381 382 err = security_msg_queue_msgctl(msq, cmd); 383 if (err) 384 goto out_unlock1; 385 386 switch (cmd) { 387 case IPC_RMID: 388 ipc_lock_object(&msq->q_perm); 389 /* freeque unlocks the ipc object and rcu */ 390 freeque(ns, ipcp); 391 goto out_up; 392 case IPC_SET: 393 { 394 DEFINE_WAKE_Q(wake_q); 395 396 if (msqid64->msg_qbytes > ns->msg_ctlmnb && 397 !capable(CAP_SYS_RESOURCE)) { 398 err = -EPERM; 399 goto out_unlock1; 400 } 401 402 ipc_lock_object(&msq->q_perm); 403 err = ipc_update_perm(&msqid64->msg_perm, ipcp); 404 if (err) 405 goto out_unlock0; 406 407 msq->q_qbytes = msqid64->msg_qbytes; 408 409 msq->q_ctime = ktime_get_real_seconds(); 410 /* 411 * Sleeping receivers might be excluded by 412 * stricter permissions. 413 */ 414 expunge_all(msq, -EAGAIN, &wake_q); 415 /* 416 * Sleeping senders might be able to send 417 * due to a larger queue size. 418 */ 419 ss_wakeup(msq, &wake_q, false); 420 ipc_unlock_object(&msq->q_perm); 421 wake_up_q(&wake_q); 422 423 goto out_unlock1; 424 } 425 default: 426 err = -EINVAL; 427 goto out_unlock1; 428 } 429 430 out_unlock0: 431 ipc_unlock_object(&msq->q_perm); 432 out_unlock1: 433 rcu_read_unlock(); 434 out_up: 435 up_write(&msg_ids(ns).rwsem); 436 return err; 437 } 438 439 static int msgctl_info(struct ipc_namespace *ns, int msqid, 440 int cmd, struct msginfo *msginfo) 441 { 442 int err; 443 int max_id; 444 445 /* 446 * We must not return kernel stack data. 447 * due to padding, it's not enough 448 * to set all member fields. 449 */ 450 err = security_msg_queue_msgctl(NULL, cmd); 451 if (err) 452 return err; 453 454 memset(msginfo, 0, sizeof(*msginfo)); 455 msginfo->msgmni = ns->msg_ctlmni; 456 msginfo->msgmax = ns->msg_ctlmax; 457 msginfo->msgmnb = ns->msg_ctlmnb; 458 msginfo->msgssz = MSGSSZ; 459 msginfo->msgseg = MSGSEG; 460 down_read(&msg_ids(ns).rwsem); 461 if (cmd == MSG_INFO) { 462 msginfo->msgpool = msg_ids(ns).in_use; 463 msginfo->msgmap = atomic_read(&ns->msg_hdrs); 464 msginfo->msgtql = atomic_read(&ns->msg_bytes); 465 } else { 466 msginfo->msgmap = MSGMAP; 467 msginfo->msgpool = MSGPOOL; 468 msginfo->msgtql = MSGTQL; 469 } 470 max_id = ipc_get_maxid(&msg_ids(ns)); 471 up_read(&msg_ids(ns).rwsem); 472 return (max_id < 0) ? 0 : max_id; 473 } 474 475 static int msgctl_stat(struct ipc_namespace *ns, int msqid, 476 int cmd, struct msqid64_ds *p) 477 { 478 int err; 479 struct msg_queue *msq; 480 int success_return; 481 482 memset(p, 0, sizeof(*p)); 483 484 rcu_read_lock(); 485 if (cmd == MSG_STAT) { 486 msq = msq_obtain_object(ns, msqid); 487 if (IS_ERR(msq)) { 488 err = PTR_ERR(msq); 489 goto out_unlock; 490 } 491 success_return = msq->q_perm.id; 492 } else { 493 msq = msq_obtain_object_check(ns, msqid); 494 if (IS_ERR(msq)) { 495 err = PTR_ERR(msq); 496 goto out_unlock; 497 } 498 success_return = 0; 499 } 500 501 err = -EACCES; 502 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 503 goto out_unlock; 504 505 err = security_msg_queue_msgctl(msq, cmd); 506 if (err) 507 goto out_unlock; 508 509 kernel_to_ipc64_perm(&msq->q_perm, &p->msg_perm); 510 p->msg_stime = msq->q_stime; 511 p->msg_rtime = msq->q_rtime; 512 p->msg_ctime = msq->q_ctime; 513 p->msg_cbytes = msq->q_cbytes; 514 p->msg_qnum = msq->q_qnum; 515 p->msg_qbytes = msq->q_qbytes; 516 p->msg_lspid = msq->q_lspid; 517 p->msg_lrpid = msq->q_lrpid; 518 rcu_read_unlock(); 519 520 return success_return; 521 522 out_unlock: 523 rcu_read_unlock(); 524 return err; 525 } 526 527 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 528 { 529 int version; 530 struct ipc_namespace *ns; 531 struct msqid64_ds msqid64; 532 int err; 533 534 if (msqid < 0 || cmd < 0) 535 return -EINVAL; 536 537 version = ipc_parse_version(&cmd); 538 ns = current->nsproxy->ipc_ns; 539 540 switch (cmd) { 541 case IPC_INFO: 542 case MSG_INFO: { 543 struct msginfo msginfo; 544 err = msgctl_info(ns, msqid, cmd, &msginfo); 545 if (err < 0) 546 return err; 547 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 548 err = -EFAULT; 549 return err; 550 } 551 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 552 case IPC_STAT: 553 err = msgctl_stat(ns, msqid, cmd, &msqid64); 554 if (err < 0) 555 return err; 556 if (copy_msqid_to_user(buf, &msqid64, version)) 557 err = -EFAULT; 558 return err; 559 case IPC_SET: 560 if (copy_msqid_from_user(&msqid64, buf, version)) 561 return -EFAULT; 562 /* fallthru */ 563 case IPC_RMID: 564 return msgctl_down(ns, msqid, cmd, &msqid64); 565 default: 566 return -EINVAL; 567 } 568 } 569 570 #ifdef CONFIG_COMPAT 571 572 struct compat_msqid_ds { 573 struct compat_ipc_perm msg_perm; 574 compat_uptr_t msg_first; 575 compat_uptr_t msg_last; 576 compat_time_t msg_stime; 577 compat_time_t msg_rtime; 578 compat_time_t msg_ctime; 579 compat_ulong_t msg_lcbytes; 580 compat_ulong_t msg_lqbytes; 581 unsigned short msg_cbytes; 582 unsigned short msg_qnum; 583 unsigned short msg_qbytes; 584 compat_ipc_pid_t msg_lspid; 585 compat_ipc_pid_t msg_lrpid; 586 }; 587 588 static int copy_compat_msqid_from_user(struct msqid64_ds *out, void __user *buf, 589 int version) 590 { 591 memset(out, 0, sizeof(*out)); 592 if (version == IPC_64) { 593 struct compat_msqid64_ds *p = buf; 594 if (get_compat_ipc64_perm(&out->msg_perm, &p->msg_perm)) 595 return -EFAULT; 596 if (get_user(out->msg_qbytes, &p->msg_qbytes)) 597 return -EFAULT; 598 } else { 599 struct compat_msqid_ds *p = buf; 600 if (get_compat_ipc_perm(&out->msg_perm, &p->msg_perm)) 601 return -EFAULT; 602 if (get_user(out->msg_qbytes, &p->msg_qbytes)) 603 return -EFAULT; 604 } 605 return 0; 606 } 607 608 static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in, 609 int version) 610 { 611 if (version == IPC_64) { 612 struct compat_msqid64_ds v; 613 memset(&v, 0, sizeof(v)); 614 to_compat_ipc64_perm(&v.msg_perm, &in->msg_perm); 615 v.msg_stime = in->msg_stime; 616 v.msg_rtime = in->msg_rtime; 617 v.msg_ctime = in->msg_ctime; 618 v.msg_cbytes = in->msg_cbytes; 619 v.msg_qnum = in->msg_qnum; 620 v.msg_qbytes = in->msg_qbytes; 621 v.msg_lspid = in->msg_lspid; 622 v.msg_lrpid = in->msg_lrpid; 623 return copy_to_user(buf, &v, sizeof(v)); 624 } else { 625 struct compat_msqid_ds v; 626 memset(&v, 0, sizeof(v)); 627 to_compat_ipc_perm(&v.msg_perm, &in->msg_perm); 628 v.msg_stime = in->msg_stime; 629 v.msg_rtime = in->msg_rtime; 630 v.msg_ctime = in->msg_ctime; 631 v.msg_cbytes = in->msg_cbytes; 632 v.msg_qnum = in->msg_qnum; 633 v.msg_qbytes = in->msg_qbytes; 634 v.msg_lspid = in->msg_lspid; 635 v.msg_lrpid = in->msg_lrpid; 636 return copy_to_user(buf, &v, sizeof(v)); 637 } 638 } 639 640 COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) 641 { 642 struct ipc_namespace *ns; 643 int err; 644 struct msqid64_ds msqid64; 645 int version = compat_ipc_parse_version(&cmd); 646 647 ns = current->nsproxy->ipc_ns; 648 649 if (msqid < 0 || cmd < 0) 650 return -EINVAL; 651 652 switch (cmd & (~IPC_64)) { 653 case IPC_INFO: 654 case MSG_INFO: { 655 struct msginfo msginfo; 656 err = msgctl_info(ns, msqid, cmd, &msginfo); 657 if (err < 0) 658 return err; 659 if (copy_to_user(uptr, &msginfo, sizeof(struct msginfo))) 660 err = -EFAULT; 661 return err; 662 } 663 case IPC_STAT: 664 case MSG_STAT: 665 err = msgctl_stat(ns, msqid, cmd, &msqid64); 666 if (err < 0) 667 return err; 668 if (copy_compat_msqid_to_user(uptr, &msqid64, version)) 669 err = -EFAULT; 670 return err; 671 case IPC_SET: 672 if (copy_compat_msqid_from_user(&msqid64, uptr, version)) 673 return -EFAULT; 674 /* fallthru */ 675 case IPC_RMID: 676 return msgctl_down(ns, msqid, cmd, &msqid64); 677 default: 678 return -EINVAL; 679 } 680 } 681 #endif 682 683 static int testmsg(struct msg_msg *msg, long type, int mode) 684 { 685 switch (mode) { 686 case SEARCH_ANY: 687 case SEARCH_NUMBER: 688 return 1; 689 case SEARCH_LESSEQUAL: 690 if (msg->m_type <= type) 691 return 1; 692 break; 693 case SEARCH_EQUAL: 694 if (msg->m_type == type) 695 return 1; 696 break; 697 case SEARCH_NOTEQUAL: 698 if (msg->m_type != type) 699 return 1; 700 break; 701 } 702 return 0; 703 } 704 705 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, 706 struct wake_q_head *wake_q) 707 { 708 struct msg_receiver *msr, *t; 709 710 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 711 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 712 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 713 msr->r_msgtype, msr->r_mode)) { 714 715 list_del(&msr->r_list); 716 if (msr->r_maxsize < msg->m_ts) { 717 wake_q_add(wake_q, msr->r_tsk); 718 WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG)); 719 } else { 720 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 721 msq->q_rtime = get_seconds(); 722 723 wake_q_add(wake_q, msr->r_tsk); 724 WRITE_ONCE(msr->r_msg, msg); 725 return 1; 726 } 727 } 728 } 729 730 return 0; 731 } 732 733 static long do_msgsnd(int msqid, long mtype, void __user *mtext, 734 size_t msgsz, int msgflg) 735 { 736 struct msg_queue *msq; 737 struct msg_msg *msg; 738 int err; 739 struct ipc_namespace *ns; 740 DEFINE_WAKE_Q(wake_q); 741 742 ns = current->nsproxy->ipc_ns; 743 744 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 745 return -EINVAL; 746 if (mtype < 1) 747 return -EINVAL; 748 749 msg = load_msg(mtext, msgsz); 750 if (IS_ERR(msg)) 751 return PTR_ERR(msg); 752 753 msg->m_type = mtype; 754 msg->m_ts = msgsz; 755 756 rcu_read_lock(); 757 msq = msq_obtain_object_check(ns, msqid); 758 if (IS_ERR(msq)) { 759 err = PTR_ERR(msq); 760 goto out_unlock1; 761 } 762 763 ipc_lock_object(&msq->q_perm); 764 765 for (;;) { 766 struct msg_sender s; 767 768 err = -EACCES; 769 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 770 goto out_unlock0; 771 772 /* raced with RMID? */ 773 if (!ipc_valid_object(&msq->q_perm)) { 774 err = -EIDRM; 775 goto out_unlock0; 776 } 777 778 err = security_msg_queue_msgsnd(msq, msg, msgflg); 779 if (err) 780 goto out_unlock0; 781 782 if (msg_fits_inqueue(msq, msgsz)) 783 break; 784 785 /* queue full, wait: */ 786 if (msgflg & IPC_NOWAIT) { 787 err = -EAGAIN; 788 goto out_unlock0; 789 } 790 791 /* enqueue the sender and prepare to block */ 792 ss_add(msq, &s, msgsz); 793 794 if (!ipc_rcu_getref(&msq->q_perm)) { 795 err = -EIDRM; 796 goto out_unlock0; 797 } 798 799 ipc_unlock_object(&msq->q_perm); 800 rcu_read_unlock(); 801 schedule(); 802 803 rcu_read_lock(); 804 ipc_lock_object(&msq->q_perm); 805 806 ipc_rcu_putref(&msq->q_perm, msg_rcu_free); 807 /* raced with RMID? */ 808 if (!ipc_valid_object(&msq->q_perm)) { 809 err = -EIDRM; 810 goto out_unlock0; 811 } 812 ss_del(&s); 813 814 if (signal_pending(current)) { 815 err = -ERESTARTNOHAND; 816 goto out_unlock0; 817 } 818 819 } 820 821 msq->q_lspid = task_tgid_vnr(current); 822 msq->q_stime = get_seconds(); 823 824 if (!pipelined_send(msq, msg, &wake_q)) { 825 /* no one is waiting for this message, enqueue it */ 826 list_add_tail(&msg->m_list, &msq->q_messages); 827 msq->q_cbytes += msgsz; 828 msq->q_qnum++; 829 atomic_add(msgsz, &ns->msg_bytes); 830 atomic_inc(&ns->msg_hdrs); 831 } 832 833 err = 0; 834 msg = NULL; 835 836 out_unlock0: 837 ipc_unlock_object(&msq->q_perm); 838 wake_up_q(&wake_q); 839 out_unlock1: 840 rcu_read_unlock(); 841 if (msg != NULL) 842 free_msg(msg); 843 return err; 844 } 845 846 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 847 int, msgflg) 848 { 849 long mtype; 850 851 if (get_user(mtype, &msgp->mtype)) 852 return -EFAULT; 853 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 854 } 855 856 #ifdef CONFIG_COMPAT 857 858 struct compat_msgbuf { 859 compat_long_t mtype; 860 char mtext[1]; 861 }; 862 863 COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, 864 compat_ssize_t, msgsz, int, msgflg) 865 { 866 struct compat_msgbuf __user *up = compat_ptr(msgp); 867 compat_long_t mtype; 868 869 if (get_user(mtype, &up->mtype)) 870 return -EFAULT; 871 return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg); 872 } 873 #endif 874 875 static inline int convert_mode(long *msgtyp, int msgflg) 876 { 877 if (msgflg & MSG_COPY) 878 return SEARCH_NUMBER; 879 /* 880 * find message of correct type. 881 * msgtyp = 0 => get first. 882 * msgtyp > 0 => get first message of matching type. 883 * msgtyp < 0 => get message with least type must be < abs(msgtype). 884 */ 885 if (*msgtyp == 0) 886 return SEARCH_ANY; 887 if (*msgtyp < 0) { 888 if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */ 889 *msgtyp = LONG_MAX; 890 else 891 *msgtyp = -*msgtyp; 892 return SEARCH_LESSEQUAL; 893 } 894 if (msgflg & MSG_EXCEPT) 895 return SEARCH_NOTEQUAL; 896 return SEARCH_EQUAL; 897 } 898 899 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 900 { 901 struct msgbuf __user *msgp = dest; 902 size_t msgsz; 903 904 if (put_user(msg->m_type, &msgp->mtype)) 905 return -EFAULT; 906 907 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 908 if (store_msg(msgp->mtext, msg, msgsz)) 909 return -EFAULT; 910 return msgsz; 911 } 912 913 #ifdef CONFIG_CHECKPOINT_RESTORE 914 /* 915 * This function creates new kernel message structure, large enough to store 916 * bufsz message bytes. 917 */ 918 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 919 { 920 struct msg_msg *copy; 921 922 /* 923 * Create dummy message to copy real message to. 924 */ 925 copy = load_msg(buf, bufsz); 926 if (!IS_ERR(copy)) 927 copy->m_ts = bufsz; 928 return copy; 929 } 930 931 static inline void free_copy(struct msg_msg *copy) 932 { 933 if (copy) 934 free_msg(copy); 935 } 936 #else 937 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 938 { 939 return ERR_PTR(-ENOSYS); 940 } 941 942 static inline void free_copy(struct msg_msg *copy) 943 { 944 } 945 #endif 946 947 static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 948 { 949 struct msg_msg *msg, *found = NULL; 950 long count = 0; 951 952 list_for_each_entry(msg, &msq->q_messages, m_list) { 953 if (testmsg(msg, *msgtyp, mode) && 954 !security_msg_queue_msgrcv(msq, msg, current, 955 *msgtyp, mode)) { 956 if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 957 *msgtyp = msg->m_type - 1; 958 found = msg; 959 } else if (mode == SEARCH_NUMBER) { 960 if (*msgtyp == count) 961 return msg; 962 } else 963 return msg; 964 count++; 965 } 966 } 967 968 return found ?: ERR_PTR(-EAGAIN); 969 } 970 971 static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, 972 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 973 { 974 int mode; 975 struct msg_queue *msq; 976 struct ipc_namespace *ns; 977 struct msg_msg *msg, *copy = NULL; 978 DEFINE_WAKE_Q(wake_q); 979 980 ns = current->nsproxy->ipc_ns; 981 982 if (msqid < 0 || (long) bufsz < 0) 983 return -EINVAL; 984 985 if (msgflg & MSG_COPY) { 986 if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT)) 987 return -EINVAL; 988 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 989 if (IS_ERR(copy)) 990 return PTR_ERR(copy); 991 } 992 mode = convert_mode(&msgtyp, msgflg); 993 994 rcu_read_lock(); 995 msq = msq_obtain_object_check(ns, msqid); 996 if (IS_ERR(msq)) { 997 rcu_read_unlock(); 998 free_copy(copy); 999 return PTR_ERR(msq); 1000 } 1001 1002 for (;;) { 1003 struct msg_receiver msr_d; 1004 1005 msg = ERR_PTR(-EACCES); 1006 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 1007 goto out_unlock1; 1008 1009 ipc_lock_object(&msq->q_perm); 1010 1011 /* raced with RMID? */ 1012 if (!ipc_valid_object(&msq->q_perm)) { 1013 msg = ERR_PTR(-EIDRM); 1014 goto out_unlock0; 1015 } 1016 1017 msg = find_msg(msq, &msgtyp, mode); 1018 if (!IS_ERR(msg)) { 1019 /* 1020 * Found a suitable message. 1021 * Unlink it from the queue. 1022 */ 1023 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 1024 msg = ERR_PTR(-E2BIG); 1025 goto out_unlock0; 1026 } 1027 /* 1028 * If we are copying, then do not unlink message and do 1029 * not update queue parameters. 1030 */ 1031 if (msgflg & MSG_COPY) { 1032 msg = copy_msg(msg, copy); 1033 goto out_unlock0; 1034 } 1035 1036 list_del(&msg->m_list); 1037 msq->q_qnum--; 1038 msq->q_rtime = get_seconds(); 1039 msq->q_lrpid = task_tgid_vnr(current); 1040 msq->q_cbytes -= msg->m_ts; 1041 atomic_sub(msg->m_ts, &ns->msg_bytes); 1042 atomic_dec(&ns->msg_hdrs); 1043 ss_wakeup(msq, &wake_q, false); 1044 1045 goto out_unlock0; 1046 } 1047 1048 /* No message waiting. Wait for a message */ 1049 if (msgflg & IPC_NOWAIT) { 1050 msg = ERR_PTR(-ENOMSG); 1051 goto out_unlock0; 1052 } 1053 1054 list_add_tail(&msr_d.r_list, &msq->q_receivers); 1055 msr_d.r_tsk = current; 1056 msr_d.r_msgtype = msgtyp; 1057 msr_d.r_mode = mode; 1058 if (msgflg & MSG_NOERROR) 1059 msr_d.r_maxsize = INT_MAX; 1060 else 1061 msr_d.r_maxsize = bufsz; 1062 msr_d.r_msg = ERR_PTR(-EAGAIN); 1063 __set_current_state(TASK_INTERRUPTIBLE); 1064 1065 ipc_unlock_object(&msq->q_perm); 1066 rcu_read_unlock(); 1067 schedule(); 1068 1069 /* 1070 * Lockless receive, part 1: 1071 * We don't hold a reference to the queue and getting a 1072 * reference would defeat the idea of a lockless operation, 1073 * thus the code relies on rcu to guarantee the existence of 1074 * msq: 1075 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 1076 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 1077 */ 1078 rcu_read_lock(); 1079 1080 /* 1081 * Lockless receive, part 2: 1082 * The work in pipelined_send() and expunge_all(): 1083 * - Set pointer to message 1084 * - Queue the receiver task for later wakeup 1085 * - Wake up the process after the lock is dropped. 1086 * 1087 * Should the process wake up before this wakeup (due to a 1088 * signal) it will either see the message and continue ... 1089 */ 1090 msg = READ_ONCE(msr_d.r_msg); 1091 if (msg != ERR_PTR(-EAGAIN)) 1092 goto out_unlock1; 1093 1094 /* 1095 * ... or see -EAGAIN, acquire the lock to check the message 1096 * again. 1097 */ 1098 ipc_lock_object(&msq->q_perm); 1099 1100 msg = msr_d.r_msg; 1101 if (msg != ERR_PTR(-EAGAIN)) 1102 goto out_unlock0; 1103 1104 list_del(&msr_d.r_list); 1105 if (signal_pending(current)) { 1106 msg = ERR_PTR(-ERESTARTNOHAND); 1107 goto out_unlock0; 1108 } 1109 1110 ipc_unlock_object(&msq->q_perm); 1111 } 1112 1113 out_unlock0: 1114 ipc_unlock_object(&msq->q_perm); 1115 wake_up_q(&wake_q); 1116 out_unlock1: 1117 rcu_read_unlock(); 1118 if (IS_ERR(msg)) { 1119 free_copy(copy); 1120 return PTR_ERR(msg); 1121 } 1122 1123 bufsz = msg_handler(buf, msg, bufsz); 1124 free_msg(msg); 1125 1126 return bufsz; 1127 } 1128 1129 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 1130 long, msgtyp, int, msgflg) 1131 { 1132 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 1133 } 1134 1135 #ifdef CONFIG_COMPAT 1136 static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 1137 { 1138 struct compat_msgbuf __user *msgp = dest; 1139 size_t msgsz; 1140 1141 if (put_user(msg->m_type, &msgp->mtype)) 1142 return -EFAULT; 1143 1144 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 1145 if (store_msg(msgp->mtext, msg, msgsz)) 1146 return -EFAULT; 1147 return msgsz; 1148 } 1149 1150 COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, 1151 compat_ssize_t, msgsz, compat_long_t, msgtyp, int, msgflg) 1152 { 1153 return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp, 1154 msgflg, compat_do_msg_fill); 1155 } 1156 #endif 1157 1158 int msg_init_ns(struct ipc_namespace *ns) 1159 { 1160 ns->msg_ctlmax = MSGMAX; 1161 ns->msg_ctlmnb = MSGMNB; 1162 ns->msg_ctlmni = MSGMNI; 1163 1164 atomic_set(&ns->msg_bytes, 0); 1165 atomic_set(&ns->msg_hdrs, 0); 1166 return ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 1167 } 1168 1169 #ifdef CONFIG_IPC_NS 1170 void msg_exit_ns(struct ipc_namespace *ns) 1171 { 1172 free_ipcs(ns, &msg_ids(ns), freeque); 1173 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 1174 rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht); 1175 } 1176 #endif 1177 1178 #ifdef CONFIG_PROC_FS 1179 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 1180 { 1181 struct user_namespace *user_ns = seq_user_ns(s); 1182 struct kern_ipc_perm *ipcp = it; 1183 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 1184 1185 seq_printf(s, 1186 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10llu %10llu %10llu\n", 1187 msq->q_perm.key, 1188 msq->q_perm.id, 1189 msq->q_perm.mode, 1190 msq->q_cbytes, 1191 msq->q_qnum, 1192 msq->q_lspid, 1193 msq->q_lrpid, 1194 from_kuid_munged(user_ns, msq->q_perm.uid), 1195 from_kgid_munged(user_ns, msq->q_perm.gid), 1196 from_kuid_munged(user_ns, msq->q_perm.cuid), 1197 from_kgid_munged(user_ns, msq->q_perm.cgid), 1198 msq->q_stime, 1199 msq->q_rtime, 1200 msq->q_ctime); 1201 1202 return 0; 1203 } 1204 #endif 1205 1206 int __init msg_init(void) 1207 { 1208 const int err = msg_init_ns(&init_ipc_ns); 1209 1210 ipc_init_proc_interface("sysvipc/msg", 1211 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 1212 IPC_MSG_IDS, sysvipc_msg_proc_show); 1213 return err; 1214 } 1215