/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Better ipc lock (kern_ipc_perm.lock) handling
 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>

#include <linux/uaccess.h>

#include "util.h"

struct shm_file_data {
	int id;
	struct ipc_namespace *ns;
	struct file *file;
	const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

void shm_init_ns(struct ipc_namespace *ns)
{
	ns->shm_ctlmax = SHMMAX;
	ns->shm_ctlall = SHMALL;
	ns->shm_ctlmni = SHMMNI;
	ns->shm_rmid_forced = 0;
	ns->shm_tot = 0;
	ipc_init_ids(&shm_ids(ns));
}
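
/*
 * For reference: these per-namespace limits surface to userspace as the
 * kernel.shmmax, kernel.shmall, kernel.shmmni and kernel.shm_rmid_forced
 * sysctls; the sysctl plumbing itself lives in ipc/ipc_sysctl.c, not in
 * this file, e.g.
 *
 *	sysctl -w kernel.shm_rmid_forced=1
 */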

/*
 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 * Only shm_ids.rwsem remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct shmid_kernel *shp;
	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	if (shp->shm_nattch) {
		shp->shm_perm.mode |= SHM_DEST;
		/* Do not find it any more */
		shp->shm_perm.key = IPC_PRIVATE;
		shm_unlock(shp);
	} else
		shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif

static int __init ipc_ns_init(void)
{
	shm_init_ns(&init_ipc_ns);
	return 0;
}

pure_initcall(ipc_ns_init);

void __init shm_init(void)
{
	ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
			"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
			"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
			IPC_SHM_IDS, sysvipc_shm_proc_show);
}

static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

/*
 * shm_lock_(check_) routines are called in the paths where the rwsem
 * is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

	/*
	 * Callers of shm_lock() must validate the status of the returned ipc
	 * object pointer (as returned by ipc_lock()), and error out as
	 * appropriate.
	 */
	if (IS_ERR(ipcp))
		return (void *)ipcp;
	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
	rcu_read_lock();
	ipc_lock_object(&ipcp->shm_perm);
}

static void shm_rcu_free(struct rcu_head *head)
{
	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
	struct shmid_kernel *shp = ipc_rcu_to_struct(p);

	security_shm_free(shp);
	ipc_rcu_free(head);
}

static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	list_del(&s->shm_clist);
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}

static int __shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);

	if (IS_ERR(shp))
		return PTR_ERR(shp);

	shp->shm_atim = get_seconds();
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_nattch++;
	shm_unlock(shp);
	return 0;
}

/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	int err = __shm_open(vma);
	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	WARN_ON_ONCE(err);
}
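
/*
 * Attach lifetime, for reference: shm_nattch is raised in __shm_open()
 * (reached from shm_mmap() and, via vm_ops->open, from fork()) and is
 * dropped again in shm_close(); a segment is only freed once the count
 * reaches zero and shm_may_destroy() below agrees.
 */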

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	struct file *shm_file;

	shm_file = shp->shm_file;
	shp->shm_file = NULL;
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	if (!is_file_hugepages(shm_file))
		shmem_lock(shm_file, 0, shp->mlock_user);
	else if (shp->mlock_user)
		user_shm_unlock(i_size_read(file_inode(shm_file)),
				shp->mlock_user);
	fput(shm_file);
	ipc_rcu_putref(shp, shm_rcu_free);
}

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	return (shp->shm_nattch == 0) &&
	       (ns->shm_rmid_forced ||
		(shp->shm_perm.mode & SHM_DEST));
}

/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	down_write(&shm_ids(ns).rwsem);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);

	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	if (WARN_ON_ONCE(IS_ERR(shp)))
		goto done; /* no-op */

	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_dtim = get_seconds();
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
done:
	up_write(&shm_ids(ns).rwsem);
}
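
/*
 * For reference: the orphan handling below is what backs the
 * kernel.shm_rmid_forced sysctl -- setting it to 1 sweeps segments whose
 * creator has already exited (shm_destroy_orphaned() is invoked from the
 * sysctl handler in ipc/ipc_sysctl.c), while exit_shm() covers the
 * creator's own exit path.
 */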

/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/*
	 * We want to destroy segments without users and with already
	 * exit'ed originating process.
	 *
	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
	 */
	if (shp->shm_creator != NULL)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);
	}
	return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
	down_write(&shm_ids(ns).rwsem);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
	up_write(&shm_ids(ns).rwsem);
}

/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
	struct shmid_kernel *shp, *n;

	if (list_empty(&task->sysvshm.shm_clist))
		return;

	/*
	 * If kernel.shm_rmid_forced is not set then only keep track of
	 * which shmids are orphaned, so that a later set of the sysctl
	 * can clean them up.
	 */
	if (!ns->shm_rmid_forced) {
		down_read(&shm_ids(ns).rwsem);
		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
			shp->shm_creator = NULL;
		/*
		 * Only under read lock but we are only called on current
		 * so no entry on the list will be shared.
		 */
		list_del(&task->sysvshm.shm_clist);
		up_read(&shm_ids(ns).rwsem);
		return;
	}

	/*
	 * Destroy all already created segments that are not yet mapped,
	 * and mark any mapped ones as orphaned to cover the sysctl toggling.
	 * Destroy is skipped if shm_may_destroy() returns false.
	 */
	down_write(&shm_ids(ns).rwsem);
	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
		shp->shm_creator = NULL;

		if (shm_may_destroy(ns, shp)) {
			shm_lock_by_ptr(shp);
			shm_destroy(ns, shp);
		}
	}

	/* Remove the list head from any segments still attached. */
	list_del(&task->sysvshm.shm_clist);
	up_write(&shm_ids(ns).rwsem);
}

static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->vm_ops->fault(vma, vmf);
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	int err = 0;

	if (sfd->vm_ops->set_policy)
		err = sfd->vm_ops->set_policy(vma, new);
	return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
					unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct mempolicy *pol = NULL;

	if (sfd->vm_ops->get_policy)
		pol = sfd->vm_ops->get_policy(vma, addr);
	else if (vma->vm_policy)
		pol = vma->vm_policy;

	return pol;
}
#endif
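
/*
 * For reference: shm_fault() and the mempolicy hooks above simply forward
 * to the vm_ops of the underlying shmem (or hugetlbfs) mapping, which
 * shm_mmap() below saves in sfd->vm_ops; this layer interposes only to
 * keep per-segment attach statistics.
 */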

static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	/*
	 * In case of remap_file_pages() emulation, the file can represent
	 * a removed IPC ID: propagate the shm_lock() error to the caller.
	 */
	ret = __shm_open(vma);
	if (ret)
		return ret;

	ret = sfd->file->f_op->mmap(sfd->file, vma);
	if (ret) {
		shm_close(vma);
		return ret;
	}
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	WARN_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;
	return 0;
}

static int shm_release(struct inode *ino, struct file *file)
{
	struct shm_file_data *sfd = shm_file_data(file);

	put_ipc_ns(sfd->ns);
	shm_file_data(file) = NULL;
	kfree(sfd);
	return 0;
}

static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fsync)
		return -EINVAL;
	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
			  loff_t len)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fallocate)
		return -EOPNOTSUPP;
	return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
	unsigned long addr, unsigned long len, unsigned long pgoff,
	unsigned long flags)
{
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
						  pgoff, flags);
}

static const struct file_operations shm_file_operations = {
	.mmap = shm_mmap,
	.fsync = shm_fsync,
	.release = shm_release,
	.get_unmapped_area = shm_get_unmapped_area,
	.llseek = noop_llseek,
	.fallocate = shm_fallocate,
};

/*
 * shm_file_operations_huge is now identical to shm_file_operations,
 * but we keep it distinct for the sake of is_file_shm_hugepages().
 */
static const struct file_operations shm_file_operations_huge = {
	.mmap = shm_mmap,
	.fsync = shm_fsync,
	.release = shm_release,
	.get_unmapped_area = shm_get_unmapped_area,
	.llseek = noop_llseek,
	.fallocate = shm_fallocate,
};

bool is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
	.open = shm_open,	/* callback for a new vm-area open */
	.close = shm_close,	/* callback for when the vm-area is released */
	.fault = shm_fault,
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
	key_t key = params->key;
	int shmflg = params->flg;
	size_t size = params->u.size;
	int error;
	struct shmid_kernel *shp;
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct file *file;
	char name[13];
	int id;
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;

	if (numpages << PAGE_SHIFT < size)
		return -ENOSPC;

	if (ns->shm_tot + numpages < ns->shm_tot ||
			ns->shm_tot + numpages > ns->shm_ctlall)
		return -ENOSPC;

	shp = ipc_rcu_alloc(sizeof(*shp));
	if (!shp)
		return -ENOMEM;

	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->mlock_user = NULL;

	shp->shm_perm.security = NULL;
	error = security_shm_alloc(shp);
	if (error) {
		ipc_rcu_putref(shp, ipc_rcu_free);
		return error;
	}

	sprintf(name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		struct hstate *hs;
		size_t hugesize;

		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
		if (!hs) {
			error = -EINVAL;
			goto no_file;
		}
		hugesize = ALIGN(size, huge_page_size(hs));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		file = hugetlb_file_setup(name, hugesize, acctflag,
				&shp->mlock_user, HUGETLB_SHMFS_INODE,
				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
	} else {
		/*
		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
		 * if it's asked for.
		 */
		if ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_kernel_file_setup(name, size, acctflag);
	}
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto no_file;

	shp->shm_cprid = task_tgid_vnr(current);
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = get_seconds();
	shp->shm_segsz = size;
	shp->shm_nattch = 0;
	shp->shm_file = file;
	shp->shm_creator = current;

	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
	if (id < 0) {
		error = id;
		goto no_id;
	}

	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);

	/*
	 * shmid gets reported as "inode#" in /proc/pid/maps.
	 * proc-ps tools use this. Changing this will break them.
	 */
	file_inode(file)->i_ino = shp->shm_perm.id;

	ns->shm_tot += numpages;
	error = shp->shm_perm.id;

	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	return error;

no_id:
	if (is_file_hugepages(file) && shp->mlock_user)
		user_shm_unlock(size, shp->mlock_user);
	fput(file);
no_file:
	ipc_rcu_putref(shp, shm_rcu_free);
	return error;
}
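
/*
 * Illustrative only (userspace, not part of this file): a 2MB huge page
 * segment would be requested as, e.g.,
 *
 *	shmget(key, 2 * 1024 * 1024,
 *	       IPC_CREAT | SHM_HUGETLB | SHM_HUGE_2MB | 0600);
 *
 * where SHM_HUGE_2MB encodes log2(2MB) == 21 in the SHM_HUGE_SHIFT bits
 * that hstate_sizelog() decodes above.
 */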

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	return security_shm_associate(shp, shmflg);
}

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
				  struct ipc_params *params)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	if (shp->shm_segsz < params->u.size)
		return -EINVAL;

	return 0;
}

SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops shm_ops = {
		.getnew = newseg,
		.associate = shm_security,
		.more_checks = shm_more_checks,
	};
	struct ipc_params shm_params;

	ns = current->nsproxy->ipc_ns;

	shm_params.key = key;
	shm_params.flg = shmflg;
	shm_params.u.size = size;

	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}
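
/*
 * Illustrative only (userspace, not part of this file), the classic
 * lifecycle these syscalls implement:
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	void *p = shmat(id, NULL, 0);
 *	...
 *	shmdt(p);
 *	shmctl(id, IPC_RMID, NULL);
 *
 * With kernel.shm_rmid_forced=1 the final IPC_RMID is effectively implied
 * once the last attach is gone (see shm_may_destroy() above).
 */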

static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct shmid_ds out;

		memset(&out, 0, sizeof(out));
		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz = in->shm_segsz;
		out.shm_atime = in->shm_atime;
		out.shm_dtime = in->shm_dtime;
		out.shm_ctime = in->shm_ctime;
		out.shm_cpid = in->shm_cpid;
		out.shm_lpid = in->shm_lpid;
		out.shm_nattch = in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->shm_perm.uid = tbuf_old.shm_perm.uid;
		out->shm_perm.gid = tbuf_old.shm_perm.gid;
		out->shm_perm.mode = tbuf_old.shm_perm.mode;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct shminfo out;

		if (in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin = in->shmmin;
		out.shmmni = in->shmmni;
		out.shmseg = in->shmseg;
		out.shmall = in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
			     unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = file_inode(shp->shm_file);

	if (is_file_hugepages(shp->shm_file)) {
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);

		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		struct shmem_inode_info *info = SHMEM_I(inode);

		spin_lock_irq(&info->lock);
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock_irq(&info->lock);
#else
		*rss_add += inode->i_mapping->nrpages;
#endif
	}
}

/*
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
			 unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct kern_ipc_perm *ipc;
		struct shmid_kernel *shp;

		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (ipc == NULL)
			continue;
		shp = container_of(ipc, struct shmid_kernel, shm_perm);

		shm_add_rss_swap(shp, rss, swp);

		total++;
	}
}

/*
 * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: the caller must hold no locks on entry; the rwsem is taken
 * inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
		       struct shmid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct shmid64_ds shmid64;
	struct shmid_kernel *shp;
	int err;

	if (cmd == IPC_SET) {
		if (copy_shmid_from_user(&shmid64, buf, version))
			return -EFAULT;
	}

	down_write(&shm_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
				      &shmid64.shm_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	err = security_shm_shmctl(shp, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&shp->shm_perm);
		/* do_shm_rmid unlocks the ipc object and rcu */
		do_shm_rmid(ns, ipcp);
		goto out_up;
	case IPC_SET:
		ipc_lock_object(&shp->shm_perm);
		err = ipc_update_perm(&shmid64.shm_perm, ipcp);
		if (err)
			goto out_unlock0;
		shp->shm_ctim = get_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&shm_ids(ns).rwsem);
	return err;
}

static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
			 int cmd, int version, void __user *buf)
{
	int err;
	struct shmid_kernel *shp;

	/* preliminary security checks for *_INFO */
	if (cmd == IPC_INFO || cmd == SHM_INFO) {
		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;
	}

	switch (cmd) {
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo, 0, sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
		shminfo.shmmax = ns->shm_ctlmax;
		shminfo.shmall = ns->shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if (copy_shminfo_to_user(buf, &shminfo, version))
			return -EFAULT;

		down_read(&shm_ids(ns).rwsem);
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rwsem);

		if (err < 0)
			err = 0;
		goto out;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info, 0, sizeof(shm_info));
		down_read(&shm_ids(ns).rwsem);
		shm_info.used_ids = shm_ids(ns).in_use;
		shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
		shm_info.shm_tot = ns->shm_tot;
		shm_info.swap_attempts = 0;
		shm_info.swap_successes = 0;
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rwsem);
		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
			err = -EFAULT;
			goto out;
		}

		err = err < 0 ? 0 : err;
		goto out;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;

		rcu_read_lock();
		if (cmd == SHM_STAT) {
			shp = shm_obtain_object(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out_unlock;
			}
			result = shp->shm_perm.id;
		} else {
			shp = shm_obtain_object_check(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out_unlock;
			}
			result = 0;
		}

		err = -EACCES;
		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
			goto out_unlock;

		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;

		memset(&tbuf, 0, sizeof(tbuf));
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz = shp->shm_segsz;
		tbuf.shm_atime = shp->shm_atim;
		tbuf.shm_dtime = shp->shm_dtim;
		tbuf.shm_ctime = shp->shm_ctim;
		tbuf.shm_cpid = shp->shm_cprid;
		tbuf.shm_lpid = shp->shm_lprid;
		tbuf.shm_nattch = shp->shm_nattch;
		rcu_read_unlock();

		if (copy_shmid_to_user(buf, &tbuf, version))
			err = -EFAULT;
		else
			err = result;
		goto out;
	}
	default:
		return -EINVAL;
	}

out_unlock:
	rcu_read_unlock();
out:
	return err;
}
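
/*
 * For reference: the SHM_STAT/IPC_STAT path above runs under
 * rcu_read_lock() alone and never takes the ipc object spinlock; the
 * snapshot is copied to userspace only after the RCU read side is left.
 * This is part of the lock-scope reduction noted in the header comment.
 */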

SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	struct shmid_kernel *shp;
	int err, version;
	struct ipc_namespace *ns;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SHM_INFO:
	case SHM_STAT:
	case IPC_STAT:
		return shmctl_nolock(ns, shmid, cmd, version, buf);
	case IPC_RMID:
	case IPC_SET:
		return shmctl_down(ns, shmid, cmd, buf, version);
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		struct file *shm_file;

		rcu_read_lock();
		shp = shm_obtain_object_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock1;
		}

		audit_ipc_obj(&(shp->shm_perm));
		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock1;

		ipc_lock_object(&shp->shm_perm);

		/* check if shm_destroy() is tearing down shp */
		if (!ipc_valid_object(&shp->shm_perm)) {
			err = -EIDRM;
			goto out_unlock0;
		}

		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
			kuid_t euid = current_euid();

			if (!uid_eq(euid, shp->shm_perm.uid) &&
			    !uid_eq(euid, shp->shm_perm.cuid)) {
				err = -EPERM;
				goto out_unlock0;
			}
			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
				err = -EPERM;
				goto out_unlock0;
			}
		}

		shm_file = shp->shm_file;
		if (is_file_hugepages(shm_file))
			goto out_unlock0;

		if (cmd == SHM_LOCK) {
			struct user_struct *user = current_user();

			err = shmem_lock(shm_file, 1, user);
			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
				shp->shm_perm.mode |= SHM_LOCKED;
				shp->mlock_user = user;
			}
			goto out_unlock0;
		}

		/* SHM_UNLOCK */
		if (!(shp->shm_perm.mode & SHM_LOCKED))
			goto out_unlock0;
		shmem_lock(shm_file, 0, shp->mlock_user);
		shp->shm_perm.mode &= ~SHM_LOCKED;
		shp->mlock_user = NULL;
		get_file(shm_file);
		ipc_unlock_object(&shp->shm_perm);
		rcu_read_unlock();
		shmem_unlock_mapping(shm_file->f_mapping);

		fput(shm_file);
		return err;
	}
	default:
		return -EINVAL;
	}

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
	return err;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
	      unsigned long shmlba)
{
	struct shmid_kernel *shp;
	unsigned long addr;
	unsigned long size;
	struct file *file;
	int err;
	unsigned long flags;
	unsigned long prot;
	int acc_mode;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	struct path path;
	fmode_t f_mode;
	unsigned long populate = 0;

	err = -EINVAL;
	if (shmid < 0)
		goto out;
	else if ((addr = (ulong)shmaddr)) {
		if (addr & (shmlba - 1)) {
			if (shmflg & SHM_RND)
				addr &= ~(shmlba - 1); /* round down */
			else
#ifndef __ARCH_FORCE_SHMLBA
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else {
		if ((shmflg & SHM_REMAP))
			goto out;

		flags = MAP_SHARED;
	}

	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_mode = FMODE_READ;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_mode = FMODE_READ | FMODE_WRITE;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(shp, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	path = shp->shm_file->f_path;
	path_get(&path);
	shp->shm_nattch++;
	size = i_size_read(d_inode(path.dentry));
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();

	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd) {
		path_put(&path);
		goto out_nattch;
	}
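
	/*
	 * A dedicated struct file is created for each attach: it wraps the
	 * segment's backing shmem/hugetlbfs file, and ->private_data keeps
	 * sfd so the shm_* operations above can forward to the real file
	 * while updating this segment's statistics.
	 */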
	file = alloc_file(&path, f_mode,
			  is_file_hugepages(shp->shm_file) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	err = PTR_ERR(file);
	if (IS_ERR(file)) {
		kfree(sfd);
		path_put(&path);
		goto out_nattch;
	}

	file->private_data = sfd;
	file->f_mapping = shp->shm_file->f_mapping;
	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);
	sfd->file = shp->shm_file;
	sfd->vm_ops = NULL;

	err = security_mmap_file(file, prot, flags);
	if (err)
		goto out_fput;

	if (down_write_killable(&current->mm->mmap_sem)) {
		err = -EINTR;
		goto out_fput;
	}

	if (addr && !(shmflg & SHM_REMAP)) {
		err = -EINVAL;
		if (addr + size < addr)
			goto invalid;

		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
	}

	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
	*raddr = addr;
	err = 0;
	if (IS_ERR_VALUE(addr))
		err = (long)addr;
invalid:
	up_write(&current->mm->mmap_sem);
	if (populate)
		mm_populate(addr, populate);

out_fput:
	fput(file);

out_nattch:
	down_write(&shm_ids(ns).rwsem);
	shp = shm_lock(ns, shmid);
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rwsem);
	return err;

out_unlock:
	rcu_read_unlock();
out:
	return err;
}

SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
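
/*
 * For reference: force_successful_syscall_return() matters here because a
 * perfectly valid attach address can fall in the range that some
 * architectures would otherwise interpret as a negative errno.
 */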

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = (unsigned long)shmaddr;
	int retval = -EINVAL;
#ifdef CONFIG_MMU
	loff_t size = 0;
	struct file *file;
	struct vm_area_struct *next;
#endif

	if (addr & ~PAGE_MASK)
		return retval;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records its size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size and that are from the
	 *   same shm segment from which we determined the size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */
	vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
	while (vma) {
		next = vma->vm_next;

		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or it
		 * otherwise starts at this address with no hassles.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
		    (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

			/*
			 * Record the file of the shm segment being
			 * unmapped. With mremap(), someone could place
			 * page from another segment but with equal offsets
			 * in the range we are unmapping.
			 */
			file = vma->vm_file;
			size = i_size_read(file_inode(vma->vm_file));
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = next;
			break;
		}
		vma = next;
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 */
	size = PAGE_ALIGN(size);
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		next = vma->vm_next;

		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
		    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
		    (vma->vm_file == file))
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
		vma = next;
	}

#else /* CONFIG_MMU */
	/*
	 * Under NOMMU conditions, the exact address to be destroyed must be
	 * given.
	 */
	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
		retval = 0;
	}

#endif

	up_write(&mm->mmap_sem);
	return retval;
}

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct shmid_kernel *shp = it;
	unsigned long rss = 0, swp = 0;

	shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	seq_printf(s,
		   "%10d %10d %4o " SIZE_SPEC " %5u %5u "
		   "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
		   SIZE_SPEC " " SIZE_SPEC "\n",
		   shp->shm_perm.key,
		   shp->shm_perm.id,
		   shp->shm_perm.mode,
		   shp->shm_segsz,
		   shp->shm_cprid,
		   shp->shm_lprid,
		   shp->shm_nattch,
		   from_kuid_munged(user_ns, shp->shm_perm.uid),
		   from_kgid_munged(user_ns, shp->shm_perm.gid),
		   from_kuid_munged(user_ns, shp->shm_perm.cuid),
		   from_kgid_munged(user_ns, shp->shm_perm.cgid),
		   shp->shm_atim,
		   shp->shm_dtim,
		   shp->shm_ctim,
		   rss * PAGE_SIZE,
		   swp * PAGE_SIZE);

	return 0;
}
#endif