1 /* 2 * linux/ipc/shm.c 3 * Copyright (C) 1992, 1993 Krishna Balasubramanian 4 * Many improvements/fixes by Bruno Haible. 5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. 6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 7 * 8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de> 10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> 11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com> 12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> 13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> 14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> 15 * 16 * support for audit of ipc object properties and permission changes 17 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 18 * 19 * namespaces support 20 * OpenVZ, SWsoft Inc. 21 * Pavel Emelianov <xemul@openvz.org> 22 */ 23 24 #include <linux/slab.h> 25 #include <linux/mm.h> 26 #include <linux/hugetlb.h> 27 #include <linux/shm.h> 28 #include <linux/init.h> 29 #include <linux/file.h> 30 #include <linux/mman.h> 31 #include <linux/shmem_fs.h> 32 #include <linux/security.h> 33 #include <linux/syscalls.h> 34 #include <linux/audit.h> 35 #include <linux/capability.h> 36 #include <linux/ptrace.h> 37 #include <linux/seq_file.h> 38 #include <linux/rwsem.h> 39 #include <linux/nsproxy.h> 40 #include <linux/mount.h> 41 #include <linux/ipc_namespace.h> 42 43 #include <asm/uaccess.h> 44 45 #include "util.h" 46 47 struct shm_file_data { 48 int id; 49 struct ipc_namespace *ns; 50 struct file *file; 51 const struct vm_operations_struct *vm_ops; 52 }; 53 54 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) 55 56 static const struct file_operations shm_file_operations; 57 static const struct vm_operations_struct shm_vm_ops; 58 59 #define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) 60 61 #define shm_unlock(shp) \ 62 ipc_unlock(&(shp)->shm_perm) 63 64 static int newseg(struct ipc_namespace *, struct ipc_params *); 65 static void shm_open(struct vm_area_struct *vma); 66 static void shm_close(struct vm_area_struct *vma); 67 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp); 68 #ifdef CONFIG_PROC_FS 69 static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 70 #endif 71 72 void shm_init_ns(struct ipc_namespace *ns) 73 { 74 ns->shm_ctlmax = SHMMAX; 75 ns->shm_ctlall = SHMALL; 76 ns->shm_ctlmni = SHMMNI; 77 ns->shm_rmid_forced = 0; 78 ns->shm_tot = 0; 79 ipc_init_ids(&shm_ids(ns)); 80 } 81 82 /* 83 * Called with shm_ids.rw_mutex (writer) and the shp structure locked. 84 * Only shm_ids.rw_mutex remains locked on exit. 85 */ 86 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 87 { 88 struct shmid_kernel *shp; 89 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 90 91 if (shp->shm_nattch){ 92 shp->shm_perm.mode |= SHM_DEST; 93 /* Do not find it any more */ 94 shp->shm_perm.key = IPC_PRIVATE; 95 shm_unlock(shp); 96 } else 97 shm_destroy(ns, shp); 98 } 99 100 #ifdef CONFIG_IPC_NS 101 void shm_exit_ns(struct ipc_namespace *ns) 102 { 103 free_ipcs(ns, &shm_ids(ns), do_shm_rmid); 104 idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); 105 } 106 #endif 107 108 void __init shm_init (void) 109 { 110 shm_init_ns(&init_ipc_ns); 111 ipc_init_proc_interface("sysvipc/shm", 112 #if BITS_PER_LONG <= 32 113 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 114 #else 115 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 116 #endif 117 IPC_SHM_IDS, sysvipc_shm_proc_show); 118 } 119 120 /* 121 * shm_lock_(check_) routines are called in the paths where the rw_mutex 122 * is not necessarily held. 123 */ 124 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 125 { 126 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); 127 128 if (IS_ERR(ipcp)) 129 return (struct shmid_kernel *)ipcp; 130 131 return container_of(ipcp, struct shmid_kernel, shm_perm); 132 } 133 134 static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, 135 int id) 136 { 137 struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); 138 139 if (IS_ERR(ipcp)) 140 return (struct shmid_kernel *)ipcp; 141 142 return container_of(ipcp, struct shmid_kernel, shm_perm); 143 } 144 145 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 146 { 147 ipc_rmid(&shm_ids(ns), &s->shm_perm); 148 } 149 150 151 /* This is called by fork, once for every shm attach. */ 152 static void shm_open(struct vm_area_struct *vma) 153 { 154 struct file *file = vma->vm_file; 155 struct shm_file_data *sfd = shm_file_data(file); 156 struct shmid_kernel *shp; 157 158 shp = shm_lock(sfd->ns, sfd->id); 159 BUG_ON(IS_ERR(shp)); 160 shp->shm_atim = get_seconds(); 161 shp->shm_lprid = task_tgid_vnr(current); 162 shp->shm_nattch++; 163 shm_unlock(shp); 164 } 165 166 /* 167 * shm_destroy - free the struct shmid_kernel 168 * 169 * @ns: namespace 170 * @shp: struct to free 171 * 172 * It has to be called with shp and shm_ids.rw_mutex (writer) locked, 173 * but returns with shp unlocked and freed. 174 */ 175 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 176 { 177 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 178 shm_rmid(ns, shp); 179 shm_unlock(shp); 180 if (!is_file_hugepages(shp->shm_file)) 181 shmem_lock(shp->shm_file, 0, shp->mlock_user); 182 else if (shp->mlock_user) 183 user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, 184 shp->mlock_user); 185 fput (shp->shm_file); 186 security_shm_free(shp); 187 ipc_rcu_putref(shp); 188 } 189 190 /* 191 * shm_may_destroy - identifies whether shm segment should be destroyed now 192 * 193 * Returns true if and only if there are no active users of the segment and 194 * one of the following is true: 195 * 196 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp 197 * 198 * 2) sysctl kernel.shm_rmid_forced is set to 1. 199 */ 200 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 201 { 202 return (shp->shm_nattch == 0) && 203 (ns->shm_rmid_forced || 204 (shp->shm_perm.mode & SHM_DEST)); 205 } 206 207 /* 208 * remove the attach descriptor vma. 209 * free memory for segment if it is marked destroyed. 210 * The descriptor has already been removed from the current->mm->mmap list 211 * and will later be kfree()d. 212 */ 213 static void shm_close(struct vm_area_struct *vma) 214 { 215 struct file * file = vma->vm_file; 216 struct shm_file_data *sfd = shm_file_data(file); 217 struct shmid_kernel *shp; 218 struct ipc_namespace *ns = sfd->ns; 219 220 down_write(&shm_ids(ns).rw_mutex); 221 /* remove from the list of attaches of the shm segment */ 222 shp = shm_lock(ns, sfd->id); 223 BUG_ON(IS_ERR(shp)); 224 shp->shm_lprid = task_tgid_vnr(current); 225 shp->shm_dtim = get_seconds(); 226 shp->shm_nattch--; 227 if (shm_may_destroy(ns, shp)) 228 shm_destroy(ns, shp); 229 else 230 shm_unlock(shp); 231 up_write(&shm_ids(ns).rw_mutex); 232 } 233 234 static int shm_try_destroy_current(int id, void *p, void *data) 235 { 236 struct ipc_namespace *ns = data; 237 struct shmid_kernel *shp = shm_lock(ns, id); 238 239 if (IS_ERR(shp)) 240 return 0; 241 242 if (shp->shm_cprid != task_tgid_vnr(current)) { 243 shm_unlock(shp); 244 return 0; 245 } 246 247 if (shm_may_destroy(ns, shp)) 248 shm_destroy(ns, shp); 249 else 250 shm_unlock(shp); 251 return 0; 252 } 253 254 static int shm_try_destroy_orphaned(int id, void *p, void *data) 255 { 256 struct ipc_namespace *ns = data; 257 struct shmid_kernel *shp = shm_lock(ns, id); 258 struct task_struct *task; 259 260 if (IS_ERR(shp)) 261 return 0; 262 263 /* 264 * We want to destroy segments without users and with already 265 * exit'ed originating process. 266 * 267 * XXX: the originating process may exist in another pid namespace. 268 */ 269 task = find_task_by_vpid(shp->shm_cprid); 270 if (task != NULL) { 271 shm_unlock(shp); 272 return 0; 273 } 274 275 if (shm_may_destroy(ns, shp)) 276 shm_destroy(ns, shp); 277 else 278 shm_unlock(shp); 279 return 0; 280 } 281 282 void shm_destroy_orphaned(struct ipc_namespace *ns) 283 { 284 down_write(&shm_ids(ns).rw_mutex); 285 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); 286 up_write(&shm_ids(ns).rw_mutex); 287 } 288 289 290 void exit_shm(struct task_struct *task) 291 { 292 struct nsproxy *nsp = task->nsproxy; 293 struct ipc_namespace *ns; 294 295 if (!nsp) 296 return; 297 ns = nsp->ipc_ns; 298 if (!ns || !ns->shm_rmid_forced) 299 return; 300 301 /* Destroy all already created segments, but not mapped yet */ 302 down_write(&shm_ids(ns).rw_mutex); 303 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); 304 up_write(&shm_ids(ns).rw_mutex); 305 } 306 307 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 308 { 309 struct file *file = vma->vm_file; 310 struct shm_file_data *sfd = shm_file_data(file); 311 312 return sfd->vm_ops->fault(vma, vmf); 313 } 314 315 #ifdef CONFIG_NUMA 316 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 317 { 318 struct file *file = vma->vm_file; 319 struct shm_file_data *sfd = shm_file_data(file); 320 int err = 0; 321 if (sfd->vm_ops->set_policy) 322 err = sfd->vm_ops->set_policy(vma, new); 323 return err; 324 } 325 326 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, 327 unsigned long addr) 328 { 329 struct file *file = vma->vm_file; 330 struct shm_file_data *sfd = shm_file_data(file); 331 struct mempolicy *pol = NULL; 332 333 if (sfd->vm_ops->get_policy) 334 pol = sfd->vm_ops->get_policy(vma, addr); 335 else if (vma->vm_policy) 336 pol = vma->vm_policy; 337 338 return pol; 339 } 340 #endif 341 342 static int shm_mmap(struct file * file, struct vm_area_struct * vma) 343 { 344 struct shm_file_data *sfd = shm_file_data(file); 345 int ret; 346 347 ret = sfd->file->f_op->mmap(sfd->file, vma); 348 if (ret != 0) 349 return ret; 350 sfd->vm_ops = vma->vm_ops; 351 #ifdef CONFIG_MMU 352 BUG_ON(!sfd->vm_ops->fault); 353 #endif 354 vma->vm_ops = &shm_vm_ops; 355 shm_open(vma); 356 357 return ret; 358 } 359 360 static int shm_release(struct inode *ino, struct file *file) 361 { 362 struct shm_file_data *sfd = shm_file_data(file); 363 364 put_ipc_ns(sfd->ns); 365 shm_file_data(file) = NULL; 366 kfree(sfd); 367 return 0; 368 } 369 370 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) 371 { 372 struct shm_file_data *sfd = shm_file_data(file); 373 374 if (!sfd->file->f_op->fsync) 375 return -EINVAL; 376 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 377 } 378 379 static unsigned long shm_get_unmapped_area(struct file *file, 380 unsigned long addr, unsigned long len, unsigned long pgoff, 381 unsigned long flags) 382 { 383 struct shm_file_data *sfd = shm_file_data(file); 384 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, 385 pgoff, flags); 386 } 387 388 static const struct file_operations shm_file_operations = { 389 .mmap = shm_mmap, 390 .fsync = shm_fsync, 391 .release = shm_release, 392 #ifndef CONFIG_MMU 393 .get_unmapped_area = shm_get_unmapped_area, 394 #endif 395 .llseek = noop_llseek, 396 }; 397 398 static const struct file_operations shm_file_operations_huge = { 399 .mmap = shm_mmap, 400 .fsync = shm_fsync, 401 .release = shm_release, 402 .get_unmapped_area = shm_get_unmapped_area, 403 .llseek = noop_llseek, 404 }; 405 406 int is_file_shm_hugepages(struct file *file) 407 { 408 return file->f_op == &shm_file_operations_huge; 409 } 410 411 static const struct vm_operations_struct shm_vm_ops = { 412 .open = shm_open, /* callback for a new vm-area open */ 413 .close = shm_close, /* callback for when the vm-area is released */ 414 .fault = shm_fault, 415 #if defined(CONFIG_NUMA) 416 .set_policy = shm_set_policy, 417 .get_policy = shm_get_policy, 418 #endif 419 }; 420 421 /** 422 * newseg - Create a new shared memory segment 423 * @ns: namespace 424 * @params: ptr to the structure that contains key, size and shmflg 425 * 426 * Called with shm_ids.rw_mutex held as a writer. 427 */ 428 429 static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 430 { 431 key_t key = params->key; 432 int shmflg = params->flg; 433 size_t size = params->u.size; 434 int error; 435 struct shmid_kernel *shp; 436 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; 437 struct file * file; 438 char name[13]; 439 int id; 440 vm_flags_t acctflag = 0; 441 442 if (size < SHMMIN || size > ns->shm_ctlmax) 443 return -EINVAL; 444 445 if (ns->shm_tot + numpages > ns->shm_ctlall) 446 return -ENOSPC; 447 448 shp = ipc_rcu_alloc(sizeof(*shp)); 449 if (!shp) 450 return -ENOMEM; 451 452 shp->shm_perm.key = key; 453 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 454 shp->mlock_user = NULL; 455 456 shp->shm_perm.security = NULL; 457 error = security_shm_alloc(shp); 458 if (error) { 459 ipc_rcu_putref(shp); 460 return error; 461 } 462 463 sprintf (name, "SYSV%08x", key); 464 if (shmflg & SHM_HUGETLB) { 465 /* hugetlb_file_setup applies strict accounting */ 466 if (shmflg & SHM_NORESERVE) 467 acctflag = VM_NORESERVE; 468 file = hugetlb_file_setup(name, size, acctflag, 469 &shp->mlock_user, HUGETLB_SHMFS_INODE); 470 } else { 471 /* 472 * Do not allow no accounting for OVERCOMMIT_NEVER, even 473 * if it's asked for. 474 */ 475 if ((shmflg & SHM_NORESERVE) && 476 sysctl_overcommit_memory != OVERCOMMIT_NEVER) 477 acctflag = VM_NORESERVE; 478 file = shmem_file_setup(name, size, acctflag); 479 } 480 error = PTR_ERR(file); 481 if (IS_ERR(file)) 482 goto no_file; 483 484 id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 485 if (id < 0) { 486 error = id; 487 goto no_id; 488 } 489 490 shp->shm_cprid = task_tgid_vnr(current); 491 shp->shm_lprid = 0; 492 shp->shm_atim = shp->shm_dtim = 0; 493 shp->shm_ctim = get_seconds(); 494 shp->shm_segsz = size; 495 shp->shm_nattch = 0; 496 shp->shm_file = file; 497 /* 498 * shmid gets reported as "inode#" in /proc/pid/maps. 499 * proc-ps tools use this. Changing this will break them. 500 */ 501 file->f_dentry->d_inode->i_ino = shp->shm_perm.id; 502 503 ns->shm_tot += numpages; 504 error = shp->shm_perm.id; 505 shm_unlock(shp); 506 return error; 507 508 no_id: 509 if (is_file_hugepages(file) && shp->mlock_user) 510 user_shm_unlock(size, shp->mlock_user); 511 fput(file); 512 no_file: 513 security_shm_free(shp); 514 ipc_rcu_putref(shp); 515 return error; 516 } 517 518 /* 519 * Called with shm_ids.rw_mutex and ipcp locked. 520 */ 521 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) 522 { 523 struct shmid_kernel *shp; 524 525 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 526 return security_shm_associate(shp, shmflg); 527 } 528 529 /* 530 * Called with shm_ids.rw_mutex and ipcp locked. 531 */ 532 static inline int shm_more_checks(struct kern_ipc_perm *ipcp, 533 struct ipc_params *params) 534 { 535 struct shmid_kernel *shp; 536 537 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 538 if (shp->shm_segsz < params->u.size) 539 return -EINVAL; 540 541 return 0; 542 } 543 544 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) 545 { 546 struct ipc_namespace *ns; 547 struct ipc_ops shm_ops; 548 struct ipc_params shm_params; 549 550 ns = current->nsproxy->ipc_ns; 551 552 shm_ops.getnew = newseg; 553 shm_ops.associate = shm_security; 554 shm_ops.more_checks = shm_more_checks; 555 556 shm_params.key = key; 557 shm_params.flg = shmflg; 558 shm_params.u.size = size; 559 560 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); 561 } 562 563 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 564 { 565 switch(version) { 566 case IPC_64: 567 return copy_to_user(buf, in, sizeof(*in)); 568 case IPC_OLD: 569 { 570 struct shmid_ds out; 571 572 memset(&out, 0, sizeof(out)); 573 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); 574 out.shm_segsz = in->shm_segsz; 575 out.shm_atime = in->shm_atime; 576 out.shm_dtime = in->shm_dtime; 577 out.shm_ctime = in->shm_ctime; 578 out.shm_cpid = in->shm_cpid; 579 out.shm_lpid = in->shm_lpid; 580 out.shm_nattch = in->shm_nattch; 581 582 return copy_to_user(buf, &out, sizeof(out)); 583 } 584 default: 585 return -EINVAL; 586 } 587 } 588 589 static inline unsigned long 590 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) 591 { 592 switch(version) { 593 case IPC_64: 594 if (copy_from_user(out, buf, sizeof(*out))) 595 return -EFAULT; 596 return 0; 597 case IPC_OLD: 598 { 599 struct shmid_ds tbuf_old; 600 601 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 602 return -EFAULT; 603 604 out->shm_perm.uid = tbuf_old.shm_perm.uid; 605 out->shm_perm.gid = tbuf_old.shm_perm.gid; 606 out->shm_perm.mode = tbuf_old.shm_perm.mode; 607 608 return 0; 609 } 610 default: 611 return -EINVAL; 612 } 613 } 614 615 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 616 { 617 switch(version) { 618 case IPC_64: 619 return copy_to_user(buf, in, sizeof(*in)); 620 case IPC_OLD: 621 { 622 struct shminfo out; 623 624 if(in->shmmax > INT_MAX) 625 out.shmmax = INT_MAX; 626 else 627 out.shmmax = (int)in->shmmax; 628 629 out.shmmin = in->shmmin; 630 out.shmmni = in->shmmni; 631 out.shmseg = in->shmseg; 632 out.shmall = in->shmall; 633 634 return copy_to_user(buf, &out, sizeof(out)); 635 } 636 default: 637 return -EINVAL; 638 } 639 } 640 641 /* 642 * Calculate and add used RSS and swap pages of a shm. 643 * Called with shm_ids.rw_mutex held as a reader 644 */ 645 static void shm_add_rss_swap(struct shmid_kernel *shp, 646 unsigned long *rss_add, unsigned long *swp_add) 647 { 648 struct inode *inode; 649 650 inode = shp->shm_file->f_path.dentry->d_inode; 651 652 if (is_file_hugepages(shp->shm_file)) { 653 struct address_space *mapping = inode->i_mapping; 654 struct hstate *h = hstate_file(shp->shm_file); 655 *rss_add += pages_per_huge_page(h) * mapping->nrpages; 656 } else { 657 #ifdef CONFIG_SHMEM 658 struct shmem_inode_info *info = SHMEM_I(inode); 659 spin_lock(&info->lock); 660 *rss_add += inode->i_mapping->nrpages; 661 *swp_add += info->swapped; 662 spin_unlock(&info->lock); 663 #else 664 *rss_add += inode->i_mapping->nrpages; 665 #endif 666 } 667 } 668 669 /* 670 * Called with shm_ids.rw_mutex held as a reader 671 */ 672 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, 673 unsigned long *swp) 674 { 675 int next_id; 676 int total, in_use; 677 678 *rss = 0; 679 *swp = 0; 680 681 in_use = shm_ids(ns).in_use; 682 683 for (total = 0, next_id = 0; total < in_use; next_id++) { 684 struct kern_ipc_perm *ipc; 685 struct shmid_kernel *shp; 686 687 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); 688 if (ipc == NULL) 689 continue; 690 shp = container_of(ipc, struct shmid_kernel, shm_perm); 691 692 shm_add_rss_swap(shp, rss, swp); 693 694 total++; 695 } 696 } 697 698 /* 699 * This function handles some shmctl commands which require the rw_mutex 700 * to be held in write mode. 701 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 702 */ 703 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, 704 struct shmid_ds __user *buf, int version) 705 { 706 struct kern_ipc_perm *ipcp; 707 struct shmid64_ds shmid64; 708 struct shmid_kernel *shp; 709 int err; 710 711 if (cmd == IPC_SET) { 712 if (copy_shmid_from_user(&shmid64, buf, version)) 713 return -EFAULT; 714 } 715 716 ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, 717 &shmid64.shm_perm, 0); 718 if (IS_ERR(ipcp)) 719 return PTR_ERR(ipcp); 720 721 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 722 723 err = security_shm_shmctl(shp, cmd); 724 if (err) 725 goto out_unlock; 726 switch (cmd) { 727 case IPC_RMID: 728 do_shm_rmid(ns, ipcp); 729 goto out_up; 730 case IPC_SET: 731 ipc_update_perm(&shmid64.shm_perm, ipcp); 732 shp->shm_ctim = get_seconds(); 733 break; 734 default: 735 err = -EINVAL; 736 } 737 out_unlock: 738 shm_unlock(shp); 739 out_up: 740 up_write(&shm_ids(ns).rw_mutex); 741 return err; 742 } 743 744 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 745 { 746 struct shmid_kernel *shp; 747 int err, version; 748 struct ipc_namespace *ns; 749 750 if (cmd < 0 || shmid < 0) { 751 err = -EINVAL; 752 goto out; 753 } 754 755 version = ipc_parse_version(&cmd); 756 ns = current->nsproxy->ipc_ns; 757 758 switch (cmd) { /* replace with proc interface ? */ 759 case IPC_INFO: 760 { 761 struct shminfo64 shminfo; 762 763 err = security_shm_shmctl(NULL, cmd); 764 if (err) 765 return err; 766 767 memset(&shminfo, 0, sizeof(shminfo)); 768 shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; 769 shminfo.shmmax = ns->shm_ctlmax; 770 shminfo.shmall = ns->shm_ctlall; 771 772 shminfo.shmmin = SHMMIN; 773 if(copy_shminfo_to_user (buf, &shminfo, version)) 774 return -EFAULT; 775 776 down_read(&shm_ids(ns).rw_mutex); 777 err = ipc_get_maxid(&shm_ids(ns)); 778 up_read(&shm_ids(ns).rw_mutex); 779 780 if(err<0) 781 err = 0; 782 goto out; 783 } 784 case SHM_INFO: 785 { 786 struct shm_info shm_info; 787 788 err = security_shm_shmctl(NULL, cmd); 789 if (err) 790 return err; 791 792 memset(&shm_info, 0, sizeof(shm_info)); 793 down_read(&shm_ids(ns).rw_mutex); 794 shm_info.used_ids = shm_ids(ns).in_use; 795 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); 796 shm_info.shm_tot = ns->shm_tot; 797 shm_info.swap_attempts = 0; 798 shm_info.swap_successes = 0; 799 err = ipc_get_maxid(&shm_ids(ns)); 800 up_read(&shm_ids(ns).rw_mutex); 801 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { 802 err = -EFAULT; 803 goto out; 804 } 805 806 err = err < 0 ? 0 : err; 807 goto out; 808 } 809 case SHM_STAT: 810 case IPC_STAT: 811 { 812 struct shmid64_ds tbuf; 813 int result; 814 815 if (cmd == SHM_STAT) { 816 shp = shm_lock(ns, shmid); 817 if (IS_ERR(shp)) { 818 err = PTR_ERR(shp); 819 goto out; 820 } 821 result = shp->shm_perm.id; 822 } else { 823 shp = shm_lock_check(ns, shmid); 824 if (IS_ERR(shp)) { 825 err = PTR_ERR(shp); 826 goto out; 827 } 828 result = 0; 829 } 830 err = -EACCES; 831 if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) 832 goto out_unlock; 833 err = security_shm_shmctl(shp, cmd); 834 if (err) 835 goto out_unlock; 836 memset(&tbuf, 0, sizeof(tbuf)); 837 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); 838 tbuf.shm_segsz = shp->shm_segsz; 839 tbuf.shm_atime = shp->shm_atim; 840 tbuf.shm_dtime = shp->shm_dtim; 841 tbuf.shm_ctime = shp->shm_ctim; 842 tbuf.shm_cpid = shp->shm_cprid; 843 tbuf.shm_lpid = shp->shm_lprid; 844 tbuf.shm_nattch = shp->shm_nattch; 845 shm_unlock(shp); 846 if(copy_shmid_to_user (buf, &tbuf, version)) 847 err = -EFAULT; 848 else 849 err = result; 850 goto out; 851 } 852 case SHM_LOCK: 853 case SHM_UNLOCK: 854 { 855 struct file *uninitialized_var(shm_file); 856 857 lru_add_drain_all(); /* drain pagevecs to lru lists */ 858 859 shp = shm_lock_check(ns, shmid); 860 if (IS_ERR(shp)) { 861 err = PTR_ERR(shp); 862 goto out; 863 } 864 865 audit_ipc_obj(&(shp->shm_perm)); 866 867 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { 868 uid_t euid = current_euid(); 869 err = -EPERM; 870 if (euid != shp->shm_perm.uid && 871 euid != shp->shm_perm.cuid) 872 goto out_unlock; 873 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) 874 goto out_unlock; 875 } 876 877 err = security_shm_shmctl(shp, cmd); 878 if (err) 879 goto out_unlock; 880 881 if(cmd==SHM_LOCK) { 882 struct user_struct *user = current_user(); 883 if (!is_file_hugepages(shp->shm_file)) { 884 err = shmem_lock(shp->shm_file, 1, user); 885 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ 886 shp->shm_perm.mode |= SHM_LOCKED; 887 shp->mlock_user = user; 888 } 889 } 890 } else if (!is_file_hugepages(shp->shm_file)) { 891 shmem_lock(shp->shm_file, 0, shp->mlock_user); 892 shp->shm_perm.mode &= ~SHM_LOCKED; 893 shp->mlock_user = NULL; 894 } 895 shm_unlock(shp); 896 goto out; 897 } 898 case IPC_RMID: 899 case IPC_SET: 900 err = shmctl_down(ns, shmid, cmd, buf, version); 901 return err; 902 default: 903 return -EINVAL; 904 } 905 906 out_unlock: 907 shm_unlock(shp); 908 out: 909 return err; 910 } 911 912 /* 913 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 914 * 915 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 916 * "raddr" thing points to kernel space, and there has to be a wrapper around 917 * this. 918 */ 919 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) 920 { 921 struct shmid_kernel *shp; 922 unsigned long addr; 923 unsigned long size; 924 struct file * file; 925 int err; 926 unsigned long flags; 927 unsigned long prot; 928 int acc_mode; 929 unsigned long user_addr; 930 struct ipc_namespace *ns; 931 struct shm_file_data *sfd; 932 struct path path; 933 fmode_t f_mode; 934 935 err = -EINVAL; 936 if (shmid < 0) 937 goto out; 938 else if ((addr = (ulong)shmaddr)) { 939 if (addr & (SHMLBA-1)) { 940 if (shmflg & SHM_RND) 941 addr &= ~(SHMLBA-1); /* round down */ 942 else 943 #ifndef __ARCH_FORCE_SHMLBA 944 if (addr & ~PAGE_MASK) 945 #endif 946 goto out; 947 } 948 flags = MAP_SHARED | MAP_FIXED; 949 } else { 950 if ((shmflg & SHM_REMAP)) 951 goto out; 952 953 flags = MAP_SHARED; 954 } 955 956 if (shmflg & SHM_RDONLY) { 957 prot = PROT_READ; 958 acc_mode = S_IRUGO; 959 f_mode = FMODE_READ; 960 } else { 961 prot = PROT_READ | PROT_WRITE; 962 acc_mode = S_IRUGO | S_IWUGO; 963 f_mode = FMODE_READ | FMODE_WRITE; 964 } 965 if (shmflg & SHM_EXEC) { 966 prot |= PROT_EXEC; 967 acc_mode |= S_IXUGO; 968 } 969 970 /* 971 * We cannot rely on the fs check since SYSV IPC does have an 972 * additional creator id... 973 */ 974 ns = current->nsproxy->ipc_ns; 975 shp = shm_lock_check(ns, shmid); 976 if (IS_ERR(shp)) { 977 err = PTR_ERR(shp); 978 goto out; 979 } 980 981 err = -EACCES; 982 if (ipcperms(ns, &shp->shm_perm, acc_mode)) 983 goto out_unlock; 984 985 err = security_shm_shmat(shp, shmaddr, shmflg); 986 if (err) 987 goto out_unlock; 988 989 path = shp->shm_file->f_path; 990 path_get(&path); 991 shp->shm_nattch++; 992 size = i_size_read(path.dentry->d_inode); 993 shm_unlock(shp); 994 995 err = -ENOMEM; 996 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 997 if (!sfd) 998 goto out_put_dentry; 999 1000 file = alloc_file(&path, f_mode, 1001 is_file_hugepages(shp->shm_file) ? 1002 &shm_file_operations_huge : 1003 &shm_file_operations); 1004 if (!file) 1005 goto out_free; 1006 1007 file->private_data = sfd; 1008 file->f_mapping = shp->shm_file->f_mapping; 1009 sfd->id = shp->shm_perm.id; 1010 sfd->ns = get_ipc_ns(ns); 1011 sfd->file = shp->shm_file; 1012 sfd->vm_ops = NULL; 1013 1014 down_write(¤t->mm->mmap_sem); 1015 if (addr && !(shmflg & SHM_REMAP)) { 1016 err = -EINVAL; 1017 if (find_vma_intersection(current->mm, addr, addr + size)) 1018 goto invalid; 1019 /* 1020 * If shm segment goes below stack, make sure there is some 1021 * space left for the stack to grow (at least 4 pages). 1022 */ 1023 if (addr < current->mm->start_stack && 1024 addr > current->mm->start_stack - size - PAGE_SIZE * 5) 1025 goto invalid; 1026 } 1027 1028 user_addr = do_mmap (file, addr, size, prot, flags, 0); 1029 *raddr = user_addr; 1030 err = 0; 1031 if (IS_ERR_VALUE(user_addr)) 1032 err = (long)user_addr; 1033 invalid: 1034 up_write(¤t->mm->mmap_sem); 1035 1036 fput(file); 1037 1038 out_nattch: 1039 down_write(&shm_ids(ns).rw_mutex); 1040 shp = shm_lock(ns, shmid); 1041 BUG_ON(IS_ERR(shp)); 1042 shp->shm_nattch--; 1043 if (shm_may_destroy(ns, shp)) 1044 shm_destroy(ns, shp); 1045 else 1046 shm_unlock(shp); 1047 up_write(&shm_ids(ns).rw_mutex); 1048 1049 out: 1050 return err; 1051 1052 out_unlock: 1053 shm_unlock(shp); 1054 goto out; 1055 1056 out_free: 1057 kfree(sfd); 1058 out_put_dentry: 1059 path_put(&path); 1060 goto out_nattch; 1061 } 1062 1063 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) 1064 { 1065 unsigned long ret; 1066 long err; 1067 1068 err = do_shmat(shmid, shmaddr, shmflg, &ret); 1069 if (err) 1070 return err; 1071 force_successful_syscall_return(); 1072 return (long)ret; 1073 } 1074 1075 /* 1076 * detach and kill segment if marked destroyed. 1077 * The work is done in shm_close. 1078 */ 1079 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 1080 { 1081 struct mm_struct *mm = current->mm; 1082 struct vm_area_struct *vma; 1083 unsigned long addr = (unsigned long)shmaddr; 1084 int retval = -EINVAL; 1085 #ifdef CONFIG_MMU 1086 loff_t size = 0; 1087 struct vm_area_struct *next; 1088 #endif 1089 1090 if (addr & ~PAGE_MASK) 1091 return retval; 1092 1093 down_write(&mm->mmap_sem); 1094 1095 /* 1096 * This function tries to be smart and unmap shm segments that 1097 * were modified by partial mlock or munmap calls: 1098 * - It first determines the size of the shm segment that should be 1099 * unmapped: It searches for a vma that is backed by shm and that 1100 * started at address shmaddr. It records it's size and then unmaps 1101 * it. 1102 * - Then it unmaps all shm vmas that started at shmaddr and that 1103 * are within the initially determined size. 1104 * Errors from do_munmap are ignored: the function only fails if 1105 * it's called with invalid parameters or if it's called to unmap 1106 * a part of a vma. Both calls in this function are for full vmas, 1107 * the parameters are directly copied from the vma itself and always 1108 * valid - therefore do_munmap cannot fail. (famous last words?) 1109 */ 1110 /* 1111 * If it had been mremap()'d, the starting address would not 1112 * match the usual checks anyway. So assume all vma's are 1113 * above the starting address given. 1114 */ 1115 vma = find_vma(mm, addr); 1116 1117 #ifdef CONFIG_MMU 1118 while (vma) { 1119 next = vma->vm_next; 1120 1121 /* 1122 * Check if the starting address would match, i.e. it's 1123 * a fragment created by mprotect() and/or munmap(), or it 1124 * otherwise it starts at this address with no hassles. 1125 */ 1126 if ((vma->vm_ops == &shm_vm_ops) && 1127 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { 1128 1129 1130 size = vma->vm_file->f_path.dentry->d_inode->i_size; 1131 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1132 /* 1133 * We discovered the size of the shm segment, so 1134 * break out of here and fall through to the next 1135 * loop that uses the size information to stop 1136 * searching for matching vma's. 1137 */ 1138 retval = 0; 1139 vma = next; 1140 break; 1141 } 1142 vma = next; 1143 } 1144 1145 /* 1146 * We need look no further than the maximum address a fragment 1147 * could possibly have landed at. Also cast things to loff_t to 1148 * prevent overflows and make comparisons vs. equal-width types. 1149 */ 1150 size = PAGE_ALIGN(size); 1151 while (vma && (loff_t)(vma->vm_end - addr) <= size) { 1152 next = vma->vm_next; 1153 1154 /* finding a matching vma now does not alter retval */ 1155 if ((vma->vm_ops == &shm_vm_ops) && 1156 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) 1157 1158 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1159 vma = next; 1160 } 1161 1162 #else /* CONFIG_MMU */ 1163 /* under NOMMU conditions, the exact address to be destroyed must be 1164 * given */ 1165 retval = -EINVAL; 1166 if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { 1167 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1168 retval = 0; 1169 } 1170 1171 #endif 1172 1173 up_write(&mm->mmap_sem); 1174 return retval; 1175 } 1176 1177 #ifdef CONFIG_PROC_FS 1178 static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1179 { 1180 struct shmid_kernel *shp = it; 1181 unsigned long rss = 0, swp = 0; 1182 1183 shm_add_rss_swap(shp, &rss, &swp); 1184 1185 #if BITS_PER_LONG <= 32 1186 #define SIZE_SPEC "%10lu" 1187 #else 1188 #define SIZE_SPEC "%21lu" 1189 #endif 1190 1191 return seq_printf(s, 1192 "%10d %10d %4o " SIZE_SPEC " %5u %5u " 1193 "%5lu %5u %5u %5u %5u %10lu %10lu %10lu " 1194 SIZE_SPEC " " SIZE_SPEC "\n", 1195 shp->shm_perm.key, 1196 shp->shm_perm.id, 1197 shp->shm_perm.mode, 1198 shp->shm_segsz, 1199 shp->shm_cprid, 1200 shp->shm_lprid, 1201 shp->shm_nattch, 1202 shp->shm_perm.uid, 1203 shp->shm_perm.gid, 1204 shp->shm_perm.cuid, 1205 shp->shm_perm.cgid, 1206 shp->shm_atim, 1207 shp->shm_dtim, 1208 shp->shm_ctim, 1209 rss * PAGE_SIZE, 1210 swp * PAGE_SIZE); 1211 } 1212 #endif 1213