xref: /linux/ipc/shm.c (revision 2e5c6f4fd4001562781e99bbfc7f1f0127187542)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * linux/ipc/shm.c
4  * Copyright (C) 1992, 1993 Krishna Balasubramanian
5  *	 Many improvements/fixes by Bruno Haible.
6  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8  *
9  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16  *
17  * support for audit of ipc object properties and permission changes
18  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19  *
20  * namespaces support
21  * OpenVZ, SWsoft Inc.
22  * Pavel Emelianov <xemul@openvz.org>
23  *
24  * Better ipc lock (kern_ipc_perm.lock) handling
25  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26  */
27 
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/hugetlb.h>
31 #include <linux/shm.h>
32 #include <uapi/linux/shm.h>
33 #include <linux/init.h>
34 #include <linux/file.h>
35 #include <linux/mman.h>
36 #include <linux/shmem_fs.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/audit.h>
40 #include <linux/capability.h>
41 #include <linux/ptrace.h>
42 #include <linux/seq_file.h>
43 #include <linux/rwsem.h>
44 #include <linux/nsproxy.h>
45 #include <linux/mount.h>
46 #include <linux/ipc_namespace.h>
47 #include <linux/rhashtable.h>
48 #include <linux/nstree.h>
49 
50 #include <linux/uaccess.h>
51 
52 #include "util.h"
53 
54 struct shmid_kernel /* private to the kernel */
55 {
56 	struct kern_ipc_perm	shm_perm;
57 	struct file		*shm_file;
58 	unsigned long		shm_nattch;
59 	unsigned long		shm_segsz;
60 	time64_t		shm_atim;
61 	time64_t		shm_dtim;
62 	time64_t		shm_ctim;
63 	struct pid		*shm_cprid;
64 	struct pid		*shm_lprid;
65 	struct ucounts		*mlock_ucounts;
66 
67 	/*
68 	 * The task created the shm object, for
69 	 * task_lock(shp->shm_creator)
70 	 */
71 	struct task_struct	*shm_creator;
72 
73 	/*
74 	 * List by creator. task_lock(->shm_creator) required for read/write.
75 	 * If list_empty(), then the creator is dead already.
76 	 */
77 	struct list_head	shm_clist;
78 	struct ipc_namespace	*ns;
79 } __randomize_layout;
80 
/*
 * Kernel-internal flag bits stored in the upper byte of shm_perm.mode.
 */
#define SHM_DEST	01000	/* destroy the segment on its last detach */
#define SHM_LOCKED	02000	/* the segment must not be swapped out */
84 
/*
 * Per-attach state hung off the private_data of the wrapper file
 * that shm_file_operations serve.
 */
struct shm_file_data {
	/* ipc id used to look the segment up again (see __shm_open()). */
	int id;
	/* Namespace the id lives in; the reference is put in shm_release(). */
	struct ipc_namespace *ns;
	/* The real backing file; the reference is put in shm_release(). */
	struct file *file;
	/* vm_ops installed by the backing file's mmap, saved by shm_mmap(). */
	const struct vm_operations_struct *vm_ops;
};

/* Lvalue accessor: view file->private_data as a struct shm_file_data *. */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
93 
94 static const struct file_operations shm_file_operations;
95 static const struct vm_operations_struct shm_vm_ops;
96 
97 #define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])
98 
99 #define shm_unlock(shp)			\
100 	ipc_unlock(&(shp)->shm_perm)
101 
102 static int newseg(struct ipc_namespace *, struct ipc_params *);
103 static void shm_open(struct vm_area_struct *vma);
104 static void shm_close(struct vm_area_struct *vma);
105 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
106 #ifdef CONFIG_PROC_FS
107 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
108 #endif
109 
110 void shm_init_ns(struct ipc_namespace *ns)
111 {
112 	ns->shm_ctlmax = SHMMAX;
113 	ns->shm_ctlall = SHMALL;
114 	ns->shm_ctlmni = SHMMNI;
115 	ns->shm_rmid_forced = 0;
116 	ns->shm_tot = 0;
117 	ipc_init_ids(&shm_ids(ns));
118 }
119 
120 /*
121  * Called with shm_ids.rwsem (writer) and the shp structure locked.
122  * Only shm_ids.rwsem remains locked on exit.
123  */
124 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
125 {
126 	struct shmid_kernel *shp;
127 
128 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
129 	WARN_ON(ns != shp->ns);
130 
131 	if (shp->shm_nattch) {
132 		shp->shm_perm.mode |= SHM_DEST;
133 		/* Do not find it any more */
134 		ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
135 		shm_unlock(shp);
136 	} else
137 		shm_destroy(ns, shp);
138 }
139 
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release the shm state of a namespace
 * @ns: namespace being torn down
 *
 * Runs do_shm_rmid() on every remaining segment via free_ipcs(), then
 * destroys the id allocator and the key hash table.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&shm_ids(ns).ipcs_idr);
	rhashtable_destroy(&shm_ids(ns).key_ht);
}
#endif
148 
149 static int __init ipc_ns_init(void)
150 {
151 	shm_init_ns(&init_ipc_ns);
152 	ns_tree_add(&init_ipc_ns);
153 	return 0;
154 }
155 
156 pure_initcall(ipc_ns_init);
157 
158 void __init shm_init(void)
159 {
160 	ipc_init_proc_interface("sysvipc/shm",
161 #if BITS_PER_LONG <= 32
162 				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
163 #else
164 				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
165 #endif
166 				IPC_SHM_IDS, sysvipc_shm_proc_show);
167 }
168 
169 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
170 {
171 	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
172 
173 	if (IS_ERR(ipcp))
174 		return ERR_CAST(ipcp);
175 
176 	return container_of(ipcp, struct shmid_kernel, shm_perm);
177 }
178 
179 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
180 {
181 	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
182 
183 	if (IS_ERR(ipcp))
184 		return ERR_CAST(ipcp);
185 
186 	return container_of(ipcp, struct shmid_kernel, shm_perm);
187 }
188 
189 /*
190  * shm_lock_(check_) routines are called in the paths where the rwsem
191  * is not necessarily held.
192  */
193 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
194 {
195 	struct kern_ipc_perm *ipcp;
196 
197 	rcu_read_lock();
198 	ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
199 	if (IS_ERR(ipcp))
200 		goto err;
201 
202 	ipc_lock_object(ipcp);
203 	/*
204 	 * ipc_rmid() may have already freed the ID while ipc_lock_object()
205 	 * was spinning: here verify that the structure is still valid.
206 	 * Upon races with RMID, return -EIDRM, thus indicating that
207 	 * the ID points to a removed identifier.
208 	 */
209 	if (ipc_valid_object(ipcp)) {
210 		/* return a locked ipc object upon success */
211 		return container_of(ipcp, struct shmid_kernel, shm_perm);
212 	}
213 
214 	ipc_unlock_object(ipcp);
215 	ipcp = ERR_PTR(-EIDRM);
216 err:
217 	rcu_read_unlock();
218 	/*
219 	 * Callers of shm_lock() must validate the status of the returned ipc
220 	 * object pointer and error out as appropriate.
221 	 */
222 	return ERR_CAST(ipcp);
223 }
224 
225 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
226 {
227 	rcu_read_lock();
228 	ipc_lock_object(&ipcp->shm_perm);
229 }
230 
231 static void shm_rcu_free(struct rcu_head *head)
232 {
233 	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
234 							rcu);
235 	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
236 							shm_perm);
237 	security_shm_free(&shp->shm_perm);
238 	kfree(shp);
239 }
240 
241 /*
242  * It has to be called with shp locked.
243  * It must be called before ipc_rmid()
244  */
245 static inline void shm_clist_rm(struct shmid_kernel *shp)
246 {
247 	struct task_struct *creator;
248 
249 	/* ensure that shm_creator does not disappear */
250 	rcu_read_lock();
251 
252 	/*
253 	 * A concurrent exit_shm may do a list_del_init() as well.
254 	 * Just do nothing if exit_shm already did the work
255 	 */
256 	if (!list_empty(&shp->shm_clist)) {
257 		/*
258 		 * shp->shm_creator is guaranteed to be valid *only*
259 		 * if shp->shm_clist is not empty.
260 		 */
261 		creator = shp->shm_creator;
262 
263 		task_lock(creator);
264 		/*
265 		 * list_del_init() is a nop if the entry was already removed
266 		 * from the list.
267 		 */
268 		list_del_init(&shp->shm_clist);
269 		task_unlock(creator);
270 	}
271 	rcu_read_unlock();
272 }
273 
274 static inline void shm_rmid(struct shmid_kernel *s)
275 {
276 	shm_clist_rm(s);
277 	ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
278 }
279 
280 
281 static int __shm_open(struct shm_file_data *sfd)
282 {
283 	struct shmid_kernel *shp;
284 
285 	shp = shm_lock(sfd->ns, sfd->id);
286 
287 	if (IS_ERR(shp))
288 		return PTR_ERR(shp);
289 
290 	if (shp->shm_file != sfd->file) {
291 		/* ID was reused */
292 		shm_unlock(shp);
293 		return -EINVAL;
294 	}
295 
296 	shp->shm_atim = ktime_get_real_seconds();
297 	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
298 	shp->shm_nattch++;
299 	shm_unlock(shp);
300 	return 0;
301 }
302 
303 /* This is called by fork, once for every shm attach. */
304 static void shm_open(struct vm_area_struct *vma)
305 {
306 	struct file *file = vma->vm_file;
307 	struct shm_file_data *sfd = shm_file_data(file);
308 	int err;
309 
310 	/* Always call underlying open if present */
311 	if (sfd->vm_ops->open)
312 		sfd->vm_ops->open(vma);
313 
314 	err = __shm_open(sfd);
315 	/*
316 	 * We raced in the idr lookup or with shm_destroy().
317 	 * Either way, the ID is busted.
318 	 */
319 	WARN_ON_ONCE(err);
320 }
321 
322 /*
323  * shm_destroy - free the struct shmid_kernel
324  *
325  * @ns: namespace
326  * @shp: struct to free
327  *
328  * It has to be called with shp and shm_ids.rwsem (writer) locked,
329  * but returns with shp unlocked and freed.
330  */
331 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
332 {
333 	struct file *shm_file;
334 
335 	shm_file = shp->shm_file;
336 	shp->shm_file = NULL;
337 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
338 	shm_rmid(shp);
339 	shm_unlock(shp);
340 	if (!is_file_hugepages(shm_file))
341 		shmem_lock(shm_file, 0, shp->mlock_ucounts);
342 	fput(shm_file);
343 	ipc_update_pid(&shp->shm_cprid, NULL);
344 	ipc_update_pid(&shp->shm_lprid, NULL);
345 	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
346 }
347 
348 /*
349  * shm_may_destroy - identifies whether shm segment should be destroyed now
350  *
351  * Returns true if and only if there are no active users of the segment and
352  * one of the following is true:
353  *
354  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
355  *
356  * 2) sysctl kernel.shm_rmid_forced is set to 1.
357  */
358 static bool shm_may_destroy(struct shmid_kernel *shp)
359 {
360 	return (shp->shm_nattch == 0) &&
361 	       (shp->ns->shm_rmid_forced ||
362 		(shp->shm_perm.mode & SHM_DEST));
363 }
364 
365 /*
366  * remove the attach descriptor vma.
367  * free memory for segment if it is marked destroyed.
368  * The descriptor has already been removed from the current->mm->mmap list
369  * and will later be kfree()d.
370  */
371 static void __shm_close(struct shm_file_data *sfd)
372 {
373 	struct shmid_kernel *shp;
374 	struct ipc_namespace *ns = sfd->ns;
375 
376 	down_write(&shm_ids(ns).rwsem);
377 	/* remove from the list of attaches of the shm segment */
378 	shp = shm_lock(ns, sfd->id);
379 
380 	/*
381 	 * We raced in the idr lookup or with shm_destroy().
382 	 * Either way, the ID is busted.
383 	 */
384 	if (WARN_ON_ONCE(IS_ERR(shp)))
385 		goto done; /* no-op */
386 
387 	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
388 	shp->shm_dtim = ktime_get_real_seconds();
389 	shp->shm_nattch--;
390 	if (shm_may_destroy(shp))
391 		shm_destroy(ns, shp);
392 	else
393 		shm_unlock(shp);
394 done:
395 	up_write(&shm_ids(ns).rwsem);
396 }
397 
398 static void shm_close(struct vm_area_struct *vma)
399 {
400 	struct file *file = vma->vm_file;
401 	struct shm_file_data *sfd = shm_file_data(file);
402 
403 	/* Always call underlying close if present */
404 	if (sfd->vm_ops->close)
405 		sfd->vm_ops->close(vma);
406 
407 	__shm_close(sfd);
408 }
409 
410 /* Called with ns->shm_ids(ns).rwsem locked */
411 static int shm_try_destroy_orphaned(int id, void *p, void *data)
412 {
413 	struct ipc_namespace *ns = data;
414 	struct kern_ipc_perm *ipcp = p;
415 	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
416 
417 	/*
418 	 * We want to destroy segments without users and with already
419 	 * exit'ed originating process.
420 	 *
421 	 * shm_nattch can be changed under shm_perm.lock without holding the
422 	 * rwsem, so take the object lock before checking shm_may_destroy().
423 	 */
424 	if (!list_empty(&shp->shm_clist))
425 		return 0;
426 
427 	shm_lock_by_ptr(shp);
428 	if (shm_may_destroy(shp))
429 		shm_destroy(ns, shp);
430 	else
431 		shm_unlock(shp);
432 	return 0;
433 }
434 
435 void shm_destroy_orphaned(struct ipc_namespace *ns)
436 {
437 	down_write(&shm_ids(ns).rwsem);
438 	if (shm_ids(ns).in_use) {
439 		rcu_read_lock();
440 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
441 		rcu_read_unlock();
442 	}
443 	up_write(&shm_ids(ns).rwsem);
444 }
445 
446 /* Locking assumes this will only be called with task == current */
447 void exit_shm(struct task_struct *task)
448 {
449 	for (;;) {
450 		struct shmid_kernel *shp;
451 		struct ipc_namespace *ns;
452 
453 		task_lock(task);
454 
455 		if (list_empty(&task->sysvshm.shm_clist)) {
456 			task_unlock(task);
457 			break;
458 		}
459 
460 		shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
461 				shm_clist);
462 
463 		/*
464 		 * 1) Get pointer to the ipc namespace. It is worth to say
465 		 * that this pointer is guaranteed to be valid because
466 		 * shp lifetime is always shorter than namespace lifetime
467 		 * in which shp lives.
468 		 * We taken task_lock it means that shp won't be freed.
469 		 */
470 		ns = shp->ns;
471 
472 		/*
473 		 * 2) If kernel.shm_rmid_forced is not set then only keep track of
474 		 * which shmids are orphaned, so that a later set of the sysctl
475 		 * can clean them up.
476 		 */
477 		if (!ns->shm_rmid_forced)
478 			goto unlink_continue;
479 
480 		/*
481 		 * 3) get a reference to the namespace.
482 		 *    The refcount could be already 0. If it is 0, then
483 		 *    the shm objects will be free by free_ipc_work().
484 		 */
485 		ns = get_ipc_ns_not_zero(ns);
486 		if (!ns) {
487 unlink_continue:
488 			list_del_init(&shp->shm_clist);
489 			task_unlock(task);
490 			continue;
491 		}
492 
493 		/*
494 		 * 4) get a reference to shp.
495 		 *   This cannot fail: shm_clist_rm() is called before
496 		 *   ipc_rmid(), thus the refcount cannot be 0.
497 		 */
498 		WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
499 
500 		/*
501 		 * 5) unlink the shm segment from the list of segments
502 		 *    created by current.
503 		 *    This must be done last. After unlinking,
504 		 *    only the refcounts obtained above prevent IPC_RMID
505 		 *    from destroying the segment or the namespace.
506 		 */
507 		list_del_init(&shp->shm_clist);
508 
509 		task_unlock(task);
510 
511 		/*
512 		 * 6) we have all references
513 		 *    Thus lock & if needed destroy shp.
514 		 */
515 		down_write(&shm_ids(ns).rwsem);
516 		shm_lock_by_ptr(shp);
517 		/*
518 		 * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
519 		 * safe to call ipc_rcu_putref here
520 		 */
521 		ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
522 
523 		if (ipc_valid_object(&shp->shm_perm)) {
524 			if (shm_may_destroy(shp))
525 				shm_destroy(ns, shp);
526 			else
527 				shm_unlock(shp);
528 		} else {
529 			/*
530 			 * Someone else deleted the shp from namespace
531 			 * idr/kht while we have waited.
532 			 * Just unlock and continue.
533 			 */
534 			shm_unlock(shp);
535 		}
536 
537 		up_write(&shm_ids(ns).rwsem);
538 		put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
539 	}
540 }
541 
542 static vm_fault_t shm_fault(struct vm_fault *vmf)
543 {
544 	struct file *file = vmf->vma->vm_file;
545 	struct shm_file_data *sfd = shm_file_data(file);
546 
547 	return sfd->vm_ops->fault(vmf);
548 }
549 
550 static int shm_may_split(struct vm_area_struct *vma, unsigned long addr)
551 {
552 	struct file *file = vma->vm_file;
553 	struct shm_file_data *sfd = shm_file_data(file);
554 
555 	if (sfd->vm_ops->may_split)
556 		return sfd->vm_ops->may_split(vma, addr);
557 
558 	return 0;
559 }
560 
561 static unsigned long shm_pagesize(struct vm_area_struct *vma)
562 {
563 	struct file *file = vma->vm_file;
564 	struct shm_file_data *sfd = shm_file_data(file);
565 
566 	if (sfd->vm_ops->pagesize)
567 		return sfd->vm_ops->pagesize(vma);
568 
569 	return PAGE_SIZE;
570 }
571 
572 #ifdef CONFIG_NUMA
573 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
574 {
575 	struct shm_file_data *sfd = shm_file_data(vma->vm_file);
576 	int err = 0;
577 
578 	if (sfd->vm_ops->set_policy)
579 		err = sfd->vm_ops->set_policy(vma, mpol);
580 	return err;
581 }
582 
583 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
584 					unsigned long addr, pgoff_t *ilx)
585 {
586 	struct shm_file_data *sfd = shm_file_data(vma->vm_file);
587 	struct mempolicy *mpol = vma->vm_policy;
588 
589 	if (sfd->vm_ops->get_policy)
590 		mpol = sfd->vm_ops->get_policy(vma, addr, ilx);
591 	return mpol;
592 }
593 #endif
594 
595 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
596 {
597 	struct shm_file_data *sfd = shm_file_data(file);
598 	int ret;
599 
600 	/*
601 	 * In case of remap_file_pages() emulation, the file can represent an
602 	 * IPC ID that was removed, and possibly even reused by another shm
603 	 * segment already.  Propagate this case as an error to caller.
604 	 */
605 	ret = __shm_open(sfd);
606 	if (ret)
607 		return ret;
608 
609 	ret = vfs_mmap(sfd->file, vma);
610 	if (ret) {
611 		__shm_close(sfd);
612 		return ret;
613 	}
614 	sfd->vm_ops = vma->vm_ops;
615 #ifdef CONFIG_MMU
616 	WARN_ON(!sfd->vm_ops->fault);
617 #endif
618 	vma->vm_ops = &shm_vm_ops;
619 	return 0;
620 }
621 
622 static int shm_release(struct inode *ino, struct file *file)
623 {
624 	struct shm_file_data *sfd = shm_file_data(file);
625 
626 	put_ipc_ns(sfd->ns);
627 	fput(sfd->file);
628 	shm_file_data(file) = NULL;
629 	kfree(sfd);
630 	return 0;
631 }
632 
633 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
634 {
635 	struct shm_file_data *sfd = shm_file_data(file);
636 
637 	if (!sfd->file->f_op->fsync)
638 		return -EINVAL;
639 	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
640 }
641 
642 static long shm_fallocate(struct file *file, int mode, loff_t offset,
643 			  loff_t len)
644 {
645 	struct shm_file_data *sfd = shm_file_data(file);
646 
647 	if (!sfd->file->f_op->fallocate)
648 		return -EOPNOTSUPP;
649 	return sfd->file->f_op->fallocate(file, mode, offset, len);
650 }
651 
652 static unsigned long shm_get_unmapped_area(struct file *file,
653 	unsigned long addr, unsigned long len, unsigned long pgoff,
654 	unsigned long flags)
655 {
656 	struct shm_file_data *sfd = shm_file_data(file);
657 
658 	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
659 						pgoff, flags);
660 }
661 
662 static const struct file_operations shm_file_operations = {
663 	.mmap		= shm_mmap,
664 	.fsync		= shm_fsync,
665 	.release	= shm_release,
666 	.get_unmapped_area	= shm_get_unmapped_area,
667 	.llseek		= noop_llseek,
668 	.fallocate	= shm_fallocate,
669 };
670 
671 /*
672  * shm_file_operations_huge is now identical to shm_file_operations
673  * except for fop_flags
674  */
675 static const struct file_operations shm_file_operations_huge = {
676 	.mmap		= shm_mmap,
677 	.fsync		= shm_fsync,
678 	.release	= shm_release,
679 	.get_unmapped_area	= shm_get_unmapped_area,
680 	.llseek		= noop_llseek,
681 	.fallocate	= shm_fallocate,
682 	.fop_flags	= FOP_HUGE_PAGES,
683 };
684 
685 static const struct vm_operations_struct shm_vm_ops = {
686 	.open	= shm_open,	/* callback for a new vm-area open */
687 	.close	= shm_close,	/* callback for when the vm-area is released */
688 	.fault	= shm_fault,
689 	.may_split = shm_may_split,
690 	.pagesize = shm_pagesize,
691 #if defined(CONFIG_NUMA)
692 	.set_policy = shm_set_policy,
693 	.get_policy = shm_get_policy,
694 #endif
695 };
696 
697 /**
698  * newseg - Create a new shared memory segment
699  * @ns: namespace
700  * @params: ptr to the structure that contains key, size and shmflg
701  *
702  * Called with shm_ids.rwsem held as a writer.
703  */
704 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
705 {
706 	key_t key = params->key;
707 	int shmflg = params->flg;
708 	size_t size = params->u.size;
709 	int error;
710 	struct shmid_kernel *shp;
711 	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
712 	const bool has_no_reserve = shmflg & SHM_NORESERVE;
713 	vma_flags_t acctflag = EMPTY_VMA_FLAGS;
714 	struct file *file;
715 	char name[13];
716 
717 	if (size < SHMMIN || size > ns->shm_ctlmax)
718 		return -EINVAL;
719 
720 	if (numpages << PAGE_SHIFT < size)
721 		return -ENOSPC;
722 
723 	if (ns->shm_tot + numpages < ns->shm_tot ||
724 			ns->shm_tot + numpages > ns->shm_ctlall)
725 		return -ENOSPC;
726 
727 	shp = kmalloc_obj(*shp, GFP_KERNEL_ACCOUNT);
728 	if (unlikely(!shp))
729 		return -ENOMEM;
730 
731 	shp->shm_perm.key = key;
732 	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
733 	shp->mlock_ucounts = NULL;
734 
735 	shp->shm_perm.security = NULL;
736 	error = security_shm_alloc(&shp->shm_perm);
737 	if (error) {
738 		kfree(shp);
739 		return error;
740 	}
741 
742 	sprintf(name, "SYSV%08x", key);
743 	if (shmflg & SHM_HUGETLB) {
744 		struct hstate *hs;
745 		size_t hugesize;
746 
747 		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
748 		if (!hs) {
749 			error = -EINVAL;
750 			goto no_file;
751 		}
752 		hugesize = ALIGN(size, huge_page_size(hs));
753 
754 		/* hugetlb_file_setup applies strict accounting */
755 		if (has_no_reserve)
756 			vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
757 		file = hugetlb_file_setup(name, hugesize, acctflag,
758 				HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
759 	} else {
760 		/*
761 		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
762 		 * if it's asked for.
763 		 */
764 		if  (has_no_reserve && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
765 			vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
766 		file = shmem_kernel_file_setup(name, size, acctflag);
767 	}
768 	error = PTR_ERR(file);
769 	if (IS_ERR(file))
770 		goto no_file;
771 
772 	shp->shm_cprid = get_pid(task_tgid(current));
773 	shp->shm_lprid = NULL;
774 	shp->shm_atim = shp->shm_dtim = 0;
775 	shp->shm_ctim = ktime_get_real_seconds();
776 	shp->shm_segsz = size;
777 	shp->shm_nattch = 0;
778 	shp->shm_file = file;
779 	shp->shm_creator = current;
780 
781 	/* ipc_addid() locks shp upon success. */
782 	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
783 	if (error < 0)
784 		goto no_id;
785 
786 	shp->ns = ns;
787 
788 	task_lock(current);
789 	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
790 	task_unlock(current);
791 
792 	/*
793 	 * shmid gets reported as "inode#" in /proc/pid/maps.
794 	 * proc-ps tools use this. Changing this will break them.
795 	 */
796 	file_inode(file)->i_ino = shp->shm_perm.id;
797 
798 	ns->shm_tot += numpages;
799 	error = shp->shm_perm.id;
800 
801 	ipc_unlock_object(&shp->shm_perm);
802 	rcu_read_unlock();
803 	return error;
804 
805 no_id:
806 	ipc_update_pid(&shp->shm_cprid, NULL);
807 	ipc_update_pid(&shp->shm_lprid, NULL);
808 	fput(file);
809 	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
810 	return error;
811 no_file:
812 	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
813 	return error;
814 }
815 
816 /*
817  * Called with shm_ids.rwsem and ipcp locked.
818  */
819 static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
820 {
821 	struct shmid_kernel *shp;
822 
823 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
824 	if (shp->shm_segsz < params->u.size)
825 		return -EINVAL;
826 
827 	return 0;
828 }
829 
830 long ksys_shmget(key_t key, size_t size, int shmflg)
831 {
832 	struct ipc_namespace *ns;
833 	static const struct ipc_ops shm_ops = {
834 		.getnew = newseg,
835 		.associate = security_shm_associate,
836 		.more_checks = shm_more_checks,
837 	};
838 	struct ipc_params shm_params;
839 
840 	ns = current->nsproxy->ipc_ns;
841 
842 	shm_params.key = key;
843 	shm_params.flg = shmflg;
844 	shm_params.u.size = size;
845 
846 	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
847 }
848 
849 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
850 {
851 	return ksys_shmget(key, size, shmflg);
852 }
853 
854 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
855 {
856 	switch (version) {
857 	case IPC_64:
858 		return copy_to_user(buf, in, sizeof(*in));
859 	case IPC_OLD:
860 	    {
861 		struct shmid_ds out;
862 
863 		memset(&out, 0, sizeof(out));
864 		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
865 		out.shm_segsz	= in->shm_segsz;
866 		out.shm_atime	= in->shm_atime;
867 		out.shm_dtime	= in->shm_dtime;
868 		out.shm_ctime	= in->shm_ctime;
869 		out.shm_cpid	= in->shm_cpid;
870 		out.shm_lpid	= in->shm_lpid;
871 		out.shm_nattch	= in->shm_nattch;
872 
873 		return copy_to_user(buf, &out, sizeof(out));
874 	    }
875 	default:
876 		return -EINVAL;
877 	}
878 }
879 
880 static inline unsigned long
881 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
882 {
883 	switch (version) {
884 	case IPC_64:
885 		if (copy_from_user(out, buf, sizeof(*out)))
886 			return -EFAULT;
887 		return 0;
888 	case IPC_OLD:
889 	    {
890 		struct shmid_ds tbuf_old;
891 
892 		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
893 			return -EFAULT;
894 
895 		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
896 		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
897 		out->shm_perm.mode	= tbuf_old.shm_perm.mode;
898 
899 		return 0;
900 	    }
901 	default:
902 		return -EINVAL;
903 	}
904 }
905 
906 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
907 {
908 	switch (version) {
909 	case IPC_64:
910 		return copy_to_user(buf, in, sizeof(*in));
911 	case IPC_OLD:
912 	    {
913 		struct shminfo out;
914 
915 		if (in->shmmax > INT_MAX)
916 			out.shmmax = INT_MAX;
917 		else
918 			out.shmmax = (int)in->shmmax;
919 
920 		out.shmmin	= in->shmmin;
921 		out.shmmni	= in->shmmni;
922 		out.shmseg	= in->shmseg;
923 		out.shmall	= in->shmall;
924 
925 		return copy_to_user(buf, &out, sizeof(out));
926 	    }
927 	default:
928 		return -EINVAL;
929 	}
930 }
931 
932 /*
933  * Calculate and add used RSS and swap pages of a shm.
934  * Called with shm_ids.rwsem held as a reader
935  */
936 static void shm_add_rss_swap(struct shmid_kernel *shp,
937 	unsigned long *rss_add, unsigned long *swp_add)
938 {
939 	struct inode *inode;
940 
941 	inode = file_inode(shp->shm_file);
942 
943 	if (is_file_hugepages(shp->shm_file)) {
944 		struct address_space *mapping = inode->i_mapping;
945 		struct hstate *h = hstate_file(shp->shm_file);
946 		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
947 	} else {
948 #ifdef CONFIG_SHMEM
949 		struct shmem_inode_info *info = SHMEM_I(inode);
950 
951 		spin_lock_irq(&info->lock);
952 		*rss_add += inode->i_mapping->nrpages;
953 		*swp_add += info->swapped;
954 		spin_unlock_irq(&info->lock);
955 #else
956 		*rss_add += inode->i_mapping->nrpages;
957 #endif
958 	}
959 }
960 
961 /*
962  * Called with shm_ids.rwsem held as a reader
963  */
964 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
965 		unsigned long *swp)
966 {
967 	int next_id;
968 	int total, in_use;
969 
970 	*rss = 0;
971 	*swp = 0;
972 
973 	in_use = shm_ids(ns).in_use;
974 
975 	for (total = 0, next_id = 0; total < in_use; next_id++) {
976 		struct kern_ipc_perm *ipc;
977 		struct shmid_kernel *shp;
978 
979 		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
980 		if (ipc == NULL)
981 			continue;
982 		shp = container_of(ipc, struct shmid_kernel, shm_perm);
983 
984 		shm_add_rss_swap(shp, rss, swp);
985 
986 		total++;
987 	}
988 }
989 
990 /*
991  * This function handles some shmctl commands which require the rwsem
992  * to be held in write mode.
993  * NOTE: no locks must be held, the rwsem is taken inside this function.
994  */
995 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
996 		       struct shmid64_ds *shmid64)
997 {
998 	struct kern_ipc_perm *ipcp;
999 	struct shmid_kernel *shp;
1000 	int err;
1001 
1002 	down_write(&shm_ids(ns).rwsem);
1003 	rcu_read_lock();
1004 
1005 	ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
1006 				      &shmid64->shm_perm, 0);
1007 	if (IS_ERR(ipcp)) {
1008 		err = PTR_ERR(ipcp);
1009 		goto out_unlock1;
1010 	}
1011 
1012 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1013 
1014 	err = security_shm_shmctl(&shp->shm_perm, cmd);
1015 	if (err)
1016 		goto out_unlock1;
1017 
1018 	switch (cmd) {
1019 	case IPC_RMID:
1020 		ipc_lock_object(&shp->shm_perm);
1021 		/* do_shm_rmid unlocks the ipc object and rcu */
1022 		do_shm_rmid(ns, ipcp);
1023 		goto out_up;
1024 	case IPC_SET:
1025 		ipc_lock_object(&shp->shm_perm);
1026 		err = ipc_update_perm(&shmid64->shm_perm, ipcp);
1027 		if (err)
1028 			goto out_unlock0;
1029 		shp->shm_ctim = ktime_get_real_seconds();
1030 		break;
1031 	default:
1032 		err = -EINVAL;
1033 		goto out_unlock1;
1034 	}
1035 
1036 out_unlock0:
1037 	ipc_unlock_object(&shp->shm_perm);
1038 out_unlock1:
1039 	rcu_read_unlock();
1040 out_up:
1041 	up_write(&shm_ids(ns).rwsem);
1042 	return err;
1043 }
1044 
1045 static int shmctl_ipc_info(struct ipc_namespace *ns,
1046 			   struct shminfo64 *shminfo)
1047 {
1048 	int err = security_shm_shmctl(NULL, IPC_INFO);
1049 	if (!err) {
1050 		memset(shminfo, 0, sizeof(*shminfo));
1051 		shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
1052 		shminfo->shmmax = ns->shm_ctlmax;
1053 		shminfo->shmall = ns->shm_ctlall;
1054 		shminfo->shmmin = SHMMIN;
1055 		down_read(&shm_ids(ns).rwsem);
1056 		err = ipc_get_maxidx(&shm_ids(ns));
1057 		up_read(&shm_ids(ns).rwsem);
1058 		if (err < 0)
1059 			err = 0;
1060 	}
1061 	return err;
1062 }
1063 
1064 static int shmctl_shm_info(struct ipc_namespace *ns,
1065 			   struct shm_info *shm_info)
1066 {
1067 	int err = security_shm_shmctl(NULL, SHM_INFO);
1068 	if (!err) {
1069 		memset(shm_info, 0, sizeof(*shm_info));
1070 		down_read(&shm_ids(ns).rwsem);
1071 		shm_info->used_ids = shm_ids(ns).in_use;
1072 		shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
1073 		shm_info->shm_tot = ns->shm_tot;
1074 		shm_info->swap_attempts = 0;
1075 		shm_info->swap_successes = 0;
1076 		err = ipc_get_maxidx(&shm_ids(ns));
1077 		up_read(&shm_ids(ns).rwsem);
1078 		if (err < 0)
1079 			err = 0;
1080 	}
1081 	return err;
1082 }
1083 
1084 static int shmctl_stat(struct ipc_namespace *ns, int shmid,
1085 			int cmd, struct shmid64_ds *tbuf)
1086 {
1087 	struct shmid_kernel *shp;
1088 	int err;
1089 
1090 	memset(tbuf, 0, sizeof(*tbuf));
1091 
1092 	rcu_read_lock();
1093 	if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
1094 		shp = shm_obtain_object(ns, shmid);
1095 		if (IS_ERR(shp)) {
1096 			err = PTR_ERR(shp);
1097 			goto out_unlock;
1098 		}
1099 	} else { /* IPC_STAT */
1100 		shp = shm_obtain_object_check(ns, shmid);
1101 		if (IS_ERR(shp)) {
1102 			err = PTR_ERR(shp);
1103 			goto out_unlock;
1104 		}
1105 	}
1106 
1107 	/*
1108 	 * Semantically SHM_STAT_ANY ought to be identical to
1109 	 * that functionality provided by the /proc/sysvipc/
1110 	 * interface. As such, only audit these calls and
1111 	 * do not do traditional S_IRUGO permission checks on
1112 	 * the ipc object.
1113 	 */
1114 	if (cmd == SHM_STAT_ANY)
1115 		audit_ipc_obj(&shp->shm_perm);
1116 	else {
1117 		err = -EACCES;
1118 		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
1119 			goto out_unlock;
1120 	}
1121 
1122 	err = security_shm_shmctl(&shp->shm_perm, cmd);
1123 	if (err)
1124 		goto out_unlock;
1125 
1126 	ipc_lock_object(&shp->shm_perm);
1127 
1128 	if (!ipc_valid_object(&shp->shm_perm)) {
1129 		ipc_unlock_object(&shp->shm_perm);
1130 		err = -EIDRM;
1131 		goto out_unlock;
1132 	}
1133 
1134 	kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1135 	tbuf->shm_segsz	= shp->shm_segsz;
1136 	tbuf->shm_atime	= shp->shm_atim;
1137 	tbuf->shm_dtime	= shp->shm_dtim;
1138 	tbuf->shm_ctime	= shp->shm_ctim;
1139 #ifndef CONFIG_64BIT
1140 	tbuf->shm_atime_high = shp->shm_atim >> 32;
1141 	tbuf->shm_dtime_high = shp->shm_dtim >> 32;
1142 	tbuf->shm_ctime_high = shp->shm_ctim >> 32;
1143 #endif
1144 	tbuf->shm_cpid	= pid_vnr(shp->shm_cprid);
1145 	tbuf->shm_lpid	= pid_vnr(shp->shm_lprid);
1146 	tbuf->shm_nattch = shp->shm_nattch;
1147 
1148 	if (cmd == IPC_STAT) {
1149 		/*
1150 		 * As defined in SUS:
1151 		 * Return 0 on success
1152 		 */
1153 		err = 0;
1154 	} else {
1155 		/*
1156 		 * SHM_STAT and SHM_STAT_ANY (both Linux specific)
1157 		 * Return the full id, including the sequence number
1158 		 */
1159 		err = shp->shm_perm.id;
1160 	}
1161 
1162 	ipc_unlock_object(&shp->shm_perm);
1163 out_unlock:
1164 	rcu_read_unlock();
1165 	return err;
1166 }
1167 
/*
 * Handle SHM_LOCK and SHM_UNLOCK for segment @shmid.
 *
 * Any @cmd other than SHM_LOCK is treated as SHM_UNLOCK.
 *
 * Returns 0 on success, including the cases where nothing needs doing (a
 * hugepage-backed segment, or SHM_UNLOCK on a segment that is not marked
 * SHM_LOCKED).  Otherwise returns the PTR_ERR() of the lookup, a non-zero
 * security_shm_shmctl() result, -EIDRM, -EPERM, or, for SHM_LOCK, the
 * result of shmem_lock().
 */
static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
{
	struct shmid_kernel *shp;
	struct file *shm_file;
	int err;

	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock1;
	}

	audit_ipc_obj(&(shp->shm_perm));
	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock1;

	/* err is 0 from here on until something below overwrites it */
	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		err = -EIDRM;
		goto out_unlock0;
	}

	/*
	 * Without CAP_IPC_LOCK in the namespace's user_ns, the caller's
	 * effective uid must match the segment's uid or cuid, and SHM_LOCK
	 * additionally requires a non-zero RLIMIT_MEMLOCK.
	 */
	if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
		kuid_t euid = current_euid();

		if (!uid_eq(euid, shp->shm_perm.uid) &&
		    !uid_eq(euid, shp->shm_perm.cuid)) {
			err = -EPERM;
			goto out_unlock0;
		}
		if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
			err = -EPERM;
			goto out_unlock0;
		}
	}

	/* hugepage-backed segments are left alone; this returns 0 */
	shm_file = shp->shm_file;
	if (is_file_hugepages(shm_file))
		goto out_unlock0;

	if (cmd == SHM_LOCK) {
		struct ucounts *ucounts = current_ucounts();

		err = shmem_lock(shm_file, 1, ucounts);
		/*
		 * Only record the lock (and the ucounts it was charged to)
		 * if the segment was not already marked SHM_LOCKED.
		 */
		if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
			shp->shm_perm.mode |= SHM_LOCKED;
			shp->mlock_ucounts = ucounts;
		}
		goto out_unlock0;
	}

	/* SHM_UNLOCK */
	if (!(shp->shm_perm.mode & SHM_LOCKED))
		goto out_unlock0;
	/* the result of shmem_lock() is not checked on the unlock path */
	shmem_lock(shm_file, 0, shp->mlock_ucounts);
	shp->shm_perm.mode &= ~SHM_LOCKED;
	shp->mlock_ucounts = NULL;
	/*
	 * Take a file reference before dropping the object lock and RCU so
	 * that shm_file can still be used by shmem_unlock_mapping() below,
	 * which runs without either of them held; the reference is dropped
	 * again right after.
	 */
	get_file(shm_file);
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	shmem_unlock_mapping(shm_file->f_mapping);

	fput(shm_file);
	return err;

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
	return err;
}
1243 
1244 static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version)
1245 {
1246 	int err;
1247 	struct ipc_namespace *ns;
1248 	struct shmid64_ds sem64;
1249 
1250 	if (cmd < 0 || shmid < 0)
1251 		return -EINVAL;
1252 
1253 	ns = current->nsproxy->ipc_ns;
1254 
1255 	switch (cmd) {
1256 	case IPC_INFO: {
1257 		struct shminfo64 shminfo;
1258 		err = shmctl_ipc_info(ns, &shminfo);
1259 		if (err < 0)
1260 			return err;
1261 		if (copy_shminfo_to_user(buf, &shminfo, version))
1262 			err = -EFAULT;
1263 		return err;
1264 	}
1265 	case SHM_INFO: {
1266 		struct shm_info shm_info;
1267 		err = shmctl_shm_info(ns, &shm_info);
1268 		if (err < 0)
1269 			return err;
1270 		if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1271 			err = -EFAULT;
1272 		return err;
1273 	}
1274 	case SHM_STAT:
1275 	case SHM_STAT_ANY:
1276 	case IPC_STAT: {
1277 		err = shmctl_stat(ns, shmid, cmd, &sem64);
1278 		if (err < 0)
1279 			return err;
1280 		if (copy_shmid_to_user(buf, &sem64, version))
1281 			err = -EFAULT;
1282 		return err;
1283 	}
1284 	case IPC_SET:
1285 		if (copy_shmid_from_user(&sem64, buf, version))
1286 			return -EFAULT;
1287 		fallthrough;
1288 	case IPC_RMID:
1289 		return shmctl_down(ns, shmid, cmd, &sem64);
1290 	case SHM_LOCK:
1291 	case SHM_UNLOCK:
1292 		return shmctl_do_lock(ns, shmid, cmd);
1293 	default:
1294 		return -EINVAL;
1295 	}
1296 }
1297 
/* shmctl entry point: always uses the IPC_64 structure layout. */
SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	return ksys_shmctl(shmid, cmd, buf, IPC_64);
}
1302 
1303 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
/*
 * shmctl variant that derives the structure layout version from @cmd.
 *
 * ipc_parse_version() is given a pointer to cmd, so it may rewrite cmd
 * (presumably stripping the version flag -- confirm in its definition).
 * It therefore has to run before cmd is handed to ksys_shmctl().
 */
long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
{
	int version = ipc_parse_version(&cmd);

	return ksys_shmctl(shmid, cmd, buf, version);
}
1310 
/* old_shmctl entry point: thin wrapper around ksys_old_shmctl(). */
SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	return ksys_old_shmctl(shmid, cmd, buf);
}
1315 #endif
1316 
1317 #ifdef CONFIG_COMPAT
1318 
/*
 * Compat layout used for the non-IPC_64 version: written out by
 * copy_compat_shmid_to_user() and used by copy_compat_shmid_from_user()
 * to locate shm_perm in the user buffer.
 */
struct compat_shmid_ds {
	struct compat_ipc_perm shm_perm;
	int shm_segsz;
	old_time32_t shm_atime;
	old_time32_t shm_dtime;
	old_time32_t shm_ctime;
	compat_ipc_pid_t shm_cpid;
	compat_ipc_pid_t shm_lpid;
	unsigned short shm_nattch;
	/* the shm_unused* members are only ever zeroed by this file */
	unsigned short shm_unused;
	compat_uptr_t shm_unused2;
	compat_uptr_t shm_unused3;
};
1332 
/*
 * Compat layout of the IPC_INFO result for the IPC_64 version; filled in by
 * copy_compat_shminfo_to_user().
 */
struct compat_shminfo64 {
	compat_ulong_t shmmax;
	compat_ulong_t shmmin;
	compat_ulong_t shmmni;
	compat_ulong_t shmseg;
	compat_ulong_t shmall;
	/* the __unused* members are only ever zeroed by this file */
	compat_ulong_t __unused1;
	compat_ulong_t __unused2;
	compat_ulong_t __unused3;
	compat_ulong_t __unused4;
};
1344 
/* Compat layout of the SHM_INFO result; filled in by put_compat_shm_info(). */
struct compat_shm_info {
	compat_int_t used_ids;
	compat_ulong_t shm_tot, shm_rss, shm_swp;
	compat_ulong_t swap_attempts, swap_successes;
};
1350 
1351 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1352 					int version)
1353 {
1354 	if (in->shmmax > INT_MAX)
1355 		in->shmmax = INT_MAX;
1356 	if (version == IPC_64) {
1357 		struct compat_shminfo64 info;
1358 		memset(&info, 0, sizeof(info));
1359 		info.shmmax = in->shmmax;
1360 		info.shmmin = in->shmmin;
1361 		info.shmmni = in->shmmni;
1362 		info.shmseg = in->shmseg;
1363 		info.shmall = in->shmall;
1364 		return copy_to_user(buf, &info, sizeof(info));
1365 	} else {
1366 		struct shminfo info;
1367 		memset(&info, 0, sizeof(info));
1368 		info.shmmax = in->shmmax;
1369 		info.shmmin = in->shmmin;
1370 		info.shmmni = in->shmmni;
1371 		info.shmseg = in->shmseg;
1372 		info.shmall = in->shmall;
1373 		return copy_to_user(buf, &info, sizeof(info));
1374 	}
1375 }
1376 
1377 static int put_compat_shm_info(struct shm_info *ip,
1378 				struct compat_shm_info __user *uip)
1379 {
1380 	struct compat_shm_info info;
1381 
1382 	memset(&info, 0, sizeof(info));
1383 	info.used_ids = ip->used_ids;
1384 	info.shm_tot = ip->shm_tot;
1385 	info.shm_rss = ip->shm_rss;
1386 	info.shm_swp = ip->shm_swp;
1387 	info.swap_attempts = ip->swap_attempts;
1388 	info.swap_successes = ip->swap_successes;
1389 	return copy_to_user(uip, &info, sizeof(info));
1390 }
1391 
1392 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1393 					int version)
1394 {
1395 	if (version == IPC_64) {
1396 		struct compat_shmid64_ds v;
1397 		memset(&v, 0, sizeof(v));
1398 		to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1399 		v.shm_atime	 = lower_32_bits(in->shm_atime);
1400 		v.shm_atime_high = upper_32_bits(in->shm_atime);
1401 		v.shm_dtime	 = lower_32_bits(in->shm_dtime);
1402 		v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1403 		v.shm_ctime	 = lower_32_bits(in->shm_ctime);
1404 		v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1405 		v.shm_segsz = in->shm_segsz;
1406 		v.shm_nattch = in->shm_nattch;
1407 		v.shm_cpid = in->shm_cpid;
1408 		v.shm_lpid = in->shm_lpid;
1409 		return copy_to_user(buf, &v, sizeof(v));
1410 	} else {
1411 		struct compat_shmid_ds v;
1412 		memset(&v, 0, sizeof(v));
1413 		to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1414 		v.shm_perm.key = in->shm_perm.key;
1415 		v.shm_atime = in->shm_atime;
1416 		v.shm_dtime = in->shm_dtime;
1417 		v.shm_ctime = in->shm_ctime;
1418 		v.shm_segsz = in->shm_segsz;
1419 		v.shm_nattch = in->shm_nattch;
1420 		v.shm_cpid = in->shm_cpid;
1421 		v.shm_lpid = in->shm_lpid;
1422 		return copy_to_user(buf, &v, sizeof(v));
1423 	}
1424 }
1425 
1426 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1427 					int version)
1428 {
1429 	memset(out, 0, sizeof(*out));
1430 	if (version == IPC_64) {
1431 		struct compat_shmid64_ds __user *p = buf;
1432 		return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1433 	} else {
1434 		struct compat_shmid_ds __user *p = buf;
1435 		return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1436 	}
1437 }
1438 
1439 static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
1440 {
1441 	struct ipc_namespace *ns;
1442 	struct shmid64_ds sem64;
1443 	int err;
1444 
1445 	ns = current->nsproxy->ipc_ns;
1446 
1447 	if (cmd < 0 || shmid < 0)
1448 		return -EINVAL;
1449 
1450 	switch (cmd) {
1451 	case IPC_INFO: {
1452 		struct shminfo64 shminfo;
1453 		err = shmctl_ipc_info(ns, &shminfo);
1454 		if (err < 0)
1455 			return err;
1456 		if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1457 			err = -EFAULT;
1458 		return err;
1459 	}
1460 	case SHM_INFO: {
1461 		struct shm_info shm_info;
1462 		err = shmctl_shm_info(ns, &shm_info);
1463 		if (err < 0)
1464 			return err;
1465 		if (put_compat_shm_info(&shm_info, uptr))
1466 			err = -EFAULT;
1467 		return err;
1468 	}
1469 	case IPC_STAT:
1470 	case SHM_STAT_ANY:
1471 	case SHM_STAT:
1472 		err = shmctl_stat(ns, shmid, cmd, &sem64);
1473 		if (err < 0)
1474 			return err;
1475 		if (copy_compat_shmid_to_user(uptr, &sem64, version))
1476 			err = -EFAULT;
1477 		return err;
1478 
1479 	case IPC_SET:
1480 		if (copy_compat_shmid_from_user(&sem64, uptr, version))
1481 			return -EFAULT;
1482 		fallthrough;
1483 	case IPC_RMID:
1484 		return shmctl_down(ns, shmid, cmd, &sem64);
1485 	case SHM_LOCK:
1486 	case SHM_UNLOCK:
1487 		return shmctl_do_lock(ns, shmid, cmd);
1488 	default:
1489 		return -EINVAL;
1490 	}
1491 	return err;
1492 }
1493 
/* Compat shmctl entry point: always uses the IPC_64 structure layout. */
COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
{
	return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64);
}
1498 
1499 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
/*
 * Compat shmctl variant that derives the structure layout version from @cmd.
 *
 * compat_ipc_parse_version() is given a pointer to cmd, so it may rewrite
 * cmd (presumably stripping the version flag -- confirm in its definition).
 * It therefore has to run before cmd is handed to compat_ksys_shmctl().
 */
long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr)
{
	int version = compat_ipc_parse_version(&cmd);

	return compat_ksys_shmctl(shmid, cmd, uptr, version);
}
1506 
/* Compat old_shmctl entry point: thin wrapper around compat_ksys_old_shmctl(). */
COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
{
	return compat_ksys_old_shmctl(shmid, cmd, uptr);
}
1511 #endif
1512 #endif
1513 
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 *
 * @shmid:   id of the segment to attach; a negative id yields -EINVAL.
 * @shmaddr: requested attach address, or NULL to leave the choice to
 *           do_mmap().  A non-NULL address is mapped with MAP_FIXED.
 * @shmflg:  SHM_RND, SHM_REMAP, SHM_RDONLY and SHM_EXEC are examined here;
 *           the full value is also passed to security_shm_shmat().
 * @raddr:   kernel pointer that receives the value returned by do_mmap(),
 *           including when that value is an error.  Not written on the
 *           paths that fail before do_mmap() is called.
 * @shmlba:  attach alignment.  It is used as "shmlba - 1" mask, so it is
 *           assumed to be a power of two (TODO confirm against callers).
 *
 * Returns 0 on success, otherwise an error code.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
	      ulong *raddr, unsigned long shmlba)
{
	struct shmid_kernel *shp;
	unsigned long addr = (unsigned long)shmaddr;
	unsigned long size;
	struct file *file, *base;
	int    err;
	unsigned long flags = MAP_SHARED;
	unsigned long prot;
	int acc_mode;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	int f_flags;
	unsigned long populate = 0;

	err = -EINVAL;
	if (shmid < 0)
		goto out;

	/*
	 * Address validation.  A misaligned address is rounded down when
	 * SHM_RND is set.  Without SHM_RND it is rejected; unless
	 * __ARCH_FORCE_SHMLBA is defined, only addresses that are not even
	 * page aligned are rejected.  SHM_REMAP requires a non-zero address.
	 */
	if (addr) {
		if (addr & (shmlba - 1)) {
			if (shmflg & SHM_RND) {
				addr &= ~(shmlba - 1);  /* round down */

				/*
				 * Ensure that the round-down is non-nil
				 * when remapping. This can happen for
				 * cases when addr < shmlba.
				 */
				if (!addr && (shmflg & SHM_REMAP))
					goto out;
			} else
#ifndef __ARCH_FORCE_SHMLBA
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}

		flags |= MAP_FIXED;
	} else if ((shmflg & SHM_REMAP))
		goto out;

	/* derive mapping protection, required ipc permissions and file flags */
	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_flags = O_RDONLY;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_flags = O_RDWR;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	/*
	 * We need to take a reference to the real shm file to prevent the
	 * pointer from becoming stale in cases where the lifetime of the outer
	 * file extends beyond that of the shm segment.  It's not usually
	 * possible, but it can happen during remap_file_pages() emulation as
	 * that unmaps the memory, then does ->mmap() via file reference only.
	 * We'll deny the ->mmap() if the shm segment was since removed, but to
	 * detect shm ID reuse we need to compare the file pointers.
	 */
	base = get_file(shp->shm_file);
	/*
	 * Temporary attach count for the duration of this call; it is
	 * dropped again at out_nattch on every path that gets this far,
	 * including success.
	 */
	shp->shm_nattch++;
	/* the mapping length is the size of the backing file's inode */
	size = i_size_read(file_inode(base));
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();

	err = -ENOMEM;
	sfd = kzalloc_obj(*sfd);
	if (!sfd) {
		/* drop the reference taken by get_file() above */
		fput(base);
		goto out_nattch;
	}

	file = alloc_file_clone(base, f_flags,
			  is_file_hugepages(base) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	err = PTR_ERR(file);
	if (IS_ERR(file)) {
		kfree(sfd);
		fput(base);
		goto out_nattch;
	}

	/*
	 * From here on sfd and base hang off file->private_data and are no
	 * longer released explicitly in this function; presumably the
	 * release of 'file' takes care of them -- confirm in the file
	 * operations' release handler.
	 */
	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);
	sfd->file = base;
	sfd->vm_ops = NULL;
	file->private_data = sfd;

	err = security_mmap_file(file, prot, flags);
	if (err)
		goto out_fput;

	if (mmap_write_lock_killable(current->mm)) {
		err = -EINTR;
		goto out_fput;
	}

	/*
	 * Without SHM_REMAP an explicit address must not wrap around and
	 * must not intersect an existing mapping.
	 */
	if (addr && !(shmflg & SHM_REMAP)) {
		err = -EINVAL;
		if (addr + size < addr)
			goto invalid;

		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
	}

	addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL);
	/* *raddr receives the do_mmap() result even if it is an error value */
	*raddr = addr;
	err = 0;
	if (IS_ERR_VALUE(addr))
		err = (long)addr;
invalid:
	mmap_write_unlock(current->mm);
	/* populate stays 0 unless do_mmap() was reached and set it */
	if (populate)
		mm_populate(addr, populate);

out_fput:
	/* drop this function's reference to the clone, on success as well */
	fput(file);

out_nattch:
	/*
	 * Undo the temporary shm_nattch++ from above and destroy the segment
	 * if shm_may_destroy() says so.
	 *
	 * NOTE(review): the result of shm_lock() is dereferenced without an
	 * IS_ERR() check; presumably the attach count taken above guarantees
	 * the segment still exists -- confirm.
	 */
	down_write(&shm_ids(ns).rwsem);
	shp = shm_lock(ns, shmid);
	shp->shm_nattch--;

	if (shm_may_destroy(shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rwsem);
	return err;

out_unlock:
	rcu_read_unlock();
out:
	return err;
}
1694 
1695 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1696 {
1697 	unsigned long ret;
1698 	long err;
1699 
1700 	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1701 	if (err)
1702 		return err;
1703 	force_successful_syscall_return();
1704 	return (long)ret;
1705 }
1706 
1707 #ifdef CONFIG_COMPAT
1708 
/* Use the native SHMLBA for compat attaches unless COMPAT_SHMLBA is already defined. */
#ifndef COMPAT_SHMLBA
#define COMPAT_SHMLBA	SHMLBA
#endif
1712 
1713 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1714 {
1715 	unsigned long ret;
1716 	long err;
1717 
1718 	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1719 	if (err)
1720 		return err;
1721 	force_successful_syscall_return();
1722 	return (long)ret;
1723 }
1724 #endif
1725 
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 *
 * @shmaddr: address the segment was attached at; must be page aligned.
 *
 * Returns 0 if a shm-backed vma matching @shmaddr was found and unmapped,
 * -EINVAL if @shmaddr is not page aligned or no such vma was found, and
 * -EINTR if mmap_write_lock_killable() fails.
 */
long ksys_shmdt(char __user *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = (unsigned long)shmaddr;
	int retval = -EINVAL;
#ifdef CONFIG_MMU
	loff_t size = 0;
	struct file *file;
	VMA_ITERATOR(vmi, mm, addr);
#endif

	if (addr & ~PAGE_MASK)
		return retval;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records its size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size and that are from the
	 *   same shm segment from which we determined the size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */

#ifdef CONFIG_MMU
	for_each_vma(vmi, vma) {
		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or
		 * otherwise it starts at this address with no hassles.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

			/*
			 * Record the file of the shm segment being
			 * unmapped.  With mremap(), someone could place
			 * page from another segment but with equal offsets
			 * in the range we are unmapping.
			 */
			file = vma->vm_file;
			size = i_size_read(file_inode(vma->vm_file));
			do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start,
					    vma->vm_end, NULL, false);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = vma_next(&vmi);
			break;
		}
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 *
	 * 'file' is only assigned when the loop above found a match.
	 * Presumably vma is NULL when for_each_vma() runs to completion, so
	 * this loop is skipped in that case and 'file' is never read
	 * uninitialised -- confirm against the for_each_vma() definition.
	 */
	size = PAGE_ALIGN(size);
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
		    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
		    (vma->vm_file == file)) {
			do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start,
					    vma->vm_end, NULL, false);
		}

		vma = vma_next(&vmi);
	}

#else	/* CONFIG_MMU */
	vma = vma_lookup(mm, addr);
	/* under NOMMU conditions, the exact address to be destroyed must be
	 * given
	 */
	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		retval = 0;
	}

#endif

	mmap_write_unlock(mm);
	return retval;
}
1835 
/* shmdt entry point: thin wrapper around ksys_shmdt(). */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
	return ksys_shmdt(shmaddr);
}
1840 
1841 #ifdef CONFIG_PROC_FS
1842 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1843 {
1844 	struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1845 	struct user_namespace *user_ns = seq_user_ns(s);
1846 	struct kern_ipc_perm *ipcp = it;
1847 	struct shmid_kernel *shp;
1848 	unsigned long rss = 0, swp = 0;
1849 
1850 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1851 	shm_add_rss_swap(shp, &rss, &swp);
1852 
1853 #if BITS_PER_LONG <= 32
1854 #define SIZE_SPEC "%10lu"
1855 #else
1856 #define SIZE_SPEC "%21lu"
1857 #endif
1858 
1859 	seq_printf(s,
1860 		   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1861 		   "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1862 		   SIZE_SPEC " " SIZE_SPEC "\n",
1863 		   shp->shm_perm.key,
1864 		   shp->shm_perm.id,
1865 		   shp->shm_perm.mode,
1866 		   shp->shm_segsz,
1867 		   pid_nr_ns(shp->shm_cprid, pid_ns),
1868 		   pid_nr_ns(shp->shm_lprid, pid_ns),
1869 		   shp->shm_nattch,
1870 		   from_kuid_munged(user_ns, shp->shm_perm.uid),
1871 		   from_kgid_munged(user_ns, shp->shm_perm.gid),
1872 		   from_kuid_munged(user_ns, shp->shm_perm.cuid),
1873 		   from_kgid_munged(user_ns, shp->shm_perm.cgid),
1874 		   shp->shm_atim,
1875 		   shp->shm_dtim,
1876 		   shp->shm_ctim,
1877 		   rss * PAGE_SIZE,
1878 		   swp * PAGE_SIZE);
1879 
1880 	return 0;
1881 }
1882 #endif
1883