18e38aeffSJohn Baldwin /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 415bcf785SRobert Watson * Copyright (c) 2006, 2011, 2016-2017 Robert N. M. Watson 5d301b358SKonstantin Belousov * Copyright 2020 The FreeBSD Foundation 68e38aeffSJohn Baldwin * All rights reserved. 78e38aeffSJohn Baldwin * 815bcf785SRobert Watson * Portions of this software were developed by BAE Systems, the University of 915bcf785SRobert Watson * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL 1015bcf785SRobert Watson * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent 1115bcf785SRobert Watson * Computing (TC) research program. 1215bcf785SRobert Watson * 13d301b358SKonstantin Belousov * Portions of this software were developed by Konstantin Belousov 14d301b358SKonstantin Belousov * under sponsorship from the FreeBSD Foundation. 15d301b358SKonstantin Belousov * 168e38aeffSJohn Baldwin * Redistribution and use in source and binary forms, with or without 178e38aeffSJohn Baldwin * modification, are permitted provided that the following conditions 188e38aeffSJohn Baldwin * are met: 198e38aeffSJohn Baldwin * 1. Redistributions of source code must retain the above copyright 208e38aeffSJohn Baldwin * notice, this list of conditions and the following disclaimer. 218e38aeffSJohn Baldwin * 2. Redistributions in binary form must reproduce the above copyright 228e38aeffSJohn Baldwin * notice, this list of conditions and the following disclaimer in the 238e38aeffSJohn Baldwin * documentation and/or other materials provided with the distribution. 248e38aeffSJohn Baldwin * 258e38aeffSJohn Baldwin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 268e38aeffSJohn Baldwin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 278e38aeffSJohn Baldwin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 288e38aeffSJohn Baldwin * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 298e38aeffSJohn Baldwin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 308e38aeffSJohn Baldwin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 318e38aeffSJohn Baldwin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 328e38aeffSJohn Baldwin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 338e38aeffSJohn Baldwin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 348e38aeffSJohn Baldwin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 358e38aeffSJohn Baldwin * SUCH DAMAGE. 368e38aeffSJohn Baldwin */ 378e38aeffSJohn Baldwin 388e38aeffSJohn Baldwin /* 398e38aeffSJohn Baldwin * Support for shared swap-backed anonymous memory objects via 409afb12baSDavid Bright * shm_open(2), shm_rename(2), and shm_unlink(2). 419afb12baSDavid Bright * While most of the implementation is here, vm_mmap.c contains 429afb12baSDavid Bright * mapping logic changes. 438e38aeffSJohn Baldwin * 445c066cd2SKonstantin Belousov * posixshmcontrol(1) allows users to inspect the state of the memory 455c066cd2SKonstantin Belousov * objects. Per-uid swap resource limit controls total amount of 465c066cd2SKonstantin Belousov * memory that user can consume for anonymous objects, including 475c066cd2SKonstantin Belousov * shared. 
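 *
 * A minimal userspace usage sketch (illustrative only, error handling
 * omitted; SHM_ANON requests an anonymous object):
 *
 *	int fd = shm_open(SHM_ANON, O_RDWR, 0);
 *	ftruncate(fd, length);
 *	void *p = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);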
488e38aeffSJohn Baldwin */ 498e38aeffSJohn Baldwin 508e38aeffSJohn Baldwin #include <sys/cdefs.h> 5112bc222eSJonathan Anderson #include "opt_capsicum.h" 52551a7895SRui Paulo #include "opt_ktrace.h" 5312bc222eSJonathan Anderson 548e38aeffSJohn Baldwin #include <sys/param.h> 554a144410SRobert Watson #include <sys/capsicum.h> 56610a2b3cSJohn Baldwin #include <sys/conf.h> 578e38aeffSJohn Baldwin #include <sys/fcntl.h> 588e38aeffSJohn Baldwin #include <sys/file.h> 598e38aeffSJohn Baldwin #include <sys/filedesc.h> 602b64ab22SMark Johnston #include <sys/filio.h> 618e38aeffSJohn Baldwin #include <sys/fnv_hash.h> 628e38aeffSJohn Baldwin #include <sys/kernel.h> 6391898857SMark Johnston #include <sys/limits.h> 64551a7895SRui Paulo #include <sys/uio.h> 65551a7895SRui Paulo #include <sys/signal.h> 66cc7b259aSJamie Gritton #include <sys/jail.h> 67551a7895SRui Paulo #include <sys/ktrace.h> 688e38aeffSJohn Baldwin #include <sys/lock.h> 698e38aeffSJohn Baldwin #include <sys/malloc.h> 708e38aeffSJohn Baldwin #include <sys/mman.h> 718e38aeffSJohn Baldwin #include <sys/mutex.h> 729c00bb91SKonstantin Belousov #include <sys/priv.h> 738e38aeffSJohn Baldwin #include <sys/proc.h> 748e38aeffSJohn Baldwin #include <sys/refcount.h> 758e38aeffSJohn Baldwin #include <sys/resourcevar.h> 7689f6b863SAttilio Rao #include <sys/rwlock.h> 7756d0e33eSKonstantin Belousov #include <sys/sbuf.h> 788e38aeffSJohn Baldwin #include <sys/stat.h> 797ee1b208SEd Schouten #include <sys/syscallsubr.h> 808e38aeffSJohn Baldwin #include <sys/sysctl.h> 818e38aeffSJohn Baldwin #include <sys/sysproto.h> 828e38aeffSJohn Baldwin #include <sys/systm.h> 838e38aeffSJohn Baldwin #include <sys/sx.h> 848e38aeffSJohn Baldwin #include <sys/time.h> 85d301b358SKonstantin Belousov #include <sys/vmmeter.h> 868e38aeffSJohn Baldwin #include <sys/vnode.h> 87940cb0e2SKonstantin Belousov #include <sys/unistd.h> 889696feebSJohn Baldwin #include <sys/user.h> 898e38aeffSJohn Baldwin 9015bcf785SRobert Watson #include <security/audit/audit.h> 918e38aeffSJohn Baldwin #include <security/mac/mac_framework.h> 928e38aeffSJohn Baldwin 938e38aeffSJohn Baldwin #include <vm/vm.h> 948e38aeffSJohn Baldwin #include <vm/vm_param.h> 958e38aeffSJohn Baldwin #include <vm/pmap.h> 96338e7cf2SJohn Baldwin #include <vm/vm_extern.h> 978e38aeffSJohn Baldwin #include <vm/vm_map.h> 98fb680e16SJohn Baldwin #include <vm/vm_kern.h> 998e38aeffSJohn Baldwin #include <vm/vm_object.h> 1008e38aeffSJohn Baldwin #include <vm/vm_page.h> 1012971897dSAlan Cox #include <vm/vm_pageout.h> 1028e38aeffSJohn Baldwin #include <vm/vm_pager.h> 1038e38aeffSJohn Baldwin #include <vm/swap_pager.h> 1048e38aeffSJohn Baldwin 1058e38aeffSJohn Baldwin struct shm_mapping { 1068e38aeffSJohn Baldwin char *sm_path; 1078e38aeffSJohn Baldwin Fnv32_t sm_fnv; 1088e38aeffSJohn Baldwin struct shmfd *sm_shmfd; 1098e38aeffSJohn Baldwin LIST_ENTRY(shm_mapping) sm_link; 1108e38aeffSJohn Baldwin }; 1118e38aeffSJohn Baldwin 1128e38aeffSJohn Baldwin static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor"); 1138e38aeffSJohn Baldwin static LIST_HEAD(, shm_mapping) *shm_dictionary; 1148e38aeffSJohn Baldwin static struct sx shm_dict_lock; 1158e38aeffSJohn Baldwin static struct mtx shm_timestamp_lock; 1168e38aeffSJohn Baldwin static u_long shm_hash; 1177883ce1fSMateusz Guzik static struct unrhdr64 shm_ino_unr; 118610a2b3cSJohn Baldwin static dev_t shm_dev_ino; 1198e38aeffSJohn Baldwin 1208e38aeffSJohn Baldwin #define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash]) 1218e38aeffSJohn Baldwin 1225be725d7SAndreas Tobler static void 
shm_init(void *arg); 1238e38aeffSJohn Baldwin static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); 1248e38aeffSJohn Baldwin static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); 1258e38aeffSJohn Baldwin static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); 1267060da62SJamie Gritton static void shm_doremove(struct shm_mapping *map); 127d301b358SKonstantin Belousov static int shm_dotruncate_cookie(struct shmfd *shmfd, off_t length, 128d301b358SKonstantin Belousov void *rl_cookie); 129af755d3eSKyle Evans static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length, 130af755d3eSKyle Evans void *rl_cookie); 1312d5603feSDavid Bright static int shm_copyin_path(struct thread *td, const char *userpath_in, 1322d5603feSDavid Bright char **path_out); 133454bc887SKa Ho Ng static int shm_deallocate(struct shmfd *shmfd, off_t *offset, 134454bc887SKa Ho Ng off_t *length, int flags); 1358e38aeffSJohn Baldwin 1368e38aeffSJohn Baldwin static fo_rdwr_t shm_read; 1378e38aeffSJohn Baldwin static fo_rdwr_t shm_write; 1388e38aeffSJohn Baldwin static fo_truncate_t shm_truncate; 1392b64ab22SMark Johnston static fo_ioctl_t shm_ioctl; 1408e38aeffSJohn Baldwin static fo_stat_t shm_stat; 1418e38aeffSJohn Baldwin static fo_close_t shm_close; 1429c00bb91SKonstantin Belousov static fo_chmod_t shm_chmod; 1439c00bb91SKonstantin Belousov static fo_chown_t shm_chown; 144940cb0e2SKonstantin Belousov static fo_seek_t shm_seek; 1459696feebSJohn Baldwin static fo_fill_kinfo_t shm_fill_kinfo; 1467077c426SJohn Baldwin static fo_mmap_t shm_mmap; 147af755d3eSKyle Evans static fo_get_seals_t shm_get_seals; 148af755d3eSKyle Evans static fo_add_seals_t shm_add_seals; 149f1040532SKyle Evans static fo_fallocate_t shm_fallocate; 150454bc887SKa Ho Ng static fo_fspacectl_t shm_fspacectl; 1518e38aeffSJohn Baldwin 1528e38aeffSJohn Baldwin /* File descriptor operations. 
*/ 1531bdbd705SKonstantin Belousov struct fileops shm_ops = { 1548e38aeffSJohn Baldwin .fo_read = shm_read, 1558e38aeffSJohn Baldwin .fo_write = shm_write, 1568e38aeffSJohn Baldwin .fo_truncate = shm_truncate, 1572b64ab22SMark Johnston .fo_ioctl = shm_ioctl, 1582d69d0dcSJohn Baldwin .fo_poll = invfo_poll, 1592d69d0dcSJohn Baldwin .fo_kqfilter = invfo_kqfilter, 1608e38aeffSJohn Baldwin .fo_stat = shm_stat, 1618e38aeffSJohn Baldwin .fo_close = shm_close, 1629c00bb91SKonstantin Belousov .fo_chmod = shm_chmod, 1639c00bb91SKonstantin Belousov .fo_chown = shm_chown, 164227aaa86SKonstantin Belousov .fo_sendfile = vn_sendfile, 165940cb0e2SKonstantin Belousov .fo_seek = shm_seek, 1669696feebSJohn Baldwin .fo_fill_kinfo = shm_fill_kinfo, 1677077c426SJohn Baldwin .fo_mmap = shm_mmap, 168af755d3eSKyle Evans .fo_get_seals = shm_get_seals, 169af755d3eSKyle Evans .fo_add_seals = shm_add_seals, 170f1040532SKyle Evans .fo_fallocate = shm_fallocate, 171454bc887SKa Ho Ng .fo_fspacectl = shm_fspacectl, 172f28526e9SKonstantin Belousov .fo_cmp = file_kcmp_generic, 173d301b358SKonstantin Belousov .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE, 1748e38aeffSJohn Baldwin }; 1758e38aeffSJohn Baldwin 1768e38aeffSJohn Baldwin FEATURE(posix_shm, "POSIX shared memory"); 1778e38aeffSJohn Baldwin 178d301b358SKonstantin Belousov static SYSCTL_NODE(_vm, OID_AUTO, largepages, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 179d301b358SKonstantin Belousov ""); 180d301b358SKonstantin Belousov 181d301b358SKonstantin Belousov static int largepage_reclaim_tries = 1; 182d301b358SKonstantin Belousov SYSCTL_INT(_vm_largepages, OID_AUTO, reclaim_tries, 183d301b358SKonstantin Belousov CTLFLAG_RWTUN, &largepage_reclaim_tries, 0, 184d301b358SKonstantin Belousov "Number of contig reclaims before giving up for default alloc policy"); 185d301b358SKonstantin Belousov 1866df6facfSKonstantin Belousov #define shm_rangelock_unlock(shmfd, cookie) \ 187c3d8a931SKonstantin Belousov rangelock_unlock(&(shmfd)->shm_rl, (cookie)) 1886df6facfSKonstantin Belousov #define shm_rangelock_rlock(shmfd, start, end) \ 189c3d8a931SKonstantin Belousov rangelock_rlock(&(shmfd)->shm_rl, (start), (end)) 1906df6facfSKonstantin Belousov #define shm_rangelock_tryrlock(shmfd, start, end) \ 191c3d8a931SKonstantin Belousov rangelock_tryrlock(&(shmfd)->shm_rl, (start), (end)) 1926df6facfSKonstantin Belousov #define shm_rangelock_wlock(shmfd, start, end) \ 193c3d8a931SKonstantin Belousov rangelock_wlock(&(shmfd)->shm_rl, (start), (end)) 1946df6facfSKonstantin Belousov 1958e38aeffSJohn Baldwin static int 19641cf41fdSKonstantin Belousov uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) 19741cf41fdSKonstantin Belousov { 19841cf41fdSKonstantin Belousov vm_page_t m; 19941cf41fdSKonstantin Belousov vm_pindex_t idx; 20041cf41fdSKonstantin Belousov size_t tlen; 20141cf41fdSKonstantin Belousov int error, offset, rv; 20241cf41fdSKonstantin Belousov 20341cf41fdSKonstantin Belousov idx = OFF_TO_IDX(uio->uio_offset); 20441cf41fdSKonstantin Belousov offset = uio->uio_offset & PAGE_MASK; 20541cf41fdSKonstantin Belousov tlen = MIN(PAGE_SIZE - offset, len); 20641cf41fdSKonstantin Belousov 207f72eaaebSJeff Roberson rv = vm_page_grab_valid_unlocked(&m, obj, idx, 208f72eaaebSJeff Roberson VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOCREAT); 209f72eaaebSJeff Roberson if (rv == VM_PAGER_OK) 210f72eaaebSJeff Roberson goto found; 21141cf41fdSKonstantin Belousov 21241cf41fdSKonstantin Belousov /* 2136311d7aaSWill Andrews * Read I/O without either a corresponding resident page or swap 
2146311d7aaSWill Andrews * page: use zero_region. This is intended to avoid instantiating 2156311d7aaSWill Andrews * pages on read from a sparse region. 2166311d7aaSWill Andrews */ 217f72eaaebSJeff Roberson VM_OBJECT_WLOCK(obj); 218f72eaaebSJeff Roberson m = vm_page_lookup(obj, idx); 219f72eaaebSJeff Roberson if (uio->uio_rw == UIO_READ && m == NULL && 2206311d7aaSWill Andrews !vm_pager_has_page(obj, idx, NULL, NULL)) { 2216311d7aaSWill Andrews VM_OBJECT_WUNLOCK(obj); 222b9062c93SKonstantin Belousov return (uiomove(__DECONST(void *, zero_region), tlen, uio)); 2236311d7aaSWill Andrews } 2246311d7aaSWill Andrews 2256311d7aaSWill Andrews /* 22641cf41fdSKonstantin Belousov * Although the tmpfs vnode lock is held here, it is 22741cf41fdSKonstantin Belousov * nonetheless safe to sleep waiting for a free page. The 22841cf41fdSKonstantin Belousov * pageout daemon does not need to acquire the tmpfs vnode 22941cf41fdSKonstantin Belousov * lock to page out tobj's pages because tobj is a OBJT_SWAP 23041cf41fdSKonstantin Belousov * type object. 23141cf41fdSKonstantin Belousov */ 232c7575748SJeff Roberson rv = vm_page_grab_valid(&m, obj, idx, 233a8081778SJeff Roberson VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY); 23441cf41fdSKonstantin Belousov if (rv != VM_PAGER_OK) { 23541cf41fdSKonstantin Belousov VM_OBJECT_WUNLOCK(obj); 2367ec4b29bSKonstantin Belousov if (bootverbose) { 2377ec4b29bSKonstantin Belousov printf("uiomove_object: vm_obj %p idx %jd " 2387ec4b29bSKonstantin Belousov "pager error %d\n", obj, idx, rv); 2397ec4b29bSKonstantin Belousov } 24037aea264SKonstantin Belousov return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO); 24141cf41fdSKonstantin Belousov } 24241cf41fdSKonstantin Belousov VM_OBJECT_WUNLOCK(obj); 243f72eaaebSJeff Roberson 244f72eaaebSJeff Roberson found: 24541cf41fdSKonstantin Belousov error = uiomove_fromphys(&m, offset, tlen, uio); 246a8081778SJeff Roberson if (uio->uio_rw == UIO_WRITE && error == 0) 247a8081778SJeff Roberson vm_page_set_dirty(m); 248d29f674fSJeff Roberson vm_page_activate(m); 249a8081778SJeff Roberson vm_page_sunbusy(m); 25041cf41fdSKonstantin Belousov 25141cf41fdSKonstantin Belousov return (error); 25241cf41fdSKonstantin Belousov } 25341cf41fdSKonstantin Belousov 25441cf41fdSKonstantin Belousov int 25541cf41fdSKonstantin Belousov uiomove_object(vm_object_t obj, off_t obj_size, struct uio *uio) 25641cf41fdSKonstantin Belousov { 25741cf41fdSKonstantin Belousov ssize_t resid; 25841cf41fdSKonstantin Belousov size_t len; 25941cf41fdSKonstantin Belousov int error; 26041cf41fdSKonstantin Belousov 26141cf41fdSKonstantin Belousov error = 0; 26241cf41fdSKonstantin Belousov while ((resid = uio->uio_resid) > 0) { 26341cf41fdSKonstantin Belousov if (obj_size <= uio->uio_offset) 26441cf41fdSKonstantin Belousov break; 26541cf41fdSKonstantin Belousov len = MIN(obj_size - uio->uio_offset, resid); 26641cf41fdSKonstantin Belousov if (len == 0) 26741cf41fdSKonstantin Belousov break; 26841cf41fdSKonstantin Belousov error = uiomove_object_page(obj, len, uio); 26941cf41fdSKonstantin Belousov if (error != 0 || resid == uio->uio_resid) 27041cf41fdSKonstantin Belousov break; 27141cf41fdSKonstantin Belousov } 27241cf41fdSKonstantin Belousov return (error); 27341cf41fdSKonstantin Belousov } 27441cf41fdSKonstantin Belousov 275d301b358SKonstantin Belousov static u_long count_largepages[MAXPAGESIZES]; 276d301b358SKonstantin Belousov 277d301b358SKonstantin Belousov static int 278d301b358SKonstantin Belousov shm_largepage_phys_populate(vm_object_t object, vm_pindex_t pidx, 
279d301b358SKonstantin Belousov int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) 280d301b358SKonstantin Belousov { 2813b5331ddSKonstantin Belousov vm_page_t m __diagused; 282d301b358SKonstantin Belousov int psind; 283d301b358SKonstantin Belousov 284d301b358SKonstantin Belousov psind = object->un_pager.phys.data_val; 285d301b358SKonstantin Belousov if (psind == 0 || pidx >= object->size) 286d301b358SKonstantin Belousov return (VM_PAGER_FAIL); 287d301b358SKonstantin Belousov *first = rounddown2(pidx, pagesizes[psind] / PAGE_SIZE); 288d301b358SKonstantin Belousov
289d301b358SKonstantin Belousov /* 290d301b358SKonstantin Belousov * We only busy the first page in the superpage run. It is 291d301b358SKonstantin Belousov * useless to busy the whole run since we only remove full 292d301b358SKonstantin Belousov * superpages, and it takes too long to busy e.g. 512 * 512 == 293d301b358SKonstantin Belousov * 262144 pages constituting a 1G amd64 superpage. 294d301b358SKonstantin Belousov */ 295d301b358SKonstantin Belousov m = vm_page_grab(object, *first, VM_ALLOC_NORMAL | VM_ALLOC_NOCREAT); 296d301b358SKonstantin Belousov MPASS(m != NULL); 297d301b358SKonstantin Belousov
298d301b358SKonstantin Belousov *last = *first + atop(pagesizes[psind]) - 1; 299d301b358SKonstantin Belousov return (VM_PAGER_OK); 300d301b358SKonstantin Belousov } 301d301b358SKonstantin Belousov
302d301b358SKonstantin Belousov static boolean_t 303d301b358SKonstantin Belousov shm_largepage_phys_haspage(vm_object_t object, vm_pindex_t pindex, 304d301b358SKonstantin Belousov int *before, int *after) 305d301b358SKonstantin Belousov { 306d301b358SKonstantin Belousov int psind; 307d301b358SKonstantin Belousov
308d301b358SKonstantin Belousov psind = object->un_pager.phys.data_val; 309d301b358SKonstantin Belousov if (psind == 0 || pindex >= object->size) 310d301b358SKonstantin Belousov return (FALSE); 311d301b358SKonstantin Belousov if (before != NULL) { 312d301b358SKonstantin Belousov *before = pindex - rounddown2(pindex, pagesizes[psind] / 313d301b358SKonstantin Belousov PAGE_SIZE); 314d301b358SKonstantin Belousov } 315d301b358SKonstantin Belousov if (after != NULL) { 316d301b358SKonstantin Belousov *after = roundup2(pindex, pagesizes[psind] / PAGE_SIZE) - 317d301b358SKonstantin Belousov pindex; 318d301b358SKonstantin Belousov } 319d301b358SKonstantin Belousov return (TRUE); 320d301b358SKonstantin Belousov } 321d301b358SKonstantin Belousov
322d301b358SKonstantin Belousov static void 323d301b358SKonstantin Belousov shm_largepage_phys_ctor(vm_object_t object, vm_prot_t prot, 324d301b358SKonstantin Belousov vm_ooffset_t foff, struct ucred *cred) 325d301b358SKonstantin Belousov { 326d301b358SKonstantin Belousov } 327d301b358SKonstantin Belousov
328d301b358SKonstantin Belousov static void 329d301b358SKonstantin Belousov shm_largepage_phys_dtor(vm_object_t object) 330d301b358SKonstantin Belousov { 331d301b358SKonstantin Belousov int psind; 332d301b358SKonstantin Belousov
333d301b358SKonstantin Belousov psind = object->un_pager.phys.data_val; 334d301b358SKonstantin Belousov if (psind != 0) { 335d301b358SKonstantin Belousov atomic_subtract_long(&count_largepages[psind], 336d301b358SKonstantin Belousov object->size / (pagesizes[psind] / PAGE_SIZE)); 337d301b358SKonstantin Belousov vm_wire_sub(object->size); 338d301b358SKonstantin Belousov } else { 339d301b358SKonstantin Belousov KASSERT(object->size == 0, 340d301b358SKonstantin Belousov ("largepage phys obj %p not initialized but size %#jx > 0", 341d301b358SKonstantin Belousov
object, (uintmax_t)object->size)); 342d301b358SKonstantin Belousov } 343d301b358SKonstantin Belousov } 344d301b358SKonstantin Belousov 345d474440aSKonstantin Belousov static const struct phys_pager_ops shm_largepage_phys_ops = { 346d301b358SKonstantin Belousov .phys_pg_populate = shm_largepage_phys_populate, 347d301b358SKonstantin Belousov .phys_pg_haspage = shm_largepage_phys_haspage, 348d301b358SKonstantin Belousov .phys_pg_ctor = shm_largepage_phys_ctor, 349d301b358SKonstantin Belousov .phys_pg_dtor = shm_largepage_phys_dtor, 350d301b358SKonstantin Belousov }; 351d301b358SKonstantin Belousov 352d301b358SKonstantin Belousov bool 353d301b358SKonstantin Belousov shm_largepage(struct shmfd *shmfd) 354d301b358SKonstantin Belousov { 355d301b358SKonstantin Belousov return (shmfd->shm_object->type == OBJT_PHYS); 356d301b358SKonstantin Belousov } 357d301b358SKonstantin Belousov 3580919f29dSKonstantin Belousov static void 3590919f29dSKonstantin Belousov shm_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size) 3600919f29dSKonstantin Belousov { 3610919f29dSKonstantin Belousov struct shmfd *shm; 3620919f29dSKonstantin Belousov vm_size_t c; 3630919f29dSKonstantin Belousov 3640919f29dSKonstantin Belousov swap_pager_freespace(obj, start, size, &c); 3650919f29dSKonstantin Belousov if (c == 0) 3660919f29dSKonstantin Belousov return; 3670919f29dSKonstantin Belousov 3680919f29dSKonstantin Belousov shm = obj->un_pager.swp.swp_priv; 3690919f29dSKonstantin Belousov if (shm == NULL) 3700919f29dSKonstantin Belousov return; 3710919f29dSKonstantin Belousov KASSERT(shm->shm_pages >= c, 3720919f29dSKonstantin Belousov ("shm %p pages %jd free %jd", shm, 3730919f29dSKonstantin Belousov (uintmax_t)shm->shm_pages, (uintmax_t)c)); 3740919f29dSKonstantin Belousov shm->shm_pages -= c; 3750919f29dSKonstantin Belousov } 3760919f29dSKonstantin Belousov 3770919f29dSKonstantin Belousov static void 3780919f29dSKonstantin Belousov shm_page_inserted(vm_object_t obj, vm_page_t m) 3790919f29dSKonstantin Belousov { 3800919f29dSKonstantin Belousov struct shmfd *shm; 3810919f29dSKonstantin Belousov 3820919f29dSKonstantin Belousov shm = obj->un_pager.swp.swp_priv; 3830919f29dSKonstantin Belousov if (shm == NULL) 3840919f29dSKonstantin Belousov return; 3850919f29dSKonstantin Belousov if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) 3860919f29dSKonstantin Belousov shm->shm_pages += 1; 3870919f29dSKonstantin Belousov } 3880919f29dSKonstantin Belousov 3890919f29dSKonstantin Belousov static void 3900919f29dSKonstantin Belousov shm_page_removed(vm_object_t obj, vm_page_t m) 3910919f29dSKonstantin Belousov { 3920919f29dSKonstantin Belousov struct shmfd *shm; 3930919f29dSKonstantin Belousov 3940919f29dSKonstantin Belousov shm = obj->un_pager.swp.swp_priv; 3950919f29dSKonstantin Belousov if (shm == NULL) 3960919f29dSKonstantin Belousov return; 3970919f29dSKonstantin Belousov if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { 3980919f29dSKonstantin Belousov KASSERT(shm->shm_pages >= 1, 3990919f29dSKonstantin Belousov ("shm %p pages %jd free 1", shm, 4000919f29dSKonstantin Belousov (uintmax_t)shm->shm_pages)); 4010919f29dSKonstantin Belousov shm->shm_pages -= 1; 4020919f29dSKonstantin Belousov } 4030919f29dSKonstantin Belousov } 4040919f29dSKonstantin Belousov 4050919f29dSKonstantin Belousov static struct pagerops shm_swap_pager_ops = { 4060919f29dSKonstantin Belousov .pgo_kvme_type = KVME_TYPE_SWAP, 4070919f29dSKonstantin Belousov .pgo_freespace = shm_pager_freespace, 4080919f29dSKonstantin Belousov .pgo_page_inserted = 
shm_page_inserted, 4090919f29dSKonstantin Belousov .pgo_page_removed = shm_page_removed, 4100919f29dSKonstantin Belousov }; 4110919f29dSKonstantin Belousov static int shmfd_pager_type = -1; 4120919f29dSKonstantin Belousov 41341cf41fdSKonstantin Belousov static int 414940cb0e2SKonstantin Belousov shm_seek(struct file *fp, off_t offset, int whence, struct thread *td) 415940cb0e2SKonstantin Belousov { 416940cb0e2SKonstantin Belousov struct shmfd *shmfd; 417940cb0e2SKonstantin Belousov off_t foffset; 418940cb0e2SKonstantin Belousov int error; 419940cb0e2SKonstantin Belousov 420940cb0e2SKonstantin Belousov shmfd = fp->f_data; 421940cb0e2SKonstantin Belousov foffset = foffset_lock(fp, 0); 422940cb0e2SKonstantin Belousov error = 0; 423940cb0e2SKonstantin Belousov switch (whence) { 424940cb0e2SKonstantin Belousov case L_INCR: 425940cb0e2SKonstantin Belousov if (foffset < 0 || 426940cb0e2SKonstantin Belousov (offset > 0 && foffset > OFF_MAX - offset)) { 427940cb0e2SKonstantin Belousov error = EOVERFLOW; 428940cb0e2SKonstantin Belousov break; 429940cb0e2SKonstantin Belousov } 430940cb0e2SKonstantin Belousov offset += foffset; 431940cb0e2SKonstantin Belousov break; 432940cb0e2SKonstantin Belousov case L_XTND: 433940cb0e2SKonstantin Belousov if (offset > 0 && shmfd->shm_size > OFF_MAX - offset) { 434940cb0e2SKonstantin Belousov error = EOVERFLOW; 435940cb0e2SKonstantin Belousov break; 436940cb0e2SKonstantin Belousov } 437940cb0e2SKonstantin Belousov offset += shmfd->shm_size; 438940cb0e2SKonstantin Belousov break; 439940cb0e2SKonstantin Belousov case L_SET: 440940cb0e2SKonstantin Belousov break; 441940cb0e2SKonstantin Belousov default: 442940cb0e2SKonstantin Belousov error = EINVAL; 443940cb0e2SKonstantin Belousov } 444940cb0e2SKonstantin Belousov if (error == 0) { 445940cb0e2SKonstantin Belousov if (offset < 0 || offset > shmfd->shm_size) 446940cb0e2SKonstantin Belousov error = EINVAL; 447940cb0e2SKonstantin Belousov else 4486f2b769cSJohn-Mark Gurney td->td_uretoff.tdu_off = offset; 449940cb0e2SKonstantin Belousov } 450940cb0e2SKonstantin Belousov foffset_unlock(fp, offset, error != 0 ? 
FOF_NOUPDATE : 0); 451940cb0e2SKonstantin Belousov return (error); 452940cb0e2SKonstantin Belousov } 453940cb0e2SKonstantin Belousov 454940cb0e2SKonstantin Belousov static int 4558e38aeffSJohn Baldwin shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 4568e38aeffSJohn Baldwin int flags, struct thread *td) 4578e38aeffSJohn Baldwin { 458940cb0e2SKonstantin Belousov struct shmfd *shmfd; 459940cb0e2SKonstantin Belousov void *rl_cookie; 460940cb0e2SKonstantin Belousov int error; 4618e38aeffSJohn Baldwin 462940cb0e2SKonstantin Belousov shmfd = fp->f_data; 463940cb0e2SKonstantin Belousov #ifdef MAC 464940cb0e2SKonstantin Belousov error = mac_posixshm_check_read(active_cred, fp->f_cred, shmfd); 465940cb0e2SKonstantin Belousov if (error) 466940cb0e2SKonstantin Belousov return (error); 467940cb0e2SKonstantin Belousov #endif 4686ea906eeSJilles Tjoelker foffset_lock_uio(fp, uio, flags); 4696df6facfSKonstantin Belousov rl_cookie = shm_rangelock_rlock(shmfd, uio->uio_offset, 4706df6facfSKonstantin Belousov uio->uio_offset + uio->uio_resid); 471940cb0e2SKonstantin Belousov error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio); 4726df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 473940cb0e2SKonstantin Belousov foffset_unlock_uio(fp, uio, flags); 474940cb0e2SKonstantin Belousov return (error); 4758e38aeffSJohn Baldwin } 4768e38aeffSJohn Baldwin 4778e38aeffSJohn Baldwin static int 4788e38aeffSJohn Baldwin shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 4798e38aeffSJohn Baldwin int flags, struct thread *td) 4808e38aeffSJohn Baldwin { 481940cb0e2SKonstantin Belousov struct shmfd *shmfd; 482940cb0e2SKonstantin Belousov void *rl_cookie; 483940cb0e2SKonstantin Belousov int error; 4843f07b9d9SKyle Evans off_t size; 4858e38aeffSJohn Baldwin 486940cb0e2SKonstantin Belousov shmfd = fp->f_data; 487940cb0e2SKonstantin Belousov #ifdef MAC 488940cb0e2SKonstantin Belousov error = mac_posixshm_check_write(active_cred, fp->f_cred, shmfd); 489940cb0e2SKonstantin Belousov if (error) 490940cb0e2SKonstantin Belousov return (error); 491940cb0e2SKonstantin Belousov #endif 492d301b358SKonstantin Belousov if (shm_largepage(shmfd) && shmfd->shm_lp_psind == 0) 493d301b358SKonstantin Belousov return (EINVAL); 494940cb0e2SKonstantin Belousov foffset_lock_uio(fp, uio, flags); 4953f07b9d9SKyle Evans if (uio->uio_resid > OFF_MAX - uio->uio_offset) { 4963f07b9d9SKyle Evans /* 4973f07b9d9SKyle Evans * Overflow is only an error if we're supposed to expand on 4983f07b9d9SKyle Evans * write. Otherwise, we'll just truncate the write to the 4993f07b9d9SKyle Evans * size of the file, which can only grow up to OFF_MAX. 
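 *
 * That is, with SHM_GROW_ON_WRITE set, a write whose offset plus
 * length would exceed OFF_MAX fails with EFBIG; without it, the
 * transfer is simply bounded by the current shm_size in
 * uiomove_object() below.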
5003f07b9d9SKyle Evans */ 5013f07b9d9SKyle Evans if ((shmfd->shm_flags & SHM_GROW_ON_WRITE) != 0) { 5023f07b9d9SKyle Evans foffset_unlock_uio(fp, uio, flags); 5033f07b9d9SKyle Evans return (EFBIG); 5043f07b9d9SKyle Evans } 5053f07b9d9SKyle Evans 5063f07b9d9SKyle Evans size = shmfd->shm_size; 5073f07b9d9SKyle Evans } else { 5083f07b9d9SKyle Evans size = uio->uio_offset + uio->uio_resid; 5093f07b9d9SKyle Evans } 5106df6facfSKonstantin Belousov if ((flags & FOF_OFFSET) == 0) 5116df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 5126df6facfSKonstantin Belousov else 5136df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, uio->uio_offset, size); 5143f07b9d9SKyle Evans if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) { 515af755d3eSKyle Evans error = EPERM; 5163f07b9d9SKyle Evans } else { 5173f07b9d9SKyle Evans error = 0; 5183f07b9d9SKyle Evans if ((shmfd->shm_flags & SHM_GROW_ON_WRITE) != 0 && 5193f07b9d9SKyle Evans size > shmfd->shm_size) { 52079783634SKonstantin Belousov error = shm_dotruncate_cookie(shmfd, size, rl_cookie); 5213f07b9d9SKyle Evans } 5223f07b9d9SKyle Evans if (error == 0) 5233f07b9d9SKyle Evans error = uiomove_object(shmfd->shm_object, 5243f07b9d9SKyle Evans shmfd->shm_size, uio); 5253f07b9d9SKyle Evans } 5266df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 527940cb0e2SKonstantin Belousov foffset_unlock_uio(fp, uio, flags); 528940cb0e2SKonstantin Belousov return (error); 5298e38aeffSJohn Baldwin } 5308e38aeffSJohn Baldwin 5318e38aeffSJohn Baldwin static int 5328e38aeffSJohn Baldwin shm_truncate(struct file *fp, off_t length, struct ucred *active_cred, 5338e38aeffSJohn Baldwin struct thread *td) 5348e38aeffSJohn Baldwin { 5358e38aeffSJohn Baldwin struct shmfd *shmfd; 5368e38aeffSJohn Baldwin #ifdef MAC 5378e38aeffSJohn Baldwin int error; 5388e38aeffSJohn Baldwin #endif 5398e38aeffSJohn Baldwin 5408e38aeffSJohn Baldwin shmfd = fp->f_data; 5418e38aeffSJohn Baldwin #ifdef MAC 5428e38aeffSJohn Baldwin error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd); 5438e38aeffSJohn Baldwin if (error) 5448e38aeffSJohn Baldwin return (error); 5458e38aeffSJohn Baldwin #endif 5463364c323SKonstantin Belousov return (shm_dotruncate(shmfd, length)); 5478e38aeffSJohn Baldwin } 5488e38aeffSJohn Baldwin 5492b64ab22SMark Johnston int 5502b64ab22SMark Johnston shm_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, 5512b64ab22SMark Johnston struct thread *td) 5522b64ab22SMark Johnston { 553d301b358SKonstantin Belousov struct shmfd *shmfd; 554d301b358SKonstantin Belousov struct shm_largepage_conf *conf; 555d301b358SKonstantin Belousov void *rl_cookie; 5562b64ab22SMark Johnston 557d301b358SKonstantin Belousov shmfd = fp->f_data; 5582b64ab22SMark Johnston switch (com) { 5592b64ab22SMark Johnston case FIONBIO: 5602b64ab22SMark Johnston case FIOASYNC: 5612b64ab22SMark Johnston /* 5622b64ab22SMark Johnston * Allow fcntl(fd, F_SETFL, O_NONBLOCK) to work, 5632b64ab22SMark Johnston * just like it would on an unlinked regular file 5642b64ab22SMark Johnston */ 5652b64ab22SMark Johnston return (0); 566d301b358SKonstantin Belousov case FIOSSHMLPGCNF: 567d301b358SKonstantin Belousov if (!shm_largepage(shmfd)) 568d301b358SKonstantin Belousov return (ENOTTY); 569d301b358SKonstantin Belousov conf = data; 570d301b358SKonstantin Belousov if (shmfd->shm_lp_psind != 0 && 571d301b358SKonstantin Belousov conf->psind != shmfd->shm_lp_psind) 572d301b358SKonstantin Belousov return (EINVAL); 573d301b358SKonstantin Belousov if (conf->psind 
<= 0 || conf->psind >= MAXPAGESIZES || 574d301b358SKonstantin Belousov pagesizes[conf->psind] == 0) 575d301b358SKonstantin Belousov return (EINVAL); 576d301b358SKonstantin Belousov if (conf->alloc_policy != SHM_LARGEPAGE_ALLOC_DEFAULT && 577d301b358SKonstantin Belousov conf->alloc_policy != SHM_LARGEPAGE_ALLOC_NOWAIT && 578d301b358SKonstantin Belousov conf->alloc_policy != SHM_LARGEPAGE_ALLOC_HARD) 579d301b358SKonstantin Belousov return (EINVAL); 580d301b358SKonstantin Belousov 5816df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 582d301b358SKonstantin Belousov shmfd->shm_lp_psind = conf->psind; 583d301b358SKonstantin Belousov shmfd->shm_lp_alloc_policy = conf->alloc_policy; 584d301b358SKonstantin Belousov shmfd->shm_object->un_pager.phys.data_val = conf->psind; 5856df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 586d301b358SKonstantin Belousov return (0); 587d301b358SKonstantin Belousov case FIOGSHMLPGCNF: 588d301b358SKonstantin Belousov if (!shm_largepage(shmfd)) 589d301b358SKonstantin Belousov return (ENOTTY); 590d301b358SKonstantin Belousov conf = data; 5916df6facfSKonstantin Belousov rl_cookie = shm_rangelock_rlock(shmfd, 0, OFF_MAX); 592d301b358SKonstantin Belousov conf->psind = shmfd->shm_lp_psind; 593d301b358SKonstantin Belousov conf->alloc_policy = shmfd->shm_lp_alloc_policy; 5946df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 595d301b358SKonstantin Belousov return (0); 5962b64ab22SMark Johnston default: 5972b64ab22SMark Johnston return (ENOTTY); 5982b64ab22SMark Johnston } 5992b64ab22SMark Johnston } 6002b64ab22SMark Johnston 6018e38aeffSJohn Baldwin static int 6022b68eb8eSMateusz Guzik shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) 6038e38aeffSJohn Baldwin { 6048e38aeffSJohn Baldwin struct shmfd *shmfd; 6058e38aeffSJohn Baldwin #ifdef MAC 6068e38aeffSJohn Baldwin int error; 6078e38aeffSJohn Baldwin #endif 6088e38aeffSJohn Baldwin 6098e38aeffSJohn Baldwin shmfd = fp->f_data; 6108e38aeffSJohn Baldwin 6118e38aeffSJohn Baldwin #ifdef MAC 6128e38aeffSJohn Baldwin error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd); 6138e38aeffSJohn Baldwin if (error) 6148e38aeffSJohn Baldwin return (error); 6158e38aeffSJohn Baldwin #endif 6168e38aeffSJohn Baldwin 6178e38aeffSJohn Baldwin /* 6188e38aeffSJohn Baldwin * Attempt to return sanish values for fstat() on a memory file 6198e38aeffSJohn Baldwin * descriptor. 
6208e38aeffSJohn Baldwin */ 6218e38aeffSJohn Baldwin bzero(sb, sizeof(*sb)); 6228e38aeffSJohn Baldwin sb->st_blksize = PAGE_SIZE; 6238e38aeffSJohn Baldwin sb->st_size = shmfd->shm_size; 6249c00bb91SKonstantin Belousov mtx_lock(&shm_timestamp_lock); 625510ea843SEd Schouten sb->st_atim = shmfd->shm_atime; 626510ea843SEd Schouten sb->st_ctim = shmfd->shm_ctime; 627510ea843SEd Schouten sb->st_mtim = shmfd->shm_mtime; 628510ea843SEd Schouten sb->st_birthtim = shmfd->shm_birthtime; 6299c00bb91SKonstantin Belousov sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */ 6308e38aeffSJohn Baldwin sb->st_uid = shmfd->shm_uid; 6318e38aeffSJohn Baldwin sb->st_gid = shmfd->shm_gid; 6329c00bb91SKonstantin Belousov mtx_unlock(&shm_timestamp_lock); 633610a2b3cSJohn Baldwin sb->st_dev = shm_dev_ino; 634610a2b3cSJohn Baldwin sb->st_ino = shmfd->shm_ino; 635e4b77548SKonstantin Belousov sb->st_nlink = shmfd->shm_object->ref_count; 6360919f29dSKonstantin Belousov if (shm_largepage(shmfd)) { 637d301b358SKonstantin Belousov sb->st_blocks = shmfd->shm_object->size / 638d301b358SKonstantin Belousov (pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT); 6390919f29dSKonstantin Belousov } else { 6400919f29dSKonstantin Belousov sb->st_blocks = shmfd->shm_pages; 6410919f29dSKonstantin Belousov } 6428e38aeffSJohn Baldwin 6438e38aeffSJohn Baldwin return (0); 6448e38aeffSJohn Baldwin } 6458e38aeffSJohn Baldwin 6468e38aeffSJohn Baldwin static int 6478e38aeffSJohn Baldwin shm_close(struct file *fp, struct thread *td) 6488e38aeffSJohn Baldwin { 6498e38aeffSJohn Baldwin struct shmfd *shmfd; 6508e38aeffSJohn Baldwin 6518e38aeffSJohn Baldwin shmfd = fp->f_data; 6528e38aeffSJohn Baldwin fp->f_data = NULL; 6538e38aeffSJohn Baldwin shm_drop(shmfd); 6548e38aeffSJohn Baldwin 6558e38aeffSJohn Baldwin return (0); 6568e38aeffSJohn Baldwin } 6578e38aeffSJohn Baldwin 658af755d3eSKyle Evans static int 6592d5603feSDavid Bright shm_copyin_path(struct thread *td, const char *userpath_in, char **path_out) { 6602d5603feSDavid Bright int error; 6612d5603feSDavid Bright char *path; 6622d5603feSDavid Bright const char *pr_path; 6632d5603feSDavid Bright size_t pr_pathlen; 6642d5603feSDavid Bright 6652d5603feSDavid Bright path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK); 6662d5603feSDavid Bright pr_path = td->td_ucred->cr_prison->pr_path; 6672d5603feSDavid Bright 6682d5603feSDavid Bright /* Construct a full pathname for jailed callers. */ 6692d5603feSDavid Bright pr_pathlen = strcmp(pr_path, "/") == 6702d5603feSDavid Bright 0 ? 0 : strlcpy(path, pr_path, MAXPATHLEN); 6712d5603feSDavid Bright error = copyinstr(userpath_in, path + pr_pathlen, 6722d5603feSDavid Bright MAXPATHLEN - pr_pathlen, NULL); 6732d5603feSDavid Bright if (error != 0) 6742d5603feSDavid Bright goto out; 6752d5603feSDavid Bright 6762d5603feSDavid Bright #ifdef KTRACE 6772d5603feSDavid Bright if (KTRPOINT(curthread, KTR_NAMEI)) 6782d5603feSDavid Bright ktrnamei(path); 6792d5603feSDavid Bright #endif 6802d5603feSDavid Bright 6812d5603feSDavid Bright /* Require paths to start with a '/' character. 
*/ 6822d5603feSDavid Bright if (path[pr_pathlen] != '/') { 6832d5603feSDavid Bright error = EINVAL; 6842d5603feSDavid Bright goto out; 6852d5603feSDavid Bright } 6862d5603feSDavid Bright 6872d5603feSDavid Bright *path_out = path; 6882d5603feSDavid Bright 6892d5603feSDavid Bright out: 6902d5603feSDavid Bright if (error != 0) 6912d5603feSDavid Bright free(path, M_SHMFD); 6922d5603feSDavid Bright 6932d5603feSDavid Bright return (error); 6942d5603feSDavid Bright } 6952d5603feSDavid Bright 6962d5603feSDavid Bright static int 697454bc887SKa Ho Ng shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base, 698454bc887SKa Ho Ng int end) 699454bc887SKa Ho Ng { 700454bc887SKa Ho Ng vm_page_t m; 701454bc887SKa Ho Ng int rv; 702454bc887SKa Ho Ng 703454bc887SKa Ho Ng VM_OBJECT_ASSERT_WLOCKED(object); 704454bc887SKa Ho Ng KASSERT(base >= 0, ("%s: base %d", __func__, base)); 705454bc887SKa Ho Ng KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base, 706454bc887SKa Ho Ng end)); 707454bc887SKa Ho Ng 708454bc887SKa Ho Ng retry: 709454bc887SKa Ho Ng m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT); 710454bc887SKa Ho Ng if (m != NULL) { 711454bc887SKa Ho Ng MPASS(vm_page_all_valid(m)); 712454bc887SKa Ho Ng } else if (vm_pager_has_page(object, idx, NULL, NULL)) { 713454bc887SKa Ho Ng m = vm_page_alloc(object, idx, 714454bc887SKa Ho Ng VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); 715454bc887SKa Ho Ng if (m == NULL) 716454bc887SKa Ho Ng goto retry; 717454bc887SKa Ho Ng vm_object_pip_add(object, 1); 718454bc887SKa Ho Ng VM_OBJECT_WUNLOCK(object); 719454bc887SKa Ho Ng rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); 720454bc887SKa Ho Ng VM_OBJECT_WLOCK(object); 721454bc887SKa Ho Ng vm_object_pip_wakeup(object); 722454bc887SKa Ho Ng if (rv == VM_PAGER_OK) { 723454bc887SKa Ho Ng /* 724454bc887SKa Ho Ng * Since the page was not resident, and therefore not 725454bc887SKa Ho Ng * recently accessed, immediately enqueue it for 726454bc887SKa Ho Ng * asynchronous laundering. The current operation is 727454bc887SKa Ho Ng * not regarded as an access. 
728454bc887SKa Ho Ng */ 729454bc887SKa Ho Ng vm_page_launder(m); 730454bc887SKa Ho Ng } else { 731454bc887SKa Ho Ng vm_page_free(m); 732454bc887SKa Ho Ng VM_OBJECT_WUNLOCK(object); 733454bc887SKa Ho Ng return (EIO); 734454bc887SKa Ho Ng } 735454bc887SKa Ho Ng } 736454bc887SKa Ho Ng if (m != NULL) { 737454bc887SKa Ho Ng pmap_zero_page_area(m, base, end - base); 738454bc887SKa Ho Ng KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid", 739454bc887SKa Ho Ng __func__, m)); 740454bc887SKa Ho Ng vm_page_set_dirty(m); 741454bc887SKa Ho Ng vm_page_xunbusy(m); 742454bc887SKa Ho Ng } 743454bc887SKa Ho Ng 744454bc887SKa Ho Ng return (0); 745454bc887SKa Ho Ng } 746454bc887SKa Ho Ng 747454bc887SKa Ho Ng static int 748af755d3eSKyle Evans shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie) 7498e38aeffSJohn Baldwin { 7508e38aeffSJohn Baldwin vm_object_t object; 751454bc887SKa Ho Ng vm_pindex_t nobjsize; 7523364c323SKonstantin Belousov vm_ooffset_t delta; 753454bc887SKa Ho Ng int base, error; 7548e38aeffSJohn Baldwin 7552a016de1SAlan Cox KASSERT(length >= 0, ("shm_dotruncate: length < 0")); 7568e38aeffSJohn Baldwin object = shmfd->shm_object; 757af755d3eSKyle Evans VM_OBJECT_ASSERT_WLOCKED(object); 758af755d3eSKyle Evans rangelock_cookie_assert(rl_cookie, RA_WLOCKED); 759af755d3eSKyle Evans if (length == shmfd->shm_size) 7603364c323SKonstantin Belousov return (0); 7618e38aeffSJohn Baldwin nobjsize = OFF_TO_IDX(length + PAGE_MASK); 7628e38aeffSJohn Baldwin 7638e38aeffSJohn Baldwin /* Are we shrinking? If so, trim the end. */ 7648e38aeffSJohn Baldwin if (length < shmfd->shm_size) { 765af755d3eSKyle Evans if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0) 766af755d3eSKyle Evans return (EPERM); 767af755d3eSKyle Evans 768fb680e16SJohn Baldwin /* 769fb680e16SJohn Baldwin * Disallow any requests to shrink the size if this 770fb680e16SJohn Baldwin * object is mapped into the kernel. 771fb680e16SJohn Baldwin */ 772af755d3eSKyle Evans if (shmfd->shm_kmappings > 0) 773fb680e16SJohn Baldwin return (EBUSY); 7742971897dSAlan Cox 7752971897dSAlan Cox /* 7762971897dSAlan Cox * Zero the truncated part of the last page. 7772971897dSAlan Cox */ 7782971897dSAlan Cox base = length & PAGE_MASK; 7792971897dSAlan Cox if (base != 0) { 780454bc887SKa Ho Ng error = shm_partial_page_invalidate(object, 781454bc887SKa Ho Ng OFF_TO_IDX(length), base, PAGE_SIZE); 782454bc887SKa Ho Ng if (error) 783454bc887SKa Ho Ng return (error); 7842971897dSAlan Cox } 7852a016de1SAlan Cox delta = IDX_TO_OFF(object->size - nobjsize); 7863364c323SKonstantin Belousov 7878e38aeffSJohn Baldwin if (nobjsize < object->size) 7888e38aeffSJohn Baldwin vm_object_page_remove(object, nobjsize, object->size, 7896bbee8e2SAlan Cox 0); 7908e38aeffSJohn Baldwin 7913364c323SKonstantin Belousov /* Free the swap accounted for shm */ 792ef694c1aSEdward Tomasz Napierala swap_release_by_cred(delta, object->cred); 7933364c323SKonstantin Belousov object->charge -= delta; 7943364c323SKonstantin Belousov } else { 795af755d3eSKyle Evans if ((shmfd->shm_seals & F_SEAL_GROW) != 0) 796af755d3eSKyle Evans return (EPERM); 797af755d3eSKyle Evans 7982a016de1SAlan Cox /* Try to reserve additional swap space. 
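 *
 * The reservation is charged to the object's credential, so the
 * per-uid swap resource limit mentioned at the top of this file
 * applies; if the reservation fails, the grow is rejected with
 * ENOMEM.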
*/ 7992a016de1SAlan Cox delta = IDX_TO_OFF(nobjsize - object->size); 800af755d3eSKyle Evans if (!swap_reserve_by_cred(delta, object->cred)) 8013364c323SKonstantin Belousov return (ENOMEM); 8023364c323SKonstantin Belousov object->charge += delta; 8038e38aeffSJohn Baldwin } 8048e38aeffSJohn Baldwin shmfd->shm_size = length; 8058e38aeffSJohn Baldwin mtx_lock(&shm_timestamp_lock); 8068e38aeffSJohn Baldwin vfs_timestamp(&shmfd->shm_ctime); 8078e38aeffSJohn Baldwin shmfd->shm_mtime = shmfd->shm_ctime; 8088e38aeffSJohn Baldwin mtx_unlock(&shm_timestamp_lock); 8098e38aeffSJohn Baldwin object->size = nobjsize; 8103364c323SKonstantin Belousov return (0); 8118e38aeffSJohn Baldwin } 8128e38aeffSJohn Baldwin 813d301b358SKonstantin Belousov static int 814d301b358SKonstantin Belousov shm_dotruncate_largepage(struct shmfd *shmfd, off_t length, void *rl_cookie) 815d301b358SKonstantin Belousov { 816d301b358SKonstantin Belousov vm_object_t object; 817d301b358SKonstantin Belousov vm_page_t m; 8183b5331ddSKonstantin Belousov vm_pindex_t newobjsz; 8193b5331ddSKonstantin Belousov vm_pindex_t oldobjsz __unused; 820d301b358SKonstantin Belousov int aflags, error, i, psind, try; 821d301b358SKonstantin Belousov 822d301b358SKonstantin Belousov KASSERT(length >= 0, ("shm_dotruncate: length < 0")); 823d301b358SKonstantin Belousov object = shmfd->shm_object; 824d301b358SKonstantin Belousov VM_OBJECT_ASSERT_WLOCKED(object); 825d301b358SKonstantin Belousov rangelock_cookie_assert(rl_cookie, RA_WLOCKED); 826d301b358SKonstantin Belousov 827d301b358SKonstantin Belousov oldobjsz = object->size; 828d301b358SKonstantin Belousov newobjsz = OFF_TO_IDX(length); 829d301b358SKonstantin Belousov if (length == shmfd->shm_size) 830d301b358SKonstantin Belousov return (0); 831d301b358SKonstantin Belousov psind = shmfd->shm_lp_psind; 832d301b358SKonstantin Belousov if (psind == 0 && length != 0) 833d301b358SKonstantin Belousov return (EINVAL); 834d301b358SKonstantin Belousov if ((length & (pagesizes[psind] - 1)) != 0) 835d301b358SKonstantin Belousov return (EINVAL); 836d301b358SKonstantin Belousov 837d301b358SKonstantin Belousov if (length < shmfd->shm_size) { 838d301b358SKonstantin Belousov if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0) 839d301b358SKonstantin Belousov return (EPERM); 840d301b358SKonstantin Belousov if (shmfd->shm_kmappings > 0) 841d301b358SKonstantin Belousov return (EBUSY); 842d301b358SKonstantin Belousov return (ENOTSUP); /* Pages are unmanaged. 
*/ 843d301b358SKonstantin Belousov #if 0 844d301b358SKonstantin Belousov vm_object_page_remove(object, newobjsz, oldobjsz, 0); 845d301b358SKonstantin Belousov object->size = newobjsz; 846d301b358SKonstantin Belousov shmfd->shm_size = length; 847d301b358SKonstantin Belousov return (0); 848d301b358SKonstantin Belousov #endif 849d301b358SKonstantin Belousov } 850d301b358SKonstantin Belousov
85179783634SKonstantin Belousov if ((shmfd->shm_seals & F_SEAL_GROW) != 0) 85279783634SKonstantin Belousov return (EPERM); 85379783634SKonstantin Belousov
854d301b358SKonstantin Belousov aflags = VM_ALLOC_NORMAL | VM_ALLOC_ZERO; 855d301b358SKonstantin Belousov if (shmfd->shm_lp_alloc_policy == SHM_LARGEPAGE_ALLOC_NOWAIT) 856d301b358SKonstantin Belousov aflags |= VM_ALLOC_WAITFAIL; 857d301b358SKonstantin Belousov try = 0; 858d301b358SKonstantin Belousov
859d301b358SKonstantin Belousov /* 860d301b358SKonstantin Belousov * Extend the shmfd and object, keeping all already fully 861d301b358SKonstantin Belousov * allocated large pages intact even on error, because the dropped 862d301b358SKonstantin Belousov * object lock might have allowed them to be mapped. 863d301b358SKonstantin Belousov */ 864d301b358SKonstantin Belousov while (object->size < newobjsz) { 865d301b358SKonstantin Belousov m = vm_page_alloc_contig(object, object->size, aflags, 866d301b358SKonstantin Belousov pagesizes[psind] / PAGE_SIZE, 0, ~0, 867d301b358SKonstantin Belousov pagesizes[psind], 0, 868d301b358SKonstantin Belousov VM_MEMATTR_DEFAULT); 869d301b358SKonstantin Belousov if (m == NULL) { 870d301b358SKonstantin Belousov VM_OBJECT_WUNLOCK(object); 871d301b358SKonstantin Belousov if (shmfd->shm_lp_alloc_policy == 872d301b358SKonstantin Belousov SHM_LARGEPAGE_ALLOC_NOWAIT || 873d301b358SKonstantin Belousov (shmfd->shm_lp_alloc_policy == 874d301b358SKonstantin Belousov SHM_LARGEPAGE_ALLOC_DEFAULT && 875d301b358SKonstantin Belousov try >= largepage_reclaim_tries)) { 876d301b358SKonstantin Belousov VM_OBJECT_WLOCK(object); 877d301b358SKonstantin Belousov return (ENOMEM); 878d301b358SKonstantin Belousov } 879d301b358SKonstantin Belousov error = vm_page_reclaim_contig(aflags, 880d301b358SKonstantin Belousov pagesizes[psind] / PAGE_SIZE, 0, ~0, 8812619c5ccSJason A. Harmening pagesizes[psind], 0); 8822619c5ccSJason A. Harmening if (error == ENOMEM) 8832619c5ccSJason A.
Harmening error = vm_wait_intr(object); 884d301b358SKonstantin Belousov if (error != 0) { 885d301b358SKonstantin Belousov VM_OBJECT_WLOCK(object); 886d301b358SKonstantin Belousov return (error); 887d301b358SKonstantin Belousov } 888d301b358SKonstantin Belousov try++; 889d301b358SKonstantin Belousov VM_OBJECT_WLOCK(object); 890d301b358SKonstantin Belousov continue; 891d301b358SKonstantin Belousov } 892d301b358SKonstantin Belousov try = 0; 893d301b358SKonstantin Belousov for (i = 0; i < pagesizes[psind] / PAGE_SIZE; i++) { 894d301b358SKonstantin Belousov if ((m[i].flags & PG_ZERO) == 0) 895d301b358SKonstantin Belousov pmap_zero_page(&m[i]); 896d301b358SKonstantin Belousov vm_page_valid(&m[i]); 897d301b358SKonstantin Belousov vm_page_xunbusy(&m[i]); 898d301b358SKonstantin Belousov } 899d301b358SKonstantin Belousov object->size += OFF_TO_IDX(pagesizes[psind]); 900d301b358SKonstantin Belousov shmfd->shm_size += pagesizes[psind]; 901d301b358SKonstantin Belousov atomic_add_long(&count_largepages[psind], 1); 902d301b358SKonstantin Belousov vm_wire_add(atop(pagesizes[psind])); 903d301b358SKonstantin Belousov } 904d301b358SKonstantin Belousov return (0); 905d301b358SKonstantin Belousov } 906d301b358SKonstantin Belousov 907d301b358SKonstantin Belousov static int 908d301b358SKonstantin Belousov shm_dotruncate_cookie(struct shmfd *shmfd, off_t length, void *rl_cookie) 909d301b358SKonstantin Belousov { 910d301b358SKonstantin Belousov int error; 911d301b358SKonstantin Belousov 912d301b358SKonstantin Belousov VM_OBJECT_WLOCK(shmfd->shm_object); 913d301b358SKonstantin Belousov error = shm_largepage(shmfd) ? shm_dotruncate_largepage(shmfd, 914d301b358SKonstantin Belousov length, rl_cookie) : shm_dotruncate_locked(shmfd, length, 915d301b358SKonstantin Belousov rl_cookie); 916d301b358SKonstantin Belousov VM_OBJECT_WUNLOCK(shmfd->shm_object); 917d301b358SKonstantin Belousov return (error); 918d301b358SKonstantin Belousov } 919d301b358SKonstantin Belousov 920af755d3eSKyle Evans int 921af755d3eSKyle Evans shm_dotruncate(struct shmfd *shmfd, off_t length) 922af755d3eSKyle Evans { 923af755d3eSKyle Evans void *rl_cookie; 924af755d3eSKyle Evans int error; 925af755d3eSKyle Evans 9266df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 927d301b358SKonstantin Belousov error = shm_dotruncate_cookie(shmfd, length, rl_cookie); 9286df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 929af755d3eSKyle Evans return (error); 930af755d3eSKyle Evans } 931af755d3eSKyle Evans 9328e38aeffSJohn Baldwin /* 9338e38aeffSJohn Baldwin * shmfd object management including creation and reference counting 9348e38aeffSJohn Baldwin * routines. 
9358e38aeffSJohn Baldwin */ 9361bdbd705SKonstantin Belousov struct shmfd * 937d301b358SKonstantin Belousov shm_alloc(struct ucred *ucred, mode_t mode, bool largepage) 9388e38aeffSJohn Baldwin { 9398e38aeffSJohn Baldwin struct shmfd *shmfd; 9400919f29dSKonstantin Belousov vm_object_t obj; 9418e38aeffSJohn Baldwin 9428e38aeffSJohn Baldwin shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); 9438e38aeffSJohn Baldwin shmfd->shm_size = 0; 9448e38aeffSJohn Baldwin shmfd->shm_uid = ucred->cr_uid; 9458e38aeffSJohn Baldwin shmfd->shm_gid = ucred->cr_gid; 9468e38aeffSJohn Baldwin shmfd->shm_mode = mode; 947d301b358SKonstantin Belousov if (largepage) { 948*a10870ecSKonstantin Belousov obj = shmfd->shm_object = phys_pager_allocate(NULL, 949d301b358SKonstantin Belousov &shm_largepage_phys_ops, NULL, shmfd->shm_size, 950d301b358SKonstantin Belousov VM_PROT_DEFAULT, 0, ucred); 951*a10870ecSKonstantin Belousov VM_OBJECT_WLOCK(shmfd->shm_object); 952*a10870ecSKonstantin Belousov obj->un_pager.phys.phys_priv = shmfd; 953*a10870ecSKonstantin Belousov vm_object_set_flag(obj, OBJ_POSIXSHM); 954*a10870ecSKonstantin Belousov VM_OBJECT_WUNLOCK(shmfd->shm_object); 955d301b358SKonstantin Belousov shmfd->shm_lp_alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT; 956d301b358SKonstantin Belousov } else { 9570919f29dSKonstantin Belousov obj = vm_pager_allocate(shmfd_pager_type, NULL, 9583364c323SKonstantin Belousov shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); 9590919f29dSKonstantin Belousov VM_OBJECT_WLOCK(obj); 9600919f29dSKonstantin Belousov obj->un_pager.swp.swp_priv = shmfd; 961*a10870ecSKonstantin Belousov vm_object_set_flag(obj, OBJ_POSIXSHM); 9620919f29dSKonstantin Belousov VM_OBJECT_WUNLOCK(obj); 9630919f29dSKonstantin Belousov shmfd->shm_object = obj; 964d301b358SKonstantin Belousov } 9658e38aeffSJohn Baldwin KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); 9668e38aeffSJohn Baldwin vfs_timestamp(&shmfd->shm_birthtime); 9678e38aeffSJohn Baldwin shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime = 9688e38aeffSJohn Baldwin shmfd->shm_birthtime; 9697883ce1fSMateusz Guzik shmfd->shm_ino = alloc_unr64(&shm_ino_unr); 9708e38aeffSJohn Baldwin refcount_init(&shmfd->shm_refs, 1); 971940cb0e2SKonstantin Belousov mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF); 972940cb0e2SKonstantin Belousov rangelock_init(&shmfd->shm_rl); 9738e38aeffSJohn Baldwin #ifdef MAC 9748e38aeffSJohn Baldwin mac_posixshm_init(shmfd); 9758e38aeffSJohn Baldwin mac_posixshm_create(ucred, shmfd); 9768e38aeffSJohn Baldwin #endif 9778e38aeffSJohn Baldwin 9788e38aeffSJohn Baldwin return (shmfd); 9798e38aeffSJohn Baldwin } 9808e38aeffSJohn Baldwin 9811bdbd705SKonstantin Belousov struct shmfd * 9828e38aeffSJohn Baldwin shm_hold(struct shmfd *shmfd) 9838e38aeffSJohn Baldwin { 9848e38aeffSJohn Baldwin 9858e38aeffSJohn Baldwin refcount_acquire(&shmfd->shm_refs); 9868e38aeffSJohn Baldwin return (shmfd); 9878e38aeffSJohn Baldwin } 9888e38aeffSJohn Baldwin 9891bdbd705SKonstantin Belousov void 9908e38aeffSJohn Baldwin shm_drop(struct shmfd *shmfd) 9918e38aeffSJohn Baldwin { 9920919f29dSKonstantin Belousov vm_object_t obj; 9938e38aeffSJohn Baldwin 9948e38aeffSJohn Baldwin if (refcount_release(&shmfd->shm_refs)) { 9958e38aeffSJohn Baldwin #ifdef MAC 9968e38aeffSJohn Baldwin mac_posixshm_destroy(shmfd); 9978e38aeffSJohn Baldwin #endif 998940cb0e2SKonstantin Belousov rangelock_destroy(&shmfd->shm_rl); 999940cb0e2SKonstantin Belousov mtx_destroy(&shmfd->shm_mtx); 10000919f29dSKonstantin Belousov obj = shmfd->shm_object; 
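/*
 * The pager-private back-pointer is cleared under the object lock
 * below, before the reference is dropped, so that the pager
 * callbacks (shm_pager_freespace() and friends) observe NULL rather
 * than a freed shmfd if the object outlives this descriptor.
 */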
10010919f29dSKonstantin Belousov VM_OBJECT_WLOCK(obj); 1002*a10870ecSKonstantin Belousov if (shm_largepage(shmfd)) 1003*a10870ecSKonstantin Belousov obj->un_pager.phys.phys_priv = NULL; 1004*a10870ecSKonstantin Belousov else 10050919f29dSKonstantin Belousov obj->un_pager.swp.swp_priv = NULL; 10060919f29dSKonstantin Belousov VM_OBJECT_WUNLOCK(obj); 10070919f29dSKonstantin Belousov vm_object_deallocate(obj); 10088e38aeffSJohn Baldwin free(shmfd, M_SHMFD); 10098e38aeffSJohn Baldwin } 10108e38aeffSJohn Baldwin } 10118e38aeffSJohn Baldwin 10128e38aeffSJohn Baldwin /* 10138e38aeffSJohn Baldwin * Determine if the credentials have sufficient permissions for a 10148e38aeffSJohn Baldwin * specified combination of FREAD and FWRITE. 10158e38aeffSJohn Baldwin */ 10161bdbd705SKonstantin Belousov int 10178e38aeffSJohn Baldwin shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags) 10188e38aeffSJohn Baldwin { 101915bc6b2bSEdward Tomasz Napierala accmode_t accmode; 10209c00bb91SKonstantin Belousov int error; 10218e38aeffSJohn Baldwin 102215bc6b2bSEdward Tomasz Napierala accmode = 0; 10238e38aeffSJohn Baldwin if (flags & FREAD) 102415bc6b2bSEdward Tomasz Napierala accmode |= VREAD; 10258e38aeffSJohn Baldwin if (flags & FWRITE) 102615bc6b2bSEdward Tomasz Napierala accmode |= VWRITE; 10279c00bb91SKonstantin Belousov mtx_lock(&shm_timestamp_lock); 10289c00bb91SKonstantin Belousov error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, 1029d292b194SMateusz Guzik accmode, ucred); 10309c00bb91SKonstantin Belousov mtx_unlock(&shm_timestamp_lock); 10319c00bb91SKonstantin Belousov return (error); 10328e38aeffSJohn Baldwin } 10338e38aeffSJohn Baldwin 10348e38aeffSJohn Baldwin static void 1035610a2b3cSJohn Baldwin shm_init(void *arg) 10368e38aeffSJohn Baldwin { 1037d301b358SKonstantin Belousov char name[32]; 1038d301b358SKonstantin Belousov int i; 10398e38aeffSJohn Baldwin 10408e38aeffSJohn Baldwin mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF); 10418e38aeffSJohn Baldwin sx_init(&shm_dict_lock, "shm dictionary"); 10428e38aeffSJohn Baldwin shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash); 10437883ce1fSMateusz Guzik new_unrhdr64(&shm_ino_unr, 1); 1044610a2b3cSJohn Baldwin shm_dev_ino = devfs_alloc_cdp_inode(); 1045610a2b3cSJohn Baldwin KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized")); 10460919f29dSKonstantin Belousov shmfd_pager_type = vm_pager_alloc_dyn_type(&shm_swap_pager_ops, 10470919f29dSKonstantin Belousov OBJT_SWAP); 10480919f29dSKonstantin Belousov MPASS(shmfd_pager_type != -1); 1049d301b358SKonstantin Belousov 1050d301b358SKonstantin Belousov for (i = 1; i < MAXPAGESIZES; i++) { 1051d301b358SKonstantin Belousov if (pagesizes[i] == 0) 1052d301b358SKonstantin Belousov break; 1053d301b358SKonstantin Belousov #define M (1024 * 1024) 1054d301b358SKonstantin Belousov #define G (1024 * M) 1055d301b358SKonstantin Belousov if (pagesizes[i] >= G) 1056d301b358SKonstantin Belousov snprintf(name, sizeof(name), "%luG", pagesizes[i] / G); 1057d301b358SKonstantin Belousov else if (pagesizes[i] >= M) 1058d301b358SKonstantin Belousov snprintf(name, sizeof(name), "%luM", pagesizes[i] / M); 1059d301b358SKonstantin Belousov else 1060d301b358SKonstantin Belousov snprintf(name, sizeof(name), "%lu", pagesizes[i]); 1061d301b358SKonstantin Belousov #undef G 1062d301b358SKonstantin Belousov #undef M 1063d301b358SKonstantin Belousov SYSCTL_ADD_ULONG(NULL, SYSCTL_STATIC_CHILDREN(_vm_largepages), 1064d301b358SKonstantin Belousov OID_AUTO, name, CTLFLAG_RD, &count_largepages[i], 
1065d301b358SKonstantin Belousov "number of non-transient largepages allocated"); 1066d301b358SKonstantin Belousov } 10678e38aeffSJohn Baldwin } 1068610a2b3cSJohn Baldwin SYSINIT(shm_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_init, NULL); 10698e38aeffSJohn Baldwin 107025f44824SKonstantin Belousov /* 10717060da62SJamie Gritton * Remove all shared memory objects that belong to a prison. 10727060da62SJamie Gritton */ 10737060da62SJamie Gritton void 10747060da62SJamie Gritton shm_remove_prison(struct prison *pr) 10757060da62SJamie Gritton { 10767060da62SJamie Gritton struct shm_mapping *shmm, *tshmm; 10777060da62SJamie Gritton u_long i; 10787060da62SJamie Gritton 10797060da62SJamie Gritton sx_xlock(&shm_dict_lock); 10807060da62SJamie Gritton for (i = 0; i < shm_hash + 1; i++) { 10817060da62SJamie Gritton LIST_FOREACH_SAFE(shmm, &shm_dictionary[i], sm_link, tshmm) { 10827060da62SJamie Gritton if (shmm->sm_shmfd->shm_object->cred && 10837060da62SJamie Gritton shmm->sm_shmfd->shm_object->cred->cr_prison == pr) 10847060da62SJamie Gritton shm_doremove(shmm); 10857060da62SJamie Gritton } 10867060da62SJamie Gritton } 10877060da62SJamie Gritton sx_xunlock(&shm_dict_lock); 10887060da62SJamie Gritton } 10897060da62SJamie Gritton 10907060da62SJamie Gritton /* 109125f44824SKonstantin Belousov * Dictionary management. We maintain an in-kernel dictionary to map 109225f44824SKonstantin Belousov * paths to shmfd objects. We use the FNV hash on the path to store 109325f44824SKonstantin Belousov * the mappings in a hash table. 109425f44824SKonstantin Belousov */ 10958e38aeffSJohn Baldwin static struct shmfd * 10968e38aeffSJohn Baldwin shm_lookup(char *path, Fnv32_t fnv) 10978e38aeffSJohn Baldwin { 10988e38aeffSJohn Baldwin struct shm_mapping *map; 10998e38aeffSJohn Baldwin 11008e38aeffSJohn Baldwin LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { 11018e38aeffSJohn Baldwin if (map->sm_fnv != fnv) 11028e38aeffSJohn Baldwin continue; 11038e38aeffSJohn Baldwin if (strcmp(map->sm_path, path) == 0) 11048e38aeffSJohn Baldwin return (map->sm_shmfd); 11058e38aeffSJohn Baldwin } 11068e38aeffSJohn Baldwin 11078e38aeffSJohn Baldwin return (NULL); 11088e38aeffSJohn Baldwin } 11098e38aeffSJohn Baldwin 11108e38aeffSJohn Baldwin static void 11118e38aeffSJohn Baldwin shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd) 11128e38aeffSJohn Baldwin { 11138e38aeffSJohn Baldwin struct shm_mapping *map; 11148e38aeffSJohn Baldwin 11158e38aeffSJohn Baldwin map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK); 11168e38aeffSJohn Baldwin map->sm_path = path; 11178e38aeffSJohn Baldwin map->sm_fnv = fnv; 11188e38aeffSJohn Baldwin map->sm_shmfd = shm_hold(shmfd); 1119e506e182SJohn Baldwin shmfd->shm_path = path; 11208e38aeffSJohn Baldwin LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link); 11218e38aeffSJohn Baldwin } 11228e38aeffSJohn Baldwin 11238e38aeffSJohn Baldwin static int 11248e38aeffSJohn Baldwin shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred) 11258e38aeffSJohn Baldwin { 11268e38aeffSJohn Baldwin struct shm_mapping *map; 11278e38aeffSJohn Baldwin int error; 11288e38aeffSJohn Baldwin 11298e38aeffSJohn Baldwin LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { 11308e38aeffSJohn Baldwin if (map->sm_fnv != fnv) 11318e38aeffSJohn Baldwin continue; 11328e38aeffSJohn Baldwin if (strcmp(map->sm_path, path) == 0) { 11338e38aeffSJohn Baldwin #ifdef MAC 11348e38aeffSJohn Baldwin error = mac_posixshm_check_unlink(ucred, map->sm_shmfd); 11358e38aeffSJohn Baldwin if (error) 11368e38aeffSJohn Baldwin return (error); 11378e38aeffSJohn Baldwin #endif 
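/*
 * Unlinking requires both read and write permission on the
 * object; otherwise the error from shm_access() is returned to
 * the shm_unlink(2) caller.
 */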
11388e38aeffSJohn Baldwin error = shm_access(map->sm_shmfd, ucred, 11398e38aeffSJohn Baldwin FREAD | FWRITE); 11408e38aeffSJohn Baldwin if (error) 11418e38aeffSJohn Baldwin return (error); 11427060da62SJamie Gritton shm_doremove(map); 11438e38aeffSJohn Baldwin return (0); 11448e38aeffSJohn Baldwin } 11458e38aeffSJohn Baldwin } 11468e38aeffSJohn Baldwin 11478e38aeffSJohn Baldwin return (ENOENT); 11488e38aeffSJohn Baldwin } 11498e38aeffSJohn Baldwin 11507060da62SJamie Gritton static void 11517060da62SJamie Gritton shm_doremove(struct shm_mapping *map) 11527060da62SJamie Gritton { 11537060da62SJamie Gritton map->sm_shmfd->shm_path = NULL; 11547060da62SJamie Gritton LIST_REMOVE(map, sm_link); 11557060da62SJamie Gritton shm_drop(map->sm_shmfd); 11567060da62SJamie Gritton free(map->sm_path, M_SHMFD); 11577060da62SJamie Gritton free(map, M_SHMFD); 11587060da62SJamie Gritton } 11597060da62SJamie Gritton 11608e38aeffSJohn Baldwin int 1161535b1df9SKyle Evans kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, 1162535b1df9SKyle Evans int shmflags, struct filecaps *fcaps, const char *name __unused) 11638e38aeffSJohn Baldwin { 116485078b85SConrad Meyer struct pwddesc *pdp; 11658e38aeffSJohn Baldwin struct shmfd *shmfd; 11668e38aeffSJohn Baldwin struct file *fp; 11678e38aeffSJohn Baldwin char *path; 11680cd95859SKyle Evans void *rl_cookie; 11698e38aeffSJohn Baldwin Fnv32_t fnv; 11708e38aeffSJohn Baldwin mode_t cmode; 1171535b1df9SKyle Evans int error, fd, initial_seals; 1172d301b358SKonstantin Belousov bool largepage; 1173535b1df9SKyle Evans 1174d301b358SKonstantin Belousov if ((shmflags & ~(SHM_ALLOW_SEALING | SHM_GROW_ON_WRITE | 1175d301b358SKonstantin Belousov SHM_LARGEPAGE)) != 0) 1176535b1df9SKyle Evans return (EINVAL); 1177535b1df9SKyle Evans 1178535b1df9SKyle Evans initial_seals = F_SEAL_SEAL; 1179535b1df9SKyle Evans if ((shmflags & SHM_ALLOW_SEALING) != 0) 1180535b1df9SKyle Evans initial_seals &= ~F_SEAL_SEAL; 11818e38aeffSJohn Baldwin 118215bcf785SRobert Watson AUDIT_ARG_FFLAGS(flags); 118315bcf785SRobert Watson AUDIT_ARG_MODE(mode); 118415bcf785SRobert Watson 11857ee1b208SEd Schouten if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) 11868e38aeffSJohn Baldwin return (EINVAL); 11878e38aeffSJohn Baldwin 11887ee1b208SEd Schouten if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0) 11898e38aeffSJohn Baldwin return (EINVAL); 11908e38aeffSJohn Baldwin 1191d301b358SKonstantin Belousov largepage = (shmflags & SHM_LARGEPAGE) != 0; 119278257765SMark Johnston if (largepage && !PMAP_HAS_LARGEPAGES) 1193d301b358SKonstantin Belousov return (ENOTTY); 1194d301b358SKonstantin Belousov 11950cd95859SKyle Evans /* 11960cd95859SKyle Evans * Currently only F_SEAL_SEAL may be set when creating or opening shmfd. 11970cd95859SKyle Evans * If the decision is made later to allow additional seals, care must be 11980cd95859SKyle Evans * taken below to ensure that the seals are properly set if the shmfd 11990cd95859SKyle Evans * already existed -- this currently assumes that only F_SEAL_SEAL can 12000cd95859SKyle Evans * be set and doesn't take further precautions to ensure the validity of 12010cd95859SKyle Evans * the seals being added with respect to current mappings. 
12020cd95859SKyle Evans */ 12030cd95859SKyle Evans if ((initial_seals & ~F_SEAL_SEAL) != 0) 12040cd95859SKyle Evans return (EINVAL); 12050cd95859SKyle Evans 1206b112232eSJake Freeland if (userpath != SHM_ANON) { 1207b112232eSJake Freeland error = shm_copyin_path(td, userpath, &path); 1208b112232eSJake Freeland if (error != 0) 1209b112232eSJake Freeland return (error); 1210b112232eSJake Freeland 1211b112232eSJake Freeland #ifdef CAPABILITY_MODE 1212b112232eSJake Freeland /* 1213b112232eSJake Freeland * shm_open(2) is only allowed for anonymous objects. 1214b112232eSJake Freeland */ 1215b112232eSJake Freeland if (CAP_TRACING(td)) 1216b112232eSJake Freeland ktrcapfail(CAPFAIL_NAMEI, path); 1217b112232eSJake Freeland if (IN_CAPABILITY_MODE(td)) { 1218b112232eSJake Freeland free(path, M_SHMFD); 1219b112232eSJake Freeland return (ECAPMODE); 1220b112232eSJake Freeland } 1221b112232eSJake Freeland #endif 1222b112232eSJake Freeland 1223b112232eSJake Freeland AUDIT_ARG_UPATH1_CANON(path); 1224e411b227SMark Johnston } else { 1225e411b227SMark Johnston path = NULL; 1226b112232eSJake Freeland } 1227b112232eSJake Freeland 122885078b85SConrad Meyer pdp = td->td_proc->p_pd; 122985078b85SConrad Meyer cmode = (mode & ~pdp->pd_cmask) & ACCESSPERMS; 12308e38aeffSJohn Baldwin 1231b5a7ac99SKyle Evans /* 1232b5a7ac99SKyle Evans * shm_open(2) created shm should always have O_CLOEXEC set, as mandated 1233b5a7ac99SKyle Evans * by POSIX. We allow it to be unset here so that an in-kernel 1234b5a7ac99SKyle Evans * interface may be written as a thin layer around shm, optionally not 1235b5a7ac99SKyle Evans * setting CLOEXEC. For shm_open(2), O_CLOEXEC is set unconditionally 1236b5a7ac99SKyle Evans * in sys_shm_open() to keep this implementation compliant. 1237b5a7ac99SKyle Evans */ 1238b5a7ac99SKyle Evans error = falloc_caps(td, &fp, &fd, flags & O_CLOEXEC, fcaps); 1239b112232eSJake Freeland if (error) { 1240b112232eSJake Freeland free(path, M_SHMFD); 12418e38aeffSJohn Baldwin return (error); 1242b112232eSJake Freeland } 12438e38aeffSJohn Baldwin 12448e38aeffSJohn Baldwin /* A SHM_ANON path pointer creates an anonymous object. */ 12457ee1b208SEd Schouten if (userpath == SHM_ANON) { 12468e38aeffSJohn Baldwin /* A read-only anonymous object is pointless. */ 12477ee1b208SEd Schouten if ((flags & O_ACCMODE) == O_RDONLY) { 124890f54cbfSMateusz Guzik fdclose(td, fp, fd); 12498e38aeffSJohn Baldwin fdrop(fp, td); 12508e38aeffSJohn Baldwin return (EINVAL); 12518e38aeffSJohn Baldwin } 1252d301b358SKonstantin Belousov shmfd = shm_alloc(td->td_ucred, cmode, largepage); 12530cd95859SKyle Evans shmfd->shm_seals = initial_seals; 12545dd47b52SKyle Evans shmfd->shm_flags = shmflags; 12558e38aeffSJohn Baldwin } else { 12568e38aeffSJohn Baldwin fnv = fnv_32_str(path, FNV1_32_INIT); 12578e38aeffSJohn Baldwin sx_xlock(&shm_dict_lock); 12588e38aeffSJohn Baldwin shmfd = shm_lookup(path, fnv); 12598e38aeffSJohn Baldwin if (shmfd == NULL) { 12608e38aeffSJohn Baldwin /* Object does not yet exist, create it if requested. 
*/ 12617ee1b208SEd Schouten if (flags & O_CREAT) { 12629b6dd12eSRobert Watson #ifdef MAC 12639b6dd12eSRobert Watson error = mac_posixshm_check_create(td->td_ucred, 12649b6dd12eSRobert Watson path); 12659b6dd12eSRobert Watson if (error == 0) { 12669b6dd12eSRobert Watson #endif 1267d301b358SKonstantin Belousov shmfd = shm_alloc(td->td_ucred, cmode, 1268d301b358SKonstantin Belousov largepage); 12690cd95859SKyle Evans shmfd->shm_seals = initial_seals; 12705dd47b52SKyle Evans shmfd->shm_flags = shmflags; 12718e38aeffSJohn Baldwin shm_insert(path, fnv, shmfd); 12729b6dd12eSRobert Watson #ifdef MAC 12739b6dd12eSRobert Watson } 12749b6dd12eSRobert Watson #endif 12758e38aeffSJohn Baldwin } else { 12768e38aeffSJohn Baldwin free(path, M_SHMFD); 12778e38aeffSJohn Baldwin error = ENOENT; 12788e38aeffSJohn Baldwin } 12798e38aeffSJohn Baldwin } else { 12806df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 12810cd95859SKyle Evans 12820cd95859SKyle Evans /* 12830cd95859SKyle Evans * kern_shm_open() likely shouldn't ever error out on 12840cd95859SKyle Evans * trying to set a seal that already exists, unlike 12850cd95859SKyle Evans * F_ADD_SEALS. This would break terribly as 12860cd95859SKyle Evans * shm_open(2) actually sets F_SEAL_SEAL to maintain 12870cd95859SKyle Evans * historical behavior where the underlying file could 12880cd95859SKyle Evans * not be sealed. 12890cd95859SKyle Evans */ 12900cd95859SKyle Evans initial_seals &= ~shmfd->shm_seals; 12910cd95859SKyle Evans 12928e38aeffSJohn Baldwin /* 12938e38aeffSJohn Baldwin * Object already exists, obtain a new 12948e38aeffSJohn Baldwin * reference if requested and permitted. 12958e38aeffSJohn Baldwin */ 12968e38aeffSJohn Baldwin free(path, M_SHMFD); 12970cd95859SKyle Evans 12980cd95859SKyle Evans /* 12990cd95859SKyle Evans * initial_seals can't set additional seals if we've 13000cd95859SKyle Evans * already been set F_SEAL_SEAL. If F_SEAL_SEAL is set, 13010cd95859SKyle Evans * then we've already removed that one from 13020cd95859SKyle Evans * initial_seals. This is currently redundant as we 13030cd95859SKyle Evans * only allow setting F_SEAL_SEAL at creation time, but 13040cd95859SKyle Evans * it's cheap to check and decreases the effort required 13050cd95859SKyle Evans * to allow additional seals. 13060cd95859SKyle Evans */ 13070cd95859SKyle Evans if ((shmfd->shm_seals & F_SEAL_SEAL) != 0 && 13080cd95859SKyle Evans initial_seals != 0) 13090cd95859SKyle Evans error = EPERM; 13100cd95859SKyle Evans else if ((flags & (O_CREAT | O_EXCL)) == 13110cd95859SKyle Evans (O_CREAT | O_EXCL)) 13128e38aeffSJohn Baldwin error = EEXIST; 13135dd47b52SKyle Evans else if (shmflags != 0 && shmflags != shmfd->shm_flags) 13145dd47b52SKyle Evans error = EINVAL; 13158e38aeffSJohn Baldwin else { 13168e38aeffSJohn Baldwin #ifdef MAC 13178e38aeffSJohn Baldwin error = mac_posixshm_check_open(td->td_ucred, 13187ee1b208SEd Schouten shmfd, FFLAGS(flags & O_ACCMODE)); 13198e38aeffSJohn Baldwin if (error == 0) 13208e38aeffSJohn Baldwin #endif 13218e38aeffSJohn Baldwin error = shm_access(shmfd, td->td_ucred, 13227ee1b208SEd Schouten FFLAGS(flags & O_ACCMODE)); 13238e38aeffSJohn Baldwin } 13248e38aeffSJohn Baldwin 13258e38aeffSJohn Baldwin /* 13268e38aeffSJohn Baldwin * Truncate the file back to zero length if 13278e38aeffSJohn Baldwin * O_TRUNC was specified and the object was 13288e38aeffSJohn Baldwin * opened with read/write. 
13298e38aeffSJohn Baldwin */ 13308e38aeffSJohn Baldwin if (error == 0 && 13317ee1b208SEd Schouten (flags & (O_ACCMODE | O_TRUNC)) == 13328e38aeffSJohn Baldwin (O_RDWR | O_TRUNC)) { 13330cd95859SKyle Evans VM_OBJECT_WLOCK(shmfd->shm_object); 13348e38aeffSJohn Baldwin #ifdef MAC 13358e38aeffSJohn Baldwin error = mac_posixshm_check_truncate( 13368e38aeffSJohn Baldwin td->td_ucred, fp->f_cred, shmfd); 13378e38aeffSJohn Baldwin if (error == 0) 13388e38aeffSJohn Baldwin #endif 13390cd95859SKyle Evans error = shm_dotruncate_locked(shmfd, 0, 13400cd95859SKyle Evans rl_cookie); 13410cd95859SKyle Evans VM_OBJECT_WUNLOCK(shmfd->shm_object); 13428e38aeffSJohn Baldwin } 13430cd95859SKyle Evans if (error == 0) { 13440cd95859SKyle Evans /* 13450cd95859SKyle Evans * Currently we only allow F_SEAL_SEAL to be 13460cd95859SKyle Evans * set initially. As noted above, this would 13470cd95859SKyle Evans * need to be reworked should that change. 13480cd95859SKyle Evans */ 13490cd95859SKyle Evans shmfd->shm_seals |= initial_seals; 13508e38aeffSJohn Baldwin shm_hold(shmfd); 13518e38aeffSJohn Baldwin } 13526df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 13530cd95859SKyle Evans } 13548e38aeffSJohn Baldwin sx_xunlock(&shm_dict_lock); 13558e38aeffSJohn Baldwin 13568e38aeffSJohn Baldwin if (error) { 135790f54cbfSMateusz Guzik fdclose(td, fp, fd); 13588e38aeffSJohn Baldwin fdrop(fp, td); 13598e38aeffSJohn Baldwin return (error); 13608e38aeffSJohn Baldwin } 13618e38aeffSJohn Baldwin } 13628e38aeffSJohn Baldwin 13637ee1b208SEd Schouten finit(fp, FFLAGS(flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); 13648e38aeffSJohn Baldwin 13658e38aeffSJohn Baldwin td->td_retval[0] = fd; 13668e38aeffSJohn Baldwin fdrop(fp, td); 13678e38aeffSJohn Baldwin 13688e38aeffSJohn Baldwin return (0); 13698e38aeffSJohn Baldwin } 13708e38aeffSJohn Baldwin 13717ee1b208SEd Schouten /* System calls. 
*/ 1372a9ac5e14SKyle Evans #ifdef COMPAT_FREEBSD12 13737ee1b208SEd Schouten int 1374a9ac5e14SKyle Evans freebsd12_shm_open(struct thread *td, struct freebsd12_shm_open_args *uap) 13757ee1b208SEd Schouten { 13767ee1b208SEd Schouten 1377535b1df9SKyle Evans return (kern_shm_open(td, uap->path, uap->flags | O_CLOEXEC, 1378535b1df9SKyle Evans uap->mode, NULL)); 13797ee1b208SEd Schouten } 1380a9ac5e14SKyle Evans #endif 13817ee1b208SEd Schouten 13828e38aeffSJohn Baldwin int 13838451d0ddSKip Macy sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap) 13848e38aeffSJohn Baldwin { 13858e38aeffSJohn Baldwin char *path; 13868e38aeffSJohn Baldwin Fnv32_t fnv; 13878e38aeffSJohn Baldwin int error; 13888e38aeffSJohn Baldwin 13892d5603feSDavid Bright error = shm_copyin_path(td, uap->path, &path); 13902d5603feSDavid Bright if (error != 0) 13918e38aeffSJohn Baldwin return (error); 13922d5603feSDavid Bright 139315bcf785SRobert Watson AUDIT_ARG_UPATH1_CANON(path); 13948e38aeffSJohn Baldwin fnv = fnv_32_str(path, FNV1_32_INIT); 13958e38aeffSJohn Baldwin sx_xlock(&shm_dict_lock); 13968e38aeffSJohn Baldwin error = shm_remove(path, fnv, td->td_ucred); 13978e38aeffSJohn Baldwin sx_xunlock(&shm_dict_lock); 13984cf919edSMark Johnston free(path, M_SHMFD); 13998e38aeffSJohn Baldwin 14008e38aeffSJohn Baldwin return (error); 14018e38aeffSJohn Baldwin } 14028e38aeffSJohn Baldwin 14038e38aeffSJohn Baldwin int 14049afb12baSDavid Bright sys_shm_rename(struct thread *td, struct shm_rename_args *uap) 14059afb12baSDavid Bright { 14069afb12baSDavid Bright char *path_from = NULL, *path_to = NULL; 14079afb12baSDavid Bright Fnv32_t fnv_from, fnv_to; 14089afb12baSDavid Bright struct shmfd *fd_from; 14099afb12baSDavid Bright struct shmfd *fd_to; 14109afb12baSDavid Bright int error; 14119afb12baSDavid Bright int flags; 14129afb12baSDavid Bright 14139afb12baSDavid Bright flags = uap->flags; 14142d5603feSDavid Bright AUDIT_ARG_FFLAGS(flags); 14159afb12baSDavid Bright 14169afb12baSDavid Bright /* 14179afb12baSDavid Bright * Make sure the user passed only valid flags. 14189afb12baSDavid Bright * If you add a new flag, please add a new term here. 14199afb12baSDavid Bright */ 14209afb12baSDavid Bright if ((flags & ~( 14219afb12baSDavid Bright SHM_RENAME_NOREPLACE | 14229afb12baSDavid Bright SHM_RENAME_EXCHANGE 14239afb12baSDavid Bright )) != 0) { 14249afb12baSDavid Bright error = EINVAL; 14259afb12baSDavid Bright goto out; 14269afb12baSDavid Bright } 14279afb12baSDavid Bright 14289afb12baSDavid Bright /* 14299afb12baSDavid Bright * EXCHANGE and NOREPLACE don't quite make sense together. Let's 14309afb12baSDavid Bright * force the user to choose one or the other. 
14319afb12baSDavid Bright */ 14329afb12baSDavid Bright if ((flags & SHM_RENAME_NOREPLACE) != 0 && 14339afb12baSDavid Bright (flags & SHM_RENAME_EXCHANGE) != 0) { 14349afb12baSDavid Bright error = EINVAL; 14359afb12baSDavid Bright goto out; 14369afb12baSDavid Bright } 14379afb12baSDavid Bright 14382d5603feSDavid Bright /* Renaming to or from anonymous makes no sense */ 14392d5603feSDavid Bright if (uap->path_from == SHM_ANON || uap->path_to == SHM_ANON) { 14402d5603feSDavid Bright error = EINVAL; 14412d5603feSDavid Bright goto out; 14422d5603feSDavid Bright } 14432d5603feSDavid Bright 14442d5603feSDavid Bright error = shm_copyin_path(td, uap->path_from, &path_from); 14452d5603feSDavid Bright if (error != 0) 14469afb12baSDavid Bright goto out; 14479afb12baSDavid Bright 14482d5603feSDavid Bright error = shm_copyin_path(td, uap->path_to, &path_to); 14492d5603feSDavid Bright if (error != 0) 14509afb12baSDavid Bright goto out; 14519afb12baSDavid Bright 14522d5603feSDavid Bright AUDIT_ARG_UPATH1_CANON(path_from); 14532d5603feSDavid Bright AUDIT_ARG_UPATH2_CANON(path_to); 14542d5603feSDavid Bright 14559afb12baSDavid Bright /* Rename with from/to equal is a no-op */ 14562d5603feSDavid Bright if (strcmp(path_from, path_to) == 0) 14579afb12baSDavid Bright goto out; 14589afb12baSDavid Bright 14599afb12baSDavid Bright fnv_from = fnv_32_str(path_from, FNV1_32_INIT); 14609afb12baSDavid Bright fnv_to = fnv_32_str(path_to, FNV1_32_INIT); 14619afb12baSDavid Bright 14629afb12baSDavid Bright sx_xlock(&shm_dict_lock); 14639afb12baSDavid Bright 14649afb12baSDavid Bright fd_from = shm_lookup(path_from, fnv_from); 14659afb12baSDavid Bright if (fd_from == NULL) { 14669afb12baSDavid Bright error = ENOENT; 14672d5603feSDavid Bright goto out_locked; 14689afb12baSDavid Bright } 14699afb12baSDavid Bright 14709afb12baSDavid Bright fd_to = shm_lookup(path_to, fnv_to); 14719afb12baSDavid Bright if ((flags & SHM_RENAME_NOREPLACE) != 0 && fd_to != NULL) { 14729afb12baSDavid Bright error = EEXIST; 14732d5603feSDavid Bright goto out_locked; 14749afb12baSDavid Bright } 14759afb12baSDavid Bright 14769afb12baSDavid Bright /* 14779afb12baSDavid Bright * Unconditionally prevents shm_remove from invalidating the 'from' 14789afb12baSDavid Bright * shm's state. 14799afb12baSDavid Bright */ 14809afb12baSDavid Bright shm_hold(fd_from); 14819afb12baSDavid Bright error = shm_remove(path_from, fnv_from, td->td_ucred); 14829afb12baSDavid Bright 14839afb12baSDavid Bright /* 14849afb12baSDavid Bright * One of my assumptions failed if ENOENT (e.g. locking didn't 14859afb12baSDavid Bright * protect us) 14869afb12baSDavid Bright */ 14879afb12baSDavid Bright KASSERT(error != ENOENT, ("Our shm disappeared during shm_rename: %s", 14889afb12baSDavid Bright path_from)); 14892d5603feSDavid Bright if (error != 0) { 14909afb12baSDavid Bright shm_drop(fd_from); 14912d5603feSDavid Bright goto out_locked; 14929afb12baSDavid Bright } 14939afb12baSDavid Bright 14949afb12baSDavid Bright /* 14959afb12baSDavid Bright * If we are exchanging, we need to ensure the shm_remove below 14969afb12baSDavid Bright * doesn't invalidate the dest shm's state. 14979afb12baSDavid Bright */ 14989afb12baSDavid Bright if ((flags & SHM_RENAME_EXCHANGE) != 0 && fd_to != NULL) 14999afb12baSDavid Bright shm_hold(fd_to); 15009afb12baSDavid Bright 15019afb12baSDavid Bright /* 15029afb12baSDavid Bright * NOTE: if path_to is not already in the hash, c'est la vie; 15039afb12baSDavid Bright * it simply means we have nothing already at path_to to unlink. 
15049afb12baSDavid Bright * That is the ENOENT case. 15059afb12baSDavid Bright * 15069afb12baSDavid Bright * If we somehow don't have access to unlink this guy, but 15079afb12baSDavid Bright * did for the shm at path_from, then relink the shm to path_from 15089afb12baSDavid Bright * and abort with EACCES. 15099afb12baSDavid Bright * 15109afb12baSDavid Bright * All other errors: that is weird; let's relink and abort the 15119afb12baSDavid Bright * operation. 15129afb12baSDavid Bright */ 15139afb12baSDavid Bright error = shm_remove(path_to, fnv_to, td->td_ucred); 15142d5603feSDavid Bright if (error != 0 && error != ENOENT) { 15159afb12baSDavid Bright shm_insert(path_from, fnv_from, fd_from); 15169afb12baSDavid Bright shm_drop(fd_from); 15179afb12baSDavid Bright /* Don't free path_from now, since the hash references it */ 15189afb12baSDavid Bright path_from = NULL; 15192d5603feSDavid Bright goto out_locked; 15209afb12baSDavid Bright } 15219afb12baSDavid Bright 15222d5603feSDavid Bright error = 0; 15232d5603feSDavid Bright 15249afb12baSDavid Bright shm_insert(path_to, fnv_to, fd_from); 15259afb12baSDavid Bright 15269afb12baSDavid Bright /* Don't free path_to now, since the hash references it */ 15279afb12baSDavid Bright path_to = NULL; 15289afb12baSDavid Bright 15299afb12baSDavid Bright /* We kept a ref when we removed, and incremented again in insert */ 15309afb12baSDavid Bright shm_drop(fd_from); 15319afb12baSDavid Bright KASSERT(fd_from->shm_refs > 0, ("Expected >0 refs; got: %d\n", 15329afb12baSDavid Bright fd_from->shm_refs)); 15339afb12baSDavid Bright 15349afb12baSDavid Bright if ((flags & SHM_RENAME_EXCHANGE) != 0 && fd_to != NULL) { 15359afb12baSDavid Bright shm_insert(path_from, fnv_from, fd_to); 15369afb12baSDavid Bright path_from = NULL; 15379afb12baSDavid Bright shm_drop(fd_to); 15389afb12baSDavid Bright KASSERT(fd_to->shm_refs > 0, ("Expected >0 refs; got: %d\n", 15399afb12baSDavid Bright fd_to->shm_refs)); 15409afb12baSDavid Bright } 15419afb12baSDavid Bright 15422d5603feSDavid Bright out_locked: 15439afb12baSDavid Bright sx_xunlock(&shm_dict_lock); 15449afb12baSDavid Bright 15459afb12baSDavid Bright out: 15469afb12baSDavid Bright free(path_from, M_SHMFD); 15479afb12baSDavid Bright free(path_to, M_SHMFD); 15489afb12baSDavid Bright return (error); 15499afb12baSDavid Bright } 15509afb12baSDavid Bright 1551d301b358SKonstantin Belousov static int 1552d301b358SKonstantin Belousov shm_mmap_large(struct shmfd *shmfd, vm_map_t map, vm_offset_t *addr, 1553d301b358SKonstantin Belousov vm_size_t size, vm_prot_t prot, vm_prot_t max_prot, int flags, 155479783634SKonstantin Belousov vm_ooffset_t foff, struct thread *td) 1555d301b358SKonstantin Belousov { 1556d301b358SKonstantin Belousov struct vmspace *vms; 1557d301b358SKonstantin Belousov vm_map_entry_t next_entry, prev_entry; 1558d301b358SKonstantin Belousov vm_offset_t align, mask, maxaddr; 1559d301b358SKonstantin Belousov int docow, error, rv, try; 1560d301b358SKonstantin Belousov bool curmap; 1561d301b358SKonstantin Belousov 1562d301b358SKonstantin Belousov if (shmfd->shm_lp_psind == 0) 1563d301b358SKonstantin Belousov return (EINVAL); 1564d301b358SKonstantin Belousov 1565d301b358SKonstantin Belousov /* MAP_PRIVATE is disabled */ 1566d301b358SKonstantin Belousov if ((flags & ~(MAP_SHARED | MAP_FIXED | MAP_EXCL | 1567f3e11927SDmitry Chagin MAP_NOCORE | MAP_32BIT | MAP_ALIGNMENT_MASK)) != 0) 1568d301b358SKonstantin Belousov return (EINVAL); 1569d301b358SKonstantin Belousov 1570d301b358SKonstantin Belousov vms = td->td_proc->p_vmspace; 
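	/*
	 * Resource accounting (racct/RLIMIT) checks are only meaningful when
	 * the mapping targets the calling process's own address space, so they
	 * are applied below only in the curmap case.
	 */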
1571d301b358SKonstantin Belousov curmap = map == &vms->vm_map; 1572d301b358SKonstantin Belousov if (curmap) { 1573d301b358SKonstantin Belousov error = kern_mmap_racct_check(td, map, size); 1574d301b358SKonstantin Belousov if (error != 0) 1575d301b358SKonstantin Belousov return (error); 1576d301b358SKonstantin Belousov } 1577d301b358SKonstantin Belousov 1578d301b358SKonstantin Belousov docow = shmfd->shm_lp_psind << MAP_SPLIT_BOUNDARY_SHIFT; 1579d301b358SKonstantin Belousov docow |= MAP_INHERIT_SHARE; 1580d301b358SKonstantin Belousov if ((flags & MAP_NOCORE) != 0) 1581d301b358SKonstantin Belousov docow |= MAP_DISABLE_COREDUMP; 1582d301b358SKonstantin Belousov 1583d301b358SKonstantin Belousov mask = pagesizes[shmfd->shm_lp_psind] - 1; 1584d301b358SKonstantin Belousov if ((foff & mask) != 0) 1585d301b358SKonstantin Belousov return (EINVAL); 1586d301b358SKonstantin Belousov maxaddr = vm_map_max(map); 1587d301b358SKonstantin Belousov if ((flags & MAP_32BIT) != 0 && maxaddr > MAP_32BIT_MAX_ADDR) 1588d301b358SKonstantin Belousov maxaddr = MAP_32BIT_MAX_ADDR; 1589d301b358SKonstantin Belousov if (size == 0 || (size & mask) != 0 || 1590d301b358SKonstantin Belousov (*addr != 0 && ((*addr & mask) != 0 || 1591d301b358SKonstantin Belousov *addr + size < *addr || *addr + size > maxaddr))) 1592d301b358SKonstantin Belousov return (EINVAL); 1593d301b358SKonstantin Belousov 1594d301b358SKonstantin Belousov align = flags & MAP_ALIGNMENT_MASK; 1595d301b358SKonstantin Belousov if (align == 0) { 1596d301b358SKonstantin Belousov align = pagesizes[shmfd->shm_lp_psind]; 1597d301b358SKonstantin Belousov } else if (align == MAP_ALIGNED_SUPER) { 15983e00c11aSAlan Cox /* 15993e00c11aSAlan Cox * MAP_ALIGNED_SUPER is only supported on superpage sizes, 16003e00c11aSAlan Cox * i.e., [1, VM_NRESERVLEVEL]. shmfd->shm_lp_psind < 1 is 16013e00c11aSAlan Cox * handled above. 16023e00c11aSAlan Cox */ 16033e00c11aSAlan Cox if ( 16043e00c11aSAlan Cox #if VM_NRESERVLEVEL > 0 16053e00c11aSAlan Cox shmfd->shm_lp_psind > VM_NRESERVLEVEL 16063e00c11aSAlan Cox #else 16073e00c11aSAlan Cox shmfd->shm_lp_psind > 1 16083e00c11aSAlan Cox #endif 16093e00c11aSAlan Cox ) 1610d301b358SKonstantin Belousov return (EINVAL); 16113e00c11aSAlan Cox align = pagesizes[shmfd->shm_lp_psind]; 1612d301b358SKonstantin Belousov } else { 1613d301b358SKonstantin Belousov align >>= MAP_ALIGNMENT_SHIFT; 1614d301b358SKonstantin Belousov align = 1ULL << align; 1615d301b358SKonstantin Belousov /* Also handles overflow. 
*/ 1616d301b358SKonstantin Belousov if (align < pagesizes[shmfd->shm_lp_psind]) 1617d301b358SKonstantin Belousov return (EINVAL); 1618d301b358SKonstantin Belousov } 1619d301b358SKonstantin Belousov 1620d301b358SKonstantin Belousov vm_map_lock(map); 1621d301b358SKonstantin Belousov if ((flags & MAP_FIXED) == 0) { 1622d301b358SKonstantin Belousov try = 1; 1623d301b358SKonstantin Belousov if (curmap && (*addr == 0 || 1624d301b358SKonstantin Belousov (*addr >= round_page((vm_offset_t)vms->vm_taddr) && 1625d301b358SKonstantin Belousov *addr < round_page((vm_offset_t)vms->vm_daddr + 1626d301b358SKonstantin Belousov lim_max(td, RLIMIT_DATA))))) { 1627d301b358SKonstantin Belousov *addr = roundup2((vm_offset_t)vms->vm_daddr + 1628d301b358SKonstantin Belousov lim_max(td, RLIMIT_DATA), 1629d301b358SKonstantin Belousov pagesizes[shmfd->shm_lp_psind]); 1630d301b358SKonstantin Belousov } 1631d301b358SKonstantin Belousov again: 1632d301b358SKonstantin Belousov rv = vm_map_find_aligned(map, addr, size, maxaddr, align); 1633d301b358SKonstantin Belousov if (rv != KERN_SUCCESS) { 1634d301b358SKonstantin Belousov if (try == 1) { 1635d301b358SKonstantin Belousov try = 2; 1636d301b358SKonstantin Belousov *addr = vm_map_min(map); 1637d301b358SKonstantin Belousov if ((*addr & mask) != 0) 1638d301b358SKonstantin Belousov *addr = (*addr + mask) & mask; 1639d301b358SKonstantin Belousov goto again; 1640d301b358SKonstantin Belousov } 1641d301b358SKonstantin Belousov goto fail1; 1642d301b358SKonstantin Belousov } 1643d301b358SKonstantin Belousov } else if ((flags & MAP_EXCL) == 0) { 1644d301b358SKonstantin Belousov rv = vm_map_delete(map, *addr, *addr + size); 1645d301b358SKonstantin Belousov if (rv != KERN_SUCCESS) 1646d301b358SKonstantin Belousov goto fail1; 1647d301b358SKonstantin Belousov } else { 1648d301b358SKonstantin Belousov error = ENOSPC; 1649d301b358SKonstantin Belousov if (vm_map_lookup_entry(map, *addr, &prev_entry)) 1650d301b358SKonstantin Belousov goto fail; 1651d301b358SKonstantin Belousov next_entry = vm_map_entry_succ(prev_entry); 1652d301b358SKonstantin Belousov if (next_entry->start < *addr + size) 1653d301b358SKonstantin Belousov goto fail; 1654d301b358SKonstantin Belousov } 1655d301b358SKonstantin Belousov 1656d301b358SKonstantin Belousov rv = vm_map_insert(map, shmfd->shm_object, foff, *addr, *addr + size, 1657d301b358SKonstantin Belousov prot, max_prot, docow); 1658d301b358SKonstantin Belousov fail1: 1659d301b358SKonstantin Belousov error = vm_mmap_to_errno(rv); 1660d301b358SKonstantin Belousov fail: 1661d301b358SKonstantin Belousov vm_map_unlock(map); 1662d301b358SKonstantin Belousov return (error); 1663d301b358SKonstantin Belousov } 1664d301b358SKonstantin Belousov 1665d301b358SKonstantin Belousov static int 16667077c426SJohn Baldwin shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize, 166733c2c58fSMark Johnston vm_prot_t prot, vm_prot_t max_maxprot, int flags, 16687077c426SJohn Baldwin vm_ooffset_t foff, struct thread *td) 16698e38aeffSJohn Baldwin { 16707077c426SJohn Baldwin struct shmfd *shmfd; 16717077c426SJohn Baldwin vm_prot_t maxprot; 16727077c426SJohn Baldwin int error; 1673dca52ab4SKyle Evans bool writecnt; 1674af755d3eSKyle Evans void *rl_cookie; 16757077c426SJohn Baldwin 16767077c426SJohn Baldwin shmfd = fp->f_data; 16777077c426SJohn Baldwin maxprot = VM_PROT_NONE; 16787077c426SJohn Baldwin 16796df6facfSKonstantin Belousov rl_cookie = shm_rangelock_rlock(shmfd, 0, objsize); 16807077c426SJohn Baldwin /* FREAD should always be set. 
*/ 16817077c426SJohn Baldwin if ((fp->f_flag & FREAD) != 0) 16827077c426SJohn Baldwin maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; 168358366f05SKyle Evans 168458366f05SKyle Evans /* 168558366f05SKyle Evans * If FWRITE's set, we can allow VM_PROT_WRITE unless it's a shared 1686c7841c6bSMark Johnston * mapping with a write seal applied. Private mappings are always 1687c7841c6bSMark Johnston * writeable. 168858366f05SKyle Evans */ 1689c7841c6bSMark Johnston if ((flags & MAP_SHARED) == 0) { 169033c2c58fSMark Johnston if ((max_maxprot & VM_PROT_WRITE) != 0) 16917077c426SJohn Baldwin maxprot |= VM_PROT_WRITE; 1692c7841c6bSMark Johnston writecnt = false; 1693c7841c6bSMark Johnston } else { 1694c7841c6bSMark Johnston if ((fp->f_flag & FWRITE) != 0 && 1695c7841c6bSMark Johnston (shmfd->shm_seals & F_SEAL_WRITE) == 0) 1696c7841c6bSMark Johnston maxprot |= VM_PROT_WRITE; 1697af755d3eSKyle Evans 169851a16c84SKyle Evans /* 169951a16c84SKyle Evans * Any mappings from a writable descriptor may be upgraded to 170051a16c84SKyle Evans * VM_PROT_WRITE with mprotect(2), unless a write-seal was 170151a16c84SKyle Evans * applied between the open and subsequent mmap(2). We want to 170251a16c84SKyle Evans * reject application of a write seal as long as any such 170351a16c84SKyle Evans * mapping exists so that the seal cannot be trivially bypassed. 170451a16c84SKyle Evans */ 170551a16c84SKyle Evans writecnt = (maxprot & VM_PROT_WRITE) != 0; 170651a16c84SKyle Evans if (!writecnt && (prot & VM_PROT_WRITE) != 0) { 1707af755d3eSKyle Evans error = EACCES; 1708af755d3eSKyle Evans goto out; 1709af755d3eSKyle Evans } 1710c7841c6bSMark Johnston } 171133c2c58fSMark Johnston maxprot &= max_maxprot; 17127077c426SJohn Baldwin 1713987ff181SKonstantin Belousov /* See comment in vn_mmap(). */ 1714987ff181SKonstantin Belousov if ( 1715987ff181SKonstantin Belousov #ifdef _LP64 1716987ff181SKonstantin Belousov objsize > OFF_MAX || 1717987ff181SKonstantin Belousov #endif 1718f9cc8410SEric van Gyzen foff > OFF_MAX - objsize) { 1719af755d3eSKyle Evans error = EINVAL; 1720af755d3eSKyle Evans goto out; 1721af755d3eSKyle Evans } 1722987ff181SKonstantin Belousov 17237077c426SJohn Baldwin #ifdef MAC 17247077c426SJohn Baldwin error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags); 17257077c426SJohn Baldwin if (error != 0) 1726af755d3eSKyle Evans goto out; 17277077c426SJohn Baldwin #endif 17288e38aeffSJohn Baldwin 17298e38aeffSJohn Baldwin mtx_lock(&shm_timestamp_lock); 17308e38aeffSJohn Baldwin vfs_timestamp(&shmfd->shm_atime); 17318e38aeffSJohn Baldwin mtx_unlock(&shm_timestamp_lock); 17328e38aeffSJohn Baldwin vm_object_reference(shmfd->shm_object); 17337077c426SJohn Baldwin 1734d301b358SKonstantin Belousov if (shm_largepage(shmfd)) { 173579783634SKonstantin Belousov writecnt = false; 1736d301b358SKonstantin Belousov error = shm_mmap_large(shmfd, map, addr, objsize, prot, 173779783634SKonstantin Belousov maxprot, flags, foff, td); 1738d301b358SKonstantin Belousov } else { 173979783634SKonstantin Belousov if (writecnt) { 174079783634SKonstantin Belousov vm_pager_update_writecount(shmfd->shm_object, 0, 174179783634SKonstantin Belousov objsize); 174279783634SKonstantin Belousov } 17437077c426SJohn Baldwin error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags, 1744dca52ab4SKyle Evans shmfd->shm_object, foff, writecnt, td); 1745d301b358SKonstantin Belousov } 1746dca52ab4SKyle Evans if (error != 0) { 1747dca52ab4SKyle Evans if (writecnt) 1748dca52ab4SKyle Evans vm_pager_release_writecount(shmfd->shm_object, 0, 1749dca52ab4SKyle 
Evans objsize); 17507077c426SJohn Baldwin vm_object_deallocate(shmfd->shm_object); 1751dca52ab4SKyle Evans } 1752af755d3eSKyle Evans out: 17536df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 175434d3e89fSKonstantin Belousov return (error); 17558e38aeffSJohn Baldwin } 17569c00bb91SKonstantin Belousov 17579c00bb91SKonstantin Belousov static int 17589c00bb91SKonstantin Belousov shm_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 17599c00bb91SKonstantin Belousov struct thread *td) 17609c00bb91SKonstantin Belousov { 17619c00bb91SKonstantin Belousov struct shmfd *shmfd; 17629c00bb91SKonstantin Belousov int error; 17639c00bb91SKonstantin Belousov 17649c00bb91SKonstantin Belousov error = 0; 17659c00bb91SKonstantin Belousov shmfd = fp->f_data; 17669c00bb91SKonstantin Belousov mtx_lock(&shm_timestamp_lock); 17679c00bb91SKonstantin Belousov /* 17689c00bb91SKonstantin Belousov * SUSv4 says that x bits of permission need not be affected. 17699c00bb91SKonstantin Belousov * Be consistent with our shm_open there. 17709c00bb91SKonstantin Belousov */ 17719c00bb91SKonstantin Belousov #ifdef MAC 17729c00bb91SKonstantin Belousov error = mac_posixshm_check_setmode(active_cred, shmfd, mode); 17739c00bb91SKonstantin Belousov if (error != 0) 17749c00bb91SKonstantin Belousov goto out; 17759c00bb91SKonstantin Belousov #endif 1776d292b194SMateusz Guzik error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, 1777d292b194SMateusz Guzik VADMIN, active_cred); 17789c00bb91SKonstantin Belousov if (error != 0) 17799c00bb91SKonstantin Belousov goto out; 17809c00bb91SKonstantin Belousov shmfd->shm_mode = mode & ACCESSPERMS; 17819c00bb91SKonstantin Belousov out: 17829c00bb91SKonstantin Belousov mtx_unlock(&shm_timestamp_lock); 17839c00bb91SKonstantin Belousov return (error); 17849c00bb91SKonstantin Belousov } 17859c00bb91SKonstantin Belousov 17869c00bb91SKonstantin Belousov static int 17879c00bb91SKonstantin Belousov shm_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 17889c00bb91SKonstantin Belousov struct thread *td) 17899c00bb91SKonstantin Belousov { 17909c00bb91SKonstantin Belousov struct shmfd *shmfd; 17919c00bb91SKonstantin Belousov int error; 17929c00bb91SKonstantin Belousov 179368889ed6SKonstantin Belousov error = 0; 17949c00bb91SKonstantin Belousov shmfd = fp->f_data; 17959c00bb91SKonstantin Belousov mtx_lock(&shm_timestamp_lock); 17969c00bb91SKonstantin Belousov #ifdef MAC 17979c00bb91SKonstantin Belousov error = mac_posixshm_check_setowner(active_cred, shmfd, uid, gid); 17989c00bb91SKonstantin Belousov if (error != 0) 17999c00bb91SKonstantin Belousov goto out; 18009c00bb91SKonstantin Belousov #endif 18019c00bb91SKonstantin Belousov if (uid == (uid_t)-1) 18029c00bb91SKonstantin Belousov uid = shmfd->shm_uid; 18039c00bb91SKonstantin Belousov if (gid == (gid_t)-1) 18049c00bb91SKonstantin Belousov gid = shmfd->shm_gid; 18059c00bb91SKonstantin Belousov if (((uid != shmfd->shm_uid && uid != active_cred->cr_uid) || 18069c00bb91SKonstantin Belousov (gid != shmfd->shm_gid && !groupmember(gid, active_cred))) && 1807cc426dd3SMateusz Guzik (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN))) 18089c00bb91SKonstantin Belousov goto out; 18099c00bb91SKonstantin Belousov shmfd->shm_uid = uid; 18109c00bb91SKonstantin Belousov shmfd->shm_gid = gid; 18119c00bb91SKonstantin Belousov out: 18129c00bb91SKonstantin Belousov mtx_unlock(&shm_timestamp_lock); 18139c00bb91SKonstantin Belousov return (error); 18149c00bb91SKonstantin Belousov } 1815fb680e16SJohn Baldwin 
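/*
 * Illustrative sketch (not part of the kernel sources): userspace reaches the
 * shm_chmod() and shm_chown() methods above through the generic descriptor
 * system calls on a descriptor returned by shm_open(2), e.g.:
 *
 *	int fd = shm_open("/example", O_RDWR | O_CREAT, 0600);
 *	if (fd >= 0) {
 *		(void)fchmod(fd, 0640);			// dispatches to shm_chmod()
 *		(void)fchown(fd, some_uid, some_gid);	// dispatches to shm_chown()
 *	}
 *
 * "/example", some_uid, and some_gid are placeholders for this sketch only.
 */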
1816fb680e16SJohn Baldwin /* 1817fb680e16SJohn Baldwin * Helper routines to allow the backing object of a shared memory file 1818fb680e16SJohn Baldwin * descriptor to be mapped in the kernel. 1819fb680e16SJohn Baldwin */ 1820fb680e16SJohn Baldwin int 1821fb680e16SJohn Baldwin shm_map(struct file *fp, size_t size, off_t offset, void **memp) 1822fb680e16SJohn Baldwin { 1823fb680e16SJohn Baldwin struct shmfd *shmfd; 1824fb680e16SJohn Baldwin vm_offset_t kva, ofs; 1825fb680e16SJohn Baldwin vm_object_t obj; 1826fb680e16SJohn Baldwin int rv; 1827fb680e16SJohn Baldwin 1828fb680e16SJohn Baldwin if (fp->f_type != DTYPE_SHM) 1829fb680e16SJohn Baldwin return (EINVAL); 1830fb680e16SJohn Baldwin shmfd = fp->f_data; 1831fb680e16SJohn Baldwin obj = shmfd->shm_object; 183289f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 1833fb680e16SJohn Baldwin /* 1834fb680e16SJohn Baldwin * XXXRW: This validation is probably insufficient, and subject to 1835fb680e16SJohn Baldwin * sign errors. It should be fixed. 1836fb680e16SJohn Baldwin */ 1837fb680e16SJohn Baldwin if (offset >= shmfd->shm_size || 1838fb680e16SJohn Baldwin offset + size > round_page(shmfd->shm_size)) { 183989f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1840fb680e16SJohn Baldwin return (EINVAL); 1841fb680e16SJohn Baldwin } 1842fb680e16SJohn Baldwin 1843fb680e16SJohn Baldwin shmfd->shm_kmappings++; 1844fb680e16SJohn Baldwin vm_object_reference_locked(obj); 184589f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1846fb680e16SJohn Baldwin 1847fb680e16SJohn Baldwin /* Map the object into the kernel_map and wire it. */ 1848fb680e16SJohn Baldwin kva = vm_map_min(kernel_map); 1849fb680e16SJohn Baldwin ofs = offset & PAGE_MASK; 1850fb680e16SJohn Baldwin offset = trunc_page(offset); 1851fb680e16SJohn Baldwin size = round_page(size + ofs); 1852edb572a3SJohn Baldwin rv = vm_map_find(kernel_map, obj, offset, &kva, size, 0, 18535e3a17c0SJohn Baldwin VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, 1854fb680e16SJohn Baldwin VM_PROT_READ | VM_PROT_WRITE, 0); 1855fb680e16SJohn Baldwin if (rv == KERN_SUCCESS) { 1856fb680e16SJohn Baldwin rv = vm_map_wire(kernel_map, kva, kva + size, 1857fb680e16SJohn Baldwin VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); 1858fb680e16SJohn Baldwin if (rv == KERN_SUCCESS) { 1859fb680e16SJohn Baldwin *memp = (void *)(kva + ofs); 1860fb680e16SJohn Baldwin return (0); 1861fb680e16SJohn Baldwin } 1862fb680e16SJohn Baldwin vm_map_remove(kernel_map, kva, kva + size); 1863fb680e16SJohn Baldwin } else 1864fb680e16SJohn Baldwin vm_object_deallocate(obj); 1865fb680e16SJohn Baldwin 1866fb680e16SJohn Baldwin /* On failure, drop our mapping reference. */ 186789f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 1868fb680e16SJohn Baldwin shmfd->shm_kmappings--; 186989f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1870fb680e16SJohn Baldwin 1871338e7cf2SJohn Baldwin return (vm_mmap_to_errno(rv)); 1872fb680e16SJohn Baldwin } 1873fb680e16SJohn Baldwin 1874fb680e16SJohn Baldwin /* 1875fb680e16SJohn Baldwin * We require the caller to unmap the entire entry. This allows us to 1876fb680e16SJohn Baldwin * safely decrement shm_kmappings when a mapping is removed. 
1877fb680e16SJohn Baldwin */ 1878fb680e16SJohn Baldwin int 1879fb680e16SJohn Baldwin shm_unmap(struct file *fp, void *mem, size_t size) 1880fb680e16SJohn Baldwin { 1881fb680e16SJohn Baldwin struct shmfd *shmfd; 1882fb680e16SJohn Baldwin vm_map_entry_t entry; 1883fb680e16SJohn Baldwin vm_offset_t kva, ofs; 1884fb680e16SJohn Baldwin vm_object_t obj; 1885fb680e16SJohn Baldwin vm_pindex_t pindex; 1886fb680e16SJohn Baldwin vm_prot_t prot; 1887fb680e16SJohn Baldwin boolean_t wired; 1888fb680e16SJohn Baldwin vm_map_t map; 1889fb680e16SJohn Baldwin int rv; 1890fb680e16SJohn Baldwin 1891fb680e16SJohn Baldwin if (fp->f_type != DTYPE_SHM) 1892fb680e16SJohn Baldwin return (EINVAL); 1893fb680e16SJohn Baldwin shmfd = fp->f_data; 1894fb680e16SJohn Baldwin kva = (vm_offset_t)mem; 1895fb680e16SJohn Baldwin ofs = kva & PAGE_MASK; 1896fb680e16SJohn Baldwin kva = trunc_page(kva); 1897fb680e16SJohn Baldwin size = round_page(size + ofs); 1898fb680e16SJohn Baldwin map = kernel_map; 1899fb680e16SJohn Baldwin rv = vm_map_lookup(&map, kva, VM_PROT_READ | VM_PROT_WRITE, &entry, 1900fb680e16SJohn Baldwin &obj, &pindex, &prot, &wired); 1901fb680e16SJohn Baldwin if (rv != KERN_SUCCESS) 1902fb680e16SJohn Baldwin return (EINVAL); 1903fb680e16SJohn Baldwin if (entry->start != kva || entry->end != kva + size) { 1904fb680e16SJohn Baldwin vm_map_lookup_done(map, entry); 1905fb680e16SJohn Baldwin return (EINVAL); 1906fb680e16SJohn Baldwin } 1907fb680e16SJohn Baldwin vm_map_lookup_done(map, entry); 1908fb680e16SJohn Baldwin if (obj != shmfd->shm_object) 1909fb680e16SJohn Baldwin return (EINVAL); 1910fb680e16SJohn Baldwin vm_map_remove(map, kva, kva + size); 191189f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 1912fb680e16SJohn Baldwin KASSERT(shmfd->shm_kmappings > 0, ("shm_unmap: object not mapped")); 1913fb680e16SJohn Baldwin shmfd->shm_kmappings--; 191489f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1915fb680e16SJohn Baldwin return (0); 1916fb680e16SJohn Baldwin } 1917e506e182SJohn Baldwin 19189696feebSJohn Baldwin static int 191956d0e33eSKonstantin Belousov shm_fill_kinfo_locked(struct shmfd *shmfd, struct kinfo_file *kif, bool list) 1920e506e182SJohn Baldwin { 1921cc7b259aSJamie Gritton const char *path, *pr_path; 1922cc7b259aSJamie Gritton size_t pr_pathlen; 192356d0e33eSKonstantin Belousov bool visible; 1924e506e182SJohn Baldwin 192556d0e33eSKonstantin Belousov sx_assert(&shm_dict_lock, SA_LOCKED); 19269696feebSJohn Baldwin kif->kf_type = KF_TYPE_SHM; 192756d0e33eSKonstantin Belousov kif->kf_un.kf_file.kf_file_mode = S_IFREG | shmfd->shm_mode; 19289696feebSJohn Baldwin kif->kf_un.kf_file.kf_file_size = shmfd->shm_size; 19299696feebSJohn Baldwin if (shmfd->shm_path != NULL) { 1930cc7b259aSJamie Gritton path = shmfd->shm_path; 1931cc7b259aSJamie Gritton pr_path = curthread->td_ucred->cr_prison->pr_path; 193244c16975SJamie Gritton if (strcmp(pr_path, "/") != 0) { 193344c16975SJamie Gritton /* Return the jail-rooted pathname. 
*/ 1934cc7b259aSJamie Gritton pr_pathlen = strlen(pr_path); 19357975f57bSRicardo Branco visible = strncmp(path, pr_path, pr_pathlen) == 0 && 19367975f57bSRicardo Branco path[pr_pathlen] == '/'; 193756d0e33eSKonstantin Belousov if (list && !visible) 193856d0e33eSKonstantin Belousov return (EPERM); 193956d0e33eSKonstantin Belousov if (visible) 1940cc7b259aSJamie Gritton path += pr_pathlen; 1941cc7b259aSJamie Gritton } 1942cc7b259aSJamie Gritton strlcpy(kif->kf_path, path, sizeof(kif->kf_path)); 1943cc7b259aSJamie Gritton } 19449696feebSJohn Baldwin return (0); 19459696feebSJohn Baldwin } 194656d0e33eSKonstantin Belousov 194756d0e33eSKonstantin Belousov static int 194856d0e33eSKonstantin Belousov shm_fill_kinfo(struct file *fp, struct kinfo_file *kif, 194956d0e33eSKonstantin Belousov struct filedesc *fdp __unused) 195056d0e33eSKonstantin Belousov { 195156d0e33eSKonstantin Belousov int res; 195256d0e33eSKonstantin Belousov 195356d0e33eSKonstantin Belousov sx_slock(&shm_dict_lock); 195456d0e33eSKonstantin Belousov res = shm_fill_kinfo_locked(fp->f_data, kif, false); 195556d0e33eSKonstantin Belousov sx_sunlock(&shm_dict_lock); 195656d0e33eSKonstantin Belousov return (res); 195756d0e33eSKonstantin Belousov } 195856d0e33eSKonstantin Belousov 195956d0e33eSKonstantin Belousov static int 1960af755d3eSKyle Evans shm_add_seals(struct file *fp, int seals) 1961af755d3eSKyle Evans { 1962af755d3eSKyle Evans struct shmfd *shmfd; 1963af755d3eSKyle Evans void *rl_cookie; 1964af755d3eSKyle Evans vm_ooffset_t writemappings; 1965af755d3eSKyle Evans int error, nseals; 1966af755d3eSKyle Evans 1967af755d3eSKyle Evans error = 0; 1968af755d3eSKyle Evans shmfd = fp->f_data; 19696df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 1970af755d3eSKyle Evans 1971af755d3eSKyle Evans /* Even already-set seals should result in EPERM. */ 1972af755d3eSKyle Evans if ((shmfd->shm_seals & F_SEAL_SEAL) != 0) { 1973af755d3eSKyle Evans error = EPERM; 1974af755d3eSKyle Evans goto out; 1975af755d3eSKyle Evans } 1976af755d3eSKyle Evans nseals = seals & ~shmfd->shm_seals; 1977af755d3eSKyle Evans if ((nseals & F_SEAL_WRITE) != 0) { 197879783634SKonstantin Belousov if (shm_largepage(shmfd)) { 197979783634SKonstantin Belousov error = ENOTSUP; 198079783634SKonstantin Belousov goto out; 198179783634SKonstantin Belousov } 198279783634SKonstantin Belousov 1983af755d3eSKyle Evans /* 1984af755d3eSKyle Evans * The rangelock above prevents writable mappings from being 1985af755d3eSKyle Evans * added after we've started applying seals. The RLOCK here 1986af755d3eSKyle Evans * is to avoid torn reads on ILP32 arches as unmapping/reducing 1987af755d3eSKyle Evans * writemappings will be done without a rangelock. 
1988af755d3eSKyle Evans */ 1989af755d3eSKyle Evans VM_OBJECT_RLOCK(shmfd->shm_object); 1990af755d3eSKyle Evans writemappings = shmfd->shm_object->un_pager.swp.writemappings; 1991af755d3eSKyle Evans VM_OBJECT_RUNLOCK(shmfd->shm_object); 1992af755d3eSKyle Evans /* kmappings are also writable */ 1993af755d3eSKyle Evans if (writemappings > 0) { 1994af755d3eSKyle Evans error = EBUSY; 1995af755d3eSKyle Evans goto out; 1996af755d3eSKyle Evans } 1997af755d3eSKyle Evans } 1998af755d3eSKyle Evans shmfd->shm_seals |= nseals; 1999af755d3eSKyle Evans out: 20006df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 2001af755d3eSKyle Evans return (error); 2002af755d3eSKyle Evans } 2003af755d3eSKyle Evans 2004af755d3eSKyle Evans static int 2005af755d3eSKyle Evans shm_get_seals(struct file *fp, int *seals) 2006af755d3eSKyle Evans { 2007af755d3eSKyle Evans struct shmfd *shmfd; 2008af755d3eSKyle Evans 2009af755d3eSKyle Evans shmfd = fp->f_data; 2010af755d3eSKyle Evans *seals = shmfd->shm_seals; 2011af755d3eSKyle Evans return (0); 2012af755d3eSKyle Evans } 2013af755d3eSKyle Evans 2014af755d3eSKyle Evans static int 2015454bc887SKa Ho Ng shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags) 2016454bc887SKa Ho Ng { 2017454bc887SKa Ho Ng vm_object_t object; 2018454bc887SKa Ho Ng vm_pindex_t pistart, pi, piend; 2019454bc887SKa Ho Ng vm_ooffset_t off, len; 2020454bc887SKa Ho Ng int startofs, endofs, end; 2021454bc887SKa Ho Ng int error; 2022454bc887SKa Ho Ng 2023454bc887SKa Ho Ng off = *offset; 2024454bc887SKa Ho Ng len = *length; 2025454bc887SKa Ho Ng KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows")); 20261eaa3652SKa Ho Ng if (off + len > shmfd->shm_size) 20271eaa3652SKa Ho Ng len = shmfd->shm_size - off; 2028454bc887SKa Ho Ng object = shmfd->shm_object; 2029454bc887SKa Ho Ng startofs = off & PAGE_MASK; 2030454bc887SKa Ho Ng endofs = (off + len) & PAGE_MASK; 2031454bc887SKa Ho Ng pistart = OFF_TO_IDX(off); 2032454bc887SKa Ho Ng piend = OFF_TO_IDX(off + len); 2033454bc887SKa Ho Ng pi = OFF_TO_IDX(off + PAGE_MASK); 2034454bc887SKa Ho Ng error = 0; 2035454bc887SKa Ho Ng 20365c1428d2SKa Ho Ng /* Handle the case when offset is on or beyond shm size. */ 20375c1428d2SKa Ho Ng if ((off_t)len <= 0) { 20381eaa3652SKa Ho Ng *length = 0; 20391eaa3652SKa Ho Ng return (0); 20401eaa3652SKa Ho Ng } 20411eaa3652SKa Ho Ng 2042454bc887SKa Ho Ng VM_OBJECT_WLOCK(object); 2043454bc887SKa Ho Ng 2044454bc887SKa Ho Ng if (startofs != 0) { 2045454bc887SKa Ho Ng end = pistart != piend ? 
PAGE_SIZE : endofs; 2046454bc887SKa Ho Ng error = shm_partial_page_invalidate(object, pistart, startofs, 2047454bc887SKa Ho Ng end); 2048454bc887SKa Ho Ng if (error) 2049454bc887SKa Ho Ng goto out; 2050454bc887SKa Ho Ng off += end - startofs; 2051454bc887SKa Ho Ng len -= end - startofs; 2052454bc887SKa Ho Ng } 2053454bc887SKa Ho Ng 2054454bc887SKa Ho Ng if (pi < piend) { 2055454bc887SKa Ho Ng vm_object_page_remove(object, pi, piend, 0); 2056454bc887SKa Ho Ng off += IDX_TO_OFF(piend - pi); 2057454bc887SKa Ho Ng len -= IDX_TO_OFF(piend - pi); 2058454bc887SKa Ho Ng } 2059454bc887SKa Ho Ng 2060454bc887SKa Ho Ng if (endofs != 0 && pistart != piend) { 2061454bc887SKa Ho Ng error = shm_partial_page_invalidate(object, piend, 0, endofs); 2062454bc887SKa Ho Ng if (error) 2063454bc887SKa Ho Ng goto out; 2064454bc887SKa Ho Ng off += endofs; 2065454bc887SKa Ho Ng len -= endofs; 2066454bc887SKa Ho Ng } 2067454bc887SKa Ho Ng 2068454bc887SKa Ho Ng out: 2069454bc887SKa Ho Ng VM_OBJECT_WUNLOCK(shmfd->shm_object); 2070454bc887SKa Ho Ng *offset = off; 2071454bc887SKa Ho Ng *length = len; 2072454bc887SKa Ho Ng return (error); 2073454bc887SKa Ho Ng } 2074454bc887SKa Ho Ng 2075454bc887SKa Ho Ng static int 2076454bc887SKa Ho Ng shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, 2077454bc887SKa Ho Ng struct ucred *active_cred, struct thread *td) 2078454bc887SKa Ho Ng { 2079454bc887SKa Ho Ng void *rl_cookie; 2080454bc887SKa Ho Ng struct shmfd *shmfd; 2081454bc887SKa Ho Ng off_t off, len; 2082454bc887SKa Ho Ng int error; 2083454bc887SKa Ho Ng 20848c9aa94bSKa Ho Ng KASSERT(cmd == SPACECTL_DEALLOC, ("shm_fspacectl: Invalid cmd")); 20858c9aa94bSKa Ho Ng KASSERT((flags & ~SPACECTL_F_SUPPORTED) == 0, 20868c9aa94bSKa Ho Ng ("shm_fspacectl: non-zero flags")); 20878c9aa94bSKa Ho Ng KASSERT(*offset >= 0 && *length > 0 && *length <= OFF_MAX - *offset, 20888c9aa94bSKa Ho Ng ("shm_fspacectl: offset/length overflow or underflow")); 2089454bc887SKa Ho Ng error = EINVAL; 2090454bc887SKa Ho Ng shmfd = fp->f_data; 2091454bc887SKa Ho Ng off = *offset; 2092454bc887SKa Ho Ng len = *length; 2093454bc887SKa Ho Ng 20946df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, off, off + len); 2095454bc887SKa Ho Ng switch (cmd) { 2096454bc887SKa Ho Ng case SPACECTL_DEALLOC: 2097454bc887SKa Ho Ng if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) { 2098454bc887SKa Ho Ng error = EPERM; 2099454bc887SKa Ho Ng break; 2100454bc887SKa Ho Ng } 2101454bc887SKa Ho Ng error = shm_deallocate(shmfd, &off, &len, flags); 2102454bc887SKa Ho Ng *offset = off; 2103454bc887SKa Ho Ng *length = len; 2104454bc887SKa Ho Ng break; 2105454bc887SKa Ho Ng default: 2106454bc887SKa Ho Ng __assert_unreachable(); 2107454bc887SKa Ho Ng } 21086df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 2109454bc887SKa Ho Ng return (error); 2110454bc887SKa Ho Ng } 2111454bc887SKa Ho Ng 2112454bc887SKa Ho Ng 2113454bc887SKa Ho Ng static int 2114f1040532SKyle Evans shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) 2115f1040532SKyle Evans { 2116f1040532SKyle Evans void *rl_cookie; 2117f1040532SKyle Evans struct shmfd *shmfd; 2118f1040532SKyle Evans size_t size; 2119f1040532SKyle Evans int error; 2120f1040532SKyle Evans 2121f1040532SKyle Evans /* This assumes that the caller already checked for overflow. 
*/ 2122f1040532SKyle Evans error = 0; 2123f1040532SKyle Evans shmfd = fp->f_data; 2124f1040532SKyle Evans size = offset + len; 212539eae263SKyle Evans 212639eae263SKyle Evans /* 212739eae263SKyle Evans * Just grab the rangelock for the range that we may be attempting to 212839eae263SKyle Evans * grow, rather than blocking read/write for regions we won't be 212939eae263SKyle Evans * touching while this (potential) resize is in progress. Other 213039eae263SKyle Evans * attempts to resize the shmfd will have to take a write lock from 0 to 213139eae263SKyle Evans * OFF_MAX, so this being potentially beyond the current usable range of 213239eae263SKyle Evans * the shmfd is not necessarily a concern. If other mechanisms are 213339eae263SKyle Evans * added to grow a shmfd, this may need to be re-evaluated. 213439eae263SKyle Evans */ 21356df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, offset, size); 2136d301b358SKonstantin Belousov if (size > shmfd->shm_size) 2137d301b358SKonstantin Belousov error = shm_dotruncate_cookie(shmfd, size, rl_cookie); 21386df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 2139f1040532SKyle Evans /* Translate to posix_fallocate(2) return value as needed. */ 2140f1040532SKyle Evans if (error == ENOMEM) 2141f1040532SKyle Evans error = ENOSPC; 2142f1040532SKyle Evans return (error); 2143f1040532SKyle Evans } 2144f1040532SKyle Evans 2145f1040532SKyle Evans static int 214656d0e33eSKonstantin Belousov sysctl_posix_shm_list(SYSCTL_HANDLER_ARGS) 214756d0e33eSKonstantin Belousov { 214856d0e33eSKonstantin Belousov struct shm_mapping *shmm; 214956d0e33eSKonstantin Belousov struct sbuf sb; 215056d0e33eSKonstantin Belousov struct kinfo_file kif; 215156d0e33eSKonstantin Belousov u_long i; 215256d0e33eSKonstantin Belousov int error, error2; 215356d0e33eSKonstantin Belousov 215456d0e33eSKonstantin Belousov sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file) * 5, req); 215556d0e33eSKonstantin Belousov sbuf_clear_flags(&sb, SBUF_INCLUDENUL); 215656d0e33eSKonstantin Belousov error = 0; 215756d0e33eSKonstantin Belousov sx_slock(&shm_dict_lock); 215856d0e33eSKonstantin Belousov for (i = 0; i < shm_hash + 1; i++) { 215956d0e33eSKonstantin Belousov LIST_FOREACH(shmm, &shm_dictionary[i], sm_link) { 216056d0e33eSKonstantin Belousov error = shm_fill_kinfo_locked(shmm->sm_shmfd, 216156d0e33eSKonstantin Belousov &kif, true); 2162747a4726SJamie Gritton if (error == EPERM) { 2163747a4726SJamie Gritton error = 0; 216456d0e33eSKonstantin Belousov continue; 2165747a4726SJamie Gritton } 216656d0e33eSKonstantin Belousov if (error != 0) 216756d0e33eSKonstantin Belousov break; 216856d0e33eSKonstantin Belousov pack_kinfo(&kif); 216956d0e33eSKonstantin Belousov error = sbuf_bcat(&sb, &kif, kif.kf_structsize) == 0 ? 217056d0e33eSKonstantin Belousov 0 : ENOMEM; 217156d0e33eSKonstantin Belousov if (error != 0) 217256d0e33eSKonstantin Belousov break; 217356d0e33eSKonstantin Belousov } 217456d0e33eSKonstantin Belousov } 217556d0e33eSKonstantin Belousov sx_sunlock(&shm_dict_lock); 217656d0e33eSKonstantin Belousov error2 = sbuf_finish(&sb); 217756d0e33eSKonstantin Belousov sbuf_delete(&sb); 217856d0e33eSKonstantin Belousov return (error != 0 ? 
error : error2); 217956d0e33eSKonstantin Belousov } 218056d0e33eSKonstantin Belousov 218156d0e33eSKonstantin Belousov SYSCTL_PROC(_kern_ipc, OID_AUTO, posix_shm_list, 2182d7c4ea7dSJamie Gritton CTLFLAG_RD | CTLFLAG_PRISON | CTLFLAG_MPSAFE | CTLTYPE_OPAQUE, 218356d0e33eSKonstantin Belousov NULL, 0, sysctl_posix_shm_list, "", 218456d0e33eSKonstantin Belousov "POSIX SHM list"); 218520f70576SKyle Evans 218620f70576SKyle Evans int 2187535b1df9SKyle Evans kern_shm_open(struct thread *td, const char *path, int flags, mode_t mode, 2188535b1df9SKyle Evans struct filecaps *caps) 218920f70576SKyle Evans { 219020f70576SKyle Evans 2191535b1df9SKyle Evans return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL)); 219220f70576SKyle Evans } 219320f70576SKyle Evans 219420f70576SKyle Evans /* 219520f70576SKyle Evans * This version of the shm_open() interface leaves CLOEXEC behavior up to the 219620f70576SKyle Evans * caller, and libc will enforce it for the traditional shm_open() call. This 219720f70576SKyle Evans * allows other consumers, like memfd_create(), to opt-in for CLOEXEC. This 219820f70576SKyle Evans * interface also includes a 'name' argument that is currently unused, but could 219920f70576SKyle Evans * potentially be exported later via some interface for debugging purposes. 220020f70576SKyle Evans * From the kernel's perspective, it is optional. Individual consumers like 220120f70576SKyle Evans * memfd_create() may require it in order to be compatible with other systems 220220f70576SKyle Evans * implementing the same function. 220320f70576SKyle Evans */ 220420f70576SKyle Evans int 220520f70576SKyle Evans sys_shm_open2(struct thread *td, struct shm_open2_args *uap) 220620f70576SKyle Evans { 220720f70576SKyle Evans 220820f70576SKyle Evans return (kern_shm_open2(td, uap->path, uap->flags, uap->mode, 2209535b1df9SKyle Evans uap->shmflags, NULL, uap->name)); 221020f70576SKyle Evans } 2211
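/*
 * Usage sketch (illustrative only; the exact libc wrapper may differ): the
 * shm_open2() entry point above backs both the traditional shm_open(3) path
 * and memfd_create(3), as described in the comment preceding it.  A
 * memfd_create()-style wrapper could reach it roughly as follows, using the
 * SHM_ANON pseudo-path and the shmflags handled by kern_shm_open2():
 *
 *	fd = shm_open2(SHM_ANON,
 *	    O_RDWR | ((mfd_flags & MFD_CLOEXEC) != 0 ? O_CLOEXEC : 0),
 *	    0, SHM_GROW_ON_WRITE |
 *	    ((mfd_flags & MFD_ALLOW_SEALING) != 0 ? SHM_ALLOW_SEALING : 0),
 *	    name);
 *
 * Here 'mfd_flags' and 'name' are caller-supplied placeholders; the kernel
 * ignores 'name' beyond passing it to kern_shm_open2().
 */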