/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006, 2011, 2016-2017 Robert N. M. Watson
 * Copyright 2020 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by BAE Systems, the University of
 * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
 * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
 * Computing (TC) research program.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Support for shared swap-backed anonymous memory objects via
 * shm_open(2), shm_rename(2), and shm_unlink(2).  While most of the
 * implementation is here, vm_mmap.c contains the mapping logic changes.
 *
 * posixshmcontrol(1) allows users to inspect the state of the memory
 * objects.  The per-uid swap resource limit controls the total amount of
 * memory that a user can consume for anonymous objects, including shared
 * ones.
 */
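
/*
 * Illustrative userspace usage (not part of the implementation): an
 * anonymous object is created, sized, and mapped with the standard
 * POSIX calls:
 *
 *	int fd = shm_open(SHM_ANON, O_RDWR, 0600);
 *	ftruncate(fd, len);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 */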

#include <sys/cdefs.h>
#include "opt_capsicum.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/uio.h>
#include <sys/signal.h>
#include <sys/jail.h>
#include <sys/ktrace.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/unistd.h>
#include <sys/user.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

struct shm_mapping {
	char		*sm_path;
	Fnv32_t		sm_fnv;
	struct shmfd	*sm_shmfd;
	LIST_ENTRY(shm_mapping) sm_link;
};

static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor");
static LIST_HEAD(, shm_mapping) *shm_dictionary;
static struct sx shm_dict_lock;
static struct mtx shm_timestamp_lock;
static u_long shm_hash;
static struct unrhdr64 shm_ino_unr;
static dev_t shm_dev_ino;

#define	SHM_HASH(fnv)	(&shm_dictionary[(fnv) & shm_hash])

static void	shm_init(void *arg);
static void	shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
static int	shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
static void	shm_doremove(struct shm_mapping *map);
static int	shm_dotruncate_cookie(struct shmfd *shmfd, off_t length,
    void *rl_cookie);
static int	shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
    void *rl_cookie);
static int	shm_copyin_path(struct thread *td, const char *userpath_in,
    char **path_out);
static int	shm_deallocate(struct shmfd *shmfd, off_t *offset,
    off_t *length, int flags);

static fo_rdwr_t	shm_read;
static fo_rdwr_t	shm_write;
static fo_truncate_t	shm_truncate;
static fo_ioctl_t	shm_ioctl;
static fo_stat_t	shm_stat;
static fo_close_t	shm_close;
static fo_chmod_t	shm_chmod;
static fo_chown_t	shm_chown;
static fo_seek_t	shm_seek;
static fo_fill_kinfo_t	shm_fill_kinfo;
static fo_mmap_t	shm_mmap;
static fo_get_seals_t	shm_get_seals;
static fo_add_seals_t	shm_add_seals;
static fo_fallocate_t	shm_fallocate;
static fo_fspacectl_t	shm_fspacectl;

/* File descriptor operations. */
struct fileops shm_ops = {
	.fo_read = shm_read,
	.fo_write = shm_write,
	.fo_truncate = shm_truncate,
	.fo_ioctl = shm_ioctl,
	.fo_poll = invfo_poll,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = shm_stat,
	.fo_close = shm_close,
	.fo_chmod = shm_chmod,
	.fo_chown = shm_chown,
	.fo_sendfile = vn_sendfile,
	.fo_seek = shm_seek,
	.fo_fill_kinfo = shm_fill_kinfo,
	.fo_mmap = shm_mmap,
	.fo_get_seals = shm_get_seals,
	.fo_add_seals = shm_add_seals,
	.fo_fallocate = shm_fallocate,
	.fo_fspacectl = shm_fspacectl,
	.fo_cmp = file_kcmp_generic,
	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
};

FEATURE(posix_shm, "POSIX shared memory");

static SYSCTL_NODE(_vm, OID_AUTO, largepages, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "");

static int largepage_reclaim_tries = 1;
SYSCTL_INT(_vm_largepages, OID_AUTO, reclaim_tries,
    CTLFLAG_RWTUN, &largepage_reclaim_tries, 0,
    "Number of contig reclaims before giving up for default alloc policy");

#define	shm_rangelock_unlock(shmfd, cookie)				\
	rangelock_unlock(&(shmfd)->shm_rl, (cookie), &(shmfd)->shm_mtx)
#define	shm_rangelock_rlock(shmfd, start, end)				\
	rangelock_rlock(&(shmfd)->shm_rl, (start), (end), &(shmfd)->shm_mtx)
#define	shm_rangelock_tryrlock(shmfd, start, end)			\
	rangelock_tryrlock(&(shmfd)->shm_rl, (start), (end), &(shmfd)->shm_mtx)
#define	shm_rangelock_wlock(shmfd, start, end)				\
	rangelock_wlock(&(shmfd)->shm_rl, (start), (end), &(shmfd)->shm_mtx)
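
/*
 * Transfer data between a uio and the pages of a swap-backed VM object.
 * uiomove_object_page() below handles a single page; a read from a hole
 * (no resident page and no swap page) is served from zero_region rather
 * than instantiating a new page.
 */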
static int
uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
{
	vm_page_t m;
	vm_pindex_t idx;
	size_t tlen;
	int error, offset, rv;

	idx = OFF_TO_IDX(uio->uio_offset);
	offset = uio->uio_offset & PAGE_MASK;
	tlen = MIN(PAGE_SIZE - offset, len);

	rv = vm_page_grab_valid_unlocked(&m, obj, idx,
	    VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOCREAT);
	if (rv == VM_PAGER_OK)
		goto found;

	/*
	 * Read I/O without either a corresponding resident page or swap
	 * page: use zero_region.  This is intended to avoid instantiating
	 * pages on read from a sparse region.
	 */
	VM_OBJECT_WLOCK(obj);
	m = vm_page_lookup(obj, idx);
	if (uio->uio_rw == UIO_READ && m == NULL &&
	    !vm_pager_has_page(obj, idx, NULL, NULL)) {
		VM_OBJECT_WUNLOCK(obj);
		return (uiomove(__DECONST(void *, zero_region), tlen, uio));
	}

	/*
	 * Although the tmpfs vnode lock is held here, it is
	 * nonetheless safe to sleep waiting for a free page.  The
	 * pageout daemon does not need to acquire the tmpfs vnode
	 * lock to page out tobj's pages because tobj is an OBJT_SWAP
	 * type object.
	 */
	rv = vm_page_grab_valid(&m, obj, idx,
	    VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY);
	if (rv != VM_PAGER_OK) {
		VM_OBJECT_WUNLOCK(obj);
		if (bootverbose) {
			printf("uiomove_object: vm_obj %p idx %jd "
			    "pager error %d\n", obj, idx, rv);
		}
		return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO);
	}
	VM_OBJECT_WUNLOCK(obj);

found:
	error = uiomove_fromphys(&m, offset, tlen, uio);
	if (uio->uio_rw == UIO_WRITE && error == 0)
		vm_page_set_dirty(m);
	vm_page_activate(m);
	vm_page_sunbusy(m);

	return (error);
}

int
uiomove_object(vm_object_t obj, off_t obj_size, struct uio *uio)
{
	ssize_t resid;
	size_t len;
	int error;

	error = 0;
	while ((resid = uio->uio_resid) > 0) {
		if (obj_size <= uio->uio_offset)
			break;
		len = MIN(obj_size - uio->uio_offset, resid);
		if (len == 0)
			break;
		error = uiomove_object_page(obj, len, uio);
		if (error != 0 || resid == uio->uio_resid)
			break;
	}
	return (error);
}

static u_long count_largepages[MAXPAGESIZES];
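
/*
 * Pager methods for largepage (OBJT_PHYS) shm objects.  The page size
 * index (psind) of the backing superpages is kept in the object's
 * un_pager.phys.data_val field.
 */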
static int
shm_largepage_phys_populate(vm_object_t object, vm_pindex_t pidx,
    int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
{
	vm_page_t m __diagused;
	int psind;

	psind = object->un_pager.phys.data_val;
	if (psind == 0 || pidx >= object->size)
		return (VM_PAGER_FAIL);
	*first = rounddown2(pidx, pagesizes[psind] / PAGE_SIZE);

	/*
	 * We only busy the first page in the superpage run.  It is
	 * useless to busy the whole run since we only remove full
	 * superpages, and it takes too long to busy e.g. 512 * 512 ==
	 * 262144 pages constituting a 1G amd64 superpage.
	 */
	m = vm_page_grab(object, *first, VM_ALLOC_NORMAL | VM_ALLOC_NOCREAT);
	MPASS(m != NULL);

	*last = *first + atop(pagesizes[psind]) - 1;
	return (VM_PAGER_OK);
}

static boolean_t
shm_largepage_phys_haspage(vm_object_t object, vm_pindex_t pindex,
    int *before, int *after)
{
	int psind;

	psind = object->un_pager.phys.data_val;
	if (psind == 0 || pindex >= object->size)
		return (FALSE);
	if (before != NULL) {
		*before = pindex - rounddown2(pindex, pagesizes[psind] /
		    PAGE_SIZE);
	}
	if (after != NULL) {
		*after = roundup2(pindex, pagesizes[psind] / PAGE_SIZE) -
		    pindex;
	}
	return (TRUE);
}

static void
shm_largepage_phys_ctor(vm_object_t object, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred)
{
}

static void
shm_largepage_phys_dtor(vm_object_t object)
{
	int psind;

	psind = object->un_pager.phys.data_val;
	if (psind != 0) {
		atomic_subtract_long(&count_largepages[psind],
		    object->size / (pagesizes[psind] / PAGE_SIZE));
		vm_wire_sub(object->size);
	} else {
		KASSERT(object->size == 0,
		    ("largepage phys obj %p not initialized but size %#jx > 0",
		    object, (uintmax_t)object->size));
	}
}

static const struct phys_pager_ops shm_largepage_phys_ops = {
	.phys_pg_populate =	shm_largepage_phys_populate,
	.phys_pg_haspage =	shm_largepage_phys_haspage,
	.phys_pg_ctor =		shm_largepage_phys_ctor,
	.phys_pg_dtor =		shm_largepage_phys_dtor,
};

bool
shm_largepage(struct shmfd *shmfd)
{
	return (shmfd->shm_object->type == OBJT_PHYS);
}
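
/*
 * Pager callbacks for swap-backed shm objects.  They maintain the
 * object's shm_pages count of allocated pages, which shm_stat()
 * reports via st_blocks.
 */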
static void
shm_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size)
{
	struct shmfd *shm;
	vm_size_t c;

	swap_pager_freespace(obj, start, size, &c);
	if (c == 0)
		return;

	shm = obj->un_pager.swp.swp_priv;
	if (shm == NULL)
		return;
	KASSERT(shm->shm_pages >= c,
	    ("shm %p pages %jd free %jd", shm,
	    (uintmax_t)shm->shm_pages, (uintmax_t)c));
	shm->shm_pages -= c;
}

static void
shm_page_inserted(vm_object_t obj, vm_page_t m)
{
	struct shmfd *shm;

	shm = obj->un_pager.swp.swp_priv;
	if (shm == NULL)
		return;
	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL))
		shm->shm_pages += 1;
}

static void
shm_page_removed(vm_object_t obj, vm_page_t m)
{
	struct shmfd *shm;

	shm = obj->un_pager.swp.swp_priv;
	if (shm == NULL)
		return;
	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
		KASSERT(shm->shm_pages >= 1,
		    ("shm %p pages %jd free 1", shm,
		    (uintmax_t)shm->shm_pages));
		shm->shm_pages -= 1;
	}
}

static struct pagerops shm_swap_pager_ops = {
	.pgo_kvme_type = KVME_TYPE_SWAP,
	.pgo_freespace = shm_pager_freespace,
	.pgo_page_inserted = shm_page_inserted,
	.pgo_page_removed = shm_page_removed,
};
static int shmfd_pager_type = -1;
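
/* lseek(2) for a shmfd; the resulting offset must lie in [0, shm_size]. */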
static int
shm_seek(struct file *fp, off_t offset, int whence, struct thread *td)
{
	struct shmfd *shmfd;
	off_t foffset;
	int error;

	shmfd = fp->f_data;
	foffset = foffset_lock(fp, 0);
	error = 0;
	switch (whence) {
	case L_INCR:
		if (foffset < 0 ||
		    (offset > 0 && foffset > OFF_MAX - offset)) {
			error = EOVERFLOW;
			break;
		}
		offset += foffset;
		break;
	case L_XTND:
		if (offset > 0 && shmfd->shm_size > OFF_MAX - offset) {
			error = EOVERFLOW;
			break;
		}
		offset += shmfd->shm_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		if (offset < 0 || offset > shmfd->shm_size)
			error = EINVAL;
		else
			td->td_uretoff.tdu_off = offset;
	}
	foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
	return (error);
}
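
/*
 * read(2) for a shmfd; the byte range being read is read-locked to
 * serialize against concurrent writes and truncation.
 */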
static int
shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct shmfd *shmfd;
	void *rl_cookie;
	int error;

	shmfd = fp->f_data;
#ifdef MAC
	error = mac_posixshm_check_read(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif
	foffset_lock_uio(fp, uio, flags);
	rl_cookie = shm_rangelock_rlock(shmfd, uio->uio_offset,
	    uio->uio_offset + uio->uio_resid);
	error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio);
	shm_rangelock_unlock(shmfd, rl_cookie);
	foffset_unlock_uio(fp, uio, flags);
	return (error);
}
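
/*
 * write(2) for a shmfd.  With SHM_GROW_ON_WRITE a write past the end
 * extends the object; otherwise it is cut off at the current size.
 * F_SEAL_WRITE causes all writes to fail with EPERM.
 */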
static int
shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct shmfd *shmfd;
	void *rl_cookie;
	int error;
	off_t size;

	shmfd = fp->f_data;
#ifdef MAC
	error = mac_posixshm_check_write(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif
	if (shm_largepage(shmfd) && shmfd->shm_lp_psind == 0)
		return (EINVAL);
	foffset_lock_uio(fp, uio, flags);
	if (uio->uio_resid > OFF_MAX - uio->uio_offset) {
		/*
		 * Overflow is only an error if we're supposed to expand on
		 * write.  Otherwise, we'll just truncate the write to the
		 * size of the file, which can only grow up to OFF_MAX.
		 */
		if ((shmfd->shm_flags & SHM_GROW_ON_WRITE) != 0) {
			foffset_unlock_uio(fp, uio, flags);
			return (EFBIG);
		}

		size = shmfd->shm_size;
	} else {
		size = uio->uio_offset + uio->uio_resid;
	}
	if ((flags & FOF_OFFSET) == 0)
		rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX);
	else
		rl_cookie = shm_rangelock_wlock(shmfd, uio->uio_offset, size);
	if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
		error = EPERM;
	} else {
		error = 0;
		if ((shmfd->shm_flags & SHM_GROW_ON_WRITE) != 0 &&
		    size > shmfd->shm_size) {
			error = shm_dotruncate_cookie(shmfd, size, rl_cookie);
		}
		if (error == 0)
			error = uiomove_object(shmfd->shm_object,
			    shmfd->shm_size, uio);
	}
	shm_rangelock_unlock(shmfd, rl_cookie);
	foffset_unlock_uio(fp, uio, flags);
	return (error);
}

static int
shm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *shmfd;
#ifdef MAC
	int error;
#endif

	shmfd = fp->f_data;
#ifdef MAC
	error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif
	return (shm_dotruncate(shmfd, length));
}
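
/*
 * ioctl(2) for a shmfd; FIOSSHMLPGCNF and FIOGSHMLPGCNF set and query
 * the superpage size index and allocation policy of a largepage object.
 */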
int
shm_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *shmfd;
	struct shm_largepage_conf *conf;
	void *rl_cookie;

	shmfd = fp->f_data;
	switch (com) {
	case FIONBIO:
	case FIOASYNC:
		/*
		 * Allow fcntl(fd, F_SETFL, O_NONBLOCK) to work, just like
		 * it would on an unlinked regular file.
		 */
		return (0);
	case FIOSSHMLPGCNF:
		if (!shm_largepage(shmfd))
			return (ENOTTY);
		conf = data;
		if (shmfd->shm_lp_psind != 0 &&
		    conf->psind != shmfd->shm_lp_psind)
			return (EINVAL);
		if (conf->psind <= 0 || conf->psind >= MAXPAGESIZES ||
		    pagesizes[conf->psind] == 0)
			return (EINVAL);
		if (conf->alloc_policy != SHM_LARGEPAGE_ALLOC_DEFAULT &&
		    conf->alloc_policy != SHM_LARGEPAGE_ALLOC_NOWAIT &&
		    conf->alloc_policy != SHM_LARGEPAGE_ALLOC_HARD)
			return (EINVAL);

		rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX);
		shmfd->shm_lp_psind = conf->psind;
		shmfd->shm_lp_alloc_policy = conf->alloc_policy;
		shmfd->shm_object->un_pager.phys.data_val = conf->psind;
		shm_rangelock_unlock(shmfd, rl_cookie);
		return (0);
	case FIOGSHMLPGCNF:
		if (!shm_largepage(shmfd))
			return (ENOTTY);
		conf = data;
		rl_cookie = shm_rangelock_rlock(shmfd, 0, OFF_MAX);
		conf->psind = shmfd->shm_lp_psind;
		conf->alloc_policy = shmfd->shm_lp_alloc_policy;
		shm_rangelock_unlock(shmfd, rl_cookie);
		return (0);
	default:
		return (ENOTTY);
	}
}

static int
shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
	struct shmfd *shmfd;
#ifdef MAC
	int error;
#endif

	shmfd = fp->f_data;

#ifdef MAC
	error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif

	/*
	 * Attempt to return sane-ish values for fstat() on a memory file
	 * descriptor.
	 */
	bzero(sb, sizeof(*sb));
	sb->st_blksize = PAGE_SIZE;
	sb->st_size = shmfd->shm_size;
	mtx_lock(&shm_timestamp_lock);
	sb->st_atim = shmfd->shm_atime;
	sb->st_ctim = shmfd->shm_ctime;
	sb->st_mtim = shmfd->shm_mtime;
	sb->st_birthtim = shmfd->shm_birthtime;
	sb->st_mode = S_IFREG | shmfd->shm_mode;	/* XXX */
	sb->st_uid = shmfd->shm_uid;
	sb->st_gid = shmfd->shm_gid;
	mtx_unlock(&shm_timestamp_lock);
	sb->st_dev = shm_dev_ino;
	sb->st_ino = shmfd->shm_ino;
	sb->st_nlink = shmfd->shm_object->ref_count;
	if (shm_largepage(shmfd)) {
		sb->st_blocks = shmfd->shm_object->size /
		    (pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT);
	} else {
		sb->st_blocks = shmfd->shm_pages;
	}

	return (0);
}

static int
shm_close(struct file *fp, struct thread *td)
{
	struct shmfd *shmfd;

	shmfd = fp->f_data;
	fp->f_data = NULL;
	shm_drop(shmfd);

	return (0);
}

static int
shm_copyin_path(struct thread *td, const char *userpath_in, char **path_out)
{
	int error;
	char *path;
	const char *pr_path;
	size_t pr_pathlen;

	path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
	pr_path = td->td_ucred->cr_prison->pr_path;

	/* Construct a full pathname for jailed callers. */
	pr_pathlen = strcmp(pr_path, "/") == 0 ? 0 :
	    strlcpy(path, pr_path, MAXPATHLEN);
	error = copyinstr(userpath_in, path + pr_pathlen,
	    MAXPATHLEN - pr_pathlen, NULL);
	if (error != 0)
		goto out;

#ifdef KTRACE
	if (KTRPOINT(curthread, KTR_NAMEI))
		ktrnamei(path);
#endif

	/* Require paths to start with a '/' character. */
	if (path[pr_pathlen] != '/') {
		error = EINVAL;
		goto out;
	}

	*path_out = path;

out:
	if (error != 0)
		free(path, M_SHMFD);

	return (error);
}
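
/*
 * Zero the byte range [base, end) of the page at index idx, paging the
 * page in first if it is only present in swap, and dirty it so the
 * zeroes reach the pager.  Used when a resize leaves a partially valid
 * last page.
 */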
static int
shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
    int end)
{
	vm_page_t m;
	int rv;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(base >= 0, ("%s: base %d", __func__, base));
	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
	    end));

retry:
	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		MPASS(vm_page_all_valid(m));
	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
		m = vm_page_alloc(object, idx,
		    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
		if (m == NULL)
			goto retry;
		vm_object_pip_add(object, 1);
		VM_OBJECT_WUNLOCK(object);
		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
		VM_OBJECT_WLOCK(object);
		vm_object_pip_wakeup(object);
		if (rv == VM_PAGER_OK) {
			/*
			 * Since the page was not resident, and therefore not
			 * recently accessed, immediately enqueue it for
			 * asynchronous laundering.  The current operation is
			 * not regarded as an access.
			 */
			vm_page_launder(m);
		} else {
			vm_page_free(m);
			VM_OBJECT_WUNLOCK(object);
			return (EIO);
		}
	}
	if (m != NULL) {
		pmap_zero_page_area(m, base, end - base);
		KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid",
		    __func__, m));
		vm_page_set_dirty(m);
		vm_page_xunbusy(m);
	}

	return (0);
}
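
/*
 * Resize a swap-backed shmfd.  The caller must hold the object write
 * lock and a write rangelock covering the affected range.  Shrinking
 * zeroes the tail of the new last page and releases the swap charge of
 * the removed pages; growing reserves additional swap.
 */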
static int
shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
{
	vm_object_t object;
	vm_pindex_t nobjsize;
	vm_ooffset_t delta;
	int base, error;

	KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
	object = shmfd->shm_object;
	VM_OBJECT_ASSERT_WLOCKED(object);
	rangelock_cookie_assert(rl_cookie, RA_WLOCKED);
	if (length == shmfd->shm_size)
		return (0);
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0)
			return (EPERM);

		/*
		 * Disallow any requests to shrink the size if this
		 * object is mapped into the kernel.
		 */
		if (shmfd->shm_kmappings > 0)
			return (EBUSY);

		/*
		 * Zero the truncated part of the last page.
		 */
		base = length & PAGE_MASK;
		if (base != 0) {
			error = shm_partial_page_invalidate(object,
			    OFF_TO_IDX(length), base, PAGE_SIZE);
			if (error)
				return (error);
		}
		delta = IDX_TO_OFF(object->size - nobjsize);

		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);

		/* Free the swap accounted for the shm. */
		swap_release_by_cred(delta, object->cred);
		object->charge -= delta;
	} else {
		if ((shmfd->shm_seals & F_SEAL_GROW) != 0)
			return (EPERM);

		/* Try to reserve additional swap space. */
		delta = IDX_TO_OFF(nobjsize - object->size);
		if (!swap_reserve_by_cred(delta, object->cred))
			return (ENOMEM);
		object->charge += delta;
	}
	shmfd->shm_size = length;
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	return (0);
}
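
/*
 * Resize a largepage shmfd.  The object grows one superpage at a time
 * under the configured allocation policy; shrinking is not supported
 * because the wired superpages are unmanaged.
 */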
static int
shm_dotruncate_largepage(struct shmfd *shmfd, off_t length, void *rl_cookie)
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t newobjsz;
	vm_pindex_t oldobjsz __unused;
	int aflags, error, i, psind, try;

	KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
	object = shmfd->shm_object;
	VM_OBJECT_ASSERT_WLOCKED(object);
	rangelock_cookie_assert(rl_cookie, RA_WLOCKED);

	oldobjsz = object->size;
	newobjsz = OFF_TO_IDX(length);
	if (length == shmfd->shm_size)
		return (0);
	psind = shmfd->shm_lp_psind;
	if (psind == 0 && length != 0)
		return (EINVAL);
	if ((length & (pagesizes[psind] - 1)) != 0)
		return (EINVAL);

	if (length < shmfd->shm_size) {
		if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0)
			return (EPERM);
		if (shmfd->shm_kmappings > 0)
			return (EBUSY);
		return (ENOTSUP);	/* Pages are unmanaged. */
#if 0
		vm_object_page_remove(object, newobjsz, oldobjsz, 0);
		object->size = newobjsz;
		shmfd->shm_size = length;
		return (0);
#endif
	}

	if ((shmfd->shm_seals & F_SEAL_GROW) != 0)
		return (EPERM);

	aflags = VM_ALLOC_NORMAL | VM_ALLOC_ZERO;
	if (shmfd->shm_lp_alloc_policy == SHM_LARGEPAGE_ALLOC_NOWAIT)
		aflags |= VM_ALLOC_WAITFAIL;
	try = 0;

	/*
	 * Extend the shmfd and the object, keeping all already fully
	 * allocated large pages intact even on error, because the dropped
	 * object lock might have allowed them to be mapped.
	 */
	while (object->size < newobjsz) {
		m = vm_page_alloc_contig(object, object->size, aflags,
		    pagesizes[psind] / PAGE_SIZE, 0, ~0,
		    pagesizes[psind], 0,
		    VM_MEMATTR_DEFAULT);
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(object);
			if (shmfd->shm_lp_alloc_policy ==
			    SHM_LARGEPAGE_ALLOC_NOWAIT ||
			    (shmfd->shm_lp_alloc_policy ==
			    SHM_LARGEPAGE_ALLOC_DEFAULT &&
			    try >= largepage_reclaim_tries)) {
				VM_OBJECT_WLOCK(object);
				return (ENOMEM);
			}
			error = vm_page_reclaim_contig(aflags,
			    pagesizes[psind] / PAGE_SIZE, 0, ~0,
			    pagesizes[psind], 0);
			if (error == ENOMEM)
				error = vm_wait_intr(object);
			if (error != 0) {
				VM_OBJECT_WLOCK(object);
				return (error);
			}
			try++;
			VM_OBJECT_WLOCK(object);
			continue;
		}
		try = 0;
		for (i = 0; i < pagesizes[psind] / PAGE_SIZE; i++) {
			if ((m[i].flags & PG_ZERO) == 0)
				pmap_zero_page(&m[i]);
			vm_page_valid(&m[i]);
			vm_page_xunbusy(&m[i]);
		}
		object->size += OFF_TO_IDX(pagesizes[psind]);
		shmfd->shm_size += pagesizes[psind];
		atomic_add_long(&count_largepages[psind], 1);
		vm_wire_add(atop(pagesizes[psind]));
	}
	return (0);
}
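
/*
 * Truncation helper for callers that already hold a write rangelock:
 * takes the object lock and dispatches to the largepage or swap-backed
 * implementation.
 */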
static int
shm_dotruncate_cookie(struct shmfd *shmfd, off_t length, void *rl_cookie)
{
	int error;

	VM_OBJECT_WLOCK(shmfd->shm_object);
	error = shm_largepage(shmfd) ? shm_dotruncate_largepage(shmfd,
	    length, rl_cookie) : shm_dotruncate_locked(shmfd, length,
	    rl_cookie);
	VM_OBJECT_WUNLOCK(shmfd->shm_object);
	return (error);
}

int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	void *rl_cookie;
	int error;

	rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX);
	error = shm_dotruncate_cookie(shmfd, length, rl_cookie);
	shm_rangelock_unlock(shmfd, rl_cookie);
	return (error);
}

/*
 * shmfd object management including creation and reference counting
 * routines.
 */
struct shmfd *
shm_alloc(struct ucred *ucred, mode_t mode, bool largepage)
{
	struct shmfd *shmfd;
	vm_object_t obj;

	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
	shmfd->shm_size = 0;
	shmfd->shm_uid = ucred->cr_uid;
	shmfd->shm_gid = ucred->cr_gid;
	shmfd->shm_mode = mode;
	if (largepage) {
		shmfd->shm_object = phys_pager_allocate(NULL,
		    &shm_largepage_phys_ops, NULL, shmfd->shm_size,
		    VM_PROT_DEFAULT, 0, ucred);
		shmfd->shm_lp_alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT;
	} else {
		obj = vm_pager_allocate(shmfd_pager_type, NULL,
		    shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
		VM_OBJECT_WLOCK(obj);
		obj->un_pager.swp.swp_priv = shmfd;
		VM_OBJECT_WUNLOCK(obj);
		shmfd->shm_object = obj;
	}
	KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
	vfs_timestamp(&shmfd->shm_birthtime);
	shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
	    shmfd->shm_birthtime;
	shmfd->shm_ino = alloc_unr64(&shm_ino_unr);
	refcount_init(&shmfd->shm_refs, 1);
	mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF);
	rangelock_init(&shmfd->shm_rl);
#ifdef MAC
	mac_posixshm_init(shmfd);
	mac_posixshm_create(ucred, shmfd);
#endif

	return (shmfd);
}

struct shmfd *
shm_hold(struct shmfd *shmfd)
{

	refcount_acquire(&shmfd->shm_refs);
	return (shmfd);
}

void
shm_drop(struct shmfd *shmfd)
{
	vm_object_t obj;

	if (refcount_release(&shmfd->shm_refs)) {
#ifdef MAC
		mac_posixshm_destroy(shmfd);
#endif
		rangelock_destroy(&shmfd->shm_rl);
		mtx_destroy(&shmfd->shm_mtx);
		obj = shmfd->shm_object;
		if (!shm_largepage(shmfd)) {
			VM_OBJECT_WLOCK(obj);
			obj->un_pager.swp.swp_priv = NULL;
			VM_OBJECT_WUNLOCK(obj);
		}
		vm_object_deallocate(obj);
		free(shmfd, M_SHMFD);
	}
}

/*
 * Determine if the credentials have sufficient permissions for a
 * specified combination of FREAD and FWRITE.
 */
int
shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags)
{
	accmode_t accmode;
	int error;

	accmode = 0;
	if (flags & FREAD)
		accmode |= VREAD;
	if (flags & FWRITE)
		accmode |= VWRITE;
	mtx_lock(&shm_timestamp_lock);
	error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid,
	    accmode, ucred);
	mtx_unlock(&shm_timestamp_lock);
	return (error);
}

static void
shm_init(void *arg)
{
	char name[32];
	int i;

	mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
	sx_init(&shm_dict_lock, "shm dictionary");
	shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
	new_unrhdr64(&shm_ino_unr, 1);
	shm_dev_ino = devfs_alloc_cdp_inode();
	KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized"));
	shmfd_pager_type = vm_pager_alloc_dyn_type(&shm_swap_pager_ops,
	    OBJT_SWAP);
	MPASS(shmfd_pager_type != -1);

	for (i = 1; i < MAXPAGESIZES; i++) {
		if (pagesizes[i] == 0)
			break;
#define	M	(1024 * 1024)
#define	G	(1024 * M)
		if (pagesizes[i] >= G)
			snprintf(name, sizeof(name), "%luG", pagesizes[i] / G);
		else if (pagesizes[i] >= M)
			snprintf(name, sizeof(name), "%luM", pagesizes[i] / M);
		else
			snprintf(name, sizeof(name), "%lu", pagesizes[i]);
#undef G
#undef M
		SYSCTL_ADD_ULONG(NULL, SYSCTL_STATIC_CHILDREN(_vm_largepages),
		    OID_AUTO, name, CTLFLAG_RD, &count_largepages[i],
		    "number of non-transient largepages allocated");
	}
}
SYSINIT(shm_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_init, NULL);

/*
 * Remove all shared memory objects that belong to a prison.
 */
void
shm_remove_prison(struct prison *pr)
{
	struct shm_mapping *shmm, *tshmm;
	u_long i;

	sx_xlock(&shm_dict_lock);
	for (i = 0; i < shm_hash + 1; i++) {
		LIST_FOREACH_SAFE(shmm, &shm_dictionary[i], sm_link, tshmm) {
			if (shmm->sm_shmfd->shm_object->cred &&
			    shmm->sm_shmfd->shm_object->cred->cr_prison == pr)
				shm_doremove(shmm);
		}
	}
	sx_xunlock(&shm_dict_lock);
}

/*
 * Dictionary management.  We maintain an in-kernel dictionary to map
 * paths to shmfd objects.  We use the FNV hash on the path to store
 * the mappings in a hash table.
 */
static struct shmfd *
shm_lookup(char *path, Fnv32_t fnv)
{
	struct shm_mapping *map;

	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
		if (map->sm_fnv != fnv)
			continue;
		if (strcmp(map->sm_path, path) == 0)
			return (map->sm_shmfd);
	}

	return (NULL);
}

static void
shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd)
{
	struct shm_mapping *map;

	map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK);
	map->sm_path = path;
	map->sm_fnv = fnv;
	map->sm_shmfd = shm_hold(shmfd);
	shmfd->shm_path = path;
	LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link);
}

static int
shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
{
	struct shm_mapping *map;
	int error;

	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
		if (map->sm_fnv != fnv)
			continue;
		if (strcmp(map->sm_path, path) == 0) {
#ifdef MAC
			error = mac_posixshm_check_unlink(ucred,
			    map->sm_shmfd);
			if (error)
				return (error);
#endif
			error = shm_access(map->sm_shmfd, ucred,
			    FREAD | FWRITE);
			if (error)
				return (error);
			shm_doremove(map);
			return (0);
		}
	}

	return (ENOENT);
}
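
/*
 * Remove a dictionary entry: detach the path from the shmfd, unlink
 * the entry from its hash chain, and drop the dictionary's reference
 * on the shmfd.
 */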
return (ENOENT); 11428e38aeffSJohn Baldwin } 11438e38aeffSJohn Baldwin 11447060da62SJamie Gritton static void 11457060da62SJamie Gritton shm_doremove(struct shm_mapping *map) 11467060da62SJamie Gritton { 11477060da62SJamie Gritton map->sm_shmfd->shm_path = NULL; 11487060da62SJamie Gritton LIST_REMOVE(map, sm_link); 11497060da62SJamie Gritton shm_drop(map->sm_shmfd); 11507060da62SJamie Gritton free(map->sm_path, M_SHMFD); 11517060da62SJamie Gritton free(map, M_SHMFD); 11527060da62SJamie Gritton } 11537060da62SJamie Gritton 11548e38aeffSJohn Baldwin int 1155535b1df9SKyle Evans kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, 1156535b1df9SKyle Evans int shmflags, struct filecaps *fcaps, const char *name __unused) 11578e38aeffSJohn Baldwin { 115885078b85SConrad Meyer struct pwddesc *pdp; 11598e38aeffSJohn Baldwin struct shmfd *shmfd; 11608e38aeffSJohn Baldwin struct file *fp; 11618e38aeffSJohn Baldwin char *path; 11620cd95859SKyle Evans void *rl_cookie; 11638e38aeffSJohn Baldwin Fnv32_t fnv; 11648e38aeffSJohn Baldwin mode_t cmode; 1165535b1df9SKyle Evans int error, fd, initial_seals; 1166d301b358SKonstantin Belousov bool largepage; 1167535b1df9SKyle Evans 1168d301b358SKonstantin Belousov if ((shmflags & ~(SHM_ALLOW_SEALING | SHM_GROW_ON_WRITE | 1169d301b358SKonstantin Belousov SHM_LARGEPAGE)) != 0) 1170535b1df9SKyle Evans return (EINVAL); 1171535b1df9SKyle Evans 1172535b1df9SKyle Evans initial_seals = F_SEAL_SEAL; 1173535b1df9SKyle Evans if ((shmflags & SHM_ALLOW_SEALING) != 0) 1174535b1df9SKyle Evans initial_seals &= ~F_SEAL_SEAL; 11758e38aeffSJohn Baldwin 117615bcf785SRobert Watson AUDIT_ARG_FFLAGS(flags); 117715bcf785SRobert Watson AUDIT_ARG_MODE(mode); 117815bcf785SRobert Watson 11797ee1b208SEd Schouten if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) 11808e38aeffSJohn Baldwin return (EINVAL); 11818e38aeffSJohn Baldwin 11827ee1b208SEd Schouten if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0) 11838e38aeffSJohn Baldwin return (EINVAL); 11848e38aeffSJohn Baldwin 1185d301b358SKonstantin Belousov largepage = (shmflags & SHM_LARGEPAGE) != 0; 118678257765SMark Johnston if (largepage && !PMAP_HAS_LARGEPAGES) 1187d301b358SKonstantin Belousov return (ENOTTY); 1188d301b358SKonstantin Belousov 11890cd95859SKyle Evans /* 11900cd95859SKyle Evans * Currently only F_SEAL_SEAL may be set when creating or opening shmfd. 11910cd95859SKyle Evans * If the decision is made later to allow additional seals, care must be 11920cd95859SKyle Evans * taken below to ensure that the seals are properly set if the shmfd 11930cd95859SKyle Evans * already existed -- this currently assumes that only F_SEAL_SEAL can 11940cd95859SKyle Evans * be set and doesn't take further precautions to ensure the validity of 11950cd95859SKyle Evans * the seals being added with respect to current mappings. 11960cd95859SKyle Evans */ 11970cd95859SKyle Evans if ((initial_seals & ~F_SEAL_SEAL) != 0) 11980cd95859SKyle Evans return (EINVAL); 11990cd95859SKyle Evans 1200b112232eSJake Freeland if (userpath != SHM_ANON) { 1201b112232eSJake Freeland error = shm_copyin_path(td, userpath, &path); 1202b112232eSJake Freeland if (error != 0) 1203b112232eSJake Freeland return (error); 1204b112232eSJake Freeland 1205b112232eSJake Freeland #ifdef CAPABILITY_MODE 1206b112232eSJake Freeland /* 1207b112232eSJake Freeland * shm_open(2) is only allowed for anonymous objects. 
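		 * Named objects require a lookup in the global path
		 * namespace, which capability mode denies, so such opens
		 * fail with ECAPMODE below; SHM_ANON objects remain usable.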
1208b112232eSJake Freeland */ 1209b112232eSJake Freeland if (CAP_TRACING(td)) 1210b112232eSJake Freeland ktrcapfail(CAPFAIL_NAMEI, path); 1211b112232eSJake Freeland if (IN_CAPABILITY_MODE(td)) { 1212b112232eSJake Freeland free(path, M_SHMFD); 1213b112232eSJake Freeland return (ECAPMODE); 1214b112232eSJake Freeland } 1215b112232eSJake Freeland #endif 1216b112232eSJake Freeland 1217b112232eSJake Freeland AUDIT_ARG_UPATH1_CANON(path); 1218e411b227SMark Johnston } else { 1219e411b227SMark Johnston path = NULL; 1220b112232eSJake Freeland } 1221b112232eSJake Freeland 122285078b85SConrad Meyer pdp = td->td_proc->p_pd; 122385078b85SConrad Meyer cmode = (mode & ~pdp->pd_cmask) & ACCESSPERMS; 12248e38aeffSJohn Baldwin 1225b5a7ac99SKyle Evans /* 1226b5a7ac99SKyle Evans * shm_open(2) created shm should always have O_CLOEXEC set, as mandated 1227b5a7ac99SKyle Evans * by POSIX. We allow it to be unset here so that an in-kernel 1228b5a7ac99SKyle Evans * interface may be written as a thin layer around shm, optionally not 1229b5a7ac99SKyle Evans * setting CLOEXEC. For shm_open(2), O_CLOEXEC is set unconditionally 1230b5a7ac99SKyle Evans * in sys_shm_open() to keep this implementation compliant. 1231b5a7ac99SKyle Evans */ 1232b5a7ac99SKyle Evans error = falloc_caps(td, &fp, &fd, flags & O_CLOEXEC, fcaps); 1233b112232eSJake Freeland if (error) { 1234b112232eSJake Freeland free(path, M_SHMFD); 12358e38aeffSJohn Baldwin return (error); 1236b112232eSJake Freeland } 12378e38aeffSJohn Baldwin 12388e38aeffSJohn Baldwin /* A SHM_ANON path pointer creates an anonymous object. */ 12397ee1b208SEd Schouten if (userpath == SHM_ANON) { 12408e38aeffSJohn Baldwin /* A read-only anonymous object is pointless. */ 12417ee1b208SEd Schouten if ((flags & O_ACCMODE) == O_RDONLY) { 124290f54cbfSMateusz Guzik fdclose(td, fp, fd); 12438e38aeffSJohn Baldwin fdrop(fp, td); 12448e38aeffSJohn Baldwin return (EINVAL); 12458e38aeffSJohn Baldwin } 1246d301b358SKonstantin Belousov shmfd = shm_alloc(td->td_ucred, cmode, largepage); 12470cd95859SKyle Evans shmfd->shm_seals = initial_seals; 12485dd47b52SKyle Evans shmfd->shm_flags = shmflags; 12498e38aeffSJohn Baldwin } else { 12508e38aeffSJohn Baldwin fnv = fnv_32_str(path, FNV1_32_INIT); 12518e38aeffSJohn Baldwin sx_xlock(&shm_dict_lock); 12528e38aeffSJohn Baldwin shmfd = shm_lookup(path, fnv); 12538e38aeffSJohn Baldwin if (shmfd == NULL) { 12548e38aeffSJohn Baldwin /* Object does not yet exist, create it if requested. 
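			 * Without O_CREAT, the failed lookup is reported
			 * as ENOENT below.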
*/ 12557ee1b208SEd Schouten if (flags & O_CREAT) { 12569b6dd12eSRobert Watson #ifdef MAC 12579b6dd12eSRobert Watson error = mac_posixshm_check_create(td->td_ucred, 12589b6dd12eSRobert Watson path); 12599b6dd12eSRobert Watson if (error == 0) { 12609b6dd12eSRobert Watson #endif 1261d301b358SKonstantin Belousov shmfd = shm_alloc(td->td_ucred, cmode, 1262d301b358SKonstantin Belousov largepage); 12630cd95859SKyle Evans shmfd->shm_seals = initial_seals; 12645dd47b52SKyle Evans shmfd->shm_flags = shmflags; 12658e38aeffSJohn Baldwin shm_insert(path, fnv, shmfd); 12669b6dd12eSRobert Watson #ifdef MAC 12679b6dd12eSRobert Watson } 12689b6dd12eSRobert Watson #endif 12698e38aeffSJohn Baldwin } else { 12708e38aeffSJohn Baldwin free(path, M_SHMFD); 12718e38aeffSJohn Baldwin error = ENOENT; 12728e38aeffSJohn Baldwin } 12738e38aeffSJohn Baldwin } else { 12746df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 12750cd95859SKyle Evans 12760cd95859SKyle Evans /* 12770cd95859SKyle Evans * kern_shm_open() likely shouldn't ever error out on 12780cd95859SKyle Evans * trying to set a seal that already exists, unlike 12790cd95859SKyle Evans * F_ADD_SEALS. This would break terribly as 12800cd95859SKyle Evans * shm_open(2) actually sets F_SEAL_SEAL to maintain 12810cd95859SKyle Evans * historical behavior where the underlying file could 12820cd95859SKyle Evans * not be sealed. 12830cd95859SKyle Evans */ 12840cd95859SKyle Evans initial_seals &= ~shmfd->shm_seals; 12850cd95859SKyle Evans 12868e38aeffSJohn Baldwin /* 12878e38aeffSJohn Baldwin * Object already exists, obtain a new 12888e38aeffSJohn Baldwin * reference if requested and permitted. 12898e38aeffSJohn Baldwin */ 12908e38aeffSJohn Baldwin free(path, M_SHMFD); 12910cd95859SKyle Evans 12920cd95859SKyle Evans /* 12930cd95859SKyle Evans * initial_seals can't set additional seals if we've 12940cd95859SKyle Evans * already been set F_SEAL_SEAL. If F_SEAL_SEAL is set, 12950cd95859SKyle Evans * then we've already removed that one from 12960cd95859SKyle Evans * initial_seals. This is currently redundant as we 12970cd95859SKyle Evans * only allow setting F_SEAL_SEAL at creation time, but 12980cd95859SKyle Evans * it's cheap to check and decreases the effort required 12990cd95859SKyle Evans * to allow additional seals. 13000cd95859SKyle Evans */ 13010cd95859SKyle Evans if ((shmfd->shm_seals & F_SEAL_SEAL) != 0 && 13020cd95859SKyle Evans initial_seals != 0) 13030cd95859SKyle Evans error = EPERM; 13040cd95859SKyle Evans else if ((flags & (O_CREAT | O_EXCL)) == 13050cd95859SKyle Evans (O_CREAT | O_EXCL)) 13068e38aeffSJohn Baldwin error = EEXIST; 13075dd47b52SKyle Evans else if (shmflags != 0 && shmflags != shmfd->shm_flags) 13085dd47b52SKyle Evans error = EINVAL; 13098e38aeffSJohn Baldwin else { 13108e38aeffSJohn Baldwin #ifdef MAC 13118e38aeffSJohn Baldwin error = mac_posixshm_check_open(td->td_ucred, 13127ee1b208SEd Schouten shmfd, FFLAGS(flags & O_ACCMODE)); 13138e38aeffSJohn Baldwin if (error == 0) 13148e38aeffSJohn Baldwin #endif 13158e38aeffSJohn Baldwin error = shm_access(shmfd, td->td_ucred, 13167ee1b208SEd Schouten FFLAGS(flags & O_ACCMODE)); 13178e38aeffSJohn Baldwin } 13188e38aeffSJohn Baldwin 13198e38aeffSJohn Baldwin /* 13208e38aeffSJohn Baldwin * Truncate the file back to zero length if 13218e38aeffSJohn Baldwin * O_TRUNC was specified and the object was 13228e38aeffSJohn Baldwin * opened with read/write. 
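			 * The MAC framework is consulted first, so the
			 * implicit truncation may still be vetoed.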
13238e38aeffSJohn Baldwin */ 13248e38aeffSJohn Baldwin if (error == 0 && 13257ee1b208SEd Schouten (flags & (O_ACCMODE | O_TRUNC)) == 13268e38aeffSJohn Baldwin (O_RDWR | O_TRUNC)) { 13270cd95859SKyle Evans VM_OBJECT_WLOCK(shmfd->shm_object); 13288e38aeffSJohn Baldwin #ifdef MAC 13298e38aeffSJohn Baldwin error = mac_posixshm_check_truncate( 13308e38aeffSJohn Baldwin td->td_ucred, fp->f_cred, shmfd); 13318e38aeffSJohn Baldwin if (error == 0) 13328e38aeffSJohn Baldwin #endif 13330cd95859SKyle Evans error = shm_dotruncate_locked(shmfd, 0, 13340cd95859SKyle Evans rl_cookie); 13350cd95859SKyle Evans VM_OBJECT_WUNLOCK(shmfd->shm_object); 13368e38aeffSJohn Baldwin } 13370cd95859SKyle Evans if (error == 0) { 13380cd95859SKyle Evans /* 13390cd95859SKyle Evans * Currently we only allow F_SEAL_SEAL to be 13400cd95859SKyle Evans * set initially. As noted above, this would 13410cd95859SKyle Evans * need to be reworked should that change. 13420cd95859SKyle Evans */ 13430cd95859SKyle Evans shmfd->shm_seals |= initial_seals; 13448e38aeffSJohn Baldwin shm_hold(shmfd); 13458e38aeffSJohn Baldwin } 13466df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 13470cd95859SKyle Evans } 13488e38aeffSJohn Baldwin sx_xunlock(&shm_dict_lock); 13498e38aeffSJohn Baldwin 13508e38aeffSJohn Baldwin if (error) { 135190f54cbfSMateusz Guzik fdclose(td, fp, fd); 13528e38aeffSJohn Baldwin fdrop(fp, td); 13538e38aeffSJohn Baldwin return (error); 13548e38aeffSJohn Baldwin } 13558e38aeffSJohn Baldwin } 13568e38aeffSJohn Baldwin 13577ee1b208SEd Schouten finit(fp, FFLAGS(flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); 13588e38aeffSJohn Baldwin 13598e38aeffSJohn Baldwin td->td_retval[0] = fd; 13608e38aeffSJohn Baldwin fdrop(fp, td); 13618e38aeffSJohn Baldwin 13628e38aeffSJohn Baldwin return (0); 13638e38aeffSJohn Baldwin } 13648e38aeffSJohn Baldwin 13657ee1b208SEd Schouten /* System calls. 
*/ 1366a9ac5e14SKyle Evans #ifdef COMPAT_FREEBSD12 13677ee1b208SEd Schouten int 1368a9ac5e14SKyle Evans freebsd12_shm_open(struct thread *td, struct freebsd12_shm_open_args *uap) 13697ee1b208SEd Schouten { 13707ee1b208SEd Schouten 1371535b1df9SKyle Evans return (kern_shm_open(td, uap->path, uap->flags | O_CLOEXEC, 1372535b1df9SKyle Evans uap->mode, NULL)); 13737ee1b208SEd Schouten } 1374a9ac5e14SKyle Evans #endif 13757ee1b208SEd Schouten 13768e38aeffSJohn Baldwin int 13778451d0ddSKip Macy sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap) 13788e38aeffSJohn Baldwin { 13798e38aeffSJohn Baldwin char *path; 13808e38aeffSJohn Baldwin Fnv32_t fnv; 13818e38aeffSJohn Baldwin int error; 13828e38aeffSJohn Baldwin 13832d5603feSDavid Bright error = shm_copyin_path(td, uap->path, &path); 13842d5603feSDavid Bright if (error != 0) 13858e38aeffSJohn Baldwin return (error); 13862d5603feSDavid Bright 138715bcf785SRobert Watson AUDIT_ARG_UPATH1_CANON(path); 13888e38aeffSJohn Baldwin fnv = fnv_32_str(path, FNV1_32_INIT); 13898e38aeffSJohn Baldwin sx_xlock(&shm_dict_lock); 13908e38aeffSJohn Baldwin error = shm_remove(path, fnv, td->td_ucred); 13918e38aeffSJohn Baldwin sx_xunlock(&shm_dict_lock); 13924cf919edSMark Johnston free(path, M_SHMFD); 13938e38aeffSJohn Baldwin 13948e38aeffSJohn Baldwin return (error); 13958e38aeffSJohn Baldwin } 13968e38aeffSJohn Baldwin 13978e38aeffSJohn Baldwin int 13989afb12baSDavid Bright sys_shm_rename(struct thread *td, struct shm_rename_args *uap) 13999afb12baSDavid Bright { 14009afb12baSDavid Bright char *path_from = NULL, *path_to = NULL; 14019afb12baSDavid Bright Fnv32_t fnv_from, fnv_to; 14029afb12baSDavid Bright struct shmfd *fd_from; 14039afb12baSDavid Bright struct shmfd *fd_to; 14049afb12baSDavid Bright int error; 14059afb12baSDavid Bright int flags; 14069afb12baSDavid Bright 14079afb12baSDavid Bright flags = uap->flags; 14082d5603feSDavid Bright AUDIT_ARG_FFLAGS(flags); 14099afb12baSDavid Bright 14109afb12baSDavid Bright /* 14119afb12baSDavid Bright * Make sure the user passed only valid flags. 14129afb12baSDavid Bright * If you add a new flag, please add a new term here. 14139afb12baSDavid Bright */ 14149afb12baSDavid Bright if ((flags & ~( 14159afb12baSDavid Bright SHM_RENAME_NOREPLACE | 14169afb12baSDavid Bright SHM_RENAME_EXCHANGE 14179afb12baSDavid Bright )) != 0) { 14189afb12baSDavid Bright error = EINVAL; 14199afb12baSDavid Bright goto out; 14209afb12baSDavid Bright } 14219afb12baSDavid Bright 14229afb12baSDavid Bright /* 14239afb12baSDavid Bright * EXCHANGE and NOREPLACE don't quite make sense together. Let's 14249afb12baSDavid Bright * force the user to choose one or the other. 
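	 * (NOREPLACE fails with EEXIST when the destination name exists,
	 * while EXCHANGE atomically swaps the two names.)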
14259afb12baSDavid Bright */ 14269afb12baSDavid Bright if ((flags & SHM_RENAME_NOREPLACE) != 0 && 14279afb12baSDavid Bright (flags & SHM_RENAME_EXCHANGE) != 0) { 14289afb12baSDavid Bright error = EINVAL; 14299afb12baSDavid Bright goto out; 14309afb12baSDavid Bright } 14319afb12baSDavid Bright 14322d5603feSDavid Bright /* Renaming to or from anonymous makes no sense */ 14332d5603feSDavid Bright if (uap->path_from == SHM_ANON || uap->path_to == SHM_ANON) { 14342d5603feSDavid Bright error = EINVAL; 14352d5603feSDavid Bright goto out; 14362d5603feSDavid Bright } 14372d5603feSDavid Bright 14382d5603feSDavid Bright error = shm_copyin_path(td, uap->path_from, &path_from); 14392d5603feSDavid Bright if (error != 0) 14409afb12baSDavid Bright goto out; 14419afb12baSDavid Bright 14422d5603feSDavid Bright error = shm_copyin_path(td, uap->path_to, &path_to); 14432d5603feSDavid Bright if (error != 0) 14449afb12baSDavid Bright goto out; 14459afb12baSDavid Bright 14462d5603feSDavid Bright AUDIT_ARG_UPATH1_CANON(path_from); 14472d5603feSDavid Bright AUDIT_ARG_UPATH2_CANON(path_to); 14482d5603feSDavid Bright 14499afb12baSDavid Bright /* Rename with from/to equal is a no-op */ 14502d5603feSDavid Bright if (strcmp(path_from, path_to) == 0) 14519afb12baSDavid Bright goto out; 14529afb12baSDavid Bright 14539afb12baSDavid Bright fnv_from = fnv_32_str(path_from, FNV1_32_INIT); 14549afb12baSDavid Bright fnv_to = fnv_32_str(path_to, FNV1_32_INIT); 14559afb12baSDavid Bright 14569afb12baSDavid Bright sx_xlock(&shm_dict_lock); 14579afb12baSDavid Bright 14589afb12baSDavid Bright fd_from = shm_lookup(path_from, fnv_from); 14599afb12baSDavid Bright if (fd_from == NULL) { 14609afb12baSDavid Bright error = ENOENT; 14612d5603feSDavid Bright goto out_locked; 14629afb12baSDavid Bright } 14639afb12baSDavid Bright 14649afb12baSDavid Bright fd_to = shm_lookup(path_to, fnv_to); 14659afb12baSDavid Bright if ((flags & SHM_RENAME_NOREPLACE) != 0 && fd_to != NULL) { 14669afb12baSDavid Bright error = EEXIST; 14672d5603feSDavid Bright goto out_locked; 14689afb12baSDavid Bright } 14699afb12baSDavid Bright 14709afb12baSDavid Bright /* 14719afb12baSDavid Bright * Unconditionally prevents shm_remove from invalidating the 'from' 14729afb12baSDavid Bright * shm's state. 14739afb12baSDavid Bright */ 14749afb12baSDavid Bright shm_hold(fd_from); 14759afb12baSDavid Bright error = shm_remove(path_from, fnv_from, td->td_ucred); 14769afb12baSDavid Bright 14779afb12baSDavid Bright /* 14789afb12baSDavid Bright * One of my assumptions failed if ENOENT (e.g. locking didn't 14799afb12baSDavid Bright * protect us) 14809afb12baSDavid Bright */ 14819afb12baSDavid Bright KASSERT(error != ENOENT, ("Our shm disappeared during shm_rename: %s", 14829afb12baSDavid Bright path_from)); 14832d5603feSDavid Bright if (error != 0) { 14849afb12baSDavid Bright shm_drop(fd_from); 14852d5603feSDavid Bright goto out_locked; 14869afb12baSDavid Bright } 14879afb12baSDavid Bright 14889afb12baSDavid Bright /* 14899afb12baSDavid Bright * If we are exchanging, we need to ensure the shm_remove below 14909afb12baSDavid Bright * doesn't invalidate the dest shm's state. 14919afb12baSDavid Bright */ 14929afb12baSDavid Bright if ((flags & SHM_RENAME_EXCHANGE) != 0 && fd_to != NULL) 14939afb12baSDavid Bright shm_hold(fd_to); 14949afb12baSDavid Bright 14959afb12baSDavid Bright /* 14969afb12baSDavid Bright * NOTE: if path_to is not already in the hash, c'est la vie; 14979afb12baSDavid Bright * it simply means we have nothing already at path_to to unlink. 
14989afb12baSDavid Bright * That is the ENOENT case. 14999afb12baSDavid Bright * 15009afb12baSDavid Bright * If we somehow don't have access to unlink this guy, but 15019afb12baSDavid Bright * did for the shm at path_from, then relink the shm to path_from 15029afb12baSDavid Bright * and abort with EACCES. 15039afb12baSDavid Bright * 15049afb12baSDavid Bright * All other errors: that is weird; let's relink and abort the 15059afb12baSDavid Bright * operation. 15069afb12baSDavid Bright */ 15079afb12baSDavid Bright error = shm_remove(path_to, fnv_to, td->td_ucred); 15082d5603feSDavid Bright if (error != 0 && error != ENOENT) { 15099afb12baSDavid Bright shm_insert(path_from, fnv_from, fd_from); 15109afb12baSDavid Bright shm_drop(fd_from); 15119afb12baSDavid Bright /* Don't free path_from now, since the hash references it */ 15129afb12baSDavid Bright path_from = NULL; 15132d5603feSDavid Bright goto out_locked; 15149afb12baSDavid Bright } 15159afb12baSDavid Bright 15162d5603feSDavid Bright error = 0; 15172d5603feSDavid Bright 15189afb12baSDavid Bright shm_insert(path_to, fnv_to, fd_from); 15199afb12baSDavid Bright 15209afb12baSDavid Bright /* Don't free path_to now, since the hash references it */ 15219afb12baSDavid Bright path_to = NULL; 15229afb12baSDavid Bright 15239afb12baSDavid Bright /* We kept a ref when we removed, and incremented again in insert */ 15249afb12baSDavid Bright shm_drop(fd_from); 15259afb12baSDavid Bright KASSERT(fd_from->shm_refs > 0, ("Expected >0 refs; got: %d\n", 15269afb12baSDavid Bright fd_from->shm_refs)); 15279afb12baSDavid Bright 15289afb12baSDavid Bright if ((flags & SHM_RENAME_EXCHANGE) != 0 && fd_to != NULL) { 15299afb12baSDavid Bright shm_insert(path_from, fnv_from, fd_to); 15309afb12baSDavid Bright path_from = NULL; 15319afb12baSDavid Bright shm_drop(fd_to); 15329afb12baSDavid Bright KASSERT(fd_to->shm_refs > 0, ("Expected >0 refs; got: %d\n", 15339afb12baSDavid Bright fd_to->shm_refs)); 15349afb12baSDavid Bright } 15359afb12baSDavid Bright 15362d5603feSDavid Bright out_locked: 15379afb12baSDavid Bright sx_xunlock(&shm_dict_lock); 15389afb12baSDavid Bright 15399afb12baSDavid Bright out: 15409afb12baSDavid Bright free(path_from, M_SHMFD); 15419afb12baSDavid Bright free(path_to, M_SHMFD); 15429afb12baSDavid Bright return (error); 15439afb12baSDavid Bright } 15449afb12baSDavid Bright 1545d301b358SKonstantin Belousov static int 1546d301b358SKonstantin Belousov shm_mmap_large(struct shmfd *shmfd, vm_map_t map, vm_offset_t *addr, 1547d301b358SKonstantin Belousov vm_size_t size, vm_prot_t prot, vm_prot_t max_prot, int flags, 154879783634SKonstantin Belousov vm_ooffset_t foff, struct thread *td) 1549d301b358SKonstantin Belousov { 1550d301b358SKonstantin Belousov struct vmspace *vms; 1551d301b358SKonstantin Belousov vm_map_entry_t next_entry, prev_entry; 1552d301b358SKonstantin Belousov vm_offset_t align, mask, maxaddr; 1553d301b358SKonstantin Belousov int docow, error, rv, try; 1554d301b358SKonstantin Belousov bool curmap; 1555d301b358SKonstantin Belousov 1556d301b358SKonstantin Belousov if (shmfd->shm_lp_psind == 0) 1557d301b358SKonstantin Belousov return (EINVAL); 1558d301b358SKonstantin Belousov 1559d301b358SKonstantin Belousov /* MAP_PRIVATE is disabled */ 1560d301b358SKonstantin Belousov if ((flags & ~(MAP_SHARED | MAP_FIXED | MAP_EXCL | 1561f3e11927SDmitry Chagin MAP_NOCORE | MAP_32BIT | MAP_ALIGNMENT_MASK)) != 0) 1562d301b358SKonstantin Belousov return (EINVAL); 1563d301b358SKonstantin Belousov 1564d301b358SKonstantin Belousov vms = td->td_proc->p_vmspace; 
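	/*
	 * Resource limits (racct) are only charged when mapping into the
	 * calling process's own address space.
	 */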
1565d301b358SKonstantin Belousov curmap = map == &vms->vm_map; 1566d301b358SKonstantin Belousov if (curmap) { 1567d301b358SKonstantin Belousov error = kern_mmap_racct_check(td, map, size); 1568d301b358SKonstantin Belousov if (error != 0) 1569d301b358SKonstantin Belousov return (error); 1570d301b358SKonstantin Belousov } 1571d301b358SKonstantin Belousov 1572d301b358SKonstantin Belousov docow = shmfd->shm_lp_psind << MAP_SPLIT_BOUNDARY_SHIFT; 1573d301b358SKonstantin Belousov docow |= MAP_INHERIT_SHARE; 1574d301b358SKonstantin Belousov if ((flags & MAP_NOCORE) != 0) 1575d301b358SKonstantin Belousov docow |= MAP_DISABLE_COREDUMP; 1576d301b358SKonstantin Belousov 1577d301b358SKonstantin Belousov mask = pagesizes[shmfd->shm_lp_psind] - 1; 1578d301b358SKonstantin Belousov if ((foff & mask) != 0) 1579d301b358SKonstantin Belousov return (EINVAL); 1580d301b358SKonstantin Belousov maxaddr = vm_map_max(map); 1581d301b358SKonstantin Belousov if ((flags & MAP_32BIT) != 0 && maxaddr > MAP_32BIT_MAX_ADDR) 1582d301b358SKonstantin Belousov maxaddr = MAP_32BIT_MAX_ADDR; 1583d301b358SKonstantin Belousov if (size == 0 || (size & mask) != 0 || 1584d301b358SKonstantin Belousov (*addr != 0 && ((*addr & mask) != 0 || 1585d301b358SKonstantin Belousov *addr + size < *addr || *addr + size > maxaddr))) 1586d301b358SKonstantin Belousov return (EINVAL); 1587d301b358SKonstantin Belousov 1588d301b358SKonstantin Belousov align = flags & MAP_ALIGNMENT_MASK; 1589d301b358SKonstantin Belousov if (align == 0) { 1590d301b358SKonstantin Belousov align = pagesizes[shmfd->shm_lp_psind]; 1591d301b358SKonstantin Belousov } else if (align == MAP_ALIGNED_SUPER) { 1592*3e00c11aSAlan Cox /* 1593*3e00c11aSAlan Cox * MAP_ALIGNED_SUPER is only supported on superpage sizes, 1594*3e00c11aSAlan Cox * i.e., [1, VM_NRESERVLEVEL]. shmfd->shm_lp_psind < 1 is 1595*3e00c11aSAlan Cox * handled above. 1596*3e00c11aSAlan Cox */ 1597*3e00c11aSAlan Cox if ( 1598*3e00c11aSAlan Cox #if VM_NRESERVLEVEL > 0 1599*3e00c11aSAlan Cox shmfd->shm_lp_psind > VM_NRESERVLEVEL 1600*3e00c11aSAlan Cox #else 1601*3e00c11aSAlan Cox shmfd->shm_lp_psind > 1 1602*3e00c11aSAlan Cox #endif 1603*3e00c11aSAlan Cox ) 1604d301b358SKonstantin Belousov return (EINVAL); 1605*3e00c11aSAlan Cox align = pagesizes[shmfd->shm_lp_psind]; 1606d301b358SKonstantin Belousov } else { 1607d301b358SKonstantin Belousov align >>= MAP_ALIGNMENT_SHIFT; 1608d301b358SKonstantin Belousov align = 1ULL << align; 1609d301b358SKonstantin Belousov /* Also handles overflow. 
*/
1610d301b358SKonstantin Belousov 		if (align < pagesizes[shmfd->shm_lp_psind])
1611d301b358SKonstantin Belousov 			return (EINVAL);
1612d301b358SKonstantin Belousov 	}
1613d301b358SKonstantin Belousov 
1614d301b358SKonstantin Belousov 	vm_map_lock(map);
1615d301b358SKonstantin Belousov 	if ((flags & MAP_FIXED) == 0) {
1616d301b358SKonstantin Belousov 		try = 1;
1617d301b358SKonstantin Belousov 		if (curmap && (*addr == 0 ||
1618d301b358SKonstantin Belousov 		    (*addr >= round_page((vm_offset_t)vms->vm_taddr) &&
1619d301b358SKonstantin Belousov 		    *addr < round_page((vm_offset_t)vms->vm_daddr +
1620d301b358SKonstantin Belousov 		    lim_max(td, RLIMIT_DATA))))) {
1621d301b358SKonstantin Belousov 			*addr = roundup2((vm_offset_t)vms->vm_daddr +
1622d301b358SKonstantin Belousov 			    lim_max(td, RLIMIT_DATA),
1623d301b358SKonstantin Belousov 			    pagesizes[shmfd->shm_lp_psind]);
1624d301b358SKonstantin Belousov 		}
1625d301b358SKonstantin Belousov again:
1626d301b358SKonstantin Belousov 		rv = vm_map_find_aligned(map, addr, size, maxaddr, align);
1627d301b358SKonstantin Belousov 		if (rv != KERN_SUCCESS) {
1628d301b358SKonstantin Belousov 			if (try == 1) {
1629d301b358SKonstantin Belousov 				try = 2;
1630d301b358SKonstantin Belousov 				*addr = vm_map_min(map);
1631d301b358SKonstantin Belousov 				if ((*addr & mask) != 0)
1632d301b358SKonstantin Belousov 					*addr = (*addr + mask) & ~mask;
1633d301b358SKonstantin Belousov 				goto again;
1634d301b358SKonstantin Belousov 			}
1635d301b358SKonstantin Belousov 			goto fail1;
1636d301b358SKonstantin Belousov 		}
1637d301b358SKonstantin Belousov 	} else if ((flags & MAP_EXCL) == 0) {
1638d301b358SKonstantin Belousov 		rv = vm_map_delete(map, *addr, *addr + size);
1639d301b358SKonstantin Belousov 		if (rv != KERN_SUCCESS)
1640d301b358SKonstantin Belousov 			goto fail1;
1641d301b358SKonstantin Belousov 	} else {
1642d301b358SKonstantin Belousov 		error = ENOSPC;
1643d301b358SKonstantin Belousov 		if (vm_map_lookup_entry(map, *addr, &prev_entry))
1644d301b358SKonstantin Belousov 			goto fail;
1645d301b358SKonstantin Belousov 		next_entry = vm_map_entry_succ(prev_entry);
1646d301b358SKonstantin Belousov 		if (next_entry->start < *addr + size)
1647d301b358SKonstantin Belousov 			goto fail;
1648d301b358SKonstantin Belousov 	}
1649d301b358SKonstantin Belousov 
1650d301b358SKonstantin Belousov 	rv = vm_map_insert(map, shmfd->shm_object, foff, *addr, *addr + size,
1651d301b358SKonstantin Belousov 	    prot, max_prot, docow);
1652d301b358SKonstantin Belousov fail1:
1653d301b358SKonstantin Belousov 	error = vm_mmap_to_errno(rv);
1654d301b358SKonstantin Belousov fail:
1655d301b358SKonstantin Belousov 	vm_map_unlock(map);
1656d301b358SKonstantin Belousov 	return (error);
1657d301b358SKonstantin Belousov }
1658d301b358SKonstantin Belousov 
1659d301b358SKonstantin Belousov static int
16607077c426SJohn Baldwin shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize,
16617077c426SJohn Baldwin     vm_prot_t prot, vm_prot_t cap_maxprot, int flags,
16627077c426SJohn Baldwin     vm_ooffset_t foff, struct thread *td)
16638e38aeffSJohn Baldwin {
16647077c426SJohn Baldwin 	struct shmfd *shmfd;
16657077c426SJohn Baldwin 	vm_prot_t maxprot;
16667077c426SJohn Baldwin 	int error;
1667dca52ab4SKyle Evans 	bool writecnt;
1668af755d3eSKyle Evans 	void *rl_cookie;
16697077c426SJohn Baldwin 
16707077c426SJohn Baldwin 	shmfd = fp->f_data;
16717077c426SJohn Baldwin 	maxprot = VM_PROT_NONE;
16727077c426SJohn Baldwin 
16736df6facfSKonstantin Belousov 	rl_cookie = shm_rangelock_rlock(shmfd, 0, objsize);
16747077c426SJohn Baldwin 	/* FREAD should always be set.
*/ 16757077c426SJohn Baldwin if ((fp->f_flag & FREAD) != 0) 16767077c426SJohn Baldwin maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; 167758366f05SKyle Evans 167858366f05SKyle Evans /* 167958366f05SKyle Evans * If FWRITE's set, we can allow VM_PROT_WRITE unless it's a shared 1680c7841c6bSMark Johnston * mapping with a write seal applied. Private mappings are always 1681c7841c6bSMark Johnston * writeable. 168258366f05SKyle Evans */ 1683c7841c6bSMark Johnston if ((flags & MAP_SHARED) == 0) { 1684c7841c6bSMark Johnston cap_maxprot |= VM_PROT_WRITE; 16857077c426SJohn Baldwin maxprot |= VM_PROT_WRITE; 1686c7841c6bSMark Johnston writecnt = false; 1687c7841c6bSMark Johnston } else { 1688c7841c6bSMark Johnston if ((fp->f_flag & FWRITE) != 0 && 1689c7841c6bSMark Johnston (shmfd->shm_seals & F_SEAL_WRITE) == 0) 1690c7841c6bSMark Johnston maxprot |= VM_PROT_WRITE; 1691af755d3eSKyle Evans 169251a16c84SKyle Evans /* 169351a16c84SKyle Evans * Any mappings from a writable descriptor may be upgraded to 169451a16c84SKyle Evans * VM_PROT_WRITE with mprotect(2), unless a write-seal was 169551a16c84SKyle Evans * applied between the open and subsequent mmap(2). We want to 169651a16c84SKyle Evans * reject application of a write seal as long as any such 169751a16c84SKyle Evans * mapping exists so that the seal cannot be trivially bypassed. 169851a16c84SKyle Evans */ 169951a16c84SKyle Evans writecnt = (maxprot & VM_PROT_WRITE) != 0; 170051a16c84SKyle Evans if (!writecnt && (prot & VM_PROT_WRITE) != 0) { 1701af755d3eSKyle Evans error = EACCES; 1702af755d3eSKyle Evans goto out; 1703af755d3eSKyle Evans } 1704c7841c6bSMark Johnston } 17057077c426SJohn Baldwin maxprot &= cap_maxprot; 17067077c426SJohn Baldwin 1707987ff181SKonstantin Belousov /* See comment in vn_mmap(). */ 1708987ff181SKonstantin Belousov if ( 1709987ff181SKonstantin Belousov #ifdef _LP64 1710987ff181SKonstantin Belousov objsize > OFF_MAX || 1711987ff181SKonstantin Belousov #endif 1712f9cc8410SEric van Gyzen foff > OFF_MAX - objsize) { 1713af755d3eSKyle Evans error = EINVAL; 1714af755d3eSKyle Evans goto out; 1715af755d3eSKyle Evans } 1716987ff181SKonstantin Belousov 17177077c426SJohn Baldwin #ifdef MAC 17187077c426SJohn Baldwin error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags); 17197077c426SJohn Baldwin if (error != 0) 1720af755d3eSKyle Evans goto out; 17217077c426SJohn Baldwin #endif 17228e38aeffSJohn Baldwin 17238e38aeffSJohn Baldwin mtx_lock(&shm_timestamp_lock); 17248e38aeffSJohn Baldwin vfs_timestamp(&shmfd->shm_atime); 17258e38aeffSJohn Baldwin mtx_unlock(&shm_timestamp_lock); 17268e38aeffSJohn Baldwin vm_object_reference(shmfd->shm_object); 17277077c426SJohn Baldwin 1728d301b358SKonstantin Belousov if (shm_largepage(shmfd)) { 172979783634SKonstantin Belousov writecnt = false; 1730d301b358SKonstantin Belousov error = shm_mmap_large(shmfd, map, addr, objsize, prot, 173179783634SKonstantin Belousov maxprot, flags, foff, td); 1732d301b358SKonstantin Belousov } else { 173379783634SKonstantin Belousov if (writecnt) { 173479783634SKonstantin Belousov vm_pager_update_writecount(shmfd->shm_object, 0, 173579783634SKonstantin Belousov objsize); 173679783634SKonstantin Belousov } 17377077c426SJohn Baldwin error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags, 1738dca52ab4SKyle Evans shmfd->shm_object, foff, writecnt, td); 1739d301b358SKonstantin Belousov } 1740dca52ab4SKyle Evans if (error != 0) { 1741dca52ab4SKyle Evans if (writecnt) 1742dca52ab4SKyle Evans vm_pager_release_writecount(shmfd->shm_object, 0, 1743dca52ab4SKyle Evans 
objsize); 17447077c426SJohn Baldwin vm_object_deallocate(shmfd->shm_object); 1745dca52ab4SKyle Evans } 1746af755d3eSKyle Evans out: 17476df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 174834d3e89fSKonstantin Belousov return (error); 17498e38aeffSJohn Baldwin } 17509c00bb91SKonstantin Belousov 17519c00bb91SKonstantin Belousov static int 17529c00bb91SKonstantin Belousov shm_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 17539c00bb91SKonstantin Belousov struct thread *td) 17549c00bb91SKonstantin Belousov { 17559c00bb91SKonstantin Belousov struct shmfd *shmfd; 17569c00bb91SKonstantin Belousov int error; 17579c00bb91SKonstantin Belousov 17589c00bb91SKonstantin Belousov error = 0; 17599c00bb91SKonstantin Belousov shmfd = fp->f_data; 17609c00bb91SKonstantin Belousov mtx_lock(&shm_timestamp_lock); 17619c00bb91SKonstantin Belousov /* 17629c00bb91SKonstantin Belousov * SUSv4 says that x bits of permission need not be affected. 17639c00bb91SKonstantin Belousov * Be consistent with our shm_open there. 17649c00bb91SKonstantin Belousov */ 17659c00bb91SKonstantin Belousov #ifdef MAC 17669c00bb91SKonstantin Belousov error = mac_posixshm_check_setmode(active_cred, shmfd, mode); 17679c00bb91SKonstantin Belousov if (error != 0) 17689c00bb91SKonstantin Belousov goto out; 17699c00bb91SKonstantin Belousov #endif 1770d292b194SMateusz Guzik error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, 1771d292b194SMateusz Guzik VADMIN, active_cred); 17729c00bb91SKonstantin Belousov if (error != 0) 17739c00bb91SKonstantin Belousov goto out; 17749c00bb91SKonstantin Belousov shmfd->shm_mode = mode & ACCESSPERMS; 17759c00bb91SKonstantin Belousov out: 17769c00bb91SKonstantin Belousov mtx_unlock(&shm_timestamp_lock); 17779c00bb91SKonstantin Belousov return (error); 17789c00bb91SKonstantin Belousov } 17799c00bb91SKonstantin Belousov 17809c00bb91SKonstantin Belousov static int 17819c00bb91SKonstantin Belousov shm_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 17829c00bb91SKonstantin Belousov struct thread *td) 17839c00bb91SKonstantin Belousov { 17849c00bb91SKonstantin Belousov struct shmfd *shmfd; 17859c00bb91SKonstantin Belousov int error; 17869c00bb91SKonstantin Belousov 178768889ed6SKonstantin Belousov error = 0; 17889c00bb91SKonstantin Belousov shmfd = fp->f_data; 17899c00bb91SKonstantin Belousov mtx_lock(&shm_timestamp_lock); 17909c00bb91SKonstantin Belousov #ifdef MAC 17919c00bb91SKonstantin Belousov error = mac_posixshm_check_setowner(active_cred, shmfd, uid, gid); 17929c00bb91SKonstantin Belousov if (error != 0) 17939c00bb91SKonstantin Belousov goto out; 17949c00bb91SKonstantin Belousov #endif 17959c00bb91SKonstantin Belousov if (uid == (uid_t)-1) 17969c00bb91SKonstantin Belousov uid = shmfd->shm_uid; 17979c00bb91SKonstantin Belousov if (gid == (gid_t)-1) 17989c00bb91SKonstantin Belousov gid = shmfd->shm_gid; 17999c00bb91SKonstantin Belousov if (((uid != shmfd->shm_uid && uid != active_cred->cr_uid) || 18009c00bb91SKonstantin Belousov (gid != shmfd->shm_gid && !groupmember(gid, active_cred))) && 1801cc426dd3SMateusz Guzik (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN))) 18029c00bb91SKonstantin Belousov goto out; 18039c00bb91SKonstantin Belousov shmfd->shm_uid = uid; 18049c00bb91SKonstantin Belousov shmfd->shm_gid = gid; 18059c00bb91SKonstantin Belousov out: 18069c00bb91SKonstantin Belousov mtx_unlock(&shm_timestamp_lock); 18079c00bb91SKonstantin Belousov return (error); 18089c00bb91SKonstantin Belousov } 1809fb680e16SJohn Baldwin 
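/*
 * Illustrative userspace sequence (not part of this file) that reaches
 * shm_chmod() and shm_chown() above through the descriptor's fileops;
 * the object name and gid are hypothetical:
 *
 *	int fd = shm_open("/example", O_RDWR | O_CREAT, 0600);
 *	if (fd >= 0) {
 *		(void)fchmod(fd, 0640);			// shm_chmod()
 *		(void)fchown(fd, (uid_t)-1, gid);	// shm_chown(), keep uid
 *		close(fd);
 *	}
 */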
1810fb680e16SJohn Baldwin /* 1811fb680e16SJohn Baldwin * Helper routines to allow the backing object of a shared memory file 1812fb680e16SJohn Baldwin * descriptor to be mapped in the kernel. 1813fb680e16SJohn Baldwin */ 1814fb680e16SJohn Baldwin int 1815fb680e16SJohn Baldwin shm_map(struct file *fp, size_t size, off_t offset, void **memp) 1816fb680e16SJohn Baldwin { 1817fb680e16SJohn Baldwin struct shmfd *shmfd; 1818fb680e16SJohn Baldwin vm_offset_t kva, ofs; 1819fb680e16SJohn Baldwin vm_object_t obj; 1820fb680e16SJohn Baldwin int rv; 1821fb680e16SJohn Baldwin 1822fb680e16SJohn Baldwin if (fp->f_type != DTYPE_SHM) 1823fb680e16SJohn Baldwin return (EINVAL); 1824fb680e16SJohn Baldwin shmfd = fp->f_data; 1825fb680e16SJohn Baldwin obj = shmfd->shm_object; 182689f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 1827fb680e16SJohn Baldwin /* 1828fb680e16SJohn Baldwin * XXXRW: This validation is probably insufficient, and subject to 1829fb680e16SJohn Baldwin * sign errors. It should be fixed. 1830fb680e16SJohn Baldwin */ 1831fb680e16SJohn Baldwin if (offset >= shmfd->shm_size || 1832fb680e16SJohn Baldwin offset + size > round_page(shmfd->shm_size)) { 183389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1834fb680e16SJohn Baldwin return (EINVAL); 1835fb680e16SJohn Baldwin } 1836fb680e16SJohn Baldwin 1837fb680e16SJohn Baldwin shmfd->shm_kmappings++; 1838fb680e16SJohn Baldwin vm_object_reference_locked(obj); 183989f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1840fb680e16SJohn Baldwin 1841fb680e16SJohn Baldwin /* Map the object into the kernel_map and wire it. */ 1842fb680e16SJohn Baldwin kva = vm_map_min(kernel_map); 1843fb680e16SJohn Baldwin ofs = offset & PAGE_MASK; 1844fb680e16SJohn Baldwin offset = trunc_page(offset); 1845fb680e16SJohn Baldwin size = round_page(size + ofs); 1846edb572a3SJohn Baldwin rv = vm_map_find(kernel_map, obj, offset, &kva, size, 0, 18475e3a17c0SJohn Baldwin VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, 1848fb680e16SJohn Baldwin VM_PROT_READ | VM_PROT_WRITE, 0); 1849fb680e16SJohn Baldwin if (rv == KERN_SUCCESS) { 1850fb680e16SJohn Baldwin rv = vm_map_wire(kernel_map, kva, kva + size, 1851fb680e16SJohn Baldwin VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); 1852fb680e16SJohn Baldwin if (rv == KERN_SUCCESS) { 1853fb680e16SJohn Baldwin *memp = (void *)(kva + ofs); 1854fb680e16SJohn Baldwin return (0); 1855fb680e16SJohn Baldwin } 1856fb680e16SJohn Baldwin vm_map_remove(kernel_map, kva, kva + size); 1857fb680e16SJohn Baldwin } else 1858fb680e16SJohn Baldwin vm_object_deallocate(obj); 1859fb680e16SJohn Baldwin 1860fb680e16SJohn Baldwin /* On failure, drop our mapping reference. */ 186189f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 1862fb680e16SJohn Baldwin shmfd->shm_kmappings--; 186389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1864fb680e16SJohn Baldwin 1865338e7cf2SJohn Baldwin return (vm_mmap_to_errno(rv)); 1866fb680e16SJohn Baldwin } 1867fb680e16SJohn Baldwin 1868fb680e16SJohn Baldwin /* 1869fb680e16SJohn Baldwin * We require the caller to unmap the entire entry. This allows us to 1870fb680e16SJohn Baldwin * safely decrement shm_kmappings when a mapping is removed. 
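 * Partial unmaps are rejected with EINVAL.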
1871fb680e16SJohn Baldwin */ 1872fb680e16SJohn Baldwin int 1873fb680e16SJohn Baldwin shm_unmap(struct file *fp, void *mem, size_t size) 1874fb680e16SJohn Baldwin { 1875fb680e16SJohn Baldwin struct shmfd *shmfd; 1876fb680e16SJohn Baldwin vm_map_entry_t entry; 1877fb680e16SJohn Baldwin vm_offset_t kva, ofs; 1878fb680e16SJohn Baldwin vm_object_t obj; 1879fb680e16SJohn Baldwin vm_pindex_t pindex; 1880fb680e16SJohn Baldwin vm_prot_t prot; 1881fb680e16SJohn Baldwin boolean_t wired; 1882fb680e16SJohn Baldwin vm_map_t map; 1883fb680e16SJohn Baldwin int rv; 1884fb680e16SJohn Baldwin 1885fb680e16SJohn Baldwin if (fp->f_type != DTYPE_SHM) 1886fb680e16SJohn Baldwin return (EINVAL); 1887fb680e16SJohn Baldwin shmfd = fp->f_data; 1888fb680e16SJohn Baldwin kva = (vm_offset_t)mem; 1889fb680e16SJohn Baldwin ofs = kva & PAGE_MASK; 1890fb680e16SJohn Baldwin kva = trunc_page(kva); 1891fb680e16SJohn Baldwin size = round_page(size + ofs); 1892fb680e16SJohn Baldwin map = kernel_map; 1893fb680e16SJohn Baldwin rv = vm_map_lookup(&map, kva, VM_PROT_READ | VM_PROT_WRITE, &entry, 1894fb680e16SJohn Baldwin &obj, &pindex, &prot, &wired); 1895fb680e16SJohn Baldwin if (rv != KERN_SUCCESS) 1896fb680e16SJohn Baldwin return (EINVAL); 1897fb680e16SJohn Baldwin if (entry->start != kva || entry->end != kva + size) { 1898fb680e16SJohn Baldwin vm_map_lookup_done(map, entry); 1899fb680e16SJohn Baldwin return (EINVAL); 1900fb680e16SJohn Baldwin } 1901fb680e16SJohn Baldwin vm_map_lookup_done(map, entry); 1902fb680e16SJohn Baldwin if (obj != shmfd->shm_object) 1903fb680e16SJohn Baldwin return (EINVAL); 1904fb680e16SJohn Baldwin vm_map_remove(map, kva, kva + size); 190589f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 1906fb680e16SJohn Baldwin KASSERT(shmfd->shm_kmappings > 0, ("shm_unmap: object not mapped")); 1907fb680e16SJohn Baldwin shmfd->shm_kmappings--; 190889f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 1909fb680e16SJohn Baldwin return (0); 1910fb680e16SJohn Baldwin } 1911e506e182SJohn Baldwin 19129696feebSJohn Baldwin static int 191356d0e33eSKonstantin Belousov shm_fill_kinfo_locked(struct shmfd *shmfd, struct kinfo_file *kif, bool list) 1914e506e182SJohn Baldwin { 1915cc7b259aSJamie Gritton const char *path, *pr_path; 1916cc7b259aSJamie Gritton size_t pr_pathlen; 191756d0e33eSKonstantin Belousov bool visible; 1918e506e182SJohn Baldwin 191956d0e33eSKonstantin Belousov sx_assert(&shm_dict_lock, SA_LOCKED); 19209696feebSJohn Baldwin kif->kf_type = KF_TYPE_SHM; 192156d0e33eSKonstantin Belousov kif->kf_un.kf_file.kf_file_mode = S_IFREG | shmfd->shm_mode; 19229696feebSJohn Baldwin kif->kf_un.kf_file.kf_file_size = shmfd->shm_size; 19239696feebSJohn Baldwin if (shmfd->shm_path != NULL) { 1924cc7b259aSJamie Gritton path = shmfd->shm_path; 1925cc7b259aSJamie Gritton pr_path = curthread->td_ucred->cr_prison->pr_path; 192644c16975SJamie Gritton if (strcmp(pr_path, "/") != 0) { 192744c16975SJamie Gritton /* Return the jail-rooted pathname. 
*/ 1928cc7b259aSJamie Gritton pr_pathlen = strlen(pr_path); 19297975f57bSRicardo Branco visible = strncmp(path, pr_path, pr_pathlen) == 0 && 19307975f57bSRicardo Branco path[pr_pathlen] == '/'; 193156d0e33eSKonstantin Belousov if (list && !visible) 193256d0e33eSKonstantin Belousov return (EPERM); 193356d0e33eSKonstantin Belousov if (visible) 1934cc7b259aSJamie Gritton path += pr_pathlen; 1935cc7b259aSJamie Gritton } 1936cc7b259aSJamie Gritton strlcpy(kif->kf_path, path, sizeof(kif->kf_path)); 1937cc7b259aSJamie Gritton } 19389696feebSJohn Baldwin return (0); 19399696feebSJohn Baldwin } 194056d0e33eSKonstantin Belousov 194156d0e33eSKonstantin Belousov static int 194256d0e33eSKonstantin Belousov shm_fill_kinfo(struct file *fp, struct kinfo_file *kif, 194356d0e33eSKonstantin Belousov struct filedesc *fdp __unused) 194456d0e33eSKonstantin Belousov { 194556d0e33eSKonstantin Belousov int res; 194656d0e33eSKonstantin Belousov 194756d0e33eSKonstantin Belousov sx_slock(&shm_dict_lock); 194856d0e33eSKonstantin Belousov res = shm_fill_kinfo_locked(fp->f_data, kif, false); 194956d0e33eSKonstantin Belousov sx_sunlock(&shm_dict_lock); 195056d0e33eSKonstantin Belousov return (res); 195156d0e33eSKonstantin Belousov } 195256d0e33eSKonstantin Belousov 195356d0e33eSKonstantin Belousov static int 1954af755d3eSKyle Evans shm_add_seals(struct file *fp, int seals) 1955af755d3eSKyle Evans { 1956af755d3eSKyle Evans struct shmfd *shmfd; 1957af755d3eSKyle Evans void *rl_cookie; 1958af755d3eSKyle Evans vm_ooffset_t writemappings; 1959af755d3eSKyle Evans int error, nseals; 1960af755d3eSKyle Evans 1961af755d3eSKyle Evans error = 0; 1962af755d3eSKyle Evans shmfd = fp->f_data; 19636df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, 0, OFF_MAX); 1964af755d3eSKyle Evans 1965af755d3eSKyle Evans /* Even already-set seals should result in EPERM. */ 1966af755d3eSKyle Evans if ((shmfd->shm_seals & F_SEAL_SEAL) != 0) { 1967af755d3eSKyle Evans error = EPERM; 1968af755d3eSKyle Evans goto out; 1969af755d3eSKyle Evans } 1970af755d3eSKyle Evans nseals = seals & ~shmfd->shm_seals; 1971af755d3eSKyle Evans if ((nseals & F_SEAL_WRITE) != 0) { 197279783634SKonstantin Belousov if (shm_largepage(shmfd)) { 197379783634SKonstantin Belousov error = ENOTSUP; 197479783634SKonstantin Belousov goto out; 197579783634SKonstantin Belousov } 197679783634SKonstantin Belousov 1977af755d3eSKyle Evans /* 1978af755d3eSKyle Evans * The rangelock above prevents writable mappings from being 1979af755d3eSKyle Evans * added after we've started applying seals. The RLOCK here 1980af755d3eSKyle Evans * is to avoid torn reads on ILP32 arches as unmapping/reducing 1981af755d3eSKyle Evans * writemappings will be done without a rangelock. 
1982af755d3eSKyle Evans */ 1983af755d3eSKyle Evans VM_OBJECT_RLOCK(shmfd->shm_object); 1984af755d3eSKyle Evans writemappings = shmfd->shm_object->un_pager.swp.writemappings; 1985af755d3eSKyle Evans VM_OBJECT_RUNLOCK(shmfd->shm_object); 1986af755d3eSKyle Evans /* kmappings are also writable */ 1987af755d3eSKyle Evans if (writemappings > 0) { 1988af755d3eSKyle Evans error = EBUSY; 1989af755d3eSKyle Evans goto out; 1990af755d3eSKyle Evans } 1991af755d3eSKyle Evans } 1992af755d3eSKyle Evans shmfd->shm_seals |= nseals; 1993af755d3eSKyle Evans out: 19946df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 1995af755d3eSKyle Evans return (error); 1996af755d3eSKyle Evans } 1997af755d3eSKyle Evans 1998af755d3eSKyle Evans static int 1999af755d3eSKyle Evans shm_get_seals(struct file *fp, int *seals) 2000af755d3eSKyle Evans { 2001af755d3eSKyle Evans struct shmfd *shmfd; 2002af755d3eSKyle Evans 2003af755d3eSKyle Evans shmfd = fp->f_data; 2004af755d3eSKyle Evans *seals = shmfd->shm_seals; 2005af755d3eSKyle Evans return (0); 2006af755d3eSKyle Evans } 2007af755d3eSKyle Evans 2008af755d3eSKyle Evans static int 2009454bc887SKa Ho Ng shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags) 2010454bc887SKa Ho Ng { 2011454bc887SKa Ho Ng vm_object_t object; 2012454bc887SKa Ho Ng vm_pindex_t pistart, pi, piend; 2013454bc887SKa Ho Ng vm_ooffset_t off, len; 2014454bc887SKa Ho Ng int startofs, endofs, end; 2015454bc887SKa Ho Ng int error; 2016454bc887SKa Ho Ng 2017454bc887SKa Ho Ng off = *offset; 2018454bc887SKa Ho Ng len = *length; 2019454bc887SKa Ho Ng KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows")); 20201eaa3652SKa Ho Ng if (off + len > shmfd->shm_size) 20211eaa3652SKa Ho Ng len = shmfd->shm_size - off; 2022454bc887SKa Ho Ng object = shmfd->shm_object; 2023454bc887SKa Ho Ng startofs = off & PAGE_MASK; 2024454bc887SKa Ho Ng endofs = (off + len) & PAGE_MASK; 2025454bc887SKa Ho Ng pistart = OFF_TO_IDX(off); 2026454bc887SKa Ho Ng piend = OFF_TO_IDX(off + len); 2027454bc887SKa Ho Ng pi = OFF_TO_IDX(off + PAGE_MASK); 2028454bc887SKa Ho Ng error = 0; 2029454bc887SKa Ho Ng 20305c1428d2SKa Ho Ng /* Handle the case when offset is on or beyond shm size. */ 20315c1428d2SKa Ho Ng if ((off_t)len <= 0) { 20321eaa3652SKa Ho Ng *length = 0; 20331eaa3652SKa Ho Ng return (0); 20341eaa3652SKa Ho Ng } 20351eaa3652SKa Ho Ng 2036454bc887SKa Ho Ng VM_OBJECT_WLOCK(object); 2037454bc887SKa Ho Ng 2038454bc887SKa Ho Ng if (startofs != 0) { 2039454bc887SKa Ho Ng end = pistart != piend ? 
PAGE_SIZE : endofs; 2040454bc887SKa Ho Ng error = shm_partial_page_invalidate(object, pistart, startofs, 2041454bc887SKa Ho Ng end); 2042454bc887SKa Ho Ng if (error) 2043454bc887SKa Ho Ng goto out; 2044454bc887SKa Ho Ng off += end - startofs; 2045454bc887SKa Ho Ng len -= end - startofs; 2046454bc887SKa Ho Ng } 2047454bc887SKa Ho Ng 2048454bc887SKa Ho Ng if (pi < piend) { 2049454bc887SKa Ho Ng vm_object_page_remove(object, pi, piend, 0); 2050454bc887SKa Ho Ng off += IDX_TO_OFF(piend - pi); 2051454bc887SKa Ho Ng len -= IDX_TO_OFF(piend - pi); 2052454bc887SKa Ho Ng } 2053454bc887SKa Ho Ng 2054454bc887SKa Ho Ng if (endofs != 0 && pistart != piend) { 2055454bc887SKa Ho Ng error = shm_partial_page_invalidate(object, piend, 0, endofs); 2056454bc887SKa Ho Ng if (error) 2057454bc887SKa Ho Ng goto out; 2058454bc887SKa Ho Ng off += endofs; 2059454bc887SKa Ho Ng len -= endofs; 2060454bc887SKa Ho Ng } 2061454bc887SKa Ho Ng 2062454bc887SKa Ho Ng out: 2063454bc887SKa Ho Ng VM_OBJECT_WUNLOCK(shmfd->shm_object); 2064454bc887SKa Ho Ng *offset = off; 2065454bc887SKa Ho Ng *length = len; 2066454bc887SKa Ho Ng return (error); 2067454bc887SKa Ho Ng } 2068454bc887SKa Ho Ng 2069454bc887SKa Ho Ng static int 2070454bc887SKa Ho Ng shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, 2071454bc887SKa Ho Ng struct ucred *active_cred, struct thread *td) 2072454bc887SKa Ho Ng { 2073454bc887SKa Ho Ng void *rl_cookie; 2074454bc887SKa Ho Ng struct shmfd *shmfd; 2075454bc887SKa Ho Ng off_t off, len; 2076454bc887SKa Ho Ng int error; 2077454bc887SKa Ho Ng 20788c9aa94bSKa Ho Ng KASSERT(cmd == SPACECTL_DEALLOC, ("shm_fspacectl: Invalid cmd")); 20798c9aa94bSKa Ho Ng KASSERT((flags & ~SPACECTL_F_SUPPORTED) == 0, 20808c9aa94bSKa Ho Ng ("shm_fspacectl: non-zero flags")); 20818c9aa94bSKa Ho Ng KASSERT(*offset >= 0 && *length > 0 && *length <= OFF_MAX - *offset, 20828c9aa94bSKa Ho Ng ("shm_fspacectl: offset/length overflow or underflow")); 2083454bc887SKa Ho Ng error = EINVAL; 2084454bc887SKa Ho Ng shmfd = fp->f_data; 2085454bc887SKa Ho Ng off = *offset; 2086454bc887SKa Ho Ng len = *length; 2087454bc887SKa Ho Ng 20886df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, off, off + len); 2089454bc887SKa Ho Ng switch (cmd) { 2090454bc887SKa Ho Ng case SPACECTL_DEALLOC: 2091454bc887SKa Ho Ng if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) { 2092454bc887SKa Ho Ng error = EPERM; 2093454bc887SKa Ho Ng break; 2094454bc887SKa Ho Ng } 2095454bc887SKa Ho Ng error = shm_deallocate(shmfd, &off, &len, flags); 2096454bc887SKa Ho Ng *offset = off; 2097454bc887SKa Ho Ng *length = len; 2098454bc887SKa Ho Ng break; 2099454bc887SKa Ho Ng default: 2100454bc887SKa Ho Ng __assert_unreachable(); 2101454bc887SKa Ho Ng } 21026df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 2103454bc887SKa Ho Ng return (error); 2104454bc887SKa Ho Ng } 2105454bc887SKa Ho Ng 2106454bc887SKa Ho Ng 2107454bc887SKa Ho Ng static int 2108f1040532SKyle Evans shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) 2109f1040532SKyle Evans { 2110f1040532SKyle Evans void *rl_cookie; 2111f1040532SKyle Evans struct shmfd *shmfd; 2112f1040532SKyle Evans size_t size; 2113f1040532SKyle Evans int error; 2114f1040532SKyle Evans 2115f1040532SKyle Evans /* This assumes that the caller already checked for overflow. 
*/ 2116f1040532SKyle Evans error = 0; 2117f1040532SKyle Evans shmfd = fp->f_data; 2118f1040532SKyle Evans size = offset + len; 211939eae263SKyle Evans 212039eae263SKyle Evans /* 212139eae263SKyle Evans * Just grab the rangelock for the range that we may be attempting to 212239eae263SKyle Evans * grow, rather than blocking read/write for regions we won't be 212339eae263SKyle Evans * touching while this (potential) resize is in progress. Other 212439eae263SKyle Evans * attempts to resize the shmfd will have to take a write lock from 0 to 212539eae263SKyle Evans * OFF_MAX, so this being potentially beyond the current usable range of 212639eae263SKyle Evans * the shmfd is not necessarily a concern. If other mechanisms are 212739eae263SKyle Evans * added to grow a shmfd, this may need to be re-evaluated. 212839eae263SKyle Evans */ 21296df6facfSKonstantin Belousov rl_cookie = shm_rangelock_wlock(shmfd, offset, size); 2130d301b358SKonstantin Belousov if (size > shmfd->shm_size) 2131d301b358SKonstantin Belousov error = shm_dotruncate_cookie(shmfd, size, rl_cookie); 21326df6facfSKonstantin Belousov shm_rangelock_unlock(shmfd, rl_cookie); 2133f1040532SKyle Evans /* Translate to posix_fallocate(2) return value as needed. */ 2134f1040532SKyle Evans if (error == ENOMEM) 2135f1040532SKyle Evans error = ENOSPC; 2136f1040532SKyle Evans return (error); 2137f1040532SKyle Evans } 2138f1040532SKyle Evans 2139f1040532SKyle Evans static int 214056d0e33eSKonstantin Belousov sysctl_posix_shm_list(SYSCTL_HANDLER_ARGS) 214156d0e33eSKonstantin Belousov { 214256d0e33eSKonstantin Belousov struct shm_mapping *shmm; 214356d0e33eSKonstantin Belousov struct sbuf sb; 214456d0e33eSKonstantin Belousov struct kinfo_file kif; 214556d0e33eSKonstantin Belousov u_long i; 214656d0e33eSKonstantin Belousov int error, error2; 214756d0e33eSKonstantin Belousov 214856d0e33eSKonstantin Belousov sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file) * 5, req); 214956d0e33eSKonstantin Belousov sbuf_clear_flags(&sb, SBUF_INCLUDENUL); 215056d0e33eSKonstantin Belousov error = 0; 215156d0e33eSKonstantin Belousov sx_slock(&shm_dict_lock); 215256d0e33eSKonstantin Belousov for (i = 0; i < shm_hash + 1; i++) { 215356d0e33eSKonstantin Belousov LIST_FOREACH(shmm, &shm_dictionary[i], sm_link) { 215456d0e33eSKonstantin Belousov error = shm_fill_kinfo_locked(shmm->sm_shmfd, 215556d0e33eSKonstantin Belousov &kif, true); 2156747a4726SJamie Gritton if (error == EPERM) { 2157747a4726SJamie Gritton error = 0; 215856d0e33eSKonstantin Belousov continue; 2159747a4726SJamie Gritton } 216056d0e33eSKonstantin Belousov if (error != 0) 216156d0e33eSKonstantin Belousov break; 216256d0e33eSKonstantin Belousov pack_kinfo(&kif); 216356d0e33eSKonstantin Belousov error = sbuf_bcat(&sb, &kif, kif.kf_structsize) == 0 ? 216456d0e33eSKonstantin Belousov 0 : ENOMEM; 216556d0e33eSKonstantin Belousov if (error != 0) 216656d0e33eSKonstantin Belousov break; 216756d0e33eSKonstantin Belousov } 216856d0e33eSKonstantin Belousov } 216956d0e33eSKonstantin Belousov sx_sunlock(&shm_dict_lock); 217056d0e33eSKonstantin Belousov error2 = sbuf_finish(&sb); 217156d0e33eSKonstantin Belousov sbuf_delete(&sb); 217256d0e33eSKonstantin Belousov return (error != 0 ? 
error : error2);
217356d0e33eSKonstantin Belousov }
217456d0e33eSKonstantin Belousov 
217556d0e33eSKonstantin Belousov SYSCTL_PROC(_kern_ipc, OID_AUTO, posix_shm_list,
2176d7c4ea7dSJamie Gritton     CTLFLAG_RD | CTLFLAG_PRISON | CTLFLAG_MPSAFE | CTLTYPE_OPAQUE,
217756d0e33eSKonstantin Belousov     NULL, 0, sysctl_posix_shm_list, "",
217856d0e33eSKonstantin Belousov     "POSIX SHM list");
217920f70576SKyle Evans 
218020f70576SKyle Evans int
2181535b1df9SKyle Evans kern_shm_open(struct thread *td, const char *path, int flags, mode_t mode,
2182535b1df9SKyle Evans     struct filecaps *caps)
218320f70576SKyle Evans {
218420f70576SKyle Evans 
2185535b1df9SKyle Evans 	return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL));
218620f70576SKyle Evans }
218720f70576SKyle Evans 
218820f70576SKyle Evans /*
218920f70576SKyle Evans  * This version of the shm_open() interface leaves CLOEXEC behavior up to the
219020f70576SKyle Evans  * caller, and libc will enforce it for the traditional shm_open() call. This
219120f70576SKyle Evans  * allows other consumers, like memfd_create(), to opt in to CLOEXEC. This
219220f70576SKyle Evans  * interface also includes a 'name' argument that is currently unused, but could
219320f70576SKyle Evans  * potentially be exported later via some interface for debugging purposes.
219420f70576SKyle Evans  * From the kernel's perspective, it is optional. Individual consumers like
219520f70576SKyle Evans  * memfd_create() may require it in order to be compatible with other systems
219620f70576SKyle Evans  * implementing the same function.
219720f70576SKyle Evans  */
219820f70576SKyle Evans int
219920f70576SKyle Evans sys_shm_open2(struct thread *td, struct shm_open2_args *uap)
220020f70576SKyle Evans {
220120f70576SKyle Evans 
220220f70576SKyle Evans 	return (kern_shm_open2(td, uap->path, uap->flags, uap->mode,
2203535b1df9SKyle Evans 	    uap->shmflags, NULL, uap->name));
220420f70576SKyle Evans }
2205
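/*
 * Illustrative userspace sketch (not part of this file): memfd_create(3)
 * is built on shm_open2(2) with SHM_ALLOW_SEALING, so the sealing paths
 * above can be exercised like this (error handling omitted):
 *
 *	int fd = memfd_create("demo", MFD_ALLOW_SEALING);
 *	ftruncate(fd, getpagesize());
 *	// Succeeds here; would fail with EBUSY if a writable
 *	// mapping existed (see shm_add_seals()).
 *	fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE | F_SEAL_SEAL);
 */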