1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2018 Joyent, Inc. 14 */ 15 16 /* 17 * VM - Kernel-to-user mapping segment 18 * 19 * The umap segment driver was primarily designed to facilitate the comm page: 20 * a portion of kernel memory shared with userspace so that certain (namely 21 * clock-related) actions could operate without making an expensive trip into 22 * the kernel. 23 * 24 * Since the initial requirements for the comm page are slim, advanced features 25 * of the segment driver such as per-page protection have been left 26 * unimplemented at this time. 27 */ 28 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/errno.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/lgrp.h> 36 #include <sys/mman.h> 37 38 #include <vm/hat.h> 39 #include <vm/as.h> 40 #include <vm/seg.h> 41 #include <vm/seg_kmem.h> 42 #include <vm/seg_umap.h> 43 44 45 static boolean_t segumap_verify_safe(caddr_t, size_t); 46 static int segumap_dup(struct seg *, struct seg *); 47 static int segumap_unmap(struct seg *, caddr_t, size_t); 48 static void segumap_free(struct seg *); 49 static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t, 50 enum fault_type, enum seg_rw); 51 static faultcode_t segumap_faulta(struct seg *, caddr_t); 52 static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t); 53 static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t); 54 static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t); 55 static size_t segumap_incore(struct seg *, caddr_t, size_t, char *); 56 static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *, 57 size_t); 58 static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *); 59 static u_offset_t segumap_getoffset(struct seg *, caddr_t); 60 static int segumap_gettype(struct seg *, caddr_t); 61 static int segumap_getvp(struct seg *, caddr_t, struct vnode **); 62 static int segumap_advise(struct seg *, caddr_t, size_t, uint_t); 63 static void segumap_dump(struct seg *); 64 static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***, 65 enum lock_type, enum seg_rw); 66 static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t); 67 static int segumap_getmemid(struct seg *, caddr_t, memid_t *); 68 static int segumap_capable(struct seg *, segcapability_t); 69 70 static struct seg_ops segumap_ops = { 71 segumap_dup, 72 segumap_unmap, 73 segumap_free, 74 segumap_fault, 75 segumap_faulta, 76 segumap_setprot, 77 segumap_checkprot, 78 NULL, /* kluster: disabled */ 79 NULL, /* swapout: disabled */ 80 segumap_sync, 81 segumap_incore, 82 segumap_lockop, 83 segumap_getprot, 84 segumap_getoffset, 85 segumap_gettype, 86 segumap_getvp, 87 segumap_advise, 88 segumap_dump, 89 segumap_pagelock, 90 segumap_setpagesize, 91 segumap_getmemid, 92 NULL, /* getpolicy: disabled */ 93 segumap_capable, 94 seg_inherit_notsup 95 }; 96 97 98 /* 99 * Create a kernel/user-mapped segment. 100 */ 101 int 102 segumap_create(struct seg **segpp, void *argsp) 103 { 104 struct seg *seg = *segpp; 105 segumap_crargs_t *a = (struct segumap_crargs *)argsp; 106 segumap_data_t *data; 107 108 ASSERT((uintptr_t)a->kaddr > _userlimit); 109 110 /* 111 * Check several aspects of the mapping request to ensure validity: 112 * - kernel pages must reside entirely in kernel space 113 * - target protection must be user-accessible 114 * - kernel address must be page-aligned 115 * - kernel address must reside inside a "safe" segment 116 */ 117 if ((uintptr_t)a->kaddr <= _userlimit || 118 ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr || 119 (a->prot & PROT_USER) == 0 || 120 ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 || 121 !segumap_verify_safe(a->kaddr, seg->s_size)) { 122 return (EINVAL); 123 } 124 125 data = kmem_zalloc(sizeof (*data), KM_SLEEP); 126 rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL); 127 data->sud_kaddr = a->kaddr; 128 data->sud_prot = a->prot; 129 130 seg->s_ops = &segumap_ops; 131 seg->s_data = data; 132 return (0); 133 } 134 135 static boolean_t 136 segumap_verify_safe(caddr_t kaddr, size_t len) 137 { 138 struct seg *seg; 139 140 /* 141 * Presently, only pages which are backed by segkmem are allowed to be 142 * shared with userspace. This prevents nasty paging behavior with 143 * other drivers such as seg_kp. Furthermore, the backing kernel 144 * segment must completely contain the region to be mapped. 145 * 146 * Failing these checks is fatal for now since such mappings are done 147 * in a very limited context from the kernel. 148 */ 149 AS_LOCK_ENTER(&kas, RW_READER); 150 seg = as_segat(&kas, kaddr); 151 VERIFY(seg != NULL); 152 VERIFY(seg->s_base + seg->s_size >= kaddr + len); 153 VERIFY(seg->s_ops == &segkmem_ops); 154 AS_LOCK_EXIT(&kas); 155 156 return (B_TRUE); 157 } 158 159 static int 160 segumap_dup(struct seg *seg, struct seg *newseg) 161 { 162 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 163 segumap_data_t *newsud; 164 165 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 166 167 newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP); 168 rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL); 169 newsud->sud_kaddr = sud->sud_kaddr; 170 newsud->sud_prot = sud->sud_prot; 171 172 newseg->s_ops = seg->s_ops; 173 newseg->s_data = newsud; 174 return (0); 175 } 176 177 static int 178 segumap_unmap(struct seg *seg, caddr_t addr, size_t len) 179 { 180 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 181 182 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 183 184 /* Only allow unmap of entire segment */ 185 if (addr != seg->s_base || len != seg->s_size) { 186 return (EINVAL); 187 } 188 if (sud->sud_softlockcnt != 0) { 189 return (EAGAIN); 190 } 191 192 /* 193 * Unconditionally unload the entire segment range. 194 */ 195 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP); 196 197 seg_free(seg); 198 return (0); 199 } 200 201 static void 202 segumap_free(struct seg *seg) 203 { 204 segumap_data_t *data = (segumap_data_t *)seg->s_data; 205 206 ASSERT(data != NULL); 207 208 rw_destroy(&data->sud_lock); 209 VERIFY(data->sud_softlockcnt == 0); 210 kmem_free(data, sizeof (*data)); 211 seg->s_data = NULL; 212 } 213 214 /* ARGSUSED */ 215 static faultcode_t 216 segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, 217 enum fault_type type, enum seg_rw tw) 218 { 219 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 220 221 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 222 223 if (type == F_PROT) { 224 /* 225 * Since protection on the segment is fixed, there is nothing 226 * to do but report an error for protection faults. 227 */ 228 return (FC_PROT); 229 } else if (type == F_SOFTUNLOCK) { 230 size_t plen = btop(len); 231 232 rw_enter(&sud->sud_lock, RW_WRITER); 233 VERIFY(sud->sud_softlockcnt >= plen); 234 sud->sud_softlockcnt -= plen; 235 rw_exit(&sud->sud_lock); 236 return (0); 237 } 238 239 ASSERT(type == F_INVAL || type == F_SOFTLOCK); 240 rw_enter(&sud->sud_lock, RW_WRITER); 241 242 if (type == F_INVAL || 243 (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) { 244 /* 245 * Load the (entire) segment into the HAT. 246 * 247 * It's possible that threads racing into as_fault will cause 248 * seg_umap to load the same range multiple times in quick 249 * succession. Redundant hat_devload operations are safe. 250 */ 251 for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) { 252 pfn_t pfn; 253 254 pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i); 255 VERIFY(pfn != PFN_INVALID); 256 hat_devload(seg->s_as->a_hat, seg->s_base + i, 257 PAGESIZE, pfn, sud->sud_prot, HAT_LOAD); 258 } 259 } 260 if (type == F_SOFTLOCK) { 261 size_t nval = sud->sud_softlockcnt + btop(len); 262 263 if (sud->sud_softlockcnt >= nval) { 264 rw_exit(&sud->sud_lock); 265 return (FC_MAKE_ERR(EOVERFLOW)); 266 } 267 sud->sud_softlockcnt = nval; 268 } 269 270 rw_exit(&sud->sud_lock); 271 return (0); 272 } 273 274 /* ARGSUSED */ 275 static faultcode_t 276 segumap_faulta(struct seg *seg, caddr_t addr) 277 { 278 /* Do nothing since asynch pagefault should not load translation. */ 279 return (0); 280 } 281 282 /* ARGSUSED */ 283 static int 284 segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 285 { 286 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 287 288 /* 289 * The seg_umap driver does not yet allow protection to be changed. 290 */ 291 return (EACCES); 292 } 293 294 /* ARGSUSED */ 295 static int 296 segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 297 { 298 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 299 int error = 0; 300 301 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 302 303 rw_enter(&sud->sud_lock, RW_READER); 304 if ((sud->sud_prot & prot) != prot) { 305 error = EACCES; 306 } 307 rw_exit(&sud->sud_lock); 308 return (error); 309 } 310 311 /* ARGSUSED */ 312 static int 313 segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) 314 { 315 /* Always succeed since there are no backing store to sync */ 316 return (0); 317 } 318 319 /* ARGSUSED */ 320 static size_t 321 segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) 322 { 323 size_t sz = 0; 324 325 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 326 327 len = (len + PAGEOFFSET) & PAGEMASK; 328 while (len > 0) { 329 *vec = 1; 330 sz += PAGESIZE; 331 vec++; 332 len -= PAGESIZE; 333 } 334 return (sz); 335 } 336 337 /* ARGSUSED */ 338 static int 339 segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op, 340 ulong_t *lockmap, size_t pos) 341 { 342 /* Report success since kernel pages are always in memory. */ 343 return (0); 344 } 345 346 static int 347 segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 348 { 349 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 350 size_t pgno; 351 uint_t prot; 352 353 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 354 355 rw_enter(&sud->sud_lock, RW_READER); 356 prot = sud->sud_prot; 357 rw_exit(&sud->sud_lock); 358 359 /* 360 * Reporting protection is simple since it is not tracked per-page. 361 */ 362 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 363 while (pgno > 0) { 364 protv[--pgno] = prot; 365 } 366 return (0); 367 } 368 369 /* ARGSUSED */ 370 static u_offset_t 371 segumap_getoffset(struct seg *seg, caddr_t addr) 372 { 373 /* 374 * To avoid leaking information about the layout of the kernel address 375 * space, always report '0' as the offset. 376 */ 377 return (0); 378 } 379 380 /* ARGSUSED */ 381 static int 382 segumap_gettype(struct seg *seg, caddr_t addr) 383 { 384 /* 385 * Since already-existing kernel pages are being mapped into userspace, 386 * always report the segment type as shared. 387 */ 388 return (MAP_SHARED); 389 } 390 391 /* ARGSUSED */ 392 static int 393 segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 394 { 395 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 396 397 *vpp = NULL; 398 return (0); 399 } 400 401 /* ARGSUSED */ 402 static int 403 segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 404 { 405 if (behav == MADV_PURGE) { 406 /* Purge does not make sense for this mapping */ 407 return (EINVAL); 408 } 409 /* Indicate success for everything else. */ 410 return (0); 411 } 412 413 /* ARGSUSED */ 414 static void 415 segumap_dump(struct seg *seg) 416 { 417 /* 418 * Since this is a mapping to share kernel data with userspace, nothing 419 * additional should be dumped. 420 */ 421 } 422 423 /* ARGSUSED */ 424 static int 425 segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, 426 enum lock_type type, enum seg_rw rw) 427 { 428 return (ENOTSUP); 429 } 430 431 /* ARGSUSED */ 432 static int 433 segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 434 { 435 return (ENOTSUP); 436 } 437 438 static int 439 segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 440 { 441 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 442 443 memidp->val[0] = (uintptr_t)sud->sud_kaddr; 444 memidp->val[1] = (uintptr_t)(addr - seg->s_base); 445 return (0); 446 } 447 448 /* ARGSUSED */ 449 static int 450 segumap_capable(struct seg *seg, segcapability_t capability) 451 { 452 /* no special capablities */ 453 return (0); 454 } 455