/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */

/*
 * segvmm - Virtual-Machine-Memory segment
 *
 * The vmm segment driver was designed for mapping regions of kernel memory
 * allocated to an HVM instance into userspace for manipulation there.  It
 * draws direct lineage from the umap segment driver, but is meant for larger
 * mappings with fewer restrictions.
 *
 * seg*k*vmm, in contrast, has mappings for every VMM into kas.  We use its
 * mappings here only to find the relevant PFNs when faulting in pages (see
 * segvmm_fault_obj() and segvmm_fault_space()).
 *
 * An illustrative (non-compiled) sketch of segment creation follows the
 * segvmm_ops definition below.
 */


#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/lgrp.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/hat_pte.h>
#include <vm/htable.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>

#include <sys/seg_vmm.h>

typedef struct segvmm_data {
	krwlock_t	svmd_lock;
	vm_object_t	*svmd_vmo;
	vm_client_t	*svmd_vmc;
	uintptr_t	svmd_off;
	uchar_t		svmd_prot;
	size_t		svmd_softlockcnt;
} segvmm_data_t;


static int segvmm_dup(struct seg *, struct seg *);
static int segvmm_unmap(struct seg *, caddr_t, size_t);
static void segvmm_free(struct seg *);
static faultcode_t segvmm_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segvmm_faulta(struct seg *, caddr_t);
static int segvmm_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segvmm_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segvmm_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segvmm_incore(struct seg *, caddr_t, size_t, char *);
static int segvmm_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segvmm_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t segvmm_getoffset(struct seg *, caddr_t);
static int segvmm_gettype(struct seg *, caddr_t);
static int segvmm_getvp(struct seg *, caddr_t, struct vnode **);
static int segvmm_advise(struct seg *, caddr_t, size_t, uint_t);
static void segvmm_dump(struct seg *);
static int segvmm_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segvmm_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segvmm_getmemid(struct seg *, caddr_t, memid_t *);
static int segvmm_capable(struct seg *, segcapability_t);

static struct seg_ops segvmm_ops = {
	.dup		= segvmm_dup,
	.unmap		= segvmm_unmap,
	.free		= segvmm_free,
	.fault		= segvmm_fault,
	.faulta		= segvmm_faulta,
	.setprot	= segvmm_setprot,
	.checkprot	= segvmm_checkprot,
	.kluster	= NULL,
	.swapout	= NULL,
	.sync		= segvmm_sync,
	.incore		= segvmm_incore,
	.lockop		= segvmm_lockop,
	.getprot	= segvmm_getprot,
	.getoffset	= segvmm_getoffset,
	.gettype	= segvmm_gettype,
	.getvp		= segvmm_getvp,
	.advise		= segvmm_advise,
	.dump		= segvmm_dump,
	.pagelock	= segvmm_pagelock,
	.setpagesize	= segvmm_setpagesize,
	.getmemid	= segvmm_getmemid,
	.getpolicy	= NULL,
	.capable	= segvmm_capable,
	.inherit	= seg_inherit_notsup
};
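
/*
 * The (non-compiled) sketch below illustrates how a caller holding a
 * vm_object might establish a segvmm mapping via as_map().  It is only an
 * example: the helper name and the manner in which 'as', 'uaddr', 'len',
 * 'vmo', 'off', and 'prot' are obtained are hypothetical, and only the
 * segvmm_crargs_t fields already used in this file are assumed.  A
 * vmspace-backed mapping would instead populate '.vmc' (leaving '.vmo'
 * NULL), since segvmm_create() requires exactly one of the two.
 */
#if 0
static int
example_map_vmo(struct as *as, caddr_t uaddr, size_t len, vm_object_t *vmo,
    uintptr_t off, uchar_t prot)
{
	segvmm_crargs_t args = {
		.prot = prot | PROT_USER,	/* PROT_USER is required */
		.offset = off,			/* must be page-aligned */
		.vmo = vmo,			/* direct vm_object mapping... */
		.vmc = NULL,			/* ...so no vm_client here */
	};
	int err;

	/* Hold the AS range lock across the mapping attempt. */
	as_rangelock(as);
	err = as_map(as, uaddr, len, segvmm_create, &args);
	as_rangeunlock(as);

	return (err);
}
#endif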

/*
 * Unload a region from the HAT for A/D tracking.
 */
static void
segvmm_invalidate(void *arg, uintptr_t gpa, size_t sz)
{
	struct seg *seg = arg;
	segvmm_data_t *svmd = seg->s_data;

	/*
	 * Invalidations are only necessary (and configured) for vmspace
	 * mappings.  Direct vm_object mappings are not involved.
	 */
	ASSERT3P(svmd->svmd_vmo, ==, NULL);

	/*
	 * The region being invalidated may overlap with all, some, or none of
	 * this segment.  We are only concerned about that overlap.
	 */
	const uintptr_t start = MAX(gpa, svmd->svmd_off);
	const uintptr_t end = MIN(gpa + sz, svmd->svmd_off + seg->s_size);
	if (start >= end) {
		return;
	}
	ASSERT(start >= svmd->svmd_off && end <= svmd->svmd_off + seg->s_size);
	ASSERT(start >= gpa && end <= gpa + sz);
	const caddr_t unload_va = seg->s_base + (start - svmd->svmd_off);
	const size_t unload_sz = (end - start);
	ASSERT3U(unload_sz, <=, seg->s_size);

	hat_unload(seg->s_as->a_hat, unload_va, unload_sz, HAT_UNLOAD);
}

/*
 * Create a VMM-memory-backed segment.
 */
int
segvmm_create(struct seg **segpp, void *argsp)
{
	struct seg *seg = *segpp;
	segvmm_crargs_t *cra = argsp;
	segvmm_data_t *data;

	VERIFY((cra->vmo == NULL && cra->vmc != NULL) ||
	    (cra->vmo != NULL && cra->vmc == NULL));
	VERIFY(cra->prot & PROT_USER);
	VERIFY0(cra->offset & PAGEOFFSET);

	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	rw_init(&data->svmd_lock, NULL, RW_DEFAULT, NULL);
	data->svmd_off = cra->offset;
	data->svmd_prot = cra->prot & ~PROT_USER;

	seg->s_ops = &segvmm_ops;
	seg->s_data = data;

	if (cra->vmo != NULL) {
		data->svmd_vmo = cra->vmo;
		/* Grab a hold on the VM object for the segment's lifetime */
		vm_object_reference(data->svmd_vmo);
	} else {
		int err;

		data->svmd_vmc = cra->vmc;
		err = vmc_set_inval_cb(data->svmd_vmc, segvmm_invalidate, seg);
		if (err != 0) {
			seg->s_ops = NULL;
			seg->s_data = NULL;
			kmem_free(data, sizeof (*data));
			return (err);
		}
	}
	return (0);
}

static int
segvmm_dup(struct seg *seg, struct seg *newseg)
{
	segvmm_data_t *svmd = seg->s_data;
	segvmm_data_t *newsvmd;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	newsvmd = kmem_zalloc(sizeof (segvmm_data_t), KM_SLEEP);
	rw_init(&newsvmd->svmd_lock, NULL, RW_DEFAULT, NULL);
	newsvmd->svmd_off = svmd->svmd_off;
	newsvmd->svmd_prot = svmd->svmd_prot;

	newseg->s_ops = seg->s_ops;
	newseg->s_data = newsvmd;

	if (svmd->svmd_vmo != NULL) {
		/* Grab another hold for the duplicate segment */
		vm_object_reference(svmd->svmd_vmo);
		newsvmd->svmd_vmo = svmd->svmd_vmo;
	} else {
		int err;

		newsvmd->svmd_vmc = vmc_clone(svmd->svmd_vmc);
		/*
		 * The cloned client does not inherit the invalidation
		 * configuration, so attempt to set it here for the new segment.
		 */
		err = vmc_set_inval_cb(newsvmd->svmd_vmc, segvmm_invalidate,
		    newseg);
		if (err != 0) {
			newseg->s_ops = NULL;
			newseg->s_data = NULL;
			kmem_free(newsvmd, sizeof (*newsvmd));
			return (err);
		}
	}

	return (0);
}

static int
segvmm_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	segvmm_data_t *svmd = seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/* Only allow unmap of entire segment */
	if (addr != seg->s_base || len != seg->s_size) {
		return (EINVAL);
	}
	if (svmd->svmd_softlockcnt != 0) {
		return (EAGAIN);
	}

	/* Unconditionally unload the entire segment range. */
	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

	seg_free(seg);
	return (0);
}

static void
segvmm_free(struct seg *seg)
{
	segvmm_data_t *svmd = seg->s_data;

	ASSERT(svmd != NULL);

	if (svmd->svmd_vmo != NULL) {
		/* Release the VM object hold this segment possessed */
		vm_object_release(svmd->svmd_vmo);
		svmd->svmd_vmo = NULL;
	} else {
		vmc_destroy(svmd->svmd_vmc);
		svmd->svmd_vmc = NULL;
	}
	rw_destroy(&svmd->svmd_lock);
	VERIFY(svmd->svmd_softlockcnt == 0);
	kmem_free(svmd, sizeof (*svmd));
	seg->s_data = NULL;
}

/*
 * Fault in translations for a direct vm_object mapping, one page at a time.
 */
static int
segvmm_fault_obj(struct hat *hat, struct seg *seg, uintptr_t va, size_t len)
{
	segvmm_data_t *svmd = seg->s_data;
	const uintptr_t end = va + len;
	const int prot = svmd->svmd_prot;
	const int uprot = prot | PROT_USER;
	vm_object_t *vmo = svmd->svmd_vmo;

	ASSERT(vmo != NULL);

	va &= PAGEMASK;
	uintptr_t off = va - (uintptr_t)seg->s_base + svmd->svmd_off;
	do {
		pfn_t pfn;

		pfn = vm_object_pfn(vmo, off);
		if (pfn == PFN_INVALID) {
			return (FC_NOMAP);
		}

		/* Ignore any large-page possibilities for now */
		hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, uprot, HAT_LOAD);
		va += PAGESIZE;
		off += PAGESIZE;
	} while (va < end);

	return (0);
}

/*
 * Fault in translations for a vmspace (vm_client) mapping, holding each page
 * while its PFN is loaded into the HAT.
 */
static int
segvmm_fault_space(struct hat *hat, struct seg *seg, uintptr_t va, size_t len)
{
	segvmm_data_t *svmd = seg->s_data;
	const uintptr_t end = va + len;
	const int prot = svmd->svmd_prot;
	const int uprot = prot | PROT_USER;
	vm_client_t *vmc = svmd->svmd_vmc;

	ASSERT(vmc != NULL);

	va &= PAGEMASK;
	uintptr_t off = va - (uintptr_t)seg->s_base + svmd->svmd_off;

	do {
		vm_page_t *vmp;
		pfn_t pfn;

		vmp = vmc_hold(vmc, off, prot);
		if (vmp == NULL) {
			return (FC_NOMAP);
		}

		pfn = vmp_get_pfn(vmp);
		ASSERT3U(pfn, !=, PFN_INVALID);

		/* Ignore any large-page possibilities for now */
		hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, uprot, HAT_LOAD);

		if (vmp_release(vmp)) {
			/*
			 * Region was unmapped from vmspace while we were
			 * loading it into this AS.  Communicate it as if it
			 * were a fault.
			 */
			hat_unload(hat, (caddr_t)va, PAGESIZE, HAT_UNLOAD);
			return (FC_NOMAP);
		}

		va += PAGESIZE;
		off += PAGESIZE;
	} while (va < end);

	return (0);
}

/* ARGSUSED */
static faultcode_t
segvmm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	segvmm_data_t *svmd = seg->s_data;
	int err = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (type == F_PROT) {
		/*
		 * Since protection on the segment is fixed, there is nothing
		 * to do but report an error for protection faults.
		 */
		return (FC_PROT);
	} else if (type == F_SOFTUNLOCK) {
		size_t plen = btop(len);

		rw_enter(&svmd->svmd_lock, RW_WRITER);
		VERIFY(svmd->svmd_softlockcnt >= plen);
		svmd->svmd_softlockcnt -= plen;
		rw_exit(&svmd->svmd_lock);
		return (0);
	}

	VERIFY(type == F_INVAL || type == F_SOFTLOCK);
	rw_enter(&svmd->svmd_lock, RW_WRITER);

	if (svmd->svmd_vmo != NULL) {
		err = segvmm_fault_obj(hat, seg, (uintptr_t)addr, len);
	} else {
		err = segvmm_fault_space(hat, seg, (uintptr_t)addr, len);
	}
	if (type == F_SOFTLOCK && err == 0) {
		size_t nval = svmd->svmd_softlockcnt + btop(len);

		if (svmd->svmd_softlockcnt >= nval) {
			rw_exit(&svmd->svmd_lock);
			return (FC_MAKE_ERR(EOVERFLOW));
		}
		svmd->svmd_softlockcnt = nval;
	}

	rw_exit(&svmd->svmd_lock);
	return (err);
}

/* ARGSUSED */
static faultcode_t
segvmm_faulta(struct seg *seg, caddr_t addr)
{
	/* Do nothing; an async pagefault should not load translations. */
	return (0);
}

/* ARGSUSED */
static int
segvmm_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/* The seg_vmm driver does not yet allow protection to be changed. */
	return (EACCES);
}

/* ARGSUSED */
static int
segvmm_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	segvmm_data_t *svmd = seg->s_data;
	int error = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&svmd->svmd_lock, RW_READER);
	if ((svmd->svmd_prot & prot) != prot) {
		error = EACCES;
	}
	rw_exit(&svmd->svmd_lock);
	return (error);
}

/* ARGSUSED */
static int
segvmm_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	/* Always succeed, since there is no backing store to sync */
	return (0);
}

/* ARGSUSED */
static size_t
segvmm_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t sz = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	len = (len + PAGEOFFSET) & PAGEMASK;
	while (len > 0) {
		*vec = 1;
		sz += PAGESIZE;
		vec++;
		len -= PAGESIZE;
	}
	return (sz);
}

/* ARGSUSED */
static int
segvmm_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
	/* Report success since kernel pages are always in memory. */
	return (0);
}

static int
segvmm_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	segvmm_data_t *svmd = seg->s_data;
	size_t pgno;
	uint_t prot;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&svmd->svmd_lock, RW_READER);
	prot = svmd->svmd_prot;
	rw_exit(&svmd->svmd_lock);

	/*
	 * Reporting protection is simple since it is not tracked per-page.
	 */
	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	while (pgno > 0) {
		protv[--pgno] = prot;
	}
	return (0);
}

/* ARGSUSED */
static u_offset_t
segvmm_getoffset(struct seg *seg, caddr_t addr)
{
	/*
	 * To avoid leaking information about the layout of the kernel address
	 * space, always report '0' as the offset.
	 */
	return (0);
}

/* ARGSUSED */
static int
segvmm_gettype(struct seg *seg, caddr_t addr)
{
	/*
	 * Since already-existing vmm reservoir pages are being mapped into
	 * userspace, always report the segment type as shared.
	 */
	return (MAP_SHARED);
}

/* ARGSUSED */
static int
segvmm_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	*vpp = NULL;
	return (0);
}

/* ARGSUSED */
static int
segvmm_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	if (behav == MADV_PURGE) {
		/* Purge does not make sense for this mapping */
		return (EINVAL);
	}
	/* Indicate success for everything else. */
	return (0);
}

/* ARGSUSED */
static void
segvmm_dump(struct seg *seg)
{
	/*
	 * Since this is a mapping to share kernel data with userspace, nothing
	 * additional should be dumped.
	 */
}

/* ARGSUSED */
static int
segvmm_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/* ARGSUSED */
static int
segvmm_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

static int
segvmm_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	segvmm_data_t *svmd = seg->s_data;

	memidp->val[0] = (uintptr_t)svmd->svmd_vmo;
	memidp->val[1] = (uintptr_t)(addr - seg->s_base) + svmd->svmd_off;
	return (0);
}

/* ARGSUSED */
static int
segvmm_capable(struct seg *seg, segcapability_t capability)
{
	/* no special capabilities */
	return (0);
}