/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Machine frame segment driver. This segment driver allows dom0 processes to
 * map pages of other domains or Xen (e.g. during save/restore). ioctl()s on
 * the privcmd driver provide the MFN values backing each mapping, and we map
 * them into the process's address space at this time. Demand-faulting is not
 * supported by this driver due to the requirements upon some of the ioctl()s.
 */
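/*
 * Illustrative (hypothetical) dom0 user-level flow; the device path and
 * ioctl structures belong to the privcmd driver and are shown here only
 * as a sketch:
 *
 *	fd = open("/dev/xen/privcmd", O_RDWR);
 *	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, off);
 *	(void) ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &batch);
 *
 * The mmap() creates a seg_mf segment over [addr, addr + len), and the
 * MMAPBATCH ioctl() then supplies the foreign MFNs that back it.
 */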
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/lgrp.h>
#include <sys/hypervisor.h>

#include <vm/page.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>

#include <vm/hat_pte.h>
#include <vm/seg_mf.h>

#include <sys/fs/snode.h>

#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)

#define	mfatob(n)	((n) * sizeof (mfn_t))

struct segmf_data {
	kmutex_t	lock;
	struct vnode	*vp;
	uchar_t		prot;
	uchar_t		maxprot;
	size_t		softlockcnt;
	domid_t		domid;
	mfn_t		*mfns;
};

static struct seg_ops segmf_ops;

static struct segmf_data *
segmf_data_zalloc(struct seg *seg)
{
	struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);

	mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
	seg->s_ops = &segmf_ops;
	seg->s_data = data;
	return (data);
}

int
segmf_create(struct seg *seg, void *args)
{
	struct segmf_crargs *a = args;
	struct segmf_data *data;
	struct as *as = seg->s_as;
	pgcnt_t i, npages = seg_pages(seg);
	int error;

	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);

	data = segmf_data_zalloc(seg);
	data->vp = specfind(a->dev, VCHR);
	data->prot = a->prot;
	data->maxprot = a->maxprot;

	data->mfns = kmem_alloc(mfatob(npages), KM_SLEEP);
	for (i = 0; i < npages; i++)
		data->mfns[i] = MFN_INVALID;

	error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);

	if (error != 0)
		hat_unload(as->a_hat,
		    seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
	return (error);
}

/*
 * Duplicate a seg and return new segment in newseg.
 */
static int
segmf_dup(struct seg *seg, struct seg *newseg)
{
	struct segmf_data *data = seg->s_data;
	struct segmf_data *ndata;
	pgcnt_t npages = seg_pages(newseg);

	ndata = segmf_data_zalloc(newseg);

	VN_HOLD(data->vp);
	ndata->vp = data->vp;
	ndata->prot = data->prot;
	ndata->maxprot = data->maxprot;
	ndata->domid = data->domid;

	ndata->mfns = kmem_alloc(mfatob(npages), KM_SLEEP);
	bcopy(data->mfns, ndata->mfns, mfatob(npages));

	return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
	    newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
	    MAP_SHARED, CRED(), NULL));
}

/*
 * We only support unmapping the whole segment, and we automatically unlock
 * what we previously soft-locked.
 */
static int
segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	struct segmf_data *data = seg->s_data;
	offset_t off;

	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
		panic("segmf_unmap");

	if (addr != seg->s_base || len != seg->s_size)
		return (ENOTSUP);

	hat_unload(seg->s_as->a_hat, addr, len,
	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);

	off = (offset_t)seg_page(seg, addr);

	ASSERT(data->vp != NULL);

	(void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);

	seg_free(seg);
	return (0);
}

static void
segmf_free(struct seg *seg)
{
	struct segmf_data *data = seg->s_data;
	pgcnt_t npages = seg_pages(seg);

	kmem_free(data->mfns, mfatob(npages));
	VN_RELE(data->vp);
	mutex_destroy(&data->lock);
	kmem_free(data, sizeof (*data));
}

static int segmf_faultpage_debug = 0;

/*ARGSUSED*/
static int
segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
    enum fault_type type, uint_t prot)
{
	struct segmf_data *data = seg->s_data;
	uint_t hat_flags = HAT_LOAD_NOCONSIST;
	mfn_t mfn;
	x86pte_t pte;

	mfn = data->mfns[seg_page(seg, addr)];

	ASSERT(mfn != MFN_INVALID);

	if (type == F_SOFTLOCK) {
		mutex_enter(&freemem_lock);
		data->softlockcnt++;
		mutex_exit(&freemem_lock);
		hat_flags |= HAT_LOAD_LOCK;
	} else
		hat_flags |= HAT_LOAD;

	if (segmf_faultpage_debug > 0) {
		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
		    (void *)addr, data->domid, mfn, prot);
		segmf_faultpage_debug--;
	}

	/*
	 * Ask the HAT to load a throwaway mapping to page zero, then
	 * overwrite it with our foreign domain mapping. It gets removed
	 * later via hat_unload().
	 */
	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
	    PROT_READ | HAT_UNORDERED_OK, hat_flags);

	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
	if (prot & PROT_WRITE)
		pte |= PT_WRITABLE;
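	/*
	 * To illustrate the construction above (values are examples only):
	 * with an MMU_PAGESHIFT of 12, a foreign mfn of 0x12345 gives
	 * mmu_ptob(0x12345) == 0x12345000; OR-ing in PT_VALID, PT_USER and
	 * PT_FOREIGN (plus PT_WRITABLE for a writable mapping) produces the
	 * PTE handed to the hypervisor below.
	 */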
	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
		hat_flags = HAT_UNLOAD_UNMAP;

		if (type == F_SOFTLOCK) {
			hat_flags |= HAT_UNLOAD_UNLOCK;
			mutex_enter(&freemem_lock);
			data->softlockcnt--;
			mutex_exit(&freemem_lock);
		}

		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
		return (FC_MAKE_ERR(EFAULT));
	}

	return (0);
}

static int
seg_rw_to_prot(enum seg_rw rw)
{
	switch (rw) {
	case S_READ:
		return (PROT_READ);
	case S_WRITE:
		return (PROT_WRITE);
	case S_EXEC:
		return (PROT_EXEC);
	case S_OTHER:
	default:
		break;
	}
	return (PROT_READ | PROT_WRITE | PROT_EXEC);
}

static void
segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
{
	struct segmf_data *data = seg->s_data;

	hat_unlock(hat, addr, len);

	mutex_enter(&freemem_lock);
	ASSERT(data->softlockcnt >= btopr(len));
	data->softlockcnt -= btopr(len);
	mutex_exit(&freemem_lock);

	if (data->softlockcnt == 0) {
		struct as *as = seg->s_as;

		if (AS_ISUNMAPWAIT(as)) {
			mutex_enter(&as->a_contents);
			if (AS_ISUNMAPWAIT(as)) {
				AS_CLRUNMAPWAIT(as);
				cv_broadcast(&as->a_cv);
			}
			mutex_exit(&as->a_contents);
		}
	}
}
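/*
 * Establish mappings for a range of pages, one page at a time. Note that
 * this is not reached via the generic as_fault() path -- segmf_fault()
 * below always fails -- but is instead called directly from
 * segmf_add_mfns() to pre-fault and soft-lock the range once the MFNs
 * are known.
 */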
static int
segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	struct segmf_data *data = seg->s_data;
	int error = 0;
	caddr_t a;

	if ((data->prot & seg_rw_to_prot(rw)) == 0)
		return (FC_PROT);

	/* loop over the address range handling each fault */

	for (a = addr; a < addr + len; a += PAGESIZE) {
		error = segmf_faultpage(hat, seg, a, type, data->prot);
		if (error != 0)
			break;
	}

	if (error != 0 && type == F_SOFTLOCK) {
		size_t done = (size_t)(a - addr);

		/*
		 * Undo what's been done so far.
		 */
		if (done > 0)
			segmf_softunlock(hat, seg, addr, done);
	}

	return (error);
}

/*
 * We never demand-fault for seg_mf.
 */
/*ARGSUSED*/
static int
segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	return (FC_MAKE_ERR(EFAULT));
}

/*ARGSUSED*/
static int
segmf_faulta(struct seg *seg, caddr_t addr)
{
	return (0);
}

/*ARGSUSED*/
static int
segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}

/*ARGSUSED*/
static int
segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}

/*ARGSUSED*/
static int
segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (-1);
}

/*ARGSUSED*/
static int
segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	return (0);
}

/*
 * XXPV	Hmm. Should we say that mf mappings are "in core?"
 */

/*ARGSUSED*/
static size_t
segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t v;

	for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
	    len -= PAGESIZE, v += PAGESIZE)
		*vec++ = 1;
	return (v);
}

/*ARGSUSED*/
static int
segmf_lockop(struct seg *seg, caddr_t addr,
    size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
{
	return (0);
}

static int
segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmf_data *data = seg->s_data;
	pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	if (pgno != 0) {
		do {
			protv[--pgno] = data->prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmf_getoffset(struct seg *seg, caddr_t addr)
{
	return (addr - seg->s_base);
}

/*ARGSUSED*/
static int
segmf_gettype(struct seg *seg, caddr_t addr)
{
	return (MAP_SHARED);
}

/*ARGSUSED1*/
static int
segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmf_data *data = seg->s_data;

	*vpp = VTOCVP(data->vp);
	return (0);
}

/*ARGSUSED*/
static int
segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	return (0);
}

/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{}

/*ARGSUSED*/
static int
segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/*ARGSUSED*/
static int
segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

static int
segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
{
	struct segmf_data *data = seg->s_data;

	memid->val[0] = (uintptr_t)VTOCVP(data->vp);
	memid->val[1] = (uintptr_t)seg_page(seg, addr);
	return (0);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmf_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/*ARGSUSED*/
static int
segmf_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}

/*
 * Add a set of contiguous foreign MFNs to the segment, soft-locking them.
 * The pre-faulting is necessary due to live migration; in particular we must
 * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
 * later on a bad MFN. Whilst this isn't necessary for the other MMAP
 * ioctl()s, we lock them too, as they should be transitory.
 */
int
segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
    pgcnt_t pgcnt, domid_t domid)
{
	struct segmf_data *data = seg->s_data;
	pgcnt_t base;
	faultcode_t fc;
	pgcnt_t i;
	int error = 0;

	if (seg->s_ops != &segmf_ops)
		return (EINVAL);

	/*
	 * Don't mess with dom0.
	 *
	 * Only allow the domid to be set once for the segment.
	 * After that, attempts to add mappings to this segment for
	 * other domains explicitly fail.
	 */

	if (domid == 0 || domid == DOMID_SELF)
		return (EACCES);

	mutex_enter(&data->lock);

	if (data->domid == 0)
		data->domid = domid;

	if (data->domid != domid) {
		error = EINVAL;
		goto out;
	}

	base = seg_page(seg, addr);

	for (i = 0; i < pgcnt; i++)
		data->mfns[base + i] = mfn++;

	fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
	    pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);

	if (fc != 0) {
		error = fc_decode(fc);
		for (i = 0; i < pgcnt; i++)
			data->mfns[base + i] = MFN_INVALID;
	}

out:
	mutex_exit(&data->lock);
	return (error);
}
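/*
 * Hypothetical caller sketch (the real caller lives in the privcmd
 * driver): an IOCTL_PRIVCMD_MMAPBATCH handler that has validated a batch
 * entry might establish its mappings with something like
 *
 *	error = segmf_add_mfns(seg, uaddr, mfn, pgcnt, domid);
 *
 * where any non-zero return (e.g. from a bad MFN during live migration)
 * is reported back to the ioctl() caller rather than surfacing as a
 * later page fault.
 */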
static struct seg_ops segmf_ops = {
	segmf_dup,
	segmf_unmap,
	segmf_free,
	segmf_fault,
	segmf_faulta,
	segmf_setprot,
	segmf_checkprot,
	(int (*)())segmf_kluster,
	(size_t (*)(struct seg *))NULL,	/* swapout */
	segmf_sync,
	segmf_incore,
	segmf_lockop,
	segmf_getprot,
	segmf_getoffset,
	segmf_gettype,
	segmf_getvp,
	segmf_advise,
	segmf_dump,
	segmf_pagelock,
	segmf_setpagesize,
	segmf_getmemid,
	segmf_getpolicy,
	segmf_capable
};