/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
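
/*
 * Device pager.
 *
 * Manages OBJT_DEVICE and OBJT_MGTDEVICE VM objects, which are backed by
 * device memory rather than by swap.  Pages are supplied by a per-object
 * struct cdev_pager_ops; the old_dev_pager_* routines at the bottom of
 * this file adapt the traditional d_mmap() interface to that framework.
 */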
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_phys.h>
#include <vm/uma.h>

static void dev_pager_init(void);
static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
    vm_ooffset_t, struct ucred *);
static void dev_pager_dealloc(vm_object_t);
static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *);
static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *);
static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
static void dev_pager_free_page(vm_object_t object, vm_page_t m);
static int dev_pager_populate(vm_object_t object, vm_pindex_t pidx,
    int fault_type, vm_prot_t, vm_pindex_t *first, vm_pindex_t *last);

/* list of device pager objects */
static struct pagerlst dev_pager_object_list;
/* protect list manipulation */
static struct mtx dev_pager_mtx;

const struct pagerops devicepagerops = {
	.pgo_kvme_type = KVME_TYPE_DEVICE,
	.pgo_init = dev_pager_init,
	.pgo_alloc = dev_pager_alloc,
	.pgo_dealloc = dev_pager_dealloc,
	.pgo_getpages = dev_pager_getpages,
	.pgo_putpages = dev_pager_putpages,
	.pgo_haspage = dev_pager_haspage,
};

const struct pagerops mgtdevicepagerops = {
	.pgo_kvme_type = KVME_TYPE_MGTDEVICE,
	.pgo_alloc = dev_pager_alloc,
	.pgo_dealloc = dev_pager_dealloc,
	.pgo_getpages = dev_pager_getpages,
	.pgo_putpages = dev_pager_putpages,
	.pgo_haspage = dev_pager_haspage,
	.pgo_populate = dev_pager_populate,
};

static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void old_dev_pager_dtor(void *handle);
static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

static const struct cdev_pager_ops old_dev_pager_ops = {
	.cdev_pg_ctor = old_dev_pager_ctor,
	.cdev_pg_dtor = old_dev_pager_dtor,
	.cdev_pg_fault = old_dev_pager_fault
};

static void
dev_pager_init(void)
{
	TAILQ_INIT(&dev_pager_object_list);
	mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF);
}

vm_object_t
cdev_pager_lookup(void *handle)
{
	vm_object_t object;

again:
	mtx_lock(&dev_pager_mtx);
	object = vm_pager_object_lookup(&dev_pager_object_list, handle);
	if (object != NULL && object->un_pager.devp.handle == NULL) {
		msleep(&object->un_pager.devp.handle, &dev_pager_mtx,
		    PVM | PDROP, "cdplkp", 0);
		vm_object_deallocate(object);
		goto again;
	}
	mtx_unlock(&dev_pager_mtx);
	return (object);
}
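
/*
 * Illustrative sketch (not part of this file): a driver typically supplies
 * its own cdev_pager_ops and calls cdev_pager_allocate() from its
 * d_mmap_single routine.  The mydrv_* names below are hypothetical.
 *
 *	static const struct cdev_pager_ops mydrv_pager_ops = {
 *		.cdev_pg_ctor = mydrv_pager_ctor,
 *		.cdev_pg_dtor = mydrv_pager_dtor,
 *		.cdev_pg_fault = mydrv_pager_fault,
 *	};
 *
 *	static int
 *	mydrv_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, struct vm_object **object, int nprot)
 *	{
 *		vm_object_t obj;
 *
 *		obj = cdev_pager_allocate(cdev, OBJT_DEVICE, &mydrv_pager_ops,
 *		    size, nprot, *offset, curthread->td_ucred);
 *		if (obj == NULL)
 *			return (EINVAL);
 *		*object = obj;
 *		return (0);
 *	}
 */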
vm_object_t
cdev_pager_allocate(void *handle, enum obj_type tp,
    const struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred)
{
	vm_object_t object;
	vm_pindex_t pindex;

	if (tp != OBJT_DEVICE && tp != OBJT_MGTDEVICE)
		return (NULL);
	KASSERT(tp == OBJT_MGTDEVICE || ops->cdev_pg_populate == NULL,
	    ("populate on unmanaged device pager"));

	/*
	 * Offset should be page aligned.
	 */
	if (foff & PAGE_MASK)
		return (NULL);

	/*
	 * Treat the mmap(2) file offset as an unsigned value for a
	 * device mapping.  This, in effect, allows a user to pass all
	 * possible off_t values as the mapping cookie to the driver.  At
	 * this point, we know that both foff and size are a multiple
	 * of the page size.  Do a check to avoid wrap.
	 */
	size = round_page(size);
	pindex = OFF_TO_IDX(foff) + OFF_TO_IDX(size);
	if (pindex > OBJ_MAX_SIZE || pindex < OFF_TO_IDX(foff) ||
	    pindex < OFF_TO_IDX(size))
		return (NULL);

again:
	mtx_lock(&dev_pager_mtx);

	/*
	 * Look up pager, creating as necessary.
	 */
	object = vm_pager_object_lookup(&dev_pager_object_list, handle);
	if (object == NULL) {
		vm_object_t object1;

		/*
		 * Allocate object and associate it with the pager.  Initialize
		 * the object's pg_color based upon the physical address of the
		 * device's memory.
		 */
		mtx_unlock(&dev_pager_mtx);
		object1 = vm_object_allocate(tp, pindex);
		mtx_lock(&dev_pager_mtx);
		object = vm_pager_object_lookup(&dev_pager_object_list, handle);
		if (object != NULL) {
			object1->type = OBJT_DEAD;
			vm_object_deallocate(object1);
			object1 = NULL;
			if (object->un_pager.devp.handle == NULL) {
				msleep(&object->un_pager.devp.handle,
				    &dev_pager_mtx, PVM | PDROP, "cdplkp", 0);
				vm_object_deallocate(object);
				goto again;
			}

			/*
			 * We raced with another thread while allocating
			 * the object.
			 */
			if (pindex > object->size)
				object->size = pindex;
			KASSERT(object->type == tp,
			    ("Inconsistent device pager type %p %d",
			    object, tp));
			KASSERT(object->un_pager.devp.ops == ops,
			    ("Inconsistent devops %p %p", object, ops));
		} else {
			u_short color;

			object = object1;
			object1 = NULL;
			object->handle = handle;
			object->un_pager.devp.ops = ops;
			TAILQ_INIT(&object->un_pager.devp.devp_pglist);
			TAILQ_INSERT_TAIL(&dev_pager_object_list, object,
			    pager_object_list);
			mtx_unlock(&dev_pager_mtx);
			if (ops->cdev_pg_populate != NULL)
				vm_object_set_flag(object, OBJ_POPULATE);
			if (ops->cdev_pg_ctor(handle, size, prot, foff,
			    cred, &color) != 0) {
				mtx_lock(&dev_pager_mtx);
				TAILQ_REMOVE(&dev_pager_object_list, object,
				    pager_object_list);
				wakeup(&object->un_pager.devp.handle);
				mtx_unlock(&dev_pager_mtx);
				object->type = OBJT_DEAD;
				vm_object_deallocate(object);
				object = NULL;
				mtx_lock(&dev_pager_mtx);
			} else {
				mtx_lock(&dev_pager_mtx);
				object->flags |= OBJ_COLORED;
				object->pg_color = color;
				object->un_pager.devp.handle = handle;
				wakeup(&object->un_pager.devp.handle);
			}
		}
		MPASS(object1 == NULL);
	} else {
		if (object->un_pager.devp.handle == NULL) {
			msleep(&object->un_pager.devp.handle,
			    &dev_pager_mtx, PVM | PDROP, "cdplkp", 0);
			vm_object_deallocate(object);
			goto again;
		}
		if (pindex > object->size)
			object->size = pindex;
		KASSERT(object->type == tp,
		    ("Inconsistent device pager type %p %d", object, tp));
	}
	mtx_unlock(&dev_pager_mtx);
	return (object);
}

static vm_object_t
dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred)
{
	return (cdev_pager_allocate(handle, OBJT_DEVICE, &old_dev_pager_ops,
	    size, prot, foff, cred));
}
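
/*
 * Remove a page previously installed by a cdev_pg_fault or populate
 * handler.  For OBJT_MGTDEVICE objects the page is unmapped everywhere
 * and dropped from the object's radix tree; for OBJT_DEVICE objects the
 * fake page is unlinked and destroyed.  The object must be write-locked
 * by the caller.
 */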
void
cdev_pager_free_page(vm_object_t object, vm_page_t m)
{
	if (object->type == OBJT_MGTDEVICE) {
		struct pctrie_iter pages;

		vm_page_iter_init(&pages, object);
		vm_radix_iter_lookup(&pages, m->pindex);
		cdev_mgtdev_pager_free_page(&pages, m);
	} else if (object->type == OBJT_DEVICE)
		dev_pager_free_page(object, m);
	else
		KASSERT(false,
		    ("Invalid device type obj %p m %p", object, m));
}

void
cdev_mgtdev_pager_free_page(struct pctrie_iter *pages, vm_page_t m)
{
	pmap_remove_all(m);
	vm_page_iter_remove(pages, m);
}

void
cdev_mgtdev_pager_free_pages(vm_object_t object)
{
	struct pctrie_iter pages;
	vm_page_t m;

	vm_page_iter_init(&pages, object);
	VM_OBJECT_WLOCK(object);
retry:
	KASSERT(pctrie_iter_is_reset(&pages),
	    ("%s: pctrie_iter not reset for retry", __func__));
	for (m = vm_radix_iter_lookup_ge(&pages, 0); m != NULL;
	    m = vm_radix_iter_step(&pages)) {
		if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL)) {
			pctrie_iter_reset(&pages);
			goto retry;
		}
		cdev_mgtdev_pager_free_page(&pages, m);
	}
	VM_OBJECT_WUNLOCK(object);
}

static void
dev_pager_free_page(vm_object_t object, vm_page_t m)
{
	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->type == OBJT_DEVICE &&
	    (m->oflags & VPO_UNMANAGED) != 0),
	    ("Managed device or page obj %p m %p", object, m));
	TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, plinks.q);
	vm_page_putfake(m);
}

static void
dev_pager_dealloc(vm_object_t object)
{
	vm_page_t m;

	VM_OBJECT_WUNLOCK(object);
	object->un_pager.devp.ops->cdev_pg_dtor(object->un_pager.devp.handle);

	mtx_lock(&dev_pager_mtx);
	TAILQ_REMOVE(&dev_pager_object_list, object, pager_object_list);
	mtx_unlock(&dev_pager_mtx);
	VM_OBJECT_WLOCK(object);

	if (object->type == OBJT_DEVICE) {
		/*
		 * Free up our fake pages.
		 */
		while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist))
		    != NULL) {
			if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
				continue;

			dev_pager_free_page(object, m);
		}
	}
	object->handle = NULL;
	object->type = OBJT_DEAD;
}
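
/*
 * Fault in a single page through the object's cdev_pg_fault method.  The
 * method is called with the object write-locked and may replace ma[0]
 * with a different (typically fictitious) page; see old_dev_pager_fault()
 * below for the semantics the d_mmap() compatibility shim provides.
 */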
static int
dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
    int *rahead)
{
	int error;

	/* Since our haspage reports zero after/before, the count is 1. */
	KASSERT(count == 1, ("%s: count %d", __func__, count));
	if (object->un_pager.devp.ops->cdev_pg_fault == NULL)
		return (VM_PAGER_FAIL);
	VM_OBJECT_WLOCK(object);
	error = object->un_pager.devp.ops->cdev_pg_fault(object,
	    IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]);

	VM_OBJECT_ASSERT_WLOCKED(object);

	if (error == VM_PAGER_OK) {
		KASSERT((object->type == OBJT_DEVICE &&
		    (ma[0]->oflags & VPO_UNMANAGED) != 0) ||
		    (object->type == OBJT_MGTDEVICE &&
		    (ma[0]->oflags & VPO_UNMANAGED) == 0),
		    ("Wrong page type %p %p", ma[0], object));
		if (object->type == OBJT_DEVICE) {
			TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist,
			    ma[0], plinks.q);
		}
		if (rbehind)
			*rbehind = 0;
		if (rahead)
			*rahead = 0;
	}
	VM_OBJECT_WUNLOCK(object);

	return (error);
}

static int
dev_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type,
    vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
{
	VM_OBJECT_ASSERT_WLOCKED(object);
	if (object->un_pager.devp.ops->cdev_pg_populate == NULL)
		return (VM_PAGER_FAIL);
	return (object->un_pager.devp.ops->cdev_pg_populate(object, pidx,
	    fault_type, max_prot, first, last));
}
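
/*
 * Compatibility shim: implement cdev_pg_fault on top of the traditional
 * d_mmap() interface.  A minimal d_mmap routine only has to translate the
 * mapping offset into a physical address; illustrative sketch below, with
 * hypothetical mydrv_* names and MYDRV_* constants:
 *
 *	static int
 *	mydrv_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
 *	    int nprot, vm_memattr_t *memattr)
 *	{
 *		if (offset >= MYDRV_REGION_SIZE)
 *			return (EINVAL);
 *		*paddr = MYDRV_REGION_BASE + offset;
 *		return (0);
 *	}
 */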
static int
old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	vm_paddr_t paddr;
	vm_page_t m_paddr, page;
	struct cdev *dev;
	struct cdevsw *csw;
	struct file *fpop;
	struct thread *td;
	vm_memattr_t memattr, memattr1;
	int ref, ret;

	memattr = object->memattr;

	VM_OBJECT_WUNLOCK(object);

	dev = object->handle;
	csw = dev_refthread(dev, &ref);
	if (csw == NULL) {
		VM_OBJECT_WLOCK(object);
		return (VM_PAGER_FAIL);
	}
	td = curthread;
	fpop = td->td_fpop;
	td->td_fpop = NULL;
	ret = csw->d_mmap(dev, offset, &paddr, prot, &memattr);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	if (ret != 0) {
		printf(
		    "WARNING: %s: map function returns error %d\n",
		    __func__, ret);
		VM_OBJECT_WLOCK(object);
		return (VM_PAGER_FAIL);
	}

	/* If "paddr" is a real page, perform a sanity check on "memattr". */
	if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL &&
	    (memattr1 = pmap_page_get_memattr(m_paddr)) != memattr) {
		/*
		 * For the /dev/mem d_mmap routine to return the
		 * correct memattr, pmap_page_get_memattr() needs to
		 * be called, which we have just done above.
		 */
		if ((csw->d_flags & D_MEM) == 0) {
			printf("WARNING: Device driver %s has set "
			    "\"memattr\" inconsistently (drv %u pmap %u).\n",
			    csw->d_name, memattr, memattr1);
		}
		memattr = memattr1;
	}
	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
		/*
		 * If the passed in result page is a fake page, update it with
		 * the new physical address.
		 */
		page = *mres;
		VM_OBJECT_WLOCK(object);
		vm_page_updatefake(page, paddr, memattr);
	} else {
		/*
		 * Replace the passed in reqpage page with our own fake page
		 * and free up all of the original pages.
		 */
		page = vm_page_getfake(paddr, memattr);
		VM_OBJECT_WLOCK(object);
		vm_page_replace(page, object, (*mres)->pindex, *mres);
		*mres = page;
	}
	vm_page_valid(page);
	return (VM_PAGER_OK);
}

static void
dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
    int *rtvals)
{
	panic("dev_pager_putpages called");
}

static boolean_t
dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
    int *after)
{
	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	return (TRUE);
}

static int
old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	struct cdev *dev;
	struct cdevsw *csw;
	vm_memattr_t dummy;
	vm_ooffset_t off;
	vm_paddr_t paddr;
	unsigned int npages;
	int ref;

	/*
	 * Make sure this device can be mapped.
	 */
	dev = handle;
	csw = dev_refthread(dev, &ref);
	if (csw == NULL)
		return (ENXIO);

	/*
	 * Check that the specified range of the device allows the desired
	 * protection.
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	npages = OFF_TO_IDX(size);
	paddr = 0; /* Make paddr initialized for the case of size == 0. */
	for (off = foff; npages--; off += PAGE_SIZE) {
		if (csw->d_mmap(dev, off, &paddr, (int)prot, &dummy) != 0) {
			dev_relthread(dev, ref);
			return (EINVAL);
		}
	}

	dev_ref(dev);
	dev_relthread(dev, ref);
	*color = atop(paddr) - OFF_TO_IDX(off - PAGE_SIZE);
	return (0);
}

static void
old_dev_pager_dtor(void *handle)
{
	dev_rel(handle);
}