1 /*- 2 * Copyright (c) 2017, 2018 The FreeBSD Foundation 3 * All rights reserved. 4 * Copyright (c) 2018, 2019 Intel Corporation 5 * 6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 7 * under sponsorship from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_acpi.h" 35 #include "opt_ddb.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/bio.h> 40 #include <sys/bus.h> 41 #include <sys/conf.h> 42 #include <sys/devicestat.h> 43 #include <sys/disk.h> 44 #include <sys/efi.h> 45 #include <sys/kernel.h> 46 #include <sys/kthread.h> 47 #include <sys/limits.h> 48 #include <sys/lock.h> 49 #include <sys/malloc.h> 50 #include <sys/module.h> 51 #include <sys/rwlock.h> 52 #include <sys/sglist.h> 53 #include <sys/uio.h> 54 #include <sys/uuid.h> 55 #include <geom/geom.h> 56 #include <geom/geom_int.h> 57 #include <machine/vmparam.h> 58 #include <vm/vm.h> 59 #include <vm/vm_object.h> 60 #include <vm/vm_page.h> 61 #include <vm/vm_pager.h> 62 #include <contrib/dev/acpica/include/acpi.h> 63 #include <contrib/dev/acpica/include/accommon.h> 64 #include <contrib/dev/acpica/include/acuuid.h> 65 #include <dev/acpica/acpivar.h> 66 #include <dev/nvdimm/nvdimm_var.h> 67 68 #define UUID_INITIALIZER_VOLATILE_MEMORY \ 69 {0x7305944f,0xfdda,0x44e3,0xb1,0x6c,{0x3f,0x22,0xd2,0x52,0xe5,0xd0}} 70 #define UUID_INITIALIZER_PERSISTENT_MEMORY \ 71 {0x66f0d379,0xb4f3,0x4074,0xac,0x43,{0x0d,0x33,0x18,0xb7,0x8c,0xdb}} 72 #define UUID_INITIALIZER_CONTROL_REGION \ 73 {0x92f701f6,0x13b4,0x405d,0x91,0x0b,{0x29,0x93,0x67,0xe8,0x23,0x4c}} 74 #define UUID_INITIALIZER_DATA_REGION \ 75 {0x91af0530,0x5d86,0x470e,0xa6,0xb0,{0x0a,0x2d,0xb9,0x40,0x82,0x49}} 76 #define UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK \ 77 {0x77ab535a,0x45fc,0x624b,0x55,0x60,{0xf7,0xb2,0x81,0xd1,0xf9,0x6e}} 78 #define UUID_INITIALIZER_VOLATILE_VIRTUAL_CD \ 79 {0x3d5abd30,0x4175,0x87ce,0x6d,0x64,{0xd2,0xad,0xe5,0x23,0xc4,0xbb}} 80 #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK \ 81 {0x5cea02c9,0x4d07,0x69d3,0x26,0x9f,{0x44,0x96,0xfb,0xe0,0x96,0xf9}} 82 #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD \ 83 {0x08018188,0x42cd,0xbb48,0x10,0x0f,{0x53,0x87,0xd5,0x3d,0xed,0x3d}} 84 85 static struct nvdimm_SPA_uuid_list_elm { 86 const char *u_name; 87 struct uuid u_id; 88 const bool u_usr_acc; 89 } nvdimm_SPA_uuid_list[] = { 90 [SPA_TYPE_VOLATILE_MEMORY] = { 91 .u_name = "VOLA MEM ", 92 .u_id = UUID_INITIALIZER_VOLATILE_MEMORY, 93 .u_usr_acc = true, 94 }, 95 [SPA_TYPE_PERSISTENT_MEMORY] = { 96 .u_name = "PERS MEM", 97 .u_id = UUID_INITIALIZER_PERSISTENT_MEMORY, 98 .u_usr_acc = true, 99 }, 100 [SPA_TYPE_CONTROL_REGION] = { 101 .u_name = "CTRL RG ", 102 .u_id = UUID_INITIALIZER_CONTROL_REGION, 103 .u_usr_acc = false, 104 }, 105 [SPA_TYPE_DATA_REGION] = { 106 .u_name = "DATA RG ", 107 .u_id = UUID_INITIALIZER_DATA_REGION, 108 .u_usr_acc = true, 109 }, 110 [SPA_TYPE_VOLATILE_VIRTUAL_DISK] = { 111 .u_name = "VIRT DSK", 112 .u_id = UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK, 113 .u_usr_acc = true, 114 }, 115 [SPA_TYPE_VOLATILE_VIRTUAL_CD] = { 116 .u_name = "VIRT CD ", 117 .u_id = UUID_INITIALIZER_VOLATILE_VIRTUAL_CD, 118 .u_usr_acc = true, 119 }, 120 [SPA_TYPE_PERSISTENT_VIRTUAL_DISK] = { 121 .u_name = "PV DSK ", 122 .u_id = UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK, 123 .u_usr_acc = true, 124 }, 125 [SPA_TYPE_PERSISTENT_VIRTUAL_CD] = { 126 .u_name = "PV CD ", 127 .u_id = UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD, 128 .u_usr_acc = true, 129 }, 130 }; 131 132 enum SPA_mapping_type 133 nvdimm_spa_type_from_uuid(struct uuid *uuid) 134 { 135 int j; 136 137 for (j = 0; j < nitems(nvdimm_SPA_uuid_list); j++) { 138 if (uuidcmp(uuid, &nvdimm_SPA_uuid_list[j].u_id) != 0) 139 continue; 140 return (j); 141 } 142 return (SPA_TYPE_UNKNOWN); 143 } 144 145 static vm_memattr_t 146 nvdimm_spa_memattr(struct SPA_mapping *spa) 147 { 148 vm_memattr_t mode; 149 150 if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0) 151 mode = VM_MEMATTR_WRITE_BACK; 152 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0) 153 mode = VM_MEMATTR_WRITE_THROUGH; 154 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0) 155 mode = VM_MEMATTR_WRITE_COMBINING; 156 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0) 157 mode = VM_MEMATTR_WRITE_PROTECTED; 158 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0) 159 mode = VM_MEMATTR_UNCACHEABLE; 160 else { 161 if (bootverbose) 162 printf("SPA%d mapping attr unsupported\n", 163 spa->spa_nfit_idx); 164 mode = VM_MEMATTR_UNCACHEABLE; 165 } 166 return (mode); 167 } 168 169 static int 170 nvdimm_spa_uio(struct SPA_mapping *spa, struct uio *uio) 171 { 172 struct vm_page m, *ma; 173 off_t off; 174 vm_memattr_t mattr; 175 int error, n; 176 177 error = 0; 178 if (spa->spa_kva == NULL) { 179 mattr = nvdimm_spa_memattr(spa); 180 vm_page_initfake(&m, 0, mattr); 181 ma = &m; 182 while (uio->uio_resid > 0) { 183 if (uio->uio_offset >= spa->spa_len) 184 break; 185 off = spa->spa_phys_base + uio->uio_offset; 186 vm_page_updatefake(&m, trunc_page(off), mattr); 187 n = PAGE_SIZE; 188 if (n > uio->uio_resid) 189 n = uio->uio_resid; 190 error = uiomove_fromphys(&ma, off & PAGE_MASK, n, uio); 191 if (error != 0) 192 break; 193 } 194 } else { 195 while (uio->uio_resid > 0) { 196 if (uio->uio_offset >= spa->spa_len) 197 break; 198 n = INT_MAX; 199 if (n > uio->uio_resid) 200 n = uio->uio_resid; 201 if (uio->uio_offset + n > spa->spa_len) 202 n = spa->spa_len - uio->uio_offset; 203 error = uiomove((char *)spa->spa_kva + uio->uio_offset, 204 n, uio); 205 if (error != 0) 206 break; 207 } 208 } 209 return (error); 210 } 211 212 static int 213 nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int ioflag) 214 { 215 216 return (nvdimm_spa_uio(dev->si_drv1, uio)); 217 } 218 219 static int 220 nvdimm_spa_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 221 struct thread *td) 222 { 223 struct SPA_mapping *spa; 224 int error; 225 226 spa = dev->si_drv1; 227 error = 0; 228 switch (cmd) { 229 case DIOCGSECTORSIZE: 230 *(u_int *)data = DEV_BSIZE; 231 break; 232 case DIOCGMEDIASIZE: 233 *(off_t *)data = spa->spa_len; 234 break; 235 default: 236 error = ENOTTY; 237 break; 238 } 239 return (error); 240 } 241 242 static int 243 nvdimm_spa_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, 244 vm_object_t *objp, int nprot) 245 { 246 struct SPA_mapping *spa; 247 248 spa = dev->si_drv1; 249 if (spa->spa_obj == NULL) 250 return (ENXIO); 251 if (*offset >= spa->spa_len || *offset + size < *offset || 252 *offset + size > spa->spa_len) 253 return (EINVAL); 254 vm_object_reference(spa->spa_obj); 255 *objp = spa->spa_obj; 256 return (0); 257 } 258 259 static struct cdevsw spa_cdevsw = { 260 .d_version = D_VERSION, 261 .d_flags = D_DISK, 262 .d_name = "nvdimm_spa", 263 .d_read = nvdimm_spa_rw, 264 .d_write = nvdimm_spa_rw, 265 .d_ioctl = nvdimm_spa_ioctl, 266 .d_mmap_single = nvdimm_spa_mmap_single, 267 }; 268 269 static void 270 nvdimm_spa_g_all_unmapped(struct SPA_mapping *spa, struct bio *bp, 271 int rw) 272 { 273 struct vm_page maa[bp->bio_ma_n]; 274 vm_page_t ma[bp->bio_ma_n]; 275 vm_memattr_t mattr; 276 int i; 277 278 mattr = nvdimm_spa_memattr(spa); 279 for (i = 0; i < nitems(ma); i++) { 280 maa[i].flags = 0; 281 vm_page_initfake(&maa[i], spa->spa_phys_base + 282 trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr); 283 ma[i] = &maa[i]; 284 } 285 if (rw == BIO_READ) 286 pmap_copy_pages(ma, bp->bio_offset & PAGE_MASK, bp->bio_ma, 287 bp->bio_ma_offset, bp->bio_length); 288 else 289 pmap_copy_pages(bp->bio_ma, bp->bio_ma_offset, ma, 290 bp->bio_offset & PAGE_MASK, bp->bio_length); 291 } 292 293 static void 294 nvdimm_spa_g_thread(void *arg) 295 { 296 struct SPA_mapping *spa; 297 struct bio *bp; 298 struct uio auio; 299 struct iovec aiovec; 300 int error; 301 302 spa = arg; 303 for (;;) { 304 mtx_lock(&spa->spa_g_mtx); 305 for (;;) { 306 bp = bioq_takefirst(&spa->spa_g_queue); 307 if (bp != NULL) 308 break; 309 msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO, 310 "spa_g", 0); 311 if (!spa->spa_g_proc_run) { 312 spa->spa_g_proc_exiting = true; 313 wakeup(&spa->spa_g_queue); 314 mtx_unlock(&spa->spa_g_mtx); 315 kproc_exit(0); 316 } 317 continue; 318 } 319 mtx_unlock(&spa->spa_g_mtx); 320 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && 321 bp->bio_cmd != BIO_FLUSH) { 322 error = EOPNOTSUPP; 323 goto completed; 324 } 325 326 error = 0; 327 if (bp->bio_cmd == BIO_FLUSH) { 328 if (spa->spa_kva != NULL) { 329 pmap_large_map_wb(spa->spa_kva, spa->spa_len); 330 } else { 331 pmap_flush_cache_phys_range( 332 (vm_paddr_t)spa->spa_phys_base, 333 (vm_paddr_t)spa->spa_phys_base + 334 spa->spa_len, nvdimm_spa_memattr(spa)); 335 } 336 /* 337 * XXX flush IMC 338 */ 339 goto completed; 340 } 341 342 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 343 if (spa->spa_kva != NULL) { 344 aiovec.iov_base = (char *)spa->spa_kva + 345 bp->bio_offset; 346 aiovec.iov_len = bp->bio_length; 347 auio.uio_iov = &aiovec; 348 auio.uio_iovcnt = 1; 349 auio.uio_resid = bp->bio_length; 350 auio.uio_offset = bp->bio_offset; 351 auio.uio_segflg = UIO_SYSSPACE; 352 auio.uio_rw = bp->bio_cmd == BIO_READ ? 353 UIO_WRITE : UIO_READ; 354 auio.uio_td = curthread; 355 error = uiomove_fromphys(bp->bio_ma, 356 bp->bio_ma_offset, bp->bio_length, &auio); 357 bp->bio_resid = auio.uio_resid; 358 } else { 359 nvdimm_spa_g_all_unmapped(spa, bp, bp->bio_cmd); 360 bp->bio_resid = bp->bio_length; 361 error = 0; 362 } 363 } else { 364 aiovec.iov_base = bp->bio_data; 365 aiovec.iov_len = bp->bio_length; 366 auio.uio_iov = &aiovec; 367 auio.uio_iovcnt = 1; 368 auio.uio_resid = bp->bio_length; 369 auio.uio_offset = bp->bio_offset; 370 auio.uio_segflg = UIO_SYSSPACE; 371 auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ : 372 UIO_WRITE; 373 auio.uio_td = curthread; 374 error = nvdimm_spa_uio(spa, &auio); 375 bp->bio_resid = auio.uio_resid; 376 } 377 bp->bio_bcount = bp->bio_length; 378 devstat_end_transaction_bio(spa->spa_g_devstat, bp); 379 completed: 380 bp->bio_completed = bp->bio_length; 381 g_io_deliver(bp, error); 382 } 383 } 384 385 static void 386 nvdimm_spa_g_start(struct bio *bp) 387 { 388 struct SPA_mapping *spa; 389 390 spa = bp->bio_to->geom->softc; 391 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { 392 mtx_lock(&spa->spa_g_stat_mtx); 393 devstat_start_transaction_bio(spa->spa_g_devstat, bp); 394 mtx_unlock(&spa->spa_g_stat_mtx); 395 } 396 mtx_lock(&spa->spa_g_mtx); 397 bioq_disksort(&spa->spa_g_queue, bp); 398 wakeup(&spa->spa_g_queue); 399 mtx_unlock(&spa->spa_g_mtx); 400 } 401 402 static int 403 nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e) 404 { 405 406 return (0); 407 } 408 409 struct g_class nvdimm_spa_g_class = { 410 .name = "SPA", 411 .version = G_VERSION, 412 .start = nvdimm_spa_g_start, 413 .access = nvdimm_spa_g_access, 414 }; 415 DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa); 416 417 int 418 nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, 419 enum SPA_mapping_type spa_type) 420 { 421 struct make_dev_args mda; 422 struct sglist *spa_sg; 423 int error, error1; 424 425 spa->spa_type = spa_type; 426 spa->spa_domain = ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ? 427 nfitaddr->ProximityDomain : -1; 428 spa->spa_nfit_idx = nfitaddr->RangeIndex; 429 spa->spa_phys_base = nfitaddr->Address; 430 spa->spa_len = nfitaddr->Length; 431 spa->spa_efi_mem_flags = nfitaddr->MemoryMapping; 432 if (bootverbose) { 433 printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n", 434 spa->spa_nfit_idx, 435 (uintmax_t)spa->spa_phys_base, (uintmax_t)spa->spa_len, 436 nvdimm_SPA_uuid_list[spa_type].u_name, 437 spa->spa_efi_mem_flags); 438 } 439 if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc) 440 return (0); 441 442 error1 = pmap_large_map(spa->spa_phys_base, spa->spa_len, 443 &spa->spa_kva, nvdimm_spa_memattr(spa)); 444 if (error1 != 0) { 445 printf("NVDIMM SPA%d cannot map into KVA, error %d\n", 446 spa->spa_nfit_idx, error1); 447 spa->spa_kva = NULL; 448 } 449 450 spa_sg = sglist_alloc(1, M_WAITOK); 451 error = sglist_append_phys(spa_sg, spa->spa_phys_base, 452 spa->spa_len); 453 if (error == 0) { 454 spa->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, spa->spa_len, 455 VM_PROT_ALL, 0, NULL); 456 if (spa->spa_obj == NULL) { 457 printf("NVDIMM SPA%d failed to alloc vm object", 458 spa->spa_nfit_idx); 459 sglist_free(spa_sg); 460 } 461 } else { 462 printf("NVDIMM SPA%d failed to init sglist, error %d", 463 spa->spa_nfit_idx, error); 464 sglist_free(spa_sg); 465 } 466 467 make_dev_args_init(&mda); 468 mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME; 469 mda.mda_devsw = &spa_cdevsw; 470 mda.mda_cr = NULL; 471 mda.mda_uid = UID_ROOT; 472 mda.mda_gid = GID_OPERATOR; 473 mda.mda_mode = 0660; 474 mda.mda_si_drv1 = spa; 475 error = make_dev_s(&mda, &spa->spa_dev, "nvdimm_spa%d", 476 spa->spa_nfit_idx); 477 if (error != 0) { 478 printf("NVDIMM SPA%d cannot create devfs node, error %d\n", 479 spa->spa_nfit_idx, error); 480 if (error1 == 0) 481 error1 = error; 482 } 483 484 bioq_init(&spa->spa_g_queue); 485 mtx_init(&spa->spa_g_mtx, "spag", NULL, MTX_DEF); 486 mtx_init(&spa->spa_g_stat_mtx, "spagst", NULL, MTX_DEF); 487 spa->spa_g_proc_run = true; 488 spa->spa_g_proc_exiting = false; 489 error = kproc_create(nvdimm_spa_g_thread, spa, &spa->spa_g_proc, 0, 0, 490 "g_spa%d", spa->spa_nfit_idx); 491 if (error != 0) { 492 printf("NVDIMM SPA%d cannot create geom worker, error %d\n", 493 spa->spa_nfit_idx, error); 494 if (error1 == 0) 495 error1 = error; 496 } else { 497 g_topology_lock(); 498 spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d", 499 spa->spa_nfit_idx); 500 spa->spa_g->softc = spa; 501 spa->spa_p = g_new_providerf(spa->spa_g, "spa%d", 502 spa->spa_nfit_idx); 503 spa->spa_p->mediasize = spa->spa_len; 504 spa->spa_p->sectorsize = DEV_BSIZE; 505 spa->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE | 506 G_PF_ACCEPT_UNMAPPED; 507 g_error_provider(spa->spa_p, 0); 508 spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx, 509 DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, 510 DEVSTAT_PRIORITY_MAX); 511 g_topology_unlock(); 512 } 513 return (error1); 514 } 515 516 void 517 nvdimm_spa_fini(struct SPA_mapping *spa) 518 { 519 520 mtx_lock(&spa->spa_g_mtx); 521 spa->spa_g_proc_run = false; 522 wakeup(&spa->spa_g_queue); 523 while (!spa->spa_g_proc_exiting) 524 msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO, "spa_e", 0); 525 mtx_unlock(&spa->spa_g_mtx); 526 if (spa->spa_g != NULL) { 527 g_topology_lock(); 528 g_wither_geom(spa->spa_g, ENXIO); 529 g_topology_unlock(); 530 spa->spa_g = NULL; 531 spa->spa_p = NULL; 532 } 533 if (spa->spa_g_devstat != NULL) { 534 devstat_remove_entry(spa->spa_g_devstat); 535 spa->spa_g_devstat = NULL; 536 } 537 if (spa->spa_dev != NULL) { 538 destroy_dev(spa->spa_dev); 539 spa->spa_dev = NULL; 540 } 541 vm_object_deallocate(spa->spa_obj); 542 if (spa->spa_kva != NULL) { 543 pmap_large_unmap(spa->spa_kva, spa->spa_len); 544 spa->spa_kva = NULL; 545 } 546 mtx_destroy(&spa->spa_g_mtx); 547 mtx_destroy(&spa->spa_g_stat_mtx); 548 } 549