1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2018 Joyent, Inc. 14 * Copyright 2024 Hans Rosenfeld 15 * 16 * Convenience routines for identifying current or available devices that are 17 * suitable for PCI passthrough to a bhyve guest. 18 */ 19 20 #include <libdevinfo.h> 21 #include <libppt.h> 22 23 #include <sys/param.h> 24 #include <sys/stat.h> 25 #include <sys/list.h> 26 #include <strings.h> 27 #include <stddef.h> 28 #include <stdlib.h> 29 #include <stdio.h> 30 #include <errno.h> 31 #include <pcidb.h> 32 #include <glob.h> 33 34 typedef struct node_data { 35 pcidb_hdl_t *nd_db; 36 list_t nd_matches; 37 nvlist_t *nd_nvl; 38 int nd_err; 39 } node_data_t; 40 41 typedef struct ppt_match { 42 list_node_t pm_list; 43 char pm_path[MAXPATHLEN]; 44 char pm_vendor[5]; 45 char pm_device[5]; 46 } ppt_match_t; 47 48 static boolean_t 49 is_pci(di_node_t di_node) 50 { 51 char *svals; 52 53 if (di_prop_lookup_strings(DDI_DEV_T_ANY, di_parent_node(di_node), 54 "device_type", &svals) != 1) 55 return (B_FALSE); 56 57 return (strcmp(svals, "pci") == 0 || strcmp(svals, "pciex") == 0); 58 } 59 60 static int 61 populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival) 62 { 63 char val[20]; 64 int *ivals; 65 int err; 66 67 if (di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals) != 1) 68 return (errno); 69 70 (void) snprintf(val, sizeof (val), "%x", ivals[0]); 71 72 err = nvlist_add_string(nvl, name, val); 73 74 if (err == 0 && ival != NULL) 75 *ival = ivals[0]; 76 77 return (err); 78 } 79 80 static int 81 dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen) 82 { 83 pcidb_vendor_t *vend = NULL; 84 pcidb_device_t *dev = NULL; 85 86 if ((vend = pcidb_lookup_vendor(db, vid)) == NULL) 87 return (ENOENT); 88 89 if ((dev = pcidb_lookup_device_by_vendor(vend, did)) == NULL) 90 return (ENOENT); 91 92 (void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vend), 93 pcidb_device_name(dev)); 94 95 return (0); 96 } 97 98 static nvlist_t * 99 dev_getinfo(di_node_t di_node, pcidb_hdl_t *db, 100 const char *dev, const char *path) 101 { 102 char label[MAXPATHLEN]; 103 nvlist_t *nvl = NULL; 104 int vid, did; 105 int err; 106 107 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) 108 goto out; 109 110 if (dev != NULL && (err = nvlist_add_string(nvl, "dev", dev)) != 0) 111 goto out; 112 if ((err = nvlist_add_string(nvl, "path", path)) != 0) 113 goto out; 114 if ((err = populate_int_prop(di_node, nvl, "vendor-id", &vid)) != 0) 115 goto out; 116 if ((err = populate_int_prop(di_node, nvl, "device-id", &did)) != 0) 117 goto out; 118 if ((err = populate_int_prop(di_node, nvl, "revision-id", NULL)) != 0) 119 goto out; 120 121 /* 122 * Not all PCI(e) devices have a subsystem id and subsystem vendor id, 123 * in which case these properties don't exist. 124 */ 125 (void) populate_int_prop(di_node, nvl, "subsystem-vendor-id", NULL); 126 (void) populate_int_prop(di_node, nvl, "subsystem-id", NULL); 127 128 err = dev_getlabel(db, vid, did, label, sizeof (label)); 129 130 if (err == 0) { 131 err = nvlist_add_string(nvl, "label", label); 132 } else if (err == ENOENT) { 133 err = 0; 134 } 135 136 out: 137 if (err) { 138 nvlist_free(nvl); 139 errno = err; 140 return (NULL); 141 } 142 143 return (nvl); 144 } 145 146 /* 147 * /devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1 148 */ 149 static const char * 150 fs_to_phys_path(char *fspath) 151 { 152 const char prefix[] = "/devices"; 153 char *c; 154 155 if ((c = strrchr(fspath, ':')) != NULL && strcmp(c, ":ppt") == 0) 156 *c = '\0'; 157 158 c = fspath; 159 160 if (strncmp(c, prefix, sizeof (prefix) - 1) == 0) 161 c += sizeof (prefix) - 1; 162 163 return (c); 164 } 165 166 /* 167 * Return an nvlist representing the mappings of /dev/ppt* devices to physical 168 * devices. Of the form: 169 * 170 * /pci@0,0/... { 171 * dev: "/dev/ppt0" 172 * path: "/pci@0,0/..." 173 * vendor-id: "8086" 174 * device-id: "1528" 175 * subsystem-vendor-id: "8086" 176 * subsystem-id: "1528" 177 * revision-id: "1" 178 * label: "Intel Corporation ..." 179 * }, 180 * /pci@0,0/... 181 * 182 * The nvlist should be freed by the caller. 183 */ 184 nvlist_t * 185 ppt_list_assigned(void) 186 { 187 di_node_t di_root = DI_NODE_NIL; 188 pcidb_hdl_t *db = NULL; 189 nvlist_t *nvl = NULL; 190 glob_t gl; 191 int err; 192 193 bzero(&gl, sizeof (gl)); 194 195 if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL) 196 return (NULL); 197 198 if ((db = pcidb_open(PCIDB_VERSION)) == NULL) { 199 err = errno; 200 goto out; 201 } 202 203 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) 204 goto out; 205 206 if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR, 207 NULL, &gl)) != 0) { 208 err = (err == GLOB_NOMATCH) ? 0 : errno; 209 goto out; 210 } 211 212 for (size_t i = 0; i < gl.gl_pathc; i++) { 213 char fspath[MAXPATHLEN]; 214 nvlist_t *info_nvl; 215 di_node_t di_node; 216 const char *path; 217 218 if (!S_ISLNK(gl.gl_statv[i]->st_mode)) 219 continue; 220 221 if (realpath(gl.gl_pathv[i], fspath) == NULL) { 222 err = errno; 223 goto out; 224 } 225 226 path = fs_to_phys_path(fspath); 227 228 /* 229 * path argument is treated as const. 230 */ 231 if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) { 232 err = errno; 233 goto out; 234 } 235 236 if (!is_pci(di_node)) 237 continue; 238 239 info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path); 240 241 if (info_nvl == NULL) { 242 err = errno; 243 goto out; 244 } 245 246 err = nvlist_add_nvlist(nvl, path, info_nvl); 247 nvlist_free(info_nvl); 248 249 if (err) 250 goto out; 251 } 252 253 out: 254 if (di_root != DI_NODE_NIL) 255 di_fini(di_root); 256 257 pcidb_close(db); 258 globfree(&gl); 259 260 if (err) { 261 nvlist_free(nvl); 262 errno = err; 263 return (NULL); 264 } 265 266 return (nvl); 267 } 268 269 /* 270 * Read in our list of potential PPT devices. A boot-module provided file 271 * explicitly over-rides anything delivered. 272 */ 273 static int 274 get_matches(list_t *listp) 275 { 276 int err = 0; 277 FILE *fp; 278 279 list_create(listp, sizeof (ppt_match_t), 280 offsetof(ppt_match_t, pm_list)); 281 282 if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) { 283 if (errno != ENOENT) 284 return (errno); 285 286 if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) { 287 if (errno == ENOENT) 288 return (0); 289 return (errno); 290 } 291 } 292 293 for (;;) { 294 char *line = NULL; 295 ppt_match_t *pm; 296 size_t cap = 0; 297 ssize_t read; 298 299 if ((read = getline(&line, &cap, fp)) <= 0) { 300 free(line); 301 break; 302 } 303 304 if (line[read - 1] == '\n') 305 line[read - 1] = '\0'; 306 307 if ((pm = malloc(sizeof (*pm))) == NULL) { 308 err = errno; 309 free(line); 310 goto out; 311 } 312 313 bzero(pm, sizeof (*pm)); 314 315 if (sscanf(line, "pciex%4s,%4s", &pm->pm_vendor, 316 &pm->pm_device) == 2 || 317 sscanf(line, "pci%4s,%4s", &pm->pm_vendor, 318 &pm->pm_device) == 2 || 319 sscanf(line, "pciex%4s", &pm->pm_vendor) == 1 || 320 sscanf(line, "pci%4s", &pm->pm_vendor) == 1) { 321 list_insert_tail(listp, pm); 322 } else if (line[0] == '/') { 323 (void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path)); 324 list_insert_tail(listp, pm); 325 } else { 326 /* 327 * Ignore any line we don't understand. 328 */ 329 free(pm); 330 } 331 332 free(line); 333 } 334 335 out: 336 (void) fclose(fp); 337 return (err); 338 } 339 340 static boolean_t 341 match_ppt(list_t *matches, nvlist_t *nvl) 342 { 343 char *vendor; 344 char *device; 345 char *path; 346 347 if (nvlist_lookup_string(nvl, "path", &path) != 0 || 348 nvlist_lookup_string(nvl, "vendor-id", &vendor) != 0 || 349 nvlist_lookup_string(nvl, "device-id", &device) != 0) 350 return (B_FALSE); 351 352 for (ppt_match_t *pm = list_head(matches); pm != NULL; 353 pm = list_next(matches, pm)) { 354 if (pm->pm_path[0] != '\0' && strcmp(pm->pm_path, path) == 0) 355 return (B_TRUE); 356 357 if (pm->pm_vendor[0] != '\0' && 358 strcmp(pm->pm_vendor, vendor) == 0) { 359 if (pm->pm_device[0] == '\0') 360 return (B_TRUE); 361 if (strcmp(pm->pm_device, device) == 0) 362 return (B_TRUE); 363 } 364 } 365 366 return (B_FALSE); 367 } 368 369 static int 370 inspect_node(di_node_t di_node, void *arg) 371 { 372 node_data_t *data = arg; 373 nvlist_t *info_nvl = NULL; 374 char *devname = NULL; 375 const char *driver; 376 char *path = NULL; 377 378 if (!is_pci(di_node)) 379 return (DI_WALK_CONTINUE); 380 381 driver = di_driver_name(di_node); 382 383 if (driver != NULL && strcmp(driver, "ppt") == 0) { 384 if (asprintf(&devname, "/dev/ppt%d", 385 di_instance(di_node)) < 0) { 386 data->nd_err = errno; 387 goto out; 388 } 389 } 390 391 if ((path = di_devfs_path(di_node)) == NULL) { 392 data->nd_err = ENOENT; 393 goto out; 394 } 395 396 info_nvl = dev_getinfo(di_node, data->nd_db, devname, path); 397 398 if (info_nvl == NULL) 399 goto out; 400 401 if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl)) 402 goto out; 403 404 data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl); 405 406 out: 407 free(path); 408 free(devname); 409 nvlist_free(info_nvl); 410 return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE); 411 } 412 413 /* 414 * Like ppt_list_assigned() output, but includes all devices that could be used 415 * for passthrough, whether assigned or not. 416 */ 417 nvlist_t * 418 ppt_list(void) 419 { 420 node_data_t nd = { NULL, }; 421 di_node_t di_root; 422 int err; 423 424 if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL) 425 return (NULL); 426 427 if ((err = get_matches(&nd.nd_matches)) != 0) 428 goto out; 429 430 if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) { 431 err = errno; 432 goto out; 433 } 434 435 if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0) 436 goto out; 437 438 if ((err = di_walk_node(di_root, DI_WALK_CLDFIRST, 439 &nd, inspect_node)) != 0) 440 goto out; 441 442 err = nd.nd_err; 443 444 out: 445 pcidb_close(nd.nd_db); 446 447 for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) { 448 ppt_match_t *next = list_next(&nd.nd_matches, pm); 449 free(pm); 450 pm = next; 451 } 452 453 if (di_root != DI_NODE_NIL) 454 di_fini(di_root); 455 456 if (err) { 457 nvlist_free(nd.nd_nvl); 458 errno = err; 459 return (NULL); 460 } 461 462 return (nd.nd_nvl); 463 } 464 465 /* 466 * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX" 467 * that is bound to it, if any. The "/devices/" prefix is optional. The 468 * physical path may have the ":ppt" minor name suffix. 469 * 470 * Returns ENOENT if no such PPT device exists. 471 */ 472 int 473 ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen) 474 { 475 char fspath[MAXPATHLEN] = ""; 476 nvpair_t *nvp = NULL; 477 const char *devpath; 478 int err = ENOENT; 479 nvlist_t *nvl; 480 481 if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath)) 482 return (ENAMETOOLONG); 483 484 devpath = fs_to_phys_path(fspath); 485 486 if ((nvl = ppt_list_assigned()) == NULL) 487 return (errno); 488 489 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 490 const char *name = nvpair_name(nvp); 491 char *ppt = NULL; 492 nvlist_t *props; 493 494 (void) nvpair_value_nvlist(nvp, &props); 495 496 if (strcmp(name, devpath) == 0) { 497 (void) nvlist_lookup_string(props, "dev", &ppt); 498 499 err = 0; 500 501 if (strlcpy(buf, ppt, buflen) >= buflen) 502 err = ENAMETOOLONG; 503 break; 504 } 505 } 506 507 nvlist_free(nvl); 508 return (err); 509 } 510