1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 * Copyright 2024 Hans Rosenfeld
15 *
16 * Convenience routines for identifying current or available devices that are
17 * suitable for PCI passthrough to a bhyve guest.
18 */
19
20 #include <libdevinfo.h>
21 #include <libppt.h>
22
23 #include <sys/param.h>
24 #include <sys/stat.h>
25 #include <sys/list.h>
26 #include <strings.h>
27 #include <stddef.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <errno.h>
31 #include <pcidb.h>
32 #include <glob.h>
33
34 typedef struct node_data {
35 pcidb_hdl_t *nd_db;
36 list_t nd_matches;
37 nvlist_t *nd_nvl;
38 int nd_err;
39 } node_data_t;
40
41 typedef struct ppt_match {
42 list_node_t pm_list;
43 char pm_path[MAXPATHLEN];
44 char pm_vendor[5];
45 char pm_device[5];
46 } ppt_match_t;
47
48 static boolean_t
is_pci(di_node_t di_node)49 is_pci(di_node_t di_node)
50 {
51 char *svals;
52
53 if (di_prop_lookup_strings(DDI_DEV_T_ANY, di_parent_node(di_node),
54 "device_type", &svals) != 1)
55 return (B_FALSE);
56
57 return (strcmp(svals, "pci") == 0 || strcmp(svals, "pciex") == 0);
58 }
59
60 static int
populate_int_prop(di_node_t di_node,nvlist_t * nvl,const char * name,int * ival)61 populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival)
62 {
63 char val[20];
64 int *ivals;
65 int err;
66
67 if (di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals) != 1)
68 return (errno);
69
70 (void) snprintf(val, sizeof (val), "%x", ivals[0]);
71
72 err = nvlist_add_string(nvl, name, val);
73
74 if (err == 0 && ival != NULL)
75 *ival = ivals[0];
76
77 return (err);
78 }
79
80 static int
dev_getlabel(pcidb_hdl_t * db,int vid,int did,char * buf,size_t buflen)81 dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen)
82 {
83 pcidb_vendor_t *vend = NULL;
84 pcidb_device_t *dev = NULL;
85
86 if ((vend = pcidb_lookup_vendor(db, vid)) == NULL)
87 return (ENOENT);
88
89 if ((dev = pcidb_lookup_device_by_vendor(vend, did)) == NULL)
90 return (ENOENT);
91
92 (void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vend),
93 pcidb_device_name(dev));
94
95 return (0);
96 }
97
98 static nvlist_t *
dev_getinfo(di_node_t di_node,pcidb_hdl_t * db,const char * dev,const char * path)99 dev_getinfo(di_node_t di_node, pcidb_hdl_t *db,
100 const char *dev, const char *path)
101 {
102 char label[MAXPATHLEN];
103 nvlist_t *nvl = NULL;
104 int vid, did;
105 int err;
106
107 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
108 goto out;
109
110 if (dev != NULL && (err = nvlist_add_string(nvl, "dev", dev)) != 0)
111 goto out;
112 if ((err = nvlist_add_string(nvl, "path", path)) != 0)
113 goto out;
114 if ((err = populate_int_prop(di_node, nvl, "vendor-id", &vid)) != 0)
115 goto out;
116 if ((err = populate_int_prop(di_node, nvl, "device-id", &did)) != 0)
117 goto out;
118 if ((err = populate_int_prop(di_node, nvl, "revision-id", NULL)) != 0)
119 goto out;
120
121 /*
122 * Not all PCI(e) devices have a subsystem id and subsystem vendor id,
123 * in which case these properties don't exist.
124 */
125 (void) populate_int_prop(di_node, nvl, "subsystem-vendor-id", NULL);
126 (void) populate_int_prop(di_node, nvl, "subsystem-id", NULL);
127
128 err = dev_getlabel(db, vid, did, label, sizeof (label));
129
130 if (err == 0) {
131 err = nvlist_add_string(nvl, "label", label);
132 } else if (err == ENOENT) {
133 err = 0;
134 }
135
136 out:
137 if (err) {
138 nvlist_free(nvl);
139 errno = err;
140 return (NULL);
141 }
142
143 return (nvl);
144 }
145
/*
 * Convert a /devices filesystem path into a physical device path:
 *
 *	/devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1
 *
 * A trailing ":ppt" minor name is cut off by writing a NUL into fspath, and a
 * leading "/devices" is skipped.  The returned pointer refers into fspath.
 */
static const char *
fs_to_phys_path(char *fspath)
{
	static const char devfs_root[] = "/devices";
	const size_t rootlen = sizeof (devfs_root) - 1;
	char *minor = strrchr(fspath, ':');

	if (minor != NULL && strcmp(minor, ":ppt") == 0)
		*minor = '\0';

	if (strncmp(fspath, devfs_root, rootlen) != 0)
		return (fspath);

	return (fspath + rootlen);
}
165
166 /*
167 * Return an nvlist representing the mappings of /dev/ppt* devices to physical
168 * devices. Of the form:
169 *
170 * /pci@0,0/... {
171 * dev: "/dev/ppt0"
172 * path: "/pci@0,0/..."
173 * vendor-id: "8086"
174 * device-id: "1528"
175 * subsystem-vendor-id: "8086"
176 * subsystem-id: "1528"
177 * revision-id: "1"
178 * label: "Intel Corporation ..."
179 * },
180 * /pci@0,0/...
181 *
182 * The nvlist should be freed by the caller.
183 */
184 nvlist_t *
ppt_list_assigned(void)185 ppt_list_assigned(void)
186 {
187 di_node_t di_root = DI_NODE_NIL;
188 pcidb_hdl_t *db = NULL;
189 nvlist_t *nvl = NULL;
190 glob_t gl;
191 int err;
192
193 bzero(&gl, sizeof (gl));
194
195 if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
196 return (NULL);
197
198 if ((db = pcidb_open(PCIDB_VERSION)) == NULL) {
199 err = errno;
200 goto out;
201 }
202
203 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
204 goto out;
205
206 if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR,
207 NULL, &gl)) != 0) {
208 err = (err == GLOB_NOMATCH) ? 0 : errno;
209 goto out;
210 }
211
212 for (size_t i = 0; i < gl.gl_pathc; i++) {
213 char fspath[MAXPATHLEN];
214 nvlist_t *info_nvl;
215 di_node_t di_node;
216 const char *path;
217
218 if (!S_ISLNK(gl.gl_statv[i]->st_mode))
219 continue;
220
221 if (realpath(gl.gl_pathv[i], fspath) == NULL) {
222 err = errno;
223 goto out;
224 }
225
226 path = fs_to_phys_path(fspath);
227
228 /*
229 * path argument is treated as const.
230 */
231 if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) {
232 err = errno;
233 goto out;
234 }
235
236 if (!is_pci(di_node))
237 continue;
238
239 info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path);
240
241 if (info_nvl == NULL) {
242 err = errno;
243 goto out;
244 }
245
246 err = nvlist_add_nvlist(nvl, path, info_nvl);
247 nvlist_free(info_nvl);
248
249 if (err)
250 goto out;
251 }
252
253 out:
254 if (di_root != DI_NODE_NIL)
255 di_fini(di_root);
256
257 pcidb_close(db);
258 globfree(&gl);
259
260 if (err) {
261 nvlist_free(nvl);
262 errno = err;
263 return (NULL);
264 }
265
266 return (nvl);
267 }
268
269 /*
270 * Read in our list of potential PPT devices. A boot-module provided file
271 * explicitly over-rides anything delivered.
272 */
273 static int
get_matches(list_t * listp)274 get_matches(list_t *listp)
275 {
276 int err = 0;
277 FILE *fp;
278
279 list_create(listp, sizeof (ppt_match_t),
280 offsetof(ppt_match_t, pm_list));
281
282 if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) {
283 if (errno != ENOENT)
284 return (errno);
285
286 if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) {
287 if (errno == ENOENT)
288 return (0);
289 return (errno);
290 }
291 }
292
293 for (;;) {
294 char *line = NULL;
295 ppt_match_t *pm;
296 size_t cap = 0;
297 ssize_t read;
298
299 if ((read = getline(&line, &cap, fp)) <= 0) {
300 free(line);
301 break;
302 }
303
304 if (line[read - 1] == '\n')
305 line[read - 1] = '\0';
306
307 if ((pm = malloc(sizeof (*pm))) == NULL) {
308 err = errno;
309 free(line);
310 goto out;
311 }
312
313 bzero(pm, sizeof (*pm));
314
315 if (sscanf(line, "pciex%4s,%4s", &pm->pm_vendor,
316 &pm->pm_device) == 2 ||
317 sscanf(line, "pci%4s,%4s", &pm->pm_vendor,
318 &pm->pm_device) == 2 ||
319 sscanf(line, "pciex%4s", &pm->pm_vendor) == 1 ||
320 sscanf(line, "pci%4s", &pm->pm_vendor) == 1) {
321 list_insert_tail(listp, pm);
322 } else if (line[0] == '/') {
323 (void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path));
324 list_insert_tail(listp, pm);
325 } else {
326 /*
327 * Ignore any line we don't understand.
328 */
329 free(pm);
330 }
331
332 free(line);
333 }
334
335 out:
336 (void) fclose(fp);
337 return (err);
338 }
339
340 static boolean_t
match_ppt(list_t * matches,nvlist_t * nvl)341 match_ppt(list_t *matches, nvlist_t *nvl)
342 {
343 char *vendor;
344 char *device;
345 char *path;
346
347 if (nvlist_lookup_string(nvl, "path", &path) != 0 ||
348 nvlist_lookup_string(nvl, "vendor-id", &vendor) != 0 ||
349 nvlist_lookup_string(nvl, "device-id", &device) != 0)
350 return (B_FALSE);
351
352 for (ppt_match_t *pm = list_head(matches); pm != NULL;
353 pm = list_next(matches, pm)) {
354 if (pm->pm_path[0] != '\0' && strcmp(pm->pm_path, path) == 0)
355 return (B_TRUE);
356
357 if (pm->pm_vendor[0] != '\0' &&
358 strcmp(pm->pm_vendor, vendor) == 0) {
359 if (pm->pm_device[0] == '\0')
360 return (B_TRUE);
361 if (strcmp(pm->pm_device, device) == 0)
362 return (B_TRUE);
363 }
364 }
365
366 return (B_FALSE);
367 }
368
369 static int
inspect_node(di_node_t di_node,void * arg)370 inspect_node(di_node_t di_node, void *arg)
371 {
372 node_data_t *data = arg;
373 nvlist_t *info_nvl = NULL;
374 char *devname = NULL;
375 const char *driver;
376 char *path = NULL;
377
378 if (!is_pci(di_node))
379 return (DI_WALK_CONTINUE);
380
381 driver = di_driver_name(di_node);
382
383 if (driver != NULL && strcmp(driver, "ppt") == 0) {
384 if (asprintf(&devname, "/dev/ppt%d",
385 di_instance(di_node)) < 0) {
386 data->nd_err = errno;
387 goto out;
388 }
389 }
390
391 if ((path = di_devfs_path(di_node)) == NULL) {
392 data->nd_err = ENOENT;
393 goto out;
394 }
395
396 info_nvl = dev_getinfo(di_node, data->nd_db, devname, path);
397
398 if (info_nvl == NULL)
399 goto out;
400
401 if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl))
402 goto out;
403
404 data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl);
405
406 out:
407 free(path);
408 free(devname);
409 nvlist_free(info_nvl);
410 return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE);
411 }
412
413 /*
414 * Like ppt_list_assigned() output, but includes all devices that could be used
415 * for passthrough, whether assigned or not.
416 */
417 nvlist_t *
ppt_list(void)418 ppt_list(void)
419 {
420 node_data_t nd = { NULL, };
421 di_node_t di_root;
422 int err;
423
424 if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
425 return (NULL);
426
427 if ((err = get_matches(&nd.nd_matches)) != 0)
428 goto out;
429
430 if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) {
431 err = errno;
432 goto out;
433 }
434
435 if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0)
436 goto out;
437
438 if ((err = di_walk_node(di_root, DI_WALK_CLDFIRST,
439 &nd, inspect_node)) != 0)
440 goto out;
441
442 err = nd.nd_err;
443
444 out:
445 pcidb_close(nd.nd_db);
446
447 for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) {
448 ppt_match_t *next = list_next(&nd.nd_matches, pm);
449 free(pm);
450 pm = next;
451 }
452
453 if (di_root != DI_NODE_NIL)
454 di_fini(di_root);
455
456 if (err) {
457 nvlist_free(nd.nd_nvl);
458 errno = err;
459 return (NULL);
460 }
461
462 return (nd.nd_nvl);
463 }
464
465 /*
466 * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX"
467 * that is bound to it, if any. The "/devices/" prefix is optional. The
468 * physical path may have the ":ppt" minor name suffix.
469 *
470 * Returns ENOENT if no such PPT device exists.
471 */
472 int
ppt_devpath_to_dev(const char * inpath,char * buf,size_t buflen)473 ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen)
474 {
475 char fspath[MAXPATHLEN] = "";
476 nvpair_t *nvp = NULL;
477 const char *devpath;
478 int err = ENOENT;
479 nvlist_t *nvl;
480
481 if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath))
482 return (ENAMETOOLONG);
483
484 devpath = fs_to_phys_path(fspath);
485
486 if ((nvl = ppt_list_assigned()) == NULL)
487 return (errno);
488
489 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
490 const char *name = nvpair_name(nvp);
491 char *ppt = NULL;
492 nvlist_t *props;
493
494 (void) nvpair_value_nvlist(nvp, &props);
495
496 if (strcmp(name, devpath) == 0) {
497 (void) nvlist_lookup_string(props, "dev", &ppt);
498
499 err = 0;
500
501 if (strlcpy(buf, ppt, buflen) >= buflen)
502 err = ENAMETOOLONG;
503 break;
504 }
505 }
506
507 nvlist_free(nvl);
508 return (err);
509 }
510