1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2018 Joyent, Inc.
14  * Copyright 2024 Hans Rosenfeld
15  *
16  * Convenience routines for identifying current or available devices that are
17  * suitable for PCI passthrough to a bhyve guest.
18  */
19 
20 #include <libdevinfo.h>
21 #include <libppt.h>
22 
23 #include <sys/param.h>
24 #include <sys/stat.h>
25 #include <sys/list.h>
26 #include <strings.h>
27 #include <stddef.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <errno.h>
31 #include <pcidb.h>
32 #include <glob.h>
33 
34 typedef struct node_data {
35 	pcidb_hdl_t *nd_db;
36 	list_t nd_matches;
37 	nvlist_t *nd_nvl;
38 	int nd_err;
39 } node_data_t;
40 
41 typedef struct ppt_match {
42 	list_node_t pm_list;
43 	char pm_path[MAXPATHLEN];
44 	char pm_vendor[5];
45 	char pm_device[5];
46 } ppt_match_t;
47 
48 static boolean_t
is_pci(di_node_t di_node)49 is_pci(di_node_t di_node)
50 {
51 	char *svals;
52 
53 	if (di_prop_lookup_strings(DDI_DEV_T_ANY, di_parent_node(di_node),
54 	    "device_type", &svals) != 1)
55 		return (B_FALSE);
56 
57 	return (strcmp(svals, "pci") == 0 || strcmp(svals, "pciex") == 0);
58 }
59 
60 static int
populate_int_prop(di_node_t di_node,nvlist_t * nvl,const char * name,int * ival)61 populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival)
62 {
63 	char val[20];
64 	int *ivals;
65 	int err;
66 
67 	if (di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals) != 1)
68 		return (errno);
69 
70 	(void) snprintf(val, sizeof (val), "%x", ivals[0]);
71 
72 	err = nvlist_add_string(nvl, name, val);
73 
74 	if (err == 0 && ival != NULL)
75 		*ival = ivals[0];
76 
77 	return (err);
78 }
79 
80 static int
dev_getlabel(pcidb_hdl_t * db,int vid,int did,char * buf,size_t buflen)81 dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen)
82 {
83 	pcidb_vendor_t *vend = NULL;
84 	pcidb_device_t *dev = NULL;
85 
86 	if ((vend = pcidb_lookup_vendor(db, vid)) == NULL)
87 		return (ENOENT);
88 
89 	if ((dev = pcidb_lookup_device_by_vendor(vend, did)) == NULL)
90 		return (ENOENT);
91 
92 	(void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vend),
93 	    pcidb_device_name(dev));
94 
95 	return (0);
96 }
97 
98 static nvlist_t *
dev_getinfo(di_node_t di_node,pcidb_hdl_t * db,const char * dev,const char * path)99 dev_getinfo(di_node_t di_node, pcidb_hdl_t *db,
100     const char *dev, const char *path)
101 {
102 	char label[MAXPATHLEN];
103 	nvlist_t *nvl = NULL;
104 	int vid, did;
105 	int err;
106 
107 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
108 		goto out;
109 
110 	if (dev != NULL && (err = nvlist_add_string(nvl, "dev", dev)) != 0)
111 		goto out;
112 	if ((err = nvlist_add_string(nvl, "path", path)) != 0)
113 		goto out;
114 	if ((err = populate_int_prop(di_node, nvl, "vendor-id", &vid)) != 0)
115 		goto out;
116 	if ((err = populate_int_prop(di_node, nvl, "device-id", &did)) != 0)
117 		goto out;
118 	if ((err = populate_int_prop(di_node, nvl, "revision-id", NULL)) != 0)
119 		goto out;
120 
121 	/*
122 	 * Not all PCI(e) devices have a subsystem id and subsystem vendor id,
123 	 * in which case these properties don't exist.
124 	 */
125 	(void) populate_int_prop(di_node, nvl, "subsystem-vendor-id", NULL);
126 	(void) populate_int_prop(di_node, nvl, "subsystem-id", NULL);
127 
128 	err = dev_getlabel(db, vid, did, label, sizeof (label));
129 
130 	if (err == 0) {
131 		err = nvlist_add_string(nvl, "label", label);
132 	} else if (err == ENOENT) {
133 		err = 0;
134 	}
135 
136 out:
137 	if (err) {
138 		nvlist_free(nvl);
139 		errno = err;
140 		return (NULL);
141 	}
142 
143 	return (nvl);
144 }
145 
/*
 * Turn a /devices file system path into a physical device path:
 *
 *	/devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1
 *
 * A trailing ":ppt" minor name is cut off by writing a NUL into fspath, and a
 * leading "/devices" is skipped.  Both parts are optional.  The returned
 * pointer points into fspath.
 */
static const char *
fs_to_phys_path(char *fspath)
{
	static const char devices[] = "/devices";
	static const char minor[] = ":ppt";
	const size_t devlen = sizeof (devices) - 1;
	const size_t minlen = sizeof (minor) - 1;
	size_t len = strlen(fspath);

	/* Only a ":ppt" that ends the string counts as the minor name. */
	if (len >= minlen && strcmp(fspath + len - minlen, minor) == 0)
		fspath[len - minlen] = '\0';

	if (strncmp(fspath, devices, devlen) != 0)
		return (fspath);

	return (fspath + devlen);
}
165 
166 /*
167  * Return an nvlist representing the mappings of /dev/ppt* devices to physical
168  * devices.  Of the form:
169  *
170  * /pci@0,0/... {
171  *  dev: "/dev/ppt0"
172  *  path: "/pci@0,0/..."
173  *  vendor-id: "8086"
174  *  device-id: "1528"
175  *  subsystem-vendor-id: "8086"
176  *  subsystem-id: "1528"
177  *  revision-id: "1"
178  *  label: "Intel Corporation ..."
179  * },
180  * /pci@0,0/...
181  *
182  * The nvlist should be freed by the caller.
183  */
184 nvlist_t *
ppt_list_assigned(void)185 ppt_list_assigned(void)
186 {
187 	di_node_t di_root = DI_NODE_NIL;
188 	pcidb_hdl_t *db = NULL;
189 	nvlist_t *nvl = NULL;
190 	glob_t gl;
191 	int err;
192 
193 	bzero(&gl, sizeof (gl));
194 
195 	if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
196 		return (NULL);
197 
198 	if ((db = pcidb_open(PCIDB_VERSION)) == NULL) {
199 		err = errno;
200 		goto out;
201 	}
202 
203 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
204 		goto out;
205 
206 	if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR,
207 	    NULL, &gl)) != 0) {
208 		err = (err == GLOB_NOMATCH) ? 0 : errno;
209 		goto out;
210 	}
211 
212 	for (size_t i = 0; i < gl.gl_pathc; i++) {
213 		char fspath[MAXPATHLEN];
214 		nvlist_t *info_nvl;
215 		di_node_t di_node;
216 		const char *path;
217 
218 		if (!S_ISLNK(gl.gl_statv[i]->st_mode))
219 			continue;
220 
221 		if (realpath(gl.gl_pathv[i], fspath) == NULL) {
222 			err = errno;
223 			goto out;
224 		}
225 
226 		path = fs_to_phys_path(fspath);
227 
228 		/*
229 		 * path argument is treated as const.
230 		 */
231 		if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) {
232 			err = errno;
233 			goto out;
234 		}
235 
236 		if (!is_pci(di_node))
237 			continue;
238 
239 		info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path);
240 
241 		if (info_nvl == NULL) {
242 			err = errno;
243 			goto out;
244 		}
245 
246 		err = nvlist_add_nvlist(nvl, path, info_nvl);
247 		nvlist_free(info_nvl);
248 
249 		if (err)
250 			goto out;
251 	}
252 
253 out:
254 	if (di_root != DI_NODE_NIL)
255 		di_fini(di_root);
256 
257 	pcidb_close(db);
258 	globfree(&gl);
259 
260 	if (err) {
261 		nvlist_free(nvl);
262 		errno = err;
263 		return (NULL);
264 	}
265 
266 	return (nvl);
267 }
268 
269 /*
270  * Read in our list of potential PPT devices.  A boot-module provided file
271  * explicitly over-rides anything delivered.
272  */
273 static int
get_matches(list_t * listp)274 get_matches(list_t *listp)
275 {
276 	int err = 0;
277 	FILE *fp;
278 
279 	list_create(listp, sizeof (ppt_match_t),
280 	    offsetof(ppt_match_t, pm_list));
281 
282 	if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) {
283 		if (errno != ENOENT)
284 			return (errno);
285 
286 		if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) {
287 			if (errno == ENOENT)
288 				return (0);
289 			return (errno);
290 		}
291 	}
292 
293 	for (;;) {
294 		char *line = NULL;
295 		ppt_match_t *pm;
296 		size_t cap = 0;
297 		ssize_t read;
298 
299 		if ((read = getline(&line, &cap, fp)) <= 0) {
300 			free(line);
301 			break;
302 		}
303 
304 		if (line[read - 1] == '\n')
305 			line[read - 1] = '\0';
306 
307 		if ((pm = malloc(sizeof (*pm))) == NULL) {
308 			err = errno;
309 			free(line);
310 			goto out;
311 		}
312 
313 		bzero(pm, sizeof (*pm));
314 
315 		if (sscanf(line, "pciex%4s,%4s", &pm->pm_vendor,
316 		    &pm->pm_device) == 2 ||
317 		    sscanf(line, "pci%4s,%4s", &pm->pm_vendor,
318 		    &pm->pm_device) == 2 ||
319 		    sscanf(line, "pciex%4s", &pm->pm_vendor) == 1 ||
320 		    sscanf(line, "pci%4s", &pm->pm_vendor) == 1) {
321 			list_insert_tail(listp, pm);
322 		} else if (line[0] == '/') {
323 			(void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path));
324 			list_insert_tail(listp, pm);
325 		} else {
326 			/*
327 			 * Ignore any line we don't understand.
328 			 */
329 			free(pm);
330 		}
331 
332 		free(line);
333 	}
334 
335 out:
336 	(void) fclose(fp);
337 	return (err);
338 }
339 
340 static boolean_t
match_ppt(list_t * matches,nvlist_t * nvl)341 match_ppt(list_t *matches, nvlist_t *nvl)
342 {
343 	char *vendor;
344 	char *device;
345 	char *path;
346 
347 	if (nvlist_lookup_string(nvl, "path", &path) != 0 ||
348 	    nvlist_lookup_string(nvl, "vendor-id", &vendor) != 0 ||
349 	    nvlist_lookup_string(nvl, "device-id", &device) != 0)
350 		return (B_FALSE);
351 
352 	for (ppt_match_t *pm = list_head(matches); pm != NULL;
353 	    pm = list_next(matches, pm)) {
354 		if (pm->pm_path[0] != '\0' && strcmp(pm->pm_path, path) == 0)
355 			return (B_TRUE);
356 
357 		if (pm->pm_vendor[0] != '\0' &&
358 		    strcmp(pm->pm_vendor, vendor) == 0) {
359 			if (pm->pm_device[0] == '\0')
360 				return (B_TRUE);
361 			if (strcmp(pm->pm_device, device) == 0)
362 				return (B_TRUE);
363 		}
364 	}
365 
366 	return (B_FALSE);
367 }
368 
369 static int
inspect_node(di_node_t di_node,void * arg)370 inspect_node(di_node_t di_node, void *arg)
371 {
372 	node_data_t *data = arg;
373 	nvlist_t *info_nvl = NULL;
374 	char *devname = NULL;
375 	const char *driver;
376 	char *path = NULL;
377 
378 	if (!is_pci(di_node))
379 		return (DI_WALK_CONTINUE);
380 
381 	driver = di_driver_name(di_node);
382 
383 	if (driver != NULL && strcmp(driver, "ppt") == 0) {
384 		if (asprintf(&devname, "/dev/ppt%d",
385 		    di_instance(di_node)) < 0) {
386 			data->nd_err = errno;
387 			goto out;
388 		}
389 	}
390 
391 	if ((path = di_devfs_path(di_node)) == NULL) {
392 		data->nd_err = ENOENT;
393 		goto out;
394 	}
395 
396 	info_nvl = dev_getinfo(di_node, data->nd_db, devname, path);
397 
398 	if (info_nvl == NULL)
399 		goto out;
400 
401 	if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl))
402 		goto out;
403 
404 	data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl);
405 
406 out:
407 	free(path);
408 	free(devname);
409 	nvlist_free(info_nvl);
410 	return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE);
411 }
412 
413 /*
414  * Like ppt_list_assigned() output, but includes all devices that could be used
415  * for passthrough, whether assigned or not.
416  */
417 nvlist_t *
ppt_list(void)418 ppt_list(void)
419 {
420 	node_data_t nd = { NULL, };
421 	di_node_t di_root;
422 	int err;
423 
424 	if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
425 		return (NULL);
426 
427 	if ((err = get_matches(&nd.nd_matches)) != 0)
428 		goto out;
429 
430 	if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) {
431 		err = errno;
432 		goto out;
433 	}
434 
435 	if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0)
436 		goto out;
437 
438 	if ((err = di_walk_node(di_root, DI_WALK_CLDFIRST,
439 	    &nd, inspect_node)) != 0)
440 		goto out;
441 
442 	err = nd.nd_err;
443 
444 out:
445 	pcidb_close(nd.nd_db);
446 
447 	for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) {
448 		ppt_match_t *next = list_next(&nd.nd_matches, pm);
449 		free(pm);
450 		pm = next;
451 	}
452 
453 	if (di_root != DI_NODE_NIL)
454 		di_fini(di_root);
455 
456 	if (err) {
457 		nvlist_free(nd.nd_nvl);
458 		errno = err;
459 		return (NULL);
460 	}
461 
462 	return (nd.nd_nvl);
463 }
464 
465 /*
466  * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX"
467  * that is bound to it, if any.  The "/devices/" prefix is optional.  The
468  * physical path may have the ":ppt" minor name suffix.
469  *
470  * Returns ENOENT if no such PPT device exists.
471  */
472 int
ppt_devpath_to_dev(const char * inpath,char * buf,size_t buflen)473 ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen)
474 {
475 	char fspath[MAXPATHLEN] = "";
476 	nvpair_t *nvp = NULL;
477 	const char *devpath;
478 	int err = ENOENT;
479 	nvlist_t *nvl;
480 
481 	if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath))
482 		return (ENAMETOOLONG);
483 
484 	devpath = fs_to_phys_path(fspath);
485 
486 	if ((nvl = ppt_list_assigned()) == NULL)
487 		return (errno);
488 
489 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
490 		const char *name = nvpair_name(nvp);
491 		char *ppt = NULL;
492 		nvlist_t *props;
493 
494 		(void) nvpair_value_nvlist(nvp, &props);
495 
496 		if (strcmp(name, devpath) == 0) {
497 			(void) nvlist_lookup_string(props, "dev", &ppt);
498 
499 			err = 0;
500 
501 			if (strlcpy(buf, ppt, buflen) >= buflen)
502 				err = ENAMETOOLONG;
503 			break;
504 		}
505 	}
506 
507 	nvlist_free(nvl);
508 	return (err);
509 }
510