xref: /illumos-gate/usr/src/lib/libppt/common/libppt.c (revision 55fcd84f321375248464013e08f0ff6d6e00fffe)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2018 Joyent, Inc.
14  *
15  * Convenience routines for identifying current or available devices that are
16  * suitable for PCI passthrough to a bhyve guest.
17  */
18 
19 #include <libdevinfo.h>
20 #include <libppt.h>
21 
22 #include <sys/param.h>
23 #include <sys/stat.h>
24 #include <sys/list.h>
25 #include <strings.h>
26 #include <stddef.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <errno.h>
30 #include <pcidb.h>
31 #include <glob.h>
32 
33 typedef struct node_data {
34 	pcidb_hdl_t *nd_db;
35 	list_t nd_matches;
36 	nvlist_t *nd_nvl;
37 	int nd_err;
38 } node_data_t;
39 
40 typedef struct ppt_match {
41 	list_node_t pm_list;
42 	char pm_path[MAXPATHLEN];
43 	char pm_vendor[5];
44 	char pm_device[5];
45 } ppt_match_t;
46 
47 static boolean_t
48 is_pci(di_node_t di_node)
49 {
50 	char *svals;
51 
52 	if (di_prop_lookup_strings(DDI_DEV_T_ANY, di_parent_node(di_node),
53 	    "device_type", &svals) != 1)
54 		return (B_FALSE);
55 
56 	return (strcmp(svals, "pci") == 0 || strcmp(svals, "pciex") == 0);
57 }
58 
59 static int
60 populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival)
61 {
62 	char val[20];
63 	int *ivals;
64 	int err;
65 
66 	if (di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals) != 1)
67 		return (errno);
68 
69 	(void) snprintf(val, sizeof (val), "%x", ivals[0]);
70 
71 	err = nvlist_add_string(nvl, name, val);
72 
73 	if (err == 0 && ival != NULL)
74 		*ival = ivals[0];
75 
76 	return (err);
77 }
78 
79 static int
80 dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen)
81 {
82 	pcidb_vendor_t *vend = NULL;
83 	pcidb_device_t *dev = NULL;
84 
85 	if ((vend = pcidb_lookup_vendor(db, vid)) == NULL)
86 		return (ENOENT);
87 
88 	if ((dev = pcidb_lookup_device_by_vendor(vend, did)) == NULL)
89 		return (ENOENT);
90 
91 	(void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vend),
92 	    pcidb_device_name(dev));
93 
94 	return (0);
95 }
96 
97 static nvlist_t *
98 dev_getinfo(di_node_t di_node, pcidb_hdl_t *db,
99     const char *dev, const char *path)
100 {
101 	char label[MAXPATHLEN];
102 	nvlist_t *nvl = NULL;
103 	int vid, did;
104 	int err;
105 
106 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
107 		goto out;
108 
109 	if (dev != NULL && (err = nvlist_add_string(nvl, "dev", dev)) != 0)
110 		goto out;
111 	if ((err = nvlist_add_string(nvl, "path", path)) != 0)
112 		goto out;
113 	if ((err = populate_int_prop(di_node, nvl, "vendor-id", &vid)) != 0)
114 		goto out;
115 	if ((err = populate_int_prop(di_node, nvl, "device-id", &did)) != 0)
116 		goto out;
117 	if ((err = populate_int_prop(di_node, nvl,
118 	    "subsystem-vendor-id", NULL)) != 0)
119 		goto out;
120 	if ((err = populate_int_prop(di_node, nvl, "subsystem-id", NULL)) != 0)
121 		goto out;
122 	if ((err = populate_int_prop(di_node, nvl, "revision-id", NULL)) != 0)
123 		goto out;
124 
125 	err = dev_getlabel(db, vid, did, label, sizeof (label));
126 
127 	if (err == 0) {
128 		err = nvlist_add_string(nvl, "label", label);
129 	} else if (err == ENOENT) {
130 		err = 0;
131 	}
132 
133 out:
134 	if (err) {
135 		nvlist_free(nvl);
136 		errno = err;
137 		return (NULL);
138 	}
139 
140 	return (nvl);
141 }
142 
/*
 * Convert a devfs filesystem path into a physical device path, e.g.
 *
 *	/devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1
 *
 * A trailing ":ppt" minor name is cut off by writing a NUL into fspath, and
 * a leading "/devices" is skipped.  The returned pointer refers to storage
 * inside fspath.
 */
static const char *
fs_to_phys_path(char *fspath)
{
	static const char devfs_root[] = "/devices";
	const size_t rootlen = sizeof (devfs_root) - 1;
	char *minor = strrchr(fspath, ':');

	if (minor != NULL && strcmp(minor, ":ppt") == 0)
		*minor = '\0';

	if (strncmp(fspath, devfs_root, rootlen) != 0)
		return (fspath);

	return (fspath + rootlen);
}
162 
163 /*
164  * Return an nvlist representing the mappings of /dev/ppt* devices to physical
165  * devices.  Of the form:
166  *
167  * /pci@0,0/... {
168  *  dev: "/dev/ppt0"
169  *  path: "/pci@0,0/..."
170  *  vendor-id: "8086"
171  *  device-id: "1528"
172  *  subsystem-vendor-id: "8086"
173  *  subsystem-id: "1528"
174  *  revision-id: "1"
175  *  label: "Intel Corporation ..."
176  * },
177  * /pci@0,0/...
178  *
179  * The nvlist should be freed by the caller.
180  */
181 nvlist_t *
182 ppt_list_assigned(void)
183 {
184 	di_node_t di_root = DI_NODE_NIL;
185 	pcidb_hdl_t *db = NULL;
186 	nvlist_t *nvl = NULL;
187 	glob_t gl;
188 	int err;
189 
190 	bzero(&gl, sizeof (gl));
191 
192 	if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
193 		return (NULL);
194 
195 	if ((db = pcidb_open(PCIDB_VERSION)) == NULL) {
196 		err = errno;
197 		goto out;
198 	}
199 
200 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
201 		goto out;
202 
203 	if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR,
204 	    NULL, &gl)) != 0) {
205 		err = (err == GLOB_NOMATCH) ? 0 : errno;
206 		goto out;
207 	}
208 
209 	for (size_t i = 0; i < gl.gl_pathc; i++) {
210 		char fspath[MAXPATHLEN];
211 		nvlist_t *info_nvl;
212 		di_node_t di_node;
213 		const char *path;
214 
215 		if (!S_ISLNK(gl.gl_statv[i]->st_mode))
216 			continue;
217 
218 		if (realpath(gl.gl_pathv[i], fspath) == NULL) {
219 			err = errno;
220 			goto out;
221 		}
222 
223 		path = fs_to_phys_path(fspath);
224 
225 		/*
226 		 * path argument is treated as const.
227 		 */
228 		if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) {
229 			err = errno;
230 			goto out;
231 		}
232 
233 		if (!is_pci(di_node))
234 			continue;
235 
236 		info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path);
237 
238 		if (info_nvl == NULL) {
239 			err = errno;
240 			goto out;
241 		}
242 
243 		err = nvlist_add_nvlist(nvl, path, info_nvl);
244 		nvlist_free(info_nvl);
245 
246 		if (err)
247 			goto out;
248 	}
249 
250 out:
251 	if (di_root != DI_NODE_NIL)
252 		di_fini(di_root);
253 
254 	pcidb_close(db);
255 	globfree(&gl);
256 
257 	if (err) {
258 		nvlist_free(nvl);
259 		errno = err;
260 		return (NULL);
261 	}
262 
263 	return (nvl);
264 }
265 
266 /*
267  * Read in our list of potential PPT devices.  A boot-module provided file
268  * explicitly over-rides anything delivered.
269  */
270 static int
271 get_matches(list_t *listp)
272 {
273 	int err = 0;
274 	FILE *fp;
275 
276 	list_create(listp, sizeof (ppt_match_t),
277 	    offsetof(ppt_match_t, pm_list));
278 
279 	if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) {
280 		if (errno != ENOENT)
281 			return (errno);
282 
283 		if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) {
284 			if (errno == ENOENT)
285 				return (0);
286 			return (errno);
287 		}
288 	}
289 
290 	for (;;) {
291 		char *line = NULL;
292 		ppt_match_t *pm;
293 		size_t cap = 0;
294 		ssize_t read;
295 
296 		if ((read = getline(&line, &cap, fp)) <= 0) {
297 			free(line);
298 			break;
299 		}
300 
301 		if (line[read - 1] == '\n')
302 			line[read - 1] = '\0';
303 
304 		if ((pm = malloc(sizeof (*pm))) == NULL) {
305 			err = errno;
306 			free(line);
307 			goto out;
308 		}
309 
310 		bzero(pm, sizeof (*pm));
311 
312 		if (sscanf(line, "pciex%4s,%4s", &pm->pm_vendor,
313 		    &pm->pm_device) == 2 ||
314 		    sscanf(line, "pci%4s,%4s", &pm->pm_vendor,
315 		    &pm->pm_device) == 2 ||
316 		    sscanf(line, "pciex%4s", &pm->pm_vendor) == 1 ||
317 		    sscanf(line, "pci%4s", &pm->pm_vendor) == 1) {
318 			list_insert_tail(listp, pm);
319 		} else if (line[0] == '/') {
320 			(void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path));
321 			list_insert_tail(listp, pm);
322 		} else {
323 			/*
324 			 * Ignore any line we don't understand.
325 			 */
326 			free(pm);
327 		}
328 
329 		free(line);
330 	}
331 
332 out:
333 	(void) fclose(fp);
334 	return (err);
335 }
336 
337 static boolean_t
338 match_ppt(list_t *matches, nvlist_t *nvl)
339 {
340 	char *vendor;
341 	char *device;
342 	char *path;
343 
344 	if (nvlist_lookup_string(nvl, "path", &path) != 0 ||
345 	    nvlist_lookup_string(nvl, "vendor-id", &vendor) != 0 ||
346 	    nvlist_lookup_string(nvl, "device-id", &device) != 0)
347 		return (B_FALSE);
348 
349 	for (ppt_match_t *pm = list_head(matches); pm != NULL;
350 	    pm = list_next(matches, pm)) {
351 		if (pm->pm_path[0] != '\0' && strcmp(pm->pm_path, path) == 0)
352 			return (B_TRUE);
353 
354 		if (pm->pm_vendor[0] != '\0' &&
355 		    strcmp(pm->pm_vendor, vendor) == 0) {
356 			if (pm->pm_device[0] == '\0')
357 				return (B_TRUE);
358 			if (strcmp(pm->pm_device, device) == 0)
359 				return (B_TRUE);
360 		}
361 	}
362 
363 	return (B_FALSE);
364 }
365 
366 static int
367 inspect_node(di_node_t di_node, void *arg)
368 {
369 	node_data_t *data = arg;
370 	nvlist_t *info_nvl = NULL;
371 	char *devname = NULL;
372 	const char *driver;
373 	char *path = NULL;
374 
375 	if (!is_pci(di_node))
376 		return (DI_WALK_CONTINUE);
377 
378 	driver = di_driver_name(di_node);
379 
380 	if (driver != NULL && strcmp(driver, "ppt") == 0) {
381 		if (asprintf(&devname, "/dev/ppt%d",
382 		    di_instance(di_node)) < 0) {
383 			data->nd_err = errno;
384 			goto out;
385 		}
386 	}
387 
388 	if ((path = di_devfs_path(di_node)) == NULL) {
389 		data->nd_err = ENOENT;
390 		goto out;
391 	}
392 
393 	info_nvl = dev_getinfo(di_node, data->nd_db, devname, path);
394 
395 	if (info_nvl == NULL)
396 		goto out;
397 
398 	if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl))
399 		goto out;
400 
401 	data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl);
402 
403 out:
404 	free(path);
405 	free(devname);
406 	nvlist_free(info_nvl);
407 	return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE);
408 }
409 
410 /*
411  * Like ppt_list_assigned() output, but includes all devices that could be used
412  * for passthrough, whether assigned or not.
413  */
414 nvlist_t *
415 ppt_list(void)
416 {
417 	node_data_t nd = { NULL, };
418 	di_node_t di_root;
419 	int err;
420 
421 	if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
422 		return (NULL);
423 
424 	if ((err = get_matches(&nd.nd_matches)) != 0)
425 		goto out;
426 
427 	if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) {
428 		err = errno;
429 		goto out;
430 	}
431 
432 	if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0)
433 		goto out;
434 
435 	if ((err = di_walk_node(di_root, DI_WALK_CLDFIRST,
436 	    &nd, inspect_node)) != 0)
437 		goto out;
438 
439 	err = nd.nd_err;
440 
441 out:
442 	pcidb_close(nd.nd_db);
443 
444 	for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) {
445 		ppt_match_t *next = list_next(&nd.nd_matches, pm);
446 		free(pm);
447 		pm = next;
448 	}
449 
450 	if (di_root != DI_NODE_NIL)
451 		di_fini(di_root);
452 
453 	if (err) {
454 		nvlist_free(nd.nd_nvl);
455 		errno = err;
456 		return (NULL);
457 	}
458 
459 	return (nd.nd_nvl);
460 }
461 
462 /*
463  * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX"
464  * that is bound to it, if any.  The "/devices/" prefix is optional.  The
465  * physical path may have the ":ppt" minor name suffix.
466  *
467  * Returns ENOENT if no such PPT device exists.
468  */
469 int
470 ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen)
471 {
472 	char fspath[MAXPATHLEN] = "";
473 	nvpair_t *nvp = NULL;
474 	const char *devpath;
475 	int err = ENOENT;
476 	nvlist_t *nvl;
477 
478 	if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath))
479 		return (ENAMETOOLONG);
480 
481 	devpath = fs_to_phys_path(fspath);
482 
483 	if ((nvl = ppt_list_assigned()) == NULL)
484 		return (errno);
485 
486 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
487 		const char *name = nvpair_name(nvp);
488 		char *ppt = NULL;
489 		nvlist_t *props;
490 
491 		(void) nvpair_value_nvlist(nvp, &props);
492 
493 		if (strcmp(name, devpath) == 0) {
494 			(void) nvlist_lookup_string(props, "dev", &ppt);
495 
496 			err = 0;
497 
498 			if (strlcpy(buf, ppt, buflen) >= buflen)
499 				err = ENAMETOOLONG;
500 			break;
501 		}
502 	}
503 
504 	nvlist_free(nvl);
505 	return (err);
506 }
507