xref: /linux/drivers/vdpa/vdpa.c (revision c118478665f467e57d06b2354de65974b246b82b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * vDPA bus.
4  *
5  * Copyright (c) 2020, Red Hat. All rights reserved.
6  *     Author: Jason Wang <jasowang@redhat.com>
7  *
8  */
9 
10 #include <linux/module.h>
11 #include <linux/idr.h>
12 #include <linux/slab.h>
13 #include <linux/vdpa.h>
14 #include <uapi/linux/vdpa.h>
15 #include <net/genetlink.h>
16 #include <linux/mod_devicetable.h>
17 #include <linux/virtio_ids.h>
18 
/* List of registered vdpa management devices, linked through vdpa_mgmt_dev.list. */
static LIST_HEAD(mdev_head);
/*
 * A global rw-semaphore that protects vdpa management device and device
 * level operations.
 */
static DECLARE_RWSEM(vdpa_dev_lock);
/* IDA from which the index of every vdpa device is allocated. */
static DEFINE_IDA(vdpa_index_ida);
23 
24 void vdpa_set_status(struct vdpa_device *vdev, u8 status)
25 {
26 	down_write(&vdev->cf_lock);
27 	vdev->config->set_status(vdev, status);
28 	up_write(&vdev->cf_lock);
29 }
30 EXPORT_SYMBOL(vdpa_set_status);
31 
/* Declared early: the netlink message fill helpers in this file reference the family. */
static struct genl_family vdpa_nl_family;
33 
34 static int vdpa_dev_probe(struct device *d)
35 {
36 	struct vdpa_device *vdev = dev_to_vdpa(d);
37 	struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
38 	const struct vdpa_config_ops *ops = vdev->config;
39 	u32 max_num, min_num = 1;
40 	int ret = 0;
41 
42 	d->dma_mask = &d->coherent_dma_mask;
43 	ret = dma_set_mask_and_coherent(d, DMA_BIT_MASK(64));
44 	if (ret)
45 		return ret;
46 
47 	max_num = ops->get_vq_num_max(vdev);
48 	if (ops->get_vq_num_min)
49 		min_num = ops->get_vq_num_min(vdev);
50 	if (max_num < min_num)
51 		return -EINVAL;
52 
53 	if (drv && drv->probe)
54 		ret = drv->probe(vdev);
55 
56 	return ret;
57 }
58 
59 static void vdpa_dev_remove(struct device *d)
60 {
61 	struct vdpa_device *vdev = dev_to_vdpa(d);
62 	struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
63 
64 	if (drv && drv->remove)
65 		drv->remove(vdev);
66 }
67 
68 static int vdpa_dev_match(struct device *dev, const struct device_driver *drv)
69 {
70 	struct vdpa_device *vdev = dev_to_vdpa(dev);
71 
72 	/* Check override first, and if set, only use the named driver */
73 	if (vdev->driver_override)
74 		return strcmp(vdev->driver_override, drv->name) == 0;
75 
76 	/* Currently devices must be supported by all vDPA bus drivers */
77 	return 1;
78 }
79 
80 static ssize_t driver_override_store(struct device *dev,
81 				     struct device_attribute *attr,
82 				     const char *buf, size_t count)
83 {
84 	struct vdpa_device *vdev = dev_to_vdpa(dev);
85 	int ret;
86 
87 	ret = driver_set_override(dev, &vdev->driver_override, buf, count);
88 	if (ret)
89 		return ret;
90 
91 	return count;
92 }
93 
94 static ssize_t driver_override_show(struct device *dev,
95 				    struct device_attribute *attr, char *buf)
96 {
97 	struct vdpa_device *vdev = dev_to_vdpa(dev);
98 	ssize_t len;
99 
100 	device_lock(dev);
101 	len = sysfs_emit(buf, "%s\n", vdev->driver_override);
102 	device_unlock(dev);
103 
104 	return len;
105 }
106 static DEVICE_ATTR_RW(driver_override);
107 
/* sysfs attributes attached to devices on the vDPA bus (NULL terminated). */
static struct attribute *vdpa_dev_attrs[] = {
	&dev_attr_driver_override.attr,
	NULL,
};

static const struct attribute_group vdpa_dev_group = {
	.attrs  = vdpa_dev_attrs,
};
/* Provides vdpa_dev_groups, which the bus type below uses as its dev_groups. */
__ATTRIBUTE_GROUPS(vdpa_dev);
117 
/*
 * The vDPA bus type: ties together the device attribute groups and the
 * match/probe/remove callbacks of this file.
 */
static const struct bus_type vdpa_bus = {
	.name  = "vdpa",
	.dev_groups = vdpa_dev_groups,
	.match = vdpa_dev_match,
	.probe = vdpa_dev_probe,
	.remove = vdpa_dev_remove,
};
125 
126 static void vdpa_release_dev(struct device *d)
127 {
128 	struct vdpa_device *vdev = dev_to_vdpa(d);
129 	const struct vdpa_config_ops *ops = vdev->config;
130 
131 	if (ops->free)
132 		ops->free(vdev);
133 
134 	ida_free(&vdpa_index_ida, vdev->index);
135 	kfree(vdev->driver_override);
136 	kfree(vdev);
137 }
138 
139 /**
140  * __vdpa_alloc_device - allocate and initilaize a vDPA device
141  * This allows driver to some prepartion after device is
142  * initialized but before registered.
143  * @parent: the parent device
144  * @config: the bus operations that is supported by this device
145  * @ngroups: number of groups supported by this device
146  * @nas: number of address spaces supported by this device
147  * @size: size of the parent structure that contains private data
148  * @name: name of the vdpa device; optional.
149  * @use_va: indicate whether virtual address must be used by this device
150  *
151  * Driver should use vdpa_alloc_device() wrapper macro instead of
152  * using this directly.
153  *
154  * Return: Returns an error when parent/config/dma_dev is not set or fail to get
155  *	   ida.
156  */
157 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
158 					const struct vdpa_config_ops *config,
159 					unsigned int ngroups, unsigned int nas,
160 					size_t size, const char *name,
161 					bool use_va)
162 {
163 	struct vdpa_device *vdev;
164 	int err = -EINVAL;
165 
166 	if (!config)
167 		goto err;
168 
169 	if (!!config->dma_map != !!config->dma_unmap)
170 		goto err;
171 
172 	/* It should only work for the device that use on-chip IOMMU */
173 	if (use_va && !(config->dma_map || config->set_map))
174 		goto err;
175 
176 	err = -ENOMEM;
177 	vdev = kzalloc(size, GFP_KERNEL);
178 	if (!vdev)
179 		goto err;
180 
181 	err = ida_alloc(&vdpa_index_ida, GFP_KERNEL);
182 	if (err < 0)
183 		goto err_ida;
184 
185 	vdev->dev.bus = &vdpa_bus;
186 	vdev->dev.parent = parent;
187 	vdev->dev.release = vdpa_release_dev;
188 	vdev->index = err;
189 	vdev->config = config;
190 	vdev->features_valid = false;
191 	vdev->use_va = use_va;
192 	vdev->ngroups = ngroups;
193 	vdev->nas = nas;
194 
195 	if (name)
196 		err = dev_set_name(&vdev->dev, "%s", name);
197 	else
198 		err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
199 	if (err)
200 		goto err_name;
201 
202 	init_rwsem(&vdev->cf_lock);
203 	device_initialize(&vdev->dev);
204 
205 	return vdev;
206 
207 err_name:
208 	ida_free(&vdpa_index_ida, vdev->index);
209 err_ida:
210 	kfree(vdev);
211 err:
212 	return ERR_PTR(err);
213 }
214 EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
215 
/*
 * bus_find_device() match helper: @data is the device name to look for.
 * Returns 1 when @dev carries exactly that name, 0 otherwise.
 */
static int vdpa_name_match(struct device *dev, const void *data)
{
	const char *wanted = data;

	return !strcmp(dev_name(dev), wanted);
}
222 
223 static int __vdpa_register_device(struct vdpa_device *vdev, u32 nvqs)
224 {
225 	struct device *dev;
226 
227 	vdev->nvqs = nvqs;
228 
229 	lockdep_assert_held(&vdpa_dev_lock);
230 	dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match);
231 	if (dev) {
232 		put_device(dev);
233 		return -EEXIST;
234 	}
235 	return device_add(&vdev->dev);
236 }
237 
238 /**
239  * _vdpa_register_device - register a vDPA device with vdpa lock held
240  * Caller must have a succeed call of vdpa_alloc_device() before.
241  * Caller must invoke this routine in the management device dev_add()
242  * callback after setting up valid mgmtdev for this vdpa device.
243  * @vdev: the vdpa device to be registered to vDPA bus
244  * @nvqs: number of virtqueues supported by this device
245  *
246  * Return: Returns an error when fail to add device to vDPA bus
247  */
248 int _vdpa_register_device(struct vdpa_device *vdev, u32 nvqs)
249 {
250 	if (!vdev->mdev)
251 		return -EINVAL;
252 
253 	return __vdpa_register_device(vdev, nvqs);
254 }
255 EXPORT_SYMBOL_GPL(_vdpa_register_device);
256 
257 /**
258  * vdpa_register_device - register a vDPA device
259  * Callers must have a succeed call of vdpa_alloc_device() before.
260  * @vdev: the vdpa device to be registered to vDPA bus
261  * @nvqs: number of virtqueues supported by this device
262  *
263  * Return: Returns an error when fail to add to vDPA bus
264  */
265 int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs)
266 {
267 	int err;
268 
269 	down_write(&vdpa_dev_lock);
270 	err = __vdpa_register_device(vdev, nvqs);
271 	up_write(&vdpa_dev_lock);
272 	return err;
273 }
274 EXPORT_SYMBOL_GPL(vdpa_register_device);
275 
276 /**
277  * _vdpa_unregister_device - unregister a vDPA device
278  * Caller must invoke this routine as part of management device dev_del()
279  * callback.
280  * @vdev: the vdpa device to be unregisted from vDPA bus
281  */
282 void _vdpa_unregister_device(struct vdpa_device *vdev)
283 {
284 	lockdep_assert_held(&vdpa_dev_lock);
285 	WARN_ON(!vdev->mdev);
286 	device_unregister(&vdev->dev);
287 }
288 EXPORT_SYMBOL_GPL(_vdpa_unregister_device);
289 
290 /**
291  * vdpa_unregister_device - unregister a vDPA device
292  * @vdev: the vdpa device to be unregisted from vDPA bus
293  */
294 void vdpa_unregister_device(struct vdpa_device *vdev)
295 {
296 	down_write(&vdpa_dev_lock);
297 	device_unregister(&vdev->dev);
298 	up_write(&vdpa_dev_lock);
299 }
300 EXPORT_SYMBOL_GPL(vdpa_unregister_device);
301 
302 /**
303  * __vdpa_register_driver - register a vDPA device driver
304  * @drv: the vdpa device driver to be registered
305  * @owner: module owner of the driver
306  *
307  * Return: Returns an err when fail to do the registration
308  */
309 int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
310 {
311 	drv->driver.bus = &vdpa_bus;
312 	drv->driver.owner = owner;
313 
314 	return driver_register(&drv->driver);
315 }
316 EXPORT_SYMBOL_GPL(__vdpa_register_driver);
317 
318 /**
319  * vdpa_unregister_driver - unregister a vDPA device driver
320  * @drv: the vdpa device driver to be unregistered
321  */
322 void vdpa_unregister_driver(struct vdpa_driver *drv)
323 {
324 	driver_unregister(&drv->driver);
325 }
326 EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
327 
328 /**
329  * vdpa_mgmtdev_register - register a vdpa management device
330  *
331  * @mdev: Pointer to vdpa management device
332  * vdpa_mgmtdev_register() register a vdpa management device which supports
333  * vdpa device management.
334  * Return: Returns 0 on success or failure when required callback ops are not
335  *         initialized.
336  */
337 int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev)
338 {
339 	if (!mdev->device || !mdev->ops || !mdev->ops->dev_add || !mdev->ops->dev_del)
340 		return -EINVAL;
341 
342 	INIT_LIST_HEAD(&mdev->list);
343 	down_write(&vdpa_dev_lock);
344 	list_add_tail(&mdev->list, &mdev_head);
345 	up_write(&vdpa_dev_lock);
346 	return 0;
347 }
348 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register);
349 
350 static int vdpa_match_remove(struct device *dev, void *data)
351 {
352 	struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
353 	struct vdpa_mgmt_dev *mdev = vdev->mdev;
354 
355 	if (mdev == data)
356 		mdev->ops->dev_del(mdev, vdev);
357 	return 0;
358 }
359 
360 void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
361 {
362 	down_write(&vdpa_dev_lock);
363 
364 	list_del(&mdev->list);
365 
366 	/* Filter out all the entries belong to this management device and delete it. */
367 	bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove);
368 
369 	up_write(&vdpa_dev_lock);
370 }
371 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
372 
373 static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
374 				     unsigned int offset,
375 				     void *buf, unsigned int len)
376 {
377 	const struct vdpa_config_ops *ops = vdev->config;
378 
379 	/*
380 	 * Config accesses aren't supposed to trigger before features are set.
381 	 * If it does happen we assume a legacy guest.
382 	 */
383 	if (!vdev->features_valid)
384 		vdpa_set_features_unlocked(vdev, 0);
385 	ops->get_config(vdev, offset, buf, len);
386 }
387 
388 /**
389  * vdpa_get_config - Get one or more device configuration fields.
390  * @vdev: vdpa device to operate on
391  * @offset: starting byte offset of the field
392  * @buf: buffer pointer to read to
393  * @len: length of the configuration fields in bytes
394  */
395 void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
396 		     void *buf, unsigned int len)
397 {
398 	down_read(&vdev->cf_lock);
399 	vdpa_get_config_unlocked(vdev, offset, buf, len);
400 	up_read(&vdev->cf_lock);
401 }
402 EXPORT_SYMBOL_GPL(vdpa_get_config);
403 
404 /**
405  * vdpa_set_config - Set one or more device configuration fields.
406  * @vdev: vdpa device to operate on
407  * @offset: starting byte offset of the field
408  * @buf: buffer pointer to read from
409  * @length: length of the configuration fields in bytes
410  */
411 void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
412 		     const void *buf, unsigned int length)
413 {
414 	down_write(&vdev->cf_lock);
415 	vdev->config->set_config(vdev, offset, buf, length);
416 	up_write(&vdev->cf_lock);
417 }
418 EXPORT_SYMBOL_GPL(vdpa_set_config);
419 
420 static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
421 				 const char *busname, const char *devname)
422 {
423 	/* Bus name is optional for simulated management device, so ignore the
424 	 * device with bus if bus attribute is provided.
425 	 */
426 	if ((busname && !mdev->device->bus) || (!busname && mdev->device->bus))
427 		return false;
428 
429 	if (!busname && strcmp(dev_name(mdev->device), devname) == 0)
430 		return true;
431 
432 	if (busname && (strcmp(mdev->device->bus->name, busname) == 0) &&
433 	    (strcmp(dev_name(mdev->device), devname) == 0))
434 		return true;
435 
436 	return false;
437 }
438 
439 static struct vdpa_mgmt_dev *vdpa_mgmtdev_get_from_attr(struct nlattr **attrs)
440 {
441 	struct vdpa_mgmt_dev *mdev;
442 	const char *busname = NULL;
443 	const char *devname;
444 
445 	if (!attrs[VDPA_ATTR_MGMTDEV_DEV_NAME])
446 		return ERR_PTR(-EINVAL);
447 	devname = nla_data(attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]);
448 	if (attrs[VDPA_ATTR_MGMTDEV_BUS_NAME])
449 		busname = nla_data(attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]);
450 
451 	list_for_each_entry(mdev, &mdev_head, list) {
452 		if (mgmtdev_handle_match(mdev, busname, devname))
453 			return mdev;
454 	}
455 	return ERR_PTR(-ENODEV);
456 }
457 
458 static int vdpa_nl_mgmtdev_handle_fill(struct sk_buff *msg, const struct vdpa_mgmt_dev *mdev)
459 {
460 	if (mdev->device->bus &&
461 	    nla_put_string(msg, VDPA_ATTR_MGMTDEV_BUS_NAME, mdev->device->bus->name))
462 		return -EMSGSIZE;
463 	if (nla_put_string(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, dev_name(mdev->device)))
464 		return -EMSGSIZE;
465 	return 0;
466 }
467 
468 static u64 vdpa_mgmtdev_get_classes(const struct vdpa_mgmt_dev *mdev,
469 				    unsigned int *nclasses)
470 {
471 	u64 supported_classes = 0;
472 	unsigned int n = 0;
473 
474 	for (int i = 0; mdev->id_table[i].device; i++) {
475 		if (mdev->id_table[i].device > 63)
476 			continue;
477 		supported_classes |= BIT_ULL(mdev->id_table[i].device);
478 		n++;
479 	}
480 	if (nclasses)
481 		*nclasses = n;
482 
483 	return supported_classes;
484 }
485 
486 static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *msg,
487 			     u32 portid, u32 seq, int flags)
488 {
489 	void *hdr;
490 	int err;
491 
492 	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_MGMTDEV_NEW);
493 	if (!hdr)
494 		return -EMSGSIZE;
495 	err = vdpa_nl_mgmtdev_handle_fill(msg, mdev);
496 	if (err)
497 		goto msg_err;
498 
499 	if (nla_put_u64_64bit(msg, VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES,
500 			      vdpa_mgmtdev_get_classes(mdev, NULL),
501 			      VDPA_ATTR_UNSPEC)) {
502 		err = -EMSGSIZE;
503 		goto msg_err;
504 	}
505 	if (nla_put_u32(msg, VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,
506 			mdev->max_supported_vqs)) {
507 		err = -EMSGSIZE;
508 		goto msg_err;
509 	}
510 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_SUPPORTED_FEATURES,
511 			      mdev->supported_features, VDPA_ATTR_PAD)) {
512 		err = -EMSGSIZE;
513 		goto msg_err;
514 	}
515 
516 	genlmsg_end(msg, hdr);
517 	return 0;
518 
519 msg_err:
520 	genlmsg_cancel(msg, hdr);
521 	return err;
522 }
523 
524 static int vdpa_nl_cmd_mgmtdev_get_doit(struct sk_buff *skb, struct genl_info *info)
525 {
526 	struct vdpa_mgmt_dev *mdev;
527 	struct sk_buff *msg;
528 	int err;
529 
530 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
531 	if (!msg)
532 		return -ENOMEM;
533 
534 	down_read(&vdpa_dev_lock);
535 	mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
536 	if (IS_ERR(mdev)) {
537 		up_read(&vdpa_dev_lock);
538 		NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified mgmt device");
539 		err = PTR_ERR(mdev);
540 		goto out;
541 	}
542 
543 	err = vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0);
544 	up_read(&vdpa_dev_lock);
545 	if (err)
546 		goto out;
547 	err = genlmsg_reply(msg, info);
548 	return err;
549 
550 out:
551 	nlmsg_free(msg);
552 	return err;
553 }
554 
555 static int
556 vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
557 {
558 	struct vdpa_mgmt_dev *mdev;
559 	int start = cb->args[0];
560 	int idx = 0;
561 	int err;
562 
563 	down_read(&vdpa_dev_lock);
564 	list_for_each_entry(mdev, &mdev_head, list) {
565 		if (idx < start) {
566 			idx++;
567 			continue;
568 		}
569 		err = vdpa_mgmtdev_fill(mdev, msg, NETLINK_CB(cb->skb).portid,
570 					cb->nlh->nlmsg_seq, NLM_F_MULTI);
571 		if (err)
572 			goto out;
573 		idx++;
574 	}
575 out:
576 	up_read(&vdpa_dev_lock);
577 	cb->args[0] = idx;
578 	return msg->len;
579 }
580 
/* Bitmask of the config attributes that are specific to network class devices. */
#define VDPA_DEV_NET_ATTRS_MASK (BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
				 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)     | \
				 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP))

/*
 * Bitmask for all per-device features: feature bits VIRTIO_TRANSPORT_F_START
 * through VIRTIO_TRANSPORT_F_END are unset, i.e. 0xfffffc000fffffff for
 * all 64bit features. If the features are extended beyond 64 bits, or new
 * "holes" are reserved for other type of features than per-device, this
 * macro would have to be updated.
 */
#define VIRTIO_DEVICE_F_MASK (~0ULL << (VIRTIO_TRANSPORT_F_END + 1) | \
			      ((1ULL << VIRTIO_TRANSPORT_F_START) - 1))
594 
595 static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
596 {
597 	struct vdpa_dev_set_config config = {};
598 	struct nlattr **nl_attrs = info->attrs;
599 	struct vdpa_mgmt_dev *mdev;
600 	unsigned int ncls = 0;
601 	const u8 *macaddr;
602 	const char *name;
603 	u64 classes;
604 	int err = 0;
605 
606 	if (!info->attrs[VDPA_ATTR_DEV_NAME])
607 		return -EINVAL;
608 
609 	name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
610 
611 	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
612 		macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
613 		memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
614 		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
615 	}
616 	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
617 		config.net.mtu =
618 			nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
619 		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
620 	}
621 	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) {
622 		config.net.max_vq_pairs =
623 			nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]);
624 		if (!config.net.max_vq_pairs) {
625 			NL_SET_ERR_MSG_MOD(info->extack,
626 					   "At least one pair of VQs is required");
627 			return -EINVAL;
628 		}
629 		config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
630 	}
631 	if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) {
632 		u64 missing = 0x0ULL;
633 
634 		config.device_features =
635 			nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]);
636 		if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR] &&
637 		    !(config.device_features & BIT_ULL(VIRTIO_NET_F_MAC)))
638 			missing |= BIT_ULL(VIRTIO_NET_F_MAC);
639 		if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU] &&
640 		    !(config.device_features & BIT_ULL(VIRTIO_NET_F_MTU)))
641 			missing |= BIT_ULL(VIRTIO_NET_F_MTU);
642 		if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP] &&
643 		    config.net.max_vq_pairs > 1 &&
644 		    !(config.device_features & BIT_ULL(VIRTIO_NET_F_MQ)))
645 			missing |= BIT_ULL(VIRTIO_NET_F_MQ);
646 		if (missing) {
647 			NL_SET_ERR_MSG_FMT_MOD(info->extack,
648 					       "Missing features 0x%llx for provided attributes",
649 					       missing);
650 			return -EINVAL;
651 		}
652 		config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
653 	}
654 
655 	/* Skip checking capability if user didn't prefer to configure any
656 	 * device networking attributes. It is likely that user might have used
657 	 * a device specific method to configure such attributes or using device
658 	 * default attributes.
659 	 */
660 	if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
661 	    !netlink_capable(skb, CAP_NET_ADMIN))
662 		return -EPERM;
663 
664 	down_write(&vdpa_dev_lock);
665 	mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
666 	if (IS_ERR(mdev)) {
667 		NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified management device");
668 		err = PTR_ERR(mdev);
669 		goto err;
670 	}
671 
672 	if ((config.mask & mdev->config_attr_mask) != config.mask) {
673 		NL_SET_ERR_MSG_FMT_MOD(info->extack,
674 				       "Some provided attributes are not supported: 0x%llx",
675 				       config.mask & ~mdev->config_attr_mask);
676 		err = -EOPNOTSUPP;
677 		goto err;
678 	}
679 
680 	classes = vdpa_mgmtdev_get_classes(mdev, &ncls);
681 	if (config.mask & VDPA_DEV_NET_ATTRS_MASK &&
682 	    !(classes & BIT_ULL(VIRTIO_ID_NET))) {
683 		NL_SET_ERR_MSG_MOD(info->extack,
684 				   "Network class attributes provided on unsupported management device");
685 		err = -EINVAL;
686 		goto err;
687 	}
688 	if (!(config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
689 	    config.mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES) &&
690 	    classes & BIT_ULL(VIRTIO_ID_NET) && ncls > 1 &&
691 	    config.device_features & VIRTIO_DEVICE_F_MASK) {
692 		NL_SET_ERR_MSG_MOD(info->extack,
693 				   "Management device supports multi-class while device features specified are ambiguous");
694 		err = -EINVAL;
695 		goto err;
696 	}
697 
698 	err = mdev->ops->dev_add(mdev, name, &config);
699 err:
700 	up_write(&vdpa_dev_lock);
701 	return err;
702 }
703 
704 static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *info)
705 {
706 	struct vdpa_mgmt_dev *mdev;
707 	struct vdpa_device *vdev;
708 	struct device *dev;
709 	const char *name;
710 	int err = 0;
711 
712 	if (!info->attrs[VDPA_ATTR_DEV_NAME])
713 		return -EINVAL;
714 	name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
715 
716 	down_write(&vdpa_dev_lock);
717 	dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match);
718 	if (!dev) {
719 		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
720 		err = -ENODEV;
721 		goto dev_err;
722 	}
723 	vdev = container_of(dev, struct vdpa_device, dev);
724 	if (!vdev->mdev) {
725 		NL_SET_ERR_MSG_MOD(info->extack, "Only user created device can be deleted by user");
726 		err = -EINVAL;
727 		goto mdev_err;
728 	}
729 	mdev = vdev->mdev;
730 	mdev->ops->dev_del(mdev, vdev);
731 mdev_err:
732 	put_device(dev);
733 dev_err:
734 	up_write(&vdpa_dev_lock);
735 	return err;
736 }
737 
738 static int
739 vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
740 	      int flags, struct netlink_ext_ack *extack)
741 {
742 	u16 max_vq_size;
743 	u16 min_vq_size = 1;
744 	u32 device_id;
745 	u32 vendor_id;
746 	void *hdr;
747 	int err;
748 
749 	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_NEW);
750 	if (!hdr)
751 		return -EMSGSIZE;
752 
753 	err = vdpa_nl_mgmtdev_handle_fill(msg, vdev->mdev);
754 	if (err)
755 		goto msg_err;
756 
757 	device_id = vdev->config->get_device_id(vdev);
758 	vendor_id = vdev->config->get_vendor_id(vdev);
759 	max_vq_size = vdev->config->get_vq_num_max(vdev);
760 	if (vdev->config->get_vq_num_min)
761 		min_vq_size = vdev->config->get_vq_num_min(vdev);
762 
763 	err = -EMSGSIZE;
764 	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev)))
765 		goto msg_err;
766 	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id))
767 		goto msg_err;
768 	if (nla_put_u32(msg, VDPA_ATTR_DEV_VENDOR_ID, vendor_id))
769 		goto msg_err;
770 	if (nla_put_u32(msg, VDPA_ATTR_DEV_MAX_VQS, vdev->nvqs))
771 		goto msg_err;
772 	if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size))
773 		goto msg_err;
774 	if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size))
775 		goto msg_err;
776 
777 	genlmsg_end(msg, hdr);
778 	return 0;
779 
780 msg_err:
781 	genlmsg_cancel(msg, hdr);
782 	return err;
783 }
784 
785 static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
786 {
787 	struct vdpa_device *vdev;
788 	struct sk_buff *msg;
789 	const char *devname;
790 	struct device *dev;
791 	int err;
792 
793 	if (!info->attrs[VDPA_ATTR_DEV_NAME])
794 		return -EINVAL;
795 	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
796 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
797 	if (!msg)
798 		return -ENOMEM;
799 
800 	down_read(&vdpa_dev_lock);
801 	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
802 	if (!dev) {
803 		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
804 		err = -ENODEV;
805 		goto err;
806 	}
807 	vdev = container_of(dev, struct vdpa_device, dev);
808 	if (!vdev->mdev) {
809 		err = -EINVAL;
810 		goto mdev_err;
811 	}
812 	err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack);
813 	if (err)
814 		goto mdev_err;
815 
816 	err = genlmsg_reply(msg, info);
817 	put_device(dev);
818 	up_read(&vdpa_dev_lock);
819 	return err;
820 
821 mdev_err:
822 	put_device(dev);
823 err:
824 	up_read(&vdpa_dev_lock);
825 	nlmsg_free(msg);
826 	return err;
827 }
828 
/* Iteration state handed to vdpa_dev_dump() while dumping all vdpa devices. */
struct vdpa_dev_dump_info {
	struct sk_buff *msg;		/* message the device entries are added to */
	struct netlink_callback *cb;	/* netlink dump callback context */
	int start_idx;			/* position to resume from; earlier devices are skipped */
	int idx;			/* position of the device currently being visited */
};
835 
836 static int vdpa_dev_dump(struct device *dev, void *data)
837 {
838 	struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
839 	struct vdpa_dev_dump_info *info = data;
840 	int err;
841 
842 	if (!vdev->mdev)
843 		return 0;
844 	if (info->idx < info->start_idx) {
845 		info->idx++;
846 		return 0;
847 	}
848 	err = vdpa_dev_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
849 			    info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack);
850 	if (err)
851 		return err;
852 
853 	info->idx++;
854 	return 0;
855 }
856 
857 static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
858 {
859 	struct vdpa_dev_dump_info info;
860 
861 	info.msg = msg;
862 	info.cb = cb;
863 	info.start_idx = cb->args[0];
864 	info.idx = 0;
865 
866 	down_read(&vdpa_dev_lock);
867 	bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump);
868 	up_read(&vdpa_dev_lock);
869 	cb->args[0] = info.idx;
870 	return msg->len;
871 }
872 
873 static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features,
874 				       const struct virtio_net_config *config)
875 {
876 	u16 val_u16;
877 
878 	if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0 &&
879 	    (features & BIT_ULL(VIRTIO_NET_F_RSS)) == 0)
880 		return 0;
881 
882 	val_u16 = __virtio16_to_cpu(true, config->max_virtqueue_pairs);
883 
884 	return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16);
885 }
886 
887 static int vdpa_dev_net_mtu_config_fill(struct sk_buff *msg, u64 features,
888 					const struct virtio_net_config *config)
889 {
890 	u16 val_u16;
891 
892 	if ((features & BIT_ULL(VIRTIO_NET_F_MTU)) == 0)
893 		return 0;
894 
895 	val_u16 = __virtio16_to_cpu(true, config->mtu);
896 
897 	return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16);
898 }
899 
900 static int vdpa_dev_net_mac_config_fill(struct sk_buff *msg, u64 features,
901 					const struct virtio_net_config *config)
902 {
903 	if ((features & BIT_ULL(VIRTIO_NET_F_MAC)) == 0)
904 		return 0;
905 
906 	return  nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR,
907 			sizeof(config->mac), config->mac);
908 }
909 
910 static int vdpa_dev_net_status_config_fill(struct sk_buff *msg, u64 features,
911 					   const struct virtio_net_config *config)
912 {
913 	u16 val_u16;
914 
915 	if ((features & BIT_ULL(VIRTIO_NET_F_STATUS)) == 0)
916 		return 0;
917 
918 	val_u16 = __virtio16_to_cpu(true, config->status);
919 	return nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16);
920 }
921 
922 static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
923 {
924 	struct virtio_net_config config = {};
925 	u64 features_device;
926 
927 	vdev->config->get_config(vdev, 0, &config, sizeof(config));
928 
929 	features_device = vdev->config->get_device_features(vdev);
930 
931 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device,
932 			      VDPA_ATTR_PAD))
933 		return -EMSGSIZE;
934 
935 	if (vdpa_dev_net_mtu_config_fill(msg, features_device, &config))
936 		return -EMSGSIZE;
937 
938 	if (vdpa_dev_net_mac_config_fill(msg, features_device, &config))
939 		return -EMSGSIZE;
940 
941 	if (vdpa_dev_net_status_config_fill(msg, features_device, &config))
942 		return -EMSGSIZE;
943 
944 	return vdpa_dev_net_mq_config_fill(msg, features_device, &config);
945 }
946 
947 static int
948 vdpa_dev_blk_capacity_config_fill(struct sk_buff *msg,
949 				  const struct virtio_blk_config *config)
950 {
951 	u64 val_u64;
952 
953 	val_u64 = __virtio64_to_cpu(true, config->capacity);
954 
955 	return nla_put_u64_64bit(msg, VDPA_ATTR_DEV_BLK_CFG_CAPACITY,
956 				 val_u64, VDPA_ATTR_PAD);
957 }
958 
959 static int
960 vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features,
961 				  const struct virtio_blk_config *config)
962 {
963 	u32 val_u32;
964 
965 	if ((features & BIT_ULL(VIRTIO_BLK_F_SIZE_MAX)) == 0)
966 		return 0;
967 
968 	val_u32 = __virtio32_to_cpu(true, config->size_max);
969 
970 	return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32);
971 }
972 
973 /* fill the block size*/
974 static int
975 vdpa_dev_blk_block_size_config_fill(struct sk_buff *msg, u64 features,
976 				    const struct virtio_blk_config *config)
977 {
978 	u32 val_u32;
979 
980 	if ((features & BIT_ULL(VIRTIO_BLK_F_BLK_SIZE)) == 0)
981 		return 0;
982 
983 	val_u32 = __virtio32_to_cpu(true, config->blk_size);
984 
985 	return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, val_u32);
986 }
987 
988 static int
989 vdpa_dev_blk_seg_max_config_fill(struct sk_buff *msg, u64 features,
990 				 const struct virtio_blk_config *config)
991 {
992 	u32 val_u32;
993 
994 	if ((features & BIT_ULL(VIRTIO_BLK_F_SEG_MAX)) == 0)
995 		return 0;
996 
997 	val_u32 = __virtio32_to_cpu(true, config->seg_max);
998 
999 	return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, val_u32);
1000 }
1001 
1002 static int vdpa_dev_blk_mq_config_fill(struct sk_buff *msg, u64 features,
1003 				       const struct virtio_blk_config *config)
1004 {
1005 	u16 val_u16;
1006 
1007 	if ((features & BIT_ULL(VIRTIO_BLK_F_MQ)) == 0)
1008 		return 0;
1009 
1010 	val_u16 = __virtio16_to_cpu(true, config->num_queues);
1011 
1012 	return nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, val_u16);
1013 }
1014 
1015 static int vdpa_dev_blk_topology_config_fill(struct sk_buff *msg, u64 features,
1016 				       const struct virtio_blk_config *config)
1017 {
1018 	u16 min_io_size;
1019 	u32 opt_io_size;
1020 
1021 	if ((features & BIT_ULL(VIRTIO_BLK_F_TOPOLOGY)) == 0)
1022 		return 0;
1023 
1024 	min_io_size = __virtio16_to_cpu(true, config->min_io_size);
1025 	opt_io_size = __virtio32_to_cpu(true, config->opt_io_size);
1026 
1027 	if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_PHY_BLK_EXP,
1028 	    config->physical_block_exp))
1029 		return -EMSGSIZE;
1030 
1031 	if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_ALIGN_OFFSET,
1032 	    config->alignment_offset))
1033 		return -EMSGSIZE;
1034 
1035 	if (nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_MIN_IO_SIZE, min_io_size))
1036 		return -EMSGSIZE;
1037 
1038 	if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_OPT_IO_SIZE, opt_io_size))
1039 		return -EMSGSIZE;
1040 
1041 	return 0;
1042 }
1043 
1044 static int vdpa_dev_blk_discard_config_fill(struct sk_buff *msg, u64 features,
1045 				       const struct virtio_blk_config *config)
1046 {
1047 	u32 val_u32;
1048 
1049 	if ((features & BIT_ULL(VIRTIO_BLK_F_DISCARD)) == 0)
1050 		return 0;
1051 
1052 	val_u32 = __virtio32_to_cpu(true, config->max_discard_sectors);
1053 	if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEC, val_u32))
1054 		return -EMSGSIZE;
1055 
1056 	val_u32 = __virtio32_to_cpu(true, config->max_discard_seg);
1057 	if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEG, val_u32))
1058 		return -EMSGSIZE;
1059 
1060 	val_u32 = __virtio32_to_cpu(true, config->discard_sector_alignment);
1061 	if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN, val_u32))
1062 		return -EMSGSIZE;
1063 
1064 	return 0;
1065 }
1066 
1067 static int
1068 vdpa_dev_blk_write_zeroes_config_fill(struct sk_buff *msg, u64 features,
1069 				     const struct virtio_blk_config *config)
1070 {
1071 	u32 val_u32;
1072 
1073 	if ((features & BIT_ULL(VIRTIO_BLK_F_WRITE_ZEROES)) == 0)
1074 		return 0;
1075 
1076 	val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_sectors);
1077 	if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, val_u32))
1078 		return -EMSGSIZE;
1079 
1080 	val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_seg);
1081 	if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, val_u32))
1082 		return -EMSGSIZE;
1083 
1084 	return 0;
1085 }
1086 
1087 static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features)
1088 {
1089 	u8 ro;
1090 
1091 	ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1;
1092 	if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro))
1093 		return -EMSGSIZE;
1094 
1095 	return 0;
1096 }
1097 
1098 static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features)
1099 {
1100 	u8 flush;
1101 
1102 	flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1;
1103 	if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush))
1104 		return -EMSGSIZE;
1105 
1106 	return 0;
1107 }
1108 
1109 static int vdpa_dev_blk_config_fill(struct vdpa_device *vdev,
1110 				    struct sk_buff *msg)
1111 {
1112 	struct virtio_blk_config config = {};
1113 	u64 features_device;
1114 
1115 	vdev->config->get_config(vdev, 0, &config, sizeof(config));
1116 
1117 	features_device = vdev->config->get_device_features(vdev);
1118 
1119 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device,
1120 			      VDPA_ATTR_PAD))
1121 		return -EMSGSIZE;
1122 
1123 	if (vdpa_dev_blk_capacity_config_fill(msg, &config))
1124 		return -EMSGSIZE;
1125 
1126 	if (vdpa_dev_blk_seg_size_config_fill(msg, features_device, &config))
1127 		return -EMSGSIZE;
1128 
1129 	if (vdpa_dev_blk_block_size_config_fill(msg, features_device, &config))
1130 		return -EMSGSIZE;
1131 
1132 	if (vdpa_dev_blk_seg_max_config_fill(msg, features_device, &config))
1133 		return -EMSGSIZE;
1134 
1135 	if (vdpa_dev_blk_mq_config_fill(msg, features_device, &config))
1136 		return -EMSGSIZE;
1137 
1138 	if (vdpa_dev_blk_topology_config_fill(msg, features_device, &config))
1139 		return -EMSGSIZE;
1140 
1141 	if (vdpa_dev_blk_discard_config_fill(msg, features_device, &config))
1142 		return -EMSGSIZE;
1143 
1144 	if (vdpa_dev_blk_write_zeroes_config_fill(msg, features_device, &config))
1145 		return -EMSGSIZE;
1146 
1147 	if (vdpa_dev_blk_ro_config_fill(msg, features_device))
1148 		return -EMSGSIZE;
1149 
1150 	if (vdpa_dev_blk_flush_config_fill(msg, features_device))
1151 		return -EMSGSIZE;
1152 
1153 	return 0;
1154 }
1155 
1156 static int
1157 vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
1158 		     int flags, struct netlink_ext_ack *extack)
1159 {
1160 	u64 features_driver;
1161 	u8 status = 0;
1162 	u32 device_id;
1163 	void *hdr;
1164 	int err;
1165 
1166 	down_read(&vdev->cf_lock);
1167 	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
1168 			  VDPA_CMD_DEV_CONFIG_GET);
1169 	if (!hdr) {
1170 		err = -EMSGSIZE;
1171 		goto out;
1172 	}
1173 
1174 	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
1175 		err = -EMSGSIZE;
1176 		goto msg_err;
1177 	}
1178 
1179 	device_id = vdev->config->get_device_id(vdev);
1180 	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
1181 		err = -EMSGSIZE;
1182 		goto msg_err;
1183 	}
1184 
1185 	/* only read driver features after the feature negotiation is done */
1186 	status = vdev->config->get_status(vdev);
1187 	if (status & VIRTIO_CONFIG_S_FEATURES_OK) {
1188 		features_driver = vdev->config->get_driver_features(vdev);
1189 		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver,
1190 				      VDPA_ATTR_PAD)) {
1191 			err = -EMSGSIZE;
1192 			goto msg_err;
1193 		}
1194 	}
1195 
1196 	switch (device_id) {
1197 	case VIRTIO_ID_NET:
1198 		err = vdpa_dev_net_config_fill(vdev, msg);
1199 		break;
1200 	case VIRTIO_ID_BLOCK:
1201 		err = vdpa_dev_blk_config_fill(vdev, msg);
1202 		break;
1203 	default:
1204 		err = -EOPNOTSUPP;
1205 		break;
1206 	}
1207 	if (err)
1208 		goto msg_err;
1209 
1210 	up_read(&vdev->cf_lock);
1211 	genlmsg_end(msg, hdr);
1212 	return 0;
1213 
1214 msg_err:
1215 	genlmsg_cancel(msg, hdr);
1216 out:
1217 	up_read(&vdev->cf_lock);
1218 	return err;
1219 }
1220 
1221 static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
1222 			       struct genl_info *info, u32 index)
1223 {
1224 	struct virtio_net_config config = {};
1225 	u64 features;
1226 	u8 status;
1227 	int err;
1228 
1229 	status = vdev->config->get_status(vdev);
1230 	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
1231 		NL_SET_ERR_MSG_MOD(info->extack, "feature negotiation not complete");
1232 		return -EAGAIN;
1233 	}
1234 	vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
1235 
1236 	features = vdev->config->get_driver_features(vdev);
1237 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES,
1238 			      features, VDPA_ATTR_PAD))
1239 		return -EMSGSIZE;
1240 
1241 	err = vdpa_dev_net_mq_config_fill(msg, features, &config);
1242 	if (err)
1243 		return err;
1244 
1245 	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
1246 		return -EMSGSIZE;
1247 
1248 	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);
1249 	if (err)
1250 		return err;
1251 
1252 	return 0;
1253 }
1254 
1255 static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
1256 			     struct genl_info *info, u32 index)
1257 {
1258 	int err;
1259 
1260 	down_read(&vdev->cf_lock);
1261 	if (!vdev->config->get_vendor_vq_stats) {
1262 		err = -EOPNOTSUPP;
1263 		goto out;
1264 	}
1265 
1266 	err = vdpa_fill_stats_rec(vdev, msg, info, index);
1267 out:
1268 	up_read(&vdev->cf_lock);
1269 	return err;
1270 }
1271 
1272 static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
1273 				      struct sk_buff *msg,
1274 				      struct genl_info *info, u32 index)
1275 {
1276 	u32 device_id;
1277 	void *hdr;
1278 	int err;
1279 	u32 portid = info->snd_portid;
1280 	u32 seq = info->snd_seq;
1281 	u32 flags = 0;
1282 
1283 	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
1284 			  VDPA_CMD_DEV_VSTATS_GET);
1285 	if (!hdr)
1286 		return -EMSGSIZE;
1287 
1288 	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
1289 		err = -EMSGSIZE;
1290 		goto undo_msg;
1291 	}
1292 
1293 	device_id = vdev->config->get_device_id(vdev);
1294 	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
1295 		err = -EMSGSIZE;
1296 		goto undo_msg;
1297 	}
1298 
1299 	switch (device_id) {
1300 	case VIRTIO_ID_NET:
1301 		if (index > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
1302 			NL_SET_ERR_MSG_MOD(info->extack, "queue index exceeds max value");
1303 			err = -ERANGE;
1304 			break;
1305 		}
1306 
1307 		err = vendor_stats_fill(vdev, msg, info, index);
1308 		break;
1309 	default:
1310 		err = -EOPNOTSUPP;
1311 		break;
1312 	}
1313 	genlmsg_end(msg, hdr);
1314 
1315 	return err;
1316 
1317 undo_msg:
1318 	genlmsg_cancel(msg, hdr);
1319 	return err;
1320 }
1321 
1322 static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
1323 {
1324 	struct vdpa_device *vdev;
1325 	struct sk_buff *msg;
1326 	const char *devname;
1327 	struct device *dev;
1328 	int err;
1329 
1330 	if (!info->attrs[VDPA_ATTR_DEV_NAME])
1331 		return -EINVAL;
1332 	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
1333 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1334 	if (!msg)
1335 		return -ENOMEM;
1336 
1337 	down_read(&vdpa_dev_lock);
1338 	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
1339 	if (!dev) {
1340 		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
1341 		err = -ENODEV;
1342 		goto dev_err;
1343 	}
1344 	vdev = container_of(dev, struct vdpa_device, dev);
1345 	if (!vdev->mdev) {
1346 		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
1347 		err = -EINVAL;
1348 		goto mdev_err;
1349 	}
1350 	err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq,
1351 				   0, info->extack);
1352 	if (!err)
1353 		err = genlmsg_reply(msg, info);
1354 
1355 mdev_err:
1356 	put_device(dev);
1357 dev_err:
1358 	up_read(&vdpa_dev_lock);
1359 	if (err)
1360 		nlmsg_free(msg);
1361 	return err;
1362 }
1363 
1364 static int vdpa_dev_config_dump(struct device *dev, void *data)
1365 {
1366 	struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
1367 	struct vdpa_dev_dump_info *info = data;
1368 	int err;
1369 
1370 	if (!vdev->mdev)
1371 		return 0;
1372 	if (info->idx < info->start_idx) {
1373 		info->idx++;
1374 		return 0;
1375 	}
1376 	err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
1377 				   info->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1378 				   info->cb->extack);
1379 	if (err)
1380 		return err;
1381 
1382 	info->idx++;
1383 	return 0;
1384 }
1385 
1386 static int
1387 vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
1388 {
1389 	struct vdpa_dev_dump_info info;
1390 
1391 	info.msg = msg;
1392 	info.cb = cb;
1393 	info.start_idx = cb->args[0];
1394 	info.idx = 0;
1395 
1396 	down_read(&vdpa_dev_lock);
1397 	bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump);
1398 	up_read(&vdpa_dev_lock);
1399 	cb->args[0] = info.idx;
1400 	return msg->len;
1401 }
1402 
1403 static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
1404 					  struct genl_info *info)
1405 {
1406 	struct vdpa_device *vdev;
1407 	struct sk_buff *msg;
1408 	const char *devname;
1409 	struct device *dev;
1410 	u32 index;
1411 	int err;
1412 
1413 	if (!info->attrs[VDPA_ATTR_DEV_NAME])
1414 		return -EINVAL;
1415 
1416 	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
1417 		return -EINVAL;
1418 
1419 	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
1420 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1421 	if (!msg)
1422 		return -ENOMEM;
1423 
1424 	index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]);
1425 	down_read(&vdpa_dev_lock);
1426 	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
1427 	if (!dev) {
1428 		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
1429 		err = -ENODEV;
1430 		goto dev_err;
1431 	}
1432 	vdev = container_of(dev, struct vdpa_device, dev);
1433 	if (!vdev->mdev) {
1434 		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
1435 		err = -EINVAL;
1436 		goto mdev_err;
1437 	}
1438 	err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index);
1439 	if (err)
1440 		goto mdev_err;
1441 
1442 	err = genlmsg_reply(msg, info);
1443 
1444 	put_device(dev);
1445 	up_read(&vdpa_dev_lock);
1446 
1447 	return err;
1448 
1449 mdev_err:
1450 	put_device(dev);
1451 dev_err:
1452 	nlmsg_free(msg);
1453 	up_read(&vdpa_dev_lock);
1454 	return err;
1455 }
1456 
/*
 * Attribute validation policy of the vdpa generic netlink family, indexed by
 * VDPA_ATTR_* and referenced from vdpa_nl_family.policy.
 *
 * NOTE(review): VDPA_ATTR_DEV_NAME is declared NLA_STRING (not
 * NLA_NUL_STRING like VDPA_ATTR_MGMTDEV_BUS_NAME), while the doit handlers
 * pass nla_data() of that attribute straight to vdpa_name_match() - confirm
 * the match helper does not rely on a terminating NUL.
 */
static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
	[VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
	[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
	[VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 },
	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
	[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
	[VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 },
	[VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 },
};
1468 
/*
 * Command table of the vdpa generic netlink family.
 *
 * The GET commands for management devices, devices and device config offer
 * both a doit and a dumpit handler and carry no permission flag. Device
 * creation, device deletion and the vendor statistics query are doit-only
 * and flagged GENL_ADMIN_PERM.
 */
static const struct genl_ops vdpa_nl_ops[] = {
	{
		.cmd = VDPA_CMD_MGMTDEV_GET,
		.doit = vdpa_nl_cmd_mgmtdev_get_doit,
		.dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
	},
	{
		.cmd = VDPA_CMD_DEV_NEW,
		.doit = vdpa_nl_cmd_dev_add_set_doit,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = VDPA_CMD_DEV_DEL,
		.doit = vdpa_nl_cmd_dev_del_set_doit,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = VDPA_CMD_DEV_GET,
		.doit = vdpa_nl_cmd_dev_get_doit,
		.dumpit = vdpa_nl_cmd_dev_get_dumpit,
	},
	{
		.cmd = VDPA_CMD_DEV_CONFIG_GET,
		.doit = vdpa_nl_cmd_dev_config_get_doit,
		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
	},
	{
		.cmd = VDPA_CMD_DEV_VSTATS_GET,
		.doit = vdpa_nl_cmd_dev_stats_get_doit,
		.flags = GENL_ADMIN_PERM,
	},
};
1501 
/*
 * The vdpa generic netlink family: ties the family name and version to the
 * attribute policy (vdpa_nl_policy) and the command table (vdpa_nl_ops).
 * Registered in vdpa_init() and unregistered in vdpa_exit().
 */
static struct genl_family vdpa_nl_family __ro_after_init = {
	.name = VDPA_GENL_NAME,
	.version = VDPA_GENL_VERSION,
	.maxattr = VDPA_ATTR_MAX,
	.policy = vdpa_nl_policy,
	.netnsok = false,
	.module = THIS_MODULE,
	.ops = vdpa_nl_ops,
	.n_ops = ARRAY_SIZE(vdpa_nl_ops),
	/* First command value after the last one defined in vdpa_nl_ops. */
	.resv_start_op = VDPA_CMD_DEV_VSTATS_GET + 1,
};
1513 
1514 static int vdpa_init(void)
1515 {
1516 	int err;
1517 
1518 	err = bus_register(&vdpa_bus);
1519 	if (err)
1520 		return err;
1521 	err = genl_register_family(&vdpa_nl_family);
1522 	if (err)
1523 		goto err;
1524 	return 0;
1525 
1526 err:
1527 	bus_unregister(&vdpa_bus);
1528 	return err;
1529 }
1530 
/*
 * Module teardown: undo vdpa_init() in reverse order (netlink family first,
 * then the bus) and finally destroy the vdpa_index_ida allocator.
 */
static void __exit vdpa_exit(void)
{
	genl_unregister_family(&vdpa_nl_family);
	bus_unregister(&vdpa_bus);
	ida_destroy(&vdpa_index_ida);
}
/* vdpa_init() is hooked in at the core initcall level; vdpa_exit() is the module exit hook. */
core_initcall(vdpa_init);
module_exit(vdpa_exit);

/* Module metadata. */
MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
MODULE_DESCRIPTION("vDPA bus");
MODULE_LICENSE("GPL v2");
1543