xref: /linux/drivers/pci/iov.c (revision 0889d07f3e4b171c453b2aaf2b257f9074cdf624)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * PCI Express I/O Virtualization (IOV) support
4  *   Single Root IOV 1.0
5  *   Address Translation Service 1.0
6  *
7  * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
8  */
9 
10 #include <linux/pci.h>
11 #include <linux/slab.h>
12 #include <linux/mutex.h>
13 #include <linux/export.h>
14 #include <linux/string.h>
15 #include <linux/delay.h>
16 #include "pci.h"
17 
/* Size of the on-stack buffers that hold the "virtfn%u" sysfs link names. */
#define VIRTFN_ID_LEN	16
19 
20 int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
21 {
22 	if (!dev->is_physfn)
23 		return -EINVAL;
24 	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
25 				    dev->sriov->stride * vf_id) >> 8);
26 }
27 
28 int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
29 {
30 	if (!dev->is_physfn)
31 		return -EINVAL;
32 	return (dev->devfn + dev->sriov->offset +
33 		dev->sriov->stride * vf_id) & 0xff;
34 }
35 
36 /*
37  * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
38  * change when NumVFs changes.
39  *
40  * Update iov->offset and iov->stride when NumVFs is written.
41  */
42 static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
43 {
44 	struct pci_sriov *iov = dev->sriov;
45 
46 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
47 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
48 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
49 }
50 
51 /*
52  * The PF consumes one bus number.  NumVFs, First VF Offset, and VF Stride
53  * determine how many additional bus numbers will be consumed by VFs.
54  *
55  * Iterate over all valid NumVFs, validate offset and stride, and calculate
56  * the maximum number of bus numbers that could ever be required.
57  */
58 static int compute_max_vf_buses(struct pci_dev *dev)
59 {
60 	struct pci_sriov *iov = dev->sriov;
61 	int nr_virtfn, busnr, rc = 0;
62 
63 	for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
64 		pci_iov_set_numvfs(dev, nr_virtfn);
65 		if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
66 			rc = -EIO;
67 			goto out;
68 		}
69 
70 		busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
71 		if (busnr > iov->max_VF_buses)
72 			iov->max_VF_buses = busnr;
73 	}
74 
75 out:
76 	pci_iov_set_numvfs(dev, 0);
77 	return rc;
78 }
79 
80 static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
81 {
82 	struct pci_bus *child;
83 
84 	if (bus->number == busnr)
85 		return bus;
86 
87 	child = pci_find_bus(pci_domain_nr(bus), busnr);
88 	if (child)
89 		return child;
90 
91 	child = pci_add_new_bus(bus, NULL, busnr);
92 	if (!child)
93 		return NULL;
94 
95 	pci_bus_insert_busn_res(child, busnr, busnr);
96 
97 	return child;
98 }
99 
100 static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
101 {
102 	if (physbus != virtbus && list_empty(&virtbus->devices))
103 		pci_remove_bus(virtbus);
104 }
105 
106 resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
107 {
108 	if (!dev->is_physfn)
109 		return 0;
110 
111 	return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
112 }
113 
114 static void pci_read_vf_config_common(struct pci_dev *virtfn)
115 {
116 	struct pci_dev *physfn = virtfn->physfn;
117 
118 	/*
119 	 * Some config registers are the same across all associated VFs.
120 	 * Read them once from VF0 so we can skip reading them from the
121 	 * other VFs.
122 	 *
123 	 * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
124 	 * have the same Revision ID and Subsystem ID, but we assume they
125 	 * do.
126 	 */
127 	pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
128 			      &physfn->sriov->class);
129 	pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
130 			     &physfn->sriov->hdr_type);
131 	pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
132 			     &physfn->sriov->subsystem_vendor);
133 	pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
134 			     &physfn->sriov->subsystem_device);
135 }
136 
137 int pci_iov_add_virtfn(struct pci_dev *dev, int id)
138 {
139 	int i;
140 	int rc = -ENOMEM;
141 	u64 size;
142 	char buf[VIRTFN_ID_LEN];
143 	struct pci_dev *virtfn;
144 	struct resource *res;
145 	struct pci_sriov *iov = dev->sriov;
146 	struct pci_bus *bus;
147 
148 	bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
149 	if (!bus)
150 		goto failed;
151 
152 	virtfn = pci_alloc_dev(bus);
153 	if (!virtfn)
154 		goto failed0;
155 
156 	virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
157 	virtfn->vendor = dev->vendor;
158 	virtfn->device = iov->vf_device;
159 	virtfn->is_virtfn = 1;
160 	virtfn->physfn = pci_dev_get(dev);
161 
162 	if (id == 0)
163 		pci_read_vf_config_common(virtfn);
164 
165 	rc = pci_setup_device(virtfn);
166 	if (rc)
167 		goto failed1;
168 
169 	virtfn->dev.parent = dev->dev.parent;
170 	virtfn->multifunction = 0;
171 
172 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
173 		res = &dev->resource[i + PCI_IOV_RESOURCES];
174 		if (!res->parent)
175 			continue;
176 		virtfn->resource[i].name = pci_name(virtfn);
177 		virtfn->resource[i].flags = res->flags;
178 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
179 		virtfn->resource[i].start = res->start + size * id;
180 		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
181 		rc = request_resource(res, &virtfn->resource[i]);
182 		BUG_ON(rc);
183 	}
184 
185 	pci_device_add(virtfn, virtfn->bus);
186 
187 	sprintf(buf, "virtfn%u", id);
188 	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
189 	if (rc)
190 		goto failed2;
191 	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
192 	if (rc)
193 		goto failed3;
194 
195 	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
196 
197 	pci_bus_add_device(virtfn);
198 
199 	return 0;
200 
201 failed3:
202 	sysfs_remove_link(&dev->dev.kobj, buf);
203 failed2:
204 	pci_stop_and_remove_bus_device(virtfn);
205 failed1:
206 	pci_dev_put(dev);
207 failed0:
208 	virtfn_remove_bus(dev->bus, bus);
209 failed:
210 
211 	return rc;
212 }
213 
214 void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
215 {
216 	char buf[VIRTFN_ID_LEN];
217 	struct pci_dev *virtfn;
218 
219 	virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
220 					     pci_iov_virtfn_bus(dev, id),
221 					     pci_iov_virtfn_devfn(dev, id));
222 	if (!virtfn)
223 		return;
224 
225 	sprintf(buf, "virtfn%u", id);
226 	sysfs_remove_link(&dev->dev.kobj, buf);
227 	/*
228 	 * pci_stop_dev() could have been called for this virtfn already,
229 	 * so the directory for the virtfn may have been removed before.
230 	 * Double check to avoid spurious sysfs warnings.
231 	 */
232 	if (virtfn->dev.kobj.sd)
233 		sysfs_remove_link(&virtfn->dev.kobj, "physfn");
234 
235 	pci_stop_and_remove_bus_device(virtfn);
236 	virtfn_remove_bus(dev->bus, virtfn->bus);
237 
238 	/* balance pci_get_domain_bus_and_slot() */
239 	pci_dev_put(virtfn);
240 	pci_dev_put(dev);
241 }
242 
243 static ssize_t sriov_totalvfs_show(struct device *dev,
244 				   struct device_attribute *attr,
245 				   char *buf)
246 {
247 	struct pci_dev *pdev = to_pci_dev(dev);
248 
249 	return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
250 }
251 
252 static ssize_t sriov_numvfs_show(struct device *dev,
253 				 struct device_attribute *attr,
254 				 char *buf)
255 {
256 	struct pci_dev *pdev = to_pci_dev(dev);
257 
258 	return sprintf(buf, "%u\n", pdev->sriov->num_VFs);
259 }
260 
261 /*
262  * num_vfs > 0; number of VFs to enable
263  * num_vfs = 0; disable all VFs
264  *
265  * Note: SRIOV spec does not allow partial VF
266  *	 disable, so it's all or none.
267  */
268 static ssize_t sriov_numvfs_store(struct device *dev,
269 				  struct device_attribute *attr,
270 				  const char *buf, size_t count)
271 {
272 	struct pci_dev *pdev = to_pci_dev(dev);
273 	int ret;
274 	u16 num_vfs;
275 
276 	ret = kstrtou16(buf, 0, &num_vfs);
277 	if (ret < 0)
278 		return ret;
279 
280 	if (num_vfs > pci_sriov_get_totalvfs(pdev))
281 		return -ERANGE;
282 
283 	device_lock(&pdev->dev);
284 
285 	if (num_vfs == pdev->sriov->num_VFs)
286 		goto exit;
287 
288 	/* is PF driver loaded w/callback */
289 	if (!pdev->driver || !pdev->driver->sriov_configure) {
290 		pci_info(pdev, "Driver does not support SRIOV configuration via sysfs\n");
291 		ret = -ENOENT;
292 		goto exit;
293 	}
294 
295 	if (num_vfs == 0) {
296 		/* disable VFs */
297 		ret = pdev->driver->sriov_configure(pdev, 0);
298 		goto exit;
299 	}
300 
301 	/* enable VFs */
302 	if (pdev->sriov->num_VFs) {
303 		pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
304 			 pdev->sriov->num_VFs, num_vfs);
305 		ret = -EBUSY;
306 		goto exit;
307 	}
308 
309 	ret = pdev->driver->sriov_configure(pdev, num_vfs);
310 	if (ret < 0)
311 		goto exit;
312 
313 	if (ret != num_vfs)
314 		pci_warn(pdev, "%d VFs requested; only %d enabled\n",
315 			 num_vfs, ret);
316 
317 exit:
318 	device_unlock(&pdev->dev);
319 
320 	if (ret < 0)
321 		return ret;
322 
323 	return count;
324 }
325 
326 static ssize_t sriov_offset_show(struct device *dev,
327 				 struct device_attribute *attr,
328 				 char *buf)
329 {
330 	struct pci_dev *pdev = to_pci_dev(dev);
331 
332 	return sprintf(buf, "%u\n", pdev->sriov->offset);
333 }
334 
335 static ssize_t sriov_stride_show(struct device *dev,
336 				 struct device_attribute *attr,
337 				 char *buf)
338 {
339 	struct pci_dev *pdev = to_pci_dev(dev);
340 
341 	return sprintf(buf, "%u\n", pdev->sriov->stride);
342 }
343 
344 static ssize_t sriov_vf_device_show(struct device *dev,
345 				    struct device_attribute *attr,
346 				    char *buf)
347 {
348 	struct pci_dev *pdev = to_pci_dev(dev);
349 
350 	return sprintf(buf, "%x\n", pdev->sriov->vf_device);
351 }
352 
353 static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
354 					    struct device_attribute *attr,
355 					    char *buf)
356 {
357 	struct pci_dev *pdev = to_pci_dev(dev);
358 
359 	return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
360 }
361 
362 static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
363 					     struct device_attribute *attr,
364 					     const char *buf, size_t count)
365 {
366 	struct pci_dev *pdev = to_pci_dev(dev);
367 	bool drivers_autoprobe;
368 
369 	if (kstrtobool(buf, &drivers_autoprobe) < 0)
370 		return -EINVAL;
371 
372 	pdev->sriov->drivers_autoprobe = drivers_autoprobe;
373 
374 	return count;
375 }
376 
/*
 * SR-IOV sysfs attributes.  Each DEVICE_ATTR_RO(x) is backed by x_show()
 * above; each DEVICE_ATTR_RW(x) is backed by x_show() and x_store().
 */
static DEVICE_ATTR_RO(sriov_totalvfs);
static DEVICE_ATTR_RW(sriov_numvfs);
static DEVICE_ATTR_RO(sriov_offset);
static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);

/* NULL-terminated attribute list used by sriov_dev_attr_group */
static struct attribute *sriov_dev_attrs[] = {
	&dev_attr_sriov_totalvfs.attr,
	&dev_attr_sriov_numvfs.attr,
	&dev_attr_sriov_offset.attr,
	&dev_attr_sriov_stride.attr,
	&dev_attr_sriov_vf_device.attr,
	&dev_attr_sriov_drivers_autoprobe.attr,
	NULL,
};
393 
394 static umode_t sriov_attrs_are_visible(struct kobject *kobj,
395 				       struct attribute *a, int n)
396 {
397 	struct device *dev = kobj_to_dev(kobj);
398 
399 	if (!dev_is_pf(dev))
400 		return 0;
401 
402 	return a->mode;
403 }
404 
/*
 * Attribute group holding the SR-IOV sysfs files; sriov_attrs_are_visible()
 * restricts them to PFs.
 */
const struct attribute_group sriov_dev_attr_group = {
	.attrs = sriov_dev_attrs,
	.is_visible = sriov_attrs_are_visible,
};
409 
/*
 * Default (weak) arch hook called from sriov_enable() before VFs are
 * enabled; an architecture may override it.  This default does nothing
 * and reports success.
 */
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	return 0;
}
414 
/*
 * Default (weak) arch hook called after VF Enable has been cleared (from
 * sriov_disable() and the sriov_enable() error path); an architecture may
 * override it.  This default does nothing and reports success.
 */
int __weak pcibios_sriov_disable(struct pci_dev *pdev)
{
	return 0;
}
419 
420 static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
421 {
422 	unsigned int i;
423 	int rc;
424 
425 	if (dev->no_vf_scan)
426 		return 0;
427 
428 	for (i = 0; i < num_vfs; i++) {
429 		rc = pci_iov_add_virtfn(dev, i);
430 		if (rc)
431 			goto failed;
432 	}
433 	return 0;
434 failed:
435 	while (i--)
436 		pci_iov_remove_virtfn(dev, i);
437 
438 	return rc;
439 }
440 
/*
 * sriov_enable - enable @nr_virtfn VFs on PF @dev
 *
 * Validates the request against InitialVFs/TotalVFs, checks that all VF
 * BARs are assigned and that the last VF's bus number fits in the bus
 * range, creates the "dep_link" sysfs link when the PF depends on another
 * function, then programs NumVFs, sets VF Enable / VF MSE and adds the VF
 * devices.
 *
 * Returns 0 on success (including @nr_virtfn == 0, which is a no-op) or a
 * negative errno.  If pcibios_sriov_enable() or sriov_add_vfs() fails,
 * control jumps to err_pcibios, which clears VF Enable / VF MSE, calls
 * pcibios_sriov_disable(), removes "dep_link" and resets NumVFs to 0.
 */
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
	int rc;
	int i;
	int nres;
	u16 initial;
	struct resource *res;
	struct pci_dev *pdev;
	struct pci_sriov *iov = dev->sriov;
	int bars = 0;
	int bus;

	if (!nr_virtfn)
		return 0;

	/* VFs already enabled: caller must disable first */
	if (iov->num_VFs)
		return -EINVAL;

	/*
	 * InitialVFs must not exceed TotalVFs, and without the VF Migration
	 * capability the two must be equal.
	 */
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
	if (initial > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
		return -EIO;

	if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
		return -EINVAL;

	/*
	 * Build the mask of IOV resources to enable and count how many of
	 * them have actually been assigned (have a parent resource).
	 */
	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		bars |= (1 << (i + PCI_IOV_RESOURCES));
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (res->parent)
			nres++;
	}
	if (nres != iov->nres) {
		pci_err(dev, "not enough MMIO resources for SR-IOV\n");
		return -ENOMEM;
	}

	/* The last VF must still land inside the bus number range */
	bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
	if (bus > dev->bus->busn_res.end) {
		pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
			nr_virtfn, bus, &dev->bus->busn_res);
		return -ENOMEM;
	}

	if (pci_enable_resources(dev, bars)) {
		pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
		return -ENOMEM;
	}

	/*
	 * iov->link names the function this PF depends on.  If that is a
	 * different function, it must exist and be a PF itself; expose the
	 * relationship as the "dep_link" sysfs link.
	 */
	if (iov->link != dev->devfn) {
		pdev = pci_get_slot(dev->bus, iov->link);
		if (!pdev)
			return -ENODEV;

		if (!pdev->is_physfn) {
			pci_dev_put(pdev);
			return -ENOSYS;
		}

		rc = sysfs_create_link(&dev->dev.kobj,
					&pdev->dev.kobj, "dep_link");
		pci_dev_put(pdev);
		if (rc)
			return rc;
	}

	/* From here on "initial" is the number of VF devices to create */
	iov->initial_VFs = initial;
	if (nr_virtfn < initial)
		initial = nr_virtfn;

	rc = pcibios_sriov_enable(dev, initial);
	if (rc) {
		pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
		goto err_pcibios;
	}

	/*
	 * Program NumVFs first, then set VF Enable and VF Memory Space
	 * Enable with config access locked, and wait 100ms before touching
	 * the VFs.
	 */
	pci_iov_set_numvfs(dev, nr_virtfn);
	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	msleep(100);
	pci_cfg_access_unlock(dev);

	rc = sriov_add_vfs(dev, initial);
	if (rc)
		goto err_pcibios;

	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
	iov->num_VFs = nr_virtfn;

	return 0;

err_pcibios:
	/* Clear VF Enable / VF MSE again and wait one second */
	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_cfg_access_unlock(dev);

	pcibios_sriov_disable(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	pci_iov_set_numvfs(dev, 0);
	return rc;
}
550 
551 static void sriov_del_vfs(struct pci_dev *dev)
552 {
553 	struct pci_sriov *iov = dev->sriov;
554 	int i;
555 
556 	if (dev->no_vf_scan)
557 		return;
558 
559 	for (i = 0; i < iov->num_VFs; i++)
560 		pci_iov_remove_virtfn(dev, i);
561 }
562 
563 static void sriov_disable(struct pci_dev *dev)
564 {
565 	struct pci_sriov *iov = dev->sriov;
566 
567 	if (!iov->num_VFs)
568 		return;
569 
570 	sriov_del_vfs(dev);
571 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
572 	pci_cfg_access_lock(dev);
573 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
574 	ssleep(1);
575 	pci_cfg_access_unlock(dev);
576 
577 	pcibios_sriov_disable(dev);
578 
579 	if (iov->link != dev->devfn)
580 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
581 
582 	iov->num_VFs = 0;
583 	pci_iov_set_numvfs(dev, 0);
584 }
585 
586 static int sriov_init(struct pci_dev *dev, int pos)
587 {
588 	int i, bar64;
589 	int rc;
590 	int nres;
591 	u32 pgsz;
592 	u16 ctrl, total;
593 	struct pci_sriov *iov;
594 	struct resource *res;
595 	struct pci_dev *pdev;
596 
597 	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
598 	if (ctrl & PCI_SRIOV_CTRL_VFE) {
599 		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
600 		ssleep(1);
601 	}
602 
603 	ctrl = 0;
604 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
605 		if (pdev->is_physfn)
606 			goto found;
607 
608 	pdev = NULL;
609 	if (pci_ari_enabled(dev->bus))
610 		ctrl |= PCI_SRIOV_CTRL_ARI;
611 
612 found:
613 	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
614 
615 	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
616 	if (!total)
617 		return 0;
618 
619 	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
620 	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
621 	pgsz &= ~((1 << i) - 1);
622 	if (!pgsz)
623 		return -EIO;
624 
625 	pgsz &= ~(pgsz - 1);
626 	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
627 
628 	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
629 	if (!iov)
630 		return -ENOMEM;
631 
632 	nres = 0;
633 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
634 		res = &dev->resource[i + PCI_IOV_RESOURCES];
635 		/*
636 		 * If it is already FIXED, don't change it, something
637 		 * (perhaps EA or header fixups) wants it this way.
638 		 */
639 		if (res->flags & IORESOURCE_PCI_FIXED)
640 			bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
641 		else
642 			bar64 = __pci_read_base(dev, pci_bar_unknown, res,
643 						pos + PCI_SRIOV_BAR + i * 4);
644 		if (!res->flags)
645 			continue;
646 		if (resource_size(res) & (PAGE_SIZE - 1)) {
647 			rc = -EIO;
648 			goto failed;
649 		}
650 		iov->barsz[i] = resource_size(res);
651 		res->end = res->start + resource_size(res) * total - 1;
652 		pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
653 			 i, res, i, total);
654 		i += bar64;
655 		nres++;
656 	}
657 
658 	iov->pos = pos;
659 	iov->nres = nres;
660 	iov->ctrl = ctrl;
661 	iov->total_VFs = total;
662 	iov->driver_max_VFs = total;
663 	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
664 	iov->pgsz = pgsz;
665 	iov->self = dev;
666 	iov->drivers_autoprobe = true;
667 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
668 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
669 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
670 		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
671 
672 	if (pdev)
673 		iov->dev = pci_dev_get(pdev);
674 	else
675 		iov->dev = dev;
676 
677 	dev->sriov = iov;
678 	dev->is_physfn = 1;
679 	rc = compute_max_vf_buses(dev);
680 	if (rc)
681 		goto fail_max_buses;
682 
683 	return 0;
684 
685 fail_max_buses:
686 	dev->sriov = NULL;
687 	dev->is_physfn = 0;
688 failed:
689 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
690 		res = &dev->resource[i + PCI_IOV_RESOURCES];
691 		res->flags = 0;
692 	}
693 
694 	kfree(iov);
695 	return rc;
696 }
697 
698 static void sriov_release(struct pci_dev *dev)
699 {
700 	BUG_ON(dev->sriov->num_VFs);
701 
702 	if (dev != dev->sriov->dev)
703 		pci_dev_put(dev->sriov->dev);
704 
705 	kfree(dev->sriov);
706 	dev->sriov = NULL;
707 }
708 
709 static void sriov_restore_state(struct pci_dev *dev)
710 {
711 	int i;
712 	u16 ctrl;
713 	struct pci_sriov *iov = dev->sriov;
714 
715 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
716 	if (ctrl & PCI_SRIOV_CTRL_VFE)
717 		return;
718 
719 	/*
720 	 * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
721 	 * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
722 	 */
723 	ctrl &= ~PCI_SRIOV_CTRL_ARI;
724 	ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
725 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
726 
727 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
728 		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
729 
730 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
731 	pci_iov_set_numvfs(dev, iov->num_VFs);
732 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
733 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
734 		msleep(100);
735 }
736 
737 /**
738  * pci_iov_init - initialize the IOV capability
739  * @dev: the PCI device
740  *
741  * Returns 0 on success, or negative on failure.
742  */
743 int pci_iov_init(struct pci_dev *dev)
744 {
745 	int pos;
746 
747 	if (!pci_is_pcie(dev))
748 		return -ENODEV;
749 
750 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
751 	if (pos)
752 		return sriov_init(dev, pos);
753 
754 	return -ENODEV;
755 }
756 
757 /**
758  * pci_iov_release - release resources used by the IOV capability
759  * @dev: the PCI device
760  */
761 void pci_iov_release(struct pci_dev *dev)
762 {
763 	if (dev->is_physfn)
764 		sriov_release(dev);
765 }
766 
767 /**
768  * pci_iov_remove - clean up SR-IOV state after PF driver is detached
769  * @dev: the PCI device
770  */
771 void pci_iov_remove(struct pci_dev *dev)
772 {
773 	struct pci_sriov *iov = dev->sriov;
774 
775 	if (!dev->is_physfn)
776 		return;
777 
778 	iov->driver_max_VFs = iov->total_VFs;
779 	if (iov->num_VFs)
780 		pci_warn(dev, "driver left SR-IOV enabled after remove\n");
781 }
782 
783 /**
784  * pci_iov_update_resource - update a VF BAR
785  * @dev: the PCI device
786  * @resno: the resource number
787  *
788  * Update a VF BAR in the SR-IOV capability of a PF.
789  */
790 void pci_iov_update_resource(struct pci_dev *dev, int resno)
791 {
792 	struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
793 	struct resource *res = dev->resource + resno;
794 	int vf_bar = resno - PCI_IOV_RESOURCES;
795 	struct pci_bus_region region;
796 	u16 cmd;
797 	u32 new;
798 	int reg;
799 
800 	/*
801 	 * The generic pci_restore_bars() path calls this for all devices,
802 	 * including VFs and non-SR-IOV devices.  If this is not a PF, we
803 	 * have nothing to do.
804 	 */
805 	if (!iov)
806 		return;
807 
808 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
809 	if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
810 		dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
811 			 vf_bar, res);
812 		return;
813 	}
814 
815 	/*
816 	 * Ignore unimplemented BARs, unused resource slots for 64-bit
817 	 * BARs, and non-movable resources, e.g., those described via
818 	 * Enhanced Allocation.
819 	 */
820 	if (!res->flags)
821 		return;
822 
823 	if (res->flags & IORESOURCE_UNSET)
824 		return;
825 
826 	if (res->flags & IORESOURCE_PCI_FIXED)
827 		return;
828 
829 	pcibios_resource_to_bus(dev->bus, &region, res);
830 	new = region.start;
831 	new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
832 
833 	reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
834 	pci_write_config_dword(dev, reg, new);
835 	if (res->flags & IORESOURCE_MEM_64) {
836 		new = region.start >> 16 >> 16;
837 		pci_write_config_dword(dev, reg + 4, new);
838 	}
839 }
840 
/*
 * Default (weak) arch hook for the alignment of IOV resource @resno: the
 * size of a single VF BAR, as reported by pci_iov_resource_size().  An
 * architecture may override it.
 */
resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,
						      int resno)
{
	return pci_iov_resource_size(dev, resno);
}
846 
/**
 * pci_sriov_resource_alignment - get resource alignment for VF BAR
 * @dev: the PCI device
 * @resno: the resource number
 *
 * Returns the alignment of the VF BAR found in the SR-IOV capability.
 * This is not the same as the resource size, which is defined as the
 * VF BAR size multiplied by the number of VFs.  By default the alignment
 * is just the VF BAR size; the value comes from
 * pcibios_iov_resource_alignment(), which an architecture may override.
 */
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
{
	return pcibios_iov_resource_alignment(dev, resno);
}
861 
862 /**
863  * pci_restore_iov_state - restore the state of the IOV capability
864  * @dev: the PCI device
865  */
866 void pci_restore_iov_state(struct pci_dev *dev)
867 {
868 	if (dev->is_physfn)
869 		sriov_restore_state(dev);
870 }
871 
872 /**
873  * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
874  * @dev: the PCI device
875  * @auto_probe: set VF drivers auto probe flag
876  */
877 void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
878 {
879 	if (dev->is_physfn)
880 		dev->sriov->drivers_autoprobe = auto_probe;
881 }
882 
883 /**
884  * pci_iov_bus_range - find bus range used by Virtual Function
885  * @bus: the PCI bus
886  *
887  * Returns max number of buses (exclude current one) used by Virtual
888  * Functions.
889  */
890 int pci_iov_bus_range(struct pci_bus *bus)
891 {
892 	int max = 0;
893 	struct pci_dev *dev;
894 
895 	list_for_each_entry(dev, &bus->devices, bus_list) {
896 		if (!dev->is_physfn)
897 			continue;
898 		if (dev->sriov->max_VF_buses > max)
899 			max = dev->sriov->max_VF_buses;
900 	}
901 
902 	return max ? max - bus->number : 0;
903 }
904 
905 /**
906  * pci_enable_sriov - enable the SR-IOV capability
907  * @dev: the PCI device
908  * @nr_virtfn: number of virtual functions to enable
909  *
910  * Returns 0 on success, or negative on failure.
911  */
912 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
913 {
914 	might_sleep();
915 
916 	if (!dev->is_physfn)
917 		return -ENOSYS;
918 
919 	return sriov_enable(dev, nr_virtfn);
920 }
921 EXPORT_SYMBOL_GPL(pci_enable_sriov);
922 
923 /**
924  * pci_disable_sriov - disable the SR-IOV capability
925  * @dev: the PCI device
926  */
927 void pci_disable_sriov(struct pci_dev *dev)
928 {
929 	might_sleep();
930 
931 	if (!dev->is_physfn)
932 		return;
933 
934 	sriov_disable(dev);
935 }
936 EXPORT_SYMBOL_GPL(pci_disable_sriov);
937 
938 /**
939  * pci_num_vf - return number of VFs associated with a PF device_release_driver
940  * @dev: the PCI device
941  *
942  * Returns number of VFs, or 0 if SR-IOV is not enabled.
943  */
944 int pci_num_vf(struct pci_dev *dev)
945 {
946 	if (!dev->is_physfn)
947 		return 0;
948 
949 	return dev->sriov->num_VFs;
950 }
951 EXPORT_SYMBOL_GPL(pci_num_vf);
952 
953 /**
954  * pci_vfs_assigned - returns number of VFs are assigned to a guest
955  * @dev: the PCI device
956  *
957  * Returns number of VFs belonging to this device that are assigned to a guest.
958  * If device is not a physical function returns 0.
959  */
960 int pci_vfs_assigned(struct pci_dev *dev)
961 {
962 	struct pci_dev *vfdev;
963 	unsigned int vfs_assigned = 0;
964 	unsigned short dev_id;
965 
966 	/* only search if we are a PF */
967 	if (!dev->is_physfn)
968 		return 0;
969 
970 	/*
971 	 * determine the device ID for the VFs, the vendor ID will be the
972 	 * same as the PF so there is no need to check for that one
973 	 */
974 	dev_id = dev->sriov->vf_device;
975 
976 	/* loop through all the VFs to see if we own any that are assigned */
977 	vfdev = pci_get_device(dev->vendor, dev_id, NULL);
978 	while (vfdev) {
979 		/*
980 		 * It is considered assigned if it is a virtual function with
981 		 * our dev as the physical function and the assigned bit is set
982 		 */
983 		if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
984 			pci_is_dev_assigned(vfdev))
985 			vfs_assigned++;
986 
987 		vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
988 	}
989 
990 	return vfs_assigned;
991 }
992 EXPORT_SYMBOL_GPL(pci_vfs_assigned);
993 
994 /**
995  * pci_sriov_set_totalvfs -- reduce the TotalVFs available
996  * @dev: the PCI PF device
997  * @numvfs: number that should be used for TotalVFs supported
998  *
999  * Should be called from PF driver's probe routine with
1000  * device's mutex held.
1001  *
1002  * Returns 0 if PF is an SRIOV-capable device and
1003  * value of numvfs valid. If not a PF return -ENOSYS;
1004  * if numvfs is invalid return -EINVAL;
1005  * if VFs already enabled, return -EBUSY.
1006  */
1007 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
1008 {
1009 	if (!dev->is_physfn)
1010 		return -ENOSYS;
1011 
1012 	if (numvfs > dev->sriov->total_VFs)
1013 		return -EINVAL;
1014 
1015 	/* Shouldn't change if VFs already enabled */
1016 	if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
1017 		return -EBUSY;
1018 
1019 	dev->sriov->driver_max_VFs = numvfs;
1020 	return 0;
1021 }
1022 EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
1023 
1024 /**
1025  * pci_sriov_get_totalvfs -- get total VFs supported on this device
1026  * @dev: the PCI PF device
1027  *
1028  * For a PCIe device with SRIOV support, return the PCIe
1029  * SRIOV capability value of TotalVFs or the value of driver_max_VFs
1030  * if the driver reduced it.  Otherwise 0.
1031  */
1032 int pci_sriov_get_totalvfs(struct pci_dev *dev)
1033 {
1034 	if (!dev->is_physfn)
1035 		return 0;
1036 
1037 	return dev->sriov->driver_max_VFs;
1038 }
1039 EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
1040 
1041 /**
1042  * pci_sriov_configure_simple - helper to configure SR-IOV
1043  * @dev: the PCI device
1044  * @nr_virtfn: number of virtual functions to enable, 0 to disable
1045  *
1046  * Enable or disable SR-IOV for devices that don't require any PF setup
1047  * before enabling SR-IOV.  Return value is negative on error, or number of
1048  * VFs allocated on success.
1049  */
1050 int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
1051 {
1052 	int rc;
1053 
1054 	might_sleep();
1055 
1056 	if (!dev->is_physfn)
1057 		return -ENODEV;
1058 
1059 	if (pci_vfs_assigned(dev)) {
1060 		pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
1061 		return -EPERM;
1062 	}
1063 
1064 	if (nr_virtfn == 0) {
1065 		sriov_disable(dev);
1066 		return 0;
1067 	}
1068 
1069 	rc = sriov_enable(dev, nr_virtfn);
1070 	if (rc < 0)
1071 		return rc;
1072 
1073 	return nr_virtfn;
1074 }
1075 EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);
1076