xref: /linux/drivers/virtio/virtio_pci_modern.c (revision b85d45947951d23cb22d90caecf4c1eb81342c96)
1 /*
2  * Virtio PCI driver - modern (virtio 1.0) device support
3  *
4  * This module allows virtio devices to be used over a virtual PCI device.
5  * This can be used with QEMU based VMMs like KVM or Xen.
6  *
7  * Copyright IBM Corp. 2007
8  * Copyright Red Hat, Inc. 2014
9  *
10  * Authors:
11  *  Anthony Liguori  <aliguori@us.ibm.com>
12  *  Rusty Russell <rusty@rustcorp.com.au>
13  *  Michael S. Tsirkin <mst@redhat.com>
14  *
15  * This work is licensed under the terms of the GNU GPL, version 2 or later.
16  * See the COPYING file in the top-level directory.
17  *
18  */
19 
20 #define VIRTIO_PCI_NO_LEGACY
21 #include "virtio_pci_common.h"
22 
23 /*
24  * Type-safe wrappers for io accesses.
25  * Use these to enforce at compile time the following spec requirement:
26  *
27  * The driver MUST access each field using the “natural” access
28  * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
29  * for 16-bit fields and 8-bit accesses for 8-bit fields.
30  */
31 static inline u8 vp_ioread8(u8 __iomem *addr)
32 {
33 	return ioread8(addr);
34 }
35 static inline u16 vp_ioread16 (u16 __iomem *addr)
36 {
37 	return ioread16(addr);
38 }
39 
40 static inline u32 vp_ioread32(u32 __iomem *addr)
41 {
42 	return ioread32(addr);
43 }
44 
45 static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
46 {
47 	iowrite8(value, addr);
48 }
49 
50 static inline void vp_iowrite16(u16 value, u16 __iomem *addr)
51 {
52 	iowrite16(value, addr);
53 }
54 
55 static inline void vp_iowrite32(u32 value, u32 __iomem *addr)
56 {
57 	iowrite32(value, addr);
58 }
59 
60 static void vp_iowrite64_twopart(u64 val,
61 				 __le32 __iomem *lo, __le32 __iomem *hi)
62 {
63 	vp_iowrite32((u32)val, lo);
64 	vp_iowrite32(val >> 32, hi);
65 }
66 
67 static void __iomem *map_capability(struct pci_dev *dev, int off,
68 				    size_t minlen,
69 				    u32 align,
70 				    u32 start, u32 size,
71 				    size_t *len)
72 {
73 	u8 bar;
74 	u32 offset, length;
75 	void __iomem *p;
76 
77 	pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
78 						 bar),
79 			     &bar);
80 	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
81 			     &offset);
82 	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
83 			      &length);
84 
85 	if (length <= start) {
86 		dev_err(&dev->dev,
87 			"virtio_pci: bad capability len %u (>%u expected)\n",
88 			length, start);
89 		return NULL;
90 	}
91 
92 	if (length - start < minlen) {
93 		dev_err(&dev->dev,
94 			"virtio_pci: bad capability len %u (>=%zu expected)\n",
95 			length, minlen);
96 		return NULL;
97 	}
98 
99 	length -= start;
100 
101 	if (start + offset < offset) {
102 		dev_err(&dev->dev,
103 			"virtio_pci: map wrap-around %u+%u\n",
104 			start, offset);
105 		return NULL;
106 	}
107 
108 	offset += start;
109 
110 	if (offset & (align - 1)) {
111 		dev_err(&dev->dev,
112 			"virtio_pci: offset %u not aligned to %u\n",
113 			offset, align);
114 		return NULL;
115 	}
116 
117 	if (length > size)
118 		length = size;
119 
120 	if (len)
121 		*len = length;
122 
123 	if (minlen + offset < minlen ||
124 	    minlen + offset > pci_resource_len(dev, bar)) {
125 		dev_err(&dev->dev,
126 			"virtio_pci: map virtio %zu@%u "
127 			"out of range on bar %i length %lu\n",
128 			minlen, offset,
129 			bar, (unsigned long)pci_resource_len(dev, bar));
130 		return NULL;
131 	}
132 
133 	p = pci_iomap_range(dev, bar, offset, length);
134 	if (!p)
135 		dev_err(&dev->dev,
136 			"virtio_pci: unable to map virtio %u@%u on bar %i\n",
137 			length, offset, bar);
138 	return p;
139 }
140 
141 /* virtio config->get_features() implementation */
142 static u64 vp_get_features(struct virtio_device *vdev)
143 {
144 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
145 	u64 features;
146 
147 	vp_iowrite32(0, &vp_dev->common->device_feature_select);
148 	features = vp_ioread32(&vp_dev->common->device_feature);
149 	vp_iowrite32(1, &vp_dev->common->device_feature_select);
150 	features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
151 
152 	return features;
153 }
154 
155 /* virtio config->finalize_features() implementation */
156 static int vp_finalize_features(struct virtio_device *vdev)
157 {
158 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
159 
160 	/* Give virtio_ring a chance to accept features. */
161 	vring_transport_features(vdev);
162 
163 	if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
164 		dev_err(&vdev->dev, "virtio: device uses modern interface "
165 			"but does not have VIRTIO_F_VERSION_1\n");
166 		return -EINVAL;
167 	}
168 
169 	vp_iowrite32(0, &vp_dev->common->guest_feature_select);
170 	vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
171 	vp_iowrite32(1, &vp_dev->common->guest_feature_select);
172 	vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
173 
174 	return 0;
175 }
176 
177 /* virtio config->get() implementation */
178 static void vp_get(struct virtio_device *vdev, unsigned offset,
179 		   void *buf, unsigned len)
180 {
181 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
182 	u8 b;
183 	__le16 w;
184 	__le32 l;
185 
186 	BUG_ON(offset + len > vp_dev->device_len);
187 
188 	switch (len) {
189 	case 1:
190 		b = ioread8(vp_dev->device + offset);
191 		memcpy(buf, &b, sizeof b);
192 		break;
193 	case 2:
194 		w = cpu_to_le16(ioread16(vp_dev->device + offset));
195 		memcpy(buf, &w, sizeof w);
196 		break;
197 	case 4:
198 		l = cpu_to_le32(ioread32(vp_dev->device + offset));
199 		memcpy(buf, &l, sizeof l);
200 		break;
201 	case 8:
202 		l = cpu_to_le32(ioread32(vp_dev->device + offset));
203 		memcpy(buf, &l, sizeof l);
204 		l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
205 		memcpy(buf + sizeof l, &l, sizeof l);
206 		break;
207 	default:
208 		BUG();
209 	}
210 }
211 
212 /* the config->set() implementation.  it's symmetric to the config->get()
213  * implementation */
214 static void vp_set(struct virtio_device *vdev, unsigned offset,
215 		   const void *buf, unsigned len)
216 {
217 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
218 	u8 b;
219 	__le16 w;
220 	__le32 l;
221 
222 	BUG_ON(offset + len > vp_dev->device_len);
223 
224 	switch (len) {
225 	case 1:
226 		memcpy(&b, buf, sizeof b);
227 		iowrite8(b, vp_dev->device + offset);
228 		break;
229 	case 2:
230 		memcpy(&w, buf, sizeof w);
231 		iowrite16(le16_to_cpu(w), vp_dev->device + offset);
232 		break;
233 	case 4:
234 		memcpy(&l, buf, sizeof l);
235 		iowrite32(le32_to_cpu(l), vp_dev->device + offset);
236 		break;
237 	case 8:
238 		memcpy(&l, buf, sizeof l);
239 		iowrite32(le32_to_cpu(l), vp_dev->device + offset);
240 		memcpy(&l, buf + sizeof l, sizeof l);
241 		iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
242 		break;
243 	default:
244 		BUG();
245 	}
246 }
247 
248 static u32 vp_generation(struct virtio_device *vdev)
249 {
250 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
251 	return vp_ioread8(&vp_dev->common->config_generation);
252 }
253 
254 /* config->{get,set}_status() implementations */
255 static u8 vp_get_status(struct virtio_device *vdev)
256 {
257 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
258 	return vp_ioread8(&vp_dev->common->device_status);
259 }
260 
261 static void vp_set_status(struct virtio_device *vdev, u8 status)
262 {
263 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
264 	/* We should never be setting status to 0. */
265 	BUG_ON(status == 0);
266 	vp_iowrite8(status, &vp_dev->common->device_status);
267 }
268 
269 static void vp_reset(struct virtio_device *vdev)
270 {
271 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
272 	/* 0 status means a reset. */
273 	vp_iowrite8(0, &vp_dev->common->device_status);
274 	/* Flush out the status write, and flush in device writes,
275 	 * including MSI-X interrupts, if any. */
276 	vp_ioread8(&vp_dev->common->device_status);
277 	/* Flush pending VQ/configuration callbacks. */
278 	vp_synchronize_vectors(vdev);
279 }
280 
281 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
282 {
283 	/* Setup the vector used for configuration events */
284 	vp_iowrite16(vector, &vp_dev->common->msix_config);
285 	/* Verify we had enough resources to assign the vector */
286 	/* Will also flush the write out to device */
287 	return vp_ioread16(&vp_dev->common->msix_config);
288 }
289 
290 static size_t vring_pci_size(u16 num)
291 {
292 	/* We only need a cacheline separation. */
293 	return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
294 }
295 
296 static void *alloc_virtqueue_pages(int *num)
297 {
298 	void *pages;
299 
300 	/* TODO: allocate each queue chunk individually */
301 	for (; *num && vring_pci_size(*num) > PAGE_SIZE; *num /= 2) {
302 		pages = alloc_pages_exact(vring_pci_size(*num),
303 					  GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
304 		if (pages)
305 			return pages;
306 	}
307 
308 	if (!*num)
309 		return NULL;
310 
311 	/* Try to get a single page. You are my only hope! */
312 	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
313 }
314 
315 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
316 				  struct virtio_pci_vq_info *info,
317 				  unsigned index,
318 				  void (*callback)(struct virtqueue *vq),
319 				  const char *name,
320 				  u16 msix_vec)
321 {
322 	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
323 	struct virtqueue *vq;
324 	u16 num, off;
325 	int err;
326 
327 	if (index >= vp_ioread16(&cfg->num_queues))
328 		return ERR_PTR(-ENOENT);
329 
330 	/* Select the queue we're interested in */
331 	vp_iowrite16(index, &cfg->queue_select);
332 
333 	/* Check if queue is either not available or already active. */
334 	num = vp_ioread16(&cfg->queue_size);
335 	if (!num || vp_ioread16(&cfg->queue_enable))
336 		return ERR_PTR(-ENOENT);
337 
338 	if (num & (num - 1)) {
339 		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
340 		return ERR_PTR(-EINVAL);
341 	}
342 
343 	/* get offset of notification word for this vq */
344 	off = vp_ioread16(&cfg->queue_notify_off);
345 
346 	info->num = num;
347 	info->msix_vector = msix_vec;
348 
349 	info->queue = alloc_virtqueue_pages(&info->num);
350 	if (info->queue == NULL)
351 		return ERR_PTR(-ENOMEM);
352 
353 	/* create the vring */
354 	vq = vring_new_virtqueue(index, info->num,
355 				 SMP_CACHE_BYTES, &vp_dev->vdev,
356 				 true, info->queue, vp_notify, callback, name);
357 	if (!vq) {
358 		err = -ENOMEM;
359 		goto err_new_queue;
360 	}
361 
362 	/* activate the queue */
363 	vp_iowrite16(num, &cfg->queue_size);
364 	vp_iowrite64_twopart(virt_to_phys(info->queue),
365 			     &cfg->queue_desc_lo, &cfg->queue_desc_hi);
366 	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
367 			     &cfg->queue_avail_lo, &cfg->queue_avail_hi);
368 	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
369 			     &cfg->queue_used_lo, &cfg->queue_used_hi);
370 
371 	if (vp_dev->notify_base) {
372 		/* offset should not wrap */
373 		if ((u64)off * vp_dev->notify_offset_multiplier + 2
374 		    > vp_dev->notify_len) {
375 			dev_warn(&vp_dev->pci_dev->dev,
376 				 "bad notification offset %u (x %u) "
377 				 "for queue %u > %zd",
378 				 off, vp_dev->notify_offset_multiplier,
379 				 index, vp_dev->notify_len);
380 			err = -EINVAL;
381 			goto err_map_notify;
382 		}
383 		vq->priv = (void __force *)vp_dev->notify_base +
384 			off * vp_dev->notify_offset_multiplier;
385 	} else {
386 		vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
387 					  vp_dev->notify_map_cap, 2, 2,
388 					  off * vp_dev->notify_offset_multiplier, 2,
389 					  NULL);
390 	}
391 
392 	if (!vq->priv) {
393 		err = -ENOMEM;
394 		goto err_map_notify;
395 	}
396 
397 	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
398 		vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
399 		msix_vec = vp_ioread16(&cfg->queue_msix_vector);
400 		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
401 			err = -EBUSY;
402 			goto err_assign_vector;
403 		}
404 	}
405 
406 	return vq;
407 
408 err_assign_vector:
409 	if (!vp_dev->notify_base)
410 		pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
411 err_map_notify:
412 	vring_del_virtqueue(vq);
413 err_new_queue:
414 	free_pages_exact(info->queue, vring_pci_size(info->num));
415 	return ERR_PTR(err);
416 }
417 
418 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
419 			      struct virtqueue *vqs[],
420 			      vq_callback_t *callbacks[],
421 			      const char *names[])
422 {
423 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
424 	struct virtqueue *vq;
425 	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);
426 
427 	if (rc)
428 		return rc;
429 
430 	/* Select and activate all queues. Has to be done last: once we do
431 	 * this, there's no way to go back except reset.
432 	 */
433 	list_for_each_entry(vq, &vdev->vqs, list) {
434 		vp_iowrite16(vq->index, &vp_dev->common->queue_select);
435 		vp_iowrite16(1, &vp_dev->common->queue_enable);
436 	}
437 
438 	return 0;
439 }
440 
441 static void del_vq(struct virtio_pci_vq_info *info)
442 {
443 	struct virtqueue *vq = info->vq;
444 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
445 
446 	vp_iowrite16(vq->index, &vp_dev->common->queue_select);
447 
448 	if (vp_dev->msix_enabled) {
449 		vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
450 			     &vp_dev->common->queue_msix_vector);
451 		/* Flush the write out to device */
452 		vp_ioread16(&vp_dev->common->queue_msix_vector);
453 	}
454 
455 	if (!vp_dev->notify_base)
456 		pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
457 
458 	vring_del_virtqueue(vq);
459 
460 	free_pages_exact(info->queue, vring_pci_size(info->num));
461 }
462 
463 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
464 	.get		= NULL,
465 	.set		= NULL,
466 	.generation	= vp_generation,
467 	.get_status	= vp_get_status,
468 	.set_status	= vp_set_status,
469 	.reset		= vp_reset,
470 	.find_vqs	= vp_modern_find_vqs,
471 	.del_vqs	= vp_del_vqs,
472 	.get_features	= vp_get_features,
473 	.finalize_features = vp_finalize_features,
474 	.bus_name	= vp_bus_name,
475 	.set_vq_affinity = vp_set_vq_affinity,
476 };
477 
478 static const struct virtio_config_ops virtio_pci_config_ops = {
479 	.get		= vp_get,
480 	.set		= vp_set,
481 	.generation	= vp_generation,
482 	.get_status	= vp_get_status,
483 	.set_status	= vp_set_status,
484 	.reset		= vp_reset,
485 	.find_vqs	= vp_modern_find_vqs,
486 	.del_vqs	= vp_del_vqs,
487 	.get_features	= vp_get_features,
488 	.finalize_features = vp_finalize_features,
489 	.bus_name	= vp_bus_name,
490 	.set_vq_affinity = vp_set_vq_affinity,
491 };
492 
493 /**
494  * virtio_pci_find_capability - walk capabilities to find device info.
495  * @dev: the pci device
496  * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
497  * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
498  *
499  * Returns offset of the capability, or 0.
500  */
501 static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
502 					     u32 ioresource_types, int *bars)
503 {
504 	int pos;
505 
506 	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
507 	     pos > 0;
508 	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
509 		u8 type, bar;
510 		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
511 							 cfg_type),
512 				     &type);
513 		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
514 							 bar),
515 				     &bar);
516 
517 		/* Ignore structures with reserved BAR values */
518 		if (bar > 0x5)
519 			continue;
520 
521 		if (type == cfg_type) {
522 			if (pci_resource_len(dev, bar) &&
523 			    pci_resource_flags(dev, bar) & ioresource_types) {
524 				*bars |= (1 << bar);
525 				return pos;
526 			}
527 		}
528 	}
529 	return 0;
530 }
531 
532 /* This is part of the ABI.  Don't screw with it. */
533 static inline void check_offsets(void)
534 {
535 	/* Note: disk space was harmed in compilation of this function. */
536 	BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
537 		     offsetof(struct virtio_pci_cap, cap_vndr));
538 	BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
539 		     offsetof(struct virtio_pci_cap, cap_next));
540 	BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
541 		     offsetof(struct virtio_pci_cap, cap_len));
542 	BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
543 		     offsetof(struct virtio_pci_cap, cfg_type));
544 	BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
545 		     offsetof(struct virtio_pci_cap, bar));
546 	BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
547 		     offsetof(struct virtio_pci_cap, offset));
548 	BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
549 		     offsetof(struct virtio_pci_cap, length));
550 	BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
551 		     offsetof(struct virtio_pci_notify_cap,
552 			      notify_off_multiplier));
553 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
554 		     offsetof(struct virtio_pci_common_cfg,
555 			      device_feature_select));
556 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
557 		     offsetof(struct virtio_pci_common_cfg, device_feature));
558 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
559 		     offsetof(struct virtio_pci_common_cfg,
560 			      guest_feature_select));
561 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
562 		     offsetof(struct virtio_pci_common_cfg, guest_feature));
563 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
564 		     offsetof(struct virtio_pci_common_cfg, msix_config));
565 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
566 		     offsetof(struct virtio_pci_common_cfg, num_queues));
567 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
568 		     offsetof(struct virtio_pci_common_cfg, device_status));
569 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
570 		     offsetof(struct virtio_pci_common_cfg, config_generation));
571 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
572 		     offsetof(struct virtio_pci_common_cfg, queue_select));
573 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
574 		     offsetof(struct virtio_pci_common_cfg, queue_size));
575 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
576 		     offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
577 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
578 		     offsetof(struct virtio_pci_common_cfg, queue_enable));
579 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
580 		     offsetof(struct virtio_pci_common_cfg, queue_notify_off));
581 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
582 		     offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
583 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
584 		     offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
585 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
586 		     offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
587 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
588 		     offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
589 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
590 		     offsetof(struct virtio_pci_common_cfg, queue_used_lo));
591 	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
592 		     offsetof(struct virtio_pci_common_cfg, queue_used_hi));
593 }
594 
595 /* the PCI probing function */
596 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
597 {
598 	struct pci_dev *pci_dev = vp_dev->pci_dev;
599 	int err, common, isr, notify, device;
600 	u32 notify_length;
601 	u32 notify_offset;
602 
603 	check_offsets();
604 
605 	/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
606 	if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
607 		return -ENODEV;
608 
609 	if (pci_dev->device < 0x1040) {
610 		/* Transitional devices: use the PCI subsystem device id as
611 		 * virtio device id, same as legacy driver always did.
612 		 */
613 		vp_dev->vdev.id.device = pci_dev->subsystem_device;
614 	} else {
615 		/* Modern devices: simply use PCI device id, but start from 0x1040. */
616 		vp_dev->vdev.id.device = pci_dev->device - 0x1040;
617 	}
618 	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
619 
620 	/* check for a common config: if not, use legacy mode (bar 0). */
621 	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
622 					    IORESOURCE_IO | IORESOURCE_MEM,
623 					    &vp_dev->modern_bars);
624 	if (!common) {
625 		dev_info(&pci_dev->dev,
626 			 "virtio_pci: leaving for legacy driver\n");
627 		return -ENODEV;
628 	}
629 
630 	/* If common is there, these should be too... */
631 	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
632 					 IORESOURCE_IO | IORESOURCE_MEM,
633 					 &vp_dev->modern_bars);
634 	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
635 					    IORESOURCE_IO | IORESOURCE_MEM,
636 					    &vp_dev->modern_bars);
637 	if (!isr || !notify) {
638 		dev_err(&pci_dev->dev,
639 			"virtio_pci: missing capabilities %i/%i/%i\n",
640 			common, isr, notify);
641 		return -EINVAL;
642 	}
643 
644 	/* Device capability is only mandatory for devices that have
645 	 * device-specific configuration.
646 	 */
647 	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
648 					    IORESOURCE_IO | IORESOURCE_MEM,
649 					    &vp_dev->modern_bars);
650 
651 	err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars,
652 					   "virtio-pci-modern");
653 	if (err)
654 		return err;
655 
656 	err = -EINVAL;
657 	vp_dev->common = map_capability(pci_dev, common,
658 					sizeof(struct virtio_pci_common_cfg), 4,
659 					0, sizeof(struct virtio_pci_common_cfg),
660 					NULL);
661 	if (!vp_dev->common)
662 		goto err_map_common;
663 	vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
664 				     0, 1,
665 				     NULL);
666 	if (!vp_dev->isr)
667 		goto err_map_isr;
668 
669 	/* Read notify_off_multiplier from config space. */
670 	pci_read_config_dword(pci_dev,
671 			      notify + offsetof(struct virtio_pci_notify_cap,
672 						notify_off_multiplier),
673 			      &vp_dev->notify_offset_multiplier);
674 	/* Read notify length and offset from config space. */
675 	pci_read_config_dword(pci_dev,
676 			      notify + offsetof(struct virtio_pci_notify_cap,
677 						cap.length),
678 			      &notify_length);
679 
680 	pci_read_config_dword(pci_dev,
681 			      notify + offsetof(struct virtio_pci_notify_cap,
682 						cap.length),
683 			      &notify_offset);
684 
685 	/* We don't know how many VQs we'll map, ahead of the time.
686 	 * If notify length is small, map it all now.
687 	 * Otherwise, map each VQ individually later.
688 	 */
689 	if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
690 		vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
691 						     0, notify_length,
692 						     &vp_dev->notify_len);
693 		if (!vp_dev->notify_base)
694 			goto err_map_notify;
695 	} else {
696 		vp_dev->notify_map_cap = notify;
697 	}
698 
699 	/* Again, we don't know how much we should map, but PAGE_SIZE
700 	 * is more than enough for all existing devices.
701 	 */
702 	if (device) {
703 		vp_dev->device = map_capability(pci_dev, device, 0, 4,
704 						0, PAGE_SIZE,
705 						&vp_dev->device_len);
706 		if (!vp_dev->device)
707 			goto err_map_device;
708 
709 		vp_dev->vdev.config = &virtio_pci_config_ops;
710 	} else {
711 		vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
712 	}
713 
714 	vp_dev->config_vector = vp_config_vector;
715 	vp_dev->setup_vq = setup_vq;
716 	vp_dev->del_vq = del_vq;
717 
718 	return 0;
719 
720 err_map_device:
721 	if (vp_dev->notify_base)
722 		pci_iounmap(pci_dev, vp_dev->notify_base);
723 err_map_notify:
724 	pci_iounmap(pci_dev, vp_dev->isr);
725 err_map_isr:
726 	pci_iounmap(pci_dev, vp_dev->common);
727 err_map_common:
728 	return err;
729 }
730 
731 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
732 {
733 	struct pci_dev *pci_dev = vp_dev->pci_dev;
734 
735 	if (vp_dev->device)
736 		pci_iounmap(pci_dev, vp_dev->device);
737 	if (vp_dev->notify_base)
738 		pci_iounmap(pci_dev, vp_dev->notify_base);
739 	pci_iounmap(pci_dev, vp_dev->isr);
740 	pci_iounmap(pci_dev, vp_dev->common);
741 	pci_release_selected_regions(pci_dev, vp_dev->modern_bars);
742 }
743