xref: /linux/drivers/net/virtio_net.c (revision a58130ddc896e5a15e4de2bf50a1d89247118c23)
1 /* A network driver using virtio.
2  *
3  * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 //#define DEBUG
20 #include <linux/netdevice.h>
21 #include <linux/etherdevice.h>
22 #include <linux/ethtool.h>
23 #include <linux/module.h>
24 #include <linux/virtio.h>
25 #include <linux/virtio_net.h>
26 #include <linux/scatterlist.h>
27 #include <linux/if_vlan.h>
28 #include <linux/slab.h>
29 
30 static int napi_weight = 128;
31 module_param(napi_weight, int, 0444);
32 
33 static bool csum = true, gso = true;
34 module_param(csum, bool, 0444);
35 module_param(gso, bool, 0444);
36 
37 /* FIXME: MTU in config. */
38 #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
39 #define GOOD_COPY_LEN	128
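/* Note: MAX_PACKET_LEN works out to 14 (ETH_HLEN) + 4 (VLAN_HLEN) +
 * 1500 (ETH_DATA_LEN) = 1518 bytes, the largest frame a small receive
 * buffer has to hold.  GOOD_COPY_LEN is roughly how much of a received
 * page page_to_skb() copies into the skb head; anything beyond that
 * stays in page fragments.
 */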
40 
41 #define VIRTNET_SEND_COMMAND_SG_MAX    2
42 #define VIRTNET_DRIVER_VERSION "1.0.0"
43 
44 struct virtnet_stats {
45 	struct u64_stats_sync tx_syncp;
46 	struct u64_stats_sync rx_syncp;
47 	u64 tx_bytes;
48 	u64 tx_packets;
49 
50 	u64 rx_bytes;
51 	u64 rx_packets;
52 };
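/* The counters above are kept per-cpu (see vi->stats); tx_syncp/rx_syncp
 * are u64_stats_sync seqcounts so that virtnet_stats() can read a
 * consistent 64-bit snapshot even on 32-bit machines.
 */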
53 
54 /* Internal representation of a send virtqueue */
55 struct send_queue {
56 	/* Virtqueue associated with this send_queue */
57 	struct virtqueue *vq;
58 
59 	/* TX: fragments + linear part + virtio header */
60 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
61 
62 	/* Name of the send queue: output.$index */
63 	char name[40];
64 };
65 
66 /* Internal representation of a receive virtqueue */
67 struct receive_queue {
68 	/* Virtqueue associated with this receive_queue */
69 	struct virtqueue *vq;
70 
71 	struct napi_struct napi;
72 
73 	/* Number of input buffers, and max we've ever had. */
74 	unsigned int num, max;
75 
76 	/* Chain pages by the private ptr. */
77 	struct page *pages;
78 
79 	/* RX: fragments + linear part + virtio header */
80 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
81 
82 	/* Name of this receive queue: input.$index */
83 	char name[40];
84 };
85 
86 struct virtnet_info {
87 	struct virtio_device *vdev;
88 	struct virtqueue *cvq;
89 	struct net_device *dev;
90 	struct send_queue *sq;
91 	struct receive_queue *rq;
92 	unsigned int status;
93 
94 	/* Max # of queue pairs supported by the device */
95 	u16 max_queue_pairs;
96 
97 	/* # of queue pairs currently used by the driver */
98 	u16 curr_queue_pairs;
99 
100 	/* I like... big packets and I cannot lie! */
101 	bool big_packets;
102 
103 	/* Host will merge rx buffers for big packets (shake it! shake it!) */
104 	bool mergeable_rx_bufs;
105 
106 	/* Has control virtqueue */
107 	bool has_cvq;
108 
109 	/* enable config space updates */
110 	bool config_enable;
111 
112 	/* Active statistics */
113 	struct virtnet_stats __percpu *stats;
114 
115 	/* Work struct for refilling if we run low on memory. */
116 	struct delayed_work refill;
117 
118 	/* Work struct for config space updates */
119 	struct work_struct config_work;
120 
121 	/* Lock for config space updates */
122 	struct mutex config_lock;
123 
124 	/* Is the affinity hint set for the virtqueues? */
125 	bool affinity_hint_set;
126 };
127 
128 struct skb_vnet_hdr {
129 	union {
130 		struct virtio_net_hdr hdr;
131 		struct virtio_net_hdr_mrg_rxbuf mhdr;
132 	};
133 	unsigned int num_sg;
134 };
135 
136 struct padded_vnet_hdr {
137 	struct virtio_net_hdr hdr;
138 	/*
139 	 * virtio_net_hdr should be in a separate sg buffer because of a QEMU
140 	 * bug; the data sg buffer shares the same page with this header sg.
141 	 * This padding makes the next sg 16-byte aligned after virtio_net_hdr.
142 	 */
143 	char padding[6];
144 };
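/* struct virtio_net_hdr is 10 bytes, so the 6 bytes of padding make
 * sizeof(struct padded_vnet_hdr) == 16; add_recvbuf_big() then places
 * the packet data at a 16-byte offset into the same page.
 */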
145 
146 /* Converting between virtqueue no. and kernel tx/rx queue no.
147  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
148  */
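/* For example (illustrative, N = 2 queue pairs with a control vq):
 *   vq 0 -> rx0, vq 1 -> tx0, vq 2 -> rx1, vq 3 -> tx1, vq 4 -> cvq
 * so vq2rxq() of vq 2 is 1, vq2txq() of vq 3 is 1, rxq2vq(1) == 2 and
 * txq2vq(1) == 3.
 */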
149 static int vq2txq(struct virtqueue *vq)
150 {
151 	return (virtqueue_get_queue_index(vq) - 1) / 2;
152 }
153 
154 static int txq2vq(int txq)
155 {
156 	return txq * 2 + 1;
157 }
158 
159 static int vq2rxq(struct virtqueue *vq)
160 {
161 	return virtqueue_get_queue_index(vq) / 2;
162 }
163 
164 static int rxq2vq(int rxq)
165 {
166 	return rxq * 2;
167 }
168 
169 static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
170 {
171 	return (struct skb_vnet_hdr *)skb->cb;
172 }
173 
174 /*
175  * page->private is used to chain pages for big packets; put the whole
176  * most recently used list at the beginning for reuse.
177  */
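/* For example (illustrative): after give_pages() is called with a chain
 * A -> B, rq->pages becomes A -> B -> <old rq->pages>, each link being
 * stored in page->private.
 */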
178 static void give_pages(struct receive_queue *rq, struct page *page)
179 {
180 	struct page *end;
181 
182 	/* Find end of list, sew whole thing into vi->rq.pages. */
183 	for (end = page; end->private; end = (struct page *)end->private);
184 	end->private = (unsigned long)rq->pages;
185 	rq->pages = page;
186 }
187 
188 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
189 {
190 	struct page *p = rq->pages;
191 
192 	if (p) {
193 		rq->pages = (struct page *)p->private;
194 		/* clear private here; it is used to chain pages */
195 		p->private = 0;
196 	} else
197 		p = alloc_page(gfp_mask);
198 	return p;
199 }
200 
201 static void skb_xmit_done(struct virtqueue *vq)
202 {
203 	struct virtnet_info *vi = vq->vdev->priv;
204 
205 	/* Suppress further interrupts. */
206 	virtqueue_disable_cb(vq);
207 
208 	/* We were probably waiting for more output buffers. */
209 	netif_wake_subqueue(vi->dev, vq2txq(vq));
210 }
211 
212 static void set_skb_frag(struct sk_buff *skb, struct page *page,
213 			 unsigned int offset, unsigned int *len)
214 {
215 	int size = min((unsigned)PAGE_SIZE - offset, *len);
216 	int i = skb_shinfo(skb)->nr_frags;
217 
218 	__skb_fill_page_desc(skb, i, page, offset, size);
219 
220 	skb->data_len += size;
221 	skb->len += size;
222 	skb->truesize += PAGE_SIZE;
223 	skb_shinfo(skb)->nr_frags++;
224 	*len -= size;
225 }
226 
227 /* Called from bottom half context */
228 static struct sk_buff *page_to_skb(struct receive_queue *rq,
229 				   struct page *page, unsigned int len)
230 {
231 	struct virtnet_info *vi = rq->vq->vdev->priv;
232 	struct sk_buff *skb;
233 	struct skb_vnet_hdr *hdr;
234 	unsigned int copy, hdr_len, offset;
235 	char *p;
236 
237 	p = page_address(page);
238 
239 	/* copy small packet so we can reuse these pages for small data */
240 	skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
241 	if (unlikely(!skb))
242 		return NULL;
243 
244 	hdr = skb_vnet_hdr(skb);
245 
246 	if (vi->mergeable_rx_bufs) {
247 		hdr_len = sizeof hdr->mhdr;
248 		offset = hdr_len;
249 	} else {
250 		hdr_len = sizeof hdr->hdr;
251 		offset = sizeof(struct padded_vnet_hdr);
252 	}
253 
254 	memcpy(hdr, p, hdr_len);
255 
256 	len -= hdr_len;
257 	p += offset;
258 
259 	copy = len;
260 	if (copy > skb_tailroom(skb))
261 		copy = skb_tailroom(skb);
262 	memcpy(skb_put(skb, copy), p, copy);
263 
264 	len -= copy;
265 	offset += copy;
266 
267 	/*
268 	 * Verify that we can indeed put this data into a skb.
269 	 * This is here to handle cases when the device erroneously
270 	 * hands us more data than can possibly fit in an skb.  This
271 	 * usually indicates a broken device.
272 	 */
273 	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
274 		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
275 		dev_kfree_skb(skb);
276 		return NULL;
277 	}
278 
279 	while (len) {
280 		set_skb_frag(skb, page, offset, &len);
281 		page = (struct page *)page->private;
282 		offset = 0;
283 	}
284 
285 	if (page)
286 		give_pages(rq, page);
287 
288 	return skb;
289 }
290 
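/* With VIRTIO_NET_F_MRG_RXBUF the device may spread one packet across
 * hdr->mhdr.num_buffers receive buffers (one page each).  page_to_skb()
 * has already consumed the first buffer; here each remaining buffer is
 * pulled off the virtqueue and attached to the skb as a fragment,
 * capped at PAGE_SIZE, until all num_buffers are accounted for.
 */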
291 static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
292 {
293 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
294 	struct page *page;
295 	int num_buf, i, len;
296 
297 	num_buf = hdr->mhdr.num_buffers;
298 	while (--num_buf) {
299 		i = skb_shinfo(skb)->nr_frags;
300 		if (i >= MAX_SKB_FRAGS) {
301 			pr_debug("%s: packet too long\n", skb->dev->name);
302 			skb->dev->stats.rx_length_errors++;
303 			return -EINVAL;
304 		}
305 		page = virtqueue_get_buf(rq->vq, &len);
306 		if (!page) {
307 			pr_debug("%s: rx error: %d buffers missing\n",
308 				 skb->dev->name, hdr->mhdr.num_buffers);
309 			skb->dev->stats.rx_length_errors++;
310 			return -EINVAL;
311 		}
312 
313 		if (len > PAGE_SIZE)
314 			len = PAGE_SIZE;
315 
316 		set_skb_frag(skb, page, 0, &len);
317 
318 		--rq->num;
319 	}
320 	return 0;
321 }
322 
323 static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
324 {
325 	struct virtnet_info *vi = rq->vq->vdev->priv;
326 	struct net_device *dev = vi->dev;
327 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
328 	struct sk_buff *skb;
329 	struct page *page;
330 	struct skb_vnet_hdr *hdr;
331 
332 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
333 		pr_debug("%s: short packet %i\n", dev->name, len);
334 		dev->stats.rx_length_errors++;
335 		if (vi->mergeable_rx_bufs || vi->big_packets)
336 			give_pages(rq, buf);
337 		else
338 			dev_kfree_skb(buf);
339 		return;
340 	}
341 
342 	if (!vi->mergeable_rx_bufs && !vi->big_packets) {
343 		skb = buf;
344 		len -= sizeof(struct virtio_net_hdr);
345 		skb_trim(skb, len);
346 	} else {
347 		page = buf;
348 		skb = page_to_skb(rq, page, len);
349 		if (unlikely(!skb)) {
350 			dev->stats.rx_dropped++;
351 			give_pages(rq, page);
352 			return;
353 		}
354 		if (vi->mergeable_rx_bufs)
355 			if (receive_mergeable(rq, skb)) {
356 				dev_kfree_skb(skb);
357 				return;
358 			}
359 	}
360 
361 	hdr = skb_vnet_hdr(skb);
362 
363 	u64_stats_update_begin(&stats->rx_syncp);
364 	stats->rx_bytes += skb->len;
365 	stats->rx_packets++;
366 	u64_stats_update_end(&stats->rx_syncp);
367 
368 	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
369 		pr_debug("Needs csum!\n");
370 		if (!skb_partial_csum_set(skb,
371 					  hdr->hdr.csum_start,
372 					  hdr->hdr.csum_offset))
373 			goto frame_err;
374 	} else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
375 		skb->ip_summed = CHECKSUM_UNNECESSARY;
376 	}
377 
378 	skb->protocol = eth_type_trans(skb, dev);
379 	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
380 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
381 
382 	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
383 		pr_debug("GSO!\n");
384 		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
385 		case VIRTIO_NET_HDR_GSO_TCPV4:
386 			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
387 			break;
388 		case VIRTIO_NET_HDR_GSO_UDP:
389 			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
390 			break;
391 		case VIRTIO_NET_HDR_GSO_TCPV6:
392 			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
393 			break;
394 		default:
395 			net_warn_ratelimited("%s: bad gso type %u.\n",
396 					     dev->name, hdr->hdr.gso_type);
397 			goto frame_err;
398 		}
399 
400 		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
401 			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
402 
403 		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
404 		if (skb_shinfo(skb)->gso_size == 0) {
405 			net_warn_ratelimited("%s: zero gso size.\n", dev->name);
406 			goto frame_err;
407 		}
408 
409 		/* Header must be checked, and gso_segs computed. */
410 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
411 		skb_shinfo(skb)->gso_segs = 0;
412 	}
413 
414 	netif_receive_skb(skb);
415 	return;
416 
417 frame_err:
418 	dev->stats.rx_frame_errors++;
419 	dev_kfree_skb(skb);
420 }
421 
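/* Receive buffers are posted in one of three layouts, chosen in
 * try_fill_recv():
 *  - small:     a MAX_PACKET_LEN skb; sg[0] = virtio_net_hdr, sg[1..] = data.
 *  - big:       a chain of MAX_SKB_FRAGS + 1 pages; sg[0] = header,
 *               sg[1..] = page data (header and first data share a page).
 *  - mergeable: a single page per buffer; the device sets num_buffers
 *               in the header when a packet spans several of them.
 */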
422 static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
423 {
424 	struct virtnet_info *vi = rq->vq->vdev->priv;
425 	struct sk_buff *skb;
426 	struct skb_vnet_hdr *hdr;
427 	int err;
428 
429 	skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp);
430 	if (unlikely(!skb))
431 		return -ENOMEM;
432 
433 	skb_put(skb, MAX_PACKET_LEN);
434 
435 	hdr = skb_vnet_hdr(skb);
436 	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
437 
438 	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
439 
440 	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 2, skb, gfp);
441 	if (err < 0)
442 		dev_kfree_skb(skb);
443 
444 	return err;
445 }
446 
447 static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
448 {
449 	struct page *first, *list = NULL;
450 	char *p;
451 	int i, err, offset;
452 
453 	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
454 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
455 		first = get_a_page(rq, gfp);
456 		if (!first) {
457 			if (list)
458 				give_pages(rq, list);
459 			return -ENOMEM;
460 		}
461 		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
462 
463 		/* chain the new page at the list head to match the sg order */
464 		first->private = (unsigned long)list;
465 		list = first;
466 	}
467 
468 	first = get_a_page(rq, gfp);
469 	if (!first) {
470 		give_pages(rq, list);
471 		return -ENOMEM;
472 	}
473 	p = page_address(first);
474 
475 	/* rq->sg[0], rq->sg[1] share the same page */
476 	/* a separate rq->sg[0] for virtio_net_hdr only, due to a QEMU bug */
477 	sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));
478 
479 	/* rq->sg[1] for data packet, from offset */
480 	offset = sizeof(struct padded_vnet_hdr);
481 	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
482 
483 	/* chain first in list head */
484 	first->private = (unsigned long)list;
485 	err = virtqueue_add_buf(rq->vq, rq->sg, 0, MAX_SKB_FRAGS + 2,
486 				first, gfp);
487 	if (err < 0)
488 		give_pages(rq, first);
489 
490 	return err;
491 }
492 
493 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
494 {
495 	struct page *page;
496 	int err;
497 
498 	page = get_a_page(rq, gfp);
499 	if (!page)
500 		return -ENOMEM;
501 
502 	sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
503 
504 	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 1, page, gfp);
505 	if (err < 0)
506 		give_pages(rq, page);
507 
508 	return err;
509 }
510 
511 /*
512  * Returns false if we couldn't fill entirely (OOM).
513  *
514  * Normally run in the receive path, but can also be run from ndo_open
515  * before we're receiving packets, or from refill_work which is
516  * careful to disable receiving (using napi_disable).
517  */
518 static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
519 {
520 	struct virtnet_info *vi = rq->vq->vdev->priv;
521 	int err;
522 	bool oom;
523 
524 	do {
525 		if (vi->mergeable_rx_bufs)
526 			err = add_recvbuf_mergeable(rq, gfp);
527 		else if (vi->big_packets)
528 			err = add_recvbuf_big(rq, gfp);
529 		else
530 			err = add_recvbuf_small(rq, gfp);
531 
532 		oom = err == -ENOMEM;
533 		if (err < 0)
534 			break;
535 		++rq->num;
536 	} while (err > 0);
537 	if (unlikely(rq->num > rq->max))
538 		rq->max = rq->num;
539 	virtqueue_kick(rq->vq);
540 	return !oom;
541 }
542 
543 static void skb_recv_done(struct virtqueue *rvq)
544 {
545 	struct virtnet_info *vi = rvq->vdev->priv;
546 	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
547 
548 	/* Schedule NAPI; suppress further interrupts if successful. */
549 	if (napi_schedule_prep(&rq->napi)) {
550 		virtqueue_disable_cb(rvq);
551 		__napi_schedule(&rq->napi);
552 	}
553 }
554 
555 static void virtnet_napi_enable(struct receive_queue *rq)
556 {
557 	napi_enable(&rq->napi);
558 
559 	/* If all buffers were filled by the other side before we enabled napi,
560 	 * we won't get another interrupt, so process any outstanding packets
561 	 * now.  virtnet_poll wants to re-enable the queue, so we disable it
562 	 * here.  We synchronize against interrupts via NAPI_STATE_SCHED. */
563 	if (napi_schedule_prep(&rq->napi)) {
564 		virtqueue_disable_cb(rq->vq);
565 		local_bh_disable();
566 		__napi_schedule(&rq->napi);
567 		local_bh_enable();
568 	}
569 }
570 
571 static void refill_work(struct work_struct *work)
572 {
573 	struct virtnet_info *vi =
574 		container_of(work, struct virtnet_info, refill.work);
575 	bool still_empty;
576 	int i;
577 
578 	for (i = 0; i < vi->max_queue_pairs; i++) {
579 		struct receive_queue *rq = &vi->rq[i];
580 
581 		napi_disable(&rq->napi);
582 		still_empty = !try_fill_recv(rq, GFP_KERNEL);
583 		virtnet_napi_enable(rq);
584 
585 		/* In theory, this can happen: if we don't get any buffers in,
586 		 * we will *never* try to fill again.
587 		 */
588 		if (still_empty)
589 			schedule_delayed_work(&vi->refill, HZ/2);
590 	}
591 }
592 
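/* NAPI poll handler: drain up to @budget completed receive buffers,
 * refill the ring once it drops below half of its historical maximum
 * (deferring to refill_work on atomic allocation failure), and only
 * re-enable virtqueue callbacks once the ring is empty.  If buffers
 * arrive while callbacks are being re-enabled, the handler re-disables
 * them and keeps polling (the "again" label).
 */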
593 static int virtnet_poll(struct napi_struct *napi, int budget)
594 {
595 	struct receive_queue *rq =
596 		container_of(napi, struct receive_queue, napi);
597 	struct virtnet_info *vi = rq->vq->vdev->priv;
598 	void *buf;
599 	unsigned int len, received = 0;
600 
601 again:
602 	while (received < budget &&
603 	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
604 		receive_buf(rq, buf, len);
605 		--rq->num;
606 		received++;
607 	}
608 
609 	if (rq->num < rq->max / 2) {
610 		if (!try_fill_recv(rq, GFP_ATOMIC))
611 			schedule_delayed_work(&vi->refill, 0);
612 	}
613 
614 	/* Out of packets? */
615 	if (received < budget) {
616 		napi_complete(napi);
617 		if (unlikely(!virtqueue_enable_cb(rq->vq)) &&
618 		    napi_schedule_prep(napi)) {
619 			virtqueue_disable_cb(rq->vq);
620 			__napi_schedule(napi);
621 			goto again;
622 		}
623 	}
624 
625 	return received;
626 }
627 
628 static int virtnet_open(struct net_device *dev)
629 {
630 	struct virtnet_info *vi = netdev_priv(dev);
631 	int i;
632 
633 	for (i = 0; i < vi->max_queue_pairs; i++) {
634 		/* Make sure we have some buffers: if OOM, use the workqueue. */
635 		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
636 			schedule_delayed_work(&vi->refill, 0);
637 		virtnet_napi_enable(&vi->rq[i]);
638 	}
639 
640 	return 0;
641 }
642 
643 static unsigned int free_old_xmit_skbs(struct send_queue *sq)
644 {
645 	struct sk_buff *skb;
646 	unsigned int len, tot_sgs = 0;
647 	struct virtnet_info *vi = sq->vq->vdev->priv;
648 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
649 
650 	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
651 		pr_debug("Sent skb %p\n", skb);
652 
653 		u64_stats_update_begin(&stats->tx_syncp);
654 		stats->tx_bytes += skb->len;
655 		stats->tx_packets++;
656 		u64_stats_update_end(&stats->tx_syncp);
657 
658 		tot_sgs += skb_vnet_hdr(skb)->num_sg;
659 		dev_kfree_skb_any(skb);
660 	}
661 	return tot_sgs;
662 }
663 
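/* Build the virtio_net header from skb metadata (checksum offload and
 * GSO state), then post the skb: sg[0] holds the header and
 * sg[1..num_sg-1] hold the linear part and fragments.  The number of
 * entries is remembered in hdr->num_sg so free_old_xmit_skbs() can
 * report reclaimed capacity.
 */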
664 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
665 {
666 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
667 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
668 	struct virtnet_info *vi = sq->vq->vdev->priv;
669 
670 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
671 
672 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
673 		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
674 		hdr->hdr.csum_start = skb_checksum_start_offset(skb);
675 		hdr->hdr.csum_offset = skb->csum_offset;
676 	} else {
677 		hdr->hdr.flags = 0;
678 		hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
679 	}
680 
681 	if (skb_is_gso(skb)) {
682 		hdr->hdr.hdr_len = skb_headlen(skb);
683 		hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
684 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
685 			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
686 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
687 			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
688 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
689 			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
690 		else
691 			BUG();
692 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
693 			hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
694 	} else {
695 		hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
696 		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
697 	}
698 
699 	hdr->mhdr.num_buffers = 0;
700 
701 	/* Encode metadata header at front. */
702 	if (vi->mergeable_rx_bufs)
703 		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
704 	else
705 		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
706 
707 	hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
708 	return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg,
709 				 0, skb, GFP_ATOMIC);
710 }
711 
712 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
713 {
714 	struct virtnet_info *vi = netdev_priv(dev);
715 	int qnum = skb_get_queue_mapping(skb);
716 	struct send_queue *sq = &vi->sq[qnum];
717 	int capacity;
718 
719 	/* Free up any pending old buffers before queueing new ones. */
720 	free_old_xmit_skbs(sq);
721 
722 	/* Try to transmit */
723 	capacity = xmit_skb(sq, skb);
724 
725 	/* This can happen with OOM and indirect buffers. */
726 	if (unlikely(capacity < 0)) {
727 		if (likely(capacity == -ENOMEM)) {
728 			if (net_ratelimit())
729 				dev_warn(&dev->dev,
730 					 "TXQ (%d) failure: out of memory\n",
731 					 qnum);
732 		} else {
733 			dev->stats.tx_fifo_errors++;
734 			if (net_ratelimit())
735 				dev_warn(&dev->dev,
736 					 "Unexpected TXQ (%d) failure: %d\n",
737 					 qnum, capacity);
738 		}
739 		dev->stats.tx_dropped++;
740 		kfree_skb(skb);
741 		return NETDEV_TX_OK;
742 	}
743 	virtqueue_kick(sq->vq);
744 
745 	/* Don't wait up for transmitted skbs to be freed. */
746 	skb_orphan(skb);
747 	nf_reset(skb);
748 
749 	/* Apparently nice girls don't return TX_BUSY; stop the queue
750 	 * before it gets out of hand.  Naturally, this wastes entries. */
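	/* 2 + MAX_SKB_FRAGS is the worst case for the next packet: one
	 * descriptor for the virtio header, one for the linear part and up
	 * to MAX_SKB_FRAGS fragments.  capacity is the value returned by
	 * xmit_skb()/virtqueue_add_buf(), used here as the remaining ring
	 * space.
	 */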
751 	if (capacity < 2+MAX_SKB_FRAGS) {
752 		netif_stop_subqueue(dev, qnum);
753 		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
754 			/* More just got used, free them then recheck. */
755 			capacity += free_old_xmit_skbs(sq);
756 			if (capacity >= 2+MAX_SKB_FRAGS) {
757 				netif_start_subqueue(dev, qnum);
758 				virtqueue_disable_cb(sq->vq);
759 			}
760 		}
761 	}
762 
763 	return NETDEV_TX_OK;
764 }
765 
766 static int virtnet_set_mac_address(struct net_device *dev, void *p)
767 {
768 	struct virtnet_info *vi = netdev_priv(dev);
769 	struct virtio_device *vdev = vi->vdev;
770 	int ret;
771 
772 	ret = eth_mac_addr(dev, p);
773 	if (ret)
774 		return ret;
775 
776 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
777 		vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
778 		                  dev->dev_addr, dev->addr_len);
779 
780 	return 0;
781 }
782 
783 static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
784 					       struct rtnl_link_stats64 *tot)
785 {
786 	struct virtnet_info *vi = netdev_priv(dev);
787 	int cpu;
788 	unsigned int start;
789 
790 	for_each_possible_cpu(cpu) {
791 		struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
792 		u64 tpackets, tbytes, rpackets, rbytes;
793 
794 		do {
795 			start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
796 			tpackets = stats->tx_packets;
797 			tbytes   = stats->tx_bytes;
798 		} while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
799 
800 		do {
801 			start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
802 			rpackets = stats->rx_packets;
803 			rbytes   = stats->rx_bytes;
804 		} while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));
805 
806 		tot->rx_packets += rpackets;
807 		tot->tx_packets += tpackets;
808 		tot->rx_bytes   += rbytes;
809 		tot->tx_bytes   += tbytes;
810 	}
811 
812 	tot->tx_dropped = dev->stats.tx_dropped;
813 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
814 	tot->rx_dropped = dev->stats.rx_dropped;
815 	tot->rx_length_errors = dev->stats.rx_length_errors;
816 	tot->rx_frame_errors = dev->stats.rx_frame_errors;
817 
818 	return tot;
819 }
820 
821 #ifdef CONFIG_NET_POLL_CONTROLLER
822 static void virtnet_netpoll(struct net_device *dev)
823 {
824 	struct virtnet_info *vi = netdev_priv(dev);
825 	int i;
826 
827 	for (i = 0; i < vi->curr_queue_pairs; i++)
828 		napi_schedule(&vi->rq[i].napi);
829 }
830 #endif
831 
832 /*
833  * Send command via the control virtqueue and check status.  Commands
834  * supported by the hypervisor, as indicated by feature bits, should
835  * never fail unless improperly formatted.
836  */
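/* The buffer layout on the control virtqueue is, in order: one sg for
 * the virtio_net_ctrl_hdr (class/cmd), the caller's "out" data sgs, any
 * "in" data sgs, and finally one sg for the virtio_net_ctrl_ack status
 * byte that the device fills in with VIRTIO_NET_OK on success.
 */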
837 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
838 				 struct scatterlist *data, int out, int in)
839 {
840 	struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
841 	struct virtio_net_ctrl_hdr ctrl;
842 	virtio_net_ctrl_ack status = ~0;
843 	unsigned int tmp;
844 	int i;
845 
846 	/* Caller should know better */
847 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
848 		(out + in > VIRTNET_SEND_COMMAND_SG_MAX));
849 
850 	out++; /* Add header */
851 	in++; /* Add return status */
852 
853 	ctrl.class = class;
854 	ctrl.cmd = cmd;
855 
856 	sg_init_table(sg, out + in);
857 
858 	sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
859 	for_each_sg(data, s, out + in - 2, i)
860 		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
861 	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
862 
863 	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
864 
865 	virtqueue_kick(vi->cvq);
866 
867 	/*
868 	 * Spin for a response; the kick causes an ioport write, trapping
869 	 * into the hypervisor, so the request should be handled immediately.
870 	 */
871 	while (!virtqueue_get_buf(vi->cvq, &tmp))
872 		cpu_relax();
873 
874 	return status == VIRTIO_NET_OK;
875 }
876 
877 static void virtnet_ack_link_announce(struct virtnet_info *vi)
878 {
879 	rtnl_lock();
880 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
881 				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL,
882 				  0, 0))
883 		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
884 	rtnl_unlock();
885 }
886 
887 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
888 {
889 	struct scatterlist sg;
890 	struct virtio_net_ctrl_mq s;
891 	struct net_device *dev = vi->dev;
892 
893 	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
894 		return 0;
895 
896 	s.virtqueue_pairs = queue_pairs;
897 	sg_init_one(&sg, &s, sizeof(s));
898 
899 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
900 				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)) {
901 		dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
902 			 queue_pairs);
903 		return -EINVAL;
904 	} else
905 		vi->curr_queue_pairs = queue_pairs;
906 
907 	return 0;
908 }
909 
910 static int virtnet_close(struct net_device *dev)
911 {
912 	struct virtnet_info *vi = netdev_priv(dev);
913 	int i;
914 
915 	/* Make sure refill_work doesn't re-enable napi! */
916 	cancel_delayed_work_sync(&vi->refill);
917 
918 	for (i = 0; i < vi->max_queue_pairs; i++)
919 		napi_disable(&vi->rq[i].napi);
920 
921 	return 0;
922 }
923 
924 static void virtnet_set_rx_mode(struct net_device *dev)
925 {
926 	struct virtnet_info *vi = netdev_priv(dev);
927 	struct scatterlist sg[2];
928 	u8 promisc, allmulti;
929 	struct virtio_net_ctrl_mac *mac_data;
930 	struct netdev_hw_addr *ha;
931 	int uc_count;
932 	int mc_count;
933 	void *buf;
934 	int i;
935 
936 	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
937 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
938 		return;
939 
940 	promisc = ((dev->flags & IFF_PROMISC) != 0);
941 	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
942 
943 	sg_init_one(sg, &promisc, sizeof(promisc));
944 
945 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
946 				  VIRTIO_NET_CTRL_RX_PROMISC,
947 				  sg, 1, 0))
948 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
949 			 promisc ? "en" : "dis");
950 
951 	sg_init_one(sg, &allmulti, sizeof(allmulti));
952 
953 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
954 				  VIRTIO_NET_CTRL_RX_ALLMULTI,
955 				  sg, 1, 0))
956 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
957 			 allmulti ? "en" : "dis");
958 
959 	uc_count = netdev_uc_count(dev);
960 	mc_count = netdev_mc_count(dev);
961 	/* MAC filter - use one buffer for both lists */
962 	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
963 		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
964 	mac_data = buf;
965 	if (!buf) {
966 		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
967 		return;
968 	}
969 
970 	sg_init_table(sg, 2);
971 
972 	/* Store the unicast list and count in the front of the buffer */
973 	mac_data->entries = uc_count;
974 	i = 0;
975 	netdev_for_each_uc_addr(ha, dev)
976 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
977 
978 	sg_set_buf(&sg[0], mac_data,
979 		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
980 
981 	/* multicast list and count fill the end */
982 	mac_data = (void *)&mac_data->macs[uc_count][0];
983 
984 	mac_data->entries = mc_count;
985 	i = 0;
986 	netdev_for_each_mc_addr(ha, dev)
987 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
988 
989 	sg_set_buf(&sg[1], mac_data,
990 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
991 
992 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
993 				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
994 				  sg, 2, 0))
995 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
996 
997 	kfree(buf);
998 }
999 
1000 static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
1001 {
1002 	struct virtnet_info *vi = netdev_priv(dev);
1003 	struct scatterlist sg;
1004 
1005 	sg_init_one(&sg, &vid, sizeof(vid));
1006 
1007 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1008 				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
1009 		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1010 	return 0;
1011 }
1012 
1013 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
1014 {
1015 	struct virtnet_info *vi = netdev_priv(dev);
1016 	struct scatterlist sg;
1017 
1018 	sg_init_one(&sg, &vid, sizeof(vid));
1019 
1020 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1021 				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
1022 		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1023 	return 0;
1024 }
1025 
1026 static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
1027 {
1028 	int i;
1029 
1030 	/* In multiqueue mode, when the number of cpus is equal to the number
1031 	 * of queue pairs, we let each queue pair be private to one cpu by
1032 	 * setting the affinity hint to eliminate contention.
1033 	 */
1034 	if ((vi->curr_queue_pairs == 1 ||
1035 	     vi->max_queue_pairs != num_online_cpus()) && set) {
1036 		if (vi->affinity_hint_set)
1037 			set = false;
1038 		else
1039 			return;
1040 	}
1041 
1042 	for (i = 0; i < vi->max_queue_pairs; i++) {
1043 		int cpu = set ? i : -1;
1044 		virtqueue_set_affinity(vi->rq[i].vq, cpu);
1045 		virtqueue_set_affinity(vi->sq[i].vq, cpu);
1046 	}
1047 
1048 	if (set)
1049 		vi->affinity_hint_set = true;
1050 	else
1051 		vi->affinity_hint_set = false;
1052 }
1053 
1054 static void virtnet_get_ringparam(struct net_device *dev,
1055 				struct ethtool_ringparam *ring)
1056 {
1057 	struct virtnet_info *vi = netdev_priv(dev);
1058 
1059 	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1060 	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1061 	ring->rx_pending = ring->rx_max_pending;
1062 	ring->tx_pending = ring->tx_max_pending;
1063 }
1064 
1065 
1066 static void virtnet_get_drvinfo(struct net_device *dev,
1067 				struct ethtool_drvinfo *info)
1068 {
1069 	struct virtnet_info *vi = netdev_priv(dev);
1070 	struct virtio_device *vdev = vi->vdev;
1071 
1072 	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1073 	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1074 	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1076 }
1077 
1078 /* TODO: Eliminate OOO packets during switching */
1079 static int virtnet_set_channels(struct net_device *dev,
1080 				struct ethtool_channels *channels)
1081 {
1082 	struct virtnet_info *vi = netdev_priv(dev);
1083 	u16 queue_pairs = channels->combined_count;
1084 	int err;
1085 
1086 	/* We don't support separate rx/tx channels.
1087 	 * We don't allow setting 'other' channels.
1088 	 */
1089 	if (channels->rx_count || channels->tx_count || channels->other_count)
1090 		return -EINVAL;
1091 
1092 	if (queue_pairs > vi->max_queue_pairs)
1093 		return -EINVAL;
1094 
1095 	err = virtnet_set_queues(vi, queue_pairs);
1096 	if (!err) {
1097 		netif_set_real_num_tx_queues(dev, queue_pairs);
1098 		netif_set_real_num_rx_queues(dev, queue_pairs);
1099 
1100 		virtnet_set_affinity(vi, true);
1101 	}
1102 
1103 	return err;
1104 }
1105 
1106 static void virtnet_get_channels(struct net_device *dev,
1107 				 struct ethtool_channels *channels)
1108 {
1109 	struct virtnet_info *vi = netdev_priv(dev);
1110 
1111 	channels->combined_count = vi->curr_queue_pairs;
1112 	channels->max_combined = vi->max_queue_pairs;
1113 	channels->max_other = 0;
1114 	channels->rx_count = 0;
1115 	channels->tx_count = 0;
1116 	channels->other_count = 0;
1117 }
1118 
1119 static const struct ethtool_ops virtnet_ethtool_ops = {
1120 	.get_drvinfo = virtnet_get_drvinfo,
1121 	.get_link = ethtool_op_get_link,
1122 	.get_ringparam = virtnet_get_ringparam,
1123 	.set_channels = virtnet_set_channels,
1124 	.get_channels = virtnet_get_channels,
1125 };
1126 
1127 #define MIN_MTU 68
1128 #define MAX_MTU 65535
1129 
1130 static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
1131 {
1132 	if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
1133 		return -EINVAL;
1134 	dev->mtu = new_mtu;
1135 	return 0;
1136 }
1137 
1138 /* To avoid contending for a lock held by a vcpu that might exit to the host,
1139  * select the txq based on the processor id.
1140  * TODO: handle cpu hotplug.
1141  */
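/* For example (illustrative): with 8 online cpus but only 4 real tx
 * queues, a packet sent from cpu 6 with no recorded rx queue is placed
 * on txq 2 (6 reduced modulo 4 by the loop below).
 */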
1142 static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
1143 {
1144 	int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
1145 		  smp_processor_id();
1146 
1147 	while (unlikely(txq >= dev->real_num_tx_queues))
1148 		txq -= dev->real_num_tx_queues;
1149 
1150 	return txq;
1151 }
1152 
1153 static const struct net_device_ops virtnet_netdev = {
1154 	.ndo_open            = virtnet_open,
1155 	.ndo_stop   	     = virtnet_close,
1156 	.ndo_start_xmit      = start_xmit,
1157 	.ndo_validate_addr   = eth_validate_addr,
1158 	.ndo_set_mac_address = virtnet_set_mac_address,
1159 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
1160 	.ndo_change_mtu	     = virtnet_change_mtu,
1161 	.ndo_get_stats64     = virtnet_stats,
1162 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
1163 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
1164 	.ndo_select_queue     = virtnet_select_queue,
1165 #ifdef CONFIG_NET_POLL_CONTROLLER
1166 	.ndo_poll_controller = virtnet_netpoll,
1167 #endif
1168 };
1169 
1170 static void virtnet_config_changed_work(struct work_struct *work)
1171 {
1172 	struct virtnet_info *vi =
1173 		container_of(work, struct virtnet_info, config_work);
1174 	u16 v;
1175 
1176 	mutex_lock(&vi->config_lock);
1177 	if (!vi->config_enable)
1178 		goto done;
1179 
1180 	if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
1181 			      offsetof(struct virtio_net_config, status),
1182 			      &v) < 0)
1183 		goto done;
1184 
1185 	if (v & VIRTIO_NET_S_ANNOUNCE) {
1186 		netdev_notify_peers(vi->dev);
1187 		virtnet_ack_link_announce(vi);
1188 	}
1189 
1190 	/* Ignore unknown (future) status bits */
1191 	v &= VIRTIO_NET_S_LINK_UP;
1192 
1193 	if (vi->status == v)
1194 		goto done;
1195 
1196 	vi->status = v;
1197 
1198 	if (vi->status & VIRTIO_NET_S_LINK_UP) {
1199 		netif_carrier_on(vi->dev);
1200 		netif_tx_wake_all_queues(vi->dev);
1201 	} else {
1202 		netif_carrier_off(vi->dev);
1203 		netif_tx_stop_all_queues(vi->dev);
1204 	}
1205 done:
1206 	mutex_unlock(&vi->config_lock);
1207 }
1208 
1209 static void virtnet_config_changed(struct virtio_device *vdev)
1210 {
1211 	struct virtnet_info *vi = vdev->priv;
1212 
1213 	schedule_work(&vi->config_work);
1214 }
1215 
1216 static void virtnet_free_queues(struct virtnet_info *vi)
1217 {
1218 	kfree(vi->rq);
1219 	kfree(vi->sq);
1220 }
1221 
1222 static void free_receive_bufs(struct virtnet_info *vi)
1223 {
1224 	int i;
1225 
1226 	for (i = 0; i < vi->max_queue_pairs; i++) {
1227 		while (vi->rq[i].pages)
1228 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
1229 	}
1230 }
1231 
1232 static void free_unused_bufs(struct virtnet_info *vi)
1233 {
1234 	void *buf;
1235 	int i;
1236 
1237 	for (i = 0; i < vi->max_queue_pairs; i++) {
1238 		struct virtqueue *vq = vi->sq[i].vq;
1239 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
1240 			dev_kfree_skb(buf);
1241 	}
1242 
1243 	for (i = 0; i < vi->max_queue_pairs; i++) {
1244 		struct virtqueue *vq = vi->rq[i].vq;
1245 
1246 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
1247 			if (vi->mergeable_rx_bufs || vi->big_packets)
1248 				give_pages(&vi->rq[i], buf);
1249 			else
1250 				dev_kfree_skb(buf);
1251 			--vi->rq[i].num;
1252 		}
1253 		BUG_ON(vi->rq[i].num != 0);
1254 	}
1255 }
1256 
1257 static void virtnet_del_vqs(struct virtnet_info *vi)
1258 {
1259 	struct virtio_device *vdev = vi->vdev;
1260 
1261 	virtnet_set_affinity(vi, false);
1262 
1263 	vdev->config->del_vqs(vdev);
1264 
1265 	virtnet_free_queues(vi);
1266 }
1267 
1268 static int virtnet_find_vqs(struct virtnet_info *vi)
1269 {
1270 	vq_callback_t **callbacks;
1271 	struct virtqueue **vqs;
1272 	int ret = -ENOMEM;
1273 	int i, total_vqs;
1274 	const char **names;
1275 
1276 	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
1277 	 * a possible N-1 further RX/TX queue pairs used in multiqueue mode,
1278 	 * followed by a possible control vq.
1279 	 */
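	/* For example (illustrative): max_queue_pairs == 4 with a control vq
	 * gives total_vqs = 4 * 2 + 1 = 9.
	 */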
1280 	total_vqs = vi->max_queue_pairs * 2 +
1281 		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
1282 
1283 	/* Allocate space for find_vqs parameters */
1284 	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
1285 	if (!vqs)
1286 		goto err_vq;
1287 	callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
1288 	if (!callbacks)
1289 		goto err_callback;
1290 	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
1291 	if (!names)
1292 		goto err_names;
1293 
1294 	/* Parameters for control virtqueue, if any */
1295 	if (vi->has_cvq) {
1296 		callbacks[total_vqs - 1] = NULL;
1297 		names[total_vqs - 1] = "control";
1298 	}
1299 
1300 	/* Allocate/initialize parameters for send/receive virtqueues */
1301 	for (i = 0; i < vi->max_queue_pairs; i++) {
1302 		callbacks[rxq2vq(i)] = skb_recv_done;
1303 		callbacks[txq2vq(i)] = skb_xmit_done;
1304 		sprintf(vi->rq[i].name, "input.%d", i);
1305 		sprintf(vi->sq[i].name, "output.%d", i);
1306 		names[rxq2vq(i)] = vi->rq[i].name;
1307 		names[txq2vq(i)] = vi->sq[i].name;
1308 	}
1309 
1310 	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
1311 					 names);
1312 	if (ret)
1313 		goto err_find;
1314 
1315 	if (vi->has_cvq) {
1316 		vi->cvq = vqs[total_vqs - 1];
1317 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
1318 			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
1319 	}
1320 
1321 	for (i = 0; i < vi->max_queue_pairs; i++) {
1322 		vi->rq[i].vq = vqs[rxq2vq(i)];
1323 		vi->sq[i].vq = vqs[txq2vq(i)];
1324 	}
1325 
1326 	kfree(names);
1327 	kfree(callbacks);
1328 	kfree(vqs);
1329 
1330 	return 0;
1331 
1332 err_find:
1333 	kfree(names);
1334 err_names:
1335 	kfree(callbacks);
1336 err_callback:
1337 	kfree(vqs);
1338 err_vq:
1339 	return ret;
1340 }
1341 
1342 static int virtnet_alloc_queues(struct virtnet_info *vi)
1343 {
1344 	int i;
1345 
1346 	vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
1347 	if (!vi->sq)
1348 		goto err_sq;
1349 	vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
1350 	if (!vi->rq)
1351 		goto err_rq;
1352 
1353 	INIT_DELAYED_WORK(&vi->refill, refill_work);
1354 	for (i = 0; i < vi->max_queue_pairs; i++) {
1355 		vi->rq[i].pages = NULL;
1356 		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
1357 			       napi_weight);
1358 
1359 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
1360 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
1361 	}
1362 
1363 	return 0;
1364 
1365 err_rq:
1366 	kfree(vi->sq);
1367 err_sq:
1368 	return -ENOMEM;
1369 }
1370 
1371 static int init_vqs(struct virtnet_info *vi)
1372 {
1373 	int ret;
1374 
1375 	/* Allocate send & receive queues */
1376 	ret = virtnet_alloc_queues(vi);
1377 	if (ret)
1378 		goto err;
1379 
1380 	ret = virtnet_find_vqs(vi);
1381 	if (ret)
1382 		goto err_free;
1383 
1384 	virtnet_set_affinity(vi, true);
1385 	return 0;
1386 
1387 err_free:
1388 	virtnet_free_queues(vi);
1389 err:
1390 	return ret;
1391 }
1392 
1393 static int virtnet_probe(struct virtio_device *vdev)
1394 {
1395 	int i, err;
1396 	struct net_device *dev;
1397 	struct virtnet_info *vi;
1398 	u16 max_queue_pairs;
1399 
1400 	/* Find if host supports multiqueue virtio_net device */
1401 	err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
1402 				offsetof(struct virtio_net_config,
1403 				max_virtqueue_pairs), &max_queue_pairs);
1404 
1405 	/* We need at least 2 queues */
1406 	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1407 	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1408 	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1409 		max_queue_pairs = 1;
1410 
1411 	/* Allocate ourselves a network device with room for our info */
1412 	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
1413 	if (!dev)
1414 		return -ENOMEM;
1415 
1416 	/* Set up network device as normal. */
1417 	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
1418 	dev->netdev_ops = &virtnet_netdev;
1419 	dev->features = NETIF_F_HIGHDMA;
1420 
1421 	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
1422 	SET_NETDEV_DEV(dev, &vdev->dev);
1423 
1424 	/* Do we support "hardware" checksums? */
1425 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
1426 		/* This opens up the world of extra features. */
1427 		dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1428 		if (csum)
1429 			dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1430 
1431 		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
1432 			dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
1433 				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
1434 		}
1435 		/* Individual feature bits: what can host handle? */
1436 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
1437 			dev->hw_features |= NETIF_F_TSO;
1438 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
1439 			dev->hw_features |= NETIF_F_TSO6;
1440 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
1441 			dev->hw_features |= NETIF_F_TSO_ECN;
1442 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
1443 			dev->hw_features |= NETIF_F_UFO;
1444 
1445 		if (gso)
1446 			dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
1447 		/* (!csum && gso) case will be fixed by register_netdev() */
1448 	}
1449 
1450 	/* Configuration may specify what MAC to use.  Otherwise random. */
1451 	if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
1452 				  offsetof(struct virtio_net_config, mac),
1453 				  dev->dev_addr, dev->addr_len) < 0)
1454 		eth_hw_addr_random(dev);
1455 
1456 	/* Set up our device-specific information */
1457 	vi = netdev_priv(dev);
1458 	vi->dev = dev;
1459 	vi->vdev = vdev;
1460 	vdev->priv = vi;
1461 	vi->stats = alloc_percpu(struct virtnet_stats);
1462 	err = -ENOMEM;
1463 	if (vi->stats == NULL)
1464 		goto free;
1465 
1466 	mutex_init(&vi->config_lock);
1467 	vi->config_enable = true;
1468 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
1469 
1470 	/* If we can receive ANY GSO packets, we must allocate large ones. */
1471 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
1472 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
1473 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
1474 		vi->big_packets = true;
1475 
1476 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
1477 		vi->mergeable_rx_bufs = true;
1478 
1479 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1480 		vi->has_cvq = true;
1481 
1482 	/* Use single tx/rx queue pair as default */
1483 	vi->curr_queue_pairs = 1;
1484 	vi->max_queue_pairs = max_queue_pairs;
1485 
1486 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
1487 	err = init_vqs(vi);
1488 	if (err)
1489 		goto free_stats;
1490 
1491 	netif_set_real_num_tx_queues(dev, 1);
1492 	netif_set_real_num_rx_queues(dev, 1);
1493 
1494 	err = register_netdev(dev);
1495 	if (err) {
1496 		pr_debug("virtio_net: registering device failed\n");
1497 		goto free_vqs;
1498 	}
1499 
1500 	/* Last of all, set up some receive buffers. */
1501 	for (i = 0; i < vi->max_queue_pairs; i++) {
1502 		try_fill_recv(&vi->rq[i], GFP_KERNEL);
1503 
1504 		/* If we didn't even get one input buffer, we're useless. */
1505 		if (vi->rq[i].num == 0) {
1506 			free_unused_bufs(vi);
1507 			err = -ENOMEM;
1508 			goto free_recv_bufs;
1509 		}
1510 	}
1511 
1512 	/* Assume link up if device can't report link status,
1513 	   otherwise get link status from config. */
1514 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
1515 		netif_carrier_off(dev);
1516 		schedule_work(&vi->config_work);
1517 	} else {
1518 		vi->status = VIRTIO_NET_S_LINK_UP;
1519 		netif_carrier_on(dev);
1520 	}
1521 
1522 	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
1523 		 dev->name, max_queue_pairs);
1524 
1525 	return 0;
1526 
1527 free_recv_bufs:
1528 	free_receive_bufs(vi);
1529 	unregister_netdev(dev);
1530 free_vqs:
1531 	cancel_delayed_work_sync(&vi->refill);
1532 	virtnet_del_vqs(vi);
1533 free_stats:
1534 	free_percpu(vi->stats);
1535 free:
1536 	free_netdev(dev);
1537 	return err;
1538 }
1539 
1540 static void remove_vq_common(struct virtnet_info *vi)
1541 {
1542 	vi->vdev->config->reset(vi->vdev);
1543 
1544 	/* Free unused buffers in both send and recv, if any. */
1545 	free_unused_bufs(vi);
1546 
1547 	free_receive_bufs(vi);
1548 
1549 	virtnet_del_vqs(vi);
1550 }
1551 
1552 static void virtnet_remove(struct virtio_device *vdev)
1553 {
1554 	struct virtnet_info *vi = vdev->priv;
1555 
1556 	/* Prevent config work handler from accessing the device. */
1557 	mutex_lock(&vi->config_lock);
1558 	vi->config_enable = false;
1559 	mutex_unlock(&vi->config_lock);
1560 
1561 	unregister_netdev(vi->dev);
1562 
1563 	remove_vq_common(vi);
1564 
1565 	flush_work(&vi->config_work);
1566 
1567 	free_percpu(vi->stats);
1568 	free_netdev(vi->dev);
1569 }
1570 
1571 #ifdef CONFIG_PM
1572 static int virtnet_freeze(struct virtio_device *vdev)
1573 {
1574 	struct virtnet_info *vi = vdev->priv;
1575 	int i;
1576 
1577 	/* Prevent config work handler from accessing the device */
1578 	mutex_lock(&vi->config_lock);
1579 	vi->config_enable = false;
1580 	mutex_unlock(&vi->config_lock);
1581 
1582 	netif_device_detach(vi->dev);
1583 	cancel_delayed_work_sync(&vi->refill);
1584 
1585 	if (netif_running(vi->dev))
1586 		for (i = 0; i < vi->max_queue_pairs; i++) {
1587 			napi_disable(&vi->rq[i].napi);
1588 			netif_napi_del(&vi->rq[i].napi);
1589 		}
1590 
1591 	remove_vq_common(vi);
1592 
1593 	flush_work(&vi->config_work);
1594 
1595 	return 0;
1596 }
1597 
1598 static int virtnet_restore(struct virtio_device *vdev)
1599 {
1600 	struct virtnet_info *vi = vdev->priv;
1601 	int err, i;
1602 
1603 	err = init_vqs(vi);
1604 	if (err)
1605 		return err;
1606 
1607 	if (netif_running(vi->dev))
1608 		for (i = 0; i < vi->max_queue_pairs; i++)
1609 			virtnet_napi_enable(&vi->rq[i]);
1610 
1611 	netif_device_attach(vi->dev);
1612 
1613 	for (i = 0; i < vi->max_queue_pairs; i++)
1614 		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
1615 			schedule_delayed_work(&vi->refill, 0);
1616 
1617 	mutex_lock(&vi->config_lock);
1618 	vi->config_enable = true;
1619 	mutex_unlock(&vi->config_lock);
1620 
1621 	virtnet_set_queues(vi, vi->curr_queue_pairs);
1622 
1623 	return 0;
1624 }
1625 #endif
1626 
1627 static struct virtio_device_id id_table[] = {
1628 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
1629 	{ 0 },
1630 };
1631 
1632 static unsigned int features[] = {
1633 	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
1634 	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
1635 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
1636 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1637 	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
1638 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
1639 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1640 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
1641 };
1642 
1643 static struct virtio_driver virtio_net_driver = {
1644 	.feature_table = features,
1645 	.feature_table_size = ARRAY_SIZE(features),
1646 	.driver.name =	KBUILD_MODNAME,
1647 	.driver.owner =	THIS_MODULE,
1648 	.id_table =	id_table,
1649 	.probe =	virtnet_probe,
1650 	.remove =	virtnet_remove,
1651 	.config_changed = virtnet_config_changed,
1652 #ifdef CONFIG_PM
1653 	.freeze =	virtnet_freeze,
1654 	.restore =	virtnet_restore,
1655 #endif
1656 };
1657 
1658 static int __init init(void)
1659 {
1660 	return register_virtio_driver(&virtio_net_driver);
1661 }
1662 
1663 static void __exit fini(void)
1664 {
1665 	unregister_virtio_driver(&virtio_net_driver);
1666 }
1667 module_init(init);
1668 module_exit(fini);
1669 
1670 MODULE_DEVICE_TABLE(virtio, id_table);
1671 MODULE_DESCRIPTION("Virtio network driver");
1672 MODULE_LICENSE("GPL");
1673