/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.3\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 3
#define GVE_VERSION_SUB 3

#define GVE_DEFAULT_RX_COPYBREAK 256

/* Devices supported by this driver. */
static struct gve_dev {
	uint16_t vendor_id;
	uint16_t device_id;
	const char *name;
} gve_devs[] = {
	{ PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC, "gVNIC" }
};

struct sx gve_global_lock;

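/*
 * Report driver metadata (driver version, OS version, capability flags)
 * to the device over the admin queue. Devices that do not implement the
 * command return EOPNOTSUPP, which is treated as success.
 */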
static int
gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	struct gve_dma_handle driver_info_mem;

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_driver_info),
	    PAGE_SIZE, &driver_info_mem);

	if (err != 0)
		return (ENOMEM);

	driver_info = driver_info_mem.cpu_addr;

	*driver_info = (struct gve_driver_info) {
		.os_type = 3, /* FreeBSD */
		.driver_major = GVE_VERSION_MAJOR,
		.driver_minor = GVE_VERSION_MINOR,
		.driver_sub = GVE_VERSION_SUB,
		.os_version_major = htobe32(FBSD_VERSION_MAJOR),
		.os_version_minor = htobe32(FBSD_VERSION_MINOR),
		.os_version_sub = htobe32(FBSD_VERSION_PATCH),
		.driver_capability_flags = {
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS1),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS2),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS3),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};

	snprintf(driver_info->os_version_str1, sizeof(driver_info->os_version_str1),
	    "FreeBSD %u", __FreeBSD_version);

	bus_dmamap_sync(driver_info_mem.tag, driver_info_mem.map,
	    BUS_DMASYNC_PREREAD);

	err = gve_adminq_verify_driver_compatibility(priv,
	    sizeof(struct gve_driver_info), driver_info_mem.bus_addr);

	/* It's ok if the device doesn't support this */
	if (err == EOPNOTSUPP)
		err = 0;

	gve_dma_free_coherent(&driver_info_mem);

	return (err);
}

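/*
 * Bring the interface up: program the hardware-assist bits implied by the
 * enabled capabilities, register QPLs when operating in QPL mode, create
 * the rx/tx rings on the device, and unmask the queue interrupts. Any
 * device failure schedules a reset. Called with the iface lock held.
 */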
static int
gve_up(struct gve_priv *priv)
{
	if_t ifp = priv->ifp;
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	if (device_is_attached(priv->dev) == 0) {
		device_printf(priv->dev, "Cannot bring the iface up when detached\n");
		return (ENXIO);
	}

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
		return (0);

	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
		if_sethwassistbits(ifp, CSUM_IP6_TCP | CSUM_IP6_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO6)
		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);

	if (gve_is_qpl(priv)) {
		err = gve_register_qpls(priv);
		if (err != 0)
			goto reset;
	}

	err = gve_create_rx_rings(priv);
	if (err != 0)
		goto reset;

	err = gve_create_tx_rings(priv);
	if (err != 0)
		goto reset;

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		if_link_state_change(ifp, LINK_STATE_UP);
		gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}

	gve_unmask_all_queue_irqs(priv);
	gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
	priv->interface_up_cnt++;
	return (0);

reset:
	gve_schedule_reset(priv);
	return (err);
}

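/*
 * Bring the interface down: mark the link down, destroy the rx/tx rings
 * on the device, and unregister QPLs when operating in QPL mode. Any
 * device failure schedules a reset. Called with the iface lock held.
 */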
static void
gve_down(struct gve_priv *priv)
{
	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
		return;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}

	if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	if (gve_destroy_rx_rings(priv) != 0)
		goto reset;

	if (gve_destroy_tx_rings(priv) != 0)
		goto reset;

	if (gve_is_qpl(priv)) {
		if (gve_unregister_qpls(priv) != 0)
			goto reset;
	}

	if (gve_is_gqi(priv))
		gve_mask_all_queue_irqs(priv);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
	priv->interface_down_cnt++;
	return;

reset:
	gve_schedule_reset(priv);
}

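/*
 * Change the number of rx queues to new_queue_cnt with the interface
 * quiesced. If allocating additional rings fails, the interface is
 * restarted with the old queue count.
 */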
int
gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->rx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_rx_rings(priv, new_queue_cnt, priv->rx_cfg.num_queues);
	} else {
		err = gve_alloc_rx_rings(priv, priv->rx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->rx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

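/*
 * Change the number of tx queues to new_queue_cnt with the interface
 * quiesced. If allocating additional rings fails, the interface is
 * restarted with the old queue count.
 */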
int
gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->tx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_tx_rings(priv, new_queue_cnt, priv->tx_cfg.num_queues);
	} else {
		err = gve_alloc_tx_rings(priv, priv->tx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->tx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

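/*
 * Change the descriptor count of every rx (or tx) ring: all affected
 * rings are freed and reallocated at the new size; on failure we retry
 * the allocation with the previous size before giving up.
 */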
int
gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
{
	int err;
	uint16_t prev_desc_cnt;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (is_rx) {
		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		prev_desc_cnt = priv->rx_desc_cnt;
		priv->rx_desc_cnt = new_desc_cnt;
		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
			priv->rx_desc_cnt = prev_desc_cnt;
			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		}
	} else {
		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		prev_desc_cnt = priv->tx_desc_cnt;
		priv->tx_desc_cnt = new_desc_cnt;
		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
			priv->tx_desc_cnt = prev_desc_cnt;
			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		}
	}

	if (err != 0) {
		device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!\n");
		return (err);
	}

	err = gve_up(priv);
	if (err != 0) {
		gve_schedule_reset(priv);
		return (err);
	}

	return (0);
}

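/*
 * Validate and program a new MTU. Values outside [ETHERMIN, max_mtu] are
 * rejected, as is, in DQ mode with hardware LRO enabled, the problematic
 * [7822, 8227] range described below.
 */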
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	const uint32_t max_problem_range = 8227;
	const uint32_t min_problem_range = 7822;
	int err;

	if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
		device_printf(priv->dev, "Invalid new MTU setting. new mtu: %d max mtu: %d min mtu: %d\n",
		    new_mtu, priv->max_mtu, ETHERMIN);
		return (EINVAL);
	}

	/*
	 * When hardware LRO is enabled in DQ mode, MTUs within the range
	 * [7822, 8227] trigger hardware issues which cause a drastic drop
	 * in throughput.
	 */
	if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
	    new_mtu >= min_problem_range && new_mtu <= max_problem_range) {
		device_printf(priv->dev,
		    "Cannot set MTU to %d within the range [%d, %d] while hardware LRO is enabled\n",
		    new_mtu, min_problem_range, max_problem_range);
		return (EINVAL);
	}

	err = gve_adminq_set_mtu(priv, new_mtu);
	if (err == 0) {
		if (bootverbose)
			device_printf(priv->dev, "MTU set to %d\n", new_mtu);
		if_setmtu(ifp, new_mtu);
	} else {
		device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
	}

	return (err);
}

static void
gve_init(void *arg)
{
	struct gve_priv *priv = (struct gve_priv *)arg;

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP)) {
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
	}
}

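/*
 * Interface ioctl handler. MTU, flag, and capability changes are applied
 * under the iface lock with the queues brought down and back up; media
 * requests go to ifmedia_ioctl() and everything else to ether_ioctl().
 */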
static int
gve_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct gve_priv *priv;
	struct ifreq *ifr;
	int rc = 0;

	priv = if_getsoftc(ifp);
	ifr = (struct ifreq *)data;

	switch (command) {
	case SIOCSIFMTU:
		if (if_getmtu(ifp) == ifr->ifr_mtu)
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		gve_set_mtu(ifp, ifr->ifr_mtu);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFFLAGS:
		if ((if_getflags(ifp) & IFF_UP) != 0) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				rc = gve_up(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		} else {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				gve_down(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		}
		break;

	case SIOCSIFCAP:
		if (ifr->ifr_reqcap == if_getcapenable(ifp))
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		if_setcapenable(ifp, ifr->ifr_reqcap);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFMEDIA:
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		rc = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;

	default:
		rc = ether_ioctl(ifp, command, data);
		break;
	}

	return (rc);
}

static int
gve_media_change(if_t ifp)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	device_printf(priv->dev, "Media change not supported\n");
	return (0);
}

static void
gve_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_active |= IFM_AUTO;
	} else {
		ifmr->ifm_active |= IFM_NONE;
	}

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
}

static uint64_t
gve_get_counter(if_t ifp, ift_counter cnt)
{
	struct gve_priv *priv;
	uint64_t rpackets = 0;
	uint64_t tpackets = 0;
	uint64_t rbytes = 0;
	uint64_t tbytes = 0;
	uint64_t rx_dropped_pkt = 0;
	uint64_t tx_dropped_pkt = 0;

	priv = if_getsoftc(ifp);

	gve_accum_stats(priv, &rpackets, &rbytes, &rx_dropped_pkt, &tpackets,
	    &tbytes, &tx_dropped_pkt);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (rpackets);

	case IFCOUNTER_OPACKETS:
		return (tpackets);

	case IFCOUNTER_IBYTES:
		return (rbytes);

	case IFCOUNTER_OBYTES:
		return (tbytes);

	case IFCOUNTER_IQDROPS:
		return (rx_dropped_pkt);

	case IFCOUNTER_OQDROPS:
		return (tx_dropped_pkt);

	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

static void
gve_setup_ifnet(device_t dev, struct gve_priv *priv)
{
	int caps = 0;
	if_t ifp;

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setsoftc(ifp, priv);
	if_setdev(ifp, dev);
	if_setinitfn(ifp, gve_init);
	if_setioctlfn(ifp, gve_ioctl);
	if_settransmitfn(ifp, gve_xmit_ifp);
	if_setqflushfn(ifp, gve_qflush);

	/*
	 * Set TSO limits; these must match the arguments to bus_dma_tag_create
	 * when creating tx->dqo.buf_dmatag. They apply only to RDA mode
	 * because in QPL mode we copy the entire packet into the bounce buffer
	 * and thus it does not matter how fragmented the mbuf is.
	 */
	if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
		if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
		if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
	}
	if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);

#if __FreeBSD_version >= 1400086
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
#else
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH);
#endif

	ifmedia_init(&priv->media, IFM_IMASK, gve_media_change, gve_media_status);
	if_setgetcounterfn(ifp, gve_get_counter);

	caps = IFCAP_RXCSUM |
	       IFCAP_TXCSUM |
	       IFCAP_TXCSUM_IPV6 |
	       IFCAP_TSO |
	       IFCAP_LRO;

	if ((priv->supported_features & GVE_SUP_JUMBO_FRAMES_MASK) != 0)
		caps |= IFCAP_JUMBO_MTU;

	if_setcapabilities(ifp, caps);
	if_setcapenable(ifp, caps);

	if (bootverbose)
		device_printf(priv->dev, "Setting initial MTU to %d\n", priv->max_mtu);
	if_setmtu(ifp, priv->max_mtu);

	ether_ifattach(ifp, priv->mac);

	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
}

static int
gve_alloc_counter_array(struct gve_priv *priv)
{
	int err;

	err = gve_dma_alloc_coherent(priv, sizeof(uint32_t) * priv->num_event_counters,
	    PAGE_SIZE, &priv->counter_array_mem);
	if (err != 0)
		return (err);

	priv->counters = priv->counter_array_mem.cpu_addr;
	return (0);
}

static void
gve_free_counter_array(struct gve_priv *priv)
{
	if (priv->counters != NULL)
		gve_dma_free_coherent(&priv->counter_array_mem);
	priv->counter_array_mem = (struct gve_dma_handle){};
}

static int
gve_alloc_irq_db_array(struct gve_priv *priv)
{
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_irq_db) * (priv->num_queues), PAGE_SIZE,
	    &priv->irqs_db_mem);
	if (err != 0)
		return (err);

	priv->irq_db_indices = priv->irqs_db_mem.cpu_addr;
	return (0);
}

static void
gve_free_irq_db_array(struct gve_priv *priv)
{
	if (priv->irq_db_indices != NULL)
		gve_dma_free_coherent(&priv->irqs_db_mem);
	priv->irqs_db_mem = (struct gve_dma_handle){};
}

static void
gve_free_rings(struct gve_priv *priv)
{
	gve_free_irqs(priv);

	gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
	free(priv->tx, M_GVE);
	priv->tx = NULL;

	gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
	free(priv->rx, M_GVE);
	priv->rx = NULL;
}

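/*
 * Allocate host-side ring state and interrupts. The rx/tx arrays are
 * sized for max_queues, which lets gve_adjust_rx_queues() and
 * gve_adjust_tx_queues() grow the active queue count later without
 * reallocating the arrays.
 */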
static int
gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.max_queues,
	    M_GVE, M_WAITOK | M_ZERO);
	err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
	if (err != 0)
		goto abort;

	priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.max_queues,
	    M_GVE, M_WAITOK | M_ZERO);
	err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
	if (err != 0)
		goto abort;

	err = gve_alloc_irqs(priv);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_free_rings(priv);
	return (err);
}

static void
gve_deconfigure_and_free_device_resources(struct gve_priv *priv)
{
	int err;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK)) {
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err != 0) {
			device_printf(priv->dev, "Failed to deconfigure device resources: err=%d\n",
			    err);
			return;
		}
		if (bootverbose)
			device_printf(priv->dev, "Deconfigured device resources\n");
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	}

	gve_free_irq_db_array(priv);
	gve_free_counter_array(priv);

	if (priv->ptype_lut_dqo) {
		free(priv->ptype_lut_dqo, M_GVE);
		priv->ptype_lut_dqo = NULL;
	}
}

static int
gve_alloc_and_configure_device_resources(struct gve_priv *priv)
{
	int err;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK))
		return (0);

	err = gve_alloc_counter_array(priv);
	if (err != 0)
		return (err);

	err = gve_alloc_irq_db_array(priv);
	if (err != 0)
		goto abort;

	err = gve_adminq_configure_device_resources(priv);
	if (err != 0) {
		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
		    err);
		err = (ENXIO);
		goto abort;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE,
		    M_WAITOK | M_ZERO);

		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err != 0) {
			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
			    err);
			goto abort;
		}
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	if (bootverbose)
		device_printf(priv->dev, "Configured device resources\n");
	return (0);

abort:
	gve_deconfigure_and_free_device_resources(priv);
	return (err);
}

static void
gve_set_queue_cnts(struct gve_priv *priv)
{
	priv->tx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_TX_QUEUES);
	priv->rx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_RX_QUEUES);
	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;

	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = MIN(priv->default_num_queues,
		    priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = MIN(priv->default_num_queues,
		    priv->rx_cfg.num_queues);
	}

	priv->num_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	priv->mgmt_msix_idx = priv->num_queues;
}

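/*
 * Allocate the admin queue, report driver compatibility information, and
 * fetch the device description that seeds the queue configuration.
 */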
static int
gve_alloc_adminq_and_describe_device(struct gve_priv *priv)
{
	int err;

	if ((err = gve_adminq_alloc(priv)) != 0)
		return (err);

	if ((err = gve_verify_driver_compatibility(priv)) != 0) {
		device_printf(priv->dev,
		    "Failed to verify driver compatibility: err=%d\n", err);
		goto abort;
	}

	if ((err = gve_adminq_describe_device(priv)) != 0)
		goto abort;

	gve_set_queue_cnts(priv);

	priv->num_registered_pages = 0;
	return (0);

abort:
	gve_release_adminq(priv);
	return (err);
}

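/*
 * Ask the service taskqueue to reset the device, unless a reset is
 * already in progress.
 */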
void
gve_schedule_reset(struct gve_priv *priv)
{
	if (gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET))
		return;

	device_printf(priv->dev, "Scheduling reset task!\n");
	gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	taskqueue_enqueue(priv->service_tq, &priv->service_task);
}

static void
gve_destroy(struct gve_priv *priv)
{
	gve_down(priv);
	gve_deconfigure_and_free_device_resources(priv);
	gve_release_adminq(priv);
}

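/*
 * Second half of the reset path: re-create the admin queue and device
 * resources and bring the interface back up.
 */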
static void
gve_restore(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_alloc(priv);
	if (err != 0)
		goto abort;

	err = gve_adminq_configure_device_resources(priv);
	if (err != 0) {
		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
		    err);
		err = (ENXIO);
		goto abort;
	}
	if (!gve_is_gqi(priv)) {
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err != 0) {
			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
			    err);
			goto abort;
		}
	}

	err = gve_up(priv);
	if (err != 0)
		goto abort;

	return;

abort:
	device_printf(priv->dev, "Restore failed!\n");
}

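/*
 * Zero the event counters and interrupt doorbell indices shared with the
 * device, syncing the DMA maps with BUS_DMASYNC_PREWRITE so the cleared
 * values are visible to the device.
 */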
static void
gve_clear_device_resources(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->num_event_counters; i++)
		priv->counters[i] = 0;
	bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
	    BUS_DMASYNC_PREWRITE);

	for (i = 0; i < priv->num_queues; i++)
		priv->irq_db_indices[i] = (struct gve_irq_db){};
	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_PREWRITE);

	if (priv->ptype_lut_dqo)
		*priv->ptype_lut_dqo = (struct gve_ptype_lut){0};
}

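/*
 * Service-task half of the reset path: quiesce the interface, release the
 * admin queue so the NIC drops everything registered with it, clear the
 * host-side state, and then restore the device via gve_restore().
 */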
static void
gve_handle_reset(struct gve_priv *priv)
{
	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_DO_RESET))
		return;

	gve_clear_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	gve_set_state_flag(priv, GVE_STATE_FLAG_IN_RESET);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);

	/*
	 * Releasing the adminq causes the NIC to destroy all resources
	 * registered with it, so clearing the state flags below keeps the
	 * subsequent gve_down call from asking the NIC to destroy these
	 * resources a second time.
	 *
	 * The gve_down call is still needed to refresh the state and the
	 * DMA-able memory within each driver ring.
	 */
	gve_release_adminq(priv);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_QPLREG_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);

	gve_down(priv);
	gve_clear_device_resources(priv);

	gve_restore(priv);

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

	priv->reset_cnt++;
	gve_clear_state_flag(priv, GVE_STATE_FLAG_IN_RESET);
}

static void
gve_handle_link_status(struct gve_priv *priv)
{
	uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS);
	bool link_up = status & GVE_DEVICE_STATUS_LINK_STATUS;

	if (link_up == gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP))
		return;

	if (link_up) {
		if (bootverbose)
			device_printf(priv->dev, "Device link is up.\n");
		if_link_state_change(priv->ifp, LINK_STATE_UP);
		gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	} else {
		device_printf(priv->dev, "Device link is down.\n");
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}
}

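/*
 * Service taskqueue handler: performs device-requested or driver-scheduled
 * resets and keeps the ifnet link state in sync with the device.
 */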
static void
gve_service_task(void *arg, int pending)
{
	struct gve_priv *priv = (struct gve_priv *)arg;
	uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS);

	if (((GVE_DEVICE_STATUS_RESET_MASK & status) != 0) &&
	    !gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET)) {
		device_printf(priv->dev, "Device requested reset\n");
		gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	}

	gve_handle_reset(priv);
	gve_handle_link_status(priv);
}

static int
gve_probe(device_t dev)
{
	uint16_t deviceid, vendorid;
	int i;

	vendorid = pci_get_vendor(dev);
	deviceid = pci_get_device(dev);

	for (i = 0; i < nitems(gve_devs); i++) {
		if (vendorid == gve_devs[i].vendor_id &&
		    deviceid == gve_devs[i].device_id) {
			device_set_desc(dev, gve_devs[i].name);
			return (BUS_PROBE_DEFAULT);
		}
	}
	return (ENXIO);
}

static void
gve_free_sys_res_mem(struct gve_priv *priv)
{
	if (priv->msix_table != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->msix_table), priv->msix_table);

	if (priv->db_bar != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->db_bar), priv->db_bar);

	if (priv->reg_bar != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->reg_bar), priv->reg_bar);
}

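/*
 * Device attach: map the register, doorbell, and MSIX BARs, set up the
 * admin queue and device resources, allocate the rings, attach the ifnet,
 * and start the service taskqueue.
 */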
static int
gve_attach(device_t dev)
{
	struct gve_priv *priv;
	int rid;
	int err;

	snprintf(gve_version, sizeof(gve_version), "%d.%d.%d",
	    GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB);

	priv = device_get_softc(dev);
	priv->dev = dev;
	GVE_IFACE_LOCK_INIT(priv->gve_iface_lock);

	pci_enable_busmaster(dev);

	rid = PCIR_BAR(GVE_REGISTER_BAR);
	priv->reg_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->reg_bar == NULL) {
		device_printf(dev, "Failed to allocate BAR0\n");
		err = ENXIO;
		goto abort;
	}

	rid = PCIR_BAR(GVE_DOORBELL_BAR);
	priv->db_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->db_bar == NULL) {
		device_printf(dev, "Failed to allocate BAR2\n");
		err = ENXIO;
		goto abort;
	}

	rid = pci_msix_table_bar(priv->dev);
	priv->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->msix_table == NULL) {
		device_printf(dev, "Failed to allocate msix table\n");
		err = ENXIO;
		goto abort;
	}

	err = gve_alloc_adminq_and_describe_device(priv);
	if (err != 0)
		goto abort;

	err = gve_alloc_and_configure_device_resources(priv);
	if (err != 0)
		goto abort;

	err = gve_alloc_rings(priv);
	if (err != 0)
		goto abort;

	gve_setup_ifnet(dev, priv);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;

	bus_write_multi_1(priv->reg_bar, DRIVER_VERSION, GVE_DRIVER_VERSION,
	    sizeof(GVE_DRIVER_VERSION) - 1);

	TASK_INIT(&priv->service_task, 0, gve_service_task, priv);
	priv->service_tq = taskqueue_create("gve service", M_WAITOK | M_ZERO,
	    taskqueue_thread_enqueue, &priv->service_tq);
	taskqueue_start_threads(&priv->service_tq, 1, PI_NET, "%s service tq",
	    device_get_nameunit(priv->dev));

	gve_setup_sysctl(priv);

	if (bootverbose)
		device_printf(priv->dev, "Successfully attached %s", GVE_DRIVER_VERSION);
	return (0);

abort:
	gve_free_rings(priv);
	gve_deconfigure_and_free_device_resources(priv);
	gve_release_adminq(priv);
	gve_free_sys_res_mem(priv);
	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);
	return (err);
}

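/*
 * Device detach: detach the ifnet, tear down the device and host state,
 * release the BARs, and drain and free the service taskqueue.
 */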
static int
gve_detach(device_t dev)
{
	struct gve_priv *priv = device_get_softc(dev);
	if_t ifp = priv->ifp;
	int error;

	error = bus_generic_detach(dev);
	if (error != 0)
		return (error);

	ether_ifdetach(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
	gve_destroy(priv);
	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

	gve_free_rings(priv);
	gve_free_sys_res_mem(priv);
	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);

	while (taskqueue_cancel(priv->service_tq, &priv->service_task, NULL))
		taskqueue_drain(priv->service_tq, &priv->service_task);
	taskqueue_free(priv->service_tq);

	if_free(ifp);
	return (0);
}

static device_method_t gve_methods[] = {
	DEVMETHOD(device_probe, gve_probe),
	DEVMETHOD(device_attach, gve_attach),
	DEVMETHOD(device_detach, gve_detach),
	DEVMETHOD_END
};

static driver_t gve_driver = {
	"gve",
	gve_methods,
	sizeof(struct gve_priv)
};

#if __FreeBSD_version < 1301503
static devclass_t gve_devclass;

DRIVER_MODULE(gve, pci, gve_driver, gve_devclass, 0, 0);
#else
DRIVER_MODULE(gve, pci, gve_driver, 0, 0);
#endif
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, gve, gve_devs,
    nitems(gve_devs));
1076