/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 3
#define GVE_VERSION_SUB 4

#define GVE_DEFAULT_RX_COPYBREAK 256

/* Devices supported by this driver. */
static struct gve_dev {
	uint16_t vendor_id;
	uint16_t device_id;
	const char *name;
} gve_devs[] = {
	{ PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC, "gVNIC" }
};

struct sx gve_global_lock;

static void gve_start_tx_timeout_service(struct gve_priv *priv);
static void gve_stop_tx_timeout_service(struct gve_priv *priv);

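/*
 * Report driver and OS version information to the device over the admin
 * queue. It is not fatal if the device does not implement this command:
 * EOPNOTSUPP is treated as success.
 */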
static int
gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	struct gve_dma_handle driver_info_mem;

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_driver_info),
	    PAGE_SIZE, &driver_info_mem);

	if (err != 0)
		return (ENOMEM);

	driver_info = driver_info_mem.cpu_addr;

	*driver_info = (struct gve_driver_info) {
		.os_type = 3, /* FreeBSD */
		.driver_major = GVE_VERSION_MAJOR,
		.driver_minor = GVE_VERSION_MINOR,
		.driver_sub = GVE_VERSION_SUB,
		.os_version_major = htobe32(FBSD_VERSION_MAJOR),
		.os_version_minor = htobe32(FBSD_VERSION_MINOR),
		.os_version_sub = htobe32(FBSD_VERSION_PATCH),
		.driver_capability_flags = {
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS1),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS2),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS3),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};

	snprintf(driver_info->os_version_str1, sizeof(driver_info->os_version_str1),
	    "FreeBSD %u", __FreeBSD_version);

	bus_dmamap_sync(driver_info_mem.tag, driver_info_mem.map,
	    BUS_DMASYNC_PREWRITE);

	err = gve_adminq_verify_driver_compatibility(priv,
	    sizeof(struct gve_driver_info), driver_info_mem.bus_addr);

	/* It's ok if the device doesn't support this */
	if (err == EOPNOTSUPP)
		err = 0;

	gve_dma_free_coherent(&driver_info_mem);

	return (err);
}

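/*
 * Respond to timed out packets on a tx queue: first try enqueuing the queue's
 * cleanup task in case a completion interrupt was missed, and escalate to a
 * full device reset if the queue was already kicked within the cooldown
 * period.
 */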
static void
gve_handle_tx_timeout(struct gve_priv *priv, struct gve_tx_ring *tx,
    int num_timeout_pkts)
{
	int64_t time_since_last_kick;

	counter_u64_add_protected(tx->stats.tx_timeout, 1);

	/* last_kicked is never GVE_TIMESTAMP_INVALID so we can skip checking */
	time_since_last_kick = gve_seconds_since(&tx->last_kicked);

	/* Try kicking first in case the timeout is due to a missed interrupt */
	if (time_since_last_kick > GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC) {
		device_printf(priv->dev,
		    "Found %d timed out packet(s) on txq%d, kicking it for completions\n",
		    num_timeout_pkts, tx->com.id);
		gve_set_timestamp(&tx->last_kicked);
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
	} else {
		device_printf(priv->dev,
		    "Found %d timed out packet(s) on txq%d with its last kick %jd sec ago which is less than the cooldown period %d. Resetting device\n",
		    num_timeout_pkts, tx->com.id,
		    (intmax_t)time_since_last_kick,
		    GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC);
		gve_schedule_reset(priv);
	}
}

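/*
 * Callout that checks one tx queue per tick, round-robin, for timed out
 * packets, then re-arms itself for the next cadence interval.
 */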
static void
gve_tx_timeout_service_callback(void *data)
{
	struct gve_priv *priv = (struct gve_priv *)data;
	struct gve_tx_ring *tx;
	uint16_t num_timeout_pkts;

	tx = &priv->tx[priv->check_tx_queue_idx];

	num_timeout_pkts = gve_is_gqi(priv) ?
	    gve_check_tx_timeout_gqi(priv, tx) :
	    gve_check_tx_timeout_dqo(priv, tx);
	if (num_timeout_pkts)
		gve_handle_tx_timeout(priv, tx, num_timeout_pkts);

	priv->check_tx_queue_idx = (priv->check_tx_queue_idx + 1) %
	    priv->tx_cfg.num_queues;
	callout_reset_sbt(&priv->tx_timeout_service,
	    SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
	    gve_tx_timeout_service_callback, (void *)priv, 0);
}

static void
gve_start_tx_timeout_service(struct gve_priv *priv)
{
	priv->check_tx_queue_idx = 0;
	callout_init(&priv->tx_timeout_service, true);
	callout_reset_sbt(&priv->tx_timeout_service,
	    SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
	    gve_tx_timeout_service_callback, (void *)priv, 0);
}

static void
gve_stop_tx_timeout_service(struct gve_priv *priv)
{
	callout_drain(&priv->tx_timeout_service);
}

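/*
 * Bring the interface up: derive the hwassist bits from the enabled
 * capabilities, register QPLs if the queue format requires them, create the
 * rx and tx rings on the device, mark the interface running, unmask the
 * queue interrupts, and start the tx timeout service. Any failure schedules
 * a device reset.
 */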
static int
gve_up(struct gve_priv *priv)
{
	if_t ifp = priv->ifp;
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	if (device_is_attached(priv->dev) == 0) {
		device_printf(priv->dev, "Cannot bring the iface up when detached\n");
		return (ENXIO);
	}

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
		return (0);

	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
		if_sethwassistbits(ifp, CSUM_IP6_TCP | CSUM_IP6_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO6)
		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);

	if (gve_is_qpl(priv)) {
		err = gve_register_qpls(priv);
		if (err != 0)
			goto reset;
	}

	err = gve_create_rx_rings(priv);
	if (err != 0)
		goto reset;

	err = gve_create_tx_rings(priv);
	if (err != 0)
		goto reset;

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		if_link_state_change(ifp, LINK_STATE_UP);
		gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}

	gve_unmask_all_queue_irqs(priv);
	gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
	priv->interface_up_cnt++;

	gve_start_tx_timeout_service(priv);

	return (0);

reset:
	gve_schedule_reset(priv);
	return (err);
}

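/*
 * Tear down the data path: stop the tx timeout service, mark the link and
 * the interface down, and destroy the rings and QPL registrations on the
 * device. Any failure schedules a device reset.
 */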
static void
gve_down(struct gve_priv *priv)
{
	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
		return;

	gve_stop_tx_timeout_service(priv);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}

	if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	if (gve_destroy_rx_rings(priv) != 0)
		goto reset;

	if (gve_destroy_tx_rings(priv) != 0)
		goto reset;

	if (gve_is_qpl(priv)) {
		if (gve_unregister_qpls(priv) != 0)
			goto reset;
	}

	if (gve_is_gqi(priv))
		gve_mask_all_queue_irqs(priv);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
	priv->interface_down_cnt++;
	return;

reset:
	gve_schedule_reset(priv);
}

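/*
 * Change the number of active rx queues: quiesce the interface, free or
 * allocate only the rings in the delta between the old and new counts, and
 * bring the interface back up. On allocation failure the old queue count is
 * kept.
 */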
int
gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->rx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_rx_rings(priv, new_queue_cnt, priv->rx_cfg.num_queues);
	} else {
		err = gve_alloc_rx_rings(priv, priv->rx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->rx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

int
gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->tx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_tx_rings(priv, new_queue_cnt, priv->tx_cfg.num_queues);
	} else {
		err = gve_alloc_tx_rings(priv, priv->tx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->tx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

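/*
 * Change the descriptor count of every rx or tx ring. Unlike a queue-count
 * change, this frees and reallocates all rings of the affected type; if the
 * allocation at the new size fails, the previous size is retried before
 * giving up.
 */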
int
gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
{
	int err;
	uint16_t prev_desc_cnt;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (is_rx) {
		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		prev_desc_cnt = priv->rx_desc_cnt;
		priv->rx_desc_cnt = new_desc_cnt;
		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
			priv->rx_desc_cnt = prev_desc_cnt;
			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		}
	} else {
		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		prev_desc_cnt = priv->tx_desc_cnt;
		priv->tx_desc_cnt = new_desc_cnt;
		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
			priv->tx_desc_cnt = prev_desc_cnt;
			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		}
	}

	if (err != 0) {
		device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!\n");
		return (err);
	}

	err = gve_up(priv);
	if (err != 0) {
		gve_schedule_reset(priv);
		return (err);
	}

	return (0);
}

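/*
 * Validate and program a new MTU. The request must lie between ETHERMIN and
 * the device-reported maximum, and must avoid an MTU range known to degrade
 * throughput when hardware LRO is enabled in DQ mode. The new value is set
 * via the admin queue before being reflected on the ifnet.
 */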
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	const uint32_t max_problem_range = 8227;
	const uint32_t min_problem_range = 7822;
	int err;

	if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
		device_printf(priv->dev, "Invalid new MTU setting. new mtu: %d max mtu: %d min mtu: %d\n",
		    new_mtu, priv->max_mtu, ETHERMIN);
		return (EINVAL);
	}

	/*
	 * When hardware LRO is enabled in DQ mode, MTUs within the range
	 * [7822, 8227] trigger hardware issues which cause a drastic drop
	 * in throughput.
	 */
	if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
	    new_mtu >= min_problem_range && new_mtu <= max_problem_range) {
		device_printf(priv->dev,
		    "Cannot set MTU to %d within the range [%d, %d] while hardware LRO is enabled\n",
		    new_mtu, min_problem_range, max_problem_range);
		return (EINVAL);
	}

	err = gve_adminq_set_mtu(priv, new_mtu);
	if (err == 0) {
		if (bootverbose)
			device_printf(priv->dev, "MTU set to %d\n", new_mtu);
		if_setmtu(ifp, new_mtu);
	} else {
		device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
	}

	return (err);
}

static void
gve_init(void *arg)
{
	struct gve_priv *priv = (struct gve_priv *)arg;

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP)) {
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
	}
}

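/*
 * Interface ioctl handler. MTU, interface-flag, and capability changes that
 * affect the data path are applied with a full down/up cycle under the
 * interface lock; media requests are forwarded to ifmedia and everything
 * else to ether_ioctl.
 */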
static int
gve_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct gve_priv *priv;
	struct ifreq *ifr;
	int rc = 0;

	priv = if_getsoftc(ifp);
	ifr = (struct ifreq *)data;

	switch (command) {
	case SIOCSIFMTU:
		if (if_getmtu(ifp) == ifr->ifr_mtu)
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		gve_set_mtu(ifp, ifr->ifr_mtu);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFFLAGS:
		if ((if_getflags(ifp) & IFF_UP) != 0) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				rc = gve_up(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		} else {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				gve_down(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		}
		break;

	case SIOCSIFCAP:
		if (ifr->ifr_reqcap == if_getcapenable(ifp))
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		if_setcapenable(ifp, ifr->ifr_reqcap);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFMEDIA:
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		rc = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;

	default:
		rc = ether_ioctl(ifp, command, data);
		break;
	}

	return (rc);
}

static int
gve_media_change(if_t ifp)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	device_printf(priv->dev, "Media change not supported\n");
	return (0);
}

static void
gve_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_active |= IFM_AUTO;
	} else {
		ifmr->ifm_active |= IFM_NONE;
	}

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
}

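/*
 * if_get_counter handler: accumulate the per-ring packet, byte, and drop
 * counters and return the one requested, deferring to the system default
 * for counters the driver does not track itself.
 */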
static uint64_t
gve_get_counter(if_t ifp, ift_counter cnt)
{
	struct gve_priv *priv;
	uint64_t rpackets = 0;
	uint64_t tpackets = 0;
	uint64_t rbytes = 0;
	uint64_t tbytes = 0;
	uint64_t rx_dropped_pkt = 0;
	uint64_t tx_dropped_pkt = 0;

	priv = if_getsoftc(ifp);

	gve_accum_stats(priv, &rpackets, &rbytes, &rx_dropped_pkt, &tpackets,
	    &tbytes, &tx_dropped_pkt);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (rpackets);

	case IFCOUNTER_OPACKETS:
		return (tpackets);

	case IFCOUNTER_IBYTES:
		return (rbytes);

	case IFCOUNTER_OBYTES:
		return (tbytes);

	case IFCOUNTER_IQDROPS:
		return (rx_dropped_pkt);

	case IFCOUNTER_OQDROPS:
		return (tx_dropped_pkt);

	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

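/*
 * Allocate and configure the ifnet: install the driver entry points, set
 * TSO limits to match the tx DMA tag, advertise checksum/TSO/LRO
 * capabilities (and jumbo MTU when the device supports it), and attach the
 * ethernet interface with a single autoselect media type.
 */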
static void
gve_setup_ifnet(device_t dev, struct gve_priv *priv)
{
	int caps = 0;
	if_t ifp;

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setsoftc(ifp, priv);
	if_setdev(ifp, dev);
	if_setinitfn(ifp, gve_init);
	if_setioctlfn(ifp, gve_ioctl);
	if_settransmitfn(ifp, gve_xmit_ifp);
	if_setqflushfn(ifp, gve_qflush);

	/*
	 * Set TSO limits, must match the arguments to bus_dma_tag_create
	 * when creating tx->dqo.buf_dmatag. Only applies to the RDA mode
	 * because in QPL we copy the entire packet into the bounce buffer
	 * and thus it does not matter how fragmented the mbuf is.
	 */
	if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
		if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
		if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
	}
	if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);

#if __FreeBSD_version >= 1400086
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
#else
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH);
#endif

	ifmedia_init(&priv->media, IFM_IMASK, gve_media_change, gve_media_status);
	if_setgetcounterfn(ifp, gve_get_counter);

	caps = IFCAP_RXCSUM |
	       IFCAP_TXCSUM |
	       IFCAP_TXCSUM_IPV6 |
	       IFCAP_TSO |
	       IFCAP_LRO;

	if ((priv->supported_features & GVE_SUP_JUMBO_FRAMES_MASK) != 0)
		caps |= IFCAP_JUMBO_MTU;

	if_setcapabilities(ifp, caps);
	if_setcapenable(ifp, caps);

	if (bootverbose)
		device_printf(priv->dev, "Setting initial MTU to %d\n", priv->max_mtu);
	if_setmtu(ifp, priv->max_mtu);

	ether_ifattach(ifp, priv->mac);

	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
}

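/*
 * The event counter array and the irq doorbell array below are DMA memory
 * shared with the device, so both are allocated coherently and page-aligned.
 */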
static int
gve_alloc_counter_array(struct gve_priv *priv)
{
	int err;

	err = gve_dma_alloc_coherent(priv, sizeof(uint32_t) * priv->num_event_counters,
	    PAGE_SIZE, &priv->counter_array_mem);
	if (err != 0)
		return (err);

	priv->counters = priv->counter_array_mem.cpu_addr;
	return (0);
}

static void
gve_free_counter_array(struct gve_priv *priv)
{
	if (priv->counters != NULL)
		gve_dma_free_coherent(&priv->counter_array_mem);
	priv->counter_array_mem = (struct gve_dma_handle){};
}

static int
gve_alloc_irq_db_array(struct gve_priv *priv)
{
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_irq_db) * (priv->num_queues), PAGE_SIZE,
	    &priv->irqs_db_mem);
	if (err != 0)
		return (err);

	priv->irq_db_indices = priv->irqs_db_mem.cpu_addr;
	return (0);
}

static void
gve_free_irq_db_array(struct gve_priv *priv)
{
	if (priv->irq_db_indices != NULL)
		gve_dma_free_coherent(&priv->irqs_db_mem);
	priv->irqs_db_mem = (struct gve_dma_handle){};
}

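/*
 * Allocate (or free) the driver-side state for all rx and tx rings along
 * with their interrupts. These are host-memory allocations only; the rings
 * are not created on the device until gve_up.
 */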
static void
gve_free_rings(struct gve_priv *priv)
{
	gve_free_irqs(priv);

	gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
	free(priv->tx, M_GVE);
	priv->tx = NULL;

	gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
	free(priv->rx, M_GVE);
	priv->rx = NULL;
}

static int
gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.max_queues,
	    M_GVE, M_WAITOK | M_ZERO);
	err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
	if (err != 0)
		goto abort;

	priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.max_queues,
	    M_GVE, M_WAITOK | M_ZERO);
	err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
	if (err != 0)
		goto abort;

	err = gve_alloc_irqs(priv);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_free_rings(priv);
	return (err);
}

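/*
 * Configure (or deconfigure) device resources: the shared counter and irq
 * doorbell arrays are allocated and made known to the device via the admin
 * queue, and DQ mode additionally fetches the packet-type LUT used by the
 * rx path.
 */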
static void
gve_deconfigure_and_free_device_resources(struct gve_priv *priv)
{
	int err;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK)) {
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err != 0) {
			device_printf(priv->dev, "Failed to deconfigure device resources: err=%d\n",
			    err);
			return;
		}
		if (bootverbose)
			device_printf(priv->dev, "Deconfigured device resources\n");
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	}

	gve_free_irq_db_array(priv);
	gve_free_counter_array(priv);

	if (priv->ptype_lut_dqo) {
		free(priv->ptype_lut_dqo, M_GVE);
		priv->ptype_lut_dqo = NULL;
	}
}

static int
gve_alloc_and_configure_device_resources(struct gve_priv *priv)
{
	int err;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK))
		return (0);

	err = gve_alloc_counter_array(priv);
	if (err != 0)
		return (err);

	err = gve_alloc_irq_db_array(priv);
	if (err != 0)
		goto abort;

	err = gve_adminq_configure_device_resources(priv);
	if (err != 0) {
		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
		    err);
		err = (ENXIO);
		goto abort;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE,
		    M_WAITOK | M_ZERO);

		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err != 0) {
			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
			    err);
			goto abort;
		}
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	if (bootverbose)
		device_printf(priv->dev, "Configured device resources\n");
	return (0);

abort:
	gve_deconfigure_and_free_device_resources(priv);
	return (err);
}

static void
gve_set_queue_cnts(struct gve_priv *priv)
{
	priv->tx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_TX_QUEUES);
	priv->rx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_RX_QUEUES);
	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;

	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = MIN(priv->default_num_queues,
		    priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = MIN(priv->default_num_queues,
		    priv->rx_cfg.num_queues);
	}

	priv->num_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	priv->mgmt_msix_idx = priv->num_queues;
}

static int
gve_alloc_adminq_and_describe_device(struct gve_priv *priv)
{
	int err;

	if ((err = gve_adminq_alloc(priv)) != 0)
		return (err);

	if ((err = gve_verify_driver_compatibility(priv)) != 0) {
		device_printf(priv->dev,
		    "Failed to verify driver compatibility: err=%d\n", err);
		goto abort;
	}

	if ((err = gve_adminq_describe_device(priv)) != 0)
		goto abort;

	gve_set_queue_cnts(priv);

	priv->num_registered_pages = 0;
	return (0);

abort:
	gve_release_adminq(priv);
	return (err);
}

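/*
 * Request a device reset from the service taskqueue. This is a no-op if a
 * reset is already in flight.
 */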
void
gve_schedule_reset(struct gve_priv *priv)
{
	if (gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET))
		return;

	device_printf(priv->dev, "Scheduling reset task!\n");
	gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	taskqueue_enqueue(priv->service_tq, &priv->service_task);
}

static void
gve_destroy(struct gve_priv *priv)
{
	gve_down(priv);
	gve_deconfigure_and_free_device_resources(priv);
	gve_release_adminq(priv);
}

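/*
 * Rebuild the device state after a reset: reallocate the admin queue,
 * reconfigure device resources and the DQO ptype LUT, and bring the
 * interface back up. On failure the device is left down.
 */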
static void
gve_restore(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_alloc(priv);
	if (err != 0)
		goto abort;

	err = gve_adminq_configure_device_resources(priv);
	if (err != 0) {
		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
		    err);
		err = (ENXIO);
		goto abort;
	}
	if (!gve_is_gqi(priv)) {
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err != 0) {
			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
			    err);
			goto abort;
		}
	}

	err = gve_up(priv);
	if (err != 0)
		goto abort;

	return;

abort:
	device_printf(priv->dev, "Restore failed!\n");
	return;
}

static void
gve_clear_device_resources(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->num_event_counters; i++)
		priv->counters[i] = 0;
	bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
	    BUS_DMASYNC_PREWRITE);

	for (i = 0; i < priv->num_queues; i++)
		priv->irq_db_indices[i] = (struct gve_irq_db){};
	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_PREWRITE);

	if (priv->ptype_lut_dqo)
		*priv->ptype_lut_dqo = (struct gve_ptype_lut){0};
}

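/*
 * Perform a scheduled reset from the service task: mark the interface down,
 * release the admin queue (which makes the NIC drop all registered
 * resources), refresh the driver-side ring state via gve_down, clear the
 * shared device resources, and rebuild everything with gve_restore.
 */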
static void
gve_handle_reset(struct gve_priv *priv)
{
	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_DO_RESET))
		return;

	gve_clear_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	gve_set_state_flag(priv, GVE_STATE_FLAG_IN_RESET);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);

	/*
	 * Releasing the adminq causes the NIC to destroy all resources
	 * registered with it, so by clearing the flags beneath we cause
	 * the subsequent gve_down call below to not attempt to tell the
	 * NIC to destroy these resources again.
	 *
	 * The call to gve_down is needed in the first place to refresh
	 * the state and the DMA-able memory within each driver ring.
	 */
	gve_release_adminq(priv);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_QPLREG_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);

	gve_down(priv);
	gve_clear_device_resources(priv);

	gve_restore(priv);

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

	priv->reset_cnt++;
	gve_clear_state_flag(priv, GVE_STATE_FLAG_IN_RESET);
}

static void
gve_handle_link_status(struct gve_priv *priv)
{
	uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS);
	bool link_up = status & GVE_DEVICE_STATUS_LINK_STATUS;

	if (link_up == gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP))
		return;

	if (link_up) {
		if (bootverbose)
			device_printf(priv->dev, "Device link is up.\n");
		if_link_state_change(priv->ifp, LINK_STATE_UP);
		gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	} else {
		device_printf(priv->dev, "Device link is down.\n");
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}
}

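/*
 * Service taskqueue handler: picks up device-requested resets from the
 * status register and runs any pending reset before re-checking link state.
 */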
static void
gve_service_task(void *arg, int pending)
{
	struct gve_priv *priv = (struct gve_priv *)arg;
	uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS);

	if (((GVE_DEVICE_STATUS_RESET_MASK & status) != 0) &&
	    !gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET)) {
		device_printf(priv->dev, "Device requested reset\n");
		gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	}

	gve_handle_reset(priv);
	gve_handle_link_status(priv);
}

static int
gve_probe(device_t dev)
{
	uint16_t deviceid, vendorid;
	int i;

	vendorid = pci_get_vendor(dev);
	deviceid = pci_get_device(dev);

	for (i = 0; i < nitems(gve_devs); i++) {
		if (vendorid == gve_devs[i].vendor_id &&
		    deviceid == gve_devs[i].device_id) {
			device_set_desc(dev, gve_devs[i].name);
			return (BUS_PROBE_DEFAULT);
		}
	}
	return (ENXIO);
}

static void
gve_free_sys_res_mem(struct gve_priv *priv)
{
	if (priv->msix_table != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->msix_table), priv->msix_table);

	if (priv->db_bar != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->db_bar), priv->db_bar);

	if (priv->reg_bar != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->reg_bar), priv->reg_bar);
}

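/*
 * Attach: map the register, doorbell, and MSI-X BARs, bring up the admin
 * queue and describe the device, configure device resources, allocate the
 * rings, create the ifnet, advertise the driver version to the device, and
 * start the service taskqueue.
 */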
static int
gve_attach(device_t dev)
{
	struct gve_priv *priv;
	int rid;
	int err;

	snprintf(gve_version, sizeof(gve_version), "%d.%d.%d",
	    GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB);

	priv = device_get_softc(dev);
	priv->dev = dev;
	GVE_IFACE_LOCK_INIT(priv->gve_iface_lock);

	pci_enable_busmaster(dev);

	rid = PCIR_BAR(GVE_REGISTER_BAR);
	priv->reg_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->reg_bar == NULL) {
		device_printf(dev, "Failed to allocate BAR0\n");
		err = ENXIO;
		goto abort;
	}

	rid = PCIR_BAR(GVE_DOORBELL_BAR);
	priv->db_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->db_bar == NULL) {
		device_printf(dev, "Failed to allocate BAR2\n");
		err = ENXIO;
		goto abort;
	}

	rid = pci_msix_table_bar(priv->dev);
	priv->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->msix_table == NULL) {
		device_printf(dev, "Failed to allocate msix table\n");
		err = ENXIO;
		goto abort;
	}

	err = gve_alloc_adminq_and_describe_device(priv);
	if (err != 0)
		goto abort;

	err = gve_alloc_and_configure_device_resources(priv);
	if (err != 0)
		goto abort;

	err = gve_alloc_rings(priv);
	if (err != 0)
		goto abort;

	gve_setup_ifnet(dev, priv);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;

	bus_write_multi_1(priv->reg_bar, DRIVER_VERSION, GVE_DRIVER_VERSION,
	    sizeof(GVE_DRIVER_VERSION) - 1);

	TASK_INIT(&priv->service_task, 0, gve_service_task, priv);
	priv->service_tq = taskqueue_create("gve service", M_WAITOK | M_ZERO,
	    taskqueue_thread_enqueue, &priv->service_tq);
	taskqueue_start_threads(&priv->service_tq, 1, PI_NET, "%s service tq",
	    device_get_nameunit(priv->dev));

	gve_setup_sysctl(priv);

	if (bootverbose)
		device_printf(priv->dev, "Successfully attached %s", GVE_DRIVER_VERSION);
	return (0);

abort:
	gve_free_rings(priv);
	gve_deconfigure_and_free_device_resources(priv);
	gve_release_adminq(priv);
	gve_free_sys_res_mem(priv);
	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);
	return (err);
}

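/*
 * Detach: tear down in roughly the reverse order of attach. The ifnet is
 * detached before the device state is destroyed so the stack stops issuing
 * requests first.
 */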
static int
gve_detach(device_t dev)
{
	struct gve_priv *priv = device_get_softc(dev);
	if_t ifp = priv->ifp;
	int error;

	error = bus_generic_detach(dev);
	if (error != 0)
		return (error);

	ether_ifdetach(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
	gve_destroy(priv);
	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

	gve_free_rings(priv);
	gve_free_sys_res_mem(priv);
	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);

	while (taskqueue_cancel(priv->service_tq, &priv->service_task, NULL))
		taskqueue_drain(priv->service_tq, &priv->service_task);
	taskqueue_free(priv->service_tq);

	if_free(ifp);
	return (0);
}

static device_method_t gve_methods[] = {
	DEVMETHOD(device_probe, gve_probe),
	DEVMETHOD(device_attach, gve_attach),
	DEVMETHOD(device_detach, gve_detach),
	DEVMETHOD_END
};

static driver_t gve_driver = {
	"gve",
	gve_methods,
	sizeof(struct gve_priv)
};

#if __FreeBSD_version < 1301503
static devclass_t gve_devclass;

DRIVER_MODULE(gve, pci, gve_driver, gve_devclass, 0, 0);
#else
DRIVER_MODULE(gve, pci, gve_driver, 0, 0);
#endif
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, gve, gve_devs,
    nitems(gve_devs));