/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.5\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 3
#define GVE_VERSION_SUB 5

#define GVE_DEFAULT_RX_COPYBREAK 256

/* Devices supported by this driver. */
static struct gve_dev {
	uint16_t vendor_id;
	uint16_t device_id;
	const char *name;
} gve_devs[] = {
	{ PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC, "gVNIC" }
};

struct sx gve_global_lock;

static void gve_start_tx_timeout_service(struct gve_priv *priv);
static void gve_stop_tx_timeout_service(struct gve_priv *priv);

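/*
 * Tell the device which driver, OS, and capability flags we are running
 * with so it can verify compatibility. The gve_driver_info struct is
 * DMA-mapped so the device can read it directly; multi-byte fields are
 * big-endian on the wire. A device that does not implement this check
 * returns EOPNOTSUPP, which is treated as success.
 */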
static int
gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	struct gve_dma_handle driver_info_mem;

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_driver_info),
	    PAGE_SIZE, &driver_info_mem);

	if (err != 0)
		return (ENOMEM);

	driver_info = driver_info_mem.cpu_addr;

	*driver_info = (struct gve_driver_info) {
		.os_type = 3, /* FreeBSD */
		.driver_major = GVE_VERSION_MAJOR,
		.driver_minor = GVE_VERSION_MINOR,
		.driver_sub = GVE_VERSION_SUB,
		.os_version_major = htobe32(FBSD_VERSION_MAJOR),
		.os_version_minor = htobe32(FBSD_VERSION_MINOR),
		.os_version_sub = htobe32(FBSD_VERSION_PATCH),
		.driver_capability_flags = {
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS1),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS2),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS3),
			htobe64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};

	snprintf(driver_info->os_version_str1, sizeof(driver_info->os_version_str1),
	    "FreeBSD %u", __FreeBSD_version);

	/* Flush the CPU's writes before the device DMA-reads the buffer */
	bus_dmamap_sync(driver_info_mem.tag, driver_info_mem.map,
	    BUS_DMASYNC_PREWRITE);

	err = gve_adminq_verify_driver_compatibility(priv,
	    sizeof(struct gve_driver_info), driver_info_mem.bus_addr);

	/* It's ok if the device doesn't support this */
	if (err == EOPNOTSUPP)
		err = 0;

	gve_dma_free_coherent(&driver_info_mem);

	return (err);
}

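/*
 * Respond to a tx queue that has timed-out packets. If the queue has not
 * been kicked within the cooldown period, assume a missed interrupt and
 * enqueue the cleanup task to reap completions; if a recent kick did not
 * help, fall back to a full device reset.
 */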
static void
gve_handle_tx_timeout(struct gve_priv *priv, struct gve_tx_ring *tx,
    int num_timeout_pkts)
{
	int64_t time_since_last_kick;

	counter_u64_add_protected(tx->stats.tx_timeout, 1);

	/* last_kicked is never GVE_TIMESTAMP_INVALID so we can skip checking */
	time_since_last_kick = gve_seconds_since(&tx->last_kicked);

	/* Try kicking first in case the timeout is due to a missed interrupt */
	if (time_since_last_kick > GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC) {
		device_printf(priv->dev,
		    "Found %d timed out packet(s) on txq%d, kicking it for completions\n",
		    num_timeout_pkts, tx->com.id);
		gve_set_timestamp(&tx->last_kicked);
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
	} else {
		device_printf(priv->dev,
		    "Found %d timed out packet(s) on txq%d with its last kick %jd sec ago which is less than the cooldown period %d. Resetting device\n",
		    num_timeout_pkts, tx->com.id,
		    (intmax_t)time_since_last_kick,
		    GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC);
		gve_schedule_reset(priv);
	}
}

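/*
 * Callout handler for the tx timeout service: checks one tx queue per
 * invocation, round-robin, and re-arms itself to run again after
 * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC seconds.
 */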
static void
gve_tx_timeout_service_callback(void *data)
{
	struct gve_priv *priv = (struct gve_priv *)data;
	struct gve_tx_ring *tx;
	uint16_t num_timeout_pkts;

	tx = &priv->tx[priv->check_tx_queue_idx];

	num_timeout_pkts = gve_is_gqi(priv) ?
	    gve_check_tx_timeout_gqi(priv, tx) :
	    gve_check_tx_timeout_dqo(priv, tx);
	if (num_timeout_pkts)
		gve_handle_tx_timeout(priv, tx, num_timeout_pkts);

	priv->check_tx_queue_idx = (priv->check_tx_queue_idx + 1) %
	    priv->tx_cfg.num_queues;
	callout_reset_sbt(&priv->tx_timeout_service,
	    SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
	    gve_tx_timeout_service_callback, (void *)priv, 0);
}

static void
gve_start_tx_timeout_service(struct gve_priv *priv)
{
	priv->check_tx_queue_idx = 0;
	callout_init(&priv->tx_timeout_service, true);
	callout_reset_sbt(&priv->tx_timeout_service,
	    SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
	    gve_tx_timeout_service_callback, (void *)priv, 0);
}

static void
gve_stop_tx_timeout_service(struct gve_priv *priv)
{
	callout_drain(&priv->tx_timeout_service);
}

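/*
 * Bring the data path up: program the hardware-assist bits from the
 * enabled capabilities, register QPLs when in QPL mode, create the rx
 * and tx rings on the device, and unmask the queue interrupts. A ring
 * setup failure schedules a device reset. Called with the iface lock
 * held.
 */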
static int
gve_up(struct gve_priv *priv)
{
	if_t ifp = priv->ifp;
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	if (device_is_attached(priv->dev) == 0) {
		device_printf(priv->dev, "Cannot bring the iface up when detached\n");
		return (ENXIO);
	}

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
		return (0);

	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
		if_sethwassistbits(ifp, CSUM_IP6_TCP | CSUM_IP6_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO6)
		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);

	if (gve_is_qpl(priv)) {
		err = gve_register_qpls(priv);
		if (err != 0)
			goto reset;
	}

	err = gve_create_rx_rings(priv);
	if (err != 0)
		goto reset;

	err = gve_create_tx_rings(priv);
	if (err != 0)
		goto reset;

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		if_link_state_change(ifp, LINK_STATE_UP);
		gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}

	gve_unmask_all_queue_irqs(priv);
	gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
	priv->interface_up_cnt++;

	gve_start_tx_timeout_service(priv);

	return (0);

reset:
	gve_schedule_reset(priv);
	return (err);
}

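/*
 * Tear the data path down in roughly the reverse order of gve_up: stop
 * the tx timeout service, mark the link and driver state down, and
 * destroy the rings on the device. A teardown failure schedules a
 * device reset. Called with the iface lock held.
 */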
static void
gve_down(struct gve_priv *priv)
{
	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
		return;

	gve_stop_tx_timeout_service(priv);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}

	if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	if (gve_destroy_rx_rings(priv) != 0)
		goto reset;

	if (gve_destroy_tx_rings(priv) != 0)
		goto reset;

	if (gve_is_qpl(priv)) {
		if (gve_unregister_qpls(priv) != 0)
			goto reset;
	}

	if (gve_is_gqi(priv))
		gve_mask_all_queue_irqs(priv);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
	priv->interface_down_cnt++;
	return;

reset:
	gve_schedule_reset(priv);
}

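/*
 * Change the number of active rx queues. The interface is brought down
 * first; shrinking frees only the excess rings while growing allocates
 * the new ones, after which the interface is brought back up with the
 * new count. gve_adjust_tx_queues() below is the tx-side counterpart.
 */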
int
gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->rx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_rx_rings(priv, new_queue_cnt, priv->rx_cfg.num_queues);
	} else {
		err = gve_alloc_rx_rings(priv, priv->rx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->rx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

int
gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->tx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_tx_rings(priv, new_queue_cnt, priv->tx_cfg.num_queues);
	} else {
		err = gve_alloc_tx_rings(priv, priv->tx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->tx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

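/*
 * Change the rx or tx descriptor ring size. All rings of the affected
 * type are freed and re-allocated at the new size; on failure we try to
 * come back up with the previous size before giving up entirely.
 */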
int
gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
{
	int err;
	uint16_t prev_desc_cnt;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (is_rx) {
		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		prev_desc_cnt = priv->rx_desc_cnt;
		priv->rx_desc_cnt = new_desc_cnt;
		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
			priv->rx_desc_cnt = prev_desc_cnt;
			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		}
	} else {
		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		prev_desc_cnt = priv->tx_desc_cnt;
		priv->tx_desc_cnt = new_desc_cnt;
		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
			priv->tx_desc_cnt = prev_desc_cnt;
			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
		}
	}

	if (err != 0) {
		device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!\n");
		return (err);
	}

	err = gve_up(priv);
	if (err != 0) {
		gve_schedule_reset(priv);
		return (err);
	}

	return (0);
}

static int
gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu)
{
	/*
	 * Use 4k buffers only if mode is DQ, 4k buffers flag is on,
	 * and either hw LRO is enabled or mtu is greater than 2048
	 */
	if (!gve_is_gqi(priv) && gve_allow_4k_rx_buffers &&
	    (!gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE))
		return (GVE_4K_RX_BUFFER_SIZE_DQO);

	return (GVE_DEFAULT_RX_BUFFER_SIZE);
}

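/*
 * Validate a new MTU and apply it via the admin queue. In DQ mode the
 * MTU also determines the rx buffer size (default vs 4K); if that
 * changes, the rx rings are re-allocated to match.
 */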
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	const uint32_t max_problem_range = 8227;
	const uint32_t min_problem_range = 7822;
	uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu);
	int err;

	if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
		device_printf(priv->dev, "Invalid new MTU setting. new mtu: %d max mtu: %d min mtu: %d\n",
		    new_mtu, priv->max_mtu, ETHERMIN);
		return (EINVAL);
	}

	/*
	 * When hardware LRO is enabled in DQ mode, MTUs within the range
	 * [7822, 8227] trigger hardware issues which cause a drastic drop
	 * in throughput.
	 */
	if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
	    new_mtu >= min_problem_range && new_mtu <= max_problem_range &&
	    new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) {
		device_printf(priv->dev,
		    "Cannot set MTU to %d within the range [%d, %d] while HW LRO is enabled and not using 4k RX Buffers\n",
		    new_mtu, min_problem_range, max_problem_range);
		return (EINVAL);
	}

	err = gve_adminq_set_mtu(priv, new_mtu);
	if (err == 0) {
		if (bootverbose)
			device_printf(priv->dev, "MTU set to %d\n", new_mtu);
		if_setmtu(ifp, new_mtu);
		/* Need to re-alloc RX queues if RX buffer size changed */
		if (!gve_is_gqi(priv) &&
		    new_rx_buf_size != priv->rx_buf_size_dqo) {
			gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
			priv->rx_buf_size_dqo = new_rx_buf_size;
			gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
		}
	} else {
		device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
	}

	return (err);
}

static void
gve_init(void *arg)
{
	struct gve_priv *priv = (struct gve_priv *)arg;

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP)) {
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
	}
}

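/*
 * Ioctl handler. Configuration changes that affect the data path (MTU,
 * capabilities) are applied by bouncing the interface: gve_down(),
 * apply the change, gve_up(), all under the iface lock.
 */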
static int
gve_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct gve_priv *priv;
	struct ifreq *ifr;
	int rc = 0;

	priv = if_getsoftc(ifp);
	ifr = (struct ifreq *)data;

	switch (command) {
	case SIOCSIFMTU:
		if (if_getmtu(ifp) == ifr->ifr_mtu)
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		gve_set_mtu(ifp, ifr->ifr_mtu);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFFLAGS:
		if ((if_getflags(ifp) & IFF_UP) != 0) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				rc = gve_up(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		} else {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				gve_down(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		}
		break;

	case SIOCSIFCAP:
		if (ifr->ifr_reqcap == if_getcapenable(ifp))
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		if_setcapenable(ifp, ifr->ifr_reqcap);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFMEDIA:
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		rc = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;

	default:
		rc = ether_ioctl(ifp, command, data);
		break;
	}

	return (rc);
}

static int
gve_media_change(if_t ifp)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	device_printf(priv->dev, "Media change not supported\n");
	return (0);
}

static void
gve_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_active |= IFM_AUTO;
	} else {
		ifmr->ifm_active |= IFM_NONE;
	}

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
}

static uint64_t
gve_get_counter(if_t ifp, ift_counter cnt)
{
	struct gve_priv *priv;
	uint64_t rpackets = 0;
	uint64_t tpackets = 0;
	uint64_t rbytes = 0;
	uint64_t tbytes = 0;
	uint64_t rx_dropped_pkt = 0;
	uint64_t tx_dropped_pkt = 0;

	priv = if_getsoftc(ifp);

	gve_accum_stats(priv, &rpackets, &rbytes, &rx_dropped_pkt, &tpackets,
	    &tbytes, &tx_dropped_pkt);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (rpackets);

	case IFCOUNTER_OPACKETS:
		return (tpackets);

	case IFCOUNTER_IBYTES:
		return (rbytes);

	case IFCOUNTER_OBYTES:
		return (tbytes);

	case IFCOUNTER_IQDROPS:
		return (rx_dropped_pkt);

	case IFCOUNTER_OQDROPS:
		return (tx_dropped_pkt);

	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

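/*
 * Allocate and configure the ifnet: install the driver entry points,
 * set the TSO limits for RDA mode (QPL mode copies each packet into a
 * bounce buffer, so no segment limits apply there), advertise
 * checksum/TSO/LRO capabilities, and attach as an Ethernet interface
 * with a single autoselect media.
 */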
static void
gve_setup_ifnet(device_t dev, struct gve_priv *priv)
{
	int caps = 0;
	if_t ifp;

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setsoftc(ifp, priv);
	if_setdev(ifp, dev);
	if_setinitfn(ifp, gve_init);
	if_setioctlfn(ifp, gve_ioctl);
	if_settransmitfn(ifp, gve_xmit_ifp);
	if_setqflushfn(ifp, gve_qflush);

	/*
	 * Set TSO limits, must match the arguments to bus_dma_tag_create
	 * when creating tx->dqo.buf_dmatag. Only applies to the RDA mode
	 * because in QPL we copy the entire packet into the bounce buffer
	 * and thus it does not matter how fragmented the mbuf is.
	 */
	if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
		if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
		if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
	}
	if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);

#if __FreeBSD_version >= 1400086
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
#else
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH);
#endif

	ifmedia_init(&priv->media, IFM_IMASK, gve_media_change, gve_media_status);
	if_setgetcounterfn(ifp, gve_get_counter);

	caps = IFCAP_RXCSUM |
	    IFCAP_TXCSUM |
	    IFCAP_TXCSUM_IPV6 |
	    IFCAP_TSO |
	    IFCAP_LRO;

	if ((priv->supported_features & GVE_SUP_JUMBO_FRAMES_MASK) != 0)
		caps |= IFCAP_JUMBO_MTU;

	if_setcapabilities(ifp, caps);
	if_setcapenable(ifp, caps);

	if (bootverbose)
		device_printf(priv->dev, "Setting initial MTU to %d\n", priv->max_mtu);
	if_setmtu(ifp, priv->max_mtu);

	ether_ifattach(ifp, priv->mac);

	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
}

static int
gve_alloc_counter_array(struct gve_priv *priv)
{
	int err;

	err = gve_dma_alloc_coherent(priv, sizeof(uint32_t) * priv->num_event_counters,
	    PAGE_SIZE, &priv->counter_array_mem);
	if (err != 0)
		return (err);

	priv->counters = priv->counter_array_mem.cpu_addr;
	return (0);
}

static void
gve_free_counter_array(struct gve_priv *priv)
{
	if (priv->counters != NULL)
		gve_dma_free_coherent(&priv->counter_array_mem);
	priv->counter_array_mem = (struct gve_dma_handle){};
	/* NULL the pointer so a repeated call cannot double-free */
	priv->counters = NULL;
}

static int
gve_alloc_irq_db_array(struct gve_priv *priv)
{
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_irq_db) * (priv->num_queues), PAGE_SIZE,
	    &priv->irqs_db_mem);
	if (err != 0)
		return (err);

	priv->irq_db_indices = priv->irqs_db_mem.cpu_addr;
	return (0);
}

static void
gve_free_irq_db_array(struct gve_priv *priv)
{
	if (priv->irq_db_indices != NULL)
		gve_dma_free_coherent(&priv->irqs_db_mem);
	priv->irqs_db_mem = (struct gve_dma_handle){};
	/* NULL the pointer so a repeated call cannot double-free */
	priv->irq_db_indices = NULL;
}

static void
gve_free_rings(struct gve_priv *priv)
{
	gve_free_irqs(priv);

	gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
	free(priv->tx, M_GVE);
	priv->tx = NULL;

	gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
	free(priv->rx, M_GVE);
	priv->rx = NULL;
}

static int
gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.max_queues,
	    M_GVE, M_WAITOK | M_ZERO);
	err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
	if (err != 0)
		goto abort;

	priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.max_queues,
	    M_GVE, M_WAITOK | M_ZERO);
	err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
	if (err != 0)
		goto abort;

	err = gve_alloc_irqs(priv);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_free_rings(priv);
	return (err);
}

static void
gve_deconfigure_and_free_device_resources(struct gve_priv *priv)
{
	int err;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK)) {
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err != 0) {
			device_printf(priv->dev, "Failed to deconfigure device resources: err=%d\n",
			    err);
			return;
		}
		if (bootverbose)
			device_printf(priv->dev, "Deconfigured device resources\n");
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	}

	gve_free_irq_db_array(priv);
	gve_free_counter_array(priv);

	if (priv->ptype_lut_dqo) {
		free(priv->ptype_lut_dqo, M_GVE);
		priv->ptype_lut_dqo = NULL;
	}
}

static int
gve_alloc_and_configure_device_resources(struct gve_priv *priv)
{
	int err;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK))
		return (0);

	err = gve_alloc_counter_array(priv);
	if (err != 0)
		return (err);

	err = gve_alloc_irq_db_array(priv);
	if (err != 0)
		goto abort;

	err = gve_adminq_configure_device_resources(priv);
	if (err != 0) {
		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
		    err);
		err = (ENXIO);
		goto abort;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE,
		    M_WAITOK | M_ZERO);

		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err != 0) {
			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
			    err);
			goto abort;
		}
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	if (bootverbose)
		device_printf(priv->dev, "Configured device resources\n");
	return (0);

abort:
	gve_deconfigure_and_free_device_resources(priv);
	return (err);
}

static void
gve_set_queue_cnts(struct gve_priv *priv)
{
	priv->tx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_TX_QUEUES);
	priv->rx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_RX_QUEUES);
	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;

	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = MIN(priv->default_num_queues,
		    priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = MIN(priv->default_num_queues,
		    priv->rx_cfg.num_queues);
	}

	priv->num_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	priv->mgmt_msix_idx = priv->num_queues;
}

static int
gve_alloc_adminq_and_describe_device(struct gve_priv *priv)
{
	int err;

	if ((err = gve_adminq_alloc(priv)) != 0)
		return (err);

	if ((err = gve_verify_driver_compatibility(priv)) != 0) {
		device_printf(priv->dev,
		    "Failed to verify driver compatibility: err=%d\n", err);
		goto abort;
	}

	if ((err = gve_adminq_describe_device(priv)) != 0)
		goto abort;

	gve_set_queue_cnts(priv);

	priv->num_registered_pages = 0;
	return (0);

abort:
	gve_release_adminq(priv);
	return (err);
}

void
gve_schedule_reset(struct gve_priv *priv)
{
	if (gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET))
		return;

	device_printf(priv->dev, "Scheduling reset task!\n");
	gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	taskqueue_enqueue(priv->service_tq, &priv->service_task);
}

static void
gve_destroy(struct gve_priv *priv)
{
	gve_down(priv);
	gve_deconfigure_and_free_device_resources(priv);
	gve_release_adminq(priv);
}

static void
gve_restore(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_alloc(priv);
	if (err != 0)
		goto abort;

	err = gve_adminq_configure_device_resources(priv);
	if (err != 0) {
		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
		    err);
		err = (ENXIO);
		goto abort;
	}
	if (!gve_is_gqi(priv)) {
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err != 0) {
			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
			    err);
			goto abort;
		}
	}

	err = gve_up(priv);
	if (err != 0)
		goto abort;

	return;

abort:
	device_printf(priv->dev, "Restore failed!\n");
	return;
}

static void
gve_clear_device_resources(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->num_event_counters; i++)
		priv->counters[i] = 0;
	bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
	    BUS_DMASYNC_PREWRITE);

	for (i = 0; i < priv->num_queues; i++)
		priv->irq_db_indices[i] = (struct gve_irq_db){};
	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_PREWRITE);

	if (priv->ptype_lut_dqo)
		*priv->ptype_lut_dqo = (struct gve_ptype_lut){0};
}

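/*
 * Perform a device reset if one is pending: quiesce the interface,
 * release the adminq (which makes the NIC drop every resource
 * registered with it), clear the driver-side bookkeeping flags, and
 * rebuild the whole data path via gve_restore().
 */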
static void
gve_handle_reset(struct gve_priv *priv)
{
	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_DO_RESET))
		return;

	gve_clear_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	gve_set_state_flag(priv, GVE_STATE_FLAG_IN_RESET);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);

	/*
	 * Releasing the adminq causes the NIC to destroy all resources
	 * registered with it, so by clearing the flags beneath we cause
	 * the subsequent gve_down call below to not attempt to tell the
	 * NIC to destroy these resources again.
	 *
	 * The call to gve_down is needed in the first place to refresh
	 * the state and the DMA-able memory within each driver ring.
	 */
	gve_release_adminq(priv);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_QPLREG_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);

	gve_down(priv);
	gve_clear_device_resources(priv);

	gve_restore(priv);

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

	priv->reset_cnt++;
	gve_clear_state_flag(priv, GVE_STATE_FLAG_IN_RESET);
}

static void
gve_handle_link_status(struct gve_priv *priv)
{
	uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS);
	bool link_up = status & GVE_DEVICE_STATUS_LINK_STATUS;

	if (link_up == gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP))
		return;

	if (link_up) {
		if (bootverbose)
			device_printf(priv->dev, "Device link is up.\n");
		if_link_state_change(priv->ifp, LINK_STATE_UP);
		gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	} else {
		device_printf(priv->dev, "Device link is down.\n");
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
	}
}

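/*
 * Service taskqueue handler: picks up reset requests, whether initiated
 * by the device (via the DEVICE_STATUS register) or by the driver
 * itself, and reconciles the link state with what the device reports.
 */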
static void
gve_service_task(void *arg, int pending)
{
	struct gve_priv *priv = (struct gve_priv *)arg;
	uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS);

	if (((GVE_DEVICE_STATUS_RESET_MASK & status) != 0) &&
	    !gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET)) {
		device_printf(priv->dev, "Device requested reset\n");
		gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET);
	}

	gve_handle_reset(priv);
	gve_handle_link_status(priv);
}

static int
gve_probe(device_t dev)
{
	uint16_t deviceid, vendorid;
	int i;

	vendorid = pci_get_vendor(dev);
	deviceid = pci_get_device(dev);

	for (i = 0; i < nitems(gve_devs); i++) {
		if (vendorid == gve_devs[i].vendor_id &&
		    deviceid == gve_devs[i].device_id) {
			device_set_desc(dev, gve_devs[i].name);
			return (BUS_PROBE_DEFAULT);
		}
	}
	return (ENXIO);
}

static void
gve_free_sys_res_mem(struct gve_priv *priv)
{
	if (priv->msix_table != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->msix_table), priv->msix_table);

	if (priv->db_bar != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->db_bar), priv->db_bar);

	if (priv->reg_bar != NULL)
		bus_release_resource(priv->dev, SYS_RES_MEMORY,
		    rman_get_rid(priv->reg_bar), priv->reg_bar);
}

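/*
 * Device attach: map the register, doorbell, and MSI-X BARs, bring up
 * the admin queue and learn the device description, allocate device
 * resources and rings, create the ifnet, and start the service
 * taskqueue. Everything allocated so far is torn down on any failure.
 */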
static int
gve_attach(device_t dev)
{
	struct gve_priv *priv;
	int rid;
	int err;

	snprintf(gve_version, sizeof(gve_version), "%d.%d.%d",
	    GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB);

	priv = device_get_softc(dev);
	priv->dev = dev;
	GVE_IFACE_LOCK_INIT(priv->gve_iface_lock);

	pci_enable_busmaster(dev);

	rid = PCIR_BAR(GVE_REGISTER_BAR);
	priv->reg_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->reg_bar == NULL) {
		device_printf(dev, "Failed to allocate BAR0\n");
		err = ENXIO;
		goto abort;
	}

	rid = PCIR_BAR(GVE_DOORBELL_BAR);
	priv->db_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->db_bar == NULL) {
		device_printf(dev, "Failed to allocate BAR2\n");
		err = ENXIO;
		goto abort;
	}

	rid = pci_msix_table_bar(priv->dev);
	priv->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (priv->msix_table == NULL) {
		device_printf(dev, "Failed to allocate msix table\n");
		err = ENXIO;
		goto abort;
	}

	err = gve_alloc_adminq_and_describe_device(priv);
	if (err != 0)
		goto abort;

	err = gve_alloc_and_configure_device_resources(priv);
	if (err != 0)
		goto abort;

	priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu);
	err = gve_alloc_rings(priv);
	if (err != 0)
		goto abort;

	gve_setup_ifnet(dev, priv);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;

	bus_write_multi_1(priv->reg_bar, DRIVER_VERSION, GVE_DRIVER_VERSION,
	    sizeof(GVE_DRIVER_VERSION) - 1);

	TASK_INIT(&priv->service_task, 0, gve_service_task, priv);
	priv->service_tq = taskqueue_create("gve service", M_WAITOK | M_ZERO,
	    taskqueue_thread_enqueue, &priv->service_tq);
	taskqueue_start_threads(&priv->service_tq, 1, PI_NET, "%s service tq",
	    device_get_nameunit(priv->dev));

	gve_setup_sysctl(priv);

	if (bootverbose)
		device_printf(priv->dev, "Successfully attached %s", GVE_DRIVER_VERSION);
	return (0);

abort:
	gve_free_rings(priv);
	gve_deconfigure_and_free_device_resources(priv);
	gve_release_adminq(priv);
	gve_free_sys_res_mem(priv);
	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);
	return (err);
}

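/*
 * Device detach: detach children and the ifnet, tear down the data path
 * and device resources under the iface lock, release bus resources, and
 * drain the service taskqueue before freeing it.
 */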
static int
gve_detach(device_t dev)
{
	struct gve_priv *priv = device_get_softc(dev);
	if_t ifp = priv->ifp;
	int error;

	error = bus_generic_detach(dev);
	if (error != 0)
		return (error);

	ether_ifdetach(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
	gve_destroy(priv);
	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

	gve_free_rings(priv);
	gve_free_sys_res_mem(priv);
	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);

	while (taskqueue_cancel(priv->service_tq, &priv->service_task, NULL))
		taskqueue_drain(priv->service_tq, &priv->service_task);
	taskqueue_free(priv->service_tq);

	if_free(ifp);
	return (0);
}

static device_method_t gve_methods[] = {
	DEVMETHOD(device_probe, gve_probe),
	DEVMETHOD(device_attach, gve_attach),
	DEVMETHOD(device_detach, gve_detach),
	DEVMETHOD_END
};

static driver_t gve_driver = {
	"gve",
	gve_methods,
	sizeof(struct gve_priv)
};

#if __FreeBSD_version < 1301503
static devclass_t gve_devclass;

DRIVER_MODULE(gve, pci, gve_driver, gve_devclass, 0, 0);
#else
DRIVER_MODULE(gve, pci, gve_driver, 0, 0);
#endif
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, gve, gve_devs,
    nitems(gve_devs));