/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet6.h"

#include "gve.h"
#include "gve_dqo.h"

static void
gve_unmap_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
}

static void
gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	pending_pkt->qpl_buf_head = -1;
	pending_pkt->num_qpl_bufs = 0;
}

static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int i;

	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
		pending_pkt = &tx->dqo.pending_pkts[i];
		if (!pending_pkt->mbuf)
			continue;

		if (gve_is_qpl(tx->com.priv))
			gve_clear_qpl_pending_pkt(pending_pkt);
		else
			gve_unmap_packet(tx, pending_pkt);

		m_freem(pending_pkt->mbuf);
		pending_pkt->mbuf = NULL;
	}
}

void
gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	if (tx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->dqo.desc_ring = NULL;
	}

	if (tx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.pending_pkts != NULL) {
		gve_free_tx_mbufs_dqo(tx);

		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
				if (tx->dqo.pending_pkts[j].state !=
				    GVE_PACKET_STATE_UNALLOCATED)
					bus_dmamap_destroy(tx->dqo.buf_dmatag,
					    tx->dqo.pending_pkts[j].dmamap);
		}

		free(tx->dqo.pending_pkts, M_GVE);
		tx->dqo.pending_pkts = NULL;
	}

	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
		bus_dma_tag_destroy(tx->dqo.buf_dmatag);

	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
		free(tx->dqo.qpl_bufs, M_GVE);
		tx->dqo.qpl_bufs = NULL;
	}
}

static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	int err;
	int j;

	/*
	 * DMA tag for mapping Tx mbufs
	 * The maxsize, nsegments, and maxsegsize params should match
	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
	 */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    GVE_TSO_MAXSIZE_DQO,	/* maxsize */
	    GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
	    GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &tx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		return (err);
	}

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
		    &tx->dqo.pending_pkts[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "failed to create dmamap for pending packet %d: %d\n",
			    j, err);
			return (err);
		}
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}

	return (0);
}

int
gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	uint16_t num_pending_pkts;
	int err;

	/* Descriptor ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for tx ring %d\n", i);
		goto abort;
	}
	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;

	/* Completion ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for tx ring %d\n", i);
		goto abort;
	}
	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;

	/*
	 * pending_pkts array
	 *
	 * The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun the
	 * completion queue.
	 */
	num_pending_pkts = priv->tx_desc_cnt;
	/*
	 * Reserve space for descriptor completions, which will be reported at
	 * most once every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;
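	/*
	 * Worked example (illustrative values only, not taken from the
	 * driver headers): with a 1024-entry descriptor ring and a
	 * report-event interval of 32, this reserves 1024 / 32 = 32 slots,
	 * leaving 992 pending packets.
	 */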

	tx->dqo.num_pending_pkts = num_pending_pkts;
	tx->dqo.pending_pkts = malloc(
	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt;

		tx->com.qpl = &priv->qpls[i];
		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		tx->dqo.qpl_bufs = malloc(
		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
		    M_GVE, M_WAITOK | M_ZERO);
	} else {
		err = gve_tx_alloc_rda_fields_dqo(tx);
		if (err != 0)
			goto abort;
	}
	return (0);

abort:
	gve_tx_free_ring_dqo(priv, i);
	return (err);
}

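/*
 * Derives a non-zero 15-bit path hash from the mbuf's flowid and records it
 * in the Tx metadata. What the device does with path_hash is an assumption
 * here (presumably some form of flow/path selection); the code below only
 * guarantees the fold, mask, and non-zero properties.
 */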
static void
gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
    struct gve_tx_metadata_dqo *metadata)
{
	uint32_t hash = mbuf->m_pkthdr.flowid;
	uint16_t path_hash;

	metadata->version = GVE_TX_METADATA_VERSION_DQO;
	if (hash) {
		path_hash = hash ^ (hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (__predict_false(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}

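/*
 * Writes one or more packet descriptors for a single contiguous buffer,
 * splitting it into GVE_TX_MAX_BUF_SIZE_DQO-sized chunks. end_of_packet is
 * set only on the final chunk of the final buffer of the packet.
 */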
static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
    uint32_t *desc_idx, uint32_t len, uint64_t addr,
    int16_t compl_tag, bool eop, bool csum_enabled)
{
	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
		    &tx->dqo.desc_ring[*desc_idx].pkt;
		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = htole64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = csum_enabled,
			.compl_tag = htole16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	}
}

static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
    const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
    int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
	desc->mss = mbuf->m_pkthdr.tso_segsz;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
    const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

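/*
 * Ensures at least `len` bytes at the front of the mbuf chain are contiguous
 * in the first mbuf. Note that on failure this expands to a bare
 * `return (EINVAL)`, so it is only usable inside functions that return an
 * errno-style int.
 */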
#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (EINVAL);		\
	}						\
} while (0)

static int
gve_prep_tso(struct mbuf *mbuf, int *header_len)
{
	uint8_t l3_off, l4_off = 0;
	struct ether_header *eh;
	struct tcphdr *th;
	u_short csum;

	PULLUP_HDR(mbuf, sizeof(*eh));
	eh = mtod(mbuf, struct ether_header *);
	KASSERT(ntohs(eh->ether_type) != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));
	l3_off = ETHER_HDR_LEN;

#ifdef INET6
	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
		struct ip6_hdr *ip6;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
		    /*csum=*/0);
	} else
#endif
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		struct ip *ip;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
		ip = (struct ip *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + (ip->ip_hl << 2);
		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htons(IPPROTO_TCP));
	}

	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr));
	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
	*header_len = l4_off + (th->th_off << 2);

	/*
	 * Hardware requires the th->th_sum to not include the TCP payload,
	 * hence we recompute the csum with it excluded.
	 */
	th->th_sum = csum;

	return (0);
}

static int
gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
    bool is_tso, uint32_t *desc_idx)
{
	struct gve_tx_general_context_desc_dqo *gen_desc;
	struct gve_tx_tso_context_desc_dqo *tso_desc;
	struct gve_tx_metadata_dqo metadata;
	int header_len;
	int err;

	metadata = (struct gve_tx_metadata_dqo){0};
	gve_extract_tx_metadata_dqo(mbuf, &metadata);

	if (is_tso) {
		err = gve_prep_tso(mbuf, &header_len);
		if (__predict_false(err)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_delayed_pkt_tsoerr, 1);
			counter_exit();
			return (err);
		}

		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);

		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}

	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	return (0);
}

static int
gve_map_mbuf_dqo(struct gve_tx_ring *tx,
    struct mbuf **mbuf, bus_dmamap_t dmamap,
    bus_dma_segment_t *segs, int *nsegs, int attempt)
{
	struct mbuf *m_new = NULL;
	int err;

	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);

	switch (err) {
	case __predict_true(0):
		break;
	case EFBIG:
		if (__predict_false(attempt > 0))
			goto abort;

		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_collapse, 1);
		counter_exit();

		/* Try m_collapse before m_defrag */
		m_new = m_collapse(*mbuf, M_NOWAIT,
		    GVE_TX_MAX_DATA_DESCS_DQO);
		if (m_new == NULL) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag, 1);
			counter_exit();
			m_new = m_defrag(*mbuf, M_NOWAIT);
		}

		if (__predict_false(m_new == NULL)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag_err, 1);
			counter_exit();

			m_freem(*mbuf);
			*mbuf = NULL;
			err = ENOMEM;
			goto abort;
		} else {
			*mbuf = m_new;
			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
			    segs, nsegs, ++attempt));
		}
	case ENOMEM:
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
		counter_exit();
		goto abort;
	default:
		goto abort;
	}

	return (0);

abort:
	counter_enter();
	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
	counter_exit();
	return (err);
}

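/*
 * One descriptor slot is deliberately kept unused (at most desc_mask ==
 * size - 1 slots are reported), so a completely full ring is never confused
 * with an empty one.
 */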
static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
{
	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
	    tx->dqo.desc_mask;

	return (tx->dqo.desc_mask - num_used);
}

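/*
 * Pending packets sit on two lock-free singly linked lists indexed by array
 * position: a consumer list (free_pending_pkts_csm) used only by the
 * transmit path, and a producer list (free_pending_pkts_prd) that frees are
 * pushed onto with atomic ops (mostly from the completion taskqueue). When
 * the consumer list runs dry, the transmit path atomically steals the entire
 * producer list. An index of -1 denotes an empty list.
 */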
static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	int32_t index = tx->dqo.free_pending_pkts_csm;
	struct gve_tx_pending_pkt_dqo *pending_pkt;

	/*
	 * No pending packets available in the consumer list,
	 * try to steal the producer list.
	 */
	if (__predict_false(index == -1)) {
		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
		    &tx->dqo.free_pending_pkts_prd, -1);

		index = tx->dqo.free_pending_pkts_csm;
		if (__predict_false(index == -1))
			return (NULL);
	}

	pending_pkt = &tx->dqo.pending_pkts[index];

	/* Remove pending_pkt from the consumer list */
	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	return (pending_pkt);
}

static void
gve_free_pending_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	int index = pending_pkt - tx->dqo.pending_pkts;
	int32_t old_head;

	pending_pkt->state = GVE_PACKET_STATE_FREE;

	/* Add pending_pkt to the producer list */
	while (true) {
		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);

		pending_pkt->next = old_head;
		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
		    old_head, index))
			break;
	}
}

/*
 * Has the side-effect of retrieving the value of the last desc index
 * processed by the NIC. hw_tx_head is written to by the completions-processing
 * taskqueue upon receiving descriptor-completions.
 */
static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
{
	if (needed_descs <= num_avail_desc_ring_slots(tx))
		return (true);

	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
	if (needed_descs > num_avail_desc_ring_slots(tx)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
		counter_exit();
		return (false);
	}

	return (true);
}

static void
gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
{
	uint32_t last_report_event_interval;
	uint32_t last_desc_idx;

	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
	last_report_event_interval =
	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;

	if (__predict_false(last_report_event_interval >=
	    GVE_TX_MIN_RE_INTERVAL)) {
		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
		tx->dqo.last_re_idx = last_desc_idx;
	}
}

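/*
 * qpl_bufs_produced is advanced atomically by the completion path; the
 * transmit path keeps a cached copy so the common case avoids an atomic
 * read, refreshing the cache only when the cheap check fails.
 */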
static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
{
	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);

	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
	    &tx->dqo.qpl_bufs_produced);
	available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);
	return (false);
}

static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
{
	int32_t buf = tx->dqo.free_qpl_bufs_csm;

	if (__predict_false(buf == -1)) {
		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
		    &tx->dqo.free_qpl_bufs_prd, -1);
		buf = tx->dqo.free_qpl_bufs_csm;
		if (__predict_false(buf == -1))
			return (-1);
	}

	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
	tx->dqo.qpl_bufs_consumed++;
	return (buf);
}

/*
 * Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
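/*
 * For example, assuming 4 KiB pages and 2 KiB Tx buffers (so
 * GVE_TX_BUFS_PER_PAGE_DQO == 2; illustrative values only), buffer index 5
 * maps to page 2 at offset 2048.
 */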
static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
    int32_t index, void **va, bus_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
	    GVE_TX_BUF_SHIFT_DQO;

	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);

	return (&tx->com.qpl->dmas[page_id]);
}

static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
    struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
    bool csum_enabled, int16_t completion_tag,
    uint32_t *desc_idx)
{
	int32_t pkt_len = mbuf->m_pkthdr.len;
	struct gve_dma_handle *dma;
	uint32_t copy_offset = 0;
	int32_t prev_buf = -1;
	uint32_t copy_len;
	bus_addr_t addr;
	int32_t buf;
	void *va;

	MPASS(pkt->num_qpl_bufs == 0);
	MPASS(pkt->qpl_buf_head == -1);

	while (copy_offset < pkt_len) {
		buf = gve_tx_alloc_qpl_buf(tx);
		/* We already checked for availability */
		MPASS(buf != -1);

		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
		m_copydata(mbuf, copy_offset, copy_len, va);
		copy_offset += copy_len;

		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
		    copy_len, addr, completion_tag,
		    /*eop=*/copy_offset == pkt_len,
		    csum_enabled);

		/* Link all the qpl bufs for a packet */
		if (prev_buf == -1)
			pkt->qpl_buf_head = buf;
		else
			tx->dqo.qpl_bufs[prev_buf] = buf;

		prev_buf = buf;
		pkt->num_qpl_bufs++;
	}

	tx->dqo.qpl_bufs[buf] = -1;
}

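/*
 * Transmit entry point for the QPL ("bounce buffer") mode: the mbuf payload
 * is copied into pre-registered QPL buffers, so no per-packet DMA mapping is
 * performed. Returns ENOBUFS when descriptors, QPL buffers, or completion
 * tags are exhausted, in which case the mbuf is not consumed.
 */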
int
gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
	/* Check if we have enough room in the desc ring */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
		counter_exit();
		return (ENOBUFS);
	}

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;
	pkt->mbuf = mbuf;

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort;

	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
	    has_csum_flag, completion_tag, &desc_idx);

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

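/*
 * Transmit entry point for the RDA mode: the mbuf chain is DMA-mapped
 * directly. gve_map_mbuf_dqo() may substitute a collapsed/defragmented
 * chain, which is why the caller passes a struct mbuf ** and must use the
 * possibly updated pointer afterwards.
 */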
int
gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
{
	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	struct mbuf *mbuf = *mbuf_ptr;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;
	int i;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	/*
	 * This mbuf might end up needing more than 1 pkt desc.
	 * The actual number, nsegs, is known only after the
	 * expensive gve_map_mbuf_dqo call. The check below
	 * exists to fail early when the desc ring is really full.
	 */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    1 +				/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;

	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
	    segs, &nsegs, /*attempt=*/0);
	if (err)
		goto abort;
	mbuf = *mbuf_ptr; /* gve_map_mbuf_dqo might replace the mbuf chain */
	pkt->mbuf = mbuf;

	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(
	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
		err = ENOBUFS;
		goto abort_with_dma;
	}

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort_with_dma;

	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nsegs; i++) {
		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
		    segs[i].ds_len, segs[i].ds_addr,
		    completion_tag, /*eop=*/i == (nsegs - 1),
		    has_csum_flag);
	}

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort_with_dma:
	gve_unmap_packet(tx, pkt);
abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

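/*
 * Returns a completed packet's QPL buffers to the free list: the chain is
 * walked once to sync the backing pages and find its tail, then the whole
 * chain is pushed onto the producer free list with a single CAS loop.
 */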
static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pkt)
{
	int32_t buf = pkt->qpl_buf_head;
	struct gve_dma_handle *dma;
	int32_t qpl_buf_tail;
	int32_t old_head;
	int i;

	for (i = 0; i < pkt->num_qpl_bufs; i++) {
		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
		qpl_buf_tail = buf;
		buf = tx->dqo.qpl_bufs[buf];
	}
	MPASS(buf == -1);
	buf = qpl_buf_tail;

	while (true) {
		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
		tx->dqo.qpl_bufs[buf] = old_head;

		/*
		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
		 * is visible only after the linked list from this pkt is
		 * attached above to old_head.
		 */
		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
		    old_head, pkt->qpl_buf_head))
			break;
	}
	/*
	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
	 * visible only after the update to dqo.free_qpl_bufs_prd above.
	 */
	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);

	gve_clear_qpl_pending_pkt(pkt);
}

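/*
 * Handles a packet completion: releases the packet's DMA or QPL resources,
 * frees the mbuf, and returns the packet's byte count so the caller can
 * account tbytes (0 is returned for invalid or unexpected completion tags).
 */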
static uint64_t
gve_handle_packet_completion(struct gve_priv *priv,
    struct gve_tx_ring *tx, uint16_t compl_tag)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int32_t pkt_len;

	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
		    compl_tag);
		return (0);
	}

	pending_pkt = &tx->dqo.pending_pkts[compl_tag];

	/* Packet is allocated but not pending data completion. */
	if (__predict_false(pending_pkt->state !=
	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		device_printf(priv->dev,
		    "No pending data completion: %d\n", compl_tag);
		return (0);
	}

	pkt_len = pending_pkt->mbuf->m_pkthdr.len;

	if (gve_is_qpl(priv))
		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
	else
		gve_unmap_packet(tx, pending_pkt);

	m_freem(pending_pkt->mbuf);
	pending_pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pending_pkt);
	return (pkt_len);
}

int
gve_tx_intr_dqo(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++)
		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int entries;
	int i;

	entries = com->priv->tx_desc_cnt;
	for (i = 0; i < entries; i++)
		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};

	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

void
gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	tx->dqo.desc_head = 0;
	tx->dqo.desc_tail = 0;
	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
	tx->dqo.last_re_idx = 0;

	tx->dqo.compl_head = 0;
	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
	atomic_store_32(&tx->dqo.hw_tx_head, 0);
	tx->dqo.cur_gen_bit = 0;

	gve_free_tx_mbufs_dqo(tx);

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		if (gve_is_qpl(tx->com.priv))
			gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]);
		tx->dqo.pending_pkts[j].next =
		    (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1;
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}
	tx->dqo.free_pending_pkts_csm = 0;
	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		for (j = 0; j < qpl_buf_cnt - 1; j++)
			tx->dqo.qpl_bufs[j] = j + 1;
		tx->dqo.qpl_bufs[j] = -1;

		tx->dqo.free_qpl_bufs_csm = 0;
		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
		tx->dqo.qpl_bufs_consumed = 0;
	}

	gve_tx_clear_desc_ring_dqo(tx);
	gve_tx_clear_compl_ring_dqo(tx);
}

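/*
 * Processes up to `budget` completion-ring entries. Ownership is tracked
 * with a generation bit that flips on every wrap of the completion ring: an
 * entry whose generation bit equals cur_gen_bit is treated as not yet
 * written by the NIC. Returns true if the budget was exhausted, i.e. more
 * work may remain.
 */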
static bool
gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
{
	struct gve_tx_compl_desc_dqo *compl_desc;
	uint64_t bytes_done = 0;
	uint64_t pkts_done = 0;
	uint16_t compl_tag;
	int work_done = 0;
	uint16_t tx_head;
	uint16_t type;

	while (work_done < budget) {
		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag,
		    tx->dqo.compl_ring_mem.map, BUS_DMASYNC_POSTREAD);

		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
		if (compl_desc->generation == tx->dqo.cur_gen_bit)
			break;

		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		atomic_thread_fence_acq();
		type = compl_desc->type;

		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			tx_head = le16toh(compl_desc->tx_head);
			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			compl_tag = le16toh(compl_desc->completion_tag);
			bytes_done += gve_handle_packet_completion(priv,
			    tx, compl_tag);
			pkts_done++;
		}

		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
		    tx->dqo.compl_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
		work_done++;
	}

	/*
	 * Waking the xmit taskqueue has to occur after room has been made in
	 * the queue.
	 */
	atomic_thread_fence_seq_cst();
	if (atomic_load_bool(&tx->stopped) && work_done) {
		atomic_store_bool(&tx->stopped, false);
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}

	tx->done += work_done; /* tx->done is just a sysctl counter */
	counter_enter();
	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
	counter_exit();

	return (work_done == budget);
}

void
gve_tx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}