/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet6.h"

#include "gve.h"
#include "gve_dqo.h"

static void
gve_unmap_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
}

static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int i;

	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
		pending_pkt = &tx->dqo.pending_pkts[i];
		if (!pending_pkt->mbuf)
			continue;

		if (gve_is_qpl(tx->com.priv)) {
			pending_pkt->qpl_buf_head = -1;
			pending_pkt->num_qpl_bufs = 0;
		} else
			gve_unmap_packet(tx, pending_pkt);

		m_freem(pending_pkt->mbuf);
		pending_pkt->mbuf = NULL;
	}
}

void
gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	if (tx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->dqo.desc_ring = NULL;
	}

	if (tx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.pending_pkts != NULL) {
		gve_free_tx_mbufs_dqo(tx);

		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
				if (tx->dqo.pending_pkts[j].state !=
				    GVE_PACKET_STATE_UNALLOCATED)
					bus_dmamap_destroy(tx->dqo.buf_dmatag,
					    tx->dqo.pending_pkts[j].dmamap);
		}

		free(tx->dqo.pending_pkts, M_GVE);
		tx->dqo.pending_pkts = NULL;
	}

	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
		bus_dma_tag_destroy(tx->dqo.buf_dmatag);

	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
		free(tx->dqo.qpl_bufs, M_GVE);
		tx->dqo.qpl_bufs = NULL;
	}
}

static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	int err;
	int j;

	/*
	 * DMA tag for mapping Tx mbufs
	 * The maxsize, nsegments, and maxsegsize params should match
	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
	 */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    GVE_TSO_MAXSIZE_DQO,	/* maxsize */
	    GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
	    GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &tx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		return (err);
	}

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
		    &tx->dqo.pending_pkts[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating pending pkt dmamap %d: %d",
			    j, err);
			return (err);
		}
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}

	return (0);
}

int
gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	uint16_t num_pending_pkts;
	int err;

	/* Descriptor ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;

	/* Completion ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;

	/*
	 * pending_pkts array
	 *
	 * The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun the
	 * completion queue.
	 */
	num_pending_pkts = priv->tx_desc_cnt;
	/*
	 * Reserve space for descriptor completions, which will be reported at
	 * most every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;

	tx->dqo.num_pending_pkts = num_pending_pkts;
	tx->dqo.pending_pkts = malloc(
	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt;

		tx->com.qpl = &priv->qpls[i];
		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		tx->dqo.qpl_bufs = malloc(
		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
		    M_GVE, M_WAITOK | M_ZERO);
	} else
		gve_tx_alloc_rda_fields_dqo(tx);
	return (0);

abort:
	gve_tx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
    struct gve_tx_metadata_dqo *metadata)
{
	uint32_t hash = mbuf->m_pkthdr.flowid;
	uint16_t path_hash;

	metadata->version = GVE_TX_METADATA_VERSION_DQO;
	if (hash) {
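		/*
		 * Fold the 32-bit flowid into a non-zero 15-bit value for the
		 * device's path_hash metadata field.
		 */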
		path_hash = hash ^ (hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (__predict_false(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}

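/*
 * Writes one packet descriptor for each GVE_TX_MAX_BUF_SIZE_DQO-sized chunk of
 * the buffer, advancing *desc_idx past every descriptor written. end_of_packet
 * is set only on the final chunk when eop is requested.
 */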
static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
    uint32_t *desc_idx, uint32_t len, uint64_t addr,
    int16_t compl_tag, bool eop, bool csum_enabled)
{
	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
		    &tx->dqo.desc_ring[*desc_idx].pkt;
		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = htole64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = csum_enabled,
			.compl_tag = htole16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	}
}

static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
    const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
    int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
	desc->mss = mbuf->m_pkthdr.tso_segsz;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
    const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (EINVAL);		\
	}						\
} while (0)

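/*
 * Pulls up the Ethernet, IP, and TCP headers and replaces th_sum with the
 * pseudo-header checksum the NIC expects for TSO. Reports the total L2 + L3 +
 * L4 header length through *header_len.
 */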
static int
gve_prep_tso(struct mbuf *mbuf, int *header_len)
{
	uint8_t l3_off, l4_off = 0;
	struct ether_header *eh;
	struct tcphdr *th;
	u_short csum;

	PULLUP_HDR(mbuf, sizeof(*eh));
	eh = mtod(mbuf, struct ether_header *);
	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));
	l3_off = ETHER_HDR_LEN;

#ifdef INET6
	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
		struct ip6_hdr *ip6;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
		    /*csum=*/0);
	} else
#endif
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		struct ip *ip;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
		ip = (struct ip *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + (ip->ip_hl << 2);
		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htons(IPPROTO_TCP));
	}

	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr));
	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
	*header_len = l4_off + (th->th_off << 2);

	/*
	 * Hardware requires th->th_sum to exclude the TCP payload, so
	 * recompute the checksum with the payload left out.
	 */
	th->th_sum = csum;

	return (0);
}

static int
gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
    bool is_tso, uint32_t *desc_idx)
{
	struct gve_tx_general_context_desc_dqo *gen_desc;
	struct gve_tx_tso_context_desc_dqo *tso_desc;
	struct gve_tx_metadata_dqo metadata;
	int header_len;
	int err;

	metadata = (struct gve_tx_metadata_dqo){0};
	gve_extract_tx_metadata_dqo(mbuf, &metadata);

	if (is_tso) {
		err = gve_prep_tso(mbuf, &header_len);
		if (__predict_false(err)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_delayed_pkt_tsoerr, 1);
			counter_exit();
			return (err);
		}

		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);

		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}

	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	return (0);
}

static int
gve_map_mbuf_dqo(struct gve_tx_ring *tx,
    struct mbuf **mbuf, bus_dmamap_t dmamap,
    bus_dma_segment_t *segs, int *nsegs, int attempt)
{
	struct mbuf *m_new = NULL;
	int err;

	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);

	switch (err) {
	case __predict_true(0):
		break;
	case EFBIG:
		if (__predict_false(attempt > 0))
			goto abort;

		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_collapse, 1);
		counter_exit();

		/* Try m_collapse before m_defrag */
		m_new = m_collapse(*mbuf, M_NOWAIT,
		    GVE_TX_MAX_DATA_DESCS_DQO);
		if (m_new == NULL) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag, 1);
			counter_exit();
			m_new = m_defrag(*mbuf, M_NOWAIT);
		}

		if (__predict_false(m_new == NULL)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag_err, 1);
			counter_exit();

			m_freem(*mbuf);
			*mbuf = NULL;
			err = ENOMEM;
			goto abort;
		} else {
			*mbuf = m_new;
			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
			    segs, nsegs, ++attempt));
		}
	case ENOMEM:
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
		counter_exit();
		goto abort;
	default:
		goto abort;
	}

	return (0);

abort:
	counter_enter();
	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
	counter_exit();
	return (err);
}

static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
{
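	/*
	 * Note: at most desc_mask (ring size - 1) slots are reported as
	 * usable, so one descriptor slot always remains empty.
	 */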
	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
	    tx->dqo.desc_mask;

	return (tx->dqo.desc_mask - num_used);
}

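/*
 * Pending packets sit on two singly-linked free lists: a consumer list
 * (free_pending_pkts_csm) owned by the transmit path and a producer list
 * (free_pending_pkts_prd) that the completion path pushes freed entries onto.
 * When the consumer list runs dry, the whole producer list is claimed with an
 * atomic swap.
 */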
static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	int32_t index = tx->dqo.free_pending_pkts_csm;
	struct gve_tx_pending_pkt_dqo *pending_pkt;

	/*
	 * No pending packets available in the consumer list,
	 * try to steal the producer list.
	 */
	if (__predict_false(index == -1)) {
		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
		    &tx->dqo.free_pending_pkts_prd, -1);

		index = tx->dqo.free_pending_pkts_csm;
		if (__predict_false(index == -1))
			return (NULL);
	}

	pending_pkt = &tx->dqo.pending_pkts[index];

	/* Remove pending_pkt from the consumer list */
	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	return (pending_pkt);
}

static void
gve_free_pending_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	int index = pending_pkt - tx->dqo.pending_pkts;
	int32_t old_head;

	pending_pkt->state = GVE_PACKET_STATE_FREE;

	/* Add pending_pkt to the producer list */
	while (true) {
		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);

		pending_pkt->next = old_head;
		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
		    old_head, index))
			break;
	}
}

/*
 * Has the side-effect of retrieving the value of the last desc index
 * processed by the NIC. hw_tx_head is written to by the completions-processing
 * taskqueue upon receiving descriptor-completions.
 */
static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
{
	if (needed_descs <= num_avail_desc_ring_slots(tx))
		return (true);

	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
	if (needed_descs > num_avail_desc_ring_slots(tx)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
		counter_exit();
		return (false);
	}

	return (true);
}

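/*
 * Sets report_event on the last descriptor of the packet if at least
 * GVE_TX_MIN_RE_INTERVAL descriptors have been posted since the previous
 * report, asking the NIC to return a descriptor completion for this ring.
 */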
static void
gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
{
	uint32_t last_report_event_interval;
	uint32_t last_desc_idx;

	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
	last_report_event_interval =
	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;

	if (__predict_false(last_report_event_interval >=
	    GVE_TX_MIN_RE_INTERVAL)) {
		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
		tx->dqo.last_re_idx = last_desc_idx;
	}
}

static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
{
	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);

	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
	    &tx->dqo.qpl_bufs_produced);
	available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);
	return (false);
}

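/*
 * QPL buffers use the same consumer/producer free-list scheme as pending
 * packets: when the consumer list is empty, the producer list is claimed
 * with an atomic swap.
 */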
static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
{
	int32_t buf = tx->dqo.free_qpl_bufs_csm;

	if (__predict_false(buf == -1)) {
		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
		    &tx->dqo.free_qpl_bufs_prd, -1);
		buf = tx->dqo.free_qpl_bufs_csm;
		if (__predict_false(buf == -1))
			return (-1);
	}

	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
	tx->dqo.qpl_bufs_consumed++;
	return (buf);
}

/*
 * Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
    int32_t index, void **va, bus_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
	    GVE_TX_BUF_SHIFT_DQO;

	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);

	return (&tx->com.qpl->dmas[page_id]);
}

static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
    struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
    bool csum_enabled, int16_t completion_tag,
    uint32_t *desc_idx)
{
	int32_t pkt_len = mbuf->m_pkthdr.len;
	struct gve_dma_handle *dma;
	uint32_t copy_offset = 0;
	int32_t prev_buf = -1;
	uint32_t copy_len;
	bus_addr_t addr;
	int32_t buf;
	void *va;

	MPASS(pkt->num_qpl_bufs == 0);
	MPASS(pkt->qpl_buf_head == -1);

	while (copy_offset < pkt_len) {
		buf = gve_tx_alloc_qpl_buf(tx);
		/* We already checked for availability */
		MPASS(buf != -1);

		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
		m_copydata(mbuf, copy_offset, copy_len, va);
		copy_offset += copy_len;

		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
		    copy_len, addr, completion_tag,
		    /*eop=*/copy_offset == pkt_len,
		    csum_enabled);

		/* Link all the qpl bufs for a packet */
		if (prev_buf == -1)
			pkt->qpl_buf_head = buf;
		else
			tx->dqo.qpl_bufs[prev_buf] = buf;

		prev_buf = buf;
		pkt->num_qpl_bufs++;
	}

	tx->dqo.qpl_bufs[buf] = -1;
}

int
gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
	/* Check if we have enough room in the desc ring */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
		counter_exit();
		return (ENOBUFS);
	}

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;
	pkt->mbuf = mbuf;

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort;

	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
	    has_csum_flag, completion_tag, &desc_idx);

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

int
gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
{
	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	struct mbuf *mbuf = *mbuf_ptr;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;
	int i;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	/*
	 * This mbuf might end up needing more than 1 pkt desc.
	 * The actual number, `nsegs` is known only after the
	 * expensive gve_map_mbuf_dqo call. This check beneath
	 * exists to fail early when the desc ring is really full.
	 */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    1 +				/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;

	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
	    segs, &nsegs, /*attempt=*/0);
	if (err)
		goto abort;
	mbuf = *mbuf_ptr; /* gve_map_mbuf_dqo might replace the mbuf chain */
	pkt->mbuf = mbuf;

	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(
	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
		err = ENOBUFS;
		goto abort_with_dma;
	}

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort_with_dma;

	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nsegs; i++) {
		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
		    segs[i].ds_len, segs[i].ds_addr,
		    completion_tag, /*eop=*/i == (nsegs - 1),
		    has_csum_flag);
	}

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort_with_dma:
	gve_unmap_packet(tx, pkt);
abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pkt)
{
	int32_t buf = pkt->qpl_buf_head;
	struct gve_dma_handle *dma;
	int32_t qpl_buf_tail;
	int32_t old_head;
	int i;

	for (i = 0; i < pkt->num_qpl_bufs; i++) {
		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
		qpl_buf_tail = buf;
		buf = tx->dqo.qpl_bufs[buf];
	}
	MPASS(buf == -1);
	buf = qpl_buf_tail;

	while (true) {
		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
		tx->dqo.qpl_bufs[buf] = old_head;

		/*
		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
		 * is visible only after the linked list from this pkt is
		 * attached above to old_head.
		 */
		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
		    old_head, pkt->qpl_buf_head))
			break;
	}
	/*
	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
	 * visible only after the update to dqo.free_qpl_bufs_prd above.
	 */
	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);

	pkt->qpl_buf_head = -1;
	pkt->num_qpl_bufs = 0;
}

static uint64_t
gve_handle_packet_completion(struct gve_priv *priv,
    struct gve_tx_ring *tx, uint16_t compl_tag)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int32_t pkt_len;

	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
		    compl_tag);
		return (0);
	}

	pending_pkt = &tx->dqo.pending_pkts[compl_tag];

	/* Packet is allocated but not pending data completion. */
	if (__predict_false(pending_pkt->state !=
	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		device_printf(priv->dev,
		    "No pending data completion: %d\n", compl_tag);
		return (0);
	}

	pkt_len = pending_pkt->mbuf->m_pkthdr.len;

	if (gve_is_qpl(priv))
		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
	else
		gve_unmap_packet(tx, pending_pkt);

	m_freem(pending_pkt->mbuf);
	pending_pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pending_pkt);
	return (pkt_len);
}

int
gve_tx_intr_dqo(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++)
		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int entries;
	int i;

	entries = com->priv->tx_desc_cnt;
	for (i = 0; i < entries; i++)
		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};

	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

void
gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	tx->dqo.desc_head = 0;
	tx->dqo.desc_tail = 0;
	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
	tx->dqo.last_re_idx = 0;

	tx->dqo.compl_head = 0;
	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
	atomic_store_32(&tx->dqo.hw_tx_head, 0);
	tx->dqo.cur_gen_bit = 0;

	gve_free_tx_mbufs_dqo(tx);

	for (j = 0; j < tx->dqo.num_pending_pkts - 1; j++) {
		tx->dqo.pending_pkts[j].next = j + 1;
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}
	tx->dqo.pending_pkts[tx->dqo.num_pending_pkts - 1].next = -1;
	tx->dqo.free_pending_pkts_csm = 0;
	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		for (j = 0; j < qpl_buf_cnt - 1; j++)
			tx->dqo.qpl_bufs[j] = j + 1;
		tx->dqo.qpl_bufs[j] = -1;

		tx->dqo.free_qpl_bufs_csm = 0;
		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
		tx->dqo.qpl_bufs_consumed = 0;
	}

	gve_tx_clear_desc_ring_dqo(tx);
	gve_tx_clear_compl_ring_dqo(tx);
}

static bool
gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
{
	struct gve_tx_compl_desc_dqo *compl_desc;
	uint64_t bytes_done = 0;
	uint64_t pkts_done = 0;
	uint16_t compl_tag;
	int work_done = 0;
	uint16_t tx_head;
	uint16_t type;

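	/*
	 * Consume completion descriptors until one whose generation bit still
	 * equals cur_gen_bit is found; that entry has not yet been written by
	 * the NIC in the current pass over the ring.
	 */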
	while (work_done < budget) {
		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
		if (compl_desc->generation == tx->dqo.cur_gen_bit)
			break;

		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		rmb();
		type = compl_desc->type;

		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			tx_head = le16toh(compl_desc->tx_head);
			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			compl_tag = le16toh(compl_desc->completion_tag);
			bytes_done += gve_handle_packet_completion(priv,
			    tx, compl_tag);
			pkts_done++;
		}

		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
		    tx->dqo.compl_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
		work_done++;
	}

	/*
	 * Waking the xmit taskqueue has to occur after room has been made in
	 * the queue.
	 */
	atomic_thread_fence_seq_cst();
	if (atomic_load_bool(&tx->stopped) && work_done) {
		atomic_store_bool(&tx->stopped, false);
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}

	tx->done += work_done; /* tx->done is just a sysctl counter */
	counter_enter();
	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
	counter_exit();

	return (work_done == budget);
}

void
gve_tx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}