1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3
4 /* The driver transmit and receive code */
5
6 #include <linux/mm.h>
7 #include <linux/netdevice.h>
8 #include <linux/prefetch.h>
9 #include <linux/bpf_trace.h>
10 #include <linux/net/intel/libie/rx.h>
11 #include <net/libeth/xdp.h>
12 #include <net/dsfield.h>
13 #include <net/mpls.h>
14 #include <net/xdp.h>
15 #include "ice_txrx_lib.h"
16 #include "ice_lib.h"
17 #include "ice.h"
18 #include "ice_trace.h"
19 #include "ice_dcb_lib.h"
20 #include "ice_xsk.h"
21 #include "ice_eswitch.h"
22
/* NOTE(review): not referenced in the visible part of this file; presumably a header buffer size in bytes - confirm */
23 #define ICE_RX_HDR_SIZE 256
24
/* Upper bound on the number of 1 ms sleeps ice_prgm_fdir_fltr() takes while waiting for two free Tx descriptors */
25 #define ICE_FDIR_CLEAN_DELAY 10
26
27 /**
28 * ice_prgm_fdir_fltr - Program a Flow Director filter
29 * @vsi: VSI to send dummy packet
30 * @fdir_desc: flow director descriptor
31 * @raw_packet: allocated buffer for flow director
32 */
33 int
ice_prgm_fdir_fltr(struct ice_vsi * vsi,struct ice_fltr_desc * fdir_desc,u8 * raw_packet)34 ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
35 u8 *raw_packet)
36 {
37 struct ice_tx_buf *tx_buf, *first;
38 struct ice_fltr_desc *f_desc;
39 struct ice_tx_desc *tx_desc;
40 struct ice_tx_ring *tx_ring;
41 struct device *dev;
42 dma_addr_t dma;
43 u32 td_cmd;
44 u16 i;
45
46 /* VSI and Tx ring */
47 if (!vsi)
48 return -ENOENT;
49 tx_ring = vsi->tx_rings[0];
50 if (!tx_ring || !tx_ring->desc)
51 return -ENOENT;
52 dev = tx_ring->dev;
53
54 /* we are using two descriptors to add/del a filter and we can wait */
55 for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) {
56 if (!i)
57 return -EAGAIN;
58 msleep_interruptible(1);
59 }
60
61 dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE,
62 DMA_TO_DEVICE);
63
64 if (dma_mapping_error(dev, dma))
65 return -EINVAL;
66
67 /* grab the next descriptor */
68 i = tx_ring->next_to_use;
69 first = &tx_ring->tx_buf[i];
70 f_desc = ICE_TX_FDIRDESC(tx_ring, i);
71 memcpy(f_desc, fdir_desc, sizeof(*f_desc));
72
73 i++;
74 i = (i < tx_ring->count) ? i : 0;
75 tx_desc = ICE_TX_DESC(tx_ring, i);
76 tx_buf = &tx_ring->tx_buf[i];
77
78 i++;
79 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
80
81 memset(tx_buf, 0, sizeof(*tx_buf));
82 dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE);
83 dma_unmap_addr_set(tx_buf, dma, dma);
84
85 tx_desc->buf_addr = cpu_to_le64(dma);
86 td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY |
87 ICE_TX_DESC_CMD_RE;
88
89 tx_buf->type = ICE_TX_BUF_DUMMY;
90 tx_buf->raw_buf = raw_packet;
91
92 tx_desc->cmd_type_offset_bsz =
93 ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0);
94
95 /* Force memory write to complete before letting h/w know
96 * there are new descriptors to fetch.
97 */
98 wmb();
99
100 /* mark the data descriptor to be watched */
101 first->next_to_watch = tx_desc;
102
103 writel(tx_ring->next_to_use, tx_ring->tail);
104
105 return 0;
106 }
107
108 /**
109 * ice_unmap_and_free_tx_buf - Release a Tx buffer
110 * @ring: the ring that owns the buffer
111 * @tx_buf: the buffer to free
112 */
113 static void
ice_unmap_and_free_tx_buf(struct ice_tx_ring * ring,struct ice_tx_buf * tx_buf)114 ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
115 {
116 if (tx_buf->type != ICE_TX_BUF_XDP_TX && dma_unmap_len(tx_buf, len))
117 dma_unmap_page(ring->dev,
118 dma_unmap_addr(tx_buf, dma),
119 dma_unmap_len(tx_buf, len),
120 DMA_TO_DEVICE);
121
122 switch (tx_buf->type) {
123 case ICE_TX_BUF_DUMMY:
124 devm_kfree(ring->dev, tx_buf->raw_buf);
125 break;
126 case ICE_TX_BUF_SKB:
127 dev_kfree_skb_any(tx_buf->skb);
128 break;
129 case ICE_TX_BUF_XDP_TX:
130 libeth_xdp_return_va(tx_buf->raw_buf, false);
131 break;
132 case ICE_TX_BUF_XDP_XMIT:
133 xdp_return_frame(tx_buf->xdpf);
134 break;
135 }
136
137 tx_buf->next_to_watch = NULL;
138 tx_buf->type = ICE_TX_BUF_EMPTY;
139 dma_unmap_len_set(tx_buf, len, 0);
140 /* tx_buf must be completely set up in the transmit path */
141 }
142
txring_txq(const struct ice_tx_ring * ring)143 static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring)
144 {
145 return netdev_get_tx_queue(ring->netdev, ring->q_index);
146 }
147
148 /**
149 * ice_clean_tstamp_ring - clean time stamp ring
150 * @tx_ring: Tx ring to clean the Time Stamp ring for
151 */
ice_clean_tstamp_ring(struct ice_tx_ring * tx_ring)152 static void ice_clean_tstamp_ring(struct ice_tx_ring *tx_ring)
153 {
154 struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
155 u32 size;
156
157 if (!tstamp_ring->desc)
158 return;
159
160 size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc),
161 PAGE_SIZE);
162 memset(tstamp_ring->desc, 0, size);
163 tstamp_ring->next_to_use = 0;
164 }
165
166 /**
167 * ice_free_tstamp_ring - free time stamp resources per queue
168 * @tx_ring: Tx ring to free the Time Stamp ring for
169 */
ice_free_tstamp_ring(struct ice_tx_ring * tx_ring)170 void ice_free_tstamp_ring(struct ice_tx_ring *tx_ring)
171 {
172 struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
173 u32 size;
174
175 if (!tstamp_ring->desc)
176 return;
177
178 ice_clean_tstamp_ring(tx_ring);
179 size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc),
180 PAGE_SIZE);
181 dmam_free_coherent(tx_ring->dev, size, tstamp_ring->desc,
182 tstamp_ring->dma);
183 tstamp_ring->desc = NULL;
184 }
185
186 /**
187 * ice_free_tx_tstamp_ring - free time stamp resources per Tx ring
188 * @tx_ring: Tx ring to free the Time Stamp ring for
189 */
ice_free_tx_tstamp_ring(struct ice_tx_ring * tx_ring)190 void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring)
191 {
192 ice_free_tstamp_ring(tx_ring);
193 kfree_rcu(tx_ring->tstamp_ring, rcu);
194 tx_ring->tstamp_ring = NULL;
195 tx_ring->flags &= ~ICE_TX_FLAGS_TXTIME;
196 }
197
198 /**
199 * ice_clean_tx_ring - Free any empty Tx buffers
200 * @tx_ring: ring to be cleaned
201 */
ice_clean_tx_ring(struct ice_tx_ring * tx_ring)202 void ice_clean_tx_ring(struct ice_tx_ring *tx_ring)
203 {
204 u32 size;
205 u16 i;
206
207 if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
208 ice_xsk_clean_xdp_ring(tx_ring);
209 goto tx_skip_free;
210 }
211
212 /* ring already cleared, nothing to do */
213 if (!tx_ring->tx_buf)
214 return;
215
216 /* Free all the Tx ring sk_buffs */
217 for (i = 0; i < tx_ring->count; i++)
218 ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
219
220 tx_skip_free:
221 memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
222
223 size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
224 PAGE_SIZE);
225 /* Zero out the descriptor ring */
226 memset(tx_ring->desc, 0, size);
227
228 tx_ring->next_to_use = 0;
229 tx_ring->next_to_clean = 0;
230
231 if (!tx_ring->netdev)
232 return;
233
234 /* cleanup Tx queue statistics */
235 netdev_tx_reset_queue(txring_txq(tx_ring));
236
237 if (ice_is_txtime_cfg(tx_ring))
238 ice_free_tx_tstamp_ring(tx_ring);
239 }
240
241 /**
242 * ice_free_tx_ring - Free Tx resources per queue
243 * @tx_ring: Tx descriptor ring for a specific queue
244 *
245 * Free all transmit software resources
246 */
ice_free_tx_ring(struct ice_tx_ring * tx_ring)247 void ice_free_tx_ring(struct ice_tx_ring *tx_ring)
248 {
249 u32 size;
250
251 ice_clean_tx_ring(tx_ring);
252 devm_kfree(tx_ring->dev, tx_ring->tx_buf);
253 tx_ring->tx_buf = NULL;
254
255 if (tx_ring->desc) {
256 size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
257 PAGE_SIZE);
258 dmam_free_coherent(tx_ring->dev, size,
259 tx_ring->desc, tx_ring->dma);
260 tx_ring->desc = NULL;
261 }
262 }
263
264 /**
265 * ice_clean_tx_irq - Reclaim resources after transmit completes
266 * @tx_ring: Tx ring to clean
267 * @napi_budget: Used to determine if we are in netpoll
268 *
269 * Returns true if there's any budget left (e.g. the clean is finished)
270 */
ice_clean_tx_irq(struct ice_tx_ring * tx_ring,int napi_budget)271 static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget)
272 {
273 unsigned int total_bytes = 0, total_pkts = 0;
274 unsigned int budget = ICE_DFLT_IRQ_WORK;
275 struct ice_vsi *vsi = tx_ring->vsi;
276 s16 i = tx_ring->next_to_clean;
277 struct ice_tx_desc *tx_desc;
278 struct ice_tx_buf *tx_buf;
279
280 /* get the bql data ready */
281 netdev_txq_bql_complete_prefetchw(txring_txq(tx_ring));
282
283 tx_buf = &tx_ring->tx_buf[i];
284 tx_desc = ICE_TX_DESC(tx_ring, i);
285 i -= tx_ring->count;
286
287 prefetch(&vsi->state);
288
289 do {
290 struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
291
292 /* if next_to_watch is not set then there is no work pending */
293 if (!eop_desc)
294 break;
295
296 /* follow the guidelines of other drivers */
297 prefetchw(&tx_buf->skb->users);
298
299 smp_rmb(); /* prevent any other reads prior to eop_desc */
300
301 ice_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
302 /* if the descriptor isn't done, no work yet to do */
303 if (!(eop_desc->cmd_type_offset_bsz &
304 cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
305 break;
306
307 /* clear next_to_watch to prevent false hangs */
308 tx_buf->next_to_watch = NULL;
309
310 /* update the statistics for this packet */
311 total_bytes += tx_buf->bytecount;
312 total_pkts += tx_buf->gso_segs;
313
314 /* free the skb */
315 napi_consume_skb(tx_buf->skb, napi_budget);
316
317 /* unmap skb header data */
318 dma_unmap_single(tx_ring->dev,
319 dma_unmap_addr(tx_buf, dma),
320 dma_unmap_len(tx_buf, len),
321 DMA_TO_DEVICE);
322
323 /* clear tx_buf data */
324 tx_buf->type = ICE_TX_BUF_EMPTY;
325 dma_unmap_len_set(tx_buf, len, 0);
326
327 /* unmap remaining buffers */
328 while (tx_desc != eop_desc) {
329 ice_trace(clean_tx_irq_unmap, tx_ring, tx_desc, tx_buf);
330 tx_buf++;
331 tx_desc++;
332 i++;
333 if (unlikely(!i)) {
334 i -= tx_ring->count;
335 tx_buf = tx_ring->tx_buf;
336 tx_desc = ICE_TX_DESC(tx_ring, 0);
337 }
338
339 /* unmap any remaining paged data */
340 if (dma_unmap_len(tx_buf, len)) {
341 dma_unmap_page(tx_ring->dev,
342 dma_unmap_addr(tx_buf, dma),
343 dma_unmap_len(tx_buf, len),
344 DMA_TO_DEVICE);
345 dma_unmap_len_set(tx_buf, len, 0);
346 }
347 }
348 ice_trace(clean_tx_irq_unmap_eop, tx_ring, tx_desc, tx_buf);
349
350 /* move us one more past the eop_desc for start of next pkt */
351 tx_buf++;
352 tx_desc++;
353 i++;
354 if (unlikely(!i)) {
355 i -= tx_ring->count;
356 tx_buf = tx_ring->tx_buf;
357 tx_desc = ICE_TX_DESC(tx_ring, 0);
358 }
359
360 prefetch(tx_desc);
361
362 /* update budget accounting */
363 budget--;
364 } while (likely(budget));
365
366 i += tx_ring->count;
367 tx_ring->next_to_clean = i;
368
369 ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
370 netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes);
371
372 #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
373 if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
374 (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
375 /* Make sure that anybody stopping the queue after this
376 * sees the new next_to_clean.
377 */
378 smp_mb();
379 if (netif_tx_queue_stopped(txring_txq(tx_ring)) &&
380 !test_bit(ICE_VSI_DOWN, vsi->state)) {
381 netif_tx_wake_queue(txring_txq(tx_ring));
382 ice_stats_inc(tx_ring->ring_stats, tx_restart_q);
383 }
384 }
385
386 return !!budget;
387 }
388
389 /**
390 * ice_alloc_tstamp_ring - allocate the Time Stamp ring
391 * @tx_ring: Tx ring to allocate the Time Stamp ring for
392 *
393 * Return: 0 on success, negative on error
394 */
ice_alloc_tstamp_ring(struct ice_tx_ring * tx_ring)395 static int ice_alloc_tstamp_ring(struct ice_tx_ring *tx_ring)
396 {
397 struct ice_tstamp_ring *tstamp_ring;
398
399 /* allocate with kzalloc(), free with kfree_rcu() */
400 tstamp_ring = kzalloc_obj(*tstamp_ring);
401 if (!tstamp_ring)
402 return -ENOMEM;
403
404 tstamp_ring->tx_ring = tx_ring;
405 tx_ring->tstamp_ring = tstamp_ring;
406 tstamp_ring->desc = NULL;
407 tstamp_ring->count = ice_calc_ts_ring_count(tx_ring);
408 tx_ring->flags |= ICE_TX_FLAGS_TXTIME;
409 return 0;
410 }
411
412 /**
413 * ice_setup_tstamp_ring - allocate the Time Stamp ring
414 * @tx_ring: Tx ring to set up the Time Stamp ring for
415 *
416 * Return: 0 on success, negative on error
417 */
ice_setup_tstamp_ring(struct ice_tx_ring * tx_ring)418 static int ice_setup_tstamp_ring(struct ice_tx_ring *tx_ring)
419 {
420 struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
421 struct device *dev = tx_ring->dev;
422 u32 size;
423
424 /* round up to nearest page */
425 size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc),
426 PAGE_SIZE);
427 tstamp_ring->desc = dmam_alloc_coherent(dev, size, &tstamp_ring->dma,
428 GFP_KERNEL);
429 if (!tstamp_ring->desc) {
430 dev_err(dev, "Unable to allocate memory for Time stamp Ring, size=%d\n",
431 size);
432 return -ENOMEM;
433 }
434
435 tstamp_ring->next_to_use = 0;
436 return 0;
437 }
438
439 /**
440 * ice_alloc_setup_tstamp_ring - Allocate and setup the Time Stamp ring
441 * @tx_ring: Tx ring to allocate and setup the Time Stamp ring for
442 *
443 * Return: 0 on success, negative on error
444 */
ice_alloc_setup_tstamp_ring(struct ice_tx_ring * tx_ring)445 int ice_alloc_setup_tstamp_ring(struct ice_tx_ring *tx_ring)
446 {
447 struct device *dev = tx_ring->dev;
448 int err;
449
450 err = ice_alloc_tstamp_ring(tx_ring);
451 if (err) {
452 dev_err(dev, "Unable to allocate Time stamp ring for Tx ring %d\n",
453 tx_ring->q_index);
454 return err;
455 }
456
457 err = ice_setup_tstamp_ring(tx_ring);
458 if (err) {
459 dev_err(dev, "Unable to setup Time stamp ring for Tx ring %d\n",
460 tx_ring->q_index);
461 ice_free_tx_tstamp_ring(tx_ring);
462 return err;
463 }
464 return 0;
465 }
466
467 /**
468 * ice_setup_tx_ring - Allocate the Tx descriptors
469 * @tx_ring: the Tx ring to set up
470 *
471 * Return 0 on success, negative on error
472 */
ice_setup_tx_ring(struct ice_tx_ring * tx_ring)473 int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
474 {
475 struct device *dev = tx_ring->dev;
476 u32 size;
477
478 if (!dev)
479 return -ENOMEM;
480
481 /* warn if we are about to overwrite the pointer */
482 WARN_ON(tx_ring->tx_buf);
483 tx_ring->tx_buf =
484 devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count,
485 GFP_KERNEL);
486 if (!tx_ring->tx_buf)
487 return -ENOMEM;
488
489 /* round up to nearest page */
490 size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
491 PAGE_SIZE);
492 tx_ring->desc = dmam_alloc_coherent(dev, size, &tx_ring->dma,
493 GFP_KERNEL);
494 if (!tx_ring->desc) {
495 dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
496 size);
497 goto err;
498 }
499
500 tx_ring->next_to_use = 0;
501 tx_ring->next_to_clean = 0;
502 tx_ring->ring_stats->tx.prev_pkt = -1;
503 return 0;
504
505 err:
506 devm_kfree(dev, tx_ring->tx_buf);
507 tx_ring->tx_buf = NULL;
508 return -ENOMEM;
509 }
510
ice_rxq_pp_destroy(struct ice_rx_ring * rq)511 void ice_rxq_pp_destroy(struct ice_rx_ring *rq)
512 {
513 struct libeth_fq fq = {
514 .fqes = rq->rx_fqes,
515 .pp = rq->pp,
516 };
517
518 libeth_rx_fq_destroy(&fq);
519 rq->rx_fqes = NULL;
520 rq->pp = NULL;
521
522 if (!rq->hdr_pp)
523 return;
524
525 fq.fqes = rq->hdr_fqes;
526 fq.pp = rq->hdr_pp;
527
528 libeth_rx_fq_destroy(&fq);
529 rq->hdr_fqes = NULL;
530 rq->hdr_pp = NULL;
531 }
532
533 /**
534 * ice_clean_rx_ring - Free Rx buffers
535 * @rx_ring: ring to be cleaned
536 */
ice_clean_rx_ring(struct ice_rx_ring * rx_ring)537 void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
538 {
539 u32 size;
540
541 if (rx_ring->xsk_pool) {
542 ice_xsk_clean_rx_ring(rx_ring);
543 goto rx_skip_free;
544 }
545
546 /* ring already cleared, nothing to do */
547 if (!rx_ring->rx_fqes)
548 return;
549
550 libeth_xdp_return_stash(&rx_ring->xdp);
551
552 /* Free all the Rx ring sk_buffs */
553 for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) {
554 libeth_rx_recycle_slow(rx_ring->rx_fqes[i].netmem);
555
556 if (rx_ring->hdr_pp)
557 libeth_rx_recycle_slow(rx_ring->hdr_fqes[i].netmem);
558
559 if (unlikely(++i == rx_ring->count))
560 i = 0;
561 }
562
563 if ((rx_ring->vsi->type == ICE_VSI_PF ||
564 rx_ring->vsi->type == ICE_VSI_SF ||
565 rx_ring->vsi->type == ICE_VSI_LB) &&
566 xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) {
567 xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq);
568 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
569 }
570
571 ice_rxq_pp_destroy(rx_ring);
572
573 rx_skip_free:
574 /* Zero out the descriptor ring */
575 size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
576 PAGE_SIZE);
577 memset(rx_ring->desc, 0, size);
578
579 rx_ring->next_to_clean = 0;
580 rx_ring->next_to_use = 0;
581 }
582
583 /**
584 * ice_free_rx_ring - Free Rx resources
585 * @rx_ring: ring to clean the resources from
586 *
587 * Free all receive software resources
588 */
ice_free_rx_ring(struct ice_rx_ring * rx_ring)589 void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
590 {
591 struct device *dev = ice_pf_to_dev(rx_ring->vsi->back);
592 u32 size;
593
594 ice_clean_rx_ring(rx_ring);
595 WRITE_ONCE(rx_ring->xdp_prog, NULL);
596 if (rx_ring->xsk_pool) {
597 kfree(rx_ring->xdp_buf);
598 rx_ring->xdp_buf = NULL;
599 }
600
601 if (rx_ring->desc) {
602 size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
603 PAGE_SIZE);
604 dmam_free_coherent(dev, size, rx_ring->desc, rx_ring->dma);
605 rx_ring->desc = NULL;
606 }
607 }
608
609 /**
610 * ice_setup_rx_ring - Allocate the Rx descriptors
611 * @rx_ring: the Rx ring to set up
612 *
613 * Return 0 on success, negative on error
614 */
ice_setup_rx_ring(struct ice_rx_ring * rx_ring)615 int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
616 {
617 struct device *dev = ice_pf_to_dev(rx_ring->vsi->back);
618 u32 size;
619
620 /* round up to nearest page */
621 size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
622 PAGE_SIZE);
623 rx_ring->desc = dmam_alloc_coherent(dev, size, &rx_ring->dma,
624 GFP_KERNEL);
625 if (!rx_ring->desc) {
626 dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
627 size);
628 return -ENOMEM;
629 }
630
631 rx_ring->next_to_use = 0;
632 rx_ring->next_to_clean = 0;
633
634 if (ice_is_xdp_ena_vsi(rx_ring->vsi))
635 WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
636
637 return 0;
638 }
639
640 /**
641 * ice_run_xdp - Executes an XDP program on initialized xdp_buff
642 * @rx_ring: Rx ring
643 * @xdp: xdp_buff used as input to the XDP program
644 * @xdp_prog: XDP program to run
645 * @xdp_ring: ring to be used for XDP_TX action
646 * @eop_desc: Last descriptor in packet to read metadata from
647 *
648 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
649 */
650 static u32
ice_run_xdp(struct ice_rx_ring * rx_ring,struct libeth_xdp_buff * xdp,struct bpf_prog * xdp_prog,struct ice_tx_ring * xdp_ring,union ice_32b_rx_flex_desc * eop_desc)651 ice_run_xdp(struct ice_rx_ring *rx_ring, struct libeth_xdp_buff *xdp,
652 struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
653 union ice_32b_rx_flex_desc *eop_desc)
654 {
655 unsigned int ret = ICE_XDP_PASS;
656 u32 act;
657
658 if (!xdp_prog)
659 goto exit;
660
661 xdp->desc = eop_desc;
662
663 act = bpf_prog_run_xdp(xdp_prog, &xdp->base);
664 switch (act) {
665 case XDP_PASS:
666 break;
667 case XDP_TX:
668 if (static_branch_unlikely(&ice_xdp_locking_key))
669 spin_lock(&xdp_ring->tx_lock);
670 ret = __ice_xmit_xdp_ring(&xdp->base, xdp_ring, false);
671 if (static_branch_unlikely(&ice_xdp_locking_key))
672 spin_unlock(&xdp_ring->tx_lock);
673 if (ret == ICE_XDP_CONSUMED)
674 goto out_failure;
675 break;
676 case XDP_REDIRECT:
677 if (xdp_do_redirect(rx_ring->netdev, &xdp->base, xdp_prog))
678 goto out_failure;
679 ret = ICE_XDP_REDIR;
680 break;
681 default:
682 bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
683 fallthrough;
684 case XDP_ABORTED:
685 out_failure:
686 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
687 fallthrough;
688 case XDP_DROP:
689 libeth_xdp_return_buff(xdp);
690 ret = ICE_XDP_CONSUMED;
691 }
692
693 exit:
694 return ret;
695 }
696
697 /**
698 * ice_xmit_xdp_ring - submit frame to XDP ring for transmission
699 * @xdpf: XDP frame that will be converted to XDP buff
700 * @xdp_ring: XDP ring for transmission
701 */
ice_xmit_xdp_ring(const struct xdp_frame * xdpf,struct ice_tx_ring * xdp_ring)702 static int ice_xmit_xdp_ring(const struct xdp_frame *xdpf,
703 struct ice_tx_ring *xdp_ring)
704 {
705 struct xdp_buff xdp;
706
707 xdp.data_hard_start = (void *)xdpf;
708 xdp.data = xdpf->data;
709 xdp.data_end = xdp.data + xdpf->len;
710 xdp.frame_sz = xdpf->frame_sz;
711 xdp.flags = xdpf->flags;
712
713 return __ice_xmit_xdp_ring(&xdp, xdp_ring, true);
714 }
715
716 /**
717 * ice_xdp_xmit - submit packets to XDP ring for transmission
718 * @dev: netdev
719 * @n: number of XDP frames to be transmitted
720 * @frames: XDP frames to be transmitted
721 * @flags: transmit flags
722 *
723 * Returns number of frames successfully sent. Failed frames
724 * will be free'ed by XDP core.
725 * For error cases, a negative errno code is returned and no-frames
726 * are transmitted (caller must handle freeing frames).
727 */
728 int
ice_xdp_xmit(struct net_device * dev,int n,struct xdp_frame ** frames,u32 flags)729 ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
730 u32 flags)
731 {
732 struct ice_netdev_priv *np = netdev_priv(dev);
733 unsigned int queue_index = smp_processor_id();
734 struct ice_vsi *vsi = np->vsi;
735 struct ice_tx_ring *xdp_ring;
736 struct ice_tx_buf *tx_buf;
737 int nxmit = 0, i;
738
739 if (test_bit(ICE_VSI_DOWN, vsi->state))
740 return -ENETDOWN;
741
742 if (!ice_is_xdp_ena_vsi(vsi))
743 return -ENXIO;
744
745 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
746 return -EINVAL;
747
748 if (static_branch_unlikely(&ice_xdp_locking_key)) {
749 queue_index %= vsi->num_xdp_txq;
750 xdp_ring = vsi->xdp_rings[queue_index];
751 spin_lock(&xdp_ring->tx_lock);
752 } else {
753 /* Generally, should not happen */
754 if (unlikely(queue_index >= vsi->num_xdp_txq))
755 return -ENXIO;
756 xdp_ring = vsi->xdp_rings[queue_index];
757 }
758
759 tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
760 for (i = 0; i < n; i++) {
761 const struct xdp_frame *xdpf = frames[i];
762 int err;
763
764 err = ice_xmit_xdp_ring(xdpf, xdp_ring);
765 if (err != ICE_XDP_TX)
766 break;
767 nxmit++;
768 }
769
770 tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
771 if (unlikely(flags & XDP_XMIT_FLUSH))
772 ice_xdp_ring_update_tail(xdp_ring);
773
774 if (static_branch_unlikely(&ice_xdp_locking_key))
775 spin_unlock(&xdp_ring->tx_lock);
776
777 return nxmit;
778 }
779
780 /**
781 * ice_init_ctrl_rx_descs - Initialize Rx descriptors for control vsi.
782 * @rx_ring: ring to init descriptors on
783 * @count: number of descriptors to initialize
784 */
ice_init_ctrl_rx_descs(struct ice_rx_ring * rx_ring,u32 count)785 void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 count)
786 {
787 union ice_32b_rx_flex_desc *rx_desc;
788 u32 ntu = rx_ring->next_to_use;
789
790 if (!count)
791 return;
792
793 rx_desc = ICE_RX_DESC(rx_ring, ntu);
794
795 do {
796 rx_desc++;
797 ntu++;
798 if (unlikely(ntu == rx_ring->count)) {
799 rx_desc = ICE_RX_DESC(rx_ring, 0);
800 ntu = 0;
801 }
802
803 rx_desc->wb.status_error0 = 0;
804 count--;
805 } while (count);
806
807 if (rx_ring->next_to_use != ntu)
808 ice_release_rx_desc(rx_ring, ntu);
809 }
810
811 /**
812 * ice_alloc_rx_bufs - Replace used receive buffers
813 * @rx_ring: ring to place buffers on
814 * @cleaned_count: number of buffers to replace
815 *
816 * Returns false if all allocations were successful, true if any fail. Returning
817 * true signals to the caller that we didn't replace cleaned_count buffers and
818 * there is more work to do.
819 *
820 * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
821 * buffers. Then bump tail at most one time. Grouping like this lets us avoid
822 * multiple tail writes per call.
823 */
ice_alloc_rx_bufs(struct ice_rx_ring * rx_ring,unsigned int cleaned_count)824 bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
825 {
826 const struct libeth_fq_fp hdr_fq = {
827 .pp = rx_ring->hdr_pp,
828 .fqes = rx_ring->hdr_fqes,
829 .truesize = rx_ring->hdr_truesize,
830 .count = rx_ring->count,
831 };
832 const struct libeth_fq_fp fq = {
833 .pp = rx_ring->pp,
834 .fqes = rx_ring->rx_fqes,
835 .truesize = rx_ring->truesize,
836 .count = rx_ring->count,
837 };
838 union ice_32b_rx_flex_desc *rx_desc;
839 u16 ntu = rx_ring->next_to_use;
840
841 /* do nothing if no valid netdev defined */
842 if (!rx_ring->netdev || !cleaned_count)
843 return false;
844
845 /* get the Rx descriptor and buffer based on next_to_use */
846 rx_desc = ICE_RX_DESC(rx_ring, ntu);
847
848 do {
849 dma_addr_t addr;
850
851 addr = libeth_rx_alloc(&fq, ntu);
852 if (addr == DMA_MAPPING_ERROR) {
853 ice_stats_inc(rx_ring->ring_stats, rx_page_failed);
854 break;
855 }
856
857 /* Refresh the desc even if buffer_addrs didn't change
858 * because each write-back erases this info.
859 */
860 rx_desc->read.pkt_addr = cpu_to_le64(addr);
861
862 if (!hdr_fq.pp)
863 goto next;
864
865 addr = libeth_rx_alloc(&hdr_fq, ntu);
866 if (addr == DMA_MAPPING_ERROR) {
867 ice_stats_inc(rx_ring->ring_stats, rx_page_failed);
868
869 libeth_rx_recycle_slow(fq.fqes[ntu].netmem);
870 break;
871 }
872
873 rx_desc->read.hdr_addr = cpu_to_le64(addr);
874
875 next:
876 rx_desc++;
877 ntu++;
878 if (unlikely(ntu == rx_ring->count)) {
879 rx_desc = ICE_RX_DESC(rx_ring, 0);
880 ntu = 0;
881 }
882
883 /* clear the status bits for the next_to_use descriptor */
884 rx_desc->wb.status_error0 = 0;
885
886 cleaned_count--;
887 } while (cleaned_count);
888
889 if (rx_ring->next_to_use != ntu)
890 ice_release_rx_desc(rx_ring, ntu);
891
892 return !!cleaned_count;
893 }
894
895 /**
896 * ice_clean_ctrl_rx_irq - Clean descriptors from flow director Rx ring
897 * @rx_ring: Rx descriptor ring for ctrl_vsi to transact packets on
898 *
899 * This function cleans Rx descriptors from the ctrl_vsi Rx ring used
900 * to set flow director rules on VFs.
901 */
ice_clean_ctrl_rx_irq(struct ice_rx_ring * rx_ring)902 void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring)
903 {
904 u32 ntc = rx_ring->next_to_clean;
905 unsigned int total_rx_pkts = 0;
906 u32 cnt = rx_ring->count;
907
908 while (likely(total_rx_pkts < ICE_DFLT_IRQ_WORK)) {
909 struct ice_vsi *ctrl_vsi = rx_ring->vsi;
910 union ice_32b_rx_flex_desc *rx_desc;
911 u16 stat_err_bits;
912
913 rx_desc = ICE_RX_DESC(rx_ring, ntc);
914
915 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
916 if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
917 break;
918
919 dma_rmb();
920
921 if (ctrl_vsi->vf)
922 ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
923
924 if (++ntc == cnt)
925 ntc = 0;
926 total_rx_pkts++;
927 }
928
929 rx_ring->next_to_clean = ntc;
930 ice_init_ctrl_rx_descs(rx_ring, ICE_DESC_UNUSED(rx_ring));
931 }
932
933 /**
934 * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
935 * @rx_ring: Rx descriptor ring to transact packets on
936 * @budget: Total limit on number of packets to process
937 *
938 * This function provides a "bounce buffer" approach to Rx interrupt
939 * processing. The advantage to this is that on systems that have
940 * expensive overhead for IOMMU access this provides a means of avoiding
941 * it by maintaining the mapping of the page to the system.
942 *
943 * Returns amount of work completed
944 */
ice_clean_rx_irq(struct ice_rx_ring * rx_ring,int budget)945 static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
946 {
947 unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
948 struct ice_tx_ring *xdp_ring = NULL;
949 struct bpf_prog *xdp_prog = NULL;
950 u32 ntc = rx_ring->next_to_clean;
951 LIBETH_XDP_ONSTACK_BUFF(xdp);
952 u32 cached_ntu, xdp_verdict;
953 u32 cnt = rx_ring->count;
954 u32 xdp_xmit = 0;
955 bool failure;
956
957 libeth_xdp_init_buff(xdp, &rx_ring->xdp, &rx_ring->xdp_rxq);
958
959 xdp_prog = READ_ONCE(rx_ring->xdp_prog);
960 if (xdp_prog) {
961 xdp_ring = rx_ring->xdp_ring;
962 cached_ntu = xdp_ring->next_to_use;
963 }
964
965 /* start the loop to process Rx packets bounded by 'budget' */
966 while (likely(total_rx_pkts < (unsigned int)budget)) {
967 union ice_32b_rx_flex_desc *rx_desc;
968 struct libeth_fqe *rx_buf;
969 struct sk_buff *skb;
970 unsigned int size;
971 u16 stat_err_bits;
972 u16 vlan_tci;
973 bool rxe;
974
975 /* get the Rx desc from Rx ring based on 'next_to_clean' */
976 rx_desc = ICE_RX_DESC(rx_ring, ntc);
977
978 /*
979 * The DD bit will always be zero for unused descriptors
980 * because it's cleared in cleanup or when setting the DMA
981 * address of the header buffer, which never uses the DD bit.
982 * If the hardware wrote the descriptor, it will be non-zero.
983 */
984 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
985 if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
986 break;
987
988 /* This memory barrier is needed to keep us from reading
989 * any other fields out of the rx_desc until we know the
990 * DD bit is set.
991 */
992 dma_rmb();
993
994 ice_trace(clean_rx_irq, rx_ring, rx_desc);
995
996 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_HBO_S) |
997 BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
998 rxe = ice_test_staterr(rx_desc->wb.status_error0,
999 stat_err_bits);
1000
1001 if (!rx_ring->hdr_pp)
1002 goto payload;
1003
1004 size = le16_get_bits(rx_desc->wb.hdr_len_sph_flex_flags1,
1005 ICE_RX_FLEX_DESC_HDR_LEN_M);
1006 if (unlikely(rxe))
1007 size = 0;
1008
1009 rx_buf = &rx_ring->hdr_fqes[ntc];
1010 libeth_xdp_process_buff(xdp, rx_buf, size);
1011 rx_buf->netmem = 0;
1012
1013 payload:
1014 size = le16_to_cpu(rx_desc->wb.pkt_len) &
1015 ICE_RX_FLX_DESC_PKT_LEN_M;
1016 if (unlikely(rxe))
1017 size = 0;
1018
1019 /* retrieve a buffer from the ring */
1020 rx_buf = &rx_ring->rx_fqes[ntc];
1021 libeth_xdp_process_buff(xdp, rx_buf, size);
1022
1023 if (++ntc == cnt)
1024 ntc = 0;
1025
1026 /* skip if it is NOP desc */
1027 if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!xdp->data))
1028 continue;
1029
1030 xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
1031 if (xdp_verdict == ICE_XDP_PASS)
1032 goto construct_skb;
1033
1034 if (xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR))
1035 xdp_xmit |= xdp_verdict;
1036 total_rx_bytes += xdp_get_buff_len(&xdp->base);
1037 total_rx_pkts++;
1038
1039 xdp->data = NULL;
1040 continue;
1041
1042 construct_skb:
1043 skb = xdp_build_skb_from_buff(&xdp->base);
1044 xdp->data = NULL;
1045
1046 /* exit if we failed to retrieve a buffer */
1047 if (!skb) {
1048 libeth_xdp_return_buff_slow(xdp);
1049 ice_stats_inc(rx_ring->ring_stats, rx_buf_failed);
1050 continue;
1051 }
1052
1053 vlan_tci = ice_get_vlan_tci(rx_desc);
1054
1055 /* probably a little skewed due to removing CRC */
1056 total_rx_bytes += skb->len;
1057
1058 /* populate checksum, VLAN, and protocol */
1059 ice_process_skb_fields(rx_ring, rx_desc, skb);
1060
1061 ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
1062 /* send completed skb up the stack */
1063 ice_receive_skb(rx_ring, skb, vlan_tci);
1064
1065 /* update budget accounting */
1066 total_rx_pkts++;
1067 }
1068
1069 rx_ring->next_to_clean = ntc;
1070 /* return up to cleaned_count buffers to hardware */
1071 failure = ice_alloc_rx_bufs(rx_ring, ICE_DESC_UNUSED(rx_ring));
1072
1073 if (xdp_xmit)
1074 ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu);
1075
1076 libeth_xdp_save_buff(&rx_ring->xdp, xdp);
1077
1078 if (rx_ring->ring_stats)
1079 ice_update_rx_ring_stats(rx_ring, total_rx_pkts,
1080 total_rx_bytes);
1081
1082 /* guarantee a trip back through this routine if there was a failure */
1083 return failure ? budget : (int)total_rx_pkts;
1084 }
1085
/**
 * __ice_update_sample - fill a DIM sample from the rings of a container
 * @q_vector: the vector whose event count goes into the sample
 * @rc: ring container whose rings are summed up
 * @sample: DIM sample to fill
 * @is_tx: true if @rc holds Tx rings, false if it holds Rx rings
 *
 * Adds up the packet and byte counters of every ring in @rc that has ring
 * statistics and stores them in @sample. DIM is restarted if the sample is a
 * second or more newer than the start sample.
 */
static void __ice_update_sample(struct ice_q_vector *q_vector,
				struct ice_ring_container *rc,
				struct dim_sample *sample,
				bool is_tx)
{
	u64 pkts_sum = 0, bytes_sum = 0;
	u64 ring_pkts, ring_bytes;

	if (!is_tx) {
		struct ice_rx_ring *rx_ring;

		ice_for_each_rx_ring(rx_ring, *rc) {
			if (!rx_ring->ring_stats)
				continue;

			ice_fetch_rx_ring_stats(rx_ring, &ring_pkts,
						&ring_bytes);

			pkts_sum += ring_pkts;
			bytes_sum += ring_bytes;
		}
	} else {
		struct ice_tx_ring *tx_ring;

		ice_for_each_tx_ring(tx_ring, *rc) {
			if (!tx_ring->ring_stats)
				continue;

			ice_fetch_tx_ring_stats(tx_ring, &ring_pkts,
						&ring_bytes);

			pkts_sum += ring_pkts;
			bytes_sum += ring_bytes;
		}
	}

	dim_update_sample(q_vector->total_events, pkts_sum, bytes_sum,
			  sample);
	sample->comp_ctr = 0;

	/* DIM settings that were not updated for 1 second or longer are
	 * stale, so force the algorithm to start over. This covers the
	 * frequent case of the scheduler switching to an idle queue. The
	 * 1,000 below is in milliseconds.
	 */
	if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000)
		rc->dim.state = DIM_START_MEASURE;
}
1131
1132 /**
1133 * ice_net_dim - Update net DIM algorithm
1134 * @q_vector: the vector associated with the interrupt
1135 *
1136 * Create a DIM sample and notify net_dim() so that it can possibly decide
1137 * a new ITR value based on incoming packets, bytes, and interrupts.
1138 *
1139 * This function is a no-op if the ring is not configured to dynamic ITR.
1140 */
ice_net_dim(struct ice_q_vector * q_vector)1141 static void ice_net_dim(struct ice_q_vector *q_vector)
1142 {
1143 struct ice_ring_container *tx = &q_vector->tx;
1144 struct ice_ring_container *rx = &q_vector->rx;
1145
1146 if (ITR_IS_DYNAMIC(tx)) {
1147 struct dim_sample dim_sample;
1148
1149 __ice_update_sample(q_vector, tx, &dim_sample, true);
1150 net_dim(&tx->dim, &dim_sample);
1151 }
1152
1153 if (ITR_IS_DYNAMIC(rx)) {
1154 struct dim_sample dim_sample;
1155
1156 __ice_update_sample(q_vector, rx, &dim_sample, false);
1157 net_dim(&rx->dim, &dim_sample);
1158 }
1159 }
1160
1161 /**
1162 * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
1163 * @itr_idx: interrupt throttling index
1164 * @itr: interrupt throttling value in usecs
1165 */
ice_buildreg_itr(u16 itr_idx,u16 itr)1166 static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
1167 {
1168 /* The ITR value is reported in microseconds, and the register value is
1169 * recorded in 2 microsecond units. For this reason we only need to
1170 * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
1171 * granularity as a shift instead of division. The mask makes sure the
1172 * ITR value is never odd so we don't accidentally write into the field
1173 * prior to the ITR field.
1174 */
1175 itr &= ICE_ITR_MASK;
1176
1177 return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
1178 (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
1179 (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
1180 }
1181
1182 /**
1183 * ice_enable_interrupt - re-enable MSI-X interrupt
1184 * @q_vector: the vector associated with the interrupt to enable
1185 *
1186 * If the VSI is down, the interrupt will not be re-enabled. Also,
1187 * when enabling the interrupt always reset the wb_on_itr to false
1188 * and trigger a software interrupt to clean out internal state.
1189 */
ice_enable_interrupt(struct ice_q_vector * q_vector)1190 static void ice_enable_interrupt(struct ice_q_vector *q_vector)
1191 {
1192 struct ice_vsi *vsi = q_vector->vsi;
1193 bool wb_en = q_vector->wb_on_itr;
1194 u32 itr_val;
1195
1196 if (test_bit(ICE_DOWN, vsi->state))
1197 return;
1198
1199 /* trigger an ITR delayed software interrupt when exiting busy poll, to
1200 * make sure to catch any pending cleanups that might have been missed
1201 * due to interrupt state transition. If busy poll or poll isn't
1202 * enabled, then don't update ITR, and just enable the interrupt.
1203 */
1204 if (!wb_en) {
1205 itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
1206 } else {
1207 q_vector->wb_on_itr = false;
1208
1209 /* do two things here with a single write. Set up the third ITR
1210 * index to be used for software interrupt moderation, and then
1211 * trigger a software interrupt with a rate limit of 20K on
1212 * software interrupts, this will help avoid high interrupt
1213 * loads due to frequently polling and exiting polling.
1214 */
1215 itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K);
1216 itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
1217 ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S |
1218 GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
1219 }
1220 wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
1221 }
1222
1223 /**
1224 * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
1225 * @q_vector: q_vector to set WB_ON_ITR on
1226 *
1227 * We need to tell hardware to write-back completed descriptors even when
1228 * interrupts are disabled. Descriptors will be written back on cache line
1229 * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
1230 * descriptors may not be written back if they don't fill a cache line until
1231 * the next interrupt.
1232 *
1233 * This sets the write-back frequency to whatever was set previously for the
1234 * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
1235 * aren't meddling with the INTENA_M bit.
1236 */
ice_set_wb_on_itr(struct ice_q_vector * q_vector)1237 static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
1238 {
1239 struct ice_vsi *vsi = q_vector->vsi;
1240
1241 /* already in wb_on_itr mode no need to change it */
1242 if (q_vector->wb_on_itr)
1243 return;
1244
1245 /* use previously set ITR values for all of the ITR indices by
1246 * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
1247 * be static in non-adaptive mode (user configured)
1248 */
1249 wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
1250 FIELD_PREP(GLINT_DYN_CTL_ITR_INDX_M, ICE_ITR_NONE) |
1251 FIELD_PREP(GLINT_DYN_CTL_INTENA_MSK_M, 1) |
1252 FIELD_PREP(GLINT_DYN_CTL_WB_ON_ITR_M, 1));
1253
1254 q_vector->wb_on_itr = true;
1255 }
1256
1257 /**
1258 * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
1259 * @napi: napi struct with our devices info in it
1260 * @budget: amount of work driver is allowed to do this pass, in packets
1261 *
1262 * This function will clean all queues associated with a q_vector.
1263 *
1264 * Returns the amount of work done
1265 */
ice_napi_poll(struct napi_struct * napi,int budget)1266 int ice_napi_poll(struct napi_struct *napi, int budget)
1267 {
1268 struct ice_q_vector *q_vector =
1269 container_of(napi, struct ice_q_vector, napi);
1270 struct ice_tx_ring *tx_ring;
1271 struct ice_rx_ring *rx_ring;
1272 bool clean_complete = true;
1273 int budget_per_ring;
1274 int work_done = 0;
1275
1276 /* Since the actual Tx work is minimal, we can give the Tx a larger
1277 * budget and be more aggressive about cleaning up the Tx descriptors.
1278 */
1279 ice_for_each_tx_ring(tx_ring, q_vector->tx) {
1280 struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool);
1281 bool wd;
1282
1283 if (xsk_pool)
1284 wd = ice_xmit_zc(tx_ring, xsk_pool);
1285 else if (ice_ring_is_xdp(tx_ring))
1286 wd = true;
1287 else
1288 wd = ice_clean_tx_irq(tx_ring, budget);
1289
1290 if (!wd)
1291 clean_complete = false;
1292 }
1293
1294 /* Handle case where we are called by netpoll with a budget of 0 */
1295 if (unlikely(budget <= 0))
1296 return budget;
1297
1298 /* normally we have 1 Rx ring per q_vector */
1299 if (unlikely(q_vector->num_ring_rx > 1))
1300 /* We attempt to distribute budget to each Rx queue fairly, but
1301 * don't allow the budget to go below 1 because that would exit
1302 * polling early.
1303 */
1304 budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1);
1305 else
1306 /* Max of 1 Rx ring in this q_vector so give it the budget */
1307 budget_per_ring = budget;
1308
1309 ice_for_each_rx_ring(rx_ring, q_vector->rx) {
1310 struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool);
1311 int cleaned;
1312
1313 /* A dedicated path for zero-copy allows making a single
1314 * comparison in the irq context instead of many inside the
1315 * ice_clean_rx_irq function and makes the codebase cleaner.
1316 */
1317 cleaned = rx_ring->xsk_pool ?
1318 ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) :
1319 ice_clean_rx_irq(rx_ring, budget_per_ring);
1320 work_done += cleaned;
1321 /* if we clean as many as budgeted, we must not be done */
1322 if (cleaned >= budget_per_ring)
1323 clean_complete = false;
1324 }
1325
1326 /* If work not completed, return budget and polling will return */
1327 if (!clean_complete) {
1328 /* Set the writeback on ITR so partial completions of
1329 * cache-lines will still continue even if we're polling.
1330 */
1331 ice_set_wb_on_itr(q_vector);
1332 return budget;
1333 }
1334
1335 /* Exit the polling mode, but don't re-enable interrupts if stack might
1336 * poll us due to busy-polling
1337 */
1338 if (napi_complete_done(napi, work_done)) {
1339 ice_net_dim(q_vector);
1340 ice_enable_interrupt(q_vector);
1341 } else {
1342 ice_set_wb_on_itr(q_vector);
1343 }
1344
1345 return min_t(int, work_done, budget - 1);
1346 }
1347
1348 /**
1349 * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
1350 * @tx_ring: the ring to be checked
1351 * @size: the size buffer we want to assure is available
1352 *
1353 * Returns -EBUSY if a stop is needed, else 0
1354 */
__ice_maybe_stop_tx(struct ice_tx_ring * tx_ring,unsigned int size)1355 static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
1356 {
1357 netif_tx_stop_queue(txring_txq(tx_ring));
1358 /* Memory barrier before checking head and tail */
1359 smp_mb();
1360
1361 /* Check again in a case another CPU has just made room available. */
1362 if (likely(ICE_DESC_UNUSED(tx_ring) < size))
1363 return -EBUSY;
1364
1365 /* A reprieve! - use start_queue because it doesn't call schedule */
1366 netif_tx_start_queue(txring_txq(tx_ring));
1367 ice_stats_inc(tx_ring->ring_stats, tx_restart_q);
1368 return 0;
1369 }
1370
/**
 * ice_maybe_stop_tx - 1st level check for Tx stop conditions
 * @tx_ring: the ring to be checked
 * @size: the number of free descriptors we want to assure are available
 *
 * Return: 0 if a stop is not needed, otherwise the result of the 2nd level
 * check in __ice_maybe_stop_tx()
 */
static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
{
	/* slow path: the ring looks too full, let the 2nd level decide */
	if (unlikely(ICE_DESC_UNUSED(tx_ring) < size))
		return __ice_maybe_stop_tx(tx_ring, size);

	return 0;
}
1385
/**
 * ice_tx_map - Build the Tx descriptor
 * @tx_ring: ring to send buffer on
 * @first: first buffer info buffer to use
 * @off: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit descriptor.
 *
 * The linear part of the skb is mapped first, then each page fragment in
 * turn. A mapped region larger than ICE_MAX_DATA_PER_TXD is spread over
 * several descriptors. After the last descriptor is written, next_to_use is
 * advanced and, if __netdev_tx_sent_queue() asks for it, the tail register
 * (of the timestamp ring when Tx time is configured, of the Tx ring
 * otherwise) is written.
 *
 * If a DMA mapping fails, every Tx buffer from the current index back to
 * @first is unmapped and freed and next_to_use is set to the index at which
 * that walk stopped.
 */
static void
ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
	   struct ice_tx_offload_params *off)
{
	u64 td_offset, td_tag, td_cmd;
	u16 i = tx_ring->next_to_use;
	unsigned int data_len, size;
	struct ice_tx_desc *tx_desc;
	struct ice_tx_buf *tx_buf;
	struct sk_buff *skb;
	skb_frag_t *frag;
	dma_addr_t dma;
	bool kick;

	td_tag = off->td_l2tag1;
	td_cmd = off->td_cmd;
	td_offset = off->td_offset;
	skb = first->skb;

	/* data_len is the paged part still to map, size the linear part */
	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = ICE_TX_DESC(tx_ring, i);

	/* a HW VLAN request overrides the tag taken from @off */
	if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
		td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
		td_tag = first->vid;
	}

	/* map the linear part; the result is checked at the top of the loop */
	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_buf = first;

	/* one pass per mapped region: the linear part first, then one page
	 * fragment per pass until data_len is used up
	 */
	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;

		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);

		/* align size to end of page */
		max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				ice_build_ctob(td_cmd, td_offset, max_data,
					       td_tag);

			tx_desc++;
			i++;

			/* wrap around to the start of the ring */
			if (i == tx_ring->count) {
				tx_desc = ICE_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += max_data;
			size -= max_data;

			max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		/* no paged data left: the current descriptor is the last one
		 * and is completed after the loop
		 */
		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
							      size, td_tag);

		tx_desc++;
		i++;

		/* wrap around to the start of the ring */
		if (i == tx_ring->count) {
			tx_desc = ICE_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		/* map the next fragment; checked on the next pass */
		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buf = &tx_ring->tx_buf[i];
		tx_buf->type = ICE_TX_BUF_FRAG;
	}

	/* record SW timestamp if HW timestamp is not available */
	skb_tx_timestamp(first->skb);

	/* i now becomes the index following the last descriptor used */
	i++;
	if (i == tx_ring->count)
		i = 0;

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
	tx_desc->cmd_type_offset_bsz =
			ice_build_ctob(td_cmd, td_offset, size, td_tag);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.
	 *
	 * We also use this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	tx_ring->next_to_use = i;

	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);

	/* notify HW of packet */
	kick = __netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount,
				      netdev_xmit_more());
	if (!kick)
		return;

	if (ice_is_txtime_cfg(tx_ring)) {
		struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
		u32 tstamp_count = tstamp_ring->count;
		u32 j = tstamp_ring->next_to_use;
		struct ice_ts_desc *ts_desc;
		struct timespec64 ts;
		u32 tstamp;

		/* only the nanoseconds part of the skb timestamp is used,
		 * shifted down by ICE_TXTIME_CTX_RESOLUTION_128NS
		 */
		ts = ktime_to_timespec64(first->skb->tstamp);
		tstamp = ts.tv_nsec >> ICE_TXTIME_CTX_RESOLUTION_128NS;

		ts_desc = ICE_TS_DESC(tstamp_ring, j);
		ts_desc->tx_desc_idx_tstamp = ice_build_tstamp_desc(i, tstamp);

		j++;
		if (j == tstamp_count) {
			u32 fetch = tstamp_count - tx_ring->count;

			j = 0;

			/* To prevent an MDD, when wrapping the tstamp ring
			 * create additional TS descriptors equal to the number
			 * of the fetch TS descriptors value. HW will merge the
			 * TS descriptors with the same timestamp value into a
			 * single descriptor.
			 */
			for (; j < fetch; j++) {
				ts_desc = ICE_TS_DESC(tstamp_ring, j);
				ts_desc->tx_desc_idx_tstamp =
				       ice_build_tstamp_desc(i, tstamp);
			}
		}
		tstamp_ring->next_to_use = j;
		writel_relaxed(j, tstamp_ring->tail);
	} else {
		writel_relaxed(i, tx_ring->tail);
	}
	return;

dma_error:
	/* clear DMA mappings for failed tx_buf map */
	for (;;) {
		tx_buf = &tx_ring->tx_buf[i];
		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
		if (tx_buf == first)
			break;
		/* step backwards, wrapping from index 0 to the ring end */
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}
1576
/**
 * ice_tx_csum - Enable Tx checksum offloads
 * @first: pointer to the first Tx buffer of the packet (holds the skb and
 *	   the tx_flags updated here)
 * @off: pointer to struct that holds offload parameters
 *
 * Works out the L2/L3/L4 header lengths of the skb (outer and, for
 * encapsulated packets, inner) and records the matching command bits and
 * offsets in @off. When the offload cannot be described to hardware and TSO
 * is not in use, the checksum is handed to skb_checksum_help() instead.
 *
 * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
 */
static
int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
{
	const struct ice_tx_ring *tx_ring = off->tx_ring;
	u32 l4_len = 0, l3_len = 0, l2_len = 0;
	struct sk_buff *skb = first->skb;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	__be16 frag_off, protocol;
	unsigned char *exthdr;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;

	/* nothing to offload unless the stack asked for a partial checksum */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	protocol = vlan_get_protocol(skb);

	/* for MPLS the IP header is taken from the inner network header and
	 * the L4 header from the checksum start
	 */
	if (eth_p_mpls(protocol)) {
		ip.hdr = skb_inner_network_header(skb);
		l4.hdr = skb_checksum_start(skb);
	} else {
		ip.hdr = skb_network_header(skb);
		l4.hdr = skb_transport_header(skb);
	}

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;

	/* set the tx_flags to indicate the IP protocol type. this is
	 * required so that checksum header computation below is accurate.
	 */
	if (ip.v4->version == 4)
		first->tx_flags |= ICE_TX_FLAGS_IPV4;
	else if (ip.v6->version == 6)
		first->tx_flags |= ICE_TX_FLAGS_IPV6;

	if (skb->encapsulation) {
		bool gso_ena = false;
		u32 tunnel = 0;

		/* define outer network header type */
		if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
			tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ?
				  ICE_TX_CTX_EIPT_IPV4 :
				  ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
			l4_proto = ip.v4->protocol;
		} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
			int ret;

			tunnel |= ICE_TX_CTX_EIPT_IPV6;
			exthdr = ip.hdr + sizeof(*ip.v6);
			l4_proto = ip.v6->nexthdr;
			ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
					       &l4_proto, &frag_off);
			if (ret < 0)
				return -1;
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
			break;
		case IPPROTO_GRE:
			tunnel |= ICE_TXD_CTX_GRE_TUNNELING;
			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			/* unknown outer transport: TSO cannot proceed,
			 * otherwise fall back to a software checksum
			 */
			if (first->tx_flags & ICE_TX_FLAGS_TSO)
				return -1;

			skb_checksum_help(skb);
			return 0;
		}

		/* compute outer L3 header size */
		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
			  ICE_TXD_CTX_QW0_EIPLEN_S;

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
			   ICE_TXD_CTX_QW0_NATLEN_S;

		gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
		/* indicate if we need to offload outer UDP header */
		if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunnel_params |= tunnel;

		/* set DTYP=1 to indicate that it's a Tx context descriptor
		 * in IPsec tunnel mode with Tx offloads in Quad word 1
		 */
		off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			first->tx_flags |= ICE_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			first->tx_flags |= ICE_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		if (first->tx_flags & ICE_TX_FLAGS_TSO)
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;

	} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
		cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
		exthdr = ip.hdr + sizeof(*ip.v6);
		l4_proto = ip.v6->nexthdr;
		/* extension headers are present only when the L4 header does
		 * not directly follow the fixed IPv6 header
		 */
		if (l4.hdr != exthdr)
			ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
					 &frag_off);
	} else {
		/* neither IPv4 nor IPv6: the offload cannot be set up */
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;

	/* this path is taken only when the netdev advertises
	 * NETIF_F_HW_CSUM, the packet is not TSO and it does not need an
	 * SCTP checksum
	 */
	if ((tx_ring->netdev->features & NETIF_F_HW_CSUM) &&
	    !(first->tx_flags & ICE_TX_FLAGS_TSO) &&
	    !skb_csum_is_sctp(skb)) {
		/* Set GCS */
		u16 csum_start = (skb->csum_start - skb->mac_header) / 2;
		u16 csum_offset = skb->csum_offset / 2;
		u16 gcs_params;

		gcs_params = FIELD_PREP(ICE_TX_GCS_DESC_START_M, csum_start) |
			     FIELD_PREP(ICE_TX_GCS_DESC_OFFSET_M, csum_offset) |
			     FIELD_PREP(ICE_TX_GCS_DESC_TYPE_M,
					ICE_TX_GCS_DESC_CSUM_PSH);

		/* Unlike legacy HW checksums, GCS requires a context
		 * descriptor.
		 */
		off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX;
		off->cd_gcs_params = gcs_params;
		/* Fill out CSO info in data descriptors */
		off->td_offset |= offset;
		off->td_cmd |= cmd;
		return 1;
	}

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = (sizeof(struct udphdr) >> 2);
		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
		break;

	default:
		/* unsupported L4 protocol: TSO cannot proceed, otherwise
		 * fall back to a software checksum
		 */
		if (first->tx_flags & ICE_TX_FLAGS_TSO)
			return -1;
		skb_checksum_help(skb);
		return 0;
	}

	off->td_cmd |= cmd;
	off->td_offset |= offset;
	return 1;
}
1793
1794 /**
1795 * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
1796 * @tx_ring: ring to send buffer on
1797 * @first: pointer to struct ice_tx_buf
1798 *
1799 * Checks the skb and set up correspondingly several generic transmit flags
1800 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
1801 */
1802 static void
ice_tx_prepare_vlan_flags(struct ice_tx_ring * tx_ring,struct ice_tx_buf * first)1803 ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first)
1804 {
1805 struct sk_buff *skb = first->skb;
1806
1807 /* nothing left to do, software offloaded VLAN */
1808 if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
1809 return;
1810
1811 /* the VLAN ethertype/tpid is determined by VSI configuration and netdev
1812 * feature flags, which the driver only allows either 802.1Q or 802.1ad
1813 * VLAN offloads exclusively so we only care about the VLAN ID here
1814 */
1815 if (skb_vlan_tag_present(skb)) {
1816 first->vid = skb_vlan_tag_get(skb);
1817 if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
1818 first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
1819 else
1820 first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
1821 }
1822
1823 ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
1824 }
1825
1826 /**
1827 * ice_tso - computes mss and TSO length to prepare for TSO
1828 * @first: pointer to struct ice_tx_buf
1829 * @off: pointer to struct that holds offload parameters
1830 *
1831 * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
1832 */
1833 static
ice_tso(struct ice_tx_buf * first,struct ice_tx_offload_params * off)1834 int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
1835 {
1836 struct sk_buff *skb = first->skb;
1837 union {
1838 struct iphdr *v4;
1839 struct ipv6hdr *v6;
1840 unsigned char *hdr;
1841 } ip;
1842 union {
1843 struct tcphdr *tcp;
1844 struct udphdr *udp;
1845 unsigned char *hdr;
1846 } l4;
1847 u64 cd_mss, cd_tso_len;
1848 __be16 protocol;
1849 u32 paylen;
1850 u8 l4_start;
1851 int err;
1852
1853 if (skb->ip_summed != CHECKSUM_PARTIAL)
1854 return 0;
1855
1856 if (!skb_is_gso(skb))
1857 return 0;
1858
1859 err = skb_cow_head(skb, 0);
1860 if (err < 0)
1861 return err;
1862
1863 protocol = vlan_get_protocol(skb);
1864
1865 if (eth_p_mpls(protocol))
1866 ip.hdr = skb_inner_network_header(skb);
1867 else
1868 ip.hdr = skb_network_header(skb);
1869 l4.hdr = skb_checksum_start(skb);
1870
1871 /* initialize outer IP header fields */
1872 if (ip.v4->version == 4) {
1873 ip.v4->tot_len = 0;
1874 ip.v4->check = 0;
1875 } else {
1876 ip.v6->payload_len = 0;
1877 }
1878
1879 if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
1880 SKB_GSO_GRE_CSUM |
1881 SKB_GSO_IPXIP4 |
1882 SKB_GSO_IPXIP6 |
1883 SKB_GSO_UDP_TUNNEL |
1884 SKB_GSO_UDP_TUNNEL_CSUM)) {
1885 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
1886 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
1887 l4.udp->len = 0;
1888
1889 /* determine offset of outer transport header */
1890 l4_start = (u8)(l4.hdr - skb->data);
1891
1892 /* remove payload length from outer checksum */
1893 paylen = skb->len - l4_start;
1894 csum_replace_by_diff(&l4.udp->check,
1895 (__force __wsum)htonl(paylen));
1896 }
1897
1898 /* reset pointers to inner headers */
1899 ip.hdr = skb_inner_network_header(skb);
1900 l4.hdr = skb_inner_transport_header(skb);
1901
1902 /* initialize inner IP header fields */
1903 if (ip.v4->version == 4) {
1904 ip.v4->tot_len = 0;
1905 ip.v4->check = 0;
1906 } else {
1907 ip.v6->payload_len = 0;
1908 }
1909 }
1910
1911 /* determine offset of transport header */
1912 l4_start = (u8)(l4.hdr - skb->data);
1913
1914 /* remove payload length from checksum */
1915 paylen = skb->len - l4_start;
1916
1917 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
1918 csum_replace_by_diff(&l4.udp->check,
1919 (__force __wsum)htonl(paylen));
1920 /* compute length of UDP segmentation header */
1921 off->header_len = (u8)sizeof(l4.udp) + l4_start;
1922 } else {
1923 csum_replace_by_diff(&l4.tcp->check,
1924 (__force __wsum)htonl(paylen));
1925 /* compute length of TCP segmentation header */
1926 off->header_len = (u8)((l4.tcp->doff * 4) + l4_start);
1927 }
1928
1929 /* update gso_segs and bytecount */
1930 first->gso_segs = skb_shinfo(skb)->gso_segs;
1931 first->bytecount += (first->gso_segs - 1) * off->header_len;
1932
1933 cd_tso_len = skb->len - off->header_len;
1934 cd_mss = skb_shinfo(skb)->gso_size;
1935
1936 /* record cdesc_qw1 with TSO parameters */
1937 off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
1938 (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
1939 (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
1940 (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
1941 first->tx_flags |= ICE_TX_FLAGS_TSO;
1942 return 1;
1943 }
1944
1945 /**
1946 * ice_txd_use_count - estimate the number of descriptors needed for Tx
1947 * @size: transmit request size in bytes
1948 *
1949 * Due to hardware alignment restrictions (4K alignment), we need to
1950 * assume that we can have no more than 12K of data per descriptor, even
1951 * though each descriptor can take up to 16K - 1 bytes of aligned memory.
1952 * Thus, we need to divide by 12K. But division is slow! Instead,
1953 * we decompose the operation into shifts and one relatively cheap
1954 * multiply operation.
1955 *
1956 * To divide by 12K, we first divide by 4K, then divide by 3:
1957 * To divide by 4K, shift right by 12 bits
1958 * To divide by 3, multiply by 85, then divide by 256
1959 * (Divide by 256 is done by shifting right by 8 bits)
1960 * Finally, we add one to round up. Because 256 isn't an exact multiple of
1961 * 3, we'll underestimate near each multiple of 12K. This is actually more
1962 * accurate as we have 4K - 1 of wiggle room that we can fit into the last
1963 * segment. For our purposes this is accurate out to 1M which is orders of
1964 * magnitude greater than our largest possible GSO size.
1965 *
1966 * This would then be implemented as:
1967 * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
1968 *
1969 * Since multiplication and division are commutative, we can reorder
1970 * operations into:
1971 * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
1972 */
ice_txd_use_count(unsigned int size)1973 static unsigned int ice_txd_use_count(unsigned int size)
1974 {
1975 return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
1976 }
1977
1978 /**
1979 * ice_xmit_desc_count - calculate number of Tx descriptors needed
1980 * @skb: send buffer
1981 *
1982 * Returns number of data descriptors needed for this skb.
1983 */
ice_xmit_desc_count(struct sk_buff * skb)1984 static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
1985 {
1986 const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
1987 unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
1988 unsigned int count = 0, size = skb_headlen(skb);
1989
1990 for (;;) {
1991 count += ice_txd_use_count(size);
1992
1993 if (!nr_frags--)
1994 break;
1995
1996 size = skb_frag_size(frag++);
1997 }
1998
1999 return count;
2000 }
2001
/**
 * __ice_chk_linearize - Check if there are more than 8 buffers per packet
 * @skb: send buffer
 *
 * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
 * and so we need to figure out the cases where we need to linearize the skb.
 *
 * For TSO we need to count the TSO header and segment payload separately.
 * As such we need to check cases where we have 7 fragments or more as we
 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
 * the segment payload in the first descriptor, and another 7 for the
 * fragments.
 *
 * Return: true if some window of fragments fails to add up to at least
 * gso_size (the running sum goes negative), false otherwise.
 */
static bool __ice_chk_linearize(struct sk_buff *skb)
{
	const skb_frag_t *frag, *stale;
	int nr_frags, sum;

	/* no need to check if number of frags is less than 7 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nr_frags < (ICE_MAX_BUF_TXD - 1))
		return false;

	/* We need to walk through the list and validate that each group
	 * of 6 fragments totals at least gso_size.
	 */
	nr_frags -= ICE_MAX_BUF_TXD - 2;
	frag = &skb_shinfo(skb)->frags[0];

	/* Initialize size to the negative value of gso_size minus 1. We
	 * use this as the worst case scenario in which the frag ahead
	 * of us only provides one byte which is why we are limited to 6
	 * descriptors for a single transmit as the header and previous
	 * fragment are already consuming 2 descriptors.
	 */
	sum = 1 - skb_shinfo(skb)->gso_size;

	/* Add size of frags 0 through 4 to create our initial sum */
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);

	/* Walk through fragments adding latest fragment, testing it, and
	 * then removing stale fragments from the sum.
	 */
	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
		int stale_size = skb_frag_size(stale);

		/* bring the newest fragment into the window */
		sum += skb_frag_size(frag++);

		/* The stale fragment may present us with a smaller
		 * descriptor than the actual fragment size. To account
		 * for that we need to remove all the data on the front and
		 * figure out what the remainder would be in the last
		 * descriptor associated with the fragment.
		 */
		if (stale_size > ICE_MAX_DATA_PER_TXD) {
			int align_pad = -(skb_frag_off(stale)) &
					(ICE_MAX_READ_REQ_SIZE - 1);

			sum -= align_pad;
			stale_size -= align_pad;

			do {
				sum -= ICE_MAX_DATA_PER_TXD_ALIGNED;
				stale_size -= ICE_MAX_DATA_PER_TXD_ALIGNED;
			} while (stale_size > ICE_MAX_DATA_PER_TXD);
		}

		/* if sum is negative we failed to make sufficient progress */
		if (sum < 0)
			return true;

		/* every window has been checked once the counter runs out */
		if (!nr_frags--)
			break;

		/* drop the oldest fragment before sliding the window on */
		sum -= stale_size;
	}

	return false;
}
2085
2086 /**
2087 * ice_chk_linearize - Check if there are more than 8 fragments per packet
2088 * @skb: send buffer
2089 * @count: number of buffers used
2090 *
2091 * Note: Our HW can't scatter-gather more than 8 fragments to build
2092 * a packet on the wire and so we need to figure out the cases where we
2093 * need to linearize the skb.
2094 */
ice_chk_linearize(struct sk_buff * skb,unsigned int count)2095 static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
2096 {
2097 /* Both TSO and single send will work if count is less than 8 */
2098 if (likely(count < ICE_MAX_BUF_TXD))
2099 return false;
2100
2101 if (skb_is_gso(skb))
2102 return __ice_chk_linearize(skb);
2103
2104 /* we can support up to 8 data buffers for a single send */
2105 return count != ICE_MAX_BUF_TXD;
2106 }
2107
2108 /**
2109 * ice_tstamp - set up context descriptor for hardware timestamp
2110 * @tx_ring: pointer to the Tx ring to send buffer on
2111 * @skb: pointer to the SKB we're sending
2112 * @first: Tx buffer
2113 * @off: Tx offload parameters
2114 */
2115 static void
ice_tstamp(struct ice_tx_ring * tx_ring,struct sk_buff * skb,struct ice_tx_buf * first,struct ice_tx_offload_params * off)2116 ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
2117 struct ice_tx_buf *first, struct ice_tx_offload_params *off)
2118 {
2119 s8 idx;
2120
2121 /* only timestamp the outbound packet if the user has requested it */
2122 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2123 return;
2124
2125 /* Tx timestamps cannot be sampled when doing TSO */
2126 if (first->tx_flags & ICE_TX_FLAGS_TSO)
2127 return;
2128
2129 /* Grab an open timestamp slot */
2130 idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb);
2131 if (idx < 0) {
2132 tx_ring->vsi->back->ptp.tx_hwtstamp_skipped++;
2133 return;
2134 }
2135
2136 off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
2137 (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) |
2138 ((u64)idx << ICE_TXD_CTX_QW1_TSO_LEN_S));
2139 first->tx_flags |= ICE_TX_FLAGS_TSYN;
2140 }
2141
2142 /**
2143 * ice_xmit_frame_ring - Sends buffer on Tx ring
2144 * @skb: send buffer
2145 * @tx_ring: ring to send buffer on
2146 *
2147 * Returns NETDEV_TX_OK if sent, else an error code
2148 */
2149 static netdev_tx_t
ice_xmit_frame_ring(struct sk_buff * skb,struct ice_tx_ring * tx_ring)2150 ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
2151 {
2152 struct ice_tx_offload_params offload = { 0 };
2153 struct ice_vsi *vsi = tx_ring->vsi;
2154 struct ice_tx_buf *first;
2155 struct ethhdr *eth;
2156 unsigned int count;
2157 int tso, csum;
2158
2159 ice_trace(xmit_frame_ring, tx_ring, skb);
2160
2161 count = ice_xmit_desc_count(skb);
2162 if (ice_chk_linearize(skb, count)) {
2163 if (__skb_linearize(skb))
2164 goto out_drop;
2165 count = ice_txd_use_count(skb->len);
2166 ice_stats_inc(tx_ring->ring_stats, tx_linearize);
2167 }
2168
2169 /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
2170 * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
2171 * + 4 desc gap to avoid the cache line where head is,
2172 * + 1 desc for context descriptor,
2173 * otherwise try next time
2174 */
2175 if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
2176 ICE_DESCS_FOR_CTX_DESC)) {
2177 ice_stats_inc(tx_ring->ring_stats, tx_busy);
2178 return NETDEV_TX_BUSY;
2179 }
2180
2181 /* prefetch for bql data which is infrequently used */
2182 netdev_txq_bql_enqueue_prefetchw(txring_txq(tx_ring));
2183
2184 offload.tx_ring = tx_ring;
2185
2186 /* record the location of the first descriptor for this packet */
2187 first = &tx_ring->tx_buf[tx_ring->next_to_use];
2188 first->skb = skb;
2189 first->type = ICE_TX_BUF_SKB;
2190 first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
2191 first->gso_segs = 1;
2192 first->tx_flags = 0;
2193
2194 /* prepare the VLAN tagging flags for Tx */
2195 ice_tx_prepare_vlan_flags(tx_ring, first);
2196 if (first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) {
2197 offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
2198 (ICE_TX_CTX_DESC_IL2TAG2 <<
2199 ICE_TXD_CTX_QW1_CMD_S));
2200 offload.cd_l2tag2 = first->vid;
2201 }
2202
2203 /* set up TSO offload */
2204 tso = ice_tso(first, &offload);
2205 if (tso < 0)
2206 goto out_drop;
2207
2208 /* always set up Tx checksum offload */
2209 csum = ice_tx_csum(first, &offload);
2210 if (csum < 0)
2211 goto out_drop;
2212
2213 /* allow CONTROL frames egress from main VSI if FW LLDP disabled */
2214 eth = (struct ethhdr *)skb_mac_header(skb);
2215
2216 if ((ice_is_switchdev_running(vsi->back) ||
2217 ice_lag_is_switchdev_running(vsi->back)) &&
2218 vsi->type != ICE_VSI_SF)
2219 ice_eswitch_set_target_vsi(skb, &offload);
2220 else if (unlikely((skb->priority == TC_PRIO_CONTROL ||
2221 eth->h_proto == htons(ETH_P_LLDP)) &&
2222 vsi->type == ICE_VSI_PF &&
2223 vsi->port_info->qos_cfg.is_sw_lldp))
2224 offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
2225 ICE_TX_CTX_DESC_SWTCH_UPLINK <<
2226 ICE_TXD_CTX_QW1_CMD_S);
2227
2228 ice_tstamp(tx_ring, skb, first, &offload);
2229
2230 if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
2231 struct ice_tx_ctx_desc *cdesc;
2232 u16 i = tx_ring->next_to_use;
2233
2234 /* grab the next descriptor */
2235 cdesc = ICE_TX_CTX_DESC(tx_ring, i);
2236 i++;
2237 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2238
2239 /* setup context descriptor */
2240 cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
2241 cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
2242 cdesc->gcs = cpu_to_le16(offload.cd_gcs_params);
2243 cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
2244 }
2245
2246 ice_tx_map(tx_ring, first, &offload);
2247 return NETDEV_TX_OK;
2248
2249 out_drop:
2250 ice_trace(xmit_frame_ring_drop, tx_ring, skb);
2251 dev_kfree_skb_any(skb);
2252 return NETDEV_TX_OK;
2253 }
2254
2255 /**
2256 * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
2257 * @skb: send buffer
2258 * @netdev: network interface device structure
2259 *
2260 * Returns NETDEV_TX_OK if sent, else an error code
2261 */
ice_start_xmit(struct sk_buff * skb,struct net_device * netdev)2262 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
2263 {
2264 struct ice_netdev_priv *np = netdev_priv(netdev);
2265 struct ice_vsi *vsi = np->vsi;
2266 struct ice_tx_ring *tx_ring;
2267
2268 tx_ring = vsi->tx_rings[skb->queue_mapping];
2269
2270 /* hardware can't handle really short frames, hardware padding works
2271 * beyond this point
2272 */
2273 if (skb_put_padto(skb, ICE_MIN_TX_LEN))
2274 return NETDEV_TX_OK;
2275
2276 return ice_xmit_frame_ring(skb, tx_ring);
2277 }
2278
2279 /**
2280 * ice_get_dscp_up - return the UP/TC value for a SKB
2281 * @dcbcfg: DCB config that contains DSCP to UP/TC mapping
2282 * @skb: SKB to query for info to determine UP/TC
2283 *
2284 * This function is to only be called when the PF is in L3 DSCP PFC mode
2285 */
ice_get_dscp_up(struct ice_dcbx_cfg * dcbcfg,struct sk_buff * skb)2286 static u8 ice_get_dscp_up(struct ice_dcbx_cfg *dcbcfg, struct sk_buff *skb)
2287 {
2288 u8 dscp = 0;
2289
2290 if (skb->protocol == htons(ETH_P_IP))
2291 dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
2292 else if (skb->protocol == htons(ETH_P_IPV6))
2293 dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
2294
2295 return dcbcfg->dscp_map[dscp];
2296 }
2297
2298 u16
ice_select_queue(struct net_device * netdev,struct sk_buff * skb,struct net_device * sb_dev)2299 ice_select_queue(struct net_device *netdev, struct sk_buff *skb,
2300 struct net_device *sb_dev)
2301 {
2302 struct ice_pf *pf = ice_netdev_to_pf(netdev);
2303 struct ice_dcbx_cfg *dcbcfg;
2304
2305 dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
2306 if (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP)
2307 skb->priority = ice_get_dscp_up(dcbcfg, skb);
2308
2309 return netdev_pick_tx(netdev, skb, sb_dev);
2310 }
2311
2312 /**
2313 * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
2314 * @tx_ring: tx_ring to clean
2315 */
ice_clean_ctrl_tx_irq(struct ice_tx_ring * tx_ring)2316 void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring)
2317 {
2318 struct ice_vsi *vsi = tx_ring->vsi;
2319 s16 i = tx_ring->next_to_clean;
2320 int budget = ICE_DFLT_IRQ_WORK;
2321 struct ice_tx_desc *tx_desc;
2322 struct ice_tx_buf *tx_buf;
2323
2324 tx_buf = &tx_ring->tx_buf[i];
2325 tx_desc = ICE_TX_DESC(tx_ring, i);
2326 i -= tx_ring->count;
2327
2328 do {
2329 struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
2330
2331 /* if next_to_watch is not set then there is no pending work */
2332 if (!eop_desc)
2333 break;
2334
2335 /* prevent any other reads prior to eop_desc */
2336 smp_rmb();
2337
2338 /* if the descriptor isn't done, no work to do */
2339 if (!(eop_desc->cmd_type_offset_bsz &
2340 cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
2341 break;
2342
2343 /* clear next_to_watch to prevent false hangs */
2344 tx_buf->next_to_watch = NULL;
2345 tx_desc->buf_addr = 0;
2346 tx_desc->cmd_type_offset_bsz = 0;
2347
2348 /* move past filter desc */
2349 tx_buf++;
2350 tx_desc++;
2351 i++;
2352 if (unlikely(!i)) {
2353 i -= tx_ring->count;
2354 tx_buf = tx_ring->tx_buf;
2355 tx_desc = ICE_TX_DESC(tx_ring, 0);
2356 }
2357
2358 /* unmap the data header */
2359 if (dma_unmap_len(tx_buf, len))
2360 dma_unmap_single(tx_ring->dev,
2361 dma_unmap_addr(tx_buf, dma),
2362 dma_unmap_len(tx_buf, len),
2363 DMA_TO_DEVICE);
2364 if (tx_buf->type == ICE_TX_BUF_DUMMY)
2365 devm_kfree(tx_ring->dev, tx_buf->raw_buf);
2366
2367 /* clear next_to_watch to prevent false hangs */
2368 tx_buf->type = ICE_TX_BUF_EMPTY;
2369 tx_buf->tx_flags = 0;
2370 tx_buf->next_to_watch = NULL;
2371 dma_unmap_len_set(tx_buf, len, 0);
2372 tx_desc->buf_addr = 0;
2373 tx_desc->cmd_type_offset_bsz = 0;
2374
2375 /* move past eop_desc for start of next FD desc */
2376 tx_buf++;
2377 tx_desc++;
2378 i++;
2379 if (unlikely(!i)) {
2380 i -= tx_ring->count;
2381 tx_buf = tx_ring->tx_buf;
2382 tx_desc = ICE_TX_DESC(tx_ring, 0);
2383 }
2384
2385 budget--;
2386 } while (likely(budget));
2387
2388 i += tx_ring->count;
2389 tx_ring->next_to_clean = i;
2390
2391 /* re-enable interrupt if needed */
2392 ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]);
2393 }
2394