1 /*
2 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/bpf_trace.h>
34 #include <net/xdp_sock_drv.h>
35 #include "en/xdp.h"
36 #include "en/params.h"
37 #include <linux/bitfield.h>
38 #include <net/page_pool/helpers.h>
39
/* Compute the largest SW MTU that still satisfies the linear-SKB condition
 * for the given parameters, based on the linear RQ headroom.
 */
int mlx5e_xdp_max_mtu(struct mlx5e_params *params,
		      struct mlx5e_rq_opt_param *rqo)
{
	int headroom;

	headroom = mlx5e_get_linear_rq_headroom(params, rqo);

	/* Write S for SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) and hr
	 * for the headroom. mlx5e_rx_is_linear_skb checks:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE   (1)
	 * (with hw_mtu == sw_mtu + hard_mtu).
	 * The value returned here is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                   (2)
	 * Substituting sw_mtu := max_mtu into (1) gives
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S on the left, which equals
	 * PAGE_SIZE since PAGE_SIZE and S are both already aligned. Anything
	 * above max_mtu pushes the left side of (1) past PAGE_SIZE, hence
	 * max_mtu is the largest MTU allowed.
	 */
	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(headroom));
}
60
61 static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq * sq,struct mlx5e_rq * rq,struct xdp_buff * xdp)62 mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
63 struct xdp_buff *xdp)
64 {
65 struct page *page = virt_to_page(xdp->data);
66 struct mlx5e_xmit_data_frags xdptxdf = {};
67 struct mlx5e_xmit_data *xdptxd;
68 struct xdp_frame *xdpf;
69 dma_addr_t dma_addr;
70 int i;
71
72 xdpf = xdp_convert_buff_to_frame(xdp);
73 if (unlikely(!xdpf))
74 return false;
75
76 xdptxd = &xdptxdf.xd;
77 xdptxd->data = xdpf->data;
78 xdptxd->len = xdpf->len;
79 xdptxd->has_frags = xdp_frame_has_frags(xdpf);
80
81 if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
82 /* The xdp_buff was in the UMEM and was copied into a newly
83 * allocated page. The UMEM page was returned via the ZCA, and
84 * this new page has to be mapped at this point and has to be
85 * unmapped and returned via xdp_return_frame on completion.
86 */
87
88 /* Prevent double recycling of the UMEM page. Even in case this
89 * function returns false, the xdp_buff shouldn't be recycled,
90 * as it was already done in xdp_convert_zc_to_xdp_frame.
91 */
92 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
93
94 if (unlikely(xdptxd->has_frags))
95 return false;
96
97 dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
98 DMA_TO_DEVICE);
99 if (dma_mapping_error(sq->pdev, dma_addr)) {
100 xdp_return_frame(xdpf);
101 return false;
102 }
103
104 xdptxd->dma_addr = dma_addr;
105
106 if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
107 mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL)))
108 return false;
109
110 /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
111 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
112 (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
113 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
114 (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
115 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
116 (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr });
117 return true;
118 }
119
120 /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
121 * that points to the same memory region as the original xdp_buff. It
122 * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
123 * mode.
124 */
125
126 dma_addr = page_pool_get_dma_addr(page) + offset_in_page(xdpf->data);
127 dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);
128
129 if (xdptxd->has_frags) {
130 xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
131 xdptxdf.dma_arr = NULL;
132
133 for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
134 skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
135 dma_addr_t addr;
136 u32 len;
137
138 addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
139 skb_frag_off(frag);
140 len = skb_frag_size(frag);
141 dma_sync_single_for_device(sq->pdev, addr, len,
142 DMA_BIDIRECTIONAL);
143 }
144 }
145
146 xdptxd->dma_addr = dma_addr;
147
148 if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
149 mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL)))
150 return false;
151
152 /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
153 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
154 (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });
155
156 if (xdptxd->has_frags) {
157 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
158 (union mlx5e_xdp_info)
159 { .page.num = 1 + xdptxdf.sinfo->nr_frags });
160 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
161 (union mlx5e_xdp_info) { .page.page = page });
162 for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
163 skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
164
165 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
166 (union mlx5e_xdp_info)
167 { .page.page = skb_frag_page(frag) });
168 }
169 } else {
170 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
171 (union mlx5e_xdp_info) { .page.num = 1 });
172 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
173 (union mlx5e_xdp_info) { .page.page = page });
174 }
175
176 return true;
177 }
178
/* XDP metadata hook: report the RX hardware timestamp of the packet.
 *
 * The xdp_md context is reinterpreted as the driver's mlx5e_xdp_buff.
 * Stores the CQE timestamp converted to nanoseconds in *timestamp and
 * returns 0, or returns -ENODATA when mlx5e_rx_hw_stamp() reports that
 * RX HW timestamping is not active for the RQ.
 */
static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	const struct mlx5e_xdp_buff *mxbuf = (void *)ctx;

	if (likely(mlx5e_rx_hw_stamp(mxbuf->rq->hwtstamp_config))) {
		*timestamp = mlx5e_cqe_ts_to_ns(mxbuf->rq->ptp_cyc2time,
						mxbuf->rq->clock,
						get_cqe_ts(mxbuf->cqe));
		return 0;
	}

	return -ENODATA;
}
190
/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4 bits:
 * the L4 type occupies bits 1:0 and the L3 (IP) type occupies bits 3:2.
 */
#define RSS_TYPE_MAX_TABLE	16 /* 4-bits max 16 entries */
#define RSS_L4		GENMASK(1, 0)
#define RSS_L3		GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */
195
/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4, sorted
 * numerically. Each value places the IP type in the RSS_L3 field and the L4
 * type in the RSS_L4 field, and is used as an index into mlx5_xdp_rss_type[].
 */
enum mlx5_rss_hash_type {
	RSS_TYPE_NO_HASH	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L3_IPV4	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV4_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV4_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV4_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
	RSS_TYPE_L3_IPV6	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV6_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV6_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV6_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
};
217
/* Translation table from the 4-bit HW RSS type (see enum mlx5_rss_hash_type)
 * to the XDP RSS hash type. The table covers all RSS_TYPE_MAX_TABLE possible
 * 4-bit indices, so a lookup needs no bounds check.
 * Invalid combinations will simply return zero, allows no boundary checks.
 */
static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
	[RSS_TYPE_NO_HASH]	 = XDP_RSS_TYPE_NONE,
	[1]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[2]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[3]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[RSS_TYPE_L3_IPV4]	 = XDP_RSS_TYPE_L3_IPV4,
	[RSS_TYPE_L4_IPV4_TCP]	 = XDP_RSS_TYPE_L4_IPV4_TCP,
	[RSS_TYPE_L4_IPV4_UDP]	 = XDP_RSS_TYPE_L4_IPV4_UDP,
	[RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
	[RSS_TYPE_L3_IPV6]	 = XDP_RSS_TYPE_L3_IPV6,
	[RSS_TYPE_L4_IPV6_TCP]	 = XDP_RSS_TYPE_L4_IPV6_TCP,
	[RSS_TYPE_L4_IPV6_UDP]	 = XDP_RSS_TYPE_L4_IPV6_UDP,
	[RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
	[12]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[13]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[14]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[15]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
};
237
/* XDP metadata hook: report the RSS hash and its type for the packet.
 *
 * Returns -ENODATA when the netdev does not have NETIF_F_RXHASH set.
 * Otherwise stores the CQE hash result in *hash, translates the CQE hash
 * type bits through mlx5_xdp_rss_type[] into *rss_type, and returns 0.
 */
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
			     enum xdp_rss_hash_type *rss_type)
{
	const struct mlx5e_xdp_buff *mxbuf = (void *)ctx;
	const struct mlx5_cqe64 *cqe = mxbuf->cqe;
	u32 htype, idx;

	/* The IP bits are used in place, so both masks must be identical. */
	BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */

	if (unlikely(!(mxbuf->xdp.rxq->dev->features & NETIF_F_RXHASH)))
		return -ENODATA;

	htype = cqe->rss_hash_type;

	/* 4-bit table index: IP type stays in bits 3:2, L4 type is shifted
	 * down into bits 1:0.
	 */
	idx = htype & CQE_RSS_HTYPE_IP;
	idx |= FIELD_GET(CQE_RSS_HTYPE_L4, htype);

	*hash = be32_to_cpu(cqe->rss_hash_result);
	*rss_type = mlx5_xdp_rss_type[idx];

	return 0;
}
259
/* XDP metadata hook: report the VLAN tag carried in the CQE.
 *
 * Returns -ENODATA when cqe_has_vlan() is false for the CQE. Otherwise
 * stores ETH_P_8021Q (network order) in *vlan_proto, the CQE vlan_info
 * (host order) in *vlan_tci, and returns 0.
 */
static int mlx5e_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
				 u16 *vlan_tci)
{
	const struct mlx5e_xdp_buff *mxbuf = (void *)ctx;
	const struct mlx5_cqe64 *cqe = mxbuf->cqe;

	if (cqe_has_vlan(cqe)) {
		*vlan_proto = htons(ETH_P_8021Q);
		*vlan_tci = be16_to_cpu(cqe->vlan_info);
		return 0;
	}

	return -ENODATA;
}
273
/* RX metadata callbacks exposed to XDP programs: timestamp, RSS hash and
 * VLAN tag, each read from the CQE of the packet being processed.
 */
const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
	.xmo_rx_timestamp		= mlx5e_xdp_rx_timestamp,
	.xmo_rx_hash			= mlx5e_xdp_rx_hash,
	.xmo_rx_vlan_tag		= mlx5e_xdp_rx_vlan_tag,
};
279
/* Private context handed to the XSK TX metadata completion callbacks:
 * the completion being processed and the CQ it was polled from.
 */
struct mlx5e_xsk_tx_complete {
	struct mlx5_cqe64 *cqe; /* CQE whose timestamp is read */
	struct mlx5e_cq *cq;    /* CQ; its mdev selects the clock conversion */
};
284
mlx5e_xsk_fill_timestamp(void * _priv)285 static u64 mlx5e_xsk_fill_timestamp(void *_priv)
286 {
287 struct mlx5e_xsk_tx_complete *priv = _priv;
288 u64 ts;
289
290 ts = get_cqe_ts(priv->cqe);
291
292 if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev))
293 return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts);
294
295 return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts);
296 }
297
/* XSK TX metadata hook: request checksum offload for a WQE.
 *
 * @csum_start, @csum_offset: not used by this implementation.
 * @priv: the WQE's ethernet segment.
 */
static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv)
{
	struct mlx5_wqe_eth_seg *eth = priv;

	/* HW/FW is doing parsing, so offsets are largely ignored. */
	eth->cs_flags |= MLX5_ETH_WQE_L3_CSUM;
	eth->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
}
305
/* TX metadata callbacks for AF_XDP sends: completion timestamp conversion
 * and checksum-offload request.
 */
const struct xsk_tx_metadata_ops mlx5e_xsk_tx_metadata_ops = {
	.tmo_fill_timestamp		= mlx5e_xsk_fill_timestamp,
	.tmo_request_checksum		= mlx5e_xsk_request_checksum,
};
310
/* Run the XDP program on a received buffer and act on its verdict.
 *
 * @rq:    RQ the buffer arrived on; its flags and stats are updated here.
 * @prog:  XDP program to run.
 * @mxbuf: driver wrapper holding the xdp_buff passed to the program.
 *
 * returns true if packet was consumed by xdp (every verdict other than
 * XDP_PASS, including failures of XDP_TX/XDP_REDIRECT, which are counted
 * as drops); returns false on XDP_PASS.
 */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
		      struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
{
	struct xdp_buff *xdp = &mxbuf->xdp;
	u32 act;
	int err;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP enabled then page-refcnt==1 here */
		err = xdp_do_redirect(rq->netdev, xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		/* Tells mlx5e_xdp_rx_poll_complete() to call xdp_do_flush() */
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
xdp_abort:
		/* Also reached via goto from failed XDP_TX / XDP_REDIRECT */
		trace_xdp_exception(rq->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}
349
/* Return the producer index at which a WQE of @size WQEBBs can be written
 * contiguously.
 *
 * When fewer than @size contiguous WQEBBs remain from the current producer
 * position, the remainder is filled with NOPs (each recorded as a one-WQEBB,
 * zero-packet entry) and the index after them is returned.
 */
static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	u16 room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	u16 k;

	if (likely(room >= size))
		return pi;

	/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
	for (k = 0; k < room; k++) {
		sq->db.wqe_info[pi + k] = (struct mlx5e_xdp_wqe_info) {
			.num_wqebbs = 1,
			.num_pkts = 0,
		};
		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
	}
	sq->stats->nops += room;

	/* sq->pc was advanced by the NOPs; recompute the index from it. */
	return mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
378
mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq * sq)379 static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
380 {
381 struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
382 struct mlx5e_xdpsq_stats *stats = sq->stats;
383 struct mlx5e_tx_wqe *wqe;
384 u16 pi;
385
386 pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
387 wqe = MLX5E_TX_FETCH_WQE(sq, pi);
388 net_prefetchw(wqe->data);
389
390 *session = (struct mlx5e_tx_mpwqe) {
391 .wqe = wqe,
392 .bytes_count = 0,
393 .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
394 .ds_count_max = sq->max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS,
395 .pkt_count = 0,
396 .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
397 };
398
399 stats->mpwqe++;
400 }
401
mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq * sq)402 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
403 {
404 struct mlx5_wq_cyc *wq = &sq->wq;
405 struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
406 struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
407 u16 ds_count = session->ds_count;
408 u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
409 struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
410
411 cseg->opmod_idx_opcode =
412 cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
413 cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
414
415 wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
416 wi->num_pkts = session->pkt_count;
417
418 sq->pc += wi->num_wqebbs;
419
420 sq->doorbell_cseg = cseg;
421
422 session->wqe = NULL; /* Close session */
423 }
424
/* Non-negative results of the xmit_xdp_frame_check callbacks. The xmit
 * routines treat 0 as "check not done yet" and negative values as failure.
 */
enum {
	MLX5E_XDP_CHECK_OK = 1,          /* room available, go ahead */
	MLX5E_XDP_CHECK_START_MPWQE = 2, /* room available, open a new session */
};
429
mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq * sq)430 INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
431 {
432 if (unlikely(!sq->mpwqe.wqe)) {
433 if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
434 sq->stop_room))) {
435 /* SQ is full, ring doorbell */
436 mlx5e_xmit_xdp_doorbell(sq);
437 sq->stats->full++;
438 return -EBUSY;
439 }
440
441 return MLX5E_XDP_CHECK_START_MPWQE;
442 }
443
444 return MLX5E_XDP_CHECK_OK;
445 }
446
/* Forward declaration: the MPWQE xmit routine below falls back to the
 * regular-WQE routine for multi-buffer packets.
 */
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result, struct xsk_tx_metadata *meta);
450
/* Transmit one packet through the MPWQE (multi-packet WQE) path.
 *
 * @sq:           XDP send queue.
 * @xdptxd:       packet descriptor; when has_frags is set it must be embedded
 *                in a struct mlx5e_xmit_data_frags (container_of is applied).
 * @check_result: result of an earlier xmit_xdp_frame_check call, or 0 to run
 *                the check here.
 * @meta:         XSK TX metadata, forwarded to xsk_tx_metadata_request().
 *
 * Packets that consist of more than one buffer are handed to
 * mlx5e_xmit_xdp_frame() after closing any open session. A packet whose
 * linear part is empty and that has a single fragment is sent as that
 * fragment.
 *
 * Returns true if the packet was added to a session (or sent by the
 * fallback), false on error or when the SQ is full.
 */
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
			   int check_result, struct xsk_tx_metadata *meta)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_xmit_data *p = xdptxd;
	struct mlx5e_xmit_data tmp;

	if (xdptxd->has_frags) {
		struct mlx5e_xmit_data_frags *xdptxdf =
			container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);

		/* Count buffers: 1 for a non-empty linear part + fragments */
		if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) {
			/* MPWQE is enabled, but a multi-buffer packet is queued for
			 * transmission. MPWQE can't send fragmented packets, so close
			 * the current session and fall back to a regular WQE.
			 */
			if (unlikely(sq->mpwqe.wqe))
				mlx5e_xdp_mpwqe_complete(sq);
			return mlx5e_xmit_xdp_frame(sq, xdptxd, 0, meta);
		}
		if (!xdptxd->len) {
			/* Empty linear part and exactly one fragment: describe
			 * that fragment in tmp and send it as the packet. Only
			 * data, len and dma_addr of tmp are set.
			 */
			skb_frag_t *frag = &xdptxdf->sinfo->frags[0];

			tmp.data = skb_frag_address(frag);
			tmp.len = skb_frag_size(frag);
			tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] :
				page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			p = &tmp;
		}
	}

	if (unlikely(p->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one dseg,
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
		xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, &session->wqe->eth);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);

	if (unlikely(mlx5e_xdp_mpwqe_is_full(session)))
		mlx5e_xdp_mpwqe_complete(sq);

	stats->xmit++;
	return true;
}
512
mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq * sq,int stop_room)513 static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
514 {
515 if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
516 /* SQ is full, ring doorbell */
517 mlx5e_xmit_xdp_doorbell(sq);
518 sq->stats->full++;
519 return -EBUSY;
520 }
521
522 return MLX5E_XDP_CHECK_OK;
523 }
524
/* Pre-transmit check for the regular-WQE xmit path: requires a stop room of
 * 1. Returns MLX5E_XDP_CHECK_OK or -EBUSY, as
 * mlx5e_xmit_xdp_frame_check_stop_room() does.
 */
INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
}
529
/* Transmit one packet as a regular SEND WQE.
 *
 * @sq:           XDP send queue.
 * @xdptxd:       packet descriptor; when has_frags is set it must be embedded
 *                in a struct mlx5e_xmit_data_frags (container_of is applied).
 * @check_result: result of an earlier xmit_xdp_frame_check call, or 0 to run
 *                the room check here. Must be 0 for packets with fragments,
 *                because fragments are only accounted for inside that branch.
 * @meta:         XSK TX metadata, forwarded to xsk_tx_metadata_request().
 *
 * Returns true if the WQE was written (the doorbell is not rung here; the
 * control segment is saved in sq->doorbell_cseg), false if the packet is
 * invalid for this SQ or the SQ is full.
 */
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result, struct xsk_tx_metadata *meta)
{
	struct mlx5e_xmit_data_frags *xdptxdf =
		container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
	struct mlx5_wq_cyc       *wq   = &sq->wq;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5e_tx_wqe *wqe;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;
	u16 ds_cnt, inline_hdr_sz;
	unsigned int frags_size;
	u8 num_wqebbs = 1;
	int num_frags = 0;
	bool inline_ok;
	bool linear;
	u16 pi;
	int i;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	/* When inlining is required, the linear part must be long enough to
	 * supply the inline header.
	 */
	inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE ||
		dma_len >= MLX5E_XDP_MIN_INLINE;
	frags_size = xdptxd->has_frags ? xdptxdf->sinfo->xdp_frags_size : 0;

	if (unlikely(!inline_ok || sq->hw_mtu < dma_len + frags_size)) {
		stats->err++;
		return false;
	}

	inline_hdr_sz = 0;
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;

	/* linear: whether any linear bytes remain after the inline header */
	linear = !!(dma_len - inline_hdr_sz);
	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz;

	/* check_result must be 0 if xdptxd->has_frags is true. */
	if (!check_result) {
		int stop_room = 1;

		if (xdptxd->has_frags) {
			ds_cnt += xdptxdf->sinfo->nr_frags;
			num_frags = xdptxdf->sinfo->nr_frags;
			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
			 * enough to hold all fragments.
			 */
			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
		}

		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
	}
	if (unlikely(check_result < 0))
		return false;

	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	net_prefetchw(wqe);

	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	/* copy the inline part if required */
	if (inline_hdr_sz) {
		/* The first bytes go into the eth segment, the rest of the
		 * inline header spills into the first data segment slot.
		 */
		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
		       inline_hdr_sz - sizeof(eseg->inline_hdr.start));
		dma_len  -= inline_hdr_sz;
		dma_addr += inline_hdr_sz;
		dseg++;
	}

	/* write the dma part */
	if (linear) {
		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->byte_count = cpu_to_be32(dma_len);
		dseg->lkey       = sq->mkey_be;
		dseg++;
	}

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	/* Clear the ctrl trailer and the eth segment up to (not including)
	 * its trailer.
	 */
	memset(&cseg->trailer, 0, sizeof(cseg->trailer));
	memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));

	eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);

	/* One data segment per fragment; use the caller-provided DMA
	 * addresses when dma_arr is set, otherwise derive them from the
	 * fragment's page_pool page.
	 */
	for (i = 0; i < num_frags; i++) {
		skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
		dma_addr_t addr;

		addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
			page_pool_get_dma_addr(skb_frag_page(frag)) +
			skb_frag_off(frag);

		dseg->addr = cpu_to_be64(addr);
		dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
		dseg->lkey = sq->mkey_be;
		dseg++;
	}

	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

	sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
		.num_wqebbs = num_wqebbs,
		.num_pkts = 1,
	};

	sq->pc += num_wqebbs;

	xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg);

	sq->doorbell_cseg = cseg;

	stats->xmit++;
	return true;
}
653
/* Release the resources of every packet covered by one completed WQE.
 *
 * @sq:         XDP send queue.
 * @wi:         info of the completed WQE; num_pkts entries are processed.
 * @xsk_frames: incremented once per AF_XDP packet.
 * @bq:         bulk queue used to return xdp_frames.
 * @cq, @cqe:   completion context passed to the XSK TX metadata completion;
 *              mlx5e_free_xdpsq_descs() passes NULL for both.
 *
 * For each packet, the first FIFO entry selects the mode and the following
 * entries are popped in the same order the xmit paths pushed them.
 */
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  struct xdp_frame_bulk *bq,
				  struct mlx5e_cq *cq,
				  struct mlx5_cqe64 *cqe)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME: {
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			struct xdp_frame *xdpf;
			dma_addr_t dma_addr;

			/* Entries: frame pointer, linear DMA address, then
			 * one DMA address per fragment.
			 */
			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			xdpf = xdpi.frame.xdpf;
			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			dma_addr = xdpi.frame.dma_addr;

			dma_unmap_single(sq->pdev, dma_addr,
					 xdpf->len, DMA_TO_DEVICE);
			if (xdp_frame_has_frags(xdpf)) {
				struct skb_shared_info *sinfo;
				int j;

				sinfo = xdp_get_shared_info_from_frame(xdpf);
				for (j = 0; j < sinfo->nr_frags; j++) {
					skb_frag_t *frag = &sinfo->frags[j];

					xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
					dma_addr = xdpi.frame.dma_addr;

					dma_unmap_single(sq->pdev, dma_addr,
							 skb_frag_size(frag), DMA_TO_DEVICE);
				}
			}
			xdp_return_frame_bulk(xdpf, bq);
			break;
		}
		case MLX5E_XDP_XMIT_MODE_PAGE: {
			/* XDP_TX from the regular RQ */
			u8 num, n = 0;

			/* Entries: page count, then that many page pointers */
			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			num = xdpi.page.num;

			do {
				struct page *page;

				xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
				page = xdpi.page.page;

				/* No need to check page_pool_page_is_pp() as we
				 * know this is a page_pool page.
				 */
				page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp,
							 page);
			} while (++n < num);

			break;
		}
		case MLX5E_XDP_XMIT_MODE_XSK: {
			/* AF_XDP send */
			struct xsk_tx_metadata_compl *compl = NULL;
			struct mlx5e_xsk_tx_complete priv = {
				.cqe = cqe,
				.cq = cq,
			};

			/* An extra FIFO entry carrying the metadata completion
			 * is present only when TX metadata is enabled on the
			 * pool.
			 * NOTE(review): cq/cqe can be NULL here (teardown
			 * path); confirm the completion helper does not reach
			 * the timestamp callback in that case.
			 */
			if (xp_tx_metadata_enabled(sq->xsk_pool)) {
				xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
				compl = &xdpi.xsk_meta;

				xsk_tx_metadata_complete(compl, &mlx5e_xsk_tx_metadata_ops, &priv);
			}

			(*xsk_frames)++;
			break;
		}
		default:
			WARN_ON_ONCE(true);
		}
	}
}
743
/* Poll the XDP SQ's completion queue and release completed descriptors.
 *
 * @cq: completion queue embedded in a struct mlx5e_xdpsq.
 *
 * Processes up to MLX5E_TX_CQ_POLL_BUDGET CQEs. For each CQE, every WQE up to
 * and including the one named by the CQE's wqe_counter is released.
 *
 * Returns true when the whole budget was consumed, false otherwise
 * (including when the SQ is not enabled or no CQE is pending).
 */
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct xdp_frame_bulk bq;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	xdp_frame_bulk_init(&bq);

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		/* Walk all WQEs covered by this CQE, ending with the one
		 * whose counter matches wqe_counter.
		 */
		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq, cq, cqe);
		} while (!last_wqe);

		/* Any opcode other than MLX5_CQE_REQ is reported and dumped;
		 * ci and wi still refer to the last WQE walked above.
		 */
		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	xdp_flush_frame_bulk(&bq);

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
814
mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq * sq)815 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
816 {
817 struct xdp_frame_bulk bq;
818 u32 xsk_frames = 0;
819
820 xdp_frame_bulk_init(&bq);
821
822 rcu_read_lock(); /* need for xdp_return_frame_bulk */
823
824 while (sq->cc != sq->pc) {
825 struct mlx5e_xdp_wqe_info *wi;
826 u16 ci;
827
828 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
829 wi = &sq->db.wqe_info[ci];
830
831 sq->cc += wi->num_wqebbs;
832
833 mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq, NULL, NULL);
834 }
835
836 xdp_flush_frame_bulk(&bq);
837 rcu_read_unlock();
838
839 if (xsk_frames)
840 xsk_tx_completed(sq->xsk_pool, xsk_frames);
841 }
842
/* Transmit a batch of xdp_frames on the XDP SQ of the current CPU's channel.
 *
 * @dev:    net device.
 * @n:      number of entries in @frames.
 * @frames: frames to transmit.
 * @flags:  XDP_XMIT_* flags; XDP_XMIT_FLUSH rings the doorbell before
 *          returning.
 *
 * Each frame's linear part and fragments are DMA-mapped here; the mappings
 * are recorded in the xdpi FIFO (MLX5E_XDP_XMIT_MODE_FRAME) and released on
 * completion. Processing stops at the first frame that cannot be mapped or
 * queued; every mapping made for that frame is undone before stopping.
 *
 * Returns the number of frames queued, or -ENETDOWN when XDP TX is disabled,
 * -EINVAL on unknown flags, -ENXIO when the current CPU has no channel.
 */
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int nxmit = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct mlx5e_xmit_data_frags xdptxdf = {};
		struct xdp_frame *xdpf = frames[i];
		dma_addr_t dma_arr[MAX_SKB_FRAGS];
		struct mlx5e_xmit_data *xdptxd;
		bool ret;

		xdptxd = &xdptxdf.xd;
		xdptxd->data = xdpf->data;
		xdptxd->len = xdpf->len;
		xdptxd->has_frags = xdp_frame_has_frags(xdpf);
		xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data,
						  xdptxd->len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr)))
			break;

		if (xdptxd->has_frags) {
			int j;

			xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
			xdptxdf.dma_arr = dma_arr;
			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) {
				skb_frag_t *frag = &xdptxdf.sinfo->frags[j];

				dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag),
							    skb_frag_size(frag), DMA_TO_DEVICE);

				if (!dma_mapping_error(sq->pdev, dma_arr[j]))
					continue;
				/* mapping error: undo the fragments mapped so
				 * far and the linear part mapped above. Nothing
				 * was queued for this frame, so no completion
				 * would ever unmap them.
				 */
				while (--j >= 0)
					dma_unmap_single(sq->pdev, dma_arr[j],
							 skb_frag_size(&xdptxdf.sinfo->frags[j]),
							 DMA_TO_DEVICE);
				dma_unmap_single(sq->pdev, xdptxd->dma_addr,
						 xdptxd->len, DMA_TO_DEVICE);
				goto out;
			}
		}

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL);
		if (unlikely(!ret)) {
			int j;

			/* Not queued: release the linear and fragment mappings */
			dma_unmap_single(sq->pdev, xdptxd->dma_addr,
					 xdptxd->len, DMA_TO_DEVICE);
			if (!xdptxd->has_frags)
				break;
			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
				dma_unmap_single(sq->pdev, dma_arr[j],
						 skb_frag_size(&xdptxdf.sinfo->frags[j]),
						 DMA_TO_DEVICE);
			break;
		}

		/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr });
		if (xdptxd->has_frags) {
			int j;

			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
				mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
						     (union mlx5e_xdp_info)
						     { .frame.dma_addr = dma_arr[j] });
		}
		nxmit++;
	}

out:
	/* Close any MPWQE session left open by the loop */
	if (sq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(sq);

	if (flags & XDP_XMIT_FLUSH)
		mlx5e_xmit_xdp_doorbell(sq);

	return nxmit;
}
948
mlx5e_xdp_rx_poll_complete(struct mlx5e_rq * rq)949 void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
950 {
951 struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
952
953 if (xdpsq->mpwqe.wqe)
954 mlx5e_xdp_mpwqe_complete(xdpsq);
955
956 mlx5e_xmit_xdp_doorbell(xdpsq);
957
958 if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
959 xdp_do_flush();
960 __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
961 }
962 }
963
/* Select the SQ's xmit and pre-xmit-check callbacks: the MPWQE variants when
 * @is_mpw is true, the regular-WQE variants otherwise.
 */
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	if (is_mpw) {
		sq->xmit_xdp_frame_check = mlx5e_xmit_xdp_frame_check_mpwqe;
		sq->xmit_xdp_frame = mlx5e_xmit_xdp_frame_mpwqe;
		return;
	}

	sq->xmit_xdp_frame_check = mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = mlx5e_xmit_xdp_frame;
}
971