1 /* 2 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/tcp.h> 34 #include <linux/if_vlan.h> 35 #include <net/geneve.h> 36 #include <net/dsfield.h> 37 #include "en.h" 38 #include "en/txrx.h" 39 #include "ipoib/ipoib.h" 40 #include "en_accel/en_accel.h" 41 #include "en_accel/ipsec_rxtx.h" 42 #include "en_accel/psp_rxtx.h" 43 #include "en_accel/macsec.h" 44 #include "en/ptp.h" 45 #include <net/ipv6.h> 46 47 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) 48 { 49 int i; 50 51 for (i = 0; i < num_dma; i++) { 52 struct mlx5e_sq_dma *last_pushed_dma = 53 mlx5e_dma_get(sq, --sq->dma_fifo_pc); 54 55 mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma); 56 } 57 } 58 59 static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) 60 { 61 #define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) 62 63 return max(skb_network_offset(skb), MLX5E_MIN_INLINE); 64 } 65 66 static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) 67 { 68 if (skb_transport_header_was_set(skb)) 69 return skb_transport_offset(skb); 70 else 71 return mlx5e_skb_l2_header_offset(skb); 72 } 73 74 static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, 75 struct sk_buff *skb) 76 { 77 u16 hlen; 78 79 switch (mode) { 80 case MLX5_INLINE_MODE_NONE: 81 return 0; 82 case MLX5_INLINE_MODE_TCP_UDP: 83 hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); 84 if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) 85 hlen += VLAN_HLEN; 86 break; 87 case MLX5_INLINE_MODE_IP: 88 hlen = mlx5e_skb_l3_header_offset(skb); 89 break; 90 case MLX5_INLINE_MODE_L2: 91 default: 92 hlen = mlx5e_skb_l2_header_offset(skb); 93 } 94 return min_t(u16, hlen, skb_headlen(skb)); 95 } 96 97 #define MLX5_UNSAFE_MEMCPY_DISCLAIMER \ 98 "This copy has been bounds-checked earlier in " \ 99 "mlx5i_sq_calc_wqe_attr() and intentionally " \ 100 "crosses a flex array boundary. Since it is " \ 101 "performance sensitive, splitting the copy is " \ 102 "undesirable." 103 104 static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) 105 { 106 struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; 107 int cpy1_sz = 2 * ETH_ALEN; 108 int cpy2_sz = ihs - cpy1_sz; 109 110 memcpy(&vhdr->addrs, skb->data, cpy1_sz); 111 vhdr->h_vlan_proto = skb->vlan_proto; 112 vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); 113 unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto, 114 skb->data + cpy1_sz, 115 cpy2_sz, 116 MLX5_UNSAFE_MEMCPY_DISCLAIMER); 117 } 118 119 static inline void 120 mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, 121 struct mlx5e_accel_tx_state *accel, 122 struct mlx5_wqe_eth_seg *eseg) 123 { 124 #ifdef CONFIG_MLX5_EN_PSP 125 if (unlikely(mlx5e_psp_txwqe_build_eseg_csum(sq, skb, &accel->psp_st, eseg))) 126 return; 127 #endif 128 129 if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg))) 130 return; 131 132 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 133 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; 134 if (skb->encapsulation) { 135 eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | 136 MLX5_ETH_WQE_L4_INNER_CSUM; 137 sq->stats->csum_partial_inner++; 138 } else { 139 eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; 140 sq->stats->csum_partial++; 141 } 142 #ifdef CONFIG_MLX5_EN_TLS 143 } else if (unlikely(accel && accel->tls.tls_tisn)) { 144 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; 145 sq->stats->csum_partial++; 146 #endif 147 } else 148 sq->stats->csum_none++; 149 } 150 151 /* Returns the number of header bytes that we plan 152 * to inline later in the transmit descriptor 153 */ 154 static inline u16 155 mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb) 156 { 157 struct mlx5e_sq_stats *stats = sq->stats; 158 u16 ihs; 159 160 if (skb->encapsulation) { 161 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) 162 ihs = skb_inner_transport_offset(skb) + 163 sizeof(struct udphdr); 164 else 165 ihs = skb_inner_tcp_all_headers(skb); 166 stats->tso_inner_packets++; 167 stats->tso_inner_bytes += skb->len; 168 } else { 169 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) 170 ihs = skb_transport_offset(skb) + sizeof(struct udphdr); 171 else 172 ihs = skb_tcp_all_headers(skb); 173 stats->tso_packets++; 174 stats->tso_bytes += skb->len; 175 } 176 177 return ihs; 178 } 179 180 static inline int 181 mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, 182 unsigned char *skb_data, u16 headlen, 183 struct mlx5_wqe_data_seg *dseg) 184 { 185 dma_addr_t dma_addr = 0; 186 u8 num_dma = 0; 187 int i; 188 189 if (headlen) { 190 dma_addr = dma_map_single(sq->pdev, skb_data, headlen, 191 DMA_TO_DEVICE); 192 if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) 193 goto dma_unmap_wqe_err; 194 195 dseg->addr = cpu_to_be64(dma_addr); 196 dseg->lkey = sq->mkey_be; 197 dseg->byte_count = cpu_to_be32(headlen); 198 199 mlx5e_dma_push_single(sq, dma_addr, headlen); 200 num_dma++; 201 dseg++; 202 } 203 204 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 205 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 206 int fsz = skb_frag_size(frag); 207 208 dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, 209 DMA_TO_DEVICE); 210 if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) 211 goto dma_unmap_wqe_err; 212 213 dseg->addr = cpu_to_be64(dma_addr); 214 dseg->lkey = sq->mkey_be; 215 dseg->byte_count = cpu_to_be32(fsz); 216 217 mlx5e_dma_push_netmem(sq, skb_frag_netmem(frag), dma_addr, fsz); 218 num_dma++; 219 dseg++; 220 } 221 222 return num_dma; 223 224 dma_unmap_wqe_err: 225 mlx5e_dma_unmap_wqe_err(sq, num_dma); 226 return -ENOMEM; 227 } 228 229 struct mlx5e_tx_attr { 230 u32 num_bytes; 231 u16 headlen; 232 u16 ihs; 233 __be16 mss; 234 u16 insz; 235 u8 opcode; 236 }; 237 238 struct mlx5e_tx_wqe_attr { 239 u16 ds_cnt; 240 u16 ds_cnt_inl; 241 u16 ds_cnt_ids; 242 u8 num_wqebbs; 243 }; 244 245 static u8 246 mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, 247 struct mlx5e_accel_tx_state *accel) 248 { 249 u8 mode; 250 251 #ifdef CONFIG_MLX5_EN_TLS 252 if (accel->tls.tls_tisn) 253 return MLX5_INLINE_MODE_TCP_UDP; 254 #endif 255 256 mode = sq->min_inline_mode; 257 258 if (skb_vlan_tag_present(skb)) 259 mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); 260 261 return mode; 262 } 263 264 static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, 265 struct mlx5e_accel_tx_state *accel, 266 struct mlx5e_tx_attr *attr) 267 { 268 struct mlx5e_sq_stats *stats = sq->stats; 269 270 if (skb_is_gso(skb)) { 271 u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb); 272 273 *attr = (struct mlx5e_tx_attr) { 274 .opcode = MLX5_OPCODE_LSO, 275 .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), 276 .ihs = ihs, 277 .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs, 278 .headlen = skb_headlen(skb) - ihs, 279 }; 280 281 stats->packets += skb_shinfo(skb)->gso_segs; 282 } else { 283 u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel); 284 u16 ihs = mlx5e_calc_min_inline(mode, skb); 285 286 *attr = (struct mlx5e_tx_attr) { 287 .opcode = MLX5_OPCODE_SEND, 288 .mss = cpu_to_be16(0), 289 .ihs = ihs, 290 .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN), 291 .headlen = skb_headlen(skb) - ihs, 292 }; 293 294 stats->packets++; 295 } 296 297 attr->insz = mlx5e_accel_tx_ids_len(sq, skb, accel); 298 stats->bytes += attr->num_bytes; 299 } 300 301 static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr, 302 struct mlx5e_tx_wqe_attr *wqe_attr) 303 { 304 u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT; 305 u16 ds_cnt_inl = 0; 306 u16 ds_cnt_ids = 0; 307 308 /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */ 309 310 if (attr->insz) 311 ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, 312 MLX5_SEND_WQE_DS); 313 314 ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids; 315 if (attr->ihs) { 316 u16 inl = attr->ihs - INL_HDR_START_SZ; 317 318 if (skb_vlan_tag_present(skb)) 319 inl += VLAN_HLEN; 320 321 ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); 322 if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS)) 323 netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl, 324 (u16)MLX5E_MAX_TX_INLINE_DS); 325 ds_cnt += ds_cnt_inl; 326 } 327 328 *wqe_attr = (struct mlx5e_tx_wqe_attr) { 329 .ds_cnt = ds_cnt, 330 .ds_cnt_inl = ds_cnt_inl, 331 .ds_cnt_ids = ds_cnt_ids, 332 .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), 333 }; 334 } 335 336 static void mlx5e_tx_skb_update_ts_flags(struct sk_buff *skb) 337 { 338 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) 339 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 340 skb_tx_timestamp(skb); 341 } 342 343 static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) 344 { 345 if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) { 346 netif_tx_stop_queue(sq->txq); 347 sq->stats->stopped++; 348 } 349 } 350 351 static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) 352 { 353 struct mlx5e_tx_wqe_info *wi; 354 struct mlx5e_tx_wqe *wqe; 355 u16 pi; 356 357 /* Must not be called when a MPWQE session is active but empty. */ 358 mlx5e_tx_mpwqe_ensure_complete(sq); 359 360 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); 361 wi = &sq->db.wqe_info[pi]; 362 363 *wi = (struct mlx5e_tx_wqe_info) { 364 .num_wqebbs = 1, 365 }; 366 367 wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); 368 mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); 369 } 370 371 static inline void 372 mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, 373 const struct mlx5e_tx_attr *attr, 374 const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma, 375 struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, 376 struct mlx5_wqe_eth_seg *eseg, bool xmit_more) 377 { 378 struct mlx5_wq_cyc *wq = &sq->wq; 379 bool send_doorbell; 380 381 *wi = (struct mlx5e_tx_wqe_info) { 382 .skb = skb, 383 .num_bytes = attr->num_bytes, 384 .num_dma = num_dma, 385 .num_wqebbs = wqe_attr->num_wqebbs, 386 .num_fifo_pkts = 0, 387 }; 388 389 cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); 390 cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); 391 392 mlx5e_tx_skb_update_ts_flags(skb); 393 394 sq->pc += wi->num_wqebbs; 395 396 mlx5e_tx_check_stop(sq); 397 398 if (unlikely(sq->ptpsq && 399 (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) { 400 u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); 401 402 mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist); 403 404 mlx5e_skb_cb_hwtstamp_init(skb); 405 mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, 406 metadata_index); 407 /* ensure skb is put on metadata_map before tracking the index */ 408 wmb(); 409 mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); 410 if (!netif_tx_queue_stopped(sq->txq) && 411 mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { 412 netif_tx_stop_queue(sq->txq); 413 sq->stats->stopped++; 414 } 415 skb_get(skb); 416 } 417 418 send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more); 419 if (send_doorbell) 420 mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); 421 } 422 423 static void 424 mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, 425 const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr, 426 struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) 427 { 428 struct mlx5_wqe_ctrl_seg *cseg; 429 struct mlx5_wqe_eth_seg *eseg; 430 struct mlx5_wqe_data_seg *dseg; 431 struct mlx5e_tx_wqe_info *wi; 432 u16 ihs = attr->ihs; 433 struct mlx5e_sq_stats *stats = sq->stats; 434 int num_dma; 435 436 stats->xmit_more += xmit_more; 437 438 /* fill wqe */ 439 wi = &sq->db.wqe_info[pi]; 440 cseg = &wqe->ctrl; 441 eseg = &wqe->eth; 442 dseg = wqe->data; 443 444 eseg->mss = attr->mss; 445 446 if (ihs) { 447 u8 *start = eseg->inline_hdr.start; 448 449 if (skb_vlan_tag_present(skb)) { 450 mlx5e_insert_vlan(start, skb, ihs); 451 ihs += VLAN_HLEN; 452 stats->added_vlan_packets++; 453 } else { 454 unsafe_memcpy(eseg->inline_hdr.start, skb->data, 455 attr->ihs, 456 MLX5_UNSAFE_MEMCPY_DISCLAIMER); 457 } 458 eseg->inline_hdr.sz |= cpu_to_be16(ihs); 459 dseg += wqe_attr->ds_cnt_inl; 460 } 461 462 dseg += wqe_attr->ds_cnt_ids; 463 num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs, 464 attr->headlen, dseg); 465 if (unlikely(num_dma < 0)) 466 goto err_drop; 467 468 mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, eseg, xmit_more); 469 470 return; 471 472 err_drop: 473 stats->dropped++; 474 dev_kfree_skb_any(skb); 475 mlx5e_tx_flush(sq); 476 } 477 478 static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) 479 { 480 return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs && 481 !attr->insz && !mlx5e_macsec_skb_is_offload(skb); 482 } 483 484 static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg) 485 { 486 struct mlx5e_tx_mpwqe *session = &sq->mpwqe; 487 488 /* Assumes the session is already running and has at least one packet. */ 489 return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); 490 } 491 492 static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, 493 struct mlx5_wqe_eth_seg *eseg) 494 { 495 struct mlx5e_tx_mpwqe *session = &sq->mpwqe; 496 struct mlx5e_tx_wqe *wqe; 497 u16 pi, num_wqebbs; 498 499 pi = mlx5e_txqsq_get_next_pi_anysize(sq, &num_wqebbs); 500 wqe = MLX5E_TX_FETCH_WQE(sq, pi); 501 net_prefetchw(wqe->data); 502 503 *session = (struct mlx5e_tx_mpwqe) { 504 .wqe = wqe, 505 .bytes_count = 0, 506 .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, 507 .ds_count_max = num_wqebbs * MLX5_SEND_WQEBB_NUM_DS, 508 .pkt_count = 0, 509 .inline_on = 0, 510 }; 511 512 memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); 513 514 sq->stats->mpwqe_blks++; 515 } 516 517 static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq) 518 { 519 return sq->mpwqe.wqe; 520 } 521 522 static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd) 523 { 524 struct mlx5e_tx_mpwqe *session = &sq->mpwqe; 525 struct mlx5_wqe_data_seg *dseg; 526 527 dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; 528 529 session->pkt_count++; 530 session->bytes_count += txd->len; 531 532 dseg->addr = cpu_to_be64(txd->dma_addr); 533 dseg->byte_count = cpu_to_be32(txd->len); 534 dseg->lkey = sq->mkey_be; 535 session->ds_count++; 536 537 sq->stats->mpwqe_pkts++; 538 } 539 540 static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq) 541 { 542 struct mlx5e_tx_mpwqe *session = &sq->mpwqe; 543 u8 ds_count = session->ds_count; 544 struct mlx5_wqe_ctrl_seg *cseg; 545 struct mlx5e_tx_wqe_info *wi; 546 u16 pi; 547 548 cseg = &session->wqe->ctrl; 549 cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); 550 cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); 551 552 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); 553 wi = &sq->db.wqe_info[pi]; 554 *wi = (struct mlx5e_tx_wqe_info) { 555 .skb = NULL, 556 .num_bytes = session->bytes_count, 557 .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS), 558 .num_dma = session->pkt_count, 559 .num_fifo_pkts = session->pkt_count, 560 }; 561 562 sq->pc += wi->num_wqebbs; 563 564 session->wqe = NULL; 565 566 mlx5e_tx_check_stop(sq); 567 568 return cseg; 569 } 570 571 static void 572 mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, 573 struct mlx5_wqe_eth_seg *eseg, bool xmit_more) 574 { 575 struct mlx5_wqe_ctrl_seg *cseg; 576 struct mlx5e_xmit_data txd; 577 578 txd.data = skb->data; 579 txd.len = skb->len; 580 581 txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); 582 if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) 583 goto err_unmap; 584 585 if (!mlx5e_tx_mpwqe_session_is_active(sq)) { 586 mlx5e_tx_mpwqe_session_start(sq, eseg); 587 } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { 588 mlx5e_tx_mpwqe_session_complete(sq); 589 mlx5e_tx_mpwqe_session_start(sq, eseg); 590 } 591 592 sq->stats->xmit_more += xmit_more; 593 594 mlx5e_dma_push_single(sq, txd.dma_addr, txd.len); 595 mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); 596 mlx5e_tx_mpwqe_add_dseg(sq, &txd); 597 mlx5e_tx_skb_update_ts_flags(skb); 598 599 if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { 600 /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ 601 cseg = mlx5e_tx_mpwqe_session_complete(sq); 602 603 if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) 604 mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); 605 } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) { 606 /* Might stop the queue, but we were asked to ring the doorbell anyway. */ 607 cseg = mlx5e_tx_mpwqe_session_complete(sq); 608 609 mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); 610 } 611 612 return; 613 614 err_unmap: 615 sq->stats->dropped++; 616 dev_kfree_skb_any(skb); 617 mlx5e_tx_flush(sq); 618 } 619 620 void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) 621 { 622 /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */ 623 if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq))) 624 mlx5e_tx_mpwqe_session_complete(sq); 625 } 626 627 static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, 628 struct mlx5_wqe_eth_seg *eseg) 629 { 630 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) 631 eseg->flow_table_metadata |= 632 cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist)); 633 } 634 635 static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, 636 struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, 637 struct mlx5_wqe_eth_seg *eseg, u16 ihs) 638 { 639 mlx5e_accel_tx_eseg(priv, skb, accel, eseg, ihs); 640 mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); 641 if (unlikely(sq->ptpsq)) 642 mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg); 643 } 644 645 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) 646 { 647 struct mlx5e_priv *priv = netdev_priv(dev); 648 struct mlx5e_accel_tx_state accel = {}; 649 struct mlx5e_tx_wqe_attr wqe_attr; 650 struct mlx5e_tx_attr attr; 651 struct mlx5e_tx_wqe *wqe; 652 struct mlx5e_txqsq *sq; 653 u16 pi; 654 655 /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the 656 * queue being changed is disabled, and smp_wmb guarantees that the 657 * changes are visible before mlx5e_xmit tries to read from txq2sq. It 658 * guarantees that the value of txq2sq[qid] doesn't change while 659 * mlx5e_xmit is running on queue number qid. smb_wmb is paired with 660 * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE. 661 */ 662 sq = priv->txq2sq[skb_get_queue_mapping(skb)]; 663 if (unlikely(!sq)) { 664 /* Two cases when sq can be NULL: 665 * 1. The HTB node is registered, and mlx5e_select_queue 666 * selected its queue ID, but the SQ itself is not yet created. 667 * 2. HTB SQ creation failed. Similar to the previous case, but 668 * the SQ won't be created. 669 */ 670 dev_kfree_skb_any(skb); 671 return NETDEV_TX_OK; 672 } 673 674 /* May send SKBs and WQEs. */ 675 if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) 676 return NETDEV_TX_OK; 677 678 mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); 679 680 if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) { 681 if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { 682 struct mlx5_wqe_eth_seg eseg = {}; 683 684 mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs); 685 mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); 686 return NETDEV_TX_OK; 687 } 688 689 mlx5e_tx_mpwqe_ensure_complete(sq); 690 } 691 692 mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); 693 pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); 694 wqe = MLX5E_TX_FETCH_WQE(sq, pi); 695 696 /* May update the WQE, but may not post other WQEs. */ 697 mlx5e_accel_tx_finish(sq, wqe, &accel, 698 (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); 699 mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs); 700 mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); 701 702 return NETDEV_TX_OK; 703 } 704 705 static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, 706 u32 *dma_fifo_cc) 707 { 708 int i; 709 710 for (i = 0; i < wi->num_dma; i++) { 711 struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); 712 713 mlx5e_tx_dma_unmap(sq->pdev, dma); 714 } 715 } 716 717 static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, 718 struct mlx5_cqe64 *cqe, int napi_budget) 719 { 720 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 721 struct skb_shared_hwtstamps hwts = {}; 722 u64 ts = get_cqe_ts(cqe); 723 724 hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts); 725 if (sq->ptpsq) { 726 mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP, 727 hwts.hwtstamp, sq->ptpsq); 728 } else { 729 skb_tstamp_tx(skb, &hwts); 730 sq->stats->timestamps++; 731 } 732 } 733 734 napi_consume_skb(skb, napi_budget); 735 } 736 737 static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, 738 struct mlx5_cqe64 *cqe, int napi_budget) 739 { 740 int i; 741 742 for (i = 0; i < wi->num_fifo_pkts; i++) { 743 struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo); 744 745 mlx5e_consume_skb(sq, skb, cqe, napi_budget); 746 } 747 } 748 749 void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) 750 { 751 if (netif_tx_queue_stopped(sq->txq) && 752 mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && 753 !mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq) && 754 !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { 755 netif_tx_wake_queue(sq->txq); 756 sq->stats->wake++; 757 } 758 } 759 760 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) 761 { 762 struct mlx5e_sq_stats *stats; 763 struct mlx5e_txqsq *sq; 764 struct mlx5_cqe64 *cqe; 765 u32 dma_fifo_cc; 766 u32 nbytes; 767 u16 npkts; 768 u16 sqcc; 769 int i; 770 771 sq = container_of(cq, struct mlx5e_txqsq, cq); 772 773 if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) 774 return false; 775 776 cqe = mlx5_cqwq_get_cqe(&cq->wq); 777 if (!cqe) 778 return false; 779 780 stats = sq->stats; 781 782 npkts = 0; 783 nbytes = 0; 784 785 /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), 786 * otherwise a cq overrun may occur 787 */ 788 sqcc = sq->cc; 789 790 /* avoid dirtying sq cache line every cqe */ 791 dma_fifo_cc = sq->dma_fifo_cc; 792 793 i = 0; 794 do { 795 struct mlx5e_tx_wqe_info *wi; 796 u16 wqe_counter; 797 bool last_wqe; 798 u16 ci; 799 800 mlx5_cqwq_pop(&cq->wq); 801 802 wqe_counter = be16_to_cpu(cqe->wqe_counter); 803 804 do { 805 last_wqe = (sqcc == wqe_counter); 806 807 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); 808 wi = &sq->db.wqe_info[ci]; 809 810 sqcc += wi->num_wqebbs; 811 812 if (likely(wi->skb)) { 813 mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); 814 mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget); 815 816 npkts++; 817 nbytes += wi->num_bytes; 818 continue; 819 } 820 821 if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, 822 &dma_fifo_cc))) 823 continue; 824 825 if (wi->num_fifo_pkts) { 826 mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); 827 mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget); 828 829 npkts += wi->num_fifo_pkts; 830 nbytes += wi->num_bytes; 831 } 832 } while (!last_wqe); 833 834 if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { 835 if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, 836 &sq->state)) { 837 mlx5e_dump_error_cqe(&sq->cq, sq->sqn, 838 (struct mlx5_err_cqe *)cqe); 839 mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); 840 queue_work(cq->workqueue, &sq->recover_work); 841 } 842 stats->cqe_err++; 843 } 844 845 } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); 846 847 stats->cqes += i; 848 849 mlx5_cqwq_update_db_record(&cq->wq); 850 851 /* ensure cq space is freed before enabling more cqes */ 852 wmb(); 853 854 sq->dma_fifo_cc = dma_fifo_cc; 855 sq->cc = sqcc; 856 857 netdev_tx_completed_queue(sq->txq, npkts, nbytes); 858 859 mlx5e_txqsq_wake(sq); 860 861 return (i == MLX5E_TX_CQ_POLL_BUDGET); 862 } 863 864 static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi) 865 { 866 int i; 867 868 for (i = 0; i < wi->num_fifo_pkts; i++) 869 dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo)); 870 } 871 872 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) 873 { 874 struct mlx5e_tx_wqe_info *wi; 875 u32 dma_fifo_cc, nbytes = 0; 876 u16 ci, sqcc, npkts = 0; 877 878 sqcc = sq->cc; 879 dma_fifo_cc = sq->dma_fifo_cc; 880 881 while (sqcc != sq->pc) { 882 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); 883 wi = &sq->db.wqe_info[ci]; 884 885 sqcc += wi->num_wqebbs; 886 887 if (likely(wi->skb)) { 888 mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); 889 dev_kfree_skb_any(wi->skb); 890 891 npkts++; 892 nbytes += wi->num_bytes; 893 continue; 894 } 895 896 if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc))) 897 continue; 898 899 if (wi->num_fifo_pkts) { 900 mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); 901 mlx5e_tx_wi_kfree_fifo_skbs(sq, wi); 902 903 npkts += wi->num_fifo_pkts; 904 nbytes += wi->num_bytes; 905 } 906 } 907 908 sq->dma_fifo_cc = dma_fifo_cc; 909 sq->cc = sqcc; 910 911 /* Do not update BQL for TXQs that got replaced by new active ones, as 912 * netdev_tx_reset_queue() is called for them in mlx5e_activate_txqsq(). 913 */ 914 if (sq == sq->priv->txq2sq[sq->txq_ix]) 915 netdev_tx_completed_queue(sq->txq, npkts, nbytes); 916 } 917 918 #ifdef CONFIG_MLX5_CORE_IPOIB 919 static inline void 920 mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, 921 struct mlx5_wqe_datagram_seg *dseg) 922 { 923 memcpy(&dseg->av, av, sizeof(struct mlx5_av)); 924 dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); 925 dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); 926 } 927 928 static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb, 929 const struct mlx5e_tx_attr *attr, 930 struct mlx5e_tx_wqe_attr *wqe_attr) 931 { 932 u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS; 933 u16 ds_cnt_inl = 0; 934 935 ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags; 936 937 if (attr->ihs) { 938 u16 inl = attr->ihs - INL_HDR_START_SZ; 939 940 ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); 941 ds_cnt += ds_cnt_inl; 942 } 943 944 *wqe_attr = (struct mlx5e_tx_wqe_attr) { 945 .ds_cnt = ds_cnt, 946 .ds_cnt_inl = ds_cnt_inl, 947 .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), 948 }; 949 } 950 951 void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, 952 struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more) 953 { 954 struct mlx5e_tx_wqe_attr wqe_attr; 955 struct mlx5e_tx_attr attr; 956 struct mlx5i_tx_wqe *wqe; 957 958 struct mlx5e_accel_tx_state accel = {}; 959 struct mlx5_wqe_datagram_seg *datagram; 960 struct mlx5_wqe_ctrl_seg *cseg; 961 struct mlx5_wqe_eth_seg *eseg; 962 struct mlx5_wqe_data_seg *dseg; 963 struct mlx5e_tx_wqe_info *wi; 964 965 struct mlx5e_sq_stats *stats = sq->stats; 966 int num_dma; 967 u16 pi; 968 969 mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); 970 mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); 971 972 pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); 973 wqe = MLX5I_SQ_FETCH_WQE(sq, pi); 974 975 stats->xmit_more += xmit_more; 976 977 /* fill wqe */ 978 wi = &sq->db.wqe_info[pi]; 979 cseg = &wqe->ctrl; 980 datagram = &wqe->datagram; 981 eseg = &wqe->eth; 982 dseg = wqe->data; 983 984 mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); 985 986 mlx5e_txwqe_build_eseg_csum(sq, skb, &accel, eseg); 987 988 eseg->mss = attr.mss; 989 990 if (attr.ihs) { 991 unsafe_memcpy(eseg->inline_hdr.start, skb->data, 992 attr.ihs, 993 MLX5_UNSAFE_MEMCPY_DISCLAIMER); 994 eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); 995 dseg += wqe_attr.ds_cnt_inl; 996 } 997 998 num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs, 999 attr.headlen, dseg); 1000 if (unlikely(num_dma < 0)) 1001 goto err_drop; 1002 1003 mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, eseg, xmit_more); 1004 1005 return; 1006 1007 err_drop: 1008 stats->dropped++; 1009 dev_kfree_skb_any(skb); 1010 mlx5e_tx_flush(sq); 1011 } 1012 #endif 1013