1 /* 2 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/dim.h> 34 #include <net/tc_act/tc_gact.h> 35 #include <linux/mlx5/fs.h> 36 #include <net/vxlan.h> 37 #include <net/geneve.h> 38 #include <linux/bpf.h> 39 #include <linux/debugfs.h> 40 #include <linux/if_bridge.h> 41 #include <linux/filter.h> 42 #include <net/netdev_lock.h> 43 #include <net/netdev_queues.h> 44 #include <net/netdev_rx_queue.h> 45 #include <net/page_pool/types.h> 46 #include <net/pkt_sched.h> 47 #include <net/xdp_sock_drv.h> 48 #include "eswitch.h" 49 #include "en.h" 50 #include "en/dim.h" 51 #include "en/txrx.h" 52 #include "en_tc.h" 53 #include "en_rep.h" 54 #include "en_accel/ipsec.h" 55 #include "en_accel/psp.h" 56 #include "en_accel/macsec.h" 57 #include "en_accel/en_accel.h" 58 #include "en_accel/ktls.h" 59 #include "lib/vxlan.h" 60 #include "lib/clock.h" 61 #include "en/port.h" 62 #include "en/xdp.h" 63 #include "lib/eq.h" 64 #include "en/monitor_stats.h" 65 #include "en/health.h" 66 #include "en/params.h" 67 #include "en/xsk/pool.h" 68 #include "en/xsk/setup.h" 69 #include "en/xsk/rx.h" 70 #include "en/xsk/tx.h" 71 #include "en/hv_vhca_stats.h" 72 #include "en/devlink.h" 73 #include "lib/mlx5.h" 74 #include "en/ptp.h" 75 #include "en/htb.h" 76 #include "qos.h" 77 #include "en/trap.h" 78 #include "lib/devcom.h" 79 #include "lib/sd.h" 80 #include "en/pcie_cong_event.h" 81 82 static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) 83 { 84 if (!MLX5_CAP_GEN(mdev, shampo) || 85 !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge)) 86 return false; 87 88 /* Our HW-GRO implementation relies on "KSM Mkey" for 89 * SHAMPO headers buffer mapping 90 */ 91 if (!MLX5_CAP_GEN(mdev, fixed_buffer_size)) 92 return false; 93 94 if (!MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer_valid)) 95 return false; 96 97 if (MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer) > 98 MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE) 99 return false; 100 101 return true; 102 } 103 104 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, 105 enum mlx5e_mpwrq_umr_mode umr_mode) 106 { 107 u16 umr_wqebbs, max_wqebbs; 108 bool 
striding_rq_umr; 109 110 striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && 111 MLX5_CAP_ETH(mdev, reg_umr_sq); 112 if (!striding_rq_umr) 113 return false; 114 115 umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); 116 max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); 117 /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is 118 * calculated from mlx5e_get_max_sq_aligned_wqebbs. 119 */ 120 if (WARN_ON(umr_wqebbs > max_wqebbs)) 121 return false; 122 123 return true; 124 } 125 126 void mlx5e_update_carrier(struct mlx5e_priv *priv) 127 { 128 struct mlx5_core_dev *mdev = priv->mdev; 129 u8 port_state; 130 bool up; 131 132 port_state = mlx5_query_vport_state(mdev, 133 MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 134 0); 135 136 up = port_state == VPORT_STATE_UP; 137 if (up == netif_carrier_ok(priv->netdev)) 138 netif_carrier_event(priv->netdev); 139 if (up) { 140 netdev_info(priv->netdev, "Link up\n"); 141 netif_carrier_on(priv->netdev); 142 } else { 143 netdev_info(priv->netdev, "Link down\n"); 144 netif_carrier_off(priv->netdev); 145 } 146 } 147 148 static void mlx5e_update_carrier_work(struct work_struct *work) 149 { 150 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 151 update_carrier_work); 152 153 mutex_lock(&priv->state_lock); 154 if (test_bit(MLX5E_STATE_OPENED, &priv->state)) 155 if (priv->profile->update_carrier) 156 priv->profile->update_carrier(priv); 157 mutex_unlock(&priv->state_lock); 158 } 159 160 static void mlx5e_update_stats_work(struct work_struct *work) 161 { 162 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 163 update_stats_work); 164 165 mutex_lock(&priv->state_lock); 166 priv->profile->update_stats(priv); 167 mutex_unlock(&priv->state_lock); 168 } 169 170 void mlx5e_queue_update_stats(struct mlx5e_priv *priv) 171 { 172 if (!priv->profile->update_stats) 173 return; 174 175 if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state))) 176 return; 177 178 queue_work(priv->wq, &priv->update_stats_work); 179 } 180 181 static int async_event(struct notifier_block *nb, unsigned long event, void *data) 182 { 183 struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); 184 struct mlx5_eqe *eqe = data; 185 186 if (event != MLX5_EVENT_TYPE_PORT_CHANGE) 187 return NOTIFY_DONE; 188 189 switch (eqe->sub_type) { 190 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 191 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 192 queue_work(priv->wq, &priv->update_carrier_work); 193 break; 194 default: 195 return NOTIFY_DONE; 196 } 197 198 return NOTIFY_OK; 199 } 200 201 static void mlx5e_enable_async_events(struct mlx5e_priv *priv) 202 { 203 priv->events_nb.notifier_call = async_event; 204 mlx5_notifier_register(priv->mdev, &priv->events_nb); 205 } 206 207 static void mlx5e_disable_async_events(struct mlx5e_priv *priv) 208 { 209 mlx5_notifier_unregister(priv->mdev, &priv->events_nb); 210 } 211 212 static int mlx5e_devcom_event_mpv(int event, void *my_data, void *event_data) 213 { 214 struct mlx5e_priv *slave_priv = my_data; 215 216 switch (event) { 217 case MPV_DEVCOM_MASTER_UP: 218 mlx5_devcom_comp_set_ready(slave_priv->devcom, true); 219 break; 220 case MPV_DEVCOM_MASTER_DOWN: 221 /* no need for comp set ready false since we unregister after 222 * and it hurts cleanup flow. 
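	 * The component is torn down shortly afterwards anyway: see
	 * mlx5e_devcom_cleanup_mpv(), which ends in
	 * mlx5_devcom_unregister_component().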
223 */ 224 break; 225 case MPV_DEVCOM_IPSEC_MASTER_UP: 226 case MPV_DEVCOM_IPSEC_MASTER_DOWN: 227 mlx5e_ipsec_handle_mpv_event(event, my_data, event_data); 228 break; 229 } 230 231 return 0; 232 } 233 234 static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) 235 { 236 struct mlx5_devcom_match_attr attr = { 237 .key.val = *data, 238 }; 239 240 priv->devcom = mlx5_devcom_register_component(priv->mdev->priv.devc, 241 MLX5_DEVCOM_MPV, 242 &attr, 243 mlx5e_devcom_event_mpv, 244 priv); 245 if (!priv->devcom) 246 return -EINVAL; 247 248 if (mlx5_core_is_mp_master(priv->mdev)) { 249 mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP, 250 MPV_DEVCOM_MASTER_UP, priv); 251 mlx5e_ipsec_send_event(priv, MPV_DEVCOM_IPSEC_MASTER_UP); 252 } 253 254 return 0; 255 } 256 257 static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) 258 { 259 if (!priv->devcom) 260 return; 261 262 if (mlx5_core_is_mp_master(priv->mdev)) { 263 mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_DOWN, 264 MPV_DEVCOM_MASTER_DOWN, priv); 265 mlx5e_ipsec_send_event(priv, MPV_DEVCOM_IPSEC_MASTER_DOWN); 266 } 267 268 mlx5_devcom_unregister_component(priv->devcom); 269 priv->devcom = NULL; 270 } 271 272 static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) 273 { 274 struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); 275 struct mlx5_devlink_trap_event_ctx *trap_event_ctx = data; 276 int err; 277 278 switch (event) { 279 case MLX5_DRIVER_EVENT_TYPE_TRAP: 280 err = mlx5e_handle_trap_event(priv, trap_event_ctx->trap); 281 if (err) { 282 trap_event_ctx->err = err; 283 return NOTIFY_BAD; 284 } 285 break; 286 case MLX5_DRIVER_EVENT_AFFILIATION_DONE: 287 if (mlx5e_devcom_init_mpv(priv, data)) 288 return NOTIFY_BAD; 289 break; 290 case MLX5_DRIVER_EVENT_AFFILIATION_REMOVED: 291 mlx5e_devcom_cleanup_mpv(priv); 292 break; 293 default: 294 return NOTIFY_DONE; 295 } 296 return NOTIFY_OK; 297 } 298 299 static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) 300 { 301 priv->blocking_events_nb.notifier_call = blocking_event; 302 mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb); 303 } 304 305 static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) 306 { 307 mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); 308 } 309 310 static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) 311 { 312 u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); 313 u32 sz; 314 315 sz = ALIGN(entries * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT); 316 317 return sz / MLX5_OCTWORD; 318 } 319 320 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, 321 struct mlx5e_icosq *sq, 322 struct mlx5e_umr_wqe *wqe) 323 { 324 struct mlx5_wqe_ctrl_seg *cseg = &wqe->hdr.ctrl; 325 struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->hdr.uctrl; 326 u16 octowords; 327 u8 ds_cnt; 328 329 ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift, 330 rq->mpwqe.umr_mode), 331 MLX5_SEND_WQE_DS); 332 333 cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | 334 ds_cnt); 335 cseg->umr_mkey = rq->mpwqe.umr_mkey_be; 336 337 ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; 338 octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode); 339 ucseg->xlt_octowords = cpu_to_be16(octowords); 340 ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); 341 } 342 343 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) 344 { 345 int wq_sz = 
mlx5_wq_ll_get_size(&rq->mpwqe.wq); 346 size_t alloc_size; 347 348 alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, 349 alloc_units.frag_pages, 350 rq->mpwqe.pages_per_wqe)); 351 352 rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); 353 if (!rq->mpwqe.info) 354 return -ENOMEM; 355 356 /* For deferred page release (release right before alloc), make sure 357 * that on first round release is not called. 358 */ 359 for (int i = 0; i < wq_sz; i++) { 360 struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i); 361 362 bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); 363 } 364 365 mlx5e_build_umr_wqe(rq, rq->icosq, 366 container_of(&rq->mpwqe.umr_wqe, 367 struct mlx5e_umr_wqe, hdr)); 368 369 return 0; 370 } 371 372 373 static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) 374 { 375 switch (umr_mode) { 376 case MLX5E_MPWRQ_UMR_MODE_ALIGNED: 377 return MLX5_MKC_ACCESS_MODE_MTT; 378 case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: 379 return MLX5_MKC_ACCESS_MODE_KSM; 380 case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: 381 return MLX5_MKC_ACCESS_MODE_KLMS; 382 case MLX5E_MPWRQ_UMR_MODE_TRIPLE: 383 return MLX5_MKC_ACCESS_MODE_KSM; 384 } 385 WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); 386 return 0; 387 } 388 389 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, 390 u32 npages, u8 page_shift, u32 *umr_mkey, 391 dma_addr_t filler_addr, 392 enum mlx5e_mpwrq_umr_mode umr_mode, 393 u32 xsk_chunk_size) 394 { 395 struct mlx5_mtt *mtt; 396 struct mlx5_ksm *ksm; 397 struct mlx5_klm *klm; 398 u32 octwords; 399 int inlen; 400 void *mkc; 401 u32 *in; 402 int err; 403 int i; 404 405 if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED || 406 umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) && 407 !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { 408 mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); 409 return -EINVAL; 410 } 411 412 octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode); 413 414 inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), 415 MLX5_OCTWORD, octwords); 416 if (inlen < 0) 417 return inlen; 418 419 in = kvzalloc(inlen, GFP_KERNEL); 420 if (!in) 421 return -ENOMEM; 422 423 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 424 425 MLX5_SET(mkc, mkc, free, 1); 426 MLX5_SET(mkc, mkc, umr_en, 1); 427 MLX5_SET(mkc, mkc, lw, 1); 428 MLX5_SET(mkc, mkc, lr, 1); 429 MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode)); 430 mlx5e_mkey_set_relaxed_ordering(mdev, mkc); 431 MLX5_SET(mkc, mkc, qpn, 0xffffff); 432 MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); 433 MLX5_SET64(mkc, mkc, len, npages << page_shift); 434 MLX5_SET(mkc, mkc, translations_octword_size, octwords); 435 if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) 436 MLX5_SET(mkc, mkc, log_page_size, page_shift - 2); 437 else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) 438 MLX5_SET(mkc, mkc, log_page_size, page_shift); 439 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); 440 441 /* Initialize the mkey with all MTTs pointing to a default 442 * page (filler_addr). When the channels are activated, UMR 443 * WQEs will redirect the RX WQEs to the actual memory from 444 * the RQ's pool, while the gaps (wqe_overflow) remain mapped 445 * to the default page. 
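	 * The translation entry type written below depends on the UMR mode:
	 * one MTT per page for ALIGNED, one KSM per page for UNALIGNED, two
	 * KLMs per page for OVERSIZED (the XSK chunk plus the remainder of
	 * the page), and four KSMs per page for TRIPLE.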
446 */ 447 switch (umr_mode) { 448 case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: 449 klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 450 for (i = 0; i < npages; i++) { 451 klm[i << 1] = (struct mlx5_klm) { 452 .va = cpu_to_be64(filler_addr), 453 .bcount = cpu_to_be32(xsk_chunk_size), 454 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 455 }; 456 klm[(i << 1) + 1] = (struct mlx5_klm) { 457 .va = cpu_to_be64(filler_addr), 458 .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size), 459 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 460 }; 461 } 462 break; 463 case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: 464 ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 465 for (i = 0; i < npages; i++) 466 ksm[i] = (struct mlx5_ksm) { 467 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 468 .va = cpu_to_be64(filler_addr), 469 }; 470 break; 471 case MLX5E_MPWRQ_UMR_MODE_ALIGNED: 472 mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 473 for (i = 0; i < npages; i++) 474 mtt[i] = (struct mlx5_mtt) { 475 .ptag = cpu_to_be64(filler_addr), 476 }; 477 break; 478 case MLX5E_MPWRQ_UMR_MODE_TRIPLE: 479 ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 480 for (i = 0; i < npages * 4; i++) { 481 ksm[i] = (struct mlx5_ksm) { 482 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 483 .va = cpu_to_be64(filler_addr), 484 }; 485 } 486 break; 487 } 488 489 err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); 490 491 kvfree(in); 492 return err; 493 } 494 495 static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) 496 { 497 u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0; 498 u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); 499 u32 num_entries, max_num_entries; 500 u32 umr_mkey; 501 int err; 502 503 max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode); 504 505 /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */ 506 if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe, 507 &num_entries) || 508 num_entries > max_num_entries)) 509 mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n", 510 __func__, wq_size, rq->mpwqe.mtts_per_wqe, 511 max_num_entries); 512 513 err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, 514 &umr_mkey, rq->wqe_overflow.addr, 515 rq->mpwqe.umr_mode, xsk_chunk_size); 516 rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); 517 return err; 518 } 519 520 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) 521 { 522 struct mlx5e_wqe_frag_info next_frag = {}; 523 struct mlx5e_wqe_frag_info *prev = NULL; 524 int i; 525 526 WARN_ON(rq->xsk_pool); 527 528 next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; 529 530 /* Skip first release due to deferred release. */ 531 next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); 532 533 for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { 534 struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; 535 struct mlx5e_wqe_frag_info *frag = 536 &rq->wqe.frags[i << rq->wqe.info.log_num_frags]; 537 int f; 538 539 for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { 540 if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { 541 /* Pages are assigned at runtime. 
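				 * Only the frag_page slot and the in-page
				 * offset are laid out here; the backing page
				 * itself is taken from the page_pool right
				 * before the WQE is posted (deferred
				 * alloc/release).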
*/ 542 next_frag.frag_page++; 543 next_frag.offset = 0; 544 if (prev) 545 prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); 546 } 547 *frag = next_frag; 548 549 /* prepare next */ 550 next_frag.offset += frag_info[f].frag_stride; 551 prev = frag; 552 } 553 } 554 555 if (prev) 556 prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); 557 } 558 559 static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) 560 { 561 int i; 562 563 /* Assumptions used by XSK batched allocator. */ 564 WARN_ON(rq->wqe.info.num_frags != 1); 565 WARN_ON(rq->wqe.info.log_num_frags != 0); 566 WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); 567 568 /* Considering the above assumptions a fragment maps to a single 569 * xsk_buff. 570 */ 571 for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { 572 rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; 573 574 /* Skip first release due to deferred release as WQES are 575 * not allocated yet. 576 */ 577 rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); 578 } 579 } 580 581 static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) 582 { 583 int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); 584 int len = wq_sz << rq->wqe.info.log_num_frags; 585 struct mlx5e_wqe_frag_info *frags; 586 union mlx5e_alloc_units *aus; 587 int aus_sz; 588 589 if (rq->xsk_pool) 590 aus_sz = sizeof(*aus->xsk_buffs); 591 else 592 aus_sz = sizeof(*aus->frag_pages); 593 594 aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); 595 if (!aus) 596 return -ENOMEM; 597 598 frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node); 599 if (!frags) { 600 kvfree(aus); 601 return -ENOMEM; 602 } 603 604 rq->wqe.alloc_units = aus; 605 rq->wqe.frags = frags; 606 607 if (rq->xsk_pool) 608 mlx5e_init_xsk_buffs(rq); 609 else 610 mlx5e_init_frags_partition(rq); 611 612 return 0; 613 } 614 615 static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq) 616 { 617 kvfree(rq->wqe.frags); 618 kvfree(rq->wqe.alloc_units); 619 } 620 621 static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) 622 { 623 struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work); 624 625 mlx5e_reporter_rq_cqe_err(rq); 626 } 627 628 static void mlx5e_rq_timeout_work(struct work_struct *timeout_work) 629 { 630 struct mlx5e_rq *rq = container_of(timeout_work, 631 struct mlx5e_rq, 632 rx_timeout_work); 633 634 /* Acquire netdev instance lock to synchronize with channel close and 635 * reopen flows. Either successfully obtain the lock, or detect that 636 * channels are closing for another reason, making this work no longer 637 * necessary. 
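	 * This mirrors mlx5e_tx_err_cqe_work(): poll netdev_trylock() and
	 * re-check the channels state between attempts instead of blocking,
	 * since the closing path may hold the instance lock while flushing
	 * this very work item.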
638 */ 639 while (!netdev_trylock(rq->netdev)) { 640 if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) 641 return; 642 msleep(20); 643 } 644 645 mlx5e_reporter_rx_timeout(rq); 646 netdev_unlock(rq->netdev); 647 } 648 649 static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) 650 { 651 rq->wqe_overflow.page = alloc_page(GFP_KERNEL); 652 if (!rq->wqe_overflow.page) 653 return -ENOMEM; 654 655 rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, 656 PAGE_SIZE, rq->buff.map_dir); 657 if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { 658 __free_page(rq->wqe_overflow.page); 659 return -ENOMEM; 660 } 661 return 0; 662 } 663 664 static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) 665 { 666 dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, 667 rq->buff.map_dir); 668 __free_page(rq->wqe_overflow.page); 669 } 670 671 static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, 672 u32 xdp_frag_size, struct mlx5e_rq *rq) 673 { 674 struct mlx5_core_dev *mdev = c->mdev; 675 int err; 676 677 rq->wq_type = params->rq_wq_type; 678 rq->pdev = c->pdev; 679 rq->netdev = c->netdev; 680 rq->priv = c->priv; 681 rq->hwtstamp_config = &c->priv->hwtstamp_config; 682 rq->clock = mdev->clock; 683 rq->icosq = &c->icosq; 684 rq->ix = c->ix; 685 rq->channel = c; 686 rq->mdev = mdev; 687 rq->hw_mtu = 688 MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; 689 rq->xdpsq = &c->rq_xdpsq; 690 rq->stats = &c->priv->channel_stats[c->ix]->rq; 691 rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); 692 err = mlx5e_rq_set_handlers(rq, params, NULL); 693 if (err) 694 return err; 695 696 return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id, 697 xdp_frag_size); 698 } 699 700 static void mlx5e_release_rq_hd_pages(struct mlx5e_rq *rq, 701 struct mlx5e_shampo_hd *shampo) 702 703 { 704 for (int i = 0; i < shampo->nentries; i++) { 705 struct mlx5e_dma_info *info = &shampo->hd_buf_pages[i]; 706 707 if (!info->page) 708 continue; 709 710 dma_unmap_page(rq->pdev, info->addr, PAGE_SIZE, 711 rq->buff.map_dir); 712 __free_page(info->page); 713 } 714 } 715 716 static int mlx5e_alloc_rq_hd_pages(struct mlx5e_rq *rq, int node, 717 struct mlx5e_shampo_hd *shampo) 718 { 719 int err, i; 720 721 for (i = 0; i < shampo->nentries; i++) { 722 struct page *page = alloc_pages_node(node, GFP_KERNEL, 0); 723 dma_addr_t addr; 724 725 if (!page) { 726 err = -ENOMEM; 727 goto err_free_pages; 728 } 729 730 addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, 731 rq->buff.map_dir); 732 err = dma_mapping_error(rq->pdev, addr); 733 if (err) { 734 __free_page(page); 735 goto err_free_pages; 736 } 737 738 shampo->hd_buf_pages[i].page = page; 739 shampo->hd_buf_pages[i].addr = addr; 740 } 741 742 return 0; 743 744 err_free_pages: 745 mlx5e_release_rq_hd_pages(rq, shampo); 746 747 return err; 748 } 749 750 static int mlx5e_create_rq_hd_mkey(struct mlx5_core_dev *mdev, 751 struct mlx5e_shampo_hd *shampo) 752 { 753 enum mlx5e_mpwrq_umr_mode umr_mode = MLX5E_MPWRQ_UMR_MODE_ALIGNED; 754 struct mlx5_mtt *mtt; 755 void *mkc, *in; 756 int inlen, err; 757 u32 octwords; 758 759 octwords = mlx5e_mpwrq_umr_octowords(shampo->nentries, umr_mode); 760 inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), 761 MLX5_OCTWORD, octwords); 762 if (inlen < 0) 763 return inlen; 764 765 in = kvzalloc(inlen, GFP_KERNEL); 766 if (!in) 767 return -ENOMEM; 768 769 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 770 771 MLX5_SET(mkc, 
mkc, lw, 1); 772 MLX5_SET(mkc, mkc, lr, 1); 773 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); 774 mlx5e_mkey_set_relaxed_ordering(mdev, mkc); 775 MLX5_SET(mkc, mkc, qpn, 0xffffff); 776 MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); 777 MLX5_SET64(mkc, mkc, len, shampo->hd_buf_size); 778 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); 779 MLX5_SET(mkc, mkc, translations_octword_size, octwords); 780 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 781 octwords); 782 783 mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 784 for (int i = 0; i < shampo->nentries; i++) 785 mtt[i].ptag = cpu_to_be64(shampo->hd_buf_pages[i].addr); 786 787 err = mlx5_core_create_mkey(mdev, &shampo->mkey, in, inlen); 788 789 kvfree(in); 790 return err; 791 } 792 793 static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, 794 struct mlx5e_params *params, 795 struct mlx5e_rq_param *rqp, 796 struct mlx5e_rq *rq, 797 int node) 798 { 799 struct mlx5e_shampo_hd *shampo; 800 int nentries, err, shampo_sz; 801 u32 hd_per_wq, hd_buf_size; 802 803 if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 804 return 0; 805 806 hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); 807 hd_buf_size = hd_per_wq * BIT(MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE); 808 nentries = hd_buf_size / PAGE_SIZE; 809 if (!nentries) { 810 mlx5_core_err(mdev, "SHAMPO header buffer size %u < %lu\n", 811 hd_buf_size, PAGE_SIZE); 812 return -EINVAL; 813 } 814 815 shampo_sz = struct_size(shampo, hd_buf_pages, nentries); 816 shampo = kvzalloc_node(shampo_sz, GFP_KERNEL, node); 817 if (!shampo) 818 return -ENOMEM; 819 820 shampo->hd_per_wq = hd_per_wq; 821 shampo->hd_buf_size = hd_buf_size; 822 shampo->nentries = nentries; 823 err = mlx5e_alloc_rq_hd_pages(rq, node, shampo); 824 if (err) 825 goto err_free; 826 827 err = mlx5e_create_rq_hd_mkey(mdev, shampo); 828 if (err) 829 goto err_release_pages; 830 831 /* gro only data structures */ 832 rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); 833 if (!rq->hw_gro_data) { 834 err = -ENOMEM; 835 goto err_destroy_mkey; 836 } 837 838 rq->mpwqe.shampo = shampo; 839 840 return 0; 841 842 err_destroy_mkey: 843 mlx5_core_destroy_mkey(mdev, shampo->mkey); 844 err_release_pages: 845 mlx5e_release_rq_hd_pages(rq, shampo); 846 err_free: 847 kvfree(shampo); 848 849 return err; 850 } 851 852 static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) 853 { 854 struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; 855 856 if (!shampo) 857 return; 858 859 kvfree(rq->hw_gro_data); 860 mlx5_core_destroy_mkey(rq->mdev, shampo->mkey); 861 mlx5e_release_rq_hd_pages(rq, shampo); 862 kvfree(shampo); 863 } 864 865 static int mlx5e_alloc_rq(struct mlx5e_params *params, 866 struct mlx5e_xsk_param *xsk, 867 struct mlx5e_rq_param *rqp, 868 int node, struct mlx5e_rq *rq) 869 { 870 struct mlx5_core_dev *mdev = rq->mdev; 871 void *rqc = rqp->rqc; 872 void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); 873 u32 pool_size; 874 int wq_sz; 875 int err; 876 int i; 877 878 rqp->wq.db_numa_node = node; 879 INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); 880 INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work); 881 882 if (params->xdp_prog) 883 bpf_prog_inc(params->xdp_prog); 884 RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); 885 886 rq->buff.map_dir = params->xdp_prog ? 
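			   /* XDP_TX can send RX buffers back out, so the
			    * device must be able to both write and read them.
			    */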
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; 887 rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); 888 pool_size = 1 << params->log_rq_mtu_frames; 889 890 rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); 891 892 switch (rq->wq_type) { 893 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 894 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, 895 &rq->wq_ctrl); 896 if (err) 897 goto err_rq_xdp_prog; 898 899 err = mlx5e_alloc_mpwqe_rq_drop_page(rq); 900 if (err) 901 goto err_rq_wq_destroy; 902 903 rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; 904 905 wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); 906 907 rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); 908 rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); 909 rq->mpwqe.pages_per_wqe = 910 mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, 911 rq->mpwqe.umr_mode); 912 rq->mpwqe.umr_wqebbs = 913 mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift, 914 rq->mpwqe.umr_mode); 915 rq->mpwqe.mtts_per_wqe = 916 mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift, 917 rq->mpwqe.umr_mode); 918 919 pool_size = rq->mpwqe.pages_per_wqe << 920 mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); 921 922 if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog) 923 pool_size *= 2; /* additional page per packet for the linear part */ 924 925 rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); 926 rq->mpwqe.num_strides = 927 BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); 928 rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); 929 930 rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); 931 932 err = mlx5e_create_rq_umr_mkey(mdev, rq); 933 if (err) 934 goto err_rq_drop_page; 935 936 err = mlx5e_rq_alloc_mpwqe_info(rq, node); 937 if (err) 938 goto err_rq_mkey; 939 940 err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, node); 941 if (err) 942 goto err_free_mpwqe_info; 943 944 break; 945 default: /* MLX5_WQ_TYPE_CYCLIC */ 946 err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, 947 &rq->wq_ctrl); 948 if (err) 949 goto err_rq_xdp_prog; 950 951 rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; 952 953 wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); 954 955 rq->wqe.info = rqp->frags_info; 956 rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; 957 958 err = mlx5e_init_wqe_alloc_info(rq, node); 959 if (err) 960 goto err_rq_wq_destroy; 961 } 962 963 if (xsk) { 964 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 965 MEM_TYPE_XSK_BUFF_POOL, NULL); 966 if (err) 967 goto err_free_by_rq_type; 968 xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); 969 } else { 970 /* Create a page_pool and register it with rxq */ 971 struct page_pool_params pp_params = { 0 }; 972 973 pp_params.order = 0; 974 pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 975 pp_params.pool_size = pool_size; 976 pp_params.nid = node; 977 pp_params.dev = rq->pdev; 978 pp_params.napi = rq->cq.napi; 979 pp_params.netdev = rq->netdev; 980 pp_params.dma_dir = rq->buff.map_dir; 981 pp_params.max_len = PAGE_SIZE; 982 pp_params.queue_idx = rq->ix; 983 984 /* Shampo header data split allow for unreadable netmem */ 985 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 986 pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; 987 988 /* page_pool can be used even when there is no rq->xdp_prog, 989 * given page_pool does not handle DMA mapping there is no 990 * required state to clear. And page_pool gracefully handle 991 * elevated refcnt. 
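	 * With PP_FLAG_DMA_MAP and PP_FLAG_DMA_SYNC_DEV set above, the pool
	 * also maps pages and syncs them for the device, and the SHAMPO
	 * header pool (hd_page_pool) falls back to this same pool below
	 * unless one was already assigned.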
992 */ 993 rq->page_pool = page_pool_create(&pp_params); 994 if (IS_ERR(rq->page_pool)) { 995 err = PTR_ERR(rq->page_pool); 996 rq->page_pool = NULL; 997 goto err_free_by_rq_type; 998 } 999 if (!rq->hd_page_pool) 1000 rq->hd_page_pool = rq->page_pool; 1001 if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { 1002 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 1003 MEM_TYPE_PAGE_POOL, rq->page_pool); 1004 if (err) 1005 goto err_destroy_page_pool; 1006 } 1007 } 1008 1009 for (i = 0; i < wq_sz; i++) { 1010 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { 1011 struct mlx5e_rx_wqe_ll *wqe = 1012 mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); 1013 u32 byte_count = 1014 rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; 1015 u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) << 1016 rq->mpwqe.page_shift; 1017 u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? 1018 0 : rq->buff.headroom; 1019 1020 wqe->data[0].addr = cpu_to_be64(dma_offset + headroom); 1021 wqe->data[0].byte_count = cpu_to_be32(byte_count); 1022 wqe->data[0].lkey = rq->mpwqe.umr_mkey_be; 1023 } else { 1024 struct mlx5e_rx_wqe_cyc *wqe = 1025 mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); 1026 int f; 1027 1028 for (f = 0; f < rq->wqe.info.num_frags; f++) { 1029 u32 frag_size = rq->wqe.info.arr[f].frag_size | 1030 MLX5_HW_START_PADDING; 1031 1032 wqe->data[f].byte_count = cpu_to_be32(frag_size); 1033 wqe->data[f].lkey = rq->mkey_be; 1034 } 1035 /* check if num_frags is not a pow of two */ 1036 if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { 1037 wqe->data[f].byte_count = 0; 1038 wqe->data[f].lkey = params->terminate_lkey_be; 1039 wqe->data[f].addr = 0; 1040 } 1041 } 1042 } 1043 1044 return 0; 1045 1046 err_destroy_page_pool: 1047 page_pool_destroy(rq->page_pool); 1048 err_free_by_rq_type: 1049 switch (rq->wq_type) { 1050 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 1051 mlx5e_rq_free_shampo(rq); 1052 err_free_mpwqe_info: 1053 kvfree(rq->mpwqe.info); 1054 err_rq_mkey: 1055 mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); 1056 err_rq_drop_page: 1057 mlx5e_free_mpwqe_rq_drop_page(rq); 1058 break; 1059 default: /* MLX5_WQ_TYPE_CYCLIC */ 1060 mlx5e_free_wqe_alloc_info(rq); 1061 } 1062 err_rq_wq_destroy: 1063 mlx5_wq_destroy(&rq->wq_ctrl); 1064 err_rq_xdp_prog: 1065 if (params->xdp_prog) 1066 bpf_prog_put(params->xdp_prog); 1067 1068 return err; 1069 } 1070 1071 static void mlx5e_free_rq(struct mlx5e_rq *rq) 1072 { 1073 kvfree(rq->dim); 1074 page_pool_destroy(rq->page_pool); 1075 1076 switch (rq->wq_type) { 1077 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 1078 mlx5e_rq_free_shampo(rq); 1079 kvfree(rq->mpwqe.info); 1080 mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); 1081 mlx5e_free_mpwqe_rq_drop_page(rq); 1082 break; 1083 default: /* MLX5_WQ_TYPE_CYCLIC */ 1084 mlx5e_free_wqe_alloc_info(rq); 1085 } 1086 1087 mlx5_wq_destroy(&rq->wq_ctrl); 1088 1089 if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { 1090 struct bpf_prog *old_prog; 1091 1092 old_prog = rcu_dereference_protected(rq->xdp_prog, 1093 lockdep_is_held(&rq->priv->state_lock)); 1094 if (old_prog) 1095 bpf_prog_put(old_prog); 1096 } 1097 xdp_rxq_info_unreg(&rq->xdp_rxq); 1098 } 1099 1100 int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter) 1101 { 1102 struct mlx5_core_dev *mdev = rq->mdev; 1103 u8 ts_format; 1104 void *in; 1105 void *rqc; 1106 void *wq; 1107 int inlen; 1108 int err; 1109 1110 inlen = MLX5_ST_SZ_BYTES(create_rq_in) + 1111 sizeof(u64) * rq->wq_ctrl.buf.npages; 1112 in = kvzalloc(inlen, GFP_KERNEL); 
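	/* The command payload is the RQ context followed by one 64-bit PAS
	 * (physical address) entry per WQ buffer page, which is what the
	 * npages term in inlen above accounts for; the PAS array is filled
	 * by mlx5_fill_page_frag_array() further down.
	 */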
1113 if (!in) 1114 return -ENOMEM; 1115 1116 ts_format = mlx5_is_real_time_rq(mdev) ? 1117 MLX5_TIMESTAMP_FORMAT_REAL_TIME : 1118 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; 1119 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 1120 wq = MLX5_ADDR_OF(rqc, rqc, wq); 1121 1122 memcpy(rqc, param->rqc, sizeof(param->rqc)); 1123 1124 MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); 1125 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); 1126 MLX5_SET(rqc, rqc, ts_format, ts_format); 1127 MLX5_SET(rqc, rqc, counter_set_id, q_counter); 1128 MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - 1129 MLX5_ADAPTER_PAGE_SHIFT); 1130 MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); 1131 1132 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { 1133 MLX5_SET(wq, wq, log_headers_buffer_entry_num, 1134 order_base_2(rq->mpwqe.shampo->hd_per_wq)); 1135 MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey); 1136 } 1137 1138 mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, 1139 (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); 1140 1141 err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); 1142 1143 kvfree(in); 1144 1145 return err; 1146 } 1147 1148 static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) 1149 { 1150 struct mlx5_core_dev *mdev = rq->mdev; 1151 1152 void *in; 1153 void *rqc; 1154 int inlen; 1155 int err; 1156 1157 inlen = MLX5_ST_SZ_BYTES(modify_rq_in); 1158 in = kvzalloc(inlen, GFP_KERNEL); 1159 if (!in) 1160 return -ENOMEM; 1161 1162 if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) 1163 mlx5e_rqwq_reset(rq); 1164 1165 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 1166 1167 MLX5_SET(modify_rq_in, in, rq_state, curr_state); 1168 MLX5_SET(rqc, rqc, state, next_state); 1169 1170 err = mlx5_core_modify_rq(mdev, rq->rqn, in); 1171 1172 kvfree(in); 1173 1174 return err; 1175 } 1176 1177 static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) 1178 { 1179 struct mlx5_cqwq *cqwq = &rq->cq.wq; 1180 struct mlx5_cqe64 *cqe; 1181 1182 if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { 1183 while ((cqe = mlx5_cqwq_get_cqe_enhanced_comp(cqwq))) 1184 mlx5_cqwq_pop(cqwq); 1185 } else { 1186 while ((cqe = mlx5_cqwq_get_cqe(cqwq))) 1187 mlx5_cqwq_pop(cqwq); 1188 } 1189 1190 mlx5_cqwq_update_db_record(cqwq); 1191 } 1192 1193 int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) 1194 { 1195 struct net_device *dev = rq->netdev; 1196 int err; 1197 1198 err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); 1199 if (err) { 1200 netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); 1201 return err; 1202 } 1203 1204 mlx5e_free_rx_descs(rq); 1205 mlx5e_flush_rq_cq(rq); 1206 1207 err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 1208 if (err) { 1209 netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); 1210 return err; 1211 } 1212 1213 return 0; 1214 } 1215 1216 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) 1217 { 1218 struct mlx5_core_dev *mdev = rq->mdev; 1219 void *in; 1220 void *rqc; 1221 int inlen; 1222 int err; 1223 1224 inlen = MLX5_ST_SZ_BYTES(modify_rq_in); 1225 in = kvzalloc(inlen, GFP_KERNEL); 1226 if (!in) 1227 return -ENOMEM; 1228 1229 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 1230 1231 MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); 1232 MLX5_SET64(modify_rq_in, in, modify_bitmask, 1233 MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); 1234 MLX5_SET(rqc, rqc, vsd, vsd); 1235 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); 1236 1237 err = mlx5_core_modify_rq(mdev, rq->rqn, in); 1238 1239 kvfree(in); 1240 1241 return 
err; 1242 } 1243 1244 void mlx5e_destroy_rq(struct mlx5e_rq *rq) 1245 { 1246 mlx5_core_destroy_rq(rq->mdev, rq->rqn); 1247 } 1248 1249 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) 1250 { 1251 unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); 1252 1253 u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq)); 1254 1255 do { 1256 if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes) 1257 return 0; 1258 1259 msleep(20); 1260 } while (time_before(jiffies, exp_time)); 1261 1262 netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", 1263 rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); 1264 1265 queue_work(rq->priv->wq, &rq->rx_timeout_work); 1266 1267 return -ETIMEDOUT; 1268 } 1269 1270 void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) 1271 { 1272 struct mlx5_wq_ll *wq; 1273 u16 head; 1274 int i; 1275 1276 if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) 1277 return; 1278 1279 wq = &rq->mpwqe.wq; 1280 head = wq->head; 1281 1282 /* Release WQEs that are in missing state: they have been 1283 * popped from the list after completion but were not freed 1284 * due to deferred release. 1285 * Also free the linked-list reserved entry, hence the "+ 1". 1286 */ 1287 for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) { 1288 rq->dealloc_wqe(rq, head); 1289 head = mlx5_wq_ll_get_wqe_next_ix(wq, head); 1290 } 1291 1292 rq->mpwqe.actual_wq_head = wq->head; 1293 rq->mpwqe.umr_in_progress = 0; 1294 rq->mpwqe.umr_completed = 0; 1295 } 1296 1297 void mlx5e_free_rx_descs(struct mlx5e_rq *rq) 1298 { 1299 __be16 wqe_ix_be; 1300 u16 wqe_ix; 1301 1302 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { 1303 struct mlx5_wq_ll *wq = &rq->mpwqe.wq; 1304 1305 mlx5e_free_rx_missing_descs(rq); 1306 1307 while (!mlx5_wq_ll_is_empty(wq)) { 1308 struct mlx5e_rx_wqe_ll *wqe; 1309 1310 wqe_ix_be = *wq->tail_next; 1311 wqe_ix = be16_to_cpu(wqe_ix_be); 1312 wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix); 1313 rq->dealloc_wqe(rq, wqe_ix); 1314 mlx5_wq_ll_pop(wq, wqe_ix_be, 1315 &wqe->next.next_wqe_index); 1316 } 1317 } else { 1318 struct mlx5_wq_cyc *wq = &rq->wqe.wq; 1319 u16 missing = mlx5_wq_cyc_missing(wq); 1320 u16 head = mlx5_wq_cyc_get_head(wq); 1321 1322 while (!mlx5_wq_cyc_is_empty(wq)) { 1323 wqe_ix = mlx5_wq_cyc_get_tail(wq); 1324 rq->dealloc_wqe(rq, wqe_ix); 1325 mlx5_wq_cyc_pop(wq); 1326 } 1327 /* Missing slots might also contain unreleased pages due to 1328 * deferred release. 
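		 * Roughly: the "missing" slots sit between the posted WQEs
		 * and the ring head, i.e. they completed but were not
		 * re-posted, and with deferred release their pages are only
		 * freed right before the next post, hence the explicit
		 * release here.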
1329 */ 1330 while (missing--) { 1331 wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++); 1332 rq->dealloc_wqe(rq, wqe_ix); 1333 } 1334 } 1335 1336 } 1337 1338 int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, 1339 struct mlx5e_xsk_param *xsk, int node, u16 q_counter, 1340 struct mlx5e_rq *rq) 1341 { 1342 struct mlx5_core_dev *mdev = rq->mdev; 1343 int err; 1344 1345 if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) 1346 __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); 1347 1348 err = mlx5e_alloc_rq(params, xsk, param, node, rq); 1349 if (err) 1350 return err; 1351 1352 err = mlx5e_create_rq(rq, param, q_counter); 1353 if (err) 1354 goto err_free_rq; 1355 1356 err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 1357 if (err) 1358 goto err_destroy_rq; 1359 1360 if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) 1361 __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); 1362 1363 if (rq->channel && !params->rx_dim_enabled) { 1364 rq->channel->rx_cq_moder = params->rx_cq_moderation; 1365 } else if (rq->channel) { 1366 u8 cq_period_mode; 1367 1368 cq_period_mode = params->rx_moder_use_cqe_mode ? 1369 DIM_CQ_PERIOD_MODE_START_FROM_CQE : 1370 DIM_CQ_PERIOD_MODE_START_FROM_EQE; 1371 mlx5e_reset_rx_moderation(&rq->channel->rx_cq_moder, cq_period_mode, 1372 params->rx_dim_enabled); 1373 1374 err = mlx5e_dim_rx_change(rq, params->rx_dim_enabled); 1375 if (err) 1376 goto err_destroy_rq; 1377 } 1378 1379 /* We disable csum_complete when XDP is enabled since 1380 * XDP programs might manipulate packets which will render 1381 * skb->checksum incorrect. 1382 */ 1383 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) 1384 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state); 1385 1386 /* For CQE compression on striding RQ, use stride index provided by 1387 * HW if capability is supported. 1388 */ 1389 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && 1390 MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) 1391 __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state); 1392 1393 /* For enhanced CQE compression packet processing. decompress 1394 * session according to the enhanced layout. 1395 */ 1396 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) && 1397 MLX5_CAP_GEN(mdev, enhanced_cqe_compression)) 1398 __set_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state); 1399 1400 return 0; 1401 1402 err_destroy_rq: 1403 mlx5e_destroy_rq(rq); 1404 err_free_rq: 1405 mlx5e_free_rq(rq); 1406 1407 return err; 1408 } 1409 1410 void mlx5e_activate_rq(struct mlx5e_rq *rq) 1411 { 1412 set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); 1413 } 1414 1415 void mlx5e_deactivate_rq(struct mlx5e_rq *rq) 1416 { 1417 clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); 1418 synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. 
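			    * A NAPI poll that observed the ENABLED bit before
			    * it was cleared may still be posting WQEs;
			    * synchronize_net() waits for it to finish.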
*/ 1419 } 1420 1421 void mlx5e_close_rq(struct mlx5e_rq *rq) 1422 { 1423 if (rq->dim) 1424 cancel_work_sync(&rq->dim->work); 1425 cancel_work_sync(&rq->recover_work); 1426 cancel_work_sync(&rq->rx_timeout_work); 1427 mlx5e_destroy_rq(rq); 1428 mlx5e_free_rx_descs(rq); 1429 mlx5e_free_rq(rq); 1430 } 1431 1432 u32 mlx5e_profile_get_tisn(struct mlx5_core_dev *mdev, 1433 struct mlx5e_priv *priv, 1434 const struct mlx5e_profile *profile, 1435 u8 lag_port, u8 tc) 1436 { 1437 if (profile->get_tisn) 1438 return profile->get_tisn(mdev, priv, lag_port, tc); 1439 1440 return mdev->mlx5e_res.hw_objs.tisn[lag_port][tc]; 1441 } 1442 1443 static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) 1444 { 1445 kvfree(sq->db.xdpi_fifo.xi); 1446 kvfree(sq->db.wqe_info); 1447 } 1448 1449 static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) 1450 { 1451 struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; 1452 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1453 int entries; 1454 size_t size; 1455 1456 /* upper bound for maximum num of entries of all xmit_modes. */ 1457 entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS * 1458 MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO); 1459 1460 size = array_size(sizeof(*xdpi_fifo->xi), entries); 1461 xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); 1462 if (!xdpi_fifo->xi) 1463 return -ENOMEM; 1464 1465 xdpi_fifo->pc = &sq->xdpi_fifo_pc; 1466 xdpi_fifo->cc = &sq->xdpi_fifo_cc; 1467 xdpi_fifo->mask = entries - 1; 1468 1469 return 0; 1470 } 1471 1472 static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) 1473 { 1474 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1475 size_t size; 1476 int err; 1477 1478 size = array_size(sizeof(*sq->db.wqe_info), wq_sz); 1479 sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); 1480 if (!sq->db.wqe_info) 1481 return -ENOMEM; 1482 1483 err = mlx5e_alloc_xdpsq_fifo(sq, numa); 1484 if (err) { 1485 mlx5e_free_xdpsq_db(sq); 1486 return err; 1487 } 1488 1489 return 0; 1490 } 1491 1492 static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, 1493 struct mlx5e_params *params, 1494 struct xsk_buff_pool *xsk_pool, 1495 struct mlx5e_sq_param *param, 1496 struct mlx5e_xdpsq *sq, 1497 bool is_redirect) 1498 { 1499 void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); 1500 struct mlx5_core_dev *mdev = c->mdev; 1501 struct mlx5_wq_cyc *wq = &sq->wq; 1502 int err; 1503 1504 sq->pdev = c->pdev; 1505 sq->mkey_be = c->mkey_be; 1506 sq->channel = c; 1507 sq->uar_map = c->bfreg->map; 1508 sq->min_inline_mode = params->tx_min_inline_mode; 1509 sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; 1510 sq->xsk_pool = xsk_pool; 1511 1512 sq->stats = sq->xsk_pool ? 1513 &c->priv->channel_stats[c->ix]->xsksq : 1514 is_redirect ? 1515 &c->priv->channel_stats[c->ix]->xdpsq : 1516 &c->priv->channel_stats[c->ix]->rq_xdpsq; 1517 sq->stop_room = param->is_mpw ? 
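			 /* stop_room is, in effect, the WQ space kept in
			  * reserve so that the largest WQE this SQ can emit
			  * always fits before the queue is stopped.
			  */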
mlx5e_stop_room_for_mpwqe(mdev) : 1518 mlx5e_stop_room_for_max_wqe(mdev); 1519 sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); 1520 1521 param->wq.db_numa_node = cpu_to_node(c->cpu); 1522 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); 1523 if (err) 1524 return err; 1525 wq->db = &wq->db[MLX5_SND_DBR]; 1526 1527 err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); 1528 if (err) 1529 goto err_sq_wq_destroy; 1530 1531 return 0; 1532 1533 err_sq_wq_destroy: 1534 mlx5_wq_destroy(&sq->wq_ctrl); 1535 1536 return err; 1537 } 1538 1539 static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) 1540 { 1541 mlx5e_free_xdpsq_db(sq); 1542 mlx5_wq_destroy(&sq->wq_ctrl); 1543 } 1544 1545 static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) 1546 { 1547 kvfree(sq->db.wqe_info); 1548 } 1549 1550 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) 1551 { 1552 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1553 size_t size; 1554 1555 size = array_size(wq_sz, sizeof(*sq->db.wqe_info)); 1556 sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); 1557 if (!sq->db.wqe_info) 1558 return -ENOMEM; 1559 1560 return 0; 1561 } 1562 1563 static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) 1564 { 1565 struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, 1566 recover_work); 1567 1568 mlx5e_reporter_icosq_cqe_err(sq); 1569 } 1570 1571 static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) 1572 { 1573 struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, 1574 recover_work); 1575 1576 /* Not implemented yet. */ 1577 1578 netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); 1579 } 1580 1581 static int mlx5e_alloc_icosq(struct mlx5e_channel *c, 1582 struct mlx5e_sq_param *param, 1583 struct mlx5e_icosq *sq, 1584 work_func_t recover_work_func) 1585 { 1586 void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); 1587 struct mlx5_core_dev *mdev = c->mdev; 1588 struct mlx5_wq_cyc *wq = &sq->wq; 1589 int err; 1590 1591 sq->channel = c; 1592 sq->uar_map = c->bfreg->map; 1593 sq->reserved_room = param->stop_room; 1594 1595 param->wq.db_numa_node = cpu_to_node(c->cpu); 1596 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); 1597 if (err) 1598 return err; 1599 wq->db = &wq->db[MLX5_SND_DBR]; 1600 1601 err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); 1602 if (err) 1603 goto err_sq_wq_destroy; 1604 1605 INIT_WORK(&sq->recover_work, recover_work_func); 1606 1607 return 0; 1608 1609 err_sq_wq_destroy: 1610 mlx5_wq_destroy(&sq->wq_ctrl); 1611 1612 return err; 1613 } 1614 1615 static void mlx5e_free_icosq(struct mlx5e_icosq *sq) 1616 { 1617 mlx5e_free_icosq_db(sq); 1618 mlx5_wq_destroy(&sq->wq_ctrl); 1619 } 1620 1621 void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) 1622 { 1623 kvfree(sq->db.wqe_info); 1624 kvfree(sq->db.skb_fifo.fifo); 1625 kvfree(sq->db.dma_fifo); 1626 } 1627 1628 int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) 1629 { 1630 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1631 int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; 1632 1633 sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, 1634 sizeof(*sq->db.dma_fifo)), 1635 GFP_KERNEL, numa); 1636 sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz, 1637 sizeof(*sq->db.skb_fifo.fifo)), 1638 GFP_KERNEL, numa); 1639 sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, 1640 sizeof(*sq->db.wqe_info)), 1641 GFP_KERNEL, numa); 1642 if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) { 1643 mlx5e_free_txqsq_db(sq); 
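		/* Some of the three arrays may have been allocated before the
		 * failure; mlx5e_free_txqsq_db() kvfree()s each of them and
		 * kvfree(NULL) is a no-op, so one cleanup call covers all
		 * cases.
		 */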
1644 return -ENOMEM; 1645 } 1646 1647 sq->dma_fifo_mask = df_sz - 1; 1648 1649 sq->db.skb_fifo.pc = &sq->skb_fifo_pc; 1650 sq->db.skb_fifo.cc = &sq->skb_fifo_cc; 1651 sq->db.skb_fifo.mask = df_sz - 1; 1652 1653 return 0; 1654 } 1655 1656 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, 1657 int txq_ix, 1658 struct mlx5e_params *params, 1659 struct mlx5e_sq_param *param, 1660 struct mlx5e_txqsq *sq, 1661 int tc) 1662 { 1663 void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); 1664 struct mlx5_core_dev *mdev = c->mdev; 1665 struct mlx5_wq_cyc *wq = &sq->wq; 1666 int err; 1667 1668 sq->pdev = c->pdev; 1669 sq->clock = mdev->clock; 1670 sq->mkey_be = c->mkey_be; 1671 sq->netdev = c->netdev; 1672 sq->mdev = c->mdev; 1673 sq->channel = c; 1674 sq->priv = c->priv; 1675 sq->ch_ix = c->ix; 1676 sq->txq_ix = txq_ix; 1677 sq->uar_map = c->bfreg->map; 1678 sq->min_inline_mode = params->tx_min_inline_mode; 1679 sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); 1680 sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); 1681 INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); 1682 if (mlx5_ipsec_device_caps(c->priv->mdev)) 1683 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); 1684 if (param->is_mpw) 1685 set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); 1686 sq->stop_room = param->stop_room; 1687 sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); 1688 1689 param->wq.db_numa_node = cpu_to_node(c->cpu); 1690 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); 1691 if (err) 1692 return err; 1693 wq->db = &wq->db[MLX5_SND_DBR]; 1694 1695 err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); 1696 if (err) 1697 goto err_sq_wq_destroy; 1698 1699 return 0; 1700 1701 err_sq_wq_destroy: 1702 mlx5_wq_destroy(&sq->wq_ctrl); 1703 1704 return err; 1705 } 1706 1707 void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) 1708 { 1709 kvfree(sq->dim); 1710 mlx5e_free_txqsq_db(sq); 1711 mlx5_wq_destroy(&sq->wq_ctrl); 1712 } 1713 1714 static int mlx5e_create_sq(struct mlx5_core_dev *mdev, 1715 struct mlx5e_sq_param *param, 1716 struct mlx5e_create_sq_param *csp, 1717 u32 *sqn) 1718 { 1719 u8 ts_format; 1720 void *in; 1721 void *sqc; 1722 void *wq; 1723 int inlen; 1724 int err; 1725 1726 inlen = MLX5_ST_SZ_BYTES(create_sq_in) + 1727 sizeof(u64) * csp->wq_ctrl->buf.npages; 1728 in = kvzalloc(inlen, GFP_KERNEL); 1729 if (!in) 1730 return -ENOMEM; 1731 1732 ts_format = mlx5_is_real_time_sq(mdev) ? 
1733 MLX5_TIMESTAMP_FORMAT_REAL_TIME : 1734 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; 1735 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 1736 wq = MLX5_ADDR_OF(sqc, sqc, wq); 1737 1738 memcpy(sqc, param->sqc, sizeof(param->sqc)); 1739 MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); 1740 MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); 1741 MLX5_SET(sqc, sqc, cqn, csp->cqn); 1742 MLX5_SET(sqc, sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn); 1743 MLX5_SET(sqc, sqc, ts_format, ts_format); 1744 1745 1746 if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) 1747 MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); 1748 1749 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); 1750 MLX5_SET(sqc, sqc, flush_in_error_en, 1); 1751 1752 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); 1753 MLX5_SET(wq, wq, uar_page, csp->uar_page); 1754 MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - 1755 MLX5_ADAPTER_PAGE_SHIFT); 1756 MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); 1757 1758 mlx5_fill_page_frag_array(&csp->wq_ctrl->buf, 1759 (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); 1760 1761 err = mlx5_core_create_sq(mdev, in, inlen, sqn); 1762 1763 kvfree(in); 1764 1765 return err; 1766 } 1767 1768 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, 1769 struct mlx5e_modify_sq_param *p) 1770 { 1771 u64 bitmask = 0; 1772 void *in; 1773 void *sqc; 1774 int inlen; 1775 int err; 1776 1777 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 1778 in = kvzalloc(inlen, GFP_KERNEL); 1779 if (!in) 1780 return -ENOMEM; 1781 1782 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 1783 1784 MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); 1785 MLX5_SET(sqc, sqc, state, p->next_state); 1786 if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { 1787 bitmask |= 1; 1788 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); 1789 } 1790 if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { 1791 bitmask |= 1 << 2; 1792 MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); 1793 } 1794 MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); 1795 1796 err = mlx5_core_modify_sq(mdev, sqn, in); 1797 1798 kvfree(in); 1799 1800 return err; 1801 } 1802 1803 static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) 1804 { 1805 mlx5_core_destroy_sq(mdev, sqn); 1806 } 1807 1808 int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, 1809 struct mlx5e_sq_param *param, 1810 struct mlx5e_create_sq_param *csp, 1811 u16 qos_queue_group_id, 1812 u32 *sqn) 1813 { 1814 struct mlx5e_modify_sq_param msp = {0}; 1815 int err; 1816 1817 err = mlx5e_create_sq(mdev, param, csp, sqn); 1818 if (err) 1819 return err; 1820 1821 msp.curr_state = MLX5_SQC_STATE_RST; 1822 msp.next_state = MLX5_SQC_STATE_RDY; 1823 if (qos_queue_group_id) { 1824 msp.qos_update = true; 1825 msp.qos_queue_group_id = qos_queue_group_id; 1826 } 1827 err = mlx5e_modify_sq(mdev, *sqn, &msp); 1828 if (err) 1829 mlx5e_destroy_sq(mdev, *sqn); 1830 1831 return err; 1832 } 1833 1834 static int mlx5e_set_sq_maxrate(struct net_device *dev, 1835 struct mlx5e_txqsq *sq, u32 rate); 1836 1837 int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, 1838 struct mlx5e_params *params, struct mlx5e_sq_param *param, 1839 struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, 1840 struct mlx5e_sq_stats *sq_stats) 1841 { 1842 struct mlx5e_create_sq_param csp = {}; 1843 u32 tx_rate; 1844 int err; 1845 1846 err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc); 1847 if (err) 1848 return err; 1849 1850 sq->stats = sq_stats; 1851 1852 
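	/* Describe the SQ to firmware: the TIS it is attached to, the
	 * completion CQ, the WQ control buffer and the doorbell UAR page.
	 */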
csp.tisn = tisn; 1853 csp.tis_lst_sz = 1; 1854 csp.cqn = sq->cq.mcq.cqn; 1855 csp.wq_ctrl = &sq->wq_ctrl; 1856 csp.min_inline_mode = sq->min_inline_mode; 1857 csp.uar_page = c->bfreg->index; 1858 err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); 1859 if (err) 1860 goto err_free_txqsq; 1861 1862 tx_rate = c->priv->tx_rates[sq->txq_ix]; 1863 if (tx_rate) 1864 mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); 1865 1866 if (sq->channel && !params->tx_dim_enabled) { 1867 sq->channel->tx_cq_moder = params->tx_cq_moderation; 1868 } else if (sq->channel) { 1869 u8 cq_period_mode; 1870 1871 cq_period_mode = params->tx_moder_use_cqe_mode ? 1872 DIM_CQ_PERIOD_MODE_START_FROM_CQE : 1873 DIM_CQ_PERIOD_MODE_START_FROM_EQE; 1874 mlx5e_reset_tx_moderation(&sq->channel->tx_cq_moder, 1875 cq_period_mode, 1876 params->tx_dim_enabled); 1877 1878 err = mlx5e_dim_tx_change(sq, params->tx_dim_enabled); 1879 if (err) 1880 goto err_destroy_sq; 1881 } 1882 1883 return 0; 1884 1885 err_destroy_sq: 1886 mlx5e_destroy_sq(c->mdev, sq->sqn); 1887 err_free_txqsq: 1888 mlx5e_free_txqsq(sq); 1889 1890 return err; 1891 } 1892 1893 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) 1894 { 1895 sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix); 1896 set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 1897 netdev_tx_reset_queue(sq->txq); 1898 netif_tx_start_queue(sq->txq); 1899 netif_queue_set_napi(sq->netdev, sq->txq_ix, NETDEV_QUEUE_TYPE_TX, sq->cq.napi); 1900 } 1901 1902 void mlx5e_tx_disable_queue(struct netdev_queue *txq) 1903 { 1904 __netif_tx_lock_bh(txq); 1905 netif_tx_stop_queue(txq); 1906 __netif_tx_unlock_bh(txq); 1907 } 1908 1909 void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) 1910 { 1911 struct mlx5_wq_cyc *wq = &sq->wq; 1912 1913 netif_queue_set_napi(sq->netdev, sq->txq_ix, NETDEV_QUEUE_TYPE_TX, NULL); 1914 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 1915 synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ 1916 1917 mlx5e_tx_disable_queue(sq->txq); 1918 1919 /* last doorbell out, godspeed .. */ 1920 if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { 1921 u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); 1922 struct mlx5e_tx_wqe *nop; 1923 1924 sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) { 1925 .num_wqebbs = 1, 1926 }; 1927 1928 nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); 1929 mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); 1930 } 1931 } 1932 1933 void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) 1934 { 1935 struct mlx5_core_dev *mdev = sq->mdev; 1936 struct mlx5_rate_limit rl = {0}; 1937 1938 if (sq->dim) 1939 cancel_work_sync(&sq->dim->work); 1940 cancel_work_sync(&sq->recover_work); 1941 mlx5e_destroy_sq(mdev, sq->sqn); 1942 if (sq->rate_limit) { 1943 rl.rate = sq->rate_limit; 1944 mlx5_rl_remove_rate(mdev, &rl); 1945 } 1946 mlx5e_free_txqsq_descs(sq); 1947 mlx5e_free_txqsq(sq); 1948 } 1949 1950 void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) 1951 { 1952 struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, 1953 recover_work); 1954 1955 /* Recovering queues means re-enabling NAPI, which requires the netdev 1956 * instance lock. However, SQ closing flows have to wait for work tasks 1957 * to finish while also holding the netdev instance lock. So either get 1958 * the lock or find that the SQ is no longer enabled and thus this work 1959 * is not relevant anymore. 
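	 * The same trylock-and-recheck pattern is used by
	 * mlx5e_rq_timeout_work() above for the RX side.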
1960 */ 1961 while (!netdev_trylock(sq->netdev)) { 1962 if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) 1963 return; 1964 msleep(20); 1965 } 1966 1967 mlx5e_reporter_tx_err_cqe(sq); 1968 netdev_unlock(sq->netdev); 1969 } 1970 1971 static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) 1972 { 1973 return (struct dim_cq_moder) { 1974 .cq_period_mode = cq_period_mode, 1975 .pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS, 1976 .usec = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 1977 MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE : 1978 MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC, 1979 }; 1980 } 1981 1982 bool mlx5e_reset_tx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, 1983 bool dim_enabled) 1984 { 1985 bool reset_needed = cq_moder->cq_period_mode != cq_period_mode; 1986 1987 if (dim_enabled) 1988 *cq_moder = net_dim_get_def_tx_moderation(cq_period_mode); 1989 else 1990 *cq_moder = mlx5e_get_def_tx_moderation(cq_period_mode); 1991 1992 return reset_needed; 1993 } 1994 1995 bool mlx5e_reset_tx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, 1996 bool dim_enabled, bool keep_dim_state) 1997 { 1998 bool reset = false; 1999 int i, tc; 2000 2001 for (i = 0; i < chs->num; i++) { 2002 for (tc = 0; tc < mlx5e_get_dcb_num_tc(&chs->params); tc++) { 2003 if (keep_dim_state) 2004 dim_enabled = !!chs->c[i]->sq[tc].dim; 2005 2006 reset |= mlx5e_reset_tx_moderation(&chs->c[i]->tx_cq_moder, 2007 cq_period_mode, dim_enabled); 2008 } 2009 } 2010 2011 return reset; 2012 } 2013 2014 static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, 2015 struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, 2016 work_func_t recover_work_func) 2017 { 2018 struct mlx5e_create_sq_param csp = {}; 2019 int err; 2020 2021 err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); 2022 if (err) 2023 return err; 2024 2025 csp.cqn = sq->cq.mcq.cqn; 2026 csp.wq_ctrl = &sq->wq_ctrl; 2027 csp.min_inline_mode = params->tx_min_inline_mode; 2028 csp.uar_page = c->bfreg->index; 2029 err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); 2030 if (err) 2031 goto err_free_icosq; 2032 2033 spin_lock_init(&sq->lock); 2034 2035 if (param->is_tls) { 2036 sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list(); 2037 if (IS_ERR(sq->ktls_resync)) { 2038 err = PTR_ERR(sq->ktls_resync); 2039 goto err_destroy_icosq; 2040 } 2041 } 2042 return 0; 2043 2044 err_destroy_icosq: 2045 mlx5e_destroy_sq(c->mdev, sq->sqn); 2046 err_free_icosq: 2047 mlx5e_free_icosq(sq); 2048 2049 return err; 2050 } 2051 2052 void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) 2053 { 2054 set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); 2055 } 2056 2057 void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) 2058 { 2059 clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); 2060 synchronize_net(); /* Sync with NAPI. 
*/ 2061 } 2062 2063 static void mlx5e_close_icosq(struct mlx5e_icosq *sq) 2064 { 2065 struct mlx5e_channel *c = sq->channel; 2066 2067 if (sq->ktls_resync) 2068 mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync); 2069 mlx5e_destroy_sq(c->mdev, sq->sqn); 2070 mlx5e_free_icosq_descs(sq); 2071 mlx5e_free_icosq(sq); 2072 } 2073 2074 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, 2075 struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, 2076 struct mlx5e_xdpsq *sq, bool is_redirect) 2077 { 2078 struct mlx5e_create_sq_param csp = {}; 2079 int err; 2080 2081 err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect); 2082 if (err) 2083 return err; 2084 2085 csp.tis_lst_sz = 1; 2086 csp.tisn = mlx5e_profile_get_tisn(c->mdev, c->priv, c->priv->profile, 2087 c->lag_port, 0); /* tc = 0 */ 2088 csp.cqn = sq->cq.mcq.cqn; 2089 csp.wq_ctrl = &sq->wq_ctrl; 2090 csp.min_inline_mode = sq->min_inline_mode; 2091 csp.uar_page = c->bfreg->index; 2092 set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 2093 2094 err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); 2095 if (err) 2096 goto err_free_xdpsq; 2097 2098 mlx5e_set_xmit_fp(sq, param->is_mpw); 2099 2100 return 0; 2101 2102 err_free_xdpsq: 2103 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 2104 mlx5e_free_xdpsq(sq); 2105 2106 return err; 2107 } 2108 2109 void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) 2110 { 2111 struct mlx5e_channel *c = sq->channel; 2112 2113 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 2114 synchronize_net(); /* Sync with NAPI. */ 2115 2116 mlx5e_destroy_sq(c->mdev, sq->sqn); 2117 mlx5e_free_xdpsq_descs(sq); 2118 mlx5e_free_xdpsq(sq); 2119 } 2120 2121 static struct mlx5e_xdpsq *mlx5e_open_xdpredirect_sq(struct mlx5e_channel *c, 2122 struct mlx5e_params *params, 2123 struct mlx5e_channel_param *cparam, 2124 struct mlx5e_create_cq_param *ccp) 2125 { 2126 struct mlx5e_xdpsq *xdpsq; 2127 int err; 2128 2129 xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, cpu_to_node(c->cpu)); 2130 if (!xdpsq) 2131 return ERR_PTR(-ENOMEM); 2132 2133 err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, 2134 &cparam->xdp_sq.cqp, ccp, &xdpsq->cq); 2135 if (err) 2136 goto err_free_xdpsq; 2137 2138 err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, xdpsq, true); 2139 if (err) 2140 goto err_close_xdpsq_cq; 2141 2142 return xdpsq; 2143 2144 err_close_xdpsq_cq: 2145 mlx5e_close_cq(&xdpsq->cq); 2146 err_free_xdpsq: 2147 kvfree(xdpsq); 2148 2149 return ERR_PTR(err); 2150 } 2151 2152 static void mlx5e_close_xdpredirect_sq(struct mlx5e_xdpsq *xdpsq) 2153 { 2154 mlx5e_close_xdpsq(xdpsq); 2155 mlx5e_close_cq(&xdpsq->cq); 2156 kvfree(xdpsq); 2157 } 2158 2159 static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, 2160 struct net_device *netdev, 2161 struct workqueue_struct *workqueue, 2162 struct mlx5_uars_page *uar, 2163 struct mlx5e_cq_param *param, 2164 struct mlx5e_cq *cq) 2165 { 2166 struct mlx5_core_cq *mcq = &cq->mcq; 2167 int err; 2168 u32 i; 2169 2170 err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, 2171 &cq->wq_ctrl); 2172 if (err) 2173 return err; 2174 2175 mcq->cqe_sz = 64; 2176 mcq->set_ci_db = cq->wq_ctrl.db.db; 2177 mcq->arm_db = cq->wq_ctrl.db.db + 1; 2178 *mcq->set_ci_db = 0; 2179 mcq->vector = param->eq_ix; 2180 mcq->comp = mlx5e_completion_event; 2181 mcq->event = mlx5e_cq_error_event; 2182 2183 for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { 2184 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); 2185 2186 cqe->op_own = 0xf1; 2187 cqe->validity_iteration_count =
0xff; 2188 } 2189 2190 cq->mdev = mdev; 2191 cq->netdev = netdev; 2192 cq->workqueue = workqueue; 2193 cq->uar = uar; 2194 2195 return 0; 2196 } 2197 2198 static int mlx5e_alloc_cq(struct mlx5_core_dev *mdev, 2199 struct mlx5e_cq_param *param, 2200 struct mlx5e_create_cq_param *ccp, 2201 struct mlx5e_cq *cq) 2202 { 2203 int err; 2204 2205 param->wq.buf_numa_node = ccp->node; 2206 param->wq.db_numa_node = ccp->node; 2207 param->eq_ix = ccp->ix; 2208 2209 err = mlx5e_alloc_cq_common(mdev, ccp->netdev, ccp->wq, 2210 ccp->uar, param, cq); 2211 2212 cq->napi = ccp->napi; 2213 cq->ch_stats = ccp->ch_stats; 2214 2215 return err; 2216 } 2217 2218 static void mlx5e_free_cq(struct mlx5e_cq *cq) 2219 { 2220 mlx5_wq_destroy(&cq->wq_ctrl); 2221 } 2222 2223 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) 2224 { 2225 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 2226 struct mlx5_core_dev *mdev = cq->mdev; 2227 struct mlx5_core_cq *mcq = &cq->mcq; 2228 2229 void *in; 2230 void *cqc; 2231 int inlen; 2232 int eqn; 2233 int err; 2234 2235 err = mlx5_comp_eqn_get(mdev, param->eq_ix, &eqn); 2236 if (err) 2237 return err; 2238 2239 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 2240 sizeof(u64) * cq->wq_ctrl.buf.npages; 2241 in = kvzalloc(inlen, GFP_KERNEL); 2242 if (!in) 2243 return -ENOMEM; 2244 2245 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 2246 2247 memcpy(cqc, param->cqc, sizeof(param->cqc)); 2248 2249 mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, 2250 (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); 2251 2252 MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(param->cq_period_mode)); 2253 2254 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 2255 MLX5_SET(cqc, cqc, uar_page, cq->uar->index); 2256 MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - 2257 MLX5_ADAPTER_PAGE_SHIFT); 2258 MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); 2259 2260 err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); 2261 2262 kvfree(in); 2263 2264 if (err) 2265 return err; 2266 2267 mlx5e_cq_arm(cq); 2268 2269 return 0; 2270 } 2271 2272 static void mlx5e_destroy_cq(struct mlx5e_cq *cq) 2273 { 2274 mlx5_core_destroy_cq(cq->mdev, &cq->mcq); 2275 } 2276 2277 int mlx5e_open_cq(struct mlx5_core_dev *mdev, struct dim_cq_moder moder, 2278 struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, 2279 struct mlx5e_cq *cq) 2280 { 2281 int err; 2282 2283 err = mlx5e_alloc_cq(mdev, param, ccp, cq); 2284 if (err) 2285 return err; 2286 2287 err = mlx5e_create_cq(cq, param); 2288 if (err) 2289 goto err_free_cq; 2290 2291 if (MLX5_CAP_GEN(mdev, cq_moderation) && 2292 MLX5_CAP_GEN(mdev, cq_period_mode_modify)) 2293 mlx5e_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts, 2294 mlx5e_cq_period_mode(moder.cq_period_mode)); 2295 return 0; 2296 2297 err_free_cq: 2298 mlx5e_free_cq(cq); 2299 2300 return err; 2301 } 2302 2303 void mlx5e_close_cq(struct mlx5e_cq *cq) 2304 { 2305 mlx5e_destroy_cq(cq); 2306 mlx5e_free_cq(cq); 2307 } 2308 2309 int mlx5e_modify_cq_period_mode(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 2310 u8 cq_period_mode) 2311 { 2312 u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; 2313 void *cqc; 2314 2315 MLX5_SET(modify_cq_in, in, cqn, cq->cqn); 2316 cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); 2317 MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(cq_period_mode)); 2318 MLX5_SET(modify_cq_in, in, 2319 modify_field_select_resize_field_select.modify_field_select.modify_field_select, 2320 MLX5_CQ_MODIFY_PERIOD_MODE); 2321 2322 return 
mlx5_core_modify_cq(dev, cq, in, sizeof(in)); 2323 } 2324 2325 int mlx5e_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 2326 u16 cq_period, u16 cq_max_count, u8 cq_period_mode) 2327 { 2328 u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; 2329 void *cqc; 2330 2331 MLX5_SET(modify_cq_in, in, cqn, cq->cqn); 2332 cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); 2333 MLX5_SET(cqc, cqc, cq_period, cq_period); 2334 MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); 2335 MLX5_SET(cqc, cqc, cq_period_mode, cq_period_mode); 2336 MLX5_SET(modify_cq_in, in, 2337 modify_field_select_resize_field_select.modify_field_select.modify_field_select, 2338 MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT | MLX5_CQ_MODIFY_PERIOD_MODE); 2339 2340 return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); 2341 } 2342 2343 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, 2344 struct mlx5e_params *params, 2345 struct mlx5e_create_cq_param *ccp, 2346 struct mlx5e_channel_param *cparam) 2347 { 2348 int err; 2349 int tc; 2350 2351 for (tc = 0; tc < c->num_tc; tc++) { 2352 err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &cparam->txq_sq.cqp, 2353 ccp, &c->sq[tc].cq); 2354 if (err) 2355 goto err_close_tx_cqs; 2356 } 2357 2358 return 0; 2359 2360 err_close_tx_cqs: 2361 for (tc--; tc >= 0; tc--) 2362 mlx5e_close_cq(&c->sq[tc].cq); 2363 2364 return err; 2365 } 2366 2367 static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) 2368 { 2369 int tc; 2370 2371 for (tc = 0; tc < c->num_tc; tc++) 2372 mlx5e_close_cq(&c->sq[tc].cq); 2373 } 2374 2375 static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq) 2376 { 2377 int tc; 2378 2379 for (tc = 0; tc < TC_MAX_QUEUE; tc++) 2380 if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count) 2381 return tc; 2382 2383 WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq); 2384 return -ENOENT; 2385 } 2386 2387 static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, 2388 u32 *hw_id) 2389 { 2390 int tc; 2391 2392 if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) { 2393 *hw_id = 0; 2394 return 0; 2395 } 2396 2397 tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix); 2398 if (tc < 0) 2399 return tc; 2400 2401 if (tc >= params->mqprio.num_tc) { 2402 WARN(1, "Unexpected TCs configuration. 
tc %d is out of range of %u", 2403 tc, params->mqprio.num_tc); 2404 return -EINVAL; 2405 } 2406 2407 *hw_id = params->mqprio.channel.hw_id[tc]; 2408 return 0; 2409 } 2410 2411 static int mlx5e_open_sqs(struct mlx5e_channel *c, 2412 struct mlx5e_params *params, 2413 struct mlx5e_channel_param *cparam) 2414 { 2415 int err, tc; 2416 2417 for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) { 2418 int txq_ix = c->ix + tc * params->num_channels; 2419 u32 qos_queue_group_id; 2420 u32 tisn; 2421 2422 tisn = mlx5e_profile_get_tisn(c->mdev, c->priv, c->priv->profile, 2423 c->lag_port, tc); 2424 err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id); 2425 if (err) 2426 goto err_close_sqs; 2427 2428 err = mlx5e_open_txqsq(c, tisn, txq_ix, 2429 params, &cparam->txq_sq, &c->sq[tc], tc, 2430 qos_queue_group_id, 2431 &c->priv->channel_stats[c->ix]->sq[tc]); 2432 if (err) 2433 goto err_close_sqs; 2434 } 2435 2436 return 0; 2437 2438 err_close_sqs: 2439 for (tc--; tc >= 0; tc--) 2440 mlx5e_close_txqsq(&c->sq[tc]); 2441 2442 return err; 2443 } 2444 2445 static void mlx5e_close_sqs(struct mlx5e_channel *c) 2446 { 2447 int tc; 2448 2449 for (tc = 0; tc < c->num_tc; tc++) 2450 mlx5e_close_txqsq(&c->sq[tc]); 2451 } 2452 2453 static int mlx5e_set_sq_maxrate(struct net_device *dev, 2454 struct mlx5e_txqsq *sq, u32 rate) 2455 { 2456 struct mlx5e_priv *priv = netdev_priv(dev); 2457 struct mlx5_core_dev *mdev = priv->mdev; 2458 struct mlx5e_modify_sq_param msp = {0}; 2459 struct mlx5_rate_limit rl = {0}; 2460 u16 rl_index = 0; 2461 int err; 2462 2463 if (rate == sq->rate_limit) 2464 /* nothing to do */ 2465 return 0; 2466 2467 if (sq->rate_limit) { 2468 rl.rate = sq->rate_limit; 2469 /* remove current rl index to free space to next ones */ 2470 mlx5_rl_remove_rate(mdev, &rl); 2471 } 2472 2473 sq->rate_limit = 0; 2474 2475 if (rate) { 2476 rl.rate = rate; 2477 err = mlx5_rl_add_rate(mdev, &rl_index, &rl); 2478 if (err) { 2479 netdev_err(dev, "Failed configuring rate %u: %d\n", 2480 rate, err); 2481 return err; 2482 } 2483 } 2484 2485 msp.curr_state = MLX5_SQC_STATE_RDY; 2486 msp.next_state = MLX5_SQC_STATE_RDY; 2487 msp.rl_index = rl_index; 2488 msp.rl_update = true; 2489 err = mlx5e_modify_sq(mdev, sq->sqn, &msp); 2490 if (err) { 2491 netdev_err(dev, "Failed configuring rate %u: %d\n", 2492 rate, err); 2493 /* remove the rate from the table */ 2494 if (rate) 2495 mlx5_rl_remove_rate(mdev, &rl); 2496 return err; 2497 } 2498 2499 sq->rate_limit = rate; 2500 return 0; 2501 } 2502 2503 static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) 2504 { 2505 struct mlx5e_priv *priv = netdev_priv(dev); 2506 struct mlx5_core_dev *mdev = priv->mdev; 2507 struct mlx5e_txqsq *sq = priv->txq2sq[index]; 2508 int err = 0; 2509 2510 if (!mlx5_rl_is_supported(mdev)) { 2511 netdev_err(dev, "Rate limiting is not supported on this device\n"); 2512 return -EINVAL; 2513 } 2514 2515 /* rate is given in Mb/sec, HW config is in Kb/sec */ 2516 rate = rate << 10; 2517 2518 /* Check whether rate in valid range, 0 is always valid */ 2519 if (rate && !mlx5_rl_is_in_range(mdev, rate)) { 2520 netdev_err(dev, "TX rate %u, is not in range\n", rate); 2521 return -ERANGE; 2522 } 2523 2524 mutex_lock(&priv->state_lock); 2525 if (test_bit(MLX5E_STATE_OPENED, &priv->state)) 2526 err = mlx5e_set_sq_maxrate(dev, sq, rate); 2527 if (!err) 2528 priv->tx_rates[index] = rate; 2529 mutex_unlock(&priv->state_lock); 2530 2531 return err; 2532 } 2533 2534 static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params 
*params, 2535 struct mlx5e_rq_param *rq_params) 2536 { 2537 u16 q_counter = c->priv->q_counter[c->sd_ix]; 2538 int err; 2539 2540 err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq); 2541 if (err) 2542 return err; 2543 2544 return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq); 2545 } 2546 2547 static struct mlx5e_icosq * 2548 mlx5e_open_async_icosq(struct mlx5e_channel *c, 2549 struct mlx5e_params *params, 2550 struct mlx5e_channel_param *cparam, 2551 struct mlx5e_create_cq_param *ccp) 2552 { 2553 struct dim_cq_moder icocq_moder = {0, 0}; 2554 struct mlx5e_icosq *async_icosq; 2555 int err; 2556 2557 async_icosq = kvzalloc_node(sizeof(*async_icosq), GFP_KERNEL, 2558 cpu_to_node(c->cpu)); 2559 if (!async_icosq) 2560 return ERR_PTR(-ENOMEM); 2561 2562 err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->async_icosq.cqp, ccp, 2563 &async_icosq->cq); 2564 if (err) 2565 goto err_free_async_icosq; 2566 2567 err = mlx5e_open_icosq(c, params, &cparam->async_icosq, async_icosq, 2568 mlx5e_async_icosq_err_cqe_work); 2569 if (err) 2570 goto err_close_async_icosq_cq; 2571 2572 return async_icosq; 2573 2574 err_close_async_icosq_cq: 2575 mlx5e_close_cq(&async_icosq->cq); 2576 err_free_async_icosq: 2577 kvfree(async_icosq); 2578 return ERR_PTR(err); 2579 } 2580 2581 static void mlx5e_close_async_icosq(struct mlx5e_icosq *async_icosq) 2582 { 2583 mlx5e_close_icosq(async_icosq); 2584 mlx5e_close_cq(&async_icosq->cq); 2585 kvfree(async_icosq); 2586 } 2587 2588 static int mlx5e_open_queues(struct mlx5e_channel *c, 2589 struct mlx5e_params *params, 2590 struct mlx5e_channel_param *cparam, 2591 bool async_icosq_needed) 2592 { 2593 const struct net_device_ops *netdev_ops = c->netdev->netdev_ops; 2594 struct dim_cq_moder icocq_moder = {0, 0}; 2595 struct mlx5e_create_cq_param ccp; 2596 int err; 2597 2598 mlx5e_build_create_cq_param(&ccp, c); 2599 2600 err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->icosq.cqp, &ccp, 2601 &c->icosq.cq); 2602 if (err) 2603 return err; 2604 2605 err = mlx5e_open_tx_cqs(c, params, &ccp, cparam); 2606 if (err) 2607 goto err_close_icosq_cq; 2608 2609 if (netdev_ops->ndo_xdp_xmit && c->xdp) { 2610 c->xdpsq = mlx5e_open_xdpredirect_sq(c, params, cparam, &ccp); 2611 if (IS_ERR(c->xdpsq)) { 2612 err = PTR_ERR(c->xdpsq); 2613 goto err_close_tx_cqs; 2614 } 2615 } 2616 2617 err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, 2618 &c->rq.cq); 2619 if (err) 2620 goto err_close_xdpredirect_sq; 2621 2622 err = c->xdp ? 
mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &cparam->xdp_sq.cqp, 2623 &ccp, &c->rq_xdpsq.cq) : 0; 2624 if (err) 2625 goto err_close_rx_cq; 2626 2627 if (async_icosq_needed) { 2628 c->async_icosq = mlx5e_open_async_icosq(c, params, cparam, 2629 &ccp); 2630 if (IS_ERR(c->async_icosq)) { 2631 err = PTR_ERR(c->async_icosq); 2632 goto err_close_rq_xdpsq_cq; 2633 } 2634 } 2635 2636 mutex_init(&c->icosq_recovery_lock); 2637 2638 err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, 2639 mlx5e_icosq_err_cqe_work); 2640 if (err) 2641 goto err_close_async_icosq; 2642 2643 err = mlx5e_open_sqs(c, params, cparam); 2644 if (err) 2645 goto err_close_icosq; 2646 2647 err = mlx5e_open_rxq_rq(c, params, &cparam->rq); 2648 if (err) 2649 goto err_close_sqs; 2650 2651 if (c->xdp) { 2652 err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, 2653 &c->rq_xdpsq, false); 2654 if (err) 2655 goto err_close_rq; 2656 } 2657 2658 return 0; 2659 2660 err_close_rq: 2661 mlx5e_close_rq(&c->rq); 2662 2663 err_close_sqs: 2664 mlx5e_close_sqs(c); 2665 2666 err_close_icosq: 2667 mlx5e_close_icosq(&c->icosq); 2668 2669 err_close_async_icosq: 2670 if (c->async_icosq) 2671 mlx5e_close_async_icosq(c->async_icosq); 2672 2673 err_close_rq_xdpsq_cq: 2674 if (c->xdp) 2675 mlx5e_close_cq(&c->rq_xdpsq.cq); 2676 2677 err_close_rx_cq: 2678 mlx5e_close_cq(&c->rq.cq); 2679 2680 err_close_xdpredirect_sq: 2681 if (c->xdpsq) 2682 mlx5e_close_xdpredirect_sq(c->xdpsq); 2683 2684 err_close_tx_cqs: 2685 mlx5e_close_tx_cqs(c); 2686 2687 err_close_icosq_cq: 2688 mlx5e_close_cq(&c->icosq.cq); 2689 2690 return err; 2691 } 2692 2693 static void mlx5e_close_queues(struct mlx5e_channel *c) 2694 { 2695 if (c->xdp) 2696 mlx5e_close_xdpsq(&c->rq_xdpsq); 2697 /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ 2698 cancel_work_sync(&c->icosq.recover_work); 2699 mlx5e_close_rq(&c->rq); 2700 mlx5e_close_sqs(c); 2701 mlx5e_close_icosq(&c->icosq); 2702 mutex_destroy(&c->icosq_recovery_lock); 2703 if (c->async_icosq) 2704 mlx5e_close_async_icosq(c->async_icosq); 2705 if (c->xdp) 2706 mlx5e_close_cq(&c->rq_xdpsq.cq); 2707 mlx5e_close_cq(&c->rq.cq); 2708 if (c->xdpsq) 2709 mlx5e_close_xdpredirect_sq(c->xdpsq); 2710 mlx5e_close_tx_cqs(c); 2711 mlx5e_close_cq(&c->icosq.cq); 2712 } 2713 2714 static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) 2715 { 2716 u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id); 2717 2718 return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); 2719 } 2720 2721 static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) 2722 { 2723 if (ix > priv->stats_nch) { 2724 netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, 2725 priv->stats_nch); 2726 return -EINVAL; 2727 } 2728 2729 if (priv->channel_stats[ix]) 2730 return 0; 2731 2732 /* Asymmetric dynamic memory allocation. 2733 * Freed in mlx5e_priv_arrays_free, not on channel closure. 
2734 */ 2735 netdev_dbg(priv->netdev, "Creating channel stats %d\n", ix); 2736 priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), 2737 GFP_KERNEL, cpu_to_node(cpu)); 2738 if (!priv->channel_stats[ix]) 2739 return -ENOMEM; 2740 priv->stats_nch++; 2741 2742 return 0; 2743 } 2744 2745 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) 2746 { 2747 bool locked; 2748 2749 if (!test_and_set_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state)) 2750 synchronize_net(); 2751 2752 locked = mlx5e_icosq_sync_lock(&c->icosq); 2753 mlx5e_trigger_irq(&c->icosq); 2754 mlx5e_icosq_sync_unlock(&c->icosq, locked); 2755 2756 clear_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state); 2757 } 2758 2759 void mlx5e_trigger_napi_sched(struct napi_struct *napi) 2760 { 2761 local_bh_disable(); 2762 napi_schedule(napi); 2763 local_bh_enable(); 2764 } 2765 2766 static void mlx5e_channel_pick_doorbell(struct mlx5e_channel *c) 2767 { 2768 struct mlx5e_hw_objs *hw_objs = &c->mdev->mlx5e_res.hw_objs; 2769 2770 /* No dedicated Ethernet doorbells, use the global one. */ 2771 if (hw_objs->num_bfregs == 0) { 2772 c->bfreg = &c->mdev->priv.bfreg; 2773 return; 2774 } 2775 2776 /* Round-robin between doorbells. */ 2777 c->bfreg = hw_objs->bfregs + c->vec_ix % hw_objs->num_bfregs; 2778 } 2779 2780 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, 2781 struct mlx5e_params *params, 2782 struct xsk_buff_pool *xsk_pool, 2783 struct mlx5e_channel **cp) 2784 { 2785 struct net_device *netdev = priv->netdev; 2786 struct mlx5e_channel_param *cparam; 2787 struct mlx5_core_dev *mdev; 2788 struct mlx5e_xsk_param xsk; 2789 bool async_icosq_needed; 2790 struct mlx5e_channel *c; 2791 unsigned int irq; 2792 int vec_ix; 2793 int cpu; 2794 int err; 2795 2796 mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); 2797 vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); 2798 cpu = mlx5_comp_vector_get_cpu(mdev, vec_ix); 2799 2800 err = mlx5_comp_irqn_get(mdev, vec_ix, &irq); 2801 if (err) 2802 return err; 2803 2804 err = mlx5e_channel_stats_alloc(priv, ix, cpu); 2805 if (err) 2806 return err; 2807 2808 c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); 2809 cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); 2810 if (!c || !cparam) { 2811 err = -ENOMEM; 2812 goto err_free; 2813 } 2814 2815 err = mlx5e_build_channel_param(mdev, params, cparam); 2816 if (err) 2817 goto err_free; 2818 2819 c->priv = priv; 2820 c->mdev = mdev; 2821 c->ix = ix; 2822 c->vec_ix = vec_ix; 2823 c->sd_ix = mlx5_sd_ch_ix_get_dev_ix(mdev, ix); 2824 c->cpu = cpu; 2825 c->pdev = mlx5_core_dma_dev(mdev); 2826 c->netdev = priv->netdev; 2827 c->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); 2828 c->num_tc = mlx5e_get_dcb_num_tc(params); 2829 c->xdp = !!params->xdp_prog; 2830 c->stats = &priv->channel_stats[ix]->ch; 2831 c->aff_mask = irq_get_effective_affinity_mask(irq); 2832 c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); 2833 2834 mlx5e_channel_pick_doorbell(c); 2835 2836 netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); 2837 netif_napi_set_irq_locked(&c->napi, irq); 2838 2839 async_icosq_needed = !!xsk_pool || priv->ktls_rx_was_enabled; 2840 err = mlx5e_open_queues(c, params, cparam, async_icosq_needed); 2841 if (unlikely(err)) 2842 goto err_napi_del; 2843 2844 if (xsk_pool) { 2845 mlx5e_build_xsk_param(xsk_pool, &xsk); 2846 err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); 2847 if (unlikely(err)) 2848 goto err_close_queues; 2849 } 2850 2851 *cp = c; 2852 2853 kvfree(cparam); 2854 return 0; 2855 2856 err_close_queues: 2857 
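	/* Reached when mlx5e_open_xsk() fails: the regular queues were already
	 * opened above, so close them before deleting the NAPI instance and
	 * freeing the channel.
	 */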
mlx5e_close_queues(c); 2858 2859 err_napi_del: 2860 netif_napi_del_locked(&c->napi); 2861 2862 err_free: 2863 kvfree(cparam); 2864 kvfree(c); 2865 2866 return err; 2867 } 2868 2869 static void mlx5e_activate_channel(struct mlx5e_channel *c) 2870 { 2871 int tc; 2872 2873 napi_enable_locked(&c->napi); 2874 2875 for (tc = 0; tc < c->num_tc; tc++) 2876 mlx5e_activate_txqsq(&c->sq[tc]); 2877 mlx5e_activate_icosq(&c->icosq); 2878 if (c->async_icosq) 2879 mlx5e_activate_icosq(c->async_icosq); 2880 2881 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2882 mlx5e_activate_xsk(c); 2883 else 2884 mlx5e_activate_rq(&c->rq); 2885 2886 netif_queue_set_napi(c->netdev, c->ix, NETDEV_QUEUE_TYPE_RX, &c->napi); 2887 } 2888 2889 static void mlx5e_deactivate_channel(struct mlx5e_channel *c) 2890 { 2891 int tc; 2892 2893 netif_queue_set_napi(c->netdev, c->ix, NETDEV_QUEUE_TYPE_RX, NULL); 2894 2895 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2896 mlx5e_deactivate_xsk(c); 2897 else 2898 mlx5e_deactivate_rq(&c->rq); 2899 2900 if (c->async_icosq) 2901 mlx5e_deactivate_icosq(c->async_icosq); 2902 mlx5e_deactivate_icosq(&c->icosq); 2903 for (tc = 0; tc < c->num_tc; tc++) 2904 mlx5e_deactivate_txqsq(&c->sq[tc]); 2905 mlx5e_qos_deactivate_queues(c); 2906 2907 napi_disable_locked(&c->napi); 2908 } 2909 2910 static void mlx5e_close_channel(struct mlx5e_channel *c) 2911 { 2912 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2913 mlx5e_close_xsk(c); 2914 mlx5e_close_queues(c); 2915 mlx5e_qos_close_queues(c); 2916 netif_napi_del_locked(&c->napi); 2917 2918 kvfree(c); 2919 } 2920 2921 int mlx5e_open_channels(struct mlx5e_priv *priv, 2922 struct mlx5e_channels *chs) 2923 { 2924 int err = -ENOMEM; 2925 int i; 2926 2927 chs->num = chs->params.num_channels; 2928 2929 chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); 2930 if (!chs->c) 2931 goto err_out; 2932 2933 for (i = 0; i < chs->num; i++) { 2934 struct xsk_buff_pool *xsk_pool = NULL; 2935 2936 if (chs->params.xdp_prog) 2937 xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); 2938 2939 err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]); 2940 if (err) 2941 goto err_close_channels; 2942 } 2943 2944 if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { 2945 err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); 2946 if (err) 2947 goto err_close_channels; 2948 } 2949 2950 if (priv->htb) { 2951 err = mlx5e_qos_open_queues(priv, chs); 2952 if (err) 2953 goto err_close_ptp; 2954 } 2955 2956 mlx5e_health_channels_update(priv); 2957 return 0; 2958 2959 err_close_ptp: 2960 if (chs->ptp) 2961 mlx5e_ptp_close(chs->ptp); 2962 2963 err_close_channels: 2964 for (i--; i >= 0; i--) 2965 mlx5e_close_channel(chs->c[i]); 2966 2967 kfree(chs->c); 2968 err_out: 2969 chs->num = 0; 2970 return err; 2971 } 2972 2973 static void mlx5e_activate_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) 2974 { 2975 int i; 2976 2977 for (i = 0; i < chs->num; i++) 2978 mlx5e_activate_channel(chs->c[i]); 2979 2980 if (priv->htb) 2981 mlx5e_qos_activate_queues(priv); 2982 2983 for (i = 0; i < chs->num; i++) 2984 mlx5e_trigger_napi_icosq(chs->c[i]); 2985 2986 if (chs->ptp) 2987 mlx5e_ptp_activate_channel(chs->ptp); 2988 } 2989 2990 static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) 2991 { 2992 int err = 0; 2993 int i; 2994 2995 for (i = 0; i < chs->num; i++) { 2996 int timeout = err ? 
0 : MLX5E_RQ_WQES_TIMEOUT; 2997 struct mlx5e_channel *c = chs->c[i]; 2998 2999 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 3000 continue; 3001 3002 err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout); 3003 3004 /* Don't wait on the XSK RQ, because the newer xdpsock sample 3005 * doesn't provide any Fill Ring entries at the setup stage. 3006 */ 3007 } 3008 3009 return err ? -ETIMEDOUT : 0; 3010 } 3011 3012 static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) 3013 { 3014 int i; 3015 3016 if (chs->ptp) 3017 mlx5e_ptp_deactivate_channel(chs->ptp); 3018 3019 for (i = 0; i < chs->num; i++) 3020 mlx5e_deactivate_channel(chs->c[i]); 3021 } 3022 3023 void mlx5e_close_channels(struct mlx5e_channels *chs) 3024 { 3025 int i; 3026 3027 ASSERT_RTNL(); 3028 if (chs->ptp) { 3029 mlx5e_ptp_close(chs->ptp); 3030 chs->ptp = NULL; 3031 } 3032 for (i = 0; i < chs->num; i++) 3033 mlx5e_close_channel(chs->c[i]); 3034 3035 kfree(chs->c); 3036 chs->num = 0; 3037 } 3038 3039 static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv) 3040 { 3041 struct mlx5e_rx_res *res = priv->rx_res; 3042 3043 return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge); 3044 } 3045 3046 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge); 3047 3048 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, 3049 struct mlx5e_params *params, u16 mtu) 3050 { 3051 u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu); 3052 int err; 3053 3054 err = mlx5_set_port_mtu(mdev, hw_mtu, 1); 3055 if (err) 3056 return err; 3057 3058 /* Update vport context MTU */ 3059 mlx5_modify_nic_vport_mtu(mdev, hw_mtu); 3060 return 0; 3061 } 3062 3063 static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, 3064 struct mlx5e_params *params, u16 *mtu) 3065 { 3066 u16 hw_mtu = 0; 3067 int err; 3068 3069 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3070 if (err || !hw_mtu) /* fallback to port oper mtu */ 3071 mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); 3072 3073 *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); 3074 } 3075 3076 int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) 3077 { 3078 struct mlx5e_params *params = &priv->channels.params; 3079 struct net_device *netdev = priv->netdev; 3080 struct mlx5_core_dev *mdev = priv->mdev; 3081 u16 mtu; 3082 int err; 3083 3084 err = mlx5e_set_mtu(mdev, params, params->sw_mtu); 3085 if (err) 3086 return err; 3087 3088 mlx5e_query_mtu(mdev, params, &mtu); 3089 if (mtu != params->sw_mtu) 3090 netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", 3091 __func__, mtu, params->sw_mtu); 3092 3093 params->sw_mtu = mtu; 3094 return 0; 3095 } 3096 3097 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu); 3098 3099 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) 3100 { 3101 struct mlx5e_params *params = &priv->channels.params; 3102 struct net_device *netdev = priv->netdev; 3103 struct mlx5_core_dev *mdev = priv->mdev; 3104 u16 max_mtu; 3105 3106 /* MTU range: 68 - hw-specific max */ 3107 netdev->min_mtu = ETH_MIN_MTU; 3108 3109 mlx5_query_port_max_mtu(mdev, &max_mtu, 1); 3110 netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu), 3111 ETH_MAX_MTU); 3112 } 3113 3114 static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, 3115 struct netdev_tc_txq *tc_to_txq) 3116 { 3117 int tc, err; 3118 3119 netdev_reset_tc(netdev); 3120 3121 if (ntc == 1) 3122 return 0; 3123 3124 err = netdev_set_num_tc(netdev, ntc); 3125 if (err) { 3126 netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc); 3127 
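		/* netdev_reset_tc() was already called above, so the netdev is
		 * left with no TC configuration; propagate the error to the
		 * caller.
		 */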
return err; 3128 } 3129 3130 for (tc = 0; tc < ntc; tc++) { 3131 u16 count, offset; 3132 3133 count = tc_to_txq[tc].count; 3134 offset = tc_to_txq[tc].offset; 3135 netdev_set_tc_queue(netdev, tc, count, offset); 3136 } 3137 3138 return 0; 3139 } 3140 3141 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) 3142 { 3143 int nch, ntc, num_txqs, err; 3144 int qos_queues = 0; 3145 3146 if (priv->htb) 3147 qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb); 3148 3149 nch = priv->channels.params.num_channels; 3150 ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); 3151 num_txqs = nch * ntc + qos_queues; 3152 if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) 3153 num_txqs += ntc; 3154 3155 netdev_dbg(priv->netdev, "Setting num_txqs %d\n", num_txqs); 3156 err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); 3157 if (err) 3158 netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); 3159 3160 return err; 3161 } 3162 3163 static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, 3164 struct mlx5e_params *params) 3165 { 3166 int ix; 3167 3168 for (ix = 0; ix < params->num_channels; ix++) { 3169 int num_comp_vectors, irq, vec_ix; 3170 struct mlx5_core_dev *mdev; 3171 3172 mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); 3173 num_comp_vectors = mlx5_comp_vectors_max(mdev); 3174 cpumask_clear(priv->scratchpad.cpumask); 3175 vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); 3176 3177 for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { 3178 int cpu = mlx5_comp_vector_get_cpu(mdev, irq); 3179 3180 cpumask_set_cpu(cpu, priv->scratchpad.cpumask); 3181 } 3182 3183 netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); 3184 } 3185 } 3186 3187 static int mlx5e_update_tc_and_tx_queues(struct mlx5e_priv *priv) 3188 { 3189 struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; 3190 struct net_device *netdev = priv->netdev; 3191 int old_num_txqs, old_ntc; 3192 int nch, ntc; 3193 int err; 3194 int i; 3195 3196 old_num_txqs = netdev->real_num_tx_queues; 3197 old_ntc = netdev->num_tc ? : 1; 3198 for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++) 3199 old_tc_to_txq[i] = netdev->tc_to_txq[i]; 3200 3201 nch = priv->channels.params.num_channels; 3202 ntc = priv->channels.params.mqprio.num_tc; 3203 tc_to_txq = priv->channels.params.mqprio.tc_to_txq; 3204 3205 err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq); 3206 if (err) 3207 goto err_out; 3208 err = mlx5e_update_tx_netdev_queues(priv); 3209 if (err) 3210 goto err_tcs; 3211 mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); 3212 3213 return 0; 3214 3215 err_tcs: 3216 WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, 3217 old_tc_to_txq)); 3218 err_out: 3219 return err; 3220 } 3221 3222 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_tc_and_tx_queues); 3223 3224 static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) 3225 { 3226 u16 count = priv->channels.params.num_channels; 3227 struct net_device *netdev = priv->netdev; 3228 int old_num_rxqs; 3229 int err; 3230 3231 old_num_rxqs = netdev->real_num_rx_queues; 3232 err = netif_set_real_num_rx_queues(netdev, count); 3233 if (err) { 3234 netdev_warn(netdev, "%s: netif_set_real_num_rx_queues failed, %d\n", 3235 __func__, err); 3236 return err; 3237 } 3238 err = mlx5e_update_tc_and_tx_queues(priv); 3239 if (err) { 3240 /* mlx5e_update_tc_and_tx_queues can fail if channels or TCs number increases. 3241 * Since channel number changed, it increased. 
That means, the call to 3242 * netif_set_real_num_rx_queues below should not fail, because it 3243 * decreases the number of RX queues. 3244 */ 3245 WARN_ON_ONCE(netif_set_real_num_rx_queues(netdev, old_num_rxqs)); 3246 return err; 3247 } 3248 3249 /* This function may be called on attach, before priv->rx_res is created. */ 3250 if (priv->rx_res) { 3251 mlx5e_rx_res_rss_update_num_channels(priv->rx_res, count); 3252 3253 if (!netif_is_rxfh_configured(priv->netdev)) 3254 mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); 3255 } 3256 3257 return 0; 3258 } 3259 3260 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed); 3261 3262 static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) 3263 { 3264 int i, ch, tc, num_tc; 3265 3266 ch = priv->channels.num; 3267 num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); 3268 3269 for (i = 0; i < ch; i++) { 3270 for (tc = 0; tc < num_tc; tc++) { 3271 struct mlx5e_channel *c = priv->channels.c[i]; 3272 struct mlx5e_txqsq *sq = &c->sq[tc]; 3273 3274 priv->txq2sq[sq->txq_ix] = sq; 3275 priv->txq2sq_stats[sq->txq_ix] = sq->stats; 3276 } 3277 } 3278 3279 if (!priv->channels.ptp) 3280 goto out; 3281 3282 if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) 3283 goto out; 3284 3285 for (tc = 0; tc < num_tc; tc++) { 3286 struct mlx5e_ptp *c = priv->channels.ptp; 3287 struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; 3288 3289 priv->txq2sq[sq->txq_ix] = sq; 3290 priv->txq2sq_stats[sq->txq_ix] = sq->stats; 3291 } 3292 3293 out: 3294 /* Make the change to txq2sq visible before the queue is started. 3295 * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, 3296 * which pairs with this barrier. 3297 */ 3298 smp_wmb(); 3299 } 3300 3301 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) 3302 { 3303 mlx5e_build_txq_maps(priv); 3304 mlx5e_activate_channels(priv, &priv->channels); 3305 mlx5e_xdp_tx_enable(priv); 3306 3307 /* dev_watchdog() wants all TX queues to be started when the carrier is 3308 * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. 3309 * Make it happy to avoid TX timeout false alarms. 3310 */ 3311 netif_tx_start_all_queues(priv->netdev); 3312 3313 if (mlx5e_is_vport_rep(priv)) 3314 mlx5e_rep_activate_channels(priv); 3315 3316 set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); 3317 3318 mlx5e_wait_channels_min_rx_wqes(&priv->channels); 3319 3320 if (priv->rx_res) 3321 mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); 3322 } 3323 3324 static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) 3325 { 3326 WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); 3327 if (current_work() != &priv->tx_timeout_work) 3328 cancel_work_sync(&priv->tx_timeout_work); 3329 } 3330 3331 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) 3332 { 3333 if (priv->rx_res) 3334 mlx5e_rx_res_channels_deactivate(priv->rx_res); 3335 3336 clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); 3337 mlx5e_cancel_tx_timeout_work(priv); 3338 3339 if (mlx5e_is_vport_rep(priv)) 3340 mlx5e_rep_deactivate_channels(priv); 3341 3342 /* The results of ndo_select_queue are unreliable, while netdev config 3343 * is being changed (real_num_tx_queues, num_tc). Stop all queues to 3344 * prevent ndo_start_xmit from being called, so that it can assume that 3345 * the selected queue is always valid. 
3346 */ 3347 netif_tx_disable(priv->netdev); 3348 3349 mlx5e_xdp_tx_disable(priv); 3350 mlx5e_deactivate_channels(&priv->channels); 3351 } 3352 3353 static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, 3354 struct mlx5e_params *new_params, 3355 mlx5e_fp_preactivate preactivate, 3356 void *context) 3357 { 3358 struct mlx5e_params old_params; 3359 3360 old_params = priv->channels.params; 3361 priv->channels.params = *new_params; 3362 3363 if (preactivate) { 3364 int err; 3365 3366 err = preactivate(priv, context); 3367 if (err) { 3368 priv->channels.params = old_params; 3369 return err; 3370 } 3371 } 3372 3373 mlx5e_set_xdp_feature(priv); 3374 return 0; 3375 } 3376 3377 static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, 3378 struct mlx5e_channels *old_chs, 3379 struct mlx5e_channels *new_chs, 3380 mlx5e_fp_preactivate preactivate, 3381 void *context) 3382 { 3383 struct net_device *netdev = priv->netdev; 3384 int carrier_ok; 3385 int err = 0; 3386 3387 carrier_ok = netif_carrier_ok(netdev); 3388 netif_carrier_off(netdev); 3389 3390 mlx5e_deactivate_priv_channels(priv); 3391 3392 priv->channels = *new_chs; 3393 3394 /* New channels are ready to roll, call the preactivate hook if needed 3395 * to modify HW settings or update kernel parameters. 3396 */ 3397 if (preactivate) { 3398 err = preactivate(priv, context); 3399 if (err) { 3400 priv->channels = *old_chs; 3401 goto out; 3402 } 3403 } 3404 3405 mlx5e_set_xdp_feature(priv); 3406 if (!MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) 3407 mlx5e_close_channels(old_chs); 3408 priv->profile->update_rx(priv); 3409 3410 mlx5e_selq_apply(&priv->selq); 3411 out: 3412 mlx5e_activate_priv_channels(priv); 3413 3414 /* return carrier back if needed */ 3415 if (carrier_ok) 3416 netif_carrier_on(netdev); 3417 3418 return err; 3419 } 3420 3421 int mlx5e_safe_switch_params(struct mlx5e_priv *priv, 3422 struct mlx5e_params *params, 3423 mlx5e_fp_preactivate preactivate, 3424 void *context, bool reset) 3425 { 3426 struct mlx5e_channels *old_chs, *new_chs; 3427 int err; 3428 3429 reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); 3430 if (!reset) 3431 return mlx5e_switch_priv_params(priv, params, preactivate, context); 3432 3433 old_chs = kzalloc(sizeof(*old_chs), GFP_KERNEL); 3434 new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL); 3435 if (!old_chs || !new_chs) { 3436 err = -ENOMEM; 3437 goto err_free_chs; 3438 } 3439 3440 new_chs->params = *params; 3441 3442 mlx5e_selq_prepare_params(&priv->selq, &new_chs->params); 3443 3444 err = mlx5e_open_channels(priv, new_chs); 3445 if (err) 3446 goto err_cancel_selq; 3447 3448 *old_chs = priv->channels; 3449 3450 err = mlx5e_switch_priv_channels(priv, old_chs, new_chs, 3451 preactivate, context); 3452 if (err) 3453 goto err_close; 3454 3455 if (MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) 3456 mlx5e_close_channels(old_chs); 3457 3458 kfree(new_chs); 3459 kfree(old_chs); 3460 return 0; 3461 3462 err_close: 3463 mlx5e_close_channels(new_chs); 3464 3465 err_cancel_selq: 3466 mlx5e_selq_cancel(&priv->selq); 3467 err_free_chs: 3468 kfree(new_chs); 3469 kfree(old_chs); 3470 return err; 3471 } 3472 3473 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) 3474 { 3475 return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true); 3476 } 3477 3478 void mlx5e_timestamp_init(struct mlx5e_priv *priv) 3479 { 3480 priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; 3481 priv->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; 3482 } 3483 3484 static void mlx5e_modify_admin_state(struct mlx5_core_dev 
*mdev, 3485 enum mlx5_port_status state) 3486 { 3487 struct mlx5_eswitch *esw = mdev->priv.eswitch; 3488 int vport_admin_state; 3489 3490 mlx5_set_port_admin_status(mdev, state); 3491 3492 if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS || 3493 !MLX5_CAP_GEN(mdev, uplink_follow)) 3494 return; 3495 3496 if (state == MLX5_PORT_UP) 3497 vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO; 3498 else 3499 vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN; 3500 3501 mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state); 3502 } 3503 3504 int mlx5e_open_locked(struct net_device *netdev) 3505 { 3506 struct mlx5e_priv *priv = netdev_priv(netdev); 3507 int err; 3508 3509 mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params); 3510 3511 set_bit(MLX5E_STATE_OPENED, &priv->state); 3512 3513 err = mlx5e_open_channels(priv, &priv->channels); 3514 if (err) 3515 goto err_clear_state_opened_flag; 3516 3517 err = priv->profile->update_rx(priv); 3518 if (err) 3519 goto err_close_channels; 3520 3521 mlx5e_selq_apply(&priv->selq); 3522 mlx5e_activate_priv_channels(priv); 3523 mlx5e_apply_traps(priv, true); 3524 if (priv->profile->update_carrier) 3525 priv->profile->update_carrier(priv); 3526 3527 mlx5e_queue_update_stats(priv); 3528 return 0; 3529 3530 err_close_channels: 3531 mlx5e_close_channels(&priv->channels); 3532 err_clear_state_opened_flag: 3533 clear_bit(MLX5E_STATE_OPENED, &priv->state); 3534 mlx5e_selq_cancel(&priv->selq); 3535 return err; 3536 } 3537 3538 int mlx5e_open(struct net_device *netdev) 3539 { 3540 struct mlx5e_priv *priv = netdev_priv(netdev); 3541 int err; 3542 3543 mutex_lock(&priv->state_lock); 3544 err = mlx5e_open_locked(netdev); 3545 if (!err) 3546 mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP); 3547 mutex_unlock(&priv->state_lock); 3548 3549 return err; 3550 } 3551 3552 int mlx5e_close_locked(struct net_device *netdev) 3553 { 3554 struct mlx5e_priv *priv = netdev_priv(netdev); 3555 3556 /* May already be CLOSED in case a previous configuration operation 3557 * (e.g RX/TX queue size change) that involves close&open failed. 
3558 */ 3559 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 3560 return 0; 3561 3562 mlx5e_apply_traps(priv, false); 3563 clear_bit(MLX5E_STATE_OPENED, &priv->state); 3564 3565 netif_carrier_off(priv->netdev); 3566 mlx5e_deactivate_priv_channels(priv); 3567 mlx5e_close_channels(&priv->channels); 3568 3569 return 0; 3570 } 3571 3572 int mlx5e_close(struct net_device *netdev) 3573 { 3574 struct mlx5e_priv *priv = netdev_priv(netdev); 3575 int err; 3576 3577 if (!netif_device_present(netdev)) 3578 return -ENODEV; 3579 3580 mutex_lock(&priv->state_lock); 3581 mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN); 3582 err = mlx5e_close_locked(netdev); 3583 mutex_unlock(&priv->state_lock); 3584 3585 return err; 3586 } 3587 3588 static void mlx5e_free_drop_rq(struct mlx5e_rq *rq) 3589 { 3590 mlx5_wq_destroy(&rq->wq_ctrl); 3591 } 3592 3593 static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, 3594 struct mlx5e_rq *rq, 3595 struct mlx5e_rq_param *param) 3596 { 3597 void *rqc = param->rqc; 3598 void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); 3599 int err; 3600 3601 param->wq.db_numa_node = param->wq.buf_numa_node; 3602 3603 err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq, 3604 &rq->wq_ctrl); 3605 if (err) 3606 return err; 3607 3608 /* Mark as unused given "Drop-RQ" packets never reach XDP */ 3609 xdp_rxq_info_unused(&rq->xdp_rxq); 3610 3611 rq->mdev = mdev; 3612 3613 return 0; 3614 } 3615 3616 static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv, 3617 struct mlx5e_cq *cq, 3618 struct mlx5e_cq_param *param) 3619 { 3620 struct mlx5_core_dev *mdev = priv->mdev; 3621 3622 param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); 3623 param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); 3624 3625 return mlx5e_alloc_cq_common(priv->mdev, priv->netdev, priv->wq, 3626 mdev->priv.bfreg.up, param, cq); 3627 } 3628 3629 int mlx5e_open_drop_rq(struct mlx5e_priv *priv, 3630 struct mlx5e_rq *drop_rq) 3631 { 3632 struct mlx5_core_dev *mdev = priv->mdev; 3633 struct mlx5e_cq_param cq_param = {}; 3634 struct mlx5e_rq_param rq_param = {}; 3635 struct mlx5e_cq *cq = &drop_rq->cq; 3636 int err; 3637 3638 mlx5e_build_drop_rq_param(mdev, &rq_param); 3639 3640 err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); 3641 if (err) 3642 return err; 3643 3644 err = mlx5e_create_cq(cq, &cq_param); 3645 if (err) 3646 goto err_free_cq; 3647 3648 err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); 3649 if (err) 3650 goto err_destroy_cq; 3651 3652 err = mlx5e_create_rq(drop_rq, &rq_param, priv->drop_rq_q_counter); 3653 if (err) 3654 goto err_free_rq; 3655 3656 err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 3657 if (err) 3658 mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err); 3659 3660 return 0; 3661 3662 err_free_rq: 3663 mlx5e_free_drop_rq(drop_rq); 3664 3665 err_destroy_cq: 3666 mlx5e_destroy_cq(cq); 3667 3668 err_free_cq: 3669 mlx5e_free_cq(cq); 3670 3671 return err; 3672 } 3673 3674 void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) 3675 { 3676 mlx5e_destroy_rq(drop_rq); 3677 mlx5e_free_drop_rq(drop_rq); 3678 mlx5e_destroy_cq(&drop_rq->cq); 3679 mlx5e_free_cq(&drop_rq->cq); 3680 } 3681 3682 static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) 3683 { 3684 if (priv->mqprio_rl) { 3685 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 3686 mlx5e_mqprio_rl_free(priv->mqprio_rl); 3687 priv->mqprio_rl = NULL; 3688 } 3689 mlx5e_accel_cleanup_tx(priv); 3690 } 3691 3692 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs,
bool vsd) 3693 { 3694 int err; 3695 int i; 3696 3697 for (i = 0; i < chs->num; i++) { 3698 err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); 3699 if (err) 3700 return err; 3701 } 3702 if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) 3703 return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); 3704 3705 return 0; 3706 } 3707 3708 static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq, 3709 int ntc, int nch) 3710 { 3711 int tc; 3712 3713 memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE); 3714 3715 /* Map netdev TCs to offset 0. 3716 * We have our own UP to TXQ mapping for DCB mode of QoS 3717 */ 3718 for (tc = 0; tc < ntc; tc++) { 3719 tc_to_txq[tc] = (struct netdev_tc_txq) { 3720 .count = nch, 3721 .offset = 0, 3722 }; 3723 } 3724 } 3725 3726 static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq, 3727 struct tc_mqprio_qopt *qopt) 3728 { 3729 int tc; 3730 3731 for (tc = 0; tc < TC_MAX_QUEUE; tc++) { 3732 tc_to_txq[tc] = (struct netdev_tc_txq) { 3733 .count = qopt->count[tc], 3734 .offset = qopt->offset[tc], 3735 }; 3736 } 3737 } 3738 3739 static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) 3740 { 3741 params->mqprio.mode = TC_MQPRIO_MODE_DCB; 3742 params->mqprio.num_tc = num_tc; 3743 mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, 3744 params->num_channels); 3745 } 3746 3747 static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params, 3748 struct mlx5e_mqprio_rl *rl) 3749 { 3750 int tc; 3751 3752 for (tc = 0; tc < TC_MAX_QUEUE; tc++) { 3753 u32 hw_id = 0; 3754 3755 if (rl) 3756 mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id); 3757 params->mqprio.channel.hw_id[tc] = hw_id; 3758 } 3759 } 3760 3761 static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, 3762 struct tc_mqprio_qopt_offload *mqprio, 3763 struct mlx5e_mqprio_rl *rl) 3764 { 3765 int tc; 3766 3767 params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; 3768 params->mqprio.num_tc = mqprio->qopt.num_tc; 3769 3770 for (tc = 0; tc < TC_MAX_QUEUE; tc++) 3771 params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc]; 3772 3773 mlx5e_mqprio_rl_update_params(params, rl); 3774 mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt); 3775 } 3776 3777 static void mlx5e_params_mqprio_reset(struct mlx5e_params *params) 3778 { 3779 mlx5e_params_mqprio_dcb_set(params, 1); 3780 } 3781 3782 static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, 3783 struct tc_mqprio_qopt *mqprio) 3784 { 3785 struct mlx5e_params new_params; 3786 u8 tc = mqprio->num_tc; 3787 int err; 3788 3789 mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; 3790 3791 if (tc && tc != MLX5_MAX_NUM_TC) 3792 return -EINVAL; 3793 3794 new_params = priv->channels.params; 3795 mlx5e_params_mqprio_dcb_set(&new_params, tc ? 
tc : 1); 3796 3797 err = mlx5e_safe_switch_params(priv, &new_params, 3798 mlx5e_update_tc_and_tx_queues_ctx, NULL, true); 3799 3800 if (!err && priv->mqprio_rl) { 3801 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 3802 mlx5e_mqprio_rl_free(priv->mqprio_rl); 3803 priv->mqprio_rl = NULL; 3804 } 3805 3806 priv->max_opened_tc = max_t(u8, priv->max_opened_tc, 3807 mlx5e_get_dcb_num_tc(&priv->channels.params)); 3808 return err; 3809 } 3810 3811 static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, 3812 struct tc_mqprio_qopt_offload *mqprio) 3813 { 3814 struct net_device *netdev = priv->netdev; 3815 struct mlx5e_ptp *ptp_channel; 3816 int agg_count = 0; 3817 int i; 3818 3819 ptp_channel = priv->channels.ptp; 3820 if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) { 3821 netdev_err(netdev, 3822 "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n"); 3823 return -EINVAL; 3824 } 3825 3826 if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 || 3827 mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC) 3828 return -EINVAL; 3829 3830 for (i = 0; i < mqprio->qopt.num_tc; i++) { 3831 if (!mqprio->qopt.count[i]) { 3832 netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i); 3833 return -EINVAL; 3834 } 3835 if (mqprio->min_rate[i]) { 3836 netdev_err(netdev, "Min tx rate is not supported\n"); 3837 return -EINVAL; 3838 } 3839 3840 if (mqprio->max_rate[i]) { 3841 int err; 3842 3843 err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]); 3844 if (err) 3845 return err; 3846 } 3847 3848 if (mqprio->qopt.offset[i] != agg_count) { 3849 netdev_err(netdev, "Discontinuous queues config is not supported\n"); 3850 return -EINVAL; 3851 } 3852 agg_count += mqprio->qopt.count[i]; 3853 } 3854 3855 if (priv->channels.params.num_channels != agg_count) { 3856 netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n", 3857 agg_count, priv->channels.params.num_channels); 3858 return -EINVAL; 3859 } 3860 3861 return 0; 3862 } 3863 3864 static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[]) 3865 { 3866 int tc; 3867 3868 for (tc = 0; tc < num_tc; tc++) 3869 if (max_rate[tc]) 3870 return true; 3871 return false; 3872 } 3873 3874 static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev, 3875 u8 num_tc, u64 max_rate[]) 3876 { 3877 struct mlx5e_mqprio_rl *rl; 3878 int err; 3879 3880 if (!mlx5e_mqprio_rate_limit(num_tc, max_rate)) 3881 return NULL; 3882 3883 rl = mlx5e_mqprio_rl_alloc(); 3884 if (!rl) 3885 return ERR_PTR(-ENOMEM); 3886 3887 err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate); 3888 if (err) { 3889 mlx5e_mqprio_rl_free(rl); 3890 return ERR_PTR(err); 3891 } 3892 3893 return rl; 3894 } 3895 3896 static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, 3897 struct tc_mqprio_qopt_offload *mqprio) 3898 { 3899 struct mlx5e_params new_params; 3900 struct mlx5e_mqprio_rl *rl; 3901 int err; 3902 3903 err = mlx5e_mqprio_channel_validate(priv, mqprio); 3904 if (err) 3905 return err; 3906 3907 rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate); 3908 if (IS_ERR(rl)) 3909 return PTR_ERR(rl); 3910 3911 new_params = priv->channels.params; 3912 mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl); 3913 3914 err = mlx5e_safe_switch_params(priv, &new_params, 3915 mlx5e_update_tc_and_tx_queues_ctx, NULL, true); 3916 if (err) { 3917 if (rl) { 3918 mlx5e_mqprio_rl_cleanup(rl); 3919 mlx5e_mqprio_rl_free(rl); 3920 } 3921 return err; 3922 } 3923 3924 if (priv->mqprio_rl) { 3925 
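		/* The switch to the new params succeeded; release the
		 * previously installed rate-limit object before publishing the
		 * new one below.
		 */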
mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 3926 mlx5e_mqprio_rl_free(priv->mqprio_rl); 3927 } 3928 priv->mqprio_rl = rl; 3929 3930 return 0; 3931 } 3932 3933 static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, 3934 struct tc_mqprio_qopt_offload *mqprio) 3935 { 3936 /* MQPRIO is another toplevel qdisc that can't be attached 3937 * simultaneously with the offloaded HTB. 3938 */ 3939 if (mlx5e_selq_is_htb_enabled(&priv->selq)) { 3940 NL_SET_ERR_MSG_MOD(mqprio->extack, 3941 "MQPRIO cannot be configured when HTB offload is enabled."); 3942 return -EOPNOTSUPP; 3943 } 3944 3945 switch (mqprio->mode) { 3946 case TC_MQPRIO_MODE_DCB: 3947 return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt); 3948 case TC_MQPRIO_MODE_CHANNEL: 3949 return mlx5e_setup_tc_mqprio_channel(priv, mqprio); 3950 default: 3951 return -EOPNOTSUPP; 3952 } 3953 } 3954 3955 static LIST_HEAD(mlx5e_block_cb_list); 3956 3957 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, 3958 void *type_data) 3959 { 3960 struct mlx5e_priv *priv = netdev_priv(dev); 3961 bool tc_unbind = false; 3962 int err; 3963 3964 if (type == TC_SETUP_BLOCK && 3965 ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) 3966 tc_unbind = true; 3967 3968 if (!netif_device_present(dev) && !tc_unbind) 3969 return -ENODEV; 3970 3971 switch (type) { 3972 case TC_SETUP_BLOCK: { 3973 struct flow_block_offload *f = type_data; 3974 3975 f->unlocked_driver_cb = true; 3976 return flow_block_cb_setup_simple(type_data, 3977 &mlx5e_block_cb_list, 3978 mlx5e_setup_tc_block_cb, 3979 priv, priv, true); 3980 } 3981 case TC_SETUP_QDISC_MQPRIO: 3982 mutex_lock(&priv->state_lock); 3983 err = mlx5e_setup_tc_mqprio(priv, type_data); 3984 mutex_unlock(&priv->state_lock); 3985 return err; 3986 case TC_SETUP_QDISC_HTB: 3987 mutex_lock(&priv->state_lock); 3988 err = mlx5e_htb_setup_tc(priv, type_data); 3989 mutex_unlock(&priv->state_lock); 3990 return err; 3991 default: 3992 return -EOPNOTSUPP; 3993 } 3994 } 3995 3996 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) 3997 { 3998 int i; 3999 4000 for (i = 0; i < priv->stats_nch; i++) { 4001 struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; 4002 struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; 4003 struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; 4004 int j; 4005 4006 s->rx_packets += rq_stats->packets + xskrq_stats->packets; 4007 s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; 4008 s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets; 4009 4010 for (j = 0; j < priv->max_opened_tc; j++) { 4011 struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; 4012 4013 s->tx_packets += sq_stats->packets; 4014 s->tx_bytes += sq_stats->bytes; 4015 s->tx_dropped += sq_stats->dropped; 4016 } 4017 } 4018 if (priv->tx_ptp_opened) { 4019 for (i = 0; i < priv->max_opened_tc; i++) { 4020 struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i]; 4021 4022 s->tx_packets += sq_stats->packets; 4023 s->tx_bytes += sq_stats->bytes; 4024 s->tx_dropped += sq_stats->dropped; 4025 } 4026 } 4027 if (priv->rx_ptp_opened) { 4028 struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; 4029 4030 s->rx_packets += rq_stats->packets; 4031 s->rx_bytes += rq_stats->bytes; 4032 s->multicast += rq_stats->mcast_packets; 4033 } 4034 4035 #ifdef CONFIG_MLX5_EN_PSP 4036 if (priv->psp) 4037 s->tx_dropped += atomic_read(&priv->psp->tx_drop); 4038 #endif 4039 } 4040 4041 void 4042 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) 4043 { 
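	/* For uplink representors, report the PPORT 802.3 and vport counters;
	 * for all other profiles, fold the per-channel software counters via
	 * mlx5e_fold_sw_stats64().
	 */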
4044 struct mlx5e_priv *priv = netdev_priv(dev); 4045 struct mlx5e_pport_stats *pstats = &priv->stats.pport; 4046 4047 if (!netif_device_present(dev)) 4048 return; 4049 4050 /* In switchdev mode, monitor counters doesn't monitor 4051 * rx/tx stats of 802_3. The update stats mechanism 4052 * should keep the 802_3 layout counters updated 4053 */ 4054 if (!mlx5e_monitor_counter_supported(priv) || 4055 mlx5e_is_uplink_rep(priv)) { 4056 /* update HW stats in background for next time */ 4057 mlx5e_queue_update_stats(priv); 4058 } 4059 4060 netdev_stats_to_stats64(stats, &dev->stats); 4061 4062 if (mlx5e_is_uplink_rep(priv)) { 4063 struct mlx5e_vport_stats *vstats = &priv->stats.vport; 4064 4065 stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); 4066 stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); 4067 stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); 4068 stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); 4069 4070 /* vport multicast also counts packets that are dropped due to steering 4071 * or rx out of buffer 4072 */ 4073 stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); 4074 } else { 4075 mlx5e_fold_sw_stats64(priv, stats); 4076 } 4077 4078 stats->rx_missed_errors += priv->stats.qcnt.rx_out_of_buffer; 4079 stats->rx_dropped += PPORT_2863_GET(pstats, if_in_discards); 4080 4081 stats->rx_length_errors += 4082 PPORT_802_3_GET(pstats, a_in_range_length_errors) + 4083 PPORT_802_3_GET(pstats, a_out_of_range_length_field) + 4084 PPORT_802_3_GET(pstats, a_frame_too_long_errors) + 4085 VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small); 4086 stats->rx_crc_errors += 4087 PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); 4088 stats->rx_frame_errors += PPORT_802_3_GET(pstats, a_alignment_errors); 4089 stats->tx_aborted_errors += PPORT_2863_GET(pstats, if_out_discards); 4090 stats->rx_errors += stats->rx_length_errors + stats->rx_crc_errors + 4091 stats->rx_frame_errors; 4092 stats->tx_errors += stats->tx_aborted_errors + stats->tx_carrier_errors; 4093 } 4094 4095 static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) 4096 { 4097 if (mlx5e_is_uplink_rep(priv)) 4098 return; /* no rx mode for uplink rep */ 4099 4100 queue_work(priv->wq, &priv->set_rx_mode_work); 4101 } 4102 4103 static void mlx5e_set_rx_mode(struct net_device *dev) 4104 { 4105 struct mlx5e_priv *priv = netdev_priv(dev); 4106 4107 mlx5e_nic_set_rx_mode(priv); 4108 } 4109 4110 static int mlx5e_set_mac(struct net_device *netdev, void *addr) 4111 { 4112 struct mlx5e_priv *priv = netdev_priv(netdev); 4113 struct sockaddr *saddr = addr; 4114 4115 if (!is_valid_ether_addr(saddr->sa_data)) 4116 return -EADDRNOTAVAIL; 4117 4118 netif_addr_lock_bh(netdev); 4119 eth_hw_addr_set(netdev, saddr->sa_data); 4120 netif_addr_unlock_bh(netdev); 4121 4122 mlx5e_nic_set_rx_mode(priv); 4123 4124 return 0; 4125 } 4126 4127 #define MLX5E_SET_FEATURE(features, feature, enable) \ 4128 do { \ 4129 if (enable) \ 4130 *features |= feature; \ 4131 else \ 4132 *features &= ~feature; \ 4133 } while (0) 4134 4135 typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); 4136 4137 static int set_feature_lro(struct net_device *netdev, bool enable) 4138 { 4139 struct mlx5e_priv *priv = netdev_priv(netdev); 4140 struct mlx5_core_dev *mdev = priv->mdev; 4141 struct mlx5e_params *cur_params; 4142 struct mlx5e_params new_params; 4143 bool reset = true; 4144 int err = 0; 4145 4146 mutex_lock(&priv->state_lock); 4147 4148 cur_params = 
&priv->channels.params; 4149 new_params = *cur_params; 4150 4151 if (enable) 4152 new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO; 4153 else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO) 4154 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; 4155 else 4156 goto out; 4157 4158 if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO && 4159 new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) { 4160 if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { 4161 if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == 4162 mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) 4163 reset = false; 4164 } 4165 } 4166 4167 err = mlx5e_safe_switch_params(priv, &new_params, 4168 mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); 4169 out: 4170 mutex_unlock(&priv->state_lock); 4171 return err; 4172 } 4173 4174 static int set_feature_hw_gro(struct net_device *netdev, bool enable) 4175 { 4176 struct mlx5e_priv *priv = netdev_priv(netdev); 4177 struct mlx5e_params new_params; 4178 bool reset = true; 4179 int err = 0; 4180 4181 mutex_lock(&priv->state_lock); 4182 new_params = priv->channels.params; 4183 4184 if (enable) { 4185 new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; 4186 } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { 4187 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; 4188 } else { 4189 goto out; 4190 } 4191 4192 err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); 4193 out: 4194 mutex_unlock(&priv->state_lock); 4195 return err; 4196 } 4197 4198 static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) 4199 { 4200 struct mlx5e_priv *priv = netdev_priv(netdev); 4201 4202 if (enable) 4203 mlx5e_enable_cvlan_filter(priv->fs, 4204 !!(priv->netdev->flags & IFF_PROMISC)); 4205 else 4206 mlx5e_disable_cvlan_filter(priv->fs, 4207 !!(priv->netdev->flags & IFF_PROMISC)); 4208 4209 return 0; 4210 } 4211 4212 static int set_feature_hw_tc(struct net_device *netdev, bool enable) 4213 { 4214 struct mlx5e_priv *priv = netdev_priv(netdev); 4215 int err = 0; 4216 4217 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) 4218 int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) : 4219 MLX5_TC_FLAG(NIC_OFFLOAD); 4220 if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { 4221 netdev_err(netdev, 4222 "Active offloaded tc filters, can't turn hw_tc_offload off\n"); 4223 return -EINVAL; 4224 } 4225 #endif 4226 4227 mutex_lock(&priv->state_lock); 4228 if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) { 4229 netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); 4230 err = -EINVAL; 4231 } 4232 mutex_unlock(&priv->state_lock); 4233 4234 return err; 4235 } 4236 4237 static int set_feature_rx_all(struct net_device *netdev, bool enable) 4238 { 4239 struct mlx5e_priv *priv = netdev_priv(netdev); 4240 struct mlx5_core_dev *mdev = priv->mdev; 4241 4242 return mlx5_set_port_fcs(mdev, !enable); 4243 } 4244 4245 static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) 4246 { 4247 return (struct dim_cq_moder) { 4248 .cq_period_mode = cq_period_mode, 4249 .pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS, 4250 .usec = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
4251 MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : 4252 MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC, 4253 }; 4254 } 4255 4256 bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, 4257 bool dim_enabled) 4258 { 4259 bool reset_needed = cq_moder->cq_period_mode != cq_period_mode; 4260 4261 if (dim_enabled) 4262 *cq_moder = net_dim_get_def_rx_moderation(cq_period_mode); 4263 else 4264 *cq_moder = mlx5e_get_def_rx_moderation(cq_period_mode); 4265 4266 return reset_needed; 4267 } 4268 4269 bool mlx5e_reset_rx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, 4270 bool dim_enabled, bool keep_dim_state) 4271 { 4272 bool reset = false; 4273 int i; 4274 4275 for (i = 0; i < chs->num; i++) { 4276 if (keep_dim_state) 4277 dim_enabled = !!chs->c[i]->rq.dim; 4278 4279 reset |= mlx5e_reset_rx_moderation(&chs->c[i]->rx_cq_moder, 4280 cq_period_mode, dim_enabled); 4281 } 4282 4283 return reset; 4284 } 4285 4286 static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) 4287 { 4288 u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; 4289 bool supported, curr_state; 4290 int err; 4291 4292 if (!MLX5_CAP_GEN(mdev, ports_check)) 4293 return 0; 4294 4295 err = mlx5_query_ports_check(mdev, in, sizeof(in)); 4296 if (err) 4297 return err; 4298 4299 supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); 4300 curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); 4301 4302 if (!supported || enable == curr_state) 4303 return 0; 4304 4305 MLX5_SET(pcmr_reg, in, local_port, 1); 4306 MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); 4307 4308 return mlx5_set_ports_check(mdev, in, sizeof(in)); 4309 } 4310 4311 static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) 4312 { 4313 struct mlx5_core_dev *mdev = priv->mdev; 4314 bool enable = *(bool *)ctx; 4315 4316 return mlx5e_set_rx_port_ts(mdev, enable); 4317 } 4318 4319 static int set_feature_rx_fcs(struct net_device *netdev, bool enable) 4320 { 4321 struct mlx5e_priv *priv = netdev_priv(netdev); 4322 struct mlx5e_channels *chs = &priv->channels; 4323 struct mlx5e_params new_params; 4324 int err; 4325 bool rx_ts_over_crc = !enable; 4326 4327 mutex_lock(&priv->state_lock); 4328 4329 new_params = chs->params; 4330 new_params.scatter_fcs_en = enable; 4331 err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, 4332 &rx_ts_over_crc, true); 4333 mutex_unlock(&priv->state_lock); 4334 return err; 4335 } 4336 4337 static int set_feature_rx_vlan(struct net_device *netdev, bool enable) 4338 { 4339 struct mlx5e_priv *priv = netdev_priv(netdev); 4340 int err = 0; 4341 4342 mutex_lock(&priv->state_lock); 4343 4344 mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable); 4345 priv->channels.params.vlan_strip_disable = !enable; 4346 4347 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4348 goto unlock; 4349 4350 err = mlx5e_modify_channels_vsd(&priv->channels, !enable); 4351 if (err) { 4352 mlx5e_fs_set_vlan_strip_disable(priv->fs, enable); 4353 priv->channels.params.vlan_strip_disable = enable; 4354 } 4355 unlock: 4356 mutex_unlock(&priv->state_lock); 4357 4358 return err; 4359 } 4360 4361 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) 4362 { 4363 struct mlx5e_priv *priv = netdev_priv(dev); 4364 struct mlx5e_flow_steering *fs = priv->fs; 4365 4366 if (mlx5e_is_uplink_rep(priv)) 4367 return 0; /* no vlan table for uplink rep */ 4368 4369 return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid); 4370 } 4371 4372 int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) 
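/* Mirrors mlx5e_vlan_rx_add_vid() above: the VID is removed from the
 * flow-steering VLAN table, and the request is a no-op on the uplink
 * representor, which owns no VLAN table.
 */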
4373 { 4374 struct mlx5e_priv *priv = netdev_priv(dev); 4375 struct mlx5e_flow_steering *fs = priv->fs; 4376 4377 if (mlx5e_is_uplink_rep(priv)) 4378 return 0; /* no vlan table for uplink rep */ 4379 4380 return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid); 4381 } 4382 4383 #ifdef CONFIG_MLX5_EN_ARFS 4384 static int set_feature_arfs(struct net_device *netdev, bool enable) 4385 { 4386 struct mlx5e_priv *priv = netdev_priv(netdev); 4387 int err; 4388 4389 if (enable) 4390 err = mlx5e_arfs_enable(priv->fs); 4391 else 4392 err = mlx5e_arfs_disable(priv->fs); 4393 4394 return err; 4395 } 4396 #endif 4397 4398 static int mlx5e_handle_feature(struct net_device *netdev, 4399 netdev_features_t *features, 4400 netdev_features_t feature, 4401 mlx5e_feature_handler feature_handler) 4402 { 4403 netdev_features_t changes = *features ^ netdev->features; 4404 bool enable = !!(*features & feature); 4405 int err; 4406 4407 if (!(changes & feature)) 4408 return 0; 4409 4410 err = feature_handler(netdev, enable); 4411 if (err) { 4412 MLX5E_SET_FEATURE(features, feature, !enable); 4413 netdev_err(netdev, "%s feature %pNF failed, err %d\n", 4414 enable ? "Enable" : "Disable", &feature, err); 4415 return err; 4416 } 4417 4418 return 0; 4419 } 4420 4421 void mlx5e_set_xdp_feature(struct mlx5e_priv *priv) 4422 { 4423 struct mlx5e_params *params = &priv->channels.params; 4424 struct net_device *netdev = priv->netdev; 4425 xdp_features_t val = 0; 4426 4427 if (netdev->netdev_ops->ndo_bpf && 4428 params->packet_merge.type == MLX5E_PACKET_MERGE_NONE) 4429 val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 4430 NETDEV_XDP_ACT_XSK_ZEROCOPY | 4431 NETDEV_XDP_ACT_RX_SG; 4432 4433 if (netdev->netdev_ops->ndo_xdp_xmit && params->xdp_prog) 4434 val |= NETDEV_XDP_ACT_NDO_XMIT | 4435 NETDEV_XDP_ACT_NDO_XMIT_SG; 4436 4437 xdp_set_features_flag_locked(netdev, val); 4438 } 4439 4440 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) 4441 { 4442 netdev_features_t oper_features = features; 4443 int err = 0; 4444 4445 #define MLX5E_HANDLE_FEATURE(feature, handler) \ 4446 mlx5e_handle_feature(netdev, &oper_features, feature, handler) 4447 4448 if (features & (NETIF_F_GRO_HW | NETIF_F_LRO)) { 4449 err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); 4450 err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); 4451 err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); 4452 } else { 4453 err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); 4454 err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); 4455 err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); 4456 } 4457 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, 4458 set_feature_cvlan_filter); 4459 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); 4460 err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); 4461 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); 4462 #ifdef CONFIG_MLX5_EN_ARFS 4463 err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs); 4464 #endif 4465 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx); 4466 4467 if (err) { 4468 netdev->features = oper_features; 4469 return -EINVAL; 4470 } 4471 4472 return 0; 4473 } 4474 4475 static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, 4476 netdev_features_t features) 4477 { 4478 features &= ~NETIF_F_HW_TLS_RX; 4479 if (netdev->features & NETIF_F_HW_TLS_RX) 4480 netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev 
mode\n"); 4481 4482 features &= ~NETIF_F_HW_TLS_TX; 4483 if (netdev->features & NETIF_F_HW_TLS_TX) 4484 netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); 4485 4486 features &= ~NETIF_F_NTUPLE; 4487 if (netdev->features & NETIF_F_NTUPLE) 4488 netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); 4489 4490 features &= ~NETIF_F_GRO_HW; 4491 if (netdev->features & NETIF_F_GRO_HW) 4492 netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); 4493 4494 features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; 4495 if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) 4496 netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); 4497 4498 features &= ~NETIF_F_HW_MACSEC; 4499 if (netdev->features & NETIF_F_HW_MACSEC) 4500 netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n"); 4501 4502 return features; 4503 } 4504 4505 static netdev_features_t mlx5e_fix_features(struct net_device *netdev, 4506 netdev_features_t features) 4507 { 4508 struct netdev_config *cfg = netdev->cfg_pending; 4509 struct mlx5e_priv *priv = netdev_priv(netdev); 4510 struct mlx5e_vlan_table *vlan; 4511 struct mlx5e_params *params; 4512 4513 if (!netif_device_present(netdev)) 4514 return features; 4515 4516 vlan = mlx5e_fs_get_vlan(priv->fs); 4517 mutex_lock(&priv->state_lock); 4518 params = &priv->channels.params; 4519 if (!vlan || 4520 !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) { 4521 /* HW strips the outer C-tag header, this is a problem 4522 * for S-tag traffic. 4523 */ 4524 features &= ~NETIF_F_HW_VLAN_CTAG_RX; 4525 if (!params->vlan_strip_disable) 4526 netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); 4527 } 4528 4529 if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { 4530 if (features & NETIF_F_LRO) { 4531 netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); 4532 features &= ~NETIF_F_LRO; 4533 } 4534 if (features & NETIF_F_GRO_HW) { 4535 netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n"); 4536 features &= ~NETIF_F_GRO_HW; 4537 } 4538 } 4539 4540 if (params->xdp_prog) { 4541 if (features & NETIF_F_LRO) { 4542 netdev_warn(netdev, "LRO is incompatible with XDP\n"); 4543 features &= ~NETIF_F_LRO; 4544 } 4545 if (features & NETIF_F_GRO_HW) { 4546 netdev_warn(netdev, "HW GRO is incompatible with XDP\n"); 4547 features &= ~NETIF_F_GRO_HW; 4548 } 4549 } 4550 4551 if (priv->xsk.refcnt) { 4552 if (features & NETIF_F_LRO) { 4553 netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", 4554 priv->xsk.refcnt); 4555 features &= ~NETIF_F_LRO; 4556 } 4557 if (features & NETIF_F_GRO_HW) { 4558 netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n", 4559 priv->xsk.refcnt); 4560 features &= ~NETIF_F_GRO_HW; 4561 } 4562 } 4563 4564 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { 4565 features &= ~NETIF_F_RXHASH; 4566 if (netdev->features & NETIF_F_RXHASH) 4567 netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); 4568 4569 if (features & NETIF_F_GRO_HW) { 4570 netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); 4571 features &= ~NETIF_F_GRO_HW; 4572 } 4573 } 4574 4575 /* The header-data split ring param requires HW GRO to stay enabled. 
*/ 4576 if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && 4577 !(features & NETIF_F_GRO_HW)) { 4578 netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n"); 4579 features |= NETIF_F_GRO_HW; 4580 } 4581 4582 if (mlx5e_is_uplink_rep(priv)) { 4583 features = mlx5e_fix_uplink_rep_features(netdev, features); 4584 netdev->netns_immutable = true; 4585 } else { 4586 netdev->netns_immutable = false; 4587 } 4588 4589 mutex_unlock(&priv->state_lock); 4590 4591 return features; 4592 } 4593 4594 static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, 4595 struct mlx5e_channels *chs, 4596 struct mlx5e_params *new_params, 4597 struct mlx5_core_dev *mdev) 4598 { 4599 u16 ix; 4600 4601 for (ix = 0; ix < chs->params.num_channels; ix++) { 4602 struct xsk_buff_pool *xsk_pool = 4603 mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); 4604 struct mlx5e_xsk_param xsk; 4605 int max_xdp_mtu; 4606 4607 if (!xsk_pool) 4608 continue; 4609 4610 mlx5e_build_xsk_param(xsk_pool, &xsk); 4611 max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); 4612 4613 /* Validate XSK params and XDP MTU in advance */ 4614 if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || 4615 new_params->sw_mtu > max_xdp_mtu) { 4616 u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); 4617 int max_mtu_frame, max_mtu_page, max_mtu; 4618 4619 /* Two criteria must be met: 4620 * 1. HW MTU + all headrooms <= XSK frame size. 4621 * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. 4622 */ 4623 max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); 4624 max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); 4625 max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); 4626 4627 netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", 4628 new_params->sw_mtu, ix, max_mtu); 4629 return false; 4630 } 4631 } 4632 4633 return true; 4634 } 4635 4636 static bool mlx5e_params_validate_xdp(struct net_device *netdev, 4637 struct mlx5_core_dev *mdev, 4638 struct mlx5e_params *params) 4639 { 4640 bool is_linear; 4641 4642 /* No XSK params: AF_XDP can't be enabled yet at the point of setting 4643 * the XDP program. 4644 */ 4645 is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ? 
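		/* Pick the linearity check matching the RQ type (cyclic legacy
		 * RQ vs striding MPWQE RQ). A non-linear buffer layout is only
		 * acceptable when the XDP program declares multi-buffer
		 * support (xdp_has_frags), which is enforced below.
		 */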
4646 mlx5e_rx_is_linear_skb(mdev, params, NULL) : 4647 mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL); 4648 4649 if (!is_linear) { 4650 if (!params->xdp_prog->aux->xdp_has_frags) { 4651 netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", 4652 params->sw_mtu, 4653 mlx5e_xdp_max_mtu(params, NULL)); 4654 return false; 4655 } 4656 if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && 4657 !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) { 4658 netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", 4659 params->sw_mtu, 4660 mlx5e_xdp_max_mtu(params, NULL)); 4661 return false; 4662 } 4663 } 4664 4665 return true; 4666 } 4667 4668 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, 4669 mlx5e_fp_preactivate preactivate) 4670 { 4671 struct mlx5e_priv *priv = netdev_priv(netdev); 4672 struct mlx5e_params new_params; 4673 struct mlx5e_params *params; 4674 int err = 0; 4675 4676 mutex_lock(&priv->state_lock); 4677 4678 params = &priv->channels.params; 4679 4680 new_params = *params; 4681 new_params.sw_mtu = new_mtu; 4682 err = mlx5e_validate_params(priv->mdev, &new_params); 4683 if (err) 4684 goto out; 4685 4686 if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev, 4687 &new_params)) { 4688 err = -EINVAL; 4689 goto out; 4690 } 4691 4692 if (priv->xsk.refcnt && 4693 !mlx5e_xsk_validate_mtu(netdev, &priv->channels, 4694 &new_params, priv->mdev)) { 4695 err = -EINVAL; 4696 goto out; 4697 } 4698 4699 err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, 4700 true); 4701 4702 out: 4703 WRITE_ONCE(netdev->mtu, params->sw_mtu); 4704 mutex_unlock(&priv->state_lock); 4705 4706 if (!err) 4707 netdev_update_features(netdev); 4708 4709 return err; 4710 } 4711 4712 static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) 4713 { 4714 return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); 4715 } 4716 4717 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) 4718 { 4719 bool set = *(bool *)ctx; 4720 4721 return mlx5e_ptp_rx_manage_fs(priv, set); 4722 } 4723 4724 static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter) 4725 { 4726 bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; 4727 int err; 4728 4729 if (!rx_filter) 4730 /* Reset CQE compression to Admin default */ 4731 return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false); 4732 4733 if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) 4734 return 0; 4735 4736 /* Disable CQE compression */ 4737 netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); 4738 err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true); 4739 if (err) 4740 netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); 4741 4742 return err; 4743 } 4744 4745 static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) 4746 { 4747 struct mlx5e_params new_params; 4748 4749 if (ptp_rx == priv->channels.params.ptp_rx) 4750 return 0; 4751 4752 new_params = priv->channels.params; 4753 new_params.ptp_rx = ptp_rx; 4754 return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, 4755 &new_params.ptp_rx, true); 4756 } 4757 4758 int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, 4759 struct kernel_hwtstamp_config *config, 4760 struct netlink_ext_ack *extack) 4761 { 4762 bool rx_cqe_compress_def; 4763 bool ptp_rx; 4764 int err; 4765 4766 if (!MLX5_CAP_GEN(priv->mdev, 
device_frequency_khz) || 4767 (mlx5_clock_get_ptp_index(priv->mdev) == -1)) { 4768 NL_SET_ERR_MSG_MOD(extack, 4769 "Timestamps are not supported on this device"); 4770 return -EOPNOTSUPP; 4771 } 4772 4773 /* TX HW timestamp */ 4774 switch (config->tx_type) { 4775 case HWTSTAMP_TX_OFF: 4776 case HWTSTAMP_TX_ON: 4777 break; 4778 default: 4779 return -ERANGE; 4780 } 4781 4782 mutex_lock(&priv->state_lock); 4783 rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; 4784 4785 /* RX HW timestamp */ 4786 switch (config->rx_filter) { 4787 case HWTSTAMP_FILTER_NONE: 4788 ptp_rx = false; 4789 break; 4790 case HWTSTAMP_FILTER_ALL: 4791 case HWTSTAMP_FILTER_SOME: 4792 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 4793 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 4794 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 4795 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 4796 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 4797 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 4798 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 4799 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 4800 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 4801 case HWTSTAMP_FILTER_PTP_V2_EVENT: 4802 case HWTSTAMP_FILTER_PTP_V2_SYNC: 4803 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 4804 case HWTSTAMP_FILTER_NTP_ALL: 4805 config->rx_filter = HWTSTAMP_FILTER_ALL; 4806 /* ptp_rx is set if both HW TS is set and CQE 4807 * compression is set 4808 */ 4809 ptp_rx = rx_cqe_compress_def; 4810 break; 4811 default: 4812 err = -ERANGE; 4813 goto err_unlock; 4814 } 4815 4816 if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) 4817 err = mlx5e_hwstamp_config_no_ptp_rx(priv, 4818 config->rx_filter != HWTSTAMP_FILTER_NONE); 4819 else 4820 err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx); 4821 if (err) 4822 goto err_unlock; 4823 4824 priv->hwtstamp_config = *config; 4825 mutex_unlock(&priv->state_lock); 4826 4827 /* might need to fix some features */ 4828 netdev_update_features(priv->netdev); 4829 4830 return 0; 4831 err_unlock: 4832 mutex_unlock(&priv->state_lock); 4833 return err; 4834 } 4835 4836 static int mlx5e_hwtstamp_set_ndo(struct net_device *netdev, 4837 struct kernel_hwtstamp_config *config, 4838 struct netlink_ext_ack *extack) 4839 { 4840 struct mlx5e_priv *priv = netdev_priv(netdev); 4841 4842 return mlx5e_hwtstamp_set(priv, config, extack); 4843 } 4844 4845 int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, 4846 struct kernel_hwtstamp_config *config) 4847 { 4848 if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) 4849 return -EOPNOTSUPP; 4850 4851 *config = priv->hwtstamp_config; 4852 4853 return 0; 4854 } 4855 4856 static int mlx5e_hwtstamp_get_ndo(struct net_device *dev, 4857 struct kernel_hwtstamp_config *config) 4858 { 4859 struct mlx5e_priv *priv = netdev_priv(dev); 4860 4861 return mlx5e_hwtstamp_get(priv, config); 4862 } 4863 4864 #ifdef CONFIG_MLX5_ESWITCH 4865 int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) 4866 { 4867 struct mlx5e_priv *priv = netdev_priv(dev); 4868 struct mlx5_core_dev *mdev = priv->mdev; 4869 4870 return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); 4871 } 4872 4873 static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, 4874 __be16 vlan_proto) 4875 { 4876 struct mlx5e_priv *priv = netdev_priv(dev); 4877 struct mlx5_core_dev *mdev = priv->mdev; 4878 4879 if (vlan_proto != htons(ETH_P_8021Q)) 4880 return -EPROTONOSUPPORT; 4881 4882 return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, 4883 vlan, qos); 4884 } 4885 4886 static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) 4887 { 4888 struct 
mlx5e_priv *priv = netdev_priv(dev); 4889 struct mlx5_core_dev *mdev = priv->mdev; 4890 4891 return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); 4892 } 4893 4894 static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) 4895 { 4896 struct mlx5e_priv *priv = netdev_priv(dev); 4897 struct mlx5_core_dev *mdev = priv->mdev; 4898 4899 return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); 4900 } 4901 4902 int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, 4903 int max_tx_rate) 4904 { 4905 struct mlx5e_priv *priv = netdev_priv(dev); 4906 struct mlx5_core_dev *mdev = priv->mdev; 4907 4908 return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, 4909 max_tx_rate, min_tx_rate); 4910 } 4911 4912 static int mlx5_vport_link2ifla(u8 esw_link) 4913 { 4914 switch (esw_link) { 4915 case MLX5_VPORT_ADMIN_STATE_DOWN: 4916 return IFLA_VF_LINK_STATE_DISABLE; 4917 case MLX5_VPORT_ADMIN_STATE_UP: 4918 return IFLA_VF_LINK_STATE_ENABLE; 4919 } 4920 return IFLA_VF_LINK_STATE_AUTO; 4921 } 4922 4923 static int mlx5_ifla_link2vport(u8 ifla_link) 4924 { 4925 switch (ifla_link) { 4926 case IFLA_VF_LINK_STATE_DISABLE: 4927 return MLX5_VPORT_ADMIN_STATE_DOWN; 4928 case IFLA_VF_LINK_STATE_ENABLE: 4929 return MLX5_VPORT_ADMIN_STATE_UP; 4930 } 4931 return MLX5_VPORT_ADMIN_STATE_AUTO; 4932 } 4933 4934 static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, 4935 int link_state) 4936 { 4937 struct mlx5e_priv *priv = netdev_priv(dev); 4938 struct mlx5_core_dev *mdev = priv->mdev; 4939 4940 if (mlx5e_is_uplink_rep(priv)) 4941 return -EOPNOTSUPP; 4942 4943 return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, 4944 mlx5_ifla_link2vport(link_state)); 4945 } 4946 4947 int mlx5e_get_vf_config(struct net_device *dev, 4948 int vf, struct ifla_vf_info *ivi) 4949 { 4950 struct mlx5e_priv *priv = netdev_priv(dev); 4951 struct mlx5_core_dev *mdev = priv->mdev; 4952 int err; 4953 4954 if (!netif_device_present(dev)) 4955 return -EOPNOTSUPP; 4956 4957 err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); 4958 if (err) 4959 return err; 4960 ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); 4961 return 0; 4962 } 4963 4964 int mlx5e_get_vf_stats(struct net_device *dev, 4965 int vf, struct ifla_vf_stats *vf_stats) 4966 { 4967 struct mlx5e_priv *priv = netdev_priv(dev); 4968 struct mlx5_core_dev *mdev = priv->mdev; 4969 4970 return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, 4971 vf_stats); 4972 } 4973 4974 static bool 4975 mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) 4976 { 4977 struct mlx5e_priv *priv = netdev_priv(dev); 4978 4979 if (!netif_device_present(dev)) 4980 return false; 4981 4982 if (!mlx5e_is_uplink_rep(priv)) 4983 return false; 4984 4985 return mlx5e_rep_has_offload_stats(dev, attr_id); 4986 } 4987 4988 static int 4989 mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, 4990 void *sp) 4991 { 4992 struct mlx5e_priv *priv = netdev_priv(dev); 4993 4994 if (!mlx5e_is_uplink_rep(priv)) 4995 return -EOPNOTSUPP; 4996 4997 return mlx5e_rep_get_offload_stats(attr_id, dev, sp); 4998 } 4999 #endif 5000 5001 static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) 5002 { 5003 switch (proto_type) { 5004 case IPPROTO_GRE: 5005 return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); 5006 case IPPROTO_IPIP: 5007 case IPPROTO_IPV6: 5008 return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || 5009 MLX5_CAP_ETH(mdev, 
tunnel_stateless_ip_over_ip_tx)); 5010 default: 5011 return false; 5012 } 5013 } 5014 5015 static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, 5016 struct sk_buff *skb) 5017 { 5018 switch (skb->inner_protocol) { 5019 case htons(ETH_P_IP): 5020 case htons(ETH_P_IPV6): 5021 case htons(ETH_P_TEB): 5022 return true; 5023 case htons(ETH_P_MPLS_UC): 5024 case htons(ETH_P_MPLS_MC): 5025 return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); 5026 } 5027 return false; 5028 } 5029 5030 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, 5031 struct sk_buff *skb, 5032 netdev_features_t features) 5033 { 5034 unsigned int offset = 0; 5035 struct udphdr *udph; 5036 u8 proto; 5037 u16 port; 5038 5039 switch (vlan_get_protocol(skb)) { 5040 case htons(ETH_P_IP): 5041 proto = ip_hdr(skb)->protocol; 5042 break; 5043 case htons(ETH_P_IPV6): 5044 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); 5045 break; 5046 default: 5047 goto out; 5048 } 5049 5050 switch (proto) { 5051 case IPPROTO_GRE: 5052 if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) 5053 return features; 5054 break; 5055 case IPPROTO_IPIP: 5056 case IPPROTO_IPV6: 5057 if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP)) 5058 return features; 5059 break; 5060 case IPPROTO_UDP: 5061 udph = udp_hdr(skb); 5062 port = be16_to_cpu(udph->dest); 5063 5064 /* Verify if UDP port is being offloaded by HW */ 5065 if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) 5066 return vxlan_features_check(skb, features); 5067 5068 #if IS_ENABLED(CONFIG_GENEVE) 5069 /* Support Geneve offload for default UDP port */ 5070 if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev)) 5071 return features; 5072 #endif 5073 break; 5074 #ifdef CONFIG_MLX5_EN_IPSEC 5075 case IPPROTO_ESP: 5076 return mlx5e_ipsec_feature_check(skb, features); 5077 #endif 5078 } 5079 5080 out: 5081 /* Disable CSUM and GSO if skb cannot be offloaded by HW */ 5082 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); 5083 } 5084 5085 netdev_features_t mlx5e_features_check(struct sk_buff *skb, 5086 struct net_device *netdev, 5087 netdev_features_t features) 5088 { 5089 struct mlx5e_priv *priv = netdev_priv(netdev); 5090 5091 features = vlan_features_check(skb, features); 5092 5093 /* Validate if the tunneled packet is being offloaded by HW */ 5094 if (skb->encapsulation && 5095 (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) 5096 return mlx5e_tunnel_features_check(priv, skb, features); 5097 5098 return features; 5099 } 5100 5101 static void mlx5e_tx_timeout_work(struct work_struct *work) 5102 { 5103 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 5104 tx_timeout_work); 5105 struct net_device *netdev = priv->netdev; 5106 int i; 5107 5108 /* Recovering the TX queues implies re-enabling NAPI, which requires 5109 * the netdev instance lock. 5110 * However, channel closing flows have to wait for this work to finish 5111 * while holding the same lock. So either get the lock or find that 5112 * channels are being closed for other reason and this work is not 5113 * relevant anymore. 
5114 */ 5115 while (!netdev_trylock(netdev)) { 5116 if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) 5117 return; 5118 msleep(20); 5119 } 5120 5121 for (i = 0; i < netdev->real_num_tx_queues; i++) { 5122 struct netdev_queue *dev_queue = 5123 netdev_get_tx_queue(netdev, i); 5124 struct mlx5e_txqsq *sq = priv->txq2sq[i]; 5125 5126 if (!netif_xmit_timeout_ms(dev_queue)) 5127 continue; 5128 5129 if (mlx5e_reporter_tx_timeout(sq)) 5130 /* break if tried to reopened channels */ 5131 break; 5132 } 5133 5134 netdev_unlock(netdev); 5135 } 5136 5137 static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) 5138 { 5139 struct mlx5e_priv *priv = netdev_priv(dev); 5140 5141 netdev_err(dev, "TX timeout detected\n"); 5142 queue_work(priv->wq, &priv->tx_timeout_work); 5143 } 5144 5145 static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev, 5146 struct mlx5e_params *params) 5147 { 5148 if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { 5149 netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); 5150 return -EINVAL; 5151 } 5152 5153 if (!mlx5e_params_validate_xdp(netdev, mdev, params)) 5154 return -EINVAL; 5155 5156 return 0; 5157 } 5158 5159 static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog) 5160 { 5161 struct bpf_prog *old_prog; 5162 5163 old_prog = rcu_replace_pointer(rq->xdp_prog, prog, 5164 lockdep_is_held(&rq->priv->state_lock)); 5165 if (old_prog) 5166 bpf_prog_put(old_prog); 5167 } 5168 5169 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) 5170 { 5171 struct mlx5e_priv *priv = netdev_priv(netdev); 5172 struct mlx5e_params new_params; 5173 struct bpf_prog *old_prog; 5174 int err = 0; 5175 bool reset; 5176 int i; 5177 5178 mutex_lock(&priv->state_lock); 5179 5180 new_params = priv->channels.params; 5181 new_params.xdp_prog = prog; 5182 5183 if (prog) { 5184 err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params); 5185 if (err) 5186 goto unlock; 5187 } 5188 5189 /* no need for full reset when exchanging programs */ 5190 reset = (!priv->channels.params.xdp_prog || !prog); 5191 5192 old_prog = priv->channels.params.xdp_prog; 5193 5194 err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); 5195 if (err) 5196 goto unlock; 5197 5198 if (old_prog) 5199 bpf_prog_put(old_prog); 5200 5201 if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) 5202 goto unlock; 5203 5204 /* exchanging programs w/o reset, we update ref counts on behalf 5205 * of the channels RQs here. 5206 */ 5207 bpf_prog_add(prog, priv->channels.num); 5208 for (i = 0; i < priv->channels.num; i++) { 5209 struct mlx5e_channel *c = priv->channels.c[i]; 5210 5211 mlx5e_rq_replace_xdp_prog(&c->rq, prog); 5212 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { 5213 bpf_prog_inc(prog); 5214 mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); 5215 } 5216 } 5217 5218 unlock: 5219 mutex_unlock(&priv->state_lock); 5220 5221 /* Need to fix some features. 
*/ 5222 if (!err) 5223 netdev_update_features(netdev); 5224 5225 return err; 5226 } 5227 5228 static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5229 { 5230 switch (xdp->command) { 5231 case XDP_SETUP_PROG: 5232 return mlx5e_xdp_set(dev, xdp->prog); 5233 case XDP_SETUP_XSK_POOL: 5234 return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool, 5235 xdp->xsk.queue_id); 5236 default: 5237 return -EINVAL; 5238 } 5239 } 5240 5241 #ifdef CONFIG_MLX5_ESWITCH 5242 static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 5243 struct net_device *dev, u32 filter_mask, 5244 int nlflags) 5245 { 5246 struct mlx5e_priv *priv = netdev_priv(dev); 5247 struct mlx5_core_dev *mdev = priv->mdev; 5248 u8 mode, setting; 5249 5250 if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) 5251 return -EOPNOTSUPP; 5252 mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; 5253 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, 5254 mode, 5255 0, 0, nlflags, filter_mask, NULL); 5256 } 5257 5258 static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, 5259 u16 flags, struct netlink_ext_ack *extack) 5260 { 5261 struct mlx5e_priv *priv = netdev_priv(dev); 5262 struct mlx5_core_dev *mdev = priv->mdev; 5263 struct nlattr *attr, *br_spec; 5264 u16 mode = BRIDGE_MODE_UNDEF; 5265 u8 setting; 5266 int rem; 5267 5268 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 5269 if (!br_spec) 5270 return -EINVAL; 5271 5272 nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { 5273 mode = nla_get_u16(attr); 5274 if (mode > BRIDGE_MODE_VEPA) 5275 return -EINVAL; 5276 5277 break; 5278 } 5279 5280 if (mode == BRIDGE_MODE_UNDEF) 5281 return -EINVAL; 5282 5283 setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0; 5284 return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting); 5285 } 5286 #endif 5287 5288 const struct net_device_ops mlx5e_netdev_ops = { 5289 .ndo_open = mlx5e_open, 5290 .ndo_stop = mlx5e_close, 5291 .ndo_start_xmit = mlx5e_xmit, 5292 .ndo_setup_tc = mlx5e_setup_tc, 5293 .ndo_select_queue = mlx5e_select_queue, 5294 .ndo_get_stats64 = mlx5e_get_stats, 5295 .ndo_set_rx_mode = mlx5e_set_rx_mode, 5296 .ndo_set_mac_address = mlx5e_set_mac, 5297 .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, 5298 .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, 5299 .ndo_set_features = mlx5e_set_features, 5300 .ndo_fix_features = mlx5e_fix_features, 5301 .ndo_change_mtu = mlx5e_change_nic_mtu, 5302 .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, 5303 .ndo_features_check = mlx5e_features_check, 5304 .ndo_tx_timeout = mlx5e_tx_timeout, 5305 .ndo_bpf = mlx5e_xdp, 5306 .ndo_xdp_xmit = mlx5e_xdp_xmit, 5307 .ndo_xsk_wakeup = mlx5e_xsk_wakeup, 5308 .ndo_hwtstamp_get = mlx5e_hwtstamp_get_ndo, 5309 .ndo_hwtstamp_set = mlx5e_hwtstamp_set_ndo, 5310 #ifdef CONFIG_MLX5_EN_ARFS 5311 .ndo_rx_flow_steer = mlx5e_rx_flow_steer, 5312 #endif 5313 #ifdef CONFIG_MLX5_ESWITCH 5314 .ndo_bridge_setlink = mlx5e_bridge_setlink, 5315 .ndo_bridge_getlink = mlx5e_bridge_getlink, 5316 5317 /* SRIOV E-Switch NDOs */ 5318 .ndo_set_vf_mac = mlx5e_set_vf_mac, 5319 .ndo_set_vf_vlan = mlx5e_set_vf_vlan, 5320 .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, 5321 .ndo_set_vf_trust = mlx5e_set_vf_trust, 5322 .ndo_set_vf_rate = mlx5e_set_vf_rate, 5323 .ndo_get_vf_config = mlx5e_get_vf_config, 5324 .ndo_set_vf_link_state = mlx5e_set_vf_link_state, 5325 .ndo_get_vf_stats = mlx5e_get_vf_stats, 5326 .ndo_has_offload_stats = mlx5e_has_offload_stats, 5327 .ndo_get_offload_stats = mlx5e_get_offload_stats, 5328 #endif 5329 }; 5330 5331 void 
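/* Build the default channel parameters for the NIC profile: channel count,
 * SQ/RQ sizes and MPWQE usage, CQE compression (defaulted via the slow-PCI
 * heuristic when supported), packet-merge timeout, DIM/CQ moderation, TX
 * inline mode and the AF_XDP pool table. netdev->features is intentionally
 * not touched here; see mlx5e_fix_features().
 */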
mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) 5332 { 5333 struct mlx5e_params *params = &priv->channels.params; 5334 struct mlx5_core_dev *mdev = priv->mdev; 5335 5336 params->sw_mtu = mtu; 5337 params->hard_mtu = MLX5E_ETH_HARD_MTU; 5338 params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, 5339 priv->max_nch); 5340 mlx5e_params_mqprio_reset(params); 5341 5342 /* SQ */ 5343 params->log_sq_size = is_kdump_kernel() ? 5344 MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : 5345 MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; 5346 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); 5347 5348 /* XDP SQ */ 5349 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); 5350 5351 /* set CQE compression */ 5352 params->rx_cqe_compress_def = false; 5353 if (MLX5_CAP_GEN(mdev, cqe_compression) && 5354 MLX5_CAP_GEN(mdev, vport_group_manager)) 5355 params->rx_cqe_compress_def = slow_pci_heuristic(mdev); 5356 5357 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); 5358 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); 5359 5360 /* RQ */ 5361 mlx5e_build_rq_params(mdev, params); 5362 5363 params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev); 5364 5365 params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); 5366 5367 /* CQ moderation params */ 5368 params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation) && 5369 MLX5_CAP_GEN(mdev, cq_period_mode_modify); 5370 params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation) && 5371 MLX5_CAP_GEN(mdev, cq_period_mode_modify); 5372 params->rx_moder_use_cqe_mode = !!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe); 5373 params->tx_moder_use_cqe_mode = false; 5374 mlx5e_reset_rx_moderation(¶ms->rx_cq_moderation, params->rx_moder_use_cqe_mode, 5375 params->rx_dim_enabled); 5376 mlx5e_reset_tx_moderation(¶ms->tx_cq_moderation, params->tx_moder_use_cqe_mode, 5377 params->tx_dim_enabled); 5378 5379 /* TX inline */ 5380 mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); 5381 5382 /* AF_XDP */ 5383 params->xsk = xsk; 5384 5385 /* Do not update netdev->features directly in here 5386 * on mlx5e_attach_netdev() we will call mlx5e_update_features() 5387 * To update netdev->features please modify mlx5e_fix_features() 5388 */ 5389 } 5390 5391 static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) 5392 { 5393 struct mlx5e_priv *priv = netdev_priv(netdev); 5394 u8 addr[ETH_ALEN]; 5395 5396 mlx5_query_mac_address(priv->mdev, addr); 5397 if (is_zero_ether_addr(addr) && 5398 !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { 5399 eth_hw_addr_random(netdev); 5400 mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr); 5401 return; 5402 } 5403 5404 eth_hw_addr_set(netdev, addr); 5405 } 5406 5407 static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table, 5408 unsigned int entry, struct udp_tunnel_info *ti) 5409 { 5410 struct mlx5e_priv *priv = netdev_priv(netdev); 5411 5412 return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port)); 5413 } 5414 5415 static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table, 5416 unsigned int entry, struct udp_tunnel_info *ti) 5417 { 5418 struct mlx5e_priv *priv = netdev_priv(netdev); 5419 5420 return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port)); 5421 } 5422 5423 void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv) 5424 { 5425 if (!mlx5_vxlan_allowed(priv->mdev->vxlan)) 5426 
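		/* Without VXLAN HW offload there is nothing to program, so do
		 * not register a udp_tunnel_nic_info table at all.
		 */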
return; 5427 5428 priv->nic_info.set_port = mlx5e_vxlan_set_port; 5429 priv->nic_info.unset_port = mlx5e_vxlan_unset_port; 5430 priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; 5431 priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; 5432 /* Don't count the space hard-coded to the IANA port */ 5433 priv->nic_info.tables[0].n_entries = 5434 mlx5_vxlan_max_udp_ports(priv->mdev) - 1; 5435 5436 priv->netdev->udp_tunnel_nic_info = &priv->nic_info; 5437 } 5438 5439 static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev) 5440 { 5441 int tt; 5442 5443 for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { 5444 if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt))) 5445 return true; 5446 } 5447 return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)); 5448 } 5449 5450 static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, 5451 struct netdev_queue_stats_rx *stats) 5452 { 5453 struct mlx5e_priv *priv = netdev_priv(dev); 5454 struct mlx5e_channel_stats *channel_stats; 5455 struct mlx5e_rq_stats *xskrq_stats; 5456 struct mlx5e_rq_stats *rq_stats; 5457 5458 if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) 5459 return; 5460 5461 channel_stats = priv->channel_stats[i]; 5462 xskrq_stats = &channel_stats->xskrq; 5463 rq_stats = &channel_stats->rq; 5464 5465 stats->packets = rq_stats->packets + xskrq_stats->packets; 5466 stats->bytes = rq_stats->bytes + xskrq_stats->bytes; 5467 stats->alloc_fail = rq_stats->buff_alloc_err + 5468 xskrq_stats->buff_alloc_err; 5469 } 5470 5471 static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, 5472 struct netdev_queue_stats_tx *stats) 5473 { 5474 struct mlx5e_priv *priv = netdev_priv(dev); 5475 struct mlx5e_sq_stats *sq_stats; 5476 5477 if (!priv->stats_nch) 5478 return; 5479 5480 /* no special case needed for ptp htb etc since txq2sq_stats is kept up 5481 * to date for active sq_stats, otherwise get_base_stats takes care of 5482 * inactive sqs. 5483 */ 5484 sq_stats = priv->txq2sq_stats[i]; 5485 stats->packets = sq_stats->packets; 5486 stats->bytes = sq_stats->bytes; 5487 } 5488 5489 static void mlx5e_get_base_stats(struct net_device *dev, 5490 struct netdev_queue_stats_rx *rx, 5491 struct netdev_queue_stats_tx *tx) 5492 { 5493 struct mlx5e_priv *priv = netdev_priv(dev); 5494 struct mlx5e_ptp *ptp_channel; 5495 int i, tc; 5496 5497 if (!mlx5e_is_uplink_rep(priv)) { 5498 rx->packets = 0; 5499 rx->bytes = 0; 5500 rx->alloc_fail = 0; 5501 5502 for (i = priv->channels.params.num_channels; i < priv->stats_nch; i++) { 5503 struct netdev_queue_stats_rx rx_i = {0}; 5504 5505 mlx5e_get_queue_stats_rx(dev, i, &rx_i); 5506 5507 rx->packets += rx_i.packets; 5508 rx->bytes += rx_i.bytes; 5509 rx->alloc_fail += rx_i.alloc_fail; 5510 } 5511 5512 /* always report PTP RX stats from base as there is no 5513 * corresponding channel to report them under in 5514 * mlx5e_get_queue_stats_rx. 5515 */ 5516 if (priv->rx_ptp_opened) { 5517 struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; 5518 5519 rx->packets += rq_stats->packets; 5520 rx->bytes += rq_stats->bytes; 5521 } 5522 } 5523 5524 tx->packets = 0; 5525 tx->bytes = 0; 5526 5527 for (i = 0; i < priv->stats_nch; i++) { 5528 struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; 5529 5530 /* handle two cases: 5531 * 5532 * 1. channels which are active. In this case, 5533 * report only deactivated TCs on these channels. 5534 * 5535 * 2. 
channels which were deactivated 5536 * (i > priv->channels.params.num_channels) 5537 * must have all of their TCs [0 .. priv->max_opened_tc) 5538 * examined because deactivated channels will not be in the 5539 * range of [0..real_num_tx_queues) and will not have their 5540 * stats reported by mlx5e_get_queue_stats_tx. 5541 */ 5542 if (i < priv->channels.params.num_channels) 5543 tc = mlx5e_get_dcb_num_tc(&priv->channels.params); 5544 else 5545 tc = 0; 5546 5547 for (; tc < priv->max_opened_tc; tc++) { 5548 struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[tc]; 5549 5550 tx->packets += sq_stats->packets; 5551 tx->bytes += sq_stats->bytes; 5552 } 5553 } 5554 5555 /* if PTP TX was opened at some point and has since either: 5556 * - been shutdown and set to NULL, or 5557 * - simply disabled (bit unset) 5558 * 5559 * report stats directly from the ptp_stats structures as these queues 5560 * are now unavailable and there is no txq index to retrieve these 5561 * stats via calls to mlx5e_get_queue_stats_tx. 5562 */ 5563 ptp_channel = priv->channels.ptp; 5564 if (priv->tx_ptp_opened && (!ptp_channel || !test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state))) { 5565 for (tc = 0; tc < priv->max_opened_tc; tc++) { 5566 struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[tc]; 5567 5568 tx->packets += sq_stats->packets; 5569 tx->bytes += sq_stats->bytes; 5570 } 5571 } 5572 } 5573 5574 static const struct netdev_stat_ops mlx5e_stat_ops = { 5575 .get_queue_stats_rx = mlx5e_get_queue_stats_rx, 5576 .get_queue_stats_tx = mlx5e_get_queue_stats_tx, 5577 .get_base_stats = mlx5e_get_base_stats, 5578 }; 5579 5580 struct mlx5_qmgmt_data { 5581 struct mlx5e_channel *c; 5582 struct mlx5e_channel_param cparam; 5583 }; 5584 5585 static int mlx5e_queue_mem_alloc(struct net_device *dev, 5586 struct netdev_queue_config *qcfg, 5587 void *newq, int queue_index) 5588 { 5589 struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; 5590 struct mlx5e_priv *priv = netdev_priv(dev); 5591 struct mlx5e_channels *chs = &priv->channels; 5592 struct mlx5e_params params = chs->params; 5593 struct mlx5_core_dev *mdev; 5594 int err; 5595 5596 mutex_lock(&priv->state_lock); 5597 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 5598 err = -ENODEV; 5599 goto unlock; 5600 } 5601 5602 if (queue_index >= chs->num) { 5603 err = -ERANGE; 5604 goto unlock; 5605 } 5606 5607 if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || 5608 chs->params.ptp_rx || 5609 chs->params.xdp_prog || 5610 priv->htb) { 5611 netdev_err(priv->netdev, 5612 "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n"); 5613 err = -EOPNOTSUPP; 5614 goto unlock; 5615 } 5616 5617 mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); 5618 err = mlx5e_build_channel_param(mdev, ¶ms, &new->cparam); 5619 if (err) 5620 goto unlock; 5621 5622 err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c); 5623 unlock: 5624 mutex_unlock(&priv->state_lock); 5625 return err; 5626 } 5627 5628 static void mlx5e_queue_mem_free(struct net_device *dev, void *mem) 5629 { 5630 struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem; 5631 5632 /* not supposed to happen since mlx5e_queue_start never fails 5633 * but this is how this should be implemented just in case 5634 */ 5635 if (data->c) 5636 mlx5e_close_channel(data->c); 5637 } 5638 5639 static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index) 5640 { 5641 /* In mlx5 a txq cannot be simply stopped in isolation, only restarted. 
5642 * mlx5e_queue_start does not fail, we stop the old queue there. 5643 * TODO: Improve this. 5644 */ 5645 return 0; 5646 } 5647 5648 static int mlx5e_queue_start(struct net_device *dev, 5649 struct netdev_queue_config *qcfg, 5650 void *newq, int queue_index) 5651 { 5652 struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; 5653 struct mlx5e_priv *priv = netdev_priv(dev); 5654 struct mlx5e_channel *old; 5655 5656 mutex_lock(&priv->state_lock); 5657 5658 /* stop and close the old */ 5659 old = priv->channels.c[queue_index]; 5660 mlx5e_deactivate_priv_channels(priv); 5661 /* close old before activating new, to avoid napi conflict */ 5662 mlx5e_close_channel(old); 5663 5664 /* start the new */ 5665 priv->channels.c[queue_index] = new->c; 5666 mlx5e_activate_priv_channels(priv); 5667 mutex_unlock(&priv->state_lock); 5668 return 0; 5669 } 5670 5671 static struct device *mlx5e_queue_get_dma_dev(struct net_device *dev, 5672 int queue_index) 5673 { 5674 struct mlx5e_priv *priv = netdev_priv(dev); 5675 struct mlx5e_channels *channels; 5676 struct device *pdev = NULL; 5677 struct mlx5e_channel *ch; 5678 5679 channels = &priv->channels; 5680 5681 mutex_lock(&priv->state_lock); 5682 5683 if (queue_index >= channels->num) 5684 goto out; 5685 5686 ch = channels->c[queue_index]; 5687 pdev = ch->pdev; 5688 out: 5689 mutex_unlock(&priv->state_lock); 5690 5691 return pdev; 5692 } 5693 5694 static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { 5695 .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), 5696 .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, 5697 .ndo_queue_mem_free = mlx5e_queue_mem_free, 5698 .ndo_queue_start = mlx5e_queue_start, 5699 .ndo_queue_stop = mlx5e_queue_stop, 5700 .ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev, 5701 }; 5702 5703 static void mlx5e_build_nic_netdev(struct net_device *netdev) 5704 { 5705 struct mlx5e_priv *priv = netdev_priv(netdev); 5706 struct mlx5_core_dev *mdev = priv->mdev; 5707 bool fcs_supported; 5708 bool fcs_enabled; 5709 5710 SET_NETDEV_DEV(netdev, mdev->device); 5711 5712 netdev->netdev_ops = &mlx5e_netdev_ops; 5713 netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops; 5714 netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; 5715 netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops; 5716 netdev->request_ops_lock = true; 5717 netdev_lockdep_set_classes(netdev); 5718 5719 mlx5e_dcbnl_build_netdev(netdev); 5720 5721 netdev->watchdog_timeo = 15 * HZ; 5722 5723 netdev->stat_ops = &mlx5e_stat_ops; 5724 netdev->ethtool_ops = &mlx5e_ethtool_ops; 5725 5726 netdev->vlan_features |= NETIF_F_SG; 5727 netdev->vlan_features |= NETIF_F_HW_CSUM; 5728 netdev->vlan_features |= NETIF_F_HW_MACSEC; 5729 netdev->vlan_features |= NETIF_F_GRO; 5730 netdev->vlan_features |= NETIF_F_TSO; 5731 netdev->vlan_features |= NETIF_F_TSO6; 5732 netdev->vlan_features |= NETIF_F_RXCSUM; 5733 netdev->vlan_features |= NETIF_F_RXHASH; 5734 netdev->vlan_features |= NETIF_F_GSO_PARTIAL; 5735 5736 netdev->mpls_features |= NETIF_F_SG; 5737 netdev->mpls_features |= NETIF_F_HW_CSUM; 5738 netdev->mpls_features |= NETIF_F_TSO; 5739 netdev->mpls_features |= NETIF_F_TSO6; 5740 5741 netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; 5742 netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; 5743 5744 /* Tunneled LRO is not supported in the driver, and the same RQs are 5745 * shared between inner and outer TIRs, so the driver can't disable LRO 5746 * for inner TIRs while having it enabled for outer TIRs. Due to this, 5747 * block LRO altogether if the firmware declares tunneled LRO support. 
5748 */ 5749 if (!!MLX5_CAP_ETH(mdev, lro_cap) && 5750 !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && 5751 !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && 5752 mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, 5753 MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5754 netdev->vlan_features |= NETIF_F_LRO; 5755 5756 if (mlx5e_hw_gro_supported(mdev) && 5757 mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, 5758 MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5759 netdev->vlan_features |= NETIF_F_GRO_HW; 5760 5761 netdev->hw_features = netdev->vlan_features; 5762 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 5763 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 5764 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; 5765 netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; 5766 5767 if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { 5768 netdev->hw_enc_features |= NETIF_F_HW_CSUM; 5769 netdev->hw_enc_features |= NETIF_F_TSO; 5770 netdev->hw_enc_features |= NETIF_F_TSO6; 5771 netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; 5772 } 5773 5774 if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { 5775 netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | 5776 NETIF_F_GSO_UDP_TUNNEL_CSUM; 5777 netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | 5778 NETIF_F_GSO_UDP_TUNNEL_CSUM; 5779 netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; 5780 netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | 5781 NETIF_F_GSO_UDP_TUNNEL_CSUM; 5782 } 5783 5784 if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { 5785 netdev->hw_features |= NETIF_F_GSO_GRE | 5786 NETIF_F_GSO_GRE_CSUM; 5787 netdev->hw_enc_features |= NETIF_F_GSO_GRE | 5788 NETIF_F_GSO_GRE_CSUM; 5789 netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; 5790 netdev->vlan_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM; 5791 } 5792 5793 if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { 5794 netdev->hw_features |= NETIF_F_GSO_IPXIP4 | 5795 NETIF_F_GSO_IPXIP6; 5796 netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 | 5797 NETIF_F_GSO_IPXIP6; 5798 netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 | 5799 NETIF_F_GSO_IPXIP6; 5800 } 5801 5802 netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; 5803 netdev->hw_features |= NETIF_F_GSO_UDP_L4; 5804 netdev->hw_enc_features |= NETIF_F_GSO_UDP_L4; 5805 5806 mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); 5807 5808 if (fcs_supported) 5809 netdev->hw_features |= NETIF_F_RXALL; 5810 5811 if (MLX5_CAP_ETH(mdev, scatter_fcs)) 5812 netdev->hw_features |= NETIF_F_RXFCS; 5813 5814 if (mlx5_qos_is_supported(mdev)) 5815 netdev->hw_features |= NETIF_F_HW_TC; 5816 5817 netdev->features = netdev->hw_features; 5818 5819 /* Defaults */ 5820 if (fcs_enabled) 5821 netdev->features &= ~NETIF_F_RXALL; 5822 netdev->features &= ~NETIF_F_LRO; 5823 netdev->features &= ~NETIF_F_GRO_HW; 5824 netdev->features &= ~NETIF_F_RXFCS; 5825 5826 #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) 5827 if (FT_CAP(flow_modify_en) && 5828 FT_CAP(modify_root) && 5829 FT_CAP(identified_miss_table_mode) && 5830 FT_CAP(flow_table_modify)) { 5831 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) 5832 netdev->hw_features |= NETIF_F_HW_TC; 5833 #endif 5834 #if IS_ENABLED(CONFIG_MLX5_EN_ARFS) 5835 netdev->hw_features |= NETIF_F_NTUPLE; 5836 #elif IS_ENABLED(CONFIG_MLX5_EN_RXNFC) 5837 netdev->features |= NETIF_F_NTUPLE; 5838 #endif 5839 } 5840 5841 netdev->features |= NETIF_F_HIGHDMA; 5842 netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; 5843 5844 netdev->priv_flags |= IFF_UNICAST_FLT; 5845 5846 netdev->netmem_tx = true; 5847 5848 
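	/* Finish netdev setup: lift the TSO limit to GSO_MAX_SIZE, publish the
	 * XDP feature flags, set the MAC address queried from the device (a
	 * random address may be assigned if none is reported), and let the
	 * MACsec, IPsec and kTLS accel code apply their netdev adjustments.
	 */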
netif_set_tso_max_size(netdev, GSO_MAX_SIZE); 5849 mlx5e_set_xdp_feature(priv); 5850 mlx5e_set_netdev_dev_addr(netdev); 5851 mlx5e_macsec_build_netdev(priv); 5852 mlx5e_ipsec_build_netdev(priv); 5853 mlx5e_ktls_build_netdev(priv); 5854 } 5855 5856 void mlx5e_create_q_counters(struct mlx5e_priv *priv) 5857 { 5858 u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 5859 u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 5860 struct mlx5_core_dev *mdev = priv->mdev; 5861 struct mlx5_core_dev *pos; 5862 int err, i; 5863 5864 MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); 5865 5866 mlx5_sd_for_each_dev(i, mdev, pos) { 5867 err = mlx5_cmd_exec_inout(pos, alloc_q_counter, in, out); 5868 if (!err) 5869 priv->q_counter[i] = 5870 MLX5_GET(alloc_q_counter_out, out, counter_set_id); 5871 } 5872 5873 err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); 5874 if (!err) 5875 priv->drop_rq_q_counter = 5876 MLX5_GET(alloc_q_counter_out, out, counter_set_id); 5877 } 5878 5879 void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) 5880 { 5881 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 5882 struct mlx5_core_dev *pos; 5883 int i; 5884 5885 MLX5_SET(dealloc_q_counter_in, in, opcode, 5886 MLX5_CMD_OP_DEALLOC_Q_COUNTER); 5887 mlx5_sd_for_each_dev(i, priv->mdev, pos) { 5888 if (priv->q_counter[i]) { 5889 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 5890 priv->q_counter[i]); 5891 mlx5_cmd_exec_in(pos, dealloc_q_counter, in); 5892 } 5893 } 5894 5895 if (priv->drop_rq_q_counter) { 5896 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 5897 priv->drop_rq_q_counter); 5898 mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); 5899 } 5900 } 5901 5902 static int mlx5e_nic_init(struct mlx5_core_dev *mdev, 5903 struct net_device *netdev) 5904 { 5905 const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; 5906 struct mlx5e_priv *priv = netdev_priv(netdev); 5907 struct mlx5e_flow_steering *fs; 5908 int err; 5909 5910 mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); 5911 mlx5e_vxlan_set_netdev_info(priv); 5912 5913 mlx5e_timestamp_init(priv); 5914 5915 priv->dfs_root = debugfs_create_dir("nic", 5916 mlx5_debugfs_get_dev_root(mdev)); 5917 5918 fs = mlx5e_fs_init(priv->profile, mdev, 5919 !test_bit(MLX5E_STATE_DESTROYING, &priv->state), 5920 priv->dfs_root); 5921 if (!fs) { 5922 err = -ENOMEM; 5923 mlx5_core_err(mdev, "FS initialization failed, %d\n", err); 5924 debugfs_remove_recursive(priv->dfs_root); 5925 return err; 5926 } 5927 priv->fs = fs; 5928 5929 err = mlx5e_psp_init(priv); 5930 if (err) 5931 mlx5_core_err(mdev, "PSP initialization failed, %d\n", err); 5932 5933 err = mlx5e_ktls_init(priv); 5934 if (err) 5935 mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); 5936 5937 mlx5e_health_create_reporters(priv); 5938 5939 /* If netdev is already registered (e.g. move from uplink to nic profile), 5940 * RTNL lock must be held before triggering netdev notifiers. 
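	 * (e.g. mlx5e_set_xdp_feature() below updates the advertised XDP
	 * features, which goes through the netdev notifier chain.)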
5941 */ 5942 if (take_rtnl) 5943 rtnl_lock(); 5944 5945 mlx5e_psp_register(priv); 5946 /* update XDP supported features */ 5947 mlx5e_set_xdp_feature(priv); 5948 5949 if (take_rtnl) 5950 rtnl_unlock(); 5951 5952 return 0; 5953 } 5954 5955 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) 5956 { 5957 mlx5e_health_destroy_reporters(priv); 5958 mlx5e_psp_unregister(priv); 5959 mlx5e_ktls_cleanup(priv); 5960 mlx5e_psp_cleanup(priv); 5961 mlx5e_fs_cleanup(priv->fs); 5962 debugfs_remove_recursive(priv->dfs_root); 5963 priv->fs = NULL; 5964 } 5965 5966 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) 5967 { 5968 struct mlx5_core_dev *mdev = priv->mdev; 5969 enum mlx5e_rx_res_features features; 5970 int err; 5971 5972 mlx5e_create_q_counters(priv); 5973 5974 err = mlx5e_open_drop_rq(priv, &priv->drop_rq); 5975 if (err) { 5976 mlx5_core_err(mdev, "open drop rq failed, %d\n", err); 5977 goto err_destroy_q_counters; 5978 } 5979 5980 features = MLX5E_RX_RES_FEATURE_PTP; 5981 if (mlx5_tunnel_inner_ft_supported(mdev)) 5982 features |= MLX5E_RX_RES_FEATURE_INNER_FT; 5983 if (mlx5_get_sd(priv->mdev)) 5984 features |= MLX5E_RX_RES_FEATURE_MULTI_VHCA; 5985 5986 priv->rx_res = mlx5e_rx_res_create(priv->mdev, features, priv->max_nch, priv->drop_rq.rqn, 5987 &priv->channels.params.packet_merge, 5988 priv->channels.params.num_channels); 5989 if (IS_ERR(priv->rx_res)) { 5990 err = PTR_ERR(priv->rx_res); 5991 priv->rx_res = NULL; 5992 mlx5_core_err(mdev, "create rx resources failed, %d\n", err); 5993 goto err_close_drop_rq; 5994 } 5995 5996 err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile, 5997 priv->netdev); 5998 if (err) { 5999 mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); 6000 goto err_destroy_rx_res; 6001 } 6002 6003 err = mlx5e_tc_nic_init(priv); 6004 if (err) 6005 goto err_destroy_flow_steering; 6006 6007 err = mlx5e_accel_init_rx(priv); 6008 if (err) 6009 goto err_tc_nic_cleanup; 6010 6011 #ifdef CONFIG_MLX5_EN_ARFS 6012 priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev); 6013 #endif 6014 6015 return 0; 6016 6017 err_tc_nic_cleanup: 6018 mlx5e_tc_nic_cleanup(priv); 6019 err_destroy_flow_steering: 6020 mlx5e_destroy_flow_steering(priv->fs, mlx5e_fs_has_arfs(priv->netdev), 6021 priv->profile); 6022 err_destroy_rx_res: 6023 mlx5e_rx_res_destroy(priv->rx_res); 6024 priv->rx_res = NULL; 6025 err_close_drop_rq: 6026 mlx5e_close_drop_rq(&priv->drop_rq); 6027 err_destroy_q_counters: 6028 mlx5e_destroy_q_counters(priv); 6029 return err; 6030 } 6031 6032 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) 6033 { 6034 mlx5e_accel_cleanup_rx(priv); 6035 mlx5e_tc_nic_cleanup(priv); 6036 mlx5e_destroy_flow_steering(priv->fs, mlx5e_fs_has_arfs(priv->netdev), 6037 priv->profile); 6038 mlx5e_rx_res_destroy(priv->rx_res); 6039 priv->rx_res = NULL; 6040 mlx5e_close_drop_rq(&priv->drop_rq); 6041 mlx5e_destroy_q_counters(priv); 6042 } 6043 6044 static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv) 6045 { 6046 struct mlx5e_params *params; 6047 struct mlx5e_mqprio_rl *rl; 6048 6049 params = &priv->channels.params; 6050 if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) 6051 return; 6052 6053 rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc, 6054 params->mqprio.channel.max_rate); 6055 if (IS_ERR(rl)) 6056 rl = NULL; 6057 priv->mqprio_rl = rl; 6058 mlx5e_mqprio_rl_update_params(params, rl); 6059 } 6060 6061 static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) 6062 { 6063 int err; 6064 6065 err = mlx5e_accel_init_tx(priv); 6066 if (err) 6067 return 
err; 6068 6069 mlx5e_set_mqprio_rl(priv); 6070 mlx5e_dcbnl_initialize(priv); 6071 return 0; 6072 } 6073 6074 static void mlx5e_nic_enable(struct mlx5e_priv *priv) 6075 { 6076 struct net_device *netdev = priv->netdev; 6077 struct mlx5_core_dev *mdev = priv->mdev; 6078 int err; 6079 6080 mlx5e_fs_init_l2_addr(priv->fs, netdev); 6081 mlx5e_ipsec_init(priv); 6082 6083 err = mlx5e_macsec_init(priv); 6084 if (err) 6085 mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err); 6086 6087 /* Marking the link as currently not needed by the Driver */ 6088 if (!netif_running(netdev)) 6089 mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN); 6090 6091 mlx5e_set_netdev_mtu_boundaries(priv); 6092 mlx5e_set_dev_port_mtu(priv); 6093 6094 mlx5_lag_add_netdev(mdev, netdev); 6095 6096 mlx5e_enable_async_events(priv); 6097 mlx5e_enable_blocking_events(priv); 6098 if (mlx5e_monitor_counter_supported(priv)) 6099 mlx5e_monitor_counter_init(priv); 6100 6101 mlx5e_pcie_cong_event_init(priv); 6102 mlx5e_hv_vhca_stats_create(priv); 6103 if (netdev->reg_state != NETREG_REGISTERED) 6104 return; 6105 mlx5e_dcbnl_init_app(priv); 6106 6107 mlx5e_nic_set_rx_mode(priv); 6108 6109 rtnl_lock(); 6110 netdev_lock(netdev); 6111 if (netif_running(netdev)) 6112 mlx5e_open(netdev); 6113 udp_tunnel_nic_reset_ntf(priv->netdev); 6114 netdev_unlock(netdev); 6115 netif_device_attach(netdev); 6116 rtnl_unlock(); 6117 } 6118 6119 static void mlx5e_nic_disable(struct mlx5e_priv *priv) 6120 { 6121 struct mlx5_core_dev *mdev = priv->mdev; 6122 6123 if (priv->netdev->reg_state == NETREG_REGISTERED) 6124 mlx5e_dcbnl_delete_app(priv); 6125 6126 rtnl_lock(); 6127 netdev_lock(priv->netdev); 6128 if (netif_running(priv->netdev)) 6129 mlx5e_close(priv->netdev); 6130 netif_device_detach(priv->netdev); 6131 if (priv->en_trap) { 6132 mlx5e_deactivate_trap(priv); 6133 mlx5e_close_trap(priv->en_trap); 6134 priv->en_trap = NULL; 6135 } 6136 netdev_unlock(priv->netdev); 6137 rtnl_unlock(); 6138 6139 mlx5e_nic_set_rx_mode(priv); 6140 6141 mlx5e_pcie_cong_event_cleanup(priv); 6142 mlx5e_hv_vhca_stats_destroy(priv); 6143 if (mlx5e_monitor_counter_supported(priv)) 6144 mlx5e_monitor_counter_cleanup(priv); 6145 6146 mlx5e_ipsec_disable_events(priv); 6147 mlx5e_disable_blocking_events(priv); 6148 mlx5e_disable_async_events(priv); 6149 mlx5_lag_remove_netdev(mdev, priv->netdev); 6150 mlx5_vxlan_reset_to_default(mdev->vxlan); 6151 mlx5e_macsec_cleanup(priv); 6152 mlx5e_ipsec_cleanup(priv); 6153 } 6154 6155 static int mlx5e_update_nic_rx(struct mlx5e_priv *priv) 6156 { 6157 return mlx5e_refresh_tirs(priv->mdev, false, false); 6158 } 6159 6160 static const struct mlx5e_profile mlx5e_nic_profile = { 6161 .init = mlx5e_nic_init, 6162 .cleanup = mlx5e_nic_cleanup, 6163 .init_rx = mlx5e_init_nic_rx, 6164 .cleanup_rx = mlx5e_cleanup_nic_rx, 6165 .init_tx = mlx5e_init_nic_tx, 6166 .cleanup_tx = mlx5e_cleanup_nic_tx, 6167 .enable = mlx5e_nic_enable, 6168 .disable = mlx5e_nic_disable, 6169 .update_rx = mlx5e_update_nic_rx, 6170 .update_stats = mlx5e_stats_update_ndo_stats, 6171 .update_carrier = mlx5e_update_carrier, 6172 .rx_handlers = &mlx5e_rx_handlers_nic, 6173 .max_tc = MLX5_MAX_NUM_TC, 6174 .stats_grps = mlx5e_nic_stats_grps, 6175 .stats_grps_num = mlx5e_nic_stats_grps_num, 6176 .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | 6177 BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | 6178 BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) | 6179 BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) | 6180 BIT(MLX5E_PROFILE_FEATURE_FS_TC), 6181 }; 6182 6183 static int mlx5e_profile_max_num_channels(struct mlx5_core_dev 
*mdev, 6184 const struct mlx5e_profile *profile) 6185 { 6186 int nch; 6187 6188 nch = mlx5e_get_max_num_channels(mdev); 6189 6190 if (profile->max_nch_limit) 6191 nch = min_t(int, nch, profile->max_nch_limit(mdev)); 6192 return nch; 6193 } 6194 6195 static unsigned int 6196 mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, 6197 const struct mlx5e_profile *profile) 6198 6199 { 6200 unsigned int max_nch, tmp; 6201 6202 /* core resources */ 6203 max_nch = mlx5e_profile_max_num_channels(mdev, profile); 6204 6205 /* netdev rx queues */ 6206 max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues); 6207 6208 /* netdev tx queues */ 6209 tmp = netdev->num_tx_queues; 6210 if (mlx5_qos_is_supported(mdev)) 6211 tmp -= mlx5e_qos_max_leaf_nodes(mdev); 6212 if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) 6213 tmp -= profile->max_tc; 6214 tmp = tmp / profile->max_tc; 6215 max_nch = min_t(unsigned int, max_nch, tmp); 6216 6217 return max_nch; 6218 } 6219 6220 int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev) 6221 { 6222 /* Indirect TIRS: 2 sets of TTCs (inner + outer steering) 6223 * and 1 set of direct TIRS 6224 */ 6225 return 2 * MLX5E_NUM_INDIR_TIRS 6226 + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile); 6227 } 6228 6229 void mlx5e_set_rx_mode_work(struct work_struct *work) 6230 { 6231 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 6232 set_rx_mode_work); 6233 6234 return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev); 6235 } 6236 6237 /* mlx5e generic netdev management API (move to en_common.c) */ 6238 int mlx5e_priv_init(struct mlx5e_priv *priv, 6239 const struct mlx5e_profile *profile, 6240 struct net_device *netdev, 6241 struct mlx5_core_dev *mdev) 6242 { 6243 int nch, num_txqs, node; 6244 int err; 6245 6246 num_txqs = netdev->num_tx_queues; 6247 nch = mlx5e_calc_max_nch(mdev, netdev, profile); 6248 node = dev_to_node(mlx5_core_dma_dev(mdev)); 6249 6250 /* priv init */ 6251 priv->mdev = mdev; 6252 priv->netdev = netdev; 6253 priv->max_nch = nch; 6254 priv->max_opened_tc = 1; 6255 6256 if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) 6257 return -ENOMEM; 6258 6259 mutex_init(&priv->state_lock); 6260 6261 err = mlx5e_selq_init(&priv->selq, &priv->state_lock); 6262 if (err) 6263 goto err_free_cpumask; 6264 6265 INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); 6266 INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); 6267 INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); 6268 INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); 6269 6270 priv->wq = create_singlethread_workqueue("mlx5e"); 6271 if (!priv->wq) 6272 goto err_free_selq; 6273 6274 priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); 6275 if (!priv->txq2sq) 6276 goto err_destroy_workqueue; 6277 6278 priv->txq2sq_stats = kcalloc_node(num_txqs, sizeof(*priv->txq2sq_stats), GFP_KERNEL, node); 6279 if (!priv->txq2sq_stats) 6280 goto err_free_txq2sq; 6281 6282 priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); 6283 if (!priv->tx_rates) 6284 goto err_free_txq2sq_stats; 6285 6286 priv->channel_stats = 6287 kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); 6288 if (!priv->channel_stats) 6289 goto err_free_tx_rates; 6290 6291 priv->fec_ranges = kcalloc(ETHTOOL_FEC_HIST_MAX, 6292 sizeof(*priv->fec_ranges), GFP_KERNEL); 6293 if (!priv->fec_ranges) 6294 goto err_free_channel_stats; 6295 6296 return 0; 6297 6298 err_free_channel_stats: 6299 kfree(priv->channel_stats); 
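	/* the remaining labels unwind earlier allocations in reverse order */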
6300 err_free_tx_rates: 6301 kfree(priv->tx_rates); 6302 err_free_txq2sq_stats: 6303 kfree(priv->txq2sq_stats); 6304 err_free_txq2sq: 6305 kfree(priv->txq2sq); 6306 err_destroy_workqueue: 6307 destroy_workqueue(priv->wq); 6308 err_free_selq: 6309 mlx5e_selq_cleanup(&priv->selq); 6310 err_free_cpumask: 6311 free_cpumask_var(priv->scratchpad.cpumask); 6312 return -ENOMEM; 6313 } 6314 6315 void mlx5e_priv_cleanup(struct mlx5e_priv *priv) 6316 { 6317 bool destroying = test_bit(MLX5E_STATE_DESTROYING, &priv->state); 6318 int i; 6319 6320 /* bail if change profile failed and also rollback failed */ 6321 if (!priv->mdev) 6322 return; 6323 6324 kfree(priv->fec_ranges); 6325 for (i = 0; i < priv->stats_nch; i++) 6326 kvfree(priv->channel_stats[i]); 6327 kfree(priv->channel_stats); 6328 kfree(priv->tx_rates); 6329 kfree(priv->txq2sq_stats); 6330 kfree(priv->txq2sq); 6331 destroy_workqueue(priv->wq); 6332 mlx5e_selq_cleanup(&priv->selq); 6333 free_cpumask_var(priv->scratchpad.cpumask); 6334 6335 for (i = 0; i < priv->htb_max_qos_sqs; i++) 6336 kfree(priv->htb_qos_sq_stats[i]); 6337 kvfree(priv->htb_qos_sq_stats); 6338 6339 if (priv->mqprio_rl) { 6340 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 6341 mlx5e_mqprio_rl_free(priv->mqprio_rl); 6342 } 6343 6344 memset(priv, 0, sizeof(*priv)); 6345 if (destroying) /* restore destroying bit, to allow unload */ 6346 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6347 } 6348 6349 static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, 6350 const struct mlx5e_profile *profile) 6351 { 6352 unsigned int nch, ptp_txqs, qos_txqs; 6353 6354 nch = mlx5e_profile_max_num_channels(mdev, profile); 6355 6356 ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && 6357 mlx5e_profile_feature_cap(profile, PTP_TX) ? 6358 profile->max_tc : 0; 6359 6360 qos_txqs = mlx5_qos_is_supported(mdev) && 6361 mlx5e_profile_feature_cap(profile, QOS_HTB) ? 
6362 mlx5e_qos_max_leaf_nodes(mdev) : 0; 6363 6364 return nch * profile->max_tc + ptp_txqs + qos_txqs; 6365 } 6366 6367 static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, 6368 const struct mlx5e_profile *profile) 6369 { 6370 return mlx5e_profile_max_num_channels(mdev, profile); 6371 } 6372 6373 struct net_device * 6374 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) 6375 { 6376 struct net_device *netdev; 6377 unsigned int txqs, rxqs; 6378 int err; 6379 6380 txqs = mlx5e_get_max_num_txqs(mdev, profile); 6381 rxqs = mlx5e_get_max_num_rxqs(mdev, profile); 6382 6383 netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); 6384 if (!netdev) { 6385 mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); 6386 return NULL; 6387 } 6388 6389 err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev); 6390 if (err) { 6391 mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); 6392 goto err_free_netdev; 6393 } 6394 6395 netif_carrier_off(netdev); 6396 netif_tx_disable(netdev); 6397 dev_net_set(netdev, mlx5_core_net(mdev)); 6398 6399 return netdev; 6400 6401 err_free_netdev: 6402 free_netdev(netdev); 6403 6404 return NULL; 6405 } 6406 6407 static void mlx5e_update_features(struct net_device *netdev) 6408 { 6409 if (netdev->reg_state != NETREG_REGISTERED) 6410 return; /* features will be updated on netdev registration */ 6411 6412 rtnl_lock(); 6413 netdev_lock(netdev); 6414 netdev_update_features(netdev); 6415 netdev_unlock(netdev); 6416 rtnl_unlock(); 6417 } 6418 6419 static void mlx5e_reset_channels(struct net_device *netdev) 6420 { 6421 netdev_reset_tc(netdev); 6422 } 6423 6424 int mlx5e_attach_netdev(struct mlx5e_priv *priv) 6425 { 6426 const bool need_lock = priv->netdev->reg_state == NETREG_REGISTERED; 6427 const struct mlx5e_profile *profile = priv->profile; 6428 int max_nch; 6429 int err; 6430 6431 clear_bit(MLX5E_STATE_DESTROYING, &priv->state); 6432 if (priv->fs) 6433 mlx5e_fs_set_state_destroy(priv->fs, 6434 !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); 6435 6436 /* Validate the max_wqe_size_sq capability. */ 6437 if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { 6438 mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %u\n", 6439 mlx5e_get_max_sq_wqebbs(priv->mdev), (unsigned int)MLX5E_MAX_TX_WQEBBS); 6440 return -EIO; 6441 } 6442 6443 /* max number of channels may have changed */ 6444 max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); 6445 if (priv->channels.params.num_channels > max_nch) { 6446 mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); 6447 /* Reducing the number of channels - RXFH has to be reset, and 6448 * mlx5e_num_channels_changed below will build the RQT. 6449 */ 6450 priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; 6451 priv->channels.params.num_channels = max_nch; 6452 if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { 6453 mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n"); 6454 mlx5e_params_mqprio_reset(&priv->channels.params); 6455 } 6456 } 6457 if (max_nch != priv->max_nch) { 6458 mlx5_core_warn(priv->mdev, 6459 "MLX5E: Updating max number of channels from %u to %u\n", 6460 priv->max_nch, max_nch); 6461 priv->max_nch = max_nch; 6462 } 6463 6464 /* 1. Set the real number of queues in the kernel the first time. 6465 * 2. Set our default XPS cpumask. 6466 * 3. Build the RQT. 
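	 * All three are handled by mlx5e_num_channels_changed() below.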
6467 * 6468 * Locking is required by netif_set_real_num_*_queues in case the 6469 * netdev has been registered by this point (if this function was called 6470 * in the reload or resume flow). 6471 */ 6472 if (need_lock) { 6473 rtnl_lock(); 6474 netdev_lock(priv->netdev); 6475 } 6476 err = mlx5e_num_channels_changed(priv); 6477 if (need_lock) { 6478 netdev_unlock(priv->netdev); 6479 rtnl_unlock(); 6480 } 6481 if (err) 6482 goto out; 6483 6484 err = profile->init_tx(priv); 6485 if (err) 6486 goto out; 6487 6488 err = profile->init_rx(priv); 6489 if (err) 6490 goto err_cleanup_tx; 6491 6492 if (profile->enable) 6493 profile->enable(priv); 6494 6495 mlx5e_update_features(priv->netdev); 6496 6497 return 0; 6498 6499 err_cleanup_tx: 6500 profile->cleanup_tx(priv); 6501 6502 out: 6503 mlx5e_reset_channels(priv->netdev); 6504 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6505 if (priv->fs) 6506 mlx5e_fs_set_state_destroy(priv->fs, 6507 !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); 6508 cancel_work_sync(&priv->update_stats_work); 6509 return err; 6510 } 6511 6512 void mlx5e_detach_netdev(struct mlx5e_priv *priv) 6513 { 6514 const struct mlx5e_profile *profile = priv->profile; 6515 6516 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6517 if (priv->fs) 6518 mlx5e_fs_set_state_destroy(priv->fs, 6519 !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); 6520 6521 if (profile->disable) 6522 profile->disable(priv); 6523 flush_workqueue(priv->wq); 6524 6525 profile->cleanup_rx(priv); 6526 profile->cleanup_tx(priv); 6527 mlx5e_reset_channels(priv->netdev); 6528 cancel_work_sync(&priv->update_stats_work); 6529 } 6530 6531 static int 6532 mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, 6533 const struct mlx5e_profile *new_profile, void *new_ppriv) 6534 { 6535 struct mlx5e_priv *priv = netdev_priv(netdev); 6536 int err; 6537 6538 err = mlx5e_priv_init(priv, new_profile, netdev, mdev); 6539 if (err) { 6540 mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); 6541 return err; 6542 } 6543 netif_carrier_off(netdev); 6544 priv->profile = new_profile; 6545 priv->ppriv = new_ppriv; 6546 err = new_profile->init(priv->mdev, priv->netdev); 6547 if (err) 6548 goto priv_cleanup; 6549 6550 return 0; 6551 6552 priv_cleanup: 6553 mlx5e_priv_cleanup(priv); 6554 return err; 6555 } 6556 6557 static int 6558 mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, 6559 const struct mlx5e_profile *new_profile, void *new_ppriv) 6560 { 6561 struct mlx5e_priv *priv = netdev_priv(netdev); 6562 int err; 6563 6564 err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); 6565 if (err) 6566 return err; 6567 6568 err = mlx5e_attach_netdev(priv); 6569 if (err) 6570 goto profile_cleanup; 6571 return err; 6572 6573 profile_cleanup: 6574 new_profile->cleanup(priv); 6575 mlx5e_priv_cleanup(priv); 6576 return err; 6577 } 6578 6579 int mlx5e_netdev_change_profile(struct net_device *netdev, 6580 struct mlx5_core_dev *mdev, 6581 const struct mlx5e_profile *new_profile, 6582 void *new_ppriv) 6583 { 6584 struct mlx5e_priv *priv = netdev_priv(netdev); 6585 const struct mlx5e_profile *orig_profile; 6586 int err, rollback_err; 6587 void *orig_ppriv; 6588 6589 orig_profile = priv->profile; 6590 orig_ppriv = priv->ppriv; 6591 6592 /* NULL could happen if previous change_profile failed to rollback */ 6593 if (priv->profile) { 6594 WARN_ON_ONCE(priv->mdev != mdev); 6595 /* cleanup old profile */ 6596 mlx5e_detach_netdev(priv); 6597 priv->profile->cleanup(priv); 6598 
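		/* mlx5e_priv_cleanup() zeroes priv, preserving only the
		 * DESTROYING bit.
		 */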
mlx5e_priv_cleanup(priv); 6599 } 6600 /* priv members are not valid from this point ... */ 6601 6602 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 6603 mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); 6604 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6605 return -EIO; 6606 } 6607 6608 err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); 6609 if (err) { /* roll back to original profile */ 6610 netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); 6611 goto rollback; 6612 } 6613 6614 return 0; 6615 6616 rollback: 6617 if (!orig_profile) { 6618 netdev_warn(netdev, "no original profile to rollback to\n"); 6619 priv->profile = NULL; 6620 return err; 6621 } 6622 6623 rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); 6624 if (rollback_err) { 6625 netdev_err(netdev, "failed to rollback to orig profile, %d\n", 6626 rollback_err); 6627 priv->profile = NULL; 6628 } 6629 return err; 6630 } 6631 6632 void mlx5e_netdev_attach_nic_profile(struct net_device *netdev, 6633 struct mlx5_core_dev *mdev) 6634 { 6635 mlx5e_netdev_change_profile(netdev, mdev, &mlx5e_nic_profile, NULL); 6636 } 6637 6638 void mlx5e_destroy_netdev(struct net_device *netdev) 6639 { 6640 struct mlx5e_priv *priv = netdev_priv(netdev); 6641 6642 if (priv->profile) 6643 mlx5e_priv_cleanup(priv); 6644 free_netdev(netdev); 6645 } 6646 6647 static int _mlx5e_resume(struct auxiliary_device *adev) 6648 { 6649 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6650 struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); 6651 struct mlx5e_priv *priv = netdev_priv(mlx5e_dev->netdev); 6652 struct net_device *netdev = mlx5e_dev->netdev; 6653 struct mlx5_core_dev *mdev = edev->mdev; 6654 struct mlx5_core_dev *pos, *to; 6655 int err, i; 6656 6657 if (netif_device_present(netdev)) 6658 return 0; 6659 6660 mlx5_sd_for_each_dev(i, mdev, pos) { 6661 err = mlx5e_create_mdev_resources(pos, true); 6662 if (err) 6663 goto err_destroy_mdev_res; 6664 } 6665 6666 err = mlx5e_attach_netdev(priv); 6667 if (err) 6668 goto err_destroy_mdev_res; 6669 6670 return 0; 6671 6672 err_destroy_mdev_res: 6673 to = pos; 6674 mlx5_sd_for_each_dev_to(i, mdev, to, pos) 6675 mlx5e_destroy_mdev_resources(pos); 6676 return err; 6677 } 6678 6679 static int mlx5e_resume(struct auxiliary_device *adev) 6680 { 6681 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6682 struct mlx5_core_dev *mdev = edev->mdev; 6683 struct auxiliary_device *actual_adev; 6684 int err; 6685 6686 err = mlx5_sd_init(mdev); 6687 if (err) 6688 return err; 6689 6690 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6691 if (actual_adev) 6692 return _mlx5e_resume(actual_adev); 6693 return 0; 6694 } 6695 6696 static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) 6697 { 6698 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6699 struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); 6700 struct mlx5e_priv *priv = netdev_priv(mlx5e_dev->netdev); 6701 struct net_device *netdev = mlx5e_dev->netdev; 6702 struct mlx5_core_dev *mdev = edev->mdev; 6703 struct mlx5_core_dev *pos; 6704 int i; 6705 6706 if (!pre_netdev_reg && !netif_device_present(netdev)) { 6707 if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) 6708 mlx5_sd_for_each_dev(i, mdev, pos) 6709 mlx5e_destroy_mdev_resources(pos); 6710 return -ENODEV; 6711 } 6712 6713 mlx5e_detach_netdev(priv); 6714 mlx5_sd_for_each_dev(i, mdev, pos) 6715 mlx5e_destroy_mdev_resources(pos); 
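
	/* The netdev itself stays allocated; it has been detached above and
	 * the mdev resources of every device in the SD group destroyed.
	 */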
6716 6717 return 0; 6718 } 6719 6720 static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) 6721 { 6722 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6723 struct mlx5_core_dev *mdev = edev->mdev; 6724 struct auxiliary_device *actual_adev; 6725 int err = 0; 6726 6727 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6728 if (actual_adev) 6729 err = _mlx5e_suspend(actual_adev, false); 6730 6731 mlx5_sd_cleanup(mdev); 6732 return err; 6733 } 6734 6735 static int _mlx5e_probe(struct auxiliary_device *adev) 6736 { 6737 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6738 const struct mlx5e_profile *profile = &mlx5e_nic_profile; 6739 struct mlx5_core_dev *mdev = edev->mdev; 6740 struct mlx5e_dev *mlx5e_dev; 6741 struct net_device *netdev; 6742 struct mlx5e_priv *priv; 6743 int err; 6744 6745 mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev); 6746 if (IS_ERR(mlx5e_dev)) 6747 return PTR_ERR(mlx5e_dev); 6748 auxiliary_set_drvdata(adev, mlx5e_dev); 6749 6750 err = mlx5e_devlink_port_register(mlx5e_dev, mdev); 6751 if (err) { 6752 mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); 6753 goto err_devlink_unregister; 6754 } 6755 6756 netdev = mlx5e_create_netdev(mdev, profile); 6757 if (!netdev) { 6758 mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); 6759 err = -ENOMEM; 6760 goto err_devlink_port_unregister; 6761 } 6762 SET_NETDEV_DEVLINK_PORT(netdev, &mlx5e_dev->dl_port); 6763 mlx5e_dev->netdev = netdev; 6764 6765 mlx5e_build_nic_netdev(netdev); 6766 6767 priv = netdev_priv(netdev); 6768 6769 priv->profile = profile; 6770 priv->ppriv = NULL; 6771 6772 err = profile->init(mdev, netdev); 6773 if (err) { 6774 mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); 6775 goto err_destroy_netdev; 6776 } 6777 6778 err = _mlx5e_resume(adev); 6779 if (err) { 6780 mlx5_core_err(mdev, "_mlx5e_resume failed, %d\n", err); 6781 goto err_profile_cleanup; 6782 } 6783 6784 err = register_netdev(netdev); 6785 if (err) { 6786 mlx5_core_err(mdev, "register_netdev failed, %d\n", err); 6787 goto err_resume; 6788 } 6789 6790 mlx5e_dcbnl_init_app(priv); 6791 mlx5_core_uplink_netdev_set(mdev, netdev); 6792 mlx5e_params_print_info(mdev, &priv->channels.params); 6793 return 0; 6794 6795 err_resume: 6796 _mlx5e_suspend(adev, true); 6797 err_profile_cleanup: 6798 profile->cleanup(priv); 6799 err_destroy_netdev: 6800 mlx5e_destroy_netdev(netdev); 6801 err_devlink_port_unregister: 6802 mlx5e_devlink_port_unregister(mlx5e_dev); 6803 err_devlink_unregister: 6804 mlx5e_destroy_devlink(mlx5e_dev); 6805 return err; 6806 } 6807 6808 static int mlx5e_probe(struct auxiliary_device *adev, 6809 const struct auxiliary_device_id *id) 6810 { 6811 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6812 struct mlx5_core_dev *mdev = edev->mdev; 6813 struct auxiliary_device *actual_adev; 6814 int err; 6815 6816 err = mlx5_sd_init(mdev); 6817 if (err) 6818 return err; 6819 6820 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6821 if (actual_adev) 6822 return _mlx5e_probe(actual_adev); 6823 return 0; 6824 } 6825 6826 static void _mlx5e_remove(struct auxiliary_device *adev) 6827 { 6828 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6829 struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); 6830 struct net_device *netdev = mlx5e_dev->netdev; 6831 struct mlx5e_priv *priv = netdev_priv(netdev); 6832 struct mlx5_core_dev *mdev = edev->mdev; 6833 6834 mlx5_eswitch_safe_aux_devs_remove(mdev); 6835 
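	/* Clear mdev's uplink netdev pointer before the netdev is
	 * unregistered and freed below.
	 */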
	mlx5_core_uplink_netdev_set(mdev, NULL);

	if (priv->profile)
		mlx5e_dcbnl_delete_app(priv);
	/* When the driver is unloaded, the netdev is still registered if it
	 * came from legacy mode. In switchdev mode it has already been
	 * unregistered before switching to the NIC profile.
	 */
	if (netdev->reg_state == NETREG_REGISTERED) {
		unregister_netdev(netdev);
		_mlx5e_suspend(adev, false);
	} else {
		struct mlx5_core_dev *pos;
		int i;

		if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
			mlx5_sd_for_each_dev(i, mdev, pos)
				mlx5e_destroy_mdev_resources(pos);
		else
			_mlx5e_suspend(adev, true);
	}
	/* Skip profile cleanup if an earlier profile rollback failed
	 * (priv->profile is then NULL).
	 */
	if (priv->profile)
		priv->profile->cleanup(priv);
	mlx5e_destroy_netdev(netdev);
	mlx5e_devlink_port_unregister(mlx5e_dev);
	mlx5e_destroy_devlink(mlx5e_dev);
}

static void mlx5e_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = edev->mdev;
	struct auxiliary_device *actual_adev;

	actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
	if (actual_adev)
		_mlx5e_remove(actual_adev);

	mlx5_sd_cleanup(mdev);
}

static const struct auxiliary_device_id mlx5e_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".eth", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table);

static struct auxiliary_driver mlx5e_driver = {
	.name = "eth",
	.probe = mlx5e_probe,
	.remove = mlx5e_remove,
	.suspend = mlx5e_suspend,
	.resume = mlx5e_resume,
	.id_table = mlx5e_id_table,
};

int mlx5e_init(void)
{
	int ret;

	mlx5e_build_ptys2ethtool_map();
	ret = auxiliary_driver_register(&mlx5e_driver);
	if (ret)
		return ret;

	ret = mlx5e_rep_init();
	if (ret)
		auxiliary_driver_unregister(&mlx5e_driver);
	return ret;
}

void mlx5e_cleanup(void)
{
	mlx5e_rep_cleanup();
	auxiliary_driver_unregister(&mlx5e_driver);
}
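
/* mlx5e_init()/mlx5e_cleanup() register and unregister the "eth" auxiliary
 * driver (bound to MLX5_ADEV_NAME ".eth" devices) together with the
 * representor (mlx5e_rep) support.
 */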