/*
 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/dim.h>
#include <net/tc_act/tc_gact.h>
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <linux/bpf.h>
#include <linux/debugfs.h>
#include <linux/if_bridge.h>
#include <linux/filter.h>
#include <net/netdev_lock.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/pkt_sched.h>
#include <net/xdp_sock_drv.h>
#include "eswitch.h"
#include "en.h"
#include "en/dim.h"
#include "en/txrx.h"
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
#include "en_accel/psp.h"
#include "en_accel/macsec.h"
#include "en_accel/en_accel.h"
#include "en_accel/ktls.h"
#include "lib/vxlan.h"
#include "lib/clock.h"
#include "en/port.h"
#include "en/xdp.h"
#include "lib/eq.h"
#include "en/monitor_stats.h"
#include "en/health.h"
#include "en/params.h"
#include "en/xsk/pool.h"
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
#include "en/hv_vhca_stats.h"
#include "en/devlink.h"
#include "lib/mlx5.h"
#include "en/ptp.h"
#include "en/htb.h"
#include "qos.h"
#include "en/trap.h"
#include "lib/devcom.h"
#include "lib/sd.h"
#include "en/pcie_cong_event.h"

static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev)
{
        if (!MLX5_CAP_GEN(mdev, shampo) ||
            !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge))
                return false;

        /* Our HW-GRO implementation relies on "KSM Mkey" for
         * SHAMPO headers buffer mapping
         */
        if (!MLX5_CAP_GEN(mdev, fixed_buffer_size))
                return false;

        if (!MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer_valid))
                return false;

        if (MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer) >
            MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE)
                return false;

        return true;
}

bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
                                            enum mlx5e_mpwrq_umr_mode umr_mode)
{
        u16 umr_wqebbs, max_wqebbs;
        bool
striding_rq_umr; 109 110 striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && 111 MLX5_CAP_ETH(mdev, reg_umr_sq); 112 if (!striding_rq_umr) 113 return false; 114 115 umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); 116 max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); 117 /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is 118 * calculated from mlx5e_get_max_sq_aligned_wqebbs. 119 */ 120 if (WARN_ON(umr_wqebbs > max_wqebbs)) 121 return false; 122 123 return true; 124 } 125 126 void mlx5e_update_carrier(struct mlx5e_priv *priv) 127 { 128 struct mlx5_core_dev *mdev = priv->mdev; 129 u8 port_state; 130 bool up; 131 132 port_state = mlx5_query_vport_state(mdev, 133 MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 134 0); 135 136 up = port_state == VPORT_STATE_UP; 137 if (up == netif_carrier_ok(priv->netdev)) 138 netif_carrier_event(priv->netdev); 139 if (up) { 140 netdev_info(priv->netdev, "Link up\n"); 141 netif_carrier_on(priv->netdev); 142 } else { 143 netdev_info(priv->netdev, "Link down\n"); 144 netif_carrier_off(priv->netdev); 145 } 146 } 147 148 static void mlx5e_update_carrier_work(struct work_struct *work) 149 { 150 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 151 update_carrier_work); 152 153 mutex_lock(&priv->state_lock); 154 if (test_bit(MLX5E_STATE_OPENED, &priv->state)) 155 if (priv->profile->update_carrier) 156 priv->profile->update_carrier(priv); 157 mutex_unlock(&priv->state_lock); 158 } 159 160 static void mlx5e_update_stats_work(struct work_struct *work) 161 { 162 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 163 update_stats_work); 164 165 mutex_lock(&priv->state_lock); 166 priv->profile->update_stats(priv); 167 mutex_unlock(&priv->state_lock); 168 } 169 170 void mlx5e_queue_update_stats(struct mlx5e_priv *priv) 171 { 172 if (!priv->profile->update_stats) 173 return; 174 175 if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state))) 176 return; 177 178 queue_work(priv->wq, &priv->update_stats_work); 179 } 180 181 static int async_event(struct notifier_block *nb, unsigned long event, void *data) 182 { 183 struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); 184 struct mlx5_eqe *eqe = data; 185 186 if (event != MLX5_EVENT_TYPE_PORT_CHANGE) 187 return NOTIFY_DONE; 188 189 switch (eqe->sub_type) { 190 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 191 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 192 queue_work(priv->wq, &priv->update_carrier_work); 193 break; 194 default: 195 return NOTIFY_DONE; 196 } 197 198 return NOTIFY_OK; 199 } 200 201 static void mlx5e_enable_async_events(struct mlx5e_priv *priv) 202 { 203 priv->events_nb.notifier_call = async_event; 204 mlx5_notifier_register(priv->mdev, &priv->events_nb); 205 } 206 207 static void mlx5e_disable_async_events(struct mlx5e_priv *priv) 208 { 209 mlx5_notifier_unregister(priv->mdev, &priv->events_nb); 210 } 211 212 static int mlx5e_devcom_event_mpv(int event, void *my_data, void *event_data) 213 { 214 struct mlx5e_priv *slave_priv = my_data; 215 216 switch (event) { 217 case MPV_DEVCOM_MASTER_UP: 218 mlx5_devcom_comp_set_ready(slave_priv->devcom, true); 219 break; 220 case MPV_DEVCOM_MASTER_DOWN: 221 /* no need for comp set ready false since we unregister after 222 * and it hurts cleanup flow. 
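         * The component is unregistered anyway in mlx5e_devcom_cleanup_mpv()
         * when the affiliation is removed.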
223 */ 224 break; 225 case MPV_DEVCOM_IPSEC_MASTER_UP: 226 case MPV_DEVCOM_IPSEC_MASTER_DOWN: 227 mlx5e_ipsec_handle_mpv_event(event, my_data, event_data); 228 break; 229 } 230 231 return 0; 232 } 233 234 static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) 235 { 236 struct mlx5_devcom_match_attr attr = { 237 .key.val = *data, 238 }; 239 240 priv->devcom = mlx5_devcom_register_component(priv->mdev->priv.devc, 241 MLX5_DEVCOM_MPV, 242 &attr, 243 mlx5e_devcom_event_mpv, 244 priv); 245 if (!priv->devcom) 246 return -EINVAL; 247 248 if (mlx5_core_is_mp_master(priv->mdev)) { 249 mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP, 250 MPV_DEVCOM_MASTER_UP, priv); 251 mlx5e_ipsec_send_event(priv, MPV_DEVCOM_IPSEC_MASTER_UP); 252 } 253 254 return 0; 255 } 256 257 static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) 258 { 259 if (!priv->devcom) 260 return; 261 262 if (mlx5_core_is_mp_master(priv->mdev)) { 263 mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_DOWN, 264 MPV_DEVCOM_MASTER_DOWN, priv); 265 mlx5e_ipsec_send_event(priv, MPV_DEVCOM_IPSEC_MASTER_DOWN); 266 } 267 268 mlx5_devcom_unregister_component(priv->devcom); 269 priv->devcom = NULL; 270 } 271 272 static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) 273 { 274 struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); 275 struct mlx5_devlink_trap_event_ctx *trap_event_ctx = data; 276 int err; 277 278 switch (event) { 279 case MLX5_DRIVER_EVENT_TYPE_TRAP: 280 err = mlx5e_handle_trap_event(priv, trap_event_ctx->trap); 281 if (err) { 282 trap_event_ctx->err = err; 283 return NOTIFY_BAD; 284 } 285 break; 286 case MLX5_DRIVER_EVENT_AFFILIATION_DONE: 287 if (mlx5e_devcom_init_mpv(priv, data)) 288 return NOTIFY_BAD; 289 break; 290 case MLX5_DRIVER_EVENT_AFFILIATION_REMOVED: 291 mlx5e_devcom_cleanup_mpv(priv); 292 break; 293 default: 294 return NOTIFY_DONE; 295 } 296 return NOTIFY_OK; 297 } 298 299 static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) 300 { 301 priv->blocking_events_nb.notifier_call = blocking_event; 302 mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb); 303 } 304 305 static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) 306 { 307 mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); 308 } 309 310 static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) 311 { 312 u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); 313 u32 sz; 314 315 sz = ALIGN(entries * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT); 316 317 return sz / MLX5_OCTWORD; 318 } 319 320 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, 321 struct mlx5e_icosq *sq, 322 struct mlx5e_umr_wqe *wqe) 323 { 324 struct mlx5_wqe_ctrl_seg *cseg = &wqe->hdr.ctrl; 325 struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->hdr.uctrl; 326 u16 octowords; 327 u8 ds_cnt; 328 329 ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift, 330 rq->mpwqe.umr_mode), 331 MLX5_SEND_WQE_DS); 332 333 cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | 334 ds_cnt); 335 cseg->umr_mkey = rq->mpwqe.umr_mkey_be; 336 337 ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; 338 octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode); 339 ucseg->xlt_octowords = cpu_to_be16(octowords); 340 ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); 341 } 342 343 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) 344 { 345 int wq_sz = 
mlx5_wq_ll_get_size(&rq->mpwqe.wq); 346 size_t alloc_size; 347 348 alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, 349 alloc_units.frag_pages, 350 rq->mpwqe.pages_per_wqe)); 351 352 rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); 353 if (!rq->mpwqe.info) 354 return -ENOMEM; 355 356 /* For deferred page release (release right before alloc), make sure 357 * that on first round release is not called. 358 */ 359 for (int i = 0; i < wq_sz; i++) { 360 struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i); 361 362 bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); 363 } 364 365 mlx5e_build_umr_wqe(rq, rq->icosq, 366 container_of(&rq->mpwqe.umr_wqe, 367 struct mlx5e_umr_wqe, hdr)); 368 369 return 0; 370 } 371 372 373 static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) 374 { 375 switch (umr_mode) { 376 case MLX5E_MPWRQ_UMR_MODE_ALIGNED: 377 return MLX5_MKC_ACCESS_MODE_MTT; 378 case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: 379 return MLX5_MKC_ACCESS_MODE_KSM; 380 case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: 381 return MLX5_MKC_ACCESS_MODE_KLMS; 382 case MLX5E_MPWRQ_UMR_MODE_TRIPLE: 383 return MLX5_MKC_ACCESS_MODE_KSM; 384 } 385 WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); 386 return 0; 387 } 388 389 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, 390 u32 npages, u8 page_shift, u32 *umr_mkey, 391 dma_addr_t filler_addr, 392 enum mlx5e_mpwrq_umr_mode umr_mode, 393 u32 xsk_chunk_size) 394 { 395 struct mlx5_mtt *mtt; 396 struct mlx5_ksm *ksm; 397 struct mlx5_klm *klm; 398 u32 octwords; 399 int inlen; 400 void *mkc; 401 u32 *in; 402 int err; 403 int i; 404 405 if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED || 406 umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) && 407 !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { 408 mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); 409 return -EINVAL; 410 } 411 412 octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode); 413 414 inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), 415 MLX5_OCTWORD, octwords); 416 if (inlen < 0) 417 return inlen; 418 419 in = kvzalloc(inlen, GFP_KERNEL); 420 if (!in) 421 return -ENOMEM; 422 423 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 424 425 MLX5_SET(mkc, mkc, free, 1); 426 MLX5_SET(mkc, mkc, umr_en, 1); 427 MLX5_SET(mkc, mkc, lw, 1); 428 MLX5_SET(mkc, mkc, lr, 1); 429 MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode)); 430 mlx5e_mkey_set_relaxed_ordering(mdev, mkc); 431 MLX5_SET(mkc, mkc, qpn, 0xffffff); 432 MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); 433 MLX5_SET64(mkc, mkc, len, npages << page_shift); 434 MLX5_SET(mkc, mkc, translations_octword_size, octwords); 435 if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) 436 MLX5_SET(mkc, mkc, log_page_size, page_shift - 2); 437 else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) 438 MLX5_SET(mkc, mkc, log_page_size, page_shift); 439 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); 440 441 /* Initialize the mkey with all MTTs pointing to a default 442 * page (filler_addr). When the channels are activated, UMR 443 * WQEs will redirect the RX WQEs to the actual memory from 444 * the RQ's pool, while the gaps (wqe_overflow) remain mapped 445 * to the default page. 
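         * The default page is the per-RQ wqe_overflow drop page mapped by
         * mlx5e_alloc_mpwqe_rq_drop_page() and passed in here as filler_addr.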
446 */ 447 switch (umr_mode) { 448 case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: 449 klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 450 for (i = 0; i < npages; i++) { 451 klm[i << 1] = (struct mlx5_klm) { 452 .va = cpu_to_be64(filler_addr), 453 .bcount = cpu_to_be32(xsk_chunk_size), 454 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 455 }; 456 klm[(i << 1) + 1] = (struct mlx5_klm) { 457 .va = cpu_to_be64(filler_addr), 458 .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size), 459 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 460 }; 461 } 462 break; 463 case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: 464 ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 465 for (i = 0; i < npages; i++) 466 ksm[i] = (struct mlx5_ksm) { 467 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 468 .va = cpu_to_be64(filler_addr), 469 }; 470 break; 471 case MLX5E_MPWRQ_UMR_MODE_ALIGNED: 472 mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 473 for (i = 0; i < npages; i++) 474 mtt[i] = (struct mlx5_mtt) { 475 .ptag = cpu_to_be64(filler_addr), 476 }; 477 break; 478 case MLX5E_MPWRQ_UMR_MODE_TRIPLE: 479 ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 480 for (i = 0; i < npages * 4; i++) { 481 ksm[i] = (struct mlx5_ksm) { 482 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), 483 .va = cpu_to_be64(filler_addr), 484 }; 485 } 486 break; 487 } 488 489 err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); 490 491 kvfree(in); 492 return err; 493 } 494 495 static int mlx5e_create_umr_ksm_mkey(struct mlx5_core_dev *mdev, 496 u64 nentries, u8 log_entry_size, 497 u32 *umr_mkey) 498 { 499 int inlen; 500 void *mkc; 501 u32 *in; 502 int err; 503 504 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 505 506 in = kvzalloc(inlen, GFP_KERNEL); 507 if (!in) 508 return -ENOMEM; 509 510 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 511 512 MLX5_SET(mkc, mkc, free, 1); 513 MLX5_SET(mkc, mkc, umr_en, 1); 514 MLX5_SET(mkc, mkc, lw, 1); 515 MLX5_SET(mkc, mkc, lr, 1); 516 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KSM); 517 mlx5e_mkey_set_relaxed_ordering(mdev, mkc); 518 MLX5_SET(mkc, mkc, qpn, 0xffffff); 519 MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); 520 MLX5_SET(mkc, mkc, translations_octword_size, nentries); 521 MLX5_SET(mkc, mkc, log_page_size, log_entry_size); 522 MLX5_SET64(mkc, mkc, len, nentries << log_entry_size); 523 err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); 524 525 kvfree(in); 526 return err; 527 } 528 529 static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) 530 { 531 u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0; 532 u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); 533 u32 num_entries, max_num_entries; 534 u32 umr_mkey; 535 int err; 536 537 max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode); 538 539 /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. 
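         * If it does, WARN and log instead of failing hard; such an overflow
         * would indicate a bug in the MPWRQ parameter calculations.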
*/ 540 if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe, 541 &num_entries) || 542 num_entries > max_num_entries)) 543 mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n", 544 __func__, wq_size, rq->mpwqe.mtts_per_wqe, 545 max_num_entries); 546 547 err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, 548 &umr_mkey, rq->wqe_overflow.addr, 549 rq->mpwqe.umr_mode, xsk_chunk_size); 550 rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); 551 return err; 552 } 553 554 static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, 555 u16 hd_per_wq, __be32 *umr_mkey) 556 { 557 u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); 558 u32 mkey; 559 int err; 560 561 if (max_ksm_size < hd_per_wq) { 562 mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", 563 max_ksm_size, hd_per_wq); 564 return -EINVAL; 565 } 566 567 err = mlx5e_create_umr_ksm_mkey(mdev, hd_per_wq, 568 MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, 569 &mkey); 570 if (err) 571 return err; 572 573 *umr_mkey = cpu_to_be32(mkey); 574 return 0; 575 } 576 577 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) 578 { 579 struct mlx5e_wqe_frag_info next_frag = {}; 580 struct mlx5e_wqe_frag_info *prev = NULL; 581 int i; 582 583 WARN_ON(rq->xsk_pool); 584 585 next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; 586 587 /* Skip first release due to deferred release. */ 588 next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); 589 590 for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { 591 struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; 592 struct mlx5e_wqe_frag_info *frag = 593 &rq->wqe.frags[i << rq->wqe.info.log_num_frags]; 594 int f; 595 596 for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { 597 if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { 598 /* Pages are assigned at runtime. */ 599 next_frag.frag_page++; 600 next_frag.offset = 0; 601 if (prev) 602 prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); 603 } 604 *frag = next_frag; 605 606 /* prepare next */ 607 next_frag.offset += frag_info[f].frag_stride; 608 prev = frag; 609 } 610 } 611 612 if (prev) 613 prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); 614 } 615 616 static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) 617 { 618 int i; 619 620 /* Assumptions used by XSK batched allocator. */ 621 WARN_ON(rq->wqe.info.num_frags != 1); 622 WARN_ON(rq->wqe.info.log_num_frags != 0); 623 WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); 624 625 /* Considering the above assumptions a fragment maps to a single 626 * xsk_buff. 627 */ 628 for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { 629 rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; 630 631 /* Skip first release due to deferred release as WQES are 632 * not allocated yet. 
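         * This mirrors the skip_release_bitmap initialization done for
         * striding RQ in mlx5e_rq_alloc_mpwqe_info().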
633 */ 634 rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); 635 } 636 } 637 638 static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) 639 { 640 int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); 641 int len = wq_sz << rq->wqe.info.log_num_frags; 642 struct mlx5e_wqe_frag_info *frags; 643 union mlx5e_alloc_units *aus; 644 int aus_sz; 645 646 if (rq->xsk_pool) 647 aus_sz = sizeof(*aus->xsk_buffs); 648 else 649 aus_sz = sizeof(*aus->frag_pages); 650 651 aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); 652 if (!aus) 653 return -ENOMEM; 654 655 frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node); 656 if (!frags) { 657 kvfree(aus); 658 return -ENOMEM; 659 } 660 661 rq->wqe.alloc_units = aus; 662 rq->wqe.frags = frags; 663 664 if (rq->xsk_pool) 665 mlx5e_init_xsk_buffs(rq); 666 else 667 mlx5e_init_frags_partition(rq); 668 669 return 0; 670 } 671 672 static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq) 673 { 674 kvfree(rq->wqe.frags); 675 kvfree(rq->wqe.alloc_units); 676 } 677 678 static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) 679 { 680 struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work); 681 682 mlx5e_reporter_rq_cqe_err(rq); 683 } 684 685 static void mlx5e_rq_timeout_work(struct work_struct *timeout_work) 686 { 687 struct mlx5e_rq *rq = container_of(timeout_work, 688 struct mlx5e_rq, 689 rx_timeout_work); 690 691 /* Acquire netdev instance lock to synchronize with channel close and 692 * reopen flows. Either successfully obtain the lock, or detect that 693 * channels are closing for another reason, making this work no longer 694 * necessary. 695 */ 696 while (!netdev_trylock(rq->netdev)) { 697 if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) 698 return; 699 msleep(20); 700 } 701 702 mlx5e_reporter_rx_timeout(rq); 703 netdev_unlock(rq->netdev); 704 } 705 706 static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) 707 { 708 rq->wqe_overflow.page = alloc_page(GFP_KERNEL); 709 if (!rq->wqe_overflow.page) 710 return -ENOMEM; 711 712 rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, 713 PAGE_SIZE, rq->buff.map_dir); 714 if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { 715 __free_page(rq->wqe_overflow.page); 716 return -ENOMEM; 717 } 718 return 0; 719 } 720 721 static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) 722 { 723 dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, 724 rq->buff.map_dir); 725 __free_page(rq->wqe_overflow.page); 726 } 727 728 static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, 729 u32 xdp_frag_size, struct mlx5e_rq *rq) 730 { 731 struct mlx5_core_dev *mdev = c->mdev; 732 int err; 733 734 rq->wq_type = params->rq_wq_type; 735 rq->pdev = c->pdev; 736 rq->netdev = c->netdev; 737 rq->priv = c->priv; 738 rq->hwtstamp_config = &c->priv->hwtstamp_config; 739 rq->clock = mdev->clock; 740 rq->icosq = &c->icosq; 741 rq->ix = c->ix; 742 rq->channel = c; 743 rq->mdev = mdev; 744 rq->hw_mtu = 745 MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; 746 rq->xdpsq = &c->rq_xdpsq; 747 rq->stats = &c->priv->channel_stats[c->ix]->rq; 748 rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); 749 err = mlx5e_rq_set_handlers(rq, params, NULL); 750 if (err) 751 return err; 752 753 return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id, 754 xdp_frag_size); 755 } 756 757 static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, u16 hd_per_wq, 758 
int node) 759 { 760 struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; 761 762 shampo->hd_per_wq = hd_per_wq; 763 764 shampo->bitmap = bitmap_zalloc_node(hd_per_wq, GFP_KERNEL, node); 765 shampo->pages = kvzalloc_node(array_size(hd_per_wq, 766 sizeof(*shampo->pages)), 767 GFP_KERNEL, node); 768 if (!shampo->bitmap || !shampo->pages) 769 goto err_nomem; 770 771 return 0; 772 773 err_nomem: 774 kvfree(shampo->pages); 775 bitmap_free(shampo->bitmap); 776 777 return -ENOMEM; 778 } 779 780 static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) 781 { 782 kvfree(rq->mpwqe.shampo->pages); 783 bitmap_free(rq->mpwqe.shampo->bitmap); 784 } 785 786 static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, 787 struct mlx5e_params *params, 788 struct mlx5e_rq_param *rqp, 789 struct mlx5e_rq *rq, 790 u32 *pool_size, 791 int node) 792 { 793 void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); 794 u8 log_hd_per_page, log_hd_entry_size; 795 u16 hd_per_wq, hd_per_wqe; 796 u32 hd_pool_size; 797 int wq_size; 798 int err; 799 800 if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 801 return 0; 802 803 rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), 804 GFP_KERNEL, node); 805 if (!rq->mpwqe.shampo) 806 return -ENOMEM; 807 808 /* split headers data structures */ 809 hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); 810 err = mlx5e_rq_shampo_hd_info_alloc(rq, hd_per_wq, node); 811 if (err) 812 goto err_shampo_hd_info_alloc; 813 814 err = mlx5e_create_rq_hd_umr_mkey(mdev, hd_per_wq, 815 &rq->mpwqe.shampo->mkey_be); 816 if (err) 817 goto err_umr_mkey; 818 819 hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rqp); 820 wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); 821 822 BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); 823 if (hd_per_wqe >= MLX5E_SHAMPO_WQ_HEADER_PER_PAGE) { 824 log_hd_per_page = MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE; 825 log_hd_entry_size = MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; 826 } else { 827 log_hd_per_page = order_base_2(hd_per_wqe); 828 log_hd_entry_size = order_base_2(PAGE_SIZE / hd_per_wqe); 829 } 830 831 rq->mpwqe.shampo->hd_per_wqe = hd_per_wqe; 832 rq->mpwqe.shampo->hd_per_page = BIT(log_hd_per_page); 833 rq->mpwqe.shampo->log_hd_per_page = log_hd_per_page; 834 rq->mpwqe.shampo->log_hd_entry_size = log_hd_entry_size; 835 836 hd_pool_size = (hd_per_wqe * wq_size) >> log_hd_per_page; 837 838 if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { 839 /* Separate page pool for shampo headers */ 840 struct page_pool_params pp_params = { }; 841 842 pp_params.order = 0; 843 pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 844 pp_params.pool_size = hd_pool_size; 845 pp_params.nid = node; 846 pp_params.dev = rq->pdev; 847 pp_params.napi = rq->cq.napi; 848 pp_params.netdev = rq->netdev; 849 pp_params.dma_dir = rq->buff.map_dir; 850 pp_params.max_len = PAGE_SIZE; 851 852 rq->hd_page_pool = page_pool_create(&pp_params); 853 if (IS_ERR(rq->hd_page_pool)) { 854 err = PTR_ERR(rq->hd_page_pool); 855 rq->hd_page_pool = NULL; 856 goto err_hds_page_pool; 857 } 858 } else { 859 /* Common page pool, reserve space for headers. 
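         * rq->hd_page_pool stays NULL here; mlx5e_alloc_rq() later points it
         * at the common rq->page_pool once that pool is created.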
*/ 860 *pool_size += hd_pool_size; 861 rq->hd_page_pool = NULL; 862 } 863 864 /* gro only data structures */ 865 rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); 866 if (!rq->hw_gro_data) { 867 err = -ENOMEM; 868 goto err_hw_gro_data; 869 } 870 871 return 0; 872 873 err_hw_gro_data: 874 page_pool_destroy(rq->hd_page_pool); 875 err_hds_page_pool: 876 mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.shampo->mkey_be)); 877 err_umr_mkey: 878 mlx5e_rq_shampo_hd_info_free(rq); 879 err_shampo_hd_info_alloc: 880 kvfree(rq->mpwqe.shampo); 881 return err; 882 } 883 884 static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) 885 { 886 if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 887 return; 888 889 kvfree(rq->hw_gro_data); 890 if (rq->hd_page_pool != rq->page_pool) 891 page_pool_destroy(rq->hd_page_pool); 892 mlx5e_rq_shampo_hd_info_free(rq); 893 mlx5_core_destroy_mkey(rq->mdev, 894 be32_to_cpu(rq->mpwqe.shampo->mkey_be)); 895 kvfree(rq->mpwqe.shampo); 896 } 897 898 static int mlx5e_alloc_rq(struct mlx5e_params *params, 899 struct mlx5e_xsk_param *xsk, 900 struct mlx5e_rq_param *rqp, 901 int node, struct mlx5e_rq *rq) 902 { 903 struct mlx5_core_dev *mdev = rq->mdev; 904 void *rqc = rqp->rqc; 905 void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); 906 u32 pool_size; 907 int wq_sz; 908 int err; 909 int i; 910 911 rqp->wq.db_numa_node = node; 912 INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); 913 INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work); 914 915 if (params->xdp_prog) 916 bpf_prog_inc(params->xdp_prog); 917 RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); 918 919 rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; 920 rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); 921 pool_size = 1 << params->log_rq_mtu_frames; 922 923 rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); 924 925 switch (rq->wq_type) { 926 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 927 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, 928 &rq->wq_ctrl); 929 if (err) 930 goto err_rq_xdp_prog; 931 932 err = mlx5e_alloc_mpwqe_rq_drop_page(rq); 933 if (err) 934 goto err_rq_wq_destroy; 935 936 rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; 937 938 wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); 939 940 rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); 941 rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); 942 rq->mpwqe.pages_per_wqe = 943 mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, 944 rq->mpwqe.umr_mode); 945 rq->mpwqe.umr_wqebbs = 946 mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift, 947 rq->mpwqe.umr_mode); 948 rq->mpwqe.mtts_per_wqe = 949 mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift, 950 rq->mpwqe.umr_mode); 951 952 pool_size = rq->mpwqe.pages_per_wqe << 953 mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); 954 955 if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog) 956 pool_size *= 2; /* additional page per packet for the linear part */ 957 958 rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); 959 rq->mpwqe.num_strides = 960 BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); 961 rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); 962 963 rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); 964 965 err = mlx5e_create_rq_umr_mkey(mdev, rq); 966 if (err) 967 goto err_rq_drop_page; 968 969 err = mlx5e_rq_alloc_mpwqe_info(rq, node); 970 if (err) 971 goto err_rq_mkey; 972 973 err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node); 974 if (err) 
975 goto err_free_mpwqe_info; 976 977 break; 978 default: /* MLX5_WQ_TYPE_CYCLIC */ 979 err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, 980 &rq->wq_ctrl); 981 if (err) 982 goto err_rq_xdp_prog; 983 984 rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; 985 986 wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); 987 988 rq->wqe.info = rqp->frags_info; 989 rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; 990 991 err = mlx5e_init_wqe_alloc_info(rq, node); 992 if (err) 993 goto err_rq_wq_destroy; 994 } 995 996 if (xsk) { 997 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 998 MEM_TYPE_XSK_BUFF_POOL, NULL); 999 if (err) 1000 goto err_free_by_rq_type; 1001 xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); 1002 } else { 1003 /* Create a page_pool and register it with rxq */ 1004 struct page_pool_params pp_params = { 0 }; 1005 1006 pp_params.order = 0; 1007 pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 1008 pp_params.pool_size = pool_size; 1009 pp_params.nid = node; 1010 pp_params.dev = rq->pdev; 1011 pp_params.napi = rq->cq.napi; 1012 pp_params.netdev = rq->netdev; 1013 pp_params.dma_dir = rq->buff.map_dir; 1014 pp_params.max_len = PAGE_SIZE; 1015 pp_params.queue_idx = rq->ix; 1016 1017 /* Shampo header data split allow for unreadable netmem */ 1018 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 1019 pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; 1020 1021 /* page_pool can be used even when there is no rq->xdp_prog, 1022 * given page_pool does not handle DMA mapping there is no 1023 * required state to clear. And page_pool gracefully handle 1024 * elevated refcnt. 1025 */ 1026 rq->page_pool = page_pool_create(&pp_params); 1027 if (IS_ERR(rq->page_pool)) { 1028 err = PTR_ERR(rq->page_pool); 1029 rq->page_pool = NULL; 1030 goto err_free_by_rq_type; 1031 } 1032 if (!rq->hd_page_pool) 1033 rq->hd_page_pool = rq->page_pool; 1034 if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { 1035 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 1036 MEM_TYPE_PAGE_POOL, rq->page_pool); 1037 if (err) 1038 goto err_destroy_page_pool; 1039 } 1040 } 1041 1042 for (i = 0; i < wq_sz; i++) { 1043 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { 1044 struct mlx5e_rx_wqe_ll *wqe = 1045 mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); 1046 u32 byte_count = 1047 rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; 1048 u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) << 1049 rq->mpwqe.page_shift; 1050 u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? 
1051 0 : rq->buff.headroom; 1052 1053 wqe->data[0].addr = cpu_to_be64(dma_offset + headroom); 1054 wqe->data[0].byte_count = cpu_to_be32(byte_count); 1055 wqe->data[0].lkey = rq->mpwqe.umr_mkey_be; 1056 } else { 1057 struct mlx5e_rx_wqe_cyc *wqe = 1058 mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); 1059 int f; 1060 1061 for (f = 0; f < rq->wqe.info.num_frags; f++) { 1062 u32 frag_size = rq->wqe.info.arr[f].frag_size | 1063 MLX5_HW_START_PADDING; 1064 1065 wqe->data[f].byte_count = cpu_to_be32(frag_size); 1066 wqe->data[f].lkey = rq->mkey_be; 1067 } 1068 /* check if num_frags is not a pow of two */ 1069 if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { 1070 wqe->data[f].byte_count = 0; 1071 wqe->data[f].lkey = params->terminate_lkey_be; 1072 wqe->data[f].addr = 0; 1073 } 1074 } 1075 } 1076 1077 return 0; 1078 1079 err_destroy_page_pool: 1080 page_pool_destroy(rq->page_pool); 1081 err_free_by_rq_type: 1082 switch (rq->wq_type) { 1083 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 1084 mlx5e_rq_free_shampo(rq); 1085 err_free_mpwqe_info: 1086 kvfree(rq->mpwqe.info); 1087 err_rq_mkey: 1088 mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); 1089 err_rq_drop_page: 1090 mlx5e_free_mpwqe_rq_drop_page(rq); 1091 break; 1092 default: /* MLX5_WQ_TYPE_CYCLIC */ 1093 mlx5e_free_wqe_alloc_info(rq); 1094 } 1095 err_rq_wq_destroy: 1096 mlx5_wq_destroy(&rq->wq_ctrl); 1097 err_rq_xdp_prog: 1098 if (params->xdp_prog) 1099 bpf_prog_put(params->xdp_prog); 1100 1101 return err; 1102 } 1103 1104 static void mlx5e_free_rq(struct mlx5e_rq *rq) 1105 { 1106 kvfree(rq->dim); 1107 page_pool_destroy(rq->page_pool); 1108 1109 switch (rq->wq_type) { 1110 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 1111 mlx5e_rq_free_shampo(rq); 1112 kvfree(rq->mpwqe.info); 1113 mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); 1114 mlx5e_free_mpwqe_rq_drop_page(rq); 1115 break; 1116 default: /* MLX5_WQ_TYPE_CYCLIC */ 1117 mlx5e_free_wqe_alloc_info(rq); 1118 } 1119 1120 mlx5_wq_destroy(&rq->wq_ctrl); 1121 1122 if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { 1123 struct bpf_prog *old_prog; 1124 1125 old_prog = rcu_dereference_protected(rq->xdp_prog, 1126 lockdep_is_held(&rq->priv->state_lock)); 1127 if (old_prog) 1128 bpf_prog_put(old_prog); 1129 } 1130 xdp_rxq_info_unreg(&rq->xdp_rxq); 1131 } 1132 1133 int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter) 1134 { 1135 struct mlx5_core_dev *mdev = rq->mdev; 1136 u8 ts_format; 1137 void *in; 1138 void *rqc; 1139 void *wq; 1140 int inlen; 1141 int err; 1142 1143 inlen = MLX5_ST_SZ_BYTES(create_rq_in) + 1144 sizeof(u64) * rq->wq_ctrl.buf.npages; 1145 in = kvzalloc(inlen, GFP_KERNEL); 1146 if (!in) 1147 return -ENOMEM; 1148 1149 ts_format = mlx5_is_real_time_rq(mdev) ? 
1150 MLX5_TIMESTAMP_FORMAT_REAL_TIME : 1151 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; 1152 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 1153 wq = MLX5_ADDR_OF(rqc, rqc, wq); 1154 1155 memcpy(rqc, param->rqc, sizeof(param->rqc)); 1156 1157 MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); 1158 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); 1159 MLX5_SET(rqc, rqc, ts_format, ts_format); 1160 MLX5_SET(rqc, rqc, counter_set_id, q_counter); 1161 MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - 1162 MLX5_ADAPTER_PAGE_SHIFT); 1163 MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); 1164 1165 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { 1166 MLX5_SET(wq, wq, log_headers_buffer_entry_num, 1167 order_base_2(rq->mpwqe.shampo->hd_per_wq)); 1168 MLX5_SET(wq, wq, headers_mkey, 1169 be32_to_cpu(rq->mpwqe.shampo->mkey_be)); 1170 } 1171 1172 mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, 1173 (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); 1174 1175 err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); 1176 1177 kvfree(in); 1178 1179 return err; 1180 } 1181 1182 static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) 1183 { 1184 struct mlx5_core_dev *mdev = rq->mdev; 1185 1186 void *in; 1187 void *rqc; 1188 int inlen; 1189 int err; 1190 1191 inlen = MLX5_ST_SZ_BYTES(modify_rq_in); 1192 in = kvzalloc(inlen, GFP_KERNEL); 1193 if (!in) 1194 return -ENOMEM; 1195 1196 if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) 1197 mlx5e_rqwq_reset(rq); 1198 1199 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 1200 1201 MLX5_SET(modify_rq_in, in, rq_state, curr_state); 1202 MLX5_SET(rqc, rqc, state, next_state); 1203 1204 err = mlx5_core_modify_rq(mdev, rq->rqn, in); 1205 1206 kvfree(in); 1207 1208 return err; 1209 } 1210 1211 static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) 1212 { 1213 struct mlx5_cqwq *cqwq = &rq->cq.wq; 1214 struct mlx5_cqe64 *cqe; 1215 1216 if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { 1217 while ((cqe = mlx5_cqwq_get_cqe_enhanced_comp(cqwq))) 1218 mlx5_cqwq_pop(cqwq); 1219 } else { 1220 while ((cqe = mlx5_cqwq_get_cqe(cqwq))) 1221 mlx5_cqwq_pop(cqwq); 1222 } 1223 1224 mlx5_cqwq_update_db_record(cqwq); 1225 } 1226 1227 int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) 1228 { 1229 struct net_device *dev = rq->netdev; 1230 int err; 1231 1232 err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); 1233 if (err) { 1234 netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); 1235 return err; 1236 } 1237 1238 mlx5e_free_rx_descs(rq); 1239 mlx5e_flush_rq_cq(rq); 1240 1241 err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 1242 if (err) { 1243 netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); 1244 return err; 1245 } 1246 1247 return 0; 1248 } 1249 1250 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) 1251 { 1252 struct mlx5_core_dev *mdev = rq->mdev; 1253 void *in; 1254 void *rqc; 1255 int inlen; 1256 int err; 1257 1258 inlen = MLX5_ST_SZ_BYTES(modify_rq_in); 1259 in = kvzalloc(inlen, GFP_KERNEL); 1260 if (!in) 1261 return -ENOMEM; 1262 1263 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 1264 1265 MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); 1266 MLX5_SET64(modify_rq_in, in, modify_bitmask, 1267 MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); 1268 MLX5_SET(rqc, rqc, vsd, vsd); 1269 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); 1270 1271 err = mlx5_core_modify_rq(mdev, rq->rqn, in); 1272 1273 kvfree(in); 1274 1275 return err; 1276 } 1277 1278 void mlx5e_destroy_rq(struct mlx5e_rq *rq) 
1279 { 1280 mlx5_core_destroy_rq(rq->mdev, rq->rqn); 1281 } 1282 1283 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) 1284 { 1285 unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); 1286 1287 u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq)); 1288 1289 do { 1290 if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes) 1291 return 0; 1292 1293 msleep(20); 1294 } while (time_before(jiffies, exp_time)); 1295 1296 netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", 1297 rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); 1298 1299 queue_work(rq->priv->wq, &rq->rx_timeout_work); 1300 1301 return -ETIMEDOUT; 1302 } 1303 1304 void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) 1305 { 1306 struct mlx5_wq_ll *wq; 1307 u16 head; 1308 int i; 1309 1310 if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) 1311 return; 1312 1313 wq = &rq->mpwqe.wq; 1314 head = wq->head; 1315 1316 /* Release WQEs that are in missing state: they have been 1317 * popped from the list after completion but were not freed 1318 * due to deferred release. 1319 * Also free the linked-list reserved entry, hence the "+ 1". 1320 */ 1321 for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) { 1322 rq->dealloc_wqe(rq, head); 1323 head = mlx5_wq_ll_get_wqe_next_ix(wq, head); 1324 } 1325 1326 rq->mpwqe.actual_wq_head = wq->head; 1327 rq->mpwqe.umr_in_progress = 0; 1328 rq->mpwqe.umr_completed = 0; 1329 1330 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { 1331 struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; 1332 u16 len; 1333 1334 len = (shampo->pi - shampo->ci) & shampo->hd_per_wq; 1335 mlx5e_shampo_fill_umr(rq, len); 1336 } 1337 } 1338 1339 void mlx5e_free_rx_descs(struct mlx5e_rq *rq) 1340 { 1341 __be16 wqe_ix_be; 1342 u16 wqe_ix; 1343 1344 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { 1345 struct mlx5_wq_ll *wq = &rq->mpwqe.wq; 1346 1347 mlx5e_free_rx_missing_descs(rq); 1348 1349 while (!mlx5_wq_ll_is_empty(wq)) { 1350 struct mlx5e_rx_wqe_ll *wqe; 1351 1352 wqe_ix_be = *wq->tail_next; 1353 wqe_ix = be16_to_cpu(wqe_ix_be); 1354 wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix); 1355 rq->dealloc_wqe(rq, wqe_ix); 1356 mlx5_wq_ll_pop(wq, wqe_ix_be, 1357 &wqe->next.next_wqe_index); 1358 } 1359 1360 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 1361 mlx5e_shampo_dealloc_hd(rq); 1362 } else { 1363 struct mlx5_wq_cyc *wq = &rq->wqe.wq; 1364 u16 missing = mlx5_wq_cyc_missing(wq); 1365 u16 head = mlx5_wq_cyc_get_head(wq); 1366 1367 while (!mlx5_wq_cyc_is_empty(wq)) { 1368 wqe_ix = mlx5_wq_cyc_get_tail(wq); 1369 rq->dealloc_wqe(rq, wqe_ix); 1370 mlx5_wq_cyc_pop(wq); 1371 } 1372 /* Missing slots might also contain unreleased pages due to 1373 * deferred release. 
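         * Walk the missing slots explicitly so dealloc_wqe() can release any
         * pages that are still held.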
1374 */ 1375 while (missing--) { 1376 wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++); 1377 rq->dealloc_wqe(rq, wqe_ix); 1378 } 1379 } 1380 1381 } 1382 1383 int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, 1384 struct mlx5e_xsk_param *xsk, int node, u16 q_counter, 1385 struct mlx5e_rq *rq) 1386 { 1387 struct mlx5_core_dev *mdev = rq->mdev; 1388 int err; 1389 1390 if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) 1391 __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); 1392 1393 err = mlx5e_alloc_rq(params, xsk, param, node, rq); 1394 if (err) 1395 return err; 1396 1397 err = mlx5e_create_rq(rq, param, q_counter); 1398 if (err) 1399 goto err_free_rq; 1400 1401 err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 1402 if (err) 1403 goto err_destroy_rq; 1404 1405 if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) 1406 __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); 1407 1408 if (rq->channel && !params->rx_dim_enabled) { 1409 rq->channel->rx_cq_moder = params->rx_cq_moderation; 1410 } else if (rq->channel) { 1411 u8 cq_period_mode; 1412 1413 cq_period_mode = params->rx_moder_use_cqe_mode ? 1414 DIM_CQ_PERIOD_MODE_START_FROM_CQE : 1415 DIM_CQ_PERIOD_MODE_START_FROM_EQE; 1416 mlx5e_reset_rx_moderation(&rq->channel->rx_cq_moder, cq_period_mode, 1417 params->rx_dim_enabled); 1418 1419 err = mlx5e_dim_rx_change(rq, params->rx_dim_enabled); 1420 if (err) 1421 goto err_destroy_rq; 1422 } 1423 1424 /* We disable csum_complete when XDP is enabled since 1425 * XDP programs might manipulate packets which will render 1426 * skb->checksum incorrect. 1427 */ 1428 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) 1429 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state); 1430 1431 /* For CQE compression on striding RQ, use stride index provided by 1432 * HW if capability is supported. 1433 */ 1434 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && 1435 MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) 1436 __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state); 1437 1438 /* For enhanced CQE compression packet processing. decompress 1439 * session according to the enhanced layout. 1440 */ 1441 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) && 1442 MLX5_CAP_GEN(mdev, enhanced_cqe_compression)) 1443 __set_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state); 1444 1445 return 0; 1446 1447 err_destroy_rq: 1448 mlx5e_destroy_rq(rq); 1449 err_free_rq: 1450 mlx5e_free_rq(rq); 1451 1452 return err; 1453 } 1454 1455 void mlx5e_activate_rq(struct mlx5e_rq *rq) 1456 { 1457 set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); 1458 } 1459 1460 void mlx5e_deactivate_rq(struct mlx5e_rq *rq) 1461 { 1462 clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); 1463 synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. 
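                           * after MLX5E_RQ_STATE_ENABLED has been cleared.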
*/ 1464 } 1465 1466 void mlx5e_close_rq(struct mlx5e_rq *rq) 1467 { 1468 if (rq->dim) 1469 cancel_work_sync(&rq->dim->work); 1470 cancel_work_sync(&rq->recover_work); 1471 cancel_work_sync(&rq->rx_timeout_work); 1472 mlx5e_destroy_rq(rq); 1473 mlx5e_free_rx_descs(rq); 1474 mlx5e_free_rq(rq); 1475 } 1476 1477 u32 mlx5e_profile_get_tisn(struct mlx5_core_dev *mdev, 1478 struct mlx5e_priv *priv, 1479 const struct mlx5e_profile *profile, 1480 u8 lag_port, u8 tc) 1481 { 1482 if (profile->get_tisn) 1483 return profile->get_tisn(mdev, priv, lag_port, tc); 1484 1485 return mdev->mlx5e_res.hw_objs.tisn[lag_port][tc]; 1486 } 1487 1488 static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) 1489 { 1490 kvfree(sq->db.xdpi_fifo.xi); 1491 kvfree(sq->db.wqe_info); 1492 } 1493 1494 static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) 1495 { 1496 struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; 1497 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1498 int entries; 1499 size_t size; 1500 1501 /* upper bound for maximum num of entries of all xmit_modes. */ 1502 entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS * 1503 MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO); 1504 1505 size = array_size(sizeof(*xdpi_fifo->xi), entries); 1506 xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); 1507 if (!xdpi_fifo->xi) 1508 return -ENOMEM; 1509 1510 xdpi_fifo->pc = &sq->xdpi_fifo_pc; 1511 xdpi_fifo->cc = &sq->xdpi_fifo_cc; 1512 xdpi_fifo->mask = entries - 1; 1513 1514 return 0; 1515 } 1516 1517 static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) 1518 { 1519 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1520 size_t size; 1521 int err; 1522 1523 size = array_size(sizeof(*sq->db.wqe_info), wq_sz); 1524 sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); 1525 if (!sq->db.wqe_info) 1526 return -ENOMEM; 1527 1528 err = mlx5e_alloc_xdpsq_fifo(sq, numa); 1529 if (err) { 1530 mlx5e_free_xdpsq_db(sq); 1531 return err; 1532 } 1533 1534 return 0; 1535 } 1536 1537 static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, 1538 struct mlx5e_params *params, 1539 struct xsk_buff_pool *xsk_pool, 1540 struct mlx5e_sq_param *param, 1541 struct mlx5e_xdpsq *sq, 1542 bool is_redirect) 1543 { 1544 void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); 1545 struct mlx5_core_dev *mdev = c->mdev; 1546 struct mlx5_wq_cyc *wq = &sq->wq; 1547 int err; 1548 1549 sq->pdev = c->pdev; 1550 sq->mkey_be = c->mkey_be; 1551 sq->channel = c; 1552 sq->uar_map = c->bfreg->map; 1553 sq->min_inline_mode = params->tx_min_inline_mode; 1554 sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; 1555 sq->xsk_pool = xsk_pool; 1556 1557 sq->stats = sq->xsk_pool ? 1558 &c->priv->channel_stats[c->ix]->xsksq : 1559 is_redirect ? 1560 &c->priv->channel_stats[c->ix]->xdpsq : 1561 &c->priv->channel_stats[c->ix]->rq_xdpsq; 1562 sq->stop_room = param->is_mpw ? 
mlx5e_stop_room_for_mpwqe(mdev) : 1563 mlx5e_stop_room_for_max_wqe(mdev); 1564 sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); 1565 1566 param->wq.db_numa_node = cpu_to_node(c->cpu); 1567 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); 1568 if (err) 1569 return err; 1570 wq->db = &wq->db[MLX5_SND_DBR]; 1571 1572 err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); 1573 if (err) 1574 goto err_sq_wq_destroy; 1575 1576 return 0; 1577 1578 err_sq_wq_destroy: 1579 mlx5_wq_destroy(&sq->wq_ctrl); 1580 1581 return err; 1582 } 1583 1584 static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) 1585 { 1586 mlx5e_free_xdpsq_db(sq); 1587 mlx5_wq_destroy(&sq->wq_ctrl); 1588 } 1589 1590 static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) 1591 { 1592 kvfree(sq->db.wqe_info); 1593 } 1594 1595 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) 1596 { 1597 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1598 size_t size; 1599 1600 size = array_size(wq_sz, sizeof(*sq->db.wqe_info)); 1601 sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); 1602 if (!sq->db.wqe_info) 1603 return -ENOMEM; 1604 1605 return 0; 1606 } 1607 1608 static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) 1609 { 1610 struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, 1611 recover_work); 1612 1613 mlx5e_reporter_icosq_cqe_err(sq); 1614 } 1615 1616 static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) 1617 { 1618 struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, 1619 recover_work); 1620 1621 /* Not implemented yet. */ 1622 1623 netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); 1624 } 1625 1626 static int mlx5e_alloc_icosq(struct mlx5e_channel *c, 1627 struct mlx5e_sq_param *param, 1628 struct mlx5e_icosq *sq, 1629 work_func_t recover_work_func) 1630 { 1631 void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); 1632 struct mlx5_core_dev *mdev = c->mdev; 1633 struct mlx5_wq_cyc *wq = &sq->wq; 1634 int err; 1635 1636 sq->channel = c; 1637 sq->uar_map = c->bfreg->map; 1638 sq->reserved_room = param->stop_room; 1639 1640 param->wq.db_numa_node = cpu_to_node(c->cpu); 1641 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); 1642 if (err) 1643 return err; 1644 wq->db = &wq->db[MLX5_SND_DBR]; 1645 1646 err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); 1647 if (err) 1648 goto err_sq_wq_destroy; 1649 1650 INIT_WORK(&sq->recover_work, recover_work_func); 1651 1652 return 0; 1653 1654 err_sq_wq_destroy: 1655 mlx5_wq_destroy(&sq->wq_ctrl); 1656 1657 return err; 1658 } 1659 1660 static void mlx5e_free_icosq(struct mlx5e_icosq *sq) 1661 { 1662 mlx5e_free_icosq_db(sq); 1663 mlx5_wq_destroy(&sq->wq_ctrl); 1664 } 1665 1666 void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) 1667 { 1668 kvfree(sq->db.wqe_info); 1669 kvfree(sq->db.skb_fifo.fifo); 1670 kvfree(sq->db.dma_fifo); 1671 } 1672 1673 int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) 1674 { 1675 int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); 1676 int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; 1677 1678 sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, 1679 sizeof(*sq->db.dma_fifo)), 1680 GFP_KERNEL, numa); 1681 sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz, 1682 sizeof(*sq->db.skb_fifo.fifo)), 1683 GFP_KERNEL, numa); 1684 sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, 1685 sizeof(*sq->db.wqe_info)), 1686 GFP_KERNEL, numa); 1687 if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) { 1688 mlx5e_free_txqsq_db(sq); 
1689 return -ENOMEM; 1690 } 1691 1692 sq->dma_fifo_mask = df_sz - 1; 1693 1694 sq->db.skb_fifo.pc = &sq->skb_fifo_pc; 1695 sq->db.skb_fifo.cc = &sq->skb_fifo_cc; 1696 sq->db.skb_fifo.mask = df_sz - 1; 1697 1698 return 0; 1699 } 1700 1701 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, 1702 int txq_ix, 1703 struct mlx5e_params *params, 1704 struct mlx5e_sq_param *param, 1705 struct mlx5e_txqsq *sq, 1706 int tc) 1707 { 1708 void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); 1709 struct mlx5_core_dev *mdev = c->mdev; 1710 struct mlx5_wq_cyc *wq = &sq->wq; 1711 int err; 1712 1713 sq->pdev = c->pdev; 1714 sq->clock = mdev->clock; 1715 sq->mkey_be = c->mkey_be; 1716 sq->netdev = c->netdev; 1717 sq->mdev = c->mdev; 1718 sq->channel = c; 1719 sq->priv = c->priv; 1720 sq->ch_ix = c->ix; 1721 sq->txq_ix = txq_ix; 1722 sq->uar_map = c->bfreg->map; 1723 sq->min_inline_mode = params->tx_min_inline_mode; 1724 sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); 1725 sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); 1726 INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); 1727 if (mlx5_ipsec_device_caps(c->priv->mdev)) 1728 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); 1729 if (param->is_mpw) 1730 set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); 1731 sq->stop_room = param->stop_room; 1732 sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); 1733 1734 param->wq.db_numa_node = cpu_to_node(c->cpu); 1735 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); 1736 if (err) 1737 return err; 1738 wq->db = &wq->db[MLX5_SND_DBR]; 1739 1740 err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); 1741 if (err) 1742 goto err_sq_wq_destroy; 1743 1744 return 0; 1745 1746 err_sq_wq_destroy: 1747 mlx5_wq_destroy(&sq->wq_ctrl); 1748 1749 return err; 1750 } 1751 1752 void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) 1753 { 1754 kvfree(sq->dim); 1755 mlx5e_free_txqsq_db(sq); 1756 mlx5_wq_destroy(&sq->wq_ctrl); 1757 } 1758 1759 static int mlx5e_create_sq(struct mlx5_core_dev *mdev, 1760 struct mlx5e_sq_param *param, 1761 struct mlx5e_create_sq_param *csp, 1762 u32 *sqn) 1763 { 1764 u8 ts_format; 1765 void *in; 1766 void *sqc; 1767 void *wq; 1768 int inlen; 1769 int err; 1770 1771 inlen = MLX5_ST_SZ_BYTES(create_sq_in) + 1772 sizeof(u64) * csp->wq_ctrl->buf.npages; 1773 in = kvzalloc(inlen, GFP_KERNEL); 1774 if (!in) 1775 return -ENOMEM; 1776 1777 ts_format = mlx5_is_real_time_sq(mdev) ? 
1778 MLX5_TIMESTAMP_FORMAT_REAL_TIME : 1779 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; 1780 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 1781 wq = MLX5_ADDR_OF(sqc, sqc, wq); 1782 1783 memcpy(sqc, param->sqc, sizeof(param->sqc)); 1784 MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); 1785 MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); 1786 MLX5_SET(sqc, sqc, cqn, csp->cqn); 1787 MLX5_SET(sqc, sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn); 1788 MLX5_SET(sqc, sqc, ts_format, ts_format); 1789 1790 1791 if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) 1792 MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); 1793 1794 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); 1795 MLX5_SET(sqc, sqc, flush_in_error_en, 1); 1796 1797 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); 1798 MLX5_SET(wq, wq, uar_page, csp->uar_page); 1799 MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - 1800 MLX5_ADAPTER_PAGE_SHIFT); 1801 MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); 1802 1803 mlx5_fill_page_frag_array(&csp->wq_ctrl->buf, 1804 (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); 1805 1806 err = mlx5_core_create_sq(mdev, in, inlen, sqn); 1807 1808 kvfree(in); 1809 1810 return err; 1811 } 1812 1813 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, 1814 struct mlx5e_modify_sq_param *p) 1815 { 1816 u64 bitmask = 0; 1817 void *in; 1818 void *sqc; 1819 int inlen; 1820 int err; 1821 1822 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 1823 in = kvzalloc(inlen, GFP_KERNEL); 1824 if (!in) 1825 return -ENOMEM; 1826 1827 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 1828 1829 MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); 1830 MLX5_SET(sqc, sqc, state, p->next_state); 1831 if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { 1832 bitmask |= 1; 1833 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); 1834 } 1835 if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { 1836 bitmask |= 1 << 2; 1837 MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); 1838 } 1839 MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); 1840 1841 err = mlx5_core_modify_sq(mdev, sqn, in); 1842 1843 kvfree(in); 1844 1845 return err; 1846 } 1847 1848 static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) 1849 { 1850 mlx5_core_destroy_sq(mdev, sqn); 1851 } 1852 1853 int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, 1854 struct mlx5e_sq_param *param, 1855 struct mlx5e_create_sq_param *csp, 1856 u16 qos_queue_group_id, 1857 u32 *sqn) 1858 { 1859 struct mlx5e_modify_sq_param msp = {0}; 1860 int err; 1861 1862 err = mlx5e_create_sq(mdev, param, csp, sqn); 1863 if (err) 1864 return err; 1865 1866 msp.curr_state = MLX5_SQC_STATE_RST; 1867 msp.next_state = MLX5_SQC_STATE_RDY; 1868 if (qos_queue_group_id) { 1869 msp.qos_update = true; 1870 msp.qos_queue_group_id = qos_queue_group_id; 1871 } 1872 err = mlx5e_modify_sq(mdev, *sqn, &msp); 1873 if (err) 1874 mlx5e_destroy_sq(mdev, *sqn); 1875 1876 return err; 1877 } 1878 1879 static int mlx5e_set_sq_maxrate(struct net_device *dev, 1880 struct mlx5e_txqsq *sq, u32 rate); 1881 1882 int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, 1883 struct mlx5e_params *params, struct mlx5e_sq_param *param, 1884 struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, 1885 struct mlx5e_sq_stats *sq_stats) 1886 { 1887 struct mlx5e_create_sq_param csp = {}; 1888 u32 tx_rate; 1889 int err; 1890 1891 err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc); 1892 if (err) 1893 return err; 1894 1895 sq->stats = sq_stats; 1896 1897 
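        /* Fill the create-SQ parameters; mlx5e_create_sq_rdy() below creates
         * the SQ object and moves it from RST to RDY.
         */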
csp.tisn = tisn; 1898 csp.tis_lst_sz = 1; 1899 csp.cqn = sq->cq.mcq.cqn; 1900 csp.wq_ctrl = &sq->wq_ctrl; 1901 csp.min_inline_mode = sq->min_inline_mode; 1902 csp.uar_page = c->bfreg->index; 1903 err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); 1904 if (err) 1905 goto err_free_txqsq; 1906 1907 tx_rate = c->priv->tx_rates[sq->txq_ix]; 1908 if (tx_rate) 1909 mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); 1910 1911 if (sq->channel && !params->tx_dim_enabled) { 1912 sq->channel->tx_cq_moder = params->tx_cq_moderation; 1913 } else if (sq->channel) { 1914 u8 cq_period_mode; 1915 1916 cq_period_mode = params->tx_moder_use_cqe_mode ? 1917 DIM_CQ_PERIOD_MODE_START_FROM_CQE : 1918 DIM_CQ_PERIOD_MODE_START_FROM_EQE; 1919 mlx5e_reset_tx_moderation(&sq->channel->tx_cq_moder, 1920 cq_period_mode, 1921 params->tx_dim_enabled); 1922 1923 err = mlx5e_dim_tx_change(sq, params->tx_dim_enabled); 1924 if (err) 1925 goto err_destroy_sq; 1926 } 1927 1928 return 0; 1929 1930 err_destroy_sq: 1931 mlx5e_destroy_sq(c->mdev, sq->sqn); 1932 err_free_txqsq: 1933 mlx5e_free_txqsq(sq); 1934 1935 return err; 1936 } 1937 1938 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) 1939 { 1940 sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix); 1941 set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 1942 netdev_tx_reset_queue(sq->txq); 1943 netif_tx_start_queue(sq->txq); 1944 netif_queue_set_napi(sq->netdev, sq->txq_ix, NETDEV_QUEUE_TYPE_TX, sq->cq.napi); 1945 } 1946 1947 void mlx5e_tx_disable_queue(struct netdev_queue *txq) 1948 { 1949 __netif_tx_lock_bh(txq); 1950 netif_tx_stop_queue(txq); 1951 __netif_tx_unlock_bh(txq); 1952 } 1953 1954 void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) 1955 { 1956 struct mlx5_wq_cyc *wq = &sq->wq; 1957 1958 netif_queue_set_napi(sq->netdev, sq->txq_ix, NETDEV_QUEUE_TYPE_TX, NULL); 1959 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 1960 synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ 1961 1962 mlx5e_tx_disable_queue(sq->txq); 1963 1964 /* last doorbell out, godspeed .. */ 1965 if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { 1966 u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); 1967 struct mlx5e_tx_wqe *nop; 1968 1969 sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) { 1970 .num_wqebbs = 1, 1971 }; 1972 1973 nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); 1974 mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); 1975 } 1976 } 1977 1978 void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) 1979 { 1980 struct mlx5_core_dev *mdev = sq->mdev; 1981 struct mlx5_rate_limit rl = {0}; 1982 1983 if (sq->dim) 1984 cancel_work_sync(&sq->dim->work); 1985 cancel_work_sync(&sq->recover_work); 1986 mlx5e_destroy_sq(mdev, sq->sqn); 1987 if (sq->rate_limit) { 1988 rl.rate = sq->rate_limit; 1989 mlx5_rl_remove_rate(mdev, &rl); 1990 } 1991 mlx5e_free_txqsq_descs(sq); 1992 mlx5e_free_txqsq(sq); 1993 } 1994 1995 void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) 1996 { 1997 struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, 1998 recover_work); 1999 2000 /* Recovering queues means re-enabling NAPI, which requires the netdev 2001 * instance lock. However, SQ closing flows have to wait for work tasks 2002 * to finish while also holding the netdev instance lock. So either get 2003 * the lock or find that the SQ is no longer enabled and thus this work 2004 * is not relevant anymore. 
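         * The same trylock-and-recheck pattern is used by mlx5e_rq_timeout_work().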
2005 */ 2006 while (!netdev_trylock(sq->netdev)) { 2007 if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) 2008 return; 2009 msleep(20); 2010 } 2011 2012 mlx5e_reporter_tx_err_cqe(sq); 2013 netdev_unlock(sq->netdev); 2014 } 2015 2016 static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) 2017 { 2018 return (struct dim_cq_moder) { 2019 .cq_period_mode = cq_period_mode, 2020 .pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS, 2021 .usec = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 2022 MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE : 2023 MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC, 2024 }; 2025 } 2026 2027 bool mlx5e_reset_tx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, 2028 bool dim_enabled) 2029 { 2030 bool reset_needed = cq_moder->cq_period_mode != cq_period_mode; 2031 2032 if (dim_enabled) 2033 *cq_moder = net_dim_get_def_tx_moderation(cq_period_mode); 2034 else 2035 *cq_moder = mlx5e_get_def_tx_moderation(cq_period_mode); 2036 2037 return reset_needed; 2038 } 2039 2040 bool mlx5e_reset_tx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, 2041 bool dim_enabled, bool keep_dim_state) 2042 { 2043 bool reset = false; 2044 int i, tc; 2045 2046 for (i = 0; i < chs->num; i++) { 2047 for (tc = 0; tc < mlx5e_get_dcb_num_tc(&chs->params); tc++) { 2048 if (keep_dim_state) 2049 dim_enabled = !!chs->c[i]->sq[tc].dim; 2050 2051 reset |= mlx5e_reset_tx_moderation(&chs->c[i]->tx_cq_moder, 2052 cq_period_mode, dim_enabled); 2053 } 2054 } 2055 2056 return reset; 2057 } 2058 2059 static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, 2060 struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, 2061 work_func_t recover_work_func) 2062 { 2063 struct mlx5e_create_sq_param csp = {}; 2064 int err; 2065 2066 err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); 2067 if (err) 2068 return err; 2069 2070 csp.cqn = sq->cq.mcq.cqn; 2071 csp.wq_ctrl = &sq->wq_ctrl; 2072 csp.min_inline_mode = params->tx_min_inline_mode; 2073 csp.uar_page = c->bfreg->index; 2074 err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); 2075 if (err) 2076 goto err_free_icosq; 2077 2078 if (param->is_tls) { 2079 sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list(); 2080 if (IS_ERR(sq->ktls_resync)) { 2081 err = PTR_ERR(sq->ktls_resync); 2082 goto err_destroy_icosq; 2083 } 2084 } 2085 return 0; 2086 2087 err_destroy_icosq: 2088 mlx5e_destroy_sq(c->mdev, sq->sqn); 2089 err_free_icosq: 2090 mlx5e_free_icosq(sq); 2091 2092 return err; 2093 } 2094 2095 void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) 2096 { 2097 set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); 2098 } 2099 2100 void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) 2101 { 2102 clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); 2103 synchronize_net(); /* Sync with NAPI. 
*/ 2104 } 2105 2106 static void mlx5e_close_icosq(struct mlx5e_icosq *sq) 2107 { 2108 struct mlx5e_channel *c = sq->channel; 2109 2110 if (sq->ktls_resync) 2111 mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync); 2112 mlx5e_destroy_sq(c->mdev, sq->sqn); 2113 mlx5e_free_icosq_descs(sq); 2114 mlx5e_free_icosq(sq); 2115 } 2116 2117 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, 2118 struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, 2119 struct mlx5e_xdpsq *sq, bool is_redirect) 2120 { 2121 struct mlx5e_create_sq_param csp = {}; 2122 int err; 2123 2124 err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect); 2125 if (err) 2126 return err; 2127 2128 csp.tis_lst_sz = 1; 2129 csp.tisn = mlx5e_profile_get_tisn(c->mdev, c->priv, c->priv->profile, 2130 c->lag_port, 0); /* tc = 0 */ 2131 csp.cqn = sq->cq.mcq.cqn; 2132 csp.wq_ctrl = &sq->wq_ctrl; 2133 csp.min_inline_mode = sq->min_inline_mode; 2134 csp.uar_page = c->bfreg->index; 2135 set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 2136 2137 err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); 2138 if (err) 2139 goto err_free_xdpsq; 2140 2141 mlx5e_set_xmit_fp(sq, param->is_mpw); 2142 2143 return 0; 2144 2145 err_free_xdpsq: 2146 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 2147 mlx5e_free_xdpsq(sq); 2148 2149 return err; 2150 } 2151 2152 void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) 2153 { 2154 struct mlx5e_channel *c = sq->channel; 2155 2156 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 2157 synchronize_net(); /* Sync with NAPI. */ 2158 2159 mlx5e_destroy_sq(c->mdev, sq->sqn); 2160 mlx5e_free_xdpsq_descs(sq); 2161 mlx5e_free_xdpsq(sq); 2162 } 2163 2164 static struct mlx5e_xdpsq *mlx5e_open_xdpredirect_sq(struct mlx5e_channel *c, 2165 struct mlx5e_params *params, 2166 struct mlx5e_channel_param *cparam, 2167 struct mlx5e_create_cq_param *ccp) 2168 { 2169 struct mlx5e_xdpsq *xdpsq; 2170 int err; 2171 2172 xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, cpu_to_node(c->cpu)); 2173 if (!xdpsq) 2174 return ERR_PTR(-ENOMEM); 2175 2176 err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, 2177 &cparam->xdp_sq.cqp, ccp, &xdpsq->cq); 2178 if (err) 2179 goto err_free_xdpsq; 2180 2181 err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, xdpsq, true); 2182 if (err) 2183 goto err_close_xdpsq_cq; 2184 2185 return xdpsq; 2186 2187 err_close_xdpsq_cq: 2188 mlx5e_close_cq(&xdpsq->cq); 2189 err_free_xdpsq: 2190 kvfree(xdpsq); 2191 2192 return ERR_PTR(err); 2193 } 2194 2195 static void mlx5e_close_xdpredirect_sq(struct mlx5e_xdpsq *xdpsq) 2196 { 2197 mlx5e_close_xdpsq(xdpsq); 2198 mlx5e_close_cq(&xdpsq->cq); 2199 kvfree(xdpsq); 2200 } 2201 2202 static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, 2203 struct net_device *netdev, 2204 struct workqueue_struct *workqueue, 2205 struct mlx5_uars_page *uar, 2206 struct mlx5e_cq_param *param, 2207 struct mlx5e_cq *cq) 2208 { 2209 struct mlx5_core_cq *mcq = &cq->mcq; 2210 int err; 2211 u32 i; 2212 2213 err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, 2214 &cq->wq_ctrl); 2215 if (err) 2216 return err; 2217 2218 mcq->cqe_sz = 64; 2219 mcq->set_ci_db = cq->wq_ctrl.db.db; 2220 mcq->arm_db = cq->wq_ctrl.db.db + 1; 2221 *mcq->set_ci_db = 0; 2222 mcq->vector = param->eq_ix; 2223 mcq->comp = mlx5e_completion_event; 2224 mcq->event = mlx5e_cq_error_event; 2225 2226 for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { 2227 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); 2228 2229 cqe->op_own = 0xf1; 2230 cqe->validity_iteration_count =
0xff; 2231 } 2232 2233 cq->mdev = mdev; 2234 cq->netdev = netdev; 2235 cq->workqueue = workqueue; 2236 cq->uar = uar; 2237 2238 return 0; 2239 } 2240 2241 static int mlx5e_alloc_cq(struct mlx5_core_dev *mdev, 2242 struct mlx5e_cq_param *param, 2243 struct mlx5e_create_cq_param *ccp, 2244 struct mlx5e_cq *cq) 2245 { 2246 int err; 2247 2248 param->wq.buf_numa_node = ccp->node; 2249 param->wq.db_numa_node = ccp->node; 2250 param->eq_ix = ccp->ix; 2251 2252 err = mlx5e_alloc_cq_common(mdev, ccp->netdev, ccp->wq, 2253 ccp->uar, param, cq); 2254 2255 cq->napi = ccp->napi; 2256 cq->ch_stats = ccp->ch_stats; 2257 2258 return err; 2259 } 2260 2261 static void mlx5e_free_cq(struct mlx5e_cq *cq) 2262 { 2263 mlx5_wq_destroy(&cq->wq_ctrl); 2264 } 2265 2266 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) 2267 { 2268 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 2269 struct mlx5_core_dev *mdev = cq->mdev; 2270 struct mlx5_core_cq *mcq = &cq->mcq; 2271 2272 void *in; 2273 void *cqc; 2274 int inlen; 2275 int eqn; 2276 int err; 2277 2278 err = mlx5_comp_eqn_get(mdev, param->eq_ix, &eqn); 2279 if (err) 2280 return err; 2281 2282 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 2283 sizeof(u64) * cq->wq_ctrl.buf.npages; 2284 in = kvzalloc(inlen, GFP_KERNEL); 2285 if (!in) 2286 return -ENOMEM; 2287 2288 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 2289 2290 memcpy(cqc, param->cqc, sizeof(param->cqc)); 2291 2292 mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, 2293 (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); 2294 2295 MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(param->cq_period_mode)); 2296 2297 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 2298 MLX5_SET(cqc, cqc, uar_page, cq->uar->index); 2299 MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - 2300 MLX5_ADAPTER_PAGE_SHIFT); 2301 MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); 2302 2303 err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); 2304 2305 kvfree(in); 2306 2307 if (err) 2308 return err; 2309 2310 mlx5e_cq_arm(cq); 2311 2312 return 0; 2313 } 2314 2315 static void mlx5e_destroy_cq(struct mlx5e_cq *cq) 2316 { 2317 mlx5_core_destroy_cq(cq->mdev, &cq->mcq); 2318 } 2319 2320 int mlx5e_open_cq(struct mlx5_core_dev *mdev, struct dim_cq_moder moder, 2321 struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, 2322 struct mlx5e_cq *cq) 2323 { 2324 int err; 2325 2326 err = mlx5e_alloc_cq(mdev, param, ccp, cq); 2327 if (err) 2328 return err; 2329 2330 err = mlx5e_create_cq(cq, param); 2331 if (err) 2332 goto err_free_cq; 2333 2334 if (MLX5_CAP_GEN(mdev, cq_moderation) && 2335 MLX5_CAP_GEN(mdev, cq_period_mode_modify)) 2336 mlx5e_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts, 2337 mlx5e_cq_period_mode(moder.cq_period_mode)); 2338 return 0; 2339 2340 err_free_cq: 2341 mlx5e_free_cq(cq); 2342 2343 return err; 2344 } 2345 2346 void mlx5e_close_cq(struct mlx5e_cq *cq) 2347 { 2348 mlx5e_destroy_cq(cq); 2349 mlx5e_free_cq(cq); 2350 } 2351 2352 int mlx5e_modify_cq_period_mode(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 2353 u8 cq_period_mode) 2354 { 2355 u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; 2356 void *cqc; 2357 2358 MLX5_SET(modify_cq_in, in, cqn, cq->cqn); 2359 cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); 2360 MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(cq_period_mode)); 2361 MLX5_SET(modify_cq_in, in, 2362 modify_field_select_resize_field_select.modify_field_select.modify_field_select, 2363 MLX5_CQ_MODIFY_PERIOD_MODE); 2364 2365 return 
mlx5_core_modify_cq(dev, cq, in, sizeof(in)); 2366 } 2367 2368 int mlx5e_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 2369 u16 cq_period, u16 cq_max_count, u8 cq_period_mode) 2370 { 2371 u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; 2372 void *cqc; 2373 2374 MLX5_SET(modify_cq_in, in, cqn, cq->cqn); 2375 cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); 2376 MLX5_SET(cqc, cqc, cq_period, cq_period); 2377 MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); 2378 MLX5_SET(cqc, cqc, cq_period_mode, cq_period_mode); 2379 MLX5_SET(modify_cq_in, in, 2380 modify_field_select_resize_field_select.modify_field_select.modify_field_select, 2381 MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT | MLX5_CQ_MODIFY_PERIOD_MODE); 2382 2383 return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); 2384 } 2385 2386 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, 2387 struct mlx5e_params *params, 2388 struct mlx5e_create_cq_param *ccp, 2389 struct mlx5e_channel_param *cparam) 2390 { 2391 int err; 2392 int tc; 2393 2394 for (tc = 0; tc < c->num_tc; tc++) { 2395 err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &cparam->txq_sq.cqp, 2396 ccp, &c->sq[tc].cq); 2397 if (err) 2398 goto err_close_tx_cqs; 2399 } 2400 2401 return 0; 2402 2403 err_close_tx_cqs: 2404 for (tc--; tc >= 0; tc--) 2405 mlx5e_close_cq(&c->sq[tc].cq); 2406 2407 return err; 2408 } 2409 2410 static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) 2411 { 2412 int tc; 2413 2414 for (tc = 0; tc < c->num_tc; tc++) 2415 mlx5e_close_cq(&c->sq[tc].cq); 2416 } 2417 2418 static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq) 2419 { 2420 int tc; 2421 2422 for (tc = 0; tc < TC_MAX_QUEUE; tc++) 2423 if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count) 2424 return tc; 2425 2426 WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq); 2427 return -ENOENT; 2428 } 2429 2430 static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, 2431 u32 *hw_id) 2432 { 2433 int tc; 2434 2435 if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) { 2436 *hw_id = 0; 2437 return 0; 2438 } 2439 2440 tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix); 2441 if (tc < 0) 2442 return tc; 2443 2444 if (tc >= params->mqprio.num_tc) { 2445 WARN(1, "Unexpected TCs configuration. 
tc %d is out of range of %u", 2446 tc, params->mqprio.num_tc); 2447 return -EINVAL; 2448 } 2449 2450 *hw_id = params->mqprio.channel.hw_id[tc]; 2451 return 0; 2452 } 2453 2454 static int mlx5e_open_sqs(struct mlx5e_channel *c, 2455 struct mlx5e_params *params, 2456 struct mlx5e_channel_param *cparam) 2457 { 2458 int err, tc; 2459 2460 for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) { 2461 int txq_ix = c->ix + tc * params->num_channels; 2462 u32 qos_queue_group_id; 2463 u32 tisn; 2464 2465 tisn = mlx5e_profile_get_tisn(c->mdev, c->priv, c->priv->profile, 2466 c->lag_port, tc); 2467 err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id); 2468 if (err) 2469 goto err_close_sqs; 2470 2471 err = mlx5e_open_txqsq(c, tisn, txq_ix, 2472 params, &cparam->txq_sq, &c->sq[tc], tc, 2473 qos_queue_group_id, 2474 &c->priv->channel_stats[c->ix]->sq[tc]); 2475 if (err) 2476 goto err_close_sqs; 2477 } 2478 2479 return 0; 2480 2481 err_close_sqs: 2482 for (tc--; tc >= 0; tc--) 2483 mlx5e_close_txqsq(&c->sq[tc]); 2484 2485 return err; 2486 } 2487 2488 static void mlx5e_close_sqs(struct mlx5e_channel *c) 2489 { 2490 int tc; 2491 2492 for (tc = 0; tc < c->num_tc; tc++) 2493 mlx5e_close_txqsq(&c->sq[tc]); 2494 } 2495 2496 static int mlx5e_set_sq_maxrate(struct net_device *dev, 2497 struct mlx5e_txqsq *sq, u32 rate) 2498 { 2499 struct mlx5e_priv *priv = netdev_priv(dev); 2500 struct mlx5_core_dev *mdev = priv->mdev; 2501 struct mlx5e_modify_sq_param msp = {0}; 2502 struct mlx5_rate_limit rl = {0}; 2503 u16 rl_index = 0; 2504 int err; 2505 2506 if (rate == sq->rate_limit) 2507 /* nothing to do */ 2508 return 0; 2509 2510 if (sq->rate_limit) { 2511 rl.rate = sq->rate_limit; 2512 /* remove current rl index to free space to next ones */ 2513 mlx5_rl_remove_rate(mdev, &rl); 2514 } 2515 2516 sq->rate_limit = 0; 2517 2518 if (rate) { 2519 rl.rate = rate; 2520 err = mlx5_rl_add_rate(mdev, &rl_index, &rl); 2521 if (err) { 2522 netdev_err(dev, "Failed configuring rate %u: %d\n", 2523 rate, err); 2524 return err; 2525 } 2526 } 2527 2528 msp.curr_state = MLX5_SQC_STATE_RDY; 2529 msp.next_state = MLX5_SQC_STATE_RDY; 2530 msp.rl_index = rl_index; 2531 msp.rl_update = true; 2532 err = mlx5e_modify_sq(mdev, sq->sqn, &msp); 2533 if (err) { 2534 netdev_err(dev, "Failed configuring rate %u: %d\n", 2535 rate, err); 2536 /* remove the rate from the table */ 2537 if (rate) 2538 mlx5_rl_remove_rate(mdev, &rl); 2539 return err; 2540 } 2541 2542 sq->rate_limit = rate; 2543 return 0; 2544 } 2545 2546 static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) 2547 { 2548 struct mlx5e_priv *priv = netdev_priv(dev); 2549 struct mlx5_core_dev *mdev = priv->mdev; 2550 struct mlx5e_txqsq *sq = priv->txq2sq[index]; 2551 int err = 0; 2552 2553 if (!mlx5_rl_is_supported(mdev)) { 2554 netdev_err(dev, "Rate limiting is not supported on this device\n"); 2555 return -EINVAL; 2556 } 2557 2558 /* rate is given in Mb/sec, HW config is in Kb/sec */ 2559 rate = rate << 10; 2560 2561 /* Check whether rate in valid range, 0 is always valid */ 2562 if (rate && !mlx5_rl_is_in_range(mdev, rate)) { 2563 netdev_err(dev, "TX rate %u, is not in range\n", rate); 2564 return -ERANGE; 2565 } 2566 2567 mutex_lock(&priv->state_lock); 2568 if (test_bit(MLX5E_STATE_OPENED, &priv->state)) 2569 err = mlx5e_set_sq_maxrate(dev, sq, rate); 2570 if (!err) 2571 priv->tx_rates[index] = rate; 2572 mutex_unlock(&priv->state_lock); 2573 2574 return err; 2575 } 2576 2577 static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params 
*params, 2578 struct mlx5e_rq_param *rq_params) 2579 { 2580 u16 q_counter = c->priv->q_counter[c->sd_ix]; 2581 int err; 2582 2583 err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq); 2584 if (err) 2585 return err; 2586 2587 return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq); 2588 } 2589 2590 static int mlx5e_open_queues(struct mlx5e_channel *c, 2591 struct mlx5e_params *params, 2592 struct mlx5e_channel_param *cparam) 2593 { 2594 const struct net_device_ops *netdev_ops = c->netdev->netdev_ops; 2595 struct dim_cq_moder icocq_moder = {0, 0}; 2596 struct mlx5e_create_cq_param ccp; 2597 int err; 2598 2599 mlx5e_build_create_cq_param(&ccp, c); 2600 2601 err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->async_icosq.cqp, &ccp, 2602 &c->async_icosq.cq); 2603 if (err) 2604 return err; 2605 2606 err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->icosq.cqp, &ccp, 2607 &c->icosq.cq); 2608 if (err) 2609 goto err_close_async_icosq_cq; 2610 2611 err = mlx5e_open_tx_cqs(c, params, &ccp, cparam); 2612 if (err) 2613 goto err_close_icosq_cq; 2614 2615 if (netdev_ops->ndo_xdp_xmit && c->xdp) { 2616 c->xdpsq = mlx5e_open_xdpredirect_sq(c, params, cparam, &ccp); 2617 if (IS_ERR(c->xdpsq)) { 2618 err = PTR_ERR(c->xdpsq); 2619 goto err_close_tx_cqs; 2620 } 2621 } 2622 2623 err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, 2624 &c->rq.cq); 2625 if (err) 2626 goto err_close_xdpredirect_sq; 2627 2628 err = c->xdp ? mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &cparam->xdp_sq.cqp, 2629 &ccp, &c->rq_xdpsq.cq) : 0; 2630 if (err) 2631 goto err_close_rx_cq; 2632 2633 spin_lock_init(&c->async_icosq_lock); 2634 2635 err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, 2636 mlx5e_async_icosq_err_cqe_work); 2637 if (err) 2638 goto err_close_rq_xdpsq_cq; 2639 2640 mutex_init(&c->icosq_recovery_lock); 2641 2642 err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, 2643 mlx5e_icosq_err_cqe_work); 2644 if (err) 2645 goto err_close_async_icosq; 2646 2647 err = mlx5e_open_sqs(c, params, cparam); 2648 if (err) 2649 goto err_close_icosq; 2650 2651 err = mlx5e_open_rxq_rq(c, params, &cparam->rq); 2652 if (err) 2653 goto err_close_sqs; 2654 2655 if (c->xdp) { 2656 err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, 2657 &c->rq_xdpsq, false); 2658 if (err) 2659 goto err_close_rq; 2660 } 2661 2662 return 0; 2663 2664 err_close_rq: 2665 mlx5e_close_rq(&c->rq); 2666 2667 err_close_sqs: 2668 mlx5e_close_sqs(c); 2669 2670 err_close_icosq: 2671 mlx5e_close_icosq(&c->icosq); 2672 2673 err_close_async_icosq: 2674 mlx5e_close_icosq(&c->async_icosq); 2675 2676 err_close_rq_xdpsq_cq: 2677 if (c->xdp) 2678 mlx5e_close_cq(&c->rq_xdpsq.cq); 2679 2680 err_close_rx_cq: 2681 mlx5e_close_cq(&c->rq.cq); 2682 2683 err_close_xdpredirect_sq: 2684 if (c->xdpsq) 2685 mlx5e_close_xdpredirect_sq(c->xdpsq); 2686 2687 err_close_tx_cqs: 2688 mlx5e_close_tx_cqs(c); 2689 2690 err_close_icosq_cq: 2691 mlx5e_close_cq(&c->icosq.cq); 2692 2693 err_close_async_icosq_cq: 2694 mlx5e_close_cq(&c->async_icosq.cq); 2695 2696 return err; 2697 } 2698 2699 static void mlx5e_close_queues(struct mlx5e_channel *c) 2700 { 2701 if (c->xdp) 2702 mlx5e_close_xdpsq(&c->rq_xdpsq); 2703 /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. 
*/ 2704 cancel_work_sync(&c->icosq.recover_work); 2705 mlx5e_close_rq(&c->rq); 2706 mlx5e_close_sqs(c); 2707 mlx5e_close_icosq(&c->icosq); 2708 mutex_destroy(&c->icosq_recovery_lock); 2709 mlx5e_close_icosq(&c->async_icosq); 2710 if (c->xdp) 2711 mlx5e_close_cq(&c->rq_xdpsq.cq); 2712 mlx5e_close_cq(&c->rq.cq); 2713 if (c->xdpsq) 2714 mlx5e_close_xdpredirect_sq(c->xdpsq); 2715 mlx5e_close_tx_cqs(c); 2716 mlx5e_close_cq(&c->icosq.cq); 2717 mlx5e_close_cq(&c->async_icosq.cq); 2718 } 2719 2720 static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) 2721 { 2722 u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id); 2723 2724 return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); 2725 } 2726 2727 static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) 2728 { 2729 if (ix > priv->stats_nch) { 2730 netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, 2731 priv->stats_nch); 2732 return -EINVAL; 2733 } 2734 2735 if (priv->channel_stats[ix]) 2736 return 0; 2737 2738 /* Asymmetric dynamic memory allocation. 2739 * Freed in mlx5e_priv_arrays_free, not on channel closure. 2740 */ 2741 netdev_dbg(priv->netdev, "Creating channel stats %d\n", ix); 2742 priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), 2743 GFP_KERNEL, cpu_to_node(cpu)); 2744 if (!priv->channel_stats[ix]) 2745 return -ENOMEM; 2746 priv->stats_nch++; 2747 2748 return 0; 2749 } 2750 2751 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) 2752 { 2753 spin_lock_bh(&c->async_icosq_lock); 2754 mlx5e_trigger_irq(&c->async_icosq); 2755 spin_unlock_bh(&c->async_icosq_lock); 2756 } 2757 2758 void mlx5e_trigger_napi_sched(struct napi_struct *napi) 2759 { 2760 local_bh_disable(); 2761 napi_schedule(napi); 2762 local_bh_enable(); 2763 } 2764 2765 static void mlx5e_channel_pick_doorbell(struct mlx5e_channel *c) 2766 { 2767 struct mlx5e_hw_objs *hw_objs = &c->mdev->mlx5e_res.hw_objs; 2768 2769 /* No dedicated Ethernet doorbells, use the global one. */ 2770 if (hw_objs->num_bfregs == 0) { 2771 c->bfreg = &c->mdev->priv.bfreg; 2772 return; 2773 } 2774 2775 /* Round-robin between doorbells. 
*/ 2776 c->bfreg = hw_objs->bfregs + c->vec_ix % hw_objs->num_bfregs; 2777 } 2778 2779 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, 2780 struct mlx5e_params *params, 2781 struct xsk_buff_pool *xsk_pool, 2782 struct mlx5e_channel **cp) 2783 { 2784 struct net_device *netdev = priv->netdev; 2785 struct mlx5e_channel_param *cparam; 2786 struct mlx5_core_dev *mdev; 2787 struct mlx5e_xsk_param xsk; 2788 struct mlx5e_channel *c; 2789 unsigned int irq; 2790 int vec_ix; 2791 int cpu; 2792 int err; 2793 2794 mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); 2795 vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); 2796 cpu = mlx5_comp_vector_get_cpu(mdev, vec_ix); 2797 2798 err = mlx5_comp_irqn_get(mdev, vec_ix, &irq); 2799 if (err) 2800 return err; 2801 2802 err = mlx5e_channel_stats_alloc(priv, ix, cpu); 2803 if (err) 2804 return err; 2805 2806 c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); 2807 cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); 2808 if (!c || !cparam) { 2809 err = -ENOMEM; 2810 goto err_free; 2811 } 2812 2813 err = mlx5e_build_channel_param(mdev, params, cparam); 2814 if (err) 2815 goto err_free; 2816 2817 c->priv = priv; 2818 c->mdev = mdev; 2819 c->ix = ix; 2820 c->vec_ix = vec_ix; 2821 c->sd_ix = mlx5_sd_ch_ix_get_dev_ix(mdev, ix); 2822 c->cpu = cpu; 2823 c->pdev = mlx5_core_dma_dev(mdev); 2824 c->netdev = priv->netdev; 2825 c->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); 2826 c->num_tc = mlx5e_get_dcb_num_tc(params); 2827 c->xdp = !!params->xdp_prog; 2828 c->stats = &priv->channel_stats[ix]->ch; 2829 c->aff_mask = irq_get_effective_affinity_mask(irq); 2830 c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); 2831 2832 mlx5e_channel_pick_doorbell(c); 2833 2834 netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); 2835 netif_napi_set_irq_locked(&c->napi, irq); 2836 2837 err = mlx5e_open_queues(c, params, cparam); 2838 if (unlikely(err)) 2839 goto err_napi_del; 2840 2841 if (xsk_pool) { 2842 mlx5e_build_xsk_param(xsk_pool, &xsk); 2843 err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); 2844 if (unlikely(err)) 2845 goto err_close_queues; 2846 } 2847 2848 *cp = c; 2849 2850 kvfree(cparam); 2851 return 0; 2852 2853 err_close_queues: 2854 mlx5e_close_queues(c); 2855 2856 err_napi_del: 2857 netif_napi_del_locked(&c->napi); 2858 2859 err_free: 2860 kvfree(cparam); 2861 kvfree(c); 2862 2863 return err; 2864 } 2865 2866 static void mlx5e_activate_channel(struct mlx5e_channel *c) 2867 { 2868 int tc; 2869 2870 napi_enable_locked(&c->napi); 2871 2872 for (tc = 0; tc < c->num_tc; tc++) 2873 mlx5e_activate_txqsq(&c->sq[tc]); 2874 mlx5e_activate_icosq(&c->icosq); 2875 mlx5e_activate_icosq(&c->async_icosq); 2876 2877 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2878 mlx5e_activate_xsk(c); 2879 else 2880 mlx5e_activate_rq(&c->rq); 2881 2882 netif_queue_set_napi(c->netdev, c->ix, NETDEV_QUEUE_TYPE_RX, &c->napi); 2883 } 2884 2885 static void mlx5e_deactivate_channel(struct mlx5e_channel *c) 2886 { 2887 int tc; 2888 2889 netif_queue_set_napi(c->netdev, c->ix, NETDEV_QUEUE_TYPE_RX, NULL); 2890 2891 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2892 mlx5e_deactivate_xsk(c); 2893 else 2894 mlx5e_deactivate_rq(&c->rq); 2895 2896 mlx5e_deactivate_icosq(&c->async_icosq); 2897 mlx5e_deactivate_icosq(&c->icosq); 2898 for (tc = 0; tc < c->num_tc; tc++) 2899 mlx5e_deactivate_txqsq(&c->sq[tc]); 2900 mlx5e_qos_deactivate_queues(c); 2901 2902 napi_disable_locked(&c->napi); 2903 } 2904 2905 static void mlx5e_close_channel(struct mlx5e_channel *c) 2906 { 2907 if 
(test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2908 mlx5e_close_xsk(c); 2909 mlx5e_close_queues(c); 2910 mlx5e_qos_close_queues(c); 2911 netif_napi_del_locked(&c->napi); 2912 2913 kvfree(c); 2914 } 2915 2916 int mlx5e_open_channels(struct mlx5e_priv *priv, 2917 struct mlx5e_channels *chs) 2918 { 2919 int err = -ENOMEM; 2920 int i; 2921 2922 chs->num = chs->params.num_channels; 2923 2924 chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); 2925 if (!chs->c) 2926 goto err_out; 2927 2928 for (i = 0; i < chs->num; i++) { 2929 struct xsk_buff_pool *xsk_pool = NULL; 2930 2931 if (chs->params.xdp_prog) 2932 xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); 2933 2934 err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]); 2935 if (err) 2936 goto err_close_channels; 2937 } 2938 2939 if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { 2940 err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); 2941 if (err) 2942 goto err_close_channels; 2943 } 2944 2945 if (priv->htb) { 2946 err = mlx5e_qos_open_queues(priv, chs); 2947 if (err) 2948 goto err_close_ptp; 2949 } 2950 2951 mlx5e_health_channels_update(priv); 2952 return 0; 2953 2954 err_close_ptp: 2955 if (chs->ptp) 2956 mlx5e_ptp_close(chs->ptp); 2957 2958 err_close_channels: 2959 for (i--; i >= 0; i--) 2960 mlx5e_close_channel(chs->c[i]); 2961 2962 kfree(chs->c); 2963 err_out: 2964 chs->num = 0; 2965 return err; 2966 } 2967 2968 static void mlx5e_activate_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) 2969 { 2970 int i; 2971 2972 for (i = 0; i < chs->num; i++) 2973 mlx5e_activate_channel(chs->c[i]); 2974 2975 if (priv->htb) 2976 mlx5e_qos_activate_queues(priv); 2977 2978 for (i = 0; i < chs->num; i++) 2979 mlx5e_trigger_napi_icosq(chs->c[i]); 2980 2981 if (chs->ptp) 2982 mlx5e_ptp_activate_channel(chs->ptp); 2983 } 2984 2985 static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) 2986 { 2987 int err = 0; 2988 int i; 2989 2990 for (i = 0; i < chs->num; i++) { 2991 int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; 2992 struct mlx5e_channel *c = chs->c[i]; 2993 2994 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 2995 continue; 2996 2997 err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout); 2998 2999 /* Don't wait on the XSK RQ, because the newer xdpsock sample 3000 * doesn't provide any Fill Ring entries at the setup stage. 3001 */ 3002 } 3003 3004 return err ? 
-ETIMEDOUT : 0; 3005 } 3006 3007 static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) 3008 { 3009 int i; 3010 3011 if (chs->ptp) 3012 mlx5e_ptp_deactivate_channel(chs->ptp); 3013 3014 for (i = 0; i < chs->num; i++) 3015 mlx5e_deactivate_channel(chs->c[i]); 3016 } 3017 3018 void mlx5e_close_channels(struct mlx5e_channels *chs) 3019 { 3020 int i; 3021 3022 ASSERT_RTNL(); 3023 if (chs->ptp) { 3024 mlx5e_ptp_close(chs->ptp); 3025 chs->ptp = NULL; 3026 } 3027 for (i = 0; i < chs->num; i++) 3028 mlx5e_close_channel(chs->c[i]); 3029 3030 kfree(chs->c); 3031 chs->num = 0; 3032 } 3033 3034 static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv) 3035 { 3036 struct mlx5e_rx_res *res = priv->rx_res; 3037 3038 return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge); 3039 } 3040 3041 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge); 3042 3043 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, 3044 struct mlx5e_params *params, u16 mtu) 3045 { 3046 u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu); 3047 int err; 3048 3049 err = mlx5_set_port_mtu(mdev, hw_mtu, 1); 3050 if (err) 3051 return err; 3052 3053 /* Update vport context MTU */ 3054 mlx5_modify_nic_vport_mtu(mdev, hw_mtu); 3055 return 0; 3056 } 3057 3058 static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, 3059 struct mlx5e_params *params, u16 *mtu) 3060 { 3061 u16 hw_mtu = 0; 3062 int err; 3063 3064 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3065 if (err || !hw_mtu) /* fallback to port oper mtu */ 3066 mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); 3067 3068 *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); 3069 } 3070 3071 int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) 3072 { 3073 struct mlx5e_params *params = &priv->channels.params; 3074 struct net_device *netdev = priv->netdev; 3075 struct mlx5_core_dev *mdev = priv->mdev; 3076 u16 mtu; 3077 int err; 3078 3079 err = mlx5e_set_mtu(mdev, params, params->sw_mtu); 3080 if (err) 3081 return err; 3082 3083 mlx5e_query_mtu(mdev, params, &mtu); 3084 if (mtu != params->sw_mtu) 3085 netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", 3086 __func__, mtu, params->sw_mtu); 3087 3088 params->sw_mtu = mtu; 3089 return 0; 3090 } 3091 3092 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu); 3093 3094 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) 3095 { 3096 struct mlx5e_params *params = &priv->channels.params; 3097 struct net_device *netdev = priv->netdev; 3098 struct mlx5_core_dev *mdev = priv->mdev; 3099 u16 max_mtu; 3100 3101 /* MTU range: 68 - hw-specific max */ 3102 netdev->min_mtu = ETH_MIN_MTU; 3103 3104 mlx5_query_port_max_mtu(mdev, &max_mtu, 1); 3105 netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu), 3106 ETH_MAX_MTU); 3107 } 3108 3109 static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, 3110 struct netdev_tc_txq *tc_to_txq) 3111 { 3112 int tc, err; 3113 3114 netdev_reset_tc(netdev); 3115 3116 if (ntc == 1) 3117 return 0; 3118 3119 err = netdev_set_num_tc(netdev, ntc); 3120 if (err) { 3121 netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc); 3122 return err; 3123 } 3124 3125 for (tc = 0; tc < ntc; tc++) { 3126 u16 count, offset; 3127 3128 count = tc_to_txq[tc].count; 3129 offset = tc_to_txq[tc].offset; 3130 netdev_set_tc_queue(netdev, tc, count, offset); 3131 } 3132 3133 return 0; 3134 } 3135 3136 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) 3137 { 3138 int nch, ntc, num_txqs, err; 3139 int qos_queues = 
0; 3140 3141 if (priv->htb) 3142 qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb); 3143 3144 nch = priv->channels.params.num_channels; 3145 ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); 3146 num_txqs = nch * ntc + qos_queues; 3147 if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) 3148 num_txqs += ntc; 3149 3150 netdev_dbg(priv->netdev, "Setting num_txqs %d\n", num_txqs); 3151 err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); 3152 if (err) 3153 netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); 3154 3155 return err; 3156 } 3157 3158 static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, 3159 struct mlx5e_params *params) 3160 { 3161 int ix; 3162 3163 for (ix = 0; ix < params->num_channels; ix++) { 3164 int num_comp_vectors, irq, vec_ix; 3165 struct mlx5_core_dev *mdev; 3166 3167 mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); 3168 num_comp_vectors = mlx5_comp_vectors_max(mdev); 3169 cpumask_clear(priv->scratchpad.cpumask); 3170 vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); 3171 3172 for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { 3173 int cpu = mlx5_comp_vector_get_cpu(mdev, irq); 3174 3175 cpumask_set_cpu(cpu, priv->scratchpad.cpumask); 3176 } 3177 3178 netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); 3179 } 3180 } 3181 3182 static int mlx5e_update_tc_and_tx_queues(struct mlx5e_priv *priv) 3183 { 3184 struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; 3185 struct net_device *netdev = priv->netdev; 3186 int old_num_txqs, old_ntc; 3187 int nch, ntc; 3188 int err; 3189 int i; 3190 3191 old_num_txqs = netdev->real_num_tx_queues; 3192 old_ntc = netdev->num_tc ? : 1; 3193 for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++) 3194 old_tc_to_txq[i] = netdev->tc_to_txq[i]; 3195 3196 nch = priv->channels.params.num_channels; 3197 ntc = priv->channels.params.mqprio.num_tc; 3198 tc_to_txq = priv->channels.params.mqprio.tc_to_txq; 3199 3200 err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq); 3201 if (err) 3202 goto err_out; 3203 err = mlx5e_update_tx_netdev_queues(priv); 3204 if (err) 3205 goto err_tcs; 3206 mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); 3207 3208 return 0; 3209 3210 err_tcs: 3211 WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, 3212 old_tc_to_txq)); 3213 err_out: 3214 return err; 3215 } 3216 3217 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_tc_and_tx_queues); 3218 3219 static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) 3220 { 3221 u16 count = priv->channels.params.num_channels; 3222 struct net_device *netdev = priv->netdev; 3223 int old_num_rxqs; 3224 int err; 3225 3226 old_num_rxqs = netdev->real_num_rx_queues; 3227 err = netif_set_real_num_rx_queues(netdev, count); 3228 if (err) { 3229 netdev_warn(netdev, "%s: netif_set_real_num_rx_queues failed, %d\n", 3230 __func__, err); 3231 return err; 3232 } 3233 err = mlx5e_update_tc_and_tx_queues(priv); 3234 if (err) { 3235 /* mlx5e_update_tc_and_tx_queues can fail if channels or TCs number increases. 3236 * Since channel number changed, it increased. That means, the call to 3237 * netif_set_real_num_rx_queues below should not fail, because it 3238 * decreases the number of RX queues. 3239 */ 3240 WARN_ON_ONCE(netif_set_real_num_rx_queues(netdev, old_num_rxqs)); 3241 return err; 3242 } 3243 3244 /* This function may be called on attach, before priv->rx_res is created. 
*/ 3245 if (priv->rx_res) { 3246 mlx5e_rx_res_rss_update_num_channels(priv->rx_res, count); 3247 3248 if (!netif_is_rxfh_configured(priv->netdev)) 3249 mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); 3250 } 3251 3252 return 0; 3253 } 3254 3255 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed); 3256 3257 static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) 3258 { 3259 int i, ch, tc, num_tc; 3260 3261 ch = priv->channels.num; 3262 num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); 3263 3264 for (i = 0; i < ch; i++) { 3265 for (tc = 0; tc < num_tc; tc++) { 3266 struct mlx5e_channel *c = priv->channels.c[i]; 3267 struct mlx5e_txqsq *sq = &c->sq[tc]; 3268 3269 priv->txq2sq[sq->txq_ix] = sq; 3270 priv->txq2sq_stats[sq->txq_ix] = sq->stats; 3271 } 3272 } 3273 3274 if (!priv->channels.ptp) 3275 goto out; 3276 3277 if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) 3278 goto out; 3279 3280 for (tc = 0; tc < num_tc; tc++) { 3281 struct mlx5e_ptp *c = priv->channels.ptp; 3282 struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; 3283 3284 priv->txq2sq[sq->txq_ix] = sq; 3285 priv->txq2sq_stats[sq->txq_ix] = sq->stats; 3286 } 3287 3288 out: 3289 /* Make the change to txq2sq visible before the queue is started. 3290 * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, 3291 * which pairs with this barrier. 3292 */ 3293 smp_wmb(); 3294 } 3295 3296 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) 3297 { 3298 mlx5e_build_txq_maps(priv); 3299 mlx5e_activate_channels(priv, &priv->channels); 3300 mlx5e_xdp_tx_enable(priv); 3301 3302 /* dev_watchdog() wants all TX queues to be started when the carrier is 3303 * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. 3304 * Make it happy to avoid TX timeout false alarms. 3305 */ 3306 netif_tx_start_all_queues(priv->netdev); 3307 3308 if (mlx5e_is_vport_rep(priv)) 3309 mlx5e_rep_activate_channels(priv); 3310 3311 set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); 3312 3313 mlx5e_wait_channels_min_rx_wqes(&priv->channels); 3314 3315 if (priv->rx_res) 3316 mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); 3317 } 3318 3319 static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) 3320 { 3321 WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); 3322 if (current_work() != &priv->tx_timeout_work) 3323 cancel_work_sync(&priv->tx_timeout_work); 3324 } 3325 3326 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) 3327 { 3328 if (priv->rx_res) 3329 mlx5e_rx_res_channels_deactivate(priv->rx_res); 3330 3331 clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); 3332 mlx5e_cancel_tx_timeout_work(priv); 3333 3334 if (mlx5e_is_vport_rep(priv)) 3335 mlx5e_rep_deactivate_channels(priv); 3336 3337 /* The results of ndo_select_queue are unreliable, while netdev config 3338 * is being changed (real_num_tx_queues, num_tc). Stop all queues to 3339 * prevent ndo_start_xmit from being called, so that it can assume that 3340 * the selected queue is always valid. 
3341 */ 3342 netif_tx_disable(priv->netdev); 3343 3344 mlx5e_xdp_tx_disable(priv); 3345 mlx5e_deactivate_channels(&priv->channels); 3346 } 3347 3348 static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, 3349 struct mlx5e_params *new_params, 3350 mlx5e_fp_preactivate preactivate, 3351 void *context) 3352 { 3353 struct mlx5e_params old_params; 3354 3355 old_params = priv->channels.params; 3356 priv->channels.params = *new_params; 3357 3358 if (preactivate) { 3359 int err; 3360 3361 err = preactivate(priv, context); 3362 if (err) { 3363 priv->channels.params = old_params; 3364 return err; 3365 } 3366 } 3367 3368 mlx5e_set_xdp_feature(priv); 3369 return 0; 3370 } 3371 3372 static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, 3373 struct mlx5e_channels *old_chs, 3374 struct mlx5e_channels *new_chs, 3375 mlx5e_fp_preactivate preactivate, 3376 void *context) 3377 { 3378 struct net_device *netdev = priv->netdev; 3379 int carrier_ok; 3380 int err = 0; 3381 3382 carrier_ok = netif_carrier_ok(netdev); 3383 netif_carrier_off(netdev); 3384 3385 mlx5e_deactivate_priv_channels(priv); 3386 3387 priv->channels = *new_chs; 3388 3389 /* New channels are ready to roll, call the preactivate hook if needed 3390 * to modify HW settings or update kernel parameters. 3391 */ 3392 if (preactivate) { 3393 err = preactivate(priv, context); 3394 if (err) { 3395 priv->channels = *old_chs; 3396 goto out; 3397 } 3398 } 3399 3400 mlx5e_set_xdp_feature(priv); 3401 if (!MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) 3402 mlx5e_close_channels(old_chs); 3403 priv->profile->update_rx(priv); 3404 3405 mlx5e_selq_apply(&priv->selq); 3406 out: 3407 mlx5e_activate_priv_channels(priv); 3408 3409 /* return carrier back if needed */ 3410 if (carrier_ok) 3411 netif_carrier_on(netdev); 3412 3413 return err; 3414 } 3415 3416 int mlx5e_safe_switch_params(struct mlx5e_priv *priv, 3417 struct mlx5e_params *params, 3418 mlx5e_fp_preactivate preactivate, 3419 void *context, bool reset) 3420 { 3421 struct mlx5e_channels *old_chs, *new_chs; 3422 int err; 3423 3424 reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); 3425 if (!reset) 3426 return mlx5e_switch_priv_params(priv, params, preactivate, context); 3427 3428 old_chs = kzalloc(sizeof(*old_chs), GFP_KERNEL); 3429 new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL); 3430 if (!old_chs || !new_chs) { 3431 err = -ENOMEM; 3432 goto err_free_chs; 3433 } 3434 3435 new_chs->params = *params; 3436 3437 mlx5e_selq_prepare_params(&priv->selq, &new_chs->params); 3438 3439 err = mlx5e_open_channels(priv, new_chs); 3440 if (err) 3441 goto err_cancel_selq; 3442 3443 *old_chs = priv->channels; 3444 3445 err = mlx5e_switch_priv_channels(priv, old_chs, new_chs, 3446 preactivate, context); 3447 if (err) 3448 goto err_close; 3449 3450 if (MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) 3451 mlx5e_close_channels(old_chs); 3452 3453 kfree(new_chs); 3454 kfree(old_chs); 3455 return 0; 3456 3457 err_close: 3458 mlx5e_close_channels(new_chs); 3459 3460 err_cancel_selq: 3461 mlx5e_selq_cancel(&priv->selq); 3462 err_free_chs: 3463 kfree(new_chs); 3464 kfree(old_chs); 3465 return err; 3466 } 3467 3468 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) 3469 { 3470 return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true); 3471 } 3472 3473 void mlx5e_timestamp_init(struct mlx5e_priv *priv) 3474 { 3475 priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; 3476 priv->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; 3477 } 3478 3479 static void mlx5e_modify_admin_state(struct mlx5_core_dev 
*mdev, 3480 enum mlx5_port_status state) 3481 { 3482 struct mlx5_eswitch *esw = mdev->priv.eswitch; 3483 int vport_admin_state; 3484 3485 mlx5_set_port_admin_status(mdev, state); 3486 3487 if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS || 3488 !MLX5_CAP_GEN(mdev, uplink_follow)) 3489 return; 3490 3491 if (state == MLX5_PORT_UP) 3492 vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO; 3493 else 3494 vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN; 3495 3496 mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state); 3497 } 3498 3499 int mlx5e_open_locked(struct net_device *netdev) 3500 { 3501 struct mlx5e_priv *priv = netdev_priv(netdev); 3502 int err; 3503 3504 mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params); 3505 3506 set_bit(MLX5E_STATE_OPENED, &priv->state); 3507 3508 err = mlx5e_open_channels(priv, &priv->channels); 3509 if (err) 3510 goto err_clear_state_opened_flag; 3511 3512 err = priv->profile->update_rx(priv); 3513 if (err) 3514 goto err_close_channels; 3515 3516 mlx5e_selq_apply(&priv->selq); 3517 mlx5e_activate_priv_channels(priv); 3518 mlx5e_apply_traps(priv, true); 3519 if (priv->profile->update_carrier) 3520 priv->profile->update_carrier(priv); 3521 3522 mlx5e_queue_update_stats(priv); 3523 return 0; 3524 3525 err_close_channels: 3526 mlx5e_close_channels(&priv->channels); 3527 err_clear_state_opened_flag: 3528 clear_bit(MLX5E_STATE_OPENED, &priv->state); 3529 mlx5e_selq_cancel(&priv->selq); 3530 return err; 3531 } 3532 3533 int mlx5e_open(struct net_device *netdev) 3534 { 3535 struct mlx5e_priv *priv = netdev_priv(netdev); 3536 int err; 3537 3538 mutex_lock(&priv->state_lock); 3539 err = mlx5e_open_locked(netdev); 3540 if (!err) 3541 mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP); 3542 mutex_unlock(&priv->state_lock); 3543 3544 return err; 3545 } 3546 3547 int mlx5e_close_locked(struct net_device *netdev) 3548 { 3549 struct mlx5e_priv *priv = netdev_priv(netdev); 3550 3551 /* May already be CLOSED in case a previous configuration operation 3552 * (e.g RX/TX queue size change) that involves close&open failed. 
3553 */ 3554 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 3555 return 0; 3556 3557 mlx5e_apply_traps(priv, false); 3558 clear_bit(MLX5E_STATE_OPENED, &priv->state); 3559 3560 netif_carrier_off(priv->netdev); 3561 mlx5e_deactivate_priv_channels(priv); 3562 mlx5e_close_channels(&priv->channels); 3563 3564 return 0; 3565 } 3566 3567 int mlx5e_close(struct net_device *netdev) 3568 { 3569 struct mlx5e_priv *priv = netdev_priv(netdev); 3570 int err; 3571 3572 if (!netif_device_present(netdev)) 3573 return -ENODEV; 3574 3575 mutex_lock(&priv->state_lock); 3576 mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN); 3577 err = mlx5e_close_locked(netdev); 3578 mutex_unlock(&priv->state_lock); 3579 3580 return err; 3581 } 3582 3583 static void mlx5e_free_drop_rq(struct mlx5e_rq *rq) 3584 { 3585 mlx5_wq_destroy(&rq->wq_ctrl); 3586 } 3587 3588 static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, 3589 struct mlx5e_rq *rq, 3590 struct mlx5e_rq_param *param) 3591 { 3592 void *rqc = param->rqc; 3593 void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); 3594 int err; 3595 3596 param->wq.db_numa_node = param->wq.buf_numa_node; 3597 3598 err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq, 3599 &rq->wq_ctrl); 3600 if (err) 3601 return err; 3602 3603 /* Mark as unused given "Drop-RQ" packets never reach XDP */ 3604 xdp_rxq_info_unused(&rq->xdp_rxq); 3605 3606 rq->mdev = mdev; 3607 3608 return 0; 3609 } 3610 3611 static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv, 3612 struct mlx5e_cq *cq, 3613 struct mlx5e_cq_param *param) 3614 { 3615 struct mlx5_core_dev *mdev = priv->mdev; 3616 3617 param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); 3618 param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); 3619 3620 return mlx5e_alloc_cq_common(priv->mdev, priv->netdev, priv->wq, 3621 mdev->priv.bfreg.up, param, cq); 3622 } 3623 3624 int mlx5e_open_drop_rq(struct mlx5e_priv *priv, 3625 struct mlx5e_rq *drop_rq) 3626 { 3627 struct mlx5_core_dev *mdev = priv->mdev; 3628 struct mlx5e_cq_param cq_param = {}; 3629 struct mlx5e_rq_param rq_param = {}; 3630 struct mlx5e_cq *cq = &drop_rq->cq; 3631 int err; 3632 3633 mlx5e_build_drop_rq_param(mdev, &rq_param); 3634 3635 err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); 3636 if (err) 3637 return err; 3638 3639 err = mlx5e_create_cq(cq, &cq_param); 3640 if (err) 3641 goto err_free_cq; 3642 3643 err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); 3644 if (err) 3645 goto err_destroy_cq; 3646 3647 err = mlx5e_create_rq(drop_rq, &rq_param, priv->drop_rq_q_counter); 3648 if (err) 3649 goto err_free_rq; 3650 3651 err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 3652 if (err) 3653 mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err); 3654 3655 return 0; 3656 3657 err_free_rq: 3658 mlx5e_free_drop_rq(drop_rq); 3659 3660 err_destroy_cq: 3661 mlx5e_destroy_cq(cq); 3662 3663 err_free_cq: 3664 mlx5e_free_cq(cq); 3665 3666 return err; 3667 } 3668 3669 void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) 3670 { 3671 mlx5e_destroy_rq(drop_rq); 3672 mlx5e_free_drop_rq(drop_rq); 3673 mlx5e_destroy_cq(&drop_rq->cq); 3674 mlx5e_free_cq(&drop_rq->cq); 3675 } 3676 3677 static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) 3678 { 3679 if (priv->mqprio_rl) { 3680 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 3681 mlx5e_mqprio_rl_free(priv->mqprio_rl); 3682 priv->mqprio_rl = NULL; 3683 } 3684 mlx5e_accel_cleanup_tx(priv); 3685 } 3686 3687 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs,
bool vsd) 3688 { 3689 int err; 3690 int i; 3691 3692 for (i = 0; i < chs->num; i++) { 3693 err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); 3694 if (err) 3695 return err; 3696 } 3697 if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) 3698 return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); 3699 3700 return 0; 3701 } 3702 3703 static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq, 3704 int ntc, int nch) 3705 { 3706 int tc; 3707 3708 memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE); 3709 3710 /* Map netdev TCs to offset 0. 3711 * We have our own UP to TXQ mapping for DCB mode of QoS 3712 */ 3713 for (tc = 0; tc < ntc; tc++) { 3714 tc_to_txq[tc] = (struct netdev_tc_txq) { 3715 .count = nch, 3716 .offset = 0, 3717 }; 3718 } 3719 } 3720 3721 static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq, 3722 struct tc_mqprio_qopt *qopt) 3723 { 3724 int tc; 3725 3726 for (tc = 0; tc < TC_MAX_QUEUE; tc++) { 3727 tc_to_txq[tc] = (struct netdev_tc_txq) { 3728 .count = qopt->count[tc], 3729 .offset = qopt->offset[tc], 3730 }; 3731 } 3732 } 3733 3734 static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) 3735 { 3736 params->mqprio.mode = TC_MQPRIO_MODE_DCB; 3737 params->mqprio.num_tc = num_tc; 3738 mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, 3739 params->num_channels); 3740 } 3741 3742 static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params, 3743 struct mlx5e_mqprio_rl *rl) 3744 { 3745 int tc; 3746 3747 for (tc = 0; tc < TC_MAX_QUEUE; tc++) { 3748 u32 hw_id = 0; 3749 3750 if (rl) 3751 mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id); 3752 params->mqprio.channel.hw_id[tc] = hw_id; 3753 } 3754 } 3755 3756 static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, 3757 struct tc_mqprio_qopt_offload *mqprio, 3758 struct mlx5e_mqprio_rl *rl) 3759 { 3760 int tc; 3761 3762 params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; 3763 params->mqprio.num_tc = mqprio->qopt.num_tc; 3764 3765 for (tc = 0; tc < TC_MAX_QUEUE; tc++) 3766 params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc]; 3767 3768 mlx5e_mqprio_rl_update_params(params, rl); 3769 mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt); 3770 } 3771 3772 static void mlx5e_params_mqprio_reset(struct mlx5e_params *params) 3773 { 3774 mlx5e_params_mqprio_dcb_set(params, 1); 3775 } 3776 3777 static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, 3778 struct tc_mqprio_qopt *mqprio) 3779 { 3780 struct mlx5e_params new_params; 3781 u8 tc = mqprio->num_tc; 3782 int err; 3783 3784 mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; 3785 3786 if (tc && tc != MLX5_MAX_NUM_TC) 3787 return -EINVAL; 3788 3789 new_params = priv->channels.params; 3790 mlx5e_params_mqprio_dcb_set(&new_params, tc ? 
tc : 1); 3791 3792 err = mlx5e_safe_switch_params(priv, &new_params, 3793 mlx5e_update_tc_and_tx_queues_ctx, NULL, true); 3794 3795 if (!err && priv->mqprio_rl) { 3796 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 3797 mlx5e_mqprio_rl_free(priv->mqprio_rl); 3798 priv->mqprio_rl = NULL; 3799 } 3800 3801 priv->max_opened_tc = max_t(u8, priv->max_opened_tc, 3802 mlx5e_get_dcb_num_tc(&priv->channels.params)); 3803 return err; 3804 } 3805 3806 static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, 3807 struct tc_mqprio_qopt_offload *mqprio) 3808 { 3809 struct net_device *netdev = priv->netdev; 3810 struct mlx5e_ptp *ptp_channel; 3811 int agg_count = 0; 3812 int i; 3813 3814 ptp_channel = priv->channels.ptp; 3815 if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) { 3816 netdev_err(netdev, 3817 "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n"); 3818 return -EINVAL; 3819 } 3820 3821 if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 || 3822 mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC) 3823 return -EINVAL; 3824 3825 for (i = 0; i < mqprio->qopt.num_tc; i++) { 3826 if (!mqprio->qopt.count[i]) { 3827 netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i); 3828 return -EINVAL; 3829 } 3830 if (mqprio->min_rate[i]) { 3831 netdev_err(netdev, "Min tx rate is not supported\n"); 3832 return -EINVAL; 3833 } 3834 3835 if (mqprio->max_rate[i]) { 3836 int err; 3837 3838 err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]); 3839 if (err) 3840 return err; 3841 } 3842 3843 if (mqprio->qopt.offset[i] != agg_count) { 3844 netdev_err(netdev, "Discontinuous queues config is not supported\n"); 3845 return -EINVAL; 3846 } 3847 agg_count += mqprio->qopt.count[i]; 3848 } 3849 3850 if (priv->channels.params.num_channels != agg_count) { 3851 netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n", 3852 agg_count, priv->channels.params.num_channels); 3853 return -EINVAL; 3854 } 3855 3856 return 0; 3857 } 3858 3859 static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[]) 3860 { 3861 int tc; 3862 3863 for (tc = 0; tc < num_tc; tc++) 3864 if (max_rate[tc]) 3865 return true; 3866 return false; 3867 } 3868 3869 static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev, 3870 u8 num_tc, u64 max_rate[]) 3871 { 3872 struct mlx5e_mqprio_rl *rl; 3873 int err; 3874 3875 if (!mlx5e_mqprio_rate_limit(num_tc, max_rate)) 3876 return NULL; 3877 3878 rl = mlx5e_mqprio_rl_alloc(); 3879 if (!rl) 3880 return ERR_PTR(-ENOMEM); 3881 3882 err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate); 3883 if (err) { 3884 mlx5e_mqprio_rl_free(rl); 3885 return ERR_PTR(err); 3886 } 3887 3888 return rl; 3889 } 3890 3891 static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, 3892 struct tc_mqprio_qopt_offload *mqprio) 3893 { 3894 struct mlx5e_params new_params; 3895 struct mlx5e_mqprio_rl *rl; 3896 int err; 3897 3898 err = mlx5e_mqprio_channel_validate(priv, mqprio); 3899 if (err) 3900 return err; 3901 3902 rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate); 3903 if (IS_ERR(rl)) 3904 return PTR_ERR(rl); 3905 3906 new_params = priv->channels.params; 3907 mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl); 3908 3909 err = mlx5e_safe_switch_params(priv, &new_params, 3910 mlx5e_update_tc_and_tx_queues_ctx, NULL, true); 3911 if (err) { 3912 if (rl) { 3913 mlx5e_mqprio_rl_cleanup(rl); 3914 mlx5e_mqprio_rl_free(rl); 3915 } 3916 return err; 3917 } 3918 3919 if (priv->mqprio_rl) { 3920 
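/* Release the previously installed per-TC rate limiter before adopting the new one. */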
mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 3921 mlx5e_mqprio_rl_free(priv->mqprio_rl); 3922 } 3923 priv->mqprio_rl = rl; 3924 3925 return 0; 3926 } 3927 3928 static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, 3929 struct tc_mqprio_qopt_offload *mqprio) 3930 { 3931 /* MQPRIO is another toplevel qdisc that can't be attached 3932 * simultaneously with the offloaded HTB. 3933 */ 3934 if (mlx5e_selq_is_htb_enabled(&priv->selq)) { 3935 NL_SET_ERR_MSG_MOD(mqprio->extack, 3936 "MQPRIO cannot be configured when HTB offload is enabled."); 3937 return -EOPNOTSUPP; 3938 } 3939 3940 switch (mqprio->mode) { 3941 case TC_MQPRIO_MODE_DCB: 3942 return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt); 3943 case TC_MQPRIO_MODE_CHANNEL: 3944 return mlx5e_setup_tc_mqprio_channel(priv, mqprio); 3945 default: 3946 return -EOPNOTSUPP; 3947 } 3948 } 3949 3950 static LIST_HEAD(mlx5e_block_cb_list); 3951 3952 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, 3953 void *type_data) 3954 { 3955 struct mlx5e_priv *priv = netdev_priv(dev); 3956 bool tc_unbind = false; 3957 int err; 3958 3959 if (type == TC_SETUP_BLOCK && 3960 ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) 3961 tc_unbind = true; 3962 3963 if (!netif_device_present(dev) && !tc_unbind) 3964 return -ENODEV; 3965 3966 switch (type) { 3967 case TC_SETUP_BLOCK: { 3968 struct flow_block_offload *f = type_data; 3969 3970 f->unlocked_driver_cb = true; 3971 return flow_block_cb_setup_simple(type_data, 3972 &mlx5e_block_cb_list, 3973 mlx5e_setup_tc_block_cb, 3974 priv, priv, true); 3975 } 3976 case TC_SETUP_QDISC_MQPRIO: 3977 mutex_lock(&priv->state_lock); 3978 err = mlx5e_setup_tc_mqprio(priv, type_data); 3979 mutex_unlock(&priv->state_lock); 3980 return err; 3981 case TC_SETUP_QDISC_HTB: 3982 mutex_lock(&priv->state_lock); 3983 err = mlx5e_htb_setup_tc(priv, type_data); 3984 mutex_unlock(&priv->state_lock); 3985 return err; 3986 default: 3987 return -EOPNOTSUPP; 3988 } 3989 } 3990 3991 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) 3992 { 3993 int i; 3994 3995 for (i = 0; i < priv->stats_nch; i++) { 3996 struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; 3997 struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; 3998 struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; 3999 int j; 4000 4001 s->rx_packets += rq_stats->packets + xskrq_stats->packets; 4002 s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; 4003 s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets; 4004 4005 for (j = 0; j < priv->max_opened_tc; j++) { 4006 struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; 4007 4008 s->tx_packets += sq_stats->packets; 4009 s->tx_bytes += sq_stats->bytes; 4010 s->tx_dropped += sq_stats->dropped; 4011 } 4012 } 4013 if (priv->tx_ptp_opened) { 4014 for (i = 0; i < priv->max_opened_tc; i++) { 4015 struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i]; 4016 4017 s->tx_packets += sq_stats->packets; 4018 s->tx_bytes += sq_stats->bytes; 4019 s->tx_dropped += sq_stats->dropped; 4020 } 4021 } 4022 if (priv->rx_ptp_opened) { 4023 struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; 4024 4025 s->rx_packets += rq_stats->packets; 4026 s->rx_bytes += rq_stats->bytes; 4027 s->multicast += rq_stats->mcast_packets; 4028 } 4029 4030 #ifdef CONFIG_MLX5_EN_PSP 4031 if (priv->psp) 4032 s->tx_dropped += atomic_read(&priv->psp->tx_drop); 4033 #endif 4034 } 4035 4036 void 4037 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) 4038 { 
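/* Fold per-channel SW counters, or PPORT/vport HW counters for the uplink representor, into the stats reported to the stack. */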
4039 struct mlx5e_priv *priv = netdev_priv(dev); 4040 struct mlx5e_pport_stats *pstats = &priv->stats.pport; 4041 4042 if (!netif_device_present(dev)) 4043 return; 4044 4045 /* In switchdev mode, monitor counters doesn't monitor 4046 * rx/tx stats of 802_3. The update stats mechanism 4047 * should keep the 802_3 layout counters updated 4048 */ 4049 if (!mlx5e_monitor_counter_supported(priv) || 4050 mlx5e_is_uplink_rep(priv)) { 4051 /* update HW stats in background for next time */ 4052 mlx5e_queue_update_stats(priv); 4053 } 4054 4055 netdev_stats_to_stats64(stats, &dev->stats); 4056 4057 if (mlx5e_is_uplink_rep(priv)) { 4058 struct mlx5e_vport_stats *vstats = &priv->stats.vport; 4059 4060 stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); 4061 stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); 4062 stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); 4063 stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); 4064 4065 /* vport multicast also counts packets that are dropped due to steering 4066 * or rx out of buffer 4067 */ 4068 stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); 4069 } else { 4070 mlx5e_fold_sw_stats64(priv, stats); 4071 } 4072 4073 stats->rx_missed_errors += priv->stats.qcnt.rx_out_of_buffer; 4074 stats->rx_dropped += PPORT_2863_GET(pstats, if_in_discards); 4075 4076 stats->rx_length_errors += 4077 PPORT_802_3_GET(pstats, a_in_range_length_errors) + 4078 PPORT_802_3_GET(pstats, a_out_of_range_length_field) + 4079 PPORT_802_3_GET(pstats, a_frame_too_long_errors) + 4080 VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small); 4081 stats->rx_crc_errors += 4082 PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); 4083 stats->rx_frame_errors += PPORT_802_3_GET(pstats, a_alignment_errors); 4084 stats->tx_aborted_errors += PPORT_2863_GET(pstats, if_out_discards); 4085 stats->rx_errors += stats->rx_length_errors + stats->rx_crc_errors + 4086 stats->rx_frame_errors; 4087 stats->tx_errors += stats->tx_aborted_errors + stats->tx_carrier_errors; 4088 } 4089 4090 static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) 4091 { 4092 if (mlx5e_is_uplink_rep(priv)) 4093 return; /* no rx mode for uplink rep */ 4094 4095 queue_work(priv->wq, &priv->set_rx_mode_work); 4096 } 4097 4098 static void mlx5e_set_rx_mode(struct net_device *dev) 4099 { 4100 struct mlx5e_priv *priv = netdev_priv(dev); 4101 4102 mlx5e_nic_set_rx_mode(priv); 4103 } 4104 4105 static int mlx5e_set_mac(struct net_device *netdev, void *addr) 4106 { 4107 struct mlx5e_priv *priv = netdev_priv(netdev); 4108 struct sockaddr *saddr = addr; 4109 4110 if (!is_valid_ether_addr(saddr->sa_data)) 4111 return -EADDRNOTAVAIL; 4112 4113 netif_addr_lock_bh(netdev); 4114 eth_hw_addr_set(netdev, saddr->sa_data); 4115 netif_addr_unlock_bh(netdev); 4116 4117 mlx5e_nic_set_rx_mode(priv); 4118 4119 return 0; 4120 } 4121 4122 #define MLX5E_SET_FEATURE(features, feature, enable) \ 4123 do { \ 4124 if (enable) \ 4125 *features |= feature; \ 4126 else \ 4127 *features &= ~feature; \ 4128 } while (0) 4129 4130 typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); 4131 4132 static int set_feature_lro(struct net_device *netdev, bool enable) 4133 { 4134 struct mlx5e_priv *priv = netdev_priv(netdev); 4135 struct mlx5_core_dev *mdev = priv->mdev; 4136 struct mlx5e_params *cur_params; 4137 struct mlx5e_params new_params; 4138 bool reset = true; 4139 int err = 0; 4140 4141 mutex_lock(&priv->state_lock); 4142 4143 cur_params = 
&priv->channels.params; 4144 new_params = *cur_params; 4145 4146 if (enable) 4147 new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO; 4148 else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO) 4149 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; 4150 else 4151 goto out; 4152 4153 if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO && 4154 new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) { 4155 if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { 4156 if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == 4157 mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) 4158 reset = false; 4159 } 4160 } 4161 4162 err = mlx5e_safe_switch_params(priv, &new_params, 4163 mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); 4164 out: 4165 mutex_unlock(&priv->state_lock); 4166 return err; 4167 } 4168 4169 static int set_feature_hw_gro(struct net_device *netdev, bool enable) 4170 { 4171 struct mlx5e_priv *priv = netdev_priv(netdev); 4172 struct mlx5e_params new_params; 4173 bool reset = true; 4174 int err = 0; 4175 4176 mutex_lock(&priv->state_lock); 4177 new_params = priv->channels.params; 4178 4179 if (enable) { 4180 new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; 4181 } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { 4182 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; 4183 } else { 4184 goto out; 4185 } 4186 4187 err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); 4188 out: 4189 mutex_unlock(&priv->state_lock); 4190 return err; 4191 } 4192 4193 static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) 4194 { 4195 struct mlx5e_priv *priv = netdev_priv(netdev); 4196 4197 if (enable) 4198 mlx5e_enable_cvlan_filter(priv->fs, 4199 !!(priv->netdev->flags & IFF_PROMISC)); 4200 else 4201 mlx5e_disable_cvlan_filter(priv->fs, 4202 !!(priv->netdev->flags & IFF_PROMISC)); 4203 4204 return 0; 4205 } 4206 4207 static int set_feature_hw_tc(struct net_device *netdev, bool enable) 4208 { 4209 struct mlx5e_priv *priv = netdev_priv(netdev); 4210 int err = 0; 4211 4212 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) 4213 int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) : 4214 MLX5_TC_FLAG(NIC_OFFLOAD); 4215 if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { 4216 netdev_err(netdev, 4217 "Active offloaded tc filters, can't turn hw_tc_offload off\n"); 4218 return -EINVAL; 4219 } 4220 #endif 4221 4222 mutex_lock(&priv->state_lock); 4223 if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) { 4224 netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); 4225 err = -EINVAL; 4226 } 4227 mutex_unlock(&priv->state_lock); 4228 4229 return err; 4230 } 4231 4232 static int set_feature_rx_all(struct net_device *netdev, bool enable) 4233 { 4234 struct mlx5e_priv *priv = netdev_priv(netdev); 4235 struct mlx5_core_dev *mdev = priv->mdev; 4236 4237 return mlx5_set_port_fcs(mdev, !enable); 4238 } 4239 4240 static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) 4241 { 4242 return (struct dim_cq_moder) { 4243 .cq_period_mode = cq_period_mode, 4244 .pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS, 4245 .usec = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
4246 MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : 4247 MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC, 4248 }; 4249 } 4250 4251 bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, 4252 bool dim_enabled) 4253 { 4254 bool reset_needed = cq_moder->cq_period_mode != cq_period_mode; 4255 4256 if (dim_enabled) 4257 *cq_moder = net_dim_get_def_rx_moderation(cq_period_mode); 4258 else 4259 *cq_moder = mlx5e_get_def_rx_moderation(cq_period_mode); 4260 4261 return reset_needed; 4262 } 4263 4264 bool mlx5e_reset_rx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, 4265 bool dim_enabled, bool keep_dim_state) 4266 { 4267 bool reset = false; 4268 int i; 4269 4270 for (i = 0; i < chs->num; i++) { 4271 if (keep_dim_state) 4272 dim_enabled = !!chs->c[i]->rq.dim; 4273 4274 reset |= mlx5e_reset_rx_moderation(&chs->c[i]->rx_cq_moder, 4275 cq_period_mode, dim_enabled); 4276 } 4277 4278 return reset; 4279 } 4280 4281 static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) 4282 { 4283 u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; 4284 bool supported, curr_state; 4285 int err; 4286 4287 if (!MLX5_CAP_GEN(mdev, ports_check)) 4288 return 0; 4289 4290 err = mlx5_query_ports_check(mdev, in, sizeof(in)); 4291 if (err) 4292 return err; 4293 4294 supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); 4295 curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); 4296 4297 if (!supported || enable == curr_state) 4298 return 0; 4299 4300 MLX5_SET(pcmr_reg, in, local_port, 1); 4301 MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); 4302 4303 return mlx5_set_ports_check(mdev, in, sizeof(in)); 4304 } 4305 4306 static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) 4307 { 4308 struct mlx5_core_dev *mdev = priv->mdev; 4309 bool enable = *(bool *)ctx; 4310 4311 return mlx5e_set_rx_port_ts(mdev, enable); 4312 } 4313 4314 static int set_feature_rx_fcs(struct net_device *netdev, bool enable) 4315 { 4316 struct mlx5e_priv *priv = netdev_priv(netdev); 4317 struct mlx5e_channels *chs = &priv->channels; 4318 struct mlx5e_params new_params; 4319 int err; 4320 bool rx_ts_over_crc = !enable; 4321 4322 mutex_lock(&priv->state_lock); 4323 4324 new_params = chs->params; 4325 new_params.scatter_fcs_en = enable; 4326 err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, 4327 &rx_ts_over_crc, true); 4328 mutex_unlock(&priv->state_lock); 4329 return err; 4330 } 4331 4332 static int set_feature_rx_vlan(struct net_device *netdev, bool enable) 4333 { 4334 struct mlx5e_priv *priv = netdev_priv(netdev); 4335 int err = 0; 4336 4337 mutex_lock(&priv->state_lock); 4338 4339 mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable); 4340 priv->channels.params.vlan_strip_disable = !enable; 4341 4342 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4343 goto unlock; 4344 4345 err = mlx5e_modify_channels_vsd(&priv->channels, !enable); 4346 if (err) { 4347 mlx5e_fs_set_vlan_strip_disable(priv->fs, enable); 4348 priv->channels.params.vlan_strip_disable = enable; 4349 } 4350 unlock: 4351 mutex_unlock(&priv->state_lock); 4352 4353 return err; 4354 } 4355 4356 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) 4357 { 4358 struct mlx5e_priv *priv = netdev_priv(dev); 4359 struct mlx5e_flow_steering *fs = priv->fs; 4360 4361 if (mlx5e_is_uplink_rep(priv)) 4362 return 0; /* no vlan table for uplink rep */ 4363 4364 return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid); 4365 } 4366 4367 int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) 
4368 { 4369 struct mlx5e_priv *priv = netdev_priv(dev); 4370 struct mlx5e_flow_steering *fs = priv->fs; 4371 4372 if (mlx5e_is_uplink_rep(priv)) 4373 return 0; /* no vlan table for uplink rep */ 4374 4375 return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid); 4376 } 4377 4378 #ifdef CONFIG_MLX5_EN_ARFS 4379 static int set_feature_arfs(struct net_device *netdev, bool enable) 4380 { 4381 struct mlx5e_priv *priv = netdev_priv(netdev); 4382 int err; 4383 4384 if (enable) 4385 err = mlx5e_arfs_enable(priv->fs); 4386 else 4387 err = mlx5e_arfs_disable(priv->fs); 4388 4389 return err; 4390 } 4391 #endif 4392 4393 static int mlx5e_handle_feature(struct net_device *netdev, 4394 netdev_features_t *features, 4395 netdev_features_t feature, 4396 mlx5e_feature_handler feature_handler) 4397 { 4398 netdev_features_t changes = *features ^ netdev->features; 4399 bool enable = !!(*features & feature); 4400 int err; 4401 4402 if (!(changes & feature)) 4403 return 0; 4404 4405 err = feature_handler(netdev, enable); 4406 if (err) { 4407 MLX5E_SET_FEATURE(features, feature, !enable); 4408 netdev_err(netdev, "%s feature %pNF failed, err %d\n", 4409 enable ? "Enable" : "Disable", &feature, err); 4410 return err; 4411 } 4412 4413 return 0; 4414 } 4415 4416 void mlx5e_set_xdp_feature(struct mlx5e_priv *priv) 4417 { 4418 struct mlx5e_params *params = &priv->channels.params; 4419 struct net_device *netdev = priv->netdev; 4420 xdp_features_t val = 0; 4421 4422 if (netdev->netdev_ops->ndo_bpf && 4423 params->packet_merge.type == MLX5E_PACKET_MERGE_NONE) 4424 val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 4425 NETDEV_XDP_ACT_XSK_ZEROCOPY | 4426 NETDEV_XDP_ACT_RX_SG; 4427 4428 if (netdev->netdev_ops->ndo_xdp_xmit && params->xdp_prog) 4429 val |= NETDEV_XDP_ACT_NDO_XMIT | 4430 NETDEV_XDP_ACT_NDO_XMIT_SG; 4431 4432 xdp_set_features_flag_locked(netdev, val); 4433 } 4434 4435 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) 4436 { 4437 netdev_features_t oper_features = features; 4438 int err = 0; 4439 4440 #define MLX5E_HANDLE_FEATURE(feature, handler) \ 4441 mlx5e_handle_feature(netdev, &oper_features, feature, handler) 4442 4443 if (features & (NETIF_F_GRO_HW | NETIF_F_LRO)) { 4444 err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); 4445 err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); 4446 err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); 4447 } else { 4448 err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); 4449 err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); 4450 err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); 4451 } 4452 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, 4453 set_feature_cvlan_filter); 4454 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); 4455 err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); 4456 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); 4457 #ifdef CONFIG_MLX5_EN_ARFS 4458 err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs); 4459 #endif 4460 err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx); 4461 4462 if (err) { 4463 netdev->features = oper_features; 4464 return -EINVAL; 4465 } 4466 4467 return 0; 4468 } 4469 4470 static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, 4471 netdev_features_t features) 4472 { 4473 features &= ~NETIF_F_HW_TLS_RX; 4474 if (netdev->features & NETIF_F_HW_TLS_RX) 4475 netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev 
mode\n"); 4476 4477 features &= ~NETIF_F_HW_TLS_TX; 4478 if (netdev->features & NETIF_F_HW_TLS_TX) 4479 netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); 4480 4481 features &= ~NETIF_F_NTUPLE; 4482 if (netdev->features & NETIF_F_NTUPLE) 4483 netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); 4484 4485 features &= ~NETIF_F_GRO_HW; 4486 if (netdev->features & NETIF_F_GRO_HW) 4487 netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); 4488 4489 features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; 4490 if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) 4491 netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); 4492 4493 features &= ~NETIF_F_HW_MACSEC; 4494 if (netdev->features & NETIF_F_HW_MACSEC) 4495 netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n"); 4496 4497 return features; 4498 } 4499 4500 static netdev_features_t mlx5e_fix_features(struct net_device *netdev, 4501 netdev_features_t features) 4502 { 4503 struct netdev_config *cfg = netdev->cfg_pending; 4504 struct mlx5e_priv *priv = netdev_priv(netdev); 4505 struct mlx5e_vlan_table *vlan; 4506 struct mlx5e_params *params; 4507 4508 if (!netif_device_present(netdev)) 4509 return features; 4510 4511 vlan = mlx5e_fs_get_vlan(priv->fs); 4512 mutex_lock(&priv->state_lock); 4513 params = &priv->channels.params; 4514 if (!vlan || 4515 !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) { 4516 /* HW strips the outer C-tag header, this is a problem 4517 * for S-tag traffic. 4518 */ 4519 features &= ~NETIF_F_HW_VLAN_CTAG_RX; 4520 if (!params->vlan_strip_disable) 4521 netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); 4522 } 4523 4524 if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { 4525 if (features & NETIF_F_LRO) { 4526 netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); 4527 features &= ~NETIF_F_LRO; 4528 } 4529 if (features & NETIF_F_GRO_HW) { 4530 netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n"); 4531 features &= ~NETIF_F_GRO_HW; 4532 } 4533 } 4534 4535 if (params->xdp_prog) { 4536 if (features & NETIF_F_LRO) { 4537 netdev_warn(netdev, "LRO is incompatible with XDP\n"); 4538 features &= ~NETIF_F_LRO; 4539 } 4540 if (features & NETIF_F_GRO_HW) { 4541 netdev_warn(netdev, "HW GRO is incompatible with XDP\n"); 4542 features &= ~NETIF_F_GRO_HW; 4543 } 4544 } 4545 4546 if (priv->xsk.refcnt) { 4547 if (features & NETIF_F_LRO) { 4548 netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", 4549 priv->xsk.refcnt); 4550 features &= ~NETIF_F_LRO; 4551 } 4552 if (features & NETIF_F_GRO_HW) { 4553 netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n", 4554 priv->xsk.refcnt); 4555 features &= ~NETIF_F_GRO_HW; 4556 } 4557 } 4558 4559 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { 4560 features &= ~NETIF_F_RXHASH; 4561 if (netdev->features & NETIF_F_RXHASH) 4562 netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); 4563 4564 if (features & NETIF_F_GRO_HW) { 4565 netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); 4566 features &= ~NETIF_F_GRO_HW; 4567 } 4568 } 4569 4570 /* The header-data split ring param requires HW GRO to stay enabled. 
*/ 4571 if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && 4572 !(features & NETIF_F_GRO_HW)) { 4573 netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n"); 4574 features |= NETIF_F_GRO_HW; 4575 } 4576 4577 if (mlx5e_is_uplink_rep(priv)) { 4578 features = mlx5e_fix_uplink_rep_features(netdev, features); 4579 netdev->netns_immutable = true; 4580 } else { 4581 netdev->netns_immutable = false; 4582 } 4583 4584 mutex_unlock(&priv->state_lock); 4585 4586 return features; 4587 } 4588 4589 static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, 4590 struct mlx5e_channels *chs, 4591 struct mlx5e_params *new_params, 4592 struct mlx5_core_dev *mdev) 4593 { 4594 u16 ix; 4595 4596 for (ix = 0; ix < chs->params.num_channels; ix++) { 4597 struct xsk_buff_pool *xsk_pool = 4598 mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); 4599 struct mlx5e_xsk_param xsk; 4600 int max_xdp_mtu; 4601 4602 if (!xsk_pool) 4603 continue; 4604 4605 mlx5e_build_xsk_param(xsk_pool, &xsk); 4606 max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); 4607 4608 /* Validate XSK params and XDP MTU in advance */ 4609 if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || 4610 new_params->sw_mtu > max_xdp_mtu) { 4611 u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); 4612 int max_mtu_frame, max_mtu_page, max_mtu; 4613 4614 /* Two criteria must be met: 4615 * 1. HW MTU + all headrooms <= XSK frame size. 4616 * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. 4617 */ 4618 max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); 4619 max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); 4620 max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); 4621 4622 netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", 4623 new_params->sw_mtu, ix, max_mtu); 4624 return false; 4625 } 4626 } 4627 4628 return true; 4629 } 4630 4631 static bool mlx5e_params_validate_xdp(struct net_device *netdev, 4632 struct mlx5_core_dev *mdev, 4633 struct mlx5e_params *params) 4634 { 4635 bool is_linear; 4636 4637 /* No XSK params: AF_XDP can't be enabled yet at the point of setting 4638 * the XDP program. 4639 */ 4640 is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ? 
4641 mlx5e_rx_is_linear_skb(mdev, params, NULL) : 4642 mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL); 4643 4644 if (!is_linear) { 4645 if (!params->xdp_prog->aux->xdp_has_frags) { 4646 netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", 4647 params->sw_mtu, 4648 mlx5e_xdp_max_mtu(params, NULL)); 4649 return false; 4650 } 4651 if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && 4652 !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) { 4653 netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", 4654 params->sw_mtu, 4655 mlx5e_xdp_max_mtu(params, NULL)); 4656 return false; 4657 } 4658 } 4659 4660 return true; 4661 } 4662 4663 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, 4664 mlx5e_fp_preactivate preactivate) 4665 { 4666 struct mlx5e_priv *priv = netdev_priv(netdev); 4667 struct mlx5e_params new_params; 4668 struct mlx5e_params *params; 4669 bool reset = true; 4670 int err = 0; 4671 4672 mutex_lock(&priv->state_lock); 4673 4674 params = &priv->channels.params; 4675 4676 new_params = *params; 4677 new_params.sw_mtu = new_mtu; 4678 err = mlx5e_validate_params(priv->mdev, &new_params); 4679 if (err) 4680 goto out; 4681 4682 if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev, 4683 &new_params)) { 4684 err = -EINVAL; 4685 goto out; 4686 } 4687 4688 if (priv->xsk.refcnt && 4689 !mlx5e_xsk_validate_mtu(netdev, &priv->channels, 4690 &new_params, priv->mdev)) { 4691 err = -EINVAL; 4692 goto out; 4693 } 4694 4695 if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO) 4696 reset = false; 4697 4698 if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && 4699 params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) { 4700 bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); 4701 bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, 4702 &new_params, NULL); 4703 u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL); 4704 u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL); 4705 4706 /* Always reset in linear mode - hw_mtu is used in data path. 4707 * Check that the mode was non-linear and didn't change. 4708 * If XSK is active, XSK RQs are linear. 4709 * Reset if the RQ size changed, even if it's non-linear. 
4710 */ 4711 if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && 4712 sz_old == sz_new) 4713 reset = false; 4714 } 4715 4716 err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset); 4717 4718 out: 4719 WRITE_ONCE(netdev->mtu, params->sw_mtu); 4720 mutex_unlock(&priv->state_lock); 4721 4722 if (!err) 4723 netdev_update_features(netdev); 4724 4725 return err; 4726 } 4727 4728 static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) 4729 { 4730 return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); 4731 } 4732 4733 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) 4734 { 4735 bool set = *(bool *)ctx; 4736 4737 return mlx5e_ptp_rx_manage_fs(priv, set); 4738 } 4739 4740 static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter) 4741 { 4742 bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; 4743 int err; 4744 4745 if (!rx_filter) 4746 /* Reset CQE compression to Admin default */ 4747 return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false); 4748 4749 if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) 4750 return 0; 4751 4752 /* Disable CQE compression */ 4753 netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); 4754 err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true); 4755 if (err) 4756 netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); 4757 4758 return err; 4759 } 4760 4761 static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) 4762 { 4763 struct mlx5e_params new_params; 4764 4765 if (ptp_rx == priv->channels.params.ptp_rx) 4766 return 0; 4767 4768 new_params = priv->channels.params; 4769 new_params.ptp_rx = ptp_rx; 4770 return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, 4771 &new_params.ptp_rx, true); 4772 } 4773 4774 int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, 4775 struct kernel_hwtstamp_config *config, 4776 struct netlink_ext_ack *extack) 4777 { 4778 bool rx_cqe_compress_def; 4779 bool ptp_rx; 4780 int err; 4781 4782 if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || 4783 (mlx5_clock_get_ptp_index(priv->mdev) == -1)) { 4784 NL_SET_ERR_MSG_MOD(extack, 4785 "Timestamps are not supported on this device"); 4786 return -EOPNOTSUPP; 4787 } 4788 4789 /* TX HW timestamp */ 4790 switch (config->tx_type) { 4791 case HWTSTAMP_TX_OFF: 4792 case HWTSTAMP_TX_ON: 4793 break; 4794 default: 4795 return -ERANGE; 4796 } 4797 4798 mutex_lock(&priv->state_lock); 4799 rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; 4800 4801 /* RX HW timestamp */ 4802 switch (config->rx_filter) { 4803 case HWTSTAMP_FILTER_NONE: 4804 ptp_rx = false; 4805 break; 4806 case HWTSTAMP_FILTER_ALL: 4807 case HWTSTAMP_FILTER_SOME: 4808 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 4809 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 4810 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 4811 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 4812 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 4813 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 4814 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 4815 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 4816 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 4817 case HWTSTAMP_FILTER_PTP_V2_EVENT: 4818 case HWTSTAMP_FILTER_PTP_V2_SYNC: 4819 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 4820 case HWTSTAMP_FILTER_NTP_ALL: 4821 config->rx_filter = HWTSTAMP_FILTER_ALL; 4822 /* ptp_rx is set if both HW TS is set and CQE 4823 * compression is set 4824 */ 4825 ptp_rx = rx_cqe_compress_def; 4826 break; 4827 
default: 4828 err = -ERANGE; 4829 goto err_unlock; 4830 } 4831 4832 if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) 4833 err = mlx5e_hwstamp_config_no_ptp_rx(priv, 4834 config->rx_filter != HWTSTAMP_FILTER_NONE); 4835 else 4836 err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx); 4837 if (err) 4838 goto err_unlock; 4839 4840 priv->hwtstamp_config = *config; 4841 mutex_unlock(&priv->state_lock); 4842 4843 /* might need to fix some features */ 4844 netdev_update_features(priv->netdev); 4845 4846 return 0; 4847 err_unlock: 4848 mutex_unlock(&priv->state_lock); 4849 return err; 4850 } 4851 4852 static int mlx5e_hwtstamp_set_ndo(struct net_device *netdev, 4853 struct kernel_hwtstamp_config *config, 4854 struct netlink_ext_ack *extack) 4855 { 4856 struct mlx5e_priv *priv = netdev_priv(netdev); 4857 4858 return mlx5e_hwtstamp_set(priv, config, extack); 4859 } 4860 4861 int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, 4862 struct kernel_hwtstamp_config *config) 4863 { 4864 if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) 4865 return -EOPNOTSUPP; 4866 4867 *config = priv->hwtstamp_config; 4868 4869 return 0; 4870 } 4871 4872 static int mlx5e_hwtstamp_get_ndo(struct net_device *dev, 4873 struct kernel_hwtstamp_config *config) 4874 { 4875 struct mlx5e_priv *priv = netdev_priv(dev); 4876 4877 return mlx5e_hwtstamp_get(priv, config); 4878 } 4879 4880 #ifdef CONFIG_MLX5_ESWITCH 4881 int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) 4882 { 4883 struct mlx5e_priv *priv = netdev_priv(dev); 4884 struct mlx5_core_dev *mdev = priv->mdev; 4885 4886 return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); 4887 } 4888 4889 static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, 4890 __be16 vlan_proto) 4891 { 4892 struct mlx5e_priv *priv = netdev_priv(dev); 4893 struct mlx5_core_dev *mdev = priv->mdev; 4894 4895 if (vlan_proto != htons(ETH_P_8021Q)) 4896 return -EPROTONOSUPPORT; 4897 4898 return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, 4899 vlan, qos); 4900 } 4901 4902 static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) 4903 { 4904 struct mlx5e_priv *priv = netdev_priv(dev); 4905 struct mlx5_core_dev *mdev = priv->mdev; 4906 4907 return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); 4908 } 4909 4910 static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) 4911 { 4912 struct mlx5e_priv *priv = netdev_priv(dev); 4913 struct mlx5_core_dev *mdev = priv->mdev; 4914 4915 return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); 4916 } 4917 4918 int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, 4919 int max_tx_rate) 4920 { 4921 struct mlx5e_priv *priv = netdev_priv(dev); 4922 struct mlx5_core_dev *mdev = priv->mdev; 4923 4924 return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, 4925 max_tx_rate, min_tx_rate); 4926 } 4927 4928 static int mlx5_vport_link2ifla(u8 esw_link) 4929 { 4930 switch (esw_link) { 4931 case MLX5_VPORT_ADMIN_STATE_DOWN: 4932 return IFLA_VF_LINK_STATE_DISABLE; 4933 case MLX5_VPORT_ADMIN_STATE_UP: 4934 return IFLA_VF_LINK_STATE_ENABLE; 4935 } 4936 return IFLA_VF_LINK_STATE_AUTO; 4937 } 4938 4939 static int mlx5_ifla_link2vport(u8 ifla_link) 4940 { 4941 switch (ifla_link) { 4942 case IFLA_VF_LINK_STATE_DISABLE: 4943 return MLX5_VPORT_ADMIN_STATE_DOWN; 4944 case IFLA_VF_LINK_STATE_ENABLE: 4945 return MLX5_VPORT_ADMIN_STATE_UP; 4946 } 4947 return MLX5_VPORT_ADMIN_STATE_AUTO; 4948 } 4949 4950 static int 
mlx5e_set_vf_link_state(struct net_device *dev, int vf, 4951 int link_state) 4952 { 4953 struct mlx5e_priv *priv = netdev_priv(dev); 4954 struct mlx5_core_dev *mdev = priv->mdev; 4955 4956 if (mlx5e_is_uplink_rep(priv)) 4957 return -EOPNOTSUPP; 4958 4959 return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, 4960 mlx5_ifla_link2vport(link_state)); 4961 } 4962 4963 int mlx5e_get_vf_config(struct net_device *dev, 4964 int vf, struct ifla_vf_info *ivi) 4965 { 4966 struct mlx5e_priv *priv = netdev_priv(dev); 4967 struct mlx5_core_dev *mdev = priv->mdev; 4968 int err; 4969 4970 if (!netif_device_present(dev)) 4971 return -EOPNOTSUPP; 4972 4973 err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); 4974 if (err) 4975 return err; 4976 ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); 4977 return 0; 4978 } 4979 4980 int mlx5e_get_vf_stats(struct net_device *dev, 4981 int vf, struct ifla_vf_stats *vf_stats) 4982 { 4983 struct mlx5e_priv *priv = netdev_priv(dev); 4984 struct mlx5_core_dev *mdev = priv->mdev; 4985 4986 return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, 4987 vf_stats); 4988 } 4989 4990 static bool 4991 mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) 4992 { 4993 struct mlx5e_priv *priv = netdev_priv(dev); 4994 4995 if (!netif_device_present(dev)) 4996 return false; 4997 4998 if (!mlx5e_is_uplink_rep(priv)) 4999 return false; 5000 5001 return mlx5e_rep_has_offload_stats(dev, attr_id); 5002 } 5003 5004 static int 5005 mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, 5006 void *sp) 5007 { 5008 struct mlx5e_priv *priv = netdev_priv(dev); 5009 5010 if (!mlx5e_is_uplink_rep(priv)) 5011 return -EOPNOTSUPP; 5012 5013 return mlx5e_rep_get_offload_stats(attr_id, dev, sp); 5014 } 5015 #endif 5016 5017 static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) 5018 { 5019 switch (proto_type) { 5020 case IPPROTO_GRE: 5021 return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); 5022 case IPPROTO_IPIP: 5023 case IPPROTO_IPV6: 5024 return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || 5025 MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx)); 5026 default: 5027 return false; 5028 } 5029 } 5030 5031 static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, 5032 struct sk_buff *skb) 5033 { 5034 switch (skb->inner_protocol) { 5035 case htons(ETH_P_IP): 5036 case htons(ETH_P_IPV6): 5037 case htons(ETH_P_TEB): 5038 return true; 5039 case htons(ETH_P_MPLS_UC): 5040 case htons(ETH_P_MPLS_MC): 5041 return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); 5042 } 5043 return false; 5044 } 5045 5046 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, 5047 struct sk_buff *skb, 5048 netdev_features_t features) 5049 { 5050 unsigned int offset = 0; 5051 struct udphdr *udph; 5052 u8 proto; 5053 u16 port; 5054 5055 switch (vlan_get_protocol(skb)) { 5056 case htons(ETH_P_IP): 5057 proto = ip_hdr(skb)->protocol; 5058 break; 5059 case htons(ETH_P_IPV6): 5060 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); 5061 break; 5062 default: 5063 goto out; 5064 } 5065 5066 switch (proto) { 5067 case IPPROTO_GRE: 5068 if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) 5069 return features; 5070 break; 5071 case IPPROTO_IPIP: 5072 case IPPROTO_IPV6: 5073 if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP)) 5074 return features; 5075 break; 5076 case IPPROTO_UDP: 5077 udph = udp_hdr(skb); 5078 port = be16_to_cpu(udph->dest); 5079 5080 /* Verify 
if UDP port is being offloaded by HW */ 5081 if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) 5082 return vxlan_features_check(skb, features); 5083 5084 #if IS_ENABLED(CONFIG_GENEVE) 5085 /* Support Geneve offload for default UDP port */ 5086 if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev)) 5087 return features; 5088 #endif 5089 break; 5090 #ifdef CONFIG_MLX5_EN_IPSEC 5091 case IPPROTO_ESP: 5092 return mlx5e_ipsec_feature_check(skb, features); 5093 #endif 5094 } 5095 5096 out: 5097 /* Disable CSUM and GSO if skb cannot be offloaded by HW */ 5098 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); 5099 } 5100 5101 netdev_features_t mlx5e_features_check(struct sk_buff *skb, 5102 struct net_device *netdev, 5103 netdev_features_t features) 5104 { 5105 struct mlx5e_priv *priv = netdev_priv(netdev); 5106 5107 features = vlan_features_check(skb, features); 5108 5109 /* Validate if the tunneled packet is being offloaded by HW */ 5110 if (skb->encapsulation && 5111 (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) 5112 return mlx5e_tunnel_features_check(priv, skb, features); 5113 5114 return features; 5115 } 5116 5117 static void mlx5e_tx_timeout_work(struct work_struct *work) 5118 { 5119 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 5120 tx_timeout_work); 5121 struct net_device *netdev = priv->netdev; 5122 int i; 5123 5124 /* Recovering the TX queues implies re-enabling NAPI, which requires 5125 * the netdev instance lock. 5126 * However, channel closing flows have to wait for this work to finish 5127 * while holding the same lock. So either get the lock or find that 5128 * channels are being closed for another reason and this work is not 5129 * relevant anymore. 5130 */ 5131 while (!netdev_trylock(netdev)) { 5132 if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) 5133 return; 5134 msleep(20); 5135 } 5136 5137 for (i = 0; i < netdev->real_num_tx_queues; i++) { 5138 struct netdev_queue *dev_queue = 5139 netdev_get_tx_queue(netdev, i); 5140 struct mlx5e_txqsq *sq = priv->txq2sq[i]; 5141 5142 if (!netif_xmit_stopped(dev_queue)) 5143 continue; 5144 5145 if (mlx5e_reporter_tx_timeout(sq)) 5146 /* break if the recovery flow reopened the channels */ 5147 break; 5148 } 5149 5150 netdev_unlock(netdev); 5151 } 5152 5153 static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) 5154 { 5155 struct mlx5e_priv *priv = netdev_priv(dev); 5156 5157 netdev_err(dev, "TX timeout detected\n"); 5158 queue_work(priv->wq, &priv->tx_timeout_work); 5159 } 5160 5161 static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev, 5162 struct mlx5e_params *params) 5163 { 5164 if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { 5165 netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); 5166 return -EINVAL; 5167 } 5168 5169 if (!mlx5e_params_validate_xdp(netdev, mdev, params)) 5170 return -EINVAL; 5171 5172 return 0; 5173 } 5174 5175 static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog) 5176 { 5177 struct bpf_prog *old_prog; 5178 5179 old_prog = rcu_replace_pointer(rq->xdp_prog, prog, 5180 lockdep_is_held(&rq->priv->state_lock)); 5181 if (old_prog) 5182 bpf_prog_put(old_prog); 5183 } 5184 5185 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) 5186 { 5187 struct mlx5e_priv *priv = netdev_priv(netdev); 5188 struct mlx5e_params new_params; 5189 struct bpf_prog *old_prog; 5190 int err = 0; 5191 bool reset; 5192 int i; 5193 5194
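/* Attach/detach flow: validate the new program against the current RQ/MTU configuration, perform a full channel restart only when XDP is turned on or off (not when one program replaces another), and on a live swap propagate the new program to every RQ and XSK RQ while taking one program reference per RQ. */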
mutex_lock(&priv->state_lock); 5195 5196 new_params = priv->channels.params; 5197 new_params.xdp_prog = prog; 5198 5199 if (prog) { 5200 err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params); 5201 if (err) 5202 goto unlock; 5203 } 5204 5205 /* no need for full reset when exchanging programs */ 5206 reset = (!priv->channels.params.xdp_prog || !prog); 5207 5208 old_prog = priv->channels.params.xdp_prog; 5209 5210 err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); 5211 if (err) 5212 goto unlock; 5213 5214 if (old_prog) 5215 bpf_prog_put(old_prog); 5216 5217 if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) 5218 goto unlock; 5219 5220 /* exchanging programs w/o reset, we update ref counts on behalf 5221 * of the channels RQs here. 5222 */ 5223 bpf_prog_add(prog, priv->channels.num); 5224 for (i = 0; i < priv->channels.num; i++) { 5225 struct mlx5e_channel *c = priv->channels.c[i]; 5226 5227 mlx5e_rq_replace_xdp_prog(&c->rq, prog); 5228 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { 5229 bpf_prog_inc(prog); 5230 mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); 5231 } 5232 } 5233 5234 unlock: 5235 mutex_unlock(&priv->state_lock); 5236 5237 /* Need to fix some features. */ 5238 if (!err) 5239 netdev_update_features(netdev); 5240 5241 return err; 5242 } 5243 5244 static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5245 { 5246 switch (xdp->command) { 5247 case XDP_SETUP_PROG: 5248 return mlx5e_xdp_set(dev, xdp->prog); 5249 case XDP_SETUP_XSK_POOL: 5250 return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool, 5251 xdp->xsk.queue_id); 5252 default: 5253 return -EINVAL; 5254 } 5255 } 5256 5257 #ifdef CONFIG_MLX5_ESWITCH 5258 static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 5259 struct net_device *dev, u32 filter_mask, 5260 int nlflags) 5261 { 5262 struct mlx5e_priv *priv = netdev_priv(dev); 5263 struct mlx5_core_dev *mdev = priv->mdev; 5264 u8 mode, setting; 5265 5266 if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) 5267 return -EOPNOTSUPP; 5268 mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; 5269 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, 5270 mode, 5271 0, 0, nlflags, filter_mask, NULL); 5272 } 5273 5274 static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, 5275 u16 flags, struct netlink_ext_ack *extack) 5276 { 5277 struct mlx5e_priv *priv = netdev_priv(dev); 5278 struct mlx5_core_dev *mdev = priv->mdev; 5279 struct nlattr *attr, *br_spec; 5280 u16 mode = BRIDGE_MODE_UNDEF; 5281 u8 setting; 5282 int rem; 5283 5284 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 5285 if (!br_spec) 5286 return -EINVAL; 5287 5288 nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { 5289 mode = nla_get_u16(attr); 5290 if (mode > BRIDGE_MODE_VEPA) 5291 return -EINVAL; 5292 5293 break; 5294 } 5295 5296 if (mode == BRIDGE_MODE_UNDEF) 5297 return -EINVAL; 5298 5299 setting = (mode == BRIDGE_MODE_VEPA) ? 
1 : 0; 5300 return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting); 5301 } 5302 #endif 5303 5304 const struct net_device_ops mlx5e_netdev_ops = { 5305 .ndo_open = mlx5e_open, 5306 .ndo_stop = mlx5e_close, 5307 .ndo_start_xmit = mlx5e_xmit, 5308 .ndo_setup_tc = mlx5e_setup_tc, 5309 .ndo_select_queue = mlx5e_select_queue, 5310 .ndo_get_stats64 = mlx5e_get_stats, 5311 .ndo_set_rx_mode = mlx5e_set_rx_mode, 5312 .ndo_set_mac_address = mlx5e_set_mac, 5313 .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, 5314 .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, 5315 .ndo_set_features = mlx5e_set_features, 5316 .ndo_fix_features = mlx5e_fix_features, 5317 .ndo_change_mtu = mlx5e_change_nic_mtu, 5318 .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, 5319 .ndo_features_check = mlx5e_features_check, 5320 .ndo_tx_timeout = mlx5e_tx_timeout, 5321 .ndo_bpf = mlx5e_xdp, 5322 .ndo_xdp_xmit = mlx5e_xdp_xmit, 5323 .ndo_xsk_wakeup = mlx5e_xsk_wakeup, 5324 .ndo_hwtstamp_get = mlx5e_hwtstamp_get_ndo, 5325 .ndo_hwtstamp_set = mlx5e_hwtstamp_set_ndo, 5326 #ifdef CONFIG_MLX5_EN_ARFS 5327 .ndo_rx_flow_steer = mlx5e_rx_flow_steer, 5328 #endif 5329 #ifdef CONFIG_MLX5_ESWITCH 5330 .ndo_bridge_setlink = mlx5e_bridge_setlink, 5331 .ndo_bridge_getlink = mlx5e_bridge_getlink, 5332 5333 /* SRIOV E-Switch NDOs */ 5334 .ndo_set_vf_mac = mlx5e_set_vf_mac, 5335 .ndo_set_vf_vlan = mlx5e_set_vf_vlan, 5336 .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, 5337 .ndo_set_vf_trust = mlx5e_set_vf_trust, 5338 .ndo_set_vf_rate = mlx5e_set_vf_rate, 5339 .ndo_get_vf_config = mlx5e_get_vf_config, 5340 .ndo_set_vf_link_state = mlx5e_set_vf_link_state, 5341 .ndo_get_vf_stats = mlx5e_get_vf_stats, 5342 .ndo_has_offload_stats = mlx5e_has_offload_stats, 5343 .ndo_get_offload_stats = mlx5e_get_offload_stats, 5344 #endif 5345 }; 5346 5347 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) 5348 { 5349 struct mlx5e_params *params = &priv->channels.params; 5350 struct mlx5_core_dev *mdev = priv->mdev; 5351 5352 params->sw_mtu = mtu; 5353 params->hard_mtu = MLX5E_ETH_HARD_MTU; 5354 params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, 5355 priv->max_nch); 5356 mlx5e_params_mqprio_reset(params); 5357 5358 /* SQ */ 5359 params->log_sq_size = is_kdump_kernel() ? 
5360 MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : 5361 MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; 5362 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); 5363 5364 /* XDP SQ */ 5365 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); 5366 5367 /* set CQE compression */ 5368 params->rx_cqe_compress_def = false; 5369 if (MLX5_CAP_GEN(mdev, cqe_compression) && 5370 MLX5_CAP_GEN(mdev, vport_group_manager)) 5371 params->rx_cqe_compress_def = slow_pci_heuristic(mdev); 5372 5373 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); 5374 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); 5375 5376 /* RQ */ 5377 mlx5e_build_rq_params(mdev, params); 5378 5379 params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev); 5380 5381 params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); 5382 5383 /* CQ moderation params */ 5384 params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation) && 5385 MLX5_CAP_GEN(mdev, cq_period_mode_modify); 5386 params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation) && 5387 MLX5_CAP_GEN(mdev, cq_period_mode_modify); 5388 params->rx_moder_use_cqe_mode = !!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe); 5389 params->tx_moder_use_cqe_mode = false; 5390 mlx5e_reset_rx_moderation(&params->rx_cq_moderation, params->rx_moder_use_cqe_mode, 5391 params->rx_dim_enabled); 5392 mlx5e_reset_tx_moderation(&params->tx_cq_moderation, params->tx_moder_use_cqe_mode, 5393 params->tx_dim_enabled); 5394 5395 /* TX inline */ 5396 mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); 5397 5398 /* AF_XDP */ 5399 params->xsk = xsk; 5400 5401 /* Do not update netdev->features directly in here 5402 * on mlx5e_attach_netdev() we will call mlx5e_update_features() 5403 * To update netdev->features please modify mlx5e_fix_features() 5404 */ 5405 } 5406 5407 static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) 5408 { 5409 struct mlx5e_priv *priv = netdev_priv(netdev); 5410 u8 addr[ETH_ALEN]; 5411 5412 mlx5_query_mac_address(priv->mdev, addr); 5413 if (is_zero_ether_addr(addr) && 5414 !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { 5415 eth_hw_addr_random(netdev); 5416 mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr); 5417 return; 5418 } 5419 5420 eth_hw_addr_set(netdev, addr); 5421 } 5422 5423 static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table, 5424 unsigned int entry, struct udp_tunnel_info *ti) 5425 { 5426 struct mlx5e_priv *priv = netdev_priv(netdev); 5427 5428 return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port)); 5429 } 5430 5431 static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table, 5432 unsigned int entry, struct udp_tunnel_info *ti) 5433 { 5434 struct mlx5e_priv *priv = netdev_priv(netdev); 5435 5436 return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port)); 5437 } 5438 5439 void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv) 5440 { 5441 if (!mlx5_vxlan_allowed(priv->mdev->vxlan)) 5442 return; 5443 5444 priv->nic_info.set_port = mlx5e_vxlan_set_port; 5445 priv->nic_info.unset_port = mlx5e_vxlan_unset_port; 5446 priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; 5447 priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; 5448 /* Don't count the space hard-coded to the IANA port */ 5449 priv->nic_info.tables[0].n_entries = 5450 mlx5_vxlan_max_udp_ports(priv->mdev) - 1; 5451 5452 priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
5453 } 5454 5455 static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev) 5456 { 5457 int tt; 5458 5459 for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { 5460 if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt))) 5461 return true; 5462 } 5463 return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)); 5464 } 5465 5466 static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, 5467 struct netdev_queue_stats_rx *stats) 5468 { 5469 struct mlx5e_priv *priv = netdev_priv(dev); 5470 struct mlx5e_channel_stats *channel_stats; 5471 struct mlx5e_rq_stats *xskrq_stats; 5472 struct mlx5e_rq_stats *rq_stats; 5473 5474 if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) 5475 return; 5476 5477 channel_stats = priv->channel_stats[i]; 5478 xskrq_stats = &channel_stats->xskrq; 5479 rq_stats = &channel_stats->rq; 5480 5481 stats->packets = rq_stats->packets + xskrq_stats->packets; 5482 stats->bytes = rq_stats->bytes + xskrq_stats->bytes; 5483 stats->alloc_fail = rq_stats->buff_alloc_err + 5484 xskrq_stats->buff_alloc_err; 5485 } 5486 5487 static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, 5488 struct netdev_queue_stats_tx *stats) 5489 { 5490 struct mlx5e_priv *priv = netdev_priv(dev); 5491 struct mlx5e_sq_stats *sq_stats; 5492 5493 if (!priv->stats_nch) 5494 return; 5495 5496 /* no special case needed for ptp htb etc since txq2sq_stats is kept up 5497 * to date for active sq_stats, otherwise get_base_stats takes care of 5498 * inactive sqs. 5499 */ 5500 sq_stats = priv->txq2sq_stats[i]; 5501 stats->packets = sq_stats->packets; 5502 stats->bytes = sq_stats->bytes; 5503 } 5504 5505 static void mlx5e_get_base_stats(struct net_device *dev, 5506 struct netdev_queue_stats_rx *rx, 5507 struct netdev_queue_stats_tx *tx) 5508 { 5509 struct mlx5e_priv *priv = netdev_priv(dev); 5510 struct mlx5e_ptp *ptp_channel; 5511 int i, tc; 5512 5513 if (!mlx5e_is_uplink_rep(priv)) { 5514 rx->packets = 0; 5515 rx->bytes = 0; 5516 rx->alloc_fail = 0; 5517 5518 for (i = priv->channels.params.num_channels; i < priv->stats_nch; i++) { 5519 struct netdev_queue_stats_rx rx_i = {0}; 5520 5521 mlx5e_get_queue_stats_rx(dev, i, &rx_i); 5522 5523 rx->packets += rx_i.packets; 5524 rx->bytes += rx_i.bytes; 5525 rx->alloc_fail += rx_i.alloc_fail; 5526 } 5527 5528 /* always report PTP RX stats from base as there is no 5529 * corresponding channel to report them under in 5530 * mlx5e_get_queue_stats_rx. 5531 */ 5532 if (priv->rx_ptp_opened) { 5533 struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; 5534 5535 rx->packets += rq_stats->packets; 5536 rx->bytes += rq_stats->bytes; 5537 } 5538 } 5539 5540 tx->packets = 0; 5541 tx->bytes = 0; 5542 5543 for (i = 0; i < priv->stats_nch; i++) { 5544 struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; 5545 5546 /* handle two cases: 5547 * 5548 * 1. channels which are active. In this case, 5549 * report only deactivated TCs on these channels. 5550 * 5551 * 2. channels which were deactivated 5552 * (i > priv->channels.params.num_channels) 5553 * must have all of their TCs [0 .. priv->max_opened_tc) 5554 * examined because deactivated channels will not be in the 5555 * range of [0..real_num_tx_queues) and will not have their 5556 * stats reported by mlx5e_get_queue_stats_tx. 
5557 */ 5558 if (i < priv->channels.params.num_channels) 5559 tc = mlx5e_get_dcb_num_tc(&priv->channels.params); 5560 else 5561 tc = 0; 5562 5563 for (; tc < priv->max_opened_tc; tc++) { 5564 struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[tc]; 5565 5566 tx->packets += sq_stats->packets; 5567 tx->bytes += sq_stats->bytes; 5568 } 5569 } 5570 5571 /* if PTP TX was opened at some point and has since either: 5572 * - been shutdown and set to NULL, or 5573 * - simply disabled (bit unset) 5574 * 5575 * report stats directly from the ptp_stats structures as these queues 5576 * are now unavailable and there is no txq index to retrieve these 5577 * stats via calls to mlx5e_get_queue_stats_tx. 5578 */ 5579 ptp_channel = priv->channels.ptp; 5580 if (priv->tx_ptp_opened && (!ptp_channel || !test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state))) { 5581 for (tc = 0; tc < priv->max_opened_tc; tc++) { 5582 struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[tc]; 5583 5584 tx->packets += sq_stats->packets; 5585 tx->bytes += sq_stats->bytes; 5586 } 5587 } 5588 } 5589 5590 static const struct netdev_stat_ops mlx5e_stat_ops = { 5591 .get_queue_stats_rx = mlx5e_get_queue_stats_rx, 5592 .get_queue_stats_tx = mlx5e_get_queue_stats_tx, 5593 .get_base_stats = mlx5e_get_base_stats, 5594 }; 5595 5596 struct mlx5_qmgmt_data { 5597 struct mlx5e_channel *c; 5598 struct mlx5e_channel_param cparam; 5599 }; 5600 5601 static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq, 5602 int queue_index) 5603 { 5604 struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; 5605 struct mlx5e_priv *priv = netdev_priv(dev); 5606 struct mlx5e_channels *chs = &priv->channels; 5607 struct mlx5e_params params = chs->params; 5608 struct mlx5_core_dev *mdev; 5609 int err; 5610 5611 mutex_lock(&priv->state_lock); 5612 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 5613 err = -ENODEV; 5614 goto unlock; 5615 } 5616 5617 if (queue_index >= chs->num) { 5618 err = -ERANGE; 5619 goto unlock; 5620 } 5621 5622 if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || 5623 chs->params.ptp_rx || 5624 chs->params.xdp_prog || 5625 priv->htb) { 5626 netdev_err(priv->netdev, 5627 "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n"); 5628 err = -EOPNOTSUPP; 5629 goto unlock; 5630 } 5631 5632 mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); 5633 err = mlx5e_build_channel_param(mdev, &params, &new->cparam); 5634 if (err) 5635 goto unlock; 5636 5637 err = mlx5e_open_channel(priv, queue_index, &params, NULL, &new->c); 5638 unlock: 5639 mutex_unlock(&priv->state_lock); 5640 return err; 5641 } 5642 5643 static void mlx5e_queue_mem_free(struct net_device *dev, void *mem) 5644 { 5645 struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem; 5646 5647 /* Not expected to happen, since mlx5e_queue_start never fails, 5648 * but close the channel here just in case. 5649 */ 5650 if (data->c) 5651 mlx5e_close_channel(data->c); 5652 } 5653 5654 static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index) 5655 { 5656 /* In mlx5 a txq cannot simply be stopped in isolation, only restarted. 5657 * Since mlx5e_queue_start does not fail, the old queue is stopped there. 5658 * TODO: Improve this.
5659 */ 5660 return 0; 5661 } 5662 5663 static int mlx5e_queue_start(struct net_device *dev, void *newq, 5664 int queue_index) 5665 { 5666 struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; 5667 struct mlx5e_priv *priv = netdev_priv(dev); 5668 struct mlx5e_channel *old; 5669 5670 mutex_lock(&priv->state_lock); 5671 5672 /* stop and close the old */ 5673 old = priv->channels.c[queue_index]; 5674 mlx5e_deactivate_priv_channels(priv); 5675 /* close old before activating new, to avoid napi conflict */ 5676 mlx5e_close_channel(old); 5677 5678 /* start the new */ 5679 priv->channels.c[queue_index] = new->c; 5680 mlx5e_activate_priv_channels(priv); 5681 mutex_unlock(&priv->state_lock); 5682 return 0; 5683 } 5684 5685 static struct device *mlx5e_queue_get_dma_dev(struct net_device *dev, 5686 int queue_index) 5687 { 5688 struct mlx5e_priv *priv = netdev_priv(dev); 5689 struct mlx5e_channels *channels; 5690 struct device *pdev = NULL; 5691 struct mlx5e_channel *ch; 5692 5693 channels = &priv->channels; 5694 5695 mutex_lock(&priv->state_lock); 5696 5697 if (queue_index >= channels->num) 5698 goto out; 5699 5700 ch = channels->c[queue_index]; 5701 pdev = ch->pdev; 5702 out: 5703 mutex_unlock(&priv->state_lock); 5704 5705 return pdev; 5706 } 5707 5708 static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { 5709 .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), 5710 .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, 5711 .ndo_queue_mem_free = mlx5e_queue_mem_free, 5712 .ndo_queue_start = mlx5e_queue_start, 5713 .ndo_queue_stop = mlx5e_queue_stop, 5714 .ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev, 5715 }; 5716 5717 static void mlx5e_build_nic_netdev(struct net_device *netdev) 5718 { 5719 struct mlx5e_priv *priv = netdev_priv(netdev); 5720 struct mlx5_core_dev *mdev = priv->mdev; 5721 bool fcs_supported; 5722 bool fcs_enabled; 5723 5724 SET_NETDEV_DEV(netdev, mdev->device); 5725 5726 netdev->netdev_ops = &mlx5e_netdev_ops; 5727 netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops; 5728 netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; 5729 netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops; 5730 netdev->request_ops_lock = true; 5731 netdev_lockdep_set_classes(netdev); 5732 5733 mlx5e_dcbnl_build_netdev(netdev); 5734 5735 netdev->watchdog_timeo = 15 * HZ; 5736 5737 netdev->stat_ops = &mlx5e_stat_ops; 5738 netdev->ethtool_ops = &mlx5e_ethtool_ops; 5739 5740 netdev->vlan_features |= NETIF_F_SG; 5741 netdev->vlan_features |= NETIF_F_HW_CSUM; 5742 netdev->vlan_features |= NETIF_F_HW_MACSEC; 5743 netdev->vlan_features |= NETIF_F_GRO; 5744 netdev->vlan_features |= NETIF_F_TSO; 5745 netdev->vlan_features |= NETIF_F_TSO6; 5746 netdev->vlan_features |= NETIF_F_RXCSUM; 5747 netdev->vlan_features |= NETIF_F_RXHASH; 5748 netdev->vlan_features |= NETIF_F_GSO_PARTIAL; 5749 5750 netdev->mpls_features |= NETIF_F_SG; 5751 netdev->mpls_features |= NETIF_F_HW_CSUM; 5752 netdev->mpls_features |= NETIF_F_TSO; 5753 netdev->mpls_features |= NETIF_F_TSO6; 5754 5755 netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; 5756 netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; 5757 5758 /* Tunneled LRO is not supported in the driver, and the same RQs are 5759 * shared between inner and outer TIRs, so the driver can't disable LRO 5760 * for inner TIRs while having it enabled for outer TIRs. Due to this, 5761 * block LRO altogether if the firmware declares tunneled LRO support. 
5762 */ 5763 if (!!MLX5_CAP_ETH(mdev, lro_cap) && 5764 !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && 5765 !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && 5766 mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, 5767 MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5768 netdev->vlan_features |= NETIF_F_LRO; 5769 5770 if (mlx5e_hw_gro_supported(mdev) && 5771 mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, 5772 MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5773 netdev->vlan_features |= NETIF_F_GRO_HW; 5774 5775 netdev->hw_features = netdev->vlan_features; 5776 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 5777 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 5778 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; 5779 netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; 5780 5781 if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { 5782 netdev->hw_enc_features |= NETIF_F_HW_CSUM; 5783 netdev->hw_enc_features |= NETIF_F_TSO; 5784 netdev->hw_enc_features |= NETIF_F_TSO6; 5785 netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; 5786 } 5787 5788 if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { 5789 netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | 5790 NETIF_F_GSO_UDP_TUNNEL_CSUM; 5791 netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | 5792 NETIF_F_GSO_UDP_TUNNEL_CSUM; 5793 netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; 5794 netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | 5795 NETIF_F_GSO_UDP_TUNNEL_CSUM; 5796 } 5797 5798 if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { 5799 netdev->hw_features |= NETIF_F_GSO_GRE | 5800 NETIF_F_GSO_GRE_CSUM; 5801 netdev->hw_enc_features |= NETIF_F_GSO_GRE | 5802 NETIF_F_GSO_GRE_CSUM; 5803 netdev->gso_partial_features |= NETIF_F_GSO_GRE | 5804 NETIF_F_GSO_GRE_CSUM; 5805 } 5806 5807 if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { 5808 netdev->hw_features |= NETIF_F_GSO_IPXIP4 | 5809 NETIF_F_GSO_IPXIP6; 5810 netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 | 5811 NETIF_F_GSO_IPXIP6; 5812 netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 | 5813 NETIF_F_GSO_IPXIP6; 5814 } 5815 5816 netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; 5817 netdev->hw_features |= NETIF_F_GSO_UDP_L4; 5818 5819 mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); 5820 5821 if (fcs_supported) 5822 netdev->hw_features |= NETIF_F_RXALL; 5823 5824 if (MLX5_CAP_ETH(mdev, scatter_fcs)) 5825 netdev->hw_features |= NETIF_F_RXFCS; 5826 5827 if (mlx5_qos_is_supported(mdev)) 5828 netdev->hw_features |= NETIF_F_HW_TC; 5829 5830 netdev->features = netdev->hw_features; 5831 5832 /* Defaults */ 5833 if (fcs_enabled) 5834 netdev->features &= ~NETIF_F_RXALL; 5835 netdev->features &= ~NETIF_F_LRO; 5836 netdev->features &= ~NETIF_F_GRO_HW; 5837 netdev->features &= ~NETIF_F_RXFCS; 5838 5839 #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) 5840 if (FT_CAP(flow_modify_en) && 5841 FT_CAP(modify_root) && 5842 FT_CAP(identified_miss_table_mode) && 5843 FT_CAP(flow_table_modify)) { 5844 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) 5845 netdev->hw_features |= NETIF_F_HW_TC; 5846 #endif 5847 #if IS_ENABLED(CONFIG_MLX5_EN_ARFS) 5848 netdev->hw_features |= NETIF_F_NTUPLE; 5849 #elif IS_ENABLED(CONFIG_MLX5_EN_RXNFC) 5850 netdev->features |= NETIF_F_NTUPLE; 5851 #endif 5852 } 5853 5854 netdev->features |= NETIF_F_HIGHDMA; 5855 netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; 5856 5857 netdev->priv_flags |= IFF_UNICAST_FLT; 5858 5859 netdev->netmem_tx = true; 5860 5861 netif_set_tso_max_size(netdev, GSO_MAX_SIZE); 5862 mlx5e_set_xdp_feature(priv); 5863 mlx5e_set_netdev_dev_addr(netdev); 
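/* Crypto offload subsystems (MACsec, IPsec, kTLS) add their own netdev feature bits and callbacks on top of the base feature set built above. */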
5864 mlx5e_macsec_build_netdev(priv); 5865 mlx5e_ipsec_build_netdev(priv); 5866 mlx5e_ktls_build_netdev(priv); 5867 } 5868 5869 void mlx5e_create_q_counters(struct mlx5e_priv *priv) 5870 { 5871 u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 5872 u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 5873 struct mlx5_core_dev *mdev = priv->mdev; 5874 struct mlx5_core_dev *pos; 5875 int err, i; 5876 5877 MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); 5878 5879 mlx5_sd_for_each_dev(i, mdev, pos) { 5880 err = mlx5_cmd_exec_inout(pos, alloc_q_counter, in, out); 5881 if (!err) 5882 priv->q_counter[i] = 5883 MLX5_GET(alloc_q_counter_out, out, counter_set_id); 5884 } 5885 5886 err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); 5887 if (!err) 5888 priv->drop_rq_q_counter = 5889 MLX5_GET(alloc_q_counter_out, out, counter_set_id); 5890 } 5891 5892 void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) 5893 { 5894 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 5895 struct mlx5_core_dev *pos; 5896 int i; 5897 5898 MLX5_SET(dealloc_q_counter_in, in, opcode, 5899 MLX5_CMD_OP_DEALLOC_Q_COUNTER); 5900 mlx5_sd_for_each_dev(i, priv->mdev, pos) { 5901 if (priv->q_counter[i]) { 5902 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 5903 priv->q_counter[i]); 5904 mlx5_cmd_exec_in(pos, dealloc_q_counter, in); 5905 } 5906 } 5907 5908 if (priv->drop_rq_q_counter) { 5909 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 5910 priv->drop_rq_q_counter); 5911 mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); 5912 } 5913 } 5914 5915 static int mlx5e_nic_init(struct mlx5_core_dev *mdev, 5916 struct net_device *netdev) 5917 { 5918 const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; 5919 struct mlx5e_priv *priv = netdev_priv(netdev); 5920 struct mlx5e_flow_steering *fs; 5921 int err; 5922 5923 mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); 5924 mlx5e_vxlan_set_netdev_info(priv); 5925 5926 mlx5e_timestamp_init(priv); 5927 5928 priv->dfs_root = debugfs_create_dir("nic", 5929 mlx5_debugfs_get_dev_root(mdev)); 5930 5931 fs = mlx5e_fs_init(priv->profile, mdev, 5932 !test_bit(MLX5E_STATE_DESTROYING, &priv->state), 5933 priv->dfs_root); 5934 if (!fs) { 5935 err = -ENOMEM; 5936 mlx5_core_err(mdev, "FS initialization failed, %d\n", err); 5937 debugfs_remove_recursive(priv->dfs_root); 5938 return err; 5939 } 5940 priv->fs = fs; 5941 5942 err = mlx5e_psp_init(priv); 5943 if (err) 5944 mlx5_core_err(mdev, "PSP initialization failed, %d\n", err); 5945 5946 err = mlx5e_ktls_init(priv); 5947 if (err) 5948 mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); 5949 5950 mlx5e_health_create_reporters(priv); 5951 5952 /* If netdev is already registered (e.g. move from uplink to nic profile), 5953 * RTNL lock must be held before triggering netdev notifiers. 
5954 */ 5955 if (take_rtnl) 5956 rtnl_lock(); 5957 5958 mlx5e_psp_register(priv); 5959 /* update XDP supported features */ 5960 mlx5e_set_xdp_feature(priv); 5961 5962 if (take_rtnl) 5963 rtnl_unlock(); 5964 5965 return 0; 5966 } 5967 5968 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) 5969 { 5970 mlx5e_health_destroy_reporters(priv); 5971 mlx5e_psp_unregister(priv); 5972 mlx5e_ktls_cleanup(priv); 5973 mlx5e_psp_cleanup(priv); 5974 mlx5e_fs_cleanup(priv->fs); 5975 debugfs_remove_recursive(priv->dfs_root); 5976 priv->fs = NULL; 5977 } 5978 5979 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) 5980 { 5981 struct mlx5_core_dev *mdev = priv->mdev; 5982 enum mlx5e_rx_res_features features; 5983 int err; 5984 5985 mlx5e_create_q_counters(priv); 5986 5987 err = mlx5e_open_drop_rq(priv, &priv->drop_rq); 5988 if (err) { 5989 mlx5_core_err(mdev, "open drop rq failed, %d\n", err); 5990 goto err_destroy_q_counters; 5991 } 5992 5993 features = MLX5E_RX_RES_FEATURE_PTP; 5994 if (mlx5_tunnel_inner_ft_supported(mdev)) 5995 features |= MLX5E_RX_RES_FEATURE_INNER_FT; 5996 if (mlx5_get_sd(priv->mdev)) 5997 features |= MLX5E_RX_RES_FEATURE_MULTI_VHCA; 5998 5999 priv->rx_res = mlx5e_rx_res_create(priv->mdev, features, priv->max_nch, priv->drop_rq.rqn, 6000 &priv->channels.params.packet_merge, 6001 priv->channels.params.num_channels); 6002 if (IS_ERR(priv->rx_res)) { 6003 err = PTR_ERR(priv->rx_res); 6004 priv->rx_res = NULL; 6005 mlx5_core_err(mdev, "create rx resources failed, %d\n", err); 6006 goto err_close_drop_rq; 6007 } 6008 6009 err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile, 6010 priv->netdev); 6011 if (err) { 6012 mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); 6013 goto err_destroy_rx_res; 6014 } 6015 6016 err = mlx5e_tc_nic_init(priv); 6017 if (err) 6018 goto err_destroy_flow_steering; 6019 6020 err = mlx5e_accel_init_rx(priv); 6021 if (err) 6022 goto err_tc_nic_cleanup; 6023 6024 #ifdef CONFIG_MLX5_EN_ARFS 6025 priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev); 6026 #endif 6027 6028 return 0; 6029 6030 err_tc_nic_cleanup: 6031 mlx5e_tc_nic_cleanup(priv); 6032 err_destroy_flow_steering: 6033 mlx5e_destroy_flow_steering(priv->fs, mlx5e_fs_has_arfs(priv->netdev), 6034 priv->profile); 6035 err_destroy_rx_res: 6036 mlx5e_rx_res_destroy(priv->rx_res); 6037 priv->rx_res = NULL; 6038 err_close_drop_rq: 6039 mlx5e_close_drop_rq(&priv->drop_rq); 6040 err_destroy_q_counters: 6041 mlx5e_destroy_q_counters(priv); 6042 return err; 6043 } 6044 6045 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) 6046 { 6047 mlx5e_accel_cleanup_rx(priv); 6048 mlx5e_tc_nic_cleanup(priv); 6049 mlx5e_destroy_flow_steering(priv->fs, mlx5e_fs_has_arfs(priv->netdev), 6050 priv->profile); 6051 mlx5e_rx_res_destroy(priv->rx_res); 6052 priv->rx_res = NULL; 6053 mlx5e_close_drop_rq(&priv->drop_rq); 6054 mlx5e_destroy_q_counters(priv); 6055 } 6056 6057 static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv) 6058 { 6059 struct mlx5e_params *params; 6060 struct mlx5e_mqprio_rl *rl; 6061 6062 params = &priv->channels.params; 6063 if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) 6064 return; 6065 6066 rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc, 6067 params->mqprio.channel.max_rate); 6068 if (IS_ERR(rl)) 6069 rl = NULL; 6070 priv->mqprio_rl = rl; 6071 mlx5e_mqprio_rl_update_params(params, rl); 6072 } 6073 6074 static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) 6075 { 6076 int err; 6077 6078 err = mlx5e_accel_init_tx(priv); 6079 if (err) 6080 return 
err; 6081 6082 mlx5e_set_mqprio_rl(priv); 6083 mlx5e_dcbnl_initialize(priv); 6084 return 0; 6085 } 6086 6087 static void mlx5e_nic_enable(struct mlx5e_priv *priv) 6088 { 6089 struct net_device *netdev = priv->netdev; 6090 struct mlx5_core_dev *mdev = priv->mdev; 6091 int err; 6092 6093 mlx5e_fs_init_l2_addr(priv->fs, netdev); 6094 mlx5e_ipsec_init(priv); 6095 6096 err = mlx5e_macsec_init(priv); 6097 if (err) 6098 mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err); 6099 6100 /* Marking the link as currently not needed by the Driver */ 6101 if (!netif_running(netdev)) 6102 mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN); 6103 6104 mlx5e_set_netdev_mtu_boundaries(priv); 6105 mlx5e_set_dev_port_mtu(priv); 6106 6107 mlx5_lag_add_netdev(mdev, netdev); 6108 6109 mlx5e_enable_async_events(priv); 6110 mlx5e_enable_blocking_events(priv); 6111 if (mlx5e_monitor_counter_supported(priv)) 6112 mlx5e_monitor_counter_init(priv); 6113 6114 mlx5e_pcie_cong_event_init(priv); 6115 mlx5e_hv_vhca_stats_create(priv); 6116 if (netdev->reg_state != NETREG_REGISTERED) 6117 return; 6118 mlx5e_dcbnl_init_app(priv); 6119 6120 mlx5e_nic_set_rx_mode(priv); 6121 6122 rtnl_lock(); 6123 netdev_lock(netdev); 6124 if (netif_running(netdev)) 6125 mlx5e_open(netdev); 6126 udp_tunnel_nic_reset_ntf(priv->netdev); 6127 netdev_unlock(netdev); 6128 netif_device_attach(netdev); 6129 rtnl_unlock(); 6130 } 6131 6132 static void mlx5e_nic_disable(struct mlx5e_priv *priv) 6133 { 6134 struct mlx5_core_dev *mdev = priv->mdev; 6135 6136 if (priv->netdev->reg_state == NETREG_REGISTERED) 6137 mlx5e_dcbnl_delete_app(priv); 6138 6139 rtnl_lock(); 6140 netdev_lock(priv->netdev); 6141 if (netif_running(priv->netdev)) 6142 mlx5e_close(priv->netdev); 6143 netif_device_detach(priv->netdev); 6144 if (priv->en_trap) { 6145 mlx5e_deactivate_trap(priv); 6146 mlx5e_close_trap(priv->en_trap); 6147 priv->en_trap = NULL; 6148 } 6149 netdev_unlock(priv->netdev); 6150 rtnl_unlock(); 6151 6152 mlx5e_nic_set_rx_mode(priv); 6153 6154 mlx5e_pcie_cong_event_cleanup(priv); 6155 mlx5e_hv_vhca_stats_destroy(priv); 6156 if (mlx5e_monitor_counter_supported(priv)) 6157 mlx5e_monitor_counter_cleanup(priv); 6158 6159 mlx5e_ipsec_disable_events(priv); 6160 mlx5e_disable_blocking_events(priv); 6161 mlx5e_disable_async_events(priv); 6162 mlx5_lag_remove_netdev(mdev, priv->netdev); 6163 mlx5_vxlan_reset_to_default(mdev->vxlan); 6164 mlx5e_macsec_cleanup(priv); 6165 mlx5e_ipsec_cleanup(priv); 6166 } 6167 6168 static int mlx5e_update_nic_rx(struct mlx5e_priv *priv) 6169 { 6170 return mlx5e_refresh_tirs(priv->mdev, false, false); 6171 } 6172 6173 static const struct mlx5e_profile mlx5e_nic_profile = { 6174 .init = mlx5e_nic_init, 6175 .cleanup = mlx5e_nic_cleanup, 6176 .init_rx = mlx5e_init_nic_rx, 6177 .cleanup_rx = mlx5e_cleanup_nic_rx, 6178 .init_tx = mlx5e_init_nic_tx, 6179 .cleanup_tx = mlx5e_cleanup_nic_tx, 6180 .enable = mlx5e_nic_enable, 6181 .disable = mlx5e_nic_disable, 6182 .update_rx = mlx5e_update_nic_rx, 6183 .update_stats = mlx5e_stats_update_ndo_stats, 6184 .update_carrier = mlx5e_update_carrier, 6185 .rx_handlers = &mlx5e_rx_handlers_nic, 6186 .max_tc = MLX5_MAX_NUM_TC, 6187 .stats_grps = mlx5e_nic_stats_grps, 6188 .stats_grps_num = mlx5e_nic_stats_grps_num, 6189 .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | 6190 BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | 6191 BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) | 6192 BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) | 6193 BIT(MLX5E_PROFILE_FEATURE_FS_TC), 6194 }; 6195 6196 static int mlx5e_profile_max_num_channels(struct mlx5_core_dev 
*mdev, 6197 const struct mlx5e_profile *profile) 6198 { 6199 int nch; 6200 6201 nch = mlx5e_get_max_num_channels(mdev); 6202 6203 if (profile->max_nch_limit) 6204 nch = min_t(int, nch, profile->max_nch_limit(mdev)); 6205 return nch; 6206 } 6207 6208 static unsigned int 6209 mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, 6210 const struct mlx5e_profile *profile) 6211 6212 { 6213 unsigned int max_nch, tmp; 6214 6215 /* core resources */ 6216 max_nch = mlx5e_profile_max_num_channels(mdev, profile); 6217 6218 /* netdev rx queues */ 6219 max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues); 6220 6221 /* netdev tx queues */ 6222 tmp = netdev->num_tx_queues; 6223 if (mlx5_qos_is_supported(mdev)) 6224 tmp -= mlx5e_qos_max_leaf_nodes(mdev); 6225 if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) 6226 tmp -= profile->max_tc; 6227 tmp = tmp / profile->max_tc; 6228 max_nch = min_t(unsigned int, max_nch, tmp); 6229 6230 return max_nch; 6231 } 6232 6233 int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev) 6234 { 6235 /* Indirect TIRS: 2 sets of TTCs (inner + outer steering) 6236 * and 1 set of direct TIRS 6237 */ 6238 return 2 * MLX5E_NUM_INDIR_TIRS 6239 + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile); 6240 } 6241 6242 void mlx5e_set_rx_mode_work(struct work_struct *work) 6243 { 6244 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 6245 set_rx_mode_work); 6246 6247 return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev); 6248 } 6249 6250 /* mlx5e generic netdev management API (move to en_common.c) */ 6251 int mlx5e_priv_init(struct mlx5e_priv *priv, 6252 const struct mlx5e_profile *profile, 6253 struct net_device *netdev, 6254 struct mlx5_core_dev *mdev) 6255 { 6256 int nch, num_txqs, node; 6257 int err; 6258 6259 num_txqs = netdev->num_tx_queues; 6260 nch = mlx5e_calc_max_nch(mdev, netdev, profile); 6261 node = dev_to_node(mlx5_core_dma_dev(mdev)); 6262 6263 /* priv init */ 6264 priv->mdev = mdev; 6265 priv->netdev = netdev; 6266 priv->max_nch = nch; 6267 priv->max_opened_tc = 1; 6268 6269 if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) 6270 return -ENOMEM; 6271 6272 mutex_init(&priv->state_lock); 6273 6274 err = mlx5e_selq_init(&priv->selq, &priv->state_lock); 6275 if (err) 6276 goto err_free_cpumask; 6277 6278 INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); 6279 INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); 6280 INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); 6281 INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); 6282 6283 priv->wq = create_singlethread_workqueue("mlx5e"); 6284 if (!priv->wq) 6285 goto err_free_selq; 6286 6287 priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); 6288 if (!priv->txq2sq) 6289 goto err_destroy_workqueue; 6290 6291 priv->txq2sq_stats = kcalloc_node(num_txqs, sizeof(*priv->txq2sq_stats), GFP_KERNEL, node); 6292 if (!priv->txq2sq_stats) 6293 goto err_free_txq2sq; 6294 6295 priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); 6296 if (!priv->tx_rates) 6297 goto err_free_txq2sq_stats; 6298 6299 priv->channel_stats = 6300 kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); 6301 if (!priv->channel_stats) 6302 goto err_free_tx_rates; 6303 6304 priv->fec_ranges = kcalloc(ETHTOOL_FEC_HIST_MAX, 6305 sizeof(*priv->fec_ranges), GFP_KERNEL); 6306 if (!priv->fec_ranges) 6307 goto err_free_channel_stats; 6308 6309 return 0; 6310 6311 err_free_channel_stats: 6312 kfree(priv->channel_stats); 
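/* Error unwind: the labels below release the allocations above in reverse order. */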
6313 err_free_tx_rates: 6314 kfree(priv->tx_rates); 6315 err_free_txq2sq_stats: 6316 kfree(priv->txq2sq_stats); 6317 err_free_txq2sq: 6318 kfree(priv->txq2sq); 6319 err_destroy_workqueue: 6320 destroy_workqueue(priv->wq); 6321 err_free_selq: 6322 mlx5e_selq_cleanup(&priv->selq); 6323 err_free_cpumask: 6324 free_cpumask_var(priv->scratchpad.cpumask); 6325 return -ENOMEM; 6326 } 6327 6328 void mlx5e_priv_cleanup(struct mlx5e_priv *priv) 6329 { 6330 bool destroying = test_bit(MLX5E_STATE_DESTROYING, &priv->state); 6331 int i; 6332 6333 /* bail if change profile failed and also rollback failed */ 6334 if (!priv->mdev) 6335 return; 6336 6337 kfree(priv->fec_ranges); 6338 for (i = 0; i < priv->stats_nch; i++) 6339 kvfree(priv->channel_stats[i]); 6340 kfree(priv->channel_stats); 6341 kfree(priv->tx_rates); 6342 kfree(priv->txq2sq_stats); 6343 kfree(priv->txq2sq); 6344 destroy_workqueue(priv->wq); 6345 mlx5e_selq_cleanup(&priv->selq); 6346 free_cpumask_var(priv->scratchpad.cpumask); 6347 6348 for (i = 0; i < priv->htb_max_qos_sqs; i++) 6349 kfree(priv->htb_qos_sq_stats[i]); 6350 kvfree(priv->htb_qos_sq_stats); 6351 6352 if (priv->mqprio_rl) { 6353 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); 6354 mlx5e_mqprio_rl_free(priv->mqprio_rl); 6355 } 6356 6357 memset(priv, 0, sizeof(*priv)); 6358 if (destroying) /* restore destroying bit, to allow unload */ 6359 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6360 } 6361 6362 static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, 6363 const struct mlx5e_profile *profile) 6364 { 6365 unsigned int nch, ptp_txqs, qos_txqs; 6366 6367 nch = mlx5e_profile_max_num_channels(mdev, profile); 6368 6369 ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && 6370 mlx5e_profile_feature_cap(profile, PTP_TX) ? 6371 profile->max_tc : 0; 6372 6373 qos_txqs = mlx5_qos_is_supported(mdev) && 6374 mlx5e_profile_feature_cap(profile, QOS_HTB) ? 
6375 mlx5e_qos_max_leaf_nodes(mdev) : 0; 6376 6377 return nch * profile->max_tc + ptp_txqs + qos_txqs; 6378 } 6379 6380 static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, 6381 const struct mlx5e_profile *profile) 6382 { 6383 return mlx5e_profile_max_num_channels(mdev, profile); 6384 } 6385 6386 struct net_device * 6387 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) 6388 { 6389 struct net_device *netdev; 6390 unsigned int txqs, rxqs; 6391 int err; 6392 6393 txqs = mlx5e_get_max_num_txqs(mdev, profile); 6394 rxqs = mlx5e_get_max_num_rxqs(mdev, profile); 6395 6396 netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); 6397 if (!netdev) { 6398 mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); 6399 return NULL; 6400 } 6401 6402 err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev); 6403 if (err) { 6404 mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); 6405 goto err_free_netdev; 6406 } 6407 6408 netif_carrier_off(netdev); 6409 netif_tx_disable(netdev); 6410 dev_net_set(netdev, mlx5_core_net(mdev)); 6411 6412 return netdev; 6413 6414 err_free_netdev: 6415 free_netdev(netdev); 6416 6417 return NULL; 6418 } 6419 6420 static void mlx5e_update_features(struct net_device *netdev) 6421 { 6422 if (netdev->reg_state != NETREG_REGISTERED) 6423 return; /* features will be updated on netdev registration */ 6424 6425 rtnl_lock(); 6426 netdev_lock(netdev); 6427 netdev_update_features(netdev); 6428 netdev_unlock(netdev); 6429 rtnl_unlock(); 6430 } 6431 6432 static void mlx5e_reset_channels(struct net_device *netdev) 6433 { 6434 netdev_reset_tc(netdev); 6435 } 6436 6437 int mlx5e_attach_netdev(struct mlx5e_priv *priv) 6438 { 6439 const bool need_lock = priv->netdev->reg_state == NETREG_REGISTERED; 6440 const struct mlx5e_profile *profile = priv->profile; 6441 int max_nch; 6442 int err; 6443 6444 clear_bit(MLX5E_STATE_DESTROYING, &priv->state); 6445 if (priv->fs) 6446 mlx5e_fs_set_state_destroy(priv->fs, 6447 !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); 6448 6449 /* Validate the max_wqe_size_sq capability. */ 6450 if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { 6451 mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %u\n", 6452 mlx5e_get_max_sq_wqebbs(priv->mdev), (unsigned int)MLX5E_MAX_TX_WQEBBS); 6453 return -EIO; 6454 } 6455 6456 /* max number of channels may have changed */ 6457 max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); 6458 if (priv->channels.params.num_channels > max_nch) { 6459 mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); 6460 /* Reducing the number of channels - RXFH has to be reset, and 6461 * mlx5e_num_channels_changed below will build the RQT. 6462 */ 6463 priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; 6464 priv->channels.params.num_channels = max_nch; 6465 if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { 6466 mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n"); 6467 mlx5e_params_mqprio_reset(&priv->channels.params); 6468 } 6469 } 6470 if (max_nch != priv->max_nch) { 6471 mlx5_core_warn(priv->mdev, 6472 "MLX5E: Updating max number of channels from %u to %u\n", 6473 priv->max_nch, max_nch); 6474 priv->max_nch = max_nch; 6475 } 6476 6477 /* 1. Set the real number of queues in the kernel the first time. 6478 * 2. Set our default XPS cpumask. 6479 * 3. Build the RQT. 
6480 * 6481 * Locking is required by netif_set_real_num_*_queues in case the 6482 * netdev has been registered by this point (if this function was called 6483 * in the reload or resume flow). 6484 */ 6485 if (need_lock) { 6486 rtnl_lock(); 6487 netdev_lock(priv->netdev); 6488 } 6489 err = mlx5e_num_channels_changed(priv); 6490 if (need_lock) { 6491 netdev_unlock(priv->netdev); 6492 rtnl_unlock(); 6493 } 6494 if (err) 6495 goto out; 6496 6497 err = profile->init_tx(priv); 6498 if (err) 6499 goto out; 6500 6501 err = profile->init_rx(priv); 6502 if (err) 6503 goto err_cleanup_tx; 6504 6505 if (profile->enable) 6506 profile->enable(priv); 6507 6508 mlx5e_update_features(priv->netdev); 6509 6510 return 0; 6511 6512 err_cleanup_tx: 6513 profile->cleanup_tx(priv); 6514 6515 out: 6516 mlx5e_reset_channels(priv->netdev); 6517 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6518 if (priv->fs) 6519 mlx5e_fs_set_state_destroy(priv->fs, 6520 !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); 6521 cancel_work_sync(&priv->update_stats_work); 6522 return err; 6523 } 6524 6525 void mlx5e_detach_netdev(struct mlx5e_priv *priv) 6526 { 6527 const struct mlx5e_profile *profile = priv->profile; 6528 6529 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6530 if (priv->fs) 6531 mlx5e_fs_set_state_destroy(priv->fs, 6532 !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); 6533 6534 if (profile->disable) 6535 profile->disable(priv); 6536 flush_workqueue(priv->wq); 6537 6538 profile->cleanup_rx(priv); 6539 profile->cleanup_tx(priv); 6540 mlx5e_reset_channels(priv->netdev); 6541 cancel_work_sync(&priv->update_stats_work); 6542 } 6543 6544 static int 6545 mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, 6546 const struct mlx5e_profile *new_profile, void *new_ppriv) 6547 { 6548 struct mlx5e_priv *priv = netdev_priv(netdev); 6549 int err; 6550 6551 err = mlx5e_priv_init(priv, new_profile, netdev, mdev); 6552 if (err) { 6553 mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); 6554 return err; 6555 } 6556 netif_carrier_off(netdev); 6557 priv->profile = new_profile; 6558 priv->ppriv = new_ppriv; 6559 err = new_profile->init(priv->mdev, priv->netdev); 6560 if (err) 6561 goto priv_cleanup; 6562 6563 return 0; 6564 6565 priv_cleanup: 6566 mlx5e_priv_cleanup(priv); 6567 return err; 6568 } 6569 6570 static int 6571 mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, 6572 const struct mlx5e_profile *new_profile, void *new_ppriv) 6573 { 6574 struct mlx5e_priv *priv = netdev_priv(netdev); 6575 int err; 6576 6577 err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); 6578 if (err) 6579 return err; 6580 6581 err = mlx5e_attach_netdev(priv); 6582 if (err) 6583 goto profile_cleanup; 6584 return err; 6585 6586 profile_cleanup: 6587 new_profile->cleanup(priv); 6588 mlx5e_priv_cleanup(priv); 6589 return err; 6590 } 6591 6592 int mlx5e_netdev_change_profile(struct net_device *netdev, 6593 struct mlx5_core_dev *mdev, 6594 const struct mlx5e_profile *new_profile, 6595 void *new_ppriv) 6596 { 6597 struct mlx5e_priv *priv = netdev_priv(netdev); 6598 const struct mlx5e_profile *orig_profile; 6599 int err, rollback_err; 6600 void *orig_ppriv; 6601 6602 orig_profile = priv->profile; 6603 orig_ppriv = priv->ppriv; 6604 6605 /* NULL could happen if previous change_profile failed to rollback */ 6606 if (priv->profile) { 6607 WARN_ON_ONCE(priv->mdev != mdev); 6608 /* cleanup old profile */ 6609 mlx5e_detach_netdev(priv); 6610 priv->profile->cleanup(priv); 6611 
mlx5e_priv_cleanup(priv); 6612 } 6613 /* priv members are not valid from this point ... */ 6614 6615 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 6616 mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); 6617 set_bit(MLX5E_STATE_DESTROYING, &priv->state); 6618 return -EIO; 6619 } 6620 6621 err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); 6622 if (err) { /* roll back to original profile */ 6623 netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); 6624 goto rollback; 6625 } 6626 6627 return 0; 6628 6629 rollback: 6630 if (!orig_profile) { 6631 netdev_warn(netdev, "no original profile to rollback to\n"); 6632 priv->profile = NULL; 6633 return err; 6634 } 6635 6636 rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); 6637 if (rollback_err) { 6638 netdev_err(netdev, "failed to rollback to orig profile, %d\n", 6639 rollback_err); 6640 priv->profile = NULL; 6641 } 6642 return err; 6643 } 6644 6645 void mlx5e_netdev_attach_nic_profile(struct net_device *netdev, 6646 struct mlx5_core_dev *mdev) 6647 { 6648 mlx5e_netdev_change_profile(netdev, mdev, &mlx5e_nic_profile, NULL); 6649 } 6650 6651 void mlx5e_destroy_netdev(struct net_device *netdev) 6652 { 6653 struct mlx5e_priv *priv = netdev_priv(netdev); 6654 6655 if (priv->profile) 6656 mlx5e_priv_cleanup(priv); 6657 free_netdev(netdev); 6658 } 6659 6660 static int _mlx5e_resume(struct auxiliary_device *adev) 6661 { 6662 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6663 struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); 6664 struct mlx5e_priv *priv = netdev_priv(mlx5e_dev->netdev); 6665 struct net_device *netdev = mlx5e_dev->netdev; 6666 struct mlx5_core_dev *mdev = edev->mdev; 6667 struct mlx5_core_dev *pos, *to; 6668 int err, i; 6669 6670 if (netif_device_present(netdev)) 6671 return 0; 6672 6673 mlx5_sd_for_each_dev(i, mdev, pos) { 6674 err = mlx5e_create_mdev_resources(pos, true); 6675 if (err) 6676 goto err_destroy_mdev_res; 6677 } 6678 6679 err = mlx5e_attach_netdev(priv); 6680 if (err) 6681 goto err_destroy_mdev_res; 6682 6683 return 0; 6684 6685 err_destroy_mdev_res: 6686 to = pos; 6687 mlx5_sd_for_each_dev_to(i, mdev, to, pos) 6688 mlx5e_destroy_mdev_resources(pos); 6689 return err; 6690 } 6691 6692 static int mlx5e_resume(struct auxiliary_device *adev) 6693 { 6694 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6695 struct mlx5_core_dev *mdev = edev->mdev; 6696 struct auxiliary_device *actual_adev; 6697 int err; 6698 6699 err = mlx5_sd_init(mdev); 6700 if (err) 6701 return err; 6702 6703 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6704 if (actual_adev) 6705 return _mlx5e_resume(actual_adev); 6706 return 0; 6707 } 6708 6709 static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) 6710 { 6711 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6712 struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); 6713 struct mlx5e_priv *priv = netdev_priv(mlx5e_dev->netdev); 6714 struct net_device *netdev = mlx5e_dev->netdev; 6715 struct mlx5_core_dev *mdev = edev->mdev; 6716 struct mlx5_core_dev *pos; 6717 int i; 6718 6719 if (!pre_netdev_reg && !netif_device_present(netdev)) { 6720 if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) 6721 mlx5_sd_for_each_dev(i, mdev, pos) 6722 mlx5e_destroy_mdev_resources(pos); 6723 return -ENODEV; 6724 } 6725 6726 mlx5e_detach_netdev(priv); 6727 mlx5_sd_for_each_dev(i, mdev, pos) 6728 mlx5e_destroy_mdev_resources(pos); 
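/* On the next resume, _mlx5e_resume() recreates the per-device resources and re-attaches the netdev. */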
6729 6730 return 0; 6731 } 6732 6733 static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) 6734 { 6735 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6736 struct mlx5_core_dev *mdev = edev->mdev; 6737 struct auxiliary_device *actual_adev; 6738 int err = 0; 6739 6740 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6741 if (actual_adev) 6742 err = _mlx5e_suspend(actual_adev, false); 6743 6744 mlx5_sd_cleanup(mdev); 6745 return err; 6746 } 6747 6748 static int _mlx5e_probe(struct auxiliary_device *adev) 6749 { 6750 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6751 const struct mlx5e_profile *profile = &mlx5e_nic_profile; 6752 struct mlx5_core_dev *mdev = edev->mdev; 6753 struct mlx5e_dev *mlx5e_dev; 6754 struct net_device *netdev; 6755 struct mlx5e_priv *priv; 6756 int err; 6757 6758 mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev); 6759 if (IS_ERR(mlx5e_dev)) 6760 return PTR_ERR(mlx5e_dev); 6761 auxiliary_set_drvdata(adev, mlx5e_dev); 6762 6763 err = mlx5e_devlink_port_register(mlx5e_dev, mdev); 6764 if (err) { 6765 mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); 6766 goto err_devlink_unregister; 6767 } 6768 6769 netdev = mlx5e_create_netdev(mdev, profile); 6770 if (!netdev) { 6771 mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); 6772 err = -ENOMEM; 6773 goto err_devlink_port_unregister; 6774 } 6775 SET_NETDEV_DEVLINK_PORT(netdev, &mlx5e_dev->dl_port); 6776 mlx5e_dev->netdev = netdev; 6777 6778 mlx5e_build_nic_netdev(netdev); 6779 6780 priv = netdev_priv(netdev); 6781 6782 priv->profile = profile; 6783 priv->ppriv = NULL; 6784 6785 err = profile->init(mdev, netdev); 6786 if (err) { 6787 mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); 6788 goto err_destroy_netdev; 6789 } 6790 6791 err = _mlx5e_resume(adev); 6792 if (err) { 6793 mlx5_core_err(mdev, "_mlx5e_resume failed, %d\n", err); 6794 goto err_profile_cleanup; 6795 } 6796 6797 err = register_netdev(netdev); 6798 if (err) { 6799 mlx5_core_err(mdev, "register_netdev failed, %d\n", err); 6800 goto err_resume; 6801 } 6802 6803 mlx5e_dcbnl_init_app(priv); 6804 mlx5_core_uplink_netdev_set(mdev, netdev); 6805 mlx5e_params_print_info(mdev, &priv->channels.params); 6806 return 0; 6807 6808 err_resume: 6809 _mlx5e_suspend(adev, true); 6810 err_profile_cleanup: 6811 profile->cleanup(priv); 6812 err_destroy_netdev: 6813 mlx5e_destroy_netdev(netdev); 6814 err_devlink_port_unregister: 6815 mlx5e_devlink_port_unregister(mlx5e_dev); 6816 err_devlink_unregister: 6817 mlx5e_destroy_devlink(mlx5e_dev); 6818 return err; 6819 } 6820 6821 static int mlx5e_probe(struct auxiliary_device *adev, 6822 const struct auxiliary_device_id *id) 6823 { 6824 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6825 struct mlx5_core_dev *mdev = edev->mdev; 6826 struct auxiliary_device *actual_adev; 6827 int err; 6828 6829 err = mlx5_sd_init(mdev); 6830 if (err) 6831 return err; 6832 6833 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6834 if (actual_adev) 6835 return _mlx5e_probe(actual_adev); 6836 return 0; 6837 } 6838 6839 static void _mlx5e_remove(struct auxiliary_device *adev) 6840 { 6841 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6842 struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); 6843 struct net_device *netdev = mlx5e_dev->netdev; 6844 struct mlx5e_priv *priv = netdev_priv(netdev); 6845 struct mlx5_core_dev *mdev = edev->mdev; 6846 6847 mlx5_eswitch_safe_aux_devs_remove(mdev); 6848 
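/* Drop the core device's uplink netdev reference before the netdev is unregistered and freed below. */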
mlx5_core_uplink_netdev_set(mdev, NULL); 6849 6850 if (priv->profile) 6851 mlx5e_dcbnl_delete_app(priv); 6852 /* When unloading the driver, the netdev is in the registered state 6853 * if it came from legacy mode. If it came from switchdev mode, it 6854 * was already unregistered before changing to the NIC profile. 6855 */ 6856 if (netdev->reg_state == NETREG_REGISTERED) { 6857 unregister_netdev(netdev); 6858 _mlx5e_suspend(adev, false); 6859 } else { 6860 struct mlx5_core_dev *pos; 6861 int i; 6862 6863 if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) 6864 mlx5_sd_for_each_dev(i, mdev, pos) 6865 mlx5e_destroy_mdev_resources(pos); 6866 else 6867 _mlx5e_suspend(adev, true); 6868 } 6869 /* Avoid cleanup if profile rollback failed. */ 6870 if (priv->profile) 6871 priv->profile->cleanup(priv); 6872 mlx5e_destroy_netdev(netdev); 6873 mlx5e_devlink_port_unregister(mlx5e_dev); 6874 mlx5e_destroy_devlink(mlx5e_dev); 6875 } 6876 6877 static void mlx5e_remove(struct auxiliary_device *adev) 6878 { 6879 struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); 6880 struct mlx5_core_dev *mdev = edev->mdev; 6881 struct auxiliary_device *actual_adev; 6882 6883 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6884 if (actual_adev) 6885 _mlx5e_remove(actual_adev); 6886 6887 mlx5_sd_cleanup(mdev); 6888 } 6889 6890 static const struct auxiliary_device_id mlx5e_id_table[] = { 6891 { .name = MLX5_ADEV_NAME ".eth", }, 6892 {}, 6893 }; 6894 6895 MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table); 6896 6897 static struct auxiliary_driver mlx5e_driver = { 6898 .name = "eth", 6899 .probe = mlx5e_probe, 6900 .remove = mlx5e_remove, 6901 .suspend = mlx5e_suspend, 6902 .resume = mlx5e_resume, 6903 .id_table = mlx5e_id_table, 6904 }; 6905 6906 int mlx5e_init(void) 6907 { 6908 int ret; 6909 6910 mlx5e_build_ptys2ethtool_map(); 6911 ret = auxiliary_driver_register(&mlx5e_driver); 6912 if (ret) 6913 return ret; 6914 6915 ret = mlx5e_rep_init(); 6916 if (ret) 6917 auxiliary_driver_unregister(&mlx5e_driver); 6918 return ret; 6919 } 6920 6921 void mlx5e_cleanup(void) 6922 { 6923 mlx5e_rep_cleanup(); 6924 auxiliary_driver_unregister(&mlx5e_driver); 6925 } 6926