1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* Copyright (c) 2021, Microsoft Corporation. */ 3 4 #include <uapi/linux/bpf.h> 5 6 #include <linux/debugfs.h> 7 #include <linux/inetdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/filter.h> 11 #include <linux/mm.h> 12 #include <linux/pci.h> 13 #include <linux/export.h> 14 #include <linux/skbuff.h> 15 16 #include <net/checksum.h> 17 #include <net/ip6_checksum.h> 18 #include <net/netdev_lock.h> 19 #include <net/page_pool/helpers.h> 20 #include <net/xdp.h> 21 22 #include <net/mana/mana.h> 23 #include <net/mana/mana_auxiliary.h> 24 #include <net/mana/hw_channel.h> 25 26 static DEFINE_IDA(mana_adev_ida); 27 28 static int mana_adev_idx_alloc(void) 29 { 30 return ida_alloc(&mana_adev_ida, GFP_KERNEL); 31 } 32 33 static void mana_adev_idx_free(int idx) 34 { 35 ida_free(&mana_adev_ida, idx); 36 } 37 38 static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count, 39 loff_t *pos) 40 { 41 struct gdma_queue *gdma_q = filp->private_data; 42 43 return simple_read_from_buffer(buf, count, pos, gdma_q->queue_mem_ptr, 44 gdma_q->queue_size); 45 } 46 47 static const struct file_operations mana_dbg_q_fops = { 48 .owner = THIS_MODULE, 49 .open = simple_open, 50 .read = mana_dbg_q_read, 51 }; 52 53 static bool mana_en_need_log(struct mana_port_context *apc, int err) 54 { 55 if (apc && apc->ac && apc->ac->gdma_dev && 56 apc->ac->gdma_dev->gdma_context) 57 return mana_need_log(apc->ac->gdma_dev->gdma_context, err); 58 else 59 return true; 60 } 61 62 static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page, 63 bool from_pool) 64 { 65 if (from_pool) 66 page_pool_put_full_page(rxq->page_pool, page, false); 67 else 68 put_page(page); 69 } 70 71 /* Microsoft Azure Network Adapter (MANA) functions */ 72 73 static int mana_open(struct net_device *ndev) 74 { 75 struct mana_port_context *apc = netdev_priv(ndev); 76 int err; 77 err = mana_alloc_queues(ndev); 78 79 if (err) { 80 netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); 81 return err; 82 } 83 84 apc->port_is_up = true; 85 86 /* Ensure port state updated before txq state */ 87 smp_wmb(); 88 89 netif_tx_wake_all_queues(ndev); 90 netdev_dbg(ndev, "%s successful\n", __func__); 91 return 0; 92 } 93 94 static int mana_close(struct net_device *ndev) 95 { 96 struct mana_port_context *apc = netdev_priv(ndev); 97 98 if (!apc->port_is_up) 99 return 0; 100 101 return mana_detach(ndev, true); 102 } 103 104 static void mana_link_state_handle(struct work_struct *w) 105 { 106 struct mana_context *ac; 107 struct net_device *ndev; 108 u32 link_event; 109 bool link_up; 110 int i; 111 112 ac = container_of(w, struct mana_context, link_change_work); 113 114 rtnl_lock(); 115 116 link_event = READ_ONCE(ac->link_event); 117 118 if (link_event == HWC_DATA_HW_LINK_CONNECT) 119 link_up = true; 120 else if (link_event == HWC_DATA_HW_LINK_DISCONNECT) 121 link_up = false; 122 else 123 goto out; 124 125 /* Process all ports */ 126 for (i = 0; i < ac->num_ports; i++) { 127 ndev = ac->ports[i]; 128 if (!ndev) 129 continue; 130 131 if (link_up) { 132 netif_carrier_on(ndev); 133 134 __netdev_notify_peers(ndev); 135 } else { 136 netif_carrier_off(ndev); 137 } 138 } 139 140 out: 141 rtnl_unlock(); 142 } 143 144 static bool mana_can_tx(struct gdma_queue *wq) 145 { 146 return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; 147 } 148 149 static unsigned int mana_checksum_info(struct sk_buff *skb) 150 { 151 if (skb->protocol == htons(ETH_P_IP)) { 152 struct iphdr *ip = ip_hdr(skb); 153 154 if (ip->protocol == IPPROTO_TCP) 155 return IPPROTO_TCP; 156 157 if (ip->protocol == IPPROTO_UDP) 158 return IPPROTO_UDP; 159 } else if (skb->protocol == htons(ETH_P_IPV6)) { 160 struct ipv6hdr *ip6 = ipv6_hdr(skb); 161 162 if (ip6->nexthdr == IPPROTO_TCP) 163 return IPPROTO_TCP; 164 165 if (ip6->nexthdr == IPPROTO_UDP) 166 return IPPROTO_UDP; 167 } 168 169 /* No csum offloading */ 170 return 0; 171 } 172 173 static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash, 174 int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey) 175 { 176 ash->dma_handle[sg_i] = da; 177 ash->size[sg_i] = sge_len; 178 179 tp->wqe_req.sgl[sg_i].address = da; 180 tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey; 181 tp->wqe_req.sgl[sg_i].size = sge_len; 182 } 183 184 static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, 185 struct mana_tx_package *tp, int gso_hs) 186 { 187 struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; 188 int hsg = 1; /* num of SGEs of linear part */ 189 struct gdma_dev *gd = apc->ac->gdma_dev; 190 int skb_hlen = skb_headlen(skb); 191 int sge0_len, sge1_len = 0; 192 struct gdma_context *gc; 193 struct device *dev; 194 skb_frag_t *frag; 195 dma_addr_t da; 196 int sg_i; 197 int i; 198 199 gc = gd->gdma_context; 200 dev = gc->dev; 201 202 if (gso_hs && gso_hs < skb_hlen) { 203 sge0_len = gso_hs; 204 sge1_len = skb_hlen - gso_hs; 205 } else { 206 sge0_len = skb_hlen; 207 } 208 209 da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE); 210 if (dma_mapping_error(dev, da)) 211 return -ENOMEM; 212 213 mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey); 214 215 if (sge1_len) { 216 sg_i = 1; 217 da = dma_map_single(dev, skb->data + sge0_len, sge1_len, 218 DMA_TO_DEVICE); 219 if (dma_mapping_error(dev, da)) 220 goto frag_err; 221 222 mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey); 223 hsg = 2; 224 } 225 226 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 227 sg_i = hsg + i; 228 229 frag = &skb_shinfo(skb)->frags[i]; 230 da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), 231 DMA_TO_DEVICE); 232 if (dma_mapping_error(dev, da)) 233 goto frag_err; 234 235 mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag), 236 gd->gpa_mkey); 237 } 238 239 return 0; 240 241 frag_err: 242 if (net_ratelimit()) 243 netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", 244 skb->len); 245 for (i = sg_i - 1; i >= hsg; i--) 246 dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], 247 DMA_TO_DEVICE); 248 249 for (i = hsg - 1; i >= 0; i--) 250 dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], 251 DMA_TO_DEVICE); 252 253 return -ENOMEM; 254 } 255 256 /* Handle the case when GSO SKB linear length is too large. 257 * MANA NIC requires GSO packets to put only the packet header to SGE0. 258 * So, we need 2 SGEs for the skb linear part which contains more than the 259 * header. 260 * Return a positive value for the number of SGEs, or a negative value 261 * for an error. 262 */ 263 static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb, 264 int gso_hs) 265 { 266 int num_sge = 1 + skb_shinfo(skb)->nr_frags; 267 int skb_hlen = skb_headlen(skb); 268 269 if (gso_hs < skb_hlen) { 270 num_sge++; 271 } else if (gso_hs > skb_hlen) { 272 if (net_ratelimit()) 273 netdev_err(ndev, 274 "TX nonlinear head: hs:%d, skb_hlen:%d\n", 275 gso_hs, skb_hlen); 276 277 return -EINVAL; 278 } 279 280 return num_sge; 281 } 282 283 /* Get the GSO packet's header size */ 284 static int mana_get_gso_hs(struct sk_buff *skb) 285 { 286 int gso_hs; 287 288 if (skb->encapsulation) { 289 gso_hs = skb_inner_tcp_all_headers(skb); 290 } else { 291 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { 292 gso_hs = skb_transport_offset(skb) + 293 sizeof(struct udphdr); 294 } else { 295 gso_hs = skb_tcp_all_headers(skb); 296 } 297 } 298 299 return gso_hs; 300 } 301 302 static void mana_per_port_queue_reset_work_handler(struct work_struct *work) 303 { 304 struct mana_port_context *apc = container_of(work, 305 struct mana_port_context, 306 queue_reset_work); 307 struct net_device *ndev = apc->ndev; 308 int err; 309 310 rtnl_lock(); 311 312 /* Block RDMA from grabbing the vport during the detach/attach 313 * window, same as mana_set_channels(). 314 */ 315 mutex_lock(&apc->vport_mutex); 316 apc->channel_changing = true; 317 mutex_unlock(&apc->vport_mutex); 318 319 /* Pre-allocate buffers to prevent failure in mana_attach later */ 320 err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); 321 if (err) { 322 netdev_err(ndev, "Insufficient memory for reset post tx stall detection\n"); 323 goto clear_flag; 324 } 325 326 err = mana_detach(ndev, false); 327 if (err) { 328 netdev_err(ndev, "mana_detach failed: %d\n", err); 329 goto dealloc_pre_rxbufs; 330 } 331 332 err = mana_attach(ndev); 333 if (err) 334 netdev_err(ndev, "mana_attach failed: %d\n", err); 335 336 dealloc_pre_rxbufs: 337 mana_pre_dealloc_rxbufs(apc); 338 clear_flag: 339 mutex_lock(&apc->vport_mutex); 340 apc->channel_changing = false; 341 mutex_unlock(&apc->vport_mutex); 342 343 rtnl_unlock(); 344 } 345 346 netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) 347 { 348 enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; 349 struct mana_port_context *apc = netdev_priv(ndev); 350 int gso_hs = 0; /* zero for non-GSO pkts */ 351 u16 txq_idx = skb_get_queue_mapping(skb); 352 struct gdma_dev *gd = apc->ac->gdma_dev; 353 bool ipv4 = false, ipv6 = false; 354 struct mana_tx_package pkg = {}; 355 struct netdev_queue *net_txq; 356 struct mana_stats_tx *tx_stats; 357 struct gdma_queue *gdma_sq; 358 int err, len, num_gso_seg; 359 unsigned int csum_type; 360 struct mana_txq *txq; 361 struct mana_cq *cq; 362 363 if (unlikely(!apc->port_is_up)) 364 goto tx_drop; 365 366 if (skb_cow_head(skb, MANA_HEADROOM)) 367 goto tx_drop_count; 368 369 txq = &apc->tx_qp[txq_idx]->txq; 370 gdma_sq = txq->gdma_sq; 371 cq = &apc->tx_qp[txq_idx]->tx_cq; 372 tx_stats = &txq->stats; 373 374 BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES); 375 if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES && 376 skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) { 377 /* GSO skb with Hardware SGE limit exceeded is not expected here 378 * as they are handled in mana_features_check() callback 379 */ 380 if (skb_linearize(skb)) { 381 netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n", 382 skb_shinfo(skb)->nr_frags, 383 skb_is_gso(skb)); 384 goto tx_drop_count; 385 } 386 apc->eth_stats.tx_linear_pkt_cnt++; 387 } 388 389 pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; 390 pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; 391 392 if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { 393 pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; 394 pkt_fmt = MANA_LONG_PKT_FMT; 395 } else { 396 pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; 397 } 398 399 if (skb_vlan_tag_present(skb)) { 400 pkt_fmt = MANA_LONG_PKT_FMT; 401 pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1; 402 pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb); 403 pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb); 404 pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb); 405 } 406 407 pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; 408 409 if (pkt_fmt == MANA_SHORT_PKT_FMT) { 410 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); 411 u64_stats_update_begin(&tx_stats->syncp); 412 tx_stats->short_pkt_fmt++; 413 u64_stats_update_end(&tx_stats->syncp); 414 } else { 415 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); 416 u64_stats_update_begin(&tx_stats->syncp); 417 tx_stats->long_pkt_fmt++; 418 u64_stats_update_end(&tx_stats->syncp); 419 } 420 421 pkg.wqe_req.inline_oob_data = &pkg.tx_oob; 422 pkg.wqe_req.flags = 0; 423 pkg.wqe_req.client_data_unit = 0; 424 425 pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; 426 427 if (skb->protocol == htons(ETH_P_IP)) 428 ipv4 = true; 429 else if (skb->protocol == htons(ETH_P_IPV6)) 430 ipv6 = true; 431 432 if (skb_is_gso(skb)) { 433 int num_sge; 434 435 gso_hs = mana_get_gso_hs(skb); 436 437 num_sge = mana_fix_skb_head(ndev, skb, gso_hs); 438 if (num_sge > 0) 439 pkg.wqe_req.num_sge = num_sge; 440 else 441 goto tx_drop_count; 442 443 u64_stats_update_begin(&tx_stats->syncp); 444 if (skb->encapsulation) { 445 tx_stats->tso_inner_packets++; 446 tx_stats->tso_inner_bytes += skb->len - gso_hs; 447 } else { 448 tx_stats->tso_packets++; 449 tx_stats->tso_bytes += skb->len - gso_hs; 450 } 451 u64_stats_update_end(&tx_stats->syncp); 452 453 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 454 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 455 456 pkg.tx_oob.s_oob.comp_iphdr_csum = 1; 457 pkg.tx_oob.s_oob.comp_tcp_csum = 1; 458 pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); 459 460 pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; 461 pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; 462 if (ipv4) { 463 ip_hdr(skb)->tot_len = 0; 464 ip_hdr(skb)->check = 0; 465 tcp_hdr(skb)->check = 466 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 467 ip_hdr(skb)->daddr, 0, 468 IPPROTO_TCP, 0); 469 } else { 470 ipv6_hdr(skb)->payload_len = 0; 471 tcp_hdr(skb)->check = 472 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 473 &ipv6_hdr(skb)->daddr, 0, 474 IPPROTO_TCP, 0); 475 } 476 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 477 csum_type = mana_checksum_info(skb); 478 479 u64_stats_update_begin(&tx_stats->syncp); 480 tx_stats->csum_partial++; 481 u64_stats_update_end(&tx_stats->syncp); 482 483 if (csum_type == IPPROTO_TCP) { 484 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 485 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 486 487 pkg.tx_oob.s_oob.comp_tcp_csum = 1; 488 pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); 489 490 } else if (csum_type == IPPROTO_UDP) { 491 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 492 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 493 494 pkg.tx_oob.s_oob.comp_udp_csum = 1; 495 } else { 496 /* Can't do offload of this type of checksum */ 497 if (skb_checksum_help(skb)) 498 goto tx_drop_count; 499 } 500 } 501 502 if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { 503 pkg.wqe_req.sgl = pkg.sgl_array; 504 } else { 505 pkg.sgl_ptr = kmalloc_objs(struct gdma_sge, pkg.wqe_req.num_sge, 506 GFP_ATOMIC); 507 if (!pkg.sgl_ptr) 508 goto tx_drop_count; 509 510 pkg.wqe_req.sgl = pkg.sgl_ptr; 511 } 512 513 if (mana_map_skb(skb, apc, &pkg, gso_hs)) { 514 u64_stats_update_begin(&tx_stats->syncp); 515 tx_stats->mana_map_err++; 516 u64_stats_update_end(&tx_stats->syncp); 517 goto free_sgl_ptr; 518 } 519 520 skb_queue_tail(&txq->pending_skbs, skb); 521 522 len = skb->len; 523 num_gso_seg = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; 524 net_txq = netdev_get_tx_queue(ndev, txq_idx); 525 526 err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, 527 (struct gdma_posted_wqe_info *)skb->cb); 528 if (!mana_can_tx(gdma_sq)) { 529 netif_tx_stop_queue(net_txq); 530 apc->eth_stats.stop_queue++; 531 } 532 533 if (err) { 534 (void)skb_dequeue_tail(&txq->pending_skbs); 535 mana_unmap_skb(skb, apc); 536 netdev_warn(ndev, "Failed to post TX OOB: %d\n", err); 537 goto free_sgl_ptr; 538 } 539 540 err = NETDEV_TX_OK; 541 atomic_inc(&txq->pending_sends); 542 543 mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); 544 545 /* skb may be freed after mana_gd_post_work_request. Do not use it. */ 546 skb = NULL; 547 548 /* Populated the packet and bytes counters based on post GSO packet 549 * calculations 550 */ 551 tx_stats = &txq->stats; 552 u64_stats_update_begin(&tx_stats->syncp); 553 tx_stats->packets += num_gso_seg; 554 tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs); 555 u64_stats_update_end(&tx_stats->syncp); 556 557 if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { 558 netif_tx_wake_queue(net_txq); 559 apc->eth_stats.wake_queue++; 560 } 561 562 kfree(pkg.sgl_ptr); 563 return err; 564 565 free_sgl_ptr: 566 kfree(pkg.sgl_ptr); 567 tx_drop_count: 568 ndev->stats.tx_dropped++; 569 tx_drop: 570 dev_kfree_skb_any(skb); 571 return NETDEV_TX_OK; 572 } 573 574 #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) 575 static netdev_features_t mana_features_check(struct sk_buff *skb, 576 struct net_device *ndev, 577 netdev_features_t features) 578 { 579 if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) { 580 /* Exceeds HW SGE limit. 581 * GSO case: 582 * Disable GSO so the stack will software-segment the skb 583 * into smaller skbs that fit the SGE budget. 584 * Non-GSO case: 585 * The xmit path will attempt skb_linearize() as a fallback. 586 */ 587 features &= ~NETIF_F_GSO_MASK; 588 } 589 return features; 590 } 591 #endif 592 593 static void mana_get_stats64(struct net_device *ndev, 594 struct rtnl_link_stats64 *st) 595 { 596 struct mana_port_context *apc = netdev_priv(ndev); 597 unsigned int num_queues = apc->num_queues; 598 struct mana_stats_rx *rx_stats; 599 struct mana_stats_tx *tx_stats; 600 unsigned int start; 601 u64 packets, bytes; 602 int q; 603 604 if (!apc->port_is_up) 605 return; 606 607 netdev_stats_to_stats64(st, &ndev->stats); 608 609 if (apc->ac->hwc_timeout_occurred) 610 netdev_warn_once(ndev, "HWC timeout occurred\n"); 611 612 st->rx_missed_errors = apc->ac->hc_stats.hc_rx_discards_no_wqe; 613 614 for (q = 0; q < num_queues; q++) { 615 rx_stats = &apc->rxqs[q]->stats; 616 617 do { 618 start = u64_stats_fetch_begin(&rx_stats->syncp); 619 packets = rx_stats->packets; 620 bytes = rx_stats->bytes; 621 } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); 622 623 st->rx_packets += packets; 624 st->rx_bytes += bytes; 625 } 626 627 for (q = 0; q < num_queues; q++) { 628 tx_stats = &apc->tx_qp[q]->txq.stats; 629 630 do { 631 start = u64_stats_fetch_begin(&tx_stats->syncp); 632 packets = tx_stats->packets; 633 bytes = tx_stats->bytes; 634 } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); 635 636 st->tx_packets += packets; 637 st->tx_bytes += bytes; 638 } 639 } 640 641 static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, 642 int old_q) 643 { 644 struct mana_port_context *apc = netdev_priv(ndev); 645 u32 hash = skb_get_hash(skb); 646 struct sock *sk = skb->sk; 647 int txq; 648 649 txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; 650 651 if (txq != old_q && sk && sk_fullsock(sk) && 652 rcu_access_pointer(sk->sk_dst_cache)) 653 sk_tx_queue_set(sk, txq); 654 655 return txq; 656 } 657 658 static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, 659 struct net_device *sb_dev) 660 { 661 int txq; 662 663 if (ndev->real_num_tx_queues == 1) 664 return 0; 665 666 txq = sk_tx_queue_get(skb->sk); 667 668 if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { 669 if (skb_rx_queue_recorded(skb)) 670 txq = skb_get_rx_queue(skb); 671 else 672 txq = mana_get_tx_queue(ndev, skb, txq); 673 } 674 675 return txq; 676 } 677 678 /* Release pre-allocated RX buffers */ 679 void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) 680 { 681 struct device *dev; 682 int i; 683 684 dev = mpc->ac->gdma_dev->gdma_context->dev; 685 686 if (!mpc->rxbufs_pre) 687 goto out1; 688 689 if (!mpc->das_pre) 690 goto out2; 691 692 while (mpc->rxbpre_total) { 693 i = --mpc->rxbpre_total; 694 dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize, 695 DMA_FROM_DEVICE); 696 put_page(virt_to_head_page(mpc->rxbufs_pre[i])); 697 } 698 699 kvfree(mpc->das_pre); 700 mpc->das_pre = NULL; 701 702 out2: 703 kvfree(mpc->rxbufs_pre); 704 mpc->rxbufs_pre = NULL; 705 706 out1: 707 mpc->rxbpre_datasize = 0; 708 mpc->rxbpre_alloc_size = 0; 709 mpc->rxbpre_headroom = 0; 710 } 711 712 /* Get a buffer from the pre-allocated RX buffers */ 713 static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) 714 { 715 struct net_device *ndev = rxq->ndev; 716 struct mana_port_context *mpc; 717 void *va; 718 719 mpc = netdev_priv(ndev); 720 721 if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) { 722 netdev_err(ndev, "No RX pre-allocated bufs\n"); 723 return NULL; 724 } 725 726 /* Check sizes to catch unexpected coding error */ 727 if (mpc->rxbpre_datasize != rxq->datasize) { 728 netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n", 729 mpc->rxbpre_datasize, rxq->datasize); 730 return NULL; 731 } 732 733 if (mpc->rxbpre_alloc_size != rxq->alloc_size) { 734 netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n", 735 mpc->rxbpre_alloc_size, rxq->alloc_size); 736 return NULL; 737 } 738 739 if (mpc->rxbpre_headroom != rxq->headroom) { 740 netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n", 741 mpc->rxbpre_headroom, rxq->headroom); 742 return NULL; 743 } 744 745 mpc->rxbpre_total--; 746 747 *da = mpc->das_pre[mpc->rxbpre_total]; 748 va = mpc->rxbufs_pre[mpc->rxbpre_total]; 749 mpc->rxbufs_pre[mpc->rxbpre_total] = NULL; 750 751 /* Deallocate the array after all buffers are gone */ 752 if (!mpc->rxbpre_total) 753 mana_pre_dealloc_rxbufs(mpc); 754 755 return va; 756 } 757 758 /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ 759 static void mana_get_rxbuf_cfg(struct mana_port_context *apc, 760 int mtu, u32 *datasize, u32 *alloc_size, 761 u32 *headroom, u32 *frag_count) 762 { 763 u32 len, buf_size; 764 765 /* Calculate datasize first (consistent across all cases) */ 766 *datasize = mtu + ETH_HLEN; 767 768 /* For xdp and jumbo frames make sure only one packet fits per page */ 769 if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) { 770 if (mana_xdp_get(apc)) { 771 *headroom = XDP_PACKET_HEADROOM; 772 *alloc_size = PAGE_SIZE; 773 } else { 774 *headroom = 0; /* no support for XDP */ 775 *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + 776 *headroom); 777 } 778 779 *frag_count = 1; 780 781 /* In the single-buffer path, napi_build_skb() must see the 782 * actual backing allocation size so skb->truesize reflects 783 * the full page (or higher-order page), not just the usable 784 * packet area. 785 */ 786 *alloc_size = PAGE_SIZE << get_order(*alloc_size); 787 return; 788 } 789 790 /* Standard MTU case - optimize for multiple packets per page */ 791 *headroom = 0; 792 793 /* Calculate base buffer size needed */ 794 len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); 795 buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT); 796 797 /* Calculate how many packets can fit in a page */ 798 *frag_count = PAGE_SIZE / buf_size; 799 *alloc_size = buf_size; 800 } 801 802 int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) 803 { 804 struct device *dev; 805 struct page *page; 806 dma_addr_t da; 807 int num_rxb; 808 void *va; 809 int i; 810 811 mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize, 812 &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom, 813 &mpc->rxbpre_frag_count); 814 815 dev = mpc->ac->gdma_dev->gdma_context->dev; 816 817 num_rxb = num_queues * mpc->rx_queue_size; 818 819 WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); 820 mpc->rxbufs_pre = kvmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); 821 if (!mpc->rxbufs_pre) 822 goto error; 823 824 mpc->das_pre = kvmalloc_objs(dma_addr_t, num_rxb); 825 if (!mpc->das_pre) 826 goto error; 827 828 mpc->rxbpre_total = 0; 829 830 for (i = 0; i < num_rxb; i++) { 831 page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); 832 if (!page) 833 goto error; 834 835 va = page_to_virt(page); 836 837 da = dma_map_single(dev, va + mpc->rxbpre_headroom, 838 mpc->rxbpre_datasize, DMA_FROM_DEVICE); 839 if (dma_mapping_error(dev, da)) { 840 put_page(page); 841 goto error; 842 } 843 844 mpc->rxbufs_pre[i] = va; 845 mpc->das_pre[i] = da; 846 mpc->rxbpre_total = i + 1; 847 } 848 849 return 0; 850 851 error: 852 netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); 853 mana_pre_dealloc_rxbufs(mpc); 854 return -ENOMEM; 855 } 856 857 static int mana_change_mtu(struct net_device *ndev, int new_mtu) 858 { 859 struct mana_port_context *mpc = netdev_priv(ndev); 860 unsigned int old_mtu = ndev->mtu; 861 int err; 862 863 /* Pre-allocate buffers to prevent failure in mana_attach later */ 864 err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues); 865 if (err) { 866 netdev_err(ndev, "Insufficient memory for new MTU\n"); 867 return err; 868 } 869 870 err = mana_detach(ndev, false); 871 if (err) { 872 netdev_err(ndev, "mana_detach failed: %d\n", err); 873 goto out; 874 } 875 876 WRITE_ONCE(ndev->mtu, new_mtu); 877 878 err = mana_attach(ndev); 879 if (err) { 880 netdev_err(ndev, "mana_attach failed: %d\n", err); 881 WRITE_ONCE(ndev->mtu, old_mtu); 882 } 883 884 out: 885 mana_pre_dealloc_rxbufs(mpc); 886 return err; 887 } 888 889 static void mana_tx_timeout(struct net_device *netdev, unsigned int txqueue) 890 { 891 struct mana_port_context *apc = netdev_priv(netdev); 892 struct mana_context *ac = apc->ac; 893 struct gdma_context *gc = ac->gdma_dev->gdma_context; 894 895 /* Already in service, hence tx queue reset is not required.*/ 896 if (test_bit(GC_IN_SERVICE, &gc->flags)) 897 return; 898 899 /* Note: If there are pending queue reset work for this port(apc), 900 * subsequent request queued up from here are ignored. This is because 901 * we are using the same work instance per port(apc). 902 */ 903 queue_work(ac->per_port_queue_reset_wq, &apc->queue_reset_work); 904 } 905 906 static int mana_shaper_set(struct net_shaper_binding *binding, 907 const struct net_shaper *shaper, 908 struct netlink_ext_ack *extack) 909 { 910 struct mana_port_context *apc = netdev_priv(binding->netdev); 911 u32 old_speed, rate; 912 int err; 913 914 if (shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) { 915 NL_SET_ERR_MSG_MOD(extack, "net shaper scope should be netdev"); 916 return -EINVAL; 917 } 918 919 if (apc->handle.id && shaper->handle.id != apc->handle.id) { 920 NL_SET_ERR_MSG_MOD(extack, "Cannot create multiple shapers"); 921 return -EOPNOTSUPP; 922 } 923 924 if (!shaper->bw_max || (shaper->bw_max % 100000000)) { 925 NL_SET_ERR_MSG_MOD(extack, "Please use multiples of 100Mbps for bandwidth"); 926 return -EINVAL; 927 } 928 929 rate = div_u64(shaper->bw_max, 1000); /* Convert bps to Kbps */ 930 rate = div_u64(rate, 1000); /* Convert Kbps to Mbps */ 931 932 /* Get current speed */ 933 err = mana_query_link_cfg(apc); 934 old_speed = (err) ? SPEED_UNKNOWN : apc->speed; 935 936 if (!err) { 937 err = mana_set_bw_clamp(apc, rate, TRI_STATE_TRUE); 938 apc->speed = (err) ? old_speed : rate; 939 apc->handle = (err) ? apc->handle : shaper->handle; 940 } 941 942 return err; 943 } 944 945 static int mana_shaper_del(struct net_shaper_binding *binding, 946 const struct net_shaper_handle *handle, 947 struct netlink_ext_ack *extack) 948 { 949 struct mana_port_context *apc = netdev_priv(binding->netdev); 950 int err; 951 952 err = mana_set_bw_clamp(apc, 0, TRI_STATE_FALSE); 953 954 if (!err) { 955 /* Reset mana port context parameters */ 956 apc->handle.id = 0; 957 apc->handle.scope = NET_SHAPER_SCOPE_UNSPEC; 958 apc->speed = apc->max_speed; 959 } 960 961 return err; 962 } 963 964 static void mana_shaper_cap(struct net_shaper_binding *binding, 965 enum net_shaper_scope scope, 966 unsigned long *flags) 967 { 968 *flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX) | 969 BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS); 970 } 971 972 static const struct net_shaper_ops mana_shaper_ops = { 973 .set = mana_shaper_set, 974 .delete = mana_shaper_del, 975 .capabilities = mana_shaper_cap, 976 }; 977 978 static const struct net_device_ops mana_devops = { 979 .ndo_open = mana_open, 980 .ndo_stop = mana_close, 981 .ndo_select_queue = mana_select_queue, 982 #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) 983 .ndo_features_check = mana_features_check, 984 #endif 985 .ndo_start_xmit = mana_start_xmit, 986 .ndo_validate_addr = eth_validate_addr, 987 .ndo_get_stats64 = mana_get_stats64, 988 .ndo_bpf = mana_bpf, 989 .ndo_xdp_xmit = mana_xdp_xmit, 990 .ndo_change_mtu = mana_change_mtu, 991 .ndo_tx_timeout = mana_tx_timeout, 992 .net_shaper_ops = &mana_shaper_ops, 993 }; 994 995 static void mana_cleanup_port_context(struct mana_port_context *apc) 996 { 997 /* 998 * make sure subsequent cleanup attempts don't end up removing already 999 * cleaned dentry pointer 1000 */ 1001 debugfs_remove(apc->mana_port_debugfs); 1002 apc->mana_port_debugfs = NULL; 1003 kfree(apc->rxqs); 1004 apc->rxqs = NULL; 1005 } 1006 1007 static void mana_cleanup_indir_table(struct mana_port_context *apc) 1008 { 1009 apc->indir_table_sz = 0; 1010 kfree(apc->indir_table); 1011 kfree(apc->rxobj_table); 1012 } 1013 1014 static int mana_init_port_context(struct mana_port_context *apc) 1015 { 1016 apc->rxqs = kzalloc_objs(struct mana_rxq *, apc->num_queues); 1017 1018 return !apc->rxqs ? -ENOMEM : 0; 1019 } 1020 1021 static int gdma_mana_send_request(struct gdma_context *gc, void *in_buf, 1022 u32 in_len, void *out_buf, u32 out_len) 1023 { 1024 struct gdma_resp_hdr *resp = out_buf; 1025 struct gdma_req_hdr *req = in_buf; 1026 struct device *dev = gc->dev; 1027 static atomic_t activity_id; 1028 int err; 1029 1030 req->dev_id = gc->mana.dev_id; 1031 req->activity_id = atomic_inc_return(&activity_id); 1032 1033 err = mana_gd_send_request(gc, in_len, in_buf, out_len, 1034 out_buf); 1035 if (err || resp->status) { 1036 if (err == -EOPNOTSUPP) 1037 return err; 1038 1039 if (req->req.msg_type != MANA_QUERY_PHY_STAT && 1040 mana_need_log(gc, err)) 1041 dev_err(dev, "Command 0x%x failed with status: 0x%x, err: %d\n", 1042 req->req.msg_type, resp->status, err); 1043 return err ? err : -EPROTO; 1044 } 1045 1046 if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || 1047 req->activity_id != resp->activity_id) { 1048 dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", 1049 req->dev_id.as_uint32, resp->dev_id.as_uint32, 1050 req->activity_id, resp->activity_id); 1051 return -EPROTO; 1052 } 1053 1054 return 0; 1055 } 1056 1057 static int mana_send_request(struct mana_context *ac, void *in_buf, 1058 u32 in_len, void *out_buf, u32 out_len) 1059 { 1060 struct gdma_context *gc = ac->gdma_dev->gdma_context; 1061 1062 return gdma_mana_send_request(gc, in_buf, in_len, out_buf, out_len); 1063 } 1064 1065 static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, 1066 const enum mana_command_code expected_code, 1067 const u32 min_size) 1068 { 1069 if (resp_hdr->response.msg_type != expected_code) 1070 return -EPROTO; 1071 1072 if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) 1073 return -EPROTO; 1074 1075 if (resp_hdr->response.msg_size < min_size) 1076 return -EPROTO; 1077 1078 return 0; 1079 } 1080 1081 static int mana_pf_register_hw_vport(struct mana_port_context *apc) 1082 { 1083 struct mana_register_hw_vport_resp resp = {}; 1084 struct mana_register_hw_vport_req req = {}; 1085 int err; 1086 1087 mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT, 1088 sizeof(req), sizeof(resp)); 1089 req.attached_gfid = 1; 1090 req.is_pf_default_vport = 1; 1091 req.allow_all_ether_types = 1; 1092 1093 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1094 sizeof(resp)); 1095 if (err) { 1096 netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err); 1097 return err; 1098 } 1099 1100 err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT, 1101 sizeof(resp)); 1102 if (err || resp.hdr.status) { 1103 netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n", 1104 err, resp.hdr.status); 1105 return err ? err : -EPROTO; 1106 } 1107 1108 apc->port_handle = resp.hw_vport_handle; 1109 return 0; 1110 } 1111 1112 static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) 1113 { 1114 struct mana_deregister_hw_vport_resp resp = {}; 1115 struct mana_deregister_hw_vport_req req = {}; 1116 int err; 1117 1118 mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT, 1119 sizeof(req), sizeof(resp)); 1120 req.hw_vport_handle = apc->port_handle; 1121 1122 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1123 sizeof(resp)); 1124 if (err) { 1125 if (mana_en_need_log(apc, err)) 1126 netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", 1127 err); 1128 1129 return; 1130 } 1131 1132 err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT, 1133 sizeof(resp)); 1134 if (err || resp.hdr.status) 1135 netdev_err(apc->ndev, 1136 "Failed to deregister hw vPort: %d, 0x%x\n", 1137 err, resp.hdr.status); 1138 } 1139 1140 static int mana_pf_register_filter(struct mana_port_context *apc) 1141 { 1142 struct mana_register_filter_resp resp = {}; 1143 struct mana_register_filter_req req = {}; 1144 int err; 1145 1146 mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER, 1147 sizeof(req), sizeof(resp)); 1148 req.vport = apc->port_handle; 1149 memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN); 1150 1151 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1152 sizeof(resp)); 1153 if (err) { 1154 netdev_err(apc->ndev, "Failed to register filter: %d\n", err); 1155 return err; 1156 } 1157 1158 err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER, 1159 sizeof(resp)); 1160 if (err || resp.hdr.status) { 1161 netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n", 1162 err, resp.hdr.status); 1163 return err ? err : -EPROTO; 1164 } 1165 1166 apc->pf_filter_handle = resp.filter_handle; 1167 return 0; 1168 } 1169 1170 static void mana_pf_deregister_filter(struct mana_port_context *apc) 1171 { 1172 struct mana_deregister_filter_resp resp = {}; 1173 struct mana_deregister_filter_req req = {}; 1174 int err; 1175 1176 mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER, 1177 sizeof(req), sizeof(resp)); 1178 req.filter_handle = apc->pf_filter_handle; 1179 1180 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1181 sizeof(resp)); 1182 if (err) { 1183 if (mana_en_need_log(apc, err)) 1184 netdev_err(apc->ndev, "Failed to unregister filter: %d\n", 1185 err); 1186 1187 return; 1188 } 1189 1190 err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER, 1191 sizeof(resp)); 1192 if (err || resp.hdr.status) 1193 netdev_err(apc->ndev, 1194 "Failed to deregister filter: %d, 0x%x\n", 1195 err, resp.hdr.status); 1196 } 1197 1198 int mana_gd_query_device_cfg(struct gdma_context *gc, u32 proto_major_ver, 1199 u32 proto_minor_ver, u32 proto_micro_ver, 1200 u16 *max_num_vports, u8 *bm_hostmode) 1201 { 1202 struct mana_query_device_cfg_resp resp = {}; 1203 struct mana_query_device_cfg_req req = {}; 1204 struct device *dev = gc->dev; 1205 int err = 0; 1206 1207 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, 1208 sizeof(req), sizeof(resp)); 1209 1210 req.hdr.resp.msg_version = GDMA_MESSAGE_V3; 1211 1212 req.proto_major_ver = proto_major_ver; 1213 req.proto_minor_ver = proto_minor_ver; 1214 req.proto_micro_ver = proto_micro_ver; 1215 1216 err = gdma_mana_send_request(gc, &req, sizeof(req), 1217 &resp, sizeof(resp)); 1218 if (err) { 1219 dev_err(dev, "Failed to query config: %d", err); 1220 return err; 1221 } 1222 1223 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, 1224 sizeof(resp)); 1225 if (err || resp.hdr.status) { 1226 dev_err(dev, "Invalid query result: %d, 0x%x\n", err, 1227 resp.hdr.status); 1228 if (!err) 1229 err = -EPROTO; 1230 return err; 1231 } 1232 1233 *max_num_vports = resp.max_num_vports; 1234 1235 if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) { 1236 if (resp.adapter_mtu == 0) { 1237 /* 1238 * Some older PF firmware versions report an 1239 * adapter_mtu of 0. MANA hardware always supports the 1240 * standard Ethernet MTU, so fall back to ETH_FRAME_LEN. 1241 * Jumbo frames will not be available in this case. 1242 */ 1243 dev_info(dev, 1244 "PF reported adapter_mtu of 0, falling back to %u (jumbo frames disabled)\n", 1245 ETH_FRAME_LEN); 1246 gc->adapter_mtu = ETH_FRAME_LEN; 1247 } else if (resp.adapter_mtu < ETH_MIN_MTU + ETH_HLEN) { 1248 dev_err(dev, "Adapter MTU too small: %u\n", 1249 resp.adapter_mtu); 1250 return -EPROTO; 1251 } else { 1252 gc->adapter_mtu = resp.adapter_mtu; 1253 } 1254 } else { 1255 gc->adapter_mtu = ETH_FRAME_LEN; 1256 } 1257 1258 if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) 1259 *bm_hostmode = resp.bm_hostmode; 1260 else 1261 *bm_hostmode = 0; 1262 1263 return 0; 1264 } 1265 1266 static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, 1267 u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) 1268 { 1269 struct mana_query_vport_cfg_resp resp = {}; 1270 struct mana_query_vport_cfg_req req = {}; 1271 int err; 1272 1273 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, 1274 sizeof(req), sizeof(resp)); 1275 1276 req.vport_index = vport_index; 1277 1278 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1279 sizeof(resp)); 1280 if (err) 1281 return err; 1282 1283 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, 1284 sizeof(resp)); 1285 if (err) 1286 return err; 1287 1288 if (resp.hdr.status) 1289 return -EPROTO; 1290 1291 *max_sq = resp.max_num_sq; 1292 *max_rq = resp.max_num_rq; 1293 1294 if (*max_sq == 0 || *max_rq == 0) { 1295 netdev_err(apc->ndev, "Invalid max queues from vPort config\n"); 1296 return -EPROTO; 1297 } 1298 1299 if (resp.num_indirection_ent > 0 && 1300 resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && 1301 is_power_of_2(resp.num_indirection_ent)) { 1302 *num_indir_entry = resp.num_indirection_ent; 1303 } else { 1304 netdev_warn(apc->ndev, 1305 "Setting indirection table size to default %d for vPort %d\n", 1306 MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); 1307 *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; 1308 } 1309 1310 apc->port_handle = resp.vport; 1311 ether_addr_copy(apc->mac_addr, resp.mac_addr); 1312 1313 apc->vport_max_sq = *max_sq; 1314 apc->vport_max_rq = *max_rq; 1315 1316 return 0; 1317 } 1318 1319 void mana_uncfg_vport(struct mana_port_context *apc) 1320 { 1321 mutex_lock(&apc->vport_mutex); 1322 apc->vport_use_count--; 1323 WARN_ON(apc->vport_use_count < 0); 1324 mutex_unlock(&apc->vport_mutex); 1325 } 1326 EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA"); 1327 1328 int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, 1329 u32 doorbell_pg_id, bool check_channel_changing) 1330 { 1331 struct mana_config_vport_resp resp = {}; 1332 struct mana_config_vport_req req = {}; 1333 int err; 1334 1335 /* This function is used to program the Ethernet port in the hardware 1336 * table. It can be called from the Ethernet driver or the RDMA driver. 1337 * 1338 * For Ethernet usage, the hardware supports only one active user on a 1339 * physical port. The driver checks on the port usage before programming 1340 * the hardware when creating the RAW QP (RDMA driver) or exposing the 1341 * device to kernel NET layer (Ethernet driver). 1342 * 1343 * Because the RDMA driver doesn't know in advance which QP type the 1344 * user will create, it exposes the device with all its ports. The user 1345 * may not be able to create RAW QP on a port if this port is already 1346 * in used by the Ethernet driver from the kernel. 1347 * 1348 * This physical port limitation only applies to the RAW QP. For RC QP, 1349 * the hardware doesn't have this limitation. The user can create RC 1350 * QPs on a physical port up to the hardware limits independent of the 1351 * Ethernet usage on the same port. 1352 */ 1353 mutex_lock(&apc->vport_mutex); 1354 if (apc->vport_use_count > 0 || 1355 (check_channel_changing && apc->channel_changing)) { 1356 mutex_unlock(&apc->vport_mutex); 1357 return -EBUSY; 1358 } 1359 apc->vport_use_count++; 1360 mutex_unlock(&apc->vport_mutex); 1361 1362 mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, 1363 sizeof(req), sizeof(resp)); 1364 req.vport = apc->port_handle; 1365 req.pdid = protection_dom_id; 1366 req.doorbell_pageid = doorbell_pg_id; 1367 1368 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1369 sizeof(resp)); 1370 if (err) { 1371 netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err); 1372 goto out; 1373 } 1374 1375 err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX, 1376 sizeof(resp)); 1377 if (err || resp.hdr.status) { 1378 netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n", 1379 err, resp.hdr.status); 1380 if (!err) 1381 err = -EPROTO; 1382 1383 goto out; 1384 } 1385 1386 apc->tx_shortform_allowed = resp.short_form_allowed; 1387 apc->tx_vp_offset = resp.tx_vport_offset; 1388 1389 netdev_info(apc->ndev, "Enabled vPort %llu PD %u DB %u MAC %pM\n", 1390 apc->port_handle, protection_dom_id, doorbell_pg_id, apc->mac_addr); 1391 out: 1392 if (err) 1393 mana_uncfg_vport(apc); 1394 1395 return err; 1396 } 1397 EXPORT_SYMBOL_NS(mana_cfg_vport, "NET_MANA"); 1398 1399 static int mana_cfg_vport_steering(struct mana_port_context *apc, 1400 enum TRI_STATE rx, 1401 bool update_default_rxobj, bool update_key, 1402 bool update_tab) 1403 { 1404 struct mana_cfg_rx_steer_req_v2 *req; 1405 struct mana_cfg_rx_steer_resp resp = {}; 1406 struct net_device *ndev = apc->ndev; 1407 u32 req_buf_size; 1408 int err; 1409 1410 req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); 1411 req = kzalloc(req_buf_size, GFP_KERNEL); 1412 if (!req) 1413 return -ENOMEM; 1414 1415 mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, 1416 sizeof(resp)); 1417 1418 req->hdr.req.msg_version = GDMA_MESSAGE_V2; 1419 req->hdr.resp.msg_version = GDMA_MESSAGE_V2; 1420 1421 req->vport = apc->port_handle; 1422 req->num_indir_entries = apc->indir_table_sz; 1423 req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, 1424 indir_tab); 1425 req->rx_enable = rx; 1426 req->rss_enable = apc->rss_state; 1427 req->update_default_rxobj = update_default_rxobj; 1428 req->update_hashkey = update_key; 1429 req->update_indir_tab = update_tab; 1430 req->default_rxobj = apc->default_rxobj; 1431 1432 if (rx != TRI_STATE_FALSE) 1433 req->cqe_coalescing_enable = apc->cqe_coalescing_enable; 1434 1435 if (update_key) 1436 memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); 1437 1438 if (update_tab) 1439 memcpy(req->indir_tab, apc->rxobj_table, 1440 flex_array_size(req, indir_tab, req->num_indir_entries)); 1441 1442 err = mana_send_request(apc->ac, req, req_buf_size, &resp, 1443 sizeof(resp)); 1444 if (err) { 1445 if (mana_en_need_log(apc, err)) 1446 netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); 1447 1448 goto out; 1449 } 1450 1451 err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, 1452 sizeof(resp)); 1453 if (err) { 1454 netdev_err(ndev, "vPort RX configuration failed: %d\n", err); 1455 goto out; 1456 } 1457 1458 if (resp.hdr.status) { 1459 netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", 1460 resp.hdr.status); 1461 err = -EPROTO; 1462 goto out; 1463 } 1464 1465 if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) 1466 apc->cqe_coalescing_timeout_ns = 1467 resp.cqe_coalescing_timeout_ns; 1468 1469 netdev_info(ndev, "Configured steering vPort %llu entries %u\n", 1470 apc->port_handle, apc->indir_table_sz); 1471 1472 apc->steer_rx = rx; 1473 apc->steer_rss = apc->rss_state; 1474 apc->steer_update_tab = update_tab; 1475 apc->steer_cqe_coalescing = req->cqe_coalescing_enable; 1476 out: 1477 kfree(req); 1478 return err; 1479 } 1480 1481 int mana_query_link_cfg(struct mana_port_context *apc) 1482 { 1483 struct net_device *ndev = apc->ndev; 1484 struct mana_query_link_config_resp resp = {}; 1485 struct mana_query_link_config_req req = {}; 1486 int err; 1487 1488 netdev_assert_locked(ndev); 1489 1490 err = apc->link_cfg_error; 1491 if (err <= 0) 1492 return err; 1493 1494 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG, 1495 sizeof(req), sizeof(resp)); 1496 1497 req.vport = apc->port_handle; 1498 req.hdr.resp.msg_version = GDMA_MESSAGE_V2; 1499 1500 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1501 sizeof(resp)); 1502 1503 if (err) { 1504 if (err == -EOPNOTSUPP) { 1505 netdev_info_once(ndev, "MANA_QUERY_LINK_CONFIG not supported\n"); 1506 apc->link_cfg_error = err; 1507 return err; 1508 } 1509 netdev_err(ndev, "Failed to query link config: %d\n", err); 1510 return err; 1511 } 1512 1513 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG, 1514 sizeof(resp)); 1515 1516 if (err || resp.hdr.status) { 1517 netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err, 1518 resp.hdr.status); 1519 if (!err) 1520 err = -EOPNOTSUPP; 1521 return err; 1522 } 1523 1524 if (resp.qos_unconfigured) 1525 return -EINVAL; 1526 1527 apc->speed = resp.link_speed_mbps; 1528 apc->max_speed = resp.qos_speed_mbps; 1529 apc->link_cfg_error = 0; 1530 return 0; 1531 } 1532 1533 int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, 1534 int enable_clamping) 1535 { 1536 struct mana_set_bw_clamp_resp resp = {}; 1537 struct mana_set_bw_clamp_req req = {}; 1538 struct net_device *ndev = apc->ndev; 1539 int err; 1540 1541 netdev_assert_locked(ndev); 1542 1543 mana_gd_init_req_hdr(&req.hdr, MANA_SET_BW_CLAMP, 1544 sizeof(req), sizeof(resp)); 1545 req.vport = apc->port_handle; 1546 req.link_speed_mbps = speed; 1547 req.enable_clamping = enable_clamping; 1548 1549 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1550 sizeof(resp)); 1551 1552 if (err) { 1553 if (err == -EOPNOTSUPP) { 1554 netdev_info_once(ndev, "MANA_SET_BW_CLAMP not supported\n"); 1555 return err; 1556 } 1557 netdev_err(ndev, "Failed to set bandwidth clamp for speed %u, err = %d", 1558 speed, err); 1559 return err; 1560 } 1561 1562 err = mana_verify_resp_hdr(&resp.hdr, MANA_SET_BW_CLAMP, 1563 sizeof(resp)); 1564 1565 if (err || resp.hdr.status) { 1566 netdev_err(ndev, "Failed to set bandwidth clamp: %d, 0x%x\n", err, 1567 resp.hdr.status); 1568 if (!err) 1569 err = -EOPNOTSUPP; 1570 return err; 1571 } 1572 1573 if (resp.qos_unconfigured) 1574 netdev_info(ndev, "QoS is unconfigured\n"); 1575 1576 /* Invalidate the cache; next query will re-fetch from firmware. */ 1577 apc->link_cfg_error = 1; 1578 return 0; 1579 } 1580 1581 int mana_create_wq_obj(struct mana_port_context *apc, 1582 mana_handle_t vport, 1583 u32 wq_type, struct mana_obj_spec *wq_spec, 1584 struct mana_obj_spec *cq_spec, 1585 mana_handle_t *wq_obj) 1586 { 1587 struct mana_create_wqobj_resp resp = {}; 1588 struct mana_create_wqobj_req req = {}; 1589 struct net_device *ndev = apc->ndev; 1590 int err; 1591 1592 mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, 1593 sizeof(req), sizeof(resp)); 1594 req.vport = vport; 1595 req.wq_type = wq_type; 1596 req.wq_gdma_region = wq_spec->gdma_region; 1597 req.cq_gdma_region = cq_spec->gdma_region; 1598 req.wq_size = wq_spec->queue_size; 1599 req.cq_size = cq_spec->queue_size; 1600 req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; 1601 req.cq_parent_qid = cq_spec->attached_eq; 1602 1603 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1604 sizeof(resp)); 1605 if (err) { 1606 netdev_err(ndev, "Failed to create WQ object: %d\n", err); 1607 goto out; 1608 } 1609 1610 err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, 1611 sizeof(resp)); 1612 if (err || resp.hdr.status) { 1613 netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err, 1614 resp.hdr.status); 1615 if (!err) 1616 err = -EPROTO; 1617 goto out; 1618 } 1619 1620 if (resp.wq_obj == INVALID_MANA_HANDLE) { 1621 netdev_err(ndev, "Got an invalid WQ object handle\n"); 1622 err = -EPROTO; 1623 goto out; 1624 } 1625 1626 *wq_obj = resp.wq_obj; 1627 wq_spec->queue_index = resp.wq_id; 1628 cq_spec->queue_index = resp.cq_id; 1629 1630 return 0; 1631 out: 1632 return err; 1633 } 1634 EXPORT_SYMBOL_NS(mana_create_wq_obj, "NET_MANA"); 1635 1636 void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, 1637 mana_handle_t wq_obj) 1638 { 1639 struct mana_destroy_wqobj_resp resp = {}; 1640 struct mana_destroy_wqobj_req req = {}; 1641 struct net_device *ndev = apc->ndev; 1642 int err; 1643 1644 mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, 1645 sizeof(req), sizeof(resp)); 1646 req.wq_type = wq_type; 1647 req.wq_obj_handle = wq_obj; 1648 1649 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1650 sizeof(resp)); 1651 if (err) { 1652 if (mana_en_need_log(apc, err)) 1653 netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); 1654 1655 return; 1656 } 1657 1658 err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, 1659 sizeof(resp)); 1660 if (err || resp.hdr.status) 1661 netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, 1662 resp.hdr.status); 1663 } 1664 EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA"); 1665 1666 void mana_destroy_eq(struct mana_port_context *apc) 1667 { 1668 struct mana_context *ac = apc->ac; 1669 struct gdma_context *gc = ac->gdma_dev->gdma_context; 1670 struct gdma_queue *eq; 1671 unsigned int msi; 1672 int i; 1673 1674 if (!apc->eqs) 1675 return; 1676 1677 debugfs_remove_recursive(apc->mana_eqs_debugfs); 1678 apc->mana_eqs_debugfs = NULL; 1679 1680 for (i = 0; i < apc->num_queues; i++) { 1681 eq = apc->eqs[i].eq; 1682 if (!eq) 1683 continue; 1684 1685 msi = eq->eq.msix_index; 1686 mana_gd_destroy_queue(gc, eq); 1687 mana_gd_put_gic(gc, !gc->msi_sharing, msi); 1688 } 1689 1690 kfree(apc->eqs); 1691 apc->eqs = NULL; 1692 } 1693 EXPORT_SYMBOL_NS(mana_destroy_eq, "NET_MANA"); 1694 1695 static void mana_create_eq_debugfs(struct mana_port_context *apc, int i) 1696 { 1697 struct mana_eq eq = apc->eqs[i]; 1698 char eqnum[32]; 1699 1700 sprintf(eqnum, "eq%d", i); 1701 eq.mana_eq_debugfs = debugfs_create_dir(eqnum, apc->mana_eqs_debugfs); 1702 debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head); 1703 debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail); 1704 debugfs_create_u32("irq", 0400, eq.mana_eq_debugfs, &eq.eq->eq.irq); 1705 debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops); 1706 } 1707 1708 int mana_create_eq(struct mana_port_context *apc) 1709 { 1710 struct gdma_dev *gd = apc->ac->gdma_dev; 1711 struct gdma_context *gc = gd->gdma_context; 1712 struct gdma_queue_spec spec = {}; 1713 struct gdma_irq_context *gic; 1714 int err; 1715 int msi; 1716 int i; 1717 1718 if (WARN_ON(apc->eqs)) 1719 return -EEXIST; 1720 apc->eqs = kzalloc_objs(struct mana_eq, apc->num_queues); 1721 if (!apc->eqs) 1722 return -ENOMEM; 1723 1724 spec.type = GDMA_EQ; 1725 spec.monitor_avl_buf = false; 1726 spec.queue_size = EQ_SIZE; 1727 spec.eq.callback = NULL; 1728 spec.eq.context = apc->eqs; 1729 spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; 1730 1731 apc->mana_eqs_debugfs = 1732 debugfs_create_dir("EQs", apc->mana_port_debugfs); 1733 1734 for (i = 0; i < apc->num_queues; i++) { 1735 msi = (i + 1) % gc->num_msix_usable; 1736 1737 gic = mana_gd_get_gic(gc, !gc->msi_sharing, &msi); 1738 if (IS_ERR(gic)) { 1739 err = PTR_ERR(gic); 1740 goto out; 1741 } 1742 spec.eq.msix_index = msi; 1743 1744 err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq); 1745 if (err) { 1746 dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); 1747 mana_gd_put_gic(gc, !gc->msi_sharing, msi); 1748 goto out; 1749 } 1750 apc->eqs[i].eq->eq.irq = gic->irq; 1751 mana_create_eq_debugfs(apc, i); 1752 } 1753 1754 return 0; 1755 out: 1756 mana_destroy_eq(apc); 1757 return err; 1758 } 1759 EXPORT_SYMBOL_NS(mana_create_eq, "NET_MANA"); 1760 1761 static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq) 1762 { 1763 struct mana_fence_rq_resp resp = {}; 1764 struct mana_fence_rq_req req = {}; 1765 int err; 1766 1767 init_completion(&rxq->fence_event); 1768 1769 mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ, 1770 sizeof(req), sizeof(resp)); 1771 req.wq_obj_handle = rxq->rxobj; 1772 1773 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1774 sizeof(resp)); 1775 if (err) { 1776 netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n", 1777 rxq->rxq_idx, err); 1778 return err; 1779 } 1780 1781 err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp)); 1782 if (err || resp.hdr.status) { 1783 netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n", 1784 rxq->rxq_idx, err, resp.hdr.status); 1785 if (!err) 1786 err = -EPROTO; 1787 1788 return err; 1789 } 1790 1791 if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) { 1792 netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n", 1793 rxq->rxq_idx); 1794 return -ETIMEDOUT; 1795 } 1796 1797 return 0; 1798 } 1799 1800 static void mana_fence_rqs(struct mana_port_context *apc) 1801 { 1802 unsigned int rxq_idx; 1803 struct mana_rxq *rxq; 1804 int err; 1805 1806 if (!apc->rxqs) 1807 return; 1808 1809 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 1810 rxq = apc->rxqs[rxq_idx]; 1811 err = mana_fence_rq(apc, rxq); 1812 1813 /* In case of any error, use sleep instead. */ 1814 if (err) 1815 msleep(100); 1816 } 1817 } 1818 1819 static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) 1820 { 1821 u32 used_space_old; 1822 u32 used_space_new; 1823 1824 used_space_old = wq->head - wq->tail; 1825 used_space_new = wq->head - (wq->tail + num_units); 1826 1827 if (WARN_ON_ONCE(used_space_new > used_space_old)) 1828 return -ERANGE; 1829 1830 wq->tail += num_units; 1831 return 0; 1832 } 1833 1834 void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) 1835 { 1836 struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; 1837 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 1838 struct device *dev = gc->dev; 1839 int hsg, i; 1840 1841 /* Number of SGEs of linear part */ 1842 hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1; 1843 1844 for (i = 0; i < hsg; i++) 1845 dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], 1846 DMA_TO_DEVICE); 1847 1848 for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++) 1849 dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], 1850 DMA_TO_DEVICE); 1851 } 1852 1853 static void mana_poll_tx_cq(struct mana_cq *cq) 1854 { 1855 struct gdma_comp *completions = cq->gdma_comp_buf; 1856 struct gdma_posted_wqe_info *wqe_info; 1857 unsigned int pkt_transmitted = 0; 1858 unsigned int wqe_unit_cnt = 0; 1859 struct mana_txq *txq = cq->txq; 1860 struct mana_port_context *apc; 1861 struct netdev_queue *net_txq; 1862 struct gdma_queue *gdma_wq; 1863 unsigned int avail_space; 1864 struct net_device *ndev; 1865 struct sk_buff *skb; 1866 bool txq_stopped; 1867 int comp_read; 1868 int i; 1869 1870 ndev = txq->ndev; 1871 apc = netdev_priv(ndev); 1872 1873 /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the 1874 * doorbell can be rung in time for the hardware's requirement 1875 * of at least one doorbell ring every 8 wraparounds. 1876 */ 1877 comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, 1878 min((cq->gdma_cq->queue_size / 1879 COMP_ENTRY_SIZE) * 4, 1880 CQE_POLLING_BUFFER)); 1881 1882 if (comp_read < 1) 1883 return; 1884 1885 for (i = 0; i < comp_read; i++) { 1886 struct mana_tx_comp_oob *cqe_oob; 1887 1888 if (WARN_ON_ONCE(!completions[i].is_sq)) 1889 return; 1890 1891 cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; 1892 if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != 1893 MANA_CQE_COMPLETION)) 1894 return; 1895 1896 switch (cqe_oob->cqe_hdr.cqe_type) { 1897 case CQE_TX_OKAY: 1898 break; 1899 1900 case CQE_TX_SA_DROP: 1901 case CQE_TX_MTU_DROP: 1902 case CQE_TX_INVALID_OOB: 1903 case CQE_TX_INVALID_ETH_TYPE: 1904 case CQE_TX_HDR_PROCESSING_ERROR: 1905 case CQE_TX_VF_DISABLED: 1906 case CQE_TX_VPORT_IDX_OUT_OF_RANGE: 1907 case CQE_TX_VPORT_DISABLED: 1908 case CQE_TX_VLAN_TAGGING_VIOLATION: 1909 if (net_ratelimit()) 1910 netdev_err(ndev, "TX: CQE error %d\n", 1911 cqe_oob->cqe_hdr.cqe_type); 1912 1913 apc->eth_stats.tx_cqe_err++; 1914 break; 1915 1916 default: 1917 /* If the CQE type is unknown, log an error, 1918 * and still free the SKB, update tail, etc. 1919 */ 1920 if (net_ratelimit()) 1921 netdev_err(ndev, "TX: unknown CQE type %d\n", 1922 cqe_oob->cqe_hdr.cqe_type); 1923 1924 apc->eth_stats.tx_cqe_unknown_type++; 1925 break; 1926 } 1927 1928 if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) 1929 return; 1930 1931 skb = skb_dequeue(&txq->pending_skbs); 1932 if (WARN_ON_ONCE(!skb)) 1933 return; 1934 1935 wqe_info = (struct gdma_posted_wqe_info *)skb->cb; 1936 wqe_unit_cnt += wqe_info->wqe_size_in_bu; 1937 1938 mana_unmap_skb(skb, apc); 1939 1940 napi_consume_skb(skb, cq->budget); 1941 1942 pkt_transmitted++; 1943 } 1944 1945 if (WARN_ON_ONCE(wqe_unit_cnt == 0)) 1946 return; 1947 1948 mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); 1949 1950 gdma_wq = txq->gdma_sq; 1951 avail_space = mana_gd_wq_avail_space(gdma_wq); 1952 1953 /* Ensure tail updated before checking q stop */ 1954 smp_mb(); 1955 1956 net_txq = txq->net_txq; 1957 txq_stopped = netif_tx_queue_stopped(net_txq); 1958 1959 /* Ensure checking txq_stopped before apc->port_is_up. */ 1960 smp_rmb(); 1961 1962 if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { 1963 netif_tx_wake_queue(net_txq); 1964 apc->eth_stats.wake_queue++; 1965 } 1966 1967 if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) 1968 WARN_ON_ONCE(1); 1969 1970 cq->work_done = pkt_transmitted; 1971 } 1972 1973 static void mana_post_pkt_rxq(struct mana_rxq *rxq) 1974 { 1975 struct mana_recv_buf_oob *recv_buf_oob; 1976 u32 curr_index; 1977 int err; 1978 1979 curr_index = rxq->buf_index++; 1980 if (rxq->buf_index == rxq->num_rx_buf) 1981 rxq->buf_index = 0; 1982 1983 recv_buf_oob = &rxq->rx_oobs[curr_index]; 1984 1985 err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req, 1986 &recv_buf_oob->wqe_inf); 1987 if (WARN_ON_ONCE(err)) 1988 return; 1989 1990 WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); 1991 } 1992 1993 static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, 1994 uint pkt_len, struct xdp_buff *xdp) 1995 { 1996 struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size); 1997 1998 if (!skb) 1999 return NULL; 2000 2001 if (xdp->data_hard_start) { 2002 u32 metasize = xdp->data - xdp->data_meta; 2003 2004 skb_reserve(skb, xdp->data - xdp->data_hard_start); 2005 skb_put(skb, xdp->data_end - xdp->data); 2006 if (metasize) 2007 skb_metadata_set(skb, metasize); 2008 return skb; 2009 } 2010 2011 skb_reserve(skb, rxq->headroom); 2012 skb_put(skb, pkt_len); 2013 2014 return skb; 2015 } 2016 2017 static void mana_rx_skb(void *buf_va, bool from_pool, 2018 struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq, 2019 int i) 2020 { 2021 struct mana_stats_rx *rx_stats = &rxq->stats; 2022 struct net_device *ndev = rxq->ndev; 2023 uint pkt_len = cqe->ppi[i].pkt_len; 2024 u16 rxq_idx = rxq->rxq_idx; 2025 struct napi_struct *napi; 2026 struct xdp_buff xdp = {}; 2027 struct sk_buff *skb; 2028 u32 hash_value; 2029 u32 act; 2030 2031 rxq->rx_cq.work_done++; 2032 napi = &rxq->rx_cq.napi; 2033 2034 if (!buf_va) { 2035 ++ndev->stats.rx_dropped; 2036 return; 2037 } 2038 2039 act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); 2040 2041 if (act == XDP_REDIRECT && !rxq->xdp_rc) 2042 return; 2043 2044 if (act != XDP_PASS && act != XDP_TX) 2045 goto drop_xdp; 2046 2047 skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp); 2048 2049 if (!skb) 2050 goto drop; 2051 2052 if (from_pool) 2053 skb_mark_for_recycle(skb); 2054 2055 skb->dev = napi->dev; 2056 2057 skb->protocol = eth_type_trans(skb, ndev); 2058 skb_checksum_none_assert(skb); 2059 skb_record_rx_queue(skb, rxq_idx); 2060 2061 if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { 2062 if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) 2063 skb->ip_summed = CHECKSUM_UNNECESSARY; 2064 } 2065 2066 if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { 2067 hash_value = cqe->ppi[i].pkt_hash; 2068 2069 if (cqe->rx_hashtype & MANA_HASH_L4) 2070 skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); 2071 else 2072 skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); 2073 } 2074 2075 if (cqe->rx_vlantag_present) { 2076 u16 vlan_tci = cqe->rx_vlan_id; 2077 2078 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); 2079 } 2080 2081 u64_stats_update_begin(&rx_stats->syncp); 2082 rx_stats->packets++; 2083 rx_stats->bytes += pkt_len; 2084 2085 if (act == XDP_TX) 2086 rx_stats->xdp_tx++; 2087 u64_stats_update_end(&rx_stats->syncp); 2088 2089 if (act == XDP_TX) { 2090 skb_set_queue_mapping(skb, rxq_idx); 2091 mana_xdp_tx(skb, ndev); 2092 return; 2093 } 2094 2095 napi_gro_receive(napi, skb); 2096 2097 return; 2098 2099 drop_xdp: 2100 u64_stats_update_begin(&rx_stats->syncp); 2101 rx_stats->xdp_drop++; 2102 u64_stats_update_end(&rx_stats->syncp); 2103 2104 drop: 2105 if (from_pool) { 2106 if (rxq->frag_count == 1) 2107 page_pool_recycle_direct(rxq->page_pool, 2108 virt_to_head_page(buf_va)); 2109 else 2110 page_pool_free_va(rxq->page_pool, buf_va, true); 2111 } else { 2112 WARN_ON_ONCE(rxq->xdp_save_va); 2113 /* Save for reuse */ 2114 rxq->xdp_save_va = buf_va; 2115 } 2116 2117 ++ndev->stats.rx_dropped; 2118 2119 return; 2120 } 2121 2122 static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, 2123 dma_addr_t *da, bool *from_pool) 2124 { 2125 struct page *page; 2126 u32 offset; 2127 void *va; 2128 *from_pool = false; 2129 2130 /* Don't use fragments for jumbo frames or XDP where it's 1 fragment 2131 * per page. 2132 */ 2133 if (rxq->frag_count == 1) { 2134 /* Reuse XDP dropped page if available */ 2135 if (rxq->xdp_save_va) { 2136 va = rxq->xdp_save_va; 2137 page = virt_to_head_page(va); 2138 rxq->xdp_save_va = NULL; 2139 } else { 2140 page = page_pool_dev_alloc_pages(rxq->page_pool); 2141 if (!page) 2142 return NULL; 2143 2144 *from_pool = true; 2145 va = page_to_virt(page); 2146 } 2147 2148 *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, 2149 DMA_FROM_DEVICE); 2150 if (dma_mapping_error(dev, *da)) { 2151 mana_put_rx_page(rxq, page, *from_pool); 2152 return NULL; 2153 } 2154 2155 return va; 2156 } 2157 2158 page = page_pool_dev_alloc_frag(rxq->page_pool, &offset, 2159 rxq->alloc_size); 2160 if (!page) 2161 return NULL; 2162 2163 va = page_to_virt(page) + offset; 2164 *da = page_pool_get_dma_addr(page) + offset + rxq->headroom; 2165 *from_pool = true; 2166 2167 return va; 2168 } 2169 2170 /* Allocate frag for rx buffer, and save the old buf */ 2171 static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, 2172 struct mana_recv_buf_oob *rxoob, void **old_buf, 2173 bool *old_fp) 2174 { 2175 bool from_pool; 2176 dma_addr_t da; 2177 void *va; 2178 2179 va = mana_get_rxfrag(rxq, dev, &da, &from_pool); 2180 if (!va) 2181 return; 2182 if (!rxoob->from_pool || rxq->frag_count == 1) 2183 dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, 2184 DMA_FROM_DEVICE); 2185 *old_buf = rxoob->buf_va; 2186 *old_fp = rxoob->from_pool; 2187 2188 rxoob->buf_va = va; 2189 rxoob->sgl[0].address = da; 2190 rxoob->from_pool = from_pool; 2191 } 2192 2193 static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, 2194 struct gdma_comp *cqe) 2195 { 2196 struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; 2197 struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; 2198 struct net_device *ndev = rxq->ndev; 2199 struct mana_recv_buf_oob *rxbuf_oob; 2200 struct mana_port_context *apc; 2201 struct device *dev = gc->dev; 2202 bool coalesced = false; 2203 void *old_buf = NULL; 2204 u32 curr, pktlen; 2205 bool old_fp; 2206 int i; 2207 2208 apc = netdev_priv(ndev); 2209 2210 switch (oob->cqe_hdr.cqe_type) { 2211 case CQE_RX_OKAY: 2212 break; 2213 2214 case CQE_RX_TRUNCATED: 2215 ++ndev->stats.rx_dropped; 2216 rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; 2217 netdev_warn_once(ndev, "Dropped a truncated packet\n"); 2218 2219 mana_move_wq_tail(rxq->gdma_rq, 2220 rxbuf_oob->wqe_inf.wqe_size_in_bu); 2221 mana_post_pkt_rxq(rxq); 2222 return; 2223 2224 case CQE_RX_COALESCED_4: 2225 coalesced = true; 2226 break; 2227 2228 case CQE_RX_OBJECT_FENCE: 2229 complete(&rxq->fence_event); 2230 return; 2231 2232 default: 2233 netdev_err(ndev, "Unknown RX CQE type = %d\n", 2234 oob->cqe_hdr.cqe_type); 2235 apc->eth_stats.rx_cqe_unknown_type++; 2236 return; 2237 } 2238 2239 for (i = 0; i < MANA_RXCOMP_OOB_NUM_PPI; i++) { 2240 old_buf = NULL; 2241 pktlen = oob->ppi[i].pkt_len; 2242 if (pktlen == 0) 2243 break; 2244 2245 curr = rxq->buf_index; 2246 rxbuf_oob = &rxq->rx_oobs[curr]; 2247 WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); 2248 2249 mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp); 2250 2251 /* Unsuccessful refill will have old_buf == NULL. 2252 * In this case, mana_rx_skb() will drop the packet. 2253 */ 2254 mana_rx_skb(old_buf, old_fp, oob, rxq, i); 2255 2256 mana_move_wq_tail(rxq->gdma_rq, 2257 rxbuf_oob->wqe_inf.wqe_size_in_bu); 2258 2259 mana_post_pkt_rxq(rxq); 2260 2261 if (!coalesced) 2262 break; 2263 } 2264 2265 /* Collect coalesced CQE count based on packets processed. 2266 * Coalesced CQEs have at least 2 packets, so index is i - 2. 2267 */ 2268 if (i > 1) { 2269 u64_stats_update_begin(&rxq->stats.syncp); 2270 rxq->stats.coalesced_cqe[i - 2]++; 2271 u64_stats_update_end(&rxq->stats.syncp); 2272 } else if (!i && !pktlen) { 2273 u64_stats_update_begin(&rxq->stats.syncp); 2274 rxq->stats.pkt_len0_err++; 2275 u64_stats_update_end(&rxq->stats.syncp); 2276 netdev_err_once(ndev, 2277 "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", 2278 rxq->gdma_id, cq->gdma_id, rxq->rxobj); 2279 } 2280 } 2281 2282 static void mana_poll_rx_cq(struct mana_cq *cq) 2283 { 2284 struct gdma_comp *comp = cq->gdma_comp_buf; 2285 struct mana_rxq *rxq = cq->rxq; 2286 int comp_read, i; 2287 2288 /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the 2289 * doorbell can be rung in time for the hardware's requirement 2290 * of at least one doorbell ring every 8 wraparounds. 2291 */ 2292 comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, 2293 min((cq->gdma_cq->queue_size / 2294 COMP_ENTRY_SIZE) * 4, 2295 CQE_POLLING_BUFFER)); 2296 WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); 2297 2298 rxq->xdp_flush = false; 2299 2300 for (i = 0; i < comp_read; i++) { 2301 if (WARN_ON_ONCE(comp[i].is_sq)) 2302 return; 2303 2304 /* verify recv cqe references the right rxq */ 2305 if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) 2306 return; 2307 2308 mana_process_rx_cqe(rxq, cq, &comp[i]); 2309 } 2310 2311 if (comp_read > 0) { 2312 struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; 2313 2314 mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq); 2315 } 2316 2317 if (rxq->xdp_flush) 2318 xdp_do_flush(); 2319 } 2320 2321 static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) 2322 { 2323 struct mana_cq *cq = context; 2324 int w; 2325 2326 WARN_ON_ONCE(cq->gdma_cq != gdma_queue); 2327 2328 if (cq->type == MANA_CQ_TYPE_RX) 2329 mana_poll_rx_cq(cq); 2330 else 2331 mana_poll_tx_cq(cq); 2332 2333 w = cq->work_done; 2334 cq->work_done_since_doorbell += w; 2335 2336 if (w < cq->budget) { 2337 mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); 2338 cq->work_done_since_doorbell = 0; 2339 napi_complete_done(&cq->napi, w); 2340 } else if (cq->work_done_since_doorbell >= 2341 (cq->gdma_cq->queue_size / COMP_ENTRY_SIZE) * 4) { 2342 /* MANA hardware requires at least one doorbell ring every 8 2343 * wraparounds of CQ even if there is no need to arm the CQ. 2344 * This driver rings the doorbell as soon as it has processed 2345 * 4 wraparounds. 2346 */ 2347 mana_gd_ring_cq(gdma_queue, 0); 2348 cq->work_done_since_doorbell = 0; 2349 } 2350 2351 return w; 2352 } 2353 2354 static int mana_poll(struct napi_struct *napi, int budget) 2355 { 2356 struct mana_cq *cq = container_of(napi, struct mana_cq, napi); 2357 int w; 2358 2359 cq->work_done = 0; 2360 cq->budget = budget; 2361 2362 w = mana_cq_handler(cq, cq->gdma_cq); 2363 2364 return min(w, budget); 2365 } 2366 2367 static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) 2368 { 2369 struct mana_cq *cq = context; 2370 2371 napi_schedule_irqoff(&cq->napi); 2372 } 2373 2374 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) 2375 { 2376 struct gdma_dev *gd = apc->ac->gdma_dev; 2377 2378 if (!cq->gdma_cq) 2379 return; 2380 2381 mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); 2382 } 2383 2384 static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) 2385 { 2386 struct gdma_dev *gd = apc->ac->gdma_dev; 2387 2388 if (!txq->gdma_sq) 2389 return; 2390 2391 mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); 2392 } 2393 2394 static void mana_destroy_txq(struct mana_port_context *apc) 2395 { 2396 struct napi_struct *napi; 2397 int i; 2398 2399 if (!apc->tx_qp) 2400 return; 2401 2402 for (i = 0; i < apc->num_queues; i++) { 2403 if (!apc->tx_qp[i]) 2404 continue; 2405 2406 debugfs_remove_recursive(apc->tx_qp[i]->mana_tx_debugfs); 2407 apc->tx_qp[i]->mana_tx_debugfs = NULL; 2408 2409 napi = &apc->tx_qp[i]->tx_cq.napi; 2410 if (apc->tx_qp[i]->txq.napi_initialized) { 2411 napi_synchronize(napi); 2412 napi_disable_locked(napi); 2413 netif_napi_del_locked(napi); 2414 apc->tx_qp[i]->txq.napi_initialized = false; 2415 } 2416 2417 if (apc->tx_qp[i]->tx_object != INVALID_MANA_HANDLE) 2418 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i]->tx_object); 2419 2420 mana_deinit_cq(apc, &apc->tx_qp[i]->tx_cq); 2421 2422 mana_deinit_txq(apc, &apc->tx_qp[i]->txq); 2423 2424 kvfree(apc->tx_qp[i]); 2425 } 2426 2427 kfree(apc->tx_qp); 2428 apc->tx_qp = NULL; 2429 } 2430 2431 static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx) 2432 { 2433 struct mana_tx_qp *tx_qp = apc->tx_qp[idx]; 2434 char qnum[32]; 2435 2436 sprintf(qnum, "TX-%d", idx); 2437 tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); 2438 debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs, 2439 &tx_qp->txq.gdma_sq->head); 2440 debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs, 2441 &tx_qp->txq.gdma_sq->tail); 2442 debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs, 2443 &tx_qp->txq.pending_skbs.qlen); 2444 debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs, 2445 &tx_qp->tx_cq.gdma_cq->head); 2446 debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs, 2447 &tx_qp->tx_cq.gdma_cq->tail); 2448 debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs, 2449 &tx_qp->tx_cq.budget); 2450 debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs, 2451 tx_qp->txq.gdma_sq, &mana_dbg_q_fops); 2452 debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs, 2453 tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops); 2454 } 2455 2456 static int mana_create_txq(struct mana_port_context *apc, 2457 struct net_device *net) 2458 { 2459 struct mana_context *ac = apc->ac; 2460 struct gdma_dev *gd = ac->gdma_dev; 2461 struct mana_obj_spec wq_spec; 2462 struct mana_obj_spec cq_spec; 2463 struct gdma_queue_spec spec; 2464 struct gdma_context *gc; 2465 struct mana_txq *txq; 2466 struct mana_cq *cq; 2467 u32 txq_size; 2468 u32 cq_size; 2469 int err; 2470 int i; 2471 2472 apc->tx_qp = kzalloc_objs(struct mana_tx_qp *, apc->num_queues); 2473 if (!apc->tx_qp) 2474 return -ENOMEM; 2475 2476 /* The minimum size of the WQE is 32 bytes, hence 2477 * apc->tx_queue_size represents the maximum number of WQEs 2478 * the SQ can store. This value is then used to size other queues 2479 * to prevent overflow. 2480 * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED, 2481 * as min val of apc->tx_queue_size is 128 and that would make 2482 * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size 2483 * are always power of two 2484 */ 2485 txq_size = apc->tx_queue_size * 32; 2486 2487 cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; 2488 2489 gc = gd->gdma_context; 2490 2491 for (i = 0; i < apc->num_queues; i++) { 2492 apc->tx_qp[i] = kvzalloc_obj(*apc->tx_qp[i]); 2493 if (!apc->tx_qp[i]) { 2494 err = -ENOMEM; 2495 goto out; 2496 } 2497 2498 apc->tx_qp[i]->tx_object = INVALID_MANA_HANDLE; 2499 2500 /* Create SQ */ 2501 txq = &apc->tx_qp[i]->txq; 2502 2503 u64_stats_init(&txq->stats.syncp); 2504 txq->ndev = net; 2505 txq->net_txq = netdev_get_tx_queue(net, i); 2506 txq->vp_offset = apc->tx_vp_offset; 2507 txq->napi_initialized = false; 2508 skb_queue_head_init(&txq->pending_skbs); 2509 2510 memset(&spec, 0, sizeof(spec)); 2511 spec.type = GDMA_SQ; 2512 spec.monitor_avl_buf = true; 2513 spec.queue_size = txq_size; 2514 err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); 2515 if (err) 2516 goto out; 2517 2518 /* Create SQ's CQ */ 2519 cq = &apc->tx_qp[i]->tx_cq; 2520 cq->type = MANA_CQ_TYPE_TX; 2521 2522 cq->txq = txq; 2523 2524 memset(&spec, 0, sizeof(spec)); 2525 spec.type = GDMA_CQ; 2526 spec.monitor_avl_buf = false; 2527 spec.queue_size = cq_size; 2528 spec.cq.callback = mana_schedule_napi; 2529 spec.cq.parent_eq = apc->eqs[i].eq; 2530 spec.cq.context = cq; 2531 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 2532 if (err) 2533 goto out; 2534 2535 memset(&wq_spec, 0, sizeof(wq_spec)); 2536 memset(&cq_spec, 0, sizeof(cq_spec)); 2537 2538 wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle; 2539 wq_spec.queue_size = txq->gdma_sq->queue_size; 2540 2541 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 2542 cq_spec.queue_size = cq->gdma_cq->queue_size; 2543 cq_spec.modr_ctx_id = 0; 2544 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 2545 2546 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, 2547 &wq_spec, &cq_spec, 2548 &apc->tx_qp[i]->tx_object); 2549 2550 if (err) 2551 goto out; 2552 2553 txq->gdma_sq->id = wq_spec.queue_index; 2554 cq->gdma_cq->id = cq_spec.queue_index; 2555 2556 txq->gdma_sq->mem_info.dma_region_handle = 2557 GDMA_INVALID_DMA_REGION; 2558 cq->gdma_cq->mem_info.dma_region_handle = 2559 GDMA_INVALID_DMA_REGION; 2560 2561 txq->gdma_txq_id = txq->gdma_sq->id; 2562 2563 cq->gdma_id = cq->gdma_cq->id; 2564 2565 if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { 2566 err = -EINVAL; 2567 goto out; 2568 } 2569 2570 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2571 2572 mana_create_txq_debugfs(apc, i); 2573 2574 set_bit(NAPI_STATE_NO_BUSY_POLL, &cq->napi.state); 2575 netif_napi_add_locked(net, &cq->napi, mana_poll); 2576 napi_enable_locked(&cq->napi); 2577 txq->napi_initialized = true; 2578 2579 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2580 } 2581 2582 return 0; 2583 out: 2584 netdev_err(net, "Failed to create %d TX queues, %d\n", 2585 apc->num_queues, err); 2586 mana_destroy_txq(apc); 2587 return err; 2588 } 2589 2590 static void mana_destroy_rxq(struct mana_port_context *apc, 2591 struct mana_rxq *rxq, bool napi_initialized) 2592 2593 { 2594 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 2595 struct mana_recv_buf_oob *rx_oob; 2596 struct device *dev = gc->dev; 2597 struct napi_struct *napi; 2598 struct page *page; 2599 int i; 2600 2601 if (!rxq) 2602 return; 2603 2604 debugfs_remove_recursive(rxq->mana_rx_debugfs); 2605 rxq->mana_rx_debugfs = NULL; 2606 2607 napi = &rxq->rx_cq.napi; 2608 2609 if (napi_initialized) { 2610 napi_synchronize(napi); 2611 2612 napi_disable_locked(napi); 2613 netif_napi_del_locked(napi); 2614 } 2615 2616 if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) 2617 xdp_rxq_info_unreg(&rxq->xdp_rxq); 2618 2619 if (rxq->rxobj != INVALID_MANA_HANDLE) 2620 mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); 2621 2622 mana_deinit_cq(apc, &rxq->rx_cq); 2623 2624 if (rxq->xdp_save_va) 2625 put_page(virt_to_head_page(rxq->xdp_save_va)); 2626 2627 for (i = 0; i < rxq->num_rx_buf; i++) { 2628 rx_oob = &rxq->rx_oobs[i]; 2629 2630 if (!rx_oob->buf_va) 2631 continue; 2632 2633 page = virt_to_head_page(rx_oob->buf_va); 2634 2635 if (rxq->frag_count == 1 || !rx_oob->from_pool) { 2636 dma_unmap_single(dev, rx_oob->sgl[0].address, 2637 rx_oob->sgl[0].size, DMA_FROM_DEVICE); 2638 mana_put_rx_page(rxq, page, rx_oob->from_pool); 2639 } else { 2640 page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true); 2641 } 2642 2643 rx_oob->buf_va = NULL; 2644 } 2645 2646 page_pool_destroy(rxq->page_pool); 2647 2648 if (rxq->gdma_rq) 2649 mana_gd_destroy_queue(gc, rxq->gdma_rq); 2650 2651 kvfree(rxq); 2652 } 2653 2654 static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, 2655 struct mana_rxq *rxq, struct device *dev) 2656 { 2657 struct mana_port_context *mpc = netdev_priv(rxq->ndev); 2658 bool from_pool = false; 2659 dma_addr_t da; 2660 void *va; 2661 2662 if (mpc->rxbufs_pre) 2663 va = mana_get_rxbuf_pre(rxq, &da); 2664 else 2665 va = mana_get_rxfrag(rxq, dev, &da, &from_pool); 2666 2667 if (!va) 2668 return -ENOMEM; 2669 2670 rx_oob->buf_va = va; 2671 rx_oob->from_pool = from_pool; 2672 2673 rx_oob->sgl[0].address = da; 2674 rx_oob->sgl[0].size = rxq->datasize; 2675 rx_oob->sgl[0].mem_key = mem_key; 2676 2677 return 0; 2678 } 2679 2680 #define MANA_WQE_HEADER_SIZE 16 2681 #define MANA_WQE_SGE_SIZE 16 2682 2683 static int mana_alloc_rx_wqe(struct mana_port_context *apc, 2684 struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) 2685 { 2686 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 2687 struct mana_recv_buf_oob *rx_oob; 2688 struct device *dev = gc->dev; 2689 u32 buf_idx; 2690 int ret; 2691 2692 WARN_ON(rxq->datasize == 0); 2693 2694 *rxq_size = 0; 2695 *cq_size = 0; 2696 2697 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 2698 rx_oob = &rxq->rx_oobs[buf_idx]; 2699 memset(rx_oob, 0, sizeof(*rx_oob)); 2700 2701 rx_oob->num_sge = 1; 2702 2703 ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq, 2704 dev); 2705 if (ret) 2706 return ret; 2707 2708 rx_oob->wqe_req.sgl = rx_oob->sgl; 2709 rx_oob->wqe_req.num_sge = rx_oob->num_sge; 2710 rx_oob->wqe_req.inline_oob_size = 0; 2711 rx_oob->wqe_req.inline_oob_data = NULL; 2712 rx_oob->wqe_req.flags = 0; 2713 rx_oob->wqe_req.client_data_unit = 0; 2714 2715 *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + 2716 MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); 2717 *cq_size += COMP_ENTRY_SIZE; 2718 } 2719 2720 return 0; 2721 } 2722 2723 static int mana_push_wqe(struct mana_rxq *rxq) 2724 { 2725 struct mana_recv_buf_oob *rx_oob; 2726 u32 buf_idx; 2727 int err; 2728 2729 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 2730 rx_oob = &rxq->rx_oobs[buf_idx]; 2731 2732 err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, 2733 &rx_oob->wqe_inf); 2734 if (err) 2735 return -ENOSPC; 2736 } 2737 2738 return 0; 2739 } 2740 2741 static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) 2742 { 2743 struct mana_port_context *mpc = netdev_priv(rxq->ndev); 2744 struct page_pool_params pprm = {}; 2745 int ret; 2746 2747 pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1; 2748 pprm.nid = gc->numa_node; 2749 pprm.napi = &rxq->rx_cq.napi; 2750 pprm.netdev = rxq->ndev; 2751 pprm.order = get_order(rxq->alloc_size); 2752 pprm.queue_idx = rxq->rxq_idx; 2753 pprm.dev = gc->dev; 2754 2755 /* Let the page pool do the dma map when page sharing with multiple 2756 * fragments enabled for rx buffers. 2757 */ 2758 if (rxq->frag_count > 1) { 2759 pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 2760 pprm.max_len = PAGE_SIZE; 2761 pprm.dma_dir = DMA_FROM_DEVICE; 2762 } 2763 2764 rxq->page_pool = page_pool_create(&pprm); 2765 2766 if (IS_ERR(rxq->page_pool)) { 2767 ret = PTR_ERR(rxq->page_pool); 2768 rxq->page_pool = NULL; 2769 return ret; 2770 } 2771 2772 return 0; 2773 } 2774 2775 static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, 2776 u32 rxq_idx, struct mana_eq *eq, 2777 struct net_device *ndev) 2778 { 2779 struct gdma_dev *gd = apc->ac->gdma_dev; 2780 struct mana_obj_spec wq_spec; 2781 struct mana_obj_spec cq_spec; 2782 struct gdma_queue_spec spec; 2783 struct mana_cq *cq = NULL; 2784 struct gdma_context *gc; 2785 u32 cq_size, rq_size; 2786 struct mana_rxq *rxq; 2787 int err; 2788 2789 gc = gd->gdma_context; 2790 2791 rxq = kvzalloc_flex(*rxq, rx_oobs, apc->rx_queue_size); 2792 if (!rxq) 2793 return NULL; 2794 2795 rxq->ndev = ndev; 2796 rxq->num_rx_buf = apc->rx_queue_size; 2797 rxq->rxq_idx = rxq_idx; 2798 rxq->rxobj = INVALID_MANA_HANDLE; 2799 2800 mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size, 2801 &rxq->headroom, &rxq->frag_count); 2802 /* Create page pool for RX queue */ 2803 err = mana_create_page_pool(rxq, gc); 2804 if (err) { 2805 netdev_err(ndev, "Create page pool err:%d\n", err); 2806 goto out; 2807 } 2808 2809 err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); 2810 if (err) 2811 goto out; 2812 2813 rq_size = MANA_PAGE_ALIGN(rq_size); 2814 cq_size = MANA_PAGE_ALIGN(cq_size); 2815 2816 /* Create RQ */ 2817 memset(&spec, 0, sizeof(spec)); 2818 spec.type = GDMA_RQ; 2819 spec.monitor_avl_buf = true; 2820 spec.queue_size = rq_size; 2821 err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); 2822 if (err) 2823 goto out; 2824 2825 /* Create RQ's CQ */ 2826 cq = &rxq->rx_cq; 2827 cq->type = MANA_CQ_TYPE_RX; 2828 cq->rxq = rxq; 2829 2830 memset(&spec, 0, sizeof(spec)); 2831 spec.type = GDMA_CQ; 2832 spec.monitor_avl_buf = false; 2833 spec.queue_size = cq_size; 2834 spec.cq.callback = mana_schedule_napi; 2835 spec.cq.parent_eq = eq->eq; 2836 spec.cq.context = cq; 2837 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 2838 if (err) 2839 goto out; 2840 2841 memset(&wq_spec, 0, sizeof(wq_spec)); 2842 memset(&cq_spec, 0, sizeof(cq_spec)); 2843 wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle; 2844 wq_spec.queue_size = rxq->gdma_rq->queue_size; 2845 2846 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 2847 cq_spec.queue_size = cq->gdma_cq->queue_size; 2848 cq_spec.modr_ctx_id = 0; 2849 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 2850 2851 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, 2852 &wq_spec, &cq_spec, &rxq->rxobj); 2853 if (err) 2854 goto out; 2855 2856 rxq->gdma_rq->id = wq_spec.queue_index; 2857 cq->gdma_cq->id = cq_spec.queue_index; 2858 2859 rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 2860 cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 2861 2862 rxq->gdma_id = rxq->gdma_rq->id; 2863 cq->gdma_id = cq->gdma_cq->id; 2864 2865 err = mana_push_wqe(rxq); 2866 if (err) 2867 goto out; 2868 2869 if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { 2870 err = -EINVAL; 2871 goto out; 2872 } 2873 2874 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2875 2876 netif_napi_add_weight_locked(ndev, &cq->napi, mana_poll, 1); 2877 2878 WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, 2879 cq->napi.napi_id)); 2880 WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, 2881 rxq->page_pool)); 2882 2883 napi_enable_locked(&cq->napi); 2884 2885 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2886 out: 2887 if (!err) 2888 return rxq; 2889 2890 netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); 2891 2892 mana_destroy_rxq(apc, rxq, false); 2893 2894 return NULL; 2895 } 2896 2897 static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx) 2898 { 2899 struct mana_rxq *rxq; 2900 char qnum[32]; 2901 2902 rxq = apc->rxqs[idx]; 2903 2904 sprintf(qnum, "RX-%d", idx); 2905 rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); 2906 debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head); 2907 debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail); 2908 debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf); 2909 debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs, 2910 &rxq->rx_cq.gdma_cq->head); 2911 debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs, 2912 &rxq->rx_cq.gdma_cq->tail); 2913 debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget); 2914 debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops); 2915 debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, 2916 &mana_dbg_q_fops); 2917 } 2918 2919 static int mana_add_rx_queues(struct mana_port_context *apc, 2920 struct net_device *ndev) 2921 { 2922 struct mana_rxq *rxq; 2923 int err = 0; 2924 int i; 2925 2926 for (i = 0; i < apc->num_queues; i++) { 2927 rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev); 2928 if (!rxq) { 2929 err = -ENOMEM; 2930 netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); 2931 goto out; 2932 } 2933 2934 u64_stats_init(&rxq->stats.syncp); 2935 2936 apc->rxqs[i] = rxq; 2937 2938 mana_create_rxq_debugfs(apc, i); 2939 } 2940 2941 apc->default_rxobj = apc->rxqs[0]->rxobj; 2942 out: 2943 return err; 2944 } 2945 2946 static void mana_destroy_rxqs(struct mana_port_context *apc) 2947 { 2948 struct mana_rxq *rxq; 2949 u32 rxq_idx; 2950 2951 if (apc->rxqs) { 2952 2953 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 2954 rxq = apc->rxqs[rxq_idx]; 2955 if (!rxq) 2956 continue; 2957 2958 mana_destroy_rxq(apc, rxq, true); 2959 apc->rxqs[rxq_idx] = NULL; 2960 } 2961 } 2962 } 2963 2964 static void mana_destroy_vport(struct mana_port_context *apc) 2965 { 2966 struct gdma_dev *gd = apc->ac->gdma_dev; 2967 2968 mana_uncfg_vport(apc); 2969 2970 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) 2971 mana_pf_deregister_hw_vport(apc); 2972 } 2973 2974 static int mana_create_vport(struct mana_port_context *apc, 2975 struct net_device *net) 2976 { 2977 struct gdma_dev *gd = apc->ac->gdma_dev; 2978 int err; 2979 2980 apc->default_rxobj = INVALID_MANA_HANDLE; 2981 2982 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { 2983 err = mana_pf_register_hw_vport(apc); 2984 if (err) 2985 return err; 2986 } 2987 2988 err = mana_cfg_vport(apc, gd->pdid, gd->doorbell, false); 2989 if (err) { 2990 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) 2991 mana_pf_deregister_hw_vport(apc); 2992 return err; 2993 } 2994 2995 return 0; 2996 } 2997 2998 static int mana_rss_table_alloc(struct mana_port_context *apc) 2999 { 3000 if (!apc->indir_table_sz) { 3001 netdev_err(apc->ndev, 3002 "Indirection table size not set for vPort %d\n", 3003 apc->port_idx); 3004 return -EINVAL; 3005 } 3006 3007 apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); 3008 if (!apc->indir_table) 3009 return -ENOMEM; 3010 3011 apc->rxobj_table = kzalloc_objs(mana_handle_t, apc->indir_table_sz); 3012 if (!apc->rxobj_table) { 3013 kfree(apc->indir_table); 3014 return -ENOMEM; 3015 } 3016 3017 return 0; 3018 } 3019 3020 static void mana_rss_table_init(struct mana_port_context *apc) 3021 { 3022 int i; 3023 3024 for (i = 0; i < apc->indir_table_sz; i++) 3025 apc->indir_table[i] = 3026 ethtool_rxfh_indir_default(i, apc->num_queues); 3027 } 3028 3029 int mana_disable_vport_rx(struct mana_port_context *apc) 3030 { 3031 return mana_cfg_vport_steering(apc, TRI_STATE_FALSE, false, false, 3032 false); 3033 } 3034 EXPORT_SYMBOL_NS(mana_disable_vport_rx, "NET_MANA"); 3035 3036 int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, 3037 bool update_hash, bool update_tab) 3038 { 3039 u32 queue_idx; 3040 int err; 3041 int i; 3042 3043 if (update_tab) { 3044 for (i = 0; i < apc->indir_table_sz; i++) { 3045 queue_idx = apc->indir_table[i]; 3046 apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; 3047 } 3048 } 3049 3050 err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); 3051 if (err) 3052 return err; 3053 3054 mana_fence_rqs(apc); 3055 3056 return 0; 3057 } 3058 3059 int mana_query_gf_stats(struct mana_context *ac) 3060 { 3061 struct gdma_context *gc = ac->gdma_dev->gdma_context; 3062 struct mana_query_gf_stat_resp resp = {}; 3063 struct mana_query_gf_stat_req req = {}; 3064 struct device *dev = gc->dev; 3065 int err; 3066 3067 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, 3068 sizeof(req), sizeof(resp)); 3069 req.hdr.resp.msg_version = GDMA_MESSAGE_V2; 3070 req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE | 3071 STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED | 3072 STATISTICS_FLAGS_HC_RX_BYTES | 3073 STATISTICS_FLAGS_HC_RX_UCAST_PACKETS | 3074 STATISTICS_FLAGS_HC_RX_UCAST_BYTES | 3075 STATISTICS_FLAGS_HC_RX_MCAST_PACKETS | 3076 STATISTICS_FLAGS_HC_RX_MCAST_BYTES | 3077 STATISTICS_FLAGS_HC_RX_BCAST_PACKETS | 3078 STATISTICS_FLAGS_HC_RX_BCAST_BYTES | 3079 STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED | 3080 STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED | 3081 STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS | 3082 STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT | 3083 STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT | 3084 STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT | 3085 STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT | 3086 STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT | 3087 STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION | 3088 STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB | 3089 STATISTICS_FLAGS_HC_TX_BYTES | 3090 STATISTICS_FLAGS_HC_TX_UCAST_PACKETS | 3091 STATISTICS_FLAGS_HC_TX_UCAST_BYTES | 3092 STATISTICS_FLAGS_HC_TX_MCAST_PACKETS | 3093 STATISTICS_FLAGS_HC_TX_MCAST_BYTES | 3094 STATISTICS_FLAGS_HC_TX_BCAST_PACKETS | 3095 STATISTICS_FLAGS_HC_TX_BCAST_BYTES | 3096 STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR; 3097 3098 err = mana_send_request(ac, &req, sizeof(req), &resp, 3099 sizeof(resp)); 3100 if (err) { 3101 dev_err(dev, "Failed to query GF stats: %d\n", err); 3102 return err; 3103 } 3104 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT, 3105 sizeof(resp)); 3106 if (err || resp.hdr.status) { 3107 dev_err(dev, "Failed to query GF stats: %d, 0x%x\n", err, 3108 resp.hdr.status); 3109 return err; 3110 } 3111 3112 ac->hc_stats.hc_rx_discards_no_wqe = resp.rx_discards_nowqe; 3113 ac->hc_stats.hc_rx_err_vport_disabled = resp.rx_err_vport_disabled; 3114 ac->hc_stats.hc_rx_bytes = resp.hc_rx_bytes; 3115 ac->hc_stats.hc_rx_ucast_pkts = resp.hc_rx_ucast_pkts; 3116 ac->hc_stats.hc_rx_ucast_bytes = resp.hc_rx_ucast_bytes; 3117 ac->hc_stats.hc_rx_bcast_pkts = resp.hc_rx_bcast_pkts; 3118 ac->hc_stats.hc_rx_bcast_bytes = resp.hc_rx_bcast_bytes; 3119 ac->hc_stats.hc_rx_mcast_pkts = resp.hc_rx_mcast_pkts; 3120 ac->hc_stats.hc_rx_mcast_bytes = resp.hc_rx_mcast_bytes; 3121 ac->hc_stats.hc_tx_err_gf_disabled = resp.tx_err_gf_disabled; 3122 ac->hc_stats.hc_tx_err_vport_disabled = resp.tx_err_vport_disabled; 3123 ac->hc_stats.hc_tx_err_inval_vportoffset_pkt = 3124 resp.tx_err_inval_vport_offset_pkt; 3125 ac->hc_stats.hc_tx_err_vlan_enforcement = 3126 resp.tx_err_vlan_enforcement; 3127 ac->hc_stats.hc_tx_err_eth_type_enforcement = 3128 resp.tx_err_ethtype_enforcement; 3129 ac->hc_stats.hc_tx_err_sa_enforcement = resp.tx_err_SA_enforcement; 3130 ac->hc_stats.hc_tx_err_sqpdid_enforcement = 3131 resp.tx_err_SQPDID_enforcement; 3132 ac->hc_stats.hc_tx_err_cqpdid_enforcement = 3133 resp.tx_err_CQPDID_enforcement; 3134 ac->hc_stats.hc_tx_err_mtu_violation = resp.tx_err_mtu_violation; 3135 ac->hc_stats.hc_tx_err_inval_oob = resp.tx_err_inval_oob; 3136 ac->hc_stats.hc_tx_bytes = resp.hc_tx_bytes; 3137 ac->hc_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts; 3138 ac->hc_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes; 3139 ac->hc_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts; 3140 ac->hc_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes; 3141 ac->hc_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts; 3142 ac->hc_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes; 3143 ac->hc_stats.hc_tx_err_gdma = resp.tx_err_gdma; 3144 3145 return 0; 3146 } 3147 3148 void mana_query_phy_stats(struct mana_port_context *apc) 3149 { 3150 struct mana_query_phy_stat_resp resp = {}; 3151 struct mana_query_phy_stat_req req = {}; 3152 struct net_device *ndev = apc->ndev; 3153 int err; 3154 3155 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT, 3156 sizeof(req), sizeof(resp)); 3157 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 3158 sizeof(resp)); 3159 if (err) 3160 return; 3161 3162 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT, 3163 sizeof(resp)); 3164 if (err || resp.hdr.status) { 3165 netdev_err(ndev, 3166 "Failed to query PHY stats: %d, resp:0x%x\n", 3167 err, resp.hdr.status); 3168 return; 3169 } 3170 3171 /* Aggregate drop counters */ 3172 apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy; 3173 apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy; 3174 3175 /* Per TC traffic Counters */ 3176 apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy; 3177 apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy; 3178 apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy; 3179 apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy; 3180 apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy; 3181 apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy; 3182 apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy; 3183 apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy; 3184 apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy; 3185 apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy; 3186 apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy; 3187 apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy; 3188 apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy; 3189 apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy; 3190 apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy; 3191 apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy; 3192 3193 /* Per TC byte Counters */ 3194 apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy; 3195 apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy; 3196 apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy; 3197 apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy; 3198 apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy; 3199 apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy; 3200 apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy; 3201 apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy; 3202 apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy; 3203 apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy; 3204 apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy; 3205 apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy; 3206 apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy; 3207 apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy; 3208 apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy; 3209 apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy; 3210 3211 /* Per TC pause Counters */ 3212 apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy; 3213 apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy; 3214 apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy; 3215 apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy; 3216 apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy; 3217 apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy; 3218 apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy; 3219 apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy; 3220 apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy; 3221 apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy; 3222 apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy; 3223 apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy; 3224 apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy; 3225 apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy; 3226 apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy; 3227 apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy; 3228 } 3229 3230 static int mana_init_port(struct net_device *ndev) 3231 { 3232 struct mana_port_context *apc = netdev_priv(ndev); 3233 struct gdma_dev *gd = apc->ac->gdma_dev; 3234 u32 max_txq, max_rxq, max_queues; 3235 int port_idx = apc->port_idx; 3236 struct gdma_context *gc; 3237 char vport[32]; 3238 int err; 3239 3240 err = mana_init_port_context(apc); 3241 if (err) 3242 return err; 3243 3244 gc = gd->gdma_context; 3245 3246 err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, 3247 &apc->indir_table_sz); 3248 if (err) { 3249 netdev_err(ndev, "Failed to query info for vPort %d\n", 3250 port_idx); 3251 goto reset_apc; 3252 } 3253 3254 max_queues = min_t(u32, max_txq, max_rxq); 3255 if (apc->max_queues > max_queues) 3256 apc->max_queues = max_queues; 3257 if (apc->max_queues > gc->max_num_queues_vport) 3258 apc->max_queues = gc->max_num_queues_vport; 3259 3260 if (apc->num_queues > apc->max_queues) 3261 apc->num_queues = apc->max_queues; 3262 3263 eth_hw_addr_set(ndev, apc->mac_addr); 3264 sprintf(vport, "vport%d", port_idx); 3265 apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); 3266 3267 debugfs_create_u64("port_handle", 0400, apc->mana_port_debugfs, 3268 &apc->port_handle); 3269 debugfs_create_u32("max_sq", 0400, apc->mana_port_debugfs, 3270 &apc->vport_max_sq); 3271 debugfs_create_u32("max_rq", 0400, apc->mana_port_debugfs, 3272 &apc->vport_max_rq); 3273 debugfs_create_u32("indir_table_sz", 0400, apc->mana_port_debugfs, 3274 &apc->indir_table_sz); 3275 debugfs_create_u32("steer_rx", 0400, apc->mana_port_debugfs, 3276 &apc->steer_rx); 3277 debugfs_create_u32("steer_rss", 0400, apc->mana_port_debugfs, 3278 &apc->steer_rss); 3279 debugfs_create_bool("steer_update_tab", 0400, apc->mana_port_debugfs, 3280 &apc->steer_update_tab); 3281 debugfs_create_u32("steer_cqe_coalescing", 0400, apc->mana_port_debugfs, 3282 &apc->steer_cqe_coalescing); 3283 debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, 3284 &apc->speed); 3285 return 0; 3286 3287 reset_apc: 3288 mana_cleanup_port_context(apc); 3289 return err; 3290 } 3291 3292 int mana_alloc_queues(struct net_device *ndev) 3293 { 3294 struct mana_port_context *apc = netdev_priv(ndev); 3295 struct gdma_dev *gd = apc->ac->gdma_dev; 3296 int err; 3297 3298 err = mana_create_vport(apc, ndev); 3299 if (err) { 3300 netdev_err(ndev, "Failed to create vPort %u : %d\n", 3301 apc->port_idx, err); 3302 return err; 3303 } 3304 3305 err = mana_create_eq(apc); 3306 if (err) { 3307 netdev_err(ndev, "Failed to create EQ on vPort %u: %d\n", 3308 apc->port_idx, err); 3309 goto destroy_vport; 3310 } 3311 3312 err = mana_create_txq(apc, ndev); 3313 if (err) { 3314 netdev_err(ndev, "Failed to create TXQ on vPort %u: %d\n", 3315 apc->port_idx, err); 3316 goto destroy_eq; 3317 } 3318 3319 err = netif_set_real_num_tx_queues(ndev, apc->num_queues); 3320 if (err) { 3321 netdev_err(ndev, 3322 "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", 3323 apc->num_queues, err); 3324 goto destroy_txq; 3325 } 3326 3327 err = mana_add_rx_queues(apc, ndev); 3328 if (err) 3329 goto destroy_rxq; 3330 3331 apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; 3332 3333 err = netif_set_real_num_rx_queues(ndev, apc->num_queues); 3334 if (err) { 3335 netdev_err(ndev, 3336 "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", 3337 apc->num_queues, err); 3338 goto destroy_rxq; 3339 } 3340 3341 mana_rss_table_init(apc); 3342 3343 err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); 3344 if (err) { 3345 netdev_err(ndev, "Failed to configure RSS table: %d\n", err); 3346 goto destroy_rxq; 3347 } 3348 3349 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { 3350 err = mana_pf_register_filter(apc); 3351 if (err) 3352 goto destroy_rxq; 3353 } 3354 3355 mana_chn_setxdp(apc, mana_xdp_get(apc)); 3356 3357 return 0; 3358 3359 destroy_rxq: 3360 mana_destroy_rxqs(apc); 3361 destroy_txq: 3362 mana_destroy_txq(apc); 3363 destroy_eq: 3364 mana_destroy_eq(apc); 3365 destroy_vport: 3366 mana_destroy_vport(apc); 3367 return err; 3368 } 3369 3370 int mana_attach(struct net_device *ndev) 3371 { 3372 struct mana_port_context *apc = netdev_priv(ndev); 3373 int err; 3374 3375 ASSERT_RTNL(); 3376 3377 err = mana_init_port(ndev); 3378 if (err) 3379 return err; 3380 3381 if (apc->port_st_save) { 3382 err = mana_alloc_queues(ndev); 3383 if (err) { 3384 mana_cleanup_port_context(apc); 3385 return err; 3386 } 3387 } 3388 3389 apc->port_is_up = apc->port_st_save; 3390 3391 /* Ensure port state updated before txq state */ 3392 smp_wmb(); 3393 3394 netif_device_attach(ndev); 3395 3396 return 0; 3397 } 3398 3399 static int mana_dealloc_queues(struct net_device *ndev) 3400 { 3401 struct mana_port_context *apc = netdev_priv(ndev); 3402 unsigned long timeout = jiffies + 120 * HZ; 3403 struct gdma_dev *gd = apc->ac->gdma_dev; 3404 struct mana_txq *txq; 3405 struct sk_buff *skb; 3406 int i, err; 3407 u32 tsleep; 3408 3409 if (apc->port_is_up) 3410 return -EINVAL; 3411 3412 if (apc->rxqs) 3413 mana_chn_setxdp(apc, NULL); 3414 3415 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) 3416 mana_pf_deregister_filter(apc); 3417 3418 /* No packet can be transmitted now since apc->port_is_up is false. 3419 * There is still a tiny chance that mana_poll_tx_cq() can re-enable 3420 * a txq because it may not timely see apc->port_is_up being cleared 3421 * to false, but it doesn't matter since mana_start_xmit() drops any 3422 * new packets due to apc->port_is_up being false. 3423 * 3424 * Drain all the in-flight TX packets. 3425 * A timeout of 120 seconds for all the queues is used. 3426 * This will break the while loop when h/w is not responding. 3427 * This value of 120 has been decided here considering max 3428 * number of queues. 3429 */ 3430 3431 if (apc->tx_qp) { 3432 for (i = 0; i < apc->num_queues; i++) { 3433 txq = &apc->tx_qp[i]->txq; 3434 tsleep = 1000; 3435 while (atomic_read(&txq->pending_sends) > 0 && 3436 time_before(jiffies, timeout)) { 3437 usleep_range(tsleep, tsleep + 1000); 3438 tsleep <<= 1; 3439 } 3440 if (atomic_read(&txq->pending_sends)) { 3441 err = 3442 pcie_flr(to_pci_dev(gd->gdma_context->dev)); 3443 if (err) { 3444 netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", 3445 err, 3446 atomic_read(&txq->pending_sends), 3447 txq->gdma_txq_id); 3448 } 3449 break; 3450 } 3451 } 3452 3453 for (i = 0; i < apc->num_queues; i++) { 3454 txq = &apc->tx_qp[i]->txq; 3455 while ((skb = skb_dequeue(&txq->pending_skbs))) { 3456 mana_unmap_skb(skb, apc); 3457 dev_kfree_skb_any(skb); 3458 } 3459 atomic_set(&txq->pending_sends, 0); 3460 } 3461 } 3462 3463 /* We're 100% sure the queues can no longer be woken up, because 3464 * we're sure now mana_poll_tx_cq() can't be running. 3465 */ 3466 3467 apc->rss_state = TRI_STATE_FALSE; 3468 err = mana_disable_vport_rx(apc); 3469 if (err && mana_en_need_log(apc, err)) 3470 netdev_err(ndev, "Failed to disable vPort: %d\n", err); 3471 3472 mana_fence_rqs(apc); 3473 3474 /* Even in err case, still need to cleanup the vPort */ 3475 mana_destroy_rxqs(apc); 3476 mana_destroy_txq(apc); 3477 mana_destroy_eq(apc); 3478 mana_destroy_vport(apc); 3479 3480 return 0; 3481 } 3482 3483 int mana_detach(struct net_device *ndev, bool from_close) 3484 { 3485 struct mana_port_context *apc = netdev_priv(ndev); 3486 int err; 3487 3488 ASSERT_RTNL(); 3489 3490 /* If already detached (indicates detach succeeded but attach failed 3491 * previously). Now skip mana detach and just retry mana_attach. 3492 */ 3493 if (!from_close && !netif_device_present(ndev)) 3494 return 0; 3495 3496 apc->port_st_save = apc->port_is_up; 3497 apc->port_is_up = false; 3498 3499 /* Ensure port state updated before txq state */ 3500 smp_wmb(); 3501 3502 netif_tx_disable(ndev); 3503 3504 if (apc->port_st_save) { 3505 err = mana_dealloc_queues(ndev); 3506 if (err) { 3507 netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); 3508 return err; 3509 } 3510 } 3511 3512 if (!from_close) { 3513 netif_device_detach(ndev); 3514 mana_cleanup_port_context(apc); 3515 } 3516 3517 return 0; 3518 } 3519 3520 static int mana_probe_port(struct mana_context *ac, int port_idx, 3521 struct net_device **ndev_storage) 3522 { 3523 struct gdma_context *gc = ac->gdma_dev->gdma_context; 3524 struct mana_port_context *apc; 3525 struct net_device *ndev; 3526 int err; 3527 3528 ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), 3529 gc->max_num_queues_vport); 3530 if (!ndev) 3531 return -ENOMEM; 3532 3533 *ndev_storage = ndev; 3534 3535 apc = netdev_priv(ndev); 3536 apc->ac = ac; 3537 apc->ndev = ndev; 3538 apc->max_queues = gc->max_num_queues_vport; 3539 /* Use MANA_DEF_NUM_QUEUES as default, still honoring the HW limit */ 3540 apc->num_queues = min(gc->max_num_queues_vport, MANA_DEF_NUM_QUEUES); 3541 apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; 3542 apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; 3543 apc->port_handle = INVALID_MANA_HANDLE; 3544 apc->pf_filter_handle = INVALID_MANA_HANDLE; 3545 apc->port_idx = port_idx; 3546 apc->link_cfg_error = 1; 3547 apc->cqe_coalescing_enable = 0; 3548 3549 mutex_init(&apc->vport_mutex); 3550 apc->vport_use_count = 0; 3551 3552 ndev->netdev_ops = &mana_devops; 3553 ndev->ethtool_ops = &mana_ethtool_ops; 3554 ndev->mtu = ETH_DATA_LEN; 3555 ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; 3556 ndev->min_mtu = ETH_MIN_MTU; 3557 ndev->needed_headroom = MANA_HEADROOM; 3558 ndev->dev_port = port_idx; 3559 /* Recommended timeout based on HW FPGA re-config scenario. */ 3560 ndev->watchdog_timeo = 15 * HZ; 3561 SET_NETDEV_DEV(ndev, gc->dev); 3562 3563 netif_set_tso_max_size(ndev, GSO_MAX_SIZE); 3564 3565 netif_carrier_off(ndev); 3566 3567 netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); 3568 3569 err = mana_init_port(ndev); 3570 if (err) 3571 goto free_net; 3572 3573 err = mana_rss_table_alloc(apc); 3574 if (err) 3575 goto reset_apc; 3576 3577 /* Initialize the per port queue reset work.*/ 3578 INIT_WORK(&apc->queue_reset_work, 3579 mana_per_port_queue_reset_work_handler); 3580 3581 netdev_lockdep_set_classes(ndev); 3582 3583 ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; 3584 ndev->hw_features |= NETIF_F_RXCSUM; 3585 ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; 3586 ndev->hw_features |= NETIF_F_RXHASH; 3587 ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | 3588 NETIF_F_HW_VLAN_CTAG_RX; 3589 ndev->vlan_features = ndev->features; 3590 xdp_set_features_flag(ndev, NETDEV_XDP_ACT_BASIC | 3591 NETDEV_XDP_ACT_REDIRECT | 3592 NETDEV_XDP_ACT_NDO_XMIT); 3593 3594 err = register_netdev(ndev); 3595 if (err) { 3596 netdev_err(ndev, "Unable to register netdev.\n"); 3597 goto free_indir; 3598 } 3599 3600 netif_carrier_on(ndev); 3601 3602 return 0; 3603 3604 free_indir: 3605 mana_cleanup_indir_table(apc); 3606 reset_apc: 3607 mana_cleanup_port_context(apc); 3608 free_net: 3609 *ndev_storage = NULL; 3610 netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); 3611 free_netdev(ndev); 3612 return err; 3613 } 3614 3615 static void adev_release(struct device *dev) 3616 { 3617 struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev); 3618 3619 kfree(madev); 3620 } 3621 3622 static void remove_adev(struct gdma_dev *gd) 3623 { 3624 struct auxiliary_device *adev = gd->adev; 3625 int id = adev->id; 3626 3627 auxiliary_device_delete(adev); 3628 auxiliary_device_uninit(adev); 3629 3630 mana_adev_idx_free(id); 3631 gd->adev = NULL; 3632 } 3633 3634 static int add_adev(struct gdma_dev *gd, const char *name) 3635 { 3636 struct auxiliary_device *adev; 3637 struct mana_adev *madev; 3638 int ret; 3639 int id; 3640 3641 madev = kzalloc_obj(*madev); 3642 if (!madev) 3643 return -ENOMEM; 3644 3645 adev = &madev->adev; 3646 ret = mana_adev_idx_alloc(); 3647 if (ret < 0) 3648 goto idx_fail; 3649 id = ret; 3650 adev->id = id; 3651 3652 adev->name = name; 3653 adev->dev.parent = gd->gdma_context->dev; 3654 adev->dev.release = adev_release; 3655 madev->mdev = gd; 3656 3657 ret = auxiliary_device_init(adev); 3658 if (ret) 3659 goto init_fail; 3660 3661 /* madev is owned by the auxiliary device */ 3662 madev = NULL; 3663 ret = auxiliary_device_add(adev); 3664 if (ret) 3665 goto add_fail; 3666 3667 gd->adev = adev; 3668 dev_dbg(gd->gdma_context->dev, 3669 "Auxiliary device added successfully\n"); 3670 return 0; 3671 3672 add_fail: 3673 auxiliary_device_uninit(adev); 3674 3675 init_fail: 3676 mana_adev_idx_free(id); 3677 3678 idx_fail: 3679 kfree(madev); 3680 3681 return ret; 3682 } 3683 3684 static void mana_rdma_service_handle(struct work_struct *work) 3685 { 3686 struct mana_service_work *serv_work = 3687 container_of(work, struct mana_service_work, work); 3688 struct gdma_dev *gd = serv_work->gdma_dev; 3689 struct device *dev = gd->gdma_context->dev; 3690 int ret; 3691 3692 if (READ_ONCE(gd->rdma_teardown)) 3693 goto out; 3694 3695 switch (serv_work->event) { 3696 case GDMA_SERVICE_TYPE_RDMA_SUSPEND: 3697 if (!gd->adev || gd->is_suspended) 3698 break; 3699 3700 remove_adev(gd); 3701 gd->is_suspended = true; 3702 break; 3703 3704 case GDMA_SERVICE_TYPE_RDMA_RESUME: 3705 if (!gd->is_suspended) 3706 break; 3707 3708 ret = add_adev(gd, "rdma"); 3709 if (ret) 3710 dev_err(dev, "Failed to add adev on resume: %d\n", ret); 3711 else 3712 gd->is_suspended = false; 3713 break; 3714 3715 default: 3716 dev_warn(dev, "unknown adev service event %u\n", 3717 serv_work->event); 3718 break; 3719 } 3720 3721 out: 3722 kfree(serv_work); 3723 } 3724 3725 int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event) 3726 { 3727 struct gdma_dev *gd = &gc->mana_ib; 3728 struct mana_service_work *serv_work; 3729 3730 if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { 3731 /* RDMA device is not detected on pci */ 3732 return 0; 3733 } 3734 3735 serv_work = kzalloc_obj(*serv_work, GFP_ATOMIC); 3736 if (!serv_work) 3737 return -ENOMEM; 3738 3739 serv_work->event = event; 3740 serv_work->gdma_dev = gd; 3741 3742 INIT_WORK(&serv_work->work, mana_rdma_service_handle); 3743 queue_work(gc->service_wq, &serv_work->work); 3744 3745 return 0; 3746 } 3747 3748 #define MANA_GF_STATS_PERIOD (2 * HZ) 3749 3750 static void mana_gf_stats_work_handler(struct work_struct *work) 3751 { 3752 struct mana_context *ac = 3753 container_of(to_delayed_work(work), struct mana_context, gf_stats_work); 3754 struct gdma_context *gc = ac->gdma_dev->gdma_context; 3755 int err; 3756 3757 err = mana_query_gf_stats(ac); 3758 if (err == -ETIMEDOUT) { 3759 /* HWC timeout detected - reset stats and stop rescheduling */ 3760 ac->hwc_timeout_occurred = true; 3761 memset(&ac->hc_stats, 0, sizeof(ac->hc_stats)); 3762 dev_warn(gc->dev, 3763 "Gf stats wk handler: gf stats query timed out.\n"); 3764 /* As HWC timed out, indicating a faulty HW state and needs a 3765 * reset. 3766 */ 3767 mana_schedule_serv_work(gc, GDMA_EQE_HWC_RESET_REQUEST); 3768 return; 3769 } 3770 schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD); 3771 } 3772 3773 int mana_probe(struct gdma_dev *gd, bool resuming) 3774 { 3775 struct gdma_context *gc = gd->gdma_context; 3776 struct mana_context *ac = gd->driver_data; 3777 struct mana_port_context *apc = NULL; 3778 struct device *dev = gc->dev; 3779 u8 bm_hostmode = 0; 3780 u16 num_ports = 0; 3781 int err; 3782 int i; 3783 3784 dev_info(dev, 3785 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", 3786 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); 3787 3788 err = mana_gd_register_device(gd); 3789 if (err) 3790 return err; 3791 3792 if (!resuming) { 3793 ac = kzalloc_obj(*ac); 3794 if (!ac) 3795 return -ENOMEM; 3796 3797 ac->gdma_dev = gd; 3798 gd->driver_data = ac; 3799 3800 INIT_WORK(&ac->link_change_work, mana_link_state_handle); 3801 } 3802 3803 INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler); 3804 3805 err = mana_gd_query_device_cfg(gc, MANA_MAJOR_VERSION, 3806 MANA_MINOR_VERSION, 3807 MANA_MICRO_VERSION, 3808 &num_ports, &bm_hostmode); 3809 if (err) 3810 goto out; 3811 3812 ac->bm_hostmode = bm_hostmode; 3813 3814 debugfs_create_u16("adapter-MTU", 0400, 3815 gc->mana_pci_debugfs, &gc->adapter_mtu); 3816 3817 if (!resuming) { 3818 ac->num_ports = num_ports; 3819 } else { 3820 if (ac->num_ports != num_ports) { 3821 dev_err(dev, "The number of vPorts changed: %d->%d\n", 3822 ac->num_ports, num_ports); 3823 err = -EPROTO; 3824 goto out; 3825 } 3826 3827 enable_work(&ac->link_change_work); 3828 } 3829 3830 if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) 3831 ac->num_ports = MAX_PORTS_IN_MANA_DEV; 3832 3833 debugfs_create_u16("num_vports", 0400, gc->mana_pci_debugfs, 3834 &ac->num_ports); 3835 debugfs_create_u8("bm_hostmode", 0400, gc->mana_pci_debugfs, 3836 &ac->bm_hostmode); 3837 3838 ac->per_port_queue_reset_wq = 3839 create_singlethread_workqueue("mana_per_port_queue_reset_wq"); 3840 if (!ac->per_port_queue_reset_wq) { 3841 dev_err(dev, "Failed to allocate per port queue reset workqueue\n"); 3842 err = -ENOMEM; 3843 goto out; 3844 } 3845 3846 if (!resuming) { 3847 for (i = 0; i < ac->num_ports; i++) { 3848 err = mana_probe_port(ac, i, &ac->ports[i]); 3849 /* Log the port for which the probe failed, stop probing 3850 * subsequent ports, and skip add_adev. 3851 * mana_remove() will clean up already-probed ports. 3852 */ 3853 if (err) { 3854 dev_err(dev, "Probe Failed for port %d\n", i); 3855 break; 3856 } 3857 } 3858 } else { 3859 for (i = 0; i < ac->num_ports; i++) { 3860 rtnl_lock(); 3861 apc = netdev_priv(ac->ports[i]); 3862 enable_work(&apc->queue_reset_work); 3863 netdev_lock(ac->ports[i]); 3864 apc->link_cfg_error = 1; 3865 netdev_unlock(ac->ports[i]); 3866 err = mana_attach(ac->ports[i]); 3867 rtnl_unlock(); 3868 /* Log the port for which the attach failed, stop 3869 * attaching subsequent ports, and skip add_adev. 3870 * mana_remove() will clean up already-attached ports. 3871 */ 3872 if (err) { 3873 dev_err(dev, "Attach Failed for port %d\n", i); 3874 break; 3875 } 3876 } 3877 } 3878 3879 if (!err) 3880 err = add_adev(gd, "eth"); 3881 3882 schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD); 3883 3884 out: 3885 if (err) { 3886 mana_remove(gd, false); 3887 } else { 3888 dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", 3889 gd, gd->dev_id.as_uint32, ac->num_ports, 3890 gd->dev_id.type, gd->dev_id.instance); 3891 dev_dbg(dev, "%s succeeded\n", __func__); 3892 } 3893 3894 return err; 3895 } 3896 3897 void mana_remove(struct gdma_dev *gd, bool suspending) 3898 { 3899 struct gdma_context *gc = gd->gdma_context; 3900 struct mana_context *ac = gd->driver_data; 3901 struct mana_port_context *apc; 3902 struct device *dev; 3903 struct net_device *ndev; 3904 int err; 3905 int i; 3906 3907 if (!gc || !ac) 3908 return; 3909 3910 dev = gc->dev; 3911 3912 disable_work_sync(&ac->link_change_work); 3913 cancel_delayed_work_sync(&ac->gf_stats_work); 3914 3915 /* adev currently doesn't support suspending, always remove it */ 3916 if (gd->adev) 3917 remove_adev(gd); 3918 3919 for (i = 0; i < ac->num_ports; i++) { 3920 ndev = ac->ports[i]; 3921 if (!ndev) { 3922 if (i == 0) 3923 dev_err(dev, "No net device to remove\n"); 3924 break; 3925 } 3926 3927 apc = netdev_priv(ndev); 3928 disable_work_sync(&apc->queue_reset_work); 3929 3930 /* All cleanup actions should stay after rtnl_lock(), otherwise 3931 * other functions may access partially cleaned up data. 3932 */ 3933 rtnl_lock(); 3934 3935 err = mana_detach(ndev, false); 3936 if (err) 3937 netdev_err(ndev, "Failed to detach vPort %d: %d\n", 3938 i, err); 3939 3940 if (suspending) { 3941 /* No need to unregister the ndev. */ 3942 rtnl_unlock(); 3943 continue; 3944 } 3945 3946 unregister_netdevice(ndev); 3947 mana_cleanup_indir_table(apc); 3948 3949 rtnl_unlock(); 3950 3951 free_netdev(ndev); 3952 } 3953 3954 if (ac->per_port_queue_reset_wq) { 3955 destroy_workqueue(ac->per_port_queue_reset_wq); 3956 ac->per_port_queue_reset_wq = NULL; 3957 } 3958 3959 mana_gd_deregister_device(gd); 3960 3961 if (gc->mana_pci_debugfs) { 3962 debugfs_lookup_and_remove("bm_hostmode", gc->mana_pci_debugfs); 3963 debugfs_lookup_and_remove("num_vports", gc->mana_pci_debugfs); 3964 } 3965 3966 if (suspending) 3967 return; 3968 3969 gd->driver_data = NULL; 3970 gd->gdma_context = NULL; 3971 kfree(ac); 3972 dev_dbg(dev, "%s succeeded\n", __func__); 3973 } 3974 3975 int mana_rdma_probe(struct gdma_dev *gd) 3976 { 3977 int err = 0; 3978 3979 if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { 3980 /* RDMA device is not detected on pci */ 3981 return err; 3982 } 3983 3984 err = mana_gd_register_device(gd); 3985 if (err) 3986 return err; 3987 3988 err = add_adev(gd, "rdma"); 3989 if (err) 3990 mana_gd_deregister_device(gd); 3991 3992 return err; 3993 } 3994 3995 void mana_rdma_remove(struct gdma_dev *gd) 3996 { 3997 struct gdma_context *gc = gd->gdma_context; 3998 3999 if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { 4000 /* RDMA device is not detected on pci */ 4001 return; 4002 } 4003 4004 WRITE_ONCE(gd->rdma_teardown, true); 4005 4006 if (gc->service_wq) 4007 flush_workqueue(gc->service_wq); 4008 4009 if (gd->adev) 4010 remove_adev(gd); 4011 4012 mana_gd_deregister_device(gd); 4013 } 4014 4015 struct net_device *mana_get_primary_netdev(struct mana_context *ac, 4016 u32 port_index, 4017 netdevice_tracker *tracker) 4018 { 4019 struct net_device *ndev; 4020 4021 if (port_index >= ac->num_ports) 4022 return NULL; 4023 4024 rcu_read_lock(); 4025 4026 /* If mana is used in netvsc, the upper netdevice should be returned. */ 4027 ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); 4028 4029 /* If there is no upper device, use the parent Ethernet device */ 4030 if (!ndev) 4031 ndev = ac->ports[port_index]; 4032 4033 netdev_hold(ndev, tracker, GFP_ATOMIC); 4034 rcu_read_unlock(); 4035 4036 return ndev; 4037 } 4038 EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA"); 4039