1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* Copyright (c) 2021, Microsoft Corporation. */ 3 4 #include <uapi/linux/bpf.h> 5 6 #include <linux/debugfs.h> 7 #include <linux/inetdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/filter.h> 11 #include <linux/mm.h> 12 #include <linux/pci.h> 13 #include <linux/export.h> 14 #include <linux/skbuff.h> 15 16 #include <net/checksum.h> 17 #include <net/ip6_checksum.h> 18 #include <net/netdev_lock.h> 19 #include <net/page_pool/helpers.h> 20 #include <net/xdp.h> 21 22 #include <net/mana/mana.h> 23 #include <net/mana/mana_auxiliary.h> 24 #include <net/mana/hw_channel.h> 25 26 static DEFINE_IDA(mana_adev_ida); 27 28 static int mana_adev_idx_alloc(void) 29 { 30 return ida_alloc(&mana_adev_ida, GFP_KERNEL); 31 } 32 33 static void mana_adev_idx_free(int idx) 34 { 35 ida_free(&mana_adev_ida, idx); 36 } 37 38 static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count, 39 loff_t *pos) 40 { 41 struct gdma_queue *gdma_q = filp->private_data; 42 43 return simple_read_from_buffer(buf, count, pos, gdma_q->queue_mem_ptr, 44 gdma_q->queue_size); 45 } 46 47 static const struct file_operations mana_dbg_q_fops = { 48 .owner = THIS_MODULE, 49 .open = simple_open, 50 .read = mana_dbg_q_read, 51 }; 52 53 static bool mana_en_need_log(struct mana_port_context *apc, int err) 54 { 55 if (apc && apc->ac && apc->ac->gdma_dev && 56 apc->ac->gdma_dev->gdma_context) 57 return mana_need_log(apc->ac->gdma_dev->gdma_context, err); 58 else 59 return true; 60 } 61 62 static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page, 63 bool from_pool) 64 { 65 if (from_pool) 66 page_pool_put_full_page(rxq->page_pool, page, false); 67 else 68 put_page(page); 69 } 70 71 /* Microsoft Azure Network Adapter (MANA) functions */ 72 73 static int mana_open(struct net_device *ndev) 74 { 75 struct mana_port_context *apc = netdev_priv(ndev); 76 int err; 77 err = mana_alloc_queues(ndev); 78 79 if (err) { 80 netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); 81 return err; 82 } 83 84 apc->port_is_up = true; 85 86 /* Ensure port state updated before txq state */ 87 smp_wmb(); 88 89 netif_tx_wake_all_queues(ndev); 90 netdev_dbg(ndev, "%s successful\n", __func__); 91 return 0; 92 } 93 94 static int mana_close(struct net_device *ndev) 95 { 96 struct mana_port_context *apc = netdev_priv(ndev); 97 98 if (!apc->port_is_up) 99 return 0; 100 101 return mana_detach(ndev, true); 102 } 103 104 static void mana_link_state_handle(struct work_struct *w) 105 { 106 struct mana_context *ac; 107 struct net_device *ndev; 108 u32 link_event; 109 bool link_up; 110 int i; 111 112 ac = container_of(w, struct mana_context, link_change_work); 113 114 rtnl_lock(); 115 116 link_event = READ_ONCE(ac->link_event); 117 118 if (link_event == HWC_DATA_HW_LINK_CONNECT) 119 link_up = true; 120 else if (link_event == HWC_DATA_HW_LINK_DISCONNECT) 121 link_up = false; 122 else 123 goto out; 124 125 /* Process all ports */ 126 for (i = 0; i < ac->num_ports; i++) { 127 ndev = ac->ports[i]; 128 if (!ndev) 129 continue; 130 131 if (link_up) { 132 netif_carrier_on(ndev); 133 134 __netdev_notify_peers(ndev); 135 } else { 136 netif_carrier_off(ndev); 137 } 138 } 139 140 out: 141 rtnl_unlock(); 142 } 143 144 static bool mana_can_tx(struct gdma_queue *wq) 145 { 146 return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; 147 } 148 149 static unsigned int mana_checksum_info(struct sk_buff *skb) 150 { 151 if (skb->protocol == htons(ETH_P_IP)) { 152 
struct iphdr *ip = ip_hdr(skb); 153 154 if (ip->protocol == IPPROTO_TCP) 155 return IPPROTO_TCP; 156 157 if (ip->protocol == IPPROTO_UDP) 158 return IPPROTO_UDP; 159 } else if (skb->protocol == htons(ETH_P_IPV6)) { 160 struct ipv6hdr *ip6 = ipv6_hdr(skb); 161 162 if (ip6->nexthdr == IPPROTO_TCP) 163 return IPPROTO_TCP; 164 165 if (ip6->nexthdr == IPPROTO_UDP) 166 return IPPROTO_UDP; 167 } 168 169 /* No csum offloading */ 170 return 0; 171 } 172 173 static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash, 174 int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey) 175 { 176 ash->dma_handle[sg_i] = da; 177 ash->size[sg_i] = sge_len; 178 179 tp->wqe_req.sgl[sg_i].address = da; 180 tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey; 181 tp->wqe_req.sgl[sg_i].size = sge_len; 182 } 183 184 static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, 185 struct mana_tx_package *tp, int gso_hs) 186 { 187 struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; 188 int hsg = 1; /* num of SGEs of linear part */ 189 struct gdma_dev *gd = apc->ac->gdma_dev; 190 int skb_hlen = skb_headlen(skb); 191 int sge0_len, sge1_len = 0; 192 struct gdma_context *gc; 193 struct device *dev; 194 skb_frag_t *frag; 195 dma_addr_t da; 196 int sg_i; 197 int i; 198 199 gc = gd->gdma_context; 200 dev = gc->dev; 201 202 if (gso_hs && gso_hs < skb_hlen) { 203 sge0_len = gso_hs; 204 sge1_len = skb_hlen - gso_hs; 205 } else { 206 sge0_len = skb_hlen; 207 } 208 209 da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE); 210 if (dma_mapping_error(dev, da)) 211 return -ENOMEM; 212 213 mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey); 214 215 if (sge1_len) { 216 sg_i = 1; 217 da = dma_map_single(dev, skb->data + sge0_len, sge1_len, 218 DMA_TO_DEVICE); 219 if (dma_mapping_error(dev, da)) 220 goto frag_err; 221 222 mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey); 223 hsg = 2; 224 } 225 226 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 227 sg_i = hsg + i; 228 229 frag = &skb_shinfo(skb)->frags[i]; 230 da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), 231 DMA_TO_DEVICE); 232 if (dma_mapping_error(dev, da)) 233 goto frag_err; 234 235 mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag), 236 gd->gpa_mkey); 237 } 238 239 return 0; 240 241 frag_err: 242 if (net_ratelimit()) 243 netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", 244 skb->len); 245 for (i = sg_i - 1; i >= hsg; i--) 246 dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], 247 DMA_TO_DEVICE); 248 249 for (i = hsg - 1; i >= 0; i--) 250 dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], 251 DMA_TO_DEVICE); 252 253 return -ENOMEM; 254 } 255 256 /* Handle the case when GSO SKB linear length is too large. 257 * MANA NIC requires GSO packets to put only the packet header to SGE0. 258 * So, we need 2 SGEs for the skb linear part which contains more than the 259 * header. 260 * Return a positive value for the number of SGEs, or a negative value 261 * for an error. 
262 */ 263 static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb, 264 int gso_hs) 265 { 266 int num_sge = 1 + skb_shinfo(skb)->nr_frags; 267 int skb_hlen = skb_headlen(skb); 268 269 if (gso_hs < skb_hlen) { 270 num_sge++; 271 } else if (gso_hs > skb_hlen) { 272 if (net_ratelimit()) 273 netdev_err(ndev, 274 "TX nonlinear head: hs:%d, skb_hlen:%d\n", 275 gso_hs, skb_hlen); 276 277 return -EINVAL; 278 } 279 280 return num_sge; 281 } 282 283 /* Get the GSO packet's header size */ 284 static int mana_get_gso_hs(struct sk_buff *skb) 285 { 286 int gso_hs; 287 288 if (skb->encapsulation) { 289 gso_hs = skb_inner_tcp_all_headers(skb); 290 } else { 291 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { 292 gso_hs = skb_transport_offset(skb) + 293 sizeof(struct udphdr); 294 } else { 295 gso_hs = skb_tcp_all_headers(skb); 296 } 297 } 298 299 return gso_hs; 300 } 301 302 netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) 303 { 304 enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; 305 struct mana_port_context *apc = netdev_priv(ndev); 306 int gso_hs = 0; /* zero for non-GSO pkts */ 307 u16 txq_idx = skb_get_queue_mapping(skb); 308 struct gdma_dev *gd = apc->ac->gdma_dev; 309 bool ipv4 = false, ipv6 = false; 310 struct mana_tx_package pkg = {}; 311 struct netdev_queue *net_txq; 312 struct mana_stats_tx *tx_stats; 313 struct gdma_queue *gdma_sq; 314 int err, len, num_gso_seg; 315 unsigned int csum_type; 316 struct mana_txq *txq; 317 struct mana_cq *cq; 318 319 if (unlikely(!apc->port_is_up)) 320 goto tx_drop; 321 322 if (skb_cow_head(skb, MANA_HEADROOM)) 323 goto tx_drop_count; 324 325 if (unlikely(ipv6_hopopt_jumbo_remove(skb))) 326 goto tx_drop_count; 327 328 txq = &apc->tx_qp[txq_idx].txq; 329 gdma_sq = txq->gdma_sq; 330 cq = &apc->tx_qp[txq_idx].tx_cq; 331 tx_stats = &txq->stats; 332 333 BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES); 334 if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES && 335 skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) { 336 /* GSO skb with Hardware SGE limit exceeded is not expected here 337 * as they are handled in mana_features_check() callback 338 */ 339 if (skb_linearize(skb)) { 340 netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n", 341 skb_shinfo(skb)->nr_frags, 342 skb_is_gso(skb)); 343 goto tx_drop_count; 344 } 345 apc->eth_stats.tx_linear_pkt_cnt++; 346 } 347 348 pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; 349 pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; 350 351 if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { 352 pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; 353 pkt_fmt = MANA_LONG_PKT_FMT; 354 } else { 355 pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; 356 } 357 358 if (skb_vlan_tag_present(skb)) { 359 pkt_fmt = MANA_LONG_PKT_FMT; 360 pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1; 361 pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb); 362 pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb); 363 pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb); 364 } 365 366 pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; 367 368 if (pkt_fmt == MANA_SHORT_PKT_FMT) { 369 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); 370 u64_stats_update_begin(&tx_stats->syncp); 371 tx_stats->short_pkt_fmt++; 372 u64_stats_update_end(&tx_stats->syncp); 373 } else { 374 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); 375 u64_stats_update_begin(&tx_stats->syncp); 376 tx_stats->long_pkt_fmt++; 377 u64_stats_update_end(&tx_stats->syncp); 378 } 379 380 pkg.wqe_req.inline_oob_data = 
&pkg.tx_oob; 381 pkg.wqe_req.flags = 0; 382 pkg.wqe_req.client_data_unit = 0; 383 384 pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; 385 386 if (skb->protocol == htons(ETH_P_IP)) 387 ipv4 = true; 388 else if (skb->protocol == htons(ETH_P_IPV6)) 389 ipv6 = true; 390 391 if (skb_is_gso(skb)) { 392 int num_sge; 393 394 gso_hs = mana_get_gso_hs(skb); 395 396 num_sge = mana_fix_skb_head(ndev, skb, gso_hs); 397 if (num_sge > 0) 398 pkg.wqe_req.num_sge = num_sge; 399 else 400 goto tx_drop_count; 401 402 u64_stats_update_begin(&tx_stats->syncp); 403 if (skb->encapsulation) { 404 tx_stats->tso_inner_packets++; 405 tx_stats->tso_inner_bytes += skb->len - gso_hs; 406 } else { 407 tx_stats->tso_packets++; 408 tx_stats->tso_bytes += skb->len - gso_hs; 409 } 410 u64_stats_update_end(&tx_stats->syncp); 411 412 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 413 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 414 415 pkg.tx_oob.s_oob.comp_iphdr_csum = 1; 416 pkg.tx_oob.s_oob.comp_tcp_csum = 1; 417 pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); 418 419 pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; 420 pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; 421 if (ipv4) { 422 ip_hdr(skb)->tot_len = 0; 423 ip_hdr(skb)->check = 0; 424 tcp_hdr(skb)->check = 425 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 426 ip_hdr(skb)->daddr, 0, 427 IPPROTO_TCP, 0); 428 } else { 429 ipv6_hdr(skb)->payload_len = 0; 430 tcp_hdr(skb)->check = 431 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 432 &ipv6_hdr(skb)->daddr, 0, 433 IPPROTO_TCP, 0); 434 } 435 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 436 csum_type = mana_checksum_info(skb); 437 438 u64_stats_update_begin(&tx_stats->syncp); 439 tx_stats->csum_partial++; 440 u64_stats_update_end(&tx_stats->syncp); 441 442 if (csum_type == IPPROTO_TCP) { 443 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 444 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 445 446 pkg.tx_oob.s_oob.comp_tcp_csum = 1; 447 pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); 448 449 } else if (csum_type == IPPROTO_UDP) { 450 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 451 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 452 453 pkg.tx_oob.s_oob.comp_udp_csum = 1; 454 } else { 455 /* Can't do offload of this type of checksum */ 456 if (skb_checksum_help(skb)) 457 goto tx_drop_count; 458 } 459 } 460 461 if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { 462 pkg.wqe_req.sgl = pkg.sgl_array; 463 } else { 464 pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, 465 sizeof(struct gdma_sge), 466 GFP_ATOMIC); 467 if (!pkg.sgl_ptr) 468 goto tx_drop_count; 469 470 pkg.wqe_req.sgl = pkg.sgl_ptr; 471 } 472 473 if (mana_map_skb(skb, apc, &pkg, gso_hs)) { 474 u64_stats_update_begin(&tx_stats->syncp); 475 tx_stats->mana_map_err++; 476 u64_stats_update_end(&tx_stats->syncp); 477 goto free_sgl_ptr; 478 } 479 480 skb_queue_tail(&txq->pending_skbs, skb); 481 482 len = skb->len; 483 num_gso_seg = skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; 484 net_txq = netdev_get_tx_queue(ndev, txq_idx); 485 486 err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, 487 (struct gdma_posted_wqe_info *)skb->cb); 488 if (!mana_can_tx(gdma_sq)) { 489 netif_tx_stop_queue(net_txq); 490 apc->eth_stats.stop_queue++; 491 } 492 493 if (err) { 494 (void)skb_dequeue_tail(&txq->pending_skbs); 495 mana_unmap_skb(skb, apc); 496 netdev_warn(ndev, "Failed to post TX OOB: %d\n", err); 497 goto free_sgl_ptr; 498 } 499 500 err = NETDEV_TX_OK; 501 atomic_inc(&txq->pending_sends); 502 503 mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); 504 505 /* skb may be freed after mana_gd_post_work_request. Do not use it. */ 506 skb = NULL; 507 508 /* Populated the packet and bytes counters based on post GSO packet 509 * calculations 510 */ 511 tx_stats = &txq->stats; 512 u64_stats_update_begin(&tx_stats->syncp); 513 tx_stats->packets += num_gso_seg; 514 tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs); 515 u64_stats_update_end(&tx_stats->syncp); 516 517 if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { 518 netif_tx_wake_queue(net_txq); 519 apc->eth_stats.wake_queue++; 520 } 521 522 kfree(pkg.sgl_ptr); 523 return err; 524 525 free_sgl_ptr: 526 kfree(pkg.sgl_ptr); 527 tx_drop_count: 528 ndev->stats.tx_dropped++; 529 tx_drop: 530 dev_kfree_skb_any(skb); 531 return NETDEV_TX_OK; 532 } 533 534 #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) 535 static netdev_features_t mana_features_check(struct sk_buff *skb, 536 struct net_device *ndev, 537 netdev_features_t features) 538 { 539 if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) { 540 /* Exceeds HW SGE limit. 541 * GSO case: 542 * Disable GSO so the stack will software-segment the skb 543 * into smaller skbs that fit the SGE budget. 544 * Non-GSO case: 545 * The xmit path will attempt skb_linearize() as a fallback. 
546 */ 547 features &= ~NETIF_F_GSO_MASK; 548 } 549 return features; 550 } 551 #endif 552 553 static void mana_get_stats64(struct net_device *ndev, 554 struct rtnl_link_stats64 *st) 555 { 556 struct mana_port_context *apc = netdev_priv(ndev); 557 unsigned int num_queues = apc->num_queues; 558 struct mana_stats_rx *rx_stats; 559 struct mana_stats_tx *tx_stats; 560 unsigned int start; 561 u64 packets, bytes; 562 int q; 563 564 if (!apc->port_is_up) 565 return; 566 567 netdev_stats_to_stats64(st, &ndev->stats); 568 569 if (apc->ac->hwc_timeout_occurred) 570 netdev_warn_once(ndev, "HWC timeout occurred\n"); 571 572 st->rx_missed_errors = apc->ac->hc_stats.hc_rx_discards_no_wqe; 573 574 for (q = 0; q < num_queues; q++) { 575 rx_stats = &apc->rxqs[q]->stats; 576 577 do { 578 start = u64_stats_fetch_begin(&rx_stats->syncp); 579 packets = rx_stats->packets; 580 bytes = rx_stats->bytes; 581 } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); 582 583 st->rx_packets += packets; 584 st->rx_bytes += bytes; 585 } 586 587 for (q = 0; q < num_queues; q++) { 588 tx_stats = &apc->tx_qp[q].txq.stats; 589 590 do { 591 start = u64_stats_fetch_begin(&tx_stats->syncp); 592 packets = tx_stats->packets; 593 bytes = tx_stats->bytes; 594 } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); 595 596 st->tx_packets += packets; 597 st->tx_bytes += bytes; 598 } 599 } 600 601 static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, 602 int old_q) 603 { 604 struct mana_port_context *apc = netdev_priv(ndev); 605 u32 hash = skb_get_hash(skb); 606 struct sock *sk = skb->sk; 607 int txq; 608 609 txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; 610 611 if (txq != old_q && sk && sk_fullsock(sk) && 612 rcu_access_pointer(sk->sk_dst_cache)) 613 sk_tx_queue_set(sk, txq); 614 615 return txq; 616 } 617 618 static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, 619 struct net_device *sb_dev) 620 { 621 int txq; 622 623 if (ndev->real_num_tx_queues == 1) 624 return 0; 625 626 txq = sk_tx_queue_get(skb->sk); 627 628 if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { 629 if (skb_rx_queue_recorded(skb)) 630 txq = skb_get_rx_queue(skb); 631 else 632 txq = mana_get_tx_queue(ndev, skb, txq); 633 } 634 635 return txq; 636 } 637 638 /* Release pre-allocated RX buffers */ 639 void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) 640 { 641 struct device *dev; 642 int i; 643 644 dev = mpc->ac->gdma_dev->gdma_context->dev; 645 646 if (!mpc->rxbufs_pre) 647 goto out1; 648 649 if (!mpc->das_pre) 650 goto out2; 651 652 while (mpc->rxbpre_total) { 653 i = --mpc->rxbpre_total; 654 dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize, 655 DMA_FROM_DEVICE); 656 put_page(virt_to_head_page(mpc->rxbufs_pre[i])); 657 } 658 659 kfree(mpc->das_pre); 660 mpc->das_pre = NULL; 661 662 out2: 663 kfree(mpc->rxbufs_pre); 664 mpc->rxbufs_pre = NULL; 665 666 out1: 667 mpc->rxbpre_datasize = 0; 668 mpc->rxbpre_alloc_size = 0; 669 mpc->rxbpre_headroom = 0; 670 } 671 672 /* Get a buffer from the pre-allocated RX buffers */ 673 static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) 674 { 675 struct net_device *ndev = rxq->ndev; 676 struct mana_port_context *mpc; 677 void *va; 678 679 mpc = netdev_priv(ndev); 680 681 if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) { 682 netdev_err(ndev, "No RX pre-allocated bufs\n"); 683 return NULL; 684 } 685 686 /* Check sizes to catch unexpected coding error */ 687 if (mpc->rxbpre_datasize != rxq->datasize) { 688 
netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n", 689 mpc->rxbpre_datasize, rxq->datasize); 690 return NULL; 691 } 692 693 if (mpc->rxbpre_alloc_size != rxq->alloc_size) { 694 netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n", 695 mpc->rxbpre_alloc_size, rxq->alloc_size); 696 return NULL; 697 } 698 699 if (mpc->rxbpre_headroom != rxq->headroom) { 700 netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n", 701 mpc->rxbpre_headroom, rxq->headroom); 702 return NULL; 703 } 704 705 mpc->rxbpre_total--; 706 707 *da = mpc->das_pre[mpc->rxbpre_total]; 708 va = mpc->rxbufs_pre[mpc->rxbpre_total]; 709 mpc->rxbufs_pre[mpc->rxbpre_total] = NULL; 710 711 /* Deallocate the array after all buffers are gone */ 712 if (!mpc->rxbpre_total) 713 mana_pre_dealloc_rxbufs(mpc); 714 715 return va; 716 } 717 718 /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ 719 static void mana_get_rxbuf_cfg(struct mana_port_context *apc, 720 int mtu, u32 *datasize, u32 *alloc_size, 721 u32 *headroom, u32 *frag_count) 722 { 723 u32 len, buf_size; 724 725 /* Calculate datasize first (consistent across all cases) */ 726 *datasize = mtu + ETH_HLEN; 727 728 /* For xdp and jumbo frames make sure only one packet fits per page */ 729 if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) { 730 if (mana_xdp_get(apc)) { 731 *headroom = XDP_PACKET_HEADROOM; 732 *alloc_size = PAGE_SIZE; 733 } else { 734 *headroom = 0; /* no support for XDP */ 735 *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + 736 *headroom); 737 } 738 739 *frag_count = 1; 740 return; 741 } 742 743 /* Standard MTU case - optimize for multiple packets per page */ 744 *headroom = 0; 745 746 /* Calculate base buffer size needed */ 747 len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); 748 buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT); 749 750 /* Calculate how many packets can fit in a page */ 751 *frag_count = PAGE_SIZE / buf_size; 752 *alloc_size = buf_size; 753 } 754 755 int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) 756 { 757 struct device *dev; 758 struct page *page; 759 dma_addr_t da; 760 int num_rxb; 761 void *va; 762 int i; 763 764 mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize, 765 &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom, 766 &mpc->rxbpre_frag_count); 767 768 dev = mpc->ac->gdma_dev->gdma_context->dev; 769 770 num_rxb = num_queues * mpc->rx_queue_size; 771 772 WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); 773 mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); 774 if (!mpc->rxbufs_pre) 775 goto error; 776 777 mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL); 778 if (!mpc->das_pre) 779 goto error; 780 781 mpc->rxbpre_total = 0; 782 783 for (i = 0; i < num_rxb; i++) { 784 page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); 785 if (!page) 786 goto error; 787 788 va = page_to_virt(page); 789 790 da = dma_map_single(dev, va + mpc->rxbpre_headroom, 791 mpc->rxbpre_datasize, DMA_FROM_DEVICE); 792 if (dma_mapping_error(dev, da)) { 793 put_page(page); 794 goto error; 795 } 796 797 mpc->rxbufs_pre[i] = va; 798 mpc->das_pre[i] = da; 799 mpc->rxbpre_total = i + 1; 800 } 801 802 return 0; 803 804 error: 805 netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); 806 mana_pre_dealloc_rxbufs(mpc); 807 return -ENOMEM; 808 } 809 810 static int mana_change_mtu(struct net_device *ndev, int new_mtu) 811 { 812 struct mana_port_context *mpc = netdev_priv(ndev); 813 unsigned int old_mtu = ndev->mtu; 814 int 
err; 815 816 /* Pre-allocate buffers to prevent failure in mana_attach later */ 817 err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues); 818 if (err) { 819 netdev_err(ndev, "Insufficient memory for new MTU\n"); 820 return err; 821 } 822 823 err = mana_detach(ndev, false); 824 if (err) { 825 netdev_err(ndev, "mana_detach failed: %d\n", err); 826 goto out; 827 } 828 829 WRITE_ONCE(ndev->mtu, new_mtu); 830 831 err = mana_attach(ndev); 832 if (err) { 833 netdev_err(ndev, "mana_attach failed: %d\n", err); 834 WRITE_ONCE(ndev->mtu, old_mtu); 835 } 836 837 out: 838 mana_pre_dealloc_rxbufs(mpc); 839 return err; 840 } 841 842 static int mana_shaper_set(struct net_shaper_binding *binding, 843 const struct net_shaper *shaper, 844 struct netlink_ext_ack *extack) 845 { 846 struct mana_port_context *apc = netdev_priv(binding->netdev); 847 u32 old_speed, rate; 848 int err; 849 850 if (shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) { 851 NL_SET_ERR_MSG_MOD(extack, "net shaper scope should be netdev"); 852 return -EINVAL; 853 } 854 855 if (apc->handle.id && shaper->handle.id != apc->handle.id) { 856 NL_SET_ERR_MSG_MOD(extack, "Cannot create multiple shapers"); 857 return -EOPNOTSUPP; 858 } 859 860 if (!shaper->bw_max || (shaper->bw_max % 100000000)) { 861 NL_SET_ERR_MSG_MOD(extack, "Please use multiples of 100Mbps for bandwidth"); 862 return -EINVAL; 863 } 864 865 rate = div_u64(shaper->bw_max, 1000); /* Convert bps to Kbps */ 866 rate = div_u64(rate, 1000); /* Convert Kbps to Mbps */ 867 868 /* Get current speed */ 869 err = mana_query_link_cfg(apc); 870 old_speed = (err) ? SPEED_UNKNOWN : apc->speed; 871 872 if (!err) { 873 err = mana_set_bw_clamp(apc, rate, TRI_STATE_TRUE); 874 apc->speed = (err) ? old_speed : rate; 875 apc->handle = (err) ? apc->handle : shaper->handle; 876 } 877 878 return err; 879 } 880 881 static int mana_shaper_del(struct net_shaper_binding *binding, 882 const struct net_shaper_handle *handle, 883 struct netlink_ext_ack *extack) 884 { 885 struct mana_port_context *apc = netdev_priv(binding->netdev); 886 int err; 887 888 err = mana_set_bw_clamp(apc, 0, TRI_STATE_FALSE); 889 890 if (!err) { 891 /* Reset mana port context parameters */ 892 apc->handle.id = 0; 893 apc->handle.scope = NET_SHAPER_SCOPE_UNSPEC; 894 apc->speed = apc->max_speed; 895 } 896 897 return err; 898 } 899 900 static void mana_shaper_cap(struct net_shaper_binding *binding, 901 enum net_shaper_scope scope, 902 unsigned long *flags) 903 { 904 *flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX) | 905 BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS); 906 } 907 908 static const struct net_shaper_ops mana_shaper_ops = { 909 .set = mana_shaper_set, 910 .delete = mana_shaper_del, 911 .capabilities = mana_shaper_cap, 912 }; 913 914 static const struct net_device_ops mana_devops = { 915 .ndo_open = mana_open, 916 .ndo_stop = mana_close, 917 .ndo_select_queue = mana_select_queue, 918 #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) 919 .ndo_features_check = mana_features_check, 920 #endif 921 .ndo_start_xmit = mana_start_xmit, 922 .ndo_validate_addr = eth_validate_addr, 923 .ndo_get_stats64 = mana_get_stats64, 924 .ndo_bpf = mana_bpf, 925 .ndo_xdp_xmit = mana_xdp_xmit, 926 .ndo_change_mtu = mana_change_mtu, 927 .net_shaper_ops = &mana_shaper_ops, 928 }; 929 930 static void mana_cleanup_port_context(struct mana_port_context *apc) 931 { 932 /* 933 * make sure subsequent cleanup attempts don't end up removing already 934 * cleaned dentry pointer 935 */ 936 debugfs_remove(apc->mana_port_debugfs); 937 apc->mana_port_debugfs = 
NULL; 938 kfree(apc->rxqs); 939 apc->rxqs = NULL; 940 } 941 942 static void mana_cleanup_indir_table(struct mana_port_context *apc) 943 { 944 apc->indir_table_sz = 0; 945 kfree(apc->indir_table); 946 kfree(apc->rxobj_table); 947 } 948 949 static int mana_init_port_context(struct mana_port_context *apc) 950 { 951 apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), 952 GFP_KERNEL); 953 954 return !apc->rxqs ? -ENOMEM : 0; 955 } 956 957 static int mana_send_request(struct mana_context *ac, void *in_buf, 958 u32 in_len, void *out_buf, u32 out_len) 959 { 960 struct gdma_context *gc = ac->gdma_dev->gdma_context; 961 struct gdma_resp_hdr *resp = out_buf; 962 struct gdma_req_hdr *req = in_buf; 963 struct device *dev = gc->dev; 964 static atomic_t activity_id; 965 int err; 966 967 req->dev_id = gc->mana.dev_id; 968 req->activity_id = atomic_inc_return(&activity_id); 969 970 err = mana_gd_send_request(gc, in_len, in_buf, out_len, 971 out_buf); 972 if (err || resp->status) { 973 if (err == -EOPNOTSUPP) 974 return err; 975 976 if (req->req.msg_type != MANA_QUERY_PHY_STAT && 977 mana_need_log(gc, err)) 978 dev_err(dev, "Failed to send mana message: %d, 0x%x\n", 979 err, resp->status); 980 return err ? err : -EPROTO; 981 } 982 983 if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || 984 req->activity_id != resp->activity_id) { 985 dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", 986 req->dev_id.as_uint32, resp->dev_id.as_uint32, 987 req->activity_id, resp->activity_id); 988 return -EPROTO; 989 } 990 991 return 0; 992 } 993 994 static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, 995 const enum mana_command_code expected_code, 996 const u32 min_size) 997 { 998 if (resp_hdr->response.msg_type != expected_code) 999 return -EPROTO; 1000 1001 if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) 1002 return -EPROTO; 1003 1004 if (resp_hdr->response.msg_size < min_size) 1005 return -EPROTO; 1006 1007 return 0; 1008 } 1009 1010 static int mana_pf_register_hw_vport(struct mana_port_context *apc) 1011 { 1012 struct mana_register_hw_vport_resp resp = {}; 1013 struct mana_register_hw_vport_req req = {}; 1014 int err; 1015 1016 mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT, 1017 sizeof(req), sizeof(resp)); 1018 req.attached_gfid = 1; 1019 req.is_pf_default_vport = 1; 1020 req.allow_all_ether_types = 1; 1021 1022 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1023 sizeof(resp)); 1024 if (err) { 1025 netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err); 1026 return err; 1027 } 1028 1029 err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT, 1030 sizeof(resp)); 1031 if (err || resp.hdr.status) { 1032 netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n", 1033 err, resp.hdr.status); 1034 return err ? 
err : -EPROTO; 1035 } 1036 1037 apc->port_handle = resp.hw_vport_handle; 1038 return 0; 1039 } 1040 1041 static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) 1042 { 1043 struct mana_deregister_hw_vport_resp resp = {}; 1044 struct mana_deregister_hw_vport_req req = {}; 1045 int err; 1046 1047 mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT, 1048 sizeof(req), sizeof(resp)); 1049 req.hw_vport_handle = apc->port_handle; 1050 1051 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1052 sizeof(resp)); 1053 if (err) { 1054 if (mana_en_need_log(apc, err)) 1055 netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", 1056 err); 1057 1058 return; 1059 } 1060 1061 err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT, 1062 sizeof(resp)); 1063 if (err || resp.hdr.status) 1064 netdev_err(apc->ndev, 1065 "Failed to deregister hw vPort: %d, 0x%x\n", 1066 err, resp.hdr.status); 1067 } 1068 1069 static int mana_pf_register_filter(struct mana_port_context *apc) 1070 { 1071 struct mana_register_filter_resp resp = {}; 1072 struct mana_register_filter_req req = {}; 1073 int err; 1074 1075 mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER, 1076 sizeof(req), sizeof(resp)); 1077 req.vport = apc->port_handle; 1078 memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN); 1079 1080 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1081 sizeof(resp)); 1082 if (err) { 1083 netdev_err(apc->ndev, "Failed to register filter: %d\n", err); 1084 return err; 1085 } 1086 1087 err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER, 1088 sizeof(resp)); 1089 if (err || resp.hdr.status) { 1090 netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n", 1091 err, resp.hdr.status); 1092 return err ? err : -EPROTO; 1093 } 1094 1095 apc->pf_filter_handle = resp.filter_handle; 1096 return 0; 1097 } 1098 1099 static void mana_pf_deregister_filter(struct mana_port_context *apc) 1100 { 1101 struct mana_deregister_filter_resp resp = {}; 1102 struct mana_deregister_filter_req req = {}; 1103 int err; 1104 1105 mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER, 1106 sizeof(req), sizeof(resp)); 1107 req.filter_handle = apc->pf_filter_handle; 1108 1109 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1110 sizeof(resp)); 1111 if (err) { 1112 if (mana_en_need_log(apc, err)) 1113 netdev_err(apc->ndev, "Failed to unregister filter: %d\n", 1114 err); 1115 1116 return; 1117 } 1118 1119 err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER, 1120 sizeof(resp)); 1121 if (err || resp.hdr.status) 1122 netdev_err(apc->ndev, 1123 "Failed to deregister filter: %d, 0x%x\n", 1124 err, resp.hdr.status); 1125 } 1126 1127 static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, 1128 u32 proto_minor_ver, u32 proto_micro_ver, 1129 u16 *max_num_vports, u8 *bm_hostmode) 1130 { 1131 struct gdma_context *gc = ac->gdma_dev->gdma_context; 1132 struct mana_query_device_cfg_resp resp = {}; 1133 struct mana_query_device_cfg_req req = {}; 1134 struct device *dev = gc->dev; 1135 int err = 0; 1136 1137 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, 1138 sizeof(req), sizeof(resp)); 1139 1140 req.hdr.resp.msg_version = GDMA_MESSAGE_V3; 1141 1142 req.proto_major_ver = proto_major_ver; 1143 req.proto_minor_ver = proto_minor_ver; 1144 req.proto_micro_ver = proto_micro_ver; 1145 1146 err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); 1147 if (err) { 1148 dev_err(dev, "Failed to query config: %d", err); 1149 return err; 1150 } 1151 1152 err = 
mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, 1153 sizeof(resp)); 1154 if (err || resp.hdr.status) { 1155 dev_err(dev, "Invalid query result: %d, 0x%x\n", err, 1156 resp.hdr.status); 1157 if (!err) 1158 err = -EPROTO; 1159 return err; 1160 } 1161 1162 *max_num_vports = resp.max_num_vports; 1163 1164 if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) 1165 gc->adapter_mtu = resp.adapter_mtu; 1166 else 1167 gc->adapter_mtu = ETH_FRAME_LEN; 1168 1169 if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) 1170 *bm_hostmode = resp.bm_hostmode; 1171 else 1172 *bm_hostmode = 0; 1173 1174 debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); 1175 1176 return 0; 1177 } 1178 1179 static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, 1180 u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) 1181 { 1182 struct mana_query_vport_cfg_resp resp = {}; 1183 struct mana_query_vport_cfg_req req = {}; 1184 int err; 1185 1186 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, 1187 sizeof(req), sizeof(resp)); 1188 1189 req.vport_index = vport_index; 1190 1191 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1192 sizeof(resp)); 1193 if (err) 1194 return err; 1195 1196 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, 1197 sizeof(resp)); 1198 if (err) 1199 return err; 1200 1201 if (resp.hdr.status) 1202 return -EPROTO; 1203 1204 *max_sq = resp.max_num_sq; 1205 *max_rq = resp.max_num_rq; 1206 if (resp.num_indirection_ent > 0 && 1207 resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && 1208 is_power_of_2(resp.num_indirection_ent)) { 1209 *num_indir_entry = resp.num_indirection_ent; 1210 } else { 1211 netdev_warn(apc->ndev, 1212 "Setting indirection table size to default %d for vPort %d\n", 1213 MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); 1214 *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; 1215 } 1216 1217 apc->port_handle = resp.vport; 1218 ether_addr_copy(apc->mac_addr, resp.mac_addr); 1219 1220 return 0; 1221 } 1222 1223 void mana_uncfg_vport(struct mana_port_context *apc) 1224 { 1225 mutex_lock(&apc->vport_mutex); 1226 apc->vport_use_count--; 1227 WARN_ON(apc->vport_use_count < 0); 1228 mutex_unlock(&apc->vport_mutex); 1229 } 1230 EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA"); 1231 1232 int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, 1233 u32 doorbell_pg_id) 1234 { 1235 struct mana_config_vport_resp resp = {}; 1236 struct mana_config_vport_req req = {}; 1237 int err; 1238 1239 /* This function is used to program the Ethernet port in the hardware 1240 * table. It can be called from the Ethernet driver or the RDMA driver. 1241 * 1242 * For Ethernet usage, the hardware supports only one active user on a 1243 * physical port. The driver checks on the port usage before programming 1244 * the hardware when creating the RAW QP (RDMA driver) or exposing the 1245 * device to kernel NET layer (Ethernet driver). 1246 * 1247 * Because the RDMA driver doesn't know in advance which QP type the 1248 * user will create, it exposes the device with all its ports. The user 1249 * may not be able to create RAW QP on a port if this port is already 1250 * in used by the Ethernet driver from the kernel. 1251 * 1252 * This physical port limitation only applies to the RAW QP. For RC QP, 1253 * the hardware doesn't have this limitation. The user can create RC 1254 * QPs on a physical port up to the hardware limits independent of the 1255 * Ethernet usage on the same port. 
1256 */ 1257 mutex_lock(&apc->vport_mutex); 1258 if (apc->vport_use_count > 0) { 1259 mutex_unlock(&apc->vport_mutex); 1260 return -EBUSY; 1261 } 1262 apc->vport_use_count++; 1263 mutex_unlock(&apc->vport_mutex); 1264 1265 mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, 1266 sizeof(req), sizeof(resp)); 1267 req.vport = apc->port_handle; 1268 req.pdid = protection_dom_id; 1269 req.doorbell_pageid = doorbell_pg_id; 1270 1271 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1272 sizeof(resp)); 1273 if (err) { 1274 netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err); 1275 goto out; 1276 } 1277 1278 err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX, 1279 sizeof(resp)); 1280 if (err || resp.hdr.status) { 1281 netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n", 1282 err, resp.hdr.status); 1283 if (!err) 1284 err = -EPROTO; 1285 1286 goto out; 1287 } 1288 1289 apc->tx_shortform_allowed = resp.short_form_allowed; 1290 apc->tx_vp_offset = resp.tx_vport_offset; 1291 1292 netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n", 1293 apc->port_handle, protection_dom_id, doorbell_pg_id); 1294 out: 1295 if (err) 1296 mana_uncfg_vport(apc); 1297 1298 return err; 1299 } 1300 EXPORT_SYMBOL_NS(mana_cfg_vport, "NET_MANA"); 1301 1302 static int mana_cfg_vport_steering(struct mana_port_context *apc, 1303 enum TRI_STATE rx, 1304 bool update_default_rxobj, bool update_key, 1305 bool update_tab) 1306 { 1307 struct mana_cfg_rx_steer_req_v2 *req; 1308 struct mana_cfg_rx_steer_resp resp = {}; 1309 struct net_device *ndev = apc->ndev; 1310 u32 req_buf_size; 1311 int err; 1312 1313 req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); 1314 req = kzalloc(req_buf_size, GFP_KERNEL); 1315 if (!req) 1316 return -ENOMEM; 1317 1318 mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, 1319 sizeof(resp)); 1320 1321 req->hdr.req.msg_version = GDMA_MESSAGE_V2; 1322 1323 req->vport = apc->port_handle; 1324 req->num_indir_entries = apc->indir_table_sz; 1325 req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, 1326 indir_tab); 1327 req->rx_enable = rx; 1328 req->rss_enable = apc->rss_state; 1329 req->update_default_rxobj = update_default_rxobj; 1330 req->update_hashkey = update_key; 1331 req->update_indir_tab = update_tab; 1332 req->default_rxobj = apc->default_rxobj; 1333 req->cqe_coalescing_enable = 0; 1334 1335 if (update_key) 1336 memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); 1337 1338 if (update_tab) 1339 memcpy(req->indir_tab, apc->rxobj_table, 1340 flex_array_size(req, indir_tab, req->num_indir_entries)); 1341 1342 err = mana_send_request(apc->ac, req, req_buf_size, &resp, 1343 sizeof(resp)); 1344 if (err) { 1345 if (mana_en_need_log(apc, err)) 1346 netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); 1347 1348 goto out; 1349 } 1350 1351 err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, 1352 sizeof(resp)); 1353 if (err) { 1354 netdev_err(ndev, "vPort RX configuration failed: %d\n", err); 1355 goto out; 1356 } 1357 1358 if (resp.hdr.status) { 1359 netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", 1360 resp.hdr.status); 1361 err = -EPROTO; 1362 } 1363 1364 netdev_info(ndev, "Configured steering vPort %llu entries %u\n", 1365 apc->port_handle, apc->indir_table_sz); 1366 out: 1367 kfree(req); 1368 return err; 1369 } 1370 1371 int mana_query_link_cfg(struct mana_port_context *apc) 1372 { 1373 struct net_device *ndev = apc->ndev; 1374 struct mana_query_link_config_resp resp = {}; 1375 struct 
mana_query_link_config_req req = {}; 1376 int err; 1377 1378 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG, 1379 sizeof(req), sizeof(resp)); 1380 1381 req.vport = apc->port_handle; 1382 req.hdr.resp.msg_version = GDMA_MESSAGE_V2; 1383 1384 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1385 sizeof(resp)); 1386 1387 if (err) { 1388 if (err == -EOPNOTSUPP) { 1389 netdev_info_once(ndev, "MANA_QUERY_LINK_CONFIG not supported\n"); 1390 return err; 1391 } 1392 netdev_err(ndev, "Failed to query link config: %d\n", err); 1393 return err; 1394 } 1395 1396 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG, 1397 sizeof(resp)); 1398 1399 if (err || resp.hdr.status) { 1400 netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err, 1401 resp.hdr.status); 1402 if (!err) 1403 err = -EOPNOTSUPP; 1404 return err; 1405 } 1406 1407 if (resp.qos_unconfigured) { 1408 err = -EINVAL; 1409 return err; 1410 } 1411 apc->speed = resp.link_speed_mbps; 1412 apc->max_speed = resp.qos_speed_mbps; 1413 return 0; 1414 } 1415 1416 int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, 1417 int enable_clamping) 1418 { 1419 struct mana_set_bw_clamp_resp resp = {}; 1420 struct mana_set_bw_clamp_req req = {}; 1421 struct net_device *ndev = apc->ndev; 1422 int err; 1423 1424 mana_gd_init_req_hdr(&req.hdr, MANA_SET_BW_CLAMP, 1425 sizeof(req), sizeof(resp)); 1426 req.vport = apc->port_handle; 1427 req.link_speed_mbps = speed; 1428 req.enable_clamping = enable_clamping; 1429 1430 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1431 sizeof(resp)); 1432 1433 if (err) { 1434 if (err == -EOPNOTSUPP) { 1435 netdev_info_once(ndev, "MANA_SET_BW_CLAMP not supported\n"); 1436 return err; 1437 } 1438 netdev_err(ndev, "Failed to set bandwidth clamp for speed %u, err = %d", 1439 speed, err); 1440 return err; 1441 } 1442 1443 err = mana_verify_resp_hdr(&resp.hdr, MANA_SET_BW_CLAMP, 1444 sizeof(resp)); 1445 1446 if (err || resp.hdr.status) { 1447 netdev_err(ndev, "Failed to set bandwidth clamp: %d, 0x%x\n", err, 1448 resp.hdr.status); 1449 if (!err) 1450 err = -EOPNOTSUPP; 1451 return err; 1452 } 1453 1454 if (resp.qos_unconfigured) 1455 netdev_info(ndev, "QoS is unconfigured\n"); 1456 1457 return 0; 1458 } 1459 1460 int mana_create_wq_obj(struct mana_port_context *apc, 1461 mana_handle_t vport, 1462 u32 wq_type, struct mana_obj_spec *wq_spec, 1463 struct mana_obj_spec *cq_spec, 1464 mana_handle_t *wq_obj) 1465 { 1466 struct mana_create_wqobj_resp resp = {}; 1467 struct mana_create_wqobj_req req = {}; 1468 struct net_device *ndev = apc->ndev; 1469 int err; 1470 1471 mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, 1472 sizeof(req), sizeof(resp)); 1473 req.vport = vport; 1474 req.wq_type = wq_type; 1475 req.wq_gdma_region = wq_spec->gdma_region; 1476 req.cq_gdma_region = cq_spec->gdma_region; 1477 req.wq_size = wq_spec->queue_size; 1478 req.cq_size = cq_spec->queue_size; 1479 req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; 1480 req.cq_parent_qid = cq_spec->attached_eq; 1481 1482 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1483 sizeof(resp)); 1484 if (err) { 1485 netdev_err(ndev, "Failed to create WQ object: %d\n", err); 1486 goto out; 1487 } 1488 1489 err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, 1490 sizeof(resp)); 1491 if (err || resp.hdr.status) { 1492 netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err, 1493 resp.hdr.status); 1494 if (!err) 1495 err = -EPROTO; 1496 goto out; 1497 } 1498 1499 if (resp.wq_obj == INVALID_MANA_HANDLE) { 1500 
netdev_err(ndev, "Got an invalid WQ object handle\n"); 1501 err = -EPROTO; 1502 goto out; 1503 } 1504 1505 *wq_obj = resp.wq_obj; 1506 wq_spec->queue_index = resp.wq_id; 1507 cq_spec->queue_index = resp.cq_id; 1508 1509 return 0; 1510 out: 1511 return err; 1512 } 1513 EXPORT_SYMBOL_NS(mana_create_wq_obj, "NET_MANA"); 1514 1515 void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, 1516 mana_handle_t wq_obj) 1517 { 1518 struct mana_destroy_wqobj_resp resp = {}; 1519 struct mana_destroy_wqobj_req req = {}; 1520 struct net_device *ndev = apc->ndev; 1521 int err; 1522 1523 mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, 1524 sizeof(req), sizeof(resp)); 1525 req.wq_type = wq_type; 1526 req.wq_obj_handle = wq_obj; 1527 1528 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1529 sizeof(resp)); 1530 if (err) { 1531 if (mana_en_need_log(apc, err)) 1532 netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); 1533 1534 return; 1535 } 1536 1537 err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, 1538 sizeof(resp)); 1539 if (err || resp.hdr.status) 1540 netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, 1541 resp.hdr.status); 1542 } 1543 EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA"); 1544 1545 static void mana_destroy_eq(struct mana_context *ac) 1546 { 1547 struct gdma_context *gc = ac->gdma_dev->gdma_context; 1548 struct gdma_queue *eq; 1549 int i; 1550 1551 if (!ac->eqs) 1552 return; 1553 1554 debugfs_remove_recursive(ac->mana_eqs_debugfs); 1555 ac->mana_eqs_debugfs = NULL; 1556 1557 for (i = 0; i < gc->max_num_queues; i++) { 1558 eq = ac->eqs[i].eq; 1559 if (!eq) 1560 continue; 1561 1562 mana_gd_destroy_queue(gc, eq); 1563 } 1564 1565 kfree(ac->eqs); 1566 ac->eqs = NULL; 1567 } 1568 1569 static void mana_create_eq_debugfs(struct mana_context *ac, int i) 1570 { 1571 struct mana_eq eq = ac->eqs[i]; 1572 char eqnum[32]; 1573 1574 sprintf(eqnum, "eq%d", i); 1575 eq.mana_eq_debugfs = debugfs_create_dir(eqnum, ac->mana_eqs_debugfs); 1576 debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head); 1577 debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail); 1578 debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops); 1579 } 1580 1581 static int mana_create_eq(struct mana_context *ac) 1582 { 1583 struct gdma_dev *gd = ac->gdma_dev; 1584 struct gdma_context *gc = gd->gdma_context; 1585 struct gdma_queue_spec spec = {}; 1586 int err; 1587 int i; 1588 1589 ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq), 1590 GFP_KERNEL); 1591 if (!ac->eqs) 1592 return -ENOMEM; 1593 1594 spec.type = GDMA_EQ; 1595 spec.monitor_avl_buf = false; 1596 spec.queue_size = EQ_SIZE; 1597 spec.eq.callback = NULL; 1598 spec.eq.context = ac->eqs; 1599 spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; 1600 1601 ac->mana_eqs_debugfs = debugfs_create_dir("EQs", gc->mana_pci_debugfs); 1602 1603 for (i = 0; i < gc->max_num_queues; i++) { 1604 spec.eq.msix_index = (i + 1) % gc->num_msix_usable; 1605 err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); 1606 if (err) { 1607 dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); 1608 goto out; 1609 } 1610 mana_create_eq_debugfs(ac, i); 1611 } 1612 1613 return 0; 1614 out: 1615 mana_destroy_eq(ac); 1616 return err; 1617 } 1618 1619 static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq) 1620 { 1621 struct mana_fence_rq_resp resp = {}; 1622 struct mana_fence_rq_req req = {}; 1623 int err; 1624 1625 init_completion(&rxq->fence_event); 1626 1627 
mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ, 1628 sizeof(req), sizeof(resp)); 1629 req.wq_obj_handle = rxq->rxobj; 1630 1631 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1632 sizeof(resp)); 1633 if (err) { 1634 netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n", 1635 rxq->rxq_idx, err); 1636 return err; 1637 } 1638 1639 err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp)); 1640 if (err || resp.hdr.status) { 1641 netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n", 1642 rxq->rxq_idx, err, resp.hdr.status); 1643 if (!err) 1644 err = -EPROTO; 1645 1646 return err; 1647 } 1648 1649 if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) { 1650 netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n", 1651 rxq->rxq_idx); 1652 return -ETIMEDOUT; 1653 } 1654 1655 return 0; 1656 } 1657 1658 static void mana_fence_rqs(struct mana_port_context *apc) 1659 { 1660 unsigned int rxq_idx; 1661 struct mana_rxq *rxq; 1662 int err; 1663 1664 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 1665 rxq = apc->rxqs[rxq_idx]; 1666 err = mana_fence_rq(apc, rxq); 1667 1668 /* In case of any error, use sleep instead. */ 1669 if (err) 1670 msleep(100); 1671 } 1672 } 1673 1674 static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) 1675 { 1676 u32 used_space_old; 1677 u32 used_space_new; 1678 1679 used_space_old = wq->head - wq->tail; 1680 used_space_new = wq->head - (wq->tail + num_units); 1681 1682 if (WARN_ON_ONCE(used_space_new > used_space_old)) 1683 return -ERANGE; 1684 1685 wq->tail += num_units; 1686 return 0; 1687 } 1688 1689 void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) 1690 { 1691 struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; 1692 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 1693 struct device *dev = gc->dev; 1694 int hsg, i; 1695 1696 /* Number of SGEs of linear part */ 1697 hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 
2 : 1; 1698 1699 for (i = 0; i < hsg; i++) 1700 dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], 1701 DMA_TO_DEVICE); 1702 1703 for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++) 1704 dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], 1705 DMA_TO_DEVICE); 1706 } 1707 1708 static void mana_poll_tx_cq(struct mana_cq *cq) 1709 { 1710 struct gdma_comp *completions = cq->gdma_comp_buf; 1711 struct gdma_posted_wqe_info *wqe_info; 1712 unsigned int pkt_transmitted = 0; 1713 unsigned int wqe_unit_cnt = 0; 1714 struct mana_txq *txq = cq->txq; 1715 struct mana_port_context *apc; 1716 struct netdev_queue *net_txq; 1717 struct gdma_queue *gdma_wq; 1718 unsigned int avail_space; 1719 struct net_device *ndev; 1720 struct sk_buff *skb; 1721 bool txq_stopped; 1722 int comp_read; 1723 int i; 1724 1725 ndev = txq->ndev; 1726 apc = netdev_priv(ndev); 1727 1728 comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, 1729 CQE_POLLING_BUFFER); 1730 1731 if (comp_read < 1) 1732 return; 1733 1734 for (i = 0; i < comp_read; i++) { 1735 struct mana_tx_comp_oob *cqe_oob; 1736 1737 if (WARN_ON_ONCE(!completions[i].is_sq)) 1738 return; 1739 1740 cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; 1741 if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != 1742 MANA_CQE_COMPLETION)) 1743 return; 1744 1745 switch (cqe_oob->cqe_hdr.cqe_type) { 1746 case CQE_TX_OKAY: 1747 break; 1748 1749 case CQE_TX_SA_DROP: 1750 case CQE_TX_MTU_DROP: 1751 case CQE_TX_INVALID_OOB: 1752 case CQE_TX_INVALID_ETH_TYPE: 1753 case CQE_TX_HDR_PROCESSING_ERROR: 1754 case CQE_TX_VF_DISABLED: 1755 case CQE_TX_VPORT_IDX_OUT_OF_RANGE: 1756 case CQE_TX_VPORT_DISABLED: 1757 case CQE_TX_VLAN_TAGGING_VIOLATION: 1758 if (net_ratelimit()) 1759 netdev_err(ndev, "TX: CQE error %d\n", 1760 cqe_oob->cqe_hdr.cqe_type); 1761 1762 apc->eth_stats.tx_cqe_err++; 1763 break; 1764 1765 default: 1766 /* If the CQE type is unknown, log an error, 1767 * and still free the SKB, update tail, etc. 1768 */ 1769 if (net_ratelimit()) 1770 netdev_err(ndev, "TX: unknown CQE type %d\n", 1771 cqe_oob->cqe_hdr.cqe_type); 1772 1773 apc->eth_stats.tx_cqe_unknown_type++; 1774 break; 1775 } 1776 1777 if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) 1778 return; 1779 1780 skb = skb_dequeue(&txq->pending_skbs); 1781 if (WARN_ON_ONCE(!skb)) 1782 return; 1783 1784 wqe_info = (struct gdma_posted_wqe_info *)skb->cb; 1785 wqe_unit_cnt += wqe_info->wqe_size_in_bu; 1786 1787 mana_unmap_skb(skb, apc); 1788 1789 napi_consume_skb(skb, cq->budget); 1790 1791 pkt_transmitted++; 1792 } 1793 1794 if (WARN_ON_ONCE(wqe_unit_cnt == 0)) 1795 return; 1796 1797 mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); 1798 1799 gdma_wq = txq->gdma_sq; 1800 avail_space = mana_gd_wq_avail_space(gdma_wq); 1801 1802 /* Ensure tail updated before checking q stop */ 1803 smp_mb(); 1804 1805 net_txq = txq->net_txq; 1806 txq_stopped = netif_tx_queue_stopped(net_txq); 1807 1808 /* Ensure checking txq_stopped before apc->port_is_up. 
*/ 1809 smp_rmb(); 1810 1811 if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { 1812 netif_tx_wake_queue(net_txq); 1813 apc->eth_stats.wake_queue++; 1814 } 1815 1816 if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) 1817 WARN_ON_ONCE(1); 1818 1819 cq->work_done = pkt_transmitted; 1820 } 1821 1822 static void mana_post_pkt_rxq(struct mana_rxq *rxq) 1823 { 1824 struct mana_recv_buf_oob *recv_buf_oob; 1825 u32 curr_index; 1826 int err; 1827 1828 curr_index = rxq->buf_index++; 1829 if (rxq->buf_index == rxq->num_rx_buf) 1830 rxq->buf_index = 0; 1831 1832 recv_buf_oob = &rxq->rx_oobs[curr_index]; 1833 1834 err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req, 1835 &recv_buf_oob->wqe_inf); 1836 if (WARN_ON_ONCE(err)) 1837 return; 1838 1839 WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); 1840 } 1841 1842 static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, 1843 uint pkt_len, struct xdp_buff *xdp) 1844 { 1845 struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size); 1846 1847 if (!skb) 1848 return NULL; 1849 1850 if (xdp->data_hard_start) { 1851 u32 metasize = xdp->data - xdp->data_meta; 1852 1853 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1854 skb_put(skb, xdp->data_end - xdp->data); 1855 if (metasize) 1856 skb_metadata_set(skb, metasize); 1857 return skb; 1858 } 1859 1860 skb_reserve(skb, rxq->headroom); 1861 skb_put(skb, pkt_len); 1862 1863 return skb; 1864 } 1865 1866 static void mana_rx_skb(void *buf_va, bool from_pool, 1867 struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq) 1868 { 1869 struct mana_stats_rx *rx_stats = &rxq->stats; 1870 struct net_device *ndev = rxq->ndev; 1871 uint pkt_len = cqe->ppi[0].pkt_len; 1872 u16 rxq_idx = rxq->rxq_idx; 1873 struct napi_struct *napi; 1874 struct xdp_buff xdp = {}; 1875 struct sk_buff *skb; 1876 u32 hash_value; 1877 u32 act; 1878 1879 rxq->rx_cq.work_done++; 1880 napi = &rxq->rx_cq.napi; 1881 1882 if (!buf_va) { 1883 ++ndev->stats.rx_dropped; 1884 return; 1885 } 1886 1887 act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); 1888 1889 if (act == XDP_REDIRECT && !rxq->xdp_rc) 1890 return; 1891 1892 if (act != XDP_PASS && act != XDP_TX) 1893 goto drop_xdp; 1894 1895 skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp); 1896 1897 if (!skb) 1898 goto drop; 1899 1900 if (from_pool) 1901 skb_mark_for_recycle(skb); 1902 1903 skb->dev = napi->dev; 1904 1905 skb->protocol = eth_type_trans(skb, ndev); 1906 skb_checksum_none_assert(skb); 1907 skb_record_rx_queue(skb, rxq_idx); 1908 1909 if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { 1910 if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) 1911 skb->ip_summed = CHECKSUM_UNNECESSARY; 1912 } 1913 1914 if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { 1915 hash_value = cqe->ppi[0].pkt_hash; 1916 1917 if (cqe->rx_hashtype & MANA_HASH_L4) 1918 skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); 1919 else 1920 skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); 1921 } 1922 1923 if (cqe->rx_vlantag_present) { 1924 u16 vlan_tci = cqe->rx_vlan_id; 1925 1926 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); 1927 } 1928 1929 u64_stats_update_begin(&rx_stats->syncp); 1930 rx_stats->packets++; 1931 rx_stats->bytes += pkt_len; 1932 1933 if (act == XDP_TX) 1934 rx_stats->xdp_tx++; 1935 u64_stats_update_end(&rx_stats->syncp); 1936 1937 if (act == XDP_TX) { 1938 skb_set_queue_mapping(skb, rxq_idx); 1939 mana_xdp_tx(skb, ndev); 1940 return; 1941 } 1942 1943 napi_gro_receive(napi, skb); 1944 
1945 return; 1946 1947 drop_xdp: 1948 u64_stats_update_begin(&rx_stats->syncp); 1949 rx_stats->xdp_drop++; 1950 u64_stats_update_end(&rx_stats->syncp); 1951 1952 drop: 1953 if (from_pool) { 1954 if (rxq->frag_count == 1) 1955 page_pool_recycle_direct(rxq->page_pool, 1956 virt_to_head_page(buf_va)); 1957 else 1958 page_pool_free_va(rxq->page_pool, buf_va, true); 1959 } else { 1960 WARN_ON_ONCE(rxq->xdp_save_va); 1961 /* Save for reuse */ 1962 rxq->xdp_save_va = buf_va; 1963 } 1964 1965 ++ndev->stats.rx_dropped; 1966 1967 return; 1968 } 1969 1970 static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, 1971 dma_addr_t *da, bool *from_pool) 1972 { 1973 struct page *page; 1974 u32 offset; 1975 void *va; 1976 *from_pool = false; 1977 1978 /* Don't use fragments for jumbo frames or XDP where it's 1 fragment 1979 * per page. 1980 */ 1981 if (rxq->frag_count == 1) { 1982 /* Reuse XDP dropped page if available */ 1983 if (rxq->xdp_save_va) { 1984 va = rxq->xdp_save_va; 1985 page = virt_to_head_page(va); 1986 rxq->xdp_save_va = NULL; 1987 } else { 1988 page = page_pool_dev_alloc_pages(rxq->page_pool); 1989 if (!page) 1990 return NULL; 1991 1992 *from_pool = true; 1993 va = page_to_virt(page); 1994 } 1995 1996 *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, 1997 DMA_FROM_DEVICE); 1998 if (dma_mapping_error(dev, *da)) { 1999 mana_put_rx_page(rxq, page, *from_pool); 2000 return NULL; 2001 } 2002 2003 return va; 2004 } 2005 2006 page = page_pool_dev_alloc_frag(rxq->page_pool, &offset, 2007 rxq->alloc_size); 2008 if (!page) 2009 return NULL; 2010 2011 va = page_to_virt(page) + offset; 2012 *da = page_pool_get_dma_addr(page) + offset + rxq->headroom; 2013 *from_pool = true; 2014 2015 return va; 2016 } 2017 2018 /* Allocate frag for rx buffer, and save the old buf */ 2019 static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, 2020 struct mana_recv_buf_oob *rxoob, void **old_buf, 2021 bool *old_fp) 2022 { 2023 bool from_pool; 2024 dma_addr_t da; 2025 void *va; 2026 2027 va = mana_get_rxfrag(rxq, dev, &da, &from_pool); 2028 if (!va) 2029 return; 2030 if (!rxoob->from_pool || rxq->frag_count == 1) 2031 dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, 2032 DMA_FROM_DEVICE); 2033 *old_buf = rxoob->buf_va; 2034 *old_fp = rxoob->from_pool; 2035 2036 rxoob->buf_va = va; 2037 rxoob->sgl[0].address = da; 2038 rxoob->from_pool = from_pool; 2039 } 2040 2041 static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, 2042 struct gdma_comp *cqe) 2043 { 2044 struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; 2045 struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; 2046 struct net_device *ndev = rxq->ndev; 2047 struct mana_recv_buf_oob *rxbuf_oob; 2048 struct mana_port_context *apc; 2049 struct device *dev = gc->dev; 2050 void *old_buf = NULL; 2051 u32 curr, pktlen; 2052 bool old_fp; 2053 2054 apc = netdev_priv(ndev); 2055 2056 switch (oob->cqe_hdr.cqe_type) { 2057 case CQE_RX_OKAY: 2058 break; 2059 2060 case CQE_RX_TRUNCATED: 2061 ++ndev->stats.rx_dropped; 2062 rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; 2063 netdev_warn_once(ndev, "Dropped a truncated packet\n"); 2064 goto drop; 2065 2066 case CQE_RX_COALESCED_4: 2067 netdev_err(ndev, "RX coalescing is unsupported\n"); 2068 apc->eth_stats.rx_coalesced_err++; 2069 return; 2070 2071 case CQE_RX_OBJECT_FENCE: 2072 complete(&rxq->fence_event); 2073 return; 2074 2075 default: 2076 netdev_err(ndev, "Unknown RX CQE type = %d\n", 2077 oob->cqe_hdr.cqe_type); 2078 
apc->eth_stats.rx_cqe_unknown_type++; 2079 return; 2080 } 2081 2082 pktlen = oob->ppi[0].pkt_len; 2083 2084 if (pktlen == 0) { 2085 /* data packets should never have packetlength of zero */ 2086 netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", 2087 rxq->gdma_id, cq->gdma_id, rxq->rxobj); 2088 return; 2089 } 2090 2091 curr = rxq->buf_index; 2092 rxbuf_oob = &rxq->rx_oobs[curr]; 2093 WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); 2094 2095 mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp); 2096 2097 /* Unsuccessful refill will have old_buf == NULL. 2098 * In this case, mana_rx_skb() will drop the packet. 2099 */ 2100 mana_rx_skb(old_buf, old_fp, oob, rxq); 2101 2102 drop: 2103 mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); 2104 2105 mana_post_pkt_rxq(rxq); 2106 } 2107 2108 static void mana_poll_rx_cq(struct mana_cq *cq) 2109 { 2110 struct gdma_comp *comp = cq->gdma_comp_buf; 2111 struct mana_rxq *rxq = cq->rxq; 2112 int comp_read, i; 2113 2114 comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); 2115 WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); 2116 2117 rxq->xdp_flush = false; 2118 2119 for (i = 0; i < comp_read; i++) { 2120 if (WARN_ON_ONCE(comp[i].is_sq)) 2121 return; 2122 2123 /* verify recv cqe references the right rxq */ 2124 if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) 2125 return; 2126 2127 mana_process_rx_cqe(rxq, cq, &comp[i]); 2128 } 2129 2130 if (comp_read > 0) { 2131 struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; 2132 2133 mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq); 2134 } 2135 2136 if (rxq->xdp_flush) 2137 xdp_do_flush(); 2138 } 2139 2140 static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) 2141 { 2142 struct mana_cq *cq = context; 2143 int w; 2144 2145 WARN_ON_ONCE(cq->gdma_cq != gdma_queue); 2146 2147 if (cq->type == MANA_CQ_TYPE_RX) 2148 mana_poll_rx_cq(cq); 2149 else 2150 mana_poll_tx_cq(cq); 2151 2152 w = cq->work_done; 2153 cq->work_done_since_doorbell += w; 2154 2155 if (w < cq->budget) { 2156 mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); 2157 cq->work_done_since_doorbell = 0; 2158 napi_complete_done(&cq->napi, w); 2159 } else if (cq->work_done_since_doorbell > 2160 cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { 2161 /* MANA hardware requires at least one doorbell ring every 8 2162 * wraparounds of CQ even if there is no need to arm the CQ. 2163 * This driver rings the doorbell as soon as we have exceeded 2164 * 4 wraparounds. 
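 * For example, with a CQ that holds 256 entries (queue_size / COMP_ENTRY_SIZE),
 * a plain non-arming doorbell is rung once more than 4 * 256 = 1024
 * completions have been handled since the last ring.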
2165 */ 2166 mana_gd_ring_cq(gdma_queue, 0); 2167 cq->work_done_since_doorbell = 0; 2168 } 2169 2170 return w; 2171 } 2172 2173 static int mana_poll(struct napi_struct *napi, int budget) 2174 { 2175 struct mana_cq *cq = container_of(napi, struct mana_cq, napi); 2176 int w; 2177 2178 cq->work_done = 0; 2179 cq->budget = budget; 2180 2181 w = mana_cq_handler(cq, cq->gdma_cq); 2182 2183 return min(w, budget); 2184 } 2185 2186 static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) 2187 { 2188 struct mana_cq *cq = context; 2189 2190 napi_schedule_irqoff(&cq->napi); 2191 } 2192 2193 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) 2194 { 2195 struct gdma_dev *gd = apc->ac->gdma_dev; 2196 2197 if (!cq->gdma_cq) 2198 return; 2199 2200 mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); 2201 } 2202 2203 static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) 2204 { 2205 struct gdma_dev *gd = apc->ac->gdma_dev; 2206 2207 if (!txq->gdma_sq) 2208 return; 2209 2210 mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); 2211 } 2212 2213 static void mana_destroy_txq(struct mana_port_context *apc) 2214 { 2215 struct napi_struct *napi; 2216 int i; 2217 2218 if (!apc->tx_qp) 2219 return; 2220 2221 for (i = 0; i < apc->num_queues; i++) { 2222 debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); 2223 apc->tx_qp[i].mana_tx_debugfs = NULL; 2224 2225 napi = &apc->tx_qp[i].tx_cq.napi; 2226 if (apc->tx_qp[i].txq.napi_initialized) { 2227 napi_synchronize(napi); 2228 napi_disable_locked(napi); 2229 netif_napi_del_locked(napi); 2230 apc->tx_qp[i].txq.napi_initialized = false; 2231 } 2232 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); 2233 2234 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); 2235 2236 mana_deinit_txq(apc, &apc->tx_qp[i].txq); 2237 } 2238 2239 kfree(apc->tx_qp); 2240 apc->tx_qp = NULL; 2241 } 2242 2243 static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx) 2244 { 2245 struct mana_tx_qp *tx_qp = &apc->tx_qp[idx]; 2246 char qnum[32]; 2247 2248 sprintf(qnum, "TX-%d", idx); 2249 tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); 2250 debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs, 2251 &tx_qp->txq.gdma_sq->head); 2252 debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs, 2253 &tx_qp->txq.gdma_sq->tail); 2254 debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs, 2255 &tx_qp->txq.pending_skbs.qlen); 2256 debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs, 2257 &tx_qp->tx_cq.gdma_cq->head); 2258 debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs, 2259 &tx_qp->tx_cq.gdma_cq->tail); 2260 debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs, 2261 &tx_qp->tx_cq.budget); 2262 debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs, 2263 tx_qp->txq.gdma_sq, &mana_dbg_q_fops); 2264 debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs, 2265 tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops); 2266 } 2267 2268 static int mana_create_txq(struct mana_port_context *apc, 2269 struct net_device *net) 2270 { 2271 struct mana_context *ac = apc->ac; 2272 struct gdma_dev *gd = ac->gdma_dev; 2273 struct mana_obj_spec wq_spec; 2274 struct mana_obj_spec cq_spec; 2275 struct gdma_queue_spec spec; 2276 struct gdma_context *gc; 2277 struct mana_txq *txq; 2278 struct mana_cq *cq; 2279 u32 txq_size; 2280 u32 cq_size; 2281 int err; 2282 int i; 2283 2284 apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp), 2285 GFP_KERNEL); 2286 if 
(!apc->tx_qp) 2287 return -ENOMEM; 2288 2289 /* The minimum size of the WQE is 32 bytes, hence 2290 * apc->tx_queue_size represents the maximum number of WQEs 2291 * the SQ can store. This value is then used to size other queues 2292 * to prevent overflow. 2293 * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED, 2294 * as min val of apc->tx_queue_size is 128 and that would make 2295 * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size 2296 * are always power of two 2297 */ 2298 txq_size = apc->tx_queue_size * 32; 2299 2300 cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; 2301 2302 gc = gd->gdma_context; 2303 2304 for (i = 0; i < apc->num_queues; i++) { 2305 apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; 2306 2307 /* Create SQ */ 2308 txq = &apc->tx_qp[i].txq; 2309 2310 u64_stats_init(&txq->stats.syncp); 2311 txq->ndev = net; 2312 txq->net_txq = netdev_get_tx_queue(net, i); 2313 txq->vp_offset = apc->tx_vp_offset; 2314 txq->napi_initialized = false; 2315 skb_queue_head_init(&txq->pending_skbs); 2316 2317 memset(&spec, 0, sizeof(spec)); 2318 spec.type = GDMA_SQ; 2319 spec.monitor_avl_buf = true; 2320 spec.queue_size = txq_size; 2321 err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); 2322 if (err) 2323 goto out; 2324 2325 /* Create SQ's CQ */ 2326 cq = &apc->tx_qp[i].tx_cq; 2327 cq->type = MANA_CQ_TYPE_TX; 2328 2329 cq->txq = txq; 2330 2331 memset(&spec, 0, sizeof(spec)); 2332 spec.type = GDMA_CQ; 2333 spec.monitor_avl_buf = false; 2334 spec.queue_size = cq_size; 2335 spec.cq.callback = mana_schedule_napi; 2336 spec.cq.parent_eq = ac->eqs[i].eq; 2337 spec.cq.context = cq; 2338 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 2339 if (err) 2340 goto out; 2341 2342 memset(&wq_spec, 0, sizeof(wq_spec)); 2343 memset(&cq_spec, 0, sizeof(cq_spec)); 2344 2345 wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle; 2346 wq_spec.queue_size = txq->gdma_sq->queue_size; 2347 2348 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 2349 cq_spec.queue_size = cq->gdma_cq->queue_size; 2350 cq_spec.modr_ctx_id = 0; 2351 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 2352 2353 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, 2354 &wq_spec, &cq_spec, 2355 &apc->tx_qp[i].tx_object); 2356 2357 if (err) 2358 goto out; 2359 2360 txq->gdma_sq->id = wq_spec.queue_index; 2361 cq->gdma_cq->id = cq_spec.queue_index; 2362 2363 txq->gdma_sq->mem_info.dma_region_handle = 2364 GDMA_INVALID_DMA_REGION; 2365 cq->gdma_cq->mem_info.dma_region_handle = 2366 GDMA_INVALID_DMA_REGION; 2367 2368 txq->gdma_txq_id = txq->gdma_sq->id; 2369 2370 cq->gdma_id = cq->gdma_cq->id; 2371 2372 if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { 2373 err = -EINVAL; 2374 goto out; 2375 } 2376 2377 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2378 2379 mana_create_txq_debugfs(apc, i); 2380 2381 set_bit(NAPI_STATE_NO_BUSY_POLL, &cq->napi.state); 2382 netif_napi_add_locked(net, &cq->napi, mana_poll); 2383 napi_enable_locked(&cq->napi); 2384 txq->napi_initialized = true; 2385 2386 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2387 } 2388 2389 return 0; 2390 out: 2391 netdev_err(net, "Failed to create %d TX queues, %d\n", 2392 apc->num_queues, err); 2393 mana_destroy_txq(apc); 2394 return err; 2395 } 2396 2397 static void mana_destroy_rxq(struct mana_port_context *apc, 2398 struct mana_rxq *rxq, bool napi_initialized) 2399 2400 { 2401 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 2402 struct mana_recv_buf_oob *rx_oob; 2403 struct device *dev = gc->dev; 2404 struct 
napi_struct *napi; 2405 struct page *page; 2406 int i; 2407 2408 if (!rxq) 2409 return; 2410 2411 debugfs_remove_recursive(rxq->mana_rx_debugfs); 2412 rxq->mana_rx_debugfs = NULL; 2413 2414 napi = &rxq->rx_cq.napi; 2415 2416 if (napi_initialized) { 2417 napi_synchronize(napi); 2418 2419 napi_disable_locked(napi); 2420 netif_napi_del_locked(napi); 2421 } 2422 xdp_rxq_info_unreg(&rxq->xdp_rxq); 2423 2424 mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); 2425 2426 mana_deinit_cq(apc, &rxq->rx_cq); 2427 2428 if (rxq->xdp_save_va) 2429 put_page(virt_to_head_page(rxq->xdp_save_va)); 2430 2431 for (i = 0; i < rxq->num_rx_buf; i++) { 2432 rx_oob = &rxq->rx_oobs[i]; 2433 2434 if (!rx_oob->buf_va) 2435 continue; 2436 2437 page = virt_to_head_page(rx_oob->buf_va); 2438 2439 if (rxq->frag_count == 1 || !rx_oob->from_pool) { 2440 dma_unmap_single(dev, rx_oob->sgl[0].address, 2441 rx_oob->sgl[0].size, DMA_FROM_DEVICE); 2442 mana_put_rx_page(rxq, page, rx_oob->from_pool); 2443 } else { 2444 page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true); 2445 } 2446 2447 rx_oob->buf_va = NULL; 2448 } 2449 2450 page_pool_destroy(rxq->page_pool); 2451 2452 if (rxq->gdma_rq) 2453 mana_gd_destroy_queue(gc, rxq->gdma_rq); 2454 2455 kfree(rxq); 2456 } 2457 2458 static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, 2459 struct mana_rxq *rxq, struct device *dev) 2460 { 2461 struct mana_port_context *mpc = netdev_priv(rxq->ndev); 2462 bool from_pool = false; 2463 dma_addr_t da; 2464 void *va; 2465 2466 if (mpc->rxbufs_pre) 2467 va = mana_get_rxbuf_pre(rxq, &da); 2468 else 2469 va = mana_get_rxfrag(rxq, dev, &da, &from_pool); 2470 2471 if (!va) 2472 return -ENOMEM; 2473 2474 rx_oob->buf_va = va; 2475 rx_oob->from_pool = from_pool; 2476 2477 rx_oob->sgl[0].address = da; 2478 rx_oob->sgl[0].size = rxq->datasize; 2479 rx_oob->sgl[0].mem_key = mem_key; 2480 2481 return 0; 2482 } 2483 2484 #define MANA_WQE_HEADER_SIZE 16 2485 #define MANA_WQE_SGE_SIZE 16 2486 2487 static int mana_alloc_rx_wqe(struct mana_port_context *apc, 2488 struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) 2489 { 2490 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 2491 struct mana_recv_buf_oob *rx_oob; 2492 struct device *dev = gc->dev; 2493 u32 buf_idx; 2494 int ret; 2495 2496 WARN_ON(rxq->datasize == 0); 2497 2498 *rxq_size = 0; 2499 *cq_size = 0; 2500 2501 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 2502 rx_oob = &rxq->rx_oobs[buf_idx]; 2503 memset(rx_oob, 0, sizeof(*rx_oob)); 2504 2505 rx_oob->num_sge = 1; 2506 2507 ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq, 2508 dev); 2509 if (ret) 2510 return ret; 2511 2512 rx_oob->wqe_req.sgl = rx_oob->sgl; 2513 rx_oob->wqe_req.num_sge = rx_oob->num_sge; 2514 rx_oob->wqe_req.inline_oob_size = 0; 2515 rx_oob->wqe_req.inline_oob_data = NULL; 2516 rx_oob->wqe_req.flags = 0; 2517 rx_oob->wqe_req.client_data_unit = 0; 2518 2519 *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + 2520 MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); 2521 *cq_size += COMP_ENTRY_SIZE; 2522 } 2523 2524 return 0; 2525 } 2526 2527 static int mana_push_wqe(struct mana_rxq *rxq) 2528 { 2529 struct mana_recv_buf_oob *rx_oob; 2530 u32 buf_idx; 2531 int err; 2532 2533 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 2534 rx_oob = &rxq->rx_oobs[buf_idx]; 2535 2536 err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, 2537 &rx_oob->wqe_inf); 2538 if (err) 2539 return -ENOSPC; 2540 } 2541 2542 return 0; 2543 } 2544 2545 static int mana_create_page_pool(struct mana_rxq *rxq, struct 
gdma_context *gc) 2546 { 2547 struct mana_port_context *mpc = netdev_priv(rxq->ndev); 2548 struct page_pool_params pprm = {}; 2549 int ret; 2550 2551 pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1; 2552 pprm.nid = gc->numa_node; 2553 pprm.napi = &rxq->rx_cq.napi; 2554 pprm.netdev = rxq->ndev; 2555 pprm.order = get_order(rxq->alloc_size); 2556 pprm.queue_idx = rxq->rxq_idx; 2557 pprm.dev = gc->dev; 2558 2559 /* Let the page pool do the dma map when page sharing with multiple 2560 * fragments enabled for rx buffers. 2561 */ 2562 if (rxq->frag_count > 1) { 2563 pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 2564 pprm.max_len = PAGE_SIZE; 2565 pprm.dma_dir = DMA_FROM_DEVICE; 2566 } 2567 2568 rxq->page_pool = page_pool_create(&pprm); 2569 2570 if (IS_ERR(rxq->page_pool)) { 2571 ret = PTR_ERR(rxq->page_pool); 2572 rxq->page_pool = NULL; 2573 return ret; 2574 } 2575 2576 return 0; 2577 } 2578 2579 static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, 2580 u32 rxq_idx, struct mana_eq *eq, 2581 struct net_device *ndev) 2582 { 2583 struct gdma_dev *gd = apc->ac->gdma_dev; 2584 struct mana_obj_spec wq_spec; 2585 struct mana_obj_spec cq_spec; 2586 struct gdma_queue_spec spec; 2587 struct mana_cq *cq = NULL; 2588 struct gdma_context *gc; 2589 u32 cq_size, rq_size; 2590 struct mana_rxq *rxq; 2591 int err; 2592 2593 gc = gd->gdma_context; 2594 2595 rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size), 2596 GFP_KERNEL); 2597 if (!rxq) 2598 return NULL; 2599 2600 rxq->ndev = ndev; 2601 rxq->num_rx_buf = apc->rx_queue_size; 2602 rxq->rxq_idx = rxq_idx; 2603 rxq->rxobj = INVALID_MANA_HANDLE; 2604 2605 mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size, 2606 &rxq->headroom, &rxq->frag_count); 2607 /* Create page pool for RX queue */ 2608 err = mana_create_page_pool(rxq, gc); 2609 if (err) { 2610 netdev_err(ndev, "Create page pool err:%d\n", err); 2611 goto out; 2612 } 2613 2614 err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); 2615 if (err) 2616 goto out; 2617 2618 rq_size = MANA_PAGE_ALIGN(rq_size); 2619 cq_size = MANA_PAGE_ALIGN(cq_size); 2620 2621 /* Create RQ */ 2622 memset(&spec, 0, sizeof(spec)); 2623 spec.type = GDMA_RQ; 2624 spec.monitor_avl_buf = true; 2625 spec.queue_size = rq_size; 2626 err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); 2627 if (err) 2628 goto out; 2629 2630 /* Create RQ's CQ */ 2631 cq = &rxq->rx_cq; 2632 cq->type = MANA_CQ_TYPE_RX; 2633 cq->rxq = rxq; 2634 2635 memset(&spec, 0, sizeof(spec)); 2636 spec.type = GDMA_CQ; 2637 spec.monitor_avl_buf = false; 2638 spec.queue_size = cq_size; 2639 spec.cq.callback = mana_schedule_napi; 2640 spec.cq.parent_eq = eq->eq; 2641 spec.cq.context = cq; 2642 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 2643 if (err) 2644 goto out; 2645 2646 memset(&wq_spec, 0, sizeof(wq_spec)); 2647 memset(&cq_spec, 0, sizeof(cq_spec)); 2648 wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle; 2649 wq_spec.queue_size = rxq->gdma_rq->queue_size; 2650 2651 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 2652 cq_spec.queue_size = cq->gdma_cq->queue_size; 2653 cq_spec.modr_ctx_id = 0; 2654 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 2655 2656 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, 2657 &wq_spec, &cq_spec, &rxq->rxobj); 2658 if (err) 2659 goto out; 2660 2661 rxq->gdma_rq->id = wq_spec.queue_index; 2662 cq->gdma_cq->id = cq_spec.queue_index; 2663 2664 rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 2665 
cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 2666 2667 rxq->gdma_id = rxq->gdma_rq->id; 2668 cq->gdma_id = cq->gdma_cq->id; 2669 2670 err = mana_push_wqe(rxq); 2671 if (err) 2672 goto out; 2673 2674 if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { 2675 err = -EINVAL; 2676 goto out; 2677 } 2678 2679 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2680 2681 netif_napi_add_weight_locked(ndev, &cq->napi, mana_poll, 1); 2682 2683 WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, 2684 cq->napi.napi_id)); 2685 WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, 2686 rxq->page_pool)); 2687 2688 napi_enable_locked(&cq->napi); 2689 2690 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2691 out: 2692 if (!err) 2693 return rxq; 2694 2695 netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); 2696 2697 mana_destroy_rxq(apc, rxq, false); 2698 2699 if (cq) 2700 mana_deinit_cq(apc, cq); 2701 2702 return NULL; 2703 } 2704 2705 static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx) 2706 { 2707 struct mana_rxq *rxq; 2708 char qnum[32]; 2709 2710 rxq = apc->rxqs[idx]; 2711 2712 sprintf(qnum, "RX-%d", idx); 2713 rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); 2714 debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head); 2715 debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail); 2716 debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf); 2717 debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs, 2718 &rxq->rx_cq.gdma_cq->head); 2719 debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs, 2720 &rxq->rx_cq.gdma_cq->tail); 2721 debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget); 2722 debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops); 2723 debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, 2724 &mana_dbg_q_fops); 2725 } 2726 2727 static int mana_add_rx_queues(struct mana_port_context *apc, 2728 struct net_device *ndev) 2729 { 2730 struct mana_context *ac = apc->ac; 2731 struct mana_rxq *rxq; 2732 int err = 0; 2733 int i; 2734 2735 for (i = 0; i < apc->num_queues; i++) { 2736 rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); 2737 if (!rxq) { 2738 err = -ENOMEM; 2739 netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); 2740 goto out; 2741 } 2742 2743 u64_stats_init(&rxq->stats.syncp); 2744 2745 apc->rxqs[i] = rxq; 2746 2747 mana_create_rxq_debugfs(apc, i); 2748 } 2749 2750 apc->default_rxobj = apc->rxqs[0]->rxobj; 2751 out: 2752 return err; 2753 } 2754 2755 static void mana_destroy_vport(struct mana_port_context *apc) 2756 { 2757 struct gdma_dev *gd = apc->ac->gdma_dev; 2758 struct mana_rxq *rxq; 2759 u32 rxq_idx; 2760 2761 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 2762 rxq = apc->rxqs[rxq_idx]; 2763 if (!rxq) 2764 continue; 2765 2766 mana_destroy_rxq(apc, rxq, true); 2767 apc->rxqs[rxq_idx] = NULL; 2768 } 2769 2770 mana_destroy_txq(apc); 2771 mana_uncfg_vport(apc); 2772 2773 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) 2774 mana_pf_deregister_hw_vport(apc); 2775 } 2776 2777 static int mana_create_vport(struct mana_port_context *apc, 2778 struct net_device *net) 2779 { 2780 struct gdma_dev *gd = apc->ac->gdma_dev; 2781 int err; 2782 2783 apc->default_rxobj = INVALID_MANA_HANDLE; 2784 2785 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { 2786 err = mana_pf_register_hw_vport(apc); 2787 if (err) 2788 return err; 2789 } 2790 2791 
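/* Bind the vPort to this device's protection domain and doorbell page before creating the TX queues that will post work on it. */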
err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); 2792 if (err) 2793 return err; 2794 2795 return mana_create_txq(apc, net); 2796 } 2797 2798 static int mana_rss_table_alloc(struct mana_port_context *apc) 2799 { 2800 if (!apc->indir_table_sz) { 2801 netdev_err(apc->ndev, 2802 "Indirection table size not set for vPort %d\n", 2803 apc->port_idx); 2804 return -EINVAL; 2805 } 2806 2807 apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); 2808 if (!apc->indir_table) 2809 return -ENOMEM; 2810 2811 apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); 2812 if (!apc->rxobj_table) { 2813 kfree(apc->indir_table); 2814 return -ENOMEM; 2815 } 2816 2817 return 0; 2818 } 2819 2820 static void mana_rss_table_init(struct mana_port_context *apc) 2821 { 2822 int i; 2823 2824 for (i = 0; i < apc->indir_table_sz; i++) 2825 apc->indir_table[i] = 2826 ethtool_rxfh_indir_default(i, apc->num_queues); 2827 } 2828 2829 int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, 2830 bool update_hash, bool update_tab) 2831 { 2832 u32 queue_idx; 2833 int err; 2834 int i; 2835 2836 if (update_tab) { 2837 for (i = 0; i < apc->indir_table_sz; i++) { 2838 queue_idx = apc->indir_table[i]; 2839 apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; 2840 } 2841 } 2842 2843 err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); 2844 if (err) 2845 return err; 2846 2847 mana_fence_rqs(apc); 2848 2849 return 0; 2850 } 2851 2852 int mana_query_gf_stats(struct mana_context *ac) 2853 { 2854 struct gdma_context *gc = ac->gdma_dev->gdma_context; 2855 struct mana_query_gf_stat_resp resp = {}; 2856 struct mana_query_gf_stat_req req = {}; 2857 struct device *dev = gc->dev; 2858 int err; 2859 2860 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, 2861 sizeof(req), sizeof(resp)); 2862 req.hdr.resp.msg_version = GDMA_MESSAGE_V2; 2863 req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE | 2864 STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED | 2865 STATISTICS_FLAGS_HC_RX_BYTES | 2866 STATISTICS_FLAGS_HC_RX_UCAST_PACKETS | 2867 STATISTICS_FLAGS_HC_RX_UCAST_BYTES | 2868 STATISTICS_FLAGS_HC_RX_MCAST_PACKETS | 2869 STATISTICS_FLAGS_HC_RX_MCAST_BYTES | 2870 STATISTICS_FLAGS_HC_RX_BCAST_PACKETS | 2871 STATISTICS_FLAGS_HC_RX_BCAST_BYTES | 2872 STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED | 2873 STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED | 2874 STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS | 2875 STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT | 2876 STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT | 2877 STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT | 2878 STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT | 2879 STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT | 2880 STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION | 2881 STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB | 2882 STATISTICS_FLAGS_HC_TX_BYTES | 2883 STATISTICS_FLAGS_HC_TX_UCAST_PACKETS | 2884 STATISTICS_FLAGS_HC_TX_UCAST_BYTES | 2885 STATISTICS_FLAGS_HC_TX_MCAST_PACKETS | 2886 STATISTICS_FLAGS_HC_TX_MCAST_BYTES | 2887 STATISTICS_FLAGS_HC_TX_BCAST_PACKETS | 2888 STATISTICS_FLAGS_HC_TX_BCAST_BYTES | 2889 STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR; 2890 2891 err = mana_send_request(ac, &req, sizeof(req), &resp, 2892 sizeof(resp)); 2893 if (err) { 2894 dev_err(dev, "Failed to query GF stats: %d\n", err); 2895 return err; 2896 } 2897 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT, 2898 sizeof(resp)); 2899 if (err || resp.hdr.status) { 2900 dev_err(dev, "Failed to query GF stats: %d, 0x%x\n", err, 2901 resp.hdr.status); 2902 return err; 2903 } 2904 
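/* Snapshot the hardware-reported counters into the locally cached hc_stats. */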
2905 ac->hc_stats.hc_rx_discards_no_wqe = resp.rx_discards_nowqe; 2906 ac->hc_stats.hc_rx_err_vport_disabled = resp.rx_err_vport_disabled; 2907 ac->hc_stats.hc_rx_bytes = resp.hc_rx_bytes; 2908 ac->hc_stats.hc_rx_ucast_pkts = resp.hc_rx_ucast_pkts; 2909 ac->hc_stats.hc_rx_ucast_bytes = resp.hc_rx_ucast_bytes; 2910 ac->hc_stats.hc_rx_bcast_pkts = resp.hc_rx_bcast_pkts; 2911 ac->hc_stats.hc_rx_bcast_bytes = resp.hc_rx_bcast_bytes; 2912 ac->hc_stats.hc_rx_mcast_pkts = resp.hc_rx_mcast_pkts; 2913 ac->hc_stats.hc_rx_mcast_bytes = resp.hc_rx_mcast_bytes; 2914 ac->hc_stats.hc_tx_err_gf_disabled = resp.tx_err_gf_disabled; 2915 ac->hc_stats.hc_tx_err_vport_disabled = resp.tx_err_vport_disabled; 2916 ac->hc_stats.hc_tx_err_inval_vportoffset_pkt = 2917 resp.tx_err_inval_vport_offset_pkt; 2918 ac->hc_stats.hc_tx_err_vlan_enforcement = 2919 resp.tx_err_vlan_enforcement; 2920 ac->hc_stats.hc_tx_err_eth_type_enforcement = 2921 resp.tx_err_ethtype_enforcement; 2922 ac->hc_stats.hc_tx_err_sa_enforcement = resp.tx_err_SA_enforcement; 2923 ac->hc_stats.hc_tx_err_sqpdid_enforcement = 2924 resp.tx_err_SQPDID_enforcement; 2925 ac->hc_stats.hc_tx_err_cqpdid_enforcement = 2926 resp.tx_err_CQPDID_enforcement; 2927 ac->hc_stats.hc_tx_err_mtu_violation = resp.tx_err_mtu_violation; 2928 ac->hc_stats.hc_tx_err_inval_oob = resp.tx_err_inval_oob; 2929 ac->hc_stats.hc_tx_bytes = resp.hc_tx_bytes; 2930 ac->hc_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts; 2931 ac->hc_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes; 2932 ac->hc_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts; 2933 ac->hc_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes; 2934 ac->hc_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts; 2935 ac->hc_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes; 2936 ac->hc_stats.hc_tx_err_gdma = resp.tx_err_gdma; 2937 2938 return 0; 2939 } 2940 2941 void mana_query_phy_stats(struct mana_port_context *apc) 2942 { 2943 struct mana_query_phy_stat_resp resp = {}; 2944 struct mana_query_phy_stat_req req = {}; 2945 struct net_device *ndev = apc->ndev; 2946 int err; 2947 2948 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT, 2949 sizeof(req), sizeof(resp)); 2950 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 2951 sizeof(resp)); 2952 if (err) 2953 return; 2954 2955 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT, 2956 sizeof(resp)); 2957 if (err || resp.hdr.status) { 2958 netdev_err(ndev, 2959 "Failed to query PHY stats: %d, resp:0x%x\n", 2960 err, resp.hdr.status); 2961 return; 2962 } 2963 2964 /* Aggregate drop counters */ 2965 apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy; 2966 apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy; 2967 2968 /* Per TC traffic Counters */ 2969 apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy; 2970 apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy; 2971 apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy; 2972 apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy; 2973 apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy; 2974 apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy; 2975 apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy; 2976 apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy; 2977 apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy; 2978 apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy; 2979 apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy; 2980 apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy; 2981 apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy; 2982 apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy; 2983 
apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy; 2984 apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy; 2985 2986 /* Per TC byte Counters */ 2987 apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy; 2988 apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy; 2989 apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy; 2990 apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy; 2991 apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy; 2992 apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy; 2993 apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy; 2994 apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy; 2995 apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy; 2996 apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy; 2997 apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy; 2998 apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy; 2999 apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy; 3000 apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy; 3001 apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy; 3002 apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy; 3003 3004 /* Per TC pause Counters */ 3005 apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy; 3006 apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy; 3007 apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy; 3008 apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy; 3009 apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy; 3010 apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy; 3011 apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy; 3012 apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy; 3013 apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy; 3014 apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy; 3015 apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy; 3016 apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy; 3017 apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy; 3018 apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy; 3019 apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy; 3020 apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy; 3021 } 3022 3023 static int mana_init_port(struct net_device *ndev) 3024 { 3025 struct mana_port_context *apc = netdev_priv(ndev); 3026 struct gdma_dev *gd = apc->ac->gdma_dev; 3027 u32 max_txq, max_rxq, max_queues; 3028 int port_idx = apc->port_idx; 3029 struct gdma_context *gc; 3030 char vport[32]; 3031 int err; 3032 3033 err = mana_init_port_context(apc); 3034 if (err) 3035 return err; 3036 3037 gc = gd->gdma_context; 3038 3039 err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, 3040 &apc->indir_table_sz); 3041 if (err) { 3042 netdev_err(ndev, "Failed to query info for vPort %d\n", 3043 port_idx); 3044 goto reset_apc; 3045 } 3046 3047 max_queues = min_t(u32, max_txq, max_rxq); 3048 if (apc->max_queues > max_queues) 3049 apc->max_queues = max_queues; 3050 3051 if (apc->num_queues > apc->max_queues) 3052 apc->num_queues = apc->max_queues; 3053 3054 eth_hw_addr_set(ndev, apc->mac_addr); 3055 sprintf(vport, "vport%d", port_idx); 3056 apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); 3057 return 0; 3058 3059 reset_apc: 3060 mana_cleanup_port_context(apc); 3061 return err; 3062 } 3063 3064 int mana_alloc_queues(struct net_device *ndev) 3065 { 3066 struct mana_port_context *apc = netdev_priv(ndev); 3067 struct gdma_dev *gd = apc->ac->gdma_dev; 3068 int err; 3069 3070 err = mana_create_vport(apc, ndev); 3071 if (err) { 3072 netdev_err(ndev, "Failed to create vPort 
%u : %d\n", apc->port_idx, err); 3073 return err; 3074 } 3075 3076 err = netif_set_real_num_tx_queues(ndev, apc->num_queues); 3077 if (err) { 3078 netdev_err(ndev, 3079 "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", 3080 apc->num_queues, err); 3081 goto destroy_vport; 3082 } 3083 3084 err = mana_add_rx_queues(apc, ndev); 3085 if (err) 3086 goto destroy_vport; 3087 3088 apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; 3089 3090 err = netif_set_real_num_rx_queues(ndev, apc->num_queues); 3091 if (err) { 3092 netdev_err(ndev, 3093 "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", 3094 apc->num_queues, err); 3095 goto destroy_vport; 3096 } 3097 3098 mana_rss_table_init(apc); 3099 3100 err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); 3101 if (err) { 3102 netdev_err(ndev, "Failed to configure RSS table: %d\n", err); 3103 goto destroy_vport; 3104 } 3105 3106 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { 3107 err = mana_pf_register_filter(apc); 3108 if (err) 3109 goto destroy_vport; 3110 } 3111 3112 mana_chn_setxdp(apc, mana_xdp_get(apc)); 3113 3114 return 0; 3115 3116 destroy_vport: 3117 mana_destroy_vport(apc); 3118 return err; 3119 } 3120 3121 int mana_attach(struct net_device *ndev) 3122 { 3123 struct mana_port_context *apc = netdev_priv(ndev); 3124 int err; 3125 3126 ASSERT_RTNL(); 3127 3128 err = mana_init_port(ndev); 3129 if (err) 3130 return err; 3131 3132 if (apc->port_st_save) { 3133 err = mana_alloc_queues(ndev); 3134 if (err) { 3135 mana_cleanup_port_context(apc); 3136 return err; 3137 } 3138 } 3139 3140 apc->port_is_up = apc->port_st_save; 3141 3142 /* Ensure port state updated before txq state */ 3143 smp_wmb(); 3144 3145 netif_device_attach(ndev); 3146 3147 return 0; 3148 } 3149 3150 static int mana_dealloc_queues(struct net_device *ndev) 3151 { 3152 struct mana_port_context *apc = netdev_priv(ndev); 3153 unsigned long timeout = jiffies + 120 * HZ; 3154 struct gdma_dev *gd = apc->ac->gdma_dev; 3155 struct mana_txq *txq; 3156 struct sk_buff *skb; 3157 int i, err; 3158 u32 tsleep; 3159 3160 if (apc->port_is_up) 3161 return -EINVAL; 3162 3163 mana_chn_setxdp(apc, NULL); 3164 3165 if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) 3166 mana_pf_deregister_filter(apc); 3167 3168 /* No packet can be transmitted now since apc->port_is_up is false. 3169 * There is still a tiny chance that mana_poll_tx_cq() can re-enable 3170 * a txq because it may not timely see apc->port_is_up being cleared 3171 * to false, but it doesn't matter since mana_start_xmit() drops any 3172 * new packets due to apc->port_is_up being false. 3173 * 3174 * Drain all the in-flight TX packets. 3175 * A timeout of 120 seconds for all the queues is used. 3176 * This will break the while loop when h/w is not responding. 3177 * This value of 120 has been decided here considering max 3178 * number of queues. 
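 * Each queue is polled with an exponentially growing sleep, starting at 1 ms,
 * until its pending_sends count drains or the deadline expires; if packets are
 * still pending after that, a PCIe function level reset is attempted below.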
3179 */ 3180 3181 for (i = 0; i < apc->num_queues; i++) { 3182 txq = &apc->tx_qp[i].txq; 3183 tsleep = 1000; 3184 while (atomic_read(&txq->pending_sends) > 0 && 3185 time_before(jiffies, timeout)) { 3186 usleep_range(tsleep, tsleep + 1000); 3187 tsleep <<= 1; 3188 } 3189 if (atomic_read(&txq->pending_sends)) { 3190 err = pcie_flr(to_pci_dev(gd->gdma_context->dev)); 3191 if (err) { 3192 netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", 3193 err, atomic_read(&txq->pending_sends), 3194 txq->gdma_txq_id); 3195 } 3196 break; 3197 } 3198 } 3199 3200 for (i = 0; i < apc->num_queues; i++) { 3201 txq = &apc->tx_qp[i].txq; 3202 while ((skb = skb_dequeue(&txq->pending_skbs))) { 3203 mana_unmap_skb(skb, apc); 3204 dev_kfree_skb_any(skb); 3205 } 3206 atomic_set(&txq->pending_sends, 0); 3207 } 3208 /* We're 100% sure the queues can no longer be woken up, because 3209 * we're sure now mana_poll_tx_cq() can't be running. 3210 */ 3211 3212 apc->rss_state = TRI_STATE_FALSE; 3213 err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); 3214 if (err && mana_en_need_log(apc, err)) 3215 netdev_err(ndev, "Failed to disable vPort: %d\n", err); 3216 3217 /* Even in err case, still need to cleanup the vPort */ 3218 mana_destroy_vport(apc); 3219 3220 return 0; 3221 } 3222 3223 int mana_detach(struct net_device *ndev, bool from_close) 3224 { 3225 struct mana_port_context *apc = netdev_priv(ndev); 3226 int err; 3227 3228 ASSERT_RTNL(); 3229 3230 apc->port_st_save = apc->port_is_up; 3231 apc->port_is_up = false; 3232 3233 /* Ensure port state updated before txq state */ 3234 smp_wmb(); 3235 3236 netif_tx_disable(ndev); 3237 3238 if (apc->port_st_save) { 3239 err = mana_dealloc_queues(ndev); 3240 if (err) { 3241 netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); 3242 return err; 3243 } 3244 } 3245 3246 if (!from_close) { 3247 netif_device_detach(ndev); 3248 mana_cleanup_port_context(apc); 3249 } 3250 3251 return 0; 3252 } 3253 3254 static int mana_probe_port(struct mana_context *ac, int port_idx, 3255 struct net_device **ndev_storage) 3256 { 3257 struct gdma_context *gc = ac->gdma_dev->gdma_context; 3258 struct mana_port_context *apc; 3259 struct net_device *ndev; 3260 int err; 3261 3262 ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), 3263 gc->max_num_queues); 3264 if (!ndev) 3265 return -ENOMEM; 3266 3267 *ndev_storage = ndev; 3268 3269 apc = netdev_priv(ndev); 3270 apc->ac = ac; 3271 apc->ndev = ndev; 3272 apc->max_queues = gc->max_num_queues; 3273 apc->num_queues = gc->max_num_queues; 3274 apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; 3275 apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; 3276 apc->port_handle = INVALID_MANA_HANDLE; 3277 apc->pf_filter_handle = INVALID_MANA_HANDLE; 3278 apc->port_idx = port_idx; 3279 3280 mutex_init(&apc->vport_mutex); 3281 apc->vport_use_count = 0; 3282 3283 ndev->netdev_ops = &mana_devops; 3284 ndev->ethtool_ops = &mana_ethtool_ops; 3285 ndev->mtu = ETH_DATA_LEN; 3286 ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; 3287 ndev->min_mtu = ETH_MIN_MTU; 3288 ndev->needed_headroom = MANA_HEADROOM; 3289 ndev->dev_port = port_idx; 3290 SET_NETDEV_DEV(ndev, gc->dev); 3291 3292 netif_set_tso_max_size(ndev, GSO_MAX_SIZE); 3293 3294 netif_carrier_off(ndev); 3295 3296 netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); 3297 3298 err = mana_init_port(ndev); 3299 if (err) 3300 goto free_net; 3301 3302 err = mana_rss_table_alloc(apc); 3303 if (err) 3304 goto reset_apc; 3305 3306 netdev_lockdep_set_classes(ndev); 3307 3308 ndev->hw_features = 
NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; 3309 ndev->hw_features |= NETIF_F_RXCSUM; 3310 ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; 3311 ndev->hw_features |= NETIF_F_RXHASH; 3312 ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | 3313 NETIF_F_HW_VLAN_CTAG_RX; 3314 ndev->vlan_features = ndev->features; 3315 xdp_set_features_flag(ndev, NETDEV_XDP_ACT_BASIC | 3316 NETDEV_XDP_ACT_REDIRECT | 3317 NETDEV_XDP_ACT_NDO_XMIT); 3318 3319 err = register_netdev(ndev); 3320 if (err) { 3321 netdev_err(ndev, "Unable to register netdev.\n"); 3322 goto free_indir; 3323 } 3324 3325 netif_carrier_on(ndev); 3326 3327 debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, &apc->speed); 3328 3329 return 0; 3330 3331 free_indir: 3332 mana_cleanup_indir_table(apc); 3333 reset_apc: 3334 mana_cleanup_port_context(apc); 3335 free_net: 3336 *ndev_storage = NULL; 3337 netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); 3338 free_netdev(ndev); 3339 return err; 3340 } 3341 3342 static void adev_release(struct device *dev) 3343 { 3344 struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev); 3345 3346 kfree(madev); 3347 } 3348 3349 static void remove_adev(struct gdma_dev *gd) 3350 { 3351 struct auxiliary_device *adev = gd->adev; 3352 int id = adev->id; 3353 3354 auxiliary_device_delete(adev); 3355 auxiliary_device_uninit(adev); 3356 3357 mana_adev_idx_free(id); 3358 gd->adev = NULL; 3359 } 3360 3361 static int add_adev(struct gdma_dev *gd, const char *name) 3362 { 3363 struct auxiliary_device *adev; 3364 struct mana_adev *madev; 3365 int ret; 3366 3367 madev = kzalloc(sizeof(*madev), GFP_KERNEL); 3368 if (!madev) 3369 return -ENOMEM; 3370 3371 adev = &madev->adev; 3372 ret = mana_adev_idx_alloc(); 3373 if (ret < 0) 3374 goto idx_fail; 3375 adev->id = ret; 3376 3377 adev->name = name; 3378 adev->dev.parent = gd->gdma_context->dev; 3379 adev->dev.release = adev_release; 3380 madev->mdev = gd; 3381 3382 ret = auxiliary_device_init(adev); 3383 if (ret) 3384 goto init_fail; 3385 3386 /* madev is owned by the auxiliary device */ 3387 madev = NULL; 3388 ret = auxiliary_device_add(adev); 3389 if (ret) 3390 goto add_fail; 3391 3392 gd->adev = adev; 3393 dev_dbg(gd->gdma_context->dev, 3394 "Auxiliary device added successfully\n"); 3395 return 0; 3396 3397 add_fail: 3398 auxiliary_device_uninit(adev); 3399 3400 init_fail: 3401 mana_adev_idx_free(adev->id); 3402 3403 idx_fail: 3404 kfree(madev); 3405 3406 return ret; 3407 } 3408 3409 static void mana_rdma_service_handle(struct work_struct *work) 3410 { 3411 struct mana_service_work *serv_work = 3412 container_of(work, struct mana_service_work, work); 3413 struct gdma_dev *gd = serv_work->gdma_dev; 3414 struct device *dev = gd->gdma_context->dev; 3415 int ret; 3416 3417 if (READ_ONCE(gd->rdma_teardown)) 3418 goto out; 3419 3420 switch (serv_work->event) { 3421 case GDMA_SERVICE_TYPE_RDMA_SUSPEND: 3422 if (!gd->adev || gd->is_suspended) 3423 break; 3424 3425 remove_adev(gd); 3426 gd->is_suspended = true; 3427 break; 3428 3429 case GDMA_SERVICE_TYPE_RDMA_RESUME: 3430 if (!gd->is_suspended) 3431 break; 3432 3433 ret = add_adev(gd, "rdma"); 3434 if (ret) 3435 dev_err(dev, "Failed to add adev on resume: %d\n", ret); 3436 else 3437 gd->is_suspended = false; 3438 break; 3439 3440 default: 3441 dev_warn(dev, "unknown adev service event %u\n", 3442 serv_work->event); 3443 break; 3444 } 3445 3446 out: 3447 kfree(serv_work); 3448 } 3449 3450 int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event) 3451 { 
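/* The caller may be in atomic context (e.g. an EQ/HWC event callback), so the
 * work item below is allocated with GFP_ATOMIC and the actual suspend/resume
 * handling is deferred to the service workqueue.
 */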
3452 struct gdma_dev *gd = &gc->mana_ib; 3453 struct mana_service_work *serv_work; 3454 3455 if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { 3456 /* RDMA device is not detected on pci */ 3457 return 0; 3458 } 3459 3460 serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC); 3461 if (!serv_work) 3462 return -ENOMEM; 3463 3464 serv_work->event = event; 3465 serv_work->gdma_dev = gd; 3466 3467 INIT_WORK(&serv_work->work, mana_rdma_service_handle); 3468 queue_work(gc->service_wq, &serv_work->work); 3469 3470 return 0; 3471 } 3472 3473 #define MANA_GF_STATS_PERIOD (2 * HZ) 3474 3475 static void mana_gf_stats_work_handler(struct work_struct *work) 3476 { 3477 struct mana_context *ac = 3478 container_of(to_delayed_work(work), struct mana_context, gf_stats_work); 3479 int err; 3480 3481 err = mana_query_gf_stats(ac); 3482 if (err == -ETIMEDOUT) { 3483 /* HWC timeout detected - reset stats and stop rescheduling */ 3484 ac->hwc_timeout_occurred = true; 3485 memset(&ac->hc_stats, 0, sizeof(ac->hc_stats)); 3486 return; 3487 } 3488 schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD); 3489 } 3490 3491 int mana_probe(struct gdma_dev *gd, bool resuming) 3492 { 3493 struct gdma_context *gc = gd->gdma_context; 3494 struct mana_context *ac = gd->driver_data; 3495 struct device *dev = gc->dev; 3496 u8 bm_hostmode = 0; 3497 u16 num_ports = 0; 3498 int err; 3499 int i; 3500 3501 dev_info(dev, 3502 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", 3503 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); 3504 3505 err = mana_gd_register_device(gd); 3506 if (err) 3507 return err; 3508 3509 if (!resuming) { 3510 ac = kzalloc(sizeof(*ac), GFP_KERNEL); 3511 if (!ac) 3512 return -ENOMEM; 3513 3514 ac->gdma_dev = gd; 3515 gd->driver_data = ac; 3516 } 3517 3518 err = mana_create_eq(ac); 3519 if (err) { 3520 dev_err(dev, "Failed to create EQs: %d\n", err); 3521 goto out; 3522 } 3523 3524 err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, 3525 MANA_MICRO_VERSION, &num_ports, &bm_hostmode); 3526 if (err) 3527 goto out; 3528 3529 ac->bm_hostmode = bm_hostmode; 3530 3531 if (!resuming) { 3532 ac->num_ports = num_ports; 3533 3534 INIT_WORK(&ac->link_change_work, mana_link_state_handle); 3535 } else { 3536 if (ac->num_ports != num_ports) { 3537 dev_err(dev, "The number of vPorts changed: %d->%d\n", 3538 ac->num_ports, num_ports); 3539 err = -EPROTO; 3540 goto out; 3541 } 3542 3543 enable_work(&ac->link_change_work); 3544 } 3545 3546 if (ac->num_ports == 0) 3547 dev_err(dev, "Failed to detect any vPort\n"); 3548 3549 if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) 3550 ac->num_ports = MAX_PORTS_IN_MANA_DEV; 3551 3552 if (!resuming) { 3553 for (i = 0; i < ac->num_ports; i++) { 3554 err = mana_probe_port(ac, i, &ac->ports[i]); 3555 /* we log the port for which the probe failed and stop 3556 * probes for subsequent ports. 
3557 * Note that we keep running ports, for which the probes 3558 * were successful, unless add_adev fails too 3559 */ 3560 if (err) { 3561 dev_err(dev, "Probe Failed for port %d\n", i); 3562 break; 3563 } 3564 } 3565 } else { 3566 for (i = 0; i < ac->num_ports; i++) { 3567 rtnl_lock(); 3568 err = mana_attach(ac->ports[i]); 3569 rtnl_unlock(); 3570 /* we log the port for which the attach failed and stop 3571 * attach for subsequent ports 3572 * Note that we keep running ports, for which the attach 3573 * were successful, unless add_adev fails too 3574 */ 3575 if (err) { 3576 dev_err(dev, "Attach Failed for port %d\n", i); 3577 break; 3578 } 3579 } 3580 } 3581 3582 err = add_adev(gd, "eth"); 3583 3584 INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler); 3585 schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD); 3586 3587 out: 3588 if (err) { 3589 mana_remove(gd, false); 3590 } else { 3591 dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", 3592 gd, gd->dev_id.as_uint32, ac->num_ports, 3593 gd->dev_id.type, gd->dev_id.instance); 3594 dev_dbg(dev, "%s succeeded\n", __func__); 3595 } 3596 3597 return err; 3598 } 3599 3600 void mana_remove(struct gdma_dev *gd, bool suspending) 3601 { 3602 struct gdma_context *gc = gd->gdma_context; 3603 struct mana_context *ac = gd->driver_data; 3604 struct mana_port_context *apc; 3605 struct device *dev = gc->dev; 3606 struct net_device *ndev; 3607 int err; 3608 int i; 3609 3610 disable_work_sync(&ac->link_change_work); 3611 cancel_delayed_work_sync(&ac->gf_stats_work); 3612 3613 /* adev currently doesn't support suspending, always remove it */ 3614 if (gd->adev) 3615 remove_adev(gd); 3616 3617 for (i = 0; i < ac->num_ports; i++) { 3618 ndev = ac->ports[i]; 3619 apc = netdev_priv(ndev); 3620 if (!ndev) { 3621 if (i == 0) 3622 dev_err(dev, "No net device to remove\n"); 3623 goto out; 3624 } 3625 3626 /* All cleanup actions should stay after rtnl_lock(), otherwise 3627 * other functions may access partially cleaned up data. 3628 */ 3629 rtnl_lock(); 3630 3631 err = mana_detach(ndev, false); 3632 if (err) 3633 netdev_err(ndev, "Failed to detach vPort %d: %d\n", 3634 i, err); 3635 3636 if (suspending) { 3637 /* No need to unregister the ndev. 
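 * The netdev stays registered so the port can be re-attached during the
 * resuming mana_probe() call.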
*/ 3638 rtnl_unlock(); 3639 continue; 3640 } 3641 3642 unregister_netdevice(ndev); 3643 mana_cleanup_indir_table(apc); 3644 3645 rtnl_unlock(); 3646 3647 free_netdev(ndev); 3648 } 3649 3650 mana_destroy_eq(ac); 3651 out: 3652 mana_gd_deregister_device(gd); 3653 3654 if (suspending) 3655 return; 3656 3657 gd->driver_data = NULL; 3658 gd->gdma_context = NULL; 3659 kfree(ac); 3660 dev_dbg(dev, "%s succeeded\n", __func__); 3661 } 3662 3663 int mana_rdma_probe(struct gdma_dev *gd) 3664 { 3665 int err = 0; 3666 3667 if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { 3668 /* RDMA device is not detected on pci */ 3669 return err; 3670 } 3671 3672 err = mana_gd_register_device(gd); 3673 if (err) 3674 return err; 3675 3676 err = add_adev(gd, "rdma"); 3677 if (err) 3678 mana_gd_deregister_device(gd); 3679 3680 return err; 3681 } 3682 3683 void mana_rdma_remove(struct gdma_dev *gd) 3684 { 3685 struct gdma_context *gc = gd->gdma_context; 3686 3687 if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { 3688 /* RDMA device is not detected on pci */ 3689 return; 3690 } 3691 3692 WRITE_ONCE(gd->rdma_teardown, true); 3693 flush_workqueue(gc->service_wq); 3694 3695 if (gd->adev) 3696 remove_adev(gd); 3697 3698 mana_gd_deregister_device(gd); 3699 } 3700 3701 struct net_device *mana_get_primary_netdev(struct mana_context *ac, 3702 u32 port_index, 3703 netdevice_tracker *tracker) 3704 { 3705 struct net_device *ndev; 3706 3707 if (port_index >= ac->num_ports) 3708 return NULL; 3709 3710 rcu_read_lock(); 3711 3712 /* If mana is used in netvsc, the upper netdevice should be returned. */ 3713 ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); 3714 3715 /* If there is no upper device, use the parent Ethernet device */ 3716 if (!ndev) 3717 ndev = ac->ports[port_index]; 3718 3719 netdev_hold(ndev, tracker, GFP_ATOMIC); 3720 rcu_read_unlock(); 3721 3722 return ndev; 3723 } 3724 EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA"); 3725