1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* Copyright (c) 2021, Microsoft Corporation. */ 3 4 #include <uapi/linux/bpf.h> 5 6 #include <linux/inetdevice.h> 7 #include <linux/etherdevice.h> 8 #include <linux/ethtool.h> 9 #include <linux/filter.h> 10 #include <linux/mm.h> 11 12 #include <net/checksum.h> 13 #include <net/ip6_checksum.h> 14 15 #include <net/mana/mana.h> 16 #include <net/mana/mana_auxiliary.h> 17 18 static DEFINE_IDA(mana_adev_ida); 19 20 static int mana_adev_idx_alloc(void) 21 { 22 return ida_alloc(&mana_adev_ida, GFP_KERNEL); 23 } 24 25 static void mana_adev_idx_free(int idx) 26 { 27 ida_free(&mana_adev_ida, idx); 28 } 29 30 /* Microsoft Azure Network Adapter (MANA) functions */ 31 32 static int mana_open(struct net_device *ndev) 33 { 34 struct mana_port_context *apc = netdev_priv(ndev); 35 int err; 36 37 err = mana_alloc_queues(ndev); 38 if (err) 39 return err; 40 41 apc->port_is_up = true; 42 43 /* Ensure port state updated before txq state */ 44 smp_wmb(); 45 46 netif_carrier_on(ndev); 47 netif_tx_wake_all_queues(ndev); 48 49 return 0; 50 } 51 52 static int mana_close(struct net_device *ndev) 53 { 54 struct mana_port_context *apc = netdev_priv(ndev); 55 56 if (!apc->port_is_up) 57 return 0; 58 59 return mana_detach(ndev, true); 60 } 61 62 static bool mana_can_tx(struct gdma_queue *wq) 63 { 64 return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; 65 } 66 67 static unsigned int mana_checksum_info(struct sk_buff *skb) 68 { 69 if (skb->protocol == htons(ETH_P_IP)) { 70 struct iphdr *ip = ip_hdr(skb); 71 72 if (ip->protocol == IPPROTO_TCP) 73 return IPPROTO_TCP; 74 75 if (ip->protocol == IPPROTO_UDP) 76 return IPPROTO_UDP; 77 } else if (skb->protocol == htons(ETH_P_IPV6)) { 78 struct ipv6hdr *ip6 = ipv6_hdr(skb); 79 80 if (ip6->nexthdr == IPPROTO_TCP) 81 return IPPROTO_TCP; 82 83 if (ip6->nexthdr == IPPROTO_UDP) 84 return IPPROTO_UDP; 85 } 86 87 /* No csum offloading */ 88 return 0; 89 } 90 91 static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, 92 struct mana_tx_package *tp) 93 { 94 struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; 95 struct gdma_dev *gd = apc->ac->gdma_dev; 96 struct gdma_context *gc; 97 struct device *dev; 98 skb_frag_t *frag; 99 dma_addr_t da; 100 int i; 101 102 gc = gd->gdma_context; 103 dev = gc->dev; 104 da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); 105 106 if (dma_mapping_error(dev, da)) 107 return -ENOMEM; 108 109 ash->dma_handle[0] = da; 110 ash->size[0] = skb_headlen(skb); 111 112 tp->wqe_req.sgl[0].address = ash->dma_handle[0]; 113 tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey; 114 tp->wqe_req.sgl[0].size = ash->size[0]; 115 116 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 117 frag = &skb_shinfo(skb)->frags[i]; 118 da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), 119 DMA_TO_DEVICE); 120 121 if (dma_mapping_error(dev, da)) 122 goto frag_err; 123 124 ash->dma_handle[i + 1] = da; 125 ash->size[i + 1] = skb_frag_size(frag); 126 127 tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1]; 128 tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey; 129 tp->wqe_req.sgl[i + 1].size = ash->size[i + 1]; 130 } 131 132 return 0; 133 134 frag_err: 135 for (i = i - 1; i >= 0; i--) 136 dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1], 137 DMA_TO_DEVICE); 138 139 dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); 140 141 return -ENOMEM; 142 } 143 144 netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) 145 { 146 enum mana_tx_pkt_format 
pkt_fmt = MANA_SHORT_PKT_FMT; 147 struct mana_port_context *apc = netdev_priv(ndev); 148 u16 txq_idx = skb_get_queue_mapping(skb); 149 struct gdma_dev *gd = apc->ac->gdma_dev; 150 bool ipv4 = false, ipv6 = false; 151 struct mana_tx_package pkg = {}; 152 struct netdev_queue *net_txq; 153 struct mana_stats_tx *tx_stats; 154 struct gdma_queue *gdma_sq; 155 unsigned int csum_type; 156 struct mana_txq *txq; 157 struct mana_cq *cq; 158 int err, len; 159 u16 ihs; 160 161 if (unlikely(!apc->port_is_up)) 162 goto tx_drop; 163 164 if (skb_cow_head(skb, MANA_HEADROOM)) 165 goto tx_drop_count; 166 167 txq = &apc->tx_qp[txq_idx].txq; 168 gdma_sq = txq->gdma_sq; 169 cq = &apc->tx_qp[txq_idx].tx_cq; 170 tx_stats = &txq->stats; 171 172 pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; 173 pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; 174 175 if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { 176 pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; 177 pkt_fmt = MANA_LONG_PKT_FMT; 178 } else { 179 pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; 180 } 181 182 pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; 183 184 if (pkt_fmt == MANA_SHORT_PKT_FMT) { 185 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); 186 u64_stats_update_begin(&tx_stats->syncp); 187 tx_stats->short_pkt_fmt++; 188 u64_stats_update_end(&tx_stats->syncp); 189 } else { 190 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); 191 u64_stats_update_begin(&tx_stats->syncp); 192 tx_stats->long_pkt_fmt++; 193 u64_stats_update_end(&tx_stats->syncp); 194 } 195 196 pkg.wqe_req.inline_oob_data = &pkg.tx_oob; 197 pkg.wqe_req.flags = 0; 198 pkg.wqe_req.client_data_unit = 0; 199 200 pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; 201 WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES); 202 203 if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { 204 pkg.wqe_req.sgl = pkg.sgl_array; 205 } else { 206 pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, 207 sizeof(struct gdma_sge), 208 GFP_ATOMIC); 209 if (!pkg.sgl_ptr) 210 goto tx_drop_count; 211 212 pkg.wqe_req.sgl = pkg.sgl_ptr; 213 } 214 215 if (skb->protocol == htons(ETH_P_IP)) 216 ipv4 = true; 217 else if (skb->protocol == htons(ETH_P_IPV6)) 218 ipv6 = true; 219 220 if (skb_is_gso(skb)) { 221 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 222 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 223 224 pkg.tx_oob.s_oob.comp_iphdr_csum = 1; 225 pkg.tx_oob.s_oob.comp_tcp_csum = 1; 226 pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); 227 228 pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; 229 pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; 230 if (ipv4) { 231 ip_hdr(skb)->tot_len = 0; 232 ip_hdr(skb)->check = 0; 233 tcp_hdr(skb)->check = 234 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 235 ip_hdr(skb)->daddr, 0, 236 IPPROTO_TCP, 0); 237 } else { 238 ipv6_hdr(skb)->payload_len = 0; 239 tcp_hdr(skb)->check = 240 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 241 &ipv6_hdr(skb)->daddr, 0, 242 IPPROTO_TCP, 0); 243 } 244 245 if (skb->encapsulation) { 246 ihs = skb_inner_tcp_all_headers(skb); 247 u64_stats_update_begin(&tx_stats->syncp); 248 tx_stats->tso_inner_packets++; 249 tx_stats->tso_inner_bytes += skb->len - ihs; 250 u64_stats_update_end(&tx_stats->syncp); 251 } else { 252 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { 253 ihs = skb_transport_offset(skb) + sizeof(struct udphdr); 254 } else { 255 ihs = skb_tcp_all_headers(skb); 256 if (ipv6_has_hopopt_jumbo(skb)) 257 ihs -= sizeof(struct hop_jumbo_hdr); 258 } 259 260 u64_stats_update_begin(&tx_stats->syncp); 261 tx_stats->tso_packets++; 262 
tx_stats->tso_bytes += skb->len - ihs; 263 u64_stats_update_end(&tx_stats->syncp); 264 } 265 266 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 267 csum_type = mana_checksum_info(skb); 268 269 u64_stats_update_begin(&tx_stats->syncp); 270 tx_stats->csum_partial++; 271 u64_stats_update_end(&tx_stats->syncp); 272 273 if (csum_type == IPPROTO_TCP) { 274 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 275 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 276 277 pkg.tx_oob.s_oob.comp_tcp_csum = 1; 278 pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); 279 280 } else if (csum_type == IPPROTO_UDP) { 281 pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; 282 pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; 283 284 pkg.tx_oob.s_oob.comp_udp_csum = 1; 285 } else { 286 /* Can't do offload of this type of checksum */ 287 if (skb_checksum_help(skb)) 288 goto free_sgl_ptr; 289 } 290 } 291 292 if (mana_map_skb(skb, apc, &pkg)) { 293 u64_stats_update_begin(&tx_stats->syncp); 294 tx_stats->mana_map_err++; 295 u64_stats_update_end(&tx_stats->syncp); 296 goto free_sgl_ptr; 297 } 298 299 skb_queue_tail(&txq->pending_skbs, skb); 300 301 len = skb->len; 302 net_txq = netdev_get_tx_queue(ndev, txq_idx); 303 304 err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, 305 (struct gdma_posted_wqe_info *)skb->cb); 306 if (!mana_can_tx(gdma_sq)) { 307 netif_tx_stop_queue(net_txq); 308 apc->eth_stats.stop_queue++; 309 } 310 311 if (err) { 312 (void)skb_dequeue_tail(&txq->pending_skbs); 313 netdev_warn(ndev, "Failed to post TX OOB: %d\n", err); 314 err = NETDEV_TX_BUSY; 315 goto tx_busy; 316 } 317 318 err = NETDEV_TX_OK; 319 atomic_inc(&txq->pending_sends); 320 321 mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); 322 323 /* skb may be freed after mana_gd_post_work_request. Do not use it. */ 324 skb = NULL; 325 326 tx_stats = &txq->stats; 327 u64_stats_update_begin(&tx_stats->syncp); 328 tx_stats->packets++; 329 tx_stats->bytes += len; 330 u64_stats_update_end(&tx_stats->syncp); 331 332 tx_busy: 333 if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { 334 netif_tx_wake_queue(net_txq); 335 apc->eth_stats.wake_queue++; 336 } 337 338 kfree(pkg.sgl_ptr); 339 return err; 340 341 free_sgl_ptr: 342 kfree(pkg.sgl_ptr); 343 tx_drop_count: 344 ndev->stats.tx_dropped++; 345 tx_drop: 346 dev_kfree_skb_any(skb); 347 return NETDEV_TX_OK; 348 } 349 350 static void mana_get_stats64(struct net_device *ndev, 351 struct rtnl_link_stats64 *st) 352 { 353 struct mana_port_context *apc = netdev_priv(ndev); 354 unsigned int num_queues = apc->num_queues; 355 struct mana_stats_rx *rx_stats; 356 struct mana_stats_tx *tx_stats; 357 unsigned int start; 358 u64 packets, bytes; 359 int q; 360 361 if (!apc->port_is_up) 362 return; 363 364 netdev_stats_to_stats64(st, &ndev->stats); 365 366 for (q = 0; q < num_queues; q++) { 367 rx_stats = &apc->rxqs[q]->stats; 368 369 do { 370 start = u64_stats_fetch_begin(&rx_stats->syncp); 371 packets = rx_stats->packets; 372 bytes = rx_stats->bytes; 373 } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); 374 375 st->rx_packets += packets; 376 st->rx_bytes += bytes; 377 } 378 379 for (q = 0; q < num_queues; q++) { 380 tx_stats = &apc->tx_qp[q].txq.stats; 381 382 do { 383 start = u64_stats_fetch_begin(&tx_stats->syncp); 384 packets = tx_stats->packets; 385 bytes = tx_stats->bytes; 386 } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); 387 388 st->tx_packets += packets; 389 st->tx_bytes += bytes; 390 } 391 } 392 393 static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, 394 int old_q) 395 { 396 struct 
mana_port_context *apc = netdev_priv(ndev); 397 u32 hash = skb_get_hash(skb); 398 struct sock *sk = skb->sk; 399 int txq; 400 401 txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; 402 403 if (txq != old_q && sk && sk_fullsock(sk) && 404 rcu_access_pointer(sk->sk_dst_cache)) 405 sk_tx_queue_set(sk, txq); 406 407 return txq; 408 } 409 410 static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, 411 struct net_device *sb_dev) 412 { 413 int txq; 414 415 if (ndev->real_num_tx_queues == 1) 416 return 0; 417 418 txq = sk_tx_queue_get(skb->sk); 419 420 if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { 421 if (skb_rx_queue_recorded(skb)) 422 txq = skb_get_rx_queue(skb); 423 else 424 txq = mana_get_tx_queue(ndev, skb, txq); 425 } 426 427 return txq; 428 } 429 430 static const struct net_device_ops mana_devops = { 431 .ndo_open = mana_open, 432 .ndo_stop = mana_close, 433 .ndo_select_queue = mana_select_queue, 434 .ndo_start_xmit = mana_start_xmit, 435 .ndo_validate_addr = eth_validate_addr, 436 .ndo_get_stats64 = mana_get_stats64, 437 .ndo_bpf = mana_bpf, 438 .ndo_xdp_xmit = mana_xdp_xmit, 439 }; 440 441 static void mana_cleanup_port_context(struct mana_port_context *apc) 442 { 443 kfree(apc->rxqs); 444 apc->rxqs = NULL; 445 } 446 447 static int mana_init_port_context(struct mana_port_context *apc) 448 { 449 apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), 450 GFP_KERNEL); 451 452 return !apc->rxqs ? -ENOMEM : 0; 453 } 454 455 static int mana_send_request(struct mana_context *ac, void *in_buf, 456 u32 in_len, void *out_buf, u32 out_len) 457 { 458 struct gdma_context *gc = ac->gdma_dev->gdma_context; 459 struct gdma_resp_hdr *resp = out_buf; 460 struct gdma_req_hdr *req = in_buf; 461 struct device *dev = gc->dev; 462 static atomic_t activity_id; 463 int err; 464 465 req->dev_id = gc->mana.dev_id; 466 req->activity_id = atomic_inc_return(&activity_id); 467 468 err = mana_gd_send_request(gc, in_len, in_buf, out_len, 469 out_buf); 470 if (err || resp->status) { 471 dev_err(dev, "Failed to send mana message: %d, 0x%x\n", 472 err, resp->status); 473 return err ? 
err : -EPROTO; 474 } 475 476 if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || 477 req->activity_id != resp->activity_id) { 478 dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", 479 req->dev_id.as_uint32, resp->dev_id.as_uint32, 480 req->activity_id, resp->activity_id); 481 return -EPROTO; 482 } 483 484 return 0; 485 } 486 487 static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, 488 const enum mana_command_code expected_code, 489 const u32 min_size) 490 { 491 if (resp_hdr->response.msg_type != expected_code) 492 return -EPROTO; 493 494 if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) 495 return -EPROTO; 496 497 if (resp_hdr->response.msg_size < min_size) 498 return -EPROTO; 499 500 return 0; 501 } 502 503 static int mana_pf_register_hw_vport(struct mana_port_context *apc) 504 { 505 struct mana_register_hw_vport_resp resp = {}; 506 struct mana_register_hw_vport_req req = {}; 507 int err; 508 509 mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT, 510 sizeof(req), sizeof(resp)); 511 req.attached_gfid = 1; 512 req.is_pf_default_vport = 1; 513 req.allow_all_ether_types = 1; 514 515 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 516 sizeof(resp)); 517 if (err) { 518 netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err); 519 return err; 520 } 521 522 err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT, 523 sizeof(resp)); 524 if (err || resp.hdr.status) { 525 netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n", 526 err, resp.hdr.status); 527 return err ? err : -EPROTO; 528 } 529 530 apc->port_handle = resp.hw_vport_handle; 531 return 0; 532 } 533 534 static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) 535 { 536 struct mana_deregister_hw_vport_resp resp = {}; 537 struct mana_deregister_hw_vport_req req = {}; 538 int err; 539 540 mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT, 541 sizeof(req), sizeof(resp)); 542 req.hw_vport_handle = apc->port_handle; 543 544 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 545 sizeof(resp)); 546 if (err) { 547 netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", 548 err); 549 return; 550 } 551 552 err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT, 553 sizeof(resp)); 554 if (err || resp.hdr.status) 555 netdev_err(apc->ndev, 556 "Failed to deregister hw vPort: %d, 0x%x\n", 557 err, resp.hdr.status); 558 } 559 560 static int mana_pf_register_filter(struct mana_port_context *apc) 561 { 562 struct mana_register_filter_resp resp = {}; 563 struct mana_register_filter_req req = {}; 564 int err; 565 566 mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER, 567 sizeof(req), sizeof(resp)); 568 req.vport = apc->port_handle; 569 memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN); 570 571 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 572 sizeof(resp)); 573 if (err) { 574 netdev_err(apc->ndev, "Failed to register filter: %d\n", err); 575 return err; 576 } 577 578 err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER, 579 sizeof(resp)); 580 if (err || resp.hdr.status) { 581 netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n", 582 err, resp.hdr.status); 583 return err ? 
err : -EPROTO; 584 } 585 586 apc->pf_filter_handle = resp.filter_handle; 587 return 0; 588 } 589 590 static void mana_pf_deregister_filter(struct mana_port_context *apc) 591 { 592 struct mana_deregister_filter_resp resp = {}; 593 struct mana_deregister_filter_req req = {}; 594 int err; 595 596 mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER, 597 sizeof(req), sizeof(resp)); 598 req.filter_handle = apc->pf_filter_handle; 599 600 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 601 sizeof(resp)); 602 if (err) { 603 netdev_err(apc->ndev, "Failed to unregister filter: %d\n", 604 err); 605 return; 606 } 607 608 err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER, 609 sizeof(resp)); 610 if (err || resp.hdr.status) 611 netdev_err(apc->ndev, 612 "Failed to deregister filter: %d, 0x%x\n", 613 err, resp.hdr.status); 614 } 615 616 static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, 617 u32 proto_minor_ver, u32 proto_micro_ver, 618 u16 *max_num_vports) 619 { 620 struct gdma_context *gc = ac->gdma_dev->gdma_context; 621 struct mana_query_device_cfg_resp resp = {}; 622 struct mana_query_device_cfg_req req = {}; 623 struct device *dev = gc->dev; 624 int err = 0; 625 626 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, 627 sizeof(req), sizeof(resp)); 628 req.proto_major_ver = proto_major_ver; 629 req.proto_minor_ver = proto_minor_ver; 630 req.proto_micro_ver = proto_micro_ver; 631 632 err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); 633 if (err) { 634 dev_err(dev, "Failed to query config: %d", err); 635 return err; 636 } 637 638 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, 639 sizeof(resp)); 640 if (err || resp.hdr.status) { 641 dev_err(dev, "Invalid query result: %d, 0x%x\n", err, 642 resp.hdr.status); 643 if (!err) 644 err = -EPROTO; 645 return err; 646 } 647 648 *max_num_vports = resp.max_num_vports; 649 650 return 0; 651 } 652 653 static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, 654 u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) 655 { 656 struct mana_query_vport_cfg_resp resp = {}; 657 struct mana_query_vport_cfg_req req = {}; 658 int err; 659 660 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, 661 sizeof(req), sizeof(resp)); 662 663 req.vport_index = vport_index; 664 665 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 666 sizeof(resp)); 667 if (err) 668 return err; 669 670 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, 671 sizeof(resp)); 672 if (err) 673 return err; 674 675 if (resp.hdr.status) 676 return -EPROTO; 677 678 *max_sq = resp.max_num_sq; 679 *max_rq = resp.max_num_rq; 680 *num_indir_entry = resp.num_indirection_ent; 681 682 apc->port_handle = resp.vport; 683 ether_addr_copy(apc->mac_addr, resp.mac_addr); 684 685 return 0; 686 } 687 688 void mana_uncfg_vport(struct mana_port_context *apc) 689 { 690 mutex_lock(&apc->vport_mutex); 691 apc->vport_use_count--; 692 WARN_ON(apc->vport_use_count < 0); 693 mutex_unlock(&apc->vport_mutex); 694 } 695 EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA); 696 697 int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, 698 u32 doorbell_pg_id) 699 { 700 struct mana_config_vport_resp resp = {}; 701 struct mana_config_vport_req req = {}; 702 int err; 703 704 /* This function is used to program the Ethernet port in the hardware 705 * table. It can be called from the Ethernet driver or the RDMA driver. 
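	 * The check-and-increment of vport_use_count below is serialized by
	 * apc->vport_mutex, so the Ethernet and RDMA drivers cannot both
	 * claim the same port at the same time.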
	 *
	 * For Ethernet usage, the hardware supports only one active user on a
	 * physical port. The driver checks the port usage before programming
	 * the hardware when creating the RAW QP (RDMA driver) or exposing the
	 * device to the kernel NET layer (Ethernet driver).
	 *
	 * Because the RDMA driver doesn't know in advance which QP type the
	 * user will create, it exposes the device with all its ports. The user
	 * may not be able to create a RAW QP on a port if this port is already
	 * in use by the kernel Ethernet driver.
	 *
	 * This physical port limitation only applies to the RAW QP. For RC QP,
	 * the hardware doesn't have this limitation. The user can create RC
	 * QPs on a physical port up to the hardware limits independent of the
	 * Ethernet usage on the same port.
	 */
	mutex_lock(&apc->vport_mutex);
	if (apc->vport_use_count > 0) {
		mutex_unlock(&apc->vport_mutex);
		return -EBUSY;
	}
	apc->vport_use_count++;
	mutex_unlock(&apc->vport_mutex);

	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
			     sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	req.pdid = protection_dom_id;
	req.doorbell_pageid = doorbell_pg_id;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
		if (!err)
			err = -EPROTO;

		goto out;
	}

	apc->tx_shortform_allowed = resp.short_form_allowed;
	apc->tx_vp_offset = resp.tx_vport_offset;

	netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n",
		    apc->port_handle, protection_dom_id, doorbell_pg_id);
out:
	if (err)
		mana_uncfg_vport(apc);

	return err;
}
EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA);

static int mana_cfg_vport_steering(struct mana_port_context *apc,
				   enum TRI_STATE rx,
				   bool update_default_rxobj, bool update_key,
				   bool update_tab)
{
	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
	struct mana_cfg_rx_steer_req *req = NULL;
	struct mana_cfg_rx_steer_resp resp = {};
	struct net_device *ndev = apc->ndev;
	mana_handle_t *req_indir_tab;
	u32 req_buf_size;
	int err;

	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
	req = kzalloc(req_buf_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
			     sizeof(resp));

	req->vport = apc->port_handle;
	req->num_indir_entries = num_entries;
	req->indir_tab_offset = sizeof(*req);
	req->rx_enable = rx;
	req->rss_enable = apc->rss_state;
	req->update_default_rxobj = update_default_rxobj;
	req->update_hashkey = update_key;
	req->update_indir_tab = update_tab;
	req->default_rxobj = apc->default_rxobj;

	if (update_key)
		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);

	if (update_tab) {
		req_indir_tab = (mana_handle_t *)(req + 1);
		memcpy(req_indir_tab, apc->rxobj_table,
		       req->num_indir_entries * sizeof(mana_handle_t));
	}

	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
		goto out;
	}

	err = 
mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, 815 sizeof(resp)); 816 if (err) { 817 netdev_err(ndev, "vPort RX configuration failed: %d\n", err); 818 goto out; 819 } 820 821 if (resp.hdr.status) { 822 netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", 823 resp.hdr.status); 824 err = -EPROTO; 825 } 826 827 netdev_info(ndev, "Configured steering vPort %llu entries %u\n", 828 apc->port_handle, num_entries); 829 out: 830 kfree(req); 831 return err; 832 } 833 834 int mana_create_wq_obj(struct mana_port_context *apc, 835 mana_handle_t vport, 836 u32 wq_type, struct mana_obj_spec *wq_spec, 837 struct mana_obj_spec *cq_spec, 838 mana_handle_t *wq_obj) 839 { 840 struct mana_create_wqobj_resp resp = {}; 841 struct mana_create_wqobj_req req = {}; 842 struct net_device *ndev = apc->ndev; 843 int err; 844 845 mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, 846 sizeof(req), sizeof(resp)); 847 req.vport = vport; 848 req.wq_type = wq_type; 849 req.wq_gdma_region = wq_spec->gdma_region; 850 req.cq_gdma_region = cq_spec->gdma_region; 851 req.wq_size = wq_spec->queue_size; 852 req.cq_size = cq_spec->queue_size; 853 req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; 854 req.cq_parent_qid = cq_spec->attached_eq; 855 856 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 857 sizeof(resp)); 858 if (err) { 859 netdev_err(ndev, "Failed to create WQ object: %d\n", err); 860 goto out; 861 } 862 863 err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, 864 sizeof(resp)); 865 if (err || resp.hdr.status) { 866 netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err, 867 resp.hdr.status); 868 if (!err) 869 err = -EPROTO; 870 goto out; 871 } 872 873 if (resp.wq_obj == INVALID_MANA_HANDLE) { 874 netdev_err(ndev, "Got an invalid WQ object handle\n"); 875 err = -EPROTO; 876 goto out; 877 } 878 879 *wq_obj = resp.wq_obj; 880 wq_spec->queue_index = resp.wq_id; 881 cq_spec->queue_index = resp.cq_id; 882 883 return 0; 884 out: 885 return err; 886 } 887 EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA); 888 889 void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, 890 mana_handle_t wq_obj) 891 { 892 struct mana_destroy_wqobj_resp resp = {}; 893 struct mana_destroy_wqobj_req req = {}; 894 struct net_device *ndev = apc->ndev; 895 int err; 896 897 mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, 898 sizeof(req), sizeof(resp)); 899 req.wq_type = wq_type; 900 req.wq_obj_handle = wq_obj; 901 902 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 903 sizeof(resp)); 904 if (err) { 905 netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); 906 return; 907 } 908 909 err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, 910 sizeof(resp)); 911 if (err || resp.hdr.status) 912 netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, 913 resp.hdr.status); 914 } 915 EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA); 916 917 static void mana_destroy_eq(struct mana_context *ac) 918 { 919 struct gdma_context *gc = ac->gdma_dev->gdma_context; 920 struct gdma_queue *eq; 921 int i; 922 923 if (!ac->eqs) 924 return; 925 926 for (i = 0; i < gc->max_num_queues; i++) { 927 eq = ac->eqs[i].eq; 928 if (!eq) 929 continue; 930 931 mana_gd_destroy_queue(gc, eq); 932 } 933 934 kfree(ac->eqs); 935 ac->eqs = NULL; 936 } 937 938 static int mana_create_eq(struct mana_context *ac) 939 { 940 struct gdma_dev *gd = ac->gdma_dev; 941 struct gdma_context *gc = gd->gdma_context; 942 struct gdma_queue_spec spec = {}; 943 int err; 944 int i; 945 946 ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct 
mana_eq), 947 GFP_KERNEL); 948 if (!ac->eqs) 949 return -ENOMEM; 950 951 spec.type = GDMA_EQ; 952 spec.monitor_avl_buf = false; 953 spec.queue_size = EQ_SIZE; 954 spec.eq.callback = NULL; 955 spec.eq.context = ac->eqs; 956 spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; 957 958 for (i = 0; i < gc->max_num_queues; i++) { 959 err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); 960 if (err) 961 goto out; 962 } 963 964 return 0; 965 out: 966 mana_destroy_eq(ac); 967 return err; 968 } 969 970 static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq) 971 { 972 struct mana_fence_rq_resp resp = {}; 973 struct mana_fence_rq_req req = {}; 974 int err; 975 976 init_completion(&rxq->fence_event); 977 978 mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ, 979 sizeof(req), sizeof(resp)); 980 req.wq_obj_handle = rxq->rxobj; 981 982 err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 983 sizeof(resp)); 984 if (err) { 985 netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n", 986 rxq->rxq_idx, err); 987 return err; 988 } 989 990 err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp)); 991 if (err || resp.hdr.status) { 992 netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n", 993 rxq->rxq_idx, err, resp.hdr.status); 994 if (!err) 995 err = -EPROTO; 996 997 return err; 998 } 999 1000 if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) { 1001 netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n", 1002 rxq->rxq_idx); 1003 return -ETIMEDOUT; 1004 } 1005 1006 return 0; 1007 } 1008 1009 static void mana_fence_rqs(struct mana_port_context *apc) 1010 { 1011 unsigned int rxq_idx; 1012 struct mana_rxq *rxq; 1013 int err; 1014 1015 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 1016 rxq = apc->rxqs[rxq_idx]; 1017 err = mana_fence_rq(apc, rxq); 1018 1019 /* In case of any error, use sleep instead. 
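		 * If the fence could not be confirmed, fall back to a fixed
		 * 100 ms delay before moving on to the next queue.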
*/ 1020 if (err) 1021 msleep(100); 1022 } 1023 } 1024 1025 static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) 1026 { 1027 u32 used_space_old; 1028 u32 used_space_new; 1029 1030 used_space_old = wq->head - wq->tail; 1031 used_space_new = wq->head - (wq->tail + num_units); 1032 1033 if (WARN_ON_ONCE(used_space_new > used_space_old)) 1034 return -ERANGE; 1035 1036 wq->tail += num_units; 1037 return 0; 1038 } 1039 1040 static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) 1041 { 1042 struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; 1043 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 1044 struct device *dev = gc->dev; 1045 int i; 1046 1047 dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); 1048 1049 for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) 1050 dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], 1051 DMA_TO_DEVICE); 1052 } 1053 1054 static void mana_poll_tx_cq(struct mana_cq *cq) 1055 { 1056 struct gdma_comp *completions = cq->gdma_comp_buf; 1057 struct gdma_posted_wqe_info *wqe_info; 1058 unsigned int pkt_transmitted = 0; 1059 unsigned int wqe_unit_cnt = 0; 1060 struct mana_txq *txq = cq->txq; 1061 struct mana_port_context *apc; 1062 struct netdev_queue *net_txq; 1063 struct gdma_queue *gdma_wq; 1064 unsigned int avail_space; 1065 struct net_device *ndev; 1066 struct sk_buff *skb; 1067 bool txq_stopped; 1068 int comp_read; 1069 int i; 1070 1071 ndev = txq->ndev; 1072 apc = netdev_priv(ndev); 1073 1074 comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, 1075 CQE_POLLING_BUFFER); 1076 1077 if (comp_read < 1) 1078 return; 1079 1080 apc->eth_stats.tx_cqes = comp_read; 1081 1082 for (i = 0; i < comp_read; i++) { 1083 struct mana_tx_comp_oob *cqe_oob; 1084 1085 if (WARN_ON_ONCE(!completions[i].is_sq)) 1086 return; 1087 1088 cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; 1089 if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != 1090 MANA_CQE_COMPLETION)) 1091 return; 1092 1093 switch (cqe_oob->cqe_hdr.cqe_type) { 1094 case CQE_TX_OKAY: 1095 break; 1096 1097 case CQE_TX_SA_DROP: 1098 case CQE_TX_MTU_DROP: 1099 case CQE_TX_INVALID_OOB: 1100 case CQE_TX_INVALID_ETH_TYPE: 1101 case CQE_TX_HDR_PROCESSING_ERROR: 1102 case CQE_TX_VF_DISABLED: 1103 case CQE_TX_VPORT_IDX_OUT_OF_RANGE: 1104 case CQE_TX_VPORT_DISABLED: 1105 case CQE_TX_VLAN_TAGGING_VIOLATION: 1106 WARN_ONCE(1, "TX: CQE error %d: ignored.\n", 1107 cqe_oob->cqe_hdr.cqe_type); 1108 apc->eth_stats.tx_cqe_err++; 1109 break; 1110 1111 default: 1112 /* If the CQE type is unexpected, log an error, assert, 1113 * and go through the error path. 
1114 */ 1115 WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", 1116 cqe_oob->cqe_hdr.cqe_type); 1117 apc->eth_stats.tx_cqe_unknown_type++; 1118 return; 1119 } 1120 1121 if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) 1122 return; 1123 1124 skb = skb_dequeue(&txq->pending_skbs); 1125 if (WARN_ON_ONCE(!skb)) 1126 return; 1127 1128 wqe_info = (struct gdma_posted_wqe_info *)skb->cb; 1129 wqe_unit_cnt += wqe_info->wqe_size_in_bu; 1130 1131 mana_unmap_skb(skb, apc); 1132 1133 napi_consume_skb(skb, cq->budget); 1134 1135 pkt_transmitted++; 1136 } 1137 1138 if (WARN_ON_ONCE(wqe_unit_cnt == 0)) 1139 return; 1140 1141 mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); 1142 1143 gdma_wq = txq->gdma_sq; 1144 avail_space = mana_gd_wq_avail_space(gdma_wq); 1145 1146 /* Ensure tail updated before checking q stop */ 1147 smp_mb(); 1148 1149 net_txq = txq->net_txq; 1150 txq_stopped = netif_tx_queue_stopped(net_txq); 1151 1152 /* Ensure checking txq_stopped before apc->port_is_up. */ 1153 smp_rmb(); 1154 1155 if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { 1156 netif_tx_wake_queue(net_txq); 1157 apc->eth_stats.wake_queue++; 1158 } 1159 1160 if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) 1161 WARN_ON_ONCE(1); 1162 1163 cq->work_done = pkt_transmitted; 1164 1165 apc->eth_stats.tx_cqes -= pkt_transmitted; 1166 } 1167 1168 static void mana_post_pkt_rxq(struct mana_rxq *rxq) 1169 { 1170 struct mana_recv_buf_oob *recv_buf_oob; 1171 u32 curr_index; 1172 int err; 1173 1174 curr_index = rxq->buf_index++; 1175 if (rxq->buf_index == rxq->num_rx_buf) 1176 rxq->buf_index = 0; 1177 1178 recv_buf_oob = &rxq->rx_oobs[curr_index]; 1179 1180 err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req, 1181 &recv_buf_oob->wqe_inf); 1182 if (WARN_ON_ONCE(err)) 1183 return; 1184 1185 WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); 1186 } 1187 1188 static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, 1189 struct xdp_buff *xdp) 1190 { 1191 struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE); 1192 1193 if (!skb) 1194 return NULL; 1195 1196 if (xdp->data_hard_start) { 1197 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1198 skb_put(skb, xdp->data_end - xdp->data); 1199 } else { 1200 skb_reserve(skb, XDP_PACKET_HEADROOM); 1201 skb_put(skb, pkt_len); 1202 } 1203 1204 return skb; 1205 } 1206 1207 static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, 1208 struct mana_rxq *rxq) 1209 { 1210 struct mana_stats_rx *rx_stats = &rxq->stats; 1211 struct net_device *ndev = rxq->ndev; 1212 uint pkt_len = cqe->ppi[0].pkt_len; 1213 u16 rxq_idx = rxq->rxq_idx; 1214 struct napi_struct *napi; 1215 struct xdp_buff xdp = {}; 1216 struct sk_buff *skb; 1217 u32 hash_value; 1218 u32 act; 1219 1220 rxq->rx_cq.work_done++; 1221 napi = &rxq->rx_cq.napi; 1222 1223 if (!buf_va) { 1224 ++ndev->stats.rx_dropped; 1225 return; 1226 } 1227 1228 act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); 1229 1230 if (act == XDP_REDIRECT && !rxq->xdp_rc) 1231 return; 1232 1233 if (act != XDP_PASS && act != XDP_TX) 1234 goto drop_xdp; 1235 1236 skb = mana_build_skb(buf_va, pkt_len, &xdp); 1237 1238 if (!skb) 1239 goto drop; 1240 1241 skb->dev = napi->dev; 1242 1243 skb->protocol = eth_type_trans(skb, ndev); 1244 skb_checksum_none_assert(skb); 1245 skb_record_rx_queue(skb, rxq_idx); 1246 1247 if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { 1248 if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) 1249 skb->ip_summed = CHECKSUM_UNNECESSARY; 1250 } 
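	/* Propagate the hardware-computed receive hash to the stack, tagged
	 * as an L4 hash when the device reports an L4 hash type
	 * (MANA_HASH_L4) and as an L3 hash otherwise.
	 */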
1251 1252 if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { 1253 hash_value = cqe->ppi[0].pkt_hash; 1254 1255 if (cqe->rx_hashtype & MANA_HASH_L4) 1256 skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); 1257 else 1258 skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); 1259 } 1260 1261 u64_stats_update_begin(&rx_stats->syncp); 1262 rx_stats->packets++; 1263 rx_stats->bytes += pkt_len; 1264 1265 if (act == XDP_TX) 1266 rx_stats->xdp_tx++; 1267 u64_stats_update_end(&rx_stats->syncp); 1268 1269 if (act == XDP_TX) { 1270 skb_set_queue_mapping(skb, rxq_idx); 1271 mana_xdp_tx(skb, ndev); 1272 return; 1273 } 1274 1275 napi_gro_receive(napi, skb); 1276 1277 return; 1278 1279 drop_xdp: 1280 u64_stats_update_begin(&rx_stats->syncp); 1281 rx_stats->xdp_drop++; 1282 u64_stats_update_end(&rx_stats->syncp); 1283 1284 drop: 1285 WARN_ON_ONCE(rxq->xdp_save_page); 1286 rxq->xdp_save_page = virt_to_page(buf_va); 1287 1288 ++ndev->stats.rx_dropped; 1289 1290 return; 1291 } 1292 1293 static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, 1294 struct gdma_comp *cqe) 1295 { 1296 struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; 1297 struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; 1298 struct net_device *ndev = rxq->ndev; 1299 struct mana_recv_buf_oob *rxbuf_oob; 1300 struct mana_port_context *apc; 1301 struct device *dev = gc->dev; 1302 void *new_buf, *old_buf; 1303 struct page *new_page; 1304 u32 curr, pktlen; 1305 dma_addr_t da; 1306 1307 apc = netdev_priv(ndev); 1308 1309 switch (oob->cqe_hdr.cqe_type) { 1310 case CQE_RX_OKAY: 1311 break; 1312 1313 case CQE_RX_TRUNCATED: 1314 ++ndev->stats.rx_dropped; 1315 rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; 1316 netdev_warn_once(ndev, "Dropped a truncated packet\n"); 1317 goto drop; 1318 1319 case CQE_RX_COALESCED_4: 1320 netdev_err(ndev, "RX coalescing is unsupported\n"); 1321 apc->eth_stats.rx_coalesced_err++; 1322 return; 1323 1324 case CQE_RX_OBJECT_FENCE: 1325 complete(&rxq->fence_event); 1326 return; 1327 1328 default: 1329 netdev_err(ndev, "Unknown RX CQE type = %d\n", 1330 oob->cqe_hdr.cqe_type); 1331 apc->eth_stats.rx_cqe_unknown_type++; 1332 return; 1333 } 1334 1335 pktlen = oob->ppi[0].pkt_len; 1336 1337 if (pktlen == 0) { 1338 /* data packets should never have packetlength of zero */ 1339 netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", 1340 rxq->gdma_id, cq->gdma_id, rxq->rxobj); 1341 return; 1342 } 1343 1344 curr = rxq->buf_index; 1345 rxbuf_oob = &rxq->rx_oobs[curr]; 1346 WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); 1347 1348 /* Reuse XDP dropped page if available */ 1349 if (rxq->xdp_save_page) { 1350 new_page = rxq->xdp_save_page; 1351 rxq->xdp_save_page = NULL; 1352 } else { 1353 new_page = alloc_page(GFP_ATOMIC); 1354 } 1355 1356 if (new_page) { 1357 da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize, 1358 DMA_FROM_DEVICE); 1359 1360 if (dma_mapping_error(dev, da)) { 1361 __free_page(new_page); 1362 new_page = NULL; 1363 } 1364 } 1365 1366 new_buf = new_page ? 
page_to_virt(new_page) : NULL; 1367 1368 if (new_buf) { 1369 dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize, 1370 DMA_FROM_DEVICE); 1371 1372 old_buf = rxbuf_oob->buf_va; 1373 1374 /* refresh the rxbuf_oob with the new page */ 1375 rxbuf_oob->buf_va = new_buf; 1376 rxbuf_oob->buf_dma_addr = da; 1377 rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr; 1378 } else { 1379 old_buf = NULL; /* drop the packet if no memory */ 1380 } 1381 1382 mana_rx_skb(old_buf, oob, rxq); 1383 1384 drop: 1385 mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); 1386 1387 mana_post_pkt_rxq(rxq); 1388 } 1389 1390 static void mana_poll_rx_cq(struct mana_cq *cq) 1391 { 1392 struct gdma_comp *comp = cq->gdma_comp_buf; 1393 struct mana_rxq *rxq = cq->rxq; 1394 struct mana_port_context *apc; 1395 int comp_read, i; 1396 1397 apc = netdev_priv(rxq->ndev); 1398 1399 comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); 1400 WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); 1401 1402 apc->eth_stats.rx_cqes = comp_read; 1403 rxq->xdp_flush = false; 1404 1405 for (i = 0; i < comp_read; i++) { 1406 if (WARN_ON_ONCE(comp[i].is_sq)) 1407 return; 1408 1409 /* verify recv cqe references the right rxq */ 1410 if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) 1411 return; 1412 1413 mana_process_rx_cqe(rxq, cq, &comp[i]); 1414 1415 apc->eth_stats.rx_cqes--; 1416 } 1417 1418 if (rxq->xdp_flush) 1419 xdp_do_flush(); 1420 } 1421 1422 static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) 1423 { 1424 struct mana_cq *cq = context; 1425 u8 arm_bit; 1426 int w; 1427 1428 WARN_ON_ONCE(cq->gdma_cq != gdma_queue); 1429 1430 if (cq->type == MANA_CQ_TYPE_RX) 1431 mana_poll_rx_cq(cq); 1432 else 1433 mana_poll_tx_cq(cq); 1434 1435 w = cq->work_done; 1436 1437 if (w < cq->budget && 1438 napi_complete_done(&cq->napi, w)) { 1439 arm_bit = SET_ARM_BIT; 1440 } else { 1441 arm_bit = 0; 1442 } 1443 1444 mana_gd_ring_cq(gdma_queue, arm_bit); 1445 1446 return w; 1447 } 1448 1449 static int mana_poll(struct napi_struct *napi, int budget) 1450 { 1451 struct mana_cq *cq = container_of(napi, struct mana_cq, napi); 1452 int w; 1453 1454 cq->work_done = 0; 1455 cq->budget = budget; 1456 1457 w = mana_cq_handler(cq, cq->gdma_cq); 1458 1459 return min(w, budget); 1460 } 1461 1462 static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) 1463 { 1464 struct mana_cq *cq = context; 1465 1466 napi_schedule_irqoff(&cq->napi); 1467 } 1468 1469 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) 1470 { 1471 struct gdma_dev *gd = apc->ac->gdma_dev; 1472 1473 if (!cq->gdma_cq) 1474 return; 1475 1476 mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); 1477 } 1478 1479 static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) 1480 { 1481 struct gdma_dev *gd = apc->ac->gdma_dev; 1482 1483 if (!txq->gdma_sq) 1484 return; 1485 1486 mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); 1487 } 1488 1489 static void mana_destroy_txq(struct mana_port_context *apc) 1490 { 1491 struct napi_struct *napi; 1492 int i; 1493 1494 if (!apc->tx_qp) 1495 return; 1496 1497 for (i = 0; i < apc->num_queues; i++) { 1498 napi = &apc->tx_qp[i].tx_cq.napi; 1499 napi_synchronize(napi); 1500 napi_disable(napi); 1501 netif_napi_del(napi); 1502 1503 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); 1504 1505 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); 1506 1507 mana_deinit_txq(apc, &apc->tx_qp[i].txq); 1508 } 1509 1510 kfree(apc->tx_qp); 1511 apc->tx_qp = NULL; 
1512 } 1513 1514 static int mana_create_txq(struct mana_port_context *apc, 1515 struct net_device *net) 1516 { 1517 struct mana_context *ac = apc->ac; 1518 struct gdma_dev *gd = ac->gdma_dev; 1519 struct mana_obj_spec wq_spec; 1520 struct mana_obj_spec cq_spec; 1521 struct gdma_queue_spec spec; 1522 struct gdma_context *gc; 1523 struct mana_txq *txq; 1524 struct mana_cq *cq; 1525 u32 txq_size; 1526 u32 cq_size; 1527 int err; 1528 int i; 1529 1530 apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp), 1531 GFP_KERNEL); 1532 if (!apc->tx_qp) 1533 return -ENOMEM; 1534 1535 /* The minimum size of the WQE is 32 bytes, hence 1536 * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs 1537 * the SQ can store. This value is then used to size other queues 1538 * to prevent overflow. 1539 */ 1540 txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; 1541 BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); 1542 1543 cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; 1544 cq_size = PAGE_ALIGN(cq_size); 1545 1546 gc = gd->gdma_context; 1547 1548 for (i = 0; i < apc->num_queues; i++) { 1549 apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; 1550 1551 /* Create SQ */ 1552 txq = &apc->tx_qp[i].txq; 1553 1554 u64_stats_init(&txq->stats.syncp); 1555 txq->ndev = net; 1556 txq->net_txq = netdev_get_tx_queue(net, i); 1557 txq->vp_offset = apc->tx_vp_offset; 1558 skb_queue_head_init(&txq->pending_skbs); 1559 1560 memset(&spec, 0, sizeof(spec)); 1561 spec.type = GDMA_SQ; 1562 spec.monitor_avl_buf = true; 1563 spec.queue_size = txq_size; 1564 err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); 1565 if (err) 1566 goto out; 1567 1568 /* Create SQ's CQ */ 1569 cq = &apc->tx_qp[i].tx_cq; 1570 cq->type = MANA_CQ_TYPE_TX; 1571 1572 cq->txq = txq; 1573 1574 memset(&spec, 0, sizeof(spec)); 1575 spec.type = GDMA_CQ; 1576 spec.monitor_avl_buf = false; 1577 spec.queue_size = cq_size; 1578 spec.cq.callback = mana_schedule_napi; 1579 spec.cq.parent_eq = ac->eqs[i].eq; 1580 spec.cq.context = cq; 1581 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 1582 if (err) 1583 goto out; 1584 1585 memset(&wq_spec, 0, sizeof(wq_spec)); 1586 memset(&cq_spec, 0, sizeof(cq_spec)); 1587 1588 wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle; 1589 wq_spec.queue_size = txq->gdma_sq->queue_size; 1590 1591 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 1592 cq_spec.queue_size = cq->gdma_cq->queue_size; 1593 cq_spec.modr_ctx_id = 0; 1594 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 1595 1596 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, 1597 &wq_spec, &cq_spec, 1598 &apc->tx_qp[i].tx_object); 1599 1600 if (err) 1601 goto out; 1602 1603 txq->gdma_sq->id = wq_spec.queue_index; 1604 cq->gdma_cq->id = cq_spec.queue_index; 1605 1606 txq->gdma_sq->mem_info.dma_region_handle = 1607 GDMA_INVALID_DMA_REGION; 1608 cq->gdma_cq->mem_info.dma_region_handle = 1609 GDMA_INVALID_DMA_REGION; 1610 1611 txq->gdma_txq_id = txq->gdma_sq->id; 1612 1613 cq->gdma_id = cq->gdma_cq->id; 1614 1615 if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { 1616 err = -EINVAL; 1617 goto out; 1618 } 1619 1620 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 1621 1622 netif_napi_add_tx(net, &cq->napi, mana_poll); 1623 napi_enable(&cq->napi); 1624 1625 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 1626 } 1627 1628 return 0; 1629 out: 1630 mana_destroy_txq(apc); 1631 return err; 1632 } 1633 1634 static void mana_destroy_rxq(struct mana_port_context *apc, 1635 struct mana_rxq *rxq, bool validate_state) 1636 1637 { 1638 struct gdma_context *gc 
= apc->ac->gdma_dev->gdma_context; 1639 struct mana_recv_buf_oob *rx_oob; 1640 struct device *dev = gc->dev; 1641 struct napi_struct *napi; 1642 int i; 1643 1644 if (!rxq) 1645 return; 1646 1647 napi = &rxq->rx_cq.napi; 1648 1649 if (validate_state) 1650 napi_synchronize(napi); 1651 1652 napi_disable(napi); 1653 1654 xdp_rxq_info_unreg(&rxq->xdp_rxq); 1655 1656 netif_napi_del(napi); 1657 1658 mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); 1659 1660 mana_deinit_cq(apc, &rxq->rx_cq); 1661 1662 if (rxq->xdp_save_page) 1663 __free_page(rxq->xdp_save_page); 1664 1665 for (i = 0; i < rxq->num_rx_buf; i++) { 1666 rx_oob = &rxq->rx_oobs[i]; 1667 1668 if (!rx_oob->buf_va) 1669 continue; 1670 1671 dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize, 1672 DMA_FROM_DEVICE); 1673 1674 free_page((unsigned long)rx_oob->buf_va); 1675 rx_oob->buf_va = NULL; 1676 } 1677 1678 if (rxq->gdma_rq) 1679 mana_gd_destroy_queue(gc, rxq->gdma_rq); 1680 1681 kfree(rxq); 1682 } 1683 1684 #define MANA_WQE_HEADER_SIZE 16 1685 #define MANA_WQE_SGE_SIZE 16 1686 1687 static int mana_alloc_rx_wqe(struct mana_port_context *apc, 1688 struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) 1689 { 1690 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 1691 struct mana_recv_buf_oob *rx_oob; 1692 struct device *dev = gc->dev; 1693 struct page *page; 1694 dma_addr_t da; 1695 u32 buf_idx; 1696 1697 WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE); 1698 1699 *rxq_size = 0; 1700 *cq_size = 0; 1701 1702 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 1703 rx_oob = &rxq->rx_oobs[buf_idx]; 1704 memset(rx_oob, 0, sizeof(*rx_oob)); 1705 1706 page = alloc_page(GFP_KERNEL); 1707 if (!page) 1708 return -ENOMEM; 1709 1710 da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize, 1711 DMA_FROM_DEVICE); 1712 1713 if (dma_mapping_error(dev, da)) { 1714 __free_page(page); 1715 return -ENOMEM; 1716 } 1717 1718 rx_oob->buf_va = page_to_virt(page); 1719 rx_oob->buf_dma_addr = da; 1720 1721 rx_oob->num_sge = 1; 1722 rx_oob->sgl[0].address = rx_oob->buf_dma_addr; 1723 rx_oob->sgl[0].size = rxq->datasize; 1724 rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey; 1725 1726 rx_oob->wqe_req.sgl = rx_oob->sgl; 1727 rx_oob->wqe_req.num_sge = rx_oob->num_sge; 1728 rx_oob->wqe_req.inline_oob_size = 0; 1729 rx_oob->wqe_req.inline_oob_data = NULL; 1730 rx_oob->wqe_req.flags = 0; 1731 rx_oob->wqe_req.client_data_unit = 0; 1732 1733 *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + 1734 MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); 1735 *cq_size += COMP_ENTRY_SIZE; 1736 } 1737 1738 return 0; 1739 } 1740 1741 static int mana_push_wqe(struct mana_rxq *rxq) 1742 { 1743 struct mana_recv_buf_oob *rx_oob; 1744 u32 buf_idx; 1745 int err; 1746 1747 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 1748 rx_oob = &rxq->rx_oobs[buf_idx]; 1749 1750 err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, 1751 &rx_oob->wqe_inf); 1752 if (err) 1753 return -ENOSPC; 1754 } 1755 1756 return 0; 1757 } 1758 1759 static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, 1760 u32 rxq_idx, struct mana_eq *eq, 1761 struct net_device *ndev) 1762 { 1763 struct gdma_dev *gd = apc->ac->gdma_dev; 1764 struct mana_obj_spec wq_spec; 1765 struct mana_obj_spec cq_spec; 1766 struct gdma_queue_spec spec; 1767 struct mana_cq *cq = NULL; 1768 struct gdma_context *gc; 1769 u32 cq_size, rq_size; 1770 struct mana_rxq *rxq; 1771 int err; 1772 1773 gc = gd->gdma_context; 1774 1775 rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), 1776 
GFP_KERNEL); 1777 if (!rxq) 1778 return NULL; 1779 1780 rxq->ndev = ndev; 1781 rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; 1782 rxq->rxq_idx = rxq_idx; 1783 rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64); 1784 rxq->rxobj = INVALID_MANA_HANDLE; 1785 1786 err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); 1787 if (err) 1788 goto out; 1789 1790 rq_size = PAGE_ALIGN(rq_size); 1791 cq_size = PAGE_ALIGN(cq_size); 1792 1793 /* Create RQ */ 1794 memset(&spec, 0, sizeof(spec)); 1795 spec.type = GDMA_RQ; 1796 spec.monitor_avl_buf = true; 1797 spec.queue_size = rq_size; 1798 err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); 1799 if (err) 1800 goto out; 1801 1802 /* Create RQ's CQ */ 1803 cq = &rxq->rx_cq; 1804 cq->type = MANA_CQ_TYPE_RX; 1805 cq->rxq = rxq; 1806 1807 memset(&spec, 0, sizeof(spec)); 1808 spec.type = GDMA_CQ; 1809 spec.monitor_avl_buf = false; 1810 spec.queue_size = cq_size; 1811 spec.cq.callback = mana_schedule_napi; 1812 spec.cq.parent_eq = eq->eq; 1813 spec.cq.context = cq; 1814 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 1815 if (err) 1816 goto out; 1817 1818 memset(&wq_spec, 0, sizeof(wq_spec)); 1819 memset(&cq_spec, 0, sizeof(cq_spec)); 1820 wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle; 1821 wq_spec.queue_size = rxq->gdma_rq->queue_size; 1822 1823 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 1824 cq_spec.queue_size = cq->gdma_cq->queue_size; 1825 cq_spec.modr_ctx_id = 0; 1826 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 1827 1828 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, 1829 &wq_spec, &cq_spec, &rxq->rxobj); 1830 if (err) 1831 goto out; 1832 1833 rxq->gdma_rq->id = wq_spec.queue_index; 1834 cq->gdma_cq->id = cq_spec.queue_index; 1835 1836 rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 1837 cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 1838 1839 rxq->gdma_id = rxq->gdma_rq->id; 1840 cq->gdma_id = cq->gdma_cq->id; 1841 1842 err = mana_push_wqe(rxq); 1843 if (err) 1844 goto out; 1845 1846 if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { 1847 err = -EINVAL; 1848 goto out; 1849 } 1850 1851 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 1852 1853 netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1); 1854 1855 WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, 1856 cq->napi.napi_id)); 1857 WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, 1858 MEM_TYPE_PAGE_SHARED, NULL)); 1859 1860 napi_enable(&cq->napi); 1861 1862 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 1863 out: 1864 if (!err) 1865 return rxq; 1866 1867 netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); 1868 1869 mana_destroy_rxq(apc, rxq, false); 1870 1871 if (cq) 1872 mana_deinit_cq(apc, cq); 1873 1874 return NULL; 1875 } 1876 1877 static int mana_add_rx_queues(struct mana_port_context *apc, 1878 struct net_device *ndev) 1879 { 1880 struct mana_context *ac = apc->ac; 1881 struct mana_rxq *rxq; 1882 int err = 0; 1883 int i; 1884 1885 for (i = 0; i < apc->num_queues; i++) { 1886 rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); 1887 if (!rxq) { 1888 err = -ENOMEM; 1889 goto out; 1890 } 1891 1892 u64_stats_init(&rxq->stats.syncp); 1893 1894 apc->rxqs[i] = rxq; 1895 } 1896 1897 apc->default_rxobj = apc->rxqs[0]->rxobj; 1898 out: 1899 return err; 1900 } 1901 1902 static void mana_destroy_vport(struct mana_port_context *apc) 1903 { 1904 struct gdma_dev *gd = apc->ac->gdma_dev; 1905 struct mana_rxq *rxq; 1906 u32 rxq_idx; 1907 1908 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 1909 rxq = 
apc->rxqs[rxq_idx]; 1910 if (!rxq) 1911 continue; 1912 1913 mana_destroy_rxq(apc, rxq, true); 1914 apc->rxqs[rxq_idx] = NULL; 1915 } 1916 1917 mana_destroy_txq(apc); 1918 mana_uncfg_vport(apc); 1919 1920 if (gd->gdma_context->is_pf) 1921 mana_pf_deregister_hw_vport(apc); 1922 } 1923 1924 static int mana_create_vport(struct mana_port_context *apc, 1925 struct net_device *net) 1926 { 1927 struct gdma_dev *gd = apc->ac->gdma_dev; 1928 int err; 1929 1930 apc->default_rxobj = INVALID_MANA_HANDLE; 1931 1932 if (gd->gdma_context->is_pf) { 1933 err = mana_pf_register_hw_vport(apc); 1934 if (err) 1935 return err; 1936 } 1937 1938 err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); 1939 if (err) 1940 return err; 1941 1942 return mana_create_txq(apc, net); 1943 } 1944 1945 static void mana_rss_table_init(struct mana_port_context *apc) 1946 { 1947 int i; 1948 1949 for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) 1950 apc->indir_table[i] = 1951 ethtool_rxfh_indir_default(i, apc->num_queues); 1952 } 1953 1954 int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, 1955 bool update_hash, bool update_tab) 1956 { 1957 u32 queue_idx; 1958 int err; 1959 int i; 1960 1961 if (update_tab) { 1962 for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { 1963 queue_idx = apc->indir_table[i]; 1964 apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; 1965 } 1966 } 1967 1968 err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); 1969 if (err) 1970 return err; 1971 1972 mana_fence_rqs(apc); 1973 1974 return 0; 1975 } 1976 1977 static int mana_init_port(struct net_device *ndev) 1978 { 1979 struct mana_port_context *apc = netdev_priv(ndev); 1980 u32 max_txq, max_rxq, max_queues; 1981 int port_idx = apc->port_idx; 1982 u32 num_indirect_entries; 1983 int err; 1984 1985 err = mana_init_port_context(apc); 1986 if (err) 1987 return err; 1988 1989 err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, 1990 &num_indirect_entries); 1991 if (err) { 1992 netdev_err(ndev, "Failed to query info for vPort %d\n", 1993 port_idx); 1994 goto reset_apc; 1995 } 1996 1997 max_queues = min_t(u32, max_txq, max_rxq); 1998 if (apc->max_queues > max_queues) 1999 apc->max_queues = max_queues; 2000 2001 if (apc->num_queues > apc->max_queues) 2002 apc->num_queues = apc->max_queues; 2003 2004 eth_hw_addr_set(ndev, apc->mac_addr); 2005 2006 return 0; 2007 2008 reset_apc: 2009 kfree(apc->rxqs); 2010 apc->rxqs = NULL; 2011 return err; 2012 } 2013 2014 int mana_alloc_queues(struct net_device *ndev) 2015 { 2016 struct mana_port_context *apc = netdev_priv(ndev); 2017 struct gdma_dev *gd = apc->ac->gdma_dev; 2018 int err; 2019 2020 err = mana_create_vport(apc, ndev); 2021 if (err) 2022 return err; 2023 2024 err = netif_set_real_num_tx_queues(ndev, apc->num_queues); 2025 if (err) 2026 goto destroy_vport; 2027 2028 err = mana_add_rx_queues(apc, ndev); 2029 if (err) 2030 goto destroy_vport; 2031 2032 apc->rss_state = apc->num_queues > 1 ? 
TRI_STATE_TRUE : TRI_STATE_FALSE; 2033 2034 err = netif_set_real_num_rx_queues(ndev, apc->num_queues); 2035 if (err) 2036 goto destroy_vport; 2037 2038 mana_rss_table_init(apc); 2039 2040 err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); 2041 if (err) 2042 goto destroy_vport; 2043 2044 if (gd->gdma_context->is_pf) { 2045 err = mana_pf_register_filter(apc); 2046 if (err) 2047 goto destroy_vport; 2048 } 2049 2050 mana_chn_setxdp(apc, mana_xdp_get(apc)); 2051 2052 return 0; 2053 2054 destroy_vport: 2055 mana_destroy_vport(apc); 2056 return err; 2057 } 2058 2059 int mana_attach(struct net_device *ndev) 2060 { 2061 struct mana_port_context *apc = netdev_priv(ndev); 2062 int err; 2063 2064 ASSERT_RTNL(); 2065 2066 err = mana_init_port(ndev); 2067 if (err) 2068 return err; 2069 2070 if (apc->port_st_save) { 2071 err = mana_alloc_queues(ndev); 2072 if (err) { 2073 mana_cleanup_port_context(apc); 2074 return err; 2075 } 2076 } 2077 2078 apc->port_is_up = apc->port_st_save; 2079 2080 /* Ensure port state updated before txq state */ 2081 smp_wmb(); 2082 2083 if (apc->port_is_up) 2084 netif_carrier_on(ndev); 2085 2086 netif_device_attach(ndev); 2087 2088 return 0; 2089 } 2090 2091 static int mana_dealloc_queues(struct net_device *ndev) 2092 { 2093 struct mana_port_context *apc = netdev_priv(ndev); 2094 struct gdma_dev *gd = apc->ac->gdma_dev; 2095 struct mana_txq *txq; 2096 int i, err; 2097 2098 if (apc->port_is_up) 2099 return -EINVAL; 2100 2101 mana_chn_setxdp(apc, NULL); 2102 2103 if (gd->gdma_context->is_pf) 2104 mana_pf_deregister_filter(apc); 2105 2106 /* No packet can be transmitted now since apc->port_is_up is false. 2107 * There is still a tiny chance that mana_poll_tx_cq() can re-enable 2108 * a txq because it may not timely see apc->port_is_up being cleared 2109 * to false, but it doesn't matter since mana_start_xmit() drops any 2110 * new packets due to apc->port_is_up being false. 2111 * 2112 * Drain all the in-flight TX packets 2113 */ 2114 for (i = 0; i < apc->num_queues; i++) { 2115 txq = &apc->tx_qp[i].txq; 2116 2117 while (atomic_read(&txq->pending_sends) > 0) 2118 usleep_range(1000, 2000); 2119 } 2120 2121 /* We're 100% sure the queues can no longer be woken up, because 2122 * we're sure now mana_poll_tx_cq() can't be running. 
static int mana_dealloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_txq *txq;
	int i, err;

	if (apc->port_is_up)
		return -EINVAL;

	mana_chn_setxdp(apc, NULL);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_filter(apc);

	/* No packet can be transmitted now since apc->port_is_up is false.
	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
	 * a txq because it may not timely see apc->port_is_up being cleared
	 * to false, but it doesn't matter since mana_start_xmit() drops any
	 * new packets due to apc->port_is_up being false.
	 *
	 * Drain all the in-flight TX packets
	 */
	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;

		while (atomic_read(&txq->pending_sends) > 0)
			usleep_range(1000, 2000);
	}

	/* We're 100% sure the queues can no longer be woken up, because
	 * we're sure now mana_poll_tx_cq() can't be running.
	 */

	apc->rss_state = TRI_STATE_FALSE;
	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
	if (err) {
		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
		return err;
	}

	mana_destroy_vport(apc);

	return 0;
}

int mana_detach(struct net_device *ndev, bool from_close)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	ASSERT_RTNL();

	apc->port_st_save = apc->port_is_up;
	apc->port_is_up = false;

	/* Ensure port state updated before txq state */
	smp_wmb();

	netif_tx_disable(ndev);
	netif_carrier_off(ndev);

	if (apc->port_st_save) {
		err = mana_dealloc_queues(ndev);
		if (err)
			return err;
	}

	if (!from_close) {
		netif_device_detach(ndev);
		mana_cleanup_port_context(apc);
	}

	return 0;
}

static int mana_probe_port(struct mana_context *ac, int port_idx,
			   struct net_device **ndev_storage)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_port_context *apc;
	struct net_device *ndev;
	int err;

	ndev = alloc_etherdev_mq(sizeof(struct mana_port_context),
				 gc->max_num_queues);
	if (!ndev)
		return -ENOMEM;

	*ndev_storage = ndev;

	apc = netdev_priv(ndev);
	apc->ac = ac;
	apc->ndev = ndev;
	apc->max_queues = gc->max_num_queues;
	apc->num_queues = gc->max_num_queues;
	apc->port_handle = INVALID_MANA_HANDLE;
	apc->pf_filter_handle = INVALID_MANA_HANDLE;
	apc->port_idx = port_idx;

	mutex_init(&apc->vport_mutex);
	apc->vport_use_count = 0;

	ndev->netdev_ops = &mana_devops;
	ndev->ethtool_ops = &mana_ethtool_ops;
	ndev->mtu = ETH_DATA_LEN;
	ndev->max_mtu = ndev->mtu;
	ndev->min_mtu = ndev->mtu;
	ndev->needed_headroom = MANA_HEADROOM;
	ndev->dev_port = port_idx;
	SET_NETDEV_DEV(ndev, gc->dev);

	netif_carrier_off(ndev);

	netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);

	err = mana_init_port(ndev);
	if (err)
		goto free_net;

	netdev_lockdep_set_classes(ndev);

	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
	ndev->hw_features |= NETIF_F_RXCSUM;
	ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
	ndev->hw_features |= NETIF_F_RXHASH;
	ndev->features = ndev->hw_features;
	ndev->vlan_features = 0;
	ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
			     NETDEV_XDP_ACT_NDO_XMIT;

	err = register_netdev(ndev);
	if (err) {
		netdev_err(ndev, "Unable to register netdev.\n");
		goto reset_apc;
	}

	return 0;

reset_apc:
	kfree(apc->rxqs);
	apc->rxqs = NULL;
free_net:
	*ndev_storage = NULL;
	netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
	free_netdev(ndev);
	return err;
}

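/* Auxiliary-bus plumbing: add_adev() publishes an auxiliary device named
 * "rdma" (with an IDA-allocated id) under the GDMA parent device so that
 * a matching auxiliary driver (the adapter's RDMA client) can bind to
 * this Ethernet function.  remove_adev() deletes and uninits the device
 * and returns the id to the IDA; the mana_adev wrapper itself is freed
 * from adev_release() once the last reference to the embedded struct
 * device is dropped.
 */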
static void adev_release(struct device *dev)
{
	struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev);

	kfree(madev);
}

static void remove_adev(struct gdma_dev *gd)
{
	struct auxiliary_device *adev = gd->adev;
	int id = adev->id;

	auxiliary_device_delete(adev);
	auxiliary_device_uninit(adev);

	mana_adev_idx_free(id);
	gd->adev = NULL;
}

static int add_adev(struct gdma_dev *gd)
{
	struct auxiliary_device *adev;
	struct mana_adev *madev;
	int ret;

	madev = kzalloc(sizeof(*madev), GFP_KERNEL);
	if (!madev)
		return -ENOMEM;

	adev = &madev->adev;
	ret = mana_adev_idx_alloc();
	if (ret < 0)
		goto idx_fail;
	adev->id = ret;

	adev->name = "rdma";
	adev->dev.parent = gd->gdma_context->dev;
	adev->dev.release = adev_release;
	madev->mdev = gd;

	ret = auxiliary_device_init(adev);
	if (ret)
		goto init_fail;

	ret = auxiliary_device_add(adev);
	if (ret)
		goto add_fail;

	gd->adev = adev;
	return 0;

add_fail:
	auxiliary_device_uninit(adev);

init_fail:
	mana_adev_idx_free(adev->id);

idx_fail:
	kfree(madev);

	return ret;
}

int mana_probe(struct gdma_dev *gd, bool resuming)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	u16 num_ports = 0;
	int err;
	int i;

	dev_info(dev,
		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);

	err = mana_gd_register_device(gd);
	if (err)
		return err;

	if (!resuming) {
		ac = kzalloc(sizeof(*ac), GFP_KERNEL);
		if (!ac)
			return -ENOMEM;

		ac->gdma_dev = gd;
		gd->driver_data = ac;
	}

	err = mana_create_eq(ac);
	if (err)
		goto out;

	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
				    MANA_MICRO_VERSION, &num_ports);
	if (err)
		goto out;

	if (!resuming) {
		ac->num_ports = num_ports;
	} else {
		if (ac->num_ports != num_ports) {
			dev_err(dev, "The number of vPorts changed: %d->%d\n",
				ac->num_ports, num_ports);
			err = -EPROTO;
			goto out;
		}
	}

	if (ac->num_ports == 0)
		dev_err(dev, "Failed to detect any vPort\n");

	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
		ac->num_ports = MAX_PORTS_IN_MANA_DEV;

	if (!resuming) {
		for (i = 0; i < ac->num_ports; i++) {
			err = mana_probe_port(ac, i, &ac->ports[i]);
			if (err)
				break;
		}
	} else {
		for (i = 0; i < ac->num_ports; i++) {
			rtnl_lock();
			err = mana_attach(ac->ports[i]);
			rtnl_unlock();
			if (err)
				break;
		}
	}

	err = add_adev(gd);
out:
	if (err)
		mana_remove(gd, false);

	return err;
}

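/* Tear-down path shared by device removal and suspend.  When suspending,
 * the ports are detached but their net_devices stay registered and the
 * mana_context is kept, so the device can be brought back with a later
 * mana_probe(gd, true); only a full remove unregisters the net_devices
 * and frees the context.  The suspend/resume caller is expected to pair
 * the two roughly as:
 *
 *	mana_remove(gd, true);	suspend: detach ports, keep state
 *	...
 *	mana_probe(gd, true);	resume: re-attach the kept ports
 */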
void mana_remove(struct gdma_dev *gd, bool suspending)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	struct net_device *ndev;
	int err;
	int i;

	/* adev currently doesn't support suspending, always remove it */
	if (gd->adev)
		remove_adev(gd);

	for (i = 0; i < ac->num_ports; i++) {
		ndev = ac->ports[i];
		if (!ndev) {
			if (i == 0)
				dev_err(dev, "No net device to remove\n");
			goto out;
		}

		/* All cleanup actions should stay after rtnl_lock(), otherwise
		 * other functions may access partially cleaned up data.
		 */
		rtnl_lock();

		err = mana_detach(ndev, false);
		if (err)
			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
				   i, err);

		if (suspending) {
			/* No need to unregister the ndev. */
			rtnl_unlock();
			continue;
		}

		unregister_netdevice(ndev);

		rtnl_unlock();

		free_netdev(ndev);
	}

	mana_destroy_eq(ac);
out:
	mana_gd_deregister_device(gd);

	if (suspending)
		return;

	gd->driver_data = NULL;
	gd->gdma_context = NULL;
	kfree(ac);
}