/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2021 Microsoft Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/eventhandler.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/in_cksum.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include "mana.h"
#include "mana_sysctl.h"

static int mana_up(struct mana_port_context *apc);
static int mana_down(struct mana_port_context *apc);

extern unsigned int mana_tx_req_size;
extern unsigned int mana_rx_req_size;
extern unsigned int mana_rx_refill_threshold;

static void
mana_rss_key_fill(void *k, size_t size)
{
	static bool rss_key_generated = false;
	static uint8_t rss_key[MANA_HASH_KEY_SIZE];

	KASSERT(size <= MANA_HASH_KEY_SIZE,
	    ("Requested more bytes than MANA RSS key can hold"));

	if (!rss_key_generated) {
		arc4random_buf(rss_key, MANA_HASH_KEY_SIZE);
		rss_key_generated = true;
	}
	memcpy(k, rss_key, size);
}

static int
mana_ifmedia_change(if_t ifp __unused)
{
	return EOPNOTSUPP;
}

static void
mana_ifmedia_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct mana_port_context *apc = if_getsoftc(ifp);

	if (!apc) {
		if_printf(ifp, "Port not available\n");
		return;
	}

	MANA_APC_LOCK_LOCK(apc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!apc->port_is_up) {
		MANA_APC_LOCK_UNLOCK(apc);
		mana_dbg(NULL, "Port %u link is down\n", apc->port_idx);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;
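	/*
	 * The media type is not queried from the device; report a fixed
	 * 100G-DR full-duplex medium while the port is up.
	 */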
	ifmr->ifm_active |= IFM_100G_DR | IFM_FDX;

	MANA_APC_LOCK_UNLOCK(apc);
}

static uint64_t
mana_get_counter(if_t ifp, ift_counter cnt)
{
	struct mana_port_context *apc = if_getsoftc(ifp);
	struct mana_port_stats *stats = &apc->port_stats;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (counter_u64_fetch(stats->rx_packets));
	case IFCOUNTER_OPACKETS:
		return (counter_u64_fetch(stats->tx_packets));
	case IFCOUNTER_IBYTES:
		return (counter_u64_fetch(stats->rx_bytes));
	case IFCOUNTER_OBYTES:
		return (counter_u64_fetch(stats->tx_bytes));
	case IFCOUNTER_IQDROPS:
		return (counter_u64_fetch(stats->rx_drops));
	case IFCOUNTER_OQDROPS:
		return (counter_u64_fetch(stats->tx_drops));
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

static void
mana_qflush(if_t ifp)
{
	if_qflush(ifp);
}

int
mana_restart(struct mana_port_context *apc)
{
	int rc = 0;

	MANA_APC_LOCK_LOCK(apc);
	if (apc->port_is_up)
		mana_down(apc);

	rc = mana_up(apc);
	MANA_APC_LOCK_UNLOCK(apc);

	return (rc);
}

static int
mana_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct mana_port_context *apc = if_getsoftc(ifp);
	struct ifrsskey *ifrk;
	struct ifrsshash *ifrh;
	struct ifreq *ifr;
	uint16_t new_mtu;
	int rc = 0, mask;

	switch (command) {
	case SIOCSIFMTU:
		ifr = (struct ifreq *)data;
		new_mtu = ifr->ifr_mtu;
		if (if_getmtu(ifp) == new_mtu)
			break;
		if ((new_mtu > apc->max_mtu) ||
		    (new_mtu < apc->min_mtu)) {
			if_printf(ifp, "Invalid MTU. new_mtu: %d, "
			    "max allowed: %d, min allowed: %d\n",
			    new_mtu, apc->max_mtu, apc->min_mtu);
			return EINVAL;
		}
		MANA_APC_LOCK_LOCK(apc);
		if (apc->port_is_up)
			mana_down(apc);

		apc->frame_size = new_mtu + ETHER_HDR_LEN;
		apc->mtu = new_mtu;
		if_setmtu(ifp, new_mtu);
		mana_dbg(NULL, "Set MTU to %d\n", new_mtu);

		rc = mana_up(apc);
		MANA_APC_LOCK_UNLOCK(apc);
		break;

	case SIOCSIFFLAGS:
		if (if_getflags(ifp) & IFF_UP) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
				MANA_APC_LOCK_LOCK(apc);
				if (!apc->port_is_up)
					rc = mana_up(apc);
				MANA_APC_LOCK_UNLOCK(apc);
			}
		} else {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				MANA_APC_LOCK_LOCK(apc);
				if (apc->port_is_up)
					mana_down(apc);
				MANA_APC_LOCK_UNLOCK(apc);
			}
		}
		break;

	case SIOCSIFCAP:
		MANA_APC_LOCK_LOCK(apc);
		ifr = (struct ifreq *)data;
		/*
		 * Fix up requested capabilities w/ supported capabilities,
		 * since the supported capabilities could have been changed.
		 */
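		/*
		 * 'mask' holds the capability bits that actually change:
		 * the requested capabilities (limited to those supported)
		 * XOR-ed with the currently enabled ones.
		 */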
		mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^
		    if_getcapenable(ifp);

		if (mask & IFCAP_TXCSUM) {
			if_togglecapenable(ifp, IFCAP_TXCSUM);
			if_togglehwassist(ifp, (CSUM_TCP | CSUM_UDP | CSUM_IP));

			if ((IFCAP_TSO4 & if_getcapenable(ifp)) &&
			    !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
				mask &= ~IFCAP_TSO4;
				if_setcapenablebit(ifp, 0, IFCAP_TSO4);
				if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
				mana_warn(NULL,
				    "Also disabled tso4 due to -txcsum.\n");
			}
		}

		if (mask & IFCAP_TXCSUM_IPV6) {
			if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
			if_togglehwassist(ifp, (CSUM_UDP_IPV6 | CSUM_TCP_IPV6));

			if ((IFCAP_TSO6 & if_getcapenable(ifp)) &&
			    !(IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp))) {
				mask &= ~IFCAP_TSO6;
				if_setcapenablebit(ifp, 0, IFCAP_TSO6);
				if_sethwassistbits(ifp, 0, CSUM_IP6_TSO);
				mana_warn(ifp,
				    "Also disabled tso6 due to -txcsum6.\n");
			}
		}

		if (mask & IFCAP_RXCSUM)
			if_togglecapenable(ifp, IFCAP_RXCSUM);
		/* We can't diff IPv6 packets from IPv4 packets on RX path. */
		if (mask & IFCAP_RXCSUM_IPV6)
			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);

		if (mask & IFCAP_LRO)
			if_togglecapenable(ifp, IFCAP_LRO);

		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & if_getcapenable(ifp)) &&
			    !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
				MANA_APC_LOCK_UNLOCK(apc);
				if_printf(ifp, "Enable txcsum first.\n");
				rc = EAGAIN;
				goto out;
			}
			if_togglecapenable(ifp, IFCAP_TSO4);
			if_togglehwassist(ifp, CSUM_IP_TSO);
		}

		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & if_getcapenable(ifp)) &&
			    !(IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp))) {
				MANA_APC_LOCK_UNLOCK(apc);
				if_printf(ifp, "Enable txcsum6 first.\n");
				rc = EAGAIN;
				goto out;
			}
			if_togglecapenable(ifp, IFCAP_TSO6);
			if_togglehwassist(ifp, CSUM_IP6_TSO);
		}

		MANA_APC_LOCK_UNLOCK(apc);
out:
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		ifr = (struct ifreq *)data;
		rc = ifmedia_ioctl(ifp, ifr, &apc->media, command);
		break;

	case SIOCGIFRSSKEY:
		ifrk = (struct ifrsskey *)data;
		ifrk->ifrk_func = RSS_FUNC_TOEPLITZ;
		ifrk->ifrk_keylen = MANA_HASH_KEY_SIZE;
		memcpy(ifrk->ifrk_key, apc->hashkey, MANA_HASH_KEY_SIZE);
		break;

	case SIOCGIFRSSHASH:
		ifrh = (struct ifrsshash *)data;
		ifrh->ifrh_func = RSS_FUNC_TOEPLITZ;
		ifrh->ifrh_types =
		    RSS_TYPE_TCP_IPV4 |
		    RSS_TYPE_UDP_IPV4 |
		    RSS_TYPE_TCP_IPV6 |
		    RSS_TYPE_UDP_IPV6;
		break;

	default:
		rc = ether_ioctl(ifp, command, data);
		break;
	}

	return (rc);
}

static inline void
mana_alloc_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		*begin = counter_u64_alloc(M_WAITOK);
}

static inline void
mana_free_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		counter_u64_free(*begin);
}

static bool
mana_can_tx(struct gdma_queue *wq)
{
	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
}

static inline int
mana_tx_map_mbuf(struct mana_port_context *apc,
    struct mana_send_buf_info *tx_info,
    struct mbuf **m_head, struct mana_tx_package *tp,
    struct mana_stats *tx_stats)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	bus_dma_segment_t segs[MAX_MBUF_FRAGS];
	struct mbuf *m = *m_head;
	int err, nsegs, i;

	err = bus_dmamap_load_mbuf_sg(apc->tx_buf_tag, tx_info->dma_map,
	    m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		struct mbuf *m_new;

		counter_u64_add(tx_stats->collapse, 1);
		m_new = m_collapse(m, M_NOWAIT, MAX_MBUF_FRAGS);
		if (unlikely(m_new == NULL)) {
			counter_u64_add(tx_stats->collapse_err, 1);
			return ENOBUFS;
		} else {
			*m_head = m = m_new;
		}

		mana_warn(NULL,
		    "Too many segs in orig mbuf, m_collapse called\n");

		err = bus_dmamap_load_mbuf_sg(apc->tx_buf_tag,
		    tx_info->dma_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
	}
	if (!err) {
		for (i = 0; i < nsegs; i++) {
			tp->wqe_req.sgl[i].address = segs[i].ds_addr;
			tp->wqe_req.sgl[i].mem_key = gd->gpa_mkey;
			tp->wqe_req.sgl[i].size = segs[i].ds_len;
		}
		tp->wqe_req.num_sge = nsegs;

		tx_info->mbuf = *m_head;

		bus_dmamap_sync(apc->tx_buf_tag, tx_info->dma_map,
		    BUS_DMASYNC_PREWRITE);
	}

	return err;
}

static inline void
mana_tx_unmap_mbuf(struct mana_port_context *apc,
    struct mana_send_buf_info *tx_info)
{
	bus_dmamap_sync(apc->tx_buf_tag, tx_info->dma_map,
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(apc->tx_buf_tag, tx_info->dma_map);
	if (tx_info->mbuf) {
		m_freem(tx_info->mbuf);
		tx_info->mbuf = NULL;
	}
}

static inline int
mana_load_rx_mbuf(struct mana_port_context *apc, struct mana_rxq *rxq,
    struct mana_recv_buf_oob *rx_oob, bool alloc_mbuf)
{
	bus_dma_segment_t segs[1];
	struct mbuf *mbuf;
	int nsegs, err;
	uint32_t mlen;

	if (alloc_mbuf) {
		mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rxq->datasize);
		if (unlikely(mbuf == NULL))
			return ENOMEM;

		mbuf->m_pkthdr.len = mbuf->m_len = rxq->datasize;
		mlen = rxq->datasize;
	} else {
		if (rx_oob->mbuf) {
			mbuf = rx_oob->mbuf;
			mlen = rx_oob->mbuf->m_pkthdr.len;
		} else {
			return ENOMEM;
		}
	}

	err = bus_dmamap_load_mbuf_sg(apc->rx_buf_tag, rx_oob->dma_map,
	    mbuf, segs, &nsegs, BUS_DMA_NOWAIT);

	if (unlikely((err != 0) || (nsegs != 1))) {
		mana_warn(NULL, "Failed to map mbuf, error: %d, "
		    "nsegs: %d\n", err, nsegs);
		counter_u64_add(rxq->stats.dma_mapping_err, 1);
		goto error;
	}

	bus_dmamap_sync(apc->rx_buf_tag, rx_oob->dma_map,
	    BUS_DMASYNC_PREREAD);

	rx_oob->mbuf = mbuf;
	rx_oob->num_sge = 1;
	rx_oob->sgl[0].address = segs[0].ds_addr;
	rx_oob->sgl[0].size = mlen;
	rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;

	return 0;

error:
	m_freem(mbuf);
	return EFAULT;
}

static inline void
mana_unload_rx_mbuf(struct mana_port_context *apc, struct mana_rxq *rxq,
    struct mana_recv_buf_oob *rx_oob, bool free_mbuf)
{
	bus_dmamap_sync(apc->rx_buf_tag, rx_oob->dma_map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(apc->rx_buf_tag, rx_oob->dma_map);

	if (free_mbuf && rx_oob->mbuf) {
		m_freem(rx_oob->mbuf);
		rx_oob->mbuf = NULL;
	}
}


/* Use a couple of mbuf PH_loc spaces for l3 and l4 protocol type */
#define MANA_L3_PROTO(_mbuf)	((_mbuf)->m_pkthdr.PH_loc.sixteen[0])
#define MANA_L4_PROTO(_mbuf)	((_mbuf)->m_pkthdr.PH_loc.sixteen[1])

#define MANA_TXQ_FULL	(IFF_DRV_RUNNING | IFF_DRV_OACTIVE)

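/*
 * Drain the buf_ring of a TX queue: map each mbuf for DMA, fill in the
 * per-packet out-of-band area (checksum/TSO offload metadata), post the
 * work requests to the SQ and ring the doorbell.  Called with the txq
 * mutex held.
 */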
static void
mana_xmit(struct mana_txq *txq)
{
	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
	struct mana_send_buf_info *tx_info;
	if_t ndev = txq->ndev;
	struct mbuf *mbuf;
	struct mana_port_context *apc = if_getsoftc(ndev);
	unsigned int tx_queue_size = apc->tx_queue_size;
	struct mana_port_stats *port_stats = &apc->port_stats;
	struct gdma_dev *gd = apc->ac->gdma_dev;
	uint64_t packets, bytes;
	uint16_t next_to_use;
	struct mana_tx_package pkg = {};
	struct mana_stats *tx_stats;
	struct gdma_queue *gdma_sq;
	struct mana_cq *cq;
	int err, len;
	bool is_tso;

	gdma_sq = txq->gdma_sq;
	cq = &apc->tx_qp[txq->idx].tx_cq;
	tx_stats = &txq->stats;

	packets = 0;
	bytes = 0;
	next_to_use = txq->next_to_use;

	while ((mbuf = drbr_peek(ndev, txq->txq_br)) != NULL) {
		if (!apc->port_is_up ||
		    (if_getdrvflags(ndev) & MANA_TXQ_FULL) != IFF_DRV_RUNNING) {
			drbr_putback(ndev, txq->txq_br, mbuf);
			break;
		}

		if (!mana_can_tx(gdma_sq)) {
			/* SQ is full. Set the IFF_DRV_OACTIVE flag */
			if_setdrvflagbits(apc->ndev, IFF_DRV_OACTIVE, 0);
			counter_u64_add(tx_stats->stop, 1);
			uint64_t stops = counter_u64_fetch(tx_stats->stop);
			uint64_t wakeups = counter_u64_fetch(tx_stats->wakeup);
#define MANA_TXQ_STOP_THRESHOLD 50
			if (stops > MANA_TXQ_STOP_THRESHOLD && wakeups > 0 &&
			    stops > wakeups && txq->alt_txq_idx == txq->idx) {
				txq->alt_txq_idx =
				    (txq->idx + (stops / wakeups))
				    % apc->num_queues;
				counter_u64_add(tx_stats->alt_chg, 1);
			}

			drbr_putback(ndev, txq->txq_br, mbuf);

			taskqueue_enqueue(cq->cleanup_tq, &cq->cleanup_task);
			break;
		}

		tx_info = &txq->tx_buf_info[next_to_use];

		memset(&pkg, 0, sizeof(struct mana_tx_package));
		pkg.wqe_req.sgl = pkg.sgl_array;

		err = mana_tx_map_mbuf(apc, tx_info, &mbuf, &pkg, tx_stats);
		if (unlikely(err)) {
			mana_dbg(NULL,
			    "Failed to map tx mbuf, err %d\n", err);

			counter_u64_add(tx_stats->dma_mapping_err, 1);

			/* The mbuf is still there. Free it */
			m_freem(mbuf);
			/* Advance the drbr queue */
			drbr_advance(ndev, txq->txq_br);
			continue;
		}

		pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
		pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;

		if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
			pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
			pkt_fmt = MANA_LONG_PKT_FMT;
		} else {
			pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
		}

		pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;

		if (pkt_fmt == MANA_SHORT_PKT_FMT)
			pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
		else
			pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);

		pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
		pkg.wqe_req.flags = 0;
		pkg.wqe_req.client_data_unit = 0;

		is_tso = false;
		if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
			is_tso = true;

			if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IP)
				pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
			else
				pkg.tx_oob.s_oob.is_outer_ipv6 = 1;

			pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
			pkg.tx_oob.s_oob.trans_off = mbuf->m_pkthdr.l3hlen;

			pkg.wqe_req.client_data_unit = mbuf->m_pkthdr.tso_segsz;
			pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
		} else if (mbuf->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
			if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IP) {
				pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
				pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
			} else {
				pkg.tx_oob.s_oob.is_outer_ipv6 = 1;
			}

			if (MANA_L4_PROTO(mbuf) == IPPROTO_TCP) {
				pkg.tx_oob.s_oob.comp_tcp_csum = 1;
				pkg.tx_oob.s_oob.trans_off =
				    mbuf->m_pkthdr.l3hlen;
			} else {
				pkg.tx_oob.s_oob.comp_udp_csum = 1;
			}
		} else if (mbuf->m_pkthdr.csum_flags & CSUM_IP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
			pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
		} else {
			if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IP)
				pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
			else if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IPV6)
				pkg.tx_oob.s_oob.is_outer_ipv6 = 1;
		}

		len = mbuf->m_pkthdr.len;

		err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
		    (struct gdma_posted_wqe_info *)&tx_info->wqe_inf);
		if (unlikely(err)) {
			/* Should not happen */
			if_printf(ndev, "Failed to post TX OOB: %d\n", err);

			mana_tx_unmap_mbuf(apc, tx_info);

			drbr_advance(ndev, txq->txq_br);
			continue;
		}

		next_to_use = MANA_IDX_NEXT(next_to_use, tx_queue_size);

		(void)atomic_inc_return(&txq->pending_sends);

		drbr_advance(ndev, txq->txq_br);

		mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);

		packets++;
		bytes += len;

		if (is_tso) {
			txq->tso_pkts++;
			txq->tso_bytes += len;
		}
	}

	counter_enter();
	counter_u64_add_protected(tx_stats->packets, packets);
	counter_u64_add_protected(port_stats->tx_packets, packets);
	counter_u64_add_protected(tx_stats->bytes, bytes);
	counter_u64_add_protected(port_stats->tx_bytes, bytes);
	counter_exit();

	txq->next_to_use = next_to_use;
}

static void
mana_xmit_taskfunc(void *arg, int pending)
{
	struct mana_txq *txq = (struct mana_txq *)arg;
	if_t ndev = txq->ndev;
	struct mana_port_context *apc = if_getsoftc(ndev);

	while (!drbr_empty(ndev, txq->txq_br) && apc->port_is_up &&
	    (if_getdrvflags(ndev) & MANA_TXQ_FULL) == IFF_DRV_RUNNING) {
		mtx_lock(&txq->txq_mtx);
		mana_xmit(txq);
		mtx_unlock(&txq->txq_mtx);
	}
}

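/*
 * Make sure the first 'len' bytes of the mbuf chain are contiguous in the
 * first mbuf.  If m_pullup() fails it frees the chain, and the caller
 * returns NULL.
 */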
#define PULLUP_HDR(m, len)				\
do {							\
	if (unlikely((m)->m_len < (len))) {		\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (NULL);			\
	}						\
} while (0)

/*
 * If this function fails, the mbuf is freed.
 */
static inline struct mbuf *
mana_tso_fixup(struct mbuf *mbuf)
{
	struct ether_vlan_header *eh = mtod(mbuf, struct ether_vlan_header *);
	struct tcphdr *th;
	uint16_t etype;
	int ehlen;

	if (eh->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehlen = ETHER_HDR_LEN;
	}

	if (etype == ETHERTYPE_IP) {
		struct ip *ip;
		int iphlen;

		PULLUP_HDR(mbuf, ehlen + sizeof(*ip));
		ip = mtodo(mbuf, ehlen);
		iphlen = ip->ip_hl << 2;
		mbuf->m_pkthdr.l3hlen = ehlen + iphlen;

		PULLUP_HDR(mbuf, ehlen + iphlen + sizeof(*th));
		th = mtodo(mbuf, ehlen + iphlen);

		ip->ip_len = 0;
		ip->ip_sum = 0;
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	} else if (etype == ETHERTYPE_IPV6) {
		struct ip6_hdr *ip6;

		PULLUP_HDR(mbuf, ehlen + sizeof(*ip6) + sizeof(*th));
		ip6 = mtodo(mbuf, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			/* Really something wrong, just return */
			mana_dbg(NULL, "TSO mbuf not TCP, freed.\n");
			m_freem(mbuf);
			return NULL;
		}
		mbuf->m_pkthdr.l3hlen = ehlen + sizeof(*ip6);

		th = mtodo(mbuf, ehlen + sizeof(*ip6));

		ip6->ip6_plen = 0;
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
	} else {
		/* CSUM_TSO is set but not IP protocol. */
		mana_warn(NULL, "TSO mbuf not right, freed.\n");
		m_freem(mbuf);
		return NULL;
	}

	MANA_L3_PROTO(mbuf) = etype;

	return (mbuf);
}

/*
 * If this function fails, the mbuf is freed.
 */
static inline struct mbuf *
mana_mbuf_csum_check(struct mbuf *mbuf)
{
	struct ether_vlan_header *eh = mtod(mbuf, struct ether_vlan_header *);
	struct mbuf *mbuf_next;
	uint16_t etype;
	int offset;
	int ehlen;

	if (eh->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehlen = ETHER_HDR_LEN;
	}

	mbuf_next = m_getptr(mbuf, ehlen, &offset);

	MANA_L4_PROTO(mbuf) = 0;
	if (etype == ETHERTYPE_IP) {
		const struct ip *ip;
		int iphlen;

		ip = (struct ip *)(mtodo(mbuf_next, offset));
		iphlen = ip->ip_hl << 2;
		mbuf->m_pkthdr.l3hlen = ehlen + iphlen;

		MANA_L4_PROTO(mbuf) = ip->ip_p;
	} else if (etype == ETHERTYPE_IPV6) {
		const struct ip6_hdr *ip6;

		ip6 = (struct ip6_hdr *)(mtodo(mbuf_next, offset));
		mbuf->m_pkthdr.l3hlen = ehlen + sizeof(*ip6);

		MANA_L4_PROTO(mbuf) = ip6->ip6_nxt;
	} else {
		MANA_L4_PROTO(mbuf) = 0;
	}

	MANA_L3_PROTO(mbuf) = etype;

	return (mbuf);
}

static int
mana_start_xmit(if_t ifp, struct mbuf *m)
{
	struct mana_port_context *apc = if_getsoftc(ifp);
	struct mana_txq *txq;
	int is_drbr_empty;
	uint16_t txq_id;
	int err;

	if (unlikely((!apc->port_is_up) ||
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
		return ENODEV;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		m = mana_tso_fixup(m);
		if (unlikely(m == NULL)) {
			counter_enter();
			counter_u64_add_protected(apc->port_stats.tx_drops, 1);
			counter_exit();
			return EIO;
		}
	} else {
		m = mana_mbuf_csum_check(m);
		if (unlikely(m == NULL)) {
			counter_enter();
			counter_u64_add_protected(apc->port_stats.tx_drops, 1);
			counter_exit();
			return EIO;
		}
	}

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		uint32_t hash = m->m_pkthdr.flowid;
		txq_id = apc->indir_table[(hash) & MANA_INDIRECT_TABLE_MASK] %
		    apc->num_queues;
	} else {
		txq_id = m->m_pkthdr.flowid % apc->num_queues;
	}

	if (apc->enable_tx_altq)
		txq_id = apc->tx_qp[txq_id].txq.alt_txq_idx;

	txq = &apc->tx_qp[txq_id].txq;

	is_drbr_empty = drbr_empty(ifp, txq->txq_br);
	err = drbr_enqueue(ifp, txq->txq_br, m);
	if (unlikely(err)) {
		mana_warn(NULL, "txq %u failed to enqueue: %d\n",
		    txq_id, err);
		taskqueue_enqueue(txq->enqueue_tq, &txq->enqueue_task);
		return err;
	}

	if (is_drbr_empty && mtx_trylock(&txq->txq_mtx)) {
		mana_xmit(txq);
		mtx_unlock(&txq->txq_mtx);
	} else {
		taskqueue_enqueue(txq->enqueue_tq, &txq->enqueue_task);
	}

	return 0;
}

static void
mana_cleanup_port_context(struct mana_port_context *apc)
{
	bus_dma_tag_destroy(apc->tx_buf_tag);
	bus_dma_tag_destroy(apc->rx_buf_tag);
	apc->rx_buf_tag = NULL;

	free(apc->rxqs, M_DEVBUF);
	apc->rxqs = NULL;

	mana_free_counters((counter_u64_t *)&apc->port_stats,
	    sizeof(struct mana_port_stats));
}

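/*
 * Allocate the per-port DMA tags and the rxq pointer array.  The TX tag
 * is sized for the largest TSO frame and up to MAX_MBUF_FRAGS segments;
 * the RX tag maps a single segment of at most MJUM16BYTES.
 */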
static int
mana_init_port_context(struct mana_port_context *apc)
{
	device_t dev = apc->ac->gdma_dev->gdma_context->dev;
	uint32_t tso_maxsize;
	int err;

	tso_maxsize = MANA_TSO_MAX_SZ;

	/* Create DMA tag for tx bufs */
	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,			/* alignment, boundary	*/
	    BUS_SPACE_MAXADDR,		/* lowaddr		*/
	    BUS_SPACE_MAXADDR,		/* highaddr		*/
	    NULL, NULL,			/* filter, filterarg	*/
	    tso_maxsize,		/* maxsize		*/
	    MAX_MBUF_FRAGS,		/* nsegments		*/
	    tso_maxsize,		/* maxsegsize		*/
	    0,				/* flags		*/
	    NULL, NULL,			/* lockfunc, lockfuncarg*/
	    &apc->tx_buf_tag);
	if (unlikely(err)) {
		device_printf(dev, "Failed to create TX DMA tag\n");
		return err;
	}

	/* Create DMA tag for rx bufs */
	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    64, 0,			/* alignment, boundary	*/
	    BUS_SPACE_MAXADDR,		/* lowaddr		*/
	    BUS_SPACE_MAXADDR,		/* highaddr		*/
	    NULL, NULL,			/* filter, filterarg	*/
	    MJUM16BYTES,		/* maxsize		*/
	    1,				/* nsegments		*/
	    MJUM16BYTES,		/* maxsegsize		*/
	    0,				/* flags		*/
	    NULL, NULL,			/* lockfunc, lockfuncarg*/
	    &apc->rx_buf_tag);
	if (unlikely(err)) {
		device_printf(dev, "Failed to create RX DMA tag\n");
		return err;
	}

	apc->rxqs = mallocarray(apc->num_queues, sizeof(struct mana_rxq *),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	return 0;
}

static int
mana_send_request(struct mana_context *ac, void *in_buf,
    uint32_t in_len, void *out_buf, uint32_t out_len)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_resp_hdr *resp = out_buf;
	struct gdma_req_hdr *req = in_buf;
	device_t dev = gc->dev;
	static atomic_t activity_id;
	int err;

	req->dev_id = gc->mana.dev_id;
	req->activity_id = atomic_inc_return(&activity_id);

	mana_dbg(NULL, "activity_id  = %u\n", activity_id);

	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
	    out_buf);
	if (err || resp->status) {
		device_printf(dev, "Failed to send mana message: %d, 0x%x\n",
		    err, resp->status);
		return err ? err : EPROTO;
	}

	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
	    req->activity_id != resp->activity_id) {
		device_printf(dev,
		    "Unexpected mana message response: %x,%x,%x,%x\n",
		    req->dev_id.as_uint32, resp->dev_id.as_uint32,
		    req->activity_id, resp->activity_id);
		return EPROTO;
	}

	return 0;
}

static int
mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
    const enum mana_command_code expected_code,
    const uint32_t min_size)
{
	if (resp_hdr->response.msg_type != expected_code)
		return EPROTO;

	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
		return EPROTO;

	if (resp_hdr->response.msg_size < min_size)
		return EPROTO;

	return 0;
}

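/*
 * Negotiate the protocol version with the device and learn how many
 * vPorts it exposes.  A V2 response also carries the adapter MTU.
 */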
static int
mana_query_device_cfg(struct mana_context *ac, uint32_t proto_major_ver,
    uint32_t proto_minor_ver, uint32_t proto_micro_ver,
    uint16_t *max_num_vports)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_query_device_cfg_resp resp = {};
	struct mana_query_device_cfg_req req = {};
	device_t dev = gc->dev;
	int err = 0;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
	    sizeof(req), sizeof(resp));

	req.hdr.resp.msg_version = GDMA_MESSAGE_V2;

	req.proto_major_ver = proto_major_ver;
	req.proto_minor_ver = proto_minor_ver;
	req.proto_micro_ver = proto_micro_ver;

	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
	if (err) {
		device_printf(dev, "Failed to query config: %d", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
	    sizeof(resp));
	if (err || resp.hdr.status) {
		device_printf(dev, "Invalid query result: %d, 0x%x\n", err,
		    resp.hdr.status);
		if (!err)
			err = EPROTO;
		return err;
	}

	*max_num_vports = resp.max_num_vports;

	if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2)
		gc->adapter_mtu = resp.adapter_mtu;
	else
		gc->adapter_mtu = ETHERMTU + ETHER_HDR_LEN;

	mana_dbg(NULL, "mana max_num_vports from device = %d, "
	    "adapter_mtu = %u\n",
	    *max_num_vports, gc->adapter_mtu);

	return 0;
}

static int
mana_query_vport_cfg(struct mana_port_context *apc, uint32_t vport_index,
    uint32_t *max_sq, uint32_t *max_rq, uint32_t *num_indir_entry)
{
	struct mana_query_vport_cfg_resp resp = {};
	struct mana_query_vport_cfg_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
	    sizeof(req), sizeof(resp));

	req.vport_index = vport_index;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
	    sizeof(resp));
	if (err)
		return err;

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
	    sizeof(resp));
	if (err)
		return err;

	if (resp.hdr.status)
		return EPROTO;

	*max_sq = resp.max_num_sq;
	*max_rq = resp.max_num_rq;
	*num_indir_entry = resp.num_indirection_ent;

	apc->port_handle = resp.vport;
	memcpy(apc->mac_addr, resp.mac_addr, ETHER_ADDR_LEN);

	return 0;
}

void
mana_uncfg_vport(struct mana_port_context *apc)
{
	apc->vport_use_count--;
	if (apc->vport_use_count < 0) {
		mana_err(NULL,
		    "WARNING: vport_use_count less than 0: %u\n",
		    apc->vport_use_count);
	}
}

int
mana_cfg_vport(struct mana_port_context *apc, uint32_t protection_dom_id,
    uint32_t doorbell_pg_id)
{
	struct mana_config_vport_resp resp = {};
	struct mana_config_vport_req req = {};
	int err;

	/* This function is used to program the Ethernet port in the hardware
	 * table. It can be called from the Ethernet driver or the RDMA driver.
	 *
	 * For Ethernet usage, the hardware supports only one active user on a
	 * physical port. The driver checks on the port usage before programming
	 * the hardware when creating the RAW QP (RDMA driver) or exposing the
	 * device to kernel NET layer (Ethernet driver).
	 *
	 * Because the RDMA driver doesn't know in advance which QP type the
	 * user will create, it exposes the device with all its ports. The user
	 * may not be able to create RAW QP on a port if this port is already
	 * in use by the Ethernet driver from the kernel.
	 *
	 * This physical port limitation only applies to the RAW QP. For RC QP,
	 * the hardware doesn't have this limitation. The user can create RC
	 * QPs on a physical port up to the hardware limits independent of the
	 * Ethernet usage on the same port.
	 */
	if (apc->vport_use_count > 0) {
		return EBUSY;
	}
	apc->vport_use_count++;

	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
	    sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	req.pdid = protection_dom_id;
	req.doorbell_pageid = doorbell_pg_id;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
	    sizeof(resp));
	if (err) {
		if_printf(apc->ndev, "Failed to configure vPort: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
	    sizeof(resp));
	if (err || resp.hdr.status) {
		if_printf(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
		    err, resp.hdr.status);
		if (!err)
			err = EPROTO;

		goto out;
	}

	apc->tx_shortform_allowed = resp.short_form_allowed;
	apc->tx_vp_offset = resp.tx_vport_offset;

	if_printf(apc->ndev, "Configured vPort %ju PD %u DB %u\n",
	    apc->port_handle, protection_dom_id, doorbell_pg_id);

out:
	if (err)
		mana_uncfg_vport(apc);

	return err;
}

static int
mana_cfg_vport_steering(struct mana_port_context *apc,
    enum TRI_STATE rx,
    bool update_default_rxobj, bool update_key,
    bool update_tab)
{
	uint16_t num_entries = MANA_INDIRECT_TABLE_SIZE;
	struct mana_cfg_rx_steer_req *req = NULL;
	struct mana_cfg_rx_steer_resp resp = {};
	if_t ndev = apc->ndev;
	mana_handle_t *req_indir_tab;
	uint32_t req_buf_size;
	int err;

	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
	req = malloc(req_buf_size, M_DEVBUF, M_WAITOK | M_ZERO);

	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
	    sizeof(resp));

	req->vport = apc->port_handle;
	req->num_indir_entries = num_entries;
	req->indir_tab_offset = sizeof(*req);
	req->rx_enable = rx;
	req->rss_enable = apc->rss_state;
	req->update_default_rxobj = update_default_rxobj;
	req->update_hashkey = update_key;
	req->update_indir_tab = update_tab;
	req->default_rxobj = apc->default_rxobj;

	if (update_key)
		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);

	if (update_tab) {
		req_indir_tab = (mana_handle_t *)(req + 1);
		memcpy(req_indir_tab, apc->rxobj_table,
		    req->num_indir_entries * sizeof(mana_handle_t));
	}

	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
	    sizeof(resp));
	if (err) {
		if_printf(ndev, "Failed to configure vPort RX: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
	    sizeof(resp));
	if (err) {
		if_printf(ndev, "vPort RX configuration failed: %d\n", err);
		goto out;
	}

	if (resp.hdr.status) {
		if_printf(ndev, "vPort RX configuration failed: 0x%x\n",
		    resp.hdr.status);
		err = EPROTO;
	}

	if_printf(ndev, "Configured steering vPort %ju entries %u\n",
	    apc->port_handle, num_entries);

out:
	free(req, M_DEVBUF);
	return err;
}

int
mana_create_wq_obj(struct mana_port_context *apc,
    mana_handle_t vport,
    uint32_t wq_type, struct mana_obj_spec *wq_spec,
    struct mana_obj_spec *cq_spec,
    mana_handle_t *wq_obj)
{
	struct mana_create_wqobj_resp resp = {};
	struct mana_create_wqobj_req req = {};
	if_t ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
	    sizeof(req), sizeof(resp));
	req.vport = vport;
	req.wq_type = wq_type;
	req.wq_gdma_region = wq_spec->gdma_region;
	req.cq_gdma_region = cq_spec->gdma_region;
	req.wq_size = wq_spec->queue_size;
	req.cq_size = cq_spec->queue_size;
	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
	req.cq_parent_qid = cq_spec->attached_eq;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
	    sizeof(resp));
	if (err) {
		if_printf(ndev, "Failed to create WQ object: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
	    sizeof(resp));
	if (err || resp.hdr.status) {
		if_printf(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
		    resp.hdr.status);
		if (!err)
			err = EPROTO;
		goto out;
	}

	if (resp.wq_obj == INVALID_MANA_HANDLE) {
		if_printf(ndev, "Got an invalid WQ object handle\n");
		err = EPROTO;
		goto out;
	}

	*wq_obj = resp.wq_obj;
	wq_spec->queue_index = resp.wq_id;
	cq_spec->queue_index = resp.cq_id;

	return 0;
out:
	return err;
}

void
mana_destroy_wq_obj(struct mana_port_context *apc, uint32_t wq_type,
    mana_handle_t wq_obj)
{
	struct mana_destroy_wqobj_resp resp = {};
	struct mana_destroy_wqobj_req req = {};
	if_t ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
	    sizeof(req), sizeof(resp));
	req.wq_type = wq_type;
	req.wq_obj_handle = wq_obj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
	    sizeof(resp));
	if (err) {
		if_printf(ndev, "Failed to destroy WQ object: %d\n", err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
	    sizeof(resp));
	if (err || resp.hdr.status)
		if_printf(ndev, "Failed to destroy WQ object: %d, 0x%x\n",
		    err, resp.hdr.status);
}

static void
mana_destroy_eq(struct mana_context *ac)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_queue *eq;
	int i;

	if (!ac->eqs)
		return;

	for (i = 0; i < gc->max_num_queues; i++) {
		eq = ac->eqs[i].eq;
		if (!eq)
			continue;

		mana_gd_destroy_queue(gc, eq);
	}

	free(ac->eqs, M_DEVBUF);
	ac->eqs = NULL;
}

static int
mana_create_eq(struct mana_context *ac)
{
	struct gdma_dev *gd = ac->gdma_dev;
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_queue_spec spec = {};
	int err;
	int i;

	ac->eqs = mallocarray(gc->max_num_queues, sizeof(struct mana_eq),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	spec.type = GDMA_EQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = EQ_SIZE;
	spec.eq.callback = NULL;
	spec.eq.context = ac->eqs;
	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;

	for (i = 0; i < gc->max_num_queues; i++) {
		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
		if (err)
			goto out;
	}

	return 0;
out:
	mana_destroy_eq(ac);
	return err;
}

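/*
 * Ask the device to fence an RQ, then wait (up to 10 seconds) for the
 * CQE_RX_OBJECT_FENCE completion that mana_process_rx_cqe() signals on
 * that queue.
 */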
static int
mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
{
	struct mana_fence_rq_resp resp = {};
	struct mana_fence_rq_req req = {};
	int err;

	init_completion(&rxq->fence_event);

	mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
	    sizeof(req), sizeof(resp));
	req.wq_obj_handle = rxq->rxobj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
	    sizeof(resp));
	if (err) {
		if_printf(apc->ndev, "Failed to fence RQ %u: %d\n",
		    rxq->rxq_idx, err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
	if (err || resp.hdr.status) {
		if_printf(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
		    rxq->rxq_idx, err, resp.hdr.status);
		if (!err)
			err = EPROTO;

		return err;
	}

	if (wait_for_completion_timeout(&rxq->fence_event, 10 * hz)) {
		if_printf(apc->ndev, "Failed to fence RQ %u: timed out\n",
		    rxq->rxq_idx);
		return ETIMEDOUT;
	}

	return 0;
}

static void
mana_fence_rqs(struct mana_port_context *apc)
{
	unsigned int rxq_idx;
	struct mana_rxq *rxq;
	int err;

	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
		rxq = apc->rxqs[rxq_idx];
		err = mana_fence_rq(apc, rxq);

		/* In case of any error, use sleep instead. */
		if (err)
			gdma_msleep(100);
	}
}

static int
mana_move_wq_tail(struct gdma_queue *wq, uint32_t num_units)
{
	uint32_t used_space_old;
	uint32_t used_space_new;

	used_space_old = wq->head - wq->tail;
	used_space_new = wq->head - (wq->tail + num_units);

	if (used_space_new > used_space_old) {
		mana_err(NULL,
		    "WARNING: new used space %u greater than old one %u\n",
		    used_space_new, used_space_old);
		return ERANGE;
	}

	wq->tail += num_units;
	return 0;
}

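/*
 * Reap TX completions: unmap and free the transmitted mbufs, advance the
 * SQ tail by the completed WQE units, and clear IFF_DRV_OACTIVE (and kick
 * the enqueue task) once enough SQ space is available again.
 */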
static void
mana_poll_tx_cq(struct mana_cq *cq)
{
	struct gdma_comp *completions = cq->gdma_comp_buf;
	struct gdma_posted_wqe_info *wqe_info;
	struct mana_send_buf_info *tx_info;
	unsigned int pkt_transmitted = 0;
	unsigned int wqe_unit_cnt = 0;
	struct mana_txq *txq = cq->txq;
	struct mana_port_context *apc;
	unsigned int tx_queue_size;
	uint16_t next_to_complete;
	if_t ndev;
	int comp_read;
	int txq_idx = txq->idx;
	int i;
	int sa_drop = 0;

	struct gdma_queue *gdma_wq;
	unsigned int avail_space;
	bool txq_full = false;

	ndev = txq->ndev;
	apc = if_getsoftc(ndev);
	tx_queue_size = apc->tx_queue_size;

	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
	    CQE_POLLING_BUFFER);

	if (comp_read < 1)
		return;

	next_to_complete = txq->next_to_complete;

	for (i = 0; i < comp_read; i++) {
		struct mana_tx_comp_oob *cqe_oob;

		if (!completions[i].is_sq) {
			mana_err(NULL, "WARNING: Not for SQ\n");
			return;
		}

		cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
		if (cqe_oob->cqe_hdr.client_type !=
		    MANA_CQE_COMPLETION) {
			mana_err(NULL,
			    "WARNING: Invalid CQE client type %u\n",
			    cqe_oob->cqe_hdr.client_type);
			return;
		}

		switch (cqe_oob->cqe_hdr.cqe_type) {
		case CQE_TX_OKAY:
			break;

		case CQE_TX_SA_DROP:
		case CQE_TX_MTU_DROP:
		case CQE_TX_INVALID_OOB:
		case CQE_TX_INVALID_ETH_TYPE:
		case CQE_TX_HDR_PROCESSING_ERROR:
		case CQE_TX_VF_DISABLED:
		case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
		case CQE_TX_VPORT_DISABLED:
		case CQE_TX_VLAN_TAGGING_VIOLATION:
			sa_drop++;
			mana_dbg(NULL,
			    "TX: txq %d CQE error %d, ntc = %d, "
			    "pending sends = %d: err ignored.\n",
			    txq_idx, cqe_oob->cqe_hdr.cqe_type,
			    next_to_complete, txq->pending_sends);
			counter_u64_add(txq->stats.cqe_err, 1);
			break;

		default:
			/* If the CQE type is unknown, log a debug msg,
			 * and still free the mbuf, etc.
			 */
			mana_dbg(NULL,
			    "ERROR: TX: Unknown CQE type %d\n",
			    cqe_oob->cqe_hdr.cqe_type);
			counter_u64_add(txq->stats.cqe_unknown_type, 1);
			break;
		}
		if (txq->gdma_txq_id != completions[i].wq_num) {
			mana_dbg(NULL,
			    "txq gdma id not match completion wq num: "
			    "%d != %d\n",
			    txq->gdma_txq_id, completions[i].wq_num);
			break;
		}

		tx_info = &txq->tx_buf_info[next_to_complete];
		if (!tx_info->mbuf) {
			mana_err(NULL,
			    "WARNING: txq %d Empty mbuf on tx_info: %u, "
			    "ntu = %u, pending_sends = %d, "
			    "transmitted = %d, sa_drop = %d, i = %d, comp_read = %d\n",
			    txq_idx, next_to_complete, txq->next_to_use,
			    txq->pending_sends, pkt_transmitted, sa_drop,
			    i, comp_read);
			break;
		}

		wqe_info = &tx_info->wqe_inf;
		wqe_unit_cnt += wqe_info->wqe_size_in_bu;

		mana_tx_unmap_mbuf(apc, tx_info);
		mb();

		next_to_complete =
		    MANA_IDX_NEXT(next_to_complete, tx_queue_size);

		pkt_transmitted++;
	}

	txq->next_to_complete = next_to_complete;

	if (wqe_unit_cnt == 0) {
		mana_err(NULL,
		    "WARNING: TX ring not proceeding!\n");
		return;
	}

	mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);

	/* Ensure tail updated before checking q stop */
	wmb();

	gdma_wq = txq->gdma_sq;
	avail_space = mana_gd_wq_avail_space(gdma_wq);

	if ((if_getdrvflags(ndev) & MANA_TXQ_FULL) == MANA_TXQ_FULL) {
		txq_full = true;
	}

	/* Ensure checking txq_full before apc->port_is_up. */
	rmb();

	if (txq_full && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
		/* Grab the txq lock and re-test */
		mtx_lock(&txq->txq_mtx);
		avail_space = mana_gd_wq_avail_space(gdma_wq);

		if ((if_getdrvflags(ndev) & MANA_TXQ_FULL) == MANA_TXQ_FULL &&
		    apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
			/* Clear the Q full flag */
			if_setdrvflagbits(apc->ndev, IFF_DRV_RUNNING,
			    IFF_DRV_OACTIVE);
			counter_u64_add(txq->stats.wakeup, 1);
			if (txq->alt_txq_idx != txq->idx) {
				uint64_t stops = counter_u64_fetch(txq->stats.stop);
				uint64_t wakeups = counter_u64_fetch(txq->stats.wakeup);
				/* Reset alt_txq_idx back if it is not overloaded */
				if (stops < wakeups) {
					txq->alt_txq_idx = txq->idx;
					counter_u64_add(txq->stats.alt_reset, 1);
				}
			}
			rmb();
			/* Schedule a tx enqueue task */
			taskqueue_enqueue(txq->enqueue_tq, &txq->enqueue_task);
		}
		mtx_unlock(&txq->txq_mtx);
	}

	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
		mana_err(NULL,
		    "WARNING: TX %d pending_sends error: %d\n",
		    txq->idx, txq->pending_sends);

	cq->work_done = pkt_transmitted;
}

static void
mana_post_pkt_rxq(struct mana_rxq *rxq,
    struct mana_recv_buf_oob *recv_buf_oob)
{
	int err;

	err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req,
	    &recv_buf_oob->wqe_inf);
	if (err) {
		mana_err(NULL, "WARNING: rxq %u post pkt err %d\n",
		    rxq->rxq_idx, err);
		return;
	}

	if (recv_buf_oob->wqe_inf.wqe_size_in_bu != 1) {
		mana_err(NULL, "WARNING: rxq %u wqe_size_in_bu %u\n",
		    rxq->rxq_idx, recv_buf_oob->wqe_inf.wqe_size_in_bu);
	}
}

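/*
 * Hand a received mbuf to the stack: set the packet header fields,
 * hardware checksum flags and RSS hash type from the completion OOB,
 * then pass it to LRO when eligible or directly to if_input().
 */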
static void
mana_rx_mbuf(struct mbuf *mbuf, struct mana_rxcomp_oob *cqe,
    struct mana_rxq *rxq)
{
	struct mana_stats *rx_stats = &rxq->stats;
	if_t ndev = rxq->ndev;
	uint32_t pkt_len = cqe->ppi[0].pkt_len;
	uint16_t rxq_idx = rxq->rxq_idx;
	struct mana_port_context *apc;
	bool do_lro = false;
	bool do_if_input;

	apc = if_getsoftc(ndev);
	rxq->rx_cq.work_done++;

	if (!mbuf) {
		return;
	}

	mbuf->m_flags |= M_PKTHDR;
	mbuf->m_pkthdr.len = pkt_len;
	mbuf->m_len = pkt_len;
	mbuf->m_pkthdr.rcvif = ndev;

	if ((if_getcapenable(ndev) & IFCAP_RXCSUM ||
	    if_getcapenable(ndev) & IFCAP_RXCSUM_IPV6) &&
	    (cqe->rx_iphdr_csum_succeed)) {
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
		mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) {
			mbuf->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			mbuf->m_pkthdr.csum_data = 0xffff;

			if (cqe->rx_tcp_csum_succeed)
				do_lro = true;
		}
	}

	if (cqe->rx_hashtype != 0) {
		mbuf->m_pkthdr.flowid = cqe->ppi[0].pkt_hash;

		uint16_t hashtype = cqe->rx_hashtype;
		if (hashtype & NDIS_HASH_IPV4_MASK) {
			hashtype &= NDIS_HASH_IPV4_MASK;
			switch (hashtype) {
			case NDIS_HASH_TCP_IPV4:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
				break;
			case NDIS_HASH_UDP_IPV4:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
				break;
			default:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
			}
		} else if (hashtype & NDIS_HASH_IPV6_MASK) {
			hashtype &= NDIS_HASH_IPV6_MASK;
			switch (hashtype) {
			case NDIS_HASH_TCP_IPV6:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
				break;
			case NDIS_HASH_TCP_IPV6_EX:
				M_HASHTYPE_SET(mbuf,
				    M_HASHTYPE_RSS_TCP_IPV6_EX);
				break;
			case NDIS_HASH_UDP_IPV6:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
				break;
			case NDIS_HASH_UDP_IPV6_EX:
				M_HASHTYPE_SET(mbuf,
				    M_HASHTYPE_RSS_UDP_IPV6_EX);
				break;
			default:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
			}
		} else {
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
		}
	} else {
		mbuf->m_pkthdr.flowid = rxq_idx;
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
	}

	do_if_input = true;
	if ((if_getcapenable(ndev) & IFCAP_LRO) && do_lro) {
		rxq->lro_tried++;
		if (rxq->lro.lro_cnt != 0 &&
		    tcp_lro_rx(&rxq->lro, mbuf, 0) == 0)
			do_if_input = false;
		else
			rxq->lro_failed++;
	}
	if (do_if_input) {
		if_input(ndev, mbuf);
	}

	counter_enter();
	counter_u64_add_protected(rx_stats->packets, 1);
	counter_u64_add_protected(apc->port_stats.rx_packets, 1);
	counter_u64_add_protected(rx_stats->bytes, pkt_len);
	counter_u64_add_protected(apc->port_stats.rx_bytes, pkt_len);
	counter_exit();
}

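/*
 * Refill up to 'num' receive buffers: load a new (or reused) mbuf for
 * each empty rx_oob, repost the receive WQE, and ring the RQ doorbell
 * once at the end.  Returns the number of buffers actually refilled.
 */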
static int
mana_refill_rx_mbufs(struct mana_port_context *apc,
    struct mana_rxq *rxq, uint32_t num)
{
	struct mana_recv_buf_oob *rxbuf_oob;
	uint32_t next_to_refill;
	uint32_t i;
	int err;

	next_to_refill = rxq->next_to_refill;

	for (i = 0; i < num; i++) {
		if (next_to_refill == rxq->buf_index) {
			mana_warn(NULL, "refilling index reached current, "
			    "aborted! rxq %u, oob idx %u\n",
			    rxq->rxq_idx, next_to_refill);
			break;
		}

		rxbuf_oob = &rxq->rx_oobs[next_to_refill];

		if (likely(rxbuf_oob->mbuf == NULL)) {
			err = mana_load_rx_mbuf(apc, rxq, rxbuf_oob, true);
		} else {
			mana_warn(NULL, "mbuf not null when refilling, "
			    "rxq %u, oob idx %u, reusing\n",
			    rxq->rxq_idx, next_to_refill);
			err = mana_load_rx_mbuf(apc, rxq, rxbuf_oob, false);
		}

		if (unlikely(err != 0)) {
			mana_dbg(NULL,
			    "failed to load rx mbuf, err = %d, rxq = %u\n",
			    err, rxq->rxq_idx);
			counter_u64_add(rxq->stats.mbuf_alloc_fail, 1);
			break;
		}

		mana_post_pkt_rxq(rxq, rxbuf_oob);

		next_to_refill = MANA_IDX_NEXT(next_to_refill,
		    rxq->num_rx_buf);
	}

	if (likely(i != 0)) {
		struct gdma_context *gc =
		    rxq->gdma_rq->gdma_dev->gdma_context;

		mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
	}

	if (unlikely(i < num)) {
		counter_u64_add(rxq->stats.partial_refill, 1);
		mana_dbg(NULL,
		    "refilled rxq %u with only %u mbufs (%u requested)\n",
		    rxq->rxq_idx, i, num);
	}

	rxq->next_to_refill = next_to_refill;
	return (i);
}

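/*
 * Process one RX CQE: handle error/fence CQE types, unmap the completed
 * buffer, pass the mbuf up via mana_rx_mbuf(), and trigger a refill when
 * the number of consumed buffers reaches the refill threshold.
 */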
static void
mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
    struct gdma_comp *cqe)
{
	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
	struct mana_recv_buf_oob *rxbuf_oob;
	if_t ndev = rxq->ndev;
	struct mana_port_context *apc;
	struct mbuf *old_mbuf;
	uint32_t refill_required;
	uint32_t curr, pktlen;

	switch (oob->cqe_hdr.cqe_type) {
	case CQE_RX_OKAY:
		break;

	case CQE_RX_TRUNCATED:
		apc = if_getsoftc(ndev);
		counter_u64_add(apc->port_stats.rx_drops, 1);
		rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
		if_printf(ndev, "Dropped a truncated packet\n");
		goto drop;

	case CQE_RX_COALESCED_4:
		if_printf(ndev, "RX coalescing is unsupported\n");
		return;

	case CQE_RX_OBJECT_FENCE:
		complete(&rxq->fence_event);
		return;

	default:
		if_printf(ndev, "Unknown RX CQE type = %d\n",
		    oob->cqe_hdr.cqe_type);
		return;
	}

	if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
		return;

	pktlen = oob->ppi[0].pkt_len;

	if (pktlen == 0) {
		/* data packets should never have a packet length of zero */
		if_printf(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%jx\n",
		    rxq->gdma_id, cq->gdma_id, rxq->rxobj);
		return;
	}

	curr = rxq->buf_index;
	rxbuf_oob = &rxq->rx_oobs[curr];
	if (rxbuf_oob->wqe_inf.wqe_size_in_bu != 1) {
		mana_err(NULL, "WARNING: Rx Incorrect complete "
		    "WQE size %u\n",
		    rxbuf_oob->wqe_inf.wqe_size_in_bu);
	}

	apc = if_getsoftc(ndev);

	old_mbuf = rxbuf_oob->mbuf;

	/* Unload DMA map for the old mbuf */
	mana_unload_rx_mbuf(apc, rxq, rxbuf_oob, false);
	/* Clear the mbuf pointer to avoid reuse */
	rxbuf_oob->mbuf = NULL;

	mana_rx_mbuf(old_mbuf, oob, rxq);

drop:
	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);

	rxq->buf_index = MANA_IDX_NEXT(rxq->buf_index, rxq->num_rx_buf);

	/* Check if refill is needed */
	refill_required = MANA_GET_SPACE(rxq->next_to_refill,
	    rxq->buf_index, rxq->num_rx_buf);

	if (refill_required >= rxq->refill_thresh) {
		/* Refill empty rx_oobs with new mbufs */
		mana_refill_rx_mbufs(apc, rxq, refill_required);
	}
}

static void
mana_poll_rx_cq(struct mana_cq *cq)
{
	struct gdma_comp *comp = cq->gdma_comp_buf;
	int comp_read, i;

	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
	KASSERT(comp_read <= CQE_POLLING_BUFFER,
	    ("comp_read %d greater than buf size %d",
	    comp_read, CQE_POLLING_BUFFER));

	for (i = 0; i < comp_read; i++) {
		if (comp[i].is_sq == true) {
			mana_err(NULL,
			    "WARNING: CQE not for receive queue\n");
			return;
		}

		/* verify recv cqe references the right rxq */
		if (comp[i].wq_num != cq->rxq->gdma_id) {
			mana_err(NULL,
			    "WARNING: Received CQE %d not for "
			    "this receive queue %d\n",
			    comp[i].wq_num, cq->rxq->gdma_id);
			return;
		}

		mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
	}

	tcp_lro_flush_all(&cq->rxq->lro);
}

static void
mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;
	uint8_t arm_bit;

	KASSERT(cq->gdma_cq == gdma_queue,
	    ("cq do not match %p, %p", cq->gdma_cq, gdma_queue));

	if (cq->type == MANA_CQ_TYPE_RX) {
		mana_poll_rx_cq(cq);
	} else {
		mana_poll_tx_cq(cq);
	}

	if (cq->work_done < cq->budget && cq->do_not_ring_db == false)
		arm_bit = SET_ARM_BIT;
	else
		arm_bit = 0;

	mana_gd_ring_cq(gdma_queue, arm_bit);
}

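/*
 * CQ cleanup task: repeatedly poll the CQ with a small per-round budget
 * (MANA_RX_BUDGET or MANA_TX_BUDGET).  mana_cq_handler() only re-arms the
 * CQ when a round finishes under budget, so the final round uses a budget
 * larger than the CQE buffer to make sure the CQ gets re-armed.
 */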
#define MANA_POLL_BUDGET	256
#define MANA_RX_BUDGET		8
#define MANA_TX_BUDGET		8

static void
mana_poll(void *arg, int pending)
{
	struct mana_cq *cq = arg;
	int i;

	cq->work_done = 0;
	if (cq->type == MANA_CQ_TYPE_RX) {
		cq->budget = MANA_RX_BUDGET;
	} else {
		cq->budget = MANA_TX_BUDGET;
	}

	for (i = 0; i < MANA_POLL_BUDGET; i++) {
		/*
		 * If this is the last loop, set the budget big enough
		 * so it will arm the CQ anyway.
		 */
		if (i == (MANA_POLL_BUDGET - 1))
			cq->budget = CQE_POLLING_BUFFER + 1;

		mana_cq_handler(cq, cq->gdma_cq);

		if (cq->work_done < cq->budget)
			break;

		cq->work_done = 0;
	}
}

static void
mana_schedule_task(void *arg, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = arg;

	taskqueue_enqueue(cq->cleanup_tq, &cq->cleanup_task);
}

static void
mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!cq->gdma_cq)
		return;

	/* Drain cleanup taskqueue */
	if (cq->cleanup_tq) {
		while (taskqueue_cancel(cq->cleanup_tq,
		    &cq->cleanup_task, NULL)) {
			taskqueue_drain(cq->cleanup_tq,
			    &cq->cleanup_task);
		}

		taskqueue_free(cq->cleanup_tq);
	}

	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
}

static void
mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_send_buf_info *txbuf_info;
	uint32_t pending_sends;
	int i;

	if (!txq->gdma_sq)
		return;

	if ((pending_sends = atomic_read(&txq->pending_sends)) > 0) {
		mana_err(NULL,
		    "WARNING: txq pending sends not zero: %u\n",
		    pending_sends);
	}

	if (txq->next_to_use != txq->next_to_complete) {
		mana_err(NULL,
		    "WARNING: txq buf not completed, "
		    "next use %u, next complete %u\n",
		    txq->next_to_use, txq->next_to_complete);
	}

	/* Flush buf ring. Grab txq mtx lock */
	if (txq->txq_br) {
		mtx_lock(&txq->txq_mtx);
		drbr_flush(apc->ndev, txq->txq_br);
		mtx_unlock(&txq->txq_mtx);
		buf_ring_free(txq->txq_br, M_DEVBUF);
	}

	/* Drain taskqueue */
	if (txq->enqueue_tq) {
		while (taskqueue_cancel(txq->enqueue_tq,
		    &txq->enqueue_task, NULL)) {
			taskqueue_drain(txq->enqueue_tq,
			    &txq->enqueue_task);
		}

		taskqueue_free(txq->enqueue_tq);
	}

	if (txq->tx_buf_info) {
		/* Free all mbufs which are still in-flight */
		for (i = 0; i < apc->tx_queue_size; i++) {
			txbuf_info = &txq->tx_buf_info[i];
			if (txbuf_info->mbuf) {
				mana_tx_unmap_mbuf(apc, txbuf_info);
			}
		}

		free(txq->tx_buf_info, M_DEVBUF);
	}

	mana_free_counters((counter_u64_t *)&txq->stats,
	    sizeof(txq->stats));

	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);

	mtx_destroy(&txq->txq_mtx);
}

static void
mana_destroy_txq(struct mana_port_context *apc)
{
	int i;

	if (!apc->tx_qp)
		return;

	for (i = 0; i < apc->num_queues; i++) {
		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);

		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);

		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
	}

	free(apc->tx_qp, M_DEVBUF);
	apc->tx_qp = NULL;
}

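/*
 * Create the per-queue TX resources: a GDMA SQ and its completion queue,
 * the hardware WQ object binding them, the buf_ring plus enqueue task for
 * deferred transmission, and the CQ cleanup task (optionally pinned to a
 * CPU).
 */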
M_WAITOK | M_ZERO); 2088 2089 /* The minimum size of the WQE is 32 bytes, hence 2090 * apc->tx_queue_size represents the maximum number of WQEs 2091 * the SQ can store. This value is then used to size other queues 2092 * to prevent overflow. 2093 * Also note that the txq_size is always going to be page aligned, 2094 * as min val of apc->tx_queue_size is 128 and that would make 2095 * txq_size 128 * 32 = 4096 and the other higher values of 2096 * apc->tx_queue_size are always power of two. 2097 */ 2098 txq_size = apc->tx_queue_size * 32; 2099 KASSERT(IS_ALIGNED(txq_size, PAGE_SIZE), 2100 ("txq size not page aligned")); 2101 2102 cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; 2103 cq_size = ALIGN(cq_size, PAGE_SIZE); 2104 2105 gc = gd->gdma_context; 2106 2107 for (i = 0; i < apc->num_queues; i++) { 2108 apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; 2109 2110 /* Create SQ */ 2111 txq = &apc->tx_qp[i].txq; 2112 2113 txq->ndev = net; 2114 txq->vp_offset = apc->tx_vp_offset; 2115 txq->idx = i; 2116 txq->alt_txq_idx = i; 2117 2118 memset(&spec, 0, sizeof(spec)); 2119 spec.type = GDMA_SQ; 2120 spec.monitor_avl_buf = true; 2121 spec.queue_size = txq_size; 2122 err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); 2123 if (err) 2124 goto out; 2125 2126 /* Create SQ's CQ */ 2127 cq = &apc->tx_qp[i].tx_cq; 2128 cq->type = MANA_CQ_TYPE_TX; 2129 2130 cq->txq = txq; 2131 2132 memset(&spec, 0, sizeof(spec)); 2133 spec.type = GDMA_CQ; 2134 spec.monitor_avl_buf = false; 2135 spec.queue_size = cq_size; 2136 spec.cq.callback = mana_schedule_task; 2137 spec.cq.parent_eq = ac->eqs[i].eq; 2138 spec.cq.context = cq; 2139 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 2140 if (err) 2141 goto out; 2142 2143 memset(&wq_spec, 0, sizeof(wq_spec)); 2144 memset(&cq_spec, 0, sizeof(cq_spec)); 2145 2146 wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle; 2147 wq_spec.queue_size = txq->gdma_sq->queue_size; 2148 2149 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 2150 cq_spec.queue_size = cq->gdma_cq->queue_size; 2151 cq_spec.modr_ctx_id = 0; 2152 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 2153 2154 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, 2155 &wq_spec, &cq_spec, &apc->tx_qp[i].tx_object); 2156 2157 if (err) 2158 goto out; 2159 2160 txq->gdma_sq->id = wq_spec.queue_index; 2161 cq->gdma_cq->id = cq_spec.queue_index; 2162 2163 txq->gdma_sq->mem_info.dma_region_handle = 2164 GDMA_INVALID_DMA_REGION; 2165 cq->gdma_cq->mem_info.dma_region_handle = 2166 GDMA_INVALID_DMA_REGION; 2167 2168 txq->gdma_txq_id = txq->gdma_sq->id; 2169 2170 cq->gdma_id = cq->gdma_cq->id; 2171 2172 mana_dbg(NULL, 2173 "txq %d, txq gdma id %d, txq cq gdma id %d\n", 2174 i, txq->gdma_txq_id, cq->gdma_id); 2175 2176 if (cq->gdma_id >= gc->max_num_cqs) { 2177 if_printf(net, "CQ id %u too large.\n", cq->gdma_id); 2178 err = EINVAL; 2179 goto out; 2180 } 2181 2182 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2183 2184 /* Initialize tx specific data */ 2185 txq->tx_buf_info = malloc(apc->tx_queue_size * 2186 sizeof(struct mana_send_buf_info), 2187 M_DEVBUF, M_WAITOK | M_ZERO); 2188 2189 snprintf(txq->txq_mtx_name, nitems(txq->txq_mtx_name), 2190 "mana:tx(%d)", i); 2191 mtx_init(&txq->txq_mtx, txq->txq_mtx_name, NULL, MTX_DEF); 2192 2193 txq->txq_br = buf_ring_alloc(4 * apc->tx_queue_size, 2194 M_DEVBUF, M_WAITOK, &txq->txq_mtx); 2195 2196 /* Allocate taskqueue for deferred send */ 2197 TASK_INIT(&txq->enqueue_task, 0, mana_xmit_taskfunc, txq); 2198 txq->enqueue_tq = 
taskqueue_create_fast("mana_tx_enque", 2199 M_NOWAIT, taskqueue_thread_enqueue, &txq->enqueue_tq); 2200 if (unlikely(txq->enqueue_tq == NULL)) { 2201 if_printf(net, 2202 "Unable to create tx %d enqueue task queue\n", i); 2203 err = ENOMEM; 2204 goto out; 2205 } 2206 taskqueue_start_threads(&txq->enqueue_tq, 1, PI_NET, 2207 "mana txq p%u-tx%d", apc->port_idx, i); 2208 2209 mana_alloc_counters((counter_u64_t *)&txq->stats, 2210 sizeof(txq->stats)); 2211 2212 /* Allocate and start the cleanup task on CQ */ 2213 cq->do_not_ring_db = false; 2214 2215 NET_TASK_INIT(&cq->cleanup_task, 0, mana_poll, cq); 2216 cq->cleanup_tq = 2217 taskqueue_create_fast("mana tx cq cleanup", 2218 M_WAITOK, taskqueue_thread_enqueue, 2219 &cq->cleanup_tq); 2220 2221 if (apc->last_tx_cq_bind_cpu < 0) 2222 apc->last_tx_cq_bind_cpu = CPU_FIRST(); 2223 cq->cpu = apc->last_tx_cq_bind_cpu; 2224 apc->last_tx_cq_bind_cpu = CPU_NEXT(apc->last_tx_cq_bind_cpu); 2225 2226 if (apc->bind_cleanup_thread_cpu) { 2227 cpuset_t cpu_mask; 2228 CPU_SETOF(cq->cpu, &cpu_mask); 2229 taskqueue_start_threads_cpuset(&cq->cleanup_tq, 2230 1, PI_NET, &cpu_mask, 2231 "mana cq p%u-tx%u-cpu%d", 2232 apc->port_idx, txq->idx, cq->cpu); 2233 } else { 2234 taskqueue_start_threads(&cq->cleanup_tq, 1, 2235 PI_NET, "mana cq p%u-tx%u", 2236 apc->port_idx, txq->idx); 2237 } 2238 2239 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2240 } 2241 2242 return 0; 2243 out: 2244 mana_destroy_txq(apc); 2245 return err; 2246 } 2247 2248 static void 2249 mana_destroy_rxq(struct mana_port_context *apc, struct mana_rxq *rxq, 2250 bool validate_state) 2251 { 2252 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; 2253 struct mana_recv_buf_oob *rx_oob; 2254 int i; 2255 2256 if (!rxq) 2257 return; 2258 2259 if (validate_state) { 2260 /* 2261 * XXX Cancel and drain cleanup task queue here. 
2262 */ 2263 ; 2264 } 2265 2266 mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); 2267 2268 mana_deinit_cq(apc, &rxq->rx_cq); 2269 2270 mana_free_counters((counter_u64_t *)&rxq->stats, 2271 sizeof(rxq->stats)); 2272 2273 /* Free LRO resources */ 2274 tcp_lro_free(&rxq->lro); 2275 2276 for (i = 0; i < rxq->num_rx_buf; i++) { 2277 rx_oob = &rxq->rx_oobs[i]; 2278 2279 if (rx_oob->mbuf) 2280 mana_unload_rx_mbuf(apc, rxq, rx_oob, true); 2281 2282 bus_dmamap_destroy(apc->rx_buf_tag, rx_oob->dma_map); 2283 } 2284 2285 if (rxq->gdma_rq) 2286 mana_gd_destroy_queue(gc, rxq->gdma_rq); 2287 2288 free(rxq, M_DEVBUF); 2289 } 2290 2291 #define MANA_WQE_HEADER_SIZE 16 2292 #define MANA_WQE_SGE_SIZE 16 2293 2294 static int 2295 mana_alloc_rx_wqe(struct mana_port_context *apc, 2296 struct mana_rxq *rxq, uint32_t *rxq_size, uint32_t *cq_size) 2297 { 2298 struct mana_recv_buf_oob *rx_oob; 2299 uint32_t buf_idx; 2300 int err; 2301 2302 if (rxq->datasize == 0) { 2303 mana_err(NULL, 2304 "WARNING: Invalid rxq datasize %u\n", rxq->datasize); 2305 } 2306 2307 *rxq_size = 0; 2308 *cq_size = 0; 2309 2310 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 2311 rx_oob = &rxq->rx_oobs[buf_idx]; 2312 memset(rx_oob, 0, sizeof(*rx_oob)); 2313 2314 err = bus_dmamap_create(apc->rx_buf_tag, 0, 2315 &rx_oob->dma_map); 2316 if (err) { 2317 mana_err(NULL, 2318 "Failed to create rx DMA map for buf %d\n", 2319 buf_idx); 2320 return err; 2321 } 2322 2323 err = mana_load_rx_mbuf(apc, rxq, rx_oob, true); 2324 if (err) { 2325 mana_err(NULL, 2326 "Failed to create rx DMA map for buf %d\n", 2327 buf_idx); 2328 bus_dmamap_destroy(apc->rx_buf_tag, rx_oob->dma_map); 2329 return err; 2330 } 2331 2332 rx_oob->wqe_req.sgl = rx_oob->sgl; 2333 rx_oob->wqe_req.num_sge = rx_oob->num_sge; 2334 rx_oob->wqe_req.inline_oob_size = 0; 2335 rx_oob->wqe_req.inline_oob_data = NULL; 2336 rx_oob->wqe_req.flags = 0; 2337 rx_oob->wqe_req.client_data_unit = 0; 2338 2339 *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + 2340 MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); 2341 *cq_size += COMP_ENTRY_SIZE; 2342 } 2343 2344 return 0; 2345 } 2346 2347 static int 2348 mana_push_wqe(struct mana_rxq *rxq) 2349 { 2350 struct mana_recv_buf_oob *rx_oob; 2351 uint32_t buf_idx; 2352 int err; 2353 2354 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { 2355 rx_oob = &rxq->rx_oobs[buf_idx]; 2356 2357 err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, 2358 &rx_oob->wqe_inf); 2359 if (err) 2360 return ENOSPC; 2361 } 2362 2363 return 0; 2364 } 2365 2366 static uint32_t 2367 mana_calc_rx_datasize(struct mana_port_context *apc) 2368 { 2369 uint32_t effective_mtu = 0; 2370 2371 if (apc->frame_size > MJUM16BYTES) { 2372 mana_err(NULL, "mana frame_size %u is too big\n", 2373 apc->frame_size); 2374 effective_mtu = MJUM16BYTES; 2375 } else if (apc->frame_size > MJUM9BYTES) { 2376 effective_mtu = MJUM16BYTES; 2377 } else if (apc->frame_size > MJUMPAGESIZE) { 2378 effective_mtu = MJUM9BYTES; 2379 } else if (apc->frame_size > MCLBYTES) { 2380 effective_mtu = MJUMPAGESIZE; 2381 } else { 2382 effective_mtu = MCLBYTES; 2383 } 2384 2385 return effective_mtu; 2386 } 2387 2388 static struct mana_rxq * 2389 mana_create_rxq(struct mana_port_context *apc, uint32_t rxq_idx, 2390 struct mana_eq *eq, if_t ndev) 2391 { 2392 struct gdma_dev *gd = apc->ac->gdma_dev; 2393 struct mana_obj_spec wq_spec; 2394 struct mana_obj_spec cq_spec; 2395 struct gdma_queue_spec spec; 2396 struct mana_cq *cq = NULL; 2397 uint32_t cq_size, rq_size; 2398 struct gdma_context *gc; 2399 struct mana_rxq *rxq; 2400 
int err; 2401 2402 gc = gd->gdma_context; 2403 2404 rxq = malloc(sizeof(*rxq) + 2405 apc->rx_queue_size * sizeof(struct mana_recv_buf_oob), 2406 M_DEVBUF, M_WAITOK | M_ZERO); 2407 rxq->ndev = ndev; 2408 rxq->num_rx_buf = apc->rx_queue_size; 2409 rxq->rxq_idx = rxq_idx; 2410 2411 rxq->datasize = mana_calc_rx_datasize(apc); 2412 mana_dbg(NULL, "Setting rxq %d datasize %d\n", 2413 rxq_idx, rxq->datasize); 2414 2415 /* 2416 * Two steps to set the mbuf refill_thresh. 2417 * 1) If mana_rx_refill_threshold is set, honor it. 2418 * Set to default value otherwise. 2419 * 2) Select the smaller of 1) above and 1/4 of the 2420 * rx buffer size. 2421 */ 2422 if (mana_rx_refill_threshold != 0) 2423 rxq->refill_thresh = mana_rx_refill_threshold; 2424 else 2425 rxq->refill_thresh = MANA_RX_REFILL_THRESH; 2426 rxq->refill_thresh = min_t(uint32_t, 2427 rxq->num_rx_buf / 4, rxq->refill_thresh); 2428 2429 mana_dbg(NULL, "Setting rxq %d refill thresh %u\n", 2430 rxq_idx, rxq->refill_thresh); 2431 2432 rxq->rxobj = INVALID_MANA_HANDLE; 2433 2434 err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); 2435 if (err) 2436 goto out; 2437 2438 /* Create LRO for the RQ */ 2439 if (if_getcapenable(ndev) & IFCAP_LRO) { 2440 err = tcp_lro_init(&rxq->lro); 2441 if (err) { 2442 if_printf(ndev, "Failed to create LRO for rxq %d\n", 2443 rxq_idx); 2444 } else { 2445 rxq->lro.ifp = ndev; 2446 } 2447 } 2448 2449 mana_alloc_counters((counter_u64_t *)&rxq->stats, 2450 sizeof(rxq->stats)); 2451 2452 rq_size = ALIGN(rq_size, PAGE_SIZE); 2453 cq_size = ALIGN(cq_size, PAGE_SIZE); 2454 2455 /* Create RQ */ 2456 memset(&spec, 0, sizeof(spec)); 2457 spec.type = GDMA_RQ; 2458 spec.monitor_avl_buf = true; 2459 spec.queue_size = rq_size; 2460 err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); 2461 if (err) 2462 goto out; 2463 2464 /* Create RQ's CQ */ 2465 cq = &rxq->rx_cq; 2466 cq->type = MANA_CQ_TYPE_RX; 2467 cq->rxq = rxq; 2468 2469 memset(&spec, 0, sizeof(spec)); 2470 spec.type = GDMA_CQ; 2471 spec.monitor_avl_buf = false; 2472 spec.queue_size = cq_size; 2473 spec.cq.callback = mana_schedule_task; 2474 spec.cq.parent_eq = eq->eq; 2475 spec.cq.context = cq; 2476 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); 2477 if (err) 2478 goto out; 2479 2480 memset(&wq_spec, 0, sizeof(wq_spec)); 2481 memset(&cq_spec, 0, sizeof(cq_spec)); 2482 wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle; 2483 wq_spec.queue_size = rxq->gdma_rq->queue_size; 2484 2485 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; 2486 cq_spec.queue_size = cq->gdma_cq->queue_size; 2487 cq_spec.modr_ctx_id = 0; 2488 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; 2489 2490 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, 2491 &wq_spec, &cq_spec, &rxq->rxobj); 2492 if (err) 2493 goto out; 2494 2495 rxq->gdma_rq->id = wq_spec.queue_index; 2496 cq->gdma_cq->id = cq_spec.queue_index; 2497 2498 rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 2499 cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; 2500 2501 rxq->gdma_id = rxq->gdma_rq->id; 2502 cq->gdma_id = cq->gdma_cq->id; 2503 2504 err = mana_push_wqe(rxq); 2505 if (err) 2506 goto out; 2507 2508 if (cq->gdma_id >= gc->max_num_cqs) { 2509 err = EINVAL; 2510 goto out; 2511 } 2512 2513 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2514 2515 /* Allocate and start the cleanup task on CQ */ 2516 cq->do_not_ring_db = false; 2517 2518 NET_TASK_INIT(&cq->cleanup_task, 0, mana_poll, cq); 2519 cq->cleanup_tq = 2520 taskqueue_create_fast("mana rx cq 
cleanup", 2521 M_WAITOK, taskqueue_thread_enqueue, 2522 &cq->cleanup_tq); 2523 2524 if (apc->last_rx_cq_bind_cpu < 0) 2525 apc->last_rx_cq_bind_cpu = CPU_FIRST(); 2526 cq->cpu = apc->last_rx_cq_bind_cpu; 2527 apc->last_rx_cq_bind_cpu = CPU_NEXT(apc->last_rx_cq_bind_cpu); 2528 2529 if (apc->bind_cleanup_thread_cpu) { 2530 cpuset_t cpu_mask; 2531 CPU_SETOF(cq->cpu, &cpu_mask); 2532 taskqueue_start_threads_cpuset(&cq->cleanup_tq, 2533 1, PI_NET, &cpu_mask, 2534 "mana cq p%u-rx%u-cpu%d", 2535 apc->port_idx, rxq->rxq_idx, cq->cpu); 2536 } else { 2537 taskqueue_start_threads(&cq->cleanup_tq, 1, 2538 PI_NET, "mana cq p%u-rx%u", 2539 apc->port_idx, rxq->rxq_idx); 2540 } 2541 2542 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2543 out: 2544 if (!err) 2545 return rxq; 2546 2547 if_printf(ndev, "Failed to create RXQ: err = %d\n", err); 2548 2549 mana_destroy_rxq(apc, rxq, false); 2550 2551 if (cq) 2552 mana_deinit_cq(apc, cq); 2553 2554 return NULL; 2555 } 2556 2557 static int 2558 mana_add_rx_queues(struct mana_port_context *apc, if_t ndev) 2559 { 2560 struct mana_context *ac = apc->ac; 2561 struct mana_rxq *rxq; 2562 int err = 0; 2563 int i; 2564 2565 for (i = 0; i < apc->num_queues; i++) { 2566 rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); 2567 if (!rxq) { 2568 err = ENOMEM; 2569 goto out; 2570 } 2571 2572 apc->rxqs[i] = rxq; 2573 } 2574 2575 apc->default_rxobj = apc->rxqs[0]->rxobj; 2576 out: 2577 return err; 2578 } 2579 2580 static void 2581 mana_destroy_vport(struct mana_port_context *apc) 2582 { 2583 struct mana_rxq *rxq; 2584 uint32_t rxq_idx; 2585 2586 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { 2587 rxq = apc->rxqs[rxq_idx]; 2588 if (!rxq) 2589 continue; 2590 2591 mana_destroy_rxq(apc, rxq, true); 2592 apc->rxqs[rxq_idx] = NULL; 2593 } 2594 2595 mana_destroy_txq(apc); 2596 2597 mana_uncfg_vport(apc); 2598 } 2599 2600 static int 2601 mana_create_vport(struct mana_port_context *apc, if_t net) 2602 { 2603 struct gdma_dev *gd = apc->ac->gdma_dev; 2604 int err; 2605 2606 apc->default_rxobj = INVALID_MANA_HANDLE; 2607 2608 err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); 2609 if (err) 2610 return err; 2611 2612 return mana_create_txq(apc, net); 2613 } 2614 2615 2616 static void mana_rss_table_init(struct mana_port_context *apc) 2617 { 2618 int i; 2619 2620 for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) 2621 apc->indir_table[i] = i % apc->num_queues; 2622 } 2623 2624 int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, 2625 bool update_hash, bool update_tab) 2626 { 2627 uint32_t queue_idx; 2628 int err; 2629 int i; 2630 2631 if (update_tab) { 2632 for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { 2633 queue_idx = apc->indir_table[i]; 2634 apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; 2635 } 2636 } 2637 2638 err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); 2639 if (err) 2640 return err; 2641 2642 mana_fence_rqs(apc); 2643 2644 return 0; 2645 } 2646 2647 static int 2648 mana_init_port(if_t ndev) 2649 { 2650 struct mana_port_context *apc = if_getsoftc(ndev); 2651 uint32_t max_txq, max_rxq, max_queues; 2652 int port_idx = apc->port_idx; 2653 uint32_t num_indirect_entries; 2654 int err; 2655 2656 err = mana_init_port_context(apc); 2657 if (err) 2658 return err; 2659 2660 err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, 2661 &num_indirect_entries); 2662 if (err) { 2663 if_printf(ndev, "Failed to query info for vPort %d\n", 2664 port_idx); 2665 goto reset_apc; 2666 } 2667 2668 max_queues = min_t(uint32_t, max_txq, max_rxq); 2669 if 
(apc->max_queues > max_queues)
2670 apc->max_queues = max_queues;
2671
2672 if (apc->num_queues > apc->max_queues)
2673 apc->num_queues = apc->max_queues;
2674
2675 return 0;
2676
2677 reset_apc:
2678 bus_dma_tag_destroy(apc->rx_buf_tag);
2679 apc->rx_buf_tag = NULL;
2680 free(apc->rxqs, M_DEVBUF);
2681 apc->rxqs = NULL;
2682 return err;
2683 }
2684
2685 int
2686 mana_alloc_queues(if_t ndev)
2687 {
2688 struct mana_port_context *apc = if_getsoftc(ndev);
2689 int err;
2690
2691 err = mana_create_vport(apc, ndev);
2692 if (err)
2693 return err;
2694
2695 err = mana_add_rx_queues(apc, ndev);
2696 if (err)
2697 goto destroy_vport;
2698
2699 apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;
2700
2701 mana_rss_table_init(apc);
2702
2703 err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
2704 if (err)
2705 goto destroy_vport;
2706
2707 return 0;
2708
2709 destroy_vport:
2710 mana_destroy_vport(apc);
2711 return err;
2712 }
2713
2714 static int
2715 mana_up(struct mana_port_context *apc)
2716 {
2717 int err;
2718
2719 mana_dbg(NULL, "mana_up called\n");
2720
2721 err = mana_alloc_queues(apc->ndev);
2722 if (err) {
2723 mana_err(NULL, "Failed to alloc mana queues: %d\n", err);
2724 return err;
2725 }
2726
2727 /* Add queue-specific sysctls */
2728 mana_sysctl_add_queues(apc);
2729
2730 apc->port_is_up = true;
2731
2732 /* Ensure port state updated before txq state */
2733 wmb();
2734
2735 if_link_state_change(apc->ndev, LINK_STATE_UP);
2736 if_setdrvflagbits(apc->ndev, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
2737
2738 return 0;
2739 }
2740
2741
2742 static void
2743 mana_init(void *arg)
2744 {
2745 struct mana_port_context *apc = (struct mana_port_context *)arg;
2746
2747 MANA_APC_LOCK_LOCK(apc);
2748 if (!apc->port_is_up) {
2749 mana_up(apc);
2750 }
2751 MANA_APC_LOCK_UNLOCK(apc);
2752 }
2753
2754 static int
2755 mana_dealloc_queues(if_t ndev)
2756 {
2757 struct mana_port_context *apc = if_getsoftc(ndev);
2758 struct mana_txq *txq;
2759 int i, err;
2760
2761 if (apc->port_is_up)
2762 return EINVAL;
2763
2764 /* No packet can be transmitted now since apc->port_is_up is false.
2765 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
2766 * a txq, because it may not see apc->port_is_up being cleared to
2767 * false in time, but that is harmless since mana_start_xmit() drops
2768 * any new packets once apc->port_is_up is false.
2769 *
2770 * Drain all the in-flight TX packets
2771 */
2772 for (i = 0; i < apc->num_queues; i++) {
2773 txq = &apc->tx_qp[i].txq;
2774
2775 struct mana_cq *tx_cq = &apc->tx_qp[i].tx_cq;
2776 struct mana_cq *rx_cq = &(apc->rxqs[i]->rx_cq);
2777
2778 tx_cq->do_not_ring_db = true;
2779 rx_cq->do_not_ring_db = true;
2780
2781 /* Schedule a cleanup task */
2782 taskqueue_enqueue(tx_cq->cleanup_tq, &tx_cq->cleanup_task);
2783
2784 while (atomic_read(&txq->pending_sends) > 0)
2785 usleep_range(1000, 2000);
2786 }
2787
2788 /* At this point the queues can no longer be woken up, because
2789 * mana_poll_tx_cq() is guaranteed not to be running.
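* (With do_not_ring_db set above, mana_cq_handler() no longer arms these
* CQs, and every txq has drained to zero pending_sends.)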
2790 */ 2791 2792 apc->rss_state = TRI_STATE_FALSE; 2793 err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); 2794 if (err) { 2795 if_printf(ndev, "Failed to disable vPort: %d\n", err); 2796 return err; 2797 } 2798 2799 mana_destroy_vport(apc); 2800 2801 return 0; 2802 } 2803 2804 static int 2805 mana_down(struct mana_port_context *apc) 2806 { 2807 int err = 0; 2808 2809 apc->port_st_save = apc->port_is_up; 2810 apc->port_is_up = false; 2811 2812 /* Ensure port state updated before txq state */ 2813 wmb(); 2814 2815 if (apc->port_st_save) { 2816 if_setdrvflagbits(apc->ndev, IFF_DRV_OACTIVE, 2817 IFF_DRV_RUNNING); 2818 if_link_state_change(apc->ndev, LINK_STATE_DOWN); 2819 2820 mana_sysctl_free_queues(apc); 2821 2822 err = mana_dealloc_queues(apc->ndev); 2823 if (err) { 2824 if_printf(apc->ndev, 2825 "Failed to bring down mana interface: %d\n", err); 2826 } 2827 } 2828 2829 return err; 2830 } 2831 2832 int 2833 mana_detach(if_t ndev) 2834 { 2835 struct mana_port_context *apc = if_getsoftc(ndev); 2836 int err; 2837 2838 ether_ifdetach(ndev); 2839 2840 if (!apc) 2841 return 0; 2842 2843 MANA_APC_LOCK_LOCK(apc); 2844 err = mana_down(apc); 2845 MANA_APC_LOCK_UNLOCK(apc); 2846 2847 mana_cleanup_port_context(apc); 2848 2849 MANA_APC_LOCK_DESTROY(apc); 2850 2851 free(apc, M_DEVBUF); 2852 2853 return err; 2854 } 2855 2856 static unsigned int 2857 mana_get_tx_queue_size(int port_idx, unsigned int request_size) 2858 { 2859 unsigned int new_size; 2860 2861 if (request_size == 0) 2862 /* Uninitialized */ 2863 new_size = DEF_SEND_BUFFERS_PER_QUEUE; 2864 else 2865 new_size = roundup_pow_of_two(request_size); 2866 2867 if (new_size < MIN_SEND_BUFFERS_PER_QUEUE || 2868 new_size > MAX_SEND_BUFFERS_PER_QUEUE) { 2869 mana_info(NULL, "mana port %d: requested tx buffer " 2870 "size %u out of allowable range (%u - %u), " 2871 "setting to default\n", 2872 port_idx, request_size, 2873 MIN_SEND_BUFFERS_PER_QUEUE, 2874 MAX_SEND_BUFFERS_PER_QUEUE); 2875 new_size = DEF_SEND_BUFFERS_PER_QUEUE; 2876 } 2877 mana_info(NULL, "mana port %d: tx buffer size %u " 2878 "(%u requested)\n", 2879 port_idx, new_size, request_size); 2880 2881 return (new_size); 2882 } 2883 2884 static unsigned int 2885 mana_get_rx_queue_size(int port_idx, unsigned int request_size) 2886 { 2887 unsigned int new_size; 2888 2889 if (request_size == 0) 2890 /* Uninitialized */ 2891 new_size = DEF_RX_BUFFERS_PER_QUEUE; 2892 else 2893 new_size = roundup_pow_of_two(request_size); 2894 2895 if (new_size < MIN_RX_BUFFERS_PER_QUEUE || 2896 new_size > MAX_RX_BUFFERS_PER_QUEUE) { 2897 mana_info(NULL, "mana port %d: requested rx buffer " 2898 "size %u out of allowable range (%u - %u), " 2899 "setting to default\n", 2900 port_idx, request_size, 2901 MIN_RX_BUFFERS_PER_QUEUE, 2902 MAX_RX_BUFFERS_PER_QUEUE); 2903 new_size = DEF_RX_BUFFERS_PER_QUEUE; 2904 } 2905 mana_info(NULL, "mana port %d: rx buffer size %u " 2906 "(%u requested)\n", 2907 port_idx, new_size, request_size); 2908 2909 return (new_size); 2910 } 2911 2912 static int 2913 mana_probe_port(struct mana_context *ac, int port_idx, 2914 if_t *ndev_storage) 2915 { 2916 struct gdma_context *gc = ac->gdma_dev->gdma_context; 2917 struct mana_port_context *apc; 2918 uint32_t hwassist; 2919 if_t ndev; 2920 int err; 2921 2922 ndev = if_alloc_dev(IFT_ETHER, gc->dev); 2923 *ndev_storage = ndev; 2924 2925 apc = malloc(sizeof(*apc), M_DEVBUF, M_WAITOK | M_ZERO); 2926 apc->ac = ac; 2927 apc->ndev = ndev; 2928 apc->max_queues = gc->max_num_queues; 2929 apc->num_queues = min_t(unsigned int, 2930 gc->max_num_queues, 
MANA_MAX_NUM_QUEUES); 2931 apc->tx_queue_size = mana_get_tx_queue_size(port_idx, 2932 mana_tx_req_size); 2933 apc->rx_queue_size = mana_get_rx_queue_size(port_idx, 2934 mana_rx_req_size); 2935 apc->port_handle = INVALID_MANA_HANDLE; 2936 apc->port_idx = port_idx; 2937 apc->last_tx_cq_bind_cpu = -1; 2938 apc->last_rx_cq_bind_cpu = -1; 2939 apc->vport_use_count = 0; 2940 apc->max_mtu = gc->adapter_mtu - ETHER_HDR_LEN; 2941 apc->min_mtu = MIN_FRAME_SIZE; 2942 apc->mtu = ETHERMTU; 2943 apc->frame_size = apc->mtu + ETHER_HDR_LEN; 2944 2945 MANA_APC_LOCK_INIT(apc); 2946 2947 if_initname(ndev, device_get_name(gc->dev), port_idx); 2948 if_setdev(ndev,gc->dev); 2949 if_setsoftc(ndev, apc); 2950 2951 if_setflags(ndev, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 2952 if_setinitfn(ndev, mana_init); 2953 if_settransmitfn(ndev, mana_start_xmit); 2954 if_setqflushfn(ndev, mana_qflush); 2955 if_setioctlfn(ndev, mana_ioctl); 2956 if_setgetcounterfn(ndev, mana_get_counter); 2957 2958 if_setmtu(ndev, apc->mtu); 2959 if_setbaudrate(ndev, IF_Gbps(100)); 2960 2961 mana_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); 2962 2963 err = mana_init_port(ndev); 2964 if (err) 2965 goto reset_apc; 2966 2967 if_setcapabilitiesbit(ndev, 2968 IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | 2969 IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | 2970 IFCAP_TSO4 | IFCAP_TSO6 | 2971 IFCAP_LRO | IFCAP_LINKSTATE, 0); 2972 2973 /* Enable all available capabilities by default. */ 2974 if_setcapenable(ndev, if_getcapabilities(ndev)); 2975 2976 /* TSO parameters */ 2977 if_sethwtsomax(ndev, MANA_TSO_MAX_SZ - 2978 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 2979 if_sethwtsomaxsegcount(ndev, MAX_MBUF_FRAGS); 2980 if_sethwtsomaxsegsize(ndev, PAGE_SIZE); 2981 2982 hwassist = 0; 2983 if (if_getcapenable(ndev) & (IFCAP_TSO4 | IFCAP_TSO6)) 2984 hwassist |= CSUM_TSO; 2985 if (if_getcapenable(ndev) & IFCAP_TXCSUM) 2986 hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); 2987 if (if_getcapenable(ndev) & IFCAP_TXCSUM_IPV6) 2988 hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); 2989 mana_dbg(NULL, "set hwassist 0x%x\n", hwassist); 2990 if_sethwassist(ndev, hwassist); 2991 2992 ifmedia_init(&apc->media, IFM_IMASK, 2993 mana_ifmedia_change, mana_ifmedia_status); 2994 ifmedia_add(&apc->media, IFM_ETHER | IFM_AUTO, 0, NULL); 2995 ifmedia_set(&apc->media, IFM_ETHER | IFM_AUTO); 2996 2997 ether_ifattach(ndev, apc->mac_addr); 2998 2999 /* Initialize statistics */ 3000 mana_alloc_counters((counter_u64_t *)&apc->port_stats, 3001 sizeof(struct mana_port_stats)); 3002 mana_sysctl_add_port(apc); 3003 3004 /* Tell the stack that the interface is not active */ 3005 if_setdrvflagbits(ndev, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 3006 3007 return 0; 3008 3009 reset_apc: 3010 free(apc, M_DEVBUF); 3011 *ndev_storage = NULL; 3012 if_printf(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); 3013 if_free(ndev); 3014 return err; 3015 } 3016 3017 int mana_probe(struct gdma_dev *gd) 3018 { 3019 struct gdma_context *gc = gd->gdma_context; 3020 device_t dev = gc->dev; 3021 struct mana_context *ac; 3022 int err; 3023 int i; 3024 3025 device_printf(dev, "%s protocol version: %d.%d.%d\n", DEVICE_NAME, 3026 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); 3027 3028 err = mana_gd_register_device(gd); 3029 if (err) 3030 return err; 3031 3032 ac = malloc(sizeof(*ac), M_DEVBUF, M_WAITOK | M_ZERO); 3033 ac->gdma_dev = gd; 3034 ac->num_ports = 1; 3035 gd->driver_data = ac; 3036 3037 err = mana_create_eq(ac); 3038 if (err) 3039 goto out; 3040 3041 err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, 
MANA_MINOR_VERSION, 3042 MANA_MICRO_VERSION, &ac->num_ports); 3043 if (err) 3044 goto out; 3045 3046 if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) 3047 ac->num_ports = MAX_PORTS_IN_MANA_DEV; 3048 3049 for (i = 0; i < ac->num_ports; i++) { 3050 err = mana_probe_port(ac, i, &ac->ports[i]); 3051 if (err) { 3052 device_printf(dev, 3053 "Failed to probe mana port %d\n", i); 3054 break; 3055 } 3056 } 3057 3058 out: 3059 if (err) 3060 mana_remove(gd); 3061 3062 return err; 3063 } 3064 3065 void 3066 mana_remove(struct gdma_dev *gd) 3067 { 3068 struct gdma_context *gc = gd->gdma_context; 3069 struct mana_context *ac = gd->driver_data; 3070 device_t dev = gc->dev; 3071 if_t ndev; 3072 int i; 3073 3074 for (i = 0; i < ac->num_ports; i++) { 3075 ndev = ac->ports[i]; 3076 if (!ndev) { 3077 if (i == 0) 3078 device_printf(dev, "No net device to remove\n"); 3079 goto out; 3080 } 3081 3082 mana_detach(ndev); 3083 3084 if_free(ndev); 3085 } 3086 3087 mana_destroy_eq(ac); 3088 3089 out: 3090 mana_gd_deregister_device(gd); 3091 gd->driver_data = NULL; 3092 gd->gdma_context = NULL; 3093 free(ac, M_DEVBUF); 3094 } 3095