/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"
#include <machine/atomic.h>

static inline bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{
	sq->cev_counter++;
	/* interleave the CQEs */
	if (sq->cev_counter >= sq->cev_factor) {
		sq->cev_counter = 0;
		return (1);
	}
	return (0);
}

void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
	u16 pi = sq->pc & sq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	if (mlx5e_do_send_cqe(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	sq->mbuf[pi].mbuf = NULL;
	sq->mbuf[pi].num_bytes = 0;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;
}

#if (__FreeBSD_version >= 1100000)
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make the kernel call mlx5e_hash_init() after the random stack has finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif

static struct mlx5e_sq *
mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	u32 ch;
	u32 tc;

	/* check if channels are successfully opened */
	if (unlikely(priv->channel == NULL))
		return (NULL);

	/* obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
		tc = (mb->m_pkthdr.ether_vtag >> 13);
		if (tc >= priv->num_tc)
			tc = priv->default_vlan_prio;
	} else {
		tc = priv->default_vlan_prio;
	}

	ch = priv->params.num_channels;

	/* check if flowid is set */
	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
		u32 temp;

		if (rss_hash2bucket(mb->m_pkthdr.flowid,
		    M_HASHTYPE_GET(mb), &temp) == 0)
			ch = temp % ch;
		else
#endif
			ch = (mb->m_pkthdr.flowid % 128) % ch;
	} else {
#if (__FreeBSD_version >= 1100000)
		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
#else
		/*
		 * m_ether_tcpip_hash is not present in stable, so just
		 * put unhashed mbufs on queue 0.
		 */
		ch = 0;
#endif
	}

	/* check if channel is allocated */
	if (unlikely(priv->channel[ch] == NULL))
		return (NULL);

	return (&priv->channel[ch]->sq[tc]);
}

static inline u16
mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
	return (MIN(MLX5E_MAX_TX_INLINE, mb->m_len));
}

static int
mlx5e_get_header_size(struct mbuf *mb)
{
	struct ether_vlan_header *eh;
	struct tcphdr *th;
	struct ip *ip;
	int ip_hlen, tcp_hlen;
	struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	eh = mtod(mb, struct ether_vlan_header *);
	if (mb->m_len < ETHER_HDR_LEN)
		return (0);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}
	if (mb->m_len < eth_hdr_len)
		return (0);
	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mb->m_data + eth_hdr_len);
		if (mb->m_len < eth_hdr_len + sizeof(*ip))
			return (0);
		if (ip->ip_p != IPPROTO_TCP)
			return (0);
		ip_hlen = ip->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (mb->m_len < eth_hdr_len + sizeof(*ip6))
			return (0);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (0);
		eth_hdr_len += sizeof(*ip6);
		break;
	default:
		return (0);
	}
	if (mb->m_len < eth_hdr_len + sizeof(*th))
		return (0);
	th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
	if (mb->m_len < eth_hdr_len)
		return (0);
	return (eth_hdr_len);
}

/*
 * The return value is not passed back up to the network stack,
 * because of the drbr.
 */
static int
mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe *wqe;
	struct ifnet *ifp;
	int nsegs;
	int err;
	int x;
	struct mbuf *mb = *mbp;
	u16 ds_cnt;
	u16 ihs;
	u16 pi;
	u8 opcode;

	/*
	 * Return ENOBUFS if the queue is full; this may trigger reinsertion
	 * of the mbuf into the drbr (see mlx5e_xmit_locked).
	 */
	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
		return (ENOBUFS);
	}

	/* Align SQ edge with NOPs to avoid WQE wrap around */
	pi = ((~sq->pc) & sq->wq.sz_m1);
	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
		/* Send one multi NOP message instead of many */
		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~sq->pc) & sq->wq.sz_m1);
		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
			m_freem(mb);
			return (ENOMEM);
		}
	}

	/* Setup local variables */
	pi = sq->pc & sq->wq.sz_m1;
	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	ifp = sq->channel->ifp;

	memset(wqe, 0, sizeof(*wqe));

	/* Send a copy of the frame to the BPF listener, if any */
	if (ifp != NULL && ifp->if_bpf != NULL)
		ETHER_BPF_MTAP(ifp, mb);

	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
	}
	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
	}
	if (wqe->eth.cs_flags == 0) {
		sq->stats.csum_offload_none++;
	}
	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
		u32 payload_len;
		u32 mss = mb->m_pkthdr.tso_segsz;
		u32 num_pkts;

		wqe->eth.mss = cpu_to_be16(mss);
		opcode = MLX5_OPCODE_LSO;
		ihs = mlx5e_get_header_size(mb);
		payload_len = mb->m_pkthdr.len - ihs;
		if (payload_len == 0)
			num_pkts = 1;
		else
			num_pkts = DIV_ROUND_UP(payload_len, mss);
		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs);

		sq->stats.tso_packets++;
		sq->stats.tso_bytes += payload_len;
	} else {
		opcode = MLX5_OPCODE_SEND;
		ihs = mlx5e_get_inline_hdr_size(sq, mb);
		sq->mbuf[pi].num_bytes = max_t(unsigned int,
		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
	}
	if (mb->m_flags & M_VLANTAG) {
		struct ether_vlan_header *eh =
		    (struct ether_vlan_header *)wqe->eth.inline_hdr_start;

		/* Range checks */
		if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
			ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
		else if (ihs < ETHER_HDR_LEN) {
			err = EINVAL;
			goto tx_drop;
		}
		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
		m_adj(mb, ETHER_HDR_LEN);
		/* Insert 4 bytes VLAN tag into data stream */
		eh->evl_proto = eh->evl_encap_proto;
		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
		/* Copy rest of header data, if any */
		m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
		m_adj(mb, ihs - ETHER_HDR_LEN);
		/* Extend header by 4 bytes */
		ihs += ETHER_VLAN_ENCAP_LEN;
	} else {
		m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
		m_adj(mb, ihs);
	}

	wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);

	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	if (likely(ihs > sizeof(wqe->eth.inline_hdr_start))) {
		ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start),
		    MLX5_SEND_WQE_DS);
	}
	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;

	/* Trim off empty mbufs */
	while (mb->m_len == 0) {
		mb = m_free(mb);
		/* Check if all data has been inlined */
		if (mb == NULL)
			goto skip_dma;
	}

	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/*
		 * Update *mbp before defrag in case it was trimmed in the
		 * loop above.
		 */
		*mbp = mb;
		/* Update statistics */
		sq->stats.defragged++;
		/* Too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* Try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}
	/* Catch errors */
	if (err != 0) {
		goto tx_drop;
	}
	*mbp = mb;

	for (x = 0; x != nsegs; x++) {
		if (segs[x].ds_len == 0)
			continue;
		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
		dseg++;
	}
skip_dma:
	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	if (mlx5e_do_send_cqe(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	/* Store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;

	/* Make sure all mbuf data is written to RAM */
	if (mb != NULL)
		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE);

	sq->stats.packets++;
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur.
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

		for (x = 0; x != sq->cev_factor; x++) {
			ci = sqcc & sq->wq.sz_m1;
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;	/* Safety clear */

			if (mb == NULL) {
				if (sq->mbuf[ci].num_bytes == 0) {
					/* NOP */
					sq->stats.nop++;
				}
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}
			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;

	if (atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY))
		taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
}

static int
mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
	struct mbuf *next;
	int err = 0;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		if (mb)
			err = drbr_enqueue(ifp, sq->br, mb);
		return (err);
	}

	if (mb != NULL)
		/*
		 * If we can't insert the mbuf into the drbr, try to xmit
		 * anyway. We keep the error so we can return it after the
		 * xmit attempt.
		 */
		err = drbr_enqueue(ifp, sq->br, mb);

	/* Process the queue */
	while ((next = drbr_peek(ifp, sq->br)) != NULL) {
		if (mlx5e_sq_xmit(sq, &next) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, sq->br);
			} else {
				drbr_putback(ifp, sq->br, next);
				atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL);
			}
			break;
		}
		drbr_advance(ifp, sq->br);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}
	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}
	/*
	 * Check if we need to start the event timer which flushes the
	 * transmit ring on timeout:
	 */
	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
	    sq->cev_factor != 1)) {
		/* start the timer */
		mlx5e_sq_cev_timeout(sq);
	} else {
		/* don't send NOPs yet */
		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	}
	return (err);
}

int
mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_sq *sq;
	int ret;

	sq = mlx5e_select_queue(ifp, mb);
	if (unlikely(sq == NULL)) {
		/* Invalid send queue */
		m_freem(mb);
		return (ENXIO);
	}
	if (mtx_trylock(&sq->lock)) {
		ret = mlx5e_xmit_locked(ifp, sq, mb);
		mtx_unlock(&sq->lock);
	} else {
		ret = drbr_enqueue(ifp, sq->br, mb);
		taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
	}

	return (ret);
}

void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&sq->cq);
	mtx_unlock(&sq->comp_lock);
}

void
mlx5e_tx_que(void *context, int pending)
{
	struct mlx5e_sq *sq = context;
	struct ifnet *ifp = sq->channel->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mtx_lock(&sq->lock);
		if (!drbr_empty(ifp, sq->br))
			mlx5e_xmit_locked(ifp, sq, NULL);
		mtx_unlock(&sq->lock);
	}
}