/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"
#include <machine/atomic.h>

static inline bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{
        sq->cev_counter++;
        /* interleave the CQEs */
        if (sq->cev_counter >= sq->cev_factor) {
                sq->cev_counter = 0;
                return (1);
        }
        return (0);
}

void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
        u16 pi = sq->pc & sq->wq.sz_m1;
        struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

        memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

        wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
        wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
        if (mlx5e_do_send_cqe(sq))
                wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
        else
                wqe->ctrl.fm_ce_se = 0;

        /* Copy data for doorbell */
        memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

        sq->mbuf[pi].mbuf = NULL;
        sq->mbuf[pi].num_bytes = 0;
        sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
        sq->pc += sq->mbuf[pi].num_wqebbs;
}

#if (__FreeBSD_version >= 1100000)
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
        mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make the kernel call mlx5e_hash_init() after the random stack has finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif

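/*
 * Select the send queue for an outgoing mbuf. The traffic class is
 * taken from the VLAN priority bits, when present, and the channel is
 * derived from the mbuf's flow hash or a software-computed hash.
 * Returns NULL when the channels are not available.
 */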
static struct mlx5e_sq *
mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
{
        struct mlx5e_priv *priv = ifp->if_softc;
        u32 ch;
        u32 tc;

        /* check if channels are successfully opened */
        if (unlikely(priv->channel == NULL))
                return (NULL);

        /* obtain VLAN information if present */
        if (mb->m_flags & M_VLANTAG) {
                tc = (mb->m_pkthdr.ether_vtag >> 13);
                if (tc >= priv->num_tc)
                        tc = priv->default_vlan_prio;
        } else {
                tc = priv->default_vlan_prio;
        }

        ch = priv->params.num_channels;

        /* check if flowid is set */
        if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
                u32 temp;

                if (rss_hash2bucket(mb->m_pkthdr.flowid,
                    M_HASHTYPE_GET(mb), &temp) == 0)
                        ch = temp % ch;
                else
#endif
                        ch = (mb->m_pkthdr.flowid % 128) % ch;
        } else {
#if (__FreeBSD_version >= 1100000)
                ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
                    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
#else
                /*
                 * m_ether_tcpip_hash is not present in stable, so just
                 * throw unhashed mbufs on queue 0
                 */
                ch = 0;
#endif
        }

        /* check if channel is allocated */
        if (unlikely(priv->channel[ch] == NULL))
                return (NULL);

        return (&priv->channel[ch]->sq[tc]);
}

static inline u16
mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
        return (MIN(MLX5E_MAX_TX_INLINE, mb->m_len));
}

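/*
 * Compute the combined Ethernet, IP/IPv6 and TCP header length of a
 * packet for LSO. Returns 0 if the packet is not TCP or if the
 * complete headers are not contiguous within the first mbuf.
 */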
static int
mlx5e_get_header_size(struct mbuf *mb)
{
        struct ether_vlan_header *eh;
        struct tcphdr *th;
        struct ip *ip;
        int ip_hlen, tcp_hlen;
        struct ip6_hdr *ip6;
        uint16_t eth_type;
        int eth_hdr_len;

        eh = mtod(mb, struct ether_vlan_header *);
        if (mb->m_len < ETHER_HDR_LEN)
                return (0);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                eth_type = ntohs(eh->evl_proto);
                eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
        } else {
                eth_type = ntohs(eh->evl_encap_proto);
                eth_hdr_len = ETHER_HDR_LEN;
        }
        if (mb->m_len < eth_hdr_len)
                return (0);
        switch (eth_type) {
        case ETHERTYPE_IP:
                ip = (struct ip *)(mb->m_data + eth_hdr_len);
                if (mb->m_len < eth_hdr_len + sizeof(*ip))
                        return (0);
                if (ip->ip_p != IPPROTO_TCP)
                        return (0);
                ip_hlen = ip->ip_hl << 2;
                eth_hdr_len += ip_hlen;
                break;
        case ETHERTYPE_IPV6:
                ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
                if (mb->m_len < eth_hdr_len + sizeof(*ip6))
                        return (0);
                if (ip6->ip6_nxt != IPPROTO_TCP)
                        return (0);
                eth_hdr_len += sizeof(*ip6);
                break;
        default:
                return (0);
        }
        if (mb->m_len < eth_hdr_len + sizeof(*th))
                return (0);
        th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
        tcp_hlen = th->th_off << 2;
        eth_hdr_len += tcp_hlen;
        if (mb->m_len < eth_hdr_len)
                return (0);
        return (eth_hdr_len);
}

/*
 * The return value is not passed back to the network stack, because
 * the mbuf is queued through the drbr; errors are handled in
 * mlx5e_xmit_locked().
 */
static int
mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
{
        bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
        struct mlx5_wqe_data_seg *dseg;
        struct mlx5e_tx_wqe *wqe;
        struct ifnet *ifp;
        int nsegs;
        int err;
        int x;
        struct mbuf *mb = *mbp;
        u16 ds_cnt;
        u16 ihs;
        u16 pi;
        u8 opcode;

        /*
         * Return ENOBUFS if the queue is full; this may trigger reinsertion
         * of the mbuf into the drbr (see mlx5e_xmit_locked()).
         */
        if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
                return (ENOBUFS);
        }

        /* Align SQ edge with NOPs to avoid WQE wrap around */
        pi = ((~sq->pc) & sq->wq.sz_m1);
        if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
                /* Send one multi NOP message instead of many */
                mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
                pi = ((~sq->pc) & sq->wq.sz_m1);
                if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))
                        return (ENOMEM);
        }

        /* Setup local variables */
        pi = sq->pc & sq->wq.sz_m1;
        wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
        ifp = sq->ifp;

        memset(wqe, 0, sizeof(*wqe));

        /* Send a copy of the frame to the BPF listener, if any */
        if (ifp != NULL && ifp->if_bpf != NULL)
                ETHER_BPF_MTAP(ifp, mb);

        if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
                wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
        }
        if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
            CSUM_TCP_IPV6 | CSUM_TSO)) {
                wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
        }
        if (wqe->eth.cs_flags == 0) {
                sq->stats.csum_offload_none++;
        }
        if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
                u32 payload_len;
                u32 mss = mb->m_pkthdr.tso_segsz;
                u32 num_pkts;

                wqe->eth.mss = cpu_to_be16(mss);
                opcode = MLX5_OPCODE_LSO;
                ihs = mlx5e_get_header_size(mb);
                payload_len = mb->m_pkthdr.len - ihs;
                if (payload_len == 0)
                        num_pkts = 1;
                else
                        num_pkts = DIV_ROUND_UP(payload_len, mss);
                sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs);

                sq->stats.tso_packets++;
                sq->stats.tso_bytes += payload_len;
        } else {
                opcode = MLX5_OPCODE_SEND;
                ihs = mlx5e_get_inline_hdr_size(sq, mb);
                sq->mbuf[pi].num_bytes = max_t(unsigned int,
                    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
        }
        if (mb->m_flags & M_VLANTAG) {
                struct ether_vlan_header *eh =
                    (struct ether_vlan_header *)wqe->eth.inline_hdr_start;

                /* Range checks */
                if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
                        ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
                else if (ihs < ETHER_HDR_LEN) {
                        err = EINVAL;
                        goto tx_drop;
                }
                m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
                m_adj(mb, ETHER_HDR_LEN);
                /* Insert 4 bytes VLAN tag into data stream */
                eh->evl_proto = eh->evl_encap_proto;
                eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
                eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
                /* Copy rest of header data, if any */
                m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
                m_adj(mb, ihs - ETHER_HDR_LEN);
                /* Extend header by 4 bytes */
                ihs += ETHER_VLAN_ENCAP_LEN;
        } else {
                m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
                m_adj(mb, ihs);
        }

        wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);

        ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
        if (likely(ihs > sizeof(wqe->eth.inline_hdr_start))) {
                ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start),
                    MLX5_SEND_WQE_DS);
        }
        dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;

        /* Trim off empty mbufs */
        while (mb->m_len == 0) {
                mb = m_free(mb);
                /* Check if all data has been inlined */
                if (mb == NULL)
                        goto skip_dma;
        }

        err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
            mb, segs, &nsegs, BUS_DMA_NOWAIT);
        if (err == EFBIG) {
                /*
                 * Update *mbp before defrag in case it was trimmed in the
                 * loop above
                 */
                *mbp = mb;
                /* Update statistics */
                sq->stats.defragged++;
                /* Too many mbuf fragments */
                mb = m_defrag(*mbp, M_NOWAIT);
                if (mb == NULL) {
                        mb = *mbp;
                        goto tx_drop;
                }
                /* Try again */
                err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
                    mb, segs, &nsegs, BUS_DMA_NOWAIT);
        }
        /* Catch errors */
        if (err != 0)
                goto tx_drop;

        for (x = 0; x != nsegs; x++) {
                if (segs[x].ds_len == 0)
                        continue;
                dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
                dseg->lkey = sq->mkey_be;
                dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
                dseg++;
        }
skip_dma:
        ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));

        wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
        wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
        if (mlx5e_do_send_cqe(sq))
                wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
        else
                wqe->ctrl.fm_ce_se = 0;

        /* Copy data for doorbell */
        memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

        /* Store pointer to mbuf */
        sq->mbuf[pi].mbuf = mb;
        sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
        sq->pc += sq->mbuf[pi].num_wqebbs;

        /* Make sure all mbuf data is written to RAM */
        if (mb != NULL)
                bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
                    BUS_DMASYNC_PREWRITE);

        sq->stats.packets++;
        *mbp = NULL;    /* safety clear */
        return (0);

tx_drop:
        sq->stats.dropped++;
        *mbp = NULL;
        m_freem(mb);
        return (err);
}

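/*
 * Process up to "budget" transmit completion events. Each CQE accounts
 * for "cev_factor" work queue entries: their DMA maps are unloaded and
 * their mbufs freed, after which the SQ consumer counter is advanced
 * and the transmit task is rescheduled if the queue was full.
 */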
static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
        u16 sqcc;

        /*
         * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
         * otherwise a cq overrun may occur
         */
        sqcc = sq->cc;

        while (budget > 0) {
                struct mlx5_cqe64 *cqe;
                struct mbuf *mb;
                u16 x;
                u16 ci;

                cqe = mlx5e_get_cqe(&sq->cq);
                if (!cqe)
                        break;

                mlx5_cqwq_pop(&sq->cq.wq);

                /* update budget according to the event factor */
                budget -= sq->cev_factor;

                for (x = 0; x != sq->cev_factor; x++) {
                        ci = sqcc & sq->wq.sz_m1;
                        mb = sq->mbuf[ci].mbuf;
                        sq->mbuf[ci].mbuf = NULL;       /* Safety clear */

                        if (mb == NULL) {
                                if (sq->mbuf[ci].num_bytes == 0) {
                                        /* NOP */
                                        sq->stats.nop++;
                                }
                        } else {
                                bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
                                    BUS_DMASYNC_POSTWRITE);
                                bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

                                /* Free transmitted mbuf */
                                m_freem(mb);
                        }
                        sqcc += sq->mbuf[ci].num_wqebbs;
                }
        }

        mlx5_cqwq_update_db_record(&sq->cq.wq);

        /* Ensure cq space is freed before enabling more cqes */
        wmb();

        sq->cc = sqcc;

        if (atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY))
                taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
}

static int
mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
        struct mbuf *next;
        int err = 0;

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                if (mb)
                        err = drbr_enqueue(ifp, sq->br, mb);
                return (err);
        }

        if (mb != NULL)
                /*
                 * If we can't insert the mbuf into the drbr, try to transmit
                 * anyway, and keep the error so it can be returned after the
                 * transmit attempt.
                 */
                err = drbr_enqueue(ifp, sq->br, mb);

        /* Process the queue */
        while ((next = drbr_peek(ifp, sq->br)) != NULL) {
                if (mlx5e_sq_xmit(sq, &next) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, sq->br);
                        } else {
                                drbr_putback(ifp, sq->br, next);
                                atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL);
                        }
                        break;
                }
                drbr_advance(ifp, sq->br);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        /* Check if we need to write the doorbell */
        if (likely(sq->doorbell.d64 != 0)) {
                mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
                sq->doorbell.d64 = 0;
        }
        /*
         * Check if we need to start the event timer which flushes the
         * transmit ring on timeout:
         */
        if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
            sq->cev_factor != 1)) {
                /* start the timer */
                mlx5e_sq_cev_timeout(sq);
        } else {
                /* don't send NOPs yet */
                sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
        }
        return (err);
}

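/*
 * Top-level transmit routine: select a send queue for the mbuf and
 * transmit directly when the SQ lock can be taken; otherwise enqueue
 * the mbuf on the drbr and schedule the SQ taskqueue to do the work.
 */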
int
mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
{
        struct mlx5e_sq *sq;
        int ret;

        sq = mlx5e_select_queue(ifp, mb);
        if (unlikely(sq == NULL)) {
                /* Invalid send queue */
                m_freem(mb);
                return (ENXIO);
        }
        if (mtx_trylock(&sq->lock)) {
                ret = mlx5e_xmit_locked(ifp, sq, mb);
                mtx_unlock(&sq->lock);
        } else {
                ret = drbr_enqueue(ifp, sq->br, mb);
                taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
        }

        return (ret);
}

void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
{
        struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

        mtx_lock(&sq->comp_lock);
        mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
        mlx5e_cq_arm(&sq->cq);
        mtx_unlock(&sq->comp_lock);
}

void
mlx5e_tx_que(void *context, int pending)
{
        struct mlx5e_sq *sq = context;
        struct ifnet *ifp = sq->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                mtx_lock(&sq->lock);
                if (!drbr_empty(ifp, sq->br))
                        mlx5e_xmit_locked(ifp, sq, NULL);
                mtx_unlock(&sq->lock);
        }
}