/*
 * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include "opt_inet.h"
#include <dev/mlx4/cq.h>
#include <linux/slab.h>
#include <dev/mlx4/qp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <dev/mlx4/driver.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
#include <net/busy_poll.h>
#endif

#include "en.h"

#if (MLX4_EN_MAX_RX_SEGS == 1)
static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring,
				 int index)
{
	struct mlx4_en_rx_desc *rx_desc =
	    ((struct mlx4_en_rx_desc *)ring->buf) + index;
	int i;

	/* Set size and memtype fields */
	rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size - MLX4_NET_IP_ALIGN);
	rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);

	/*
	 * If the number of used fragments does not fill up the ring
	 * stride, remaining (unused) fragments must be padded with
	 * null address/size and a special memory key:
	 */
	for (i = 1; i < MLX4_EN_MAX_RX_SEGS; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}
#endif

static inline struct mbuf *
mlx4_en_alloc_mbuf(struct mlx4_en_rx_ring *ring)
{
	struct mbuf *mb;

#if (MLX4_EN_MAX_RX_SEGS == 1)
	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
	if (likely(mb != NULL))
		mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
#else
	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MLX4_EN_MAX_RX_BYTES);
	if (likely(mb != NULL)) {
		struct mbuf *mb_head = mb;
		int i;

		mb->m_len = MLX4_EN_MAX_RX_BYTES;
		mb->m_pkthdr.len = MLX4_EN_MAX_RX_BYTES;

		for (i = 1; i != MLX4_EN_MAX_RX_SEGS; i++) {
			if (mb_head->m_pkthdr.len >= ring->rx_mb_size)
				break;
			mb = (mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, MLX4_EN_MAX_RX_BYTES));
			if (unlikely(mb == NULL)) {
				m_freem(mb_head);
				return (NULL);
			}
			mb->m_len = MLX4_EN_MAX_RX_BYTES;
			mb_head->m_pkthdr.len += MLX4_EN_MAX_RX_BYTES;
		}
		/* rewind to first mbuf in chain */
		mb = mb_head;
	}
#endif
	return (mb);
}

static int
mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc,
    struct mlx4_en_rx_mbuf *mb_list)
{
	bus_dma_segment_t segs[MLX4_EN_MAX_RX_SEGS];
	bus_dmamap_t map;
	struct mbuf *mb;
	int nsegs;
	int err;
#if (MLX4_EN_MAX_RX_SEGS != 1)
	int i;
#endif

	/* try to allocate a new spare mbuf */
	if (unlikely(ring->spare.mbuf == NULL)) {
		mb = mlx4_en_alloc_mbuf(ring);
		if (unlikely(mb == NULL))
			return (-ENOMEM);

		/* make sure IP header gets aligned */
		m_adj(mb, MLX4_NET_IP_ALIGN);

		/* load spare mbuf into BUSDMA */
		err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map,
		    mb, ring->spare.segs, &nsegs, BUS_DMA_NOWAIT);
		if (unlikely(err != 0)) {
			m_freem(mb);
			return (err);
		}

		/* store spare info */
		ring->spare.mbuf = mb;

#if (MLX4_EN_MAX_RX_SEGS != 1)
		/* zero remaining segs */
		for (i = nsegs; i != MLX4_EN_MAX_RX_SEGS; i++) {
			ring->spare.segs[i].ds_addr = 0;
			ring->spare.segs[i].ds_len = 0;
		}
#endif
		bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
		    BUS_DMASYNC_PREREAD);
	}

	/* synchronize and unload the current mbuf, if any */
	if (likely(mb_list->mbuf != NULL)) {
		bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
	}

	mb = mlx4_en_alloc_mbuf(ring);
	if (unlikely(mb == NULL))
		goto use_spare;

	/* make sure IP header gets aligned */
	m_adj(mb, MLX4_NET_IP_ALIGN);

	err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (unlikely(err != 0)) {
		m_freem(mb);
		goto use_spare;
	}

#if (MLX4_EN_MAX_RX_SEGS == 1)
	rx_desc->data[0].addr = cpu_to_be64(segs[0].ds_addr);
#else
	for (i = 0; i != nsegs; i++) {
		rx_desc->data[i].byte_count = cpu_to_be32(segs[i].ds_len);
		rx_desc->data[i].lkey = ring->rx_mr_key_be;
		rx_desc->data[i].addr = cpu_to_be64(segs[i].ds_addr);
	}
	for (; i != MLX4_EN_MAX_RX_SEGS; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
#endif
	mb_list->mbuf = mb;

	bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
	return (0);

use_spare:
	/* swap DMA maps */
	map = mb_list->dma_map;
	mb_list->dma_map = ring->spare.dma_map;
	ring->spare.dma_map = map;

	/* swap MBUFs */
	mb_list->mbuf = ring->spare.mbuf;
	ring->spare.mbuf = NULL;

	/* store physical address */
#if (MLX4_EN_MAX_RX_SEGS == 1)
	rx_desc->data[0].addr = cpu_to_be64(ring->spare.segs[0].ds_addr);
#else
	for (i = 0; i != MLX4_EN_MAX_RX_SEGS; i++) {
		if (ring->spare.segs[i].ds_len != 0) {
			rx_desc->data[i].byte_count = cpu_to_be32(ring->spare.segs[i].ds_len);
			rx_desc->data[i].lkey = ring->rx_mr_key_be;
			rx_desc->data[i].addr = cpu_to_be64(ring->spare.segs[i].ds_addr);
		} else {
			rx_desc->data[i].byte_count = 0;
			rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
			rx_desc->data[i].addr = 0;
		}
	}
#endif
	return (0);
}

static void
mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list)
{
	bus_dmamap_t map = mb_list->dma_map;
	bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(ring->dma_tag, map);
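	/* The map is now unloaded; the mbuf chain can go back to the allocator. */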
	m_freem(mb_list->mbuf);
	mb_list->mbuf = NULL;	/* safety clearing */
}

static int
mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
    struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc =
	    ((struct mlx4_en_rx_desc *)ring->buf) + index;
	struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;

	mb_list->mbuf = NULL;

	if (mlx4_en_alloc_buf(ring, rx_desc, mb_list)) {
		priv->port_stats.rx_alloc_failed++;
		return (-ENOMEM);
	}
	return (0);
}

static inline void
mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int ring_ind;
	int buf_ind;
	int new_size;
	int err;

	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
			ring = priv->rx_ring[ring_ind];

			err = mlx4_en_prepare_rx_desc(priv, ring,
			    ring->actual_size);
			if (err) {
				if (ring->actual_size == 0) {
					en_err(priv, "Failed to allocate "
					    "enough rx buffers\n");
					return -ENOMEM;
				} else {
					new_size =
					    rounddown_pow_of_two(ring->actual_size);
					en_warn(priv, "Only %d buffers allocated, "
					    "reducing ring size to %d\n",
					    ring->actual_size, new_size);
					goto reduce_rings;
				}
			}
			ring->actual_size++;
			ring->prod++;
		}
	}
	return 0;

reduce_rings:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = priv->rx_ring[ring_ind];
		while (ring->actual_size > new_size) {
			ring->actual_size--;
			ring->prod--;
			mlx4_en_free_buf(ring,
			    ring->mbuf + ring->actual_size);
		}
	}

	return 0;
}

static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
    struct mlx4_en_rx_ring *ring)
{
	int index;

	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
	    ring->cons, ring->prod);

	/* Unmap and free Rx buffers */
	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
	while (ring->cons != ring->prod) {
		index = ring->cons & ring->size_mask;
		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
		mlx4_en_free_buf(ring, ring->mbuf + index);
		++ring->cons;
	}
}

void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
{
	int i;
	int num_of_eqs;
	int num_rx_rings;
	struct mlx4_dev *dev = mdev->dev;

	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
		num_of_eqs = max_t(int, MIN_RX_RINGS,
		    min_t(int,
		    mlx4_get_eqs_per_port(mdev->dev, i),
		    DEF_RX_RINGS));

		num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
		    num_of_eqs;
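		/*
		 * The ring count is rounded down to a power of two below,
		 * which matches the ilog2()-based RSS indirection setup in
		 * mlx4_en_config_rss_steer().
		 */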
		mdev->profile.prof[i].rx_ring_num =
		    rounddown_pow_of_two(num_rx_rings);
	}
}

void mlx4_en_calc_rx_buf(if_t dev)
{
	struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
	int eff_mtu = if_getmtu(dev) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN +
	    MLX4_NET_IP_ALIGN;

	if (eff_mtu > MJUM16BYTES) {
		en_err(priv, "MTU(%u) is too big\n", (unsigned)if_getmtu(dev));
		eff_mtu = MJUM16BYTES;
	} else if (eff_mtu > MJUM9BYTES) {
		eff_mtu = MJUM16BYTES;
	} else if (eff_mtu > MJUMPAGESIZE) {
		eff_mtu = MJUM9BYTES;
	} else if (eff_mtu > MCLBYTES) {
		eff_mtu = MJUMPAGESIZE;
	} else {
		eff_mtu = MCLBYTES;
	}

	priv->rx_mb_size = eff_mtu;

	en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu);
}

int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
    struct mlx4_en_rx_ring **pring, u32 size, int node)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rx_ring *ring;
	int err;
	int tmp;
	uint32_t x;

	ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL);
	if (!ring) {
		en_err(priv, "Failed to allocate RX ring structure\n");
		return -ENOMEM;
	}

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    MLX4_EN_MAX_RX_SEGS,	/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &ring->dma_tag))) {
		en_err(priv, "Failed to create DMA tag\n");
		goto err_ring;
	}

	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;

	ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
	ring->buf_size = (ring->size * sizeof(struct mlx4_en_rx_desc)) + TXBB_SIZE;

	tmp = size * sizeof(struct mlx4_en_rx_mbuf);

	ring->mbuf = kzalloc(tmp, GFP_KERNEL);
	if (ring->mbuf == NULL) {
		err = -ENOMEM;
		goto err_dma_tag;
	}

	err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map);
	if (err != 0)
		goto err_info;

	for (x = 0; x != size; x++) {
		err = -bus_dmamap_create(ring->dma_tag, 0,
		    &ring->mbuf[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(ring->dma_tag,
				    ring->mbuf[x].dma_map);
			goto err_info;
		}
	}
	en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n",
	    ring->mbuf, tmp);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
	    ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto err_dma_map;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;
	*pring = ring;
	return 0;

err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_dma_map:
	for (x = 0; x != size; x++) {
		bus_dmamap_destroy(ring->dma_tag,
		    ring->mbuf[x].dma_map);
	}
	bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
err_info:
	vfree(ring->mbuf);
err_dma_tag:
	bus_dma_tag_destroy(ring->dma_tag);
err_ring:
	kfree(ring);
	return (err);
}

int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
#if (MLX4_EN_MAX_RX_SEGS == 1)
	int i;
#endif
	int ring_ind;
	int err;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
		ring->rx_mb_size = priv->rx_mb_size;

		if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE) {
			/* Stamp first unused send wqe */
			__be32 *ptr = (__be32 *)ring->buf;
			__be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
			*ptr = stamp;
			/* Move pointer to start of rx section */
			ring->buf += TXBB_SIZE;
		}

		ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
		ring->buf_size = ring->size * sizeof(struct mlx4_en_rx_desc);

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

#if (MLX4_EN_MAX_RX_SEGS == 1)
		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);
#endif
		ring->rx_mr_key_be = cpu_to_be32(priv->mdev->mr.key);

#ifdef INET
		/* Configure lro mngr */
		if (if_getcapenable(priv->dev) & IFCAP_LRO) {
			if (tcp_lro_init(&ring->lro))
				if_setcapenablebit(priv->dev, 0, IFCAP_LRO);
			else
				ring->lro.ifp = priv->dev;
		}
#endif
	}

	err = mlx4_en_fill_rx_buffers(priv);
	if (err)
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = priv->rx_ring[ring_ind];

		ring->size_mask = ring->actual_size - 1;
		mlx4_en_update_rx_prod_db(ring);
	}

	return 0;

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;

	while (ring_ind >= 0) {
		ring = priv->rx_ring[ring_ind];
		if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
			ring->buf -= TXBB_SIZE;
		ring_ind--;
	}

	return err;
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
    struct mlx4_en_rx_ring **pring, u32 size)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rx_ring *ring = *pring;
	uint32_t x;

	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * sizeof(struct mlx4_en_rx_desc) + TXBB_SIZE);
	for (x = 0; x != size; x++)
		bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
	/* free spare mbuf, if any */
	if (ring->spare.mbuf != NULL) {
		bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map);
		m_freem(ring->spare.mbuf);
	}
	bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
	vfree(ring->mbuf);
	bus_dma_tag_destroy(ring->dma_tag);
	kfree(ring);
	*pring = NULL;
#ifdef CONFIG_RFS_ACCEL
	mlx4_en_cleanup_filters(priv, ring);
#endif
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
    struct mlx4_en_rx_ring *ring)
{
#ifdef INET
	tcp_lro_free(&ring->lro);
#endif
	mlx4_en_free_rx_buf(priv, ring);
	if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
		ring->buf -= TXBB_SIZE;
}

static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb)
{
	int i;
	int offset = ETHER_HDR_LEN;

	for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
		if (*(mb->m_data + offset) != (unsigned char) (i & 0xff))
			goto out_loopback;
	}
	/* Loopback found */
	priv->loopback_ok = 1;
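	/* Either way, fall through and free the test mbuf below. */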

out_loopback:
	m_freem(mb);
}

static inline int invalid_cqe(struct mlx4_en_priv *priv,
    struct mlx4_cqe *cqe)
{
	/* Drop packet on bad receive or bad checksum */
	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
	    MLX4_CQE_OPCODE_ERROR)) {
		en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
		    ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
		    ((struct mlx4_err_cqe *)cqe)->syndrome);
		return 1;
	}
	if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
		en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
		return 1;
	}

	return 0;
}

static struct mbuf *
mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
    struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
    int length)
{
#if (MLX4_EN_MAX_RX_SEGS != 1)
	struct mbuf *mb_head;
#endif
	struct mbuf *mb;

	/* optimise reception of small packets */
	if (length <= (MHLEN - MLX4_NET_IP_ALIGN) &&
	    (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {

		/* set packet length */
		mb->m_pkthdr.len = mb->m_len = length;

		/* make sure IP header gets aligned */
		mb->m_data += MLX4_NET_IP_ALIGN;

		bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
		    BUS_DMASYNC_POSTREAD);

		bcopy(mtod(mb_list->mbuf, caddr_t), mtod(mb, caddr_t), length);

		return (mb);
	}

	/* get mbuf */
	mb = mb_list->mbuf;

	/* collect used fragment while atomically replacing it */
	if (mlx4_en_alloc_buf(ring, rx_desc, mb_list))
		return (NULL);

	/* range check hardware computed value */
	if (unlikely(length > mb->m_pkthdr.len))
		length = mb->m_pkthdr.len;

#if (MLX4_EN_MAX_RX_SEGS == 1)
	/* update total packet length in packet header */
	mb->m_len = mb->m_pkthdr.len = length;
#else
	mb->m_pkthdr.len = length;
	for (mb_head = mb; mb != NULL; mb = mb->m_next) {
		if (mb->m_len > length)
			mb->m_len = length;
		length -= mb->m_len;
		if (likely(length == 0)) {
			if (likely(mb->m_next != NULL)) {
				/* trim off empty mbufs */
				m_freem(mb->m_next);
				mb->m_next = NULL;
			}
			break;
		}
	}
	/* rewind to first mbuf in chain */
	mb = mb_head;
#endif
	return (mb);
}

static __inline int
mlx4_en_rss_hash(__be16 status, int udp_rss)
{
	enum {
		status_all = cpu_to_be16(
		    MLX4_CQE_STATUS_IPV4 |
		    MLX4_CQE_STATUS_IPV4F |
		    MLX4_CQE_STATUS_IPV6 |
		    MLX4_CQE_STATUS_TCP |
		    MLX4_CQE_STATUS_UDP),
		status_ipv4_tcp = cpu_to_be16(
		    MLX4_CQE_STATUS_IPV4 |
		    MLX4_CQE_STATUS_TCP),
		status_ipv6_tcp = cpu_to_be16(
		    MLX4_CQE_STATUS_IPV6 |
		    MLX4_CQE_STATUS_TCP),
		status_ipv4_udp = cpu_to_be16(
		    MLX4_CQE_STATUS_IPV4 |
		    MLX4_CQE_STATUS_UDP),
		status_ipv6_udp = cpu_to_be16(
		    MLX4_CQE_STATUS_IPV6 |
		    MLX4_CQE_STATUS_UDP),
		status_ipv4 = cpu_to_be16(MLX4_CQE_STATUS_IPV4),
		status_ipv6 = cpu_to_be16(MLX4_CQE_STATUS_IPV6)
	};

	status &= status_all;
	switch (status) {
	case status_ipv4_tcp:
		return (M_HASHTYPE_RSS_TCP_IPV4);
	case status_ipv6_tcp:
		return (M_HASHTYPE_RSS_TCP_IPV6);
	case status_ipv4_udp:
		return (udp_rss ? M_HASHTYPE_RSS_UDP_IPV4 :
		    M_HASHTYPE_RSS_IPV4);
	case status_ipv6_udp:
		return (udp_rss ? M_HASHTYPE_RSS_UDP_IPV6 :
		    M_HASHTYPE_RSS_IPV6);
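	/*
	 * Anything else (e.g. IPv4 fragments) gets at most an L3-only hash
	 * type; unrecognized combinations are reported as an opaque hash.
	 */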
	default:
		if (status & status_ipv4)
			return (M_HASHTYPE_RSS_IPV4);
		if (status & status_ipv6)
			return (M_HASHTYPE_RSS_IPV6);
		return (M_HASHTYPE_OPAQUE_HASH);
	}
}

/*
 * For CPU architectures with a 64-byte cache line, performance is better
 * when the CQE size is 64 bytes.  To enlarge the CQE size from 32 to 64
 * bytes, 32 bytes of garbage (i.e. 0xcccccccc) are prepended to each CQE
 * and the real data sits in the trailing 32 bytes.  The calculation below
 * ensures that when factor == 1 we are aligned to 64 bytes and fetch the
 * real CQE data; e.g. with factor == 1, CQE i is found at buf[2 * i + 1].
 */
#define	CQE_FACTOR_INDEX(index, factor) (((index) << (factor)) + (factor))

int mlx4_en_process_rx_cq(if_t dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = mlx4_netdev_priv(dev);
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
	struct mlx4_en_rx_mbuf *mb_list;
	struct mlx4_en_rx_desc *rx_desc;
	struct mbuf *mb;
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_cqe *buf = cq->buf;
	int index;
	unsigned int length;
	int polled = 0;
	u32 cons_index = mcq->cons_index;
	u32 size_mask = ring->size_mask;
	int size = cq->size;
	int factor = priv->cqe_factor;
	const int udp_rss = priv->mdev->profile.udp_rss;

	if (!priv->port_up)
		return 0;

	/*
	 * We assume a 1:1 mapping between CQEs and Rx descriptors, so the Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index'.
	 */
	index = cons_index & size_mask;
	cqe = &buf[CQE_FACTOR_INDEX(index, factor)];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
	    cons_index & size)) {
		mb_list = ring->mbuf + index;
		rx_desc = ((struct mlx4_en_rx_desc *)ring->buf) + index;

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		if (invalid_cqe(priv, cqe)) {
			goto next;
		}
		/*
		 * Packet is OK - process it.
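		 * Trim any FCS bytes the hardware left in place, refill the
		 * descriptor, and hand the completed mbuf up the stack.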
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		length -= ring->fcs_del;

		mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length);
		if (unlikely(!mb)) {
			ring->errors++;
			goto next;
		}

		ring->bytes += length;
		ring->packets++;

		if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, mb);
			goto next;
		}

		/* forward Toeplitz compatible hash value */
		mb->m_pkthdr.flowid = be32_to_cpu(cqe->immed_rss_invalid);
		M_HASHTYPE_SET(mb, mlx4_en_rss_hash(cqe->status, udp_rss));
		mb->m_pkthdr.rcvif = dev;
		if (be32_to_cpu(cqe->vlan_my_qpn) &
		    MLX4_CQE_CVLAN_PRESENT_MASK) {
			mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid);
			mb->m_flags |= M_VLANTAG;
		}
		if (likely(if_getcapenable(dev) &
		    (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
		    (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
		    (cqe->checksum == cpu_to_be16(0xffff))) {
			priv->port_stats.rx_chksum_good++;
			mb->m_pkthdr.csum_flags =
			    CSUM_IP_CHECKED | CSUM_IP_VALID |
			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
			mb->m_pkthdr.csum_data = htons(0xffff);
			/* This packet is eligible for LRO if it is:
			 * - DIX Ethernet (type interpretation)
			 * - TCP/IP (v4)
			 * - without IP options
			 * - not an IP fragment
			 */
#ifdef INET
			if (mlx4_en_can_lro(cqe->status) &&
			    (if_getcapenable(dev) & IFCAP_LRO)) {
				if (ring->lro.lro_cnt != 0 &&
				    tcp_lro_rx(&ring->lro, mb, 0) == 0)
					goto next;
			}

#endif
			/* LRO not possible, complete processing here */
			INC_PERF_COUNTER(priv->pstats.lro_misses);
		} else {
			mb->m_pkthdr.csum_flags = 0;
			priv->port_stats.rx_chksum_none++;
		}

		/* Push it up the stack */
		if_input(dev, mb);

next:
		++cons_index;
		index = cons_index & size_mask;
		cqe = &buf[CQE_FACTOR_INDEX(index, factor)];
		if (++polled == budget)
			goto out;
	}
	/* Flush all pending IP reassembly sessions */
out:
#ifdef INET
	tcp_lro_flush_all(&ring->lro);
#endif
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();	/* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = mcq->cons_index;
	ring->prod += polled;	/* Polled descriptors were reallocated in place */
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}

/* Rx CQ polling - budgeted poll under the network epoch */
static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget)
{
	if_t dev = cq->dev;
	struct epoch_tracker et;
	int done;

	NET_EPOCH_ENTER(et);
	done = mlx4_en_process_rx_cq(dev, cq, budget);
	NET_EPOCH_EXIT(et);
	cq->tot_rx += done;

	return done;
}

void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = mlx4_netdev_priv(cq->dev);
	int done;

	/* Poll once within the IRQ context, since there is no NAPI in FreeBSD. */
	done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET);
	if (priv->port_up && done == MLX4_EN_RX_BUDGET) {
		cq->curr_poll_rx_cpu_id = curcpu;
		taskqueue_enqueue(cq->tq, &cq->cq_task);
	} else {
		mlx4_en_arm_cq(priv, cq);
	}
}

void mlx4_en_rx_que(void *context, int pending)
{
	struct epoch_tracker et;
	struct mlx4_en_cq *cq;
	struct thread *td;

	cq = context;
	td = curthread;

	thread_lock(td);
	sched_bind(td, cq->curr_poll_rx_cpu_id);
	thread_unlock(td);

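	/*
	 * Keep polling while full budgets are consumed; once the CQ drains
	 * below the budget, re-arm it so a new interrupt can fire.
	 */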
	NET_EPOCH_ENTER(et);
	while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET) ==
	    MLX4_EN_RX_BUDGET)
		;
	NET_EPOCH_EXIT(et);
	mlx4_en_arm_cq(if_getsoftc(cq->dev), cq);
}


/* RSS related functions */

static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
    struct mlx4_en_rx_ring *ring,
    enum mlx4_qp_state *state,
    struct mlx4_qp *qp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_qp_context *context;
	int err = 0;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		en_err(priv, "Failed to allocate qp context\n");
		return -ENOMEM;
	}

	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed to allocate qp #%x\n", qpn);
		goto out;
	}
	qp->event = mlx4_en_sqp_event;

	memset(context, 0, sizeof *context);
	mlx4_en_fill_qp_context(priv, ring->actual_size, sizeof(struct mlx4_en_rx_desc), 0, 0,
	    qpn, ring->cqn, -1, context);
	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);

	/* Cancel FCS removal if FW allows */
	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
		context->param3 |= cpu_to_be32(1 << 29);
		ring->fcs_del = ETH_FCS_LEN;
	} else
		ring->fcs_del = 0;

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
	}
	mlx4_en_update_rx_prod_db(ring);
out:
	kfree(context);
	return err;
}

int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
{
	int err;
	u32 qpn;

	err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0);
	if (err) {
		en_err(priv, "Failed reserving drop qpn\n");
		return err;
	}
	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed allocating drop qp\n");
		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
		return err;
	}

	return 0;
}

void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
{
	u32 qpn;

	qpn = priv->drop_qp.qpn;
	mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
}

const u32 *
mlx4_en_get_rss_key(struct mlx4_en_priv *priv __unused, u16 *keylen)
{
	static const u32 rsskey[10] = {
		cpu_to_be32(0xD181C62C),
		cpu_to_be32(0xF7F4DB5B),
		cpu_to_be32(0x1983A2FC),
		cpu_to_be32(0x943E1ADB),
		cpu_to_be32(0xD9389E6B),
		cpu_to_be32(0xD1039C2C),
		cpu_to_be32(0xA74499AD),
		cpu_to_be32(0x593D56D9),
		cpu_to_be32(0xF3253C06),
		cpu_to_be32(0x2ADC1FFC)
	};

	if (keylen != NULL)
		*keylen = sizeof(rsskey);
	return (rsskey);
}

u8 mlx4_en_get_rss_mask(struct mlx4_en_priv *priv)
{
	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
	    MLX4_RSS_TCP_IPV6);

	if (priv->mdev->profile.udp_rss)
		rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
	return (rss_mask);
}

/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_rss_context *rss_context;
	const u32 *key;
	int rss_rings;
	void *ptr;
	int i;
	int err = 0;
	int good_qps = 0;
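	/* counts fully configured rx QPs so the error path can unwind them */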

	en_dbg(DRV, priv, "Configuring rss steering\n");
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
	    priv->rx_ring_num,
	    &rss_map->base_qpn, 0);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
	}

	for (i = 0; i < priv->rx_ring_num; i++) {
		priv->rx_ring[i]->qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn,
		    priv->rx_ring[i],
		    &rss_map->state[i],
		    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto rss_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
	    priv->rx_ring[0]->cqn, -1, &context);

	if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
		rss_rings = priv->rx_ring_num;
	else
		rss_rings = priv->prof->rss_rings;

	ptr = ((u8 *)&context) + offsetof(struct mlx4_qp_context, pri_path) +
	    MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
	rss_context = ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
	    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	if (priv->mdev->profile.udp_rss)
		rss_context->base_qpn_udp = rss_context->default_qpn;
	rss_context->flags = mlx4_en_get_rss_mask(priv);
	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
	key = mlx4_en_get_rss_key(priv, NULL);
	for (i = 0; i < 10; i++)
		rss_context->rss_key[i] = key[i];

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
	    &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
	    MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
		    MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
	return err;
}

void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
	    MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);

	for (i = 0; i < priv->rx_ring_num; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
		    MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
}