1 /*- 2 * Copyright (c) 2014 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #ifdef DEV_NETMAP 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/eventhandler.h> 38 #include <sys/lock.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/selinfo.h> 42 #include <sys/socket.h> 43 #include <sys/sockio.h> 44 #include <machine/bus.h> 45 #include <net/ethernet.h> 46 #include <net/if.h> 47 #include <net/if_media.h> 48 #include <net/if_var.h> 49 #include <net/if_clone.h> 50 #include <net/if_types.h> 51 #include <net/netmap.h> 52 #include <dev/netmap/netmap_kern.h> 53 54 #include "common/common.h" 55 #include "common/t4_regs.h" 56 #include "common/t4_regs_values.h" 57 58 extern int fl_pad; /* XXXNM */ 59 60 /* 61 * 0 = normal netmap rx 62 * 1 = black hole 63 * 2 = supermassive black hole (buffer packing enabled) 64 */ 65 int black_hole = 0; 66 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RWTUN, &black_hole, 0, 67 "Sink incoming packets."); 68 69 int rx_ndesc = 256; 70 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN, 71 &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated."); 72 73 int rx_nframes = 64; 74 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_nframes, CTLFLAG_RWTUN, 75 &rx_nframes, 0, "max # of frames received before waking up netmap rx."); 76 77 int holdoff_tmr_idx = 2; 78 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN, 79 &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues."); 80 81 /* 82 * Congestion drops. 83 * -1: no congestion feedback (not recommended). 84 * 0: backpressure the channel instead of dropping packets right away. 85 * 1: no backpressure, drop packets for the congested queue immediately. 
86 */ 87 static int nm_cong_drop = 1; 88 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_cong_drop, CTLFLAG_RWTUN, 89 &nm_cong_drop, 0, 90 "Congestion control for netmap rx queues (0 = backpressure, 1 = drop"); 91 92 int starve_fl = 0; 93 SYSCTL_INT(_hw_cxgbe, OID_AUTO, starve_fl, CTLFLAG_RWTUN, 94 &starve_fl, 0, "Don't ring fl db for netmap rx queues."); 95 96 /* 97 * Try to process tx credits in bulk. This may cause a delay in the return of 98 * tx credits and is suitable for bursty or non-stop tx only. 99 */ 100 int lazy_tx_credit_flush = 1; 101 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lazy_tx_credit_flush, CTLFLAG_RWTUN, 102 &lazy_tx_credit_flush, 0, "lazy credit flush for netmap tx queues."); 103 104 /* 105 * Split the netmap rx queues into two groups that populate separate halves of 106 * the RSS indirection table. This allows filters with hashmask to steer to a 107 * particular group of queues. 108 */ 109 static int nm_split_rss = 0; 110 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_split_rss, CTLFLAG_RWTUN, 111 &nm_split_rss, 0, "Split the netmap rx queues into two groups."); 112 113 /* 114 * netmap(4) says "netmap does not use features such as checksum offloading, TCP 115 * segmentation offloading, encryption, VLAN encapsulation/decapsulation, etc." 116 * but this knob can be used to get the hardware to checksum all tx traffic 117 * anyway. 
118 */ 119 static int nm_txcsum = 0; 120 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_txcsum, CTLFLAG_RWTUN, 121 &nm_txcsum, 0, "Enable transmit checksum offloading."); 122 123 static int 124 alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong) 125 { 126 int rc, cntxt_id, i; 127 __be32 v; 128 struct adapter *sc = vi->adapter; 129 struct sge_params *sp = &sc->params.sge; 130 struct netmap_adapter *na = NA(vi->ifp); 131 struct fw_iq_cmd c; 132 133 MPASS(na != NULL); 134 MPASS(nm_rxq->iq_desc != NULL); 135 MPASS(nm_rxq->fl_desc != NULL); 136 137 bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE); 138 bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + sp->spg_len); 139 140 bzero(&c, sizeof(c)); 141 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 142 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 143 V_FW_IQ_CMD_VFN(0)); 144 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 145 FW_LEN16(c)); 146 MPASS(!forwarding_intr_to_fwq(sc)); 147 KASSERT(nm_rxq->intr_idx < sc->intr_count, 148 ("%s: invalid direct intr_idx %d", __func__, nm_rxq->intr_idx)); 149 v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx); 150 c.type_to_iqandstindex = htobe32(v | 151 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 152 V_FW_IQ_CMD_VIID(vi->viid) | 153 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 154 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) | 155 F_FW_IQ_CMD_IQGTSMODE | 156 V_FW_IQ_CMD_IQINTCNTTHRESH(0) | 157 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 158 c.iqsize = htobe16(vi->qsize_rxq); 159 c.iqaddr = htobe64(nm_rxq->iq_ba); 160 if (cong >= 0) { 161 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN | 162 V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | 163 F_FW_IQ_CMD_FL0CONGEN); 164 } 165 c.iqns_to_fl0congen |= 166 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 167 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 168 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 169 (black_hole == 2 ? 
F_FW_IQ_CMD_FL0PACKEN : 0)); 170 c.fl0dcaen_to_fl0cidxfthresh = 171 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 172 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | 173 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 174 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 175 c.fl0size = htobe16(na->num_rx_desc / 8 + sp->spg_len / EQ_ESIZE); 176 c.fl0addr = htobe64(nm_rxq->fl_ba); 177 178 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 179 if (rc != 0) { 180 device_printf(sc->dev, 181 "failed to create netmap ingress queue: %d\n", rc); 182 return (rc); 183 } 184 185 nm_rxq->iq_cidx = 0; 186 MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - sp->spg_len / IQ_ESIZE); 187 nm_rxq->iq_gen = F_RSPD_GEN; 188 nm_rxq->iq_cntxt_id = be16toh(c.iqid); 189 nm_rxq->iq_abs_id = be16toh(c.physiqid); 190 cntxt_id = nm_rxq->iq_cntxt_id - sc->sge.iq_start; 191 if (cntxt_id >= sc->sge.iqmap_sz) { 192 panic ("%s: nm_rxq->iq_cntxt_id (%d) more than the max (%d)", 193 __func__, cntxt_id, sc->sge.iqmap_sz - 1); 194 } 195 sc->sge.iqmap[cntxt_id] = (void *)nm_rxq; 196 197 nm_rxq->fl_cntxt_id = be16toh(c.fl0id); 198 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 199 nm_rxq->fl_db_saved = 0; 200 /* matches the X_FETCHBURSTMAX_512B or X_FETCHBURSTMAX_256B above. */ 201 nm_rxq->fl_db_threshold = chip_id(sc) <= CHELSIO_T5 ? 
8 : 4; 202 MPASS(nm_rxq->fl_sidx == na->num_rx_desc); 203 cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start; 204 if (cntxt_id >= sc->sge.eqmap_sz) { 205 panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)", 206 __func__, cntxt_id, sc->sge.eqmap_sz - 1); 207 } 208 sc->sge.eqmap[cntxt_id] = (void *)nm_rxq; 209 210 nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) | 211 sc->chip_params->sge_fl_db; 212 213 if (chip_id(sc) >= CHELSIO_T5 && cong >= 0) { 214 uint32_t param, val; 215 216 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 217 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 218 V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id); 219 if (cong == 0) 220 val = 1 << 19; 221 else { 222 val = 2 << 19; 223 for (i = 0; i < 4; i++) { 224 if (cong & (1 << i)) 225 val |= 1 << (i << 2); 226 } 227 } 228 229 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 230 if (rc != 0) { 231 /* report error but carry on */ 232 device_printf(sc->dev, 233 "failed to set congestion manager context for " 234 "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc); 235 } 236 } 237 238 t4_write_reg(sc, sc->sge_gts_reg, 239 V_INGRESSQID(nm_rxq->iq_cntxt_id) | 240 V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); 241 242 return (rc); 243 } 244 245 static int 246 free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 247 { 248 struct adapter *sc = vi->adapter; 249 int rc; 250 251 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, 252 nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff); 253 if (rc != 0) 254 device_printf(sc->dev, "%s: failed for iq %d, fl %d: %d\n", 255 __func__, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, rc); 256 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 257 return (rc); 258 } 259 260 static int 261 alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 262 { 263 int rc, cntxt_id; 264 size_t len; 265 struct adapter *sc = vi->adapter; 266 struct netmap_adapter *na = NA(vi->ifp); 267 struct fw_eq_eth_cmd c; 268 269 MPASS(na != NULL); 270 
MPASS(nm_txq->desc != NULL); 271 272 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 273 bzero(nm_txq->desc, len); 274 275 bzero(&c, sizeof(c)); 276 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 277 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 278 V_FW_EQ_ETH_CMD_VFN(0)); 279 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 280 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 281 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 282 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 283 c.fetchszm_to_iqid = 284 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 285 V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 286 V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id)); 287 c.dcaen_to_eqsize = 288 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 289 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 290 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 291 V_FW_EQ_ETH_CMD_EQSIZE(len / EQ_ESIZE)); 292 c.eqaddr = htobe64(nm_txq->ba); 293 294 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 295 if (rc != 0) { 296 device_printf(vi->dev, 297 "failed to create netmap egress queue: %d\n", rc); 298 return (rc); 299 } 300 301 nm_txq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 302 cntxt_id = nm_txq->cntxt_id - sc->sge.eq_start; 303 if (cntxt_id >= sc->sge.eqmap_sz) 304 panic("%s: nm_txq->cntxt_id (%d) more than the max (%d)", __func__, 305 cntxt_id, sc->sge.eqmap_sz - 1); 306 sc->sge.eqmap[cntxt_id] = (void *)nm_txq; 307 308 nm_txq->pidx = nm_txq->cidx = 0; 309 MPASS(nm_txq->sidx == na->num_tx_desc); 310 nm_txq->equiqidx = nm_txq->equeqidx = nm_txq->dbidx = 0; 311 312 nm_txq->doorbells = sc->doorbells; 313 if (isset(&nm_txq->doorbells, DOORBELL_UDB) || 314 isset(&nm_txq->doorbells, DOORBELL_UDBWC) || 315 isset(&nm_txq->doorbells, DOORBELL_WCWR)) { 316 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 317 uint32_t mask = (1 << s_qpp) - 1; 318 volatile uint8_t *udb; 
319 320 udb = sc->udbs_base + UDBS_DB_OFFSET; 321 udb += (nm_txq->cntxt_id >> s_qpp) << PAGE_SHIFT; 322 nm_txq->udb_qid = nm_txq->cntxt_id & mask; 323 if (nm_txq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 324 clrbit(&nm_txq->doorbells, DOORBELL_WCWR); 325 else { 326 udb += nm_txq->udb_qid << UDBS_SEG_SHIFT; 327 nm_txq->udb_qid = 0; 328 } 329 nm_txq->udb = (volatile void *)udb; 330 } 331 332 if (sc->params.fw_vers < FW_VERSION32(1, 25, 1, 0)) { 333 uint32_t param, val; 334 335 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 336 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | 337 V_FW_PARAMS_PARAM_YZ(nm_txq->cntxt_id); 338 val = 0xff; 339 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 340 if (rc != 0) { 341 device_printf(vi->dev, 342 "failed to bind netmap txq %d to class 0xff: %d\n", 343 nm_txq->cntxt_id, rc); 344 rc = 0; 345 } 346 } 347 348 return (rc); 349 } 350 351 static int 352 free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 353 { 354 struct adapter *sc = vi->adapter; 355 int rc; 356 357 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id); 358 if (rc != 0) 359 device_printf(sc->dev, "%s: failed for eq %d: %d\n", __func__, 360 nm_txq->cntxt_id, rc); 361 nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; 362 return (rc); 363 } 364 365 static int 366 cxgbe_netmap_simple_rss(struct adapter *sc, struct vi_info *vi, 367 struct ifnet *ifp, struct netmap_adapter *na) 368 { 369 struct netmap_kring *kring; 370 struct sge_nm_rxq *nm_rxq; 371 int rc, i, j, nm_state, defq; 372 uint16_t *rss; 373 374 /* 375 * Check if there's at least one active (or about to go active) netmap 376 * rx queue. 
377 */ 378 defq = -1; 379 for_each_nm_rxq(vi, j, nm_rxq) { 380 nm_state = atomic_load_int(&nm_rxq->nm_state); 381 kring = na->rx_rings[nm_rxq->nid]; 382 if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || 383 (nm_state == NM_OFF && nm_kring_pending_on(kring))) { 384 MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 385 if (defq == -1) { 386 defq = nm_rxq->iq_abs_id; 387 break; 388 } 389 } 390 } 391 392 if (defq == -1) { 393 /* No active netmap queues. Switch back to NIC queues. */ 394 rss = vi->rss; 395 defq = vi->rss[0]; 396 } else { 397 for (i = 0; i < vi->rss_size;) { 398 for_each_nm_rxq(vi, j, nm_rxq) { 399 nm_state = atomic_load_int(&nm_rxq->nm_state); 400 kring = na->rx_rings[nm_rxq->nid]; 401 if ((nm_state != NM_OFF && 402 !nm_kring_pending_off(kring)) || 403 (nm_state == NM_OFF && 404 nm_kring_pending_on(kring))) { 405 MPASS(nm_rxq->iq_cntxt_id != 406 INVALID_NM_RXQ_CNTXT_ID); 407 vi->nm_rss[i++] = nm_rxq->iq_abs_id; 408 if (i == vi->rss_size) 409 break; 410 } 411 } 412 } 413 rss = vi->nm_rss; 414 } 415 416 rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, 417 vi->rss_size); 418 if (rc != 0) 419 if_printf(ifp, "netmap rss_config failed: %d\n", rc); 420 421 rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0); 422 if (rc != 0) { 423 if_printf(ifp, "netmap defaultq config failed: %d\n", rc); 424 } 425 426 return (rc); 427 } 428 429 /* 430 * Odd number of rx queues work best for split RSS mode as the first queue can 431 * be dedicated for non-RSS traffic and the rest divided into two equal halves. 432 */ 433 static int 434 cxgbe_netmap_split_rss(struct adapter *sc, struct vi_info *vi, 435 struct ifnet *ifp, struct netmap_adapter *na) 436 { 437 struct netmap_kring *kring; 438 struct sge_nm_rxq *nm_rxq; 439 int rc, i, j, nm_state, defq; 440 int nactive[2] = {0, 0}; 441 int dq[2] = {-1, -1}; 442 bool dq_norss; /* default queue should not be in RSS table. 
*/ 443 444 MPASS(nm_split_rss != 0); 445 MPASS(vi->nnmrxq > 1); 446 447 for_each_nm_rxq(vi, i, nm_rxq) { 448 j = i / ((vi->nnmrxq + 1) / 2); 449 nm_state = atomic_load_int(&nm_rxq->nm_state); 450 kring = na->rx_rings[nm_rxq->nid]; 451 if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || 452 (nm_state == NM_OFF && nm_kring_pending_on(kring))) { 453 MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 454 nactive[j]++; 455 if (dq[j] == -1) { 456 dq[j] = nm_rxq->iq_abs_id; 457 break; 458 } 459 } 460 } 461 462 if (nactive[0] == 0 || nactive[1] == 0) 463 return (cxgbe_netmap_simple_rss(sc, vi, ifp, na)); 464 465 MPASS(dq[0] != -1 && dq[1] != -1); 466 if (nactive[0] > nactive[1]) { 467 defq = dq[0]; 468 dq_norss = true; 469 } else if (nactive[0] < nactive[1]) { 470 defq = dq[1]; 471 dq_norss = true; 472 } else { 473 defq = dq[0]; 474 dq_norss = false; 475 } 476 477 i = 0; 478 nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq]; 479 while (i < vi->rss_size / 2) { 480 for (j = 0; j < (vi->nnmrxq + 1) / 2; j++) { 481 nm_state = atomic_load_int(&nm_rxq[j].nm_state); 482 kring = na->rx_rings[nm_rxq[j].nid]; 483 if ((nm_state == NM_OFF && 484 !nm_kring_pending_on(kring)) || 485 (nm_state == NM_ON && 486 nm_kring_pending_off(kring))) { 487 continue; 488 } 489 MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 490 if (dq_norss && defq == nm_rxq[j].iq_abs_id) 491 continue; 492 vi->nm_rss[i++] = nm_rxq[j].iq_abs_id; 493 if (i == vi->rss_size / 2) 494 break; 495 } 496 } 497 while (i < vi->rss_size) { 498 for (j = (vi->nnmrxq + 1) / 2; j < vi->nnmrxq; j++) { 499 nm_state = atomic_load_int(&nm_rxq[j].nm_state); 500 kring = na->rx_rings[nm_rxq[j].nid]; 501 if ((nm_state == NM_OFF && 502 !nm_kring_pending_on(kring)) || 503 (nm_state == NM_ON && 504 nm_kring_pending_off(kring))) { 505 continue; 506 } 507 MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 508 if (dq_norss && defq == nm_rxq[j].iq_abs_id) 509 continue; 510 vi->nm_rss[i++] = nm_rxq[j].iq_abs_id; 511 if (i == 
vi->rss_size) 512 break; 513 } 514 } 515 516 rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, 517 vi->nm_rss, vi->rss_size); 518 if (rc != 0) 519 if_printf(ifp, "netmap split_rss_config failed: %d\n", rc); 520 521 rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0); 522 if (rc != 0) 523 if_printf(ifp, "netmap defaultq config failed: %d\n", rc); 524 525 return (rc); 526 } 527 528 static inline int 529 cxgbe_netmap_rss(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, 530 struct netmap_adapter *na) 531 { 532 533 if (nm_split_rss == 0 || vi->nnmrxq == 1) 534 return (cxgbe_netmap_simple_rss(sc, vi, ifp, na)); 535 else 536 return (cxgbe_netmap_split_rss(sc, vi, ifp, na)); 537 } 538 539 static int 540 cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, 541 struct netmap_adapter *na) 542 { 543 struct netmap_slot *slot; 544 struct netmap_kring *kring; 545 struct sge_nm_rxq *nm_rxq; 546 struct sge_nm_txq *nm_txq; 547 int i, j, hwidx; 548 struct rx_buf_info *rxb; 549 550 ASSERT_SYNCHRONIZED_OP(sc); 551 MPASS(vi->nnmrxq > 0); 552 MPASS(vi->nnmtxq > 0); 553 554 if ((vi->flags & VI_INIT_DONE) == 0 || 555 (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 556 if_printf(ifp, "cannot enable netmap operation because " 557 "interface is not UP.\n"); 558 return (EAGAIN); 559 } 560 561 rxb = &sc->sge.rx_buf_info[0]; 562 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 563 if (rxb->size1 == NETMAP_BUF_SIZE(na)) { 564 hwidx = rxb->hwidx1; 565 break; 566 } 567 if (rxb->size2 == NETMAP_BUF_SIZE(na)) { 568 hwidx = rxb->hwidx2; 569 break; 570 } 571 } 572 if (i >= SW_ZONE_SIZES) { 573 if_printf(ifp, "no hwidx for netmap buffer size %d.\n", 574 NETMAP_BUF_SIZE(na)); 575 return (ENXIO); 576 } 577 578 /* Must set caps before calling netmap_reset */ 579 nm_set_native_flags(na); 580 581 for_each_nm_rxq(vi, i, nm_rxq) { 582 kring = na->rx_rings[nm_rxq->nid]; 583 if (!nm_kring_pending_on(kring) || 584 nm_rxq->iq_cntxt_id != 
INVALID_NM_RXQ_CNTXT_ID) 585 continue; 586 587 alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop)); 588 nm_rxq->fl_hwidx = hwidx; 589 slot = netmap_reset(na, NR_RX, i, 0); 590 MPASS(slot != NULL); /* XXXNM: error check, not assert */ 591 592 /* We deal with 8 bufs at a time */ 593 MPASS((na->num_rx_desc & 7) == 0); 594 MPASS(na->num_rx_desc == nm_rxq->fl_sidx); 595 for (j = 0; j < nm_rxq->fl_sidx; j++) { 596 uint64_t ba; 597 598 PNMB(na, &slot[j], &ba); 599 MPASS(ba != 0); 600 nm_rxq->fl_desc[j] = htobe64(ba | hwidx); 601 } 602 j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8; 603 MPASS((j & 7) == 0); 604 j /= 8; /* driver pidx to hardware pidx */ 605 wmb(); 606 t4_write_reg(sc, sc->sge_kdoorbell_reg, 607 nm_rxq->fl_db_val | V_PIDX(j)); 608 609 (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_OFF, NM_ON); 610 } 611 612 for_each_nm_txq(vi, i, nm_txq) { 613 kring = na->tx_rings[nm_txq->nid]; 614 if (!nm_kring_pending_on(kring) || 615 nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID) 616 continue; 617 618 alloc_nm_txq_hwq(vi, nm_txq); 619 slot = netmap_reset(na, NR_TX, i, 0); 620 MPASS(slot != NULL); /* XXXNM: error check, not assert */ 621 } 622 623 if (vi->nm_rss == NULL) { 624 vi->nm_rss = malloc(vi->rss_size * sizeof(uint16_t), M_CXGBE, 625 M_ZERO | M_WAITOK); 626 } 627 628 return (cxgbe_netmap_rss(sc, vi, ifp, na)); 629 } 630 631 static int 632 cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, 633 struct netmap_adapter *na) 634 { 635 struct netmap_kring *kring; 636 int rc, i, nm_state, nactive; 637 struct sge_nm_txq *nm_txq; 638 struct sge_nm_rxq *nm_rxq; 639 640 ASSERT_SYNCHRONIZED_OP(sc); 641 MPASS(vi->nnmrxq > 0); 642 MPASS(vi->nnmtxq > 0); 643 644 if (!nm_netmap_on(na)) 645 return (0); 646 647 if ((vi->flags & VI_INIT_DONE) == 0) 648 return (0); 649 650 /* First remove the queues that are stopping from the RSS table. */ 651 rc = cxgbe_netmap_rss(sc, vi, ifp, na); 652 if (rc != 0) 653 return (rc); /* error message logged already. 
*/ 654 655 for_each_nm_txq(vi, i, nm_txq) { 656 struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; 657 658 kring = na->tx_rings[nm_txq->nid]; 659 if (!nm_kring_pending_off(kring) || 660 nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID) 661 continue; 662 663 /* Wait for hw pidx to catch up ... */ 664 while (be16toh(nm_txq->pidx) != spg->pidx) 665 pause("nmpidx", 1); 666 667 /* ... and then for the cidx. */ 668 while (spg->pidx != spg->cidx) 669 pause("nmcidx", 1); 670 671 free_nm_txq_hwq(vi, nm_txq); 672 673 /* XXX: netmap, not the driver, should do this. */ 674 kring->rhead = kring->rcur = kring->nr_hwcur = 0; 675 kring->rtail = kring->nr_hwtail = kring->nkr_num_slots - 1; 676 } 677 nactive = 0; 678 for_each_nm_rxq(vi, i, nm_rxq) { 679 nm_state = atomic_load_int(&nm_rxq->nm_state); 680 kring = na->rx_rings[nm_rxq->nid]; 681 if (nm_state != NM_OFF && !nm_kring_pending_off(kring)) 682 nactive++; 683 if (nm_state == NM_OFF || !nm_kring_pending_off(kring)) 684 continue; 685 686 MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 687 while (!atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_OFF)) 688 pause("nmst", 1); 689 690 free_nm_rxq_hwq(vi, nm_rxq); 691 692 /* XXX: netmap, not the driver, should do this. 
*/ 693 kring->rhead = kring->rcur = kring->nr_hwcur = 0; 694 kring->rtail = kring->nr_hwtail = 0; 695 } 696 netmap_krings_mode_commit(na, 0); 697 if (nactive == 0) 698 nm_clear_native_flags(na); 699 700 return (rc); 701 } 702 703 static int 704 cxgbe_netmap_reg(struct netmap_adapter *na, int on) 705 { 706 struct ifnet *ifp = na->ifp; 707 struct vi_info *vi = ifp->if_softc; 708 struct adapter *sc = vi->adapter; 709 int rc; 710 711 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg"); 712 if (rc != 0) 713 return (rc); 714 if (on) 715 rc = cxgbe_netmap_on(sc, vi, ifp, na); 716 else 717 rc = cxgbe_netmap_off(sc, vi, ifp, na); 718 end_synchronized_op(sc, 0); 719 720 return (rc); 721 } 722 723 /* How many packets can a single type1 WR carry in n descriptors */ 724 static inline int 725 ndesc_to_npkt(const int n) 726 { 727 728 MPASS(n > 0 && n <= SGE_MAX_WR_NDESC); 729 730 return (n * 2 - 1); 731 } 732 #define MAX_NPKT_IN_TYPE1_WR (ndesc_to_npkt(SGE_MAX_WR_NDESC)) 733 734 /* 735 * Space (in descriptors) needed for a type1 WR (TX_PKTS or TX_PKTS2) that 736 * carries n packets 737 */ 738 static inline int 739 npkt_to_ndesc(const int n) 740 { 741 742 MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR); 743 744 return ((n + 2) / 2); 745 } 746 747 /* 748 * Space (in 16B units) needed for a type1 WR (TX_PKTS or TX_PKTS2) that 749 * carries n packets 750 */ 751 static inline int 752 npkt_to_len16(const int n) 753 { 754 755 MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR); 756 757 return (n * 2 + 1); 758 } 759 760 #define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx) 761 762 static void 763 ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq) 764 { 765 int n; 766 u_int db = nm_txq->doorbells; 767 768 MPASS(nm_txq->pidx != nm_txq->dbidx); 769 770 n = NMIDXDIFF(nm_txq, dbidx); 771 if (n > 1) 772 clrbit(&db, DOORBELL_WCWR); 773 wmb(); 774 775 switch (ffs(db) - 1) { 776 case DOORBELL_UDB: 777 *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n)); 778 break; 779 
780 case DOORBELL_WCWR: { 781 volatile uint64_t *dst, *src; 782 783 /* 784 * Queues whose 128B doorbell segment fits in the page do not 785 * use relative qid (udb_qid is always 0). Only queues with 786 * doorbell segments can do WCWR. 787 */ 788 KASSERT(nm_txq->udb_qid == 0 && n == 1, 789 ("%s: inappropriate doorbell (0x%x, %d, %d) for nm_txq %p", 790 __func__, nm_txq->doorbells, n, nm_txq->pidx, nm_txq)); 791 792 dst = (volatile void *)((uintptr_t)nm_txq->udb + 793 UDBS_WR_OFFSET - UDBS_DB_OFFSET); 794 src = (void *)&nm_txq->desc[nm_txq->dbidx]; 795 while (src != (void *)&nm_txq->desc[nm_txq->dbidx + 1]) 796 *dst++ = *src++; 797 wmb(); 798 break; 799 } 800 801 case DOORBELL_UDBWC: 802 *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n)); 803 wmb(); 804 break; 805 806 case DOORBELL_KDB: 807 t4_write_reg(sc, sc->sge_kdoorbell_reg, 808 V_QID(nm_txq->cntxt_id) | V_PIDX(n)); 809 break; 810 } 811 nm_txq->dbidx = nm_txq->pidx; 812 } 813 814 /* 815 * Write work requests to send 'npkt' frames and ring the doorbell to send them 816 * on their way. No need to check for wraparound. 
 */
static void
cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
    struct netmap_kring *kring, int npkt, int npkt_remaining)
{
	struct netmap_ring *ring = kring->ring;
	struct netmap_slot *slot;
	const u_int lim = kring->nkr_num_slots - 1;
	struct fw_eth_tx_pkts_wr *wr = (void *)&nm_txq->desc[nm_txq->pidx];
	uint16_t len;
	uint64_t ba;
	struct cpl_tx_pkt_core *cpl;
	struct ulptx_sgl *usgl;
	int i, n;

	/* One type1 WR per iteration, each carrying up to the max # of pkts. */
	while (npkt) {
		n = min(npkt, MAX_NPKT_IN_TYPE1_WR);
		len = 0;

		wr = (void *)&nm_txq->desc[nm_txq->pidx];
		wr->op_pkd = nm_txq->op_pkd;
		wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(npkt_to_len16(n)));
		wr->npkt = n;
		wr->r3 = 0;
		wr->type = 1;
		cpl = (void *)(wr + 1);

		/* A CPL header and a one-entry SGL for every frame. */
		for (i = 0; i < n; i++) {
			slot = &ring->slot[kring->nr_hwcur];
			PNMB(kring->na, slot, &ba);
			MPASS(ba != 0);

			cpl->ctrl0 = nm_txq->cpl_ctrl0;
			cpl->pack = 0;
			cpl->len = htobe16(slot->len);
			cpl->ctrl1 = nm_txcsum ? 0 :
			    htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);

			usgl = (void *)(cpl + 1);
			usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
			    V_ULPTX_NSGE(1));
			usgl->len0 = htobe32(slot->len);
			usgl->addr0 = htobe64(ba);

			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			cpl = (void *)(usgl + 1);
			MPASS(slot->len + len <= UINT16_MAX);
			len += slot->len;
			kring->nr_hwcur = nm_next(kring->nr_hwcur, lim);
		}
		wr->plen = htobe16(len);

		npkt -= n;
		nm_txq->pidx += npkt_to_ndesc(n);
		MPASS(nm_txq->pidx <= nm_txq->sidx);
		if (__predict_false(nm_txq->pidx == nm_txq->sidx)) {
			/*
			 * This routine doesn't know how to write WRs that wrap
			 * around.  Make sure it wasn't asked to.
			 */
			MPASS(npkt == 0);
			nm_txq->pidx = 0;
		}

		if (npkt == 0 && npkt_remaining == 0) {
			/* All done. */
			if (lazy_tx_credit_flush == 0) {
				wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
				    F_FW_WR_EQUIQ);
				nm_txq->equeqidx = nm_txq->pidx;
				nm_txq->equiqidx = nm_txq->pidx;
			}
			ring_nm_txq_db(sc, nm_txq);
			return;
		}

		/*
		 * Set the EQUIQ/EQUEQ flags in this WR once enough descriptors
		 * have gone by since the last WR that carried them.
		 */
		if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) {
			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
			    F_FW_WR_EQUIQ);
			nm_txq->equeqidx = nm_txq->pidx;
			nm_txq->equiqidx = nm_txq->pidx;
		} else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) {
			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
			nm_txq->equeqidx = nm_txq->pidx;
		}
		if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC)
			ring_nm_txq_db(sc, nm_txq);
	}

	/* Will get called again. */
	MPASS(npkt_remaining);
}

/* How many contiguous free descriptors starting at pidx */
static inline int
contiguous_ndesc_available(struct sge_nm_txq *nm_txq)
{

	/*
	 * The run ends one short of cidx when cidx is ahead of pidx, at the
	 * end of the ring when cidx > 0, and one short of the end of the ring
	 * when cidx is 0.
	 */
	if (nm_txq->cidx > nm_txq->pidx)
		return (nm_txq->cidx - nm_txq->pidx - 1);
	else if (nm_txq->cidx > 0)
		return (nm_txq->sidx - nm_txq->pidx);
	else
		return (nm_txq->sidx - nm_txq->pidx - 1);
}

/*
 * Advance the driver's cidx to the hardware cidx found in the status page,
 * walking the work requests in between.  Returns the total number of packets
 * (sum of wr->npkt) carried by the WRs that were passed over.
 */
static int
reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq)
{
	struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx];
	uint16_t hw_cidx = spg->cidx;	/* snapshot */
	struct fw_eth_tx_pkts_wr *wr;
	int n = 0;

	hw_cidx = be16toh(hw_cidx);

	while (nm_txq->cidx != hw_cidx) {
		wr = (void *)&nm_txq->desc[nm_txq->cidx];

		MPASS(wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)) ||
		    wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)));
		MPASS(wr->type == 1);
		MPASS(wr->npkt > 0 && wr->npkt <= MAX_NPKT_IN_TYPE1_WR);

		n += wr->npkt;
		nm_txq->cidx += npkt_to_ndesc(wr->npkt);

		/*
		 * We never sent a WR that wrapped around so the credits coming
		 * back, WR by WR, should never cause the cidx to wrap around
		 * either.
		 */
		MPASS(nm_txq->cidx <= nm_txq->sidx);
		if (__predict_false(nm_txq->cidx == nm_txq->sidx))
			nm_txq->cidx = 0;
	}

	return (n);
}

/*
 * netmap's nm_txsync callback.  Transmits the slots between kring->nr_hwcur
 * and kring->rhead in batches that fit in the contiguous free descriptors,
 * then credits reclaimed packets to kring->nr_hwtail.  Always returns 0.
 */
static int
cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;
	struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_nm_txq + kring->ring_id];
	const u_int head = kring->rhead;
	u_int reclaimed = 0;
	int n, d, npkt_remaining, ndesc_remaining;

	/*
	 * Tx was at kring->nr_hwcur last time around and now we need to advance
	 * to kring->rhead.  Note that the driver's pidx moves independent of
	 * netmap's kring->nr_hwcur (pidx counts descriptors and the relation
	 * between descriptors and frames isn't 1:1).
	 */

	npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
	    kring->nkr_num_slots - kring->nr_hwcur + head;
	while (npkt_remaining) {
		reclaimed += reclaim_nm_tx_desc(nm_txq);
		ndesc_remaining = contiguous_ndesc_available(nm_txq);
		/* Can't run out of descriptors with packets still remaining */
		MPASS(ndesc_remaining > 0);

		/* # of desc needed to tx all remaining packets */
		d = (npkt_remaining / MAX_NPKT_IN_TYPE1_WR) * SGE_MAX_WR_NDESC;
		if (npkt_remaining % MAX_NPKT_IN_TYPE1_WR)
			d += npkt_to_ndesc(npkt_remaining % MAX_NPKT_IN_TYPE1_WR);

		if (d <= ndesc_remaining)
			n = npkt_remaining;
		else {
			/* Can't send all, calculate how many can be sent */
			n = (ndesc_remaining / SGE_MAX_WR_NDESC) *
			    MAX_NPKT_IN_TYPE1_WR;
			if (ndesc_remaining % SGE_MAX_WR_NDESC)
				n += ndesc_to_npkt(ndesc_remaining % SGE_MAX_WR_NDESC);
		}

		/* Send n packets and update nm_txq->pidx and kring->nr_hwcur */
		npkt_remaining -= n;
		cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining);
	}
	MPASS(npkt_remaining == 0);
	MPASS(kring->nr_hwcur == head);
	MPASS(nm_txq->dbidx == nm_txq->pidx);

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (reclaimed || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		reclaimed += reclaim_nm_tx_desc(nm_txq);
		kring->nr_hwtail += reclaimed;
		if (kring->nr_hwtail >= kring->nkr_num_slots)
			kring->nr_hwtail -= kring->nkr_num_slots;
	}

	return (0);
}

/*
 * netmap's nm_rxsync callback.  Publishes the freelist cidx as the ring's
 * nr_hwtail when an update is due, rings any doorbell increments saved while
 * starve_fl was set, and returns the buffers userspace has released
 * (nr_hwcur .. rhead) to the freelist in groups of 8.  Does nothing when
 * black_hole is set.  Always returns 0.
 */
static int
cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	struct ifnet *ifp = na->ifp;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;
	struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq + kring->ring_id];
	u_int const head = kring->rhead;
	u_int n;
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (black_hole)
		return (0);	/* No updates ever. */

	if (netmap_no_pendintr || force_update) {
		kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx);
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	if (nm_rxq->fl_db_saved > 0 && starve_fl == 0) {
		wmb();
		t4_write_reg(sc, sc->sge_kdoorbell_reg,
		    nm_rxq->fl_db_val | V_PIDX(nm_rxq->fl_db_saved));
		nm_rxq->fl_db_saved = 0;
	}

	/* Userspace done with buffers from kring->nr_hwcur to head */
	n = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
	    kring->nkr_num_slots - kring->nr_hwcur + head;
	n &= ~7U;	/* round down to a multiple of 8 */
	if (n > 0) {
		u_int fl_pidx = nm_rxq->fl_pidx;
		struct netmap_slot *slot = &ring->slot[fl_pidx];
		uint64_t ba;
		int i, dbinc = 0, hwidx = nm_rxq->fl_hwidx;

		/*
		 * We always deal with 8 buffers at a time.  We must have
		 * stopped at an 8B boundary (fl_pidx) last time around and we
		 * must have a multiple of 8B buffers to give to the freelist.
		 */
		MPASS((fl_pidx & 7) == 0);
		MPASS((n & 7) == 0);

		IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots);
		IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx2);

		while (n > 0) {
			for (i = 0; i < 8; i++, fl_pidx++, slot++) {
				PNMB(na, slot, &ba);
				MPASS(ba != 0);
				nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx);
				slot->flags &= ~NS_BUF_CHANGED;
				MPASS(fl_pidx <= nm_rxq->fl_sidx2);
			}
			n -= 8;
			if (fl_pidx == nm_rxq->fl_sidx2) {
				fl_pidx = 0;
				slot = &ring->slot[0];
			}
			/* dbinc counts groups of 8 buffers. */
			if (++dbinc == nm_rxq->fl_db_threshold) {
				wmb();
				if (starve_fl)
					nm_rxq->fl_db_saved += dbinc;
				else {
					t4_write_reg(sc, sc->sge_kdoorbell_reg,
					    nm_rxq->fl_db_val | V_PIDX(dbinc));
				}
				dbinc = 0;
			}
		}
		MPASS(nm_rxq->fl_pidx == fl_pidx);

		/* Flush whatever is left below the threshold. */
		if (dbinc > 0) {
			wmb();
			if (starve_fl)
				nm_rxq->fl_db_saved += dbinc;
			else {
				t4_write_reg(sc, sc->sge_kdoorbell_reg,
				    nm_rxq->fl_db_val | V_PIDX(dbinc));
			}
		}
	}

	return (0);
}

/*
 * Describe the VI to netmap (ring counts and sizes, sync and register
 * callbacks) and attach it with netmap_attach().
 */
void
cxgbe_nm_attach(struct vi_info *vi)
{
	struct port_info *pi;
	struct adapter *sc;
	struct netmap_adapter na;

	MPASS(vi->nnmrxq > 0);
	MPASS(vi->ifp != NULL);

	pi = vi->pi;
	sc = pi->adapter;

	bzero(&na, sizeof(na));

	na.ifp = vi->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;

	/* Netmap doesn't know about the space reserved for the status page. */
	na.num_tx_desc = vi->qsize_txq - sc->params.sge.spg_len / EQ_ESIZE;

	/*
	 * The freelist's cidx/pidx drives netmap's rx cidx/pidx.  So
	 * num_rx_desc is based on the number of buffers that can be held in the
	 * freelist, and not the number of entries in the iq.  (These two are
	 * not exactly the same due to the space taken up by the status page).
	 */
	na.num_rx_desc = rounddown(vi->qsize_rxq, 8);
	na.nm_txsync = cxgbe_netmap_txsync;
	na.nm_rxsync = cxgbe_netmap_rxsync;
	na.nm_register = cxgbe_netmap_reg;
	na.num_tx_rings = vi->nnmtxq;
	na.num_rx_rings = vi->nnmrxq;
	na.rx_buf_maxsize = MAX_MTU;
	netmap_attach(&na);	/* This adds IFCAP_NETMAP to if_capabilities */
}

/* Detach the VI's ifnet from netmap. */
void
cxgbe_nm_detach(struct vi_info *vi)
{

	MPASS(vi->nnmrxq > 0);
	MPASS(vi->ifp != NULL);

	netmap_detach(vi->ifp);
}

/* Return a pointer to the payload that follows the RSS header in a fw6 msg. */
static inline const void *
unwrap_nm_fw6_msg(const struct cpl_fw6_msg *cpl)
{

	MPASS(cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL);

	/* data[0] is RSS header */
	return (&cpl->data[1]);
}

/*
 * Handle an egress update: look up the netmap tx queue for the qid in the
 * message via eqmap and notify netmap of tx activity on that queue's ring.
 */
static void
handle_nm_sge_egr_update(struct adapter *sc, struct ifnet *ifp,
    const struct cpl_sge_egr_update *egr)
{
	uint32_t oq;
	struct sge_nm_txq *nm_txq;

	oq = be32toh(egr->opcode_qid);
	MPASS(G_CPL_OPCODE(oq) == CPL_SGE_EGR_UPDATE);
	nm_txq = (void *)sc->sge.eqmap[G_EGR_QID(oq) - sc->sge.eq_start];

	netmap_tx_irq(ifp, nm_txq->nid);
}

void
service_nm_rxq(struct sge_nm_rxq *nm_rxq)
{
	struct vi_info *vi = nm_rxq->vi;
	struct adapter *sc = vi->adapter;
	struct ifnet *ifp = vi->ifp;
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = na->rx_rings[nm_rxq->nid];
	struct netmap_ring *ring = kring->ring;
	struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
	const void *cpl;
	uint32_t lq;
	u_int work = 0;
	uint8_t opcode;
	uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx);
	u_int fl_credits = fl_cidx & 7;
	u_int ndesc = 0;	/* desc processed since last cidx update */
	u_int nframes = 0;	/* frames processed since last netmap wakeup */

	while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) {

		rmb();

		lq =
be32toh(d->rsp.pldbuflen_qid); 1204 opcode = d->rss.opcode; 1205 cpl = &d->cpl[0]; 1206 1207 switch (G_RSPD_TYPE(d->rsp.u.type_gen)) { 1208 case X_RSPD_TYPE_FLBUF: 1209 1210 /* fall through */ 1211 1212 case X_RSPD_TYPE_CPL: 1213 MPASS(opcode < NUM_CPL_CMDS); 1214 1215 switch (opcode) { 1216 case CPL_FW4_MSG: 1217 case CPL_FW6_MSG: 1218 cpl = unwrap_nm_fw6_msg(cpl); 1219 /* fall through */ 1220 case CPL_SGE_EGR_UPDATE: 1221 handle_nm_sge_egr_update(sc, ifp, cpl); 1222 break; 1223 case CPL_RX_PKT: 1224 ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - 1225 sc->params.sge.fl_pktshift; 1226 ring->slot[fl_cidx].flags = 0; 1227 nframes++; 1228 if (!(lq & F_RSPD_NEWBUF)) { 1229 MPASS(black_hole == 2); 1230 break; 1231 } 1232 fl_credits++; 1233 if (__predict_false(++fl_cidx == nm_rxq->fl_sidx)) 1234 fl_cidx = 0; 1235 break; 1236 default: 1237 panic("%s: unexpected opcode 0x%x on nm_rxq %p", 1238 __func__, opcode, nm_rxq); 1239 } 1240 break; 1241 1242 case X_RSPD_TYPE_INTR: 1243 /* Not equipped to handle forwarded interrupts. 
*/ 1244 panic("%s: netmap queue received interrupt for iq %u\n", 1245 __func__, lq); 1246 1247 default: 1248 panic("%s: illegal response type %d on nm_rxq %p", 1249 __func__, G_RSPD_TYPE(d->rsp.u.type_gen), nm_rxq); 1250 } 1251 1252 d++; 1253 if (__predict_false(++nm_rxq->iq_cidx == nm_rxq->iq_sidx)) { 1254 nm_rxq->iq_cidx = 0; 1255 d = &nm_rxq->iq_desc[0]; 1256 nm_rxq->iq_gen ^= F_RSPD_GEN; 1257 } 1258 1259 if (__predict_false(++nframes == rx_nframes) && !black_hole) { 1260 atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); 1261 netmap_rx_irq(ifp, nm_rxq->nid, &work); 1262 nframes = 0; 1263 } 1264 1265 if (__predict_false(++ndesc == rx_ndesc)) { 1266 if (black_hole && fl_credits >= 8) { 1267 fl_credits /= 8; 1268 IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, 1269 nm_rxq->fl_sidx); 1270 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1271 nm_rxq->fl_db_val | V_PIDX(fl_credits)); 1272 fl_credits = fl_cidx & 7; 1273 } 1274 t4_write_reg(sc, sc->sge_gts_reg, 1275 V_CIDXINC(ndesc) | 1276 V_INGRESSQID(nm_rxq->iq_cntxt_id) | 1277 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1278 ndesc = 0; 1279 } 1280 } 1281 1282 atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); 1283 if (black_hole) { 1284 fl_credits /= 8; 1285 IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); 1286 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1287 nm_rxq->fl_db_val | V_PIDX(fl_credits)); 1288 } else if (nframes > 0) 1289 netmap_rx_irq(ifp, nm_rxq->nid, &work); 1290 1291 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) | 1292 V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | 1293 V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); 1294 } 1295 #endif 1296