1 /*- 2 * Copyright (c) 2014 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 32 #ifdef DEV_NETMAP 33 #include <sys/param.h> 34 #include <sys/bus.h> 35 #include <sys/eventhandler.h> 36 #include <sys/lock.h> 37 #include <sys/mbuf.h> 38 #include <sys/module.h> 39 #include <sys/selinfo.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <machine/bus.h> 43 #include <net/ethernet.h> 44 #include <net/if.h> 45 #include <net/if_media.h> 46 #include <net/if_var.h> 47 #include <net/if_clone.h> 48 #include <net/if_types.h> 49 #include <net/netmap.h> 50 #include <dev/netmap/netmap_kern.h> 51 52 #include "common/common.h" 53 #include "common/t4_regs.h" 54 #include "common/t4_regs_values.h" 55 56 extern int fl_pad; /* XXXNM */ 57 58 /* 59 * 0 = normal netmap rx 60 * 1 = black hole 61 * 2 = supermassive black hole (buffer packing enabled) 62 */ 63 int black_hole = 0; 64 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RWTUN, &black_hole, 0, 65 "Sink incoming packets."); 66 67 int rx_ndesc = 256; 68 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN, 69 &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated."); 70 71 int rx_nframes = 64; 72 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_nframes, CTLFLAG_RWTUN, 73 &rx_nframes, 0, "max # of frames received before waking up netmap rx."); 74 75 int holdoff_tmr_idx = 2; 76 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN, 77 &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues."); 78 79 /* 80 * Congestion drops. 81 * -1: no congestion feedback (not recommended). 82 * 0: backpressure the channel instead of dropping packets right away. 83 * 1: no backpressure, drop packets for the congested queue immediately. 
84 */ 85 static int nm_cong_drop = 1; 86 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_cong_drop, CTLFLAG_RWTUN, 87 &nm_cong_drop, 0, 88 "Congestion control for netmap rx queues (0 = backpressure, 1 = drop"); 89 90 int starve_fl = 0; 91 SYSCTL_INT(_hw_cxgbe, OID_AUTO, starve_fl, CTLFLAG_RWTUN, 92 &starve_fl, 0, "Don't ring fl db for netmap rx queues."); 93 94 /* 95 * Try to process tx credits in bulk. This may cause a delay in the return of 96 * tx credits and is suitable for bursty or non-stop tx only. 97 */ 98 int lazy_tx_credit_flush = 1; 99 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lazy_tx_credit_flush, CTLFLAG_RWTUN, 100 &lazy_tx_credit_flush, 0, "lazy credit flush for netmap tx queues."); 101 102 /* 103 * Split the netmap rx queues into two groups that populate separate halves of 104 * the RSS indirection table. This allows filters with hashmask to steer to a 105 * particular group of queues. 106 */ 107 static int nm_split_rss = 0; 108 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_split_rss, CTLFLAG_RWTUN, 109 &nm_split_rss, 0, "Split the netmap rx queues into two groups."); 110 111 /* 112 * netmap(4) says "netmap does not use features such as checksum offloading, TCP 113 * segmentation offloading, encryption, VLAN encapsulation/decapsulation, etc." 114 * but this knob can be used to get the hardware to checksum all tx traffic 115 * anyway. 
116 */ 117 static int nm_txcsum = 0; 118 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_txcsum, CTLFLAG_RWTUN, 119 &nm_txcsum, 0, "Enable transmit checksum offloading."); 120 121 static int free_nm_rxq_hwq(struct vi_info *, struct sge_nm_rxq *); 122 static int free_nm_txq_hwq(struct vi_info *, struct sge_nm_txq *); 123 124 int 125 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 126 int idx) 127 { 128 int rc; 129 struct sysctl_oid *oid; 130 struct sysctl_oid_list *children; 131 struct sysctl_ctx_list *ctx; 132 char name[16]; 133 size_t len; 134 struct adapter *sc = vi->adapter; 135 struct netmap_adapter *na = NA(vi->ifp); 136 137 MPASS(na != NULL); 138 139 len = vi->qsize_rxq * IQ_ESIZE; 140 rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 141 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 142 if (rc != 0) 143 return (rc); 144 145 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 146 rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, 147 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 148 if (rc != 0) 149 return (rc); 150 151 nm_rxq->vi = vi; 152 nm_rxq->nid = idx; 153 nm_rxq->iq_cidx = 0; 154 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 155 nm_rxq->iq_gen = F_RSPD_GEN; 156 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 157 nm_rxq->fl_sidx = na->num_rx_desc; 158 nm_rxq->fl_sidx2 = nm_rxq->fl_sidx; /* copy for rxsync cacheline */ 159 nm_rxq->intr_idx = intr_idx; 160 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 161 162 ctx = &vi->ctx; 163 children = SYSCTL_CHILDREN(vi->nm_rxq_oid); 164 165 snprintf(name, sizeof(name), "%d", idx); 166 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, 167 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 168 children = SYSCTL_CHILDREN(oid); 169 170 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 171 &nm_rxq->iq_abs_id, 0, "absolute id of the queue"); 172 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 173 &nm_rxq->iq_cntxt_id, 0, "SGE 
context id of the queue"); 174 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 175 &nm_rxq->iq_cidx, 0, "consumer index"); 176 177 children = SYSCTL_CHILDREN(oid); 178 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 179 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 180 children = SYSCTL_CHILDREN(oid); 181 182 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 183 &nm_rxq->fl_cntxt_id, 0, "SGE context id of the freelist"); 184 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 185 &nm_rxq->fl_cidx, 0, "consumer index"); 186 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 187 &nm_rxq->fl_pidx, 0, "producer index"); 188 189 return (rc); 190 } 191 192 int 193 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 194 { 195 struct adapter *sc = vi->adapter; 196 197 if (!(vi->flags & VI_INIT_DONE)) 198 return (0); 199 200 if (nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID) 201 free_nm_rxq_hwq(vi, nm_rxq); 202 MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); 203 204 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 205 nm_rxq->iq_desc); 206 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 207 nm_rxq->fl_desc); 208 209 return (0); 210 } 211 212 int 213 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx) 214 { 215 int rc; 216 size_t len; 217 struct port_info *pi = vi->pi; 218 struct adapter *sc = pi->adapter; 219 struct netmap_adapter *na = NA(vi->ifp); 220 char name[16]; 221 struct sysctl_oid *oid; 222 struct sysctl_oid_list *children = SYSCTL_CHILDREN(vi->nm_txq_oid); 223 224 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 225 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 226 &nm_txq->ba, (void **)&nm_txq->desc); 227 if (rc) 228 return (rc); 229 230 nm_txq->pidx = nm_txq->cidx = 0; 231 nm_txq->sidx = na->num_tx_desc; 232 nm_txq->nid = idx; 233 nm_txq->iqidx = iqidx; 234 nm_txq->cpl_ctrl0 = 
/*
 * Undo alloc_nm_txq(): free the hardware queue if one is still allocated and
 * then release the descriptor ring.  Does nothing (and returns 0) if the VI
 * has not completed initialization.  Always returns 0.
 */
int
free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
{
	struct adapter *sc = vi->adapter;

	if (!(vi->flags & VI_INIT_DONE))
		return (0);

	if (nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID)
		free_nm_txq_hwq(vi, nm_txq);
	MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID);

	free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba,
	    nm_txq->desc);

	return (0);
}

/*
 * Create (or restart) the hardware ingress queue and freelist behind a netmap
 * rx queue by sending a FW_IQ_CMD over the mailbox.
 *
 * Both descriptor rings are zeroed first.  If the queue has no context id yet
 * the command asks the firmware to allocate one; otherwise the ids saved from
 * the previous allocation are passed back in the command.  On success the ids
 * returned by the firmware are recorded, the queue is entered in the
 * adapter's iqmap/eqmap tables, the freelist doorbell value is precomputed,
 * and the queue's interrupt is armed via the GTS register.
 *
 * Returns 0 on success or a positive errno (the negated t4_wr_mbox() result).
 */
static int
alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
{
	int rc, cntxt_id;
	__be32 v;
	struct adapter *sc = vi->adapter;
	struct port_info *pi = vi->pi;
	struct sge_params *sp = &sc->params.sge;
	struct netmap_adapter *na = NA(vi->ifp);
	struct fw_iq_cmd c;
	/* Snapshot of the tunable so every use below sees the same value. */
	const int cong_drop = nm_cong_drop;
	const int cong_map = pi->rx_e_chan_map;

	MPASS(na != NULL);
	MPASS(nm_rxq->iq_desc != NULL);
	MPASS(nm_rxq->fl_desc != NULL);

	/* Same sizes that alloc_nm_rxq() used for the two rings. */
	bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE);
	bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + sp->spg_len);

	bzero(&c, sizeof(c));
	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
	    V_FW_IQ_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_IQSTART | FW_LEN16(c));
	if (nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID)
		c.alloc_to_len16 |= htobe32(F_FW_IQ_CMD_ALLOC);
	else {
		/* Restart of a queue that already has firmware ids. */
		c.iqid = htobe16(nm_rxq->iq_cntxt_id);
		c.fl0id = htobe16(nm_rxq->fl_cntxt_id);
		c.fl1id = htobe16(0xffff);
		c.physiqid = htobe16(nm_rxq->iq_abs_id);
	}
	MPASS(!forwarding_intr_to_fwq(sc));
	KASSERT(nm_rxq->intr_idx < sc->intr_count,
	    ("%s: invalid direct intr_idx %d", __func__, nm_rxq->intr_idx));
	v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx);
	c.type_to_iqandstindex = htobe32(v |
	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
	    V_FW_IQ_CMD_VIID(vi->viid) |
	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
	    F_FW_IQ_CMD_IQGTSMODE |
	    V_FW_IQ_CMD_IQINTCNTTHRESH(0) |
	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
	c.iqsize = htobe16(vi->qsize_rxq);
	c.iqaddr = htobe64(nm_rxq->iq_ba);
	/* Congestion feedback is requested unless the tunable is -1. */
	if (cong_drop != -1) {
		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN |
		    V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) | F_FW_IQ_CMD_FL0CONGCIF |
		    F_FW_IQ_CMD_FL0CONGEN);
	}
	/* Padding follows fl_pad; packing is enabled only for black_hole 2. */
	c.iqns_to_fl0congen |=
	    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
		V_FW_IQ_CMD_IQTYPE(FW_IQ_IQTYPE_NIC) |
		F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
		(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
		(black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0));
	c.fl0dcaen_to_fl0cidxfthresh =
	    htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ?
		X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) |
		V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ?
		X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
	/* NOTE(review): size appears to be in units of 8 buffers -- confirm. */
	c.fl0size = htobe16(na->num_rx_desc / 8 + sp->spg_len / EQ_ESIZE);
	c.fl0addr = htobe64(nm_rxq->fl_ba);

	/* The reply is written back into 'c'. */
	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create netmap ingress queue: %d\n", rc);
		return (rc);
	}

	/* Ingress queue: reset indices and record the ids from the reply. */
	nm_rxq->iq_cidx = 0;
	MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - sp->spg_len / IQ_ESIZE);
	nm_rxq->iq_gen = F_RSPD_GEN;
	nm_rxq->iq_cntxt_id = be16toh(c.iqid);
	nm_rxq->iq_abs_id = be16toh(c.physiqid);
	cntxt_id = nm_rxq->iq_cntxt_id - sc->sge.iq_start;
	if (cntxt_id >= sc->sge.iqmap_sz) {
		panic ("%s: nm_rxq->iq_cntxt_id (%d) more than the max (%d)",
		    __func__, cntxt_id, sc->sge.iqmap_sz - 1);
	}
	sc->sge.iqmap[cntxt_id] = (void *)nm_rxq;

	/* Freelist: same treatment, entered in the eqmap table. */
	nm_rxq->fl_cntxt_id = be16toh(c.fl0id);
	nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
	nm_rxq->fl_db_saved = 0;
	/* matches the X_FETCHBURSTMAX_512B or X_FETCHBURSTMAX_256B above. */
	nm_rxq->fl_db_threshold = chip_id(sc) <= CHELSIO_T5 ? 8 : 4;
	MPASS(nm_rxq->fl_sidx == na->num_rx_desc);
	cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.eqmap_sz) {
		panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)",
		    __func__, cntxt_id, sc->sge.eqmap_sz - 1);
	}
	sc->sge.eqmap[cntxt_id] = (void *)nm_rxq;

	/* Constant part of every freelist doorbell write for this queue. */
	nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) |
	    sc->chip_params->sge_fl_db;

	if (chip_id(sc) >= CHELSIO_T5 && cong_drop != -1) {
		t4_sge_set_conm_context(sc, nm_rxq->iq_cntxt_id, cong_drop,
		    cong_map);
	}

	/* Arm the queue with the holdoff timer selected by the tunable. */
	t4_write_reg(sc, sc->sge_gts_reg,
	    V_INGRESSQID(nm_rxq->iq_cntxt_id) |
	    V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));

	return (rc);
}
8 : 4; 366 MPASS(nm_rxq->fl_sidx == na->num_rx_desc); 367 cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start; 368 if (cntxt_id >= sc->sge.eqmap_sz) { 369 panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)", 370 __func__, cntxt_id, sc->sge.eqmap_sz - 1); 371 } 372 sc->sge.eqmap[cntxt_id] = (void *)nm_rxq; 373 374 nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) | 375 sc->chip_params->sge_fl_db; 376 377 if (chip_id(sc) >= CHELSIO_T5 && cong_drop != -1) { 378 t4_sge_set_conm_context(sc, nm_rxq->iq_cntxt_id, cong_drop, 379 cong_map); 380 } 381 382 t4_write_reg(sc, sc->sge_gts_reg, 383 V_INGRESSQID(nm_rxq->iq_cntxt_id) | 384 V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); 385 386 return (rc); 387 } 388 389 static int 390 free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 391 { 392 struct adapter *sc = vi->adapter; 393 int rc; 394 395 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, 396 nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff); 397 if (rc != 0) 398 device_printf(sc->dev, "%s: failed for iq %d, fl %d: %d\n", 399 __func__, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, rc); 400 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 401 return (rc); 402 } 403 404 static int 405 alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 406 { 407 int rc, cntxt_id; 408 size_t len; 409 struct adapter *sc = vi->adapter; 410 struct netmap_adapter *na = NA(vi->ifp); 411 struct fw_eq_eth_cmd c; 412 413 MPASS(na != NULL); 414 MPASS(nm_txq->desc != NULL); 415 416 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 417 bzero(nm_txq->desc, len); 418 419 bzero(&c, sizeof(c)); 420 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 421 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 422 V_FW_EQ_ETH_CMD_VFN(0)); 423 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 424 if (nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID) 425 c.alloc_to_len16 |= htobe32(F_FW_EQ_ETH_CMD_ALLOC); 426 else 427 c.eqid_pkd = 
htobe32(V_FW_EQ_ETH_CMD_EQID(nm_txq->cntxt_id)); 428 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 429 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 430 c.fetchszm_to_iqid = 431 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 432 V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 433 V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id)); 434 c.dcaen_to_eqsize = 435 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 436 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 437 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 438 V_FW_EQ_ETH_CMD_EQSIZE(len / EQ_ESIZE)); 439 c.eqaddr = htobe64(nm_txq->ba); 440 441 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 442 if (rc != 0) { 443 device_printf(vi->dev, 444 "failed to create netmap egress queue: %d\n", rc); 445 return (rc); 446 } 447 448 nm_txq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 449 cntxt_id = nm_txq->cntxt_id - sc->sge.eq_start; 450 if (cntxt_id >= sc->sge.eqmap_sz) 451 panic("%s: nm_txq->cntxt_id (%d) more than the max (%d)", __func__, 452 cntxt_id, sc->sge.eqmap_sz - 1); 453 sc->sge.eqmap[cntxt_id] = (void *)nm_txq; 454 455 nm_txq->pidx = nm_txq->cidx = 0; 456 MPASS(nm_txq->sidx == na->num_tx_desc); 457 nm_txq->equiqidx = nm_txq->equeqidx = nm_txq->dbidx = 0; 458 459 nm_txq->doorbells = sc->doorbells; 460 if (isset(&nm_txq->doorbells, DOORBELL_UDB) || 461 isset(&nm_txq->doorbells, DOORBELL_UDBWC) || 462 isset(&nm_txq->doorbells, DOORBELL_WCWR)) { 463 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 464 uint32_t mask = (1 << s_qpp) - 1; 465 volatile uint8_t *udb; 466 467 udb = sc->udbs_base + UDBS_DB_OFFSET; 468 udb += (nm_txq->cntxt_id >> s_qpp) << PAGE_SHIFT; 469 nm_txq->udb_qid = nm_txq->cntxt_id & mask; 470 if (nm_txq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 471 clrbit(&nm_txq->doorbells, DOORBELL_WCWR); 472 else { 473 udb += nm_txq->udb_qid << UDBS_SEG_SHIFT; 474 nm_txq->udb_qid = 0; 475 } 476 nm_txq->udb = (volatile void *)udb; 
477 } 478 479 if (sc->params.fw_vers < FW_VERSION32(1, 25, 1, 0)) { 480 uint32_t param, val; 481 482 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 483 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | 484 V_FW_PARAMS_PARAM_YZ(nm_txq->cntxt_id); 485 val = 0xff; 486 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 487 if (rc != 0) { 488 device_printf(vi->dev, 489 "failed to bind netmap txq %d to class 0xff: %d\n", 490 nm_txq->cntxt_id, rc); 491 rc = 0; 492 } 493 } 494 495 return (rc); 496 } 497 498 static int 499 free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 500 { 501 struct adapter *sc = vi->adapter; 502 int rc; 503 504 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id); 505 if (rc != 0) 506 device_printf(sc->dev, "%s: failed for eq %d: %d\n", __func__, 507 nm_txq->cntxt_id, rc); 508 nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; 509 return (rc); 510 } 511 512 static int 513 cxgbe_netmap_simple_rss(struct adapter *sc, struct vi_info *vi, 514 if_t ifp, struct netmap_adapter *na) 515 { 516 struct netmap_kring *kring; 517 struct sge_nm_rxq *nm_rxq; 518 int rc, i, j, nm_state, defq; 519 uint16_t *rss; 520 521 /* 522 * Check if there's at least one active (or about to go active) netmap 523 * rx queue. 524 */ 525 defq = -1; 526 for_each_nm_rxq(vi, j, nm_rxq) { 527 nm_state = atomic_load_int(&nm_rxq->nm_state); 528 kring = na->rx_rings[nm_rxq->nid]; 529 if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || 530 (nm_state == NM_OFF && nm_kring_pending_on(kring))) { 531 MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 532 if (defq == -1) { 533 defq = nm_rxq->iq_abs_id; 534 break; 535 } 536 } 537 } 538 539 if (defq == -1) { 540 /* No active netmap queues. Switch back to NIC queues. 
*/ 541 rss = vi->rss; 542 defq = vi->rss[0]; 543 } else { 544 for (i = 0; i < vi->rss_size;) { 545 for_each_nm_rxq(vi, j, nm_rxq) { 546 nm_state = atomic_load_int(&nm_rxq->nm_state); 547 kring = na->rx_rings[nm_rxq->nid]; 548 if ((nm_state != NM_OFF && 549 !nm_kring_pending_off(kring)) || 550 (nm_state == NM_OFF && 551 nm_kring_pending_on(kring))) { 552 MPASS(nm_rxq->iq_cntxt_id != 553 INVALID_NM_RXQ_CNTXT_ID); 554 vi->nm_rss[i++] = nm_rxq->iq_abs_id; 555 if (i == vi->rss_size) 556 break; 557 } 558 } 559 } 560 rss = vi->nm_rss; 561 } 562 563 rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, 564 vi->rss_size); 565 if (rc != 0) 566 if_printf(ifp, "netmap rss_config failed: %d\n", rc); 567 568 rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0); 569 if (rc != 0) { 570 if_printf(ifp, "netmap defaultq config failed: %d\n", rc); 571 } 572 573 return (rc); 574 } 575 576 /* 577 * Odd number of rx queues work best for split RSS mode as the first queue can 578 * be dedicated for non-RSS traffic and the rest divided into two equal halves. 579 */ 580 static int 581 cxgbe_netmap_split_rss(struct adapter *sc, struct vi_info *vi, 582 if_t ifp, struct netmap_adapter *na) 583 { 584 struct netmap_kring *kring; 585 struct sge_nm_rxq *nm_rxq; 586 int rc, i, j, nm_state, defq; 587 int nactive[2] = {0, 0}; 588 int dq[2] = {-1, -1}; 589 bool dq_norss; /* default queue should not be in RSS table. 
*/ 590 591 MPASS(nm_split_rss != 0); 592 MPASS(vi->nnmrxq > 1); 593 594 for_each_nm_rxq(vi, i, nm_rxq) { 595 j = i / ((vi->nnmrxq + 1) / 2); 596 nm_state = atomic_load_int(&nm_rxq->nm_state); 597 kring = na->rx_rings[nm_rxq->nid]; 598 if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || 599 (nm_state == NM_OFF && nm_kring_pending_on(kring))) { 600 MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 601 nactive[j]++; 602 if (dq[j] == -1) { 603 dq[j] = nm_rxq->iq_abs_id; 604 break; 605 } 606 } 607 } 608 609 if (nactive[0] == 0 || nactive[1] == 0) 610 return (cxgbe_netmap_simple_rss(sc, vi, ifp, na)); 611 612 MPASS(dq[0] != -1 && dq[1] != -1); 613 if (nactive[0] > nactive[1]) { 614 defq = dq[0]; 615 dq_norss = true; 616 } else if (nactive[0] < nactive[1]) { 617 defq = dq[1]; 618 dq_norss = true; 619 } else { 620 defq = dq[0]; 621 dq_norss = false; 622 } 623 624 i = 0; 625 nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq]; 626 while (i < vi->rss_size / 2) { 627 for (j = 0; j < (vi->nnmrxq + 1) / 2; j++) { 628 nm_state = atomic_load_int(&nm_rxq[j].nm_state); 629 kring = na->rx_rings[nm_rxq[j].nid]; 630 if ((nm_state == NM_OFF && 631 !nm_kring_pending_on(kring)) || 632 (nm_state == NM_ON && 633 nm_kring_pending_off(kring))) { 634 continue; 635 } 636 MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 637 if (dq_norss && defq == nm_rxq[j].iq_abs_id) 638 continue; 639 vi->nm_rss[i++] = nm_rxq[j].iq_abs_id; 640 if (i == vi->rss_size / 2) 641 break; 642 } 643 } 644 while (i < vi->rss_size) { 645 for (j = (vi->nnmrxq + 1) / 2; j < vi->nnmrxq; j++) { 646 nm_state = atomic_load_int(&nm_rxq[j].nm_state); 647 kring = na->rx_rings[nm_rxq[j].nid]; 648 if ((nm_state == NM_OFF && 649 !nm_kring_pending_on(kring)) || 650 (nm_state == NM_ON && 651 nm_kring_pending_off(kring))) { 652 continue; 653 } 654 MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); 655 if (dq_norss && defq == nm_rxq[j].iq_abs_id) 656 continue; 657 vi->nm_rss[i++] = nm_rxq[j].iq_abs_id; 658 if (i == 
/*
 * Reprogram RSS for the VI's netmap queues: simple mode unless the
 * nm_split_rss tunable is set and the VI has more than one netmap rx queue.
 */
static inline int
cxgbe_netmap_rss(struct adapter *sc, struct vi_info *vi, if_t ifp,
    struct netmap_adapter *na)
{

	if (nm_split_rss == 0 || vi->nnmrxq == 1)
		return (cxgbe_netmap_simple_rss(sc, vi, ifp, na));
	else
		return (cxgbe_netmap_split_rss(sc, vi, ifp, na));
}

/*
 * Switch the rings that netmap has marked "pending on" into netmap mode.
 *
 * Fails with EAGAIN if the interface is not initialized and running, and with
 * ENXIO if no hardware buffer size index matches the netmap buffer size.
 * Otherwise, for every pending rx ring the hardware queue is created, the
 * freelist is populated with the addresses of the netmap buffers and handed
 * to the hardware, and the queue state moves from NM_OFF to NM_ON.  For every
 * pending tx ring the hardware queue is created.  Finally the RSS table is
 * reprogrammed; its result is the return value.
 *
 * Must be called within a synchronized operation on the adapter.
 *
 * NOTE(review): the return values of alloc_nm_rxq_hwq() and
 * alloc_nm_txq_hwq() are not checked here.
 */
static int
cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, if_t ifp,
    struct netmap_adapter *na)
{
	struct netmap_slot *slot;
	struct netmap_kring *kring;
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
	int i, j, hwidx;
	struct rx_buf_info *rxb;

	ASSERT_SYNCHRONIZED_OP(sc);
	MPASS(vi->nnmrxq > 0);
	MPASS(vi->nnmtxq > 0);

	if ((vi->flags & VI_INIT_DONE) == 0 ||
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		if_printf(ifp, "cannot enable netmap operation because "
		    "interface is not UP.\n");
		return (EAGAIN);
	}

	/*
	 * Find the hardware buffer size index whose size equals the netmap
	 * buffer size.  hwidx is set only when the loop exits via break.
	 */
	rxb = &sc->sge.rx_buf_info[0];
	for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
		if (rxb->size1 == NETMAP_BUF_SIZE(na)) {
			hwidx = rxb->hwidx1;
			break;
		}
		if (rxb->size2 == NETMAP_BUF_SIZE(na)) {
			hwidx = rxb->hwidx2;
			break;
		}
	}
	if (i >= SW_ZONE_SIZES) {
		if_printf(ifp, "no hwidx for netmap buffer size %d.\n",
		    NETMAP_BUF_SIZE(na));
		return (ENXIO);
	}

	/* Must set caps before calling netmap_reset */
	nm_set_native_flags(na);

	for_each_nm_rxq(vi, i, nm_rxq) {
		kring = na->rx_rings[nm_rxq->nid];
		if (!nm_kring_pending_on(kring))
			continue;

		alloc_nm_rxq_hwq(vi, nm_rxq);
		nm_rxq->fl_hwidx = hwidx;
		slot = netmap_reset(na, NR_RX, i, 0);
		MPASS(slot != NULL);	/* XXXNM: error check, not assert */

		/* We deal with 8 bufs at a time */
		MPASS((na->num_rx_desc & 7) == 0);
		MPASS(na->num_rx_desc == nm_rxq->fl_sidx);
		/* Each freelist entry is a buffer address with hwidx or'd in. */
		for (j = 0; j < nm_rxq->fl_sidx; j++) {
			uint64_t ba;

			PNMB(na, &slot[j], &ba);
			MPASS(ba != 0);
			nm_rxq->fl_desc[j] = htobe64(ba | hwidx);
		}
		/* All but the last group of 8 buffers go to the hardware. */
		j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8;
		MPASS((j & 7) == 0);
		j /= 8;	/* driver pidx to hardware pidx */
		/* Descriptors must be visible before the doorbell is rung. */
		wmb();
		t4_write_reg(sc, sc->sge_kdoorbell_reg,
		    nm_rxq->fl_db_val | V_PIDX(j));

		(void) atomic_cmpset_int(&nm_rxq->nm_state, NM_OFF, NM_ON);
	}

	for_each_nm_txq(vi, i, nm_txq) {
		kring = na->tx_rings[nm_txq->nid];
		if (!nm_kring_pending_on(kring))
			continue;

		alloc_nm_txq_hwq(vi, nm_txq);
		slot = netmap_reset(na, NR_TX, i, 0);
		MPASS(slot != NULL);	/* XXXNM: error check, not assert */
	}

	/* The netmap RSS table is allocated once, on first use. */
	if (vi->nm_rss == NULL) {
		vi->nm_rss = malloc(vi->rss_size * sizeof(uint16_t), M_CXGBE,
		    M_ZERO | M_WAITOK);
	}

	return (cxgbe_netmap_rss(sc, vi, ifp, na));
}
/*
 * Take the rings that netmap has marked "pending off" out of netmap mode.
 *
 * Returns 0 immediately if netmap is not on for this adapter or the VI is not
 * initialized.  Otherwise RSS is reprogrammed first so that the queues being
 * stopped no longer receive traffic, then each pending tx and rx hardware
 * queue is stopped (not freed) and its kring indices are reset.  The native
 * flags are cleared once no rx queue remains in use.
 *
 * Returns 0 on success or the first error encountered; on error the
 * remaining queues are left untouched.
 *
 * Must be called within a synchronized operation on the adapter.
 */
static int
cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, if_t ifp,
    struct netmap_adapter *na)
{
	struct netmap_kring *kring;
	int rc, i, nm_state, nactive;
	struct sge_nm_txq *nm_txq;
	struct sge_nm_rxq *nm_rxq;

	ASSERT_SYNCHRONIZED_OP(sc);
	MPASS(vi->nnmrxq > 0);
	MPASS(vi->nnmtxq > 0);

	if (!nm_netmap_on(na))
		return (0);

	if ((vi->flags & VI_INIT_DONE) == 0)
		return (0);

	/* First remove the queues that are stopping from the RSS table. */
	rc = cxgbe_netmap_rss(sc, vi, ifp, na);
	if (rc != 0)
		return (rc);	/* error message logged already. */

	for_each_nm_txq(vi, i, nm_txq) {
		kring = na->tx_rings[nm_txq->nid];
		if (!nm_kring_pending_off(kring))
			continue;
		MPASS(nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID);

		rc = -t4_eth_eq_stop(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id);
		if (rc != 0) {
			device_printf(vi->dev,
			    "failed to stop nm_txq[%d]: %d.\n", i, rc);
			return (rc);
		}

		/* XXX: netmap, not the driver, should do this. */
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = kring->nkr_num_slots - 1;
	}
	/* nactive counts the rx queues that stay in use after this call. */
	nactive = 0;
	for_each_nm_rxq(vi, i, nm_rxq) {
		nm_state = atomic_load_int(&nm_rxq->nm_state);
		kring = na->rx_rings[nm_rxq->nid];
		if (nm_state != NM_OFF && !nm_kring_pending_off(kring))
			nactive++;
		if (!nm_kring_pending_off(kring))
			continue;
		MPASS(nm_state != NM_OFF);
		MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID);

		rc = -t4_iq_stop(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
		    nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff);
		if (rc != 0) {
			device_printf(vi->dev,
			    "failed to stop nm_rxq[%d]: %d.\n", i, rc);
			return (rc);
		}

		/*
		 * Retry, sleeping between attempts, until the state can be
		 * moved from NM_ON to NM_OFF.
		 * NOTE(review): presumably the rx path holds the queue in
		 * another state while it is working on it -- confirm.
		 */
		while (!atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_OFF))
			pause("nmst", 1);

		/* XXX: netmap, not the driver, should do this. */
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = 0;
	}
	netmap_krings_mode_commit(na, 0);
	if (nactive == 0)
		nm_clear_native_flags(na);

	return (rc);
}
/*
 * netmap register callback: switch netmap mode on (on != 0) or off for the
 * interface behind 'na'.  The work is done inside a synchronized operation on
 * the adapter; if that cannot be started its error is returned and nothing
 * else happens.
 */
static int
cxgbe_netmap_reg(struct netmap_adapter *na, int on)
{
	if_t ifp = na->ifp;
	struct vi_info *vi = if_getsoftc(ifp);
	struct adapter *sc = vi->adapter;
	int rc;

	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg");
	if (rc != 0)
		return (rc);
	if (on)
		rc = cxgbe_netmap_on(sc, vi, ifp, na);
	else
		rc = cxgbe_netmap_off(sc, vi, ifp, na);
	end_synchronized_op(sc, 0);

	return (rc);
}

/* How many packets can a single type1 WR carry in n descriptors */
static inline int
ndesc_to_npkt(const int n)
{

	MPASS(n > 0 && n <= SGE_MAX_WR_NDESC);

	return (n * 2 - 1);
}
#define MAX_NPKT_IN_TYPE1_WR	(ndesc_to_npkt(SGE_MAX_WR_NDESC))

/*
 * Space (in descriptors) needed for a type1 WR (TX_PKTS or TX_PKTS2) that
 * carries n packets
 */
static inline int
npkt_to_ndesc(const int n)
{

	MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR);

	return ((n + 2) / 2);
}

/*
 * Space (in 16B units) needed for a type1 WR (TX_PKTS or TX_PKTS2) that
 * carries n packets
 */
static inline int
npkt_to_len16(const int n)
{

	MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR);

	return (n * 2 + 1);
}

/*
 * IDXDIFF between the queue's pidx and another of its indices, with sidx as
 * the ring size.  NOTE(review): presumably the wrap-aware distance from 'idx'
 * up to pidx -- confirm against the IDXDIFF definition.
 */
#define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx)

/*
 * Ring the tx doorbell for everything written since the last doorbell
 * (dbidx .. pidx) and advance dbidx to pidx.  The caller must have written at
 * least one descriptor.
 *
 * The lowest-numbered doorbell method set in the queue's doorbells mask is
 * used, except that DOORBELL_WCWR is not considered when more than one
 * descriptor is pending.
 */
static void
ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq)
{
	int n;
	u_int db = nm_txq->doorbells;	/* local copy; the queue's mask is not modified */

	MPASS(nm_txq->pidx != nm_txq->dbidx);

	n = NMIDXDIFF(nm_txq, dbidx);
	if (n > 1)
		clrbit(&db, DOORBELL_WCWR);
	/* Descriptors must be visible before any doorbell write. */
	wmb();

	switch (ffs(db) - 1) {
	case DOORBELL_UDB:
		*nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n));
		break;

	case DOORBELL_WCWR: {
		volatile uint64_t *dst, *src;

		/*
		 * Queues whose 128B doorbell segment fits in the page do not
		 * use relative qid (udb_qid is always 0).  Only queues with
		 * doorbell segments can do WCWR.
		 */
		KASSERT(nm_txq->udb_qid == 0 && n == 1,
		    ("%s: inappropriate doorbell (0x%x, %d, %d) for nm_txq %p",
		    __func__, nm_txq->doorbells, n, nm_txq->pidx, nm_txq));

		/* Copy the one pending descriptor itself into the WR window. */
		dst = (volatile void *)((uintptr_t)nm_txq->udb +
		    UDBS_WR_OFFSET - UDBS_DB_OFFSET);
		src = (void *)&nm_txq->desc[nm_txq->dbidx];
		while (src != (void *)&nm_txq->desc[nm_txq->dbidx + 1])
			*dst++ = *src++;
		wmb();
		break;
	}

	case DOORBELL_UDBWC:
		*nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n));
		wmb();
		break;

	case DOORBELL_KDB:
		t4_write_reg(sc, sc->sge_kdoorbell_reg,
		    V_QID(nm_txq->cntxt_id) | V_PIDX(n));
		break;
	}
	nm_txq->dbidx = nm_txq->pidx;
}
/*
 * Write work requests to send 'npkt' frames and ring the doorbell to send them
 * on their way.  No need to check for wraparound.
 *
 * Frames are taken from the netmap ring starting at kring->nr_hwcur, which is
 * advanced past every frame consumed.  Each work request carries at most
 * MAX_NPKT_IN_TYPE1_WR frames; nm_txq->pidx is advanced by the descriptors
 * each request occupies.  'npkt_remaining' is the number of frames the caller
 * still has to send after this call: when it is 0 the doorbell is always rung
 * before returning, otherwise it is rung only once enough descriptors have
 * accumulated.
 */
static void
cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
    struct netmap_kring *kring, int npkt, int npkt_remaining)
{
	struct netmap_ring *ring = kring->ring;
	struct netmap_slot *slot;
	const u_int lim = kring->nkr_num_slots - 1;
	struct fw_eth_tx_pkts_wr *wr = (void *)&nm_txq->desc[nm_txq->pidx];
	uint16_t len;
	uint64_t ba;
	struct cpl_tx_pkt_core *cpl;
	struct ulptx_sgl *usgl;
	int i, n;

	while (npkt) {
		/* One work request per iteration, n frames in it. */
		n = min(npkt, MAX_NPKT_IN_TYPE1_WR);
		len = 0;

		wr = (void *)&nm_txq->desc[nm_txq->pidx];
		wr->op_pkd = nm_txq->op_pkd;
		wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(npkt_to_len16(n)));
		wr->npkt = n;
		wr->r3 = 0;
		wr->type = 1;
		cpl = (void *)(wr + 1);

		/* Per frame: a CPL header followed by a one-entry SGL. */
		for (i = 0; i < n; i++) {
			slot = &ring->slot[kring->nr_hwcur];
			PNMB(kring->na, slot, &ba);
			MPASS(ba != 0);

			cpl->ctrl0 = nm_txq->cpl_ctrl0;
			cpl->pack = 0;
			cpl->len = htobe16(slot->len);
			/* Checksum insertion is disabled unless nm_txcsum is set. */
			cpl->ctrl1 = nm_txcsum ? 0 :
			    htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);

			usgl = (void *)(cpl + 1);
			usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
			    V_ULPTX_NSGE(1));
			usgl->len0 = htobe32(slot->len);
			usgl->addr0 = htobe64(ba + nm_get_offset(kring, slot));

			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			cpl = (void *)(usgl + 1);
			MPASS(slot->len + len <= UINT16_MAX);
			len += slot->len;
			kring->nr_hwcur = nm_next(kring->nr_hwcur, lim);
		}
		/* Total payload length of all frames in this request. */
		wr->plen = htobe16(len);

		npkt -= n;
		nm_txq->pidx += npkt_to_ndesc(n);
		MPASS(nm_txq->pidx <= nm_txq->sidx);
		if (__predict_false(nm_txq->pidx == nm_txq->sidx)) {
			/*
			 * This routine doesn't know how to write WRs that wrap
			 * around.  Make sure it wasn't asked to.
			 */
			MPASS(npkt == 0);
			nm_txq->pidx = 0;
		}

		if (npkt == 0 && npkt_remaining == 0) {
			/* All done. */
			if (lazy_tx_credit_flush == 0) {
				/* Request both notifications on the last WR. */
				wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
				    F_FW_WR_EQUIQ);
				nm_txq->equeqidx = nm_txq->pidx;
				nm_txq->equiqidx = nm_txq->pidx;
			}
			ring_nm_txq_db(sc, nm_txq);
			return;
		}

		/*
		 * More to send.  Request EQUEQ|EQUIQ once half the ring has
		 * gone by since the last such request, or EQUEQ alone after
		 * 64 descriptors.
		 */
		if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) {
			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
			    F_FW_WR_EQUIQ);
			nm_txq->equeqidx = nm_txq->pidx;
			nm_txq->equiqidx = nm_txq->pidx;
		} else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) {
			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
			nm_txq->equeqidx = nm_txq->pidx;
		}
		/* Don't let too many descriptors pile up behind the doorbell. */
		if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC)
			ring_nm_txq_db(sc, nm_txq);
	}

	/* Will get called again. */
	MPASS(npkt_remaining);
}
*/ 1030 if (lazy_tx_credit_flush == 0) { 1031 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | 1032 F_FW_WR_EQUIQ); 1033 nm_txq->equeqidx = nm_txq->pidx; 1034 nm_txq->equiqidx = nm_txq->pidx; 1035 } 1036 ring_nm_txq_db(sc, nm_txq); 1037 return; 1038 } 1039 1040 if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) { 1041 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | 1042 F_FW_WR_EQUIQ); 1043 nm_txq->equeqidx = nm_txq->pidx; 1044 nm_txq->equiqidx = nm_txq->pidx; 1045 } else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) { 1046 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 1047 nm_txq->equeqidx = nm_txq->pidx; 1048 } 1049 if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC) 1050 ring_nm_txq_db(sc, nm_txq); 1051 } 1052 1053 /* Will get called again. */ 1054 MPASS(npkt_remaining); 1055 } 1056 1057 /* How many contiguous free descriptors starting at pidx */ 1058 static inline int 1059 contiguous_ndesc_available(struct sge_nm_txq *nm_txq) 1060 { 1061 1062 if (nm_txq->cidx > nm_txq->pidx) 1063 return (nm_txq->cidx - nm_txq->pidx - 1); 1064 else if (nm_txq->cidx > 0) 1065 return (nm_txq->sidx - nm_txq->pidx); 1066 else 1067 return (nm_txq->sidx - nm_txq->pidx - 1); 1068 } 1069 1070 static int 1071 reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq) 1072 { 1073 struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; 1074 uint16_t hw_cidx = spg->cidx; /* snapshot */ 1075 struct fw_eth_tx_pkts_wr *wr; 1076 int n = 0; 1077 1078 hw_cidx = be16toh(hw_cidx); 1079 1080 while (nm_txq->cidx != hw_cidx) { 1081 wr = (void *)&nm_txq->desc[nm_txq->cidx]; 1082 1083 MPASS(wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)) || 1084 wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR))); 1085 MPASS(wr->type == 1); 1086 MPASS(wr->npkt > 0 && wr->npkt <= MAX_NPKT_IN_TYPE1_WR); 1087 1088 n += wr->npkt; 1089 nm_txq->cidx += npkt_to_ndesc(wr->npkt); 1090 1091 /* 1092 * We never sent a WR that wrapped around so the credits coming 1093 * back, WR by WR, should never cause the cidx to wrap around 1094 * 
either. 1095 */ 1096 MPASS(nm_txq->cidx <= nm_txq->sidx); 1097 if (__predict_false(nm_txq->cidx == nm_txq->sidx)) 1098 nm_txq->cidx = 0; 1099 } 1100 1101 return (n); 1102 } 1103 1104 static int 1105 cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) 1106 { 1107 struct netmap_adapter *na = kring->na; 1108 if_t ifp = na->ifp; 1109 struct vi_info *vi = if_getsoftc(ifp); 1110 struct adapter *sc = vi->adapter; 1111 struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_nm_txq + kring->ring_id]; 1112 const u_int head = kring->rhead; 1113 u_int reclaimed = 0; 1114 int n, d, npkt_remaining, ndesc_remaining; 1115 1116 /* 1117 * Tx was at kring->nr_hwcur last time around and now we need to advance 1118 * to kring->rhead. Note that the driver's pidx moves independent of 1119 * netmap's kring->nr_hwcur (pidx counts descriptors and the relation 1120 * between descriptors and frames isn't 1:1). 1121 */ 1122 1123 npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur : 1124 kring->nkr_num_slots - kring->nr_hwcur + head; 1125 while (npkt_remaining) { 1126 reclaimed += reclaim_nm_tx_desc(nm_txq); 1127 ndesc_remaining = contiguous_ndesc_available(nm_txq); 1128 /* Can't run out of descriptors with packets still remaining */ 1129 MPASS(ndesc_remaining > 0); 1130 1131 /* # of desc needed to tx all remaining packets */ 1132 d = (npkt_remaining / MAX_NPKT_IN_TYPE1_WR) * SGE_MAX_WR_NDESC; 1133 if (npkt_remaining % MAX_NPKT_IN_TYPE1_WR) 1134 d += npkt_to_ndesc(npkt_remaining % MAX_NPKT_IN_TYPE1_WR); 1135 1136 if (d <= ndesc_remaining) 1137 n = npkt_remaining; 1138 else { 1139 /* Can't send all, calculate how many can be sent */ 1140 n = (ndesc_remaining / SGE_MAX_WR_NDESC) * 1141 MAX_NPKT_IN_TYPE1_WR; 1142 if (ndesc_remaining % SGE_MAX_WR_NDESC) 1143 n += ndesc_to_npkt(ndesc_remaining % SGE_MAX_WR_NDESC); 1144 } 1145 1146 /* Send n packets and update nm_txq->pidx and kring->nr_hwcur */ 1147 npkt_remaining -= n; 1148 cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining); 1149 
} 1150 MPASS(npkt_remaining == 0); 1151 MPASS(kring->nr_hwcur == head); 1152 MPASS(nm_txq->dbidx == nm_txq->pidx); 1153 1154 /* 1155 * Second part: reclaim buffers for completed transmissions. 1156 */ 1157 if (reclaimed || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { 1158 reclaimed += reclaim_nm_tx_desc(nm_txq); 1159 kring->nr_hwtail += reclaimed; 1160 if (kring->nr_hwtail >= kring->nkr_num_slots) 1161 kring->nr_hwtail -= kring->nkr_num_slots; 1162 } 1163 1164 return (0); 1165 } 1166 1167 static int 1168 cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) 1169 { 1170 struct netmap_adapter *na = kring->na; 1171 struct netmap_ring *ring = kring->ring; 1172 if_t ifp = na->ifp; 1173 struct vi_info *vi = if_getsoftc(ifp); 1174 struct adapter *sc = vi->adapter; 1175 struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq + kring->ring_id]; 1176 u_int const head = kring->rhead; 1177 u_int n; 1178 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 1179 1180 if (black_hole) 1181 return (0); /* No updates ever. */ 1182 1183 if (netmap_no_pendintr || force_update) { 1184 kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx); 1185 kring->nr_kflags &= ~NKR_PENDINTR; 1186 } 1187 1188 if (nm_rxq->fl_db_saved > 0 && starve_fl == 0) { 1189 wmb(); 1190 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1191 nm_rxq->fl_db_val | V_PIDX(nm_rxq->fl_db_saved)); 1192 nm_rxq->fl_db_saved = 0; 1193 } 1194 1195 /* Userspace done with buffers from kring->nr_hwcur to head */ 1196 n = head >= kring->nr_hwcur ? head - kring->nr_hwcur : 1197 kring->nkr_num_slots - kring->nr_hwcur + head; 1198 n &= ~7U; 1199 if (n > 0) { 1200 u_int fl_pidx = nm_rxq->fl_pidx; 1201 struct netmap_slot *slot = &ring->slot[fl_pidx]; 1202 uint64_t ba; 1203 int i, dbinc = 0, hwidx = nm_rxq->fl_hwidx; 1204 1205 /* 1206 * We always deal with 8 buffers at a time. 
We must have 1207 * stopped at an 8B boundary (fl_pidx) last time around and we 1208 * must have a multiple of 8B buffers to give to the freelist. 1209 */ 1210 MPASS((fl_pidx & 7) == 0); 1211 MPASS((n & 7) == 0); 1212 1213 IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots); 1214 IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx2); 1215 1216 while (n > 0) { 1217 for (i = 0; i < 8; i++, fl_pidx++, slot++) { 1218 PNMB(na, slot, &ba); 1219 MPASS(ba != 0); 1220 nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx); 1221 slot->flags &= ~NS_BUF_CHANGED; 1222 MPASS(fl_pidx <= nm_rxq->fl_sidx2); 1223 } 1224 n -= 8; 1225 if (fl_pidx == nm_rxq->fl_sidx2) { 1226 fl_pidx = 0; 1227 slot = &ring->slot[0]; 1228 } 1229 if (++dbinc == nm_rxq->fl_db_threshold) { 1230 wmb(); 1231 if (starve_fl) 1232 nm_rxq->fl_db_saved += dbinc; 1233 else { 1234 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1235 nm_rxq->fl_db_val | V_PIDX(dbinc)); 1236 } 1237 dbinc = 0; 1238 } 1239 } 1240 MPASS(nm_rxq->fl_pidx == fl_pidx); 1241 1242 if (dbinc > 0) { 1243 wmb(); 1244 if (starve_fl) 1245 nm_rxq->fl_db_saved += dbinc; 1246 else { 1247 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1248 nm_rxq->fl_db_val | V_PIDX(dbinc)); 1249 } 1250 } 1251 } 1252 1253 return (0); 1254 } 1255 1256 void 1257 cxgbe_nm_attach(struct vi_info *vi) 1258 { 1259 struct port_info *pi; 1260 struct adapter *sc; 1261 struct netmap_adapter na; 1262 1263 MPASS(vi->nnmrxq > 0); 1264 MPASS(vi->ifp != NULL); 1265 1266 pi = vi->pi; 1267 sc = pi->adapter; 1268 1269 bzero(&na, sizeof(na)); 1270 1271 na.ifp = vi->ifp; 1272 na.na_flags = NAF_BDG_MAYSLEEP | NAF_OFFSETS; 1273 1274 /* Netmap doesn't know about the space reserved for the status page. */ 1275 na.num_tx_desc = vi->qsize_txq - sc->params.sge.spg_len / EQ_ESIZE; 1276 1277 /* 1278 * The freelist's cidx/pidx drives netmap's rx cidx/pidx. So 1279 * num_rx_desc is based on the number of buffers that can be held in the 1280 * freelist, and not the number of entries in the iq. 
(These two are 1281 * not exactly the same due to the space taken up by the status page). 1282 */ 1283 na.num_rx_desc = rounddown(vi->qsize_rxq, 8); 1284 na.nm_txsync = cxgbe_netmap_txsync; 1285 na.nm_rxsync = cxgbe_netmap_rxsync; 1286 na.nm_register = cxgbe_netmap_reg; 1287 na.num_tx_rings = vi->nnmtxq; 1288 na.num_rx_rings = vi->nnmrxq; 1289 na.rx_buf_maxsize = MAX_MTU + sc->params.sge.fl_pktshift; 1290 netmap_attach(&na); /* This adds IFCAP_NETMAP to if_capabilities */ 1291 } 1292 1293 void 1294 cxgbe_nm_detach(struct vi_info *vi) 1295 { 1296 1297 MPASS(vi->nnmrxq > 0); 1298 MPASS(vi->ifp != NULL); 1299 1300 netmap_detach(vi->ifp); 1301 } 1302 1303 static inline const void * 1304 unwrap_nm_fw6_msg(const struct cpl_fw6_msg *cpl) 1305 { 1306 1307 MPASS(cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL); 1308 1309 /* data[0] is RSS header */ 1310 return (&cpl->data[1]); 1311 } 1312 1313 static void 1314 handle_nm_sge_egr_update(struct adapter *sc, if_t ifp, 1315 const struct cpl_sge_egr_update *egr) 1316 { 1317 uint32_t oq; 1318 struct sge_nm_txq *nm_txq; 1319 1320 oq = be32toh(egr->opcode_qid); 1321 MPASS(G_CPL_OPCODE(oq) == CPL_SGE_EGR_UPDATE); 1322 nm_txq = (void *)sc->sge.eqmap[G_EGR_QID(oq) - sc->sge.eq_start]; 1323 1324 netmap_tx_irq(ifp, nm_txq->nid); 1325 } 1326 1327 void 1328 service_nm_rxq(struct sge_nm_rxq *nm_rxq) 1329 { 1330 struct vi_info *vi = nm_rxq->vi; 1331 struct adapter *sc = vi->adapter; 1332 if_t ifp = vi->ifp; 1333 struct netmap_adapter *na = NA(ifp); 1334 struct netmap_kring *kring = na->rx_rings[nm_rxq->nid]; 1335 struct netmap_ring *ring = kring->ring; 1336 struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx]; 1337 const void *cpl; 1338 uint32_t lq; 1339 u_int work = 0; 1340 uint8_t opcode; 1341 uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx); 1342 u_int fl_credits = fl_cidx & 7; 1343 u_int ndesc = 0; /* desc processed since last cidx update */ 1344 u_int nframes = 0; /* frames processed since last netmap wakeup */ 1345 
1346 while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) { 1347 1348 rmb(); 1349 1350 lq = be32toh(d->rsp.pldbuflen_qid); 1351 opcode = d->rss.opcode; 1352 cpl = &d->cpl[0]; 1353 1354 switch (G_RSPD_TYPE(d->rsp.u.type_gen)) { 1355 case X_RSPD_TYPE_FLBUF: 1356 1357 /* fall through */ 1358 1359 case X_RSPD_TYPE_CPL: 1360 MPASS(opcode < NUM_CPL_CMDS); 1361 1362 switch (opcode) { 1363 case CPL_FW4_MSG: 1364 case CPL_FW6_MSG: 1365 cpl = unwrap_nm_fw6_msg(cpl); 1366 /* fall through */ 1367 case CPL_SGE_EGR_UPDATE: 1368 handle_nm_sge_egr_update(sc, ifp, cpl); 1369 break; 1370 case CPL_RX_PKT: 1371 /* 1372 * Note that the application must have netmap 1373 * offsets (NETMAP_REQ_OPT_OFFSETS) enabled on 1374 * the ring or its rx will not work correctly 1375 * when fl_pktshift > 0. 1376 */ 1377 nm_write_offset(kring, &ring->slot[fl_cidx], 1378 sc->params.sge.fl_pktshift); 1379 ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - 1380 sc->params.sge.fl_pktshift; 1381 ring->slot[fl_cidx].flags = 0; 1382 nframes++; 1383 if (!(lq & F_RSPD_NEWBUF)) { 1384 MPASS(black_hole == 2); 1385 break; 1386 } 1387 fl_credits++; 1388 if (__predict_false(++fl_cidx == nm_rxq->fl_sidx)) 1389 fl_cidx = 0; 1390 break; 1391 default: 1392 panic("%s: unexpected opcode 0x%x on nm_rxq %p", 1393 __func__, opcode, nm_rxq); 1394 } 1395 break; 1396 1397 case X_RSPD_TYPE_INTR: 1398 /* Not equipped to handle forwarded interrupts. 
*/ 1399 panic("%s: netmap queue received interrupt for iq %u\n", 1400 __func__, lq); 1401 1402 default: 1403 panic("%s: illegal response type %d on nm_rxq %p", 1404 __func__, G_RSPD_TYPE(d->rsp.u.type_gen), nm_rxq); 1405 } 1406 1407 d++; 1408 if (__predict_false(++nm_rxq->iq_cidx == nm_rxq->iq_sidx)) { 1409 nm_rxq->iq_cidx = 0; 1410 d = &nm_rxq->iq_desc[0]; 1411 nm_rxq->iq_gen ^= F_RSPD_GEN; 1412 } 1413 1414 if (__predict_false(++nframes == rx_nframes) && !black_hole) { 1415 atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); 1416 netmap_rx_irq(ifp, nm_rxq->nid, &work); 1417 nframes = 0; 1418 } 1419 1420 if (__predict_false(++ndesc == rx_ndesc)) { 1421 if (black_hole && fl_credits >= 8) { 1422 fl_credits /= 8; 1423 IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, 1424 nm_rxq->fl_sidx); 1425 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1426 nm_rxq->fl_db_val | V_PIDX(fl_credits)); 1427 fl_credits = fl_cidx & 7; 1428 } 1429 t4_write_reg(sc, sc->sge_gts_reg, 1430 V_CIDXINC(ndesc) | 1431 V_INGRESSQID(nm_rxq->iq_cntxt_id) | 1432 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1433 ndesc = 0; 1434 } 1435 } 1436 1437 atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); 1438 if (black_hole) { 1439 fl_credits /= 8; 1440 IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); 1441 t4_write_reg(sc, sc->sge_kdoorbell_reg, 1442 nm_rxq->fl_db_val | V_PIDX(fl_credits)); 1443 } else if (nframes > 0) 1444 netmap_rx_irq(ifp, nm_rxq->nid, &work); 1445 1446 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) | 1447 V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | 1448 V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); 1449 } 1450 #endif 1451