1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * This file is part of the Chelsio T4 support code.
14 *
15 * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved.
16 *
17 * This program is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this
20 * release for licensing terms and conditions.
21 */
22
23 /*
24 * Copyright 2025 Oxide Computer Company
25 */
26
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/sunndi.h>
30 #include <sys/atomic.h>
31 #include <sys/dlpi.h>
32 #include <sys/pattr.h>
33 #include <sys/strsubr.h>
34 #include <sys/stream.h>
35 #include <sys/strsun.h>
36 #include <inet/ip.h>
37 #include <inet/tcp.h>
38
39 #include "common/common.h"
40 #include "common/t4_msg.h"
41 #include "common/t4_regs.h"
42 #include "common/t4_regs_values.h"
43
44 /* TODO: Tune. */
45 int rx_buf_size = 8192;
46 int tx_copy_threshold = 256;
47 uint16_t rx_copy_threshold = 256;
48
49 /* Used to track coalesced tx work request */
50 struct txpkts {
51 mblk_t *tail; /* head is in the software descriptor */
52 uint64_t *flitp; /* ptr to flit where next pkt should start */
53 uint8_t npkt; /* # of packets in this work request */
54 uint8_t nflits; /* # of flits used by this work request */
55 uint16_t plen; /* total payload (sum of all packets) */
56 };
57
58 /* All information needed to tx a frame */
59 struct txinfo {
60 uint32_t len; /* Total length of frame */
61 uint32_t flags; /* Checksum and LSO flags */
62 uint32_t mss; /* MSS for LSO */
63 uint8_t nsegs; /* # of segments in the SGL, 0 means imm. tx */
64 uint8_t nflits; /* # of flits needed for the SGL */
65 uint8_t hdls_used; /* # of DMA handles used */
66 uint32_t txb_used; /* txb_space used */
67 mac_ether_offload_info_t meoi; /* pkt hdr info for offloads */
68 struct ulptx_sgl sgl __attribute__((aligned(8)));
69 struct ulptx_sge_pair reserved[TX_SGL_SEGS / 2];
70 };
71
72 struct mblk_pair {
73 mblk_t *head, *tail;
74 };
75
76 struct rxbuf {
77 kmem_cache_t *cache; /* the kmem_cache this rxb came from */
78 ddi_dma_handle_t dhdl;
79 ddi_acc_handle_t ahdl;
80 caddr_t va; /* KVA of buffer */
81 uint64_t ba; /* bus address of buffer */
82 frtn_t freefunc;
83 uint_t buf_size;
84 volatile uint_t ref_cnt;
85 };
86
87 static int service_iq(struct sge_iq *iq, int budget);
88 static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx,
89 int8_t pktc_idx, int qsize, uint8_t esize);
90 static inline void init_fl(struct sge_fl *fl, uint16_t qsize);
91 static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq,
92 struct sge_fl *fl, int intr_idx, int cong);
93 static int free_iq_fl(struct port_info *pi, struct sge_iq *iq,
94 struct sge_fl *fl);
95 static int alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx,
96 int i);
97 static int free_rxq(struct port_info *pi, struct sge_rxq *rxq);
98 static int eth_eq_alloc(struct adapter *sc, struct port_info *pi,
99 struct sge_eq *eq);
100 static int alloc_eq(struct adapter *sc, struct port_info *pi,
101 struct sge_eq *eq);
102 static int free_eq(struct adapter *sc, struct sge_eq *eq);
103 static int alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx);
104 static int free_txq(struct port_info *pi, struct sge_txq *txq);
105 static int alloc_dma_memory(struct adapter *sc, size_t len, int flags,
106 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr,
107 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
108 caddr_t *pva);
109 static int free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl);
110 static int alloc_desc_ring(struct adapter *sc, size_t len, int rw,
111 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
112 caddr_t *pva);
113 static int free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl);
114 static int alloc_tx_copybuffer(struct adapter *sc, size_t len,
115 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
116 caddr_t *pva);
117 static inline bool is_new_response(const struct sge_iq *iq,
118 struct rsp_ctrl **ctrl);
119 static inline void iq_next(struct sge_iq *iq);
120 static int refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs);
121 static void refill_sfl(void *arg);
122 static void add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl);
123 static void free_fl_bufs(struct sge_fl *fl);
124 static mblk_t *get_fl_payload(struct adapter *sc, struct sge_fl *fl,
125 uint32_t len_newbuf, int *fl_bufs_used);
126 static int get_frame_txinfo(struct sge_txq *txq, mblk_t **fp,
127 struct txinfo *txinfo, int sgl_only);
128 static inline int fits_in_txb(struct sge_txq *txq, int len, int *waste);
129 static inline int copy_into_txb(struct sge_txq *txq, mblk_t *m, int len,
130 struct txinfo *txinfo);
131 static inline void add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len);
132 static inline int add_mblk(struct sge_txq *txq, struct txinfo *txinfo,
133 mblk_t *m, int len);
134 static void free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo);
135 static int add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m,
136 struct txinfo *txinfo);
137 static void write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts);
138 static int write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m,
139 struct txinfo *txinfo);
140 static void t4_write_flush_wr(struct sge_txq *);
141 static inline void write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
142 struct txpkts *txpkts, struct txinfo *txinfo);
143 static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to,
144 int len);
145 static void t4_tx_ring_db(struct sge_txq *);
146 static uint_t t4_tx_reclaim_descs(struct sge_txq *, uint_t, mblk_t **);
147 static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss,
148 mblk_t *m);
149 static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl);
150 static kstat_t *setup_port_config_kstats(struct port_info *pi);
151 static kstat_t *setup_port_info_kstats(struct port_info *pi);
152 static kstat_t *setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq,
153 int idx);
154 static int update_rxq_kstats(kstat_t *ksp, int rw);
155 static int update_port_info_kstats(kstat_t *ksp, int rw);
156 static kstat_t *setup_txq_kstats(struct port_info *pi, struct sge_txq *txq,
157 int idx);
158 static int update_txq_kstats(kstat_t *ksp, int rw);
159 static void t4_sge_egr_update(struct sge_iq *, const struct rss_header *);
160 static int t4_handle_cpl_msg(struct sge_iq *, const struct rss_header *,
161 mblk_t *);
162 static int t4_handle_fw_msg(struct sge_iq *, const struct rss_header *);
163
164 static kmem_cache_t *rxbuf_cache_create(struct rxbuf_cache_params *);
165 static struct rxbuf *rxbuf_alloc(kmem_cache_t *, int, uint_t);
166 static void rxbuf_free(struct rxbuf *);
167 static int rxbuf_ctor(void *, void *, int);
168 static void rxbuf_dtor(void *, void *);
169
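/*
 * Return a pointer to the payload that immediately follows an RSS header in
 * an ingress descriptor.
 */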
170 static inline void *
171 t4_rss_payload(const struct rss_header *rss)
172 {
173 return ((void *)(&rss[1]));
174 }
175
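/*
 * Translate an absolute ingress queue context ID into its slot in the
 * driver's IQ map; the map is indexed relative to the first context ID
 * assigned to this adapter.  t4_eqmap_slot() below does the same for the
 * egress queue map.
 */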
176 static inline struct sge_iq **
177 t4_iqmap_slot(struct adapter *sc, uint_t cntxt_id)
178 {
179 const uint_t idx = cntxt_id - sc->sge.iq_start;
180 VERIFY3U(idx, <, sc->sge.iqmap_sz);
181 return (&sc->sge.iqmap[idx]);
182 }
183
184 static inline struct sge_eq **
185 t4_eqmap_slot(struct adapter *sc, uint_t cntxt_id)
186 {
187 const uint_t idx = cntxt_id - sc->sge.eq_start;
188 VERIFY3U(idx, <, sc->sge.eqmap_sz);
189 return (&sc->sge.eqmap[idx]);
190 }
191
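/*
 * Number of tx descriptors that the hardware has consumed (per the cidx
 * snapshot in the egress queue's status page) and that the driver may now
 * reclaim.
 */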
192 static inline int
193 reclaimable(struct sge_eq *eq)
194 {
195 unsigned int cidx;
196
197 cidx = eq->spg->cidx; /* stable snapshot */
198 cidx = be16_to_cpu(cidx);
199
200 if (cidx >= eq->cidx)
201 return (cidx - eq->cidx);
202 else
203 return (cidx + eq->cap - eq->cidx);
204 }
205
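/*
 * One-time SGE setup: initialize the DMA and device access attributes used
 * for descriptor rings and tx/rx buffers, read back the SGE parameters
 * programmed by the common code, create the rx buffer kmem cache, and program
 * the free list buffer size, interrupt holdoff packet counts, and holdoff
 * timer values.
 */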
206 void
207 t4_sge_init(struct adapter *sc)
208 {
209 struct driver_properties *p = &sc->props;
210 ddi_dma_attr_t *dma_attr;
211 ddi_device_acc_attr_t *acc_attr;
212 uint32_t sge_control, sge_conm_ctrl;
213 int egress_threshold;
214
215 /*
216 * Device access and DMA attributes for descriptor rings
217 */
218 acc_attr = &sc->sge.acc_attr_desc;
219 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
220 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
221 acc_attr->devacc_attr_dataorder = DDI_STRICTORDER_ACC;
222
223 dma_attr = &sc->sge.dma_attr_desc;
224 dma_attr->dma_attr_version = DMA_ATTR_V0;
225 dma_attr->dma_attr_addr_lo = 0;
226 dma_attr->dma_attr_addr_hi = UINT64_MAX;
227 dma_attr->dma_attr_count_max = UINT64_MAX;
228 dma_attr->dma_attr_align = 512;
229 dma_attr->dma_attr_burstsizes = 0xfff;
230 dma_attr->dma_attr_minxfer = 1;
231 dma_attr->dma_attr_maxxfer = UINT64_MAX;
232 dma_attr->dma_attr_seg = UINT64_MAX;
233 dma_attr->dma_attr_sgllen = 1;
234 dma_attr->dma_attr_granular = 1;
235 dma_attr->dma_attr_flags = 0;
236
237 /*
238 * Device access and DMA attributes for tx buffers
239 */
240 acc_attr = &sc->sge.acc_attr_tx;
241 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
242 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
243
244 dma_attr = &sc->sge.dma_attr_tx;
245 dma_attr->dma_attr_version = DMA_ATTR_V0;
246 dma_attr->dma_attr_addr_lo = 0;
247 dma_attr->dma_attr_addr_hi = UINT64_MAX;
248 dma_attr->dma_attr_count_max = UINT64_MAX;
249 dma_attr->dma_attr_align = 1;
250 dma_attr->dma_attr_burstsizes = 0xfff;
251 dma_attr->dma_attr_minxfer = 1;
252 dma_attr->dma_attr_maxxfer = UINT64_MAX;
253 dma_attr->dma_attr_seg = UINT64_MAX;
254 dma_attr->dma_attr_sgllen = TX_SGL_SEGS;
255 dma_attr->dma_attr_granular = 1;
256 dma_attr->dma_attr_flags = 0;
257
258 /*
259 * Ingress Padding Boundary and Egress Status Page Size are set up by
260 * t4_fixup_host_params().
261 */
262 sge_control = t4_read_reg(sc, A_SGE_CONTROL);
263 sc->sge.pktshift = G_PKTSHIFT(sge_control);
264 sc->sge.stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64;
265
266 /* t4_nex uses FLM packed mode */
267 sc->sge.fl_align = t4_fl_pkt_align(sc, true);
268
269 /*
270 * Device access and DMA attributes for rx buffers
271 */
272 sc->sge.rxb_params.dip = sc->dip;
273 sc->sge.rxb_params.buf_size = rx_buf_size;
274
275 acc_attr = &sc->sge.rxb_params.acc_attr_rx;
276 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
277 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
278
279 dma_attr = &sc->sge.rxb_params.dma_attr_rx;
280 dma_attr->dma_attr_version = DMA_ATTR_V0;
281 dma_attr->dma_attr_addr_lo = 0;
282 dma_attr->dma_attr_addr_hi = UINT64_MAX;
283 dma_attr->dma_attr_count_max = UINT64_MAX;
284 /*
285 * Low 4 bits of an rx buffer address have a special meaning to the SGE
286 * and an rx buf cannot have an address with any of these bits set.
287 * The free list alignment (fl_align) is >= 32, so we're sure things are ok.
288 */
289 dma_attr->dma_attr_align = sc->sge.fl_align;
290 dma_attr->dma_attr_burstsizes = 0xfff;
291 dma_attr->dma_attr_minxfer = 1;
292 dma_attr->dma_attr_maxxfer = UINT64_MAX;
293 dma_attr->dma_attr_seg = UINT64_MAX;
294 dma_attr->dma_attr_sgllen = 1;
295 dma_attr->dma_attr_granular = 1;
296 dma_attr->dma_attr_flags = 0;
297
298 sc->sge.rxbuf_cache = rxbuf_cache_create(&sc->sge.rxb_params);
299
300 /*
301 * A FL with <= fl_starve_thres buffers is starving and a periodic
302 * timer will attempt to refill it. This needs to be larger than the
303 * SGE's Egress Congestion Threshold. If it isn't, then we can get
304 * stuck waiting for new packets while the SGE is waiting for us to
305 * give it more Free List entries. (Note that the SGE's Egress
306 * Congestion Threshold is in units of 2 Free List pointers.) For T4,
307 * there was only a single field to control this. For T5 there's the
308 * original field which now only applies to Unpacked Mode Free List
309 * buffers and a new field which only applies to Packed Mode Free List
310 * buffers.
311 */
312
313 sge_conm_ctrl = t4_read_reg(sc, A_SGE_CONM_CTRL);
314 switch (CHELSIO_CHIP_VERSION(sc->params.chip)) {
315 case CHELSIO_T4:
316 egress_threshold = G_EGRTHRESHOLD(sge_conm_ctrl);
317 break;
318 case CHELSIO_T5:
319 egress_threshold = G_EGRTHRESHOLDPACKING(sge_conm_ctrl);
320 break;
321 case CHELSIO_T6:
322 default:
323 egress_threshold = G_T6_EGRTHRESHOLDPACKING(sge_conm_ctrl);
324 }
325 sc->sge.fl_starve_threshold = 2 * egress_threshold + 1;
326
327 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, rx_buf_size);
328
329 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
330 V_THRESHOLD_0(p->holdoff_pktcnt[0]) |
331 V_THRESHOLD_1(p->holdoff_pktcnt[1]) |
332 V_THRESHOLD_2(p->holdoff_pktcnt[2]) |
333 V_THRESHOLD_3(p->holdoff_pktcnt[3]));
334
335 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
336 V_TIMERVALUE0(us_to_core_ticks(sc, p->holdoff_timer_us[0])) |
337 V_TIMERVALUE1(us_to_core_ticks(sc, p->holdoff_timer_us[1])));
338 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
339 V_TIMERVALUE2(us_to_core_ticks(sc, p->holdoff_timer_us[2])) |
340 V_TIMERVALUE3(us_to_core_ticks(sc, p->holdoff_timer_us[3])));
341 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
342 V_TIMERVALUE4(us_to_core_ticks(sc, p->holdoff_timer_us[4])) |
343 V_TIMERVALUE5(us_to_core_ticks(sc, p->holdoff_timer_us[5])));
344 }
345
346 static inline int
347 first_vector(struct port_info *pi)
348 {
349 struct adapter *sc = pi->adapter;
350 int rc = T4_EXTRA_INTR, i;
351
352 if (sc->intr_count == 1)
353 return (0);
354
355 for_each_port(sc, i) {
356 struct port_info *p = sc->port[i];
357
358 if (i == pi->port_id)
359 break;
360
361 /*
362 * Not compiled with offload support and intr_count > 1. Only
363 * NIC queues exist and they'd better be taking direct
364 * interrupts.
365 */
366 ASSERT(!(sc->flags & TAF_INTR_FWD));
367 rc += p->nrxq;
368 }
369 return (rc);
370 }
371
372 /*
373 * Given an arbitrary "index," come up with an iq that can be used by other
374 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
375 * The iq returned is guaranteed to be something that takes direct interrupts.
376 */
377 static struct sge_iq *
378 port_intr_iq(struct port_info *pi, int idx)
379 {
380 struct adapter *sc = pi->adapter;
381 struct sge *s = &sc->sge;
382 struct sge_iq *iq = NULL;
383
384 if (sc->intr_count == 1)
385 return (&sc->sge.fwq);
386
387 /*
388 * Not compiled with offload support and intr_count > 1. Only NIC
389 * queues exist and they'd better be taking direct interrupts.
390 */
391 ASSERT(!(sc->flags & TAF_INTR_FWD));
392
393 idx %= pi->nrxq;
394 iq = &s->rxq[pi->first_rxq + idx].iq;
395
396 return (iq);
397 }
398
399 int
400 t4_setup_port_queues(struct port_info *pi)
401 {
402 int rc = 0, i, intr_idx, j;
403 struct sge_rxq *rxq;
404 struct sge_txq *txq;
405 struct adapter *sc = pi->adapter;
406 struct driver_properties *p = &sc->props;
407
408 pi->ksp_config = setup_port_config_kstats(pi);
409 pi->ksp_info = setup_port_info_kstats(pi);
410
411 /* Interrupt vector to start from (when using multiple vectors) */
412 intr_idx = first_vector(pi);
413
414 /*
415 * First pass over all rx queues (NIC and TOE):
416 * a) initialize iq and fl
417 * b) allocate queue iff it will take direct interrupts.
418 */
419
420 for_each_rxq(pi, i, rxq) {
421
422 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, p->qsize_rxq,
423 RX_IQ_ESIZE);
424
425 init_fl(&rxq->fl, p->qsize_rxq / 8); /* 8 bufs in each entry */
426
427 if ((!(sc->flags & TAF_INTR_FWD)) ||
428 (sc->intr_count > 1 && pi->nrxq)) {
429 rxq->iq.flags |= IQ_INTR;
430 rc = alloc_rxq(pi, rxq, intr_idx, i);
431 if (rc != 0)
432 goto done;
433 intr_idx++;
434 }
435
436 }
437
438 /*
439 * Second pass over all rx queues (NIC and TOE). The queues forwarding
440 * their interrupts are allocated now.
441 */
442 j = 0;
443 for_each_rxq(pi, i, rxq) {
444 if (rxq->iq.flags & IQ_INTR)
445 continue;
446
447 intr_idx = port_intr_iq(pi, j)->abs_id;
448
449 rc = alloc_rxq(pi, rxq, intr_idx, i);
450 if (rc != 0)
451 goto done;
452 j++;
453 }
454
455 /*
456 * Now the tx queues. Only one pass needed.
457 */
458 j = 0;
459 for_each_txq(pi, i, txq) {
460 txq->eq.flags = 0;
461 txq->eq.tx_chan = pi->tx_chan;
462 txq->eq.qsize = p->qsize_txq;
463
464 /* For now, direct all TX queue notifications to the FW IQ. */
465 txq->eq.iqid = sc->sge.fwq.cntxt_id;
466
467 rc = alloc_txq(pi, txq, i);
468 if (rc != 0)
469 goto done;
470 }
471
472 done:
473 if (rc != 0)
474 (void) t4_teardown_port_queues(pi);
475
476 return (rc);
477 }
478
479 /*
480 * Idempotent
481 */
482 int
483 t4_teardown_port_queues(struct port_info *pi)
484 {
485 int i;
486 struct sge_rxq *rxq;
487 struct sge_txq *txq;
488
489 if (pi->ksp_config != NULL) {
490 kstat_delete(pi->ksp_config);
491 pi->ksp_config = NULL;
492 }
493 if (pi->ksp_info != NULL) {
494 kstat_delete(pi->ksp_info);
495 pi->ksp_info = NULL;
496 }
497
498 for_each_txq(pi, i, txq) {
499 (void) free_txq(pi, txq);
500 }
501
502 for_each_rxq(pi, i, rxq) {
503 if ((rxq->iq.flags & IQ_INTR) == 0)
504 (void) free_rxq(pi, rxq);
505 }
506
507 /*
508 * Then take down the rx queues that take direct interrupts.
509 */
510
511 for_each_rxq(pi, i, rxq) {
512 if (rxq->iq.flags & IQ_INTR)
513 (void) free_rxq(pi, rxq);
514 }
515
516 return (0);
517 }
518
519 /* Deals with errors and forwarded interrupts */
520 uint_t
521 t4_intr_all(caddr_t arg1, caddr_t arg2)
522 {
523
524 (void) t4_intr_err(arg1, arg2);
525 (void) t4_intr(arg1, arg2);
526
527 return (DDI_INTR_CLAIMED);
528 }
529
530 /*
531 * We are counting on the values of t4_intr_config_t matching the register
532 * definitions from the shared code.
533 */
534 CTASSERT(TIC_SE_INTR_ARM == F_QINTR_CNT_EN);
535 CTASSERT(TIC_TIMER0 == V_QINTR_TIMER_IDX(X_TIMERREG_COUNTER0));
536 CTASSERT(TIC_TIMER5 == V_QINTR_TIMER_IDX(X_TIMERREG_COUNTER5));
537 CTASSERT(TIC_START_COUNTER == V_QINTR_TIMER_IDX(X_TIMERREG_RESTART_COUNTER));
538
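/*
 * Update the cached GTS interrupt parameters (holdoff timer index and whether
 * the packet count threshold arms the interrupt) for an ingress queue, and
 * push a new packet count threshold index to the firmware if it has changed.
 */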
539 void
540 t4_iq_update_intr_cfg(struct sge_iq *iq, uint8_t tmr_idx, int8_t pktc_idx)
541 {
542 ASSERT((pktc_idx >= 0 && pktc_idx < SGE_NCOUNTERS) || pktc_idx == -1);
543 IQ_LOCK_ASSERT_OWNED(iq);
544 /*
545 * Strictly speaking, the IQ could be programmed with a TimerReg value
546 * of 6 (TIC_START_COUNTER), which is outside the range of SGE_NTIMERS.
547 *
548 * Since we do not currently offer an interface to configure such
549 * behavior, we assert its absence here for now.
550 */
551 ASSERT3U(tmr_idx, <, SGE_NTIMERS);
552
553 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) |
554 ((pktc_idx != -1) ? TIC_SE_INTR_ARM : 0);
555
556 /* Update IQ for new packet count threshold, but only if enabled */
557 if (pktc_idx != iq->intr_pktc_idx && pktc_idx >= 0) {
558 const uint32_t param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
559 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
560 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
561 const uint32_t val = pktc_idx;
562
563 struct adapter *sc = iq->adapter;
564 int rc =
565 -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val);
566 if (rc != 0) {
567 /* report error but carry on */
568 cxgb_printf(sc->dip, CE_WARN,
569 "failed to set intr pktcnt index for IQ %d: %d",
570 iq->cntxt_id, rc);
571 }
572 }
573 iq->intr_pktc_idx = pktc_idx;
574 }
575
576 void
577 t4_eq_update_dbq_timer(struct sge_eq *eq, struct port_info *pi)
578 {
579 struct adapter *sc = pi->adapter;
580
581 const uint32_t param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
582 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) |
583 V_FW_PARAMS_PARAM_YZ(eq->cntxt_id);
584 const uint32_t val = pi->dbq_timer_idx;
585
586 int rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val);
587 if (rc != 0) {
588 /* report error but carry on */
589 cxgb_printf(sc->dip, CE_WARN,
590 "failed to set DBQ timer index for EQ %d: %d",
591 eq->cntxt_id, rc);
592 }
593 }
594
595 /*
596 * Update (via GTS) the interrupt/timer config and CIDX value for a specified
597 * ingress queue.
598 */
599 void
600 t4_iq_gts_update(struct sge_iq *iq, t4_intr_config_t cfg, uint16_t cidx_incr)
601 {
602 const uint32_t value =
603 V_INGRESSQID((uint32_t)iq->cntxt_id) |
604 V_CIDXINC((uint32_t)cidx_incr) |
605 V_SEINTARM((uint32_t)cfg);
606 t4_write_reg(iq->adapter, MYPF_REG(A_SGE_PF_GTS), value);
607 }
608
609 /*
610 * Update (via GTS) the CIDX value for a specified ingress queue.
611 *
612 * This _only_ increments CIDX and does not alter any other timer related state
613 * associated with the IQ.
614 */
615 static void
616 t4_iq_gts_incr(struct sge_iq *iq, uint16_t cidx_incr)
617 {
618 if (cidx_incr == 0) {
619 return;
620 }
621
622 const uint32_t value =
623 V_INGRESSQID((uint32_t)iq->cntxt_id) |
624 V_CIDXINC((uint32_t)cidx_incr) |
625 V_SEINTARM((uint32_t)V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX));
626 t4_write_reg(iq->adapter, MYPF_REG(A_SGE_PF_GTS), value);
627 }
628
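/*
 * Ring-aware rx interrupt work: unless the queue has been switched to polling
 * mode, pull up to qsize/8 responses off the ingress queue, re-arm it via
 * GTS, and pass the resulting mblk chain up to MAC.
 */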
629 static void
630 t4_intr_rx_work(struct sge_iq *iq)
631 {
632 mblk_t *mp = NULL;
633 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
634 RXQ_LOCK(rxq);
635 if (!iq->polling) {
636 mp = t4_ring_rx(rxq, iq->qsize/8);
637 t4_iq_gts_update(iq, iq->intr_params, 0);
638 }
639 RXQ_UNLOCK(rxq);
640 if (mp != NULL) {
641 mac_rx_ring(rxq->port->mh, rxq->ring_handle, mp,
642 rxq->ring_gen_num);
643 }
644 }
645
646 /* Deals with interrupts on the given ingress queue */
647 /* ARGSUSED */
648 uint_t
649 t4_intr(caddr_t arg1, caddr_t arg2)
650 {
651 struct sge_iq *iq = (struct sge_iq *)arg2;
652 int state;
653
654 /*
655 * Right now receive polling is only enabled for MSI-X and
656 * when we have enough MSI-X vectors, i.e. no interrupt forwarding.
657 */
658 if (iq->adapter->props.multi_rings) {
659 t4_intr_rx_work(iq);
660 } else {
661 state = atomic_cas_uint(&iq->state, IQS_IDLE, IQS_BUSY);
662 if (state == IQS_IDLE) {
663 (void) service_iq(iq, 0);
664 (void) atomic_cas_uint(&iq->state, IQS_BUSY, IQS_IDLE);
665 }
666 }
667 return (DDI_INTR_CLAIMED);
668 }
669
670 /* Deals with error interrupts */
671 /* ARGSUSED */
672 uint_t
673 t4_intr_err(caddr_t arg1, caddr_t arg2)
674 {
675 struct adapter *sc = (struct adapter *)arg1;
676
677 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
678 (void) t4_slow_intr_handler(sc);
679
680 return (DDI_INTR_CLAIMED);
681 }
682
683 /*
684 * t4_ring_rx - Process responses from an SGE response queue.
685 *
686 * This function processes responses from an SGE response queue up to the
687 * supplied budget. Responses include received packets as well as control
688 * messages from FW or HW.
689 *
690 * It returns a chain of mblks containing the received data, to be
691 * passed up to mac_rx_ring().
692 */
693 mblk_t *
694 t4_ring_rx(struct sge_rxq *rxq, int budget)
695 {
696 struct sge_iq *iq = &rxq->iq;
697 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
698 struct adapter *sc = iq->adapter;
699 struct rsp_ctrl *ctrl;
700 int ndescs = 0, fl_bufs_used = 0;
701 mblk_t *mblk_head = NULL, **mblk_tail = &mblk_head;
702 uint32_t received_bytes = 0, pkt_len = 0;
703 uint16_t err_vec;
704
705 while (is_new_response(iq, &ctrl)) {
706 membar_consumer();
707
708 const uint8_t type_gen = ctrl->u.type_gen;
709 const uint8_t rsp_type = G_RSPD_TYPE(type_gen);
710 const bool overflowed = (type_gen & F_RSPD_QOVFL) != 0;
711 const uint32_t data_len = BE_32(ctrl->pldbuflen_qid);
712
713 iq->stats.sis_processed++;
714 if (overflowed) {
715 iq->stats.sis_overflow++;
716 }
717
718 const struct rss_header *rss =
719 (const struct rss_header *)iq->cdesc;
720 mblk_t *m = NULL;
721
722 switch (rsp_type) {
723 case X_RSPD_TYPE_FLBUF:
724
725 ASSERT(iq->flags & IQ_HAS_FL);
726
727 if (CPL_RX_PKT == rss->opcode) {
728 const struct cpl_rx_pkt *cpl =
729 t4_rss_payload(rss);
730 pkt_len = be16_to_cpu(cpl->len);
731
732 if (iq->polling &&
733 ((received_bytes + pkt_len) > budget))
734 goto done;
735
736 m = get_fl_payload(sc, fl, data_len,
737 &fl_bufs_used);
738 if (m == NULL)
739 goto done;
740
741 m->b_rptr += sc->sge.pktshift;
742 if (sc->params.tp.rx_pkt_encap) {
743 /* Enabled only in T6 config file */
744 err_vec = G_T6_COMPR_RXERR_VEC(
745 ntohs(cpl->err_vec));
746 } else {
747 err_vec = ntohs(cpl->err_vec);
748 }
749
750 const bool csum_ok = cpl->csum_calc && !err_vec;
751
752 /* TODO: what about cpl->ip_frag? */
753 if (csum_ok && !cpl->ip_frag) {
754 mac_hcksum_set(m, 0, 0, 0, 0xffff,
755 HCK_FULLCKSUM_OK | HCK_FULLCKSUM |
756 HCK_IPV4_HDRCKSUM_OK);
757 rxq->rxcsum++;
758 }
759 rxq->rxpkts++;
760 rxq->rxbytes += pkt_len;
761 received_bytes += pkt_len;
762
763 *mblk_tail = m;
764 mblk_tail = &m->b_next;
765
766 break;
767 }
768
769 m = get_fl_payload(sc, fl, data_len, &fl_bufs_used);
770 if (m == NULL)
771 goto done;
772 /* FALLTHROUGH */
773
774 case X_RSPD_TYPE_CPL:
775 (void) t4_handle_cpl_msg(iq, rss, m);
776 break;
777
778 default:
779 break;
780 }
781 iq_next(iq);
782 ++ndescs;
783 if (!iq->polling && (ndescs == budget))
784 break;
785 }
786
787 done:
788
789 t4_iq_gts_incr(iq, ndescs);
790
791 if ((fl_bufs_used > 0) || (iq->flags & IQ_HAS_FL)) {
792 int starved;
793 FL_LOCK(fl);
794 fl->needed += fl_bufs_used;
795 starved = refill_fl(sc, fl, fl->cap / 8);
796 FL_UNLOCK(fl);
797 if (starved)
798 add_fl_to_sfl(sc, fl);
799 }
800 return (mblk_head);
801 }
802
803 /*
804 * Deals with anything and everything on the given ingress queue.
805 */
806 static int
807 service_iq(struct sge_iq *iq, int budget)
808 {
809 struct sge_iq *q;
810 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
811 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
812 struct adapter *sc = iq->adapter;
813 struct rsp_ctrl *ctrl;
814 int ndescs = 0, fl_bufs_used = 0;
815 int starved;
816 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
817
818 const uint_t limit = (budget != 0) ? budget : iq->qsize / 8;
819
820 /*
821 * We always come back and check the descriptor ring for new indirect
822 * interrupts and other responses after running a single handler.
823 */
824 for (;;) {
825 while (is_new_response(iq, &ctrl)) {
826 membar_consumer();
827
828 const uint8_t type_gen = ctrl->u.type_gen;
829 const uint8_t rsp_type = G_RSPD_TYPE(type_gen);
830 const uint32_t dlen_qid = BE_32(ctrl->pldbuflen_qid);
831
832 mblk_t *m = NULL;
833 const struct rss_header *rss =
834 (const struct rss_header *)iq->cdesc;
835
836 switch (rsp_type) {
837 case X_RSPD_TYPE_FLBUF:
838
839 ASSERT(iq->flags & IQ_HAS_FL);
840
841 m = get_fl_payload(sc, fl, dlen_qid,
842 &fl_bufs_used);
843 if (m == NULL) {
844 /*
845 * Rearm the iq with a
846 * longer-than-default timer
847 */
848 t4_iq_gts_update(iq, TIC_TIMER5,
849 ndescs);
850 if (fl_bufs_used > 0) {
851 ASSERT(iq->flags & IQ_HAS_FL);
852 FL_LOCK(fl);
853 fl->needed += fl_bufs_used;
854 starved = refill_fl(sc, fl,
855 fl->cap / 8);
856 FL_UNLOCK(fl);
857 if (starved)
858 add_fl_to_sfl(sc, fl);
859 }
860 return (0);
861 }
862
863 /* FALLTHRU */
864 case X_RSPD_TYPE_CPL:
865 (void) t4_handle_cpl_msg(iq, rss, m);
866 break;
867
868 case X_RSPD_TYPE_INTR:
869
870 /*
871 * Interrupts should be forwarded only to queues
872 * that are not forwarding their interrupts.
873 * This means service_iq can recurse but only 1
874 * level deep.
875 */
876 ASSERT(budget == 0);
877
878 q = *t4_iqmap_slot(sc, dlen_qid);
879 if (atomic_cas_uint(&q->state, IQS_IDLE,
880 IQS_BUSY) == IQS_IDLE) {
881 if (service_iq(q, q->qsize / 8) == 0) {
882 (void) atomic_cas_uint(
883 &q->state, IQS_BUSY,
884 IQS_IDLE);
885 } else {
886 STAILQ_INSERT_TAIL(&iql, q,
887 link);
888 }
889 }
890 break;
891
892 default:
893 break;
894 }
895
896 iq_next(iq);
897 if (++ndescs == limit) {
898 t4_iq_gts_incr(iq, ndescs);
899 ndescs = 0;
900
901 if (fl_bufs_used > 0) {
902 ASSERT(iq->flags & IQ_HAS_FL);
903 FL_LOCK(fl);
904 fl->needed += fl_bufs_used;
905 (void) refill_fl(sc, fl, fl->cap / 8);
906 FL_UNLOCK(fl);
907 fl_bufs_used = 0;
908 }
909
910 if (budget != 0)
911 return (EINPROGRESS);
912 }
913 }
914
915 if (STAILQ_EMPTY(&iql) != 0)
916 break;
917
918 /*
919 * Process the head only, and send it to the back of the list if
920 * it's still not done.
921 */
922 q = STAILQ_FIRST(&iql);
923 STAILQ_REMOVE_HEAD(&iql, link);
924 if (service_iq(q, q->qsize / 8) == 0)
925 (void) atomic_cas_uint(&q->state, IQS_BUSY, IQS_IDLE);
926 else
927 STAILQ_INSERT_TAIL(&iql, q, link);
928 }
929
930 t4_iq_gts_update(iq, iq->intr_params, ndescs);
931
932 if (iq->flags & IQ_HAS_FL) {
933 FL_LOCK(fl);
934 fl->needed += fl_bufs_used;
935 starved = refill_fl(sc, fl, fl->cap / 4);
936 FL_UNLOCK(fl);
937 if (starved != 0)
938 add_fl_to_sfl(sc, fl);
939 }
940
941 return (0);
942 }
943
944 /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
945 #define TXPKTS_PKT_HDR ((\
946 sizeof (struct ulp_txpkt) + \
947 sizeof (struct ulptx_idata) + \
948 sizeof (struct cpl_tx_pkt_core)) / 8)
949
950 /* Header of a coalesced tx WR, before SGL of first packet (in flits) */
951 #define TXPKTS_WR_HDR (\
952 sizeof (struct fw_eth_tx_pkts_wr) / 8 + \
953 TXPKTS_PKT_HDR)
954
955 /* Header of a tx WR, before SGL of first packet (in flits) */
956 #define TXPKT_WR_HDR ((\
957 sizeof (struct fw_eth_tx_pkt_wr) + \
958 sizeof (struct cpl_tx_pkt_core)) / 8)
959
960 /* Header of a tx LSO WR, before SGL of first packet (in flits) */
961 #define TXPKT_LSO_WR_HDR ((\
962 sizeof (struct fw_eth_tx_pkt_wr) + \
963 sizeof (struct cpl_tx_pkt_lso_core) + \
964 sizeof (struct cpl_tx_pkt_core)) / 8)
965
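/*
 * Transmit entry point for an egress queue.  Frames are coalesced into TXPKTS
 * work requests while more than one frame is queued; otherwise each frame is
 * sent in its own work request.  Frames that could not be sent because
 * descriptors or other resources ran out are returned to the caller, with the
 * EQ marked corked so transmission can resume once space is reclaimed.
 */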
966 mblk_t *
967 t4_eth_tx(void *arg, mblk_t *frame)
968 {
969 struct sge_txq *txq = (struct sge_txq *)arg;
970 struct port_info *pi = txq->port;
971 struct sge_eq *eq = &txq->eq;
972 mblk_t *next_frame;
973 int rc, coalescing;
974 struct txpkts txpkts;
975 struct txinfo txinfo;
976
977 txpkts.npkt = 0; /* indicates there's nothing in txpkts */
978 coalescing = 0;
979
980 TXQ_LOCK(txq);
981 if (eq->avail < 8)
982 (void) t4_tx_reclaim_descs(txq, 8, NULL);
983 for (; frame; frame = next_frame) {
984
985 if (eq->avail < 8)
986 break;
987
988 next_frame = frame->b_next;
989 frame->b_next = NULL;
990
991 if (next_frame != NULL)
992 coalescing = 1;
993
994 rc = get_frame_txinfo(txq, &frame, &txinfo, coalescing);
995 if (rc != 0) {
996 if (rc == ENOMEM) {
997 /* Short of resources, suspend tx */
998 frame->b_next = next_frame;
999
1000 /*
1001 * Since we are out of memory for this packet,
1002 * rather than TX descriptors, enqueue a
1003 * flush work request. This will ensure that a
1004 * completion notification is delivered for this
1005 * EQ which will trigger a call to update the
1006 * state in mac to continue transmissions.
1007 */
1008 t4_write_flush_wr(txq);
1009
1010 break;
1011 }
1012
1013 /*
1014 * Unrecoverable error for this frame, throw it away and
1015 * move on to the next.
1016 */
1017 freemsg(frame);
1018 continue;
1019 }
1020
1021 if (coalescing != 0 &&
1022 add_to_txpkts(txq, &txpkts, frame, &txinfo) == 0) {
1023
1024 /* Successfully absorbed into txpkts */
1025
1026 write_ulp_cpl_sgl(pi, txq, &txpkts, &txinfo);
1027 goto doorbell;
1028 }
1029
1030 /*
1031 * We weren't coalescing to begin with, or current frame could
1032 * not be coalesced (add_to_txpkts flushes txpkts if a frame
1033 * given to it can't be coalesced). Either way there should be
1034 * nothing in txpkts.
1035 */
1036 ASSERT(txpkts.npkt == 0);
1037
1038 /* We're sending out individual frames now */
1039 coalescing = 0;
1040
1041 if (eq->avail < 8)
1042 (void) t4_tx_reclaim_descs(txq, 8, NULL);
1043 rc = write_txpkt_wr(pi, txq, frame, &txinfo);
1044 if (rc != 0) {
1045
1046 /* Short of hardware descriptors, suspend tx */
1047
1048 /*
1049 * This is an unlikely but expensive failure. We've
1050 * done all the hard work (DMA bindings etc.) and now we
1051 * can't send out the frame. What's worse, we have to
1052 * spend even more time freeing up everything in txinfo.
1053 */
1054 txq->qfull++;
1055 free_txinfo_resources(txq, &txinfo);
1056
1057 frame->b_next = next_frame;
1058 break;
1059 }
1060
1061 doorbell:
1062 /* Fewer and fewer doorbells as the queue fills up */
1063 if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) {
1064 txq->txbytes += txinfo.len;
1065 txq->txpkts++;
1066 t4_tx_ring_db(txq);
1067 }
1068 (void) t4_tx_reclaim_descs(txq, 32, NULL);
1069 }
1070
1071 if (txpkts.npkt > 0) {
1072 write_txpkts_wr(txq, &txpkts);
1073 }
1074
1075 if (eq->pending != 0) {
1076 t4_tx_ring_db(txq);
1077 }
1078
1079 if (frame != NULL) {
1080 eq->flags |= EQ_CORKED;
1081 }
1082
1083 (void) t4_tx_reclaim_descs(txq, eq->qsize, NULL);
1084 TXQ_UNLOCK(txq);
1085
1086 return (frame);
1087 }
1088
1089 static inline void
1090 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int8_t pktc_idx,
1091 int qsize, uint8_t esize)
1092 {
1093 ASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS);
1094 ASSERT(pktc_idx < SGE_NCOUNTERS); /* -ve is ok, means don't use */
1095
1096 iq->flags = 0;
1097 iq->adapter = sc;
1098 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
1099 iq->intr_pktc_idx = -1;
1100 if (pktc_idx >= 0) {
1101 iq->intr_params |= TIC_SE_INTR_ARM;
1102 iq->intr_pktc_idx = pktc_idx;
1103 }
1104 iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */
1105 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */
1106 }
1107
1108 static inline void
1109 init_fl(struct sge_fl *fl, uint16_t qsize)
1110 {
1111
1112 fl->qsize = qsize;
1113 fl->allocb_fail = 0;
1114 }
1115
1116 /*
1117 * Allocates the ring for an ingress queue and an optional freelist. If the
1118 * freelist is specified it will be allocated and then associated with the
1119 * ingress queue.
1120 *
1121 * Returns errno on failure. Resources allocated up to that point may still be
1122 * allocated. Caller is responsible for cleanup in case this function fails.
1123 *
1124 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
1125 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies
1126 * the index of the queue to which its interrupts will be forwarded.
1127 */
1128 static int
1129 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1130 int intr_idx, int cong)
1131 {
1132 int rc, i;
1133 size_t len;
1134 struct fw_iq_cmd c;
1135 struct adapter *sc = iq->adapter;
1136 uint32_t v = 0;
1137
1138 len = iq->qsize * iq->esize;
1139 rc = alloc_desc_ring(sc, len, DDI_DMA_READ, &iq->dhdl, &iq->ahdl,
1140 &iq->ba, (caddr_t *)&iq->desc);
1141 if (rc != 0)
1142 return (rc);
1143
1144 bzero(&c, sizeof (c));
1145 c.op_to_vfn = cpu_to_be32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1146 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1147 V_FW_IQ_CMD_VFN(0));
1148
1149 c.alloc_to_len16 = cpu_to_be32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1150 FW_LEN16(c));
1151
1152 /* Special handling for firmware event queue */
1153 if (iq == &sc->sge.fwq)
1154 v |= F_FW_IQ_CMD_IQASYNCH;
1155
1156 if (iq->flags & IQ_INTR)
1157 ASSERT(intr_idx < sc->intr_count);
1158 else
1159 v |= F_FW_IQ_CMD_IQANDST;
1160 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1161
1162 /*
1163 * If the coalescing counter is not enabled for this IQ, use the 0
1164 * index, rather than populating it with the invalid -1 value.
1165 *
1166 * The selected index does not matter when the counter is not enabled
1167 * through the GTS flags.
1168 */
1169 const uint_t pktc_idx = (iq->intr_pktc_idx < 0) ? 0 : iq->intr_pktc_idx;
1170
1171 c.type_to_iqandstindex = cpu_to_be32(v |
1172 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1173 V_FW_IQ_CMD_VIID(pi->viid) |
1174 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1175 c.iqdroprss_to_iqesize = cpu_to_be16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1176 F_FW_IQ_CMD_IQGTSMODE |
1177 V_FW_IQ_CMD_IQINTCNTTHRESH(pktc_idx) |
1178 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1179 c.iqsize = cpu_to_be16(iq->qsize);
1180 c.iqaddr = cpu_to_be64(iq->ba);
1181 if (cong >= 0) {
1182 const uint32_t iq_type =
1183 cong ? FW_IQ_IQTYPE_NIC : FW_IQ_IQTYPE_OFLD;
1184 c.iqns_to_fl0congen = BE_32(F_FW_IQ_CMD_IQFLINTCONGEN |
1185 V_FW_IQ_CMD_IQTYPE(iq_type));
1186 }
1187
1188 if (fl != NULL) {
1189 mutex_init(&fl->lock, NULL, MUTEX_DRIVER,
1190 DDI_INTR_PRI(sc->intr_pri));
1191 fl->flags |= FL_MTX;
1192
1193 len = fl->qsize * RX_FL_ESIZE;
1194 rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &fl->dhdl,
1195 &fl->ahdl, &fl->ba, (caddr_t *)&fl->desc);
1196 if (rc != 0)
1197 return (rc);
1198
1199 /* Allocate space for one software descriptor per buffer. */
1200 fl->cap = (fl->qsize - sc->sge.stat_len / RX_FL_ESIZE) * 8;
1201 fl->sdesc = kmem_zalloc(sizeof (struct fl_sdesc) * fl->cap,
1202 KM_SLEEP);
1203 fl->needed = fl->cap;
1204 fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);
1205
1206 c.iqns_to_fl0congen |=
1207 cpu_to_be32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1208 F_FW_IQ_CMD_FL0PACKEN | F_FW_IQ_CMD_FL0PADEN);
1209 if (cong >= 0) {
1210 c.iqns_to_fl0congen |=
1211 BE_32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1212 F_FW_IQ_CMD_FL0CONGCIF |
1213 F_FW_IQ_CMD_FL0CONGEN);
1214 }
1215
1216 /*
1217 * In T6, for egress queue type FL there is internal overhead
1218 * of 16B for header going into FLM module. Hence the maximum
1219 * allowed burst size is 448 bytes. For T4/T5, the hardware
1220 * doesn't coalesce fetch requests if more than 64 bytes of
1221 * Free List pointers are provided, so we use a 128-byte Fetch
1222 * Burst Minimum there (T6 implements coalescing so we can use
1223 * the smaller 64-byte value there).
1224 */
1225 const uint_t fbmin = t4_cver_ge(sc, CHELSIO_T6) ?
1226 X_FETCHBURSTMIN_64B_T6 : X_FETCHBURSTMIN_128B;
1227 const uint_t fbmax = t4_cver_ge(sc, CHELSIO_T6) ?
1228 X_FETCHBURSTMAX_256B : X_FETCHBURSTMAX_512B;
1229 c.fl0dcaen_to_fl0cidxfthresh = cpu_to_be16(
1230 V_FW_IQ_CMD_FL0FBMIN(fbmin) |
1231 V_FW_IQ_CMD_FL0FBMAX(fbmax));
1232 c.fl0size = cpu_to_be16(fl->qsize);
1233 c.fl0addr = cpu_to_be64(fl->ba);
1234 }
1235
1236 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
1237 if (rc != 0) {
1238 cxgb_printf(sc->dip, CE_WARN,
1239 "failed to create ingress queue: %d", rc);
1240 return (rc);
1241 }
1242
1243 iq->cdesc = iq->desc;
1244 iq->cidx = 0;
1245 iq->gen = 1;
1246 iq->adapter = sc;
1247 iq->cntxt_id = be16_to_cpu(c.iqid);
1248 iq->abs_id = be16_to_cpu(c.physiqid);
1249 iq->flags |= IQ_ALLOCATED;
1250 mutex_init(&iq->lock, NULL, MUTEX_DRIVER,
1251 DDI_INTR_PRI(DDI_INTR_PRI(sc->intr_pri)));
1252 iq->polling = 0;
1253
1254 *t4_iqmap_slot(sc, iq->cntxt_id) = iq;
1255
1256 if (fl != NULL) {
1257 fl->cntxt_id = be16_to_cpu(c.fl0id);
1258 fl->pidx = fl->cidx = 0;
1259 fl->copy_threshold = rx_copy_threshold;
1260
1261 *t4_eqmap_slot(sc, fl->cntxt_id) = (struct sge_eq *)fl;
1262
1263 FL_LOCK(fl);
1264 (void) refill_fl(sc, fl, fl->lowat);
1265 FL_UNLOCK(fl);
1266
1267 iq->flags |= IQ_HAS_FL;
1268 }
1269
1270 if (t4_cver_ge(sc, CHELSIO_T5) && cong >= 0) {
1271 uint32_t param, val;
1272
1273 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1274 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
1275 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
1276 if (cong == 0)
1277 val = 1 << 19;
1278 else {
1279 val = 2 << 19;
1280 for (i = 0; i < 4; i++) {
1281 if (cong & (1 << i))
1282 val |= 1 << (i << 2);
1283 }
1284 }
1285
1286 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val);
1287 if (rc != 0) {
1288 /* report error but carry on */
1289 cxgb_printf(sc->dip, CE_WARN,
1290 "failed to set congestion manager context for "
1291 "ingress queue %d: %d", iq->cntxt_id, rc);
1292 }
1293 }
1294
1295 /* Enable IQ interrupts */
1296 iq->state = IQS_IDLE;
1297 t4_iq_gts_update(iq, iq->intr_params, 0);
1298
1299 return (0);
1300 }
1301
1302 static int
1303 free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1304 {
1305 int rc;
1306
1307 if (iq != NULL) {
1308 struct adapter *sc = iq->adapter;
1309 dev_info_t *dip;
1310
1311 dip = pi ? pi->dip : sc->dip;
1312 if (iq->flags & IQ_ALLOCATED) {
1313 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1314 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1315 fl ? fl->cntxt_id : 0xffff, 0xffff);
1316 if (rc != 0) {
1317 cxgb_printf(dip, CE_WARN,
1318 "failed to free queue %p: %d", iq, rc);
1319 return (rc);
1320 }
1321 mutex_destroy(&iq->lock);
1322 iq->flags &= ~IQ_ALLOCATED;
1323 }
1324
1325 if (iq->desc != NULL) {
1326 (void) free_desc_ring(&iq->dhdl, &iq->ahdl);
1327 iq->desc = NULL;
1328 }
1329
1330 bzero(iq, sizeof (*iq));
1331 }
1332
1333 if (fl != NULL) {
1334 if (fl->sdesc != NULL) {
1335 FL_LOCK(fl);
1336 free_fl_bufs(fl);
1337 FL_UNLOCK(fl);
1338
1339 kmem_free(fl->sdesc, sizeof (struct fl_sdesc) *
1340 fl->cap);
1341 fl->sdesc = NULL;
1342 }
1343
1344 if (fl->desc != NULL) {
1345 (void) free_desc_ring(&fl->dhdl, &fl->ahdl);
1346 fl->desc = NULL;
1347 }
1348
1349 if (fl->flags & FL_MTX) {
1350 mutex_destroy(&fl->lock);
1351 fl->flags &= ~FL_MTX;
1352 }
1353
1354 bzero(fl, sizeof (struct sge_fl));
1355 }
1356
1357 return (0);
1358 }
1359
1360 int
1361 t4_alloc_fwq(struct adapter *sc)
1362 {
1363 int rc, intr_idx;
1364 struct sge_iq *fwq = &sc->sge.fwq;
1365
1366 init_iq(fwq, sc, sc->sge.fwq_tmr_idx, sc->sge.fwq_pktc_idx,
1367 FW_IQ_QSIZE, FW_IQ_ESIZE);
1368 fwq->flags |= IQ_INTR; /* always */
1369 intr_idx = sc->intr_count > 1 ? 1 : 0;
1370 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1371 if (rc != 0) {
1372 cxgb_printf(sc->dip, CE_WARN,
1373 "failed to create firmware event queue: %d.", rc);
1374 return (rc);
1375 }
1376
1377 return (0);
1378 }
1379
1380 int
1381 t4_free_fwq(struct adapter *sc)
1382 {
1383 return (free_iq_fl(NULL, &sc->sge.fwq, NULL));
1384 }
1385
1386 static int
1387 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int i)
1388 {
1389 int rc;
1390
1391 rxq->port = pi;
1392 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx,
1393 t4_get_tp_ch_map(pi->adapter, pi->tx_chan));
1394 if (rc != 0)
1395 return (rc);
1396
1397 rxq->ksp = setup_rxq_kstats(pi, rxq, i);
1398
1399 return (rc);
1400 }
1401
1402 static int
1403 free_rxq(struct port_info *pi, struct sge_rxq *rxq)
1404 {
1405 int rc;
1406
1407 if (rxq->ksp != NULL) {
1408 kstat_delete(rxq->ksp);
1409 rxq->ksp = NULL;
1410 }
1411
1412 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
1413 if (rc == 0)
1414 bzero(&rxq->fl, sizeof (*rxq) - offsetof(struct sge_rxq, fl));
1415
1416 return (rc);
1417 }
1418
1419 static int
1420 eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
1421 {
1422 struct fw_eq_eth_cmd c = {
1423 .op_to_vfn = BE_32(
1424 V_FW_CMD_OP(FW_EQ_ETH_CMD) |
1425 F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC |
1426 V_FW_EQ_ETH_CMD_PFN(sc->pf) |
1427 V_FW_EQ_ETH_CMD_VFN(0)),
1428 .alloc_to_len16 = BE_32(
1429 F_FW_EQ_ETH_CMD_ALLOC |
1430 F_FW_EQ_ETH_CMD_EQSTART |
1431 FW_LEN16(struct fw_eq_eth_cmd)),
1432 .autoequiqe_to_viid = BE_32(
1433 F_FW_EQ_ETH_CMD_AUTOEQUIQE |
1434 F_FW_EQ_ETH_CMD_AUTOEQUEQE |
1435 V_FW_EQ_ETH_CMD_VIID(pi->viid)),
1436 .fetchszm_to_iqid = BE_32(
1437 V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_BOTH) |
1438 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) |
1439 F_FW_EQ_ETH_CMD_FETCHRO |
1440 V_FW_EQ_ETH_CMD_IQID(eq->iqid)),
1441 .dcaen_to_eqsize = BE_32(
1442 V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1443 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1444 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1445 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)),
1446 .eqaddr = BE_64(eq->ba),
1447 };
1448
1449 /*
1450 * The EQ is configured to send a notification for every 32 consumed
1451 * entries (X_CIDXFLUSHTHRESH_32). In order to ensure timely
1452 * notification of entry consumption during slow periods when that
1453 * threshold may not be reached with regularity, two mechanisms exist:
1454 *
1455 * 1. The DBQ timer can be configured to fire (and send a notification)
1456 * after a period when the EQ has gone idle. This is available on T6
1457 * and later adapters.
1458 *
1459 * 2. The CIDXFlushThresholdOverride flag will send a notification
1460 * whenever a consumed entry causes CIDX == PIDX, even if the
1461 * CIDXFlushThreshold has not been reached.
1462 *
1463 * The DBQ timer is preferred, as it results in no additional
1464 * notifications when the EQ is kept busy with small transmissions.
1465 * Comparatively, flows of many short packets (like frequent ACKs) can
1466 * cause the CIDXFlushThresholdOverride mechanism to induce a
1467 * notification for every transmitted packet.
1468 */
1469 if (sc->flags & TAF_DBQ_TIMER) {
1470 /* Configure the DBQ timer when it is available */
1471 c.timeren_timerix = BE_32(
1472 F_FW_EQ_ETH_CMD_TIMEREN |
1473 V_FW_EQ_ETH_CMD_TIMERIX(pi->dbq_timer_idx));
1474 } else {
1475 /* Otherwise fall back to CIDXFlushThresholdOverride */
1476 c.dcaen_to_eqsize |= BE_32(F_FW_EQ_ETH_CMD_CIDXFTHRESHO);
1477 }
1478
1479 int rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
1480 if (rc != 0) {
1481 cxgb_printf(pi->dip, CE_WARN,
1482 "failed to create Ethernet egress queue: %d", rc);
1483 return (rc);
1484 }
1485 eq->flags |= EQ_ALLOCATED;
1486
1487 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(BE_32(c.eqid_pkd));
1488
1489 *t4_eqmap_slot(sc, eq->cntxt_id) = eq;
1490
1491 return (rc);
1492 }
1493
1494 static int
1495 alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
1496 {
1497 int rc;
1498 size_t len;
1499
1500 mutex_init(&eq->lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(sc->intr_pri));
1501 eq->flags |= EQ_MTX;
1502
1503 len = eq->qsize * EQ_ESIZE;
1504 rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &eq->desc_dhdl,
1505 &eq->desc_ahdl, &eq->ba, (caddr_t *)&eq->desc);
1506 if (rc != 0)
1507 return (rc);
1508
1509 eq->cap = eq->qsize - sc->sge.stat_len / EQ_ESIZE;
1510 eq->spg = (void *)&eq->desc[eq->cap];
1511 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
1512 eq->pidx = eq->cidx = 0;
1513 eq->doorbells = sc->doorbells;
1514
1515 rc = eth_eq_alloc(sc, pi, eq);
1516 if (rc != 0) {
1517 cxgb_printf(sc->dip, CE_WARN,
1518 "failed to allocate egress queue: %d", rc);
1519 }
1520
1521 if (eq->doorbells & (DOORBELL_UDB | DOORBELL_UDBWC | DOORBELL_WCWR)) {
1522 uint64_t udb_offset;
1523 uint_t udb_qid;
1524
1525 rc = t4_bar2_sge_qregs(sc, eq->cntxt_id, T4_BAR2_QTYPE_EGRESS,
1526 0, &udb_offset, &udb_qid);
1527
1528 if (rc == 0) {
1529 eq->udb = sc->bar2_ptr + udb_offset;
1530 eq->udb_qid = udb_qid;
1531 } else {
1532 eq->doorbells &=
1533 ~(DOORBELL_UDB | DOORBELL_UDBWC | DOORBELL_WCWR);
1534 eq->udb = NULL;
1535 eq->udb_qid = 0;
1536 }
1537 }
1538
1539 return (rc);
1540 }
1541
1542 static int
1543 free_eq(struct adapter *sc, struct sge_eq *eq)
1544 {
1545 int rc;
1546
1547 if (eq->flags & EQ_ALLOCATED) {
1548 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1549 if (rc != 0) {
1550 cxgb_printf(sc->dip, CE_WARN,
1551 "failed to free egress queue: %d", rc);
1552 return (rc);
1553 }
1554 eq->flags &= ~EQ_ALLOCATED;
1555 }
1556
1557 if (eq->desc != NULL) {
1558 (void) free_desc_ring(&eq->desc_dhdl, &eq->desc_ahdl);
1559 eq->desc = NULL;
1560 }
1561
1562 if (eq->flags & EQ_MTX)
1563 mutex_destroy(&eq->lock);
1564
1565 bzero(eq, sizeof (*eq));
1566 return (0);
1567 }
1568
1569 static int
1570 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
1571 {
1572 int rc, i;
1573 struct adapter *sc = pi->adapter;
1574 struct sge_eq *eq = &txq->eq;
1575
1576 rc = alloc_eq(sc, pi, eq);
1577 if (rc != 0)
1578 return (rc);
1579
1580 txq->port = pi;
1581 txq->sdesc = kmem_zalloc(sizeof (struct tx_sdesc) * eq->cap, KM_SLEEP);
1582 txq->copy_threshold = tx_copy_threshold;
1583 txq->txb_size = eq->qsize * txq->copy_threshold;
1584 rc = alloc_tx_copybuffer(sc, txq->txb_size, &txq->txb_dhdl,
1585 &txq->txb_ahdl, &txq->txb_ba, &txq->txb_va);
1586 if (rc == 0)
1587 txq->txb_avail = txq->txb_size;
1588 else
1589 txq->txb_avail = txq->txb_size = 0;
1590
1591 /*
1592 * TODO: is this too low? Worst case would need around 4 times qsize
1593 * (all tx descriptors filled to the brim with SGLs, with each entry in
1594 * the SGL coming from a distinct DMA handle). Increase tx_dhdl_total
1595 * if you see too many dma_hdl_failed.
1596 */
1597 txq->tx_dhdl_total = eq->qsize * 2;
1598 txq->tx_dhdl = kmem_zalloc(sizeof (ddi_dma_handle_t) *
1599 txq->tx_dhdl_total, KM_SLEEP);
1600 for (i = 0; i < txq->tx_dhdl_total; i++) {
1601 rc = ddi_dma_alloc_handle(sc->dip, &sc->sge.dma_attr_tx,
1602 DDI_DMA_SLEEP, 0, &txq->tx_dhdl[i]);
1603 if (rc != DDI_SUCCESS) {
1604 cxgb_printf(sc->dip, CE_WARN,
1605 "%s: failed to allocate DMA handle (%d)",
1606 __func__, rc);
1607 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL);
1608 }
1609 txq->tx_dhdl_avail++;
1610 }
1611
1612 txq->ksp = setup_txq_kstats(pi, txq, idx);
1613
1614 return (rc);
1615 }
1616
1617 static int
1618 free_txq(struct port_info *pi, struct sge_txq *txq)
1619 {
1620 int i;
1621 struct adapter *sc = pi->adapter;
1622 struct sge_eq *eq = &txq->eq;
1623
1624 if (txq->ksp != NULL) {
1625 kstat_delete(txq->ksp);
1626 txq->ksp = NULL;
1627 }
1628
1629 if (txq->txb_va != NULL) {
1630 (void) free_desc_ring(&txq->txb_dhdl, &txq->txb_ahdl);
1631 txq->txb_va = NULL;
1632 }
1633
1634 if (txq->sdesc != NULL) {
1635 struct tx_sdesc *sd;
1636 ddi_dma_handle_t hdl;
1637
1638 TXQ_LOCK(txq);
1639 while (eq->cidx != eq->pidx) {
1640 sd = &txq->sdesc[eq->cidx];
1641
1642 for (i = sd->hdls_used; i; i--) {
1643 hdl = txq->tx_dhdl[txq->tx_dhdl_cidx];
1644 (void) ddi_dma_unbind_handle(hdl);
1645 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total)
1646 txq->tx_dhdl_cidx = 0;
1647 }
1648
1649 ASSERT(sd->mp_head);
1650 freemsgchain(sd->mp_head);
1651 sd->mp_head = sd->mp_tail = NULL;
1652
1653 eq->cidx += sd->desc_used;
1654 if (eq->cidx >= eq->cap)
1655 eq->cidx -= eq->cap;
1656
1657 txq->txb_avail += txq->txb_used;
1658 }
1659 ASSERT(txq->tx_dhdl_cidx == txq->tx_dhdl_pidx);
1660 ASSERT(txq->txb_avail == txq->txb_size);
1661 TXQ_UNLOCK(txq);
1662
1663 kmem_free(txq->sdesc, sizeof (struct tx_sdesc) * eq->cap);
1664 txq->sdesc = NULL;
1665 }
1666
1667 if (txq->tx_dhdl != NULL) {
1668 for (i = 0; i < txq->tx_dhdl_total; i++) {
1669 if (txq->tx_dhdl[i] != NULL)
1670 ddi_dma_free_handle(&txq->tx_dhdl[i]);
1671 }
1672 kmem_free(txq->tx_dhdl,
1673 sizeof (ddi_dma_handle_t) * txq->tx_dhdl_total);
1674 txq->tx_dhdl = NULL;
1675 }
1676
1677 (void) free_eq(sc, &txq->eq);
1678
1679 bzero(txq, sizeof (*txq));
1680 return (0);
1681 }
1682
1683 /*
1684 * Allocates a block of contiguous memory for DMA. Can be used to allocate
1685 * memory for descriptor rings or for tx/rx copy buffers.
1686 *
1687 * Caller does not have to clean up anything if this function fails, it cleans
1688 * up after itself.
1689 *
1690 * Caller provides the following:
1691 * len length of the block of memory to allocate.
1692 * flags DDI_DMA_* flags to use (CONSISTENT/STREAMING, READ/WRITE/RDWR)
1693 * acc_attr device access attributes for the allocation.
1694 * dma_attr DMA attributes for the allocation
1695 *
1696 * If the function is successful it fills up this information:
1697 * dma_hdl DMA handle for the allocated memory
1698 * acc_hdl access handle for the allocated memory
1699 * ba bus address of the allocated memory
1700 * va KVA of the allocated memory.
1701 */
1702 static int
1703 alloc_dma_memory(struct adapter *sc, size_t len, int flags,
1704 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr,
1705 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
1706 uint64_t *pba, caddr_t *pva)
1707 {
1708 int rc;
1709 ddi_dma_handle_t dhdl;
1710 ddi_acc_handle_t ahdl;
1711 ddi_dma_cookie_t cookie;
1712 uint_t ccount;
1713 caddr_t va;
1714 size_t real_len;
1715
1716 *pva = NULL;
1717
1718 /*
1719 * DMA handle.
1720 */
1721 rc = ddi_dma_alloc_handle(sc->dip, dma_attr, DDI_DMA_SLEEP, 0, &dhdl);
1722 if (rc != DDI_SUCCESS) {
1723 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL);
1724 }
1725
1726 /*
1727 * Memory suitable for DMA.
1728 */
1729 rc = ddi_dma_mem_alloc(dhdl, len, acc_attr,
1730 flags & DDI_DMA_CONSISTENT ? DDI_DMA_CONSISTENT : DDI_DMA_STREAMING,
1731 DDI_DMA_SLEEP, 0, &va, &real_len, &ahdl);
1732 if (rc != DDI_SUCCESS) {
1733 ddi_dma_free_handle(&dhdl);
1734 return (ENOMEM);
1735 }
1736
1737 /*
1738 * DMA bindings.
1739 */
1740 rc = ddi_dma_addr_bind_handle(dhdl, NULL, va, real_len, flags, NULL,
1741 NULL, &cookie, &ccount);
1742 if (rc != DDI_DMA_MAPPED) {
1743 ddi_dma_mem_free(&ahdl);
1744 ddi_dma_free_handle(&dhdl);
1745 return (ENOMEM);
1746 }
1747 if (ccount != 1) {
1748 /* unusable DMA mapping */
1749 (void) free_desc_ring(&dhdl, &ahdl);
1750 return (ENOMEM);
1751 }
1752
1753 bzero(va, real_len);
1754 *dma_hdl = dhdl;
1755 *acc_hdl = ahdl;
1756 *pba = cookie.dmac_laddress;
1757 *pva = va;
1758
1759 return (0);
1760 }
1761
1762 static int
1763 free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl)
1764 {
1765 (void) ddi_dma_unbind_handle(*dhdl);
1766 ddi_dma_mem_free(ahdl);
1767 ddi_dma_free_handle(dhdl);
1768
1769 return (0);
1770 }
1771
1772 static int
1773 alloc_desc_ring(struct adapter *sc, size_t len, int rw,
1774 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
1775 uint64_t *pba, caddr_t *pva)
1776 {
1777 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_desc;
1778 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc;
1779
1780 return (alloc_dma_memory(sc, len, DDI_DMA_CONSISTENT | rw, acc_attr,
1781 dma_attr, dma_hdl, acc_hdl, pba, pva));
1782 }
1783
1784 static int
1785 free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl)
1786 {
1787 return (free_dma_memory(dhdl, ahdl));
1788 }
1789
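/*
 * Allocate the per-txq copy buffer.  The descriptor-ring DMA attributes are
 * used (rather than dma_attr_tx) because alloc_dma_memory() requires the
 * allocation to bind to a single DMA cookie.
 */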
1790 static int
1791 alloc_tx_copybuffer(struct adapter *sc, size_t len,
1792 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
1793 uint64_t *pba, caddr_t *pva)
1794 {
1795 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_tx;
1796 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc; /* NOT dma_attr_tx */
1797
1798 return (alloc_dma_memory(sc, len, DDI_DMA_STREAMING | DDI_DMA_WRITE,
1799 acc_attr, dma_attr, dma_hdl, acc_hdl, pba, pva));
1800 }
1801
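/*
 * Sync the current ingress descriptor from DMA and check whether its
 * generation bit matches the queue's, i.e. whether the hardware has posted a
 * new response there.  *ctrl is always set to point at the descriptor's
 * trailing response control block.
 */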
1802 static inline bool
1803 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
1804 {
1805 (void) ddi_dma_sync(iq->dhdl, (uintptr_t)iq->cdesc -
1806 (uintptr_t)iq->desc, iq->esize, DDI_DMA_SYNC_FORKERNEL);
1807
1808 *ctrl = (void *)((uintptr_t)iq->cdesc +
1809 (iq->esize - sizeof (struct rsp_ctrl)));
1810
1811 return ((((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen));
1812 }
1813
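/*
 * Advance the ingress queue's current descriptor pointer, wrapping back to
 * the start of the ring (and toggling the expected generation bit) when the
 * end is reached.
 */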
1814 static inline void
1815 iq_next(struct sge_iq *iq)
1816 {
1817 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
1818 if (++iq->cidx == iq->qsize - 1) {
1819 iq->cidx = 0;
1820 iq->gen ^= 1;
1821 iq->cdesc = iq->desc;
1822 }
1823 }
1824
1825 /*
1826  * Fill up the freelist with up to nbufs buffers and maybe ring its doorbell.
1827 *
1828 * Returns non-zero to indicate that it should be added to the list of starving
1829 * freelists.
1830 */
1831 static int
1832 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
1833 {
1834 uint64_t *d = &fl->desc[fl->pidx];
1835 struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
1836
1837 FL_LOCK_ASSERT_OWNED(fl);
1838 ASSERT(nbufs >= 0);
1839
1840 if (nbufs > fl->needed)
1841 nbufs = fl->needed;
1842
1843 while (nbufs--) {
1844 if (sd->rxb != NULL) {
1845 if (sd->rxb->ref_cnt == 1) {
1846 /*
1847 * Buffer is available for recycling. Two ways
1848 * this can happen:
1849 *
1850 * a) All the packets DMA'd into it last time
1851 * around were within the rx_copy_threshold
1852 * and no part of the buffer was ever passed
1853 * up (ref_cnt never went over 1).
1854 *
1855 * b) Packets DMA'd into the buffer were passed
1856 * up but have all been freed by the upper
1857 * layers by now (ref_cnt went over 1 but is
1858 * now back to 1).
1859 *
1860 * Either way the bus address in the descriptor
1861 * ring is already valid.
1862 */
1863 ASSERT(*d == cpu_to_be64(sd->rxb->ba));
1864 d++;
1865 goto recycled;
1866 } else {
1867 /*
1868 * Buffer still in use and we need a
1869 * replacement. But first release our reference
1870 * on the existing buffer.
1871 */
1872 rxbuf_free(sd->rxb);
1873 }
1874 }
1875
1876 sd->rxb = rxbuf_alloc(sc->sge.rxbuf_cache, KM_NOSLEEP, 1);
1877 if (sd->rxb == NULL)
1878 break;
1879 *d++ = cpu_to_be64(sd->rxb->ba);
1880
1881 recycled: fl->pending++;
1882 sd++;
1883 fl->needed--;
1884 if (++fl->pidx == fl->cap) {
1885 fl->pidx = 0;
1886 sd = fl->sdesc;
1887 d = fl->desc;
1888 }
1889 }
1890
1891 if (fl->pending >= 8)
1892 ring_fl_db(sc, fl);
1893
1894 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
1895 }
1896
1897 #ifndef TAILQ_FOREACH_SAFE
1898 #define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
1899 for ((var) = TAILQ_FIRST((head)); \
1900 (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
1901 (var) = (tvar))
1902 #endif
1903
1904 /*
1905 * Attempt to refill all starving freelists.
1906 */
1907 static void
1908 refill_sfl(void *arg)
1909 {
1910 struct adapter *sc = arg;
1911 struct sge_fl *fl, *fl_temp;
1912
1913 mutex_enter(&sc->sfl_lock);
1914 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
1915 FL_LOCK(fl);
1916 (void) refill_fl(sc, fl, 64);
1917 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
1918 TAILQ_REMOVE(&sc->sfl, fl, link);
1919 fl->flags &= ~FL_STARVING;
1920 }
1921 FL_UNLOCK(fl);
1922 }
1923
1924 	if (!TAILQ_EMPTY(&sc->sfl))
1925 sc->sfl_timer = timeout(refill_sfl, sc, drv_usectohz(100000));
1926 mutex_exit(&sc->sfl_lock);
1927 }
1928
1929 static void
1930 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
1931 {
1932 mutex_enter(&sc->sfl_lock);
1933 FL_LOCK(fl);
1934 if ((fl->flags & FL_DOOMED) == 0) {
1935 if (TAILQ_EMPTY(&sc->sfl) != 0) {
1936 sc->sfl_timer = timeout(refill_sfl, sc,
1937 drv_usectohz(100000));
1938 }
1939 fl->flags |= FL_STARVING;
1940 TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
1941 }
1942 FL_UNLOCK(fl);
1943 mutex_exit(&sc->sfl_lock);
1944 }
1945
1946 static void
1947 free_fl_bufs(struct sge_fl *fl)
1948 {
1949 struct fl_sdesc *sd;
1950 unsigned int i;
1951
1952 FL_LOCK_ASSERT_OWNED(fl);
1953
1954 for (i = 0; i < fl->cap; i++) {
1955 sd = &fl->sdesc[i];
1956
1957 if (sd->rxb != NULL) {
1958 rxbuf_free(sd->rxb);
1959 sd->rxb = NULL;
1960 }
1961 }
1962 }
1963
1964 /*
1965 * Note that fl->cidx and fl->offset are left unchanged in case of failure.
1966 */
1967 static mblk_t *
1968 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
1969 int *fl_bufs_used)
1970 {
1971 struct mblk_pair frame = {0};
1972 struct rxbuf *rxb;
1973 mblk_t *m = NULL;
1974 uint_t nbuf = 0, len, copy, n;
1975 uint32_t cidx, offset, rcidx, roffset;
1976
1977 /*
1978 * The SGE won't pack a new frame into the current buffer if the entire
1979 * payload doesn't fit in the remaining space. Move on to the next buf
1980 * in that case.
1981 */
1982 rcidx = fl->cidx;
1983 roffset = fl->offset;
1984 if (fl->offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
1985 fl->offset = 0;
1986 if (++fl->cidx == fl->cap)
1987 fl->cidx = 0;
1988 nbuf++;
1989 }
1990 cidx = fl->cidx;
1991 offset = fl->offset;
1992
1993 len = G_RSPD_LEN(len_newbuf); /* pktshift + payload length */
1994 copy = (len <= fl->copy_threshold);
1995 if (copy != 0) {
1996 frame.head = m = allocb(len, BPRI_HI);
1997 if (m == NULL) {
1998 fl->allocb_fail++;
1999 DTRACE_PROBE1(t4__fl_alloc_fail, struct sge_fl *, fl);
2000 fl->cidx = rcidx;
2001 fl->offset = roffset;
2002 return (NULL);
2003 }
2004 }
2005
2006 while (len) {
2007 rxb = fl->sdesc[cidx].rxb;
2008 n = min(len, rxb->buf_size - offset);
2009
2010 (void) ddi_dma_sync(rxb->dhdl, offset, n,
2011 DDI_DMA_SYNC_FORKERNEL);
2012
2013 if (copy != 0)
2014 bcopy(rxb->va + offset, m->b_wptr, n);
2015 else {
2016 m = desballoc((unsigned char *)rxb->va + offset, n,
2017 BPRI_HI, &rxb->freefunc);
2018 if (m == NULL) {
2019 fl->allocb_fail++;
2020 DTRACE_PROBE1(t4__fl_alloc_fail,
2021 struct sge_fl *, fl);
2022 if (frame.head)
2023 freemsgchain(frame.head);
2024 fl->cidx = rcidx;
2025 fl->offset = roffset;
2026 return (NULL);
2027 }
2028 atomic_inc_uint(&rxb->ref_cnt);
2029 if (frame.head != NULL)
2030 frame.tail->b_cont = m;
2031 else
2032 frame.head = m;
2033 frame.tail = m;
2034 }
2035 m->b_wptr += n;
2036 len -= n;
2037 offset += roundup(n, sc->sge.fl_align);
2038 ASSERT(offset <= rxb->buf_size);
2039 if (offset == rxb->buf_size) {
2040 offset = 0;
2041 if (++cidx == fl->cap)
2042 cidx = 0;
2043 nbuf++;
2044 }
2045 }
2046
2047 fl->cidx = cidx;
2048 fl->offset = offset;
2049 (*fl_bufs_used) += nbuf;
2050
2051 ASSERT(frame.head != NULL);
2052 return (frame.head);
2053 }
2054
2055 /*
2056 * We'll do immediate data tx for non-LSO, but only when not coalescing. We're
2057  * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
2058 * of immediate data.
2059 */
2060 #define IMM_LEN ( \
2061 2 * EQ_ESIZE \
2062 - sizeof (struct fw_eth_tx_pkt_wr) \
2063 - sizeof (struct cpl_tx_pkt_core))
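/*
 * For reference (assuming the usual 64-byte equeue descriptor and 16-byte
 * fw_eth_tx_pkt_wr and cpl_tx_pkt_core structures), this works out to
 * 2 * 64 - 16 - 16 = 96 bytes of immediate data, matching the comment above.
 */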
2064
2065 /*
2066 * Returns non-zero on failure, no need to cleanup anything in that case.
2067 *
2068 * Note 1: We always try to pull up the mblk if required and return E2BIG only
2069 * if this fails.
2070 *
2071 * Note 2: We'll also pullup incoming mblk if HW_LSO is set and the first mblk
2072 * does not have the TCP header in it.
2073 */
2074 static int
2075 get_frame_txinfo(struct sge_txq *txq, mblk_t **fp, struct txinfo *txinfo,
2076 int sgl_only)
2077 {
2078 uint32_t flags = 0, len, n;
2079 mblk_t *m = *fp;
2080 int rc;
2081
2082 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb and dma_hdls */
2083
2084 mac_hcksum_get(m, NULL, NULL, NULL, NULL, &flags);
2085 txinfo->flags = (flags & HCK_TX_FLAGS);
2086
2087 mac_lso_get(m, &txinfo->mss, &flags);
2088 txinfo->flags |= (flags & HW_LSO_FLAGS);
2089
2090 if (flags & HW_LSO)
2091 sgl_only = 1; /* Do not allow immediate data with LSO */
2092
2093 /*
2094 * If checksum or segmentation offloads are requested, gather
2095 * information about the sizes and types of headers in the packet.
2096 */
2097 if (txinfo->flags != 0) {
2098 mac_ether_offload_info(m, &txinfo->meoi);
2099 } else {
2100 bzero(&txinfo->meoi, sizeof (txinfo->meoi));
2101 }
2102
2103 start: txinfo->nsegs = 0;
2104 txinfo->hdls_used = 0;
2105 txinfo->txb_used = 0;
2106 txinfo->len = 0;
2107
2108 /* total length and a rough estimate of # of segments */
2109 n = 0;
2110 for (; m; m = m->b_cont) {
2111 len = MBLKL(m);
2112 n += (len / PAGE_SIZE) + 1;
2113 txinfo->len += len;
2114 }
2115 m = *fp;
2116
2117 if (n >= TX_SGL_SEGS || (flags & HW_LSO && MBLKL(m) < 50)) {
2118 txq->pullup_early++;
2119 m = msgpullup(*fp, -1);
2120 if (m == NULL) {
2121 txq->pullup_failed++;
2122 return (E2BIG); /* (*fp) left as it was */
2123 }
2124 freemsg(*fp);
2125 *fp = m;
2126 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags);
2127 }
2128
2129 if (txinfo->len <= IMM_LEN && !sgl_only)
2130 return (0); /* nsegs = 0 tells caller to use imm. tx */
2131
2132 if (txinfo->len <= txq->copy_threshold &&
2133 copy_into_txb(txq, m, txinfo->len, txinfo) == 0)
2134 goto done;
2135
2136 for (; m; m = m->b_cont) {
2137
2138 len = MBLKL(m);
2139
2140 /* Use tx copy buffer if this mblk is small enough */
2141 if (len <= txq->copy_threshold &&
2142 copy_into_txb(txq, m, len, txinfo) == 0)
2143 continue;
2144
2145 /* Add DMA bindings for this mblk to the SGL */
2146 rc = add_mblk(txq, txinfo, m, len);
2147
2148 if (rc == E2BIG ||
2149 (txinfo->nsegs == TX_SGL_SEGS && m->b_cont)) {
2150
2151 txq->pullup_late++;
2152 m = msgpullup(*fp, -1);
2153 if (m != NULL) {
2154 free_txinfo_resources(txq, txinfo);
2155 freemsg(*fp);
2156 *fp = m;
2157 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags);
2158 goto start;
2159 }
2160
2161 txq->pullup_failed++;
2162 rc = E2BIG;
2163 }
2164
2165 if (rc != 0) {
2166 free_txinfo_resources(txq, txinfo);
2167 return (rc);
2168 }
2169 }
2170
2171 ASSERT(txinfo->nsegs > 0 && txinfo->nsegs <= TX_SGL_SEGS);
2172
2173 done:
2174
2175 /*
2176 * Store the # of flits required to hold this frame's SGL in nflits. An
2177 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2178 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used
2179 * then len1 must be set to 0.
2180 */
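	/*
	 * A quick worked example of the formula below: nsegs = 1 needs
	 * (3 * 0) / 2 + 0 + 2 = 2 flits, nsegs = 3 needs (3 * 2) / 2 + 0 + 2 =
	 * 5 flits, and nsegs = 4 needs (3 * 3) / 2 + 1 + 2 = 7 flits (an odd
	 * trailing segment still consumes a full len/addr pair, with len1
	 * zeroed just below).
	 */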
2181 n = txinfo->nsegs - 1;
2182 txinfo->nflits = (3 * n) / 2 + (n & 1) + 2;
2183 if (n & 1)
2184 txinfo->sgl.sge[n / 2].len[1] = cpu_to_be32(0);
2185
2186 txinfo->sgl.cmd_nsge = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_DSGL) |
2187 V_ULPTX_NSGE(txinfo->nsegs));
2188
2189 return (0);
2190 }
2191
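/*
 * Check whether `len` bytes fit in the tx copy buffer, which is managed as a
 * ring. A request is never split across the end of the buffer; if it would
 * straddle the end, the unused tail is reported via *waste so the caller can
 * account for it and restart the copy at the beginning of the buffer.
 */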
2192 static inline int
2193 fits_in_txb(struct sge_txq *txq, int len, int *waste)
2194 {
2195 if (txq->txb_avail < len)
2196 return (0);
2197
2198 if (txq->txb_next + len <= txq->txb_size) {
2199 *waste = 0;
2200 return (1);
2201 }
2202
2203 *waste = txq->txb_size - txq->txb_next;
2204
2205 return (txq->txb_avail - *waste < len ? 0 : 1);
2206 }
2207
2208 #define TXB_CHUNK 64
2209
2210 /*
2211 * Copies the specified # of bytes into txq's tx copy buffer and updates txinfo
2212 * and txq to indicate resources used. Caller has to make sure that those many
2213  * and txq to indicate resources used. The caller must ensure that at least
2214  * that many bytes are available in the mblk chain (b_cont linked).
2215 static inline int
2216 copy_into_txb(struct sge_txq *txq, mblk_t *m, int len, struct txinfo *txinfo)
2217 {
2218 int waste, n;
2219
2220 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb */
2221
2222 if (!fits_in_txb(txq, len, &waste)) {
2223 txq->txb_full++;
2224 return (ENOMEM);
2225 }
2226
2227 if (waste != 0) {
2228 ASSERT((waste & (TXB_CHUNK - 1)) == 0);
2229 txinfo->txb_used += waste;
2230 txq->txb_avail -= waste;
2231 txq->txb_next = 0;
2232 }
2233
2234 for (n = 0; n < len; m = m->b_cont) {
2235 bcopy(m->b_rptr, txq->txb_va + txq->txb_next + n, MBLKL(m));
2236 n += MBLKL(m);
2237 }
2238
2239 add_seg(txinfo, txq->txb_ba + txq->txb_next, len);
2240
2241 n = roundup(len, TXB_CHUNK);
2242 txinfo->txb_used += n;
2243 txq->txb_avail -= n;
2244 txq->txb_next += n;
2245 ASSERT(txq->txb_next <= txq->txb_size);
2246 if (txq->txb_next == txq->txb_size)
2247 txq->txb_next = 0;
2248
2249 return (0);
2250 }
2251
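/*
 * Append a (bus address, length) pair to the frame's ULPTX SGL. The first
 * segment uses the len0/addr0 slots of the SGL header; subsequent segments
 * are packed two per ulptx_sge_pair.
 */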
2252 static inline void
2253 add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len)
2254 {
2255 ASSERT(txinfo->nsegs < TX_SGL_SEGS); /* must have room */
2256
2257 if (txinfo->nsegs != 0) {
2258 int idx = txinfo->nsegs - 1;
2259 txinfo->sgl.sge[idx / 2].len[idx & 1] = cpu_to_be32(len);
2260 txinfo->sgl.sge[idx / 2].addr[idx & 1] = cpu_to_be64(ba);
2261 } else {
2262 txinfo->sgl.len0 = cpu_to_be32(len);
2263 txinfo->sgl.addr0 = cpu_to_be64(ba);
2264 }
2265 txinfo->nsegs++;
2266 }
2267
2268 /*
2269 * This function cleans up any partially allocated resources when it fails so
2270 * there's nothing for the caller to clean up in that case.
2271 *
2272 * EIO indicates permanent failure. Caller should drop the frame containing
2273 * this mblk and continue.
2274 *
2275 * E2BIG indicates that the SGL length for this mblk exceeds the hardware
2276 * limit. Caller should pull up the frame before trying to send it out.
2277 * (This error means our pullup_early heuristic did not work for this frame)
2278 *
2279 * ENOMEM indicates a temporary shortage of resources (DMA handles, other DMA
2280 * resources, etc.). Caller should suspend the tx queue and wait for reclaim to
2281 * free up resources.
2282 */
2283 static inline int
2284 add_mblk(struct sge_txq *txq, struct txinfo *txinfo, mblk_t *m, int len)
2285 {
2286 ddi_dma_handle_t dhdl;
2287 ddi_dma_cookie_t cookie;
2288 uint_t ccount = 0;
2289 int rc;
2290
2291 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate dhdls */
2292
2293 if (txq->tx_dhdl_avail == 0) {
2294 txq->dma_hdl_failed++;
2295 return (ENOMEM);
2296 }
2297
2298 dhdl = txq->tx_dhdl[txq->tx_dhdl_pidx];
2299 rc = ddi_dma_addr_bind_handle(dhdl, NULL, (caddr_t)m->b_rptr, len,
2300 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL, &cookie,
2301 &ccount);
2302 if (rc != DDI_DMA_MAPPED) {
2303 txq->dma_map_failed++;
2304
2305 ASSERT(rc != DDI_DMA_INUSE && rc != DDI_DMA_PARTIAL_MAP);
2306
2307 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EIO);
2308 }
2309
2310 if (ccount + txinfo->nsegs > TX_SGL_SEGS) {
2311 (void) ddi_dma_unbind_handle(dhdl);
2312 return (E2BIG);
2313 }
2314
2315 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size);
2316 while (--ccount) {
2317 ddi_dma_nextcookie(dhdl, &cookie);
2318 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size);
2319 }
2320
2321 if (++txq->tx_dhdl_pidx == txq->tx_dhdl_total)
2322 txq->tx_dhdl_pidx = 0;
2323 txq->tx_dhdl_avail--;
2324 txinfo->hdls_used++;
2325
2326 return (0);
2327 }
2328
2329 /*
2330 * Releases all the txq resources used up in the specified txinfo.
2331 */
2332 static void
2333 free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo)
2334 {
2335 int n;
2336
2337 TXQ_LOCK_ASSERT_OWNED(txq); /* dhdls, txb */
2338
2339 n = txinfo->txb_used;
2340 if (n > 0) {
2341 txq->txb_avail += n;
2342 if (n <= txq->txb_next)
2343 txq->txb_next -= n;
2344 else {
2345 n -= txq->txb_next;
2346 txq->txb_next = txq->txb_size - n;
2347 }
2348 }
2349
2350 for (n = txinfo->hdls_used; n > 0; n--) {
2351 if (txq->tx_dhdl_pidx > 0)
2352 txq->tx_dhdl_pidx--;
2353 else
2354 txq->tx_dhdl_pidx = txq->tx_dhdl_total - 1;
2355 txq->tx_dhdl_avail++;
2356 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_pidx]);
2357 }
2358 }
2359
2360 /*
2361 * Returns 0 to indicate that m has been accepted into a coalesced tx work
2362 * request. It has either been folded into txpkts or txpkts was flushed and m
2363 * has started a new coalesced work request (as the first frame in a fresh
2364 * txpkts).
2365 *
2366 * Returns non-zero to indicate a failure - caller is responsible for
2367 * transmitting m, if there was anything in txpkts it has been flushed.
2368 */
2369 static int
2370 add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m,
2371 struct txinfo *txinfo)
2372 {
2373 struct sge_eq *eq = &txq->eq;
2374 int can_coalesce;
2375 struct tx_sdesc *txsd;
2376 uint8_t flits;
2377
2378 TXQ_LOCK_ASSERT_OWNED(txq);
2379 ASSERT(m->b_next == NULL);
2380
2381 if (txpkts->npkt > 0) {
2382 flits = TXPKTS_PKT_HDR + txinfo->nflits;
2383 can_coalesce = (txinfo->flags & HW_LSO) == 0 &&
2384 txpkts->nflits + flits <= TX_WR_FLITS &&
2385 txpkts->nflits + flits <= eq->avail * 8 &&
2386 txpkts->plen + txinfo->len < 65536;
2387
2388 if (can_coalesce != 0) {
2389 txpkts->tail->b_next = m;
2390 txpkts->tail = m;
2391 txpkts->npkt++;
2392 txpkts->nflits += flits;
2393 txpkts->plen += txinfo->len;
2394
2395 txsd = &txq->sdesc[eq->pidx];
2396 txsd->txb_used += txinfo->txb_used;
2397 txsd->hdls_used += txinfo->hdls_used;
2398
2399 /*
2400 * The txpkts chaining above has already placed `m` at
2401 * the end with b_next. Keep the txsd notion of this
2402 * new tail up to date.
2403 */
2404 ASSERT3P(txsd->mp_tail->b_next, ==, m);
2405 txsd->mp_tail = m;
2406
2407 return (0);
2408 }
2409
2410 /*
2411 * Couldn't coalesce m into txpkts. The first order of business
2412 * is to send txpkts on its way. Then we'll revisit m.
2413 */
2414 write_txpkts_wr(txq, txpkts);
2415 }
2416
2417 /*
2418 * Check if we can start a new coalesced tx work request with m as
2419 * the first packet in it.
2420 */
2421
2422 ASSERT(txpkts->npkt == 0);
2423 ASSERT(txinfo->len < 65536);
2424
2425 flits = TXPKTS_WR_HDR + txinfo->nflits;
2426 can_coalesce = (txinfo->flags & HW_LSO) == 0 &&
2427 flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
2428
2429 if (can_coalesce == 0)
2430 return (EINVAL);
2431
2432 /*
2433 * Start a fresh coalesced tx WR with m as the first frame in it.
2434 */
2435 txpkts->tail = m;
2436 txpkts->npkt = 1;
2437 txpkts->nflits = flits;
2438 txpkts->flitp = &eq->desc[eq->pidx].flit[2];
2439 txpkts->plen = txinfo->len;
2440
2441 txsd = &txq->sdesc[eq->pidx];
2442 txsd->mp_head = txsd->mp_tail = m;
2443 txsd->txb_used = txinfo->txb_used;
2444 txsd->hdls_used = txinfo->hdls_used;
2445
2446 return (0);
2447 }
2448
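/*
 * Account for `ndesc` newly written descriptors: they become pending a
 * doorbell, are no longer available, and the producer index advances,
 * wrapping at the ring capacity.
 */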
2449 static inline void
2450 t4_tx_incr_pending(struct sge_txq *txq, uint_t ndesc)
2451 {
2452 struct sge_eq *eq = &txq->eq;
2453
2454 TXQ_LOCK_ASSERT_OWNED(txq);
2455 ASSERT3U(ndesc, !=, 0);
2456 ASSERT3U(eq->avail, >=, ndesc);
2457
2458 eq->pending += ndesc;
2459 eq->avail -= ndesc;
2460 eq->pidx += ndesc;
2461 if (eq->pidx >= eq->cap) {
2462 eq->pidx -= eq->cap;
2463 }
2464 }
2465
2466 /*
2467 * Note that write_txpkts_wr can never run out of hardware descriptors (but
2468 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for
2469 * coalescing only if sufficient hardware descriptors are available.
2470 */
2471 static void
2472 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
2473 {
2474 struct sge_eq *eq = &txq->eq;
2475 struct fw_eth_tx_pkts_wr *wr;
2476 struct tx_sdesc *txsd;
2477 uint32_t ctrl;
2478 uint16_t ndesc;
2479
2480 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */
2481
2482 ndesc = howmany(txpkts->nflits, 8);
2483
2484 wr = (void *)&eq->desc[eq->pidx];
2485 wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
2486 V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
2487 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
2488 if (eq->avail == ndesc)
2489 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2490 wr->equiq_to_len16 = cpu_to_be32(ctrl);
2491 wr->plen = cpu_to_be16(txpkts->plen);
2492 wr->npkt = txpkts->npkt;
2493 wr->r3 = wr->type = 0;
2494
2495 /* Everything else already written */
2496
2497 txsd = &txq->sdesc[eq->pidx];
2498 txsd->desc_used = ndesc;
2499
2500 txq->txb_used += txsd->txb_used / TXB_CHUNK;
2501 txq->hdl_used += txsd->hdls_used;
2502
2503 t4_tx_incr_pending(txq, ndesc);
2504
2505 txq->txpkts_pkts += txpkts->npkt;
2506 txq->txpkts_wrs++;
2507 txpkts->npkt = 0; /* emptied */
2508 }
2509
2510 typedef enum {
2511 COS_SUCCESS, /* ctrl flit contains proper bits for csum offload */
2512 COS_IGNORE, /* no csum offload requested */
2513 COS_FAIL, /* csum offload requested, but pkt data missing */
2514 } csum_offload_status_t;
2515 /*
2516 * Build a ctrl1 flit for checksum offload in CPL_TX_PKT_XT command
2517 */
2518 static csum_offload_status_t
2519 csum_to_ctrl(const struct txinfo *txinfo, uint32_t chip_version,
2520 uint64_t *ctrlp)
2521 {
2522 const mac_ether_offload_info_t *meoi = &txinfo->meoi;
2523 const uint32_t tx_flags = txinfo->flags;
2524 const boolean_t needs_l3_csum = (tx_flags & HW_LSO) != 0 ||
2525 (tx_flags & HCK_IPV4_HDRCKSUM) != 0;
2526 const boolean_t needs_l4_csum = (tx_flags & HW_LSO) != 0 ||
2527 (tx_flags & (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) != 0;
2528
2529 /*
2530 	 * Default to disabling any checksumming, both for cases where it is not
2531 	 * requested and for cases where we cannot appropriately interrogate the
2532 * required information from the packet.
2533 */
2534 uint64_t ctrl = F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS;
2535 if (!needs_l3_csum && !needs_l4_csum) {
2536 *ctrlp = ctrl;
2537 return (COS_IGNORE);
2538 }
2539
2540 if (needs_l3_csum) {
2541 /* Only IPv4 checksums are supported (for L3) */
2542 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0 ||
2543 meoi->meoi_l3proto != ETHERTYPE_IP) {
2544 *ctrlp = ctrl;
2545 return (COS_FAIL);
2546 }
2547 ctrl &= ~F_TXPKT_IPCSUM_DIS;
2548 }
2549
2550 if (needs_l4_csum) {
2551 /*
2552 * We need at least all of the L3 header to make decisions about
2553 * the contained L4 protocol. If not all of the L4 information
2554 * is present, we will leave it to the NIC to checksum all it is
2555 * able to.
2556 */
2557 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0) {
2558 *ctrlp = ctrl;
2559 return (COS_FAIL);
2560 }
2561
2562 /*
2563 	 * Since we are parsing the packet anyway, make the checksum
2564 	 * decision based on the L4 protocol, rather than using the
2565 	 * generic TCP/UDP checksum with start & end offsets in the
2566 	 * packet (as requested with PARTIALCKSUM).
2567 */
2568 int csum_type = -1;
2569 if (meoi->meoi_l3proto == ETHERTYPE_IP &&
2570 meoi->meoi_l4proto == IPPROTO_TCP) {
2571 csum_type = TX_CSUM_TCPIP;
2572 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 &&
2573 meoi->meoi_l4proto == IPPROTO_TCP) {
2574 csum_type = TX_CSUM_TCPIP6;
2575 } else if (meoi->meoi_l3proto == ETHERTYPE_IP &&
2576 meoi->meoi_l4proto == IPPROTO_UDP) {
2577 csum_type = TX_CSUM_UDPIP;
2578 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 &&
2579 meoi->meoi_l4proto == IPPROTO_UDP) {
2580 csum_type = TX_CSUM_UDPIP6;
2581 } else {
2582 *ctrlp = ctrl;
2583 return (COS_FAIL);
2584 }
2585
2586 ASSERT(csum_type != -1);
2587 ctrl &= ~F_TXPKT_L4CSUM_DIS;
2588 ctrl |= V_TXPKT_CSUM_TYPE(csum_type);
2589 }
2590
2591 if ((ctrl & F_TXPKT_IPCSUM_DIS) == 0 &&
2592 (ctrl & F_TXPKT_L4CSUM_DIS) != 0) {
2593 /*
2594 * If only the IPv4 checksum is requested, we need to set an
2595 * appropriate type in the command for it.
2596 */
2597 ctrl |= V_TXPKT_CSUM_TYPE(TX_CSUM_IP);
2598 }
2599
2600 ASSERT(ctrl != (F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS));
2601
2602 /*
2603 * Fill in the requisite L2/L3 header length data.
2604 *
2605 * The Ethernet header length is recorded as 'size - 14 bytes'
2606 */
2607 const uint8_t eth_len = meoi->meoi_l2hlen - 14;
2608 if (chip_version >= CHELSIO_T6) {
2609 ctrl |= V_T6_TXPKT_ETHHDR_LEN(eth_len);
2610 } else {
2611 ctrl |= V_TXPKT_ETHHDR_LEN(eth_len);
2612 }
2613 ctrl |= V_TXPKT_IPHDR_LEN(meoi->meoi_l3hlen);
2614
2615 *ctrlp = ctrl;
2616 return (COS_SUCCESS);
2617 }
2618
2619 static int
2620 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m,
2621 struct txinfo *txinfo)
2622 {
2623 struct sge_eq *eq = &txq->eq;
2624 struct fw_eth_tx_pkt_wr *wr;
2625 struct cpl_tx_pkt_core *cpl;
2626 uint32_t ctrl; /* used in many unrelated places */
2627 uint64_t ctrl1;
2628 int nflits, ndesc;
2629 struct tx_sdesc *txsd;
2630 caddr_t dst;
2631 const mac_ether_offload_info_t *meoi = &txinfo->meoi;
2632
2633 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */
2634
2635 /*
2636 * Do we have enough flits to send this frame out?
2637 */
2638 ctrl = sizeof (struct cpl_tx_pkt_core);
2639 if (txinfo->flags & HW_LSO) {
2640 nflits = TXPKT_LSO_WR_HDR;
2641 ctrl += sizeof (struct cpl_tx_pkt_lso_core);
2642 } else {
2643 nflits = TXPKT_WR_HDR;
2644 }
2645 if (txinfo->nsegs > 0)
2646 nflits += txinfo->nflits;
2647 else {
2648 nflits += howmany(txinfo->len, 8);
2649 ctrl += txinfo->len;
2650 }
2651 ndesc = howmany(nflits, 8);
2652 if (ndesc > eq->avail)
2653 return (ENOMEM);
2654
2655 /* Firmware work request header */
2656 wr = (void *)&eq->desc[eq->pidx];
2657 wr->op_immdlen = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
2658 V_FW_WR_IMMDLEN(ctrl));
2659 ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
2660 if (eq->avail == ndesc)
2661 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2662 wr->equiq_to_len16 = cpu_to_be32(ctrl);
2663 wr->r3 = 0;
2664
2665 if (txinfo->flags & HW_LSO &&
2666 (meoi->meoi_flags & MEOI_L4INFO_SET) != 0 &&
2667 meoi->meoi_l4proto == IPPROTO_TCP) {
2668 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
2669
2670 ctrl = V_LSO_OPCODE((u32)CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
2671 F_LSO_LAST_SLICE;
2672
2673 if (meoi->meoi_l2hlen > sizeof (struct ether_header)) {
2674 /*
2675 * This presently assumes a standard VLAN header,
2676 * without support for Q-in-Q.
2677 */
2678 ctrl |= V_LSO_ETHHDR_LEN(1);
2679 }
2680
2681 switch (meoi->meoi_l3proto) {
2682 case ETHERTYPE_IPV6:
2683 ctrl |= F_LSO_IPV6;
2684 /* FALLTHROUGH */
2685 case ETHERTYPE_IP:
2686 ctrl |= V_LSO_IPHDR_LEN(meoi->meoi_l3hlen / 4);
2687 break;
2688 default:
2689 break;
2690 }
2691
2692 ctrl |= V_LSO_TCPHDR_LEN(meoi->meoi_l4hlen / 4);
2693
2694 lso->lso_ctrl = cpu_to_be32(ctrl);
2695 lso->ipid_ofst = cpu_to_be16(0);
2696 lso->mss = cpu_to_be16(txinfo->mss);
2697 lso->seqno_offset = cpu_to_be32(0);
2698 if (t4_cver_eq(pi->adapter, CHELSIO_T4))
2699 lso->len = cpu_to_be32(txinfo->len);
2700 else
2701 lso->len = cpu_to_be32(V_LSO_T5_XFER_SIZE(txinfo->len));
2702
2703 cpl = (void *)(lso + 1);
2704
2705 txq->tso_wrs++;
2706 } else {
2707 cpl = (void *)(wr + 1);
2708 }
2709
2710 /* Checksum offload */
2711 switch (csum_to_ctrl(txinfo,
2712 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl1)) {
2713 case COS_SUCCESS:
2714 txq->txcsum++;
2715 break;
2716 case COS_FAIL:
2717 /*
2718 		 * The packet will go out with checksums which are probably
2719 		 * wrong, but there is little we can do about it now.
2720 */
2721 txq->csum_failed++;
2722 break;
2723 default:
2724 break;
2725 }
2726
2727 /* CPL header */
2728 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
2729 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2730 cpl->pack = 0;
2731 cpl->len = cpu_to_be16(txinfo->len);
2732 cpl->ctrl1 = cpu_to_be64(ctrl1);
2733
2734 /* Software descriptor */
2735 txsd = &txq->sdesc[eq->pidx];
2736 txsd->mp_head = txsd->mp_tail = m;
2737 txsd->txb_used = txinfo->txb_used;
2738 txsd->hdls_used = txinfo->hdls_used;
2739 txsd->desc_used = ndesc;
2740
2741 txq->txb_used += txinfo->txb_used / TXB_CHUNK;
2742 txq->hdl_used += txinfo->hdls_used;
2743
2744 t4_tx_incr_pending(txq, ndesc);
2745
2746 /* SGL */
2747 dst = (void *)(cpl + 1);
2748 if (txinfo->nsegs > 0) {
2749 txq->sgl_wrs++;
2750 copy_to_txd(eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8);
2751
2752 /* Need to zero-pad to a 16 byte boundary if not on one */
2753 if ((uintptr_t)dst & 0xf)
2754 *(uint64_t *)dst = 0;
2755
2756 } else {
2757 txq->imm_wrs++;
2758 #ifdef DEBUG
2759 ctrl = txinfo->len;
2760 #endif
2761 for (; m; m = m->b_cont) {
2762 copy_to_txd(eq, (void *)m->b_rptr, &dst, MBLKL(m));
2763 #ifdef DEBUG
2764 ctrl -= MBLKL(m);
2765 #endif
2766 }
2767 ASSERT(ctrl == 0);
2768 }
2769
2770 txq->txpkt_wrs++;
2771 return (0);
2772 }
2773
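/*
 * Write a FW_EQ_FLUSH_WR work request into the next descriptor. It carries no
 * payload and exists only to solicit a credit update (SGE_EGR_UPDATE) from the
 * hardware, so its software descriptor is recorded with a NULL mblk chain and
 * a single descriptor used.
 */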
2774 static void
2775 t4_write_flush_wr(struct sge_txq *txq)
2776 {
2777 struct sge_eq *eq = &txq->eq;
2778
2779 EQ_LOCK_ASSERT_OWNED(eq);
2780 ASSERT(eq->avail > 0);
2781
2782 const struct fw_eq_flush_wr wr = {
2783 .opcode = FW_EQ_FLUSH_WR,
2784 .equiq_to_len16 = BE_32(
2785 V_FW_WR_LEN16(sizeof (struct fw_eq_flush_wr) / 16) |
2786 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ),
2787 };
2788 *(struct fw_eq_flush_wr *)&eq->desc[eq->pidx] = wr;
2789
2790 const struct tx_sdesc txsd = {
2791 .mp_head = NULL,
2792 .mp_tail = NULL,
2793 .txb_used = 0,
2794 .hdls_used = 0,
2795 .desc_used = 1,
2796 };
2797 txq->sdesc[eq->pidx] = txsd;
2798
2799 t4_tx_incr_pending(txq, 1);
2800 }
2801
2802 static inline void
2803 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
2804 struct txpkts *txpkts, struct txinfo *txinfo)
2805 {
2806 struct ulp_txpkt *ulpmc;
2807 struct ulptx_idata *ulpsc;
2808 struct cpl_tx_pkt_core *cpl;
2809 uintptr_t flitp, start, end;
2810 uint64_t ctrl;
2811 caddr_t dst;
2812
2813 ASSERT(txpkts->npkt > 0);
2814
2815 start = (uintptr_t)txq->eq.desc;
2816 end = (uintptr_t)txq->eq.spg;
2817
2818 /* Checksum offload */
2819 switch (csum_to_ctrl(txinfo,
2820 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl)) {
2821 case COS_SUCCESS:
2822 txq->txcsum++;
2823 break;
2824 case COS_FAIL:
2825 /*
2826 		 * The packet will go out with checksums which are probably
2827 		 * wrong, but there is little we can do about it now.
2828 */
2829 txq->csum_failed++;
2830 break;
2831 default:
2832 break;
2833 }
2834
2835 /*
2836 * The previous packet's SGL must have ended at a 16 byte boundary (this
2837 * is required by the firmware/hardware). It follows that flitp cannot
2838 * wrap around between the ULPTX master command and ULPTX subcommand (8
2839 	 * bytes each), and that it cannot wrap around in the middle of the
2840 * cpl_tx_pkt_core either.
2841 */
2842 flitp = (uintptr_t)txpkts->flitp;
2843 ASSERT((flitp & 0xf) == 0);
2844
2845 /* ULP master command */
2846 ulpmc = (void *)flitp;
2847 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
2848 ulpmc->len = htonl(howmany(sizeof (*ulpmc) + sizeof (*ulpsc) +
2849 sizeof (*cpl) + 8 * txinfo->nflits, 16));
2850
2851 /* ULP subcommand */
2852 ulpsc = (void *)(ulpmc + 1);
2853 ulpsc->cmd_more = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
2854 F_ULP_TX_SC_MORE);
2855 ulpsc->len = cpu_to_be32(sizeof (struct cpl_tx_pkt_core));
2856
2857 flitp += sizeof (*ulpmc) + sizeof (*ulpsc);
2858 if (flitp == end)
2859 flitp = start;
2860
2861 /* CPL_TX_PKT_XT */
2862 cpl = (void *)flitp;
2863 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
2864 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2865 cpl->pack = 0;
2866 cpl->len = cpu_to_be16(txinfo->len);
2867 cpl->ctrl1 = cpu_to_be64(ctrl);
2868
2869 flitp += sizeof (*cpl);
2870 if (flitp == end)
2871 flitp = start;
2872
2873 /* SGL for this frame */
2874 dst = (caddr_t)flitp;
2875 copy_to_txd(&txq->eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8);
2876 flitp = (uintptr_t)dst;
2877
2878 /* Zero pad and advance to a 16 byte boundary if not already at one. */
2879 if (flitp & 0xf) {
2880
2881 /* no matter what, flitp should be on an 8 byte boundary */
2882 ASSERT((flitp & 0x7) == 0);
2883
2884 *(uint64_t *)flitp = 0;
2885 flitp += sizeof (uint64_t);
2886 txpkts->nflits++;
2887 }
2888
2889 if (flitp == end)
2890 flitp = start;
2891
2892 txpkts->flitp = (void *)flitp;
2893 }
2894
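/*
 * Copy `len` bytes into the descriptor ring at *to, continuing at the start
 * of the ring if the copy would run into the status page at the end, and
 * advance *to past the copied data.
 */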
2895 static inline void
2896 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
2897 {
2898 if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
2899 bcopy(from, *to, len);
2900 (*to) += len;
2901 } else {
2902 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
2903
2904 bcopy(from, *to, portion);
2905 from += portion;
2906 portion = len - portion; /* remaining */
2907 bcopy(from, (void *)eq->desc, portion);
2908 (*to) = (caddr_t)eq->desc + portion;
2909 }
2910 }
2911
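/*
 * Sync freshly written tx descriptors out to the device and ring the egress
 * queue doorbell for them, using the best mechanism the queue supports: a
 * write-combined copy of the descriptor (WCWR), the user doorbell (UDB or
 * UDBWC), or the kernel doorbell register (KDB).
 */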
2912 static void
2913 t4_tx_ring_db(struct sge_txq *txq)
2914 {
2915 struct sge_eq *eq = &txq->eq;
2916 struct adapter *sc = txq->port->adapter;
2917 int val, db_mode;
2918 t4_doorbells_t db = eq->doorbells;
2919
2920 EQ_LOCK_ASSERT_OWNED(eq);
2921
2922 if (eq->pending > 1)
2923 db &= ~DOORBELL_WCWR;
2924
2925 if (eq->pending > eq->pidx) {
2926 int offset = eq->cap - (eq->pending - eq->pidx);
2927
2928 /* pidx has wrapped around since last doorbell */
2929
2930 (void) ddi_dma_sync(eq->desc_dhdl,
2931 offset * sizeof (struct tx_desc), 0,
2932 DDI_DMA_SYNC_FORDEV);
2933 (void) ddi_dma_sync(eq->desc_dhdl,
2934 0, eq->pidx * sizeof (struct tx_desc),
2935 DDI_DMA_SYNC_FORDEV);
2936 } else if (eq->pending > 0) {
2937 (void) ddi_dma_sync(eq->desc_dhdl,
2938 (eq->pidx - eq->pending) * sizeof (struct tx_desc),
2939 eq->pending * sizeof (struct tx_desc),
2940 DDI_DMA_SYNC_FORDEV);
2941 }
2942
2943 membar_producer();
2944
2945 if (t4_cver_eq(sc, CHELSIO_T4))
2946 val = V_PIDX(eq->pending);
2947 else
2948 val = V_PIDX_T5(eq->pending);
2949
2950 db_mode = (1 << (ffs(db) - 1));
2951 switch (db_mode) {
2952 case DOORBELL_WCWR: {
2953 /*
2954 * Queues whose 128B doorbell segment fits in
2955 * the page do not use relative qid
2956 * (udb_qid is always 0). Only queues with
2957 * doorbell segments can do WCWR.
2958 */
2959 ASSERT(eq->udb_qid == 0 && eq->pending == 1);
2960
2961 const uint_t desc_idx =
2962 eq->pidx != 0 ? eq->pidx - 1 : eq->cap - 1;
2963 uint64_t *src = (uint64_t *)&eq->desc[desc_idx];
2964 volatile uint64_t *dst =
2965 (uint64_t *)(eq->udb + UDBS_WR_OFFSET);
2966
2967 /* Copy the 8 flits of the TX descriptor to the DB */
2968 const uint_t flit_count =
2969 sizeof (struct tx_desc) / sizeof (uint64_t);
2970 for (uint_t i = 0; i < flit_count; i++) {
2971 /*
2972 * Perform the copy directly through the BAR
2973 * mapping, rather than using ddi_put64().
2974 *
2975 * The latter was found to impose a significant
2976 * performance burden when called in this loop.
2977 */
2978 dst[i] = src[i];
2979 }
2980
2981 membar_producer();
2982 break;
2983 }
2984
2985 case DOORBELL_UDB:
2986 case DOORBELL_UDBWC:
2987 ddi_put32(sc->bar2_hdl,
2988 (uint32_t *)(eq->udb + UDBS_DB_OFFSET),
2989 LE_32(V_QID(eq->udb_qid) | val));
2990 membar_producer();
2991 break;
2992
2993 case DOORBELL_KDB:
2994 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
2995 V_QID(eq->cntxt_id) | val);
2996 break;
2997 }
2998
2999 eq->pending = 0;
3000 }
3001
3002 /*
3003  * Reclaim consumed descriptors from the egress queue. This will be capped at
3004  * an upper bound of `howmany`. The corresponding mblks will be freed inline,
3005 * unless a non-NULL `defer_freemp` is provided, in which case the to-be-freed
3006 * mblk chain will be provided to the caller.
3007 *
3008 * Returns the number of descriptors which underwent reclamation.
3009 */
3010 static uint_t
3011 t4_tx_reclaim_descs(struct sge_txq *txq, uint_t howmany, mblk_t **defer_freemp)
3012 {
3013 struct sge_eq *eq = &txq->eq;
3014
3015 EQ_LOCK_ASSERT_OWNED(eq);
3016
3017 const uint_t cur_cidx = BE_16(eq->spg->cidx);
3018 const uint_t reclaim_avail = (cur_cidx >= eq->cidx) ?
3019 (cur_cidx - eq->cidx) : (cur_cidx + eq->cap - eq->cidx);
3020
3021 if (reclaim_avail == 0) {
3022 return (0);
3023 }
3024
3025 uint_t txb_freed = 0, hdl_freed = 0, reclaimed = 0;
3026 do {
3027 struct tx_sdesc *txsd = &txq->sdesc[eq->cidx];
3028 const uint_t ndesc = txsd->desc_used;
3029
3030 /* Firmware doesn't return "partial" credits. */
3031 ASSERT3U(reclaimed + ndesc, <=, reclaim_avail);
3032
3033 if (txsd->mp_head != NULL) {
3034 /*
3035 			 * Even when the packet contents fit entirely in the
3036 			 * immediate buffer, the mblk is kept around until the
3037 * transmission completes.
3038 */
3039 if (defer_freemp != NULL) {
3040 /*
3041 * Append the mblk chain from this descriptor
3042 * onto the end of the defer list.
3043 *
3044 * In the case that this is the first mblk we
3045 * have processed, the below assignment will
3046 * communicate the head of the chain to the
3047 * caller.
3048 */
3049 *defer_freemp = txsd->mp_head;
3050 defer_freemp = &txsd->mp_tail->b_next;
3051 } else {
3052 freemsgchain(txsd->mp_head);
3053 }
3054 txsd->mp_head = txsd->mp_tail = NULL;
3055 } else {
3056 /*
3057 * If mblk is NULL, this has to be the software
3058 * descriptor for a credit flush work request.
3059 */
3060 ASSERT0(txsd->txb_used);
3061 ASSERT0(txsd->hdls_used);
3062 ASSERT3U(ndesc, ==, 1);
3063 }
3064
3065 txb_freed += txsd->txb_used;
3066 hdl_freed += txsd->hdls_used;
3067 reclaimed += ndesc;
3068
3069 eq->cidx += ndesc;
3070 if (eq->cidx >= eq->cap) {
3071 eq->cidx -= eq->cap;
3072 }
3073 } while (reclaimed < reclaim_avail && reclaimed < howmany);
3074
3075 eq->avail += reclaimed;
3076 txq->txb_avail += txb_freed;
3077 txq->tx_dhdl_avail += hdl_freed;
3078
3079 ASSERT3U(eq->avail, <, eq->cap);
3080 ASSERT3U(txq->tx_dhdl_avail, <=, txq->tx_dhdl_total);
3081
3082 for (; hdl_freed; hdl_freed--) {
3083 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_cidx]);
3084 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total)
3085 txq->tx_dhdl_cidx = 0;
3086 }
3087
3088 return (reclaimed);
3089 }
3090
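/*
 * Dispatch an ingress CPL message to the appropriate handler based on its
 * opcode. Firmware messages and egress updates arrive without an associated
 * mblk; unrecognized opcodes are logged and any attached mblk is freed.
 */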
3091 static int
3092 t4_handle_cpl_msg(struct sge_iq *iq, const struct rss_header *rss, mblk_t *mp)
3093 {
3094 const uint8_t opcode = rss->opcode;
3095
3096 DTRACE_PROBE4(t4__cpl_msg, struct sge_iq *, iq, uint8_t, opcode,
3097 const struct rss_header *, rss, mblk_t *, mp);
3098
3099 switch (opcode) {
3100 case CPL_FW4_MSG:
3101 case CPL_FW6_MSG:
3102 ASSERT3P(mp, ==, NULL);
3103 return (t4_handle_fw_msg(iq, rss));
3104 case CPL_SGE_EGR_UPDATE:
3105 ASSERT3P(mp, ==, NULL);
3106 t4_sge_egr_update(iq, rss);
3107 return (0);
3108 case CPL_RX_PKT:
3109 return (t4_eth_rx(iq, rss, mp));
3110 default:
3111 cxgb_printf(iq->adapter->dip, CE_WARN,
3112 "unhandled CPL opcode 0x%02x", opcode);
3113 if (mp != NULL) {
3114 freemsg(mp);
3115 }
3116 return (0);
3117 }
3118 }
3119
3120 static int
3121 t4_handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss)
3122 {
3123 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
3124 const uint8_t msg_type = cpl->type;
3125 const struct rss_header *rss2;
3126 struct adapter *sc = iq->adapter;
3127
3128 DTRACE_PROBE3(t4__fw_msg, struct sge_iq *, iq, uint8_t, msg_type,
3129 const struct rss_header *, rss);
3130
3131 switch (msg_type) {
3132 case FW_TYPE_RSSCPL: /* also synonym for FW6_TYPE_RSSCPL */
3133 rss2 = (const struct rss_header *)&cpl->data[0];
3134 return (t4_handle_cpl_msg(iq, rss2, NULL));
3135 case FW6_TYPE_CMD_RPL:
3136 return (t4_handle_fw_rpl(sc, &cpl->data[0]));
3137 default:
3138 cxgb_printf(sc->dip, CE_WARN,
3139 "unhandled fw_msg type 0x%02x", msg_type);
3140 return (0);
3141 }
3142 }
3143
3144 static int
3145 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m)
3146 {
3147 bool csum_ok;
3148 uint16_t err_vec;
3149 struct sge_rxq *rxq = (void *)iq;
3150 struct mblk_pair chain = {0};
3151 struct adapter *sc = iq->adapter;
3152 const struct cpl_rx_pkt *cpl = t4_rss_payload(rss);
3153
3154 m->b_rptr += sc->sge.pktshift;
3155
3156 /* Compressed error vector is enabled for T6 only */
3157 if (sc->params.tp.rx_pkt_encap)
3158 /* It is enabled only in T6 config file */
3159 err_vec = G_T6_COMPR_RXERR_VEC(ntohs(cpl->err_vec));
3160 else
3161 err_vec = ntohs(cpl->err_vec);
3162
3163 csum_ok = cpl->csum_calc && !err_vec;
3164 /* TODO: what about cpl->ip_frag? */
3165 if (csum_ok && !cpl->ip_frag) {
3166 mac_hcksum_set(m, 0, 0, 0, 0xffff,
3167 HCK_FULLCKSUM_OK | HCK_FULLCKSUM |
3168 HCK_IPV4_HDRCKSUM_OK);
3169 rxq->rxcsum++;
3170 }
3171
3172 /* Add to the chain that we'll send up */
3173 if (chain.head != NULL)
3174 chain.tail->b_next = m;
3175 else
3176 chain.head = m;
3177 chain.tail = m;
3178
3179 t4_mac_rx(rxq->port, rxq, chain.head);
3180
3181 rxq->rxpkts++;
3182 rxq->rxbytes += be16_to_cpu(cpl->len);
3183 return (0);
3184 }
3185
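/*
 * The hardware manages free-list credits in units of 8 descriptors; FL_HW_IDX
 * converts a software descriptor index (or count) into those units.
 */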
3186 #define FL_HW_IDX(idx) ((idx) >> 3)
3187
3188 static inline void
3189 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
3190 {
3191 int desc_start, desc_last, ndesc;
3192 uint32_t v = sc->params.arch.sge_fl_db;
3193
3194 ndesc = FL_HW_IDX(fl->pending);
3195
3196 /* Hold back one credit if pidx = cidx */
3197 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
3198 ndesc--;
3199
3200 /*
3201 	 * ndesc may have been reduced above (to avoid pidx = cidx).
3202 	 * If there is nothing left to post, return.
3203 */
3204 if (ndesc <= 0)
3205 return;
3206
3207 desc_last = FL_HW_IDX(fl->pidx);
3208
3209 if (fl->pidx < fl->pending) {
3210 /* There was a wrap */
3211 desc_start = FL_HW_IDX(fl->pidx + fl->cap - fl->pending);
3212
3213 /* From desc_start to the end of list */
3214 (void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE, 0,
3215 DDI_DMA_SYNC_FORDEV);
3216
3217 /* From start of list to the desc_last */
3218 if (desc_last != 0)
3219 (void) ddi_dma_sync(fl->dhdl, 0, desc_last *
3220 RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV);
3221 } else {
3222 /* There was no wrap, sync from start_desc to last_desc */
3223 desc_start = FL_HW_IDX(fl->pidx - fl->pending);
3224 (void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE,
3225 ndesc * RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV);
3226 }
3227
3228 if (t4_cver_eq(sc, CHELSIO_T4))
3229 v |= V_PIDX(ndesc);
3230 else
3231 v |= V_PIDX_T5(ndesc);
3232 	v |= V_QID(fl->cntxt_id);
3233
3234 membar_producer();
3235
3236 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
3237
3238 /*
3239 	 * Update the pending count: deduct the number of descriptors posted.
3240 	 * Each unit of ndesc accounts for 8 free-list descriptors.
3241 */
3242 fl->pending -= ndesc * 8;
3243 }
3244
3245 static void
3246 t4_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss)
3247 {
3248 struct adapter *sc = iq->adapter;
3249 const struct cpl_sge_egr_update *cpl = t4_rss_payload(rss);
3250 const uint_t qid = G_EGR_QID(BE_32(cpl->opcode_qid));
3251 struct sge_txq *txq = (struct sge_txq *)(*t4_eqmap_slot(sc, qid));
3252 struct sge_eq *eq = &txq->eq;
3253
3254 /*
3255 * Get a "live" snapshot of the flags and PIDX state from the TXQ.
3256 *
3257 * This is done without the protection of the TXQ/EQ lock, since the
3258 * gathered information is used to avoid contending on that lock for the
3259 * reclaim.
3260 */
3261 membar_consumer();
3262 const uint16_t live_pidx = BE_16(eq->pidx);
3263 const t4_eq_flags_t live_flags = eq->flags;
3264
3265 if ((live_flags & EQ_CORKED) == 0 &&
3266 (cpl->pidx != cpl->cidx || live_pidx != cpl->cidx)) {
3267 /*
3268 * A reclaim of the ring can be skipped if:
3269 *
3270 	 * 1. The EQ is not in the "corked" state, where it was unable to
3271 	 * allocate descriptors (or memory) while attempting to place
3272 * a packet in the TXQ.
3273 *
3274 * 2. There are additional transmit descriptors in the EQ which
3275 * will trigger a subsequent SGE_EGR_UPDATE notification.
3276 *
3277 * When those conditions are met, it is safe to skip performing
3278 * a reclaim here, reducing the chance that we contend with
3279 * other transmission activity against the TXQ.
3280 */
3281 DTRACE_PROBE2(t4__elide__reclaim,
3282 struct sge_txq *, txq, struct cpl_sge_egr_update *, cpl);
3283 return;
3284 }
3285
3286 mblk_t *freemp = NULL;
3287 bool do_mac_update = false;
3288
3289 TXQ_LOCK(txq);
3290 (void) t4_tx_reclaim_descs(txq, eq->qsize, &freemp);
3291 if (eq->flags & EQ_CORKED && eq->avail != 0) {
3292 do_mac_update = true;
3293 eq->flags &= ~EQ_CORKED;
3294 }
3295 TXQ_UNLOCK(txq);
3296
3297 freemsgchain(freemp);
3298 if (do_mac_update) {
3299 t4_mac_tx_update(txq->port, txq);
3300 }
3301 }
3302
3303 #define KS_UINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_ULONG)
3304 #define KS_CINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_CHAR)
3305 #define KS_U_SET(x, y) kstatp->x.value.ul = (y)
3306 #define KS_U_FROM(x, y) kstatp->x.value.ul = (y)->x
3307 #define KS_C_SET(x, ...) \
3308 (void) snprintf(kstatp->x.value.c, 16, __VA_ARGS__)
3309
3310 /*
3311 * cxgbe:X:config
3312 */
3313 struct cxgbe_port_config_kstats {
3314 kstat_named_t idx;
3315 kstat_named_t nrxq;
3316 kstat_named_t ntxq;
3317 kstat_named_t first_rxq;
3318 kstat_named_t first_txq;
3319 kstat_named_t controller;
3320 kstat_named_t factory_mac_address;
3321 };
3322
3323 /*
3324 * cxgbe:X:info
3325 */
3326 struct cxgbe_port_info_kstats {
3327 kstat_named_t transceiver;
3328 kstat_named_t rx_ovflow0;
3329 kstat_named_t rx_ovflow1;
3330 kstat_named_t rx_ovflow2;
3331 kstat_named_t rx_ovflow3;
3332 kstat_named_t rx_trunc0;
3333 kstat_named_t rx_trunc1;
3334 kstat_named_t rx_trunc2;
3335 kstat_named_t rx_trunc3;
3336 kstat_named_t tx_pause;
3337 kstat_named_t rx_pause;
3338 };
3339
3340 static kstat_t *
3341 setup_port_config_kstats(struct port_info *pi)
3342 {
3343 kstat_t *ksp;
3344 struct cxgbe_port_config_kstats *kstatp;
3345 int ndata;
3346 dev_info_t *pdip = ddi_get_parent(pi->dip);
3347 uint8_t *ma = &pi->hw_addr[0];
3348
3349 ndata = sizeof (struct cxgbe_port_config_kstats) /
3350 sizeof (kstat_named_t);
3351
3352 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "config",
3353 "net", KSTAT_TYPE_NAMED, ndata, 0);
3354 if (ksp == NULL) {
3355 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats.");
3356 return (NULL);
3357 }
3358
3359 kstatp = (struct cxgbe_port_config_kstats *)ksp->ks_data;
3360
3361 KS_UINIT(idx);
3362 KS_UINIT(nrxq);
3363 KS_UINIT(ntxq);
3364 KS_UINIT(first_rxq);
3365 KS_UINIT(first_txq);
3366 KS_CINIT(controller);
3367 KS_CINIT(factory_mac_address);
3368
3369 KS_U_SET(idx, pi->port_id);
3370 KS_U_SET(nrxq, pi->nrxq);
3371 KS_U_SET(ntxq, pi->ntxq);
3372 KS_U_SET(first_rxq, pi->first_rxq);
3373 KS_U_SET(first_txq, pi->first_txq);
3374 KS_C_SET(controller, "%s%d", ddi_driver_name(pdip),
3375 ddi_get_instance(pdip));
3376 KS_C_SET(factory_mac_address, "%02X%02X%02X%02X%02X%02X",
3377 ma[0], ma[1], ma[2], ma[3], ma[4], ma[5]);
3378
3379 /* Do NOT set ksp->ks_update. These kstats do not change. */
3380
3381 /* Install the kstat */
3382 ksp->ks_private = (void *)pi;
3383 kstat_install(ksp);
3384
3385 return (ksp);
3386 }
3387
3388 static kstat_t *
3389 setup_port_info_kstats(struct port_info *pi)
3390 {
3391 kstat_t *ksp;
3392 struct cxgbe_port_info_kstats *kstatp;
3393 int ndata;
3394
3395 ndata = sizeof (struct cxgbe_port_info_kstats) / sizeof (kstat_named_t);
3396
3397 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "info",
3398 "net", KSTAT_TYPE_NAMED, ndata, 0);
3399 if (ksp == NULL) {
3400 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats.");
3401 return (NULL);
3402 }
3403
3404 kstatp = (struct cxgbe_port_info_kstats *)ksp->ks_data;
3405
3406 KS_CINIT(transceiver);
3407 KS_UINIT(rx_ovflow0);
3408 KS_UINIT(rx_ovflow1);
3409 KS_UINIT(rx_ovflow2);
3410 KS_UINIT(rx_ovflow3);
3411 KS_UINIT(rx_trunc0);
3412 KS_UINIT(rx_trunc1);
3413 KS_UINIT(rx_trunc2);
3414 KS_UINIT(rx_trunc3);
3415 KS_UINIT(tx_pause);
3416 KS_UINIT(rx_pause);
3417
3418 /* Install the kstat */
3419 ksp->ks_update = update_port_info_kstats;
3420 ksp->ks_private = (void *)pi;
3421 kstat_install(ksp);
3422
3423 return (ksp);
3424 }
3425
3426 static int
3427 update_port_info_kstats(kstat_t *ksp, int rw)
3428 {
3429 struct cxgbe_port_info_kstats *kstatp =
3430 (struct cxgbe_port_info_kstats *)ksp->ks_data;
3431 struct port_info *pi = ksp->ks_private;
3432 static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX",
3433 "active TWINAX", "LRM" };
3434 uint32_t bgmap;
3435
3436 if (rw == KSTAT_WRITE)
3437 return (0);
3438
3439 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
3440 KS_C_SET(transceiver, "unplugged");
3441 else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
3442 KS_C_SET(transceiver, "unknown");
3443 else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
3444 KS_C_SET(transceiver, "unsupported");
3445 else if (pi->mod_type > 0 && pi->mod_type < ARRAY_SIZE(mod_str))
3446 KS_C_SET(transceiver, "%s", mod_str[pi->mod_type]);
3447 else
3448 KS_C_SET(transceiver, "type %d", pi->mod_type);
3449
3450 #define GET_STAT(name) t4_read_reg64(pi->adapter, \
3451 PORT_REG(pi->port_id, A_MPS_PORT_STAT_##name##_L))
3452 #define GET_STAT_COM(name) t4_read_reg64(pi->adapter, \
3453 A_MPS_STAT_##name##_L)
3454
3455 bgmap = G_NUMPORTS(t4_read_reg(pi->adapter, A_MPS_CMN_CTL));
3456 if (bgmap == 0)
3457 bgmap = (pi->port_id == 0) ? 0xf : 0;
3458 else if (bgmap == 1)
3459 bgmap = (pi->port_id < 2) ? (3 << (2 * pi->port_id)) : 0;
3460 else
3461 bgmap = 1;
3462
3463 KS_U_SET(rx_ovflow0, (bgmap & 1) ?
3464 GET_STAT_COM(RX_BG_0_MAC_DROP_FRAME) : 0);
3465 KS_U_SET(rx_ovflow1, (bgmap & 2) ?
3466 GET_STAT_COM(RX_BG_1_MAC_DROP_FRAME) : 0);
3467 KS_U_SET(rx_ovflow2, (bgmap & 4) ?
3468 GET_STAT_COM(RX_BG_2_MAC_DROP_FRAME) : 0);
3469 KS_U_SET(rx_ovflow3, (bgmap & 8) ?
3470 GET_STAT_COM(RX_BG_3_MAC_DROP_FRAME) : 0);
3471 KS_U_SET(rx_trunc0, (bgmap & 1) ?
3472 GET_STAT_COM(RX_BG_0_MAC_TRUNC_FRAME) : 0);
3473 KS_U_SET(rx_trunc1, (bgmap & 2) ?
3474 GET_STAT_COM(RX_BG_1_MAC_TRUNC_FRAME) : 0);
3475 KS_U_SET(rx_trunc2, (bgmap & 4) ?
3476 GET_STAT_COM(RX_BG_2_MAC_TRUNC_FRAME) : 0);
3477 KS_U_SET(rx_trunc3, (bgmap & 8) ?
3478 GET_STAT_COM(RX_BG_3_MAC_TRUNC_FRAME) : 0);
3479
3480 KS_U_SET(tx_pause, GET_STAT(TX_PORT_PAUSE));
3481 KS_U_SET(rx_pause, GET_STAT(RX_PORT_PAUSE));
3482
3483 return (0);
3484
3485 }
3486
3487 /*
3488 * cxgbe:X:rxqY
3489 */
3490 struct rxq_kstats {
3491 kstat_named_t rxcsum;
3492 kstat_named_t rxpkts;
3493 kstat_named_t rxbytes;
3494 kstat_named_t nomem;
3495 };
3496
3497 static kstat_t *
3498 setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq, int idx)
3499 {
3500 struct kstat *ksp;
3501 struct rxq_kstats *kstatp;
3502 int ndata;
3503 char str[16];
3504
3505 ndata = sizeof (struct rxq_kstats) / sizeof (kstat_named_t);
3506 (void) snprintf(str, sizeof (str), "rxq%u", idx);
3507
3508 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "rxq",
3509 KSTAT_TYPE_NAMED, ndata, 0);
3510 if (ksp == NULL) {
3511 cxgb_printf(pi->dip, CE_WARN,
3512 "%s: failed to initialize rxq kstats for queue %d.",
3513 __func__, idx);
3514 return (NULL);
3515 }
3516
3517 kstatp = (struct rxq_kstats *)ksp->ks_data;
3518
3519 KS_UINIT(rxcsum);
3520 KS_UINIT(rxpkts);
3521 KS_UINIT(rxbytes);
3522 KS_UINIT(nomem);
3523
3524 ksp->ks_update = update_rxq_kstats;
3525 ksp->ks_private = (void *)rxq;
3526 kstat_install(ksp);
3527
3528 return (ksp);
3529 }
3530
3531 static int
3532 update_rxq_kstats(kstat_t *ksp, int rw)
3533 {
3534 struct rxq_kstats *kstatp = (struct rxq_kstats *)ksp->ks_data;
3535 struct sge_rxq *rxq = ksp->ks_private;
3536
3537 if (rw == KSTAT_WRITE)
3538 return (0);
3539
3540 KS_U_FROM(rxcsum, rxq);
3541 KS_U_FROM(rxpkts, rxq);
3542 KS_U_FROM(rxbytes, rxq);
3543 KS_U_FROM(nomem, rxq);
3544
3545 return (0);
3546 }
3547
3548 /*
3549 * cxgbe:X:txqY
3550 */
3551 struct txq_kstats {
3552 kstat_named_t txcsum;
3553 kstat_named_t tso_wrs;
3554 kstat_named_t imm_wrs;
3555 kstat_named_t sgl_wrs;
3556 kstat_named_t txpkt_wrs;
3557 kstat_named_t txpkts_wrs;
3558 kstat_named_t txpkts_pkts;
3559 kstat_named_t txb_used;
3560 kstat_named_t hdl_used;
3561 kstat_named_t txb_full;
3562 kstat_named_t dma_hdl_failed;
3563 kstat_named_t dma_map_failed;
3564 kstat_named_t qfull;
3565 kstat_named_t pullup_early;
3566 kstat_named_t pullup_late;
3567 kstat_named_t pullup_failed;
3568 kstat_named_t csum_failed;
3569 };
3570
3571 static kstat_t *
3572 setup_txq_kstats(struct port_info *pi, struct sge_txq *txq, int idx)
3573 {
3574 struct kstat *ksp;
3575 struct txq_kstats *kstatp;
3576 int ndata;
3577 char str[16];
3578
3579 ndata = sizeof (struct txq_kstats) / sizeof (kstat_named_t);
3580 (void) snprintf(str, sizeof (str), "txq%u", idx);
3581
3582 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "txq",
3583 KSTAT_TYPE_NAMED, ndata, 0);
3584 if (ksp == NULL) {
3585 cxgb_printf(pi->dip, CE_WARN,
3586 "%s: failed to initialize txq kstats for queue %d.",
3587 __func__, idx);
3588 return (NULL);
3589 }
3590
3591 kstatp = (struct txq_kstats *)ksp->ks_data;
3592
3593 KS_UINIT(txcsum);
3594 KS_UINIT(tso_wrs);
3595 KS_UINIT(imm_wrs);
3596 KS_UINIT(sgl_wrs);
3597 KS_UINIT(txpkt_wrs);
3598 KS_UINIT(txpkts_wrs);
3599 KS_UINIT(txpkts_pkts);
3600 KS_UINIT(txb_used);
3601 KS_UINIT(hdl_used);
3602 KS_UINIT(txb_full);
3603 KS_UINIT(dma_hdl_failed);
3604 KS_UINIT(dma_map_failed);
3605 KS_UINIT(qfull);
3606 KS_UINIT(pullup_early);
3607 KS_UINIT(pullup_late);
3608 KS_UINIT(pullup_failed);
3609 KS_UINIT(csum_failed);
3610
3611 ksp->ks_update = update_txq_kstats;
3612 ksp->ks_private = (void *)txq;
3613 kstat_install(ksp);
3614
3615 return (ksp);
3616 }
3617
3618 static int
3619 update_txq_kstats(kstat_t *ksp, int rw)
3620 {
3621 struct txq_kstats *kstatp = (struct txq_kstats *)ksp->ks_data;
3622 struct sge_txq *txq = ksp->ks_private;
3623
3624 if (rw == KSTAT_WRITE)
3625 return (0);
3626
3627 KS_U_FROM(txcsum, txq);
3628 KS_U_FROM(tso_wrs, txq);
3629 KS_U_FROM(imm_wrs, txq);
3630 KS_U_FROM(sgl_wrs, txq);
3631 KS_U_FROM(txpkt_wrs, txq);
3632 KS_U_FROM(txpkts_wrs, txq);
3633 KS_U_FROM(txpkts_pkts, txq);
3634 KS_U_FROM(txb_used, txq);
3635 KS_U_FROM(hdl_used, txq);
3636 KS_U_FROM(txb_full, txq);
3637 KS_U_FROM(dma_hdl_failed, txq);
3638 KS_U_FROM(dma_map_failed, txq);
3639 KS_U_FROM(qfull, txq);
3640 KS_U_FROM(pullup_early, txq);
3641 KS_U_FROM(pullup_late, txq);
3642 KS_U_FROM(pullup_failed, txq);
3643 KS_U_FROM(csum_failed, txq);
3644
3645 return (0);
3646 }
3647
3648 static int rxbuf_ctor(void *, void *, int);
3649 static void rxbuf_dtor(void *, void *);
3650
3651 static kmem_cache_t *
3652 rxbuf_cache_create(struct rxbuf_cache_params *p)
3653 {
3654 char name[32];
3655
3656 (void) snprintf(name, sizeof (name), "%s%d_rxbuf_cache",
3657 ddi_driver_name(p->dip), ddi_get_instance(p->dip));
3658
3659 	return (kmem_cache_create(name, sizeof (struct rxbuf), _CACHE_LINE_SIZE,
3660 	    rxbuf_ctor, rxbuf_dtor, NULL, p, NULL, 0));
3661 }
3662
3663 /*
3664  * If ref_cnt is more than 1, then that many calls to rxbuf_free will
3665 * have to be made before the rxb is released back to the kmem_cache.
3666 */
3667 static struct rxbuf *
3668 rxbuf_alloc(kmem_cache_t *cache, int kmflags, uint_t ref_cnt)
3669 {
3670 struct rxbuf *rxb;
3671
3672 ASSERT(ref_cnt > 0);
3673
3674 rxb = kmem_cache_alloc(cache, kmflags);
3675 if (rxb != NULL) {
3676 rxb->ref_cnt = ref_cnt;
3677 rxb->cache = cache;
3678 }
3679
3680 return (rxb);
3681 }
3682
3683 /*
3684 * This is normally called via the rxb's freefunc, when an mblk referencing the
3685 * rxb is freed.
3686 */
3687 static void
3688 rxbuf_free(struct rxbuf *rxb)
3689 {
3690 if (atomic_dec_uint_nv(&rxb->ref_cnt) == 0)
3691 kmem_cache_free(rxb->cache, rxb);
3692 }
3693
3694 static int
3695 rxbuf_ctor(void *arg1, void *arg2, int kmflag)
3696 {
3697 struct rxbuf *rxb = arg1;
3698 struct rxbuf_cache_params *p = arg2;
3699 size_t real_len;
3700 ddi_dma_cookie_t cookie;
3701 uint_t ccount = 0;
3702 int (*callback)(caddr_t);
3703 int rc = ENOMEM;
3704
3705 if ((kmflag & KM_NOSLEEP) != 0)
3706 callback = DDI_DMA_DONTWAIT;
3707 else
3708 callback = DDI_DMA_SLEEP;
3709
3710 rc = ddi_dma_alloc_handle(p->dip, &p->dma_attr_rx, callback, 0,
3711 &rxb->dhdl);
3712 if (rc != DDI_SUCCESS)
3713 return (rc == DDI_DMA_BADATTR ? EINVAL : ENOMEM);
3714
3715 rc = ddi_dma_mem_alloc(rxb->dhdl, p->buf_size, &p->acc_attr_rx,
3716 DDI_DMA_STREAMING, callback, 0, &rxb->va, &real_len, &rxb->ahdl);
3717 if (rc != DDI_SUCCESS) {
3718 rc = ENOMEM;
3719 goto fail1;
3720 }
3721
3722 rc = ddi_dma_addr_bind_handle(rxb->dhdl, NULL, rxb->va, p->buf_size,
3723 DDI_DMA_READ | DDI_DMA_STREAMING, NULL, NULL, &cookie, &ccount);
3724 if (rc != DDI_DMA_MAPPED) {
3725 if (rc == DDI_DMA_INUSE)
3726 rc = EBUSY;
3727 else if (rc == DDI_DMA_TOOBIG)
3728 rc = E2BIG;
3729 else
3730 rc = ENOMEM;
3731 goto fail2;
3732 }
3733
3734 if (ccount != 1) {
3735 rc = E2BIG;
3736 goto fail3;
3737 }
3738
3739 rxb->ref_cnt = 0;
3740 rxb->buf_size = p->buf_size;
3741 rxb->freefunc.free_arg = (caddr_t)rxb;
3742 rxb->freefunc.free_func = rxbuf_free;
3743 rxb->ba = cookie.dmac_laddress;
3744
3745 return (0);
3746
3747 fail3: (void) ddi_dma_unbind_handle(rxb->dhdl);
3748 fail2: ddi_dma_mem_free(&rxb->ahdl);
3749 fail1: ddi_dma_free_handle(&rxb->dhdl);
3750 return (rc);
3751 }
3752
3753 static void
3754 rxbuf_dtor(void *arg1, void *arg2)
3755 {
3756 struct rxbuf *rxb = arg1;
3757
3758 (void) ddi_dma_unbind_handle(rxb->dhdl);
3759 ddi_dma_mem_free(&rxb->ahdl);
3760 ddi_dma_free_handle(&rxb->dhdl);
3761 }
3762