1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * This file is part of the Chelsio T4 support code.
14 *
15 * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved.
16 *
17 * This program is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this
20 * release for licensing terms and conditions.
21 */
22
23 /*
24 * Copyright 2024 Oxide Computer Company
25 */
26
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/sunndi.h>
30 #include <sys/atomic.h>
31 #include <sys/dlpi.h>
32 #include <sys/pattr.h>
33 #include <sys/strsubr.h>
34 #include <sys/stream.h>
35 #include <sys/strsun.h>
36 #include <inet/ip.h>
37 #include <inet/tcp.h>
38
39 #include "version.h"
40 #include "common/common.h"
41 #include "common/t4_msg.h"
42 #include "common/t4_regs.h"
43 #include "common/t4_regs_values.h"
44
45 /* TODO: Tune. */
46 int rx_buf_size = 8192;
47 int tx_copy_threshold = 256;
48 uint16_t rx_copy_threshold = 256;
49
50 /* Used to track a coalesced tx work request */
51 struct txpkts {
52 mblk_t *tail; /* head is in the software descriptor */
53 uint64_t *flitp; /* ptr to flit where next pkt should start */
54 uint8_t npkt; /* # of packets in this work request */
55 uint8_t nflits; /* # of flits used by this work request */
56 uint16_t plen; /* total payload (sum of all packets) */
57 };
58
59 /* All information needed to tx a frame */
60 struct txinfo {
61 uint32_t len; /* Total length of frame */
62 uint32_t flags; /* Checksum and LSO flags */
63 uint32_t mss; /* MSS for LSO */
64 uint8_t nsegs; /* # of segments in the SGL, 0 means imm. tx */
65 uint8_t nflits; /* # of flits needed for the SGL */
66 uint8_t hdls_used; /* # of DMA handles used */
67 uint32_t txb_used; /* txb_space used */
68 mac_ether_offload_info_t meoi; /* pkt hdr info for offloads */
69 struct ulptx_sgl sgl __attribute__((aligned(8)));
70 struct ulptx_sge_pair reserved[TX_SGL_SEGS / 2];
71 };
72
73 static int service_iq(struct sge_iq *iq, int budget);
74 static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx,
75 int8_t pktc_idx, int qsize, uint8_t esize);
76 static inline void init_fl(struct sge_fl *fl, uint16_t qsize);
77 static inline void init_eq(struct adapter *sc, struct sge_eq *eq,
78 uint16_t eqtype, uint16_t qsize, uint8_t tx_chan, uint16_t iqid);
79 static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq,
80 struct sge_fl *fl, int intr_idx, int cong);
81 static int free_iq_fl(struct port_info *pi, struct sge_iq *iq,
82 struct sge_fl *fl);
83 static int alloc_fwq(struct adapter *sc);
84 static int free_fwq(struct adapter *sc);
85 #ifdef TCP_OFFLOAD_ENABLE
86 static int alloc_mgmtq(struct adapter *sc);
87 #endif
88 static int alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx,
89 int i);
90 static int free_rxq(struct port_info *pi, struct sge_rxq *rxq);
91 #ifdef TCP_OFFLOAD_ENABLE
92 static int alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
93 int intr_idx);
94 static int free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq);
95 #endif
96 static int ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq);
97 static int eth_eq_alloc(struct adapter *sc, struct port_info *pi,
98 struct sge_eq *eq);
99 #ifdef TCP_OFFLOAD_ENABLE
100 static int ofld_eq_alloc(struct adapter *sc, struct port_info *pi,
101 struct sge_eq *eq);
102 #endif
103 static int alloc_eq(struct adapter *sc, struct port_info *pi,
104 struct sge_eq *eq);
105 static int free_eq(struct adapter *sc, struct sge_eq *eq);
106 #ifdef TCP_OFFLOAD_ENABLE
107 static int alloc_wrq(struct adapter *sc, struct port_info *pi,
108 struct sge_wrq *wrq, int idx);
109 static int free_wrq(struct adapter *sc, struct sge_wrq *wrq);
110 #endif
111 static int alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx);
112 static int free_txq(struct port_info *pi, struct sge_txq *txq);
113 static int alloc_dma_memory(struct adapter *sc, size_t len, int flags,
114 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr,
115 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
116 caddr_t *pva);
117 static int free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl);
118 static int alloc_desc_ring(struct adapter *sc, size_t len, int rw,
119 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
120 caddr_t *pva);
121 static int free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl);
122 static int alloc_tx_copybuffer(struct adapter *sc, size_t len,
123 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
124 caddr_t *pva);
125 static inline bool is_new_response(const struct sge_iq *iq,
126 struct rsp_ctrl **ctrl);
127 static inline void iq_next(struct sge_iq *iq);
128 static int refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs);
129 static void refill_sfl(void *arg);
130 static void add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl);
131 static void free_fl_bufs(struct sge_fl *fl);
132 static mblk_t *get_fl_payload(struct adapter *sc, struct sge_fl *fl,
133 uint32_t len_newbuf, int *fl_bufs_used);
134 static int get_frame_txinfo(struct sge_txq *txq, mblk_t **fp,
135 struct txinfo *txinfo, int sgl_only);
136 static inline int fits_in_txb(struct sge_txq *txq, int len, int *waste);
137 static inline int copy_into_txb(struct sge_txq *txq, mblk_t *m, int len,
138 struct txinfo *txinfo);
139 static inline void add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len);
140 static inline int add_mblk(struct sge_txq *txq, struct txinfo *txinfo,
141 mblk_t *m, int len);
142 static void free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo);
143 static int add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m,
144 struct txinfo *txinfo);
145 static void write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts);
146 static int write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m,
147 struct txinfo *txinfo);
148 static inline void write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
149 struct txpkts *txpkts, struct txinfo *txinfo);
150 static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to,
151 int len);
152 static inline void ring_tx_db(struct adapter *sc, struct sge_eq *eq);
153 static int reclaim_tx_descs(struct sge_txq *txq, int howmany);
154 static void write_txqflush_wr(struct sge_txq *txq);
155 static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss,
156 mblk_t *m);
157 static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl);
158 static kstat_t *setup_port_config_kstats(struct port_info *pi);
159 static kstat_t *setup_port_info_kstats(struct port_info *pi);
160 static kstat_t *setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq,
161 int idx);
162 static int update_rxq_kstats(kstat_t *ksp, int rw);
163 static int update_port_info_kstats(kstat_t *ksp, int rw);
164 static kstat_t *setup_txq_kstats(struct port_info *pi, struct sge_txq *txq,
165 int idx);
166 static int update_txq_kstats(kstat_t *ksp, int rw);
167 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
168 mblk_t *);
169 static int handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss,
170 mblk_t *m);
171
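/*
 * Number of tx descriptors that the hardware has finished with, based on the
 * consumer index it last reported in the queue's egress status page. These
 * are the entries that reclaim_tx_descs() can recover.
 */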
172 static inline int
173 reclaimable(struct sge_eq *eq)
174 {
175 unsigned int cidx;
176
177 cidx = eq->spg->cidx; /* stable snapshot */
178 cidx = be16_to_cpu(cidx);
179
180 if (cidx >= eq->cidx)
181 return (cidx - eq->cidx);
182 else
183 return (cidx + eq->cap - eq->cidx);
184 }
185
186 void
187 t4_sge_init(struct adapter *sc)
188 {
189 struct driver_properties *p = &sc->props;
190 ddi_dma_attr_t *dma_attr;
191 ddi_device_acc_attr_t *acc_attr;
192 uint32_t sge_control, sge_conm_ctrl;
193 int egress_threshold;
194
195 /*
196 * Device access and DMA attributes for descriptor rings
197 */
198 acc_attr = &sc->sge.acc_attr_desc;
199 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
200 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
201 acc_attr->devacc_attr_dataorder = DDI_STRICTORDER_ACC;
202
203 dma_attr = &sc->sge.dma_attr_desc;
204 dma_attr->dma_attr_version = DMA_ATTR_V0;
205 dma_attr->dma_attr_addr_lo = 0;
206 dma_attr->dma_attr_addr_hi = UINT64_MAX;
207 dma_attr->dma_attr_count_max = UINT64_MAX;
208 dma_attr->dma_attr_align = 512;
209 dma_attr->dma_attr_burstsizes = 0xfff;
210 dma_attr->dma_attr_minxfer = 1;
211 dma_attr->dma_attr_maxxfer = UINT64_MAX;
212 dma_attr->dma_attr_seg = UINT64_MAX;
213 dma_attr->dma_attr_sgllen = 1;
214 dma_attr->dma_attr_granular = 1;
215 dma_attr->dma_attr_flags = 0;
216
217 /*
218 * Device access and DMA attributes for tx buffers
219 */
220 acc_attr = &sc->sge.acc_attr_tx;
221 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
222 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
223
224 dma_attr = &sc->sge.dma_attr_tx;
225 dma_attr->dma_attr_version = DMA_ATTR_V0;
226 dma_attr->dma_attr_addr_lo = 0;
227 dma_attr->dma_attr_addr_hi = UINT64_MAX;
228 dma_attr->dma_attr_count_max = UINT64_MAX;
229 dma_attr->dma_attr_align = 1;
230 dma_attr->dma_attr_burstsizes = 0xfff;
231 dma_attr->dma_attr_minxfer = 1;
232 dma_attr->dma_attr_maxxfer = UINT64_MAX;
233 dma_attr->dma_attr_seg = UINT64_MAX;
234 dma_attr->dma_attr_sgllen = TX_SGL_SEGS;
235 dma_attr->dma_attr_granular = 1;
236 dma_attr->dma_attr_flags = 0;
237
238 /*
239 * Ingress Padding Boundary and Egress Status Page Size are set up by
240 * t4_fixup_host_params().
241 */
242 sge_control = t4_read_reg(sc, A_SGE_CONTROL);
243 sc->sge.pktshift = G_PKTSHIFT(sge_control);
244 sc->sge.stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64;
245
246 /* t4_nex uses FLM packed mode */
247 sc->sge.fl_align = t4_fl_pkt_align(sc, true);
248
249 /*
250 * Device access and DMA attributes for rx buffers
251 */
252 sc->sge.rxb_params.dip = sc->dip;
253 sc->sge.rxb_params.buf_size = rx_buf_size;
254
255 acc_attr = &sc->sge.rxb_params.acc_attr_rx;
256 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
257 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
258
259 dma_attr = &sc->sge.rxb_params.dma_attr_rx;
260 dma_attr->dma_attr_version = DMA_ATTR_V0;
261 dma_attr->dma_attr_addr_lo = 0;
262 dma_attr->dma_attr_addr_hi = UINT64_MAX;
263 dma_attr->dma_attr_count_max = UINT64_MAX;
264 /*
265 * Low 4 bits of an rx buffer address have a special meaning to the SGE
266 * and an rx buf cannot have an address with any of these bits set.
267 * FL_ALIGN is >= 32 so we're sure things are ok.
268 */
269 dma_attr->dma_attr_align = sc->sge.fl_align;
270 dma_attr->dma_attr_burstsizes = 0xfff;
271 dma_attr->dma_attr_minxfer = 1;
272 dma_attr->dma_attr_maxxfer = UINT64_MAX;
273 dma_attr->dma_attr_seg = UINT64_MAX;
274 dma_attr->dma_attr_sgllen = 1;
275 dma_attr->dma_attr_granular = 1;
276 dma_attr->dma_attr_flags = 0;
277
278 sc->sge.rxbuf_cache = rxbuf_cache_create(&sc->sge.rxb_params);
279
280 /*
281 * A FL with <= fl_starve_thres buffers is starving and a periodic
282 * timer will attempt to refill it. This needs to be larger than the
283 * SGE's Egress Congestion Threshold. If it isn't, then we can get
284 * stuck waiting for new packets while the SGE is waiting for us to
285 * give it more Free List entries. (Note that the SGE's Egress
286 * Congestion Threshold is in units of 2 Free List pointers.) For T4,
287 * there was only a single field to control this. For T5 there's the
288 * original field which now only applies to Unpacked Mode Free List
289 * buffers and a new field which only applies to Packed Mode Free List
290 * buffers.
291 */
292
293 sge_conm_ctrl = t4_read_reg(sc, A_SGE_CONM_CTRL);
294 switch (CHELSIO_CHIP_VERSION(sc->params.chip)) {
295 case CHELSIO_T4:
296 egress_threshold = G_EGRTHRESHOLD(sge_conm_ctrl);
297 break;
298 case CHELSIO_T5:
299 egress_threshold = G_EGRTHRESHOLDPACKING(sge_conm_ctrl);
300 break;
301 case CHELSIO_T6:
302 default:
303 egress_threshold = G_T6_EGRTHRESHOLDPACKING(sge_conm_ctrl);
304 }
305 sc->sge.fl_starve_threshold = 2*egress_threshold + 1;
306
307 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, rx_buf_size);
308
309 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
310 V_THRESHOLD_0(p->counter_val[0]) |
311 V_THRESHOLD_1(p->counter_val[1]) |
312 V_THRESHOLD_2(p->counter_val[2]) |
313 V_THRESHOLD_3(p->counter_val[3]));
314
315 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
316 V_TIMERVALUE0(us_to_core_ticks(sc, p->timer_val[0])) |
317 V_TIMERVALUE1(us_to_core_ticks(sc, p->timer_val[1])));
318 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
319 V_TIMERVALUE2(us_to_core_ticks(sc, p->timer_val[2])) |
320 V_TIMERVALUE3(us_to_core_ticks(sc, p->timer_val[3])));
321 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
322 V_TIMERVALUE4(us_to_core_ticks(sc, p->timer_val[4])) |
323 V_TIMERVALUE5(us_to_core_ticks(sc, p->timer_val[5])));
324
325 (void) t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
326 (void) t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
327 (void) t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
328 (void) t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
329 (void) t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL,
330 t4_handle_fw_rpl);
331 }
332
333 /*
334 * Allocate and initialize the firmware event queue and the forwarded interrupt
335 * queues, if any. The adapter owns all these queues as they are not associated
336 * with any particular port.
337 *
338 * Returns errno on failure. Resources allocated up to that point may still be
339 * allocated. Caller is responsible for cleanup in case this function fails.
340 */
341 int
342 t4_setup_adapter_queues(struct adapter *sc)
343 {
344 int rc;
345
346 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
347
348 /*
349 * Firmware event queue
350 */
351 rc = alloc_fwq(sc);
352 if (rc != 0)
353 return (rc);
354
355 #ifdef TCP_OFFLOAD_ENABLE
356 /*
357 * Management queue. This is just a control queue that uses the fwq as
358 * its associated iq.
359 */
360 rc = alloc_mgmtq(sc);
361 #endif
362
363 return (rc);
364 }
365
366 /*
367 * Idempotent
368 */
369 int
370 t4_teardown_adapter_queues(struct adapter *sc)
371 {
372
373 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
374
375 (void) free_fwq(sc);
376
377 return (0);
378 }
379
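/*
 * Index of the first interrupt vector available to this port's queues: the
 * adapter-wide vector(s) come first (T4_EXTRA_INTR), followed by the vectors
 * consumed by every port that precedes this one.
 */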
380 static inline int
381 first_vector(struct port_info *pi)
382 {
383 struct adapter *sc = pi->adapter;
384 int rc = T4_EXTRA_INTR, i;
385
386 if (sc->intr_count == 1)
387 return (0);
388
389 for_each_port(sc, i) {
390 struct port_info *p = sc->port[i];
391
392 if (i == pi->port_id)
393 break;
394
395 #ifdef TCP_OFFLOAD_ENABLE
396 if (!(sc->flags & INTR_FWD))
397 rc += p->nrxq + p->nofldrxq;
398 else
399 rc += max(p->nrxq, p->nofldrxq);
400 #else
401 /*
402 * Not compiled with offload support and intr_count > 1. Only
403 * NIC queues exist and they'd better be taking direct
404 * interrupts.
405 */
406 ASSERT(!(sc->flags & INTR_FWD));
407 rc += p->nrxq;
408 #endif
409 }
410 return (rc);
411 }
412
413 /*
414 * Given an arbitrary "index," come up with an iq that can be used by other
415 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
416 * The iq returned is guaranteed to be something that takes direct interrupts.
417 */
418 static struct sge_iq *
419 port_intr_iq(struct port_info *pi, int idx)
420 {
421 struct adapter *sc = pi->adapter;
422 struct sge *s = &sc->sge;
423 struct sge_iq *iq = NULL;
424
425 if (sc->intr_count == 1)
426 return (&sc->sge.fwq);
427
428 #ifdef TCP_OFFLOAD_ENABLE
429 if (!(sc->flags & INTR_FWD)) {
430 idx %= pi->nrxq + pi->nofldrxq;
431
432 if (idx >= pi->nrxq) {
433 idx -= pi->nrxq;
434 iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
435 } else
436 iq = &s->rxq[pi->first_rxq + idx].iq;
437
438 } else {
439 idx %= max(pi->nrxq, pi->nofldrxq);
440
441 if (pi->nrxq >= pi->nofldrxq)
442 iq = &s->rxq[pi->first_rxq + idx].iq;
443 else
444 iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
445 }
446 #else
447 /*
448 * Not compiled with offload support and intr_count > 1. Only NIC
449 * queues exist and they'd better be taking direct interrupts.
450 */
451 ASSERT(!(sc->flags & INTR_FWD));
452
453 idx %= pi->nrxq;
454 iq = &s->rxq[pi->first_rxq + idx].iq;
455 #endif
456
457 return (iq);
458 }
459
460 int
461 t4_setup_port_queues(struct port_info *pi)
462 {
463 int rc = 0, i, intr_idx, j;
464 struct sge_rxq *rxq;
465 struct sge_txq *txq;
466 #ifdef TCP_OFFLOAD_ENABLE
467 int iqid;
468 struct sge_wrq *ctrlq;
469 struct sge_ofld_rxq *ofld_rxq;
470 struct sge_wrq *ofld_txq;
471 #endif
472 struct adapter *sc = pi->adapter;
473 struct driver_properties *p = &sc->props;
474
475 pi->ksp_config = setup_port_config_kstats(pi);
476 pi->ksp_info = setup_port_info_kstats(pi);
477
478 /* Interrupt vector to start from (when using multiple vectors) */
479 intr_idx = first_vector(pi);
480
481 /*
482 * First pass over all rx queues (NIC and TOE):
483 * a) initialize iq and fl
484 * b) allocate queue iff it will take direct interrupts.
485 */
486
487 for_each_rxq(pi, i, rxq) {
488
489 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, p->qsize_rxq,
490 RX_IQ_ESIZE);
491
492 init_fl(&rxq->fl, p->qsize_rxq / 8); /* 8 bufs in each entry */
493
494 if ((!(sc->flags & INTR_FWD))
495 #ifdef TCP_OFFLOAD_ENABLE
496 || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
497 #else
498 || (sc->intr_count > 1 && pi->nrxq)
499 #endif
500 ) {
501 rxq->iq.flags |= IQ_INTR;
502 rc = alloc_rxq(pi, rxq, intr_idx, i);
503 if (rc != 0)
504 goto done;
505 intr_idx++;
506 }
507
508 }
509
510 #ifdef TCP_OFFLOAD_ENABLE
511 for_each_ofld_rxq(pi, i, ofld_rxq) {
512
513 init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
514 p->qsize_rxq, RX_IQ_ESIZE);
515
516 init_fl(&ofld_rxq->fl, p->qsize_rxq / 8);
517
518 if (!(sc->flags & INTR_FWD) ||
519 (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
520 ofld_rxq->iq.flags = IQ_INTR;
521 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx);
522 if (rc != 0)
523 goto done;
524
525 intr_idx++;
526 }
527 }
528 #endif
529
530 /*
531 * Second pass over all rx queues (NIC and TOE). The queues forwarding
532 * their interrupts are allocated now.
533 */
534 j = 0;
535 for_each_rxq(pi, i, rxq) {
536 if (rxq->iq.flags & IQ_INTR)
537 continue;
538
539 intr_idx = port_intr_iq(pi, j)->abs_id;
540
541 rc = alloc_rxq(pi, rxq, intr_idx, i);
542 if (rc != 0)
543 goto done;
544 j++;
545 }
546
547 #ifdef TCP_OFFLOAD_ENABLE
548 for_each_ofld_rxq(pi, i, ofld_rxq) {
549 if (ofld_rxq->iq.flags & IQ_INTR)
550 continue;
551
552 intr_idx = port_intr_iq(pi, j)->abs_id;
553 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx);
554 if (rc != 0)
555 goto done;
556 j++;
557 }
558 #endif
559 /*
560 * Now the tx queues. Only one pass needed.
561 */
562 j = 0;
563 for_each_txq(pi, i, txq) {
564 uint16_t iqid;
565
566 iqid = port_intr_iq(pi, j)->cntxt_id;
567 init_eq(sc, &txq->eq, EQ_ETH, p->qsize_txq, pi->tx_chan, iqid);
568 rc = alloc_txq(pi, txq, i);
569 if (rc != 0)
570 goto done;
571 }
572
573 #ifdef TCP_OFFLOAD_ENABLE
574 for_each_ofld_txq(pi, i, ofld_txq) {
575 uint16_t iqid;
576
577 iqid = port_intr_iq(pi, j)->cntxt_id;
578 init_eq(sc, &ofld_txq->eq, EQ_OFLD, p->qsize_txq, pi->tx_chan,
579 iqid);
580 rc = alloc_wrq(sc, pi, ofld_txq, i);
581 if (rc != 0)
582 goto done;
583 }
584
585 /*
586 * Finally, the control queue.
587 */
588 ctrlq = &sc->sge.ctrlq[pi->port_id];
589 iqid = port_intr_iq(pi, 0)->cntxt_id;
590 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid);
591 rc = alloc_wrq(sc, pi, ctrlq, 0);
592 #endif
593
594 done:
595 if (rc != 0)
596 (void) t4_teardown_port_queues(pi);
597
598 return (rc);
599 }
600
601 /*
602 * Idempotent
603 */
604 int
605 t4_teardown_port_queues(struct port_info *pi)
606 {
607 int i;
608 struct sge_rxq *rxq;
609 struct sge_txq *txq;
610 #ifdef TCP_OFFLOAD_ENABLE
611 struct adapter *sc = pi->adapter;
612 struct sge_ofld_rxq *ofld_rxq;
613 struct sge_wrq *ofld_txq;
614 #endif
615
616 if (pi->ksp_config != NULL) {
617 kstat_delete(pi->ksp_config);
618 pi->ksp_config = NULL;
619 }
620 if (pi->ksp_info != NULL) {
621 kstat_delete(pi->ksp_info);
622 pi->ksp_info = NULL;
623 }
624
625 #ifdef TCP_OFFLOAD_ENABLE
626 (void) free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
627 #endif
628
629 for_each_txq(pi, i, txq) {
630 (void) free_txq(pi, txq);
631 }
632
633 #ifdef TCP_OFFLOAD_ENABLE
634 for_each_ofld_txq(pi, i, ofld_txq) {
635 (void) free_wrq(sc, ofld_txq);
636 }
637
638 for_each_ofld_rxq(pi, i, ofld_rxq) {
639 if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
640 (void) free_ofld_rxq(pi, ofld_rxq);
641 }
642 #endif
643
644 for_each_rxq(pi, i, rxq) {
645 if ((rxq->iq.flags & IQ_INTR) == 0)
646 (void) free_rxq(pi, rxq);
647 }
648
649 /*
650 * Then take down the rx queues that take direct interrupts.
651 */
652
653 for_each_rxq(pi, i, rxq) {
654 if (rxq->iq.flags & IQ_INTR)
655 (void) free_rxq(pi, rxq);
656 }
657
658 #ifdef TCP_OFFLOAD_ENABLE
659 for_each_ofld_rxq(pi, i, ofld_rxq) {
660 if (ofld_rxq->iq.flags & IQ_INTR)
661 (void) free_ofld_rxq(pi, ofld_rxq);
662 }
663 #endif
664
665 return (0);
666 }
667
668 /* Deals with errors and forwarded interrupts */
669 uint_t
670 t4_intr_all(caddr_t arg1, caddr_t arg2)
671 {
672
673 (void) t4_intr_err(arg1, arg2);
674 (void) t4_intr(arg1, arg2);
675
676 return (DDI_INTR_CLAIMED);
677 }
678
679 static void
680 t4_intr_rx_work(struct sge_iq *iq)
681 {
682 mblk_t *mp = NULL;
683 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
684 RXQ_LOCK(rxq);
685 if (!iq->polling) {
686 mp = t4_ring_rx(rxq, iq->qsize/8);
687 t4_write_reg(iq->adapter, MYPF_REG(A_SGE_PF_GTS),
688 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));
689 }
690 RXQ_UNLOCK(rxq);
691 if (mp != NULL)
692 mac_rx_ring(rxq->port->mh, rxq->ring_handle, mp,
693 rxq->ring_gen_num);
694 }
695
696 /* Deals with interrupts on the given ingress queue */
697 /* ARGSUSED */
698 uint_t
699 t4_intr(caddr_t arg1, caddr_t arg2)
700 {
701 struct sge_iq *iq = (struct sge_iq *)arg2;
702 int state;
703
704 /* Right now receive polling is only enabled for MSI-X and
705 * when we have enough MSI-X vectors, i.e. no interrupt forwarding.
706 */
707 if (iq->adapter->props.multi_rings) {
708 t4_intr_rx_work(iq);
709 } else {
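/*
 * Claim the iq (IDLE -> BUSY) so that concurrent invocations cannot
 * service the same queue at the same time.
 */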
710 state = atomic_cas_uint(&iq->state, IQS_IDLE, IQS_BUSY);
711 if (state == IQS_IDLE) {
712 (void) service_iq(iq, 0);
713 (void) atomic_cas_uint(&iq->state, IQS_BUSY, IQS_IDLE);
714 }
715 }
716 return (DDI_INTR_CLAIMED);
717 }
718
719 /* Deals with error interrupts */
720 /* ARGSUSED */
721 uint_t
722 t4_intr_err(caddr_t arg1, caddr_t arg2)
723 {
724 /* LINTED: E_BAD_PTR_CAST_ALIGN */
725 struct adapter *sc = (struct adapter *)arg1;
726
727 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
728 (void) t4_slow_intr_handler(sc);
729
730 return (DDI_INTR_CLAIMED);
731 }
732
733 /*
734 * t4_ring_rx - Process responses from an SGE response queue.
735 *
736 * This function processes responses from an SGE response queue, up to the
737 * supplied budget. Responses include received packets as well as control
738 * messages from FW or HW.
739 * It returns a chain of mblks containing the received data, to be
740 * passed up to mac_ring_rx().
741 */
742 mblk_t *
743 t4_ring_rx(struct sge_rxq *rxq, int budget)
744 {
745 struct sge_iq *iq = &rxq->iq;
746 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
747 struct adapter *sc = iq->adapter;
748 struct rsp_ctrl *ctrl;
749 const struct rss_header *rss;
750 int ndescs = 0, fl_bufs_used = 0;
751 int rsp_type;
752 uint32_t lq;
753 mblk_t *mblk_head = NULL, **mblk_tail, *m;
754 struct cpl_rx_pkt *cpl;
755 uint32_t received_bytes = 0, pkt_len = 0;
756 bool csum_ok;
757 uint16_t err_vec;
758
759 mblk_tail = &mblk_head;
760
761 while (is_new_response(iq, &ctrl)) {
762
763 membar_consumer();
764
765 m = NULL;
766 rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
767 lq = be32_to_cpu(ctrl->pldbuflen_qid);
768 rss = (const void *)iq->cdesc;
769
770 switch (rsp_type) {
771 case X_RSPD_TYPE_FLBUF:
772
773 ASSERT(iq->flags & IQ_HAS_FL);
774
775 if (CPL_RX_PKT == rss->opcode) {
776 cpl = (void *)(rss + 1);
777 pkt_len = be16_to_cpu(cpl->len);
778
779 if (iq->polling && ((received_bytes + pkt_len) > budget))
780 goto done;
781
782 m = get_fl_payload(sc, fl, lq, &fl_bufs_used);
783 if (m == NULL)
784 goto done;
785
786 iq->intr_next = iq->intr_params;
787 m->b_rptr += sc->sge.pktshift;
788 if (sc->params.tp.rx_pkt_encap)
789 /* It is enabled only in T6 config file */
790 err_vec = G_T6_COMPR_RXERR_VEC(ntohs(cpl->err_vec));
791 else
792 err_vec = ntohs(cpl->err_vec);
793
794 csum_ok = cpl->csum_calc && !err_vec;
795
796 /* TODO: what about cpl->ip_frag? */
797 if (csum_ok && !cpl->ip_frag) {
798 mac_hcksum_set(m, 0, 0, 0, 0xffff,
799 HCK_FULLCKSUM_OK | HCK_FULLCKSUM |
800 HCK_IPV4_HDRCKSUM_OK);
801 rxq->rxcsum++;
802 }
803 rxq->rxpkts++;
804 rxq->rxbytes += pkt_len;
805 received_bytes += pkt_len;
806
807 *mblk_tail = m;
808 mblk_tail = &m->b_next;
809
810 break;
811 }
812
813 m = get_fl_payload(sc, fl, lq, &fl_bufs_used);
814 if (m == NULL)
815 goto done;
816 /* FALLTHROUGH */
817
818 case X_RSPD_TYPE_CPL:
819 ASSERT(rss->opcode < NUM_CPL_CMDS);
820 sc->cpl_handler[rss->opcode](iq, rss, m);
821 break;
822
823 default:
824 break;
825 }
826 iq_next(iq);
827 ++ndescs;
828 if (!iq->polling && (ndescs == budget))
829 break;
830 }
831
832 done:
833
834 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
835 V_CIDXINC(ndescs) | V_INGRESSQID(iq->cntxt_id) |
836 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
837
838 if ((fl_bufs_used > 0) || (iq->flags & IQ_HAS_FL)) {
839 int starved;
840 FL_LOCK(fl);
841 fl->needed += fl_bufs_used;
842 starved = refill_fl(sc, fl, fl->cap / 8);
843 FL_UNLOCK(fl);
844 if (starved)
845 add_fl_to_sfl(sc, fl);
846 }
847 return (mblk_head);
848 }
849
850 /*
851 * Deals with anything and everything on the given ingress queue.
852 */
853 static int
854 service_iq(struct sge_iq *iq, int budget)
855 {
856 struct sge_iq *q;
857 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
858 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
859 struct adapter *sc = iq->adapter;
860 struct rsp_ctrl *ctrl;
861 const struct rss_header *rss;
862 int ndescs = 0, limit, fl_bufs_used = 0;
863 int rsp_type;
864 uint32_t lq;
865 int starved;
866 mblk_t *m;
867 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
868
869 limit = budget ? budget : iq->qsize / 8;
870
871 /*
872 * We always come back and check the descriptor ring for new indirect
873 * interrupts and other responses after running a single handler.
874 */
875 for (;;) {
876 while (is_new_response(iq, &ctrl)) {
877
878 membar_consumer();
879
880 m = NULL;
881 rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
882 lq = be32_to_cpu(ctrl->pldbuflen_qid);
883 rss = (const void *)iq->cdesc;
884
885 switch (rsp_type) {
886 case X_RSPD_TYPE_FLBUF:
887
888 ASSERT(iq->flags & IQ_HAS_FL);
889
890 m = get_fl_payload(sc, fl, lq, &fl_bufs_used);
891 if (m == NULL) {
892 /*
893 * Rearm the iq with a
894 * longer-than-default timer
895 */
896 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
897 V_INGRESSQID((u32)iq->cntxt_id) |
898 V_SEINTARM(V_QINTR_TIMER_IDX(SGE_NTIMERS-1)));
899 if (fl_bufs_used > 0) {
900 ASSERT(iq->flags & IQ_HAS_FL);
901 FL_LOCK(fl);
902 fl->needed += fl_bufs_used;
903 starved = refill_fl(sc, fl, fl->cap / 8);
904 FL_UNLOCK(fl);
905 if (starved)
906 add_fl_to_sfl(sc, fl);
907 }
908 return (0);
909 }
910
911 /* FALLTHRU */
912 case X_RSPD_TYPE_CPL:
913
914 ASSERT(rss->opcode < NUM_CPL_CMDS);
915 sc->cpl_handler[rss->opcode](iq, rss, m);
916 break;
917
918 case X_RSPD_TYPE_INTR:
919
920 /*
921 * Interrupts should be forwarded only to queues
922 * that are not forwarding their interrupts.
923 * This means service_iq can recurse but only 1
924 * level deep.
925 */
926 ASSERT(budget == 0);
927
928 q = sc->sge.iqmap[lq - sc->sge.iq_start];
929 if (atomic_cas_uint(&q->state, IQS_IDLE,
930 IQS_BUSY) == IQS_IDLE) {
931 if (service_iq(q, q->qsize / 8) == 0) {
932 (void) atomic_cas_uint(
933 &q->state, IQS_BUSY,
934 IQS_IDLE);
935 } else {
936 STAILQ_INSERT_TAIL(&iql, q,
937 link);
938 }
939 }
940 break;
941
942 default:
943 break;
944 }
945
946 iq_next(iq);
947 if (++ndescs == limit) {
948 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
949 V_CIDXINC(ndescs) |
950 V_INGRESSQID(iq->cntxt_id) |
951 V_SEINTARM(V_QINTR_TIMER_IDX(
952 X_TIMERREG_UPDATE_CIDX)));
953 ndescs = 0;
954
955 if (fl_bufs_used > 0) {
956 ASSERT(iq->flags & IQ_HAS_FL);
957 FL_LOCK(fl);
958 fl->needed += fl_bufs_used;
959 (void) refill_fl(sc, fl, fl->cap / 8);
960 FL_UNLOCK(fl);
961 fl_bufs_used = 0;
962 }
963
964 if (budget != 0)
965 return (EINPROGRESS);
966 }
967 }
968
969 if (STAILQ_EMPTY(&iql) != 0)
970 break;
971
972 /*
973 * Process the head only, and send it to the back of the list if
974 * it's still not done.
975 */
976 q = STAILQ_FIRST(&iql);
977 STAILQ_REMOVE_HEAD(&iql, link);
978 if (service_iq(q, q->qsize / 8) == 0)
979 (void) atomic_cas_uint(&q->state, IQS_BUSY, IQS_IDLE);
980 else
981 STAILQ_INSERT_TAIL(&iql, q, link);
982 }
983
984 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
985 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));
986
987 if (iq->flags & IQ_HAS_FL) {
988
989 FL_LOCK(fl);
990 fl->needed += fl_bufs_used;
991 starved = refill_fl(sc, fl, fl->cap / 4);
992 FL_UNLOCK(fl);
993 if (starved != 0)
994 add_fl_to_sfl(sc, fl);
995 }
996
997 return (0);
998 }
999
1000 #ifdef TCP_OFFLOAD_ENABLE
1001 int
1002 t4_mgmt_tx(struct adapter *sc, mblk_t *m)
1003 {
1004 return (t4_wrq_tx(sc, &sc->sge.mgmtq, m));
1005 }
1006
1007 /*
1008 * Doesn't fail. Holds on to work requests it can't send right away.
1009 */
1010 int
1011 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, mblk_t *m0)
1012 {
1013 struct sge_eq *eq = &wrq->eq;
1014 struct mblk_pair *wr_list = &wrq->wr_list;
1015 int can_reclaim;
1016 caddr_t dst;
1017 mblk_t *wr, *next;
1018
1019 TXQ_LOCK_ASSERT_OWNED(wrq);
1020 #ifdef TCP_OFFLOAD_ENABLE
1021 ASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
1022 (eq->flags & EQ_TYPEMASK) == EQ_CTRL);
1023 #else
1024 ASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL);
1025 #endif
1026
1027 if (m0 != NULL) {
1028 if (wr_list->head != NULL)
1029 wr_list->tail->b_next = m0;
1030 else
1031 wr_list->head = m0;
1032 while (m0->b_next)
1033 m0 = m0->b_next;
1034 wr_list->tail = m0;
1035 }
1036
1037 can_reclaim = reclaimable(eq);
1038 eq->cidx += can_reclaim;
1039 eq->avail += can_reclaim;
1040 if (eq->cidx >= eq->cap)
1041 eq->cidx -= eq->cap;
1042
1043 for (wr = wr_list->head; wr; wr = next) {
1044 int ndesc, len = 0;
1045 mblk_t *m;
1046
1047 next = wr->b_next;
1048 wr->b_next = NULL;
1049
1050 for (m = wr; m; m = m->b_cont)
1051 len += MBLKL(m);
1052
1053 ASSERT(len > 0 && (len & 0x7) == 0);
1054 ASSERT(len <= SGE_MAX_WR_LEN);
1055
1056 ndesc = howmany(len, EQ_ESIZE);
1057 if (eq->avail < ndesc) {
1058 wr->b_next = next;
1059 wrq->no_desc++;
1060 break;
1061 }
1062
1063 dst = (void *)&eq->desc[eq->pidx];
1064 for (m = wr; m; m = m->b_cont)
1065 copy_to_txd(eq, (void *)m->b_rptr, &dst, MBLKL(m));
1066
1067 eq->pidx += ndesc;
1068 eq->avail -= ndesc;
1069 if (eq->pidx >= eq->cap)
1070 eq->pidx -= eq->cap;
1071
1072 eq->pending += ndesc;
1073 if (eq->pending > 16)
1074 ring_tx_db(sc, eq);
1075
1076 wrq->tx_wrs++;
1077 freemsg(wr);
1078
1079 if (eq->avail < 8) {
1080 can_reclaim = reclaimable(eq);
1081 eq->cidx += can_reclaim;
1082 eq->avail += can_reclaim;
1083 if (eq->cidx >= eq->cap)
1084 eq->cidx -= eq->cap;
1085 }
1086 }
1087
1088 if (eq->pending != 0)
1089 ring_tx_db(sc, eq);
1090
1091 if (wr == NULL)
1092 wr_list->head = wr_list->tail = NULL;
1093 else {
1094 wr_list->head = wr;
1095
1096 ASSERT(wr_list->tail->b_next == NULL);
1097 }
1098
1099 return (0);
1100 }
1101 #endif
1102
1103 /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
1104 #define TXPKTS_PKT_HDR ((\
1105 sizeof (struct ulp_txpkt) + \
1106 sizeof (struct ulptx_idata) + \
1107 sizeof (struct cpl_tx_pkt_core)) / 8)
1108
1109 /* Header of a coalesced tx WR, before SGL of first packet (in flits) */
1110 #define TXPKTS_WR_HDR (\
1111 sizeof (struct fw_eth_tx_pkts_wr) / 8 + \
1112 TXPKTS_PKT_HDR)
1113
1114 /* Header of a tx WR, before SGL of first packet (in flits) */
1115 #define TXPKT_WR_HDR ((\
1116 sizeof (struct fw_eth_tx_pkt_wr) + \
1117 sizeof (struct cpl_tx_pkt_core)) / 8)
1118
1119 /* Header of a tx LSO WR, before SGL of first packet (in flits) */
1120 #define TXPKT_LSO_WR_HDR ((\
1121 sizeof (struct fw_eth_tx_pkt_wr) + \
1122 sizeof (struct cpl_tx_pkt_lso_core) + \
1123 sizeof (struct cpl_tx_pkt_core)) / 8)
1124
1125 mblk_t *
1126 t4_eth_tx(void *arg, mblk_t *frame)
1127 {
1128 struct sge_txq *txq = (struct sge_txq *) arg;
1129 struct port_info *pi = txq->port;
1130 struct adapter *sc = pi->adapter;
1131 struct sge_eq *eq = &txq->eq;
1132 mblk_t *next_frame;
1133 int rc, coalescing;
1134 struct txpkts txpkts;
1135 struct txinfo txinfo;
1136
1137 txpkts.npkt = 0; /* indicates there's nothing in txpkts */
1138 coalescing = 0;
1139
1140 TXQ_LOCK(txq);
1141 if (eq->avail < 8)
1142 (void) reclaim_tx_descs(txq, 8);
1143 for (; frame; frame = next_frame) {
1144
1145 if (eq->avail < 8)
1146 break;
1147
1148 next_frame = frame->b_next;
1149 frame->b_next = NULL;
1150
1151 if (next_frame != NULL)
1152 coalescing = 1;
1153
1154 rc = get_frame_txinfo(txq, &frame, &txinfo, coalescing);
1155 if (rc != 0) {
1156 if (rc == ENOMEM) {
1157
1158 /* Short of resources, suspend tx */
1159
1160 frame->b_next = next_frame;
1161 break;
1162 }
1163
1164 /*
1165 * Unrecoverable error for this frame, throw it
1166 * away and move on to the next.
1167 */
1168
1169 freemsg(frame);
1170 continue;
1171 }
1172
1173 if (coalescing != 0 &&
1174 add_to_txpkts(txq, &txpkts, frame, &txinfo) == 0) {
1175
1176 /* Successfully absorbed into txpkts */
1177
1178 write_ulp_cpl_sgl(pi, txq, &txpkts, &txinfo);
1179 goto doorbell;
1180 }
1181
1182 /*
1183 * We weren't coalescing to begin with, or current frame could
1184 * not be coalesced (add_to_txpkts flushes txpkts if a frame
1185 * given to it can't be coalesced). Either way there should be
1186 * nothing in txpkts.
1187 */
1188 ASSERT(txpkts.npkt == 0);
1189
1190 /* We're sending out individual frames now */
1191 coalescing = 0;
1192
1193 if (eq->avail < 8)
1194 (void) reclaim_tx_descs(txq, 8);
1195 rc = write_txpkt_wr(pi, txq, frame, &txinfo);
1196 if (rc != 0) {
1197
1198 /* Short of hardware descriptors, suspend tx */
1199
1200 /*
1201 * This is an unlikely but expensive failure. We've
1202 * done all the hard work (DMA bindings etc.) and now we
1203 * can't send out the frame. What's worse, we have to
1204 * spend even more time freeing up everything in txinfo.
1205 */
1206 txq->qfull++;
1207 free_txinfo_resources(txq, &txinfo);
1208
1209 frame->b_next = next_frame;
1210 break;
1211 }
1212
1213 doorbell:
1214 /* Fewer and fewer doorbells as the queue fills up */
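/*
 * e.g. when (qsize - avail) is 16 the doorbell is rung once 4 descriptors
 * are pending (1 << (fls(16) / 2)); at 512 it waits for 32.
 */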
1215 if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) {
1216 txq->txbytes += txinfo.len;
1217 txq->txpkts++;
1218 ring_tx_db(sc, eq);
1219 }
1220 (void) reclaim_tx_descs(txq, 32);
1221 }
1222
1223 if (txpkts.npkt > 0)
1224 write_txpkts_wr(txq, &txpkts);
1225
1226 /*
1227 * frame not NULL means there was an error but we haven't thrown it
1228 * away. This can happen when we're short of tx descriptors (qfull) or
1229 * maybe even DMA handles (dma_hdl_failed). Either way, a credit flush
1230 * and reclaim will get things going again.
1231 *
1232 * If eq->avail is already 0 we know a credit flush was requested in the
1233 * WR that reduced it to 0 so we don't need another flush (we don't have
1234 * any descriptor for a flush WR anyway, duh).
1235 */
1236 if (frame && eq->avail > 0)
1237 write_txqflush_wr(txq);
1238
1239 if (eq->pending != 0)
1240 ring_tx_db(sc, eq);
1241
1242 (void) reclaim_tx_descs(txq, eq->qsize);
1243 TXQ_UNLOCK(txq);
1244
1245 return (frame);
1246 }
1247
1248 static inline void
1249 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int8_t pktc_idx,
1250 int qsize, uint8_t esize)
1251 {
1252 ASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS);
1253 ASSERT(pktc_idx < SGE_NCOUNTERS); /* -ve is ok, means don't use */
1254
1255 iq->flags = 0;
1256 iq->adapter = sc;
1257 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
1258 iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
1259 if (pktc_idx >= 0) {
1260 iq->intr_params |= F_QINTR_CNT_EN;
1261 iq->intr_pktc_idx = pktc_idx;
1262 }
1263 iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */
1264 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */
1265 }
1266
1267 static inline void
1268 init_fl(struct sge_fl *fl, uint16_t qsize)
1269 {
1270
1271 fl->qsize = qsize;
1272 fl->allocb_fail = 0;
1273 }
1274
1275 static inline void
1276 init_eq(struct adapter *sc, struct sge_eq *eq, uint16_t eqtype, uint16_t qsize,
1277 uint8_t tx_chan, uint16_t iqid)
1278 {
1279 struct sge *s = &sc->sge;
1280 uint32_t r;
1281
1282 ASSERT(tx_chan < NCHAN);
1283 ASSERT(eqtype <= EQ_TYPEMASK);
1284
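/*
 * On T5, cache this PF's egress queues-per-page shift; alloc_eq() uses it
 * later to locate a queue's user doorbell within the BAR2 doorbell region.
 */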
1285 if (is_t5(sc->params.chip)) {
1286 r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
1287 r >>= S_QUEUESPERPAGEPF0 +
1288 (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
1289 s->s_qpp = r & M_QUEUESPERPAGEPF0;
1290 }
1291
1292 eq->flags = eqtype & EQ_TYPEMASK;
1293 eq->tx_chan = tx_chan;
1294 eq->iqid = iqid;
1295 eq->qsize = qsize;
1296 }
1297
1298 /*
1299 * Allocates the ring for an ingress queue and an optional freelist. If the
1300 * freelist is specified it will be allocated and then associated with the
1301 * ingress queue.
1302 *
1303 * Returns errno on failure. Resources allocated up to that point may still be
1304 * allocated. Caller is responsible for cleanup in case this function fails.
1305 *
1306 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
1307 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies
1308 * the index of the queue to which its interrupts will be forwarded.
1309 */
1310 static int
1311 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1312 int intr_idx, int cong)
1313 {
1314 int rc, i, cntxt_id;
1315 size_t len;
1316 struct fw_iq_cmd c;
1317 struct adapter *sc = iq->adapter;
1318 uint32_t v = 0;
1319
1320 len = iq->qsize * iq->esize;
1321 rc = alloc_desc_ring(sc, len, DDI_DMA_READ, &iq->dhdl, &iq->ahdl,
1322 &iq->ba, (caddr_t *)&iq->desc);
1323 if (rc != 0)
1324 return (rc);
1325
1326 bzero(&c, sizeof (c));
1327 c.op_to_vfn = cpu_to_be32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1328 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1329 V_FW_IQ_CMD_VFN(0));
1330
1331 c.alloc_to_len16 = cpu_to_be32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1332 FW_LEN16(c));
1333
1334 /* Special handling for firmware event queue */
1335 if (iq == &sc->sge.fwq)
1336 v |= F_FW_IQ_CMD_IQASYNCH;
1337
1338 if (iq->flags & IQ_INTR)
1339 ASSERT(intr_idx < sc->intr_count);
1340 else
1341 v |= F_FW_IQ_CMD_IQANDST;
1342 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1343
1344 c.type_to_iqandstindex = cpu_to_be32(v |
1345 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1346 V_FW_IQ_CMD_VIID(pi->viid) |
1347 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1348 c.iqdroprss_to_iqesize = cpu_to_be16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1349 F_FW_IQ_CMD_IQGTSMODE |
1350 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1351 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1352 c.iqsize = cpu_to_be16(iq->qsize);
1353 c.iqaddr = cpu_to_be64(iq->ba);
1354 if (cong >= 0)
1355 c.iqns_to_fl0congen = BE_32(F_FW_IQ_CMD_IQFLINTCONGEN |
1356 V_FW_IQ_CMD_IQTYPE(cong ?
1357 FW_IQ_IQTYPE_NIC : FW_IQ_IQTYPE_OFLD));
1358
1359 if (fl != NULL) {
1360 unsigned int chip_ver = CHELSIO_CHIP_VERSION(sc->params.chip);
1361
1362 mutex_init(&fl->lock, NULL, MUTEX_DRIVER,
1363 DDI_INTR_PRI(sc->intr_pri));
1364 fl->flags |= FL_MTX;
1365
1366 len = fl->qsize * RX_FL_ESIZE;
1367 rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &fl->dhdl,
1368 &fl->ahdl, &fl->ba, (caddr_t *)&fl->desc);
1369 if (rc != 0)
1370 return (rc);
1371
1372 /*
* Allocate space for one software descriptor per buffer. The status
* page occupies the end of the ring and each remaining entry holds 8
* buffer pointers, hence the "* 8".
*/
1373 fl->cap = (fl->qsize - sc->sge.stat_len / RX_FL_ESIZE) * 8;
1374 fl->sdesc = kmem_zalloc(sizeof (struct fl_sdesc) * fl->cap,
1375 KM_SLEEP);
1376 fl->needed = fl->cap;
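/*
 * Refill low-water mark: the starvation threshold, rounded up to a
 * whole hardware descriptor (8 buffers).
 */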
1377 fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);
1378
1379 c.iqns_to_fl0congen |=
1380 cpu_to_be32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1381 F_FW_IQ_CMD_FL0PACKEN | F_FW_IQ_CMD_FL0PADEN);
1382 if (cong >= 0) {
1383 c.iqns_to_fl0congen |=
1384 BE_32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1385 F_FW_IQ_CMD_FL0CONGCIF |
1386 F_FW_IQ_CMD_FL0CONGEN);
1387 }
1388
1389 /* In T6, for egress queue type FL there is internal overhead
1390 * of 16B for header going into FLM module. Hence the maximum
1391 * allowed burst size is 448 bytes. For T4/T5, the hardware
1392 * doesn't coalesce fetch requests if more than 64 bytes of
1393 * Free List pointers are provided, so we use a 128-byte Fetch
1394 * Burst Minimum there (T6 implements coalescing so we can use
1395 * the smaller 64-byte value there).
1396 */
1397
1398 c.fl0dcaen_to_fl0cidxfthresh =
1399 cpu_to_be16(V_FW_IQ_CMD_FL0FBMIN(chip_ver <= CHELSIO_T5
1400 ? X_FETCHBURSTMIN_128B
1401 : X_FETCHBURSTMIN_64B) |
1402 V_FW_IQ_CMD_FL0FBMAX(chip_ver <= CHELSIO_T5
1403 ? X_FETCHBURSTMAX_512B
1404 : X_FETCHBURSTMAX_256B));
1405 c.fl0size = cpu_to_be16(fl->qsize);
1406 c.fl0addr = cpu_to_be64(fl->ba);
1407 }
1408
1409 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
1410 if (rc != 0) {
1411 cxgb_printf(sc->dip, CE_WARN,
1412 "failed to create ingress queue: %d", rc);
1413 return (rc);
1414 }
1415
1416 iq->cdesc = iq->desc;
1417 iq->cidx = 0;
1418 iq->gen = 1;
1419 iq->intr_next = iq->intr_params;
1420 iq->adapter = sc;
1421 iq->cntxt_id = be16_to_cpu(c.iqid);
1422 iq->abs_id = be16_to_cpu(c.physiqid);
1423 iq->flags |= IQ_ALLOCATED;
1424 mutex_init(&iq->lock, NULL,
1425 MUTEX_DRIVER, DDI_INTR_PRI(sc->intr_pri));
1426 iq->polling = 0;
1427
1428 cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1429 if (cntxt_id >= sc->sge.iqmap_sz) {
1430 panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1431 cntxt_id, sc->sge.iqmap_sz - 1);
1432 }
1433 sc->sge.iqmap[cntxt_id] = iq;
1434
1435 if (fl != NULL) {
1436 fl->cntxt_id = be16_to_cpu(c.fl0id);
1437 fl->pidx = fl->cidx = 0;
1438 fl->copy_threshold = rx_copy_threshold;
1439
1440 cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1441 if (cntxt_id >= sc->sge.eqmap_sz) {
1442 panic("%s: fl->cntxt_id (%d) more than the max (%d)",
1443 __func__, cntxt_id, sc->sge.eqmap_sz - 1);
1444 }
1445 sc->sge.eqmap[cntxt_id] = (void *)fl;
1446
1447 FL_LOCK(fl);
1448 (void) refill_fl(sc, fl, fl->lowat);
1449 FL_UNLOCK(fl);
1450
1451 iq->flags |= IQ_HAS_FL;
1452 }
1453
1454 if (is_t5(sc->params.chip) && cong >= 0) {
1455 uint32_t param, val;
1456
1457 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1458 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
1459 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
1460 if (cong == 0)
1461 val = 1 << 19;
1462 else {
1463 val = 2 << 19;
1464 for (i = 0; i < 4; i++) {
1465 if (cong & (1 << i))
1466 val |= 1 << (i << 2);
1467 }
1468 }
1469
1470 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
1471 if (rc != 0) {
1472 /* report error but carry on */
1473 cxgb_printf(sc->dip, CE_WARN,
1474 "failed to set congestion manager context for "
1475 "ingress queue %d: %d", iq->cntxt_id, rc);
1476 }
1477 }
1478
1479 /* Enable IQ interrupts */
1480 iq->state = IQS_IDLE;
1481 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1482 V_INGRESSQID(iq->cntxt_id));
1483
1484 return (0);
1485 }
1486
1487 static int
1488 free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1489 {
1490 int rc;
1491
1492 if (iq != NULL) {
1493 struct adapter *sc = iq->adapter;
1494 dev_info_t *dip;
1495
1496 dip = pi ? pi->dip : sc->dip;
1497 if (iq->flags & IQ_ALLOCATED) {
1498 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1499 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1500 fl ? fl->cntxt_id : 0xffff, 0xffff);
1501 if (rc != 0) {
1502 cxgb_printf(dip, CE_WARN,
1503 "failed to free queue %p: %d", iq, rc);
1504 return (rc);
1505 }
1506 mutex_destroy(&iq->lock);
1507 iq->flags &= ~IQ_ALLOCATED;
1508 }
1509
1510 if (iq->desc != NULL) {
1511 (void) free_desc_ring(&iq->dhdl, &iq->ahdl);
1512 iq->desc = NULL;
1513 }
1514
1515 bzero(iq, sizeof (*iq));
1516 }
1517
1518 if (fl != NULL) {
1519 if (fl->sdesc != NULL) {
1520 FL_LOCK(fl);
1521 free_fl_bufs(fl);
1522 FL_UNLOCK(fl);
1523
1524 kmem_free(fl->sdesc, sizeof (struct fl_sdesc) *
1525 fl->cap);
1526 fl->sdesc = NULL;
1527 }
1528
1529 if (fl->desc != NULL) {
1530 (void) free_desc_ring(&fl->dhdl, &fl->ahdl);
1531 fl->desc = NULL;
1532 }
1533
1534 if (fl->flags & FL_MTX) {
1535 mutex_destroy(&fl->lock);
1536 fl->flags &= ~FL_MTX;
1537 }
1538
1539 bzero(fl, sizeof (struct sge_fl));
1540 }
1541
1542 return (0);
1543 }
1544
1545 static int
1546 alloc_fwq(struct adapter *sc)
1547 {
1548 int rc, intr_idx;
1549 struct sge_iq *fwq = &sc->sge.fwq;
1550
1551 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
1552 fwq->flags |= IQ_INTR; /* always */
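/*
 * With multiple vectors, vector 0 handles errors and forwarded interrupts,
 * so the firmware event queue takes vector 1; otherwise everything shares
 * vector 0.
 */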
1553 intr_idx = sc->intr_count > 1 ? 1 : 0;
1554 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1555 if (rc != 0) {
1556 cxgb_printf(sc->dip, CE_WARN,
1557 "failed to create firmware event queue: %d.", rc);
1558 return (rc);
1559 }
1560
1561 return (0);
1562 }
1563
1564 static int
1565 free_fwq(struct adapter *sc)
1566 {
1567
1568 return (free_iq_fl(NULL, &sc->sge.fwq, NULL));
1569 }
1570
1571 #ifdef TCP_OFFLOAD_ENABLE
1572 static int
1573 alloc_mgmtq(struct adapter *sc)
1574 {
1575 int rc;
1576 struct sge_wrq *mgmtq = &sc->sge.mgmtq;
1577
1578 init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
1579 sc->sge.fwq.cntxt_id);
1580 rc = alloc_wrq(sc, NULL, mgmtq, 0);
1581 if (rc != 0) {
1582 cxgb_printf(sc->dip, CE_WARN,
1583 "failed to create management queue: %d\n", rc);
1584 return (rc);
1585 }
1586
1587 return (0);
1588 }
1589 #endif
1590
1591 static int
1592 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int i)
1593 {
1594 int rc;
1595
1596 rxq->port = pi;
1597 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx,
1598 t4_get_tp_ch_map(pi->adapter, pi->tx_chan));
1599 if (rc != 0)
1600 return (rc);
1601
1602 rxq->ksp = setup_rxq_kstats(pi, rxq, i);
1603
1604 return (rc);
1605 }
1606
1607 static int
1608 free_rxq(struct port_info *pi, struct sge_rxq *rxq)
1609 {
1610 int rc;
1611
1612 if (rxq->ksp != NULL) {
1613 kstat_delete(rxq->ksp);
1614 rxq->ksp = NULL;
1615 }
1616
1617 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
1618 if (rc == 0)
1619 bzero(&rxq->fl, sizeof (*rxq) - offsetof(struct sge_rxq, fl));
1620
1621 return (rc);
1622 }
1623
1624 #ifdef TCP_OFFLOAD_ENABLE
1625 static int
1626 alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
1627 int intr_idx)
1628 {
1629 int rc;
1630
1631 rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
1632 t4_get_tp_ch_map(pi->adapter, pi->tx_chan));
1633 if (rc != 0)
1634 return (rc);
1635
1636 return (rc);
1637 }
1638
1639 static int
1640 free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
1641 {
1642 int rc;
1643
1644 rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
1645 if (rc == 0)
1646 bzero(&ofld_rxq->fl, sizeof (*ofld_rxq) -
1647 offsetof(struct sge_ofld_rxq, fl));
1648
1649 return (rc);
1650 }
1651 #endif
1652
1653 static int
1654 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
1655 {
1656 int rc, cntxt_id;
1657 struct fw_eq_ctrl_cmd c;
1658
1659 bzero(&c, sizeof (c));
1660
1661 c.op_to_vfn = BE_32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
1662 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
1663 V_FW_EQ_CTRL_CMD_VFN(0));
1664 c.alloc_to_len16 = BE_32(F_FW_EQ_CTRL_CMD_ALLOC |
1665 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
1666 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* TODO */
1667 c.physeqid_pkd = BE_32(0);
1668 c.fetchszm_to_iqid =
1669 BE_32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1670 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
1671 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
1672 c.dcaen_to_eqsize =
1673 BE_32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1674 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1675 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1676 V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
1677 c.eqaddr = BE_64(eq->ba);
1678
1679 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
1680 if (rc != 0) {
1681 cxgb_printf(sc->dip, CE_WARN,
1682 "failed to create control queue %d: %d", eq->tx_chan, rc);
1683 return (rc);
1684 }
1685 eq->flags |= EQ_ALLOCATED;
1686
1687 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(BE_32(c.cmpliqid_eqid));
1688 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1689 if (cntxt_id >= sc->sge.eqmap_sz)
1690 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1691 cntxt_id, sc->sge.eqmap_sz - 1);
1692 sc->sge.eqmap[cntxt_id] = eq;
1693
1694 return (rc);
1695 }
1696
1697 static int
1698 eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
1699 {
1700 int rc, cntxt_id;
1701 struct fw_eq_eth_cmd c;
1702
1703 bzero(&c, sizeof (c));
1704
1705 c.op_to_vfn = BE_32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
1706 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
1707 V_FW_EQ_ETH_CMD_VFN(0));
1708 c.alloc_to_len16 = BE_32(F_FW_EQ_ETH_CMD_ALLOC |
1709 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
1710 c.autoequiqe_to_viid = BE_32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
1711 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(pi->viid));
1712 c.fetchszm_to_iqid =
1713 BE_32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1714 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
1715 V_FW_EQ_ETH_CMD_IQID(eq->iqid));
1716 c.dcaen_to_eqsize = BE_32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1717 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1718 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1719 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
1720 c.eqaddr = BE_64(eq->ba);
1721
1722 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
1723 if (rc != 0) {
1724 cxgb_printf(pi->dip, CE_WARN,
1725 "failed to create Ethernet egress queue: %d", rc);
1726 return (rc);
1727 }
1728 eq->flags |= EQ_ALLOCATED;
1729
1730 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(BE_32(c.eqid_pkd));
1731 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1732 if (cntxt_id >= sc->sge.eqmap_sz)
1733 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1734 cntxt_id, sc->sge.eqmap_sz - 1);
1735 sc->sge.eqmap[cntxt_id] = eq;
1736
1737 return (rc);
1738 }
1739
1740 #ifdef TCP_OFFLOAD_ENABLE
1741 static int
1742 ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
1743 {
1744 int rc, cntxt_id;
1745 struct fw_eq_ofld_cmd c;
1746
1747 bzero(&c, sizeof (c));
1748
1749 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
1750 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
1751 V_FW_EQ_OFLD_CMD_VFN(0));
1752 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
1753 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
1754 c.fetchszm_to_iqid =
1755 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1756 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
1757 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
1758 c.dcaen_to_eqsize =
1759 BE_32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1760 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1761 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1762 V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
1763 c.eqaddr = BE_64(eq->ba);
1764
1765 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
1766 if (rc != 0) {
1767 cxgb_printf(pi->dip, CE_WARN,
1768 "failed to create egress queue for TCP offload: %d", rc);
1769 return (rc);
1770 }
1771 eq->flags |= EQ_ALLOCATED;
1772
1773 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(BE_32(c.eqid_pkd));
1774 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1775 if (cntxt_id >= sc->sge.eqmap_sz)
1776 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1777 cntxt_id, sc->sge.eqmap_sz - 1);
1778 sc->sge.eqmap[cntxt_id] = eq;
1779
1780 return (rc);
1781 }
1782 #endif
1783
1784 static int
1785 alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
1786 {
1787 int rc;
1788 size_t len;
1789
1790 mutex_init(&eq->lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(sc->intr_pri));
1791 eq->flags |= EQ_MTX;
1792
1793 len = eq->qsize * EQ_ESIZE;
1794 rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &eq->desc_dhdl,
1795 &eq->desc_ahdl, &eq->ba, (caddr_t *)&eq->desc);
1796 if (rc != 0)
1797 return (rc);
1798
1799 eq->cap = eq->qsize - sc->sge.stat_len / EQ_ESIZE;
1800 eq->spg = (void *)&eq->desc[eq->cap];
1801 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
1802 eq->pidx = eq->cidx = 0;
1803 eq->doorbells = sc->doorbells;
1804
1805 switch (eq->flags & EQ_TYPEMASK) {
1806 case EQ_CTRL:
1807 rc = ctrl_eq_alloc(sc, eq);
1808 break;
1809
1810 case EQ_ETH:
1811 rc = eth_eq_alloc(sc, pi, eq);
1812 break;
1813
1814 #ifdef TCP_OFFLOAD_ENABLE
1815 case EQ_OFLD:
1816 rc = ofld_eq_alloc(sc, pi, eq);
1817 break;
1818 #endif
1819
1820 default:
1821 panic("%s: invalid eq type %d.", __func__,
1822 eq->flags & EQ_TYPEMASK);
1823 }
1824
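/*
 * If user doorbells are usable, locate this queue's doorbell in the BAR2
 * region: the context id selects a page (via the queues-per-page shift) and
 * a segment within it. DOORBELL_WCWR is dropped when the queue does not get
 * its own segment within the page.
 */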
1825 if (eq->doorbells &
1826 (DOORBELL_UDB | DOORBELL_UDBWC | DOORBELL_WCWR)) {
1827 uint32_t s_qpp = sc->sge.s_qpp;
1828 uint32_t mask = (1 << s_qpp) - 1;
1829 volatile uint8_t *udb;
1830
1831 udb = (volatile uint8_t *)sc->reg1p + UDBS_DB_OFFSET;
1832 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */
1833 eq->udb_qid = eq->cntxt_id & mask; /* id in page */
1834 if (eq->udb_qid > PAGE_SIZE / UDBS_SEG_SIZE)
1835 eq->doorbells &= ~DOORBELL_WCWR;
1836 else {
1837 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */
1838 eq->udb_qid = 0;
1839 }
1840 eq->udb = (volatile void *)udb;
1841 }
1842
1843 if (rc != 0) {
1844 cxgb_printf(sc->dip, CE_WARN,
1845 "failed to allocate egress queue(%d): %d",
1846 eq->flags & EQ_TYPEMASK, rc);
1847 }
1848
1849 return (rc);
1850 }
1851
1852 static int
1853 free_eq(struct adapter *sc, struct sge_eq *eq)
1854 {
1855 int rc;
1856
1857 if (eq->flags & EQ_ALLOCATED) {
1858 switch (eq->flags & EQ_TYPEMASK) {
1859 case EQ_CTRL:
1860 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
1861 eq->cntxt_id);
1862 break;
1863
1864 case EQ_ETH:
1865 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
1866 eq->cntxt_id);
1867 break;
1868
1869 #ifdef TCP_OFFLOAD_ENABLE
1870 case EQ_OFLD:
1871 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
1872 eq->cntxt_id);
1873 break;
1874 #endif
1875
1876 default:
1877 panic("%s: invalid eq type %d.", __func__,
1878 eq->flags & EQ_TYPEMASK);
1879 }
1880 if (rc != 0) {
1881 cxgb_printf(sc->dip, CE_WARN,
1882 "failed to free egress queue (%d): %d",
1883 eq->flags & EQ_TYPEMASK, rc);
1884 return (rc);
1885 }
1886 eq->flags &= ~EQ_ALLOCATED;
1887 }
1888
1889 if (eq->desc != NULL) {
1890 (void) free_desc_ring(&eq->desc_dhdl, &eq->desc_ahdl);
1891 eq->desc = NULL;
1892 }
1893
1894 if (eq->flags & EQ_MTX)
1895 mutex_destroy(&eq->lock);
1896
1897 bzero(eq, sizeof (*eq));
1898 return (0);
1899 }
1900
1901 #ifdef TCP_OFFLOAD_ENABLE
1902 /* ARGSUSED */
1903 static int
1904 alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
1905 int idx)
1906 {
1907 int rc;
1908
1909 rc = alloc_eq(sc, pi, &wrq->eq);
1910 if (rc != 0)
1911 return (rc);
1912
1913 wrq->adapter = sc;
1914 wrq->wr_list.head = NULL;
1915 wrq->wr_list.tail = NULL;
1916
1917 /*
1918 * TODO: use idx to figure out what kind of wrq this is and install
1919 * useful kstats for it.
1920 */
1921
1922 return (rc);
1923 }
1924
1925 static int
1926 free_wrq(struct adapter *sc, struct sge_wrq *wrq)
1927 {
1928 int rc;
1929
1930 rc = free_eq(sc, &wrq->eq);
1931 if (rc != 0)
1932 return (rc);
1933
1934 bzero(wrq, sizeof (*wrq));
1935 return (0);
1936 }
1937 #endif
1938
1939 static int
1940 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
1941 {
1942 int rc, i;
1943 struct adapter *sc = pi->adapter;
1944 struct sge_eq *eq = &txq->eq;
1945
1946 rc = alloc_eq(sc, pi, eq);
1947 if (rc != 0)
1948 return (rc);
1949
1950 txq->port = pi;
1951 txq->sdesc = kmem_zalloc(sizeof (struct tx_sdesc) * eq->cap, KM_SLEEP);
1952 txq->txb_size = eq->qsize * tx_copy_threshold;
1953 rc = alloc_tx_copybuffer(sc, txq->txb_size, &txq->txb_dhdl,
1954 &txq->txb_ahdl, &txq->txb_ba, &txq->txb_va);
1955 if (rc == 0)
1956 txq->txb_avail = txq->txb_size;
1957 else
1958 txq->txb_avail = txq->txb_size = 0;
1959
1960 /*
1961 * TODO: is this too low? Worst case would need around 4 times qsize
1962 * (all tx descriptors filled to the brim with SGLs, with each entry in
1963 * the SGL coming from a distinct DMA handle). Increase tx_dhdl_total
1964 * if you see too many dma_hdl_failed.
1965 */
1966 txq->tx_dhdl_total = eq->qsize * 2;
1967 txq->tx_dhdl = kmem_zalloc(sizeof (ddi_dma_handle_t) *
1968 txq->tx_dhdl_total, KM_SLEEP);
1969 for (i = 0; i < txq->tx_dhdl_total; i++) {
1970 rc = ddi_dma_alloc_handle(sc->dip, &sc->sge.dma_attr_tx,
1971 DDI_DMA_SLEEP, 0, &txq->tx_dhdl[i]);
1972 if (rc != DDI_SUCCESS) {
1973 cxgb_printf(sc->dip, CE_WARN,
1974 "%s: failed to allocate DMA handle (%d)",
1975 __func__, rc);
1976 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL);
1977 }
1978 txq->tx_dhdl_avail++;
1979 }
1980
1981 txq->ksp = setup_txq_kstats(pi, txq, idx);
1982
1983 return (rc);
1984 }
1985
1986 static int
1987 free_txq(struct port_info *pi, struct sge_txq *txq)
1988 {
1989 int i;
1990 struct adapter *sc = pi->adapter;
1991 struct sge_eq *eq = &txq->eq;
1992
1993 if (txq->ksp != NULL) {
1994 kstat_delete(txq->ksp);
1995 txq->ksp = NULL;
1996 }
1997
1998 if (txq->txb_va != NULL) {
1999 (void) free_desc_ring(&txq->txb_dhdl, &txq->txb_ahdl);
2000 txq->txb_va = NULL;
2001 }
2002
2003 if (txq->sdesc != NULL) {
2004 struct tx_sdesc *sd;
2005 ddi_dma_handle_t hdl;
2006
2007 TXQ_LOCK(txq);
2008 while (eq->cidx != eq->pidx) {
2009 sd = &txq->sdesc[eq->cidx];
2010
2011 for (i = sd->hdls_used; i; i--) {
2012 hdl = txq->tx_dhdl[txq->tx_dhdl_cidx];
2013 (void) ddi_dma_unbind_handle(hdl);
2014 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total)
2015 txq->tx_dhdl_cidx = 0;
2016 }
2017
2018 ASSERT(sd->m);
2019 freemsgchain(sd->m);
2020
2021 eq->cidx += sd->desc_used;
2022 if (eq->cidx >= eq->cap)
2023 eq->cidx -= eq->cap;
2024
2025 txq->txb_avail += txq->txb_used;
2026 }
2027 ASSERT(txq->tx_dhdl_cidx == txq->tx_dhdl_pidx);
2028 ASSERT(txq->txb_avail == txq->txb_size);
2029 TXQ_UNLOCK(txq);
2030
2031 kmem_free(txq->sdesc, sizeof (struct tx_sdesc) * eq->cap);
2032 txq->sdesc = NULL;
2033 }
2034
2035 if (txq->tx_dhdl != NULL) {
2036 for (i = 0; i < txq->tx_dhdl_total; i++) {
2037 if (txq->tx_dhdl[i] != NULL)
2038 ddi_dma_free_handle(&txq->tx_dhdl[i]);
2039 }
2040 }
2041
2042 (void) free_eq(sc, &txq->eq);
2043
2044 bzero(txq, sizeof (*txq));
2045 return (0);
2046 }
2047
2048 /*
2049 * Allocates a block of contiguous memory for DMA. Can be used to allocate
2050 * memory for descriptor rings or for tx/rx copy buffers.
2051 *
2052 * Caller does not have to clean up anything if this function fails; it cleans
2053 * up after itself.
2054 *
2055 * Caller provides the following:
2056 * len length of the block of memory to allocate.
2057 * flags DDI_DMA_* flags to use (CONSISTENT/STREAMING, READ/WRITE/RDWR)
2058 * acc_attr device access attributes for the allocation.
2059 * dma_attr DMA attributes for the allocation
2060 *
2061 * If the function is successful it fills up this information:
2062 * dma_hdl DMA handle for the allocated memory
2063 * acc_hdl access handle for the allocated memory
2064 * ba bus address of the allocated memory
2065 * va KVA of the allocated memory.
2066 */
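/*
 * Note: alloc_desc_ring() and alloc_tx_copybuffer() below are thin wrappers
 * around this routine; they differ only in the access/DMA attributes and
 * flags they pass in.
 */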
2067 static int
2068 alloc_dma_memory(struct adapter *sc, size_t len, int flags,
2069 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr,
2070 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
2071 uint64_t *pba, caddr_t *pva)
2072 {
2073 int rc;
2074 ddi_dma_handle_t dhdl;
2075 ddi_acc_handle_t ahdl;
2076 ddi_dma_cookie_t cookie;
2077 uint_t ccount;
2078 caddr_t va;
2079 size_t real_len;
2080
2081 *pva = NULL;
2082
2083 /*
2084 * DMA handle.
2085 */
2086 rc = ddi_dma_alloc_handle(sc->dip, dma_attr, DDI_DMA_SLEEP, 0, &dhdl);
2087 if (rc != DDI_SUCCESS) {
2088 cxgb_printf(sc->dip, CE_WARN,
2089 "failed to allocate DMA handle: %d", rc);
2090
2091 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL);
2092 }
2093
2094 /*
2095 * Memory suitable for DMA.
2096 */
2097 rc = ddi_dma_mem_alloc(dhdl, len, acc_attr,
2098 flags & DDI_DMA_CONSISTENT ? DDI_DMA_CONSISTENT : DDI_DMA_STREAMING,
2099 DDI_DMA_SLEEP, 0, &va, &real_len, &ahdl);
2100 if (rc != DDI_SUCCESS) {
2101 cxgb_printf(sc->dip, CE_WARN,
2102 "failed to allocate DMA memory: %d", rc);
2103
2104 ddi_dma_free_handle(&dhdl);
2105 return (ENOMEM);
2106 }
2107
2108 if (len != real_len) {
2109 cxgb_printf(sc->dip, CE_WARN,
2110 "%s: len (%u) != real_len (%u)\n", len, real_len);
2111 }
2112
2113 /*
2114 * DMA bindings.
2115 */
2116 rc = ddi_dma_addr_bind_handle(dhdl, NULL, va, real_len, flags, NULL,
2117 NULL, &cookie, &ccount);
2118 if (rc != DDI_DMA_MAPPED) {
2119 cxgb_printf(sc->dip, CE_WARN,
2120 "failed to map DMA memory: %d", rc);
2121
2122 ddi_dma_mem_free(&ahdl);
2123 ddi_dma_free_handle(&dhdl);
2124 return (ENOMEM);
2125 }
2126 if (ccount != 1) {
2127 cxgb_printf(sc->dip, CE_WARN,
2128 "unusable DMA mapping (%d segments)", ccount);
2129 (void) free_desc_ring(&dhdl, &ahdl);
return (ENOMEM);
2130 }
2131
2132 bzero(va, real_len);
2133 *dma_hdl = dhdl;
2134 *acc_hdl = ahdl;
2135 *pba = cookie.dmac_laddress;
2136 *pva = va;
2137
2138 return (0);
2139 }
2140
2141 static int
2142 free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl)
2143 {
2144 (void) ddi_dma_unbind_handle(*dhdl);
2145 ddi_dma_mem_free(ahdl);
2146 ddi_dma_free_handle(dhdl);
2147
2148 return (0);
2149 }
2150
2151 static int
2152 alloc_desc_ring(struct adapter *sc, size_t len, int rw,
2153 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
2154 uint64_t *pba, caddr_t *pva)
2155 {
2156 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_desc;
2157 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc;
2158
2159 return (alloc_dma_memory(sc, len, DDI_DMA_CONSISTENT | rw, acc_attr,
2160 dma_attr, dma_hdl, acc_hdl, pba, pva));
2161 }
2162
2163 static int
2164 free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl)
2165 {
2166 return (free_dma_memory(dhdl, ahdl));
2167 }
2168
2169 static int
2170 alloc_tx_copybuffer(struct adapter *sc, size_t len,
2171 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
2172 uint64_t *pba, caddr_t *pva)
2173 {
2174 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_tx;
2175 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc; /* NOT dma_attr_tx */
2176
2177 return (alloc_dma_memory(sc, len, DDI_DMA_STREAMING | DDI_DMA_WRITE,
2178 acc_attr, dma_attr, dma_hdl, acc_hdl, pba, pva));
2179 }
2180
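/*
 * Returns true when the descriptor at the current consumer index carries the
 * generation bit the driver expects, i.e. the hardware has posted a new
 * response since the last check.
 */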
2181 static inline bool
2182 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
2183 {
2184 (void) ddi_dma_sync(iq->dhdl, (uintptr_t)iq->cdesc -
2185 (uintptr_t)iq->desc, iq->esize, DDI_DMA_SYNC_FORKERNEL);
2186
2187 *ctrl = (void *)((uintptr_t)iq->cdesc +
2188 (iq->esize - sizeof (struct rsp_ctrl)));
2189
2190 return ((((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen));
2191 }
2192
2193 static inline void
2194 iq_next(struct sge_iq *iq)
2195 {
2196 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
2197 if (++iq->cidx == iq->qsize - 1) {
2198 iq->cidx = 0;
2199 iq->gen ^= 1;
2200 iq->cdesc = iq->desc;
2201 }
2202 }
2203
2204 /*
2205 * Fill up the freelist with up to nbufs buffers and maybe ring its doorbell.
2206 *
2207 * Returns non-zero to indicate that it should be added to the list of starving
2208 * freelists.
2209 */
2210 static int
2211 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
2212 {
2213 uint64_t *d = &fl->desc[fl->pidx];
2214 struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
2215
2216 FL_LOCK_ASSERT_OWNED(fl);
2217 ASSERT(nbufs >= 0);
2218
2219 if (nbufs > fl->needed)
2220 nbufs = fl->needed;
2221
2222 while (nbufs--) {
2223 if (sd->rxb != NULL) {
2224 if (sd->rxb->ref_cnt == 1) {
2225 /*
2226 * Buffer is available for recycling. Two ways
2227 * this can happen:
2228 *
2229 * a) All the packets DMA'd into it last time
2230 * around were within the rx_copy_threshold
2231 * and no part of the buffer was ever passed
2232 * up (ref_cnt never went over 1).
2233 *
2234 * b) Packets DMA'd into the buffer were passed
2235 * up but have all been freed by the upper
2236 * layers by now (ref_cnt went over 1 but is
2237 * now back to 1).
2238 *
2239 * Either way the bus address in the descriptor
2240 * ring is already valid.
2241 */
2242 ASSERT(*d == cpu_to_be64(sd->rxb->ba));
2243 d++;
2244 goto recycled;
2245 } else {
2246 /*
2247 * Buffer still in use and we need a
2248 * replacement. But first release our reference
2249 * on the existing buffer.
2250 */
2251 rxbuf_free(sd->rxb);
2252 }
2253 }
2254
2255 sd->rxb = rxbuf_alloc(sc->sge.rxbuf_cache, KM_NOSLEEP, 1);
2256 if (sd->rxb == NULL)
2257 break;
2258 *d++ = cpu_to_be64(sd->rxb->ba);
2259
2260 recycled: fl->pending++;
2261 sd++;
2262 fl->needed--;
2263 if (++fl->pidx == fl->cap) {
2264 fl->pidx = 0;
2265 sd = fl->sdesc;
2266 d = fl->desc;
2267 }
2268 }
2269
2270 if (fl->pending >= 8)
2271 ring_fl_db(sc, fl);
2272
2273 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
2274 }
2275
2276 #ifndef TAILQ_FOREACH_SAFE
2277 #define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
2278 for ((var) = TAILQ_FIRST((head)); \
2279 (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
2280 (var) = (tvar))
2281 #endif
2282
2283 /*
2284 * Attempt to refill all starving freelists.
2285 */
2286 static void
2287 refill_sfl(void *arg)
2288 {
2289 struct adapter *sc = arg;
2290 struct sge_fl *fl, *fl_temp;
2291
2292 mutex_enter(&sc->sfl_lock);
2293 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
2294 FL_LOCK(fl);
2295 (void) refill_fl(sc, fl, 64);
2296 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
2297 TAILQ_REMOVE(&sc->sfl, fl, link);
2298 fl->flags &= ~FL_STARVING;
2299 }
2300 FL_UNLOCK(fl);
2301 }
2302
2303 if (!TAILQ_EMPTY(&sc->sfl))
2304 sc->sfl_timer = timeout(refill_sfl, sc, drv_usectohz(100000));
2305 mutex_exit(&sc->sfl_lock);
2306 }
2307
2308 static void
2309 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
2310 {
2311 mutex_enter(&sc->sfl_lock);
2312 FL_LOCK(fl);
2313 if ((fl->flags & FL_DOOMED) == 0) {
2314 if (TAILQ_EMPTY(&sc->sfl) != 0) {
2315 sc->sfl_timer = timeout(refill_sfl, sc,
2316 drv_usectohz(100000));
2317 }
2318 fl->flags |= FL_STARVING;
2319 TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
2320 }
2321 FL_UNLOCK(fl);
2322 mutex_exit(&sc->sfl_lock);
2323 }
2324
2325 static void
2326 free_fl_bufs(struct sge_fl *fl)
2327 {
2328 struct fl_sdesc *sd;
2329 unsigned int i;
2330
2331 FL_LOCK_ASSERT_OWNED(fl);
2332
2333 for (i = 0; i < fl->cap; i++) {
2334 sd = &fl->sdesc[i];
2335
2336 if (sd->rxb != NULL) {
2337 rxbuf_free(sd->rxb);
2338 sd->rxb = NULL;
2339 }
2340 }
2341 }
2342
2343 /*
2344 * Note that fl->cidx and fl->offset are left unchanged in case of failure.
2345 */
2346 static mblk_t *
2347 get_fl_payload(struct adapter *sc, struct sge_fl *fl,
2348 uint32_t len_newbuf, int *fl_bufs_used)
2349 {
2350 struct mblk_pair frame = {0};
2351 struct rxbuf *rxb;
2352 mblk_t *m = NULL;
2353 uint_t nbuf = 0, len, copy, n;
2354 uint32_t cidx, offset, rcidx, roffset;
2355
2356 /*
2357 * The SGE won't pack a new frame into the current buffer if the entire
2358 * payload doesn't fit in the remaining space. Move on to the next buf
2359 * in that case.
2360 */
2361 rcidx = fl->cidx;
2362 roffset = fl->offset;
2363 if (fl->offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
2364 fl->offset = 0;
2365 if (++fl->cidx == fl->cap)
2366 fl->cidx = 0;
2367 nbuf++;
2368 }
2369 cidx = fl->cidx;
2370 offset = fl->offset;
2371
2372 len = G_RSPD_LEN(len_newbuf); /* pktshift + payload length */
2373 copy = (len <= fl->copy_threshold);
2374 if (copy != 0) {
2375 frame.head = m = allocb(len, BPRI_HI);
2376 if (m == NULL) {
2377 fl->allocb_fail++;
2378 cmn_err(CE_WARN,"%s: mbuf allocation failure "
2379 "count = %llu", __func__,
2380 (unsigned long long)fl->allocb_fail);
2381 fl->cidx = rcidx;
2382 fl->offset = roffset;
2383 return (NULL);
2384 }
2385 }
2386
2387 while (len) {
2388 rxb = fl->sdesc[cidx].rxb;
2389 n = min(len, rxb->buf_size - offset);
2390
2391 (void) ddi_dma_sync(rxb->dhdl, offset, n,
2392 DDI_DMA_SYNC_FORKERNEL);
2393
2394 if (copy != 0)
2395 bcopy(rxb->va + offset, m->b_wptr, n);
2396 else {
2397 m = desballoc((unsigned char *)rxb->va + offset, n,
2398 BPRI_HI, &rxb->freefunc);
2399 if (m == NULL) {
2400 fl->allocb_fail++;
2401 cmn_err(CE_WARN,
2402 "%s: mbuf allocation failure "
2403 "count = %llu", __func__,
2404 (unsigned long long)fl->allocb_fail);
2405 if (frame.head)
2406 freemsgchain(frame.head);
2407 fl->cidx = rcidx;
2408 fl->offset = roffset;
2409 return (NULL);
2410 }
2411 atomic_inc_uint(&rxb->ref_cnt);
2412 if (frame.head != NULL)
2413 frame.tail->b_cont = m;
2414 else
2415 frame.head = m;
2416 frame.tail = m;
2417 }
2418 m->b_wptr += n;
2419 len -= n;
2420 offset += roundup(n, sc->sge.fl_align);
2421 ASSERT(offset <= rxb->buf_size);
2422 if (offset == rxb->buf_size) {
2423 offset = 0;
2424 if (++cidx == fl->cap)
2425 cidx = 0;
2426 nbuf++;
2427 }
2428 }
2429
2430 fl->cidx = cidx;
2431 fl->offset = offset;
2432 (*fl_bufs_used) += nbuf;
2433
2434 ASSERT(frame.head != NULL);
2435 return (frame.head);
2436 }
2437
2438 /*
2439 * We'll do immediate data tx for non-LSO, but only when not coalescing. We're
2440 * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
2441 * of immediate data.
2442 */
2443 #define IMM_LEN ( \
2444 2 * EQ_ESIZE \
2445 - sizeof (struct fw_eth_tx_pkt_wr) \
2446 - sizeof (struct cpl_tx_pkt_core))
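/*
 * With 64-byte hardware descriptors and 16-byte fw_eth_tx_pkt_wr and
 * cpl_tx_pkt_core headers this evaluates to the 96 bytes noted above
 * (2 * 64 - 16 - 16).
 */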
2447
2448 /*
2449 * Returns non-zero on failure, no need to cleanup anything in that case.
2450 *
2451 * Note 1: We always try to pull up the mblk if required and return E2BIG only
2452 * if this fails.
2453 *
2454 * Note 2: We'll also pullup incoming mblk if HW_LSO is set and the first mblk
2455 * does not have the TCP header in it.
2456 */
2457 static int
2458 get_frame_txinfo(struct sge_txq *txq, mblk_t **fp, struct txinfo *txinfo,
2459 int sgl_only)
2460 {
2461 uint32_t flags = 0, len, n;
2462 mblk_t *m = *fp;
2463 int rc;
2464
2465 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb and dma_hdls */
2466
2467 mac_hcksum_get(m, NULL, NULL, NULL, NULL, &flags);
2468 txinfo->flags = (flags & HCK_TX_FLAGS);
2469
2470 mac_lso_get(m, &txinfo->mss, &flags);
2471 txinfo->flags |= (flags & HW_LSO_FLAGS);
2472
2473 if (flags & HW_LSO)
2474 sgl_only = 1; /* Do not allow immediate data with LSO */
2475
2476 /*
2477 * If checksum or segmentation offloads are requested, gather
2478 * information about the sizes and types of headers in the packet.
2479 */
2480 if (txinfo->flags != 0) {
2481 /*
2482 * Even if this fails, the meoi_flags field will be capable of
2483 * communicating the lack of useful packet information.
2484 */
2485 (void) mac_ether_offload_info(m, &txinfo->meoi);
2486 } else {
2487 bzero(&txinfo->meoi, sizeof (txinfo->meoi));
2488 }
2489
2490 start: txinfo->nsegs = 0;
2491 txinfo->hdls_used = 0;
2492 txinfo->txb_used = 0;
2493 txinfo->len = 0;
2494
2495 /* total length and a rough estimate of # of segments */
2496 n = 0;
2497 for (; m; m = m->b_cont) {
2498 len = MBLKL(m);
2499 n += (len / PAGE_SIZE) + 1;
2500 txinfo->len += len;
2501 }
2502 m = *fp;
2503
2504 if (n >= TX_SGL_SEGS || (flags & HW_LSO && MBLKL(m) < 50)) {
2505 txq->pullup_early++;
2506 m = msgpullup(*fp, -1);
2507 if (m == NULL) {
2508 txq->pullup_failed++;
2509 return (E2BIG); /* (*fp) left as it was */
2510 }
2511 freemsg(*fp);
2512 *fp = m;
2513 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags);
2514 }
2515
2516 if (txinfo->len <= IMM_LEN && !sgl_only)
2517 return (0); /* nsegs = 0 tells caller to use imm. tx */
2518
2519 if (txinfo->len <= txq->copy_threshold &&
2520 copy_into_txb(txq, m, txinfo->len, txinfo) == 0)
2521 goto done;
2522
2523 for (; m; m = m->b_cont) {
2524
2525 len = MBLKL(m);
2526
2527 /* Use tx copy buffer if this mblk is small enough */
2528 if (len <= txq->copy_threshold &&
2529 copy_into_txb(txq, m, len, txinfo) == 0)
2530 continue;
2531
2532 /* Add DMA bindings for this mblk to the SGL */
2533 rc = add_mblk(txq, txinfo, m, len);
2534
2535 if (rc == E2BIG ||
2536 (txinfo->nsegs == TX_SGL_SEGS && m->b_cont)) {
2537
2538 txq->pullup_late++;
2539 m = msgpullup(*fp, -1);
2540 if (m != NULL) {
2541 free_txinfo_resources(txq, txinfo);
2542 freemsg(*fp);
2543 *fp = m;
2544 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags);
2545 goto start;
2546 }
2547
2548 txq->pullup_failed++;
2549 rc = E2BIG;
2550 }
2551
2552 if (rc != 0) {
2553 free_txinfo_resources(txq, txinfo);
2554 return (rc);
2555 }
2556 }
2557
2558 ASSERT(txinfo->nsegs > 0 && txinfo->nsegs <= TX_SGL_SEGS);
2559
2560 done:
2561
2562 /*
2563 * Store the # of flits required to hold this frame's SGL in nflits. An
2564 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2565 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used
2566 * then len1 must be set to 0.
2567 */
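/*
 * For example, nsegs = 3 gives n = 2 and nflits = 3 + 0 + 2 = 5: one flit
 * for the ULPTX header plus len0, one for addr0, and three more for the
 * (len1|len2, addr1, addr2) pair.
 */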
2568 n = txinfo->nsegs - 1;
2569 txinfo->nflits = (3 * n) / 2 + (n & 1) + 2;
2570 if (n & 1)
2571 txinfo->sgl.sge[n / 2].len[1] = cpu_to_be32(0);
2572
2573 txinfo->sgl.cmd_nsge = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_DSGL) |
2574 V_ULPTX_NSGE(txinfo->nsegs));
2575
2576 return (0);
2577 }
2578
2579 static inline int
2580 fits_in_txb(struct sge_txq *txq, int len, int *waste)
2581 {
2582 if (txq->txb_avail < len)
2583 return (0);
2584
2585 if (txq->txb_next + len <= txq->txb_size) {
2586 *waste = 0;
2587 return (1);
2588 }
2589
2590 *waste = txq->txb_size - txq->txb_next;
2591
2592 return (txq->txb_avail - *waste < len ? 0 : 1);
2593 }
2594
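/*
 * Space in the tx copy buffer is reserved and reclaimed in TXB_CHUNK-byte
 * units so that txb_next always stays chunk aligned.
 */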
2595 #define TXB_CHUNK 64
2596
2597 /*
2598 * Copies the specified # of bytes into txq's tx copy buffer and updates txinfo
2599 * and txq to indicate resources used. Caller has to make sure that many
2600 * bytes are available in the mblk chain (b_cont linked).
2601 */
2602 static inline int
2603 copy_into_txb(struct sge_txq *txq, mblk_t *m, int len, struct txinfo *txinfo)
2604 {
2605 int waste, n;
2606
2607 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb */
2608
2609 if (!fits_in_txb(txq, len, &waste)) {
2610 txq->txb_full++;
2611 return (ENOMEM);
2612 }
2613
2614 if (waste != 0) {
2615 ASSERT((waste & (TXB_CHUNK - 1)) == 0);
2616 txinfo->txb_used += waste;
2617 txq->txb_avail -= waste;
2618 txq->txb_next = 0;
2619 }
2620
2621 for (n = 0; n < len; m = m->b_cont) {
2622 bcopy(m->b_rptr, txq->txb_va + txq->txb_next + n, MBLKL(m));
2623 n += MBLKL(m);
2624 }
2625
2626 add_seg(txinfo, txq->txb_ba + txq->txb_next, len);
2627
2628 n = roundup(len, TXB_CHUNK);
2629 txinfo->txb_used += n;
2630 txq->txb_avail -= n;
2631 txq->txb_next += n;
2632 ASSERT(txq->txb_next <= txq->txb_size);
2633 if (txq->txb_next == txq->txb_size)
2634 txq->txb_next = 0;
2635
2636 return (0);
2637 }
2638
2639 static inline void
2640 add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len)
2641 {
2642 ASSERT(txinfo->nsegs < TX_SGL_SEGS); /* must have room */
2643
2644 if (txinfo->nsegs != 0) {
2645 int idx = txinfo->nsegs - 1;
2646 txinfo->sgl.sge[idx / 2].len[idx & 1] = cpu_to_be32(len);
2647 txinfo->sgl.sge[idx / 2].addr[idx & 1] = cpu_to_be64(ba);
2648 } else {
2649 txinfo->sgl.len0 = cpu_to_be32(len);
2650 txinfo->sgl.addr0 = cpu_to_be64(ba);
2651 }
2652 txinfo->nsegs++;
2653 }
2654
2655 /*
2656 * This function cleans up any partially allocated resources when it fails so
2657 * there's nothing for the caller to clean up in that case.
2658 *
2659 * EIO indicates permanent failure. Caller should drop the frame containing
2660 * this mblk and continue.
2661 *
2662 * E2BIG indicates that the SGL length for this mblk exceeds the hardware
2663 * limit. Caller should pull up the frame before trying to send it out.
2664 * (This error means our pullup_early heuristic did not work for this frame)
2665 *
2666 * ENOMEM indicates a temporary shortage of resources (DMA handles, other DMA
2667 * resources, etc.). Caller should suspend the tx queue and wait for reclaim to
2668 * free up resources.
2669 */
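/*
 * get_frame_txinfo() above acts on these: E2BIG triggers a late pullup and a
 * retry of the whole frame, while EIO and ENOMEM are handed back to its
 * caller unchanged.
 */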
2670 static inline int
2671 add_mblk(struct sge_txq *txq, struct txinfo *txinfo, mblk_t *m, int len)
2672 {
2673 ddi_dma_handle_t dhdl;
2674 ddi_dma_cookie_t cookie;
2675 uint_t ccount = 0;
2676 int rc;
2677
2678 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate dhdls */
2679
2680 if (txq->tx_dhdl_avail == 0) {
2681 txq->dma_hdl_failed++;
2682 return (ENOMEM);
2683 }
2684
2685 dhdl = txq->tx_dhdl[txq->tx_dhdl_pidx];
2686 rc = ddi_dma_addr_bind_handle(dhdl, NULL, (caddr_t)m->b_rptr, len,
2687 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL, &cookie,
2688 &ccount);
2689 if (rc != DDI_DMA_MAPPED) {
2690 txq->dma_map_failed++;
2691
2692 ASSERT(rc != DDI_DMA_INUSE && rc != DDI_DMA_PARTIAL_MAP);
2693
2694 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EIO);
2695 }
2696
2697 if (ccount + txinfo->nsegs > TX_SGL_SEGS) {
2698 (void) ddi_dma_unbind_handle(dhdl);
2699 return (E2BIG);
2700 }
2701
2702 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size);
2703 while (--ccount) {
2704 ddi_dma_nextcookie(dhdl, &cookie);
2705 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size);
2706 }
2707
2708 if (++txq->tx_dhdl_pidx == txq->tx_dhdl_total)
2709 txq->tx_dhdl_pidx = 0;
2710 txq->tx_dhdl_avail--;
2711 txinfo->hdls_used++;
2712
2713 return (0);
2714 }
2715
2716 /*
2717 * Releases all the txq resources used up in the specified txinfo.
2718 */
2719 static void
2720 free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo)
2721 {
2722 int n;
2723
2724 TXQ_LOCK_ASSERT_OWNED(txq); /* dhdls, txb */
2725
2726 n = txinfo->txb_used;
2727 if (n > 0) {
2728 txq->txb_avail += n;
2729 if (n <= txq->txb_next)
2730 txq->txb_next -= n;
2731 else {
2732 n -= txq->txb_next;
2733 txq->txb_next = txq->txb_size - n;
2734 }
2735 }
2736
2737 for (n = txinfo->hdls_used; n > 0; n--) {
2738 if (txq->tx_dhdl_pidx > 0)
2739 txq->tx_dhdl_pidx--;
2740 else
2741 txq->tx_dhdl_pidx = txq->tx_dhdl_total - 1;
2742 txq->tx_dhdl_avail++;
2743 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_pidx]);
2744 }
2745 }
2746
2747 /*
2748 * Returns 0 to indicate that m has been accepted into a coalesced tx work
2749 * request. It has either been folded into txpkts or txpkts was flushed and m
2750 * has started a new coalesced work request (as the first frame in a fresh
2751 * txpkts).
2752 *
2753 * Returns non-zero to indicate a failure - caller is responsible for
2754 * transmitting m, if there was anything in txpkts it has been flushed.
2755 */
2756 static int
2757 add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m,
2758 struct txinfo *txinfo)
2759 {
2760 struct sge_eq *eq = &txq->eq;
2761 int can_coalesce;
2762 struct tx_sdesc *txsd;
2763 uint8_t flits;
2764
2765 TXQ_LOCK_ASSERT_OWNED(txq);
2766
2767 if (txpkts->npkt > 0) {
2768 flits = TXPKTS_PKT_HDR + txinfo->nflits;
2769 can_coalesce = (txinfo->flags & HW_LSO) == 0 &&
2770 txpkts->nflits + flits <= TX_WR_FLITS &&
2771 txpkts->nflits + flits <= eq->avail * 8 &&
2772 txpkts->plen + txinfo->len < 65536;
2773
2774 if (can_coalesce != 0) {
2775 txpkts->tail->b_next = m;
2776 txpkts->tail = m;
2777 txpkts->npkt++;
2778 txpkts->nflits += flits;
2779 txpkts->plen += txinfo->len;
2780
2781 txsd = &txq->sdesc[eq->pidx];
2782 txsd->txb_used += txinfo->txb_used;
2783 txsd->hdls_used += txinfo->hdls_used;
2784
2785 return (0);
2786 }
2787
2788 /*
2789 * Couldn't coalesce m into txpkts. The first order of business
2790 * is to send txpkts on its way. Then we'll revisit m.
2791 */
2792 write_txpkts_wr(txq, txpkts);
2793 }
2794
2795 /*
2796 * Check if we can start a new coalesced tx work request with m as
2797 * the first packet in it.
2798 */
2799
2800 ASSERT(txpkts->npkt == 0);
2801 ASSERT(txinfo->len < 65536);
2802
2803 flits = TXPKTS_WR_HDR + txinfo->nflits;
2804 can_coalesce = (txinfo->flags & HW_LSO) == 0 &&
2805 flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
2806
2807 if (can_coalesce == 0)
2808 return (EINVAL);
2809
2810 /*
2811 * Start a fresh coalesced tx WR with m as the first frame in it.
2812 */
2813 txpkts->tail = m;
2814 txpkts->npkt = 1;
2815 txpkts->nflits = flits;
2816 txpkts->flitp = &eq->desc[eq->pidx].flit[2];
2817 txpkts->plen = txinfo->len;
2818
2819 txsd = &txq->sdesc[eq->pidx];
2820 txsd->m = m;
2821 txsd->txb_used = txinfo->txb_used;
2822 txsd->hdls_used = txinfo->hdls_used;
2823
2824 return (0);
2825 }
2826
2827 /*
2828 * Note that write_txpkts_wr can never run out of hardware descriptors (but
2829 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for
2830 * coalescing only if sufficient hardware descriptors are available.
2831 */
2832 static void
2833 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
2834 {
2835 struct sge_eq *eq = &txq->eq;
2836 struct fw_eth_tx_pkts_wr *wr;
2837 struct tx_sdesc *txsd;
2838 uint32_t ctrl;
2839 uint16_t ndesc;
2840
2841 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */
2842
2843 ndesc = howmany(txpkts->nflits, 8);
2844
2845 wr = (void *)&eq->desc[eq->pidx];
2846 wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
2847 V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
2848 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
2849 if (eq->avail == ndesc)
2850 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2851 wr->equiq_to_len16 = cpu_to_be32(ctrl);
2852 wr->plen = cpu_to_be16(txpkts->plen);
2853 wr->npkt = txpkts->npkt;
2854 wr->r3 = wr->type = 0;
2855
2856 /* Everything else already written */
2857
2858 txsd = &txq->sdesc[eq->pidx];
2859 txsd->desc_used = ndesc;
2860
2861 txq->txb_used += txsd->txb_used / TXB_CHUNK;
2862 txq->hdl_used += txsd->hdls_used;
2863
2864 ASSERT(eq->avail >= ndesc);
2865
2866 eq->pending += ndesc;
2867 eq->avail -= ndesc;
2868 eq->pidx += ndesc;
2869 if (eq->pidx >= eq->cap)
2870 eq->pidx -= eq->cap;
2871
2872 txq->txpkts_pkts += txpkts->npkt;
2873 txq->txpkts_wrs++;
2874 txpkts->npkt = 0; /* emptied */
2875 }
2876
2877 typedef enum {
2878 COS_SUCCESS, /* ctrl flit contains proper bits for csum offload */
2879 COS_IGNORE, /* no csum offload requested */
2880 COS_FAIL, /* csum offload requested, but pkt data missing */
2881 } csum_offload_status_t;
2882 /*
2883 * Build a ctrl1 flit for checksum offload in CPL_TX_PKT_XT command
2884 */
2885 static csum_offload_status_t
2886 csum_to_ctrl(const struct txinfo *txinfo, uint32_t chip_version,
2887 uint64_t *ctrlp)
2888 {
2889 const mac_ether_offload_info_t *meoi = &txinfo->meoi;
2890 const uint32_t tx_flags = txinfo->flags;
2891 const boolean_t needs_l3_csum = (tx_flags & HW_LSO) != 0 ||
2892 (tx_flags & HCK_IPV4_HDRCKSUM) != 0;
2893 const boolean_t needs_l4_csum = (tx_flags & HW_LSO) != 0 ||
2894 (tx_flags & (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) != 0;
2895
2896 /*
2897 * Default to disabling any checksumming both for cases where it is not
2898 * requested, but also if we cannot appropriately interrogate the
2899 * required information from the packet.
2900 */
2901 uint64_t ctrl = F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS;
2902 if (!needs_l3_csum && !needs_l4_csum) {
2903 *ctrlp = ctrl;
2904 return (COS_IGNORE);
2905 }
2906
2907 if (needs_l3_csum) {
2908 /* Only IPv4 checksums are supported (for L3) */
2909 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0 ||
2910 meoi->meoi_l3proto != ETHERTYPE_IP) {
2911 *ctrlp = ctrl;
2912 return (COS_FAIL);
2913 }
2914 ctrl &= ~F_TXPKT_IPCSUM_DIS;
2915 }
2916
2917 if (needs_l4_csum) {
2918 /*
2919 * We need at least all of the L3 header to make decisions about
2920 * the contained L4 protocol. If not all of the L4 information
2921 * is present, we will leave it to the NIC to checksum all it is
2922 * able to.
2923 */
2924 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0) {
2925 *ctrlp = ctrl;
2926 return (COS_FAIL);
2927 }
2928
2929 /*
2930 * Since we are parsing the packet anyways, make the checksum
2931 * decision based on the L4 protocol, rather than using the
2932 * Generic TCP/UDP checksum using start & end offsets in the
2933 * packet (like requested with PARTIALCKSUM).
2934 */
2935 int csum_type = -1;
2936 if (meoi->meoi_l3proto == ETHERTYPE_IP &&
2937 meoi->meoi_l4proto == IPPROTO_TCP) {
2938 csum_type = TX_CSUM_TCPIP;
2939 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 &&
2940 meoi->meoi_l4proto == IPPROTO_TCP) {
2941 csum_type = TX_CSUM_TCPIP6;
2942 } else if (meoi->meoi_l3proto == ETHERTYPE_IP &&
2943 meoi->meoi_l4proto == IPPROTO_UDP) {
2944 csum_type = TX_CSUM_UDPIP;
2945 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 &&
2946 meoi->meoi_l4proto == IPPROTO_UDP) {
2947 csum_type = TX_CSUM_UDPIP6;
2948 } else {
2949 *ctrlp = ctrl;
2950 return (COS_FAIL);
2951 }
2952
2953 ASSERT(csum_type != -1);
2954 ctrl &= ~F_TXPKT_L4CSUM_DIS;
2955 ctrl |= V_TXPKT_CSUM_TYPE(csum_type);
2956 }
2957
2958 if ((ctrl & F_TXPKT_IPCSUM_DIS) == 0 &&
2959 (ctrl & F_TXPKT_L4CSUM_DIS) != 0) {
2960 /*
2961 * If only the IPv4 checksum is requested, we need to set an
2962 * appropriate type in the command for it.
2963 */
2964 ctrl |= V_TXPKT_CSUM_TYPE(TX_CSUM_IP);
2965 }
2966
2967 ASSERT(ctrl != (F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS));
2968
2969 /*
2970 * Fill in the requisite L2/L3 header length data.
2971 *
2972 * The Ethernet header length is recorded as 'size - 14 bytes'
2973 */
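/*
 * An untagged frame (l2hlen == 14) is therefore recorded as 0, and a frame
 * with a single VLAN tag (l2hlen == 18) as 4.
 */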
2974 const uint8_t eth_len = meoi->meoi_l2hlen - 14;
2975 if (chip_version >= CHELSIO_T6) {
2976 ctrl |= V_T6_TXPKT_ETHHDR_LEN(eth_len);
2977 } else {
2978 ctrl |= V_TXPKT_ETHHDR_LEN(eth_len);
2979 }
2980 ctrl |= V_TXPKT_IPHDR_LEN(meoi->meoi_l3hlen);
2981
2982 *ctrlp = ctrl;
2983 return (COS_SUCCESS);
2984 }
2985
2986 static int
2987 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m,
2988 struct txinfo *txinfo)
2989 {
2990 struct sge_eq *eq = &txq->eq;
2991 struct fw_eth_tx_pkt_wr *wr;
2992 struct cpl_tx_pkt_core *cpl;
2993 uint32_t ctrl; /* used in many unrelated places */
2994 uint64_t ctrl1;
2995 int nflits, ndesc;
2996 struct tx_sdesc *txsd;
2997 caddr_t dst;
2998 const mac_ether_offload_info_t *meoi = &txinfo->meoi;
2999
3000 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */
3001
3002 /*
3003 * Do we have enough flits to send this frame out?
3004 */
3005 ctrl = sizeof (struct cpl_tx_pkt_core);
3006 if (txinfo->flags & HW_LSO) {
3007 nflits = TXPKT_LSO_WR_HDR;
3008 ctrl += sizeof (struct cpl_tx_pkt_lso_core);
3009 } else
3010 nflits = TXPKT_WR_HDR;
3011 if (txinfo->nsegs > 0)
3012 nflits += txinfo->nflits;
3013 else {
3014 nflits += howmany(txinfo->len, 8);
3015 ctrl += txinfo->len;
3016 }
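/* Each hardware descriptor holds 8 flits (64 bytes). */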
3017 ndesc = howmany(nflits, 8);
3018 if (ndesc > eq->avail)
3019 return (ENOMEM);
3020
3021 /* Firmware work request header */
3022 wr = (void *)&eq->desc[eq->pidx];
3023 wr->op_immdlen = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
3024 V_FW_WR_IMMDLEN(ctrl));
3025 ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
3026 if (eq->avail == ndesc)
3027 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3028 wr->equiq_to_len16 = cpu_to_be32(ctrl);
3029 wr->r3 = 0;
3030
3031 if (txinfo->flags & HW_LSO &&
3032 (meoi->meoi_flags & MEOI_L4INFO_SET) != 0 &&
3033 meoi->meoi_l4proto == IPPROTO_TCP) {
3034 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
3035
3036 ctrl = V_LSO_OPCODE((u32)CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
3037 F_LSO_LAST_SLICE;
3038
3039 if (meoi->meoi_l2hlen > sizeof (struct ether_header)) {
3040 /*
3041 * This presently assumes a standard VLAN header,
3042 * without support for Q-in-Q.
3043 */
3044 ctrl |= V_LSO_ETHHDR_LEN(1);
3045 }
3046
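/* The LSO control expresses IP and TCP header lengths in 32-bit words. */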
3047 switch (meoi->meoi_l3proto) {
3048 case ETHERTYPE_IPV6:
3049 ctrl |= F_LSO_IPV6;
3050 /* FALLTHROUGH */
3051 case ETHERTYPE_IP:
3052 ctrl |= V_LSO_IPHDR_LEN(meoi->meoi_l3hlen / 4);
3053 break;
3054 default:
3055 break;
3056 }
3057
3058 ctrl |= V_LSO_TCPHDR_LEN(meoi->meoi_l4hlen / 4);
3059
3060 lso->lso_ctrl = cpu_to_be32(ctrl);
3061 lso->ipid_ofst = cpu_to_be16(0);
3062 lso->mss = cpu_to_be16(txinfo->mss);
3063 lso->seqno_offset = cpu_to_be32(0);
3064 if (is_t4(pi->adapter->params.chip))
3065 lso->len = cpu_to_be32(txinfo->len);
3066 else
3067 lso->len = cpu_to_be32(V_LSO_T5_XFER_SIZE(txinfo->len));
3068
3069 cpl = (void *)(lso + 1);
3070
3071 txq->tso_wrs++;
3072 } else {
3073 cpl = (void *)(wr + 1);
3074 }
3075
3076 /* Checksum offload */
3077 switch (csum_to_ctrl(txinfo,
3078 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl1)) {
3079 case COS_SUCCESS:
3080 txq->txcsum++;
3081 break;
3082 case COS_FAIL:
3083 /*
3084 * Packet will be going out with checksums which are probably
3085 * wrong but there is little we can do now.
3086 */
3087 txq->csum_failed++;
3088 break;
3089 default:
3090 break;
3091 }
3092
3093 /* CPL header */
3094 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
3095 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3096 cpl->pack = 0;
3097 cpl->len = cpu_to_be16(txinfo->len);
3098 cpl->ctrl1 = cpu_to_be64(ctrl1);
3099
3100 /* Software descriptor */
3101 txsd = &txq->sdesc[eq->pidx];
3102 txsd->m = m;
3103 txsd->txb_used = txinfo->txb_used;
3104 txsd->hdls_used = txinfo->hdls_used;
3105 /* LINTED: E_ASSIGN_NARROW_CONV */
3106 txsd->desc_used = ndesc;
3107
3108 txq->txb_used += txinfo->txb_used / TXB_CHUNK;
3109 txq->hdl_used += txinfo->hdls_used;
3110
3111 eq->pending += ndesc;
3112 eq->avail -= ndesc;
3113 eq->pidx += ndesc;
3114 if (eq->pidx >= eq->cap)
3115 eq->pidx -= eq->cap;
3116
3117 /* SGL */
3118 dst = (void *)(cpl + 1);
3119 if (txinfo->nsegs > 0) {
3120 txq->sgl_wrs++;
3121 copy_to_txd(eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8);
3122
3123 /* Need to zero-pad to a 16 byte boundary if not on one */
3124 if ((uintptr_t)dst & 0xf)
3125 /* LINTED: E_BAD_PTR_CAST_ALIGN */
3126 *(uint64_t *)dst = 0;
3127
3128 } else {
3129 txq->imm_wrs++;
3130 #ifdef DEBUG
3131 ctrl = txinfo->len;
3132 #endif
3133 for (; m; m = m->b_cont) {
3134 copy_to_txd(eq, (void *)m->b_rptr, &dst, MBLKL(m));
3135 #ifdef DEBUG
3136 ctrl -= MBLKL(m);
3137 #endif
3138 }
3139 ASSERT(ctrl == 0);
3140 }
3141
3142 txq->txpkt_wrs++;
3143 return (0);
3144 }
3145
3146 static inline void
3147 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
3148 struct txpkts *txpkts, struct txinfo *txinfo)
3149 {
3150 struct ulp_txpkt *ulpmc;
3151 struct ulptx_idata *ulpsc;
3152 struct cpl_tx_pkt_core *cpl;
3153 uintptr_t flitp, start, end;
3154 uint64_t ctrl;
3155 caddr_t dst;
3156
3157 ASSERT(txpkts->npkt > 0);
3158
3159 start = (uintptr_t)txq->eq.desc;
3160 end = (uintptr_t)txq->eq.spg;
3161
3162 /* Checksum offload */
3163 switch (csum_to_ctrl(txinfo,
3164 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl)) {
3165 case COS_SUCCESS:
3166 txq->txcsum++;
3167 break;
3168 case COS_FAIL:
3169 /*
3170 * Packet will be going out with checksums which are probably
3171 * wrong but there is little we can do now.
3172 */
3173 txq->csum_failed++;
3174 break;
3175 default:
3176 break;
3177 }
3178
3179 /*
3180 * The previous packet's SGL must have ended at a 16 byte boundary (this
3181 * is required by the firmware/hardware). It follows that flitp cannot
3182 * wrap around between the ULPTX master command and ULPTX subcommand (8
3183 * bytes each), and that it cannot wrap around in the middle of the
3184 * cpl_tx_pkt_core either.
3185 */
3186 flitp = (uintptr_t)txpkts->flitp;
3187 ASSERT((flitp & 0xf) == 0);
3188
3189 /* ULP master command */
3190 ulpmc = (void *)flitp;
3191 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
3192 ulpmc->len = htonl(howmany(sizeof (*ulpmc) + sizeof (*ulpsc) +
3193 sizeof (*cpl) + 8 * txinfo->nflits, 16));
3194
3195 /* ULP subcommand */
3196 ulpsc = (void *)(ulpmc + 1);
3197 ulpsc->cmd_more = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
3198 F_ULP_TX_SC_MORE);
3199 ulpsc->len = cpu_to_be32(sizeof (struct cpl_tx_pkt_core));
3200
3201 flitp += sizeof (*ulpmc) + sizeof (*ulpsc);
3202 if (flitp == end)
3203 flitp = start;
3204
3205 /* CPL_TX_PKT_XT */
3206 cpl = (void *)flitp;
3207 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
3208 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3209 cpl->pack = 0;
3210 cpl->len = cpu_to_be16(txinfo->len);
3211 cpl->ctrl1 = cpu_to_be64(ctrl);
3212
3213 flitp += sizeof (*cpl);
3214 if (flitp == end)
3215 flitp = start;
3216
3217 /* SGL for this frame */
3218 dst = (caddr_t)flitp;
3219 copy_to_txd(&txq->eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8);
3220 flitp = (uintptr_t)dst;
3221
3222 /* Zero pad and advance to a 16 byte boundary if not already at one. */
3223 if (flitp & 0xf) {
3224
3225 /* no matter what, flitp should be on an 8 byte boundary */
3226 ASSERT((flitp & 0x7) == 0);
3227
3228 *(uint64_t *)flitp = 0;
3229 flitp += sizeof (uint64_t);
3230 txpkts->nflits++;
3231 }
3232
3233 if (flitp == end)
3234 flitp = start;
3235
3236 txpkts->flitp = (void *)flitp;
3237 }
3238
3239 static inline void
3240 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
3241 {
3242 if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
3243 bcopy(from, *to, len);
3244 (*to) += len;
3245 } else {
3246 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
3247
3248 bcopy(from, *to, portion);
3249 from += portion;
3250 portion = len - portion; /* remaining */
3251 bcopy(from, (void *)eq->desc, portion);
3252 (*to) = (caddr_t)eq->desc + portion;
3253 }
3254 }
3255
3256 static inline void
3257 ring_tx_db(struct adapter *sc, struct sge_eq *eq)
3258 {
3259 int val, db_mode;
3260 u_int db = eq->doorbells;
3261
3262 if (eq->pending > 1)
3263 db &= ~DOORBELL_WCWR;
3264
3265 if (eq->pending > eq->pidx) {
3266 int offset = eq->cap - (eq->pending - eq->pidx);
3267
3268 /* pidx has wrapped around since last doorbell */
3269
3270 (void) ddi_dma_sync(eq->desc_dhdl,
3271 offset * sizeof (struct tx_desc), 0,
3272 DDI_DMA_SYNC_FORDEV);
3273 (void) ddi_dma_sync(eq->desc_dhdl,
3274 0, eq->pidx * sizeof (struct tx_desc),
3275 DDI_DMA_SYNC_FORDEV);
3276 } else if (eq->pending > 0) {
3277 (void) ddi_dma_sync(eq->desc_dhdl,
3278 (eq->pidx - eq->pending) * sizeof (struct tx_desc),
3279 eq->pending * sizeof (struct tx_desc),
3280 DDI_DMA_SYNC_FORDEV);
3281 }
3282
3283 membar_producer();
3284
3285 if (is_t4(sc->params.chip))
3286 val = V_PIDX(eq->pending);
3287 else
3288 val = V_PIDX_T5(eq->pending);
3289
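/* Pick the lowest-numbered doorbell mechanism still enabled for this EQ. */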
3290 db_mode = (1 << (ffs(db) - 1));
3291 switch (db_mode) {
3292 case DOORBELL_UDB:
3293 *eq->udb = LE_32(V_QID(eq->udb_qid) | val);
3294 break;
3295
3296 case DOORBELL_WCWR:
3297 {
3298 volatile uint64_t *dst, *src;
3299 int i;
3300 /*
3301 * Queues whose 128B doorbell segment fits in
3302 * the page do not use relative qid
3303 * (udb_qid is always 0). Only queues with
3304 * doorbell segments can do WCWR.
3305 */
3306 ASSERT(eq->udb_qid == 0 && eq->pending == 1);
3307
3308 dst = (volatile void *)((uintptr_t)eq->udb +
3309 UDBS_WR_OFFSET - UDBS_DB_OFFSET);
3310 i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
3311 src = (void *)&eq->desc[i];
3312 while (src != (void *)&eq->desc[i + 1])
3313 *dst++ = *src++;
3314 membar_producer();
3315 break;
3316 }
3317
3318 case DOORBELL_UDBWC:
3319 *eq->udb = LE_32(V_QID(eq->udb_qid) | val);
3320 membar_producer();
3321 break;
3322
3323 case DOORBELL_KDB:
3324 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
3325 V_QID(eq->cntxt_id) | val);
3326 break;
3327 }
3328
3329 eq->pending = 0;
3330 }
3331
3332 static int
3333 reclaim_tx_descs(struct sge_txq *txq, int howmany)
3334 {
3335 struct tx_sdesc *txsd;
3336 uint_t cidx, can_reclaim, reclaimed, txb_freed, hdls_freed;
3337 struct sge_eq *eq = &txq->eq;
3338
3339 EQ_LOCK_ASSERT_OWNED(eq);
3340
3341 cidx = eq->spg->cidx; /* stable snapshot */
3342 cidx = be16_to_cpu(cidx);
3343
3344 if (cidx >= eq->cidx)
3345 can_reclaim = cidx - eq->cidx;
3346 else
3347 can_reclaim = cidx + eq->cap - eq->cidx;
3348
3349 if (can_reclaim == 0)
3350 return (0);
3351
3352 txb_freed = hdls_freed = reclaimed = 0;
3353 do {
3354 int ndesc;
3355
3356 txsd = &txq->sdesc[eq->cidx];
3357 ndesc = txsd->desc_used;
3358
3359 /* Firmware doesn't return "partial" credits. */
3360 ASSERT(can_reclaim >= ndesc);
3361
3362 /*
3363 * We always keep mblk around, even for immediate data. If mblk
3364 * is NULL, this has to be the software descriptor for a credit
3365 * flush work request.
3366 */
3367 if (txsd->m != NULL)
3368 freemsgchain(txsd->m);
3369 #ifdef DEBUG
3370 else {
3371 ASSERT(txsd->txb_used == 0);
3372 ASSERT(txsd->hdls_used == 0);
3373 ASSERT(ndesc == 1);
3374 }
3375 #endif
3376
3377 txb_freed += txsd->txb_used;
3378 hdls_freed += txsd->hdls_used;
3379 reclaimed += ndesc;
3380
3381 eq->cidx += ndesc;
3382 if (eq->cidx >= eq->cap)
3383 eq->cidx -= eq->cap;
3384
3385 can_reclaim -= ndesc;
3386
3387 } while (can_reclaim && reclaimed < howmany);
3388
3389 eq->avail += reclaimed;
3390 ASSERT(eq->avail < eq->cap); /* avail tops out at (cap - 1) */
3391
3392 txq->txb_avail += txb_freed;
3393
3394 txq->tx_dhdl_avail += hdls_freed;
3395 ASSERT(txq->tx_dhdl_avail <= txq->tx_dhdl_total);
3396 for (; hdls_freed; hdls_freed--) {
3397 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_cidx]);
3398 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total)
3399 txq->tx_dhdl_cidx = 0;
3400 }
3401
3402 return (reclaimed);
3403 }
3404
3405 static void
3406 write_txqflush_wr(struct sge_txq *txq)
3407 {
3408 struct sge_eq *eq = &txq->eq;
3409 struct fw_eq_flush_wr *wr;
3410 struct tx_sdesc *txsd;
3411
3412 EQ_LOCK_ASSERT_OWNED(eq);
3413 ASSERT(eq->avail > 0);
3414
3415 wr = (void *)&eq->desc[eq->pidx];
3416 bzero(wr, sizeof (*wr));
3417 wr->opcode = FW_EQ_FLUSH_WR;
3418 wr->equiq_to_len16 = cpu_to_be32(V_FW_WR_LEN16(sizeof (*wr) / 16) |
3419 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
3420
3421 txsd = &txq->sdesc[eq->pidx];
3422 txsd->m = NULL;
3423 txsd->txb_used = 0;
3424 txsd->hdls_used = 0;
3425 txsd->desc_used = 1;
3426
3427 eq->pending++;
3428 eq->avail--;
3429 if (++eq->pidx == eq->cap)
3430 eq->pidx = 0;
3431 }
3432
3433 static int
3434 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m)
3435 {
3436 bool csum_ok;
3437 uint16_t err_vec;
3438 struct sge_rxq *rxq = (void *)iq;
3439 struct mblk_pair chain = {0};
3440 struct adapter *sc = iq->adapter;
3441 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
3442
3443 iq->intr_next = iq->intr_params;
3444
3445 m->b_rptr += sc->sge.pktshift;
3446
3447 /* Compressed error vector is enabled for T6 only */
3448 if (sc->params.tp.rx_pkt_encap)
3449 /* It is enabled only in T6 config file */
3450 err_vec = G_T6_COMPR_RXERR_VEC(ntohs(cpl->err_vec));
3451 else
3452 err_vec = ntohs(cpl->err_vec);
3453
3454 csum_ok = cpl->csum_calc && !err_vec;
3455 /* TODO: what about cpl->ip_frag? */
3456 if (csum_ok && !cpl->ip_frag) {
3457 mac_hcksum_set(m, 0, 0, 0, 0xffff,
3458 HCK_FULLCKSUM_OK | HCK_FULLCKSUM |
3459 HCK_IPV4_HDRCKSUM_OK);
3460 rxq->rxcsum++;
3461 }
3462
3463 /* Add to the chain that we'll send up */
3464 if (chain.head != NULL)
3465 chain.tail->b_next = m;
3466 else
3467 chain.head = m;
3468 chain.tail = m;
3469
3470 t4_mac_rx(rxq->port, rxq, chain.head);
3471
3472 rxq->rxpkts++;
3473 rxq->rxbytes += be16_to_cpu(cpl->len);
3474 return (0);
3475 }
3476
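/*
 * The SGE free-list doorbell works in units of 8 descriptors, so software
 * indices are converted to hardware units by dropping the low three bits.
 */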
3477 #define FL_HW_IDX(idx) ((idx) >> 3)
3478
3479 static inline void
3480 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
3481 {
3482 int desc_start, desc_last, ndesc;
3483 uint32_t v = sc->params.arch.sge_fl_db;
3484
3485 ndesc = FL_HW_IDX(fl->pending);
3486
3487 /* Hold back one credit if pidx = cidx */
3488 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
3489 ndesc--;
3490
3491 /*
3492 * ndesc may have been decremented above (to avoid pidx == cidx).
3493 * If there is nothing to post, return.
3494 */
3495 if (ndesc <= 0)
3496 return;
3497
3498 desc_last = FL_HW_IDX(fl->pidx);
3499
3500 if (fl->pidx < fl->pending) {
3501 /* There was a wrap */
3502 desc_start = FL_HW_IDX(fl->pidx + fl->cap - fl->pending);
3503
3504 /* From desc_start to the end of list */
3505 (void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE, 0,
3506 DDI_DMA_SYNC_FORDEV);
3507
3508 /* From start of list to the desc_last */
3509 if (desc_last != 0)
3510 (void) ddi_dma_sync(fl->dhdl, 0, desc_last *
3511 RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV);
3512 } else {
3513 /* There was no wrap, sync from start_desc to last_desc */
3514 desc_start = FL_HW_IDX(fl->pidx - fl->pending);
3515 (void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE,
3516 ndesc * RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV);
3517 }
3518
3519 if (is_t4(sc->params.chip))
3520 v |= V_PIDX(ndesc);
3521 else
3522 v |= V_PIDX_T5(ndesc);
3523 v |= V_QID(fl->cntxt_id) | V_PIDX(ndesc);
3524
3525 membar_producer();
3526
3527 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
3528
3529 /*
3530 * Update the pending count: deduct the buffers just posted
3531 * (8 per hardware descriptor).
3532 */
3533 fl->pending -= ndesc * 8;
3534 }
3535
3536 static void
3537 tx_reclaim_task(void *arg)
3538 {
3539 struct sge_txq *txq = arg;
3540
3541 TXQ_LOCK(txq);
3542 reclaim_tx_descs(txq, txq->eq.qsize);
3543 TXQ_UNLOCK(txq);
3544 }
3545
3546 /* ARGSUSED */
3547 static int
3548 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
3549 mblk_t *m)
3550 {
3551 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
3552 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
3553 struct adapter *sc = iq->adapter;
3554 struct sge *s = &sc->sge;
3555 struct sge_eq *eq;
3556 struct sge_txq *txq;
3557
3558 txq = (void *)s->eqmap[qid - s->eq_start];
3559 eq = &txq->eq;
3560 txq->qflush++;
3561 t4_mac_tx_update(txq->port, txq);
3562
3563 ddi_taskq_dispatch(sc->tq[eq->tx_chan], tx_reclaim_task,
3564 (void *)txq, DDI_NOSLEEP);
3565
3566 return (0);
3567 }
3568
3569 static int
3570 handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m)
3571 {
3572 struct adapter *sc = iq->adapter;
3573 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
3574
3575 ASSERT(m == NULL);
3576
3577 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
3578 const struct rss_header *rss2;
3579
3580 rss2 = (const struct rss_header *)&cpl->data[0];
3581 return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
3582 }
3583 return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
3584 }
3585
3586 int
3587 t4_alloc_tx_maps(struct adapter *sc, struct tx_maps *txmaps, int count,
3588 int flags)
3589 {
3590 int i, rc;
3591
3592 txmaps->map_total = count;
3593 txmaps->map_avail = txmaps->map_cidx = txmaps->map_pidx = 0;
3594
3595 txmaps->map = kmem_zalloc(sizeof (ddi_dma_handle_t) *
3596 txmaps->map_total, flags);
3597
3598 for (i = 0; i < count; i++) {
3599 rc = ddi_dma_alloc_handle(sc->dip, &sc->sge.dma_attr_tx,
3600 DDI_DMA_SLEEP, 0, &txmaps->map[i]);
3601 if (rc != DDI_SUCCESS) {
3602 cxgb_printf(sc->dip, CE_WARN,
3603 "%s: failed to allocate DMA handle (%d)",
3604 __func__, rc);
3605 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL);
3606 }
3607 txmaps->map_avail++;
3608 }
3609
3610 return (0);
3611 }
3612
3613 #define KS_UINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_ULONG)
3614 #define KS_CINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_CHAR)
3615 #define KS_U_SET(x, y) kstatp->x.value.ul = (y)
3616 #define KS_U_FROM(x, y) kstatp->x.value.ul = (y)->x
3617 #define KS_C_SET(x, ...) \
3618 (void) snprintf(kstatp->x.value.c, 16, __VA_ARGS__)
3619
3620 /*
3621 * cxgbe:X:config
3622 */
3623 struct cxgbe_port_config_kstats {
3624 kstat_named_t idx;
3625 kstat_named_t nrxq;
3626 kstat_named_t ntxq;
3627 kstat_named_t first_rxq;
3628 kstat_named_t first_txq;
3629 kstat_named_t controller;
3630 kstat_named_t factory_mac_address;
3631 };
3632
3633 /*
3634 * cxgbe:X:info
3635 */
3636 struct cxgbe_port_info_kstats {
3637 kstat_named_t transceiver;
3638 kstat_named_t rx_ovflow0;
3639 kstat_named_t rx_ovflow1;
3640 kstat_named_t rx_ovflow2;
3641 kstat_named_t rx_ovflow3;
3642 kstat_named_t rx_trunc0;
3643 kstat_named_t rx_trunc1;
3644 kstat_named_t rx_trunc2;
3645 kstat_named_t rx_trunc3;
3646 kstat_named_t tx_pause;
3647 kstat_named_t rx_pause;
3648 };
3649
3650 static kstat_t *
3651 setup_port_config_kstats(struct port_info *pi)
3652 {
3653 kstat_t *ksp;
3654 struct cxgbe_port_config_kstats *kstatp;
3655 int ndata;
3656 dev_info_t *pdip = ddi_get_parent(pi->dip);
3657 uint8_t *ma = &pi->hw_addr[0];
3658
3659 ndata = sizeof (struct cxgbe_port_config_kstats) /
3660 sizeof (kstat_named_t);
3661
3662 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "config",
3663 "net", KSTAT_TYPE_NAMED, ndata, 0);
3664 if (ksp == NULL) {
3665 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats.");
3666 return (NULL);
3667 }
3668
3669 kstatp = (struct cxgbe_port_config_kstats *)ksp->ks_data;
3670
3671 KS_UINIT(idx);
3672 KS_UINIT(nrxq);
3673 KS_UINIT(ntxq);
3674 KS_UINIT(first_rxq);
3675 KS_UINIT(first_txq);
3676 KS_CINIT(controller);
3677 KS_CINIT(factory_mac_address);
3678
3679 KS_U_SET(idx, pi->port_id);
3680 KS_U_SET(nrxq, pi->nrxq);
3681 KS_U_SET(ntxq, pi->ntxq);
3682 KS_U_SET(first_rxq, pi->first_rxq);
3683 KS_U_SET(first_txq, pi->first_txq);
3684 KS_C_SET(controller, "%s%d", ddi_driver_name(pdip),
3685 ddi_get_instance(pdip));
3686 KS_C_SET(factory_mac_address, "%02X%02X%02X%02X%02X%02X",
3687 ma[0], ma[1], ma[2], ma[3], ma[4], ma[5]);
3688
3689 /* Do NOT set ksp->ks_update. These kstats do not change. */
3690
3691 /* Install the kstat */
3692 ksp->ks_private = (void *)pi;
3693 kstat_install(ksp);
3694
3695 return (ksp);
3696 }
3697
3698 static kstat_t *
3699 setup_port_info_kstats(struct port_info *pi)
3700 {
3701 kstat_t *ksp;
3702 struct cxgbe_port_info_kstats *kstatp;
3703 int ndata;
3704
3705 ndata = sizeof (struct cxgbe_port_info_kstats) / sizeof (kstat_named_t);
3706
3707 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "info",
3708 "net", KSTAT_TYPE_NAMED, ndata, 0);
3709 if (ksp == NULL) {
3710 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats.");
3711 return (NULL);
3712 }
3713
3714 kstatp = (struct cxgbe_port_info_kstats *)ksp->ks_data;
3715
3716 KS_CINIT(transceiver);
3717 KS_UINIT(rx_ovflow0);
3718 KS_UINIT(rx_ovflow1);
3719 KS_UINIT(rx_ovflow2);
3720 KS_UINIT(rx_ovflow3);
3721 KS_UINIT(rx_trunc0);
3722 KS_UINIT(rx_trunc1);
3723 KS_UINIT(rx_trunc2);
3724 KS_UINIT(rx_trunc3);
3725 KS_UINIT(tx_pause);
3726 KS_UINIT(rx_pause);
3727
3728 /* Install the kstat */
3729 ksp->ks_update = update_port_info_kstats;
3730 ksp->ks_private = (void *)pi;
3731 kstat_install(ksp);
3732
3733 return (ksp);
3734 }
3735
3736 static int
3737 update_port_info_kstats(kstat_t *ksp, int rw)
3738 {
3739 struct cxgbe_port_info_kstats *kstatp =
3740 (struct cxgbe_port_info_kstats *)ksp->ks_data;
3741 struct port_info *pi = ksp->ks_private;
3742 static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX",
3743 "active TWINAX", "LRM" };
3744 uint32_t bgmap;
3745
3746 if (rw == KSTAT_WRITE)
3747 return (0);
3748
3749 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
3750 KS_C_SET(transceiver, "unplugged");
3751 else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
3752 KS_C_SET(transceiver, "unknown");
3753 else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
3754 KS_C_SET(transceiver, "unsupported");
3755 else if (pi->mod_type > 0 && pi->mod_type < ARRAY_SIZE(mod_str))
3756 KS_C_SET(transceiver, "%s", mod_str[pi->mod_type]);
3757 else
3758 KS_C_SET(transceiver, "type %d", pi->mod_type);
3759
3760 #define GET_STAT(name) t4_read_reg64(pi->adapter, \
3761 PORT_REG(pi->port_id, A_MPS_PORT_STAT_##name##_L))
3762 #define GET_STAT_COM(name) t4_read_reg64(pi->adapter, \
3763 A_MPS_STAT_##name##_L)
3764
3765 bgmap = G_NUMPORTS(t4_read_reg(pi->adapter, A_MPS_CMN_CTL));
3766 if (bgmap == 0)
3767 bgmap = (pi->port_id == 0) ? 0xf : 0;
3768 else if (bgmap == 1)
3769 bgmap = (pi->port_id < 2) ? (3 << (2 * pi->port_id)) : 0;
3770 else
3771 bgmap = 1;
3772
3773 KS_U_SET(rx_ovflow0, (bgmap & 1) ?
3774 GET_STAT_COM(RX_BG_0_MAC_DROP_FRAME) : 0);
3775 KS_U_SET(rx_ovflow1, (bgmap & 2) ?
3776 GET_STAT_COM(RX_BG_1_MAC_DROP_FRAME) : 0);
3777 KS_U_SET(rx_ovflow2, (bgmap & 4) ?
3778 GET_STAT_COM(RX_BG_2_MAC_DROP_FRAME) : 0);
3779 KS_U_SET(rx_ovflow3, (bgmap & 8) ?
3780 GET_STAT_COM(RX_BG_3_MAC_DROP_FRAME) : 0);
3781 KS_U_SET(rx_trunc0, (bgmap & 1) ?
3782 GET_STAT_COM(RX_BG_0_MAC_TRUNC_FRAME) : 0);
3783 KS_U_SET(rx_trunc1, (bgmap & 2) ?
3784 GET_STAT_COM(RX_BG_1_MAC_TRUNC_FRAME) : 0);
3785 KS_U_SET(rx_trunc2, (bgmap & 4) ?
3786 GET_STAT_COM(RX_BG_2_MAC_TRUNC_FRAME) : 0);
3787 KS_U_SET(rx_trunc3, (bgmap & 8) ?
3788 GET_STAT_COM(RX_BG_3_MAC_TRUNC_FRAME) : 0);
3789
3790 KS_U_SET(tx_pause, GET_STAT(TX_PORT_PAUSE));
3791 KS_U_SET(rx_pause, GET_STAT(RX_PORT_PAUSE));
3792
3793 return (0);
3794
3795 }
3796
3797 /*
3798 * cxgbe:X:rxqY
3799 */
3800 struct rxq_kstats {
3801 kstat_named_t rxcsum;
3802 kstat_named_t rxpkts;
3803 kstat_named_t rxbytes;
3804 kstat_named_t nomem;
3805 };
3806
3807 static kstat_t *
3808 setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq, int idx)
3809 {
3810 struct kstat *ksp;
3811 struct rxq_kstats *kstatp;
3812 int ndata;
3813 char str[16];
3814
3815 ndata = sizeof (struct rxq_kstats) / sizeof (kstat_named_t);
3816 (void) snprintf(str, sizeof (str), "rxq%u", idx);
3817
3818 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "rxq",
3819 KSTAT_TYPE_NAMED, ndata, 0);
3820 if (ksp == NULL) {
3821 cxgb_printf(pi->dip, CE_WARN,
3822 "%s: failed to initialize rxq kstats for queue %d.",
3823 __func__, idx);
3824 return (NULL);
3825 }
3826
3827 kstatp = (struct rxq_kstats *)ksp->ks_data;
3828
3829 KS_UINIT(rxcsum);
3830 KS_UINIT(rxpkts);
3831 KS_UINIT(rxbytes);
3832 KS_UINIT(nomem);
3833
3834 ksp->ks_update = update_rxq_kstats;
3835 ksp->ks_private = (void *)rxq;
3836 kstat_install(ksp);
3837
3838 return (ksp);
3839 }
3840
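/*
 * Snapshot the per-rxq counters on kstat read.  KS_U_FROM(field, rxq),
 * defined with the other KS_* helpers earlier in this file, presumably
 * copies the like-named counter straight out of the sge_rxq (e.g.
 * kstatp->rxpkts from rxq->rxpkts).  No queue lock is taken here, so the
 * values are a best-effort snapshot.
 */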
static int
update_rxq_kstats(kstat_t *ksp, int rw)
{
	struct rxq_kstats *kstatp = (struct rxq_kstats *)ksp->ks_data;
	struct sge_rxq *rxq = ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (0);

	KS_U_FROM(rxcsum, rxq);
	KS_U_FROM(rxpkts, rxq);
	KS_U_FROM(rxbytes, rxq);
	KS_U_FROM(nomem, rxq);

	return (0);
}

/*
 * cxgbe:X:txqY
 */
struct txq_kstats {
	kstat_named_t txcsum;
	kstat_named_t tso_wrs;
	kstat_named_t imm_wrs;
	kstat_named_t sgl_wrs;
	kstat_named_t txpkt_wrs;
	kstat_named_t txpkts_wrs;
	kstat_named_t txpkts_pkts;
	kstat_named_t txb_used;
	kstat_named_t hdl_used;
	kstat_named_t txb_full;
	kstat_named_t dma_hdl_failed;
	kstat_named_t dma_map_failed;
	kstat_named_t qfull;
	kstat_named_t qflush;
	kstat_named_t pullup_early;
	kstat_named_t pullup_late;
	kstat_named_t pullup_failed;
	kstat_named_t csum_failed;
};

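/*
 * Each txq_kstats field shadows the like-named counter in struct sge_txq;
 * setup_txq_kstats() registers the named kstats for one tx queue and
 * update_txq_kstats() below copies the counters out one-for-one on each
 * read.
 */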
static kstat_t *
setup_txq_kstats(struct port_info *pi, struct sge_txq *txq, int idx)
{
	struct kstat *ksp;
	struct txq_kstats *kstatp;
	int ndata;
	char str[16];

	ndata = sizeof (struct txq_kstats) / sizeof (kstat_named_t);
	(void) snprintf(str, sizeof (str), "txq%u", idx);

	ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "txq",
	    KSTAT_TYPE_NAMED, ndata, 0);
	if (ksp == NULL) {
		cxgb_printf(pi->dip, CE_WARN,
		    "%s: failed to initialize txq kstats for queue %d.",
		    __func__, idx);
		return (NULL);
	}

	kstatp = (struct txq_kstats *)ksp->ks_data;

	KS_UINIT(txcsum);
	KS_UINIT(tso_wrs);
	KS_UINIT(imm_wrs);
	KS_UINIT(sgl_wrs);
	KS_UINIT(txpkt_wrs);
	KS_UINIT(txpkts_wrs);
	KS_UINIT(txpkts_pkts);
	KS_UINIT(txb_used);
	KS_UINIT(hdl_used);
	KS_UINIT(txb_full);
	KS_UINIT(dma_hdl_failed);
	KS_UINIT(dma_map_failed);
	KS_UINIT(qfull);
	KS_UINIT(qflush);
	KS_UINIT(pullup_early);
	KS_UINIT(pullup_late);
	KS_UINIT(pullup_failed);
	KS_UINIT(csum_failed);

	ksp->ks_update = update_txq_kstats;
	ksp->ks_private = (void *)txq;
	kstat_install(ksp);

	return (ksp);
}

static int
update_txq_kstats(kstat_t *ksp, int rw)
{
	struct txq_kstats *kstatp = (struct txq_kstats *)ksp->ks_data;
	struct sge_txq *txq = ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (0);

	KS_U_FROM(txcsum, txq);
	KS_U_FROM(tso_wrs, txq);
	KS_U_FROM(imm_wrs, txq);
	KS_U_FROM(sgl_wrs, txq);
	KS_U_FROM(txpkt_wrs, txq);
	KS_U_FROM(txpkts_wrs, txq);
	KS_U_FROM(txpkts_pkts, txq);
	KS_U_FROM(txb_used, txq);
	KS_U_FROM(hdl_used, txq);
	KS_U_FROM(txb_full, txq);
	KS_U_FROM(dma_hdl_failed, txq);
	KS_U_FROM(dma_map_failed, txq);
	KS_U_FROM(qfull, txq);
	KS_U_FROM(qflush, txq);
	KS_U_FROM(pullup_early, txq);
	KS_U_FROM(pullup_late, txq);
	KS_U_FROM(pullup_failed, txq);
	KS_U_FROM(csum_failed, txq);

	return (0);
}
