1 /*
2 * Copyright (c) 2008-2016 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * The views and conclusions contained in the software and documentation are
27 * those of the authors and should not be interpreted as representing official
28 * policies, either expressed or implied, of the FreeBSD Project.
29 */
30
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/atomic.h>
36 #include <sys/stream.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/pattr.h>
40 #include <sys/cpu.h>
41
42 #include <sys/ethernet.h>
43 #include <inet/ip.h>
44
45 #include <netinet/in.h>
46 #include <netinet/ip.h>
47 #include <netinet/tcp.h>
48
49 #include "sfxge.h"
50
51 #include "efx.h"
52
53 /* TXQ flush response timeout (in microseconds) */
54 #define SFXGE_TX_QFLUSH_USEC (2000000)
55
56 /* See sfxge.conf.private for descriptions */
57 #define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT 4096
58 #define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 256
59
60
61 /* Transmit buffer DMA attributes */
62 static ddi_device_acc_attr_t sfxge_tx_buffer_devacc = {
63
64 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
65 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
66 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
67 };
68
69 static ddi_dma_attr_t sfxge_tx_buffer_dma_attr = {
70 DMA_ATTR_V0, /* dma_attr_version */
71 0, /* dma_attr_addr_lo */
72 0xffffffffffffffffull, /* dma_attr_addr_hi */
73 0xffffffffffffffffull, /* dma_attr_count_max */
74 SFXGE_TX_BUFFER_SIZE, /* dma_attr_align */
75 0xffffffff, /* dma_attr_burstsizes */
76 1, /* dma_attr_minxfer */
77 0xffffffffffffffffull, /* dma_attr_maxxfer */
78 0xffffffffffffffffull, /* dma_attr_seg */
79 1, /* dma_attr_sgllen */
80 1, /* dma_attr_granular */
81 0 /* dma_attr_flags */
82 };
83
84 /* Transmit mapping DMA attributes */
85 static ddi_dma_attr_t sfxge_tx_mapping_dma_attr = {
86 DMA_ATTR_V0, /* dma_attr_version */
87 0, /* dma_attr_addr_lo */
88 0xffffffffffffffffull, /* dma_attr_addr_hi */
89 0xffffffffffffffffull, /* dma_attr_count_max */
90 1, /* dma_attr_align */
91 0xffffffff, /* dma_attr_burstsizes */
92 1, /* dma_attr_minxfer */
93 0xffffffffffffffffull, /* dma_attr_maxxfer */
94 0xffffffffffffffffull, /* dma_attr_seg */
95 0x7fffffff, /* dma_attr_sgllen */
96 1, /* dma_attr_granular */
97 0 /* dma_attr_flags */
98 };
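/*
 * Note the contrast with the buffer attributes above: transmit buffers must
 * bind to a single DMA cookie (dma_attr_sgllen of 1, aligned to
 * SFXGE_TX_BUFFER_SIZE), whereas transmit mappings of arbitrary mblks may
 * scatter across an effectively unlimited number of cookies.
 */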
99
100 /* Transmit queue DMA attributes */
101 static ddi_device_acc_attr_t sfxge_txq_devacc = {
102
103 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
104 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
105 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
106 };
107
108 static ddi_dma_attr_t sfxge_txq_dma_attr = {
109 DMA_ATTR_V0, /* dma_attr_version */
110 0, /* dma_attr_addr_lo */
111 0xffffffffffffffffull, /* dma_attr_addr_hi */
112 0xffffffffffffffffull, /* dma_attr_count_max */
113 EFX_BUF_SIZE, /* dma_attr_align */
114 0xffffffff, /* dma_attr_burstsizes */
115 1, /* dma_attr_minxfer */
116 0xffffffffffffffffull, /* dma_attr_maxxfer */
117 0xffffffffffffffffull, /* dma_attr_seg */
118 1, /* dma_attr_sgllen */
119 1, /* dma_attr_granular */
120 0 /* dma_attr_flags */
121 };
122
123
124 /*
125  * An sfxge_tx_qdpl_swizzle() can happen when the DPL get list is one packet
126  * under the limit and must then move all packets from the DPL put list to
127  * the get list; hence this is the real maximum length of the TX DPL get list.
128 */
129 static int
130 sfxge_tx_dpl_get_pkt_max(sfxge_txq_t *stp)
131 {
132 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
133 return (stdp->get_pkt_limit + stdp->put_pkt_limit - 1);
134 }
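/*
 * For example (a sketch, assuming the default limits above are not overridden
 * via sfxge.conf): with get_pkt_limit = 4096 and put_pkt_limit = 256 the get
 * list may momentarily hold up to 4096 + 256 - 1 = 4351 packets.
 */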
135
136
137 static int
138 sfxge_tx_packet_ctor(void *buf, void *arg, int kmflags)
139 {
140 _NOTE(ARGUNUSED(arg, kmflags))
141
142 bzero(buf, sizeof (sfxge_tx_packet_t));
143
144 return (0);
145 }
146
147 static void
148 sfxge_tx_packet_dtor(void *buf, void *arg)
149 {
150 sfxge_tx_packet_t *stpp = buf;
151
152 _NOTE(ARGUNUSED(arg))
153
154 SFXGE_OBJ_CHECK(stpp, sfxge_tx_packet_t);
155 }
156
157 static int
158 sfxge_tx_buffer_ctor(void *buf, void *arg, int kmflags)
159 {
160 sfxge_tx_buffer_t *stbp = buf;
161 sfxge_t *sp = arg;
162 sfxge_dma_buffer_attr_t dma_attr;
163 int rc;
164
165 bzero(buf, sizeof (sfxge_tx_buffer_t));
166
167 dma_attr.sdba_dip = sp->s_dip;
168 dma_attr.sdba_dattrp = &sfxge_tx_buffer_dma_attr;
169 dma_attr.sdba_callback = ((kmflags == KM_SLEEP) ?
170 DDI_DMA_SLEEP : DDI_DMA_DONTWAIT);
171 dma_attr.sdba_length = SFXGE_TX_BUFFER_SIZE;
172 dma_attr.sdba_memflags = DDI_DMA_STREAMING;
173 dma_attr.sdba_devaccp = &sfxge_tx_buffer_devacc;
174 dma_attr.sdba_bindflags = DDI_DMA_WRITE | DDI_DMA_STREAMING;
175 dma_attr.sdba_maxcookies = 1;
176 dma_attr.sdba_zeroinit = B_FALSE;
177
178 if ((rc = sfxge_dma_buffer_create(&(stbp->stb_esm), &dma_attr)) != 0)
179 goto fail1;
180
181 return (0);
182
183 fail1:
184 DTRACE_PROBE1(fail1, int, rc);
185
186 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t);
187
188 return (-1);
189 }
190
191 static void
192 sfxge_tx_buffer_dtor(void *buf, void *arg)
193 {
194 sfxge_tx_buffer_t *stbp = buf;
195
196 _NOTE(ARGUNUSED(arg))
197
198 sfxge_dma_buffer_destroy(&(stbp->stb_esm));
199
200 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t);
201 }
202
203 static int
204 sfxge_tx_mapping_ctor(void *buf, void *arg, int kmflags)
205 {
206 sfxge_tx_mapping_t *stmp = buf;
207 sfxge_t *sp = arg;
208 dev_info_t *dip = sp->s_dip;
209 int rc;
210
211 bzero(buf, sizeof (sfxge_tx_mapping_t));
212
213 stmp->stm_sp = sp;
214
215 /* Allocate DMA handle */
216 rc = ddi_dma_alloc_handle(dip, &sfxge_tx_mapping_dma_attr,
217 (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
218 NULL, &(stmp->stm_dma_handle));
219 if (rc != DDI_SUCCESS)
220 goto fail1;
221
222 return (0);
223
224 fail1:
225 DTRACE_PROBE1(fail1, int, rc);
226
227 stmp->stm_sp = NULL;
228
229 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t);
230
231 return (-1);
232 }
233
234 static void
235 sfxge_tx_mapping_dtor(void *buf, void *arg)
236 {
237 sfxge_tx_mapping_t *stmp = buf;
238
239 ASSERT3P(stmp->stm_sp, ==, arg);
240
241 /* Free the DMA handle */
242 ddi_dma_free_handle(&(stmp->stm_dma_handle));
243 stmp->stm_dma_handle = NULL;
244
245 stmp->stm_sp = NULL;
246
247 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t);
248 }
249
250 static int
251 sfxge_tx_qctor(void *buf, void *arg, int kmflags)
252 {
253 sfxge_txq_t *stp = buf;
254 efsys_mem_t *esmp = &(stp->st_mem);
255 sfxge_t *sp = arg;
256 sfxge_dma_buffer_attr_t dma_attr;
257 sfxge_tx_dpl_t *stdp;
258 int rc;
259
260 /* Compile-time structure layout checks */
261 EFX_STATIC_ASSERT(sizeof (stp->__st_u1.__st_s1) <=
262 sizeof (stp->__st_u1.__st_pad));
263 EFX_STATIC_ASSERT(sizeof (stp->__st_u2.__st_s2) <=
264 sizeof (stp->__st_u2.__st_pad));
265 EFX_STATIC_ASSERT(sizeof (stp->__st_u3.__st_s3) <=
266 sizeof (stp->__st_u3.__st_pad));
267 EFX_STATIC_ASSERT(sizeof (stp->__st_u4.__st_s4) <=
268 sizeof (stp->__st_u4.__st_pad));
269
270 bzero(buf, sizeof (sfxge_txq_t));
271
272 stp->st_sp = sp;
273
274 dma_attr.sdba_dip = sp->s_dip;
275 dma_attr.sdba_dattrp = &sfxge_txq_dma_attr;
276 dma_attr.sdba_callback = DDI_DMA_SLEEP;
277 dma_attr.sdba_length = EFX_TXQ_SIZE(SFXGE_TX_NDESCS);
278 dma_attr.sdba_memflags = DDI_DMA_CONSISTENT;
279 dma_attr.sdba_devaccp = &sfxge_txq_devacc;
280 dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT;
281 dma_attr.sdba_maxcookies = EFX_TXQ_NBUFS(SFXGE_TX_NDESCS);
282 dma_attr.sdba_zeroinit = B_FALSE;
283
284 if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
285 goto fail1;
286
287 /* Allocate some buffer table entries */
288 if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS),
289 &(stp->st_id))) != 0)
290 goto fail2;
291
292 /* Allocate the descriptor array */
293 if ((stp->st_eb = kmem_zalloc(sizeof (efx_buffer_t) *
294 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS), kmflags)) == NULL) {
295 rc = ENOMEM;
296 goto fail3;
297 }
298
299 /* Allocate the context arrays */
300 if ((stp->st_stmp = kmem_zalloc(sizeof (sfxge_tx_mapping_t *) *
301 SFXGE_TX_NDESCS, kmflags)) == NULL) {
302 rc = ENOMEM;
303 goto fail4;
304 }
305
306 if ((stp->st_stbp = kmem_zalloc(sizeof (sfxge_tx_buffer_t *) *
307 SFXGE_TX_NDESCS, kmflags)) == NULL) {
308 rc = ENOMEM;
309 goto fail5;
310 }
311
312 if ((stp->st_mp = kmem_zalloc(sizeof (mblk_t *) *
313 SFXGE_TX_NDESCS, kmflags)) == NULL) {
314 rc = ENOMEM;
315 goto fail6;
316 }
317
318 /* Initialize the deferred packet list */
319 stdp = &(stp->st_dpl);
320 stdp->std_getp = &(stdp->std_get);
321
322 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
323
324 return (0);
325
326 fail6:
327 DTRACE_PROBE(fail6);
328
329 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS);
330 stp->st_stbp = NULL;
331
332 fail5:
333 DTRACE_PROBE(fail5);
334
335 kmem_free(stp->st_stmp,
336 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS);
337 stp->st_stmp = NULL;
338
339 fail4:
340 DTRACE_PROBE(fail4);
341
342 /* Free the descriptor array */
343 kmem_free(stp->st_eb, sizeof (efx_buffer_t) *
344 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
345 stp->st_eb = NULL;
346
347 fail3:
348 DTRACE_PROBE(fail3);
349
350 /* Free the buffer table entries */
351 sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
352 stp->st_id = 0;
353
354 fail2:
355 DTRACE_PROBE(fail2);
356
357 /* Tear down DMA setup */
358 sfxge_dma_buffer_destroy(esmp);
359
360 fail1:
361 DTRACE_PROBE1(fail1, int, rc);
362
363 stp->st_sp = NULL;
364
365 SFXGE_OBJ_CHECK(stp, sfxge_txq_t);
366
367 return (-1);
368 }
369
370 static void
371 sfxge_tx_qdtor(void *buf, void *arg)
372 {
373 sfxge_txq_t *stp = buf;
374 efsys_mem_t *esmp = &(stp->st_mem);
375 sfxge_t *sp = stp->st_sp;
376 sfxge_tx_dpl_t *stdp;
377
378 _NOTE(ARGUNUSED(arg))
379
380 stp->st_unblock = 0;
381
382 /* Tear down the deferred packet list */
383 stdp = &(stp->st_dpl);
384 ASSERT3P(stdp->std_getp, ==, &(stdp->std_get));
385 stdp->std_getp = NULL;
386
387 /* Free the context arrays */
388 kmem_free(stp->st_mp, sizeof (mblk_t *) * SFXGE_TX_NDESCS);
389 stp->st_mp = NULL;
390
391 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS);
392 stp->st_stbp = NULL;
393
394 kmem_free(stp->st_stmp,
395 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS);
396 stp->st_stmp = NULL;
397
398 /* Free the descriptor array */
399 kmem_free(stp->st_eb, sizeof (efx_buffer_t) *
400 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
401 stp->st_eb = NULL;
402
403 /* Free the buffer table entries */
404 sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
405 stp->st_id = 0;
406
407 /* Tear down dma setup */
408 sfxge_dma_buffer_destroy(esmp);
409
410 stp->st_sp = NULL;
411
412 SFXGE_OBJ_CHECK(stp, sfxge_txq_t);
413 }
414
415 static void
416 sfxge_tx_packet_destroy(sfxge_t *sp, sfxge_tx_packet_t *stpp)
417 {
418 kmem_cache_free(sp->s_tpc, stpp);
419 }
420
421 static sfxge_tx_packet_t *
422 sfxge_tx_packet_create(sfxge_t *sp)
423 {
424 sfxge_tx_packet_t *stpp;
425
426 stpp = kmem_cache_alloc(sp->s_tpc, KM_NOSLEEP);
427
428 return (stpp);
429 }
430
431 static inline int
432 sfxge_tx_qfpp_put(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp)
433 {
434 sfxge_tx_fpp_t *stfp = &(stp->st_fpp);
435
436 ASSERT(mutex_owned(&(stp->st_lock)));
437
438 ASSERT3P(stpp->stp_next, ==, NULL);
439 ASSERT3P(stpp->stp_mp, ==, NULL);
440 ASSERT3P(stpp->stp_etherhp, ==, NULL);
441 ASSERT3P(stpp->stp_iphp, ==, NULL);
442 ASSERT3P(stpp->stp_thp, ==, NULL);
443 ASSERT3U(stpp->stp_off, ==, 0);
444 ASSERT3U(stpp->stp_size, ==, 0);
445 ASSERT3U(stpp->stp_mss, ==, 0);
446 ASSERT3U(stpp->stp_dpl_put_len, ==, 0);
447
448 if (stfp->stf_count < SFXGE_TX_FPP_MAX) {
449 /* Add to the start of the list */
450 stpp->stp_next = stfp->stf_stpp;
451 stfp->stf_stpp = stpp;
452 stfp->stf_count++;
453
454 return (0);
455 }
456
457 DTRACE_PROBE(fpp_full);
458 return (ENOSPC);
459 }
460
461 static inline sfxge_tx_packet_t *
462 sfxge_tx_qfpp_get(sfxge_txq_t *stp)
463 {
464 sfxge_tx_packet_t *stpp;
465 sfxge_tx_fpp_t *stfp = &(stp->st_fpp);
466
467 ASSERT(mutex_owned(&(stp->st_lock)));
468
469 stpp = stfp->stf_stpp;
470 if (stpp == NULL) {
471 ASSERT3U(stfp->stf_count, ==, 0);
472 return (NULL);
473 }
474
475 /* Remove item from the head of the list */
476 stfp->stf_stpp = stpp->stp_next;
477 stpp->stp_next = NULL;
478
479 ASSERT3U(stfp->stf_count, >, 0);
480 stfp->stf_count--;
481
482 if (stfp->stf_count != 0) {
483 ASSERT(stfp->stf_stpp != NULL);
484 prefetch_read_many(stfp->stf_stpp);
485 }
486 return (stpp);
487 }
488
489 static void
490 sfxge_tx_qfpp_empty(sfxge_txq_t *stp)
491 {
492 sfxge_t *sp = stp->st_sp;
493 sfxge_tx_fpp_t *stfp = &(stp->st_fpp);
494 sfxge_tx_packet_t *stpp;
495
496 mutex_enter(&(stp->st_lock));
497
498 stpp = stfp->stf_stpp;
499 stfp->stf_stpp = NULL;
500
501 while (stpp != NULL) {
502 sfxge_tx_packet_t *next;
503
504 next = stpp->stp_next;
505 stpp->stp_next = NULL;
506
507 ASSERT3U(stfp->stf_count, >, 0);
508 stfp->stf_count--;
509
510 sfxge_tx_packet_destroy(sp, stpp);
511
512 stpp = next;
513 }
514 ASSERT3U(stfp->stf_count, ==, 0);
515
516 mutex_exit(&(stp->st_lock));
517 }
518
519 static inline void
520 sfxge_tx_qfbp_put(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp)
521 {
522 sfxge_tx_fbp_t *stfp = &(stp->st_fbp);
523
524 ASSERT3P(stbp->stb_next, ==, NULL);
525 ASSERT3U(stbp->stb_off, ==, 0);
526 ASSERT3U(stbp->stb_esm.esm_used, ==, 0);
527
528 stbp->stb_next = stfp->stf_stbp;
529 stfp->stf_stbp = stbp;
530 stfp->stf_count++;
531 }
532
533
534 static inline sfxge_tx_buffer_t *
535 sfxge_tx_qfbp_get(sfxge_txq_t *stp)
536 {
537 sfxge_tx_buffer_t *stbp;
538 sfxge_tx_fbp_t *stfp = &(stp->st_fbp);
539
540 stbp = stfp->stf_stbp;
541 if (stbp == NULL) {
542 ASSERT3U(stfp->stf_count, ==, 0);
543 return (NULL);
544 }
545
546 stfp->stf_stbp = stbp->stb_next;
547 stbp->stb_next = NULL;
548
549 ASSERT3U(stfp->stf_count, >, 0);
550 stfp->stf_count--;
551
552 if (stfp->stf_count != 0) {
553 ASSERT(stfp->stf_stbp != NULL);
554 prefetch_read_many(stfp->stf_stbp);
555 }
556
557 return (stbp);
558 }
559
560 static void
561 sfxge_tx_qfbp_empty(sfxge_txq_t *stp)
562 {
563 sfxge_t *sp = stp->st_sp;
564 sfxge_tx_fbp_t *stfp = &(stp->st_fbp);
565 sfxge_tx_buffer_t *stbp;
566
567 mutex_enter(&(stp->st_lock));
568
569 stbp = stfp->stf_stbp;
570 stfp->stf_stbp = NULL;
571
572 while (stbp != NULL) {
573 sfxge_tx_buffer_t *next;
574
575 next = stbp->stb_next;
576 stbp->stb_next = NULL;
577
578 ASSERT3U(stfp->stf_count, >, 0);
579 stfp->stf_count--;
580
581 kmem_cache_free(sp->s_tbc, stbp);
582
583 stbp = next;
584 }
585 ASSERT3U(stfp->stf_count, ==, 0);
586
587 mutex_exit(&(stp->st_lock));
588 }
589
590 static inline void
591 sfxge_tx_qfmp_put(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp)
592 {
593 sfxge_tx_fmp_t *stfp = &(stp->st_fmp);
594
595 ASSERT3P(stmp->stm_next, ==, NULL);
596 ASSERT3P(stmp->stm_mp, ==, NULL);
597 ASSERT3P(stmp->stm_base, ==, NULL);
598 ASSERT3U(stmp->stm_off, ==, 0);
599 ASSERT3U(stmp->stm_size, ==, 0);
600
601 stmp->stm_next = stfp->stf_stmp;
602 stfp->stf_stmp = stmp;
603 stfp->stf_count++;
604 }
605
606 static inline sfxge_tx_mapping_t *
607 sfxge_tx_qfmp_get(sfxge_txq_t *stp)
608 {
609 sfxge_tx_mapping_t *stmp;
610 sfxge_tx_fmp_t *stfp = &(stp->st_fmp);
611
612 stmp = stfp->stf_stmp;
613 if (stmp == NULL) {
614 ASSERT3U(stfp->stf_count, ==, 0);
615 return (NULL);
616 }
617
618 stfp->stf_stmp = stmp->stm_next;
619 stmp->stm_next = NULL;
620
621 ASSERT3U(stfp->stf_count, >, 0);
622 stfp->stf_count--;
623
624 if (stfp->stf_count != 0) {
625 ASSERT(stfp->stf_stmp != NULL);
626 prefetch_read_many(stfp->stf_stmp);
627 }
628 return (stmp);
629 }
630
631 static void
632 sfxge_tx_qfmp_empty(sfxge_txq_t *stp)
633 {
634 sfxge_t *sp = stp->st_sp;
635 sfxge_tx_fmp_t *stfp = &(stp->st_fmp);
636 sfxge_tx_mapping_t *stmp;
637
638 mutex_enter(&(stp->st_lock));
639
640 stmp = stfp->stf_stmp;
641 stfp->stf_stmp = NULL;
642
643 while (stmp != NULL) {
644 sfxge_tx_mapping_t *next;
645
646 next = stmp->stm_next;
647 stmp->stm_next = NULL;
648
649 ASSERT3U(stfp->stf_count, >, 0);
650 stfp->stf_count--;
651
652 kmem_cache_free(sp->s_tmc, stmp);
653
654 stmp = next;
655 }
656 ASSERT3U(stfp->stf_count, ==, 0);
657
658 mutex_exit(&(stp->st_lock));
659 }
660
661 static void
662 sfxge_tx_msgb_unbind(sfxge_tx_mapping_t *stmp)
663 {
664 bzero(stmp->stm_addr, sizeof (uint64_t) * SFXGE_TX_MAPPING_NADDR);
665 stmp->stm_off = 0;
666
667 (void) ddi_dma_unbind_handle(stmp->stm_dma_handle);
668
669 stmp->stm_size = 0;
670 stmp->stm_base = NULL;
671
672 stmp->stm_mp = NULL;
673 }
674
675 #define SFXGE_TX_DESCSHIFT 12
676 #define SFXGE_TX_DESCSIZE (1 << 12)
677
678 #define SFXGE_TX_DESCOFFSET (SFXGE_TX_DESCSIZE - 1)
679 #define SFXGE_TX_DESCMASK (~SFXGE_TX_DESCOFFSET)
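/*
 * These macros split a DMA address into a 4 KiB "descriptor page" base and an
 * offset within that page. As a rough example, an address of 0x12345678 would
 * yield a base of 0x12345000 (addr & SFXGE_TX_DESCMASK) and an offset of
 * 0x678 (addr & SFXGE_TX_DESCOFFSET).
 */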
680
681 static int
682 sfxge_tx_msgb_bind(mblk_t *mp, sfxge_tx_mapping_t *stmp)
683 {
684 ddi_dma_cookie_t dmac;
685 unsigned int ncookies;
686 size_t size;
687 unsigned int n;
688 int rc;
689
690 ASSERT(mp != NULL);
691 ASSERT3U(DB_TYPE(mp), ==, M_DATA);
692
693 ASSERT(stmp->stm_mp == NULL);
694 stmp->stm_mp = mp;
695
696 stmp->stm_base = (caddr_t)(mp->b_rptr);
697 stmp->stm_size = MBLKL(mp);
698
699 /* Bind the STREAMS block to the mapping */
700 rc = ddi_dma_addr_bind_handle(stmp->stm_dma_handle, NULL,
701 stmp->stm_base, stmp->stm_size, DDI_DMA_WRITE | DDI_DMA_STREAMING,
702 DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);
703 if (rc != DDI_DMA_MAPPED)
704 goto fail1;
705
706 ASSERT3U(ncookies, <=, SFXGE_TX_MAPPING_NADDR);
707
708 /*
709 * Construct an array of addresses and an initial
710 * offset.
711 */
712 n = 0;
713 stmp->stm_addr[n++] = dmac.dmac_laddress & SFXGE_TX_DESCMASK;
714 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress & SFXGE_TX_DESCMASK);
715
716 stmp->stm_off = dmac.dmac_laddress & SFXGE_TX_DESCOFFSET;
717
718 size = MIN(SFXGE_TX_DESCSIZE - stmp->stm_off, dmac.dmac_size);
719 dmac.dmac_laddress += size;
720 dmac.dmac_size -= size;
721
722 for (;;) {
723 ASSERT3U(n, <, SFXGE_TX_MAPPING_NADDR);
724
725 if (dmac.dmac_size == 0) {
726 if (--ncookies == 0)
727 break;
728
729 ddi_dma_nextcookie(stmp->stm_dma_handle, &dmac);
730 }
731
732 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCMASK) != 0);
733 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCOFFSET) == 0);
734 stmp->stm_addr[n++] = dmac.dmac_laddress;
735 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress);
736
737 size = MIN(SFXGE_TX_DESCSIZE, dmac.dmac_size);
738 dmac.dmac_laddress += size;
739 dmac.dmac_size -= size;
740 }
741 ASSERT3U(n, <=, SFXGE_TX_MAPPING_NADDR);
742
743 return (0);
744
745 fail1:
746 DTRACE_PROBE1(fail1, int, rc);
747
748 stmp->stm_size = 0;
749 stmp->stm_base = NULL;
750
751 stmp->stm_mp = NULL;
752
753 return (-1);
754 }
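/*
 * Note: sfxge_tx_msgb_bind() records only 4 KiB-aligned page addresses in
 * stm_addr[]; the sub-page offset of the first byte is kept in stm_off.
 * Subsequent DMA cookies are expected to start on page boundaries (see the
 * assertions above), so sfxge_tx_qmapping_add() can later reconstruct any
 * byte address as stm_addr[page] + page_off.
 */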
755
756 static void
757 sfxge_tx_qreap(sfxge_txq_t *stp)
758 {
759 unsigned int reaped;
760
761 ASSERT(mutex_owned(&(stp->st_lock)));
762
763 reaped = stp->st_reaped;
764 while (reaped != stp->st_completed) {
765 unsigned int id;
766 sfxge_tx_mapping_t *stmp;
767 sfxge_tx_buffer_t *stbp;
768
769 id = reaped++ & (SFXGE_TX_NDESCS - 1);
770
771 ASSERT3P(stp->st_mp[id], ==, NULL);
772
773 if ((stmp = stp->st_stmp[id]) != NULL) {
774 stp->st_stmp[id] = NULL;
775
776 /* Free all the mappings */
777 do {
778 sfxge_tx_mapping_t *next;
779
780 next = stmp->stm_next;
781 stmp->stm_next = NULL;
782
783 sfxge_tx_qfmp_put(stp, stmp);
784
785 stmp = next;
786 } while (stmp != NULL);
787 }
788
789 if ((stbp = stp->st_stbp[id]) != NULL) {
790 stp->st_stbp[id] = NULL;
791
792 /* Free all the buffers */
793 do {
794 sfxge_tx_buffer_t *next;
795
796 next = stbp->stb_next;
797 stbp->stb_next = NULL;
798
799 stbp->stb_esm.esm_used = 0;
800 stbp->stb_off = 0;
801
802 sfxge_tx_qfbp_put(stp, stbp);
803
804 stbp = next;
805 } while (stbp != NULL);
806 }
807 }
808 stp->st_reaped = reaped;
809 }
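/*
 * Note that reaping only recycles DMA mappings and copy buffers for
 * descriptors in the range [st_reaped, st_completed); the ASSERT above
 * relies on st_mp[id] already having been cleared (and any mblk released)
 * by the time a descriptor is reaped.
 */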
810
811 static void
812 sfxge_tx_qlist_abort(sfxge_txq_t *stp)
813 {
814 unsigned int id;
815 sfxge_tx_mapping_t *stmp;
816 sfxge_tx_buffer_t *stbp;
817 mblk_t *mp;
818
819 ASSERT(mutex_owned(&(stp->st_lock)));
820
821 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
822
823 /* Clear the completion information */
824 stmp = stp->st_stmp[id];
825 stp->st_stmp[id] = NULL;
826
827 /* Free any mappings that were used */
828 while (stmp != NULL) {
829 sfxge_tx_mapping_t *next;
830
831 next = stmp->stm_next;
832 stmp->stm_next = NULL;
833
834 if (stmp->stm_mp != NULL)
835 sfxge_tx_msgb_unbind(stmp);
836
837 sfxge_tx_qfmp_put(stp, stmp);
838
839 stmp = next;
840 }
841
842 stbp = stp->st_stbp[id];
843 stp->st_stbp[id] = NULL;
844
845 /* Free any buffers that were used */
846 while (stbp != NULL) {
847 sfxge_tx_buffer_t *next;
848
849 next = stbp->stb_next;
850 stbp->stb_next = NULL;
851
852 stbp->stb_off = 0;
853 stbp->stb_esm.esm_used = 0;
854
855 sfxge_tx_qfbp_put(stp, stbp);
856
857 stbp = next;
858 }
859
860 mp = stp->st_mp[id];
861 stp->st_mp[id] = NULL;
862
863 if (mp != NULL)
864 freemsg(mp);
865
866 /* Clear the fragment list */
867 stp->st_n = 0;
868 }
869
870 /* Push descriptors to the TX ring, blocking the queue if there is no space */
871 static void
872 sfxge_tx_qlist_post(sfxge_txq_t *stp)
873 {
874 unsigned int id;
875 unsigned int level;
876 unsigned int available;
877 int rc;
878
879 ASSERT(mutex_owned(&(stp->st_lock)));
880
881 ASSERT(stp->st_n != 0);
882
883 again:
884 level = stp->st_added - stp->st_reaped;
885 available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level;
886
887 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
888
889 if (available < stp->st_n) {
890 rc = ENOSPC;
891 goto fail1;
892 }
893
894 ASSERT3U(available, >=, stp->st_n);
895
896 /* Post the fragment list */
897 if ((rc = efx_tx_qpost(stp->st_etp, stp->st_eb, stp->st_n,
898 stp->st_reaped, &(stp->st_added))) != 0)
899 goto fail2;
900
901 /*
902  * If the list took more than a single descriptor then we need to
903  * move the completion information so that it is referenced by the last
904 * descriptor.
905 */
906 if (((stp->st_added - 1) & (SFXGE_TX_NDESCS - 1)) != id) {
907 sfxge_tx_mapping_t *stmp;
908 sfxge_tx_buffer_t *stbp;
909 mblk_t *mp;
910
911 stmp = stp->st_stmp[id];
912 stp->st_stmp[id] = NULL;
913
914 stbp = stp->st_stbp[id];
915 stp->st_stbp[id] = NULL;
916
917 mp = stp->st_mp[id];
918 stp->st_mp[id] = NULL;
919
920 id = (stp->st_added - 1) & (SFXGE_TX_NDESCS - 1);
921
922 ASSERT(stp->st_stmp[id] == NULL);
923 stp->st_stmp[id] = stmp;
924
925 ASSERT(stp->st_stbp[id] == NULL);
926 stp->st_stbp[id] = stbp;
927
928 ASSERT(stp->st_mp[id] == NULL);
929 stp->st_mp[id] = mp;
930 }
931
932 /* Clear the list */
933 stp->st_n = 0;
934
935 ASSERT3U(stp->st_unblock, ==, SFXGE_TXQ_NOT_BLOCKED);
936 return;
937
938 fail2:
939 DTRACE_PROBE(fail2);
940 fail1:
941 DTRACE_PROBE1(fail1, int, rc);
942
943 ASSERT(rc == ENOSPC);
944
945 level = stp->st_added - stp->st_completed;
946 available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level;
947
948 /*
949  * If there would be enough space after we have reaped any completed
950  * mappings and buffers, and at least a full batch of descriptors can be
951  * reaped, then reap now and try posting again.
952 */
953 if (stp->st_n <= available &&
954 stp->st_completed - stp->st_reaped >= SFXGE_TX_BATCH) {
955 sfxge_tx_qreap(stp);
956
957 goto again;
958 }
959
960 /* Set the unblock level */
961 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED) {
962 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL1;
963 } else {
964 ASSERT(stp->st_unblock == SFXGE_TXQ_UNBLOCK_LEVEL1);
965
966 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL2;
967 }
968
969 /*
970 * Avoid a race with completion interrupt handling that could leave the
971 * queue blocked.
972 *
973 * NOTE: The use of st_pending rather than st_completed is intentional
974 * as st_pending is updated per-event rather than per-batch and
975 * therefore avoids needless deferring.
976 */
977 if (stp->st_pending == stp->st_added) {
978 sfxge_tx_qreap(stp);
979
980 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
981 goto again;
982 }
983
984 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED);
985 }
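/*
 * A note on the unblock levels (a sketch based on the logic above and on
 * sfxge_tx_qunblock() below): the first failed post sets the queue to
 * SFXGE_TXQ_UNBLOCK_LEVEL1 and a subsequent failure moves it to
 * SFXGE_TXQ_UNBLOCK_LEVEL2. sfxge_tx_qunblock() later compares the queue
 * fill level (st_added - st_completed) against this value before treating
 * the queue as unblocked.
 */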
986
987 static int
988 sfxge_tx_kstat_update(kstat_t *ksp, int rw)
989 {
990 sfxge_txq_t *stp = ksp->ks_private;
991 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
992 kstat_named_t *knp;
993 int rc;
994
995 ASSERT(mutex_owned(&(stp->st_lock)));
996
997 if (rw != KSTAT_READ) {
998 rc = EACCES;
999 goto fail1;
1000 }
1001
1002 if (stp->st_state != SFXGE_TXQ_STARTED)
1003 goto done;
1004
1005 efx_tx_qstats_update(stp->st_etp, stp->st_stat);
1006 knp = (kstat_named_t *)ksp->ks_data + TX_NQSTATS;
1007 knp->value.ui64 = stdp->get_pkt_limit;
1008 knp++;
1009 knp->value.ui64 = stdp->put_pkt_limit;
1010 knp++;
1011 knp->value.ui64 = stdp->get_full_count;
1012 knp++;
1013 knp->value.ui64 = stdp->put_full_count;
1014
1015 done:
1016 return (0);
1017
1018 fail1:
1019 DTRACE_PROBE1(fail1, int, rc);
1020
1021 return (rc);
1022 }
1023
1024 static int
1025 sfxge_tx_kstat_init(sfxge_txq_t *stp)
1026 {
1027 sfxge_t *sp = stp->st_sp;
1028 unsigned int index = stp->st_index;
1029 dev_info_t *dip = sp->s_dip;
1030 kstat_t *ksp;
1031 kstat_named_t *knp;
1032 char name[MAXNAMELEN];
1033 unsigned int id;
1034 int rc;
1035
1036 /* Create the set */
1037 (void) snprintf(name, MAXNAMELEN - 1, "%s_txq%04d",
1038 ddi_driver_name(dip), index);
1039
1040 if ((ksp = kstat_create((char *)ddi_driver_name(dip),
1041 ddi_get_instance(dip), name, "queue", KSTAT_TYPE_NAMED,
1042 TX_NQSTATS + 4, 0)) == NULL) {
1043 rc = ENOMEM;
1044 goto fail1;
1045 }
1046
1047 stp->st_ksp = ksp;
1048
1049 ksp->ks_update = sfxge_tx_kstat_update;
1050 ksp->ks_private = stp;
1051 ksp->ks_lock = &(stp->st_lock);
1052
1053 /* Initialise the named stats */
1054 stp->st_stat = knp = ksp->ks_data;
1055 for (id = 0; id < TX_NQSTATS; id++) {
1056 kstat_named_init(knp, (char *)efx_tx_qstat_name(sp->s_enp, id),
1057 KSTAT_DATA_UINT64);
1058 knp++;
1059 }
1060 kstat_named_init(knp, "dpl_get_pkt_limit", KSTAT_DATA_UINT64);
1061 knp++;
1062 kstat_named_init(knp, "dpl_put_pkt_limit", KSTAT_DATA_UINT64);
1063 knp++;
1064 kstat_named_init(knp, "dpl_get_full_count", KSTAT_DATA_UINT64);
1065 knp++;
1066 kstat_named_init(knp, "dpl_put_full_count", KSTAT_DATA_UINT64);
1067
1068 kstat_install(ksp);
1069 return (0);
1070
1071 fail1:
1072 DTRACE_PROBE1(fail1, int, rc);
1073
1074 return (rc);
1075 }
1076
1077 static void
1078 sfxge_tx_kstat_fini(sfxge_txq_t *stp)
1079 {
1080 /* Destroy the set */
1081 kstat_delete(stp->st_ksp);
1082 stp->st_ksp = NULL;
1083 stp->st_stat = NULL;
1084 }
1085
1086 static int
1087 sfxge_tx_qinit(sfxge_t *sp, unsigned int index, sfxge_txq_type_t type,
1088 unsigned int evq)
1089 {
1090 sfxge_txq_t *stp;
1091 sfxge_tx_dpl_t *stdp;
1092 int rc;
1093
1094 ASSERT3U(index, <, EFX_ARRAY_SIZE(sp->s_stp));
1095 ASSERT3U(type, <, SFXGE_TXQ_NTYPES);
1096 ASSERT3U(evq, <, EFX_ARRAY_SIZE(sp->s_sep));
1097
1098 if ((stp = kmem_cache_alloc(sp->s_tqc, KM_SLEEP)) == NULL) {
1099 rc = ENOMEM;
1100 goto fail1;
1101 }
1102 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_UNINITIALIZED);
1103
1104 stdp = &(stp->st_dpl);
1105
1106 stp->st_index = index;
1107 stp->st_type = type;
1108 stp->st_evq = evq;
1109
1110 mutex_init(&(stp->st_lock), NULL, MUTEX_DRIVER,
1111 DDI_INTR_PRI(sp->s_intr.si_intr_pri));
1112
1113 /* Initialize the statistics */
1114 if ((rc = sfxge_tx_kstat_init(stp)) != 0)
1115 goto fail2;
1116
1117 stdp->get_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
1118 DDI_PROP_DONTPASS, "tx_dpl_get_pkt_limit",
1119 SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT);
1120
1121 stdp->put_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
1122 DDI_PROP_DONTPASS, "tx_dpl_put_pkt_limit",
1123 SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT);
1124
1125 /* Allocate a per-EVQ label for events from this TXQ */
1126 if ((rc = sfxge_ev_txlabel_alloc(sp, evq, stp, &(stp->st_label))) != 0)
1127 goto fail2;
1128
1129 stp->st_state = SFXGE_TXQ_INITIALIZED;
1130
1131 /* Attach the TXQ to the driver */
1132 ASSERT3P(sp->s_stp[index], ==, NULL);
1133 sp->s_stp[index] = stp;
1134 sp->s_tx_qcount++;
1135
1136 return (0);
1137
1138 fail2:
1139 DTRACE_PROBE(fail2);
1140
1141 sfxge_tx_kstat_fini(stp);
1142
1143
1144 stp->st_evq = 0;
1145 stp->st_type = 0;
1146 stp->st_index = 0;
1147
1148 mutex_destroy(&(stp->st_lock));
1149
1150 kmem_cache_free(sp->s_tqc, stp);
1151
1152 fail1:
1153 DTRACE_PROBE1(fail1, int, rc);
1154
1155 return (rc);
1156 }
1157
1158 static int
1159 sfxge_tx_qstart(sfxge_t *sp, unsigned int index)
1160 {
1161 sfxge_txq_t *stp = sp->s_stp[index];
1162 efx_nic_t *enp = sp->s_enp;
1163 efsys_mem_t *esmp;
1164 sfxge_evq_t *sep;
1165 unsigned int evq;
1166 unsigned int flags;
1167 unsigned int desc_index;
1168 int rc;
1169
1170 mutex_enter(&(stp->st_lock));
1171
1172 esmp = &(stp->st_mem);
1173 evq = stp->st_evq;
1174 sep = sp->s_sep[evq];
1175
1176 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);
1177 ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
1178
1179 /* Zero the memory */
1180 bzero(esmp->esm_base, EFX_TXQ_SIZE(SFXGE_TX_NDESCS));
1181
1182 /* Program the buffer table */
1183 if ((rc = sfxge_sram_buf_tbl_set(sp, stp->st_id, esmp,
1184 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS))) != 0)
1185 goto fail1;
1186
1187 switch (stp->st_type) {
1188 case SFXGE_TXQ_NON_CKSUM:
1189 flags = 0;
1190 break;
1191
1192 case SFXGE_TXQ_IP_CKSUM:
1193 flags = EFX_TXQ_CKSUM_IPV4;
1194 break;
1195
1196 case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
1197 flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;
1198 break;
1199
1200 default:
1201 ASSERT(B_FALSE);
1202
1203 flags = 0;
1204 break;
1205 }
1206
1207 /* Create the transmit queue */
1208 if ((rc = efx_tx_qcreate(enp, index, stp->st_label, esmp,
1209 SFXGE_TX_NDESCS, stp->st_id, flags, sep->se_eep,
1210 &(stp->st_etp), &desc_index)) != 0)
1211 goto fail2;
1212
1213 /* Initialise queue descriptor indexes */
1214 stp->st_added = desc_index;
1215 stp->st_pending = desc_index;
1216 stp->st_completed = desc_index;
1217 stp->st_reaped = desc_index;
1218
1219 /* Enable the transmit queue */
1220 efx_tx_qenable(stp->st_etp);
1221
1222 stp->st_state = SFXGE_TXQ_STARTED;
1223
1224 mutex_exit(&(stp->st_lock));
1225
1226 return (0);
1227
1228 fail2:
1229 DTRACE_PROBE(fail2);
1230
1231 /* Clear entries from the buffer table */
1232 sfxge_sram_buf_tbl_clear(sp, stp->st_id,
1233 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
1234
1235 fail1:
1236 DTRACE_PROBE1(fail1, int, rc);
1237
1238 mutex_exit(&(stp->st_lock));
1239
1240 return (rc);
1241 }
1242
1243 static inline int
1244 sfxge_tx_qmapping_add(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp,
1245 size_t *offp, size_t *limitp)
1246 {
1247 mblk_t *mp;
1248 size_t mapping_off;
1249 size_t mapping_size;
1250 int rc;
1251
1252 ASSERT3U(*offp, <, stmp->stm_size);
1253 ASSERT(*limitp != 0);
1254
1255 mp = stmp->stm_mp;
1256
1257 ASSERT3P(stmp->stm_base, ==, mp->b_rptr);
1258 ASSERT3U(stmp->stm_size, ==, MBLKL(mp));
1259
1260 mapping_off = stmp->stm_off + *offp;
1261 mapping_size = stmp->stm_size - *offp;
1262
1263 while (mapping_size != 0 && *limitp != 0) {
1264 size_t page =
1265 mapping_off >> SFXGE_TX_DESCSHIFT;
1266 size_t page_off =
1267 mapping_off & SFXGE_TX_DESCOFFSET;
1268 size_t page_size =
1269 SFXGE_TX_DESCSIZE - page_off;
1270 efx_buffer_t *ebp;
1271
1272 ASSERT3U(page, <, SFXGE_TX_MAPPING_NADDR);
1273 ASSERT((stmp->stm_addr[page] & SFXGE_TX_DESCMASK) != 0);
1274
1275 page_size = MIN(page_size, mapping_size);
1276 page_size = MIN(page_size, *limitp);
1277
1278 ASSERT3U(stp->st_n, <=,
1279 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
1280 if (stp->st_n ==
1281 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) {
1282 rc = ENOSPC;
1283 goto fail1;
1284 }
1285
1286 ebp = &(stp->st_eb[stp->st_n++]);
1287 ebp->eb_addr = stmp->stm_addr[page] +
1288 page_off;
1289 ebp->eb_size = page_size;
1290
1291 *offp += page_size;
1292 *limitp -= page_size;
1293
1294 mapping_off += page_size;
1295 mapping_size -= page_size;
1296
1297 ebp->eb_eop = (*limitp == 0 ||
1298 (mapping_size == 0 && mp->b_cont == NULL));
1299
1300 DTRACE_PROBE5(tx_mapping_add,
1301 unsigned int, stp->st_index,
1302 unsigned int, stp->st_n - 1,
1303 uint64_t, ebp->eb_addr,
1304 size_t, ebp->eb_size,
1305 boolean_t, ebp->eb_eop);
1306 }
1307
1308 ASSERT3U(*offp, <=, stmp->stm_size);
1309
1310 return (0);
1311
1312 fail1:
1313 DTRACE_PROBE1(fail1, int, rc);
1314
1315 return (rc);
1316 }
1317
1318 static inline int
1319 sfxge_tx_qbuffer_add(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp, boolean_t eop)
1320 {
1321 efx_buffer_t *ebp;
1322 int rc;
1323
1324 ASSERT3U(stp->st_n, <=,
1325 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
1326 if (stp->st_n == EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) {
1327 rc = ENOSPC;
1328 goto fail1;
1329 }
1330
1331 ebp = &(stp->st_eb[stp->st_n++]);
1332 ebp->eb_addr = stbp->stb_esm.esm_addr + stbp->stb_off;
1333 ebp->eb_size = stbp->stb_esm.esm_used - stbp->stb_off;
1334 ebp->eb_eop = eop;
1335
1336 (void) ddi_dma_sync(stbp->stb_esm.esm_dma_handle,
1337 stbp->stb_off, ebp->eb_size,
1338 DDI_DMA_SYNC_FORDEV);
1339
1340 stbp->stb_off = stbp->stb_esm.esm_used;
1341
1342 DTRACE_PROBE5(tx_buffer_add,
1343 unsigned int, stp->st_index,
1344 unsigned int, stp->st_n - 1,
1345 uint64_t, ebp->eb_addr, size_t, ebp->eb_size,
1346 boolean_t, ebp->eb_eop);
1347
1348 return (0);
1349
1350 fail1:
1351 DTRACE_PROBE1(fail1, int, rc);
1352
1353 return (rc);
1354 }
1355
1356 static inline boolean_t
1357 sfxge_tx_msgb_copy(mblk_t *mp, sfxge_tx_buffer_t *stbp, size_t *offp,
1358 size_t *limitp)
1359 {
1360 size_t data_off;
1361 size_t data_size;
1362 size_t copy_off;
1363 size_t copy_size;
1364 boolean_t eop;
1365
1366 ASSERT3U(*offp, <=, MBLKL(mp));
1367 ASSERT(*limitp != 0);
1368
1369 data_off = *offp;
1370 data_size = MBLKL(mp) - *offp;
1371
1372 copy_off = stbp->stb_esm.esm_used;
1373 copy_size = SFXGE_TX_BUFFER_SIZE - copy_off;
1374
1375 copy_size = MIN(copy_size, data_size);
1376 copy_size = MIN(copy_size, *limitp);
1377
1378 bcopy(mp->b_rptr + data_off,
1379 stbp->stb_esm.esm_base + copy_off, copy_size);
1380
1381 stbp->stb_esm.esm_used += copy_size;
1382 ASSERT3U(stbp->stb_esm.esm_used, <=,
1383 SFXGE_TX_BUFFER_SIZE);
1384
1385 *offp += copy_size;
1386 *limitp -= copy_size;
1387
1388 data_off += copy_size;
1389 data_size -= copy_size;
1390
1391 eop = (*limitp == 0 ||
1392 (data_size == 0 && mp->b_cont == NULL));
1393
1394 ASSERT3U(*offp, <=, MBLKL(mp));
1395
1396 return (eop);
1397 }
1398
1399 static int
1400 sfxge_tx_qpayload_fragment(sfxge_txq_t *stp, unsigned int id, mblk_t **mpp,
1401 size_t *offp, size_t size, boolean_t copy)
1402 {
1403 sfxge_t *sp = stp->st_sp;
1404 mblk_t *mp = *mpp;
1405 size_t off = *offp;
1406 sfxge_tx_buffer_t *stbp;
1407 sfxge_tx_mapping_t *stmp;
1408 int rc;
1409
1410 stbp = stp->st_stbp[id];
1411 ASSERT(stbp == NULL || (stbp->stb_esm.esm_used == stbp->stb_off));
1412
1413 stmp = stp->st_stmp[id];
1414
1415 while (size != 0) {
1416 boolean_t eop;
1417
1418 ASSERT(mp != NULL);
1419
1420 if (mp->b_cont != NULL)
1421 prefetch_read_many(mp->b_cont);
1422
1423 ASSERT3U(off, <, MBLKL(mp));
1424
1425 if (copy)
1426 goto copy;
1427
1428 /*
1429 * Check whether we have already mapped this data block for
1430 * DMA.
1431 */
1432 if (stmp == NULL || stmp->stm_mp != mp) {
1433 /*
1434 * If we are part way through copying a data block then
1435 * there's no point in trying to map it for DMA.
1436 */
1437 if (off != 0)
1438 goto copy;
1439
1440 /*
1441 * If the data block is too short then the cost of
1442 * mapping it for DMA would outweigh the cost of
1443 * copying it.
1444 */
1445 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD)
1446 goto copy;
1447
1448 /* Try to grab a transmit mapping from the pool */
1449 stmp = sfxge_tx_qfmp_get(stp);
1450 if (stmp == NULL) {
1451 /*
1452 * The pool was empty so allocate a new
1453 * mapping.
1454 */
1455 if ((stmp = kmem_cache_alloc(sp->s_tmc,
1456 KM_NOSLEEP)) == NULL)
1457 goto copy;
1458 }
1459
1460 /* Add the DMA mapping to the list */
1461 stmp->stm_next = stp->st_stmp[id];
1462 stp->st_stmp[id] = stmp;
1463
1464 /* Try to bind the data block to the mapping */
1465 if (sfxge_tx_msgb_bind(mp, stmp) != 0)
1466 goto copy;
1467 }
1468 ASSERT3P(stmp->stm_mp, ==, mp);
1469
1470 /*
1471 * If we have a partially filled buffer then we must add it to
1472 * the fragment list before adding the mapping.
1473 */
1474 if (stbp != NULL && (stbp->stb_esm.esm_used > stbp->stb_off)) {
1475 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
1476 if (rc != 0)
1477 goto fail1;
1478 }
1479
1480 /* Add the mapping to the fragment list */
1481 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size);
1482 if (rc != 0)
1483 goto fail2;
1484
1485 ASSERT(off == MBLKL(mp) || size == 0);
1486
1487 /*
1488  * If the data block has been exhausted then skip over the
1489 * control block and advance to the next data block.
1490 */
1491 if (off == MBLKL(mp)) {
1492 mp = mp->b_cont;
1493 off = 0;
1494 }
1495
1496 continue;
1497
1498 copy:
1499 if (stbp == NULL ||
1500 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE) {
1501 /* Try to grab a buffer from the pool */
1502 stbp = sfxge_tx_qfbp_get(stp);
1503 if (stbp == NULL) {
1504 /*
1505 * The pool was empty so allocate a new
1506 * buffer.
1507 */
1508 if ((stbp = kmem_cache_alloc(sp->s_tbc,
1509 KM_NOSLEEP)) == NULL) {
1510 rc = ENOMEM;
1511 goto fail3;
1512 }
1513 }
1514
1515 /* Add it to the list */
1516 stbp->stb_next = stp->st_stbp[id];
1517 stp->st_stbp[id] = stbp;
1518 }
1519
1520 /* Copy as much of the data block as we can into the buffer */
1521 eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size);
1522
1523 ASSERT(off == MBLKL(mp) || size == 0 ||
1524 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE);
1525
1526 /*
1527 * If we have reached the end of the packet, or the buffer is
1528 * full, then add the buffer to the fragment list.
1529 */
1530 if (stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE || eop) {
1531 rc = sfxge_tx_qbuffer_add(stp, stbp, eop);
1532 if (rc != 0)
1533 goto fail4;
1534 }
1535
1536 /*
1537  * If the data block has been exhausted then advance to the next
1538 * one.
1539 */
1540 if (off == MBLKL(mp)) {
1541 mp = mp->b_cont;
1542 off = 0;
1543 }
1544 }
1545
1546 *mpp = mp;
1547 *offp = off;
1548
1549 return (0);
1550
1551 fail4:
1552 DTRACE_PROBE(fail4);
1553 fail3:
1554 DTRACE_PROBE(fail3);
1555 fail2:
1556 DTRACE_PROBE(fail2);
1557 fail1:
1558 DTRACE_PROBE1(fail1, int, rc);
1559
1560 return (rc);
1561 }
1562
1563 static int
1564 sfxge_tx_qlso_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp,
1565 boolean_t copy)
1566 {
1567 sfxge_t *sp = stp->st_sp;
1568 mblk_t *mp = stpp->stp_mp;
1569 struct ether_header *etherhp = stpp->stp_etherhp;
1570 struct ip *iphp = stpp->stp_iphp;
1571 struct tcphdr *thp = stpp->stp_thp;
1572 size_t size = stpp->stp_size;
1573 size_t off = stpp->stp_off;
1574 size_t mss = stpp->stp_mss;
1575 unsigned int id;
1576 caddr_t hp;
1577 size_t ehs, hs;
1578 uint16_t start_len;
1579 uint16_t start_id;
1580 uint16_t ip_id;
1581 uint8_t start_flags;
1582 uint32_t start_seq;
1583 uint32_t th_seq;
1584 size_t lss;
1585 sfxge_tx_buffer_t *stbp;
1586 int rc;
1587
1588 ASSERT(mutex_owned(&(stp->st_lock)));
1589
1590 if ((DB_LSOFLAGS(mp) & HW_LSO) == 0) {
1591 rc = EINVAL;
1592 goto fail1;
1593 }
1594
1595 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
1596
1597 ASSERT(stp->st_n == 0);
1598 ASSERT(stp->st_stbp[id] == NULL);
1599 ASSERT(stp->st_stmp[id] == NULL);
1600
1601 ehs = (etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
1602 sizeof (struct ether_vlan_header) :
1603 sizeof (struct ether_header);
1604 if (msgdsize(mp) != ehs + ntohs(iphp->ip_len)) {
1605 rc = EINVAL;
1606 goto fail2;
1607 }
1608
1609 /* The payload offset is equivalent to the size of the headers */
1610 hp = (caddr_t)(mp->b_rptr);
1611 hs = off;
1612
1613 /*
1614 * If the initial data block only contains the headers then advance
1615 * to the next one.
1616 */
1617 if (hs > MBLKL(mp)) {
1618 rc = EINVAL;
1619 goto fail3;
1620 }
1621 mp->b_rptr += hs;
1622
1623 if (MBLKL(mp) == 0)
1624 mp = mp->b_cont;
1625
1626 off = 0;
1627
1628 /* Check IP and TCP headers are suitable for LSO */
1629 if (((iphp->ip_off & ~htons(IP_DF)) != 0) ||
1630 ((thp->th_flags & (TH_URG | TH_SYN)) != 0) ||
1631 (thp->th_urp != 0)) {
1632 rc = EINVAL;
1633 goto fail4;
1634 }
1635
1636 if (size + (thp->th_off << 2) + (iphp->ip_hl << 2) !=
1637 ntohs(iphp->ip_len)) {
1638 rc = EINVAL;
1639 goto fail4;
1640 }
1641
1642 /*
1643  * Get the base IP id. The stack leaves enough of a gap in id space
1644 * for us to increment this for each segment we send out.
1645 */
1646 start_len = ntohs(iphp->ip_len);
1647 start_id = ip_id = ntohs(iphp->ip_id);
1648
1649 /* Get the base TCP sequence number and flags */
1650 start_flags = thp->th_flags;
1651 start_seq = th_seq = ntohl(thp->th_seq);
1652
1653 /* Adjust the header for interim segments */
1654 iphp->ip_len = htons((iphp->ip_hl << 2) + (thp->th_off << 2) + mss);
1655 thp->th_flags = start_flags & ~(TH_PUSH | TH_FIN);
1656
1657 lss = size;
1658 if ((lss / mss) >= (EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) / 2)) {
1659 rc = EINVAL;
1660 goto fail5;
1661 }
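/*
 * The limit above is a conservative check: each segment consumes at least
 * two entries in the fragment list (one for the copied headers plus at
 * least one for payload), so a packet producing more than
 * EFX_TXQ_LIMIT / 2 segments could never be posted in one go.
 */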
1662
1663 stbp = NULL;
1664 while (lss != 0) {
1665 size_t ss = MIN(lss, mss);
1666 boolean_t eol = (ss == lss);
1667
1668 /* Adjust the header for this segment */
1669 iphp->ip_id = htons(ip_id);
1670 ip_id++;
1671
1672 thp->th_seq = htonl(th_seq);
1673 th_seq += ss;
1674
1675 /* If this is the final segment then do some extra adjustment */
1676 if (eol) {
1677 iphp->ip_len = htons((iphp->ip_hl << 2) +
1678 (thp->th_off << 2) + ss);
1679 thp->th_flags = start_flags;
1680 }
1681
1682 if (stbp == NULL ||
1683 stbp->stb_esm.esm_used + hs > SFXGE_TX_BUFFER_SIZE) {
1684 /* Try to grab a buffer from the pool */
1685 stbp = sfxge_tx_qfbp_get(stp);
1686 if (stbp == NULL) {
1687 /*
1688 * The pool was empty so allocate a new
1689 * buffer.
1690 */
1691 if ((stbp = kmem_cache_alloc(sp->s_tbc,
1692 KM_NOSLEEP)) == NULL) {
1693 rc = ENOMEM;
1694 goto fail6;
1695 }
1696 }
1697
1698 /* Add it to the list */
1699 stbp->stb_next = stp->st_stbp[id];
1700 stp->st_stbp[id] = stbp;
1701 }
1702
1703 /* Copy in the headers */
1704 ASSERT3U(stbp->stb_off, ==, stbp->stb_esm.esm_used);
1705 bcopy(hp, stbp->stb_esm.esm_base + stbp->stb_off, hs);
1706 stbp->stb_esm.esm_used += hs;
1707
1708 /* Add the buffer to the fragment list */
1709 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
1710 if (rc != 0)
1711 goto fail7;
1712
1713 /* Add the payload to the fragment list */
1714 if ((rc = sfxge_tx_qpayload_fragment(stp, id, &mp, &off,
1715 ss, copy)) != 0)
1716 goto fail8;
1717
1718 lss -= ss;
1719 }
1720 ASSERT3U(off, ==, 0);
1721 ASSERT3P(mp, ==, NULL);
1722
1723 ASSERT3U(th_seq - start_seq, ==, size);
1724
1725 /*
1726 * If no part of the packet has been mapped for DMA then we can free
1727 * it now, otherwise it can only be freed on completion.
1728 */
1729 if (stp->st_stmp[id] == NULL)
1730 freemsg(stpp->stp_mp);
1731 else
1732 stp->st_mp[id] = stpp->stp_mp;
1733
1734 stpp->stp_mp = NULL;
1735
1736 return (0);
1737
1738 fail8:
1739 DTRACE_PROBE(fail8);
1740 fail7:
1741 DTRACE_PROBE(fail7);
1742 fail6:
1743 DTRACE_PROBE(fail6);
1744 fail5:
1745 DTRACE_PROBE(fail5);
1746
1747 /* Restore the header */
1748 thp->th_seq = htonl(start_seq);
1749 thp->th_flags = start_flags;
1750
1751 iphp->ip_len = htons(start_len);
1752 iphp->ip_id = htons(start_id);
1753
1754 fail4:
1755 DTRACE_PROBE(fail4);
1756
1757 mp = stpp->stp_mp;
1758 mp->b_rptr -= hs;
1759
1760 ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
1761 sizeof (struct ether_vlan_header) :
1762 sizeof (struct ether_header)) +
1763 ntohs(iphp->ip_len), ==, msgdsize(mp));
1764
1765 ASSERT(stp->st_mp[id] == NULL);
1766
1767 fail3:
1768 DTRACE_PROBE(fail3);
1769 fail2:
1770 DTRACE_PROBE(fail2);
1771 fail1:
1772 DTRACE_PROBE1(fail1, int, rc);
1773
1774 return (rc);
1775 }
1776
1777 static int
1778 sfxge_tx_qpacket_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp,
1779 boolean_t copy)
1780 {
1781 sfxge_t *sp = stp->st_sp;
1782 mblk_t *mp = stpp->stp_mp;
1783 unsigned int id;
1784 size_t off;
1785 size_t size;
1786 sfxge_tx_mapping_t *stmp;
1787 sfxge_tx_buffer_t *stbp;
1788 int rc;
1789
1790 ASSERT(mutex_owned(&(stp->st_lock)));
1791
1792 ASSERT(stp->st_n == 0);
1793
1794 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
1795
1796 ASSERT(stp->st_stbp[id] == NULL);
1797 ASSERT(stp->st_stmp[id] == NULL);
1798
1799 off = 0;
1800 size = LONG_MAX; /* must be larger than the packet */
1801
1802 stbp = NULL;
1803 stmp = NULL;
1804
1805 while (mp != NULL) {
1806 boolean_t eop;
1807
1808 ASSERT(mp != NULL);
1809
1810 if (mp->b_cont != NULL)
1811 prefetch_read_many(mp->b_cont);
1812
1813 ASSERT(stmp == NULL || stmp->stm_mp != mp);
1814
1815 if (copy)
1816 goto copy;
1817
1818 /*
1819 * If we are part way through copying a data block then there's
1820 * no point in trying to map it for DMA.
1821 */
1822 if (off != 0)
1823 goto copy;
1824
1825 /*
1826 * If the data block is too short then the cost of mapping it
1827 * for DMA would outweigh the cost of copying it.
1828 *
1829 * TX copy break
1830 */
1831 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD)
1832 goto copy;
1833
1834 /* Try to grab a transmit mapping from the pool */
1835 stmp = sfxge_tx_qfmp_get(stp);
1836 if (stmp == NULL) {
1837 /*
1838 * The pool was empty so allocate a new
1839 * mapping.
1840 */
1841 if ((stmp = kmem_cache_alloc(sp->s_tmc,
1842 KM_NOSLEEP)) == NULL)
1843 goto copy;
1844 }
1845
1846 /* Add the DMA mapping to the list */
1847 stmp->stm_next = stp->st_stmp[id];
1848 stp->st_stmp[id] = stmp;
1849
1850 /* Try to bind the data block to the mapping */
1851 if (sfxge_tx_msgb_bind(mp, stmp) != 0)
1852 goto copy;
1853
1854 /*
1855 * If we have a partially filled buffer then we must add it to
1856 * the fragment list before adding the mapping.
1857 */
1858 if (stbp != NULL && (stbp->stb_esm.esm_used > stbp->stb_off)) {
1859 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
1860 if (rc != 0)
1861 goto fail1;
1862 }
1863
1864 /* Add the mapping to the fragment list */
1865 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size);
1866 if (rc != 0)
1867 goto fail2;
1868
1869 ASSERT3U(off, ==, MBLKL(mp));
1870
1871 /* Advance to the next data block */
1872 mp = mp->b_cont;
1873 off = 0;
1874 continue;
1875
1876 copy:
1877 if (stbp == NULL ||
1878 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE) {
1879 /* Try to grab a buffer from the pool */
1880 stbp = sfxge_tx_qfbp_get(stp);
1881 if (stbp == NULL) {
1882 /*
1883 * The pool was empty so allocate a new
1884 * buffer.
1885 */
1886 if ((stbp = kmem_cache_alloc(sp->s_tbc,
1887 KM_NOSLEEP)) == NULL) {
1888 rc = ENOMEM;
1889 goto fail3;
1890 }
1891 }
1892
1893 /* Add it to the list */
1894 stbp->stb_next = stp->st_stbp[id];
1895 stp->st_stbp[id] = stbp;
1896 }
1897
1898 /* Copy as much of the data block as we can into the buffer */
1899 eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size);
1900
1901 ASSERT(off == MBLKL(mp) ||
1902 stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE);
1903
1904 /*
1905 * If we have reached the end of the packet, or the buffer is
1906 * full, then add the buffer to the fragment list.
1907 */
1908 if (stbp->stb_esm.esm_used == SFXGE_TX_BUFFER_SIZE || eop) {
1909 rc = sfxge_tx_qbuffer_add(stp, stbp, eop);
1910 if (rc != 0)
1911 goto fail4;
1912 }
1913
1914 /*
1915  * If the data block has been exhausted then advance to the next
1916 * one.
1917 */
1918 if (off == MBLKL(mp)) {
1919 mp = mp->b_cont;
1920 off = 0;
1921 }
1922 }
1923 ASSERT3U(off, ==, 0);
1924 ASSERT3P(mp, ==, NULL);
1925 ASSERT3U(size, !=, 0);
1926
1927 /*
1928 * If no part of the packet has been mapped for DMA then we can free
1929 * it now, otherwise it can only be freed on completion.
1930 */
1931 if (stp->st_stmp[id] == NULL)
1932 freemsg(stpp->stp_mp);
1933 else
1934 stp->st_mp[id] = stpp->stp_mp;
1935
1936 stpp->stp_mp = NULL;
1937
1938 return (0);
1939
1940 fail4:
1941 DTRACE_PROBE(fail4);
1942 fail3:
1943 DTRACE_PROBE(fail3);
1944 fail2:
1945 DTRACE_PROBE(fail2);
1946 fail1:
1947 DTRACE_PROBE1(fail1, int, rc);
1948
1949 ASSERT(stp->st_stmp[id] == NULL);
1950
1951 return (rc);
1952 }
1953
1954
1955 #define SFXGE_TX_QDPL_PUT_PENDING(_stp) \
1956 ((_stp)->st_dpl.std_put != 0)
1957
1958 static void
1959 sfxge_tx_qdpl_swizzle(sfxge_txq_t *stp)
1960 {
1961 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
1962 volatile uintptr_t *putp;
1963 uintptr_t put;
1964 sfxge_tx_packet_t *stpp;
1965 sfxge_tx_packet_t *p;
1966 sfxge_tx_packet_t **pp;
1967 unsigned int count;
1968
1969 ASSERT(mutex_owned(&(stp->st_lock)));
1970
1971 /*
1972  * It is guaranteed that in-flight TX packets will cause more TX completions,
1973  * and hence that more swizzles must happen.
1974 */
1975 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
1976 if (stdp->std_count >= stdp->get_pkt_limit)
1977 return;
1978
1979 /* Acquire the put list - replacing with an empty list */
1980 putp = &(stdp->std_put);
1981 put = atomic_swap_ulong(putp, 0);
1982 stpp = (void *)put;
1983
1984 if (stpp == NULL)
1985 return;
1986
1987 /* Reverse the list */
1988 pp = &(stpp->stp_next);
1989 p = NULL;
1990
1991 count = 0;
1992 do {
1993 sfxge_tx_packet_t *next;
1994
1995 next = stpp->stp_next;
1996
1997 stpp->stp_next = p;
1998 p = stpp;
1999
2000 count++;
2001 stpp = next;
2002 } while (stpp != NULL);
2003
2004 /* Add it to the tail of the get list */
2005 ASSERT3P(*pp, ==, NULL);
2006
2007 *(stdp->std_getp) = p;
2008 stdp->std_getp = pp;
2009 stdp->std_count += count;
2010 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
2011
2012 DTRACE_PROBE2(dpl_counts, int, stdp->std_count, int, count);
2013 }
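/*
 * For example (illustrative only): if packets A, B and C were queued in that
 * order, the lock-free put list holds them newest-first as C -> B -> A.
 * The swizzle above reverses that chain to A -> B -> C before appending it
 * to the tail of the get list, so packets are drained in submission order.
 */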
2014
2015
2016 /*
2017  * If the TXQ is locked, add the TX DPL put list and this packet to the
2018  * TX DPL get list. If the TXQ is unlocked, atomically add this packet to
2019  * the TX DPL put list.
2020  *
2021  * The only possible error is ENOSPC (used for TX backpressure). It is
2022  * returned when either the TX DPL put list or the TX DPL get list becomes
2023  * full; in both cases there must be future TX completions (as represented
2024  * by the packets on the DPL get lists). This ensures that mac_tx_update()
2025  * will subsequently be called from sfxge_tx_qcomplete().
2026 */
2027 static inline int
2028 sfxge_tx_qdpl_add(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, int locked)
2029 {
2030 sfxge_tx_dpl_t *stdp = &stp->st_dpl;
2031
2032 ASSERT3P(stpp->stp_next, ==, NULL);
2033
2034 if (locked) {
2035 ASSERT(mutex_owned(&stp->st_lock));
2036
2037 if (stdp->std_count >= stdp->get_pkt_limit) {
2038 stdp->get_full_count++;
2039 return (ENOSPC);
2040 }
2041
2042 /* Reverse the put list onto the get list */
2043 sfxge_tx_qdpl_swizzle(stp);
2044
2045 /* Add to the tail of the get list */
2046 *(stdp->std_getp) = stpp;
2047 stdp->std_getp = &stpp->stp_next;
2048 stdp->std_count++;
2049 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
2050
2051 } else {
2052 volatile uintptr_t *putp;
2053 uintptr_t old;
2054 uintptr_t new;
2055 sfxge_tx_packet_t *old_pkt;
2056
2057 putp = &(stdp->std_put);
2058 new = (uintptr_t)stpp;
2059
2060 /* Add to the head of the put list, tracking the list length */
2061 do {
2062 old = *putp;
2063 old_pkt = (sfxge_tx_packet_t *)old;
2064
2065 stpp->stp_dpl_put_len = old ?
2066 old_pkt->stp_dpl_put_len + 1 : 1;
2067
2068 if (stpp->stp_dpl_put_len >= stdp->put_pkt_limit) {
2069 stpp->stp_next = 0;
2070 stpp->stp_dpl_put_len = 0;
2071 stdp->put_full_count++;
2072 return (ENOSPC);
2073 }
2074
2075 stpp->stp_next = (void *)old;
2076 } while (atomic_cas_ulong(putp, old, new) != old);
2077 }
2078 return (0);
2079 }
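/*
 * The unlocked path above treats std_put as a lock-free LIFO: new packets
 * are pushed onto the head with an atomic compare-and-swap, and the head
 * packet's stp_dpl_put_len field carries the current length of the put list
 * so that put_pkt_limit can be enforced without taking the TXQ lock.
 */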
2080
2081
2082 /* Take all packets from DPL get list and try to send to HW */
2083 static void
2084 sfxge_tx_qdpl_drain(sfxge_txq_t *stp)
2085 {
2086 sfxge_t *sp = stp->st_sp;
2087 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2088 unsigned int pushed = stp->st_added;
2089 sfxge_tx_packet_t *stpp;
2090 unsigned int count;
2091
2092 ASSERT(mutex_owned(&(stp->st_lock)));
2093
2094 prefetch_read_many(sp->s_enp);
2095 prefetch_read_many(stp->st_etp);
2096
2097 stpp = stdp->std_get;
2098 count = stdp->std_count;
2099
2100 while (count != 0) {
2101 sfxge_tx_packet_t *next;
2102 boolean_t copy;
2103 int rc;
2104
2105 ASSERT(stpp != NULL);
2106
2107 /* Split stpp off */
2108 next = stpp->stp_next;
2109 stpp->stp_next = NULL;
2110
2111 if (next != NULL)
2112 prefetch_read_many(next);
2113
2114 if (stp->st_state != SFXGE_TXQ_STARTED)
2115 goto reject;
2116
2117 copy = B_FALSE;
2118
2119 again:
2120 /* Fragment the packet */
2121 if (stpp->stp_mss != 0) {
2122 rc = sfxge_tx_qlso_fragment(stp, stpp, copy);
2123 } else {
2124 rc = sfxge_tx_qpacket_fragment(stp, stpp, copy);
2125 }
2126
2127 switch (rc) {
2128 case 0:
2129 break;
2130
2131 case ENOSPC:
2132 if (!copy)
2133 goto copy;
2134
2135 /*FALLTHRU*/
2136 default:
2137 goto reject;
2138 }
2139
2140 /* Free the packet structure */
2141 stpp->stp_etherhp = NULL;
2142 stpp->stp_iphp = NULL;
2143 stpp->stp_thp = NULL;
2144 stpp->stp_off = 0;
2145 stpp->stp_size = 0;
2146 stpp->stp_mss = 0;
2147 stpp->stp_dpl_put_len = 0;
2148
2149 ASSERT3P(stpp->stp_mp, ==, NULL);
2150
2151 if (sfxge_tx_qfpp_put(stp, stpp) != 0) {
2152 sfxge_tx_packet_destroy(sp, stpp);
2153 stpp = NULL;
2154 }
2155
2156 --count;
2157 stpp = next;
2158
2159 /* Post the packet */
2160 sfxge_tx_qlist_post(stp);
2161
2162 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED)
2163 goto defer;
2164
2165 if (stp->st_added - pushed >= SFXGE_TX_BATCH) {
2166 efx_tx_qpush(stp->st_etp, stp->st_added, pushed);
2167 pushed = stp->st_added;
2168 }
2169
2170 continue;
2171
2172 copy:
2173 /* Abort the current fragment list */
2174 sfxge_tx_qlist_abort(stp);
2175
2176 /* Try copying the packet to flatten it */
2177 ASSERT(!copy);
2178 copy = B_TRUE;
2179
2180 goto again;
2181
2182 reject:
2183 /* Abort the current fragment list */
2184 sfxge_tx_qlist_abort(stp);
2185
2186 /* Discard the packet */
2187 freemsg(stpp->stp_mp);
2188 stpp->stp_mp = NULL;
2189
2190 /* Free the packet structure */
2191 stpp->stp_etherhp = NULL;
2192 stpp->stp_iphp = NULL;
2193 stpp->stp_thp = NULL;
2194 stpp->stp_off = 0;
2195 stpp->stp_size = 0;
2196 stpp->stp_mss = 0;
2197 stpp->stp_dpl_put_len = 0;
2198
2199 if (sfxge_tx_qfpp_put(stp, stpp) != 0) {
2200 sfxge_tx_packet_destroy(sp, stpp);
2201 stpp = NULL;
2202 }
2203
2204 --count;
2205 stpp = next;
2206 continue;
2207 defer:
2208 DTRACE_PROBE1(defer, unsigned int, stp->st_index);
2209 break;
2210 }
2211
2212 if (count == 0) {
2213 /* New empty get list */
2214 ASSERT3P(stpp, ==, NULL);
2215 stdp->std_get = NULL;
2216 stdp->std_count = 0;
2217
2218 stdp->std_getp = &(stdp->std_get);
2219 } else {
2220 		/* Shorten the get list by moving its head */
2221 stdp->std_get = stpp;
2222 stdp->std_count = count;
2223 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
2224 }
2225
2226 if (stp->st_added != pushed)
2227 efx_tx_qpush(stp->st_etp, stp->st_added, pushed);
2228
2229 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED ||
2230 stdp->std_count == 0);
2231 }
2232
2233 /* Swizzle the deferred packet list and try to push it to the hardware */
2234 static inline void
2235 sfxge_tx_qdpl_service(sfxge_txq_t *stp)
2236 {
2237 do {
2238 ASSERT(mutex_owned(&(stp->st_lock)));
2239
2240 if (SFXGE_TX_QDPL_PUT_PENDING(stp))
2241 sfxge_tx_qdpl_swizzle(stp);
2242
2243 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED)
2244 sfxge_tx_qdpl_drain(stp);
2245
2246 mutex_exit(&(stp->st_lock));
2247
2248 if (!SFXGE_TX_QDPL_PUT_PENDING(stp))
2249 break;
2250 } while (mutex_tryenter(&(stp->st_lock)));
2251 }
2252
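/*
 * Discard every packet on the deferred packet list (put and get lists),
 * freeing the mblks and destroying the packet structures.
 * The caller must hold the TXQ lock.
 */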
2253 static void
2254 sfxge_tx_qdpl_flush_locked(sfxge_txq_t *stp)
2255 {
2256 sfxge_t *sp = stp->st_sp;
2257 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2258 sfxge_tx_packet_t *stpp;
2259 unsigned int count;
2260
2261 ASSERT(mutex_owned(&(stp->st_lock)));
2262
2263 /* Swizzle put list to the get list */
2264 sfxge_tx_qdpl_swizzle(stp);
2265
2266 stpp = stdp->std_get;
2267 count = stdp->std_count;
2268
2269 while (count != 0) {
2270 sfxge_tx_packet_t *next;
2271
2272 next = stpp->stp_next;
2273 stpp->stp_next = NULL;
2274
2275 /* Discard the packet */
2276 freemsg(stpp->stp_mp);
2277 stpp->stp_mp = NULL;
2278
2279 /* Free the packet structure */
2280 stpp->stp_etherhp = NULL;
2281 stpp->stp_iphp = NULL;
2282 stpp->stp_thp = NULL;
2283 stpp->stp_off = 0;
2284 stpp->stp_size = 0;
2285 stpp->stp_mss = 0;
2286 stpp->stp_dpl_put_len = 0;
2287
2288 sfxge_tx_packet_destroy(sp, stpp);
2289
2290 --count;
2291 stpp = next;
2292 }
2293
2294 ASSERT3P(stpp, ==, NULL);
2295
2296 /* Empty list */
2297 stdp->std_get = NULL;
2298 stdp->std_count = 0;
2299 stdp->std_getp = &(stdp->std_get);
2300 }
2301
2302
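/* Discard all deferred packets, taking and releasing the TXQ lock */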
2303 void
2304 sfxge_tx_qdpl_flush(sfxge_txq_t *stp)
2305 {
2306 mutex_enter(&(stp->st_lock));
2307 sfxge_tx_qdpl_flush_locked(stp);
2308 mutex_exit(&(stp->st_lock));
2309 }
2310
2311
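/*
 * Clear TXQ back-pressure once the descriptor ring has drained to the
 * unblock level, then service the deferred packet list. Called with the
 * event queue lock held; the TXQ lock is taken and released internally.
 */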
2312 static void
2313 sfxge_tx_qunblock(sfxge_txq_t *stp)
2314 {
2315 sfxge_t *sp = stp->st_sp;
2316 unsigned int evq = stp->st_evq;
2317 sfxge_evq_t *sep = sp->s_sep[evq];
2318
2319 ASSERT(mutex_owned(&(sep->se_lock)));
2320
2321 mutex_enter(&(stp->st_lock));
2322
2323 if (stp->st_state != SFXGE_TXQ_STARTED) {
2324 mutex_exit(&(stp->st_lock));
2325 return;
2326 }
2327
2328 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) {
2329 unsigned int level;
2330
2331 level = stp->st_added - stp->st_completed;
2332 if (level <= stp->st_unblock) {
2333 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
2334 sfxge_tx_qlist_post(stp);
2335 }
2336 }
2337
2338 sfxge_tx_qdpl_service(stp);
2339 /* lock has been dropped */
2340 }
2341
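/*
 * Process transmit completions: release the DMA mappings and free the
 * mblks for all newly completed descriptors, unblock the queue if it has
 * drained sufficiently, and release MAC-layer back-pressure if the
 * deferred packet list has space again. Called from event queue context.
 */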
2342 void
2343 sfxge_tx_qcomplete(sfxge_txq_t *stp)
2344 {
2345 sfxge_t *sp = stp->st_sp;
2346 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2347 unsigned int evq = stp->st_evq;
2348 sfxge_evq_t *sep = sp->s_sep[evq];
2349 unsigned int completed;
2350
2351 ASSERT(mutex_owned(&(sep->se_lock)));
2352
2353 completed = stp->st_completed;
2354 while (completed != stp->st_pending) {
2355 unsigned int id;
2356 sfxge_tx_mapping_t *stmp;
2357
2358 id = completed++ & (SFXGE_TX_NDESCS - 1);
2359
2360 if ((stmp = stp->st_stmp[id]) != NULL) {
2361 mblk_t *mp;
2362
2363 /* Unbind all the mappings */
2364 do {
2365 ASSERT(stmp->stm_mp != NULL);
2366 sfxge_tx_msgb_unbind(stmp);
2367
2368 stmp = stmp->stm_next;
2369 } while (stmp != NULL);
2370
2371 /*
2372 * Now that the packet is no longer mapped for DMA it
2373 * can be freed.
2374 */
2375 mp = stp->st_mp[id];
2376 stp->st_mp[id] = NULL;
2377
2378 ASSERT(mp != NULL);
2379 freemsg(mp);
2380 }
2381 }
2382 stp->st_completed = completed;
2383
2384 /* Check whether we need to unblock the queue */
2385 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) {
2386 unsigned int level;
2387
2388 level = stp->st_added - stp->st_completed;
2389 if (level <= stp->st_unblock)
2390 sfxge_tx_qunblock(stp);
2391 }
2392
2393 	/* Release TX back-pressure caused by the TX DPL put/get list being full */
2394 if (stdp->std_count < stdp->get_pkt_limit)
2395 mac_tx_update(sp->s_mh);
2396 }
2397
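/*
 * Handle a TXQ flush-done event. If a flush was pending, account for its
 * completion and wake sfxge_tx_stop() once all queues have flushed.
 */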
2398 void
2399 sfxge_tx_qflush_done(sfxge_txq_t *stp)
2400 {
2401 sfxge_t *sp = stp->st_sp;
2402 boolean_t flush_pending = B_FALSE;
2403
2404 ASSERT(mutex_owned(&(sp->s_sep[stp->st_evq]->se_lock)));
2405
2406 mutex_enter(&(stp->st_lock));
2407
2408 switch (stp->st_state) {
2409 case SFXGE_TXQ_INITIALIZED:
2410 /* Ignore flush event after TxQ destroyed */
2411 break;
2412
2413 case SFXGE_TXQ_FLUSH_PENDING:
2414 flush_pending = B_TRUE;
2415 stp->st_state = SFXGE_TXQ_FLUSH_DONE;
2416 break;
2417
2418 case SFXGE_TXQ_FLUSH_FAILED:
2419 /* MC may have rebooted before handling the flush request */
2420 stp->st_state = SFXGE_TXQ_FLUSH_DONE;
2421 break;
2422
2423 case SFXGE_TXQ_STARTED:
2424 /*
2425 		 * The MC initiated the flush, either on MC reboot or because
2426 		 * of a bad Tx descriptor
2427 */
2428 stp->st_state = SFXGE_TXQ_FLUSH_DONE;
2429 break;
2430
2431 case SFXGE_TXQ_FLUSH_DONE:
2432 /* Ignore unexpected extra flush event */
2433 ASSERT(B_FALSE);
2434 break;
2435
2436 default:
2437 ASSERT(B_FALSE);
2438 }
2439
2440
2441 mutex_exit(&(stp->st_lock));
2442
2443 if (flush_pending == B_FALSE) {
2444 /* Flush was not pending */
2445 return;
2446 }
2447
2448 mutex_enter(&(sp->s_tx_flush_lock));
2449 sp->s_tx_flush_pending--;
2450 if (sp->s_tx_flush_pending <= 0) {
2451 /* All queues flushed: wakeup sfxge_tx_stop() */
2452 cv_signal(&(sp->s_tx_flush_kv));
2453 }
2454 mutex_exit(&(sp->s_tx_flush_lock));
2455 }
2456
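/*
 * Request a hardware flush of a started transmit queue. Depending on the
 * outcome the queue moves to FLUSH_PENDING, FLUSH_DONE or FLUSH_FAILED.
 */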
2457 static void
2458 sfxge_tx_qflush(sfxge_t *sp, unsigned int index, boolean_t wait_for_flush)
2459 {
2460 sfxge_txq_t *stp = sp->s_stp[index];
2461 int rc;
2462
2463 ASSERT(mutex_owned(&(sp->s_state_lock)));
2464 ASSERT(mutex_owned(&(sp->s_tx_flush_lock)));
2465
2466 mutex_enter(&(stp->st_lock));
2467
2468 /* Prepare to flush and stop the queue */
2469 if (stp->st_state == SFXGE_TXQ_STARTED) {
2470 /* Flush the transmit queue */
2471 if ((rc = efx_tx_qflush(stp->st_etp)) == EALREADY) {
2472 			/* Already flushed; may have been initiated by the MC */
2473 stp->st_state = SFXGE_TXQ_FLUSH_DONE;
2474 } else if (rc != 0) {
2475 /* Unexpected error */
2476 stp->st_state = SFXGE_TXQ_FLUSH_FAILED;
2477 } else if (wait_for_flush) {
2478 stp->st_state = SFXGE_TXQ_FLUSH_PENDING;
2479 sp->s_tx_flush_pending++;
2480 } else {
2481 /* Assume the flush is done */
2482 stp->st_state = SFXGE_TXQ_FLUSH_DONE;
2483 }
2484 }
2485
2486 mutex_exit(&(stp->st_lock));
2487 }
2488
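/*
 * Stop a transmit queue: destroy the hardware queue, release its buffer
 * table entries, reap all outstanding descriptors, discard any deferred
 * packets and return the queue to the INITIALIZED state.
 */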
2489 static void
2490 sfxge_tx_qstop(sfxge_t *sp, unsigned int index)
2491 {
2492 sfxge_txq_t *stp = sp->s_stp[index];
2493 unsigned int evq = stp->st_evq;
2494 sfxge_evq_t *sep = sp->s_sep[evq];
2495
2496 mutex_enter(&(sep->se_lock));
2497 mutex_enter(&(stp->st_lock));
2498
2499 if (stp->st_state == SFXGE_TXQ_INITIALIZED)
2500 goto done;
2501
2502 ASSERT(stp->st_state == SFXGE_TXQ_FLUSH_PENDING ||
2503 stp->st_state == SFXGE_TXQ_FLUSH_DONE ||
2504 stp->st_state == SFXGE_TXQ_FLUSH_FAILED);
2505
2506 /* All queues should have been flushed */
2507 if (stp->st_sp->s_tx_flush_pending != 0) {
2508 dev_err(sp->s_dip, CE_NOTE,
2509 SFXGE_CMN_ERR "txq[%d] stop with flush_pending=%d",
2510 index, stp->st_sp->s_tx_flush_pending);
2511 }
2512 if (stp->st_state == SFXGE_TXQ_FLUSH_FAILED) {
2513 dev_err(sp->s_dip, CE_NOTE,
2514 SFXGE_CMN_ERR "txq[%d] flush failed", index);
2515 }
2516
2517 /* Destroy the transmit queue */
2518 efx_tx_qdestroy(stp->st_etp);
2519 stp->st_etp = NULL;
2520
2521 /* Clear entries from the buffer table */
2522 sfxge_sram_buf_tbl_clear(sp, stp->st_id,
2523 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
2524
2525 sfxge_tx_qlist_abort(stp);
2526 ASSERT3U(stp->st_n, ==, 0);
2527
2528 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
2529
2530 stp->st_pending = stp->st_added;
2531
2532 sfxge_tx_qcomplete(stp);
2533 ASSERT3U(stp->st_completed, ==, stp->st_pending);
2534
2535 sfxge_tx_qreap(stp);
2536 ASSERT3U(stp->st_reaped, ==, stp->st_completed);
2537
2538 /*
2539 	 * Ensure the deferred packet list is cleared.
2540 	 * This can race with sfxge_tx_packet_add() adding to the put list.
2541 */
2542 sfxge_tx_qdpl_flush_locked(stp);
2543
2544 stp->st_added = 0;
2545 stp->st_pending = 0;
2546 stp->st_completed = 0;
2547 stp->st_reaped = 0;
2548
2549 stp->st_state = SFXGE_TXQ_INITIALIZED;
2550
2551 done:
2552 mutex_exit(&(stp->st_lock));
2553 mutex_exit(&(sep->se_lock));
2554 }
2555
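/*
 * Tear down a transmit queue: detach it from the driver, free its event
 * queue label and kstats, empty the free buffer/mapping/packet pools and
 * return the queue structure to its kmem cache.
 */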
2556 static void
2557 sfxge_tx_qfini(sfxge_t *sp, unsigned int index)
2558 {
2559 sfxge_txq_t *stp = sp->s_stp[index];
2560 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2561
2562 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);
2563 stp->st_state = SFXGE_TXQ_UNINITIALIZED;
2564
2565 /* Detach the TXQ from the driver */
2566 sp->s_stp[index] = NULL;
2567 ASSERT(sp->s_tx_qcount > 0);
2568 sp->s_tx_qcount--;
2569
2570 /* Free the EVQ label for events from this TXQ */
2571 (void) sfxge_ev_txlabel_free(sp, stp->st_evq, stp, stp->st_label);
2572 stp->st_label = 0;
2573
2574 /* Tear down the statistics */
2575 sfxge_tx_kstat_fini(stp);
2576
2577 /* Ensure the deferred packet list is empty */
2578 ASSERT3U(stdp->std_count, ==, 0);
2579 ASSERT3P(stdp->std_get, ==, NULL);
2580 ASSERT3U(stdp->std_put, ==, 0);
2581
2582 /* Clear the free buffer pool */
2583 sfxge_tx_qfbp_empty(stp);
2584
2585 /* Clear the free mapping pool */
2586 sfxge_tx_qfmp_empty(stp);
2587
2588 /* Clear the free packet pool */
2589 sfxge_tx_qfpp_empty(stp);
2590
2591 mutex_destroy(&(stp->st_lock));
2592
2593 stp->st_evq = 0;
2594 stp->st_type = 0;
2595 stp->st_index = 0;
2596
2597 kmem_cache_free(sp->s_tqc, stp);
2598 }
2599
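/*
 * Create the transmit-path kmem caches (packets, buffers, mappings and
 * queues) and initialize the transmit queues: one per event queue for the
 * no-checksum and full-checksum types, and a single queue for IPv4 header
 * checksum offload.
 */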
2600 int
2601 sfxge_tx_init(sfxge_t *sp)
2602 {
2603 sfxge_intr_t *sip = &(sp->s_intr);
2604 char name[MAXNAMELEN];
2605 sfxge_txq_type_t qtype;
2606 unsigned int txq, evq;
2607 int index;
2608 int rc;
2609
2610 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_packet_cache",
2611 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2612
2613 sp->s_tpc = kmem_cache_create(name, sizeof (sfxge_tx_packet_t),
2614 SFXGE_CPU_CACHE_SIZE, sfxge_tx_packet_ctor, sfxge_tx_packet_dtor,
2615 NULL, sp, NULL, 0);
2616 ASSERT(sp->s_tpc != NULL);
2617
2618 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_buffer_cache",
2619 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2620
2621 sp->s_tbc = kmem_cache_create(name, sizeof (sfxge_tx_buffer_t),
2622 SFXGE_CPU_CACHE_SIZE, sfxge_tx_buffer_ctor, sfxge_tx_buffer_dtor,
2623 NULL, sp, NULL, 0);
2624 ASSERT(sp->s_tbc != NULL);
2625
2626 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_mapping_cache",
2627 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2628
2629 sp->s_tmc = kmem_cache_create(name, sizeof (sfxge_tx_mapping_t),
2630 SFXGE_CPU_CACHE_SIZE, sfxge_tx_mapping_ctor, sfxge_tx_mapping_dtor,
2631 NULL, sp, NULL, 0);
2632 ASSERT(sp->s_tmc != NULL);
2633
2634 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_txq_cache",
2635 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2636
2637 sp->s_tqc = kmem_cache_create(name, sizeof (sfxge_txq_t),
2638 SFXGE_CPU_CACHE_SIZE, sfxge_tx_qctor, sfxge_tx_qdtor, NULL, sp,
2639 NULL, 0);
2640 ASSERT(sp->s_tqc != NULL);
2641
2642 /* Initialize the transmit queues. */
2643 sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM] = sip->si_nalloc;
2644 sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM] = 1;
2645 sp->s_tx_scale_max[SFXGE_TXQ_IP_TCP_UDP_CKSUM] = sip->si_nalloc;
2646
2647 /* Ensure minimum queue counts required by sfxge_tx_packet_add(). */
2648 if (sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM] < 1)
2649 sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM] = 1;
2650
2651 if (sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM] < 1)
2652 sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM] = 1;
2653
2654 txq = 0;
2655 for (qtype = 0; qtype < SFXGE_TXQ_NTYPES; qtype++) {
2656 unsigned int tx_scale = sp->s_tx_scale_max[qtype];
2657
2658 if (txq + tx_scale > EFX_ARRAY_SIZE(sp->s_stp)) {
2659 rc = EINVAL;
2660 goto fail1;
2661 }
2662
2663 sp->s_tx_scale_base[qtype] = txq;
2664
2665 for (evq = 0; evq < tx_scale; evq++) {
2666 if ((rc = sfxge_tx_qinit(sp, txq, qtype, evq)) != 0) {
2667 goto fail2;
2668 }
2669 txq++;
2670 }
2671 ASSERT3U(txq, <=, EFX_ARRAY_SIZE(sp->s_stp));
2672 }
2673
2674 return (0);
2675
2676 fail2:
2677 DTRACE_PROBE(fail2);
2678
2679 fail1:
2680 DTRACE_PROBE1(fail1, int, rc);
2681
2682 index = EFX_ARRAY_SIZE(sp->s_stp);
2683 while (--index >= 0) {
2684 if (sp->s_stp[index] != NULL)
2685 sfxge_tx_qfini(sp, index);
2686 }
2687
2688 kmem_cache_destroy(sp->s_tqc);
2689 sp->s_tqc = NULL;
2690
2691 kmem_cache_destroy(sp->s_tmc);
2692 sp->s_tmc = NULL;
2693
2694 kmem_cache_destroy(sp->s_tbc);
2695 sp->s_tbc = NULL;
2696
2697 kmem_cache_destroy(sp->s_tpc);
2698 sp->s_tpc = NULL;
2699
2700 return (rc);
2701 }
2702
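/* Initialize the common-code transmit module and start every TXQ */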
2703 int
2704 sfxge_tx_start(sfxge_t *sp)
2705 {
2706 efx_nic_t *enp = sp->s_enp;
2707 int index;
2708 int rc;
2709
2710 /* Initialize the transmit module */
2711 if ((rc = efx_tx_init(enp)) != 0)
2712 goto fail1;
2713
2714 for (index = 0; index < EFX_ARRAY_SIZE(sp->s_stp); index++) {
2715 if (sp->s_stp[index] != NULL)
2716 if ((rc = sfxge_tx_qstart(sp, index)) != 0)
2717 goto fail2;
2718 }
2719
2720 return (0);
2721
2722 fail2:
2723 DTRACE_PROBE(fail2);
2724
2725 sfxge_tx_stop(sp);
2726
2727 fail1:
2728 DTRACE_PROBE1(fail1, int, rc);
2729
2730 return (rc);
2731 }
2732
2733
2734 /*
2735 * Add a packet to the TX Deferred Packet List and if the TX queue lock
2736 * can be acquired then call sfxge_tx_qdpl_service() to fragment and push
2737 * to the H/W transmit descriptor ring
2738 *
2739  * If ENOSPC is returned then the DPL is full or packet creation failed, but
2740 * the mblk isn't freed so that the caller can return this mblk from mc_tx() to
2741 * back-pressure the OS stack.
2742 *
2743 * For all other errors the mblk is freed
2744 */
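/*
 * Illustrative only: a minimal sketch of how a GLDv3 mc_tx() entry point
 * might use sfxge_tx_packet_add() under the contract described above,
 * assuming packets arrive as an mblk chain linked via b_next. The callback
 * name and its registration are assumptions for the sketch and are not
 * part of this file.
 */
#if 0
static mblk_t *
sfxge_tx_sketch(void *arg, mblk_t *mp)
{
	sfxge_t *sp = arg;

	while (mp != NULL) {
		mblk_t *next = mp->b_next;

		mp->b_next = NULL;
		if (sfxge_tx_packet_add(sp, mp) == ENOSPC) {
			/*
			 * DPL full: the mblk was not freed, so re-chain it
			 * and return the remainder to back-pressure the
			 * stack; mac_tx_update() will restart transmission.
			 */
			mp->b_next = next;
			break;
		}
		/* On any other error the mblk has already been freed */
		mp = next;
	}
	return (mp);
}
#endif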
2745 int
2746 sfxge_tx_packet_add(sfxge_t *sp, mblk_t *mp)
2747 {
2748 struct ether_header *etherhp;
2749 struct ip *iphp;
2750 struct tcphdr *thp;
2751 size_t off;
2752 size_t size;
2753 size_t mss;
2754 sfxge_txq_t *stp;
2755 unsigned int txq;
2756 int index;
2757 boolean_t locked;
2758 sfxge_tx_packet_t *stpp;
2759 sfxge_packet_type_t pkt_type;
2760 uint16_t sport, dport;
2761 int rc = 0;
2762
2763 ASSERT3P(mp->b_next, ==, NULL);
2764 ASSERT(!(DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM));
2765
2766 /*
2767 * Do not enqueue packets during startup/shutdown;
2768 *
2769 * NOTE: This access to the state is NOT protected by the state lock. It
2770 * is an imperfect test and anything further getting onto the get/put
2771 * deferred packet lists is cleaned up in (possibly repeated) calls to
2772 * sfxge_can_destroy().
2773 */
2774 if (sp->s_state != SFXGE_STARTED) {
2775 rc = EINVAL;
2776 goto fail1;
2777 }
2778
2779 etherhp = NULL;
2780 iphp = NULL;
2781 thp = NULL;
2782 off = 0;
2783 size = 0;
2784 mss = 0;
2785
2786 /* Check whether we need the header pointers for LSO segmentation */
2787 if (DB_LSOFLAGS(mp) & HW_LSO) {
2788 /* LSO segmentation relies on hardware checksum offload */
2789 DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
2790
2791 if ((mss = DB_LSOMSS(mp)) == 0) {
2792 rc = EINVAL;
2793 goto fail1;
2794 }
2795
2796 pkt_type = sfxge_pkthdr_parse(mp, ðerhp, &iphp, &thp,
2797 &off, &size, &sport, &dport);
2798
2799 if (pkt_type != SFXGE_PACKET_TYPE_IPV4_TCP ||
2800 etherhp == NULL ||
2801 iphp == NULL ||
2802 thp == NULL ||
2803 off == 0) {
2804 rc = EINVAL;
2805 goto fail2;
2806 }
2807 }
2808
2809 	/* Choose the appropriate transmit queue */
2810 if (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) {
2811 sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
2812
2813 if (srsp->srs_state == SFXGE_RX_SCALE_STARTED) {
2814 uint32_t hash;
2815
2816 if (srsp->srs_count > 1) {
2817 /*
2818 * If we have not already parsed the headers
2819 * for LSO segmentation then we need to do it
2820 * now so we can calculate the hash.
2821 */
2822 if (thp == NULL) {
2823 (void) sfxge_pkthdr_parse(mp, ðerhp,
2824 &iphp, &thp, &off, &size,
2825 &sport, &dport);
2826 }
2827
2828 if (thp != NULL) {
2829 SFXGE_TCP_HASH(sp,
2830 &iphp->ip_dst.s_addr,
2831 thp->th_dport,
2832 &iphp->ip_src.s_addr,
2833 thp->th_sport, hash);
2834
2835 index = srsp->srs_tbl[hash %
2836 SFXGE_RX_SCALE_MAX];
2837 } else if (iphp != NULL) {
2838 /*
2839 * Calculate IPv4 4-tuple hash, with
2840 * TCP/UDP/SCTP src/dest ports. Ports
2841 * are zero for other IPv4 protocols.
2842 */
2843 SFXGE_IP_HASH(sp,
2844 &iphp->ip_dst.s_addr, dport,
2845 &iphp->ip_src.s_addr, sport, hash);
2846
2847 index = srsp->srs_tbl[hash %
2848 SFXGE_RX_SCALE_MAX];
2849 } else {
2850 /*
2851 					 * Other traffic always goes to the
2852 					 * queue in the zero-th entry of
2853 * the RSS table.
2854 */
2855 index = srsp->srs_tbl[0];
2856 }
2857 } else {
2858 /*
2859 * It does not matter what the hash is
2860 * because all the RSS table entries will be
2861 * the same.
2862 */
2863 index = srsp->srs_tbl[0];
2864 }
2865
2866 /*
2867 * Find the event queue corresponding to the hash in
2868 * the RSS table.
2869 */
2870 txq = sp->s_tx_scale_base[SFXGE_TXQ_IP_TCP_UDP_CKSUM] +
2871 index;
2872 stp = sp->s_stp[txq];
2873 ASSERT3U(stp->st_evq, ==, index);
2874 } else {
2875 index = 0;
2876 txq = sp->s_tx_scale_base[SFXGE_TXQ_IP_TCP_UDP_CKSUM] +
2877 index;
2878 stp = sp->s_stp[txq];
2879 }
2880 } else if (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) {
2881 ASSERT3U(sp->s_tx_scale_max[SFXGE_TXQ_IP_CKSUM], >=, 1);
2882 index = 0;
2883 txq = sp->s_tx_scale_base[SFXGE_TXQ_IP_CKSUM] + index;
2884 stp = sp->s_stp[txq];
2885 } else {
2886 /*
2887 * No hardware checksum offload requested.
2888 */
2889 sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
2890
2891 if (srsp->srs_state == SFXGE_RX_SCALE_STARTED) {
2892 uint32_t hash = 0;
2893
2894 if (srsp->srs_count > 1) {
2895 if (iphp == NULL) {
2896 (void) sfxge_pkthdr_parse(mp, ðerhp,
2897 &iphp, &thp, &off, &size,
2898 &sport, &dport);
2899 }
2900
2901 if (iphp != NULL) {
2902 /*
2903 * Calculate IPv4 4-tuple hash, with
2904 * TCP/UDP/SCTP src/dest ports. Ports
2905 * are zero for other IPv4 protocols.
2906 */
2907 SFXGE_IP_HASH(sp,
2908 &iphp->ip_dst.s_addr, dport,
2909 &iphp->ip_src.s_addr, sport, hash);
2910
2911 hash = hash % SFXGE_RX_SCALE_MAX;
2912 }
2913 }
2914 index = srsp->srs_tbl[hash];
2915
2916 /*
2917 * The RSS table (indexed by hash) gives the RXQ index,
2918 * (mapped 1:1 with EVQs). Find the TXQ that results in
2919 * using the same EVQ as for the RX data path.
2920 */
2921 ASSERT3U(sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM],
2922 >, index);
2923 txq = sp->s_tx_scale_base[SFXGE_TXQ_NON_CKSUM] + index;
2924 stp = sp->s_stp[txq];
2925 ASSERT3U(stp->st_evq, ==, index);
2926 } else {
2927 ASSERT3U(sp->s_tx_scale_max[SFXGE_TXQ_NON_CKSUM], >, 0);
2928 index = 0;
2929 txq = sp->s_tx_scale_base[SFXGE_TXQ_NON_CKSUM] + index;
2930 stp = sp->s_stp[txq];
2931 }
2932
2933
2934 }
2935 ASSERT(stp != NULL);
2936
2937 ASSERT(mss == 0 || (DB_LSOFLAGS(mp) & HW_LSO));
2938
2939 /* Try to grab the lock */
2940 locked = mutex_tryenter(&(stp->st_lock));
2941
2942 if (locked) {
2943 /* Try to grab a packet from the pool */
2944 stpp = sfxge_tx_qfpp_get(stp);
2945 } else {
2946 stpp = NULL;
2947 }
2948
2949 if (stpp == NULL) {
2950 /*
2951 		 * Either the pool was empty or we don't have the lock, so
2952 		 * allocate a new packet.
2953 */
2954 if ((stpp = sfxge_tx_packet_create(sp)) == NULL) {
2955 rc = ENOSPC;
2956 goto fail3;
2957 }
2958 }
2959
2960 stpp->stp_mp = mp;
2961 stpp->stp_etherhp = etherhp;
2962 stpp->stp_iphp = iphp;
2963 stpp->stp_thp = thp;
2964 stpp->stp_off = off;
2965 stpp->stp_size = size;
2966 stpp->stp_mss = mss;
2967 stpp->stp_dpl_put_len = 0;
2968
2969 rc = sfxge_tx_qdpl_add(stp, stpp, locked);
2970 if (rc != 0) {
2971 		/* ENOSPC can occur if the DPL get or put list is full */
2972 ASSERT3U(rc, ==, ENOSPC);
2973
2974 		/*
2975 		 * Note: if this is the unlocked DPL put list full case, there
2976 		 * is no need to worry about a race with a locked
2977 		 * sfxge_tx_qdpl_swizzle(), as we know that the TX DPL put list
2978 		 * was full and would have been swizzled onto the TX DPL get
2979 		 * list, hence guaranteeing future TX completions and calls
2980 		 * to mac_tx_update() via sfxge_tx_qcomplete().
2981 		 */
2982 goto fail4;
2983 }
2984
2985 /* Try to grab the lock again */
2986 if (!locked)
2987 locked = mutex_tryenter(&(stp->st_lock));
2988
2989 if (locked) {
2990 /* Try to service the list */
2991 sfxge_tx_qdpl_service(stp);
2992 /* lock has been dropped */
2993 }
2994
2995 return (0);
2996
2997 fail4:
2998 DTRACE_PROBE(fail4);
2999 sfxge_tx_packet_destroy(sp, stpp);
3000 fail3:
3001 DTRACE_PROBE(fail3);
3002 if (locked)
3003 mutex_exit(&(stp->st_lock));
3004 fail2:
3005 DTRACE_PROBE(fail2);
3006 fail1:
3007 DTRACE_PROBE1(fail1, int, rc);
3008
3009 if (rc != ENOSPC)
3010 freemsg(mp);
3011 return (rc);
3012 }
3013
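/*
 * Flush and stop all transmit queues. Unless a hardware error has been
 * flagged, wait (bounded by SFXGE_TX_QFLUSH_USEC) for the flush events
 * before stopping the queues and tearing down the transmit module.
 */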
3014 void
3015 sfxge_tx_stop(sfxge_t *sp)
3016 {
3017 efx_nic_t *enp = sp->s_enp;
3018 clock_t timeout;
3019 boolean_t wait_for_flush;
3020 int index;
3021
3022 ASSERT(mutex_owned(&(sp->s_state_lock)));
3023
3024 mutex_enter(&(sp->s_tx_flush_lock));
3025
3026 /* Flush all the queues */
3027 if (sp->s_hw_err == SFXGE_HW_OK) {
3028 wait_for_flush = B_TRUE;
3029 } else {
3030 /*
3031 * Flag indicates possible hardware failure.
3032 * Attempt flush but do not wait for it to complete.
3033 */
3034 wait_for_flush = B_FALSE;
3035 }
3036
3037 /* Prepare queues to stop and flush the hardware ring */
3038 index = EFX_ARRAY_SIZE(sp->s_stp);
3039 while (--index >= 0) {
3040 if (sp->s_stp[index] != NULL)
3041 sfxge_tx_qflush(sp, index, wait_for_flush);
3042 }
3043
3044 if (wait_for_flush == B_FALSE)
3045 goto flush_done;
3046
3047 	/* Wait up to 2 seconds for queue flushing to complete */
3048 timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_TX_QFLUSH_USEC);
3049
3050 while (sp->s_tx_flush_pending > 0) {
3051 if (cv_timedwait(&(sp->s_tx_flush_kv), &(sp->s_tx_flush_lock),
3052 timeout) < 0) {
3053 /* Timeout waiting for queues to flush */
3054 dev_info_t *dip = sp->s_dip;
3055
3056 DTRACE_PROBE(timeout);
3057 dev_err(dip, CE_NOTE,
3058 SFXGE_CMN_ERR "tx qflush timeout");
3059 break;
3060 }
3061 }
3062
3063 flush_done:
3064 sp->s_tx_flush_pending = 0;
3065 mutex_exit(&(sp->s_tx_flush_lock));
3066
3067 /* Stop all the queues */
3068 index = EFX_ARRAY_SIZE(sp->s_stp);
3069 while (--index >= 0) {
3070 if (sp->s_stp[index] != NULL)
3071 sfxge_tx_qstop(sp, index);
3072 }
3073
3074 /* Tear down the transmit module */
3075 efx_tx_fini(enp);
3076 }
3077
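/* Tear down all transmit queues and destroy the transmit kmem caches */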
3078 void
3079 sfxge_tx_fini(sfxge_t *sp)
3080 {
3081 int index;
3082
3083 index = EFX_ARRAY_SIZE(sp->s_stp);
3084 while (--index >= 0) {
3085 if (sp->s_stp[index] != NULL)
3086 sfxge_tx_qfini(sp, index);
3087 }
3088
3089 kmem_cache_destroy(sp->s_tqc);
3090 sp->s_tqc = NULL;
3091
3092 kmem_cache_destroy(sp->s_tmc);
3093 sp->s_tmc = NULL;
3094
3095 kmem_cache_destroy(sp->s_tbc);
3096 sp->s_tbc = NULL;
3097
3098 kmem_cache_destroy(sp->s_tpc);
3099 sp->s_tpc = NULL;
3100 }
3101