/*
 * Copyright (c) 2008-2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/strft.h>
#include <sys/ksynch.h>
#include <sys/ethernet.h>
#include <sys/crc32.h>
#include <sys/pattr.h>
#include <sys/cpu.h>

#include <inet/ip.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include "sfxge.h"

#include "efx.h"

/* RXQ flush response timeout (in microseconds) */
#define SFXGE_RX_QFLUSH_USEC    (2000000)

/* RXQ flush tries in the case of failure */
#define SFXGE_RX_QFLUSH_TRIES   (5)

/* RXQ default packet buffer preallocation (number of packet buffers) */
#define SFXGE_RX_QPREALLOC      (0)

/* Receive packet DMA attributes */
static ddi_device_acc_attr_t sfxge_rx_packet_devacc = {

        DDI_DEVICE_ATTR_V0,     /* devacc_attr_version */
        DDI_NEVERSWAP_ACC,      /* devacc_attr_endian_flags */
        DDI_STRICTORDER_ACC     /* devacc_attr_dataorder */
};

static ddi_dma_attr_t sfxge_rx_packet_dma_attr = {
        DMA_ATTR_V0,            /* dma_attr_version */
        0,                      /* dma_attr_addr_lo */
        0xffffffffffffffffull,  /* dma_attr_addr_hi */
        0xffffffffffffffffull,  /* dma_attr_count_max */
        SFXGE_CPU_CACHE_SIZE,   /* dma_attr_align */
        0xffffffff,             /* dma_attr_burstsizes */
        1,                      /* dma_attr_minxfer */
        0xffffffffffffffffull,  /* dma_attr_maxxfer */
        0xffffffffffffffffull,  /* dma_attr_seg */
        1,                      /* dma_attr_sgllen */
        1,                      /* dma_attr_granular */
        0                       /* dma_attr_flags */
};

/* Receive queue DMA attributes */
static ddi_device_acc_attr_t sfxge_rxq_devacc = {

        DDI_DEVICE_ATTR_V0,     /* devacc_attr_version */
        DDI_NEVERSWAP_ACC,      /* devacc_attr_endian_flags */
        DDI_STRICTORDER_ACC     /* devacc_attr_dataorder */
};

static ddi_dma_attr_t sfxge_rxq_dma_attr = {
        DMA_ATTR_V0,            /* dma_attr_version */
        0,                      /* dma_attr_addr_lo */
        0xffffffffffffffffull,  /* dma_attr_addr_hi */
        0xffffffffffffffffull,  /* dma_attr_count_max */
        EFX_BUF_SIZE,           /* dma_attr_align */
        0xffffffff,             /* dma_attr_burstsizes */
        1,                      /* dma_attr_minxfer */
        0xffffffffffffffffull,  /* dma_attr_maxxfer */
        0xffffffffffffffffull,  /* dma_attr_seg */
        1,                      /* dma_attr_sgllen */
        1,                      /* dma_attr_granular */
        0                       /* dma_attr_flags */
};

/* Forward declaration */
static void sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc);

static int
sfxge_rx_packet_ctor(void *buf, void *arg, int kmflags)
{
        sfxge_rx_packet_t *srpp = buf;
        sfxge_t *sp = arg;
        dev_info_t *dip = sp->s_dip;
        int err;

        ASSERT3U(sizeof (srpp->__srp_u1.__srp_s1), <=,
            sizeof (srpp->__srp_u1.__srp_pad));
        ASSERT3U(sizeof (srpp->__srp_u2.__srp_s2), <=,
            sizeof (srpp->__srp_u2.__srp_pad));

        bzero(buf, sizeof (sfxge_rx_packet_t));

        /* Allocate a DMA handle */
        err = ddi_dma_alloc_handle(dip, &sfxge_rx_packet_dma_attr,
            (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
            NULL, &(srpp->srp_dma_handle));
        if (err != DDI_SUCCESS)
                goto fail1;

        return (0);

fail1:
        DTRACE_PROBE1(fail1, int, err);

        SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);

        return (-1);
}

static void
sfxge_rx_packet_dtor(void *buf, void *arg)
{
        sfxge_rx_packet_t *srpp = buf;

        _NOTE(ARGUNUSED(arg))

        /* Free the DMA handle */
        ddi_dma_free_handle(&(srpp->srp_dma_handle));
        srpp->srp_dma_handle = NULL;

        SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);
}

static int
sfxge_rx_qctor(void *buf, void *arg, int kmflags)
{
        sfxge_rxq_t *srp = buf;
        efsys_mem_t *esmp = &(srp->sr_mem);
        sfxge_t *sp = arg;
        sfxge_dma_buffer_attr_t dma_attr;
        sfxge_rx_fpp_t *srfppp;
        int nprealloc;
        unsigned int id;
        int rc;

        /* Compile-time structure layout checks */
        EFX_STATIC_ASSERT(sizeof (srp->__sr_u1.__sr_s1) <=
            sizeof (srp->__sr_u1.__sr_pad));
        EFX_STATIC_ASSERT(sizeof (srp->__sr_u2.__sr_s2) <=
            sizeof (srp->__sr_u2.__sr_pad));
        EFX_STATIC_ASSERT(sizeof (srp->__sr_u3.__sr_s3) <=
            sizeof (srp->__sr_u3.__sr_pad));

        bzero(buf, sizeof (sfxge_rxq_t));

        srp->sr_sp = sp;

        dma_attr.sdba_dip = sp->s_dip;
        dma_attr.sdba_dattrp = &sfxge_rxq_dma_attr;
        dma_attr.sdba_callback = DDI_DMA_SLEEP;
        dma_attr.sdba_length = EFX_RXQ_SIZE(sp->s_rxq_size);
        dma_attr.sdba_memflags = DDI_DMA_CONSISTENT;
        dma_attr.sdba_devaccp = &sfxge_rxq_devacc;
        dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT;
        dma_attr.sdba_maxcookies = 1;
        dma_attr.sdba_zeroinit = B_FALSE;

        if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
                goto fail1;

        /* Allocate some buffer table entries */
        if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_RXQ_NBUFS(sp->s_rxq_size),
            &(srp->sr_id))) != 0)
                goto fail2;

        /* Allocate the context array */
        if ((srp->sr_srpp = kmem_zalloc(sizeof (sfxge_rx_packet_t *) *
            sp->s_rxq_size, kmflags)) == NULL) {
                rc = ENOMEM;
                goto fail3;
        }

        /* Allocate the flow table */
        if ((srp->sr_flow = kmem_zalloc(sizeof (sfxge_rx_flow_t) *
            SFXGE_MAX_FLOW, kmflags)) == NULL) {
                rc = ENOMEM;
                goto fail4;
        }

        srp->sr_srfpp = &(srp->sr_srfp);
        srp->sr_rto = drv_usectohz(200000);

        srp->sr_mpp = &(srp->sr_mp);

        /* Initialize the free packet pool */
        srfppp = &(srp->sr_fpp);
        if ((srfppp->srfpp_putp = kmem_zalloc(SFXGE_CPU_CACHE_SIZE *
            SFXGE_RX_FPP_NSLOTS, kmflags)) == NULL) {
                rc = ENOMEM;
                goto fail5;
        }
        for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
                sfxge_rx_fpp_putlist_t *putp;
                size_t off;

                off = id * SFXGE_CPU_CACHE_SIZE;
                putp = (void *)(srfppp->srfpp_putp + off);

                putp->srfpl_putp = NULL;
                putp->srfpl_putpp = &(putp->srfpl_putp);
                mutex_init(&(putp->srfpl_lock), NULL, MUTEX_DRIVER,
                    DDI_INTR_PRI(sp->s_intr.si_intr_pri));
        }

        cv_init(&(srp->sr_flush_kv), NULL, CV_DRIVER, NULL);

        /* Preallocate some packets on the free packet pool */
        nprealloc = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
            DDI_PROP_DONTPASS, "rx_prealloc_pkt_buffers", SFXGE_RX_QPREALLOC);
        sfxge_rx_qpreallocate(srp, nprealloc);

        return (0);

fail5:
        DTRACE_PROBE(fail5);

        srp->sr_mpp = NULL;

        srp->sr_rto = 0;
        srp->sr_srfpp = NULL;

        /* Free the flow table */
        kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
            SFXGE_MAX_FLOW);
        srp->sr_flow = NULL;

fail4:
        DTRACE_PROBE(fail4);

        /* Free the context array */
        kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
            sp->s_rxq_size);
        srp->sr_srpp = NULL;

fail3:
        DTRACE_PROBE(fail3);

        /* Free the buffer table entries */
        sfxge_sram_buf_tbl_free(sp, srp->sr_id,
            EFX_RXQ_NBUFS(sp->s_rxq_size));
        srp->sr_id = 0;

fail2:
        DTRACE_PROBE(fail2);
        /* Remove dma setup */
        sfxge_dma_buffer_destroy(esmp);

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        srp->sr_sp = NULL;

        SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);

        return (-1);
}

static void
sfxge_rx_qdtor(void *buf, void *arg)
{
        sfxge_rxq_t *srp = buf;
        efsys_mem_t *esmp = &(srp->sr_mem);
        sfxge_t *sp = srp->sr_sp;
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        unsigned int id;

        _NOTE(ARGUNUSED(arg))

        cv_destroy(&(srp->sr_flush_kv));

        /* Tear down the free packet pool */
        for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
                sfxge_rx_fpp_putlist_t *putp;
                size_t off;

                off = id * SFXGE_CPU_CACHE_SIZE;
                putp = (void *)(srfppp->srfpp_putp + off);

                putp->srfpl_putpp = NULL;
                mutex_destroy(&(putp->srfpl_lock));

                SFXGE_OBJ_CHECK(putp, sfxge_rx_fpp_putlist_t);
        }
        kmem_free(srfppp->srfpp_putp, SFXGE_CPU_CACHE_SIZE *
            SFXGE_RX_FPP_NSLOTS);
        srfppp->srfpp_putp = NULL;

        srp->sr_mpp = NULL;

        srp->sr_rto = 0;
        srp->sr_srfpp = NULL;

        /* Free the flow table */
        kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
            SFXGE_MAX_FLOW);
        srp->sr_flow = NULL;

        /* Free the context array */
        kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
            sp->s_rxq_size);
        srp->sr_srpp = NULL;

        /* Free the buffer table entries */
        sfxge_sram_buf_tbl_free(sp, srp->sr_id,
            EFX_RXQ_NBUFS(sp->s_rxq_size));
        srp->sr_id = 0;

        /* Tear down dma setup */
        sfxge_dma_buffer_destroy(esmp);

        SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);
}

/* Note: This function takes ownership of *srpp. */
static inline void
sfxge_rx_qfpp_put(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
{
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        mblk_t *mp = srpp->srp_mp;
        unsigned int id;
        size_t off;
        sfxge_rx_fpp_putlist_t *putp;

        ASSERT3P(mp->b_next, ==, NULL);
        ASSERT3P(mp->b_prev, ==, NULL);

        id = CPU->cpu_seqid & SFXGE_RX_FPP_MASK;
        off = id * SFXGE_CPU_CACHE_SIZE;

        ASSERT3P(srpp->srp_putp, ==, srfppp->srfpp_putp);
        putp = (void *)(srpp->srp_putp + off);

        mutex_enter(&(putp->srfpl_lock));
        putp->srfpl_count++;
        *putp->srfpl_putpp = mp;
        putp->srfpl_putpp = &(mp->b_next);
        mutex_exit(&(putp->srfpl_lock));
}

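/*
 * Move packets from the per-CPU put lists onto the single get list,
 * which is only manipulated under the event queue lock. Returns the
 * number of packets still on loan to the stack.
 */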
static unsigned int
sfxge_rx_qfpp_swizzle(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        unsigned int start;
        unsigned int id;
        mblk_t *p;
        mblk_t **pp;
        unsigned int count;
        unsigned int loaned;

        ASSERT(mutex_owned(&(sep->se_lock)));

        /* We want to access the put list for the current CPU last */
        id = start = (CPU->cpu_seqid + 1) & SFXGE_RX_FPP_MASK;

        do {
                sfxge_rx_fpp_putlist_t *putp;
                size_t off;

                off = id * SFXGE_CPU_CACHE_SIZE;
                id = (id + 1) & SFXGE_RX_FPP_MASK;

                putp = (void *)(srfppp->srfpp_putp + off);

                /* Acquire the put list */
                mutex_enter(&(putp->srfpl_lock));

                p = putp->srfpl_putp;
                pp = putp->srfpl_putpp;
                count = putp->srfpl_count;

                putp->srfpl_putp = NULL;
                putp->srfpl_putpp = &(putp->srfpl_putp);
                putp->srfpl_count = 0;

                mutex_exit(&(putp->srfpl_lock));

                if (p == NULL)
                        continue;

                /* Add the list to the head of the get list */
                *pp = srfppp->srfpp_get;
                srfppp->srfpp_get = p;

                /* Adjust the counters */
                ASSERT3U(srfppp->srfpp_loaned, >=, count);
                srfppp->srfpp_loaned -= count;
                srfppp->srfpp_count += count;

#if 0
                /* NOTE: this probe is disabled because it is expensive!! */
                DTRACE_PROBE2(count,
                    unsigned int, (id - 1) & SFXGE_RX_FPP_MASK,
                    unsigned int, count);
#endif

        } while (id != start);

        /* Return the number of packets yet to appear in the put list */
        loaned = srfppp->srfpp_loaned;

        return (loaned);
}

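/* The free routine descriptor attached to an mblk's dblk by desballoc(9F) */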
#define DB_FRTNP(mp) ((mp)->b_datap->db_frtnp)

static void
sfxge_rx_qfpp_empty(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        sfxge_rx_fpp_t *srfppp;
        mblk_t *mp;

        mutex_enter(&(sep->se_lock));
        srfppp = &(srp->sr_fpp);

        /* Swizzle put list to get list */
        (void) sfxge_rx_qfpp_swizzle(srp);
        ASSERT3U(srfppp->srfpp_loaned, ==, 0);

        mp = srfppp->srfpp_get;
        srfppp->srfpp_get = NULL;

        /* Free the remainder */
        while (mp != NULL) {
                mblk_t *next;
                frtn_t *freep;
                sfxge_rx_packet_t *srpp;

                next = mp->b_next;
                mp->b_next = NULL;

                ASSERT3U(srfppp->srfpp_count, >, 0);
                srfppp->srfpp_count--;

                freep = DB_FRTNP(mp);
                /*
                 * ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
                 * is implied by srpp test below
                 */
                /*LINTED*/
                srpp = (sfxge_rx_packet_t *)(freep->free_arg);
                ASSERT3P(srpp->srp_mp, ==, mp);
                ASSERT3P(mp->b_cont, ==, NULL);
                srpp->srp_recycle = B_FALSE;

                freeb(mp);

                mp = next;
        }
        ASSERT3U(srfppp->srfpp_count, ==, 0);

        srfppp->srfpp_min = 0;

        mutex_exit(&(sep->se_lock));
}

/*
 * This is an estimate of all memory consumed per RX packet.
 * It can be inaccurate, but sp->s_rx_pkt_mem_alloc mustn't drift.
 */
static uint64_t
sfxge_rx_pkt_mem_approx(const sfxge_rx_packet_t *srpp)
{
        return (srpp->srp_mblksize + sizeof (mblk_t) + sizeof (dblk_t) +
            sizeof (sfxge_rx_packet_t));
}

static void
sfxge_rx_qpacket_destroy(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
{
        sfxge_t *sp = srp->sr_sp;
        int64_t delta = sfxge_rx_pkt_mem_approx(srpp);

        ASSERT(!(srpp->srp_recycle));
        ASSERT3P(srpp->srp_mp, ==, NULL);

        srpp->srp_off = 0;
        srpp->srp_thp = NULL;
        srpp->srp_iphp = NULL;
        srpp->srp_etherhp = NULL;
        srpp->srp_size = 0;
        srpp->srp_flags = 0;

        bzero(&(srpp->srp_free), sizeof (frtn_t));

        srpp->srp_mblksize = 0;
        srpp->srp_base = NULL;

        /* Unbind the DMA memory from the DMA handle */
        srpp->srp_addr = 0;
        (void) ddi_dma_unbind_handle(srpp->srp_dma_handle);

        /* Free the DMA memory */
        srpp->srp_base = NULL;
        ddi_dma_mem_free(&(srpp->srp_acc_handle));
        srpp->srp_acc_handle = NULL;

        srpp->srp_putp = NULL;
        srpp->srp_srp = NULL;

        kmem_cache_free(sp->s_rpc, srpp);
        if (sp->s_rx_pkt_mem_max)
                atomic_add_64(&sp->s_rx_pkt_mem_alloc, -delta);
}

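/* desballoc(9F) free routine: recycle the packet to the pool if possible */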
static void
sfxge_rx_qpacket_free(void *arg)
{
        sfxge_rx_packet_t *srpp = arg;
        sfxge_rxq_t *srp = srpp->srp_srp;

        /*
         * WARNING "man -s 9f esballoc" states:
         * => runs sync from the thread calling freeb()
         * => must not sleep, or access data structures that could be freed
         */

        /* Check whether we want to recycle the receive packets */
        if (srpp->srp_recycle) {
                frtn_t *freep;
                mblk_t *mp;
                size_t size;

                freep = &(srpp->srp_free);
                ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
                ASSERT3P(freep->free_arg, ==, (caddr_t)srpp);

                /*
                 * Allocate a matching mblk_t before the current one is
                 * freed.
                 */
                size = srpp->srp_mblksize;

                if ((mp = desballoc(srpp->srp_base, size, BPRI_HI,
                    freep)) != NULL) {
                        srpp->srp_mp = mp;

                        /* NORMAL recycled case */
                        sfxge_rx_qfpp_put(srp, srpp);
                        return;
                }
        }

        srpp->srp_mp = NULL;

        sfxge_rx_qpacket_destroy(srp, srpp);
}

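/* Allocate and DMA-map a new receive packet, attaching a STREAMS mblk to it */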
static sfxge_rx_packet_t *
sfxge_rx_qpacket_create(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        sfxge_rx_packet_t *srpp;
        size_t size;
        caddr_t base;
        size_t unit;
        ddi_dma_cookie_t dmac;
        unsigned int ncookies;
        frtn_t *freep;
        mblk_t *mp;
        int err;
        int rc;

        size = sp->s_rx_buffer_size;

        if (sp->s_rx_pkt_mem_max &&
            (sp->s_rx_pkt_mem_alloc + size >= sp->s_rx_pkt_mem_max)) {
                DTRACE_PROBE(rx_pkt_mem_max);
                srp->sr_kstat.srk_rx_pkt_mem_limit++;
                return (NULL);
        }

        /* Allocate a new packet */
        if ((srpp = kmem_cache_alloc(sp->s_rpc, KM_NOSLEEP)) == NULL) {
                srp->sr_kstat.srk_kcache_alloc_nomem++;
                rc = ENOMEM;
                goto fail1;
        }

        srpp->srp_srp = srp;
        srpp->srp_putp = srfppp->srfpp_putp;

        /* Allocate some DMA memory */
        err = ddi_dma_mem_alloc(srpp->srp_dma_handle, size,
            &sfxge_rx_packet_devacc, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
            NULL, &base, &unit, &(srpp->srp_acc_handle));
        switch (err) {
        case DDI_SUCCESS:
                break;

        case DDI_FAILURE:
                srp->sr_kstat.srk_dma_alloc_nomem++;
                rc = ENOMEM;
                goto fail2;

        default:
                srp->sr_kstat.srk_dma_alloc_fail++;
                rc = EFAULT;
                goto fail2;
        }

        /* Adjust the buffer to align the start of the DMA area correctly */
        base += sp->s_rx_buffer_align;
        size -= sp->s_rx_buffer_align;

        /* Bind the DMA memory to the DMA handle */
        err = ddi_dma_addr_bind_handle(srpp->srp_dma_handle, NULL,
            base, size, DDI_DMA_READ | DDI_DMA_STREAMING,
            DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);
        switch (err) {
        case DDI_DMA_MAPPED:
                break;

        case DDI_DMA_INUSE:
                srp->sr_kstat.srk_dma_bind_fail++;
                rc = EEXIST;
                goto fail3;

        case DDI_DMA_NORESOURCES:
                srp->sr_kstat.srk_dma_bind_nomem++;
                rc = ENOMEM;
                goto fail3;

        case DDI_DMA_NOMAPPING:
                srp->sr_kstat.srk_dma_bind_fail++;
                rc = ENOTSUP;
                goto fail3;

        case DDI_DMA_TOOBIG:
                srp->sr_kstat.srk_dma_bind_fail++;
                rc = EFBIG;
                goto fail3;

        default:
                srp->sr_kstat.srk_dma_bind_fail++;
                rc = EFAULT;
                goto fail3;
        }
        ASSERT3U(ncookies, ==, 1);

        srpp->srp_addr = dmac.dmac_laddress;

        srpp->srp_base = (unsigned char *)base;
        srpp->srp_mblksize = size;

        /*
         * Attach a STREAMS block to the DMA buffer: desballoc(9F) only
         * allocates the mblk and dblk headers, as the data buffer is
         * external.
         */
        freep = &(srpp->srp_free);
        freep->free_func = sfxge_rx_qpacket_free;
        freep->free_arg = (caddr_t)srpp;

        if ((mp = desballoc(srpp->srp_base, size, BPRI_HI, freep)) == NULL) {
                srp->sr_kstat.srk_desballoc_fail++;
                rc = ENOMEM;
                goto fail4;
        }

        srpp->srp_mp = mp;
        srpp->srp_recycle = B_TRUE;

        if (sp->s_rx_pkt_mem_max) {
                int64_t delta = sfxge_rx_pkt_mem_approx(srpp);
                atomic_add_64(&sp->s_rx_pkt_mem_alloc, delta);
        }

        return (srpp);

fail4:
        DTRACE_PROBE(fail4);

        bzero(&(srpp->srp_free), sizeof (frtn_t));

        srpp->srp_mblksize = 0;
        srpp->srp_base = NULL;

        /* Unbind the DMA memory from the DMA handle */
        srpp->srp_addr = 0;
        (void) ddi_dma_unbind_handle(srpp->srp_dma_handle);

fail3:
        DTRACE_PROBE(fail3);

        /* Free the DMA memory */
        ddi_dma_mem_free(&(srpp->srp_acc_handle));
        srpp->srp_acc_handle = NULL;

fail2:
        DTRACE_PROBE(fail2);

        srpp->srp_putp = NULL;
        srpp->srp_srp = NULL;

        kmem_cache_free(sp->s_rpc, srpp);

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        return (NULL);
}

#define SFXGE_REFILL_BATCH 64

/* Try to refill the RX descriptor ring from the associated free pkt pool */
static void
sfxge_rx_qrefill(sfxge_rxq_t *srp, unsigned int target)
{
        sfxge_t *sp = srp->sr_sp;
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
        mblk_t *mp;
        int ntodo;
        unsigned int count;
        unsigned int batch;
        unsigned int rxfill;
        unsigned int mblksize;

        prefetch_read_many(sp->s_enp);
        prefetch_read_many(srp->sr_erp);

        ASSERT(mutex_owned(&(sep->se_lock)));

        if (srp->sr_state != SFXGE_RXQ_STARTED)
                return;

        rxfill = srp->sr_added - srp->sr_completed;
        ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
        ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
        ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));

        if (ntodo == 0)
                goto out;

        (void) sfxge_rx_qfpp_swizzle(srp);

        mp = srfppp->srfpp_get;
        count = srfppp->srfpp_count;
        mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;

        batch = 0;
        while (ntodo-- > 0) {
                mblk_t *next;
                frtn_t *freep;
                sfxge_rx_packet_t *srpp;
                unsigned int id;

                if (mp == NULL)
                        break;

                next = mp->b_next;
                mp->b_next = NULL;

                if (next != NULL)
                        prefetch_read_many(next);

                freep = DB_FRTNP(mp);
                /*LINTED*/
                srpp = (sfxge_rx_packet_t *)(freep->free_arg);
                ASSERT3P(srpp->srp_mp, ==, mp);

                /* The MTU may have changed since the packet was allocated */
                if (MBLKSIZE(mp) != mblksize) {
                        srpp->srp_recycle = B_FALSE;

                        freeb(mp);

                        --count;
                        mp = next;
                        continue;
                }

                srpp->srp_off = 0;
                srpp->srp_thp = NULL;
                srpp->srp_iphp = NULL;
                srpp->srp_etherhp = NULL;
                srpp->srp_size = 0;
                srpp->srp_flags = EFX_DISCARD;

                id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
                ASSERT(srp->sr_srpp[id] == NULL);
                srp->sr_srpp[id] = srpp;

                addr[batch++] = srpp->srp_addr;
                if (batch == SFXGE_REFILL_BATCH) {
                        efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
                            srp->sr_completed, srp->sr_added);
                        srp->sr_added += batch;
                        batch = 0;
                }

                --count;
                mp = next;
        }

        srfppp->srfpp_get = mp;
        srfppp->srfpp_count = count;

        if (batch != 0) {
                efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
                    srp->sr_completed, srp->sr_added);
                srp->sr_added += batch;
        }

        efx_rx_qpush(srp->sr_erp, srp->sr_added, &srp->sr_pushed);

out:
        if (srfppp->srfpp_count < srfppp->srfpp_min)
                srfppp->srfpp_min = srfppp->srfpp_count;
}

/* Preallocate packets and put them in the free packet pool */
static void
sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc)
{
        sfxge_rx_fpp_t *srfppp = &((srp)->sr_fpp);
        srfppp->srfpp_lowat = nprealloc;
        while (nprealloc-- > 0) {
                sfxge_rx_packet_t *srpp;

                if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
                        break;
                sfxge_rx_qfpp_put(srp, srpp);
        }
}

/* Try to refill the RX descriptor ring by allocating new packets */
static void
sfxge_rx_qfill(sfxge_rxq_t *srp, unsigned int target)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        unsigned int batch;
        unsigned int rxfill;
        unsigned int mblksize;
        int ntodo;
        efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
        mblk_t *mp = NULL;

        prefetch_read_many(sp->s_enp);
        prefetch_read_many(srp->sr_erp);

        ASSERT(mutex_owned(&(sep->se_lock)));

        if (srp->sr_state != SFXGE_RXQ_STARTED)
                return;

        rxfill = srp->sr_added - srp->sr_completed;
        ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
        ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
        ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));

        if (ntodo == 0)
                return;

        mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;

        batch = 0;
        while (ntodo-- > 0) {
                sfxge_rx_packet_t *srpp;
                unsigned int id;

                if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
                        break;

                mp = srpp->srp_mp;

                ASSERT3U(MBLKSIZE(mp), ==, mblksize);

                ASSERT3U(srpp->srp_off, ==, 0);
                ASSERT3P(srpp->srp_thp, ==, NULL);
                ASSERT3P(srpp->srp_iphp, ==, NULL);
                ASSERT3P(srpp->srp_etherhp, ==, NULL);
                ASSERT3U(srpp->srp_size, ==, 0);

                srpp->srp_flags = EFX_DISCARD;

                id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
                ASSERT(srp->sr_srpp[id] == NULL);
                srp->sr_srpp[id] = srpp;

                addr[batch++] = srpp->srp_addr;
                if (batch == SFXGE_REFILL_BATCH) {
                        efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
                            srp->sr_completed, srp->sr_added);
                        srp->sr_added += batch;
                        batch = 0;
                }
        }

        if (batch != 0) {
                efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
                    srp->sr_completed, srp->sr_added);
                srp->sr_added += batch;
        }

        efx_rx_qpush(srp->sr_erp, srp->sr_added, &srp->sr_pushed);
}

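/*
 * Trim the free packet pool: free any packets that have not been
 * needed since the last trim, but keep at least the preallocated
 * low water mark.
 */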
void
sfxge_rx_qfpp_trim(sfxge_rxq_t *srp)
{
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        mblk_t *p;
        mblk_t **pp;
        int count;

        ASSERT(mutex_owned(&(sep->se_lock)));

        if (srp->sr_state != SFXGE_RXQ_STARTED)
                goto done;

        /* Make sure the queue is full */
        sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

        /* The refill may have emptied the pool */
        if (srfppp->srfpp_min == 0)
                goto done;

        /* Don't trim below the pool's low water mark */
        if (srfppp->srfpp_count <= srfppp->srfpp_lowat)
                goto done;

        ASSERT(srfppp->srfpp_min <= srfppp->srfpp_count);

        /* Trim to the largest of srfppp->srfpp_min and srfppp->srfpp_lowat */
        if (srfppp->srfpp_lowat > srfppp->srfpp_min)
                count = srfppp->srfpp_count - srfppp->srfpp_lowat;
        else
                count = srfppp->srfpp_count - srfppp->srfpp_min;

        /* Walk the get list */
        pp = &(srfppp->srfpp_get);
        while (--count >= 0) {
                ASSERT(pp);
                p = *pp;
                ASSERT(p != NULL);

                pp = &(p->b_next);
        }
        ASSERT(pp);
        p = *pp;

        /* Truncate the get list */
        *pp = NULL;

        /* Free the remainder */
        while (p != NULL) {
                mblk_t *next;
                frtn_t *freep;
                sfxge_rx_packet_t *srpp;

                next = p->b_next;
                p->b_next = NULL;

                ASSERT3U(srfppp->srfpp_min, >, 0);
                srfppp->srfpp_min--;
                srfppp->srfpp_count--;

                freep = DB_FRTNP(p);
                /*LINTED*/
                srpp = (sfxge_rx_packet_t *)(freep->free_arg);
                ASSERT3P(srpp->srp_mp, ==, p);

                srpp->srp_recycle = B_FALSE;

                freeb(p);

                p = next;
        }

done:
        srfppp->srfpp_min = srfppp->srfpp_count;
}

static void
sfxge_rx_qpoll(void *arg)
{
        sfxge_rxq_t *srp = arg;
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        uint16_t magic;

        /*
         * man timeout(9f) states that this code should adhere to the
         * same requirements as a softirq handler - DO NOT BLOCK
         */

        /*
         * Post an event to the event queue to cause the free packet pool to be
         * trimmed if it is oversize.
         */
        magic = SFXGE_MAGIC_RX_QFPP_TRIM | index;

#if defined(DEBUG)
        /* This is guaranteed due to the start/stop order of rx and ev */
        ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
        ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
#else
        /*
         * Bug22691 WORKAROUND:
         * This handler has been observed in the field to be invoked for a
         * queue in the INITIALIZED state, which should never happen.
         * Until the mechanism for this is properly understood, add defensive
         * checks.
         */
        if ((sep->se_state != SFXGE_EVQ_STARTED) ||
            (srp->sr_state != SFXGE_RXQ_STARTED) ||
            (!sep->se_eep)) {
                dev_err(sp->s_dip, CE_WARN, SFXGE_CMN_ERR
                    "RXQ[%d] bad state in sfxge_rx_qpoll %d %d %p",
                    index, sep->se_state, srp->sr_state, sep->se_eep);
                return;
        }
#endif
        efx_ev_qpost(sep->se_eep, magic);

        srp->sr_tid = timeout(sfxge_rx_qpoll, srp,
            drv_usectohz(sp->s_rxq_poll_usec));
}

static void
sfxge_rx_qpoll_start(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];

        ASSERT(mutex_owned(&(sep->se_lock)));
        ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

        /* Schedule a poll */
        ASSERT3P(srp->sr_tid, ==, 0);
        srp->sr_tid = timeout(sfxge_rx_qpoll, srp, 0);
}

static void
sfxge_rx_qpoll_stop(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        timeout_id_t tid;

        ASSERT(mutex_owned(&(sep->se_lock)));
        ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

        /*
         * Cancel the qpoll timer. Care is needed as this function
         * can race with sfxge_rx_qpoll() for timeout id updates.
         *
         * Do not hold locks used by any timeout(9f) handlers across
         * calls to untimeout(9f) as this will deadlock.
         */
        tid = 0;
        while ((srp->sr_tid != 0) && (srp->sr_tid != tid)) {
                tid = srp->sr_tid;
                (void) untimeout(tid);
        }
        srp->sr_tid = 0;
}

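/* kstat(9S) update callback for the per-RXQ statistics */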
static int
sfxge_rx_kstat_update(kstat_t *ksp, int rw)
{
        sfxge_rxq_t *srp = ksp->ks_private;
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        kstat_named_t *knp;
        int rc;

        if (rw != KSTAT_READ) {
                rc = EACCES;
                goto fail1;
        }

        ASSERT(mutex_owned(&(sep->se_lock)));
        if (srp->sr_state != SFXGE_RXQ_STARTED)
                goto done;

        knp = ksp->ks_data;
        /* NB pointer post-increment below */
        knp++->value.ui32 = srp->sr_kstat.srk_rx_pkt_mem_limit;
        knp++->value.ui32 = srp->sr_kstat.srk_kcache_alloc_nomem;
        knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_nomem;
        knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_fail;
        knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_nomem;
        knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_fail;
        knp++->value.ui32 = srp->sr_kstat.srk_desballoc_fail;
        knp++->value.ui32 = srp->sr_kstat.srk_rxq_empty_discard;

done:
        return (0);

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        return (rc);
}

static int
sfxge_rx_kstat_init(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        dev_info_t *dip = sp->s_dip;
        char name[MAXNAMELEN];
        kstat_t *ksp;
        kstat_named_t *knp;
        int rc;

        /* Create the set */
        (void) snprintf(name, MAXNAMELEN - 1, "%s_rxq%04d",
            ddi_driver_name(dip), index);

        if ((ksp = kstat_create((char *)ddi_driver_name(dip),
            ddi_get_instance(dip), name, "rxq", KSTAT_TYPE_NAMED,
            SFXGE_RX_NSTATS, 0)) == NULL) {
                rc = ENOMEM;
                goto fail1;
        }

        srp->sr_ksp = ksp;

        ksp->ks_update = sfxge_rx_kstat_update;
        ksp->ks_private = srp;
        ksp->ks_lock = &(sep->se_lock);

        /* Initialise the named stats */
        knp = ksp->ks_data;
        kstat_named_init(knp, "rx_pkt_mem_limit", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "kcache_alloc_nomem", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "dma_alloc_nomem", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "dma_alloc_fail", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "dma_bind_nomem", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "dma_bind_fail", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "desballoc_fail", KSTAT_DATA_UINT32);
        knp++;
        kstat_named_init(knp, "rxq_empty_discard", KSTAT_DATA_UINT32);

        kstat_install(ksp);
        return (0);

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        return (rc);
}

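/* Allocate and initialize the receive queue and its kstats */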
static int
sfxge_rx_qinit(sfxge_t *sp, unsigned int index)
{
        sfxge_rxq_t *srp;
        int rc;

        ASSERT3U(index, <, SFXGE_RX_SCALE_MAX);

        if ((srp = kmem_cache_alloc(sp->s_rqc, KM_SLEEP)) == NULL) {
                rc = ENOMEM;
                goto fail1;
        }
        ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_UNINITIALIZED);

        srp->sr_index = index;
        sp->s_srp[index] = srp;

        if ((rc = sfxge_rx_kstat_init(srp)) != 0)
                goto fail2;

        srp->sr_state = SFXGE_RXQ_INITIALIZED;

        return (0);

fail2:
        DTRACE_PROBE(fail2);
        kmem_cache_free(sp->s_rqc, srp);

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        return (rc);
}

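/* Start the receive queue: program the hardware queue and fill the ring */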
static int
sfxge_rx_qstart(sfxge_t *sp, unsigned int index)
{
        sfxge_evq_t *sep = sp->s_sep[index];
        sfxge_rxq_t *srp;
        efsys_mem_t *esmp;
        efx_nic_t *enp;
        unsigned int level;
        int rc;

        mutex_enter(&(sep->se_lock));
        srp = sp->s_srp[index];
        enp = sp->s_enp;
        esmp = &(srp->sr_mem);

        ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);
        ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);

        /* Zero the memory */
        bzero(esmp->esm_base, EFX_RXQ_SIZE(sp->s_rxq_size));

        /* Program the buffer table */
        if ((rc = sfxge_sram_buf_tbl_set(sp, srp->sr_id, esmp,
            EFX_RXQ_NBUFS(sp->s_rxq_size))) != 0)
                goto fail1;

        /* Create the receive queue */
        if ((rc = efx_rx_qcreate(enp, index, index, EFX_RXQ_TYPE_DEFAULT,
            esmp, sp->s_rxq_size, srp->sr_id, sep->se_eep, &(srp->sr_erp)))
            != 0)
                goto fail2;

        /* Enable the receive queue */
        efx_rx_qenable(srp->sr_erp);

        /* Set the water marks */
        srp->sr_hiwat = EFX_RXQ_LIMIT(sp->s_rxq_size) * 9 / 10;
        srp->sr_lowat = srp->sr_hiwat / 2;

        srp->sr_state = SFXGE_RXQ_STARTED;
        srp->sr_flush = SFXGE_FLUSH_INACTIVE;

        sfxge_rx_qpoll_start(srp);

        /* Try to fill the queue from the pool */
        sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

        /*
         * If there were insufficient buffers in the pool to reach at
         * least a batch then allocate some.
         */
        level = srp->sr_added - srp->sr_completed;
        if (level < SFXGE_RX_BATCH)
                sfxge_rx_qfill(srp, SFXGE_RX_BATCH);

        mutex_exit(&(sep->se_lock));

        return (0);

fail2:
        DTRACE_PROBE(fail2);

        /* Clear entries from the buffer table */
        sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
            EFX_RXQ_NBUFS(sp->s_rxq_size));

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        mutex_exit(&(sep->se_lock));

        return (rc);
}

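/*
 * Complete a coalesced flow: patch up the IP and TCP headers of the
 * first segment and append the chain to the pending packet list.
 */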
static void
sfxge_rx_qflow_complete(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp)
{
        mblk_t *mp;
        struct ether_header *etherhp;
        struct ip *iphp;
        struct tcphdr *thp;

        if (srfp->srf_mp == NULL)
                return;

        mp = srfp->srf_mp;
        etherhp = srfp->srf_etherhp;
        iphp = srfp->srf_iphp;
        thp = srfp->srf_last_thp;

        ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
            sizeof (struct ether_vlan_header) :
            sizeof (struct ether_header)) +
            srfp->srf_len, ==, msgdsize(mp));

        ASSERT3U(srfp->srf_len & 0xffff, ==, srfp->srf_len);
        iphp->ip_len = htons(srfp->srf_len);

        srfp->srf_first_thp->th_ack = thp->th_ack;
        srfp->srf_first_thp->th_win = thp->th_win;
        srfp->srf_first_thp->th_flags = thp->th_flags;

        DTRACE_PROBE2(flow_complete, uint32_t, srfp->srf_tag,
            size_t, srfp->srf_len);

        srfp->srf_mp = NULL;
        srfp->srf_len = 0;

        ASSERT(mp->b_next == NULL);
        *(srp->sr_mpp) = mp;
        srp->sr_mpp = &(mp->b_next);
}

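/*
 * Try to append a received TCP segment to an existing flow, completing
 * the flow first if the segment is out of order or otherwise unsuitable.
 * Returns B_TRUE if the segment has been consumed by the flow.
 */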
static boolean_t
sfxge_rx_qflow_add(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp,
    sfxge_rx_packet_t *srpp, clock_t now)
{
        sfxge_t *sp = srp->sr_sp;
        struct ether_header *etherhp = srpp->srp_etherhp;
        struct ip *iphp = srpp->srp_iphp;
        struct tcphdr *thp = srpp->srp_thp;
        size_t off = srpp->srp_off;
        size_t size = (size_t)(srpp->srp_size);
        mblk_t *mp = srpp->srp_mp;
        uint32_t seq;
        unsigned int shift;

        ASSERT3U(MBLKL(mp), ==, off + size);
        ASSERT3U(DB_CKSUMFLAGS(mp), ==,
            HCK_FULLCKSUM | HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM);

        seq = htonl(thp->th_seq);

        /*
         * If the time between this segment and the last is greater than RTO
         * then consider this a new flow.
         */
        if (now - srfp->srf_lbolt > srp->sr_rto) {
                srfp->srf_count = 1;
                srfp->srf_seq = seq + size;

                goto fail1;
        }

        if (seq != srfp->srf_seq) {
                if (srfp->srf_count > SFXGE_SLOW_START)
                        srfp->srf_count = SFXGE_SLOW_START;

                srfp->srf_count >>= 1;

                srfp->srf_count++;
                srfp->srf_seq = seq + size;

                goto fail2;
        }

        /* Update the in-order segment count and sequence number */
        srfp->srf_count++;
        srfp->srf_seq = seq + size;

        /* Don't merge across pure ACK, URG, SYN or RST segments */
        if (size == 0 || thp->th_flags & (TH_URG | TH_SYN | TH_RST) ||
            thp->th_urp != 0)
                goto fail3;

        /*
         * If the in-order segment count has not yet reached the slow-start
         * threshold then we cannot coalesce.
         */
        if (srfp->srf_count < SFXGE_SLOW_START)
                goto fail4;

        /* Scale up the packet size from 4k (the maximum being 64k) */
        ASSERT3U(srfp->srf_count, >=, SFXGE_SLOW_START);
        shift = MIN(srfp->srf_count - SFXGE_SLOW_START + 12, 16);
        if (srfp->srf_len + size >= (1 << shift))
                sfxge_rx_qflow_complete(srp, srfp);

        ASSERT(mp->b_cont == NULL);

        if (srfp->srf_mp == NULL) {
                /* First packet in this flow */
                srfp->srf_etherhp = etherhp;
                srfp->srf_iphp = iphp;
                srfp->srf_first_thp = srfp->srf_last_thp = thp;

                ASSERT3P(mp->b_cont, ==, NULL);
                srfp->srf_mp = mp;
                srfp->srf_mpp = &(mp->b_cont);

                srfp->srf_len = ntohs(iphp->ip_len);

                /*
                 * If the flow is not already in the list of occupied flows
                 * then add it.
                 */
                if (srfp->srf_next == NULL &&
                    srp->sr_srfpp != &(srfp->srf_next)) {
                        *(srp->sr_srfpp) = srfp;
                        srp->sr_srfpp = &(srfp->srf_next);
                }
        } else {
                /* Later packet in this flow - skip TCP header */
                srfp->srf_last_thp = thp;

                mp->b_rptr += off;
                ASSERT3U(MBLKL(mp), ==, size);

                ASSERT3P(mp->b_cont, ==, NULL);
                *(srfp->srf_mpp) = mp;
                srfp->srf_mpp = &(mp->b_cont);

                srfp->srf_len += size;

                ASSERT(srfp->srf_next != NULL ||
                    srp->sr_srfpp == &(srfp->srf_next));
        }

        DTRACE_PROBE2(flow_add, uint32_t, srfp->srf_tag, size_t, size);

        /*
         * Try to align coalesced segments on push boundaries, unless they
         * are too frequent.
         */
        if (sp->s_rx_coalesce_mode == SFXGE_RX_COALESCE_ALLOW_PUSH &&
            thp->th_flags & TH_PUSH)
                sfxge_rx_qflow_complete(srp, srfp);

        srfp->srf_lbolt = now;
        return (B_TRUE);

fail4:
fail3:
fail2:
fail1:
        sfxge_rx_qflow_complete(srp, srfp);

        srfp->srf_lbolt = now;
        return (B_FALSE);
}

void
sfxge_rx_qpacket_coalesce(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        clock_t now;
        mblk_t *mp;
        sfxge_rx_flow_t *srfp;

        ASSERT(sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF);

        now = ddi_get_lbolt();

        mp = srp->sr_mp;

        srp->sr_mp = NULL;
        srp->sr_mpp = &(srp->sr_mp);

        /* Start with the last flow to be appended to */
        srfp = *(srp->sr_srfpp);

        while (mp != NULL) {
                frtn_t *freep;
                sfxge_rx_packet_t *srpp;
                struct ether_header *etherhp;
                struct ip *iphp;
                struct tcphdr *thp;
                size_t off;
                size_t size;
                uint16_t ether_tci;
                uint32_t hash;
                uint32_t tag;
                mblk_t *next;
                sfxge_packet_type_t pkt_type;
                uint16_t sport, dport;

                next = mp->b_next;
                mp->b_next = NULL;

                if (next != NULL)
                        prefetch_read_many(next);

                freep = DB_FRTNP(mp);
                /*LINTED*/
                srpp = (sfxge_rx_packet_t *)(freep->free_arg);
                ASSERT3P(srpp->srp_mp, ==, mp);

                /* If the packet is not TCP then we cannot coalesce it */
                if (~(srpp->srp_flags) & EFX_PKT_TCP)
                        goto reject;

                /*
                 * If the packet is not fully checksummed then we cannot
                 * coalesce it.
                 */
                if (~(srpp->srp_flags) & (EFX_CKSUM_TCPUDP | EFX_CKSUM_IPV4))
                        goto reject;

                /* Parse the TCP header */
                pkt_type = sfxge_pkthdr_parse(mp, &etherhp, &iphp, &thp, &off,
                    &size, &sport, &dport);
                ASSERT(pkt_type == SFXGE_PACKET_TYPE_IPV4_TCP);
                ASSERT(etherhp != NULL);
                ASSERT(iphp != NULL);
                ASSERT(thp != NULL);
                ASSERT(off != 0);

                if ((iphp->ip_off & ~htons(IP_DF)) != 0)
                        goto reject;

                if (etherhp->ether_type == htons(ETHERTYPE_VLAN)) {
                        struct ether_vlan_header *ethervhp;

                        ethervhp = (struct ether_vlan_header *)etherhp;
                        ether_tci = ethervhp->ether_tci;
                } else {
                        ether_tci = 0;
                }

                /*
                 * Make sure any minimum length padding is stripped
                 * before we try to add the packet to a flow.
                 */
                ASSERT3U(sp->s_rx_prefix_size + MBLKL(mp), ==,
                    (size_t)(srpp->srp_size));
                ASSERT3U(sp->s_rx_prefix_size + off + size, <=,
                    (size_t)(srpp->srp_size));

                if (sp->s_rx_prefix_size + off + size <
                    (size_t)(srpp->srp_size))
                        mp->b_wptr = mp->b_rptr + off + size;

                /*
                 * If there is no current flow, or the segment does not match
                 * the current flow then we must attempt to look up the
                 * correct flow in the table.
                 */
                if (srfp == NULL)
                        goto lookup;

                if (srfp->srf_saddr != iphp->ip_src.s_addr ||
                    srfp->srf_daddr != iphp->ip_dst.s_addr)
                        goto lookup;

                if (srfp->srf_sport != thp->th_sport ||
                    srfp->srf_dport != thp->th_dport)
                        goto lookup;

                if (srfp->srf_tci != ether_tci)
                        goto lookup;

add:
                ASSERT(srfp != NULL);

                srpp->srp_etherhp = etherhp;
                srpp->srp_iphp = iphp;
                srpp->srp_thp = thp;
                srpp->srp_off = off;

                ASSERT3U(size, <, (1 << 16));
                srpp->srp_size = (uint16_t)size;

                /* Try to append the packet to the flow */
                if (!sfxge_rx_qflow_add(srp, srfp, srpp, now))
                        goto reject;

                mp = next;
                continue;

lookup:
                /*
                 * If there is a prefix area then read the hash from that,
                 * otherwise calculate it.
                 */
                if (sp->s_rx_prefix_size != 0) {
                        hash = efx_psuedo_hdr_hash_get(sp->s_enp,
                            EFX_RX_HASHALG_TOEPLITZ,
                            DB_BASE(mp));
                } else {
                        SFXGE_TCP_HASH(sp,
                            &iphp->ip_src.s_addr,
                            thp->th_sport,
                            &iphp->ip_dst.s_addr,
                            thp->th_dport,
                            hash);
                }

                srfp = &(srp->sr_flow[(hash >> 6) % SFXGE_MAX_FLOW]);
                tag = hash + 1; /* Make sure it's not zero */

                /*
                 * If the flow we have found does not match the hash then
                 * it may be an unused flow, or it may be stale.
                 */
                if (tag != srfp->srf_tag) {
                        if (srfp->srf_count != 0) {
                                if (now - srfp->srf_lbolt <= srp->sr_rto)
                                        goto reject;
                        }

                        if (srfp->srf_mp != NULL)
                                goto reject;

                        /* Start a new flow */
                        ASSERT(srfp->srf_next == NULL);

                        srfp->srf_tag = tag;

                        srfp->srf_saddr = iphp->ip_src.s_addr;
                        srfp->srf_daddr = iphp->ip_dst.s_addr;
                        srfp->srf_sport = thp->th_sport;
                        srfp->srf_dport = thp->th_dport;
                        srfp->srf_tci = ether_tci;

                        srfp->srf_count = 0;
                        srfp->srf_seq = ntohl(thp->th_seq);

                        srfp->srf_lbolt = now;
                        goto add;
                }

                /*
                 * If the flow we have found does match the hash then it could
                 * still be an alias.
                 */
                if (srfp->srf_saddr != iphp->ip_src.s_addr ||
                    srfp->srf_daddr != iphp->ip_dst.s_addr)
                        goto reject;

                if (srfp->srf_sport != thp->th_sport ||
                    srfp->srf_dport != thp->th_dport)
                        goto reject;

                if (srfp->srf_tci != ether_tci)
                        goto reject;

                goto add;

reject:
                *(srp->sr_mpp) = mp;
                srp->sr_mpp = &(mp->b_next);

                mp = next;
        }
}

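/*
 * Process completed receive descriptors: recycle or discard packets as
 * necessary, coalesce TCP segments where enabled, pass the resulting
 * chains up the stack, and top the descriptor ring back up.
 */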
void
sfxge_rx_qcomplete(sfxge_rxq_t *srp, boolean_t eop)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        unsigned int completed;
        sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
        unsigned int level;

        ASSERT(mutex_owned(&(sep->se_lock)));

        ASSERT(srp->sr_mp == NULL);
        ASSERT(srp->sr_mpp == &(srp->sr_mp));

        completed = srp->sr_completed;
        while (completed != srp->sr_pending) {
                unsigned int id;
                sfxge_rx_packet_t *srpp;
                mblk_t *mp;
                size_t size;
                uint16_t flags;
                int rc;

                id = completed++ & (sp->s_rxq_size - 1);

                if (srp->sr_pending - completed >= 4) {
                        unsigned int prefetch;

                        prefetch = (id + 4) & (sp->s_rxq_size - 1);

                        srpp = srp->sr_srpp[prefetch];
                        ASSERT(srpp != NULL);

                        mp = srpp->srp_mp;
                        prefetch_read_many(mp->b_datap);
                } else if (completed == srp->sr_pending) {
                        prefetch_read_many(srp->sr_mp);
                }

                srpp = srp->sr_srpp[id];
                ASSERT(srpp != NULL);

                srp->sr_srpp[id] = NULL;

                mp = srpp->srp_mp;
                ASSERT(mp->b_cont == NULL);

                /* when called from sfxge_rx_qstop() */
                if (srp->sr_state != SFXGE_RXQ_STARTED)
                        goto discard;

                if (srpp->srp_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
                        goto discard;

                /* Make the data visible to the kernel */
                rc = ddi_dma_sync(srpp->srp_dma_handle, 0,
                    sp->s_rx_buffer_size, DDI_DMA_SYNC_FORKERNEL);
                ASSERT3P(rc, ==, DDI_SUCCESS);

                /* Read the length from the pseudo-header if required */
                if (srpp->srp_flags & EFX_PKT_PREFIX_LEN) {
                        rc = efx_psuedo_hdr_pkt_length_get(sp->s_enp,
                            mp->b_rptr,
                            &srpp->srp_size);
                        ASSERT3P(rc, ==, 0);
                        srpp->srp_size += sp->s_rx_prefix_size;
                }

                /* Set up the packet length */
                ASSERT3P(mp->b_rptr, ==, DB_BASE(mp));
                mp->b_rptr += sp->s_rx_prefix_size;

                prefetch_read_many(mp->b_rptr);

                ASSERT3P(mp->b_wptr, ==, DB_BASE(mp));
                mp->b_wptr += (size_t)(srpp->srp_size);
                ASSERT3P(mp->b_wptr, <=, DB_LIM(mp));

                /* Calculate the maximum packet size */
                size = sp->s_mtu;
                size += (srpp->srp_flags & EFX_PKT_VLAN_TAGGED) ?
                    sizeof (struct ether_vlan_header) :
                    sizeof (struct ether_header);

                if (MBLKL(mp) > size)
                        goto discard;

                /* Check for loopback packets */
                if (!(srpp->srp_flags & EFX_PKT_IPV4) &&
                    !(srpp->srp_flags & EFX_PKT_IPV6)) {
                        struct ether_header *etherhp;

                        /*LINTED*/
                        etherhp = (struct ether_header *)(mp->b_rptr);

                        if (etherhp->ether_type ==
                            htons(SFXGE_ETHERTYPE_LOOPBACK)) {
                                DTRACE_PROBE(loopback);

                                srp->sr_loopback++;
                                goto discard;
                        }
                }

                /* Set up the checksum information */
                flags = 0;

                if (srpp->srp_flags & EFX_CKSUM_IPV4) {
                        ASSERT(srpp->srp_flags & EFX_PKT_IPV4);
                        flags |= HCK_IPV4_HDRCKSUM;
                }

                if (srpp->srp_flags & EFX_CKSUM_TCPUDP) {
                        ASSERT(srpp->srp_flags & EFX_PKT_TCP ||
                            srpp->srp_flags & EFX_PKT_UDP);
                        flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
                }

                DB_CKSUMSTART(mp) = 0;
                DB_CKSUMSTUFF(mp) = 0;
                DB_CKSUMEND(mp) = 0;
                DB_CKSUMFLAGS(mp) = flags;
                DB_CKSUM16(mp) = 0;

                /* Add the packet to the tail of the chain */
                srfppp->srfpp_loaned++;

                ASSERT(mp->b_next == NULL);
                *(srp->sr_mpp) = mp;
                srp->sr_mpp = &(mp->b_next);

                continue;

discard:
                /* Return the packet to the pool */
                srfppp->srfpp_loaned++;
                freeb(mp); /* Equivalent to freemsg() as b_cont==0 */
        }
        srp->sr_completed = completed;

        /* Attempt to coalesce any TCP packets */
        if (sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF)
                sfxge_rx_qpacket_coalesce(srp);

        /*
         * If there are any pending flows and this is the end of the
         * poll then they must be completed.
         */
        if (srp->sr_srfp != NULL && eop) {
                sfxge_rx_flow_t *srfp;

                srfp = srp->sr_srfp;

                srp->sr_srfp = NULL;
                srp->sr_srfpp = &(srp->sr_srfp);

                do {
                        sfxge_rx_flow_t *next;

                        next = srfp->srf_next;
                        srfp->srf_next = NULL;

                        sfxge_rx_qflow_complete(srp, srfp);

                        srfp = next;
                } while (srfp != NULL);
        }

        level = srp->sr_pushed - srp->sr_completed;

        /* If there are any packets then pass them up the stack */
        if (srp->sr_mp != NULL) {
                mblk_t *mp;

                mp = srp->sr_mp;

                srp->sr_mp = NULL;
                srp->sr_mpp = &(srp->sr_mp);

                if (level == 0) {
                        /* Try to refill ASAP */
                        sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
                        level = srp->sr_pushed - srp->sr_completed;
                }

                /*
                 * If the RXQ is still empty, discard and recycle the
                 * current entry to ensure that the ring always
                 * contains at least one descriptor. This ensures that
                 * the next hardware RX will trigger an event
                 * (possibly delayed by interrupt moderation) and
                 * trigger another refill/fill attempt.
                 *
                 * Note this drops a complete LRO fragment from the
                 * start of the batch.
                 *
                 * Note also that copymsgchain() does not help with
                 * resource starvation here, unless we are short of DMA
                 * mappings.
                 */
                if (level == 0) {
                        mblk_t *nmp;

                        srp->sr_kstat.srk_rxq_empty_discard++;
                        DTRACE_PROBE1(rxq_empty_discard, int, index);
                        nmp = mp->b_next;
                        if (nmp)
                                sfxge_gld_rx_post(sp, index, nmp);
                        /* level == 0, so the refill below will swizzle and re-post */
                        freemsg(mp);
                } else {
                        sfxge_gld_rx_post(sp, index, mp);
                }
        }

        /* Top up the queue if necessary */
        if (level < srp->sr_hiwat) {
                sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

                level = srp->sr_added - srp->sr_completed;
                if (level < srp->sr_lowat)
                        sfxge_rx_qfill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
        }
}

1917
void
sfxge_rx_qflush_done(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        boolean_t flush_pending;

        ASSERT(mutex_owned(&(sep->se_lock)));

        /*
         * Flush successful: wakeup sfxge_rx_qstop() if flush is pending.
         *
         * A delayed flush event received after RxQ stop has timed out
         * will be ignored, as then the flush state will not be PENDING
         * (see SFCbug22989).
         */
        flush_pending = (srp->sr_flush == SFXGE_FLUSH_PENDING);
        srp->sr_flush = SFXGE_FLUSH_DONE;
        if (flush_pending)
                cv_broadcast(&(srp->sr_flush_kv));
}

void
sfxge_rx_qflush_failed(sfxge_rxq_t *srp)
{
        sfxge_t *sp = srp->sr_sp;
        unsigned int index = srp->sr_index;
        sfxge_evq_t *sep = sp->s_sep[index];
        boolean_t flush_pending;

        ASSERT(mutex_owned(&(sep->se_lock)));

        /*
         * Flush failed: wakeup sfxge_rx_qstop() if flush is pending.
         *
         * A delayed flush event received after RxQ stop has timed out
         * will be ignored, as then the flush state will not be PENDING
         * (see SFCbug22989).
         */
        flush_pending = (srp->sr_flush == SFXGE_FLUSH_PENDING);
        srp->sr_flush = SFXGE_FLUSH_FAILED;
        if (flush_pending)
                cv_broadcast(&(srp->sr_flush_kv));
}

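/*
 * Stop the receive queue: flush it (retrying if needed), destroy the
 * hardware queue, and reclaim all outstanding packet buffers.
 */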
static void
sfxge_rx_qstop(sfxge_t *sp, unsigned int index)
{
        dev_info_t *dip = sp->s_dip;
        sfxge_evq_t *sep = sp->s_sep[index];
        sfxge_rxq_t *srp;
        clock_t timeout;
        unsigned int flush_tries = SFXGE_RX_QFLUSH_TRIES;
        int rc;

        ASSERT(mutex_owned(&(sp->s_state_lock)));

        mutex_enter(&(sep->se_lock));

        srp = sp->s_srp[index];
        ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

        sfxge_rx_qpoll_stop(srp);

        /* Further packets are discarded by sfxge_rx_qcomplete() */
        srp->sr_state = SFXGE_RXQ_INITIALIZED;

        if (sp->s_hw_err != SFXGE_HW_OK) {
                /*
                 * Flag indicates possible hardware failure.
                 * Attempt flush but do not wait for it to complete.
                 */
                srp->sr_flush = SFXGE_FLUSH_DONE;
                (void) efx_rx_qflush(srp->sr_erp);
        }

        /* Wait up to 2 sec for queue flushing to complete */
        timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_RX_QFLUSH_USEC);

        while (srp->sr_flush != SFXGE_FLUSH_DONE && flush_tries-- > 0) {
                if ((rc = efx_rx_qflush(srp->sr_erp)) != 0) {
                        if (rc == EALREADY)
                                srp->sr_flush = SFXGE_FLUSH_DONE;
                        else
                                srp->sr_flush = SFXGE_FLUSH_FAILED;
                        break;
                }
                srp->sr_flush = SFXGE_FLUSH_PENDING;
                if (cv_timedwait(&(srp->sr_flush_kv), &(sep->se_lock),
                    timeout) < 0) {
                        /* Timeout waiting for successful or failed flush */
                        dev_err(dip, CE_NOTE,
                            SFXGE_CMN_ERR "rxq[%d] flush timeout", index);
                        break;
                }
        }

        if (srp->sr_flush == SFXGE_FLUSH_FAILED)
                dev_err(dip, CE_NOTE,
                    SFXGE_CMN_ERR "rxq[%d] flush failed", index);

        DTRACE_PROBE1(flush, sfxge_flush_state_t, srp->sr_flush);
        srp->sr_flush = SFXGE_FLUSH_DONE;

        /* Destroy the receive queue */
        efx_rx_qdestroy(srp->sr_erp);
        srp->sr_erp = NULL;

        /* Clear entries from the buffer table */
        sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
            EFX_RXQ_NBUFS(sp->s_rxq_size));

        /*
         * Free any unused RX packets which had descriptors on the RXQ.
         * Packets will be discarded as state != STARTED.
         */
        srp->sr_pending = srp->sr_added;
        sfxge_rx_qcomplete(srp, B_TRUE);

        ASSERT3U(srp->sr_completed, ==, srp->sr_pending);

        srp->sr_added = 0;
        srp->sr_pushed = 0;
        srp->sr_pending = 0;
        srp->sr_completed = 0;
        srp->sr_loopback = 0;

        srp->sr_lowat = 0;
        srp->sr_hiwat = 0;

        mutex_exit(&(sep->se_lock));
}

2052 static void
sfxge_rx_kstat_fini(sfxge_rxq_t * srp)2053 sfxge_rx_kstat_fini(sfxge_rxq_t *srp)
2054 {
2055 kstat_delete(srp->sr_ksp);
2056 srp->sr_ksp = NULL;
2057 }
2058
2059 static void
sfxge_rx_qfini(sfxge_t * sp,unsigned int index)2060 sfxge_rx_qfini(sfxge_t *sp, unsigned int index)
2061 {
2062 sfxge_rxq_t *srp = sp->s_srp[index];
2063
2064 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);
2065
2066 sp->s_srp[index] = NULL;
2067 srp->sr_state = SFXGE_RXQ_UNINITIALIZED;
2068
2069 sfxge_rx_kstat_fini(srp);
2070
2071 /* Empty the pool */
2072 sfxge_rx_qfpp_empty(srp);
2073
2074 srp->sr_index = 0;
2075
2076 kmem_cache_free(sp->s_rqc, srp);
2077 }
2078
static int
sfxge_rx_scale_kstat_update(kstat_t *ksp, int rw)
{
	sfxge_t *sp = ksp->ks_private;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	kstat_named_t *knp;
	unsigned int index;
	unsigned int entry;
	unsigned int *freq;
	int rc;

	ASSERT(mutex_owned(&(srsp->srs_lock)));

	if (rw != KSTAT_READ) {
		rc = EACCES;
		goto fail1;
	}

	if ((freq = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		index = srsp->srs_tbl[entry];

		freq[index]++;
	}

	knp = ksp->ks_data;
	for (index = 0; index < sip->si_nalloc; index++) {
		knp->value.ui64 = freq[index];
		knp++;
	}

	knp->value.ui64 = srsp->srs_count;

	kmem_free(freq, sizeof (unsigned int) * sip->si_nalloc);

	return (0);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);
	return (rc);
}

static int
sfxge_rx_scale_kstat_init(sfxge_t *sp)
{
	dev_info_t *dip = sp->s_dip;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	kstat_t *ksp;
	kstat_named_t *knp;
	unsigned int index;
	int rc;

	/* Create the set */
	(void) snprintf(name, MAXNAMELEN - 1, "%s_rss", ddi_driver_name(dip));

	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
	    ddi_get_instance(dip), name, "rss", KSTAT_TYPE_NAMED,
	    sip->si_nalloc + 1, 0)) == NULL) {
		rc = ENOMEM;
		goto fail1;
	}

	srsp->srs_ksp = ksp;

	ksp->ks_update = sfxge_rx_scale_kstat_update;
	ksp->ks_private = sp;
	ksp->ks_lock = &(srsp->srs_lock);

	/* Initialise the named stats */
	knp = ksp->ks_data;
	for (index = 0; index < sip->si_nalloc; index++) {
		char name[MAXNAMELEN];

		(void) snprintf(name, MAXNAMELEN - 1, "evq%04d_count", index);
		kstat_named_init(knp, name, KSTAT_DATA_UINT64);
		knp++;
	}

	kstat_named_init(knp, "scale", KSTAT_DATA_UINT64);

	kstat_install(ksp);
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static void
sfxge_rx_scale_kstat_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	/* Destroy the set */
	kstat_delete(srsp->srs_ksp);
	srsp->srs_ksp = NULL;
}

unsigned int
sfxge_rx_scale_prop_get(sfxge_t *sp)
{
	int rx_scale;

	rx_scale = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_scale_count", SFXGE_RX_SCALE_MAX);
	/* Zero or negative values select the number of logical CPUs */
	if (rx_scale <= 0)
		rx_scale = ncpus;

	return (rx_scale);
}
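
/*
 * The RSS spread may be tuned via the driver's .conf file; an
 * illustrative entry (the value is an example only) would be:
 *
 *	rx_scale_count=4;
 *
 * Zero or negative values fall back to ncpus, and sfxge_rx_scale_init()
 * clamps the result to the number of allocated event queues.
 */
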
static int
sfxge_rx_scale_init(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int rc;

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_UNINITIALIZED);

	/* Create tables for CPU, core, cache and chip counts */
	srsp->srs_cpu = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);

	mutex_init(&(srsp->srs_lock), NULL, MUTEX_DRIVER, NULL);

	/* We need at least one event queue */
	srsp->srs_count = sfxge_rx_scale_prop_get(sp);
	if (srsp->srs_count > sip->si_nalloc)
		srsp->srs_count = sip->si_nalloc;
	if (srsp->srs_count < 1)
		srsp->srs_count = 1;

	/* Set up the kstats */
	if ((rc = sfxge_rx_scale_kstat_init(sp)) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);
	mutex_destroy(&(srsp->srs_lock));

	return (rc);
}

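/*
 * Recompute the RSS indirection table:
 *
 *  1. Remove this interface's usage from the global per-CPU contention
 *     table (sfxge_cpu[]).
 *  2. Greedily pick srs_count event queues, each time choosing the
 *     queue whose interrupt CPU currently shows the least contention,
 *     and account for each choice in the local and global tables.
 *  3. Expand the chosen queues round-robin into the
 *     SFXGE_RX_SCALE_MAX-entry table and program it into the hardware.
 */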
void
sfxge_rx_scale_update(void *arg)
{
	sfxge_t *sp = arg;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip;
	processorid_t id;
	unsigned int count;
	unsigned int *tbl;
	unsigned int *rating;
	unsigned int entry;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = EFAULT;
		goto fail1;
	}

	if ((tbl = kmem_zalloc(sizeof (unsigned int) * SFXGE_RX_SCALE_MAX,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	sip = &(sp->s_intr);
	if ((rating = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;
	}

	ASSERT(srsp->srs_count != 0);

	/* Choose as many event queues as we need */
	for (count = 0; count < srsp->srs_count; count++) {
		unsigned int index;
		sfxge_evq_t *sep;
		unsigned int choice;
		unsigned int choice_rating;

		bzero(rating, sizeof (unsigned int) * sip->si_nalloc);

		/*
		 * Rate each event queue on its global level of CPU
		 * contention.
		 */
		for (index = 0; index < sip->si_nalloc; index++) {
			sep = sp->s_sep[index];

			id = sep->se_cpu_id;
			rating[index] += sfxge_cpu[id];
		}

		/* Choose the queue with the lowest CPU contention */
		choice = 0;
		choice_rating = rating[0];

		for (index = 1; index < sip->si_nalloc; index++) {
			if (rating[index] < choice_rating) {
				choice = index;
				choice_rating = rating[index];
			}
		}

		/* Add our choice to the condensed RSS table */
		tbl[count] = choice;

		/* Add information to the global contention tables */
		sep = sp->s_sep[choice];

		id = sep->se_cpu_id;
		srsp->srs_cpu[id]++;
		sfxge_cpu[id]++;
	}

	mutex_exit(&cpu_lock);

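	/*
	 * Expand the condensed table round-robin: for example, if
	 * srs_count is 3 and the condensed table holds queues { 0, 2, 5 },
	 * the expanded table becomes 0, 2, 5, 0, 2, 5, ... across all
	 * SFXGE_RX_SCALE_MAX entries.
	 */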
	/* Build the expanded RSS table */
	count = 0;
	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		unsigned int index;

		index = tbl[count];
		count = (count + 1) % srsp->srs_count;

		srsp->srs_tbl[entry] = index;
	}

	/* Program the expanded RSS table into the hardware */
	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
	kmem_free(rating, sizeof (unsigned int) * sip->si_nalloc);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
	return;

fail3:
	DTRACE_PROBE(fail3);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));
}

static int
sfxge_rx_scale_start(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	int rc;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	if ((rc = sfxge_toeplitz_hash_init(sp)) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_STARTED;

	mutex_exit(&(srsp->srs_lock));

	/* sfxge_t->s_state_lock held */
	(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
	    DDI_SLEEP);

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

int
sfxge_rx_scale_count_get(sfxge_t *sp, unsigned int *countp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail1;
	}

	*countp = srsp->srs_count;

	mutex_exit(&(srsp->srs_lock));

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

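/*
 * Set the number of RSS channels. If RX scaling has already started,
 * the indirection table is rebuilt asynchronously on the task queue so
 * that no locks are held across the update.
 */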
int
sfxge_rx_scale_count_set(sfxge_t *sp, unsigned int count)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int dispatch = 1;
	int rc;

	if (count < 1 || count > sip->si_nalloc) {
		rc = EINVAL;
		goto fail1;
	}

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail2;
	}

	srsp->srs_count = count;

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED)
		dispatch = 0;

	mutex_exit(&(srsp->srs_lock));

	if (dispatch)
		/* no locks held */
		(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
		    DDI_SLEEP);

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	mutex_exit(&(srsp->srs_lock));

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static void
sfxge_rx_scale_stop(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	processorid_t id;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_STARTED);

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;
	}

	mutex_exit(&cpu_lock);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
}

static void
sfxge_rx_scale_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	srsp->srs_state = SFXGE_RX_SCALE_UNINITIALIZED;

	/* Tear down the kstats */
	sfxge_rx_scale_kstat_fini(sp);

	srsp->srs_count = 0;

	mutex_destroy(&(srsp->srs_lock));

	/* Destroy tables */
	kmem_free(srsp->srs_cpu, sizeof (unsigned int) * NCPU);
	srsp->srs_cpu = NULL;

	sfxge_toeplitz_hash_fini(sp);
}

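/*
 * Initialize the receive path: set up RX scaling, create the kmem
 * caches for packets and queues, read the tunables, then initialize
 * one receive queue per allocated interrupt.
 */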
int
sfxge_rx_init(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	int index;
	int rc;

	if (sip->si_state == SFXGE_INTR_UNINITIALIZED) {
		rc = EINVAL;
		goto fail1;
	}

	if ((rc = sfxge_rx_scale_init(sp)) != 0)
		goto fail2;

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rx_packet_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rpc = kmem_cache_create(name, sizeof (sfxge_rx_packet_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_packet_ctor, sfxge_rx_packet_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_rpc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rxq_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rqc = kmem_cache_create(name, sizeof (sfxge_rxq_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_qctor, sfxge_rx_qdtor, NULL, sp,
	    NULL, 0);
	ASSERT(sp->s_rqc != NULL);

	sp->s_rx_pkt_mem_max = ddi_prop_get_int64(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_pkt_mem_max", 0);	/* disabled */

	/* Initialize the receive queue(s) */
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qinit(sp, index)) != 0)
			goto fail3;
	}

	sp->s_rx_coalesce_mode = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_coalesce_mode", SFXGE_RX_COALESCE_OFF);

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	/* Tear down the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

int
sfxge_rx_start(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip;
	const efx_nic_cfg_t *encp;
	size_t hdrlen, align;
	int index;
	int rc;

	mutex_enter(&(smp->sm_lock));

	/* Calculate the receive packet buffer size and alignment */
	sp->s_rx_buffer_size = EFX_MAC_PDU(sp->s_mtu);

	encp = efx_nic_cfg_get(sp->s_enp);

	/* Packet buffer allocations are cache line aligned */
	EFSYS_ASSERT3U(encp->enc_rx_buf_align_start, <=, SFXGE_CPU_CACHE_SIZE);

	if (sp->s_family == EFX_FAMILY_HUNTINGTON) {
		sp->s_rx_prefix_size = encp->enc_rx_prefix_size;

		hdrlen = sp->s_rx_prefix_size + sizeof (struct ether_header);

		/* Ensure IP headers are 32-bit aligned */
		sp->s_rx_buffer_align = P2ROUNDUP(hdrlen, 4) - hdrlen;
		sp->s_rx_buffer_size += sp->s_rx_buffer_align;

	} else if (encp->enc_features & EFX_FEATURE_LFSR_HASH_INSERT) {
		sp->s_rx_prefix_size = encp->enc_rx_prefix_size;

		/*
		 * Place the start of the buffer a prefix length minus 2
		 * before the start of a cache line. This ensures that the
		 * last two bytes of the prefix (which is where the LFSR hash
		 * is located) are in the same cache line as the headers, and
		 * the IP header is 32-bit aligned.
		 */
		sp->s_rx_buffer_align =
		    SFXGE_CPU_CACHE_SIZE - (encp->enc_rx_prefix_size - 2);
		sp->s_rx_buffer_size += sp->s_rx_buffer_align;
	} else {
		sp->s_rx_prefix_size = 0;

		/*
		 * Place the start of the buffer 2 bytes after a cache line
		 * boundary so that the headers fit into the cache line and
		 * the IP header is 32-bit aligned.
		 */
		hdrlen = sp->s_rx_prefix_size + sizeof (struct ether_header);

		sp->s_rx_buffer_align = P2ROUNDUP(hdrlen, 4) - hdrlen;
		sp->s_rx_buffer_size += sp->s_rx_buffer_align;
	}
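
	/*
	 * Worked example (assuming a 64 byte cache line and a 16 byte
	 * prefix on LFSR hardware): the buffer starts at offset
	 * 64 - (16 - 2) = 50 within a cache line, so the final two
	 * prefix bytes (the LFSR hash) sit at offsets 64-65, sharing a
	 * cache line with the Ethernet header at offset 66, and the IP
	 * header lands at 66 + 14 = 80, which is 32-bit aligned.
	 */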

	/* Align end of packet buffer for RX DMA end padding */
	align = MAX(1, encp->enc_rx_buf_align_end);
	EFSYS_ASSERT(ISP2(align));
	sp->s_rx_buffer_size = P2ROUNDUP(sp->s_rx_buffer_size, align);

	/* Initialize the receive module */
	if ((rc = efx_rx_init(sp->s_enp)) != 0)
		goto fail1;

	mutex_exit(&(smp->sm_lock));

	if ((rc = sfxge_rx_scale_start(sp)) != 0)
		goto fail2;

	/* Start the receive queue(s) */
	sip = &(sp->s_intr);
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qstart(sp, index)) != 0)
			goto fail3;
	}

	ASSERT3U(sp->s_srp[0]->sr_state, ==, SFXGE_RXQ_STARTED);
	/* It is sufficient to have Rx scale initialized */
	ASSERT3U(sp->s_rx_scale.srs_state, ==, SFXGE_RX_SCALE_STARTED);
	rc = efx_mac_filter_default_rxq_set(sp->s_enp, sp->s_srp[0]->sr_erp,
	    sp->s_rx_scale.srs_count > 1);
	if (rc != 0)
		goto fail4;

	return (0);

fail4:
	DTRACE_PROBE(fail4);

fail3:
	DTRACE_PROBE(fail3);

	/* Stop the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qstop(sp, index);

	sfxge_rx_scale_stop(sp);

fail2:
	DTRACE_PROBE(fail2);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(sp->s_enp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(smp->sm_lock));

	return (rc);
}


void
sfxge_rx_coalesce_mode_get(sfxge_t *sp, sfxge_rx_coalesce_mode_t *modep)
{
	*modep = sp->s_rx_coalesce_mode;
}

int
sfxge_rx_coalesce_mode_set(sfxge_t *sp, sfxge_rx_coalesce_mode_t mode)
{
	int rc;

	switch (mode) {
	case SFXGE_RX_COALESCE_OFF:
	case SFXGE_RX_COALESCE_DISALLOW_PUSH:
	case SFXGE_RX_COALESCE_ALLOW_PUSH:
		break;

	default:
		rc = EINVAL;
		goto fail1;
	}

	sp->s_rx_coalesce_mode = mode;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_rx_stop(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip = &(sp->s_intr);
	efx_nic_t *enp = sp->s_enp;
	int index;

	ASSERT(mutex_owned(&(sp->s_state_lock)));

	efx_mac_filter_default_rxq_clear(enp);

	/* Stop the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0) {
		/* TBD: Flush RXQs in parallel; HW has limit + may need retry */
		sfxge_rx_qstop(sp, index);
	}

	sfxge_rx_scale_stop(sp);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(enp);

	sp->s_rx_buffer_align = 0;
	sp->s_rx_prefix_size = 0;
	sp->s_rx_buffer_size = 0;

	mutex_exit(&(smp->sm_lock));
}

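/*
 * Return the total number of packets currently on loan to the stack,
 * giving each queue's free packet pool a chance to reclaim returned
 * packets (sfxge_rx_qfpp_swizzle) under the event queue lock.
 */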
unsigned int
sfxge_rx_loaned(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;
	unsigned int loaned;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	loaned = 0;
	for (index = 0; index < sip->si_nalloc; index++) {
		sfxge_rxq_t *srp = sp->s_srp[index];
		sfxge_evq_t *sep = sp->s_sep[srp->sr_index];

		mutex_enter(&(sep->se_lock));

		loaned += sfxge_rx_qfpp_swizzle(srp);

		mutex_exit(&(sep->se_lock));
	}

	return (loaned);
}

void
sfxge_rx_fini(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	sp->s_rx_coalesce_mode = SFXGE_RX_COALESCE_OFF;

	/* Tear down the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	ASSERT3U(sp->s_rx_pkt_mem_alloc, ==, 0);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);
}
