1 /*
2 * This file is provided under a CDDLv1 license. When using or
3 * redistributing this file, you may do so under this license.
4 * In redistributing this file this license must be included
5 * and no other modification of this header file is permitted.
6 *
7 * CDDL LICENSE SUMMARY
8 *
9 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10 *
11 * The contents of this file are subject to the terms of Version
12 * 1.0 of the Common Development and Distribution License (the "License").
13 *
14 * You should have received a copy of the License with this software.
15 * You can obtain a copy of the License at
16 * http://www.opensolaris.org/os/licensing.
17 * See the License for the specific language governing permissions
18 * and limitations under the License.
19 */
20
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 * Copyright 2016 Joyent, Inc.
28 */
29
30 /*
31 * **********************************************************************
32 * *
33 * Module Name: *
34 * e1000g_rx.c *
35 * *
36 * Abstract: *
37 * This file contains some routines that take care of Receive *
38 * interrupt and also for the received packets it sends up to *
39 * upper layer. *
40 * It tries to do a zero copy if free buffers are available in *
41 * the pool. *
42 * *
43 * **********************************************************************
44 */
45
46 #include "e1000g_sw.h"
47 #include "e1000g_debug.h"
48
49 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
50
/*
 * e1000g_rxfree_func - the call-back function to reclaim rx buffer
 *
 * This function is called when an mp is freed by the user thru
 * freeb call (Only for mp constructed through desballoc call)
 * It returns back the freed buffer to the freelist
 */
void
e1000g_rxfree_func(p_rx_sw_packet_t packet)
{
	e1000g_rx_data_t *rx_data;
	private_devi_list_t *devi_node;
	struct e1000g *Adapter;
	uint32_t ring_cnt;
	uint32_t ref_cnt;
	unsigned char *address;

	if (packet->ref_cnt == 0) {
		/*
		 * This case only happens when rx buffers are being freed
		 * in e1000g_stop() and freemsg() is called.
		 */
		return;
	}

	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;

	if (packet->mp == NULL) {
		/*
		 * Allocate a mblk that binds to the data buffer, so the
		 * packet is immediately usable the next time it is handed
		 * up.  A desballoc() failure is tolerated here: the
		 * receive path retries the allocation before loaning the
		 * buffer out (see e1000g_receive()).
		 */
		address = (unsigned char *)packet->rx_buf->address;
		if (address != NULL) {
			packet->mp = desballoc((unsigned char *)
			    address, packet->rx_buf->size,
			    BPRI_MED, &packet->free_rtn);
		}
	}

	/*
	 * Enqueue the recycled packets in a recycle queue. When freelist
	 * dries up, move the entire chain of packets from recycle queue
	 * to freelist. This helps in avoiding per packet mutex contention
	 * around freelist.
	 */
	mutex_enter(&rx_data->recycle_lock);
	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
	rx_data->recycle_freepkt++;
	mutex_exit(&rx_data->recycle_lock);

	/*
	 * Drop our reference on the packet.  A count of zero means the
	 * ring is being torn down, so the packet must be truly freed
	 * rather than recycled.
	 */
	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
	if (ref_cnt == 0) {
		mutex_enter(&e1000g_rx_detach_lock);
		e1000g_free_rx_sw_packet(packet, B_FALSE);

		atomic_dec_32(&rx_data->pending_count);
		atomic_dec_32(&e1000g_mblks_pending);

		/*
		 * If this was the last outstanding buffer of a stopped
		 * ring, release the ring's rx data; and if the device
		 * instance is waiting to detach, free its private devi
		 * node once its last ring drains.
		 */
		if ((rx_data->pending_count == 0) &&
		    (rx_data->flag & E1000G_RX_STOPPED)) {
			devi_node = rx_data->priv_devi_node;

			if (devi_node != NULL) {
				ring_cnt = atomic_dec_32_nv(
				    &devi_node->pending_rx_count);
				if ((ring_cnt == 0) &&
				    (devi_node->flag &
				    E1000G_PRIV_DEVI_DETACH)) {
					e1000g_free_priv_devi_node(
					    devi_node);
				}
			} else {
				Adapter = rx_data->rx_ring->adapter;
				atomic_dec_32(
				    &Adapter->pending_rx_count);
			}

			e1000g_free_rx_pending_buffers(rx_data);
			e1000g_free_rx_data(rx_data);
		}
		mutex_exit(&e1000g_rx_detach_lock);
	}
}
134
/*
 * e1000g_rx_setup - setup rx data structures
 *
 * This routine initializes all of the receive related
 * structures. This includes the receive descriptors, the
 * actual receive buffers, and the rx_sw_packet software
 * structures.
 *
 * It first (re)builds the software descriptor/buffer lists, then
 * programs the hardware receive registers (delays, ring base/size,
 * head/tail, RCTL, checksum offload) and finally enables the
 * receive unit.  Must only be called while the receiver is being
 * (re)initialized.
 */
void
e1000g_rx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_rx_sw_packet_t packet;
	struct e1000_rx_desc *descriptor;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;
	uint32_t rctl;
	uint32_t rxdctl;
	uint32_t ert;
	uint16_t phy_data;
	int i;
	int size;
	e1000g_rx_data_t *rx_data;

	hw = &Adapter->shared;
	rx_data = Adapter->rx_ring->rx_data;

	/*
	 * zero out all of the receive buffer descriptor memory
	 * assures any previous data or status is erased
	 */
	bzero(rx_data->rbd_area,
	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);

	if (!Adapter->rx_buffer_setup) {
		/* First-time setup: build the lists from the packet area */

		/* Init the list of "Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->recv_list);

		/* Init the list of "Free Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->free_list);

		/* Init the list of "Free Receive Buffer" */
		QUEUE_INIT_LIST(&rx_data->recycle_list);
		/*
		 * Setup Receive list and the Free list. Note that
		 * the both were allocated in one packet area.
		 */
		packet = rx_data->packet_area;
		descriptor = rx_data->rbd_first;

		/*
		 * The first rx_desc_num packets back the hardware ring;
		 * each descriptor gets the DMA address of its buffer.
		 */
		for (i = 0; i < Adapter->rx_desc_num;
		    i++, packet = packet->next, descriptor++) {
			ASSERT(packet != NULL);
			ASSERT(descriptor != NULL);
			descriptor->buffer_addr =
			    packet->rx_buf->dma_address;

			/* Add this rx_sw_packet to the receive list */
			QUEUE_PUSH_TAIL(&rx_data->recv_list,
			    &packet->Link);
		}

		/* The remaining packets seed the freelist */
		for (i = 0; i < Adapter->rx_freelist_num;
		    i++, packet = packet->next) {
			ASSERT(packet != NULL);
			/* Add this rx_sw_packet to the free list */
			QUEUE_PUSH_TAIL(&rx_data->free_list,
			    &packet->Link);
		}
		rx_data->avail_freepkt = Adapter->rx_freelist_num;
		rx_data->recycle_freepkt = 0;

		Adapter->rx_buffer_setup = B_TRUE;
	} else {
		/* Setup the initial pointer to the first rx descriptor */
		packet = (p_rx_sw_packet_t)
		    QUEUE_GET_HEAD(&rx_data->recv_list);
		descriptor = rx_data->rbd_first;

		/*
		 * Re-init path: lists already exist, just rebind each
		 * descriptor to its buffer's DMA address.
		 */
		for (i = 0; i < Adapter->rx_desc_num; i++) {
			ASSERT(packet != NULL);
			ASSERT(descriptor != NULL);
			descriptor->buffer_addr =
			    packet->rx_buf->dma_address;

			/* Get next rx_sw_packet */
			packet = (p_rx_sw_packet_t)
			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
			descriptor++;
		}
	}

	/* Program the rx interrupt delay timer (RDTR) */
	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
	if (hw->mac.type >= e1000_82540) {
		/* Absolute interrupt delay, only on 82540 and newer MACs */
		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
		    Adapter->rx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
	}

	/*
	 * Setup our descriptor pointers
	 */
	rx_data->rbd_next = rx_data->rbd_first;

	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
	/*
	 * Read the length back; the value itself is not used further.
	 * NOTE(review): presumably this flushes the posted write (or
	 * picks up hardware rounding) -- confirm against the HW manual.
	 */
	size = E1000_READ_REG(hw, E1000_RDLEN(0));

	/* To get lower order bits */
	buf_low = (uint32_t)rx_data->rbd_dma_addr;
	/* To get the higher order bits */
	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);

	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);

	/*
	 * Setup our HW Rx Head & Tail descriptor pointers.
	 * Tail points at the last descriptor, head at the first, so the
	 * entire ring is initially owned by hardware.
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
	E1000_WRITE_REG(hw, E1000_RDH(0), 0);

	/*
	 * Setup the Receive Control Register (RCTL), and ENABLE the
	 * receiver. The initial configuration is to: Enable the receiver,
	 * accept broadcasts, discard bad packets (and long packets),
	 * disable VLAN filter checking, set the receive descriptor
	 * minimum threshold size to 1/2, and the receive buffer size to
	 * 2k.
	 */
	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
	    E1000_RCTL_RDMTS_HALF |
	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */

	if (Adapter->default_mtu > ETHERMTU)
		rctl |= E1000_RCTL_LPE;	/* Large Packet Enable bit */

	if (Adapter->strip_crc)
		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */

	/*
	 * Select the receive buffer size.  The 82545/82546 memory
	 * workaround pins it at 2K; otherwise pick the smallest size
	 * class that holds max_frame_size.
	 */
	if (Adapter->mem_workaround_82546 &&
	    ((hw->mac.type == e1000_82545) ||
	    (hw->mac.type == e1000_82546) ||
	    (hw->mac.type == e1000_82546_rev_3))) {
		rctl |= E1000_RCTL_SZ_2048;
	} else {
		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
		else
			rctl |= E1000_RCTL_SZ_2048;
	}

	/* Store bad packets when TBI symbol workaround is active (82543) */
	if (e1000_tbi_sbp_enabled_82543(hw))
		rctl |= E1000_RCTL_SBP;

	/*
	 * Enable Early Receive Threshold (ERT) on supported devices.
	 * Only takes effect when packet size is equal or larger than the
	 * specified value (in 8 byte units), e.g. using jumbo frames.
	 */
	if ((hw->mac.type == e1000_82573) ||
	    (hw->mac.type == e1000_82574) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan)) {

		ert = E1000_ERT_2048;

		/*
		 * Special modification when ERT and
		 * jumbo frames are enabled
		 */
		if (Adapter->default_mtu > ETHERMTU) {
			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
			/* bit 13 enables the early-receive behavior */
			ert |= (1 << 13);
		}

		E1000_WRITE_REG(hw, E1000_ERT, ert);
	}

	/*
	 * Workaround errata on 82577/8 adapters with large frames.
	 * NOTE(review): PHY_REG(770, 26) and the literal register writes
	 * below come from the errata workaround; magic values are as
	 * specified there -- do not "clean up" without the errata sheet.
	 */
	if ((hw->mac.type == e1000_pchlan) &&
	    (Adapter->default_mtu > ETHERMTU)) {

		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
		phy_data &= 0xfff8;
		phy_data |= (1 << 2);
		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);

		if (hw->phy.type == e1000_phy_82577) {
			(void) e1000_read_phy_reg(hw, 22, &phy_data);
			phy_data &= 0x0fff;
			phy_data |= (1 << 14);
			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
			(void) e1000_write_phy_reg(hw, 22, phy_data);
		}
	}

	/* Workaround errata on 82579 adapters with large frames */
	if (hw->mac.type == e1000_pch2lan) {
		boolean_t enable_jumbo = (Adapter->default_mtu > ETHERMTU ?
		    B_TRUE : B_FALSE);

		if (e1000_lv_jumbo_workaround_ich8lan(hw, enable_jumbo) != 0)
			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
			    "failed to enable jumbo frame workaround mode\n");
	}

	/* Enable hardware receive checksum offload */
	reg_val =
	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */

	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);

	/*
	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
	 * processing of received IPV6 extension headers
	 */
	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
	}

	/* Write to enable the receive unit */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
}
378
379 /*
380 * e1000g_get_buf - get an rx sw packet from the free_list
381 */
382 static p_rx_sw_packet_t
e1000g_get_buf(e1000g_rx_data_t * rx_data)383 e1000g_get_buf(e1000g_rx_data_t *rx_data)
384 {
385 p_rx_sw_packet_t packet;
386 struct e1000g *Adapter;
387
388 Adapter = rx_data->rx_ring->adapter;
389
390 mutex_enter(&rx_data->freelist_lock);
391 packet = (p_rx_sw_packet_t)
392 QUEUE_POP_HEAD(&rx_data->free_list);
393 if (packet != NULL) {
394 rx_data->avail_freepkt--;
395 goto end;
396 }
397
398 /*
399 * If the freelist has no packets, check the recycle list
400 * to see if there are any available descriptor there.
401 */
402 mutex_enter(&rx_data->recycle_lock);
403 QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
404 rx_data->avail_freepkt = rx_data->recycle_freepkt;
405 rx_data->recycle_freepkt = 0;
406 mutex_exit(&rx_data->recycle_lock);
407 packet = (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->free_list);
408 if (packet != NULL) {
409 rx_data->avail_freepkt--;
410 goto end;
411 }
412
413 if (Adapter->rx_freelist_num < Adapter->rx_freelist_limit) {
414 (void) e1000g_increase_rx_packets(rx_data);
415 packet = (p_rx_sw_packet_t)
416 QUEUE_POP_HEAD(&rx_data->free_list);
417 if (packet != NULL) {
418 rx_data->avail_freepkt--;
419 }
420 }
421
422 end:
423 mutex_exit(&rx_data->freelist_lock);
424 return (packet);
425 }
426
/*
 * e1000g_receive - main receive routine
 *
 * This routine will process packets received in an interrupt
 *
 * Walks the rx descriptor ring starting at rbd_next, building an
 * mblk chain of completed frames.  Each frame is either handed up
 * zero-copy (its loaned buffer replaced from the freelist) or
 * bcopy'd into a fresh allocb() buffer.  Returns the head of the
 * chain (NULL if nothing was received); *tail is set to the last
 * mblk.  `sz' caps the total chained bytes unless it is
 * E1000G_CHAIN_NO_LIMIT.
 */
mblk_t *
e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
{
	struct e1000_hw *hw;
	mblk_t *nmp;
	mblk_t *ret_mp;
	mblk_t *ret_nmp;
	struct e1000_rx_desc *current_desc;
	struct e1000_rx_desc *last_desc;
	p_rx_sw_packet_t packet;
	p_rx_sw_packet_t newpkt;
	uint16_t length;
	uint32_t pkt_count;
	uint32_t desc_count;
	boolean_t accept_frame;
	boolean_t end_of_packet;
	boolean_t need_copy;
	struct e1000g *Adapter;
	dma_buffer_t *rx_buf;
	uint16_t cksumflags;
	uint_t chain_sz = 0;
	e1000g_rx_data_t *rx_data;
	uint32_t max_size;
	uint32_t min_size;

	ret_mp = NULL;
	ret_nmp = NULL;
	pkt_count = 0;
	desc_count = 0;
	cksumflags = 0;

	Adapter = rx_ring->adapter;
	rx_data = rx_ring->rx_data;
	hw = &Adapter->shared;

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);

	/* Fault-management check on the descriptor DMA handle */
	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (NULL);
	}

	current_desc = rx_data->rbd_next;
	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
		/*
		 * don't send anything up. just clear the RFD
		 */
		E1000G_DEBUG_STAT(rx_ring->stat_none);
		return (NULL);
	}

	/*
	 * Bounds for a sane frame: max excludes FCS and allows for a
	 * VLAN tag (tightened below for actual VLAN packets).
	 */
	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
	min_size = ETHERMIN;

	/*
	 * Loop through the receive descriptors starting at the last known
	 * descriptor owned by the hardware that begins a packet.
	 */
	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (pkt_count < Adapter->rx_limit_onintr) &&
	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {

		desc_count++;
		/*
		 * Now this can happen in Jumbo frame situation.
		 */
		if (current_desc->status & E1000_RXD_STAT_EOP) {
			/* packet has EOP set */
			end_of_packet = B_TRUE;
		} else {
			/*
			 * If this received buffer does not have the
			 * End-Of-Packet bit set, the received packet
			 * will consume multiple buffers. We won't send this
			 * packet upstack till we get all the related buffers.
			 */
			end_of_packet = B_FALSE;
		}

		/*
		 * Get a pointer to the actual receive buffer
		 * The mp->b_rptr is mapped to The CurrentDescriptor
		 * Buffer Address.
		 */
		packet =
		    (p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
		ASSERT(packet != NULL);

		rx_buf = packet->rx_buf;

		length = current_desc->length;

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(rx_buf->dma_handle, 0,
			    DDI_DMA_SYNC_FORKERNEL);
		else
			(void) ddi_dma_sync(rx_buf->dma_handle,
			    E1000G_IPALIGNROOM, length,
			    DDI_DMA_SYNC_FORKERNEL);
#else
		(void) ddi_dma_sync(rx_buf->dma_handle,
		    E1000G_IPALIGNROOM, length,
		    DDI_DMA_SYNC_FORKERNEL);
#endif

		if (e1000g_check_dma_handle(
		    rx_buf->dma_handle) != DDI_FM_OK) {
			ddi_fm_service_impact(Adapter->dip,
			    DDI_SERVICE_DEGRADED);
			Adapter->e1000g_state |= E1000G_ERROR;

			goto rx_drop;
		}

		/*
		 * Accept clean frames; frames with only TCP/IP checksum
		 * errors are also accepted here and flagged via the
		 * hcksum bits below, so the stack can deal with them.
		 */
		accept_frame = (current_desc->errors == 0) ||
		    ((current_desc->errors &
		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);

		if (hw->mac.type == e1000_82543) {
			unsigned char last_byte;

			last_byte =
			    *((unsigned char *)rx_buf->address + length - 1);

			/*
			 * 82543 TBI workaround: a carrier-extend symbol
			 * can be appended to an otherwise good frame;
			 * TBI_ACCEPT detects that case so the frame is
			 * kept (with the extra byte trimmed).
			 */
			if (TBI_ACCEPT(hw,
			    current_desc->status, current_desc->errors,
			    current_desc->length, last_byte,
			    Adapter->min_frame_size, Adapter->max_frame_size)) {

				e1000_tbi_adjust_stats(Adapter,
				    length, hw->mac.addr);

				length--;
				accept_frame = B_TRUE;
			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
			    (current_desc->errors == E1000_RXD_ERR_CE)) {
				accept_frame = B_TRUE;
			}
		}

		/*
		 * Indicate the packet to the NOS if it was good.
		 * Normally, hardware will discard bad packets for us.
		 * Check for the packet to be a valid Ethernet packet
		 */
		if (!accept_frame) {
			/*
			 * error in incoming packet, either the packet is not a
			 * ethernet size packet, or the packet has an error. In
			 * either case, the packet will simply be discarded.
			 */
			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
			    "Process Receive Interrupts: Error in Packet\n");

			E1000G_STAT(rx_ring->stat_error);
			/*
			 * Returning here as we are done here. There is
			 * no point in waiting for while loop to elapse
			 * and the things which were done. More efficient
			 * and less error prone...
			 */
			goto rx_drop;
		}

		/*
		 * If the Ethernet CRC is not stripped by the hardware,
		 * we need to strip it before sending it up to the stack.
		 */
		if (end_of_packet && !Adapter->strip_crc) {
			if (length > ETHERFCSL) {
				length -= ETHERFCSL;
			} else {
				/*
				 * If the fragment is smaller than the CRC,
				 * drop this fragment, do the processing of
				 * the end of the packet.
				 */
				if (rx_data->rx_mblk_tail == NULL) {
					E1000G_STAT(rx_ring->stat_crc_only_pkt);
					goto rx_next_desc;
				}

				/* Trim the CRC remainder off the prior frag */
				rx_data->rx_mblk_tail->b_wptr -=
				    ETHERFCSL - length;
				rx_data->rx_mblk_len -=
				    ETHERFCSL - length;
				goto rx_end_of_packet;
			}
		}

		need_copy = B_TRUE;

		/* Small frames are always copied, never loaned out */
		if (length <= Adapter->rx_bcopy_thresh)
			goto rx_copy;

		/*
		 * Get the pre-constructed mblk that was associated
		 * to the receive data buffer.
		 */
		if (packet->mp == NULL) {
			packet->mp = desballoc((unsigned char *)
			    rx_buf->address, length,
			    BPRI_MED, &packet->free_rtn);
		}

		if (packet->mp != NULL) {
			/*
			 * We have two sets of buffer pool. One associated with
			 * the Rxdescriptors and other a freelist buffer pool.
			 * Each time we get a good packet, Try to get a buffer
			 * from the freelist pool using e1000g_get_buf. If we
			 * get free buffer, then replace the descriptor buffer
			 * address with the free buffer we just got, and pass
			 * the pre-constructed mblk upstack. (note no copying)
			 *
			 * If we failed to get a free buffer, then try to
			 * allocate a new buffer(mp) and copy the recv buffer
			 * content to our newly allocated buffer(mp). Don't
			 * disturb the desriptor buffer address. (note copying)
			 */
			newpkt = e1000g_get_buf(rx_data);

			if (newpkt != NULL) {
				/*
				 * Get the mblk associated to the data,
				 * and strip it off the sw packet.
				 */
				nmp = packet->mp;
				packet->mp = NULL;
				/* Extra ref held while the mblk is loaned */
				atomic_inc_32(&packet->ref_cnt);

				/*
				 * Now replace old buffer with the new
				 * one we got from free list
				 * Both the RxSwPacket as well as the
				 * Receive Buffer Descriptor will now
				 * point to this new packet.
				 */
				packet = newpkt;

				current_desc->buffer_addr =
				    newpkt->rx_buf->dma_address;

				need_copy = B_FALSE;
			} else {
				/* EMPTY */
				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
			}
		}

rx_copy:
		if (need_copy) {
			/*
			 * No buffers available on free list,
			 * bcopy the data from the buffer and
			 * keep the original buffer. Dont want to
			 * do this.. Yack but no other way
			 */
			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
			    BPRI_MED)) == NULL) {
				/*
				 * The system has no buffers available
				 * to send up the incoming packet, hence
				 * the packet will have to be processed
				 * when there're more buffers available.
				 */
				E1000G_STAT(rx_ring->stat_allocb_fail);
				goto rx_drop;
			}
			/* Reserve headroom so the IP header is aligned */
			nmp->b_rptr += E1000G_IPALIGNROOM;
			nmp->b_wptr += E1000G_IPALIGNROOM;
			/*
			 * The free list did not have any buffers
			 * available, so, the received packet will
			 * have to be copied into a mp and the original
			 * buffer will have to be retained for future
			 * packet reception.
			 */
			bcopy(rx_buf->address, nmp->b_wptr, length);
		}

		ASSERT(nmp != NULL);
		nmp->b_wptr += length;

		if (rx_data->rx_mblk == NULL) {
			/*
			 * TCP/UDP checksum offload and
			 * IP checksum offload
			 * (checked only on the first fragment; IXSM set
			 * means checksum was not evaluated by hardware)
			 */
			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
				/*
				 * Check TCP/UDP checksum
				 */
				if ((current_desc->status &
				    E1000_RXD_STAT_TCPCS) &&
				    !(current_desc->errors &
				    E1000_RXD_ERR_TCPE))
					cksumflags |= HCK_FULLCKSUM_OK;
				/*
				 * Check IP Checksum
				 */
				if ((current_desc->status &
				    E1000_RXD_STAT_IPCS) &&
				    !(current_desc->errors &
				    E1000_RXD_ERR_IPE))
					cksumflags |= HCK_IPV4_HDRCKSUM_OK;
			}
		}

		/*
		 * We need to maintain our packet chain in the global
		 * Adapter structure, for the Rx processing can end
		 * with a fragment that has no EOP set.
		 */
		if (rx_data->rx_mblk == NULL) {
			/* Get the head of the message chain */
			rx_data->rx_mblk = nmp;
			rx_data->rx_mblk_tail = nmp;
			rx_data->rx_mblk_len = length;
		} else {	/* Not the first packet */
			/* Continue adding buffers */
			rx_data->rx_mblk_tail->b_cont = nmp;
			rx_data->rx_mblk_tail = nmp;
			rx_data->rx_mblk_len += length;
		}
		ASSERT(rx_data->rx_mblk != NULL);
		ASSERT(rx_data->rx_mblk_tail != NULL);
		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);

		/*
		 * Now this MP is ready to travel upwards but some more
		 * fragments are coming.
		 * We will send packet upwards as soon as we get EOP
		 * set on the packet.
		 */
		if (!end_of_packet) {
			/*
			 * continue to get the next descriptor,
			 * Tail would be advanced at the end
			 */
			goto rx_next_desc;
		}

rx_end_of_packet:
		/* VLAN-tagged frames may legitimately be VLAN_TAGSZ longer */
		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
			max_size = Adapter->max_frame_size - ETHERFCSL;

		if ((rx_data->rx_mblk_len > max_size) ||
		    (rx_data->rx_mblk_len < min_size)) {
			E1000G_STAT(rx_ring->stat_size_error);
			goto rx_drop;
		}

		/*
		 * Found packet with EOP
		 * Process the last fragment.
		 */
		if (cksumflags != 0) {
			mac_hcksum_set(rx_data->rx_mblk,
			    0, 0, 0, 0, cksumflags);
			cksumflags = 0;
		}

		/*
		 * Count packets that span multi-descriptors
		 */
		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
		    (rx_data->rx_mblk->b_cont != NULL));

		/*
		 * Append to list to send upstream
		 */
		if (ret_mp == NULL) {
			ret_mp = ret_nmp = rx_data->rx_mblk;
		} else {
			ret_nmp->b_next = rx_data->rx_mblk;
			ret_nmp = rx_data->rx_mblk;
		}
		ret_nmp->b_next = NULL;
		*tail = ret_nmp;
		chain_sz += length;

		/* Reset chaining state for the next frame */
		rx_data->rx_mblk = NULL;
		rx_data->rx_mblk_tail = NULL;
		rx_data->rx_mblk_len = 0;

		pkt_count++;

rx_next_desc:
		/*
		 * Zero out the receive descriptors status
		 */
		current_desc->status = 0;

		/* Advance rbd_next, wrapping at the end of the ring */
		if (current_desc == rx_data->rbd_last)
			rx_data->rbd_next = rx_data->rbd_first;
		else
			rx_data->rbd_next++;

		last_desc = current_desc;
		current_desc = rx_data->rbd_next;

		/*
		 * Put the buffer that we just indicated back
		 * at the end of our list
		 */
		QUEUE_PUSH_TAIL(&rx_data->recv_list,
		    &packet->Link);
	}	/* while loop */

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	/*
	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(last_desc - rx_data->rbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	Adapter->rx_pkt_cnt = pkt_count;

	return (ret_mp);

rx_drop:
	/*
	 * Common drop path: recycle the current descriptor/buffer,
	 * discard any partially-assembled chain, and still hand back
	 * whatever complete frames were chained before the failure.
	 */
	/*
	 * Zero out the receive descriptors status
	 */
	current_desc->status = 0;

	/* Sync the Rx descriptor DMA buffers */
	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	if (current_desc == rx_data->rbd_last)
		rx_data->rbd_next = rx_data->rbd_first;
	else
		rx_data->rbd_next++;

	last_desc = current_desc;

	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
	/*
	 * Reclaim all old buffers already allocated during
	 * Jumbo receives.....for incomplete reception
	 */
	if (rx_data->rx_mblk != NULL) {
		freemsg(rx_data->rx_mblk);
		rx_data->rx_mblk = NULL;
		rx_data->rx_mblk_tail = NULL;
		rx_data->rx_mblk_len = 0;
	}
	/*
	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
	 */
	E1000_WRITE_REG(hw, E1000_RDT(0),
	    (uint32_t)(last_desc - rx_data->rbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	return (ret_mp);
}
906
907 /*
908 * This is part of a workaround for the I219, see e1000g_flush_desc_rings() for
909 * more information.
910 *
911 * Flush all descriptors in the rx ring and disable it.
912 */
913 void
e1000g_flush_rx_ring(struct e1000g * Adapter)914 e1000g_flush_rx_ring(struct e1000g *Adapter)
915 {
916 struct e1000_hw *hw = &Adapter->shared;
917 uint32_t rctl, rxdctl;
918
919 rctl = E1000_READ_REG(hw, E1000_RCTL);
920 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
921 E1000_WRITE_FLUSH(hw);
922 usec_delay(150);
923
924 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
925 /* Zero the lower 14 bits (prefetch and host thresholds). */
926 rxdctl &= 0xffffc000;
927 /*
928 * Update thresholds: prefetch threshold to 31, host threshold to 1
929 * and make sure the granularity is "descriptors" and not "cache lines"
930 */
931 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
932 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
933
934 /* Momentarily enable the RX ring for the changes to take effect */
935 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
936 E1000_WRITE_FLUSH(hw);
937 usec_delay(150);
938 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
939
940 }
941