xref: /illumos-gate/usr/src/uts/common/io/igb/igb_rx.c (revision 9b4e3ac25d882519cad3fc11f0c53b07f4e60536)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright(c) 2007-2008 Intel Corporation. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #include "igb_sw.h"
32 
33 /* function prototypes */
34 static mblk_t *igb_rx_bind(igb_rx_ring_t *, uint32_t, uint32_t);
35 static mblk_t *igb_rx_copy(igb_rx_ring_t *, uint32_t, uint32_t);
36 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);
37 
38 #ifndef IGB_DEBUG
39 #pragma inline(igb_rx_assoc_hcksum)
40 #endif
41 
42 
43 /*
44  * igb_rx_recycle - the call-back function to reclaim rx buffer
45  *
46  * This function is called when an mp is freed by the user thru
47  * freeb call (Only for mp constructed through desballoc call).
48  * It returns back the freed buffer to the free list.
49  */
50 void
51 igb_rx_recycle(caddr_t arg)
52 {
53 	igb_rx_ring_t *rx_ring;
54 	rx_control_block_t *recycle_rcb;
55 	uint32_t free_index;
56 
57 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
58 	rx_ring = recycle_rcb->rx_ring;
59 
60 	if (recycle_rcb->state == RCB_FREE)
61 		return;
62 
63 	recycle_rcb->state = RCB_FREE;
64 
65 	ASSERT(recycle_rcb->mp == NULL);
66 
67 	/*
68 	 * Using the recycled data buffer to generate a new mblk
69 	 */
70 	recycle_rcb->mp = desballoc((unsigned char *)
71 	    (recycle_rcb->rx_buf.address - IPHDR_ALIGN_ROOM),
72 	    (recycle_rcb->rx_buf.size + IPHDR_ALIGN_ROOM),
73 	    0, &recycle_rcb->free_rtn);
74 	if (recycle_rcb->mp != NULL) {
75 		recycle_rcb->mp->b_rptr += IPHDR_ALIGN_ROOM;
76 		recycle_rcb->mp->b_wptr += IPHDR_ALIGN_ROOM;
77 	}
78 
79 	/*
80 	 * Put the recycled rx control block into free list
81 	 */
82 	mutex_enter(&rx_ring->recycle_lock);
83 
84 	free_index = rx_ring->rcb_tail;
85 	ASSERT(rx_ring->free_list[free_index] == NULL);
86 
87 	rx_ring->free_list[free_index] = recycle_rcb;
88 	rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
89 
90 	mutex_exit(&rx_ring->recycle_lock);
91 
92 	/*
93 	 * The atomic operation on the number of the available rx control
94 	 * blocks in the free list is used to make the recycling mutual
95 	 * exclusive with the receiving.
96 	 */
97 	atomic_inc_32(&rx_ring->rcb_free);
98 	ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size);
99 }
100 
101 /*
102  * igb_rx_copy - Use copy to process the received packet
103  *
104  * This function will use bcopy to process the packet
105  * and send the copied packet upstream
106  */
107 static mblk_t *
108 igb_rx_copy(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
109 {
110 	rx_control_block_t *current_rcb;
111 	mblk_t *mp;
112 	igb_t *igb = rx_ring->igb;
113 
114 	current_rcb = rx_ring->work_list[index];
115 
116 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
117 
118 	if (igb_check_dma_handle(
119 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
120 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
121 	}
122 
123 	/*
124 	 * Allocate buffer to receive this packet
125 	 */
126 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
127 	if (mp == NULL) {
128 		igb_log(rx_ring->igb, "igb_rx_copy: allocate buffer failed");
129 		return (NULL);
130 	}
131 
132 	/*
133 	 * Copy the data received into the new cluster
134 	 */
135 	mp->b_rptr += IPHDR_ALIGN_ROOM;
136 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
137 	mp->b_wptr = mp->b_rptr + pkt_len;
138 
139 	return (mp);
140 }
141 
142 /*
143  * igb_rx_bind - Use existing DMA buffer to build mblk for receiving
144  *
145  * This function will use pre-bound DMA buffer to receive the packet
146  * and build mblk that will be sent upstream.
147  */
148 static mblk_t *
149 igb_rx_bind(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len)
150 {
151 	rx_control_block_t *current_rcb;
152 	rx_control_block_t *free_rcb;
153 	uint32_t free_index;
154 	mblk_t *mp;
155 	igb_t *igb = rx_ring->igb;
156 
157 	/*
158 	 * If the free list is empty, we cannot proceed to send
159 	 * the current DMA buffer upstream. We'll have to return
160 	 * and use bcopy to process the packet.
161 	 */
162 	if (igb_atomic_reserve(&rx_ring->rcb_free, 1) < 0)
163 		return (NULL);
164 
165 	current_rcb = rx_ring->work_list[index];
166 	/*
167 	 * If the mp of the rx control block is NULL, try to do
168 	 * desballoc again.
169 	 */
170 	if (current_rcb->mp == NULL) {
171 		current_rcb->mp = desballoc((unsigned char *)
172 		    (current_rcb->rx_buf.address - IPHDR_ALIGN_ROOM),
173 		    (current_rcb->rx_buf.size + IPHDR_ALIGN_ROOM),
174 		    0, &current_rcb->free_rtn);
175 		/*
176 		 * If it is failed to built a mblk using the current
177 		 * DMA buffer, we have to return and use bcopy to
178 		 * process the packet.
179 		 */
180 		if (current_rcb->mp == NULL) {
181 			atomic_inc_32(&rx_ring->rcb_free);
182 			return (NULL);
183 		}
184 	}
185 	/*
186 	 * Sync up the data received
187 	 */
188 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
189 
190 	if (igb_check_dma_handle(
191 	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
192 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
193 	}
194 
195 	mp = current_rcb->mp;
196 	current_rcb->mp = NULL;
197 	current_rcb->state = RCB_SENDUP;
198 
199 	mp->b_wptr = mp->b_rptr + pkt_len;
200 	mp->b_next = mp->b_cont = NULL;
201 
202 	/*
203 	 * Strip off one free rx control block from the free list
204 	 */
205 	free_index = rx_ring->rcb_head;
206 	free_rcb = rx_ring->free_list[free_index];
207 	ASSERT(free_rcb != NULL);
208 	rx_ring->free_list[free_index] = NULL;
209 	rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size);
210 
211 	/*
212 	 * Put the rx control block to the work list
213 	 */
214 	rx_ring->work_list[index] = free_rcb;
215 
216 	return (mp);
217 }
218 
219 /*
220  * igb_rx_assoc_hcksum
221  *
222  * Check the rx hardware checksum status and associate the hcksum flags
223  */
224 static void
225 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
226 {
227 	uint32_t hcksum_flags = 0;
228 
229 	/* Ignore Checksum Indication */
230 	if (status_error & E1000_RXD_STAT_IXSM)
231 		return;
232 
233 	/*
234 	 * Check TCP/UDP checksum
235 	 */
236 	if (((status_error & E1000_RXD_STAT_TCPCS) ||
237 	    (status_error & E1000_RXD_STAT_UDPCS)) &&
238 	    !(status_error & E1000_RXDEXT_STATERR_TCPE))
239 		hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
240 
241 	/*
242 	 * Check IP Checksum
243 	 */
244 	if ((status_error & E1000_RXD_STAT_IPCS) &&
245 	    !(status_error & E1000_RXDEXT_STATERR_IPE))
246 		hcksum_flags |= HCK_IPV4_HDRCKSUM;
247 
248 	if (hcksum_flags != 0) {
249 		(void) hcksum_assoc(mp,
250 		    NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0);
251 	}
252 }
253 
254 mblk_t *
255 igb_rx_ring_poll(void *arg, int bytes)
256 {
257 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
258 	mblk_t *mp = NULL;
259 
260 	ASSERT(bytes >= 0);
261 
262 	if (bytes == 0)
263 		return (mp);
264 
265 	mutex_enter(&rx_ring->rx_lock);
266 	mp = igb_rx(rx_ring, bytes);
267 	mutex_exit(&rx_ring->rx_lock);
268 
269 	return (mp);
270 }
271 
272 /*
273  * igb_rx - Receive the data of one ring
274  *
275  * This function goes throught h/w descriptor in one specified rx ring,
276  * receives the data if the descriptor status shows the data is ready.
277  * It returns a chain of mblks containing the received data, to be
278  * passed up to mac_rx().
279  */
280 mblk_t *
281 igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
282 {
283 	union e1000_adv_rx_desc *current_rbd;
284 	rx_control_block_t *current_rcb;
285 	mblk_t *mp;
286 	mblk_t *mblk_head;
287 	mblk_t **mblk_tail;
288 	uint32_t rx_next;
289 	uint32_t rx_tail;
290 	uint32_t pkt_len;
291 	uint32_t status_error;
292 	uint32_t pkt_num;
293 	uint32_t total_bytes;
294 	igb_t *igb = rx_ring->igb;
295 
296 	mblk_head = NULL;
297 	mblk_tail = &mblk_head;
298 
299 	/*
300 	 * Sync the receive descriptors before
301 	 * accepting the packets
302 	 */
303 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL);
304 
305 	if (igb_check_dma_handle(
306 	    rx_ring->rbd_area.dma_handle) != DDI_FM_OK) {
307 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
308 	}
309 
310 	/*
311 	 * Get the start point of rx bd ring which should be examined
312 	 * during this cycle.
313 	 */
314 	rx_next = rx_ring->rbd_next;
315 
316 	current_rbd = &rx_ring->rbd_ring[rx_next];
317 	pkt_num = 0;
318 	total_bytes = 0;
319 	status_error = current_rbd->wb.upper.status_error;
320 	while (status_error & E1000_RXD_STAT_DD) {
321 		/*
322 		 * If hardware has found the errors, but the error
323 		 * is hardware checksum error, here does not discard the
324 		 * packet, and let upper layer compute the checksum;
325 		 * Otherwise discard the packet.
326 		 */
327 		if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
328 		    !(status_error & E1000_RXD_STAT_EOP)) {
329 			IGB_DEBUG_STAT(rx_ring->stat_frame_error);
330 			goto rx_discard;
331 		}
332 
333 		IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
334 		    (status_error & E1000_RXDEXT_STATERR_TCPE) ||
335 		    (status_error & E1000_RXDEXT_STATERR_IPE));
336 
337 		pkt_len = current_rbd->wb.upper.length;
338 
339 		if ((poll_bytes != IGB_NO_POLL) &&
340 		    ((pkt_len + total_bytes) > poll_bytes))
341 			break;
342 
343 		IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
344 		total_bytes += pkt_len;
345 
346 		mp = NULL;
347 		/*
348 		 * For packets with length more than the copy threshold,
349 		 * we'll firstly try to use the existed DMA buffer to built
350 		 * a mblk and send the mblk upstream.
351 		 *
352 		 * If the first method fails, or the packet length is less
353 		 * than the copy threshold, we'll allocate a new mblk and
354 		 * copy the packet data to the mblk.
355 		 */
356 		if (pkt_len > rx_ring->copy_thresh)
357 			mp = igb_rx_bind(rx_ring, rx_next, pkt_len);
358 
359 		if (mp == NULL)
360 			mp = igb_rx_copy(rx_ring, rx_next, pkt_len);
361 
362 		if (mp != NULL) {
363 			/*
364 			 * Check h/w checksum offload status
365 			 */
366 			if (igb->rx_hcksum_enable)
367 				igb_rx_assoc_hcksum(mp, status_error);
368 
369 			*mblk_tail = mp;
370 			mblk_tail = &mp->b_next;
371 		}
372 
373 rx_discard:
374 		/*
375 		 * Reset rx descriptor read bits
376 		 */
377 		current_rcb = rx_ring->work_list[rx_next];
378 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
379 		current_rbd->read.hdr_addr = 0;
380 
381 		rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size);
382 
383 		/*
384 		 * The receive function is in interrupt context, so here
385 		 * limit_per_intr is used to avoid doing receiving too long
386 		 * per interrupt.
387 		 */
388 		if (++pkt_num > rx_ring->limit_per_intr) {
389 			IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
390 			break;
391 		}
392 
393 		current_rbd = &rx_ring->rbd_ring[rx_next];
394 		status_error = current_rbd->wb.upper.status_error;
395 	}
396 
397 	DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV);
398 
399 	rx_ring->rbd_next = rx_next;
400 
401 	/*
402 	 * Update the h/w tail accordingly
403 	 */
404 	rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size);
405 
406 	E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);
407 
408 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
409 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
410 	}
411 
412 	return (mblk_head);
413 }
414