1 /*
2 * CDDL HEADER START
3 *
4 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 #include "igb_sw.h"
29
30 /* function prototypes */
31 static mblk_t *igb_rx_bind(igb_rx_data_t *, uint32_t, uint32_t);
32 static mblk_t *igb_rx_copy(igb_rx_data_t *, uint32_t, uint32_t);
33 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);
34
35 #ifndef IGB_DEBUG
36 #pragma inline(igb_rx_assoc_hcksum)
37 #endif
38
39
40 /*
41 * igb_rx_recycle - the call-back function to reclaim rx buffer
42 *
43 * This function is called when an mp is freed by the user thru
44 * freeb call (Only for mp constructed through desballoc call).
45 * It returns back the freed buffer to the free list.
46 */
47 void
igb_rx_recycle(caddr_t arg)48 igb_rx_recycle(caddr_t arg)
49 {
50 igb_t *igb;
51 igb_rx_ring_t *rx_ring;
52 igb_rx_data_t *rx_data;
53 rx_control_block_t *recycle_rcb;
54 uint32_t free_index;
55 uint32_t ref_cnt;
56
57 recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
58 rx_data = recycle_rcb->rx_data;
59 rx_ring = rx_data->rx_ring;
60 igb = rx_ring->igb;
61
62 if (recycle_rcb->ref_cnt == 0) {
63 /*
64 * This case only happens when rx buffers are being freed
65 * in igb_stop() and freemsg() is called.
66 */
67 return;
68 }
69
70 ASSERT(recycle_rcb->mp == NULL);
71
72 /*
73 * Using the recycled data buffer to generate a new mblk
74 */
75 recycle_rcb->mp = desballoc((unsigned char *)
76 recycle_rcb->rx_buf.address,
77 recycle_rcb->rx_buf.size,
78 0, &recycle_rcb->free_rtn);
79
80 /*
81 * Put the recycled rx control block into free list
82 */
83 mutex_enter(&rx_data->recycle_lock);
84
85 free_index = rx_data->rcb_tail;
86 ASSERT(rx_data->free_list[free_index] == NULL);
87
88 rx_data->free_list[free_index] = recycle_rcb;
89 rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
90
91 mutex_exit(&rx_data->recycle_lock);
92
93 /*
94 * The atomic operation on the number of the available rx control
95 * blocks in the free list is used to make the recycling mutual
96 * exclusive with the receiving.
97 */
98 atomic_inc_32(&rx_data->rcb_free);
99 ASSERT(rx_data->rcb_free <= rx_data->free_list_size);
100
101 /*
102 * Considering the case that the interface is unplumbed
103 * and there are still some buffers held by the upper layer.
104 * When the buffer is returned back, we need to free it.
105 */
106 ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
107 if (ref_cnt == 0) {
108 if (recycle_rcb->mp != NULL) {
109 freemsg(recycle_rcb->mp);
110 recycle_rcb->mp = NULL;
111 }
112
113 igb_free_dma_buffer(&recycle_rcb->rx_buf);
114
115 mutex_enter(&igb->rx_pending_lock);
116 atomic_dec_32(&rx_data->rcb_pending);
117 atomic_dec_32(&igb->rcb_pending);
118
119 /*
120 * When there is not any buffer belonging to this rx_data
121 * held by the upper layer, the rx_data can be freed.
122 */
123 if ((rx_data->flag & IGB_RX_STOPPED) &&
124 (rx_data->rcb_pending == 0))
125 igb_free_rx_ring_data(rx_data);
126
127 mutex_exit(&igb->rx_pending_lock);
128 }
129 }
130
131 /*
132 * igb_rx_copy - Use copy to process the received packet
133 *
134 * This function will use bcopy to process the packet
135 * and send the copied packet upstream
136 */
137 static mblk_t *
igb_rx_copy(igb_rx_data_t * rx_data,uint32_t index,uint32_t pkt_len)138 igb_rx_copy(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
139 {
140 rx_control_block_t *current_rcb;
141 mblk_t *mp;
142 igb_t *igb = rx_data->rx_ring->igb;
143
144 current_rcb = rx_data->work_list[index];
145
146 DMA_SYNC(¤t_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
147
148 if (igb_check_dma_handle(
149 current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
150 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
151 atomic_or_32(&igb->igb_state, IGB_ERROR);
152 return (NULL);
153 }
154
155 /*
156 * Allocate buffer to receive this packet
157 */
158 mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
159 if (mp == NULL) {
160 igb_log(igb, IGB_LOG_INFO,
161 "igb_rx_copy: allocate buffer failed");
162 return (NULL);
163 }
164
165 /*
166 * Copy the data received into the new cluster
167 */
168 mp->b_rptr += IPHDR_ALIGN_ROOM;
169 bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
170 mp->b_wptr = mp->b_rptr + pkt_len;
171
172 return (mp);
173 }
174
175 /*
176 * igb_rx_bind - Use existing DMA buffer to build mblk for receiving
177 *
178 * This function will use pre-bound DMA buffer to receive the packet
179 * and build mblk that will be sent upstream.
180 */
181 static mblk_t *
igb_rx_bind(igb_rx_data_t * rx_data,uint32_t index,uint32_t pkt_len)182 igb_rx_bind(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
183 {
184 rx_control_block_t *current_rcb;
185 rx_control_block_t *free_rcb;
186 uint32_t free_index;
187 mblk_t *mp;
188 igb_t *igb = rx_data->rx_ring->igb;
189
190 /*
191 * If the free list is empty, we cannot proceed to send
192 * the current DMA buffer upstream. We'll have to return
193 * and use bcopy to process the packet.
194 */
195 if (igb_atomic_reserve(&rx_data->rcb_free, 1) < 0)
196 return (NULL);
197
198 current_rcb = rx_data->work_list[index];
199 /*
200 * If the mp of the rx control block is NULL, try to do
201 * desballoc again.
202 */
203 if (current_rcb->mp == NULL) {
204 current_rcb->mp = desballoc((unsigned char *)
205 current_rcb->rx_buf.address,
206 current_rcb->rx_buf.size,
207 0, ¤t_rcb->free_rtn);
208 /*
209 * If it is failed to built a mblk using the current
210 * DMA buffer, we have to return and use bcopy to
211 * process the packet.
212 */
213 if (current_rcb->mp == NULL) {
214 atomic_inc_32(&rx_data->rcb_free);
215 return (NULL);
216 }
217 }
218 /*
219 * Sync up the data received
220 */
221 DMA_SYNC(¤t_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
222
223 if (igb_check_dma_handle(
224 current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
225 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
226 atomic_or_32(&igb->igb_state, IGB_ERROR);
227 atomic_inc_32(&rx_data->rcb_free);
228 return (NULL);
229 }
230
231 mp = current_rcb->mp;
232 current_rcb->mp = NULL;
233 atomic_inc_32(¤t_rcb->ref_cnt);
234
235 mp->b_wptr = mp->b_rptr + pkt_len;
236 mp->b_next = mp->b_cont = NULL;
237
238 /*
239 * Strip off one free rx control block from the free list
240 */
241 free_index = rx_data->rcb_head;
242 free_rcb = rx_data->free_list[free_index];
243 ASSERT(free_rcb != NULL);
244 rx_data->free_list[free_index] = NULL;
245 rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
246
247 /*
248 * Put the rx control block to the work list
249 */
250 rx_data->work_list[index] = free_rcb;
251
252 return (mp);
253 }
254
255 /*
256 * igb_rx_assoc_hcksum
257 *
258 * Check the rx hardware checksum status and associate the hcksum flags
259 */
260 static void
igb_rx_assoc_hcksum(mblk_t * mp,uint32_t status_error)261 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
262 {
263 uint32_t hcksum_flags = 0;
264
265 /* Ignore Checksum Indication */
266 if (status_error & E1000_RXD_STAT_IXSM)
267 return;
268
269 /*
270 * Check TCP/UDP checksum
271 */
272 if (((status_error & E1000_RXD_STAT_TCPCS) ||
273 (status_error & E1000_RXD_STAT_UDPCS)) &&
274 !(status_error & E1000_RXDEXT_STATERR_TCPE))
275 hcksum_flags |= HCK_FULLCKSUM_OK;
276
277 /*
278 * Check IP Checksum
279 */
280 if ((status_error & E1000_RXD_STAT_IPCS) &&
281 !(status_error & E1000_RXDEXT_STATERR_IPE))
282 hcksum_flags |= HCK_IPV4_HDRCKSUM_OK;
283
284 if (hcksum_flags != 0) {
285 mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags);
286 }
287 }
288
289 mblk_t *
igb_rx_ring_poll(void * arg,int bytes)290 igb_rx_ring_poll(void *arg, int bytes)
291 {
292 igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
293 mblk_t *mp = NULL;
294
295 ASSERT(bytes >= 0);
296
297 if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) ||
298 !(rx_ring->igb->igb_state & IGB_STARTED))
299 return (NULL);
300
301 mutex_enter(&rx_ring->rx_lock);
302 mp = igb_rx(rx_ring, bytes);
303 mutex_exit(&rx_ring->rx_lock);
304
305 return (mp);
306 }
307
308 /*
309 * igb_rx - Receive the data of one ring
310 *
311 * This function goes throught h/w descriptor in one specified rx ring,
312 * receives the data if the descriptor status shows the data is ready.
313 * It returns a chain of mblks containing the received data, to be
314 * passed up to mac_rx().
315 */
316 mblk_t *
igb_rx(igb_rx_ring_t * rx_ring,int poll_bytes)317 igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
318 {
319 union e1000_adv_rx_desc *current_rbd;
320 rx_control_block_t *current_rcb;
321 mblk_t *mp;
322 mblk_t *mblk_head;
323 mblk_t **mblk_tail;
324 uint32_t rx_next;
325 uint32_t rx_tail;
326 uint32_t pkt_len;
327 uint32_t status_error;
328 uint32_t pkt_num;
329 uint32_t total_bytes;
330 igb_t *igb = rx_ring->igb;
331 igb_rx_data_t *rx_data = rx_ring->rx_data;
332
333 mblk_head = NULL;
334 mblk_tail = &mblk_head;
335
336 if (igb->igb_state & IGB_ERROR)
337 return (NULL);
338
339 /*
340 * Sync the receive descriptors before
341 * accepting the packets
342 */
343 DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);
344
345 if (igb_check_dma_handle(
346 rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
347 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
348 atomic_or_32(&igb->igb_state, IGB_ERROR);
349 return (NULL);
350 }
351
352 /*
353 * Get the start point of rx bd ring which should be examined
354 * during this cycle.
355 */
356 rx_next = rx_data->rbd_next;
357
358 current_rbd = &rx_data->rbd_ring[rx_next];
359 pkt_num = 0;
360 total_bytes = 0;
361 status_error = current_rbd->wb.upper.status_error;
362 while (status_error & E1000_RXD_STAT_DD) {
363 /*
364 * If hardware has found the errors, but the error
365 * is hardware checksum error, here does not discard the
366 * packet, and let upper layer compute the checksum;
367 * Otherwise discard the packet.
368 */
369 if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
370 !(status_error & E1000_RXD_STAT_EOP)) {
371 IGB_DEBUG_STAT(rx_ring->stat_frame_error);
372 goto rx_discard;
373 }
374
375 IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
376 (status_error & E1000_RXDEXT_STATERR_TCPE) ||
377 (status_error & E1000_RXDEXT_STATERR_IPE));
378
379 pkt_len = current_rbd->wb.upper.length;
380
381 if ((poll_bytes != IGB_NO_POLL) &&
382 ((pkt_len + total_bytes) > poll_bytes))
383 break;
384
385 IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
386 total_bytes += pkt_len;
387
388 mp = NULL;
389 /*
390 * For packets with length more than the copy threshold,
391 * we'll firstly try to use the existed DMA buffer to built
392 * a mblk and send the mblk upstream.
393 *
394 * If the first method fails, or the packet length is less
395 * than the copy threshold, we'll allocate a new mblk and
396 * copy the packet data to the mblk.
397 */
398 if (pkt_len > igb->rx_copy_thresh)
399 mp = igb_rx_bind(rx_data, rx_next, pkt_len);
400
401 if (mp == NULL)
402 mp = igb_rx_copy(rx_data, rx_next, pkt_len);
403
404 if (mp != NULL) {
405 /*
406 * Check h/w checksum offload status
407 */
408 if (igb->rx_hcksum_enable)
409 igb_rx_assoc_hcksum(mp, status_error);
410
411 *mblk_tail = mp;
412 mblk_tail = &mp->b_next;
413 }
414
415 /* Update per-ring rx statistics */
416 rx_ring->rx_pkts++;
417 rx_ring->rx_bytes += pkt_len;
418
419 rx_discard:
420 /*
421 * Reset rx descriptor read bits
422 */
423 current_rcb = rx_data->work_list[rx_next];
424 current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
425 current_rbd->read.hdr_addr = 0;
426
427 rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);
428
429 /*
430 * The receive function is in interrupt context, so here
431 * rx_limit_per_intr is used to avoid doing receiving too long
432 * per interrupt.
433 */
434 if (++pkt_num > igb->rx_limit_per_intr) {
435 IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
436 break;
437 }
438
439 current_rbd = &rx_data->rbd_ring[rx_next];
440 status_error = current_rbd->wb.upper.status_error;
441 }
442
443 DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);
444
445 rx_data->rbd_next = rx_next;
446
447 /*
448 * Update the h/w tail accordingly
449 */
450 rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);
451
452 E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);
453
454 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
455 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
456 atomic_or_32(&igb->igb_state, IGB_ERROR);
457 }
458
459 return (mblk_head);
460 }
461