xref: /titanic_51/usr/src/uts/common/io/hxge/hxge_rxdma.c (revision 96400bb6b8e1e66106f3b1de3cf1dbd8268e581c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <hxge_impl.h>
27 #include <hxge_rxdma.h>
28 #include <hpi.h>
29 #include <hpi_vir.h>
30 
31 /*
32  * Number of blocks to accumulate before re-enabling DMA
33  * when we get RBR empty.
34  */
35 #define	HXGE_RBR_EMPTY_THRESHOLD	64
36 
37 /*
38  * Globals: tunable parameters (/etc/system or adb)
39  *
40  */
41 extern uint32_t hxge_rbr_size;
42 extern uint32_t hxge_rcr_size;
43 extern uint32_t hxge_rbr_spare_size;
44 extern uint32_t hxge_mblks_pending;
45 
/*
 * Tunables to manage the receive buffer blocks.
 *
 * hxge_rx_threshold_hi: copy all buffers.
 * hxge_rx_buf_size_type: receive buffer block size type.
 * hxge_rx_threshold_lo: copy only up to tunable block size type.
 */
53 extern hxge_rxbuf_threshold_t hxge_rx_threshold_hi;
54 extern hxge_rxbuf_type_t hxge_rx_buf_size_type;
55 extern hxge_rxbuf_threshold_t hxge_rx_threshold_lo;
56 
57 /*
58  * Static local functions.
59  */
60 static hxge_status_t hxge_map_rxdma(p_hxge_t hxgep);
61 static void hxge_unmap_rxdma(p_hxge_t hxgep);
62 static hxge_status_t hxge_rxdma_hw_start_common(p_hxge_t hxgep);
63 static hxge_status_t hxge_rxdma_hw_start(p_hxge_t hxgep);
64 static void hxge_rxdma_hw_stop(p_hxge_t hxgep);
65 static hxge_status_t hxge_map_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
66     p_hxge_dma_common_t *dma_buf_p, p_rx_rbr_ring_t *rbr_p,
67     uint32_t num_chunks, p_hxge_dma_common_t *dma_rbr_cntl_p,
68     p_hxge_dma_common_t *dma_rcr_cntl_p, p_hxge_dma_common_t *dma_mbox_cntl_p,
69     p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p);
70 static void hxge_unmap_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
71 	p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p);
72 static hxge_status_t hxge_map_rxdma_channel_cfg_ring(p_hxge_t hxgep,
73     uint16_t dma_channel, p_hxge_dma_common_t *dma_rbr_cntl_p,
74     p_hxge_dma_common_t *dma_rcr_cntl_p, p_hxge_dma_common_t *dma_mbox_cntl_p,
75     p_rx_rbr_ring_t *rbr_p, p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p);
76 static void hxge_unmap_rxdma_channel_cfg_ring(p_hxge_t hxgep,
77 	p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p);
78 static hxge_status_t hxge_map_rxdma_channel_buf_ring(p_hxge_t hxgep,
79 	uint16_t channel, p_hxge_dma_common_t *dma_buf_p,
80 	p_rx_rbr_ring_t *rbr_p, uint32_t num_chunks);
81 static void hxge_unmap_rxdma_channel_buf_ring(p_hxge_t hxgep,
82 	p_rx_rbr_ring_t rbr_p);
83 static hxge_status_t hxge_rxdma_start_channel(p_hxge_t hxgep, uint16_t channel,
84 	p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t mbox_p,
85 	int n_init_kick);
86 static hxge_status_t hxge_rxdma_stop_channel(p_hxge_t hxgep, uint16_t channel);
87 static mblk_t *hxge_rx_pkts(p_hxge_t hxgep, uint_t vindex, p_hxge_ldv_t ldvp,
88 	p_rx_rcr_ring_t	rcr_p, rdc_stat_t cs, int bytes_to_read);
89 static uint32_t hxge_scan_for_last_eop(p_rx_rcr_ring_t rcr_p,
90     p_rcr_entry_t rcr_desc_rd_head_p, uint32_t num_rcrs);
91 static void hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p,
92 	p_rcr_entry_t rcr_desc_rd_head_p, boolean_t *multi_p,
93 	mblk_t ** mp, mblk_t ** mp_cont, uint32_t *invalid_rcr_entry);
94 static hxge_status_t hxge_disable_rxdma_channel(p_hxge_t hxgep,
95 	uint16_t channel);
96 static p_rx_msg_t hxge_allocb(size_t, uint32_t, p_hxge_dma_common_t);
97 static void hxge_freeb(p_rx_msg_t);
98 static hxge_status_t hxge_rx_err_evnts(p_hxge_t hxgep, uint_t index,
99 	p_hxge_ldv_t ldvp, rdc_stat_t cs);
100 static hxge_status_t hxge_rxbuf_index_info_init(p_hxge_t hxgep,
101 	p_rx_rbr_ring_t rx_dmap);
102 static hxge_status_t hxge_rxdma_fatal_err_recover(p_hxge_t hxgep,
103 	uint16_t channel);
104 static hxge_status_t hxge_rx_port_fatal_err_recover(p_hxge_t hxgep);
105 static void hxge_rbr_empty_restore(p_hxge_t hxgep,
106 	p_rx_rbr_ring_t rx_rbr_p);
107 
108 hxge_status_t
109 hxge_init_rxdma_channels(p_hxge_t hxgep)
110 {
111 	hxge_status_t		status = HXGE_OK;
112 	block_reset_t		reset_reg;
113 	int			i;
114 
115 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_init_rxdma_channels"));
116 
117 	for (i = 0; i < HXGE_MAX_RDCS; i++)
118 		hxgep->rdc_first_intr[i] = B_TRUE;
119 
120 	/* Reset RDC block from PEU to clear any previous state */
121 	reset_reg.value = 0;
122 	reset_reg.bits.rdc_rst = 1;
123 	HXGE_REG_WR32(hxgep->hpi_handle, BLOCK_RESET, reset_reg.value);
124 	HXGE_DELAY(1000);
125 
126 	status = hxge_map_rxdma(hxgep);
127 	if (status != HXGE_OK) {
128 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
129 		    "<== hxge_init_rxdma: status 0x%x", status));
130 		return (status);
131 	}
132 
133 	status = hxge_rxdma_hw_start_common(hxgep);
134 	if (status != HXGE_OK) {
135 		hxge_unmap_rxdma(hxgep);
136 	}
137 
138 	status = hxge_rxdma_hw_start(hxgep);
139 	if (status != HXGE_OK) {
140 		hxge_unmap_rxdma(hxgep);
141 	}
142 
143 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
144 	    "<== hxge_init_rxdma_channels: status 0x%x", status));
145 	return (status);
146 }
147 
148 void
149 hxge_uninit_rxdma_channels(p_hxge_t hxgep)
150 {
151 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_uninit_rxdma_channels"));
152 
153 	hxge_rxdma_hw_stop(hxgep);
154 	hxge_unmap_rxdma(hxgep);
155 
156 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_uinit_rxdma_channels"));
157 }
158 
159 hxge_status_t
160 hxge_init_rxdma_channel_cntl_stat(p_hxge_t hxgep, uint16_t channel,
161     rdc_stat_t *cs_p)
162 {
163 	hpi_handle_t	handle;
164 	hpi_status_t	rs = HPI_SUCCESS;
165 	hxge_status_t	status = HXGE_OK;
166 
167 	HXGE_DEBUG_MSG((hxgep, DMA_CTL,
168 	    "<== hxge_init_rxdma_channel_cntl_stat"));
169 
170 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
171 	rs = hpi_rxdma_control_status(handle, OP_SET, channel, cs_p);
172 
173 	if (rs != HPI_SUCCESS) {
174 		status = HXGE_ERROR | rs;
175 	}
176 	return (status);
177 }
178 
179 
180 hxge_status_t
181 hxge_enable_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
182     p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t mbox_p,
183     int n_init_kick)
184 {
185 	hpi_handle_t		handle;
186 	rdc_desc_cfg_t 		rdc_desc;
187 	rdc_rcr_cfg_b_t		*cfgb_p;
188 	hpi_status_t		rs = HPI_SUCCESS;
189 
190 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_enable_rxdma_channel"));
191 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
192 
193 	/*
194 	 * Use configuration data composed at init time. Write to hardware the
195 	 * receive ring configurations.
196 	 */
197 	rdc_desc.mbox_enable = 1;
198 	rdc_desc.mbox_addr = mbox_p->mbox_addr;
199 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
200 	    "==> hxge_enable_rxdma_channel: mboxp $%p($%p)",
201 	    mbox_p->mbox_addr, rdc_desc.mbox_addr));
202 
203 	rdc_desc.rbr_len = rbr_p->rbb_max;
204 	rdc_desc.rbr_addr = rbr_p->rbr_addr;
205 
206 	switch (hxgep->rx_bksize_code) {
207 	case RBR_BKSIZE_4K:
208 		rdc_desc.page_size = SIZE_4KB;
209 		break;
210 	case RBR_BKSIZE_8K:
211 		rdc_desc.page_size = SIZE_8KB;
212 		break;
213 	}
214 
215 	rdc_desc.size0 = rbr_p->hpi_pkt_buf_size0;
216 	rdc_desc.valid0 = 1;
217 
218 	rdc_desc.size1 = rbr_p->hpi_pkt_buf_size1;
219 	rdc_desc.valid1 = 1;
220 
221 	rdc_desc.size2 = rbr_p->hpi_pkt_buf_size2;
222 	rdc_desc.valid2 = 1;
223 
224 	rdc_desc.full_hdr = rcr_p->full_hdr_flag;
225 	rdc_desc.offset = rcr_p->sw_priv_hdr_len;
226 
227 	rdc_desc.rcr_len = rcr_p->comp_size;
228 	rdc_desc.rcr_addr = rcr_p->rcr_addr;
229 
230 	cfgb_p = &(rcr_p->rcr_cfgb);
231 	rdc_desc.rcr_threshold = cfgb_p->bits.pthres;
232 	rdc_desc.rcr_timeout = cfgb_p->bits.timeout;
233 	rdc_desc.rcr_timeout_enable = cfgb_p->bits.entout;
234 
235 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_enable_rxdma_channel: "
236 	    "rbr_len qlen %d pagesize code %d rcr_len %d",
237 	    rdc_desc.rbr_len, rdc_desc.page_size, rdc_desc.rcr_len));
238 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_enable_rxdma_channel: "
239 	    "size 0 %d size 1 %d size 2 %d",
240 	    rbr_p->hpi_pkt_buf_size0, rbr_p->hpi_pkt_buf_size1,
241 	    rbr_p->hpi_pkt_buf_size2));
242 
243 	rs = hpi_rxdma_cfg_rdc_ring(handle, rbr_p->rdc, &rdc_desc);
244 	if (rs != HPI_SUCCESS) {
245 		return (HXGE_ERROR | rs);
246 	}
247 
248 	/*
249 	 * Enable the timeout and threshold.
250 	 */
251 	rs = hpi_rxdma_cfg_rdc_rcr_threshold(handle, channel,
252 	    rdc_desc.rcr_threshold);
253 	if (rs != HPI_SUCCESS) {
254 		return (HXGE_ERROR | rs);
255 	}
256 
257 	rs = hpi_rxdma_cfg_rdc_rcr_timeout(handle, channel,
258 	    rdc_desc.rcr_timeout);
259 	if (rs != HPI_SUCCESS) {
260 		return (HXGE_ERROR | rs);
261 	}
262 
263 	/* Kick the DMA engine */
264 	hpi_rxdma_rdc_rbr_kick(handle, channel, n_init_kick);
265 
266 	/* Clear the rbr empty bit */
267 	(void) hpi_rxdma_channel_rbr_empty_clear(handle, channel);
268 
269 	/*
270 	 * Enable the DMA
271 	 */
272 	rs = hpi_rxdma_cfg_rdc_enable(handle, channel);
273 	if (rs != HPI_SUCCESS) {
274 		return (HXGE_ERROR | rs);
275 	}
276 
277 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "<== hxge_enable_rxdma_channel"));
278 
279 	return (HXGE_OK);
280 }
281 
282 static hxge_status_t
283 hxge_disable_rxdma_channel(p_hxge_t hxgep, uint16_t channel)
284 {
285 	hpi_handle_t handle;
286 	hpi_status_t rs = HPI_SUCCESS;
287 
288 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_disable_rxdma_channel"));
289 
290 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
291 
292 	/* disable the DMA */
293 	rs = hpi_rxdma_cfg_rdc_disable(handle, channel);
294 	if (rs != HPI_SUCCESS) {
295 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
296 		    "<== hxge_disable_rxdma_channel:failed (0x%x)", rs));
297 		return (HXGE_ERROR | rs);
298 	}
299 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "<== hxge_disable_rxdma_channel"));
300 	return (HXGE_OK);
301 }
302 
303 hxge_status_t
304 hxge_rxdma_channel_rcrflush(p_hxge_t hxgep, uint8_t channel)
305 {
306 	hpi_handle_t	handle;
307 	hxge_status_t	status = HXGE_OK;
308 
309 	HXGE_DEBUG_MSG((hxgep, DMA_CTL,
310 	    "==> hxge_rxdma_channel_rcrflush"));
311 
312 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
313 	hpi_rxdma_rdc_rcr_flush(handle, channel);
314 
315 	HXGE_DEBUG_MSG((hxgep, DMA_CTL,
316 	    "<== hxge_rxdma_channel_rcrflush"));
317 	return (status);
318 
319 }
320 
/*
 * Helpers for the chunk lookup in hxge_rxbuf_pp_to_vp().
 *
 * MID_INDEX yields the midpoint of two indices, rounded up.  Its arguments
 * and the negative constant are parenthesized so that the macros expand
 * correctly inside any surrounding expression.
 *
 * TO_LEFT/TO_RIGHT describe on which side of a chunk boundary an address
 * lies; summing the verdicts for the chunk base and the chunk end gives
 * BOTH_LEFT, BOTH_RIGHT or IN_MIDDLE.
 *
 * NO_HINT marks a lookup hint slot that holds no chunk index.
 */
#define	MID_INDEX(l, r) (((r) + (l) + 1) >> 1)

#define	TO_LEFT (-1)
#define	TO_RIGHT 1
#define	BOTH_RIGHT (TO_RIGHT + TO_RIGHT)
#define	BOTH_LEFT (TO_LEFT + TO_LEFT)
#define	IN_MIDDLE (TO_RIGHT + TO_LEFT)
#define	NO_HINT 0xffffffff
329 
330 /*ARGSUSED*/
331 hxge_status_t
332 hxge_rxbuf_pp_to_vp(p_hxge_t hxgep, p_rx_rbr_ring_t rbr_p,
333     uint8_t pktbufsz_type, uint64_t *pkt_buf_addr_pp,
334     uint64_t **pkt_buf_addr_p, uint32_t *bufoffset, uint32_t *msg_index)
335 {
336 	int			bufsize;
337 	uint64_t		pktbuf_pp;
338 	uint64_t		dvma_addr;
339 	rxring_info_t		*ring_info;
340 	int			base_side, end_side;
341 	int			r_index, l_index, anchor_index;
342 	int			found, search_done;
343 	uint32_t		offset, chunk_size, block_size, page_size_mask;
344 	uint32_t		chunk_index, block_index, total_index;
345 	int			max_iterations, iteration;
346 	rxbuf_index_info_t	*bufinfo;
347 
348 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "==> hxge_rxbuf_pp_to_vp"));
349 
350 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
351 	    "==> hxge_rxbuf_pp_to_vp: buf_pp $%p btype %d",
352 	    pkt_buf_addr_pp, pktbufsz_type));
353 
354 #if defined(__i386)
355 	pktbuf_pp = (uint64_t)(uint32_t)pkt_buf_addr_pp;
356 #else
357 	pktbuf_pp = (uint64_t)pkt_buf_addr_pp;
358 #endif
359 
360 	switch (pktbufsz_type) {
361 	case 0:
362 		bufsize = rbr_p->pkt_buf_size0;
363 		break;
364 	case 1:
365 		bufsize = rbr_p->pkt_buf_size1;
366 		break;
367 	case 2:
368 		bufsize = rbr_p->pkt_buf_size2;
369 		break;
370 	case RCR_SINGLE_BLOCK:
371 		bufsize = 0;
372 		anchor_index = 0;
373 		break;
374 	default:
375 		return (HXGE_ERROR);
376 	}
377 
378 	if (rbr_p->num_blocks == 1) {
379 		anchor_index = 0;
380 		ring_info = rbr_p->ring_info;
381 		bufinfo = (rxbuf_index_info_t *)ring_info->buffer;
382 
383 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
384 		    "==> hxge_rxbuf_pp_to_vp: (found, 1 block) "
385 		    "buf_pp $%p btype %d anchor_index %d bufinfo $%p",
386 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index, bufinfo));
387 
388 		goto found_index;
389 	}
390 
391 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
392 	    "==> hxge_rxbuf_pp_to_vp: buf_pp $%p btype %d anchor_index %d",
393 	    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
394 
395 	ring_info = rbr_p->ring_info;
396 	found = B_FALSE;
397 	bufinfo = (rxbuf_index_info_t *)ring_info->buffer;
398 	iteration = 0;
399 	max_iterations = ring_info->max_iterations;
400 
401 	/*
402 	 * First check if this block have been seen recently. This is indicated
403 	 * by a hint which is initialized when the first buffer of the block is
404 	 * seen. The hint is reset when the last buffer of the block has been
405 	 * processed. As three block sizes are supported, three hints are kept.
406 	 * The idea behind the hints is that once the hardware  uses a block
407 	 * for a buffer  of that size, it will use it exclusively for that size
408 	 * and will use it until it is exhausted. It is assumed that there
409 	 * would a single block being used for the same buffer sizes at any
410 	 * given time.
411 	 */
412 	if (ring_info->hint[pktbufsz_type] != NO_HINT) {
413 		anchor_index = ring_info->hint[pktbufsz_type];
414 		dvma_addr = bufinfo[anchor_index].dvma_addr;
415 		chunk_size = bufinfo[anchor_index].buf_size;
416 		if ((pktbuf_pp >= dvma_addr) &&
417 		    (pktbuf_pp < (dvma_addr + chunk_size))) {
418 			found = B_TRUE;
419 			/*
420 			 * check if this is the last buffer in the block If so,
421 			 * then reset the hint for the size;
422 			 */
423 
424 			if ((pktbuf_pp + bufsize) >= (dvma_addr + chunk_size))
425 				ring_info->hint[pktbufsz_type] = NO_HINT;
426 		}
427 	}
428 
429 	if (found == B_FALSE) {
430 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
431 		    "==> hxge_rxbuf_pp_to_vp: (!found)"
432 		    "buf_pp $%p btype %d anchor_index %d",
433 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
434 
435 		/*
436 		 * This is the first buffer of the block of this size. Need to
437 		 * search the whole information array. the search algorithm
438 		 * uses a binary tree search algorithm. It assumes that the
439 		 * information is already sorted with increasing order info[0]
440 		 * < info[1] < info[2]  .... < info[n-1] where n is the size of
441 		 * the information array
442 		 */
443 		r_index = rbr_p->num_blocks - 1;
444 		l_index = 0;
445 		search_done = B_FALSE;
446 		anchor_index = MID_INDEX(r_index, l_index);
447 		while (search_done == B_FALSE) {
448 			if ((r_index == l_index) ||
449 			    (iteration >= max_iterations))
450 				search_done = B_TRUE;
451 
452 			end_side = TO_RIGHT;	/* to the right */
453 			base_side = TO_LEFT;	/* to the left */
454 			/* read the DVMA address information and sort it */
455 			dvma_addr = bufinfo[anchor_index].dvma_addr;
456 			chunk_size = bufinfo[anchor_index].buf_size;
457 
458 			HXGE_DEBUG_MSG((hxgep, RX2_CTL,
459 			    "==> hxge_rxbuf_pp_to_vp: (searching)"
460 			    "buf_pp $%p btype %d "
461 			    "anchor_index %d chunk_size %d dvmaaddr $%p",
462 			    pkt_buf_addr_pp, pktbufsz_type, anchor_index,
463 			    chunk_size, dvma_addr));
464 
465 			if (pktbuf_pp >= dvma_addr)
466 				base_side = TO_RIGHT;	/* to the right */
467 			if (pktbuf_pp < (dvma_addr + chunk_size))
468 				end_side = TO_LEFT;	/* to the left */
469 
470 			switch (base_side + end_side) {
471 			case IN_MIDDLE:
472 				/* found */
473 				found = B_TRUE;
474 				search_done = B_TRUE;
475 				if ((pktbuf_pp + bufsize) <
476 				    (dvma_addr + chunk_size))
477 					ring_info->hint[pktbufsz_type] =
478 					    bufinfo[anchor_index].buf_index;
479 				break;
480 			case BOTH_RIGHT:
481 				/* not found: go to the right */
482 				l_index = anchor_index + 1;
483 				anchor_index = MID_INDEX(r_index, l_index);
484 				break;
485 
486 			case BOTH_LEFT:
487 				/* not found: go to the left */
488 				r_index = anchor_index - 1;
489 				anchor_index = MID_INDEX(r_index, l_index);
490 				break;
491 			default:	/* should not come here */
492 				return (HXGE_ERROR);
493 			}
494 			iteration++;
495 		}
496 
497 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
498 		    "==> hxge_rxbuf_pp_to_vp: (search done)"
499 		    "buf_pp $%p btype %d anchor_index %d",
500 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
501 	}
502 
503 	if (found == B_FALSE) {
504 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
505 		    "==> hxge_rxbuf_pp_to_vp: (search failed)"
506 		    "buf_pp $%p btype %d anchor_index %d",
507 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
508 		return (HXGE_ERROR);
509 	}
510 
511 found_index:
512 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
513 	    "==> hxge_rxbuf_pp_to_vp: (FOUND1)"
514 	    "buf_pp $%p btype %d bufsize %d anchor_index %d",
515 	    pkt_buf_addr_pp, pktbufsz_type, bufsize, anchor_index));
516 
517 	/* index of the first block in this chunk */
518 	chunk_index = bufinfo[anchor_index].start_index;
519 	dvma_addr = bufinfo[anchor_index].dvma_addr;
520 	page_size_mask = ring_info->block_size_mask;
521 
522 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
523 	    "==> hxge_rxbuf_pp_to_vp: (FOUND3), get chunk)"
524 	    "buf_pp $%p btype %d bufsize %d "
525 	    "anchor_index %d chunk_index %d dvma $%p",
526 	    pkt_buf_addr_pp, pktbufsz_type, bufsize,
527 	    anchor_index, chunk_index, dvma_addr));
528 
529 	offset = pktbuf_pp - dvma_addr;	/* offset within the chunk */
530 	block_size = rbr_p->block_size;	/* System  block(page) size */
531 
532 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
533 	    "==> hxge_rxbuf_pp_to_vp: (FOUND4), get chunk)"
534 	    "buf_pp $%p btype %d bufsize %d "
535 	    "anchor_index %d chunk_index %d dvma $%p "
536 	    "offset %d block_size %d",
537 	    pkt_buf_addr_pp, pktbufsz_type, bufsize, anchor_index,
538 	    chunk_index, dvma_addr, offset, block_size));
539 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "==> getting total index"));
540 
541 	block_index = (offset / block_size);	/* index within chunk */
542 	total_index = chunk_index + block_index;
543 
544 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
545 	    "==> hxge_rxbuf_pp_to_vp: "
546 	    "total_index %d dvma_addr $%p "
547 	    "offset %d block_size %d "
548 	    "block_index %d ",
549 	    total_index, dvma_addr, offset, block_size, block_index));
550 
551 #if defined(__i386)
552 	*pkt_buf_addr_p = (uint64_t *)((uint32_t)bufinfo[anchor_index].kaddr +
553 	    (uint32_t)offset);
554 #else
555 	*pkt_buf_addr_p = (uint64_t *)((uint64_t)bufinfo[anchor_index].kaddr +
556 	    offset);
557 #endif
558 
559 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
560 	    "==> hxge_rxbuf_pp_to_vp: "
561 	    "total_index %d dvma_addr $%p "
562 	    "offset %d block_size %d "
563 	    "block_index %d "
564 	    "*pkt_buf_addr_p $%p",
565 	    total_index, dvma_addr, offset, block_size,
566 	    block_index, *pkt_buf_addr_p));
567 
568 	*msg_index = total_index;
569 	*bufoffset = (offset & page_size_mask);
570 
571 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
572 	    "==> hxge_rxbuf_pp_to_vp: get msg index: "
573 	    "msg_index %d bufoffset_index %d",
574 	    *msg_index, *bufoffset));
575 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "<== hxge_rxbuf_pp_to_vp"));
576 
577 	return (HXGE_OK);
578 }
579 
580 
581 /*
582  * used by quick sort (qsort) function
583  * to perform comparison
584  */
585 static int
586 hxge_sort_compare(const void *p1, const void *p2)
587 {
588 
589 	rxbuf_index_info_t *a, *b;
590 
591 	a = (rxbuf_index_info_t *)p1;
592 	b = (rxbuf_index_info_t *)p2;
593 
594 	if (a->dvma_addr > b->dvma_addr)
595 		return (1);
596 	if (a->dvma_addr < b->dvma_addr)
597 		return (-1);
598 	return (0);
599 }
600 
/*
 * In-place shellsort, taken from common/syscall/avl.c, which in turn
 * adapted it from K&R (1st ed, p 58.).
 *
 *	v - start of the array of objects
 *	n - number of objects in the array
 *	s - size of one object in bytes; must be a multiple of 4 because
 *	    objects are exchanged one 32-bit word at a time
 *	f - comparison function for two objects; two objects are swapped
 *	    only when it returns exactly 1 (first greater than second)
 *
 * A NULL array or an array of fewer than two objects is left untouched.
 */
void
hxge_ksort(caddr_t v, int n, int s, int (*f) ())
{
	int		gap, cur, lo, w;
	int		nwords;
	unsigned int	*wa, *wb;
	unsigned int	word;

	/* Nothing to sort */
	if (v == NULL || n <= 1)
		return;

	/* The word-wise swap below needs aligned, word-sized objects */
	ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0);
	ASSERT(s > 0);

	nwords = s / 4;

	for (gap = n / 2; gap > 0; gap /= 2) {
		for (cur = gap; cur < n; cur++) {
			/*
			 * Walk back through the elements one gap apart for
			 * as long as a pair is out of order.
			 */
			lo = cur - gap;
			while (lo >= 0) {
				caddr_t	lower = v + lo * s;
				caddr_t	upper = v + (lo + gap) * s;

				if ((*f) (lower, upper) != 1)
					break;

				wa = (unsigned int *)lower;
				wb = (unsigned int *)upper;
				for (w = 0; w < nwords; w++) {
					word = *wa;
					*wa++ = *wb;
					*wb++ = word;
				}
				lo -= gap;
			}
		}
	}
}
640 
641 /*
642  * Initialize data structures required for rxdma
643  * buffer dvma->vmem address lookup
644  */
645 /*ARGSUSED*/
646 static hxge_status_t
647 hxge_rxbuf_index_info_init(p_hxge_t hxgep, p_rx_rbr_ring_t rbrp)
648 {
649 	int		index;
650 	rxring_info_t	*ring_info;
651 	int		max_iteration = 0, max_index = 0;
652 
653 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_rxbuf_index_info_init"));
654 
655 	ring_info = rbrp->ring_info;
656 	ring_info->hint[0] = NO_HINT;
657 	ring_info->hint[1] = NO_HINT;
658 	ring_info->hint[2] = NO_HINT;
659 	max_index = rbrp->num_blocks;
660 
661 	/* read the DVMA address information and sort it */
662 	/* do init of the information array */
663 
664 	HXGE_DEBUG_MSG((hxgep, DMA2_CTL,
665 	    " hxge_rxbuf_index_info_init Sort ptrs"));
666 
667 	/* sort the array */
668 	hxge_ksort((void *) ring_info->buffer, max_index,
669 	    sizeof (rxbuf_index_info_t), hxge_sort_compare);
670 
671 	for (index = 0; index < max_index; index++) {
672 		HXGE_DEBUG_MSG((hxgep, DMA2_CTL,
673 		    " hxge_rxbuf_index_info_init: sorted chunk %d "
674 		    " ioaddr $%p kaddr $%p size %x",
675 		    index, ring_info->buffer[index].dvma_addr,
676 		    ring_info->buffer[index].kaddr,
677 		    ring_info->buffer[index].buf_size));
678 	}
679 
680 	max_iteration = 0;
681 	while (max_index >= (1ULL << max_iteration))
682 		max_iteration++;
683 	ring_info->max_iterations = max_iteration + 1;
684 
685 	HXGE_DEBUG_MSG((hxgep, DMA2_CTL,
686 	    " hxge_rxbuf_index_info_init Find max iter %d",
687 	    ring_info->max_iterations));
688 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "<== hxge_rxbuf_index_info_init"));
689 
690 	return (HXGE_OK);
691 }
692 
693 /*ARGSUSED*/
694 void
695 hxge_dump_rcr_entry(p_hxge_t hxgep, p_rcr_entry_t entry_p)
696 {
697 #ifdef	HXGE_DEBUG
698 
699 	uint32_t bptr;
700 	uint64_t pp;
701 
702 	bptr = entry_p->bits.pkt_buf_addr;
703 
704 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
705 	    "\trcr entry $%p "
706 	    "\trcr entry 0x%0llx "
707 	    "\trcr entry 0x%08x "
708 	    "\trcr entry 0x%08x "
709 	    "\tvalue 0x%0llx\n"
710 	    "\tmulti = %d\n"
711 	    "\tpkt_type = 0x%x\n"
712 	    "\terror = 0x%04x\n"
713 	    "\tl2_len = %d\n"
714 	    "\tpktbufsize = %d\n"
715 	    "\tpkt_buf_addr = $%p\n"
716 	    "\tpkt_buf_addr (<< 6) = $%p\n",
717 	    entry_p,
718 	    *(int64_t *)entry_p,
719 	    *(int32_t *)entry_p,
720 	    *(int32_t *)((char *)entry_p + 32),
721 	    entry_p->value,
722 	    entry_p->bits.multi,
723 	    entry_p->bits.pkt_type,
724 	    entry_p->bits.error,
725 	    entry_p->bits.l2_len,
726 	    entry_p->bits.pktbufsz,
727 	    bptr,
728 	    entry_p->bits.pkt_buf_addr_l));
729 
730 	pp = (entry_p->value & RCR_PKT_BUF_ADDR_MASK) <<
731 	    RCR_PKT_BUF_ADDR_SHIFT;
732 
733 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "rcr pp 0x%llx l2 len %d",
734 	    pp, (*(int64_t *)entry_p >> 40) & 0x3fff));
735 #endif
736 }
737 
738 /*ARGSUSED*/
739 void
740 hxge_rxdma_stop(p_hxge_t hxgep)
741 {
742 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop"));
743 
744 	MUTEX_ENTER(&hxgep->vmac_lock);
745 	(void) hxge_rx_vmac_disable(hxgep);
746 	(void) hxge_rxdma_hw_mode(hxgep, HXGE_DMA_STOP);
747 	MUTEX_EXIT(&hxgep->vmac_lock);
748 
749 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_stop"));
750 }
751 
752 void
753 hxge_rxdma_stop_reinit(p_hxge_t hxgep)
754 {
755 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop_reinit"));
756 
757 	(void) hxge_rxdma_stop(hxgep);
758 	(void) hxge_uninit_rxdma_channels(hxgep);
759 	(void) hxge_init_rxdma_channels(hxgep);
760 
761 	MUTEX_ENTER(&hxgep->vmac_lock);
762 	(void) hxge_rx_vmac_enable(hxgep);
763 	MUTEX_EXIT(&hxgep->vmac_lock);
764 
765 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_stop_reinit"));
766 }
767 
768 hxge_status_t
769 hxge_rxdma_hw_mode(p_hxge_t hxgep, boolean_t enable)
770 {
771 	int			i, ndmas;
772 	uint16_t		channel;
773 	p_rx_rbr_rings_t	rx_rbr_rings;
774 	p_rx_rbr_ring_t		*rbr_rings;
775 	hpi_handle_t		handle;
776 	hpi_status_t		rs = HPI_SUCCESS;
777 	hxge_status_t		status = HXGE_OK;
778 
779 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
780 	    "==> hxge_rxdma_hw_mode: mode %d", enable));
781 
782 	if (!(hxgep->drv_state & STATE_HW_INITIALIZED)) {
783 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
784 		    "<== hxge_rxdma_mode: not initialized"));
785 		return (HXGE_ERROR);
786 	}
787 
788 	rx_rbr_rings = hxgep->rx_rbr_rings;
789 	if (rx_rbr_rings == NULL) {
790 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
791 		    "<== hxge_rxdma_mode: NULL ring pointer"));
792 		return (HXGE_ERROR);
793 	}
794 
795 	if (rx_rbr_rings->rbr_rings == NULL) {
796 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
797 		    "<== hxge_rxdma_mode: NULL rbr rings pointer"));
798 		return (HXGE_ERROR);
799 	}
800 
801 	ndmas = rx_rbr_rings->ndmas;
802 	if (!ndmas) {
803 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
804 		    "<== hxge_rxdma_mode: no channel"));
805 		return (HXGE_ERROR);
806 	}
807 
808 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
809 	    "==> hxge_rxdma_mode (ndmas %d)", ndmas));
810 
811 	rbr_rings = rx_rbr_rings->rbr_rings;
812 
813 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
814 
815 	for (i = 0; i < ndmas; i++) {
816 		if (rbr_rings == NULL || rbr_rings[i] == NULL) {
817 			continue;
818 		}
819 		channel = rbr_rings[i]->rdc;
820 		if (enable) {
821 			HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
822 			    "==> hxge_rxdma_hw_mode: channel %d (enable)",
823 			    channel));
824 			rs = hpi_rxdma_cfg_rdc_enable(handle, channel);
825 		} else {
826 			HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
827 			    "==> hxge_rxdma_hw_mode: channel %d (disable)",
828 			    channel));
829 			rs = hpi_rxdma_cfg_rdc_disable(handle, channel);
830 		}
831 	}
832 
833 	status = ((rs == HPI_SUCCESS) ? HXGE_OK : HXGE_ERROR | rs);
834 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
835 	    "<== hxge_rxdma_hw_mode: status 0x%x", status));
836 
837 	return (status);
838 }
839 
840 int
841 hxge_rxdma_get_ring_index(p_hxge_t hxgep, uint16_t channel)
842 {
843 	int			i, ndmas;
844 	uint16_t		rdc;
845 	p_rx_rbr_rings_t 	rx_rbr_rings;
846 	p_rx_rbr_ring_t		*rbr_rings;
847 
848 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
849 	    "==> hxge_rxdma_get_ring_index: channel %d", channel));
850 
851 	rx_rbr_rings = hxgep->rx_rbr_rings;
852 	if (rx_rbr_rings == NULL) {
853 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
854 		    "<== hxge_rxdma_get_ring_index: NULL ring pointer"));
855 		return (-1);
856 	}
857 
858 	ndmas = rx_rbr_rings->ndmas;
859 	if (!ndmas) {
860 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
861 		    "<== hxge_rxdma_get_ring_index: no channel"));
862 		return (-1);
863 	}
864 
865 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
866 	    "==> hxge_rxdma_get_ring_index (ndmas %d)", ndmas));
867 
868 	rbr_rings = rx_rbr_rings->rbr_rings;
869 	for (i = 0; i < ndmas; i++) {
870 		rdc = rbr_rings[i]->rdc;
871 		if (channel == rdc) {
872 			HXGE_DEBUG_MSG((hxgep, RX_CTL,
873 			    "==> hxge_rxdma_get_rbr_ring: "
874 			    "channel %d (index %d) "
875 			    "ring %d", channel, i, rbr_rings[i]));
876 
877 			return (i);
878 		}
879 	}
880 
881 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
882 	    "<== hxge_rxdma_get_rbr_ring_index: not found"));
883 
884 	return (-1);
885 }
886 
887 /*
888  * Static functions start here.
889  */
890 static p_rx_msg_t
891 hxge_allocb(size_t size, uint32_t pri, p_hxge_dma_common_t dmabuf_p)
892 {
893 	p_rx_msg_t		hxge_mp = NULL;
894 	p_hxge_dma_common_t	dmamsg_p;
895 	uchar_t			*buffer;
896 
897 	hxge_mp = KMEM_ZALLOC(sizeof (rx_msg_t), KM_NOSLEEP);
898 	if (hxge_mp == NULL) {
899 		HXGE_ERROR_MSG((NULL, HXGE_ERR_CTL,
900 		    "Allocation of a rx msg failed."));
901 		goto hxge_allocb_exit;
902 	}
903 
904 	hxge_mp->use_buf_pool = B_FALSE;
905 	if (dmabuf_p) {
906 		hxge_mp->use_buf_pool = B_TRUE;
907 
908 		dmamsg_p = (p_hxge_dma_common_t)&hxge_mp->buf_dma;
909 		*dmamsg_p = *dmabuf_p;
910 		dmamsg_p->nblocks = 1;
911 		dmamsg_p->block_size = size;
912 		dmamsg_p->alength = size;
913 		buffer = (uchar_t *)dmabuf_p->kaddrp;
914 
915 		dmabuf_p->kaddrp = (void *)((char *)dmabuf_p->kaddrp + size);
916 		dmabuf_p->ioaddr_pp = (void *)
917 		    ((char *)dmabuf_p->ioaddr_pp + size);
918 
919 		dmabuf_p->alength -= size;
920 		dmabuf_p->offset += size;
921 		dmabuf_p->dma_cookie.dmac_laddress += size;
922 		dmabuf_p->dma_cookie.dmac_size -= size;
923 	} else {
924 		buffer = KMEM_ALLOC(size, KM_NOSLEEP);
925 		if (buffer == NULL) {
926 			HXGE_ERROR_MSG((NULL, HXGE_ERR_CTL,
927 			    "Allocation of a receive page failed."));
928 			goto hxge_allocb_fail1;
929 		}
930 	}
931 
932 	hxge_mp->rx_mblk_p = desballoc(buffer, size, pri, &hxge_mp->freeb);
933 	if (hxge_mp->rx_mblk_p == NULL) {
934 		HXGE_ERROR_MSG((NULL, HXGE_ERR_CTL, "desballoc failed."));
935 		goto hxge_allocb_fail2;
936 	}
937 	hxge_mp->buffer = buffer;
938 	hxge_mp->block_size = size;
939 	hxge_mp->freeb.free_func = (void (*) ()) hxge_freeb;
940 	hxge_mp->freeb.free_arg = (caddr_t)hxge_mp;
941 	hxge_mp->ref_cnt = 1;
942 	hxge_mp->free = B_TRUE;
943 	hxge_mp->rx_use_bcopy = B_FALSE;
944 
945 	atomic_inc_32(&hxge_mblks_pending);
946 
947 	goto hxge_allocb_exit;
948 
949 hxge_allocb_fail2:
950 	if (!hxge_mp->use_buf_pool) {
951 		KMEM_FREE(buffer, size);
952 	}
953 hxge_allocb_fail1:
954 	KMEM_FREE(hxge_mp, sizeof (rx_msg_t));
955 	hxge_mp = NULL;
956 
957 hxge_allocb_exit:
958 	return (hxge_mp);
959 }
960 
961 p_mblk_t
962 hxge_dupb(p_rx_msg_t hxge_mp, uint_t offset, size_t size)
963 {
964 	p_mblk_t mp;
965 
966 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "==> hxge_dupb"));
967 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "hxge_mp = $%p "
968 	    "offset = 0x%08X " "size = 0x%08X", hxge_mp, offset, size));
969 
970 	mp = desballoc(&hxge_mp->buffer[offset], size, 0, &hxge_mp->freeb);
971 	if (mp == NULL) {
972 		HXGE_DEBUG_MSG((NULL, RX_CTL, "desballoc failed"));
973 		goto hxge_dupb_exit;
974 	}
975 
976 	atomic_inc_32(&hxge_mp->ref_cnt);
977 
978 hxge_dupb_exit:
979 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "<== hxge_dupb mp = $%p", hxge_mp));
980 	return (mp);
981 }
982 
983 p_mblk_t
984 hxge_dupb_bcopy(p_rx_msg_t hxge_mp, uint_t offset, size_t size)
985 {
986 	p_mblk_t	mp;
987 	uchar_t		*dp;
988 
989 	mp = allocb(size + HXGE_RXBUF_EXTRA, 0);
990 	if (mp == NULL) {
991 		HXGE_DEBUG_MSG((NULL, RX_CTL, "desballoc failed"));
992 		goto hxge_dupb_bcopy_exit;
993 	}
994 	dp = mp->b_rptr = mp->b_rptr + HXGE_RXBUF_EXTRA;
995 	bcopy((void *) &hxge_mp->buffer[offset], dp, size);
996 	mp->b_wptr = dp + size;
997 
998 hxge_dupb_bcopy_exit:
999 
1000 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "<== hxge_dupb mp = $%p", hxge_mp));
1001 
1002 	return (mp);
1003 }
1004 
1005 void hxge_post_page(p_hxge_t hxgep, p_rx_rbr_ring_t rx_rbr_p,
1006     p_rx_msg_t rx_msg_p);
1007 
1008 void
1009 hxge_post_page(p_hxge_t hxgep, p_rx_rbr_ring_t rx_rbr_p, p_rx_msg_t rx_msg_p)
1010 {
1011 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_post_page"));
1012 
1013 	/* Reuse this buffer */
1014 	rx_msg_p->free = B_FALSE;
1015 	rx_msg_p->cur_usage_cnt = 0;
1016 	rx_msg_p->max_usage_cnt = 0;
1017 	rx_msg_p->pkt_buf_size = 0;
1018 
1019 	if (rx_rbr_p->rbr_use_bcopy) {
1020 		rx_msg_p->rx_use_bcopy = B_FALSE;
1021 		atomic_dec_32(&rx_rbr_p->rbr_consumed);
1022 	}
1023 	atomic_dec_32(&rx_rbr_p->rbr_used);
1024 
1025 	/*
1026 	 * Get the rbr header pointer and its offset index.
1027 	 */
1028 	rx_rbr_p->rbr_wr_index = ((rx_rbr_p->rbr_wr_index + 1) &
1029 	    rx_rbr_p->rbr_wrap_mask);
1030 	rx_rbr_p->rbr_desc_vp[rx_rbr_p->rbr_wr_index] = rx_msg_p->shifted_addr;
1031 
1032 	/*
1033 	 * Accumulate some buffers in the ring before re-enabling the
1034 	 * DMA channel, if rbr empty was signaled.
1035 	 */
1036 	hpi_rxdma_rdc_rbr_kick(HXGE_DEV_HPI_HANDLE(hxgep), rx_rbr_p->rdc, 1);
1037 	if (rx_rbr_p->rbr_is_empty &&
1038 	    (rx_rbr_p->rbb_max - rx_rbr_p->rbr_used) >=
1039 	    HXGE_RBR_EMPTY_THRESHOLD) {
1040 		hxge_rbr_empty_restore(hxgep, rx_rbr_p);
1041 	}
1042 
1043 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1044 	    "<== hxge_post_page (channel %d post_next_index %d)",
1045 	    rx_rbr_p->rdc, rx_rbr_p->rbr_wr_index));
1046 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_post_page"));
1047 }
1048 
1049 void
1050 hxge_freeb(p_rx_msg_t rx_msg_p)
1051 {
1052 	size_t		size;
1053 	uchar_t		*buffer = NULL;
1054 	int		ref_cnt;
1055 	boolean_t	free_state = B_FALSE;
1056 	rx_rbr_ring_t	*ring = rx_msg_p->rx_rbr_p;
1057 
1058 	HXGE_DEBUG_MSG((NULL, MEM2_CTL, "==> hxge_freeb"));
1059 	HXGE_DEBUG_MSG((NULL, MEM2_CTL,
1060 	    "hxge_freeb:rx_msg_p = $%p (block pending %d)",
1061 	    rx_msg_p, hxge_mblks_pending));
1062 
1063 	if (ring == NULL)
1064 		return;
1065 
1066 	/*
1067 	 * This is to prevent posting activities while we are recovering
1068 	 * from fatal errors. This should not be a performance drag since
1069 	 * ref_cnt != 0 most times.
1070 	 */
1071 	if (ring->rbr_state == RBR_POSTING)
1072 		MUTEX_ENTER(&ring->post_lock);
1073 
1074 	/*
1075 	 * First we need to get the free state, then
1076 	 * atomic decrement the reference count to prevent
1077 	 * the race condition with the interrupt thread that
1078 	 * is processing a loaned up buffer block.
1079 	 */
1080 	free_state = rx_msg_p->free;
1081 	ref_cnt = atomic_add_32_nv(&rx_msg_p->ref_cnt, -1);
1082 	if (!ref_cnt) {
1083 		atomic_dec_32(&hxge_mblks_pending);
1084 
1085 		buffer = rx_msg_p->buffer;
1086 		size = rx_msg_p->block_size;
1087 
1088 		HXGE_DEBUG_MSG((NULL, MEM2_CTL, "hxge_freeb: "
1089 		    "will free: rx_msg_p = $%p (block pending %d)",
1090 		    rx_msg_p, hxge_mblks_pending));
1091 
1092 		if (!rx_msg_p->use_buf_pool) {
1093 			KMEM_FREE(buffer, size);
1094 		}
1095 
1096 		KMEM_FREE(rx_msg_p, sizeof (rx_msg_t));
1097 		/*
1098 		 * Decrement the receive buffer ring's reference
1099 		 * count, too.
1100 		 */
1101 		atomic_dec_32(&ring->rbr_ref_cnt);
1102 
1103 		/*
1104 		 * Free the receive buffer ring, iff
1105 		 * 1. all the receive buffers have been freed
1106 		 * 2. and we are in the proper state (that is,
1107 		 *    we are not UNMAPPING).
1108 		 */
1109 		if (ring->rbr_ref_cnt == 0 &&
1110 		    ring->rbr_state == RBR_UNMAPPED) {
1111 			KMEM_FREE(ring, sizeof (*ring));
1112 			/* post_lock has been destroyed already */
1113 			return;
1114 		}
1115 	}
1116 
1117 	/*
1118 	 * Repost buffer.
1119 	 */
1120 	if (free_state && (ref_cnt == 1)) {
1121 		HXGE_DEBUG_MSG((NULL, RX_CTL,
1122 		    "hxge_freeb: post page $%p:", rx_msg_p));
1123 		if (ring->rbr_state == RBR_POSTING)
1124 			hxge_post_page(rx_msg_p->hxgep, ring, rx_msg_p);
1125 	}
1126 
1127 	if (ring->rbr_state == RBR_POSTING)
1128 		MUTEX_EXIT(&ring->post_lock);
1129 
1130 	HXGE_DEBUG_MSG((NULL, MEM2_CTL, "<== hxge_freeb"));
1131 }
1132 
1133 uint_t
1134 hxge_rx_intr(caddr_t arg1, caddr_t arg2)
1135 {
1136 	p_hxge_ring_handle_t	rhp;
1137 	p_hxge_ldv_t		ldvp = (p_hxge_ldv_t)arg1;
1138 	p_hxge_t		hxgep = (p_hxge_t)arg2;
1139 	p_hxge_ldg_t		ldgp;
1140 	uint8_t			channel;
1141 	hpi_handle_t		handle;
1142 	rdc_stat_t		cs;
1143 	p_rx_rcr_ring_t		ring;
1144 	mblk_t			*mp = NULL;
1145 
1146 	if (ldvp == NULL) {
1147 		HXGE_DEBUG_MSG((NULL, RX_INT_CTL,
1148 		    "<== hxge_rx_intr: arg2 $%p arg1 $%p", hxgep, ldvp));
1149 		return (DDI_INTR_UNCLAIMED);
1150 	}
1151 
1152 	if (arg2 == NULL || (void *) ldvp->hxgep != arg2) {
1153 		hxgep = ldvp->hxgep;
1154 	}
1155 
1156 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1157 	    "==> hxge_rx_intr: arg2 $%p arg1 $%p", hxgep, ldvp));
1158 
1159 	/*
1160 	 * This interrupt handler is for a specific receive dma channel.
1161 	 */
1162 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
1163 
1164 	/*
1165 	 * Get the control and status for this channel.
1166 	 */
1167 	channel = ldvp->vdma_index;
1168 	ring = hxgep->rx_rcr_rings->rcr_rings[channel];
1169 	rhp = &hxgep->rx_ring_handles[channel];
1170 	ldgp = ldvp->ldgp;
1171 
1172 	ASSERT(ring != NULL);
1173 	ASSERT(ring->ldgp == ldgp);
1174 	ASSERT(ring->ldvp == ldvp);
1175 
1176 	MUTEX_ENTER(&ring->lock);
1177 
1178 	if (!ring->poll_flag) {
1179 		RXDMA_REG_READ64(handle, RDC_STAT, channel, &cs.value);
1180 		cs.bits.ptrread = 0;
1181 		cs.bits.pktread = 0;
1182 		RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1183 
1184 		/*
1185 		 * Process packets, if we are not in polling mode, the ring is
1186 		 * started and the interface is started. The MAC layer under
1187 		 * load will be operating in polling mode for RX traffic.
1188 		 */
1189 		if ((rhp->started) &&
1190 		    (hxgep->hxge_mac_state == HXGE_MAC_STARTED)) {
1191 			mp = hxge_rx_pkts(hxgep, ldvp->vdma_index,
1192 			    ldvp, ring, cs, -1);
1193 		}
1194 
1195 		/* Process error events. */
1196 		if (cs.value & RDC_STAT_ERROR) {
1197 			MUTEX_EXIT(&ring->lock);
1198 			(void) hxge_rx_err_evnts(hxgep, channel, ldvp, cs);
1199 			MUTEX_ENTER(&ring->lock);
1200 		}
1201 
1202 		/*
1203 		 * Enable the mailbox update interrupt if we want to use
1204 		 * mailbox. We probably don't need to use mailbox as it only
1205 		 * saves us one pio read.  Also write 1 to rcrthres and
1206 		 * rcrto to clear these two edge triggered bits.
1207 		 */
1208 		cs.value &= RDC_STAT_WR1C;
1209 		cs.bits.mex = 1;
1210 		cs.bits.ptrread = 0;
1211 		cs.bits.pktread = 0;
1212 		RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1213 
1214 		if (ldgp->nldvs == 1) {
1215 			/*
1216 			 * Re-arm the group.
1217 			 */
1218 			(void) hpi_intr_ldg_mgmt_set(handle, ldgp->ldg, B_TRUE,
1219 			    ldgp->ldg_timer);
1220 		}
1221 	} else if ((ldgp->nldvs == 1) && (ring->poll_flag)) {
1222 		/*
1223 		 * Disarm the group, if we are not a shared interrupt.
1224 		 */
1225 		(void) hpi_intr_ldg_mgmt_set(handle, ldgp->ldg, B_FALSE, 0);
1226 	} else if (ring->poll_flag) {
1227 		/*
1228 		 * Mask-off this device from the group.
1229 		 */
1230 		(void) hpi_intr_mask_set(handle, ldvp->ldv, 1);
1231 	}
1232 
1233 	MUTEX_EXIT(&ring->lock);
1234 
1235 	/*
1236 	 * Send the packets up the stack.
1237 	 */
1238 	if (mp != NULL) {
1239 		mac_rx_ring(hxgep->mach, ring->rcr_mac_handle, mp,
1240 		    ring->rcr_gen_num);
1241 	}
1242 
1243 	HXGE_DEBUG_MSG((NULL, RX_INT_CTL, "<== hxge_rx_intr"));
1244 	return (DDI_INTR_CLAIMED);
1245 }
1246 
1247 /*
1248  * Enable polling for a ring. Interrupt for the ring is disabled when
1249  * the hxge interrupt comes (see hxge_rx_intr).
1250  */
1251 int
1252 hxge_enable_poll(void *arg)
1253 {
1254 	p_hxge_ring_handle_t	ring_handle = (p_hxge_ring_handle_t)arg;
1255 	p_rx_rcr_ring_t		ringp;
1256 	p_hxge_t		hxgep;
1257 	p_hxge_ldg_t		ldgp;
1258 
1259 	if (ring_handle == NULL) {
1260 		ASSERT(ring_handle != NULL);
1261 		return (1);
1262 	}
1263 
1264 
1265 	hxgep = ring_handle->hxgep;
1266 	ringp = hxgep->rx_rcr_rings->rcr_rings[ring_handle->index];
1267 
1268 	MUTEX_ENTER(&ringp->lock);
1269 
1270 	/*
1271 	 * Are we already polling ?
1272 	 */
1273 	if (ringp->poll_flag) {
1274 		MUTEX_EXIT(&ringp->lock);
1275 		return (1);
1276 	}
1277 
1278 	ldgp = ringp->ldgp;
1279 	if (ldgp == NULL) {
1280 		MUTEX_EXIT(&ringp->lock);
1281 		return (1);
1282 	}
1283 
1284 	/*
1285 	 * Enable polling
1286 	 */
1287 	ringp->poll_flag = B_TRUE;
1288 
1289 	MUTEX_EXIT(&ringp->lock);
1290 	return (0);
1291 }
1292 
1293 /*
1294  * Disable polling for a ring and enable its interrupt.
1295  */
1296 int
1297 hxge_disable_poll(void *arg)
1298 {
1299 	p_hxge_ring_handle_t	ring_handle = (p_hxge_ring_handle_t)arg;
1300 	p_rx_rcr_ring_t		ringp;
1301 	p_hxge_t		hxgep;
1302 
1303 	if (ring_handle == NULL) {
1304 		ASSERT(ring_handle != NULL);
1305 		return (0);
1306 	}
1307 
1308 	hxgep = ring_handle->hxgep;
1309 	ringp = hxgep->rx_rcr_rings->rcr_rings[ring_handle->index];
1310 
1311 	MUTEX_ENTER(&ringp->lock);
1312 
1313 	/*
1314 	 * Disable polling: enable interrupt
1315 	 */
1316 	if (ringp->poll_flag) {
1317 		hpi_handle_t		handle;
1318 		rdc_stat_t		cs;
1319 		p_hxge_ldg_t		ldgp;
1320 
1321 		/*
1322 		 * Get the control and status for this channel.
1323 		 */
1324 		handle = HXGE_DEV_HPI_HANDLE(hxgep);
1325 
1326 		/*
1327 		 * Rearm this logical group if this is a single device
1328 		 * group.
1329 		 */
1330 		ldgp = ringp->ldgp;
1331 		if (ldgp == NULL) {
1332 			MUTEX_EXIT(&ringp->lock);
1333 			return (1);
1334 		}
1335 
1336 		ringp->poll_flag = B_FALSE;
1337 
1338 		/*
1339 		 * Enable mailbox update, to start interrupts again.
1340 		 */
1341 		cs.value = 0ULL;
1342 		cs.bits.mex = 1;
1343 		cs.bits.pktread = 0;
1344 		cs.bits.ptrread = 0;
1345 		RXDMA_REG_WRITE64(handle, RDC_STAT, ringp->rdc, cs.value);
1346 
1347 		if (ldgp->nldvs == 1) {
1348 			/*
1349 			 * Re-arm the group, since it is the only member
1350 			 * of the group.
1351 			 */
1352 			(void) hpi_intr_ldg_mgmt_set(handle, ldgp->ldg, B_TRUE,
1353 			    ldgp->ldg_timer);
1354 		} else {
1355 			/*
1356 			 * Mask-on interrupts for the device and re-arm
1357 			 * the group.
1358 			 */
1359 			(void) hpi_intr_mask_set(handle, ringp->ldvp->ldv, 0);
1360 			(void) hpi_intr_ldg_mgmt_set(handle, ldgp->ldg, B_TRUE,
1361 			    ldgp->ldg_timer);
1362 		}
1363 	}
1364 	MUTEX_EXIT(&ringp->lock);
1365 	return (0);
1366 }
1367 
1368 /*
1369  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
1370  */
1371 mblk_t *
1372 hxge_rx_poll(void *arg, int bytes_to_pickup)
1373 {
1374 	p_hxge_ring_handle_t	rhp = (p_hxge_ring_handle_t)arg;
1375 	p_rx_rcr_ring_t		ring;
1376 	p_hxge_t		hxgep;
1377 	hpi_handle_t		handle;
1378 	rdc_stat_t		cs;
1379 	mblk_t			*mblk;
1380 	p_hxge_ldv_t		ldvp;
1381 
1382 	hxgep = rhp->hxgep;
1383 
1384 	/*
1385 	 * Get the control and status for this channel.
1386 	 */
1387 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
1388 	ring = hxgep->rx_rcr_rings->rcr_rings[rhp->index];
1389 
1390 	MUTEX_ENTER(&ring->lock);
1391 	ASSERT(ring->poll_flag == B_TRUE);
1392 	ASSERT(rhp->started);
1393 
1394 	if (!ring->poll_flag) {
1395 		MUTEX_EXIT(&ring->lock);
1396 		return ((mblk_t *)NULL);
1397 	}
1398 
1399 	/*
1400 	 * Get the control and status bits for the ring.
1401 	 */
1402 	RXDMA_REG_READ64(handle, RDC_STAT, rhp->index, &cs.value);
1403 	cs.bits.ptrread = 0;
1404 	cs.bits.pktread = 0;
1405 	RXDMA_REG_WRITE64(handle, RDC_STAT, rhp->index, cs.value);
1406 
1407 	/*
1408 	 * Process packets.
1409 	 */
1410 	mblk = hxge_rx_pkts(hxgep, ring->ldvp->vdma_index,
1411 	    ring->ldvp, ring, cs, bytes_to_pickup);
1412 	ldvp = ring->ldvp;
1413 
1414 	/*
1415 	 * Process Error Events.
1416 	 */
1417 	if (ldvp && (cs.value & RDC_STAT_ERROR)) {
1418 		/*
1419 		 * Recovery routines will grab the RCR ring lock.
1420 		 */
1421 		MUTEX_EXIT(&ring->lock);
1422 		(void) hxge_rx_err_evnts(hxgep, ldvp->vdma_index, ldvp, cs);
1423 		MUTEX_ENTER(&ring->lock);
1424 	}
1425 
1426 	/*
1427 	 * Clear any control and status bits and update
1428 	 * the hardware.
1429 	 */
1430 	cs.value &= RDC_STAT_WR1C;
1431 	cs.bits.ptrread = 0;
1432 	cs.bits.pktread = 0;
1433 	RXDMA_REG_WRITE64(handle, RDC_STAT, rhp->index, cs.value);
1434 
1435 	MUTEX_EXIT(&ring->lock);
1436 	return (mblk);
1437 }
1438 
1439 /*ARGSUSED*/
1440 mblk_t *
1441 hxge_rx_pkts(p_hxge_t hxgep, uint_t vindex, p_hxge_ldv_t ldvp,
1442     p_rx_rcr_ring_t rcrp, rdc_stat_t cs, int bytes_to_read)
1443 {
1444 	hpi_handle_t		handle;
1445 	uint8_t			channel;
1446 	uint32_t		comp_rd_index;
1447 	p_rcr_entry_t		rcr_desc_rd_head_p;
1448 	p_rcr_entry_t		rcr_desc_rd_head_pp;
1449 	p_mblk_t		nmp, mp_cont, head_mp, *tail_mp;
1450 	uint16_t		qlen, nrcr_read, npkt_read;
1451 	uint32_t		qlen_hw, qlen_sw, num_rcrs;
1452 	uint32_t		invalid_rcr_entry;
1453 	boolean_t		multi;
1454 	rdc_rcr_cfg_b_t		rcr_cfg_b;
1455 	uint64_t		rcr_head_index, rcr_tail_index;
1456 	uint64_t		rcr_tail;
1457 	rdc_rcr_tail_t		rcr_tail_reg;
1458 	p_hxge_rx_ring_stats_t	rdc_stats;
1459 	int			totallen = 0;
1460 
1461 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "==> hxge_rx_pkts:vindex %d "
1462 	    "channel %d", vindex, ldvp->channel));
1463 
1464 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
1465 	channel = rcrp->rdc;
1466 	if (channel != ldvp->channel) {
1467 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "==> hxge_rx_pkts:index %d "
1468 		    "channel %d, and rcr channel %d not matched.",
1469 		    vindex, ldvp->channel, channel));
1470 		return (NULL);
1471 	}
1472 
1473 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1474 	    "==> hxge_rx_pkts: START: rcr channel %d "
1475 	    "head_p $%p head_pp $%p  index %d ",
1476 	    channel, rcrp->rcr_desc_rd_head_p,
1477 	    rcrp->rcr_desc_rd_head_pp, rcrp->comp_rd_index));
1478 
1479 	(void) hpi_rxdma_rdc_rcr_qlen_get(handle, channel, &qlen);
1480 	RXDMA_REG_READ64(handle, RDC_RCR_TAIL, channel, &rcr_tail_reg.value);
1481 	rcr_tail = rcr_tail_reg.bits.tail;
1482 
1483 	if (!qlen) {
1484 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1485 		    "<== hxge_rx_pkts:rcr channel %d qlen %d (no pkts)",
1486 		    channel, qlen));
1487 		return (NULL);
1488 	}
1489 
1490 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rx_pkts:rcr channel %d "
1491 	    "qlen %d", channel, qlen));
1492 
1493 	comp_rd_index = rcrp->comp_rd_index;
1494 
1495 	rcr_desc_rd_head_p = rcrp->rcr_desc_rd_head_p;
1496 	rcr_desc_rd_head_pp = rcrp->rcr_desc_rd_head_pp;
1497 	nrcr_read = npkt_read = 0;
1498 
1499 	if (hxgep->rdc_first_intr[channel])
1500 		qlen_hw = qlen;
1501 	else
1502 		qlen_hw = qlen - 1;
1503 
1504 	head_mp = NULL;
1505 	tail_mp = &head_mp;
1506 	nmp = mp_cont = NULL;
1507 	multi = B_FALSE;
1508 
1509 	rcr_head_index = rcrp->rcr_desc_rd_head_p - rcrp->rcr_desc_first_p;
1510 	rcr_tail_index = rcr_tail - rcrp->rcr_tail_begin;
1511 
1512 	if (rcr_tail_index >= rcr_head_index) {
1513 		num_rcrs = rcr_tail_index - rcr_head_index;
1514 	} else {
1515 		/* rcr_tail has wrapped around */
1516 		num_rcrs = (rcrp->comp_size - rcr_head_index) + rcr_tail_index;
1517 	}
1518 
1519 	qlen_sw = hxge_scan_for_last_eop(rcrp, rcr_desc_rd_head_p, num_rcrs);
1520 	if (!qlen_sw)
1521 		return (NULL);
1522 
1523 	if (qlen_hw > qlen_sw) {
1524 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1525 		    "Channel %d, rcr_qlen from reg %d and from rcr_tail %d\n",
1526 		    channel, qlen_hw, qlen_sw));
1527 		qlen_hw = qlen_sw;
1528 	}
1529 
1530 	while (qlen_hw) {
1531 #ifdef HXGE_DEBUG
1532 		hxge_dump_rcr_entry(hxgep, rcr_desc_rd_head_p);
1533 #endif
1534 		/*
1535 		 * Process one completion ring entry.
1536 		 */
1537 		invalid_rcr_entry = 0;
1538 		hxge_receive_packet(hxgep,
1539 		    rcrp, rcr_desc_rd_head_p, &multi, &nmp, &mp_cont,
1540 		    &invalid_rcr_entry);
1541 		if (invalid_rcr_entry != 0) {
1542 			rdc_stats = rcrp->rdc_stats;
1543 			rdc_stats->rcr_invalids++;
1544 			HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1545 			    "Channel %d could only read 0x%x packets, "
1546 			    "but 0x%x pending\n", channel, npkt_read, qlen_hw));
1547 			break;
1548 		}
1549 
1550 		/*
1551 		 * message chaining modes (nemo msg chaining)
1552 		 */
1553 		if (nmp) {
1554 			nmp->b_next = NULL;
1555 			if (!multi && !mp_cont) { /* frame fits a partition */
1556 				*tail_mp = nmp;
1557 				tail_mp = &nmp->b_next;
1558 				nmp = NULL;
1559 			} else if (multi && !mp_cont) { /* first segment */
1560 				*tail_mp = nmp;
1561 				tail_mp = &nmp->b_cont;
1562 			} else if (multi && mp_cont) {	/* mid of multi segs */
1563 				*tail_mp = mp_cont;
1564 				tail_mp = &mp_cont->b_cont;
1565 			} else if (!multi && mp_cont) { /* last segment */
1566 				*tail_mp = mp_cont;
1567 				tail_mp = &nmp->b_next;
1568 				totallen += MBLKL(mp_cont);
1569 				nmp = NULL;
1570 			}
1571 		}
1572 
1573 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1574 		    "==> hxge_rx_pkts: loop: rcr channel %d "
1575 		    "before updating: multi %d "
1576 		    "nrcr_read %d "
1577 		    "npk read %d "
1578 		    "head_pp $%p  index %d ",
1579 		    channel, multi,
1580 		    nrcr_read, npkt_read, rcr_desc_rd_head_pp, comp_rd_index));
1581 
1582 		if (!multi) {
1583 			qlen_hw--;
1584 			npkt_read++;
1585 		}
1586 
1587 		/*
1588 		 * Update the next read entry.
1589 		 */
1590 		comp_rd_index = NEXT_ENTRY(comp_rd_index,
1591 		    rcrp->comp_wrap_mask);
1592 
1593 		rcr_desc_rd_head_p = NEXT_ENTRY_PTR(rcr_desc_rd_head_p,
1594 		    rcrp->rcr_desc_first_p, rcrp->rcr_desc_last_p);
1595 
1596 		nrcr_read++;
1597 
1598 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1599 		    "<== hxge_rx_pkts: (SAM, process one packet) "
1600 		    "nrcr_read %d", nrcr_read));
1601 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1602 		    "==> hxge_rx_pkts: loop: rcr channel %d "
1603 		    "multi %d nrcr_read %d npk read %d head_pp $%p  index %d ",
1604 		    channel, multi, nrcr_read, npkt_read, rcr_desc_rd_head_pp,
1605 		    comp_rd_index));
1606 
1607 		if ((bytes_to_read != -1) &&
1608 		    (totallen >= bytes_to_read)) {
1609 			break;
1610 		}
1611 	}
1612 
1613 	rcrp->rcr_desc_rd_head_pp = rcr_desc_rd_head_pp;
1614 	rcrp->comp_rd_index = comp_rd_index;
1615 	rcrp->rcr_desc_rd_head_p = rcr_desc_rd_head_p;
1616 
1617 	if ((hxgep->intr_timeout != rcrp->intr_timeout) ||
1618 	    (hxgep->intr_threshold != rcrp->intr_threshold)) {
1619 		rcrp->intr_timeout = hxgep->intr_timeout;
1620 		rcrp->intr_threshold = hxgep->intr_threshold;
1621 		rcr_cfg_b.value = 0x0ULL;
1622 		if (rcrp->intr_timeout)
1623 			rcr_cfg_b.bits.entout = 1;
1624 		rcr_cfg_b.bits.timeout = rcrp->intr_timeout;
1625 		rcr_cfg_b.bits.pthres = rcrp->intr_threshold;
1626 		RXDMA_REG_WRITE64(handle, RDC_RCR_CFG_B,
1627 		    channel, rcr_cfg_b.value);
1628 	}
1629 
1630 	if (hxgep->rdc_first_intr[channel] && (npkt_read > 0)) {
1631 		hxgep->rdc_first_intr[channel] = B_FALSE;
1632 		cs.bits.pktread = npkt_read - 1;
1633 	} else
1634 		cs.bits.pktread = npkt_read;
1635 	cs.bits.ptrread = nrcr_read;
1636 	cs.value &= 0xffffffffULL;
1637 	RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1638 
1639 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1640 	    "==> hxge_rx_pkts: EXIT: rcr channel %d "
1641 	    "head_pp $%p  index %016llx ",
1642 	    channel, rcrp->rcr_desc_rd_head_pp, rcrp->comp_rd_index));
1643 
1644 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "<== hxge_rx_pkts"));
1645 
1646 	return (head_mp);
1647 }
1648 
1649 #define	RCR_ENTRY_PATTERN	0x5a5a6b6b7c7c8d8dULL
1650 #define	NO_PORT_BIT		0x20
1651 #define	L4_CS_EQ_BIT		0x40
1652 
1653 static uint32_t hxge_scan_for_last_eop(p_rx_rcr_ring_t rcrp,
1654     p_rcr_entry_t rcr_desc_rd_head_p, uint32_t num_rcrs)
1655 {
1656 	uint64_t	rcr_entry;
1657 	uint32_t	rcrs = 0;
1658 	uint32_t	pkts = 0;
1659 
1660 	while (rcrs++ < num_rcrs) {
1661 		rcr_entry = *((uint64_t *)rcr_desc_rd_head_p);
1662 
1663 		if ((rcr_entry == 0x0) || (rcr_entry == RCR_ENTRY_PATTERN))
1664 			break;
1665 
1666 		if (!(rcr_entry & RCR_MULTI_MASK))
1667 			pkts++;
1668 
1669 		rcr_desc_rd_head_p = NEXT_ENTRY_PTR(rcr_desc_rd_head_p,
1670 		    rcrp->rcr_desc_first_p, rcrp->rcr_desc_last_p);
1671 	}
1672 
1673 	return (pkts);
1674 }
1675 
1676 /*ARGSUSED*/
1677 void
1678 hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p,
1679     p_rcr_entry_t rcr_desc_rd_head_p, boolean_t *multi_p, mblk_t **mp,
1680     mblk_t **mp_cont, uint32_t *invalid_rcr_entry)
1681 {
1682 	p_mblk_t nmp = NULL;
1683 	uint64_t multi;
1684 	uint8_t channel;
1685 	boolean_t first_entry = B_TRUE;
1686 	boolean_t is_tcp_udp = B_FALSE;
1687 	boolean_t buffer_free = B_FALSE;
1688 	boolean_t error_send_up = B_FALSE;
1689 	uint8_t error_type;
1690 	uint16_t l2_len;
1691 	uint16_t skip_len;
1692 	uint8_t pktbufsz_type;
1693 	uint64_t rcr_entry;
1694 	uint64_t *pkt_buf_addr_pp;
1695 	uint64_t *pkt_buf_addr_p;
1696 	uint32_t buf_offset;
1697 	uint32_t bsize;
1698 	uint32_t msg_index;
1699 	p_rx_rbr_ring_t rx_rbr_p;
1700 	p_rx_msg_t *rx_msg_ring_p;
1701 	p_rx_msg_t rx_msg_p;
1702 	uint16_t sw_offset_bytes = 0, hdr_size = 0;
1703 	hxge_status_t status = HXGE_OK;
1704 	boolean_t is_valid = B_FALSE;
1705 	p_hxge_rx_ring_stats_t rdc_stats;
1706 	uint32_t bytes_read;
1707 	uint8_t header0 = 0;
1708 	uint8_t header1 = 0;
1709 	uint64_t pkt_type;
1710 	uint8_t no_port_bit = 0;
1711 	uint8_t l4_cs_eq_bit = 0;
1712 
1713 	channel = rcr_p->rdc;
1714 
1715 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "==> hxge_receive_packet"));
1716 
1717 	first_entry = (*mp == NULL) ? B_TRUE : B_FALSE;
1718 	rcr_entry = *((uint64_t *)rcr_desc_rd_head_p);
1719 
1720 	/* Verify the content of the rcr_entry for a hardware bug workaround */
1721 	if ((rcr_entry == 0x0) || (rcr_entry == RCR_ENTRY_PATTERN)) {
1722 		*invalid_rcr_entry = 1;
1723 		HXGE_DEBUG_MSG((hxgep, RX2_CTL, "hxge_receive_packet "
1724 		    "Channel %d invalid RCR entry 0x%llx found, returning\n",
1725 		    channel, (long long) rcr_entry));
1726 		return;
1727 	}
1728 	*((uint64_t *)rcr_desc_rd_head_p) = RCR_ENTRY_PATTERN;
1729 
1730 	multi = (rcr_entry & RCR_MULTI_MASK);
1731 	pkt_type = (rcr_entry & RCR_PKT_TYPE_MASK);
1732 
1733 	error_type = ((rcr_entry & RCR_ERROR_MASK) >> RCR_ERROR_SHIFT);
1734 	l2_len = ((rcr_entry & RCR_L2_LEN_MASK) >> RCR_L2_LEN_SHIFT);
1735 
1736 	/*
1737 	 * Hardware does not strip the CRC due bug ID 11451 where
1738 	 * the hardware mis handles minimum size packets.
1739 	 */
1740 	l2_len -= ETHERFCSL;
1741 
1742 	pktbufsz_type = ((rcr_entry & RCR_PKTBUFSZ_MASK) >>
1743 	    RCR_PKTBUFSZ_SHIFT);
1744 #if defined(__i386)
1745 	pkt_buf_addr_pp = (uint64_t *)(uint32_t)((rcr_entry &
1746 	    RCR_PKT_BUF_ADDR_MASK) << RCR_PKT_BUF_ADDR_SHIFT);
1747 #else
1748 	pkt_buf_addr_pp = (uint64_t *)((rcr_entry & RCR_PKT_BUF_ADDR_MASK) <<
1749 	    RCR_PKT_BUF_ADDR_SHIFT);
1750 #endif
1751 
1752 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1753 	    "==> hxge_receive_packet: entryp $%p entry 0x%0llx "
1754 	    "pkt_buf_addr_pp $%p l2_len %d multi %d "
1755 	    "error_type 0x%x pktbufsz_type %d ",
1756 	    rcr_desc_rd_head_p, rcr_entry, pkt_buf_addr_pp, l2_len,
1757 	    multi, error_type, pktbufsz_type));
1758 
1759 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1760 	    "==> hxge_receive_packet: entryp $%p entry 0x%0llx "
1761 	    "pkt_buf_addr_pp $%p l2_len %d multi %d "
1762 	    "error_type 0x%x ", rcr_desc_rd_head_p,
1763 	    rcr_entry, pkt_buf_addr_pp, l2_len, multi, error_type));
1764 
1765 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1766 	    "==> (rbr) hxge_receive_packet: entry 0x%0llx "
1767 	    "full pkt_buf_addr_pp $%p l2_len %d",
1768 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1769 
1770 	/* get the stats ptr */
1771 	rdc_stats = rcr_p->rdc_stats;
1772 
1773 	if (!l2_len) {
1774 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1775 		    "<== hxge_receive_packet: failed: l2 length is 0."));
1776 		return;
1777 	}
1778 
1779 	/* shift 6 bits to get the full io address */
1780 #if defined(__i386)
1781 	pkt_buf_addr_pp = (uint64_t *)((uint32_t)pkt_buf_addr_pp <<
1782 	    RCR_PKT_BUF_ADDR_SHIFT_FULL);
1783 #else
1784 	pkt_buf_addr_pp = (uint64_t *)((uint64_t)pkt_buf_addr_pp <<
1785 	    RCR_PKT_BUF_ADDR_SHIFT_FULL);
1786 #endif
1787 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1788 	    "==> (rbr) hxge_receive_packet: entry 0x%0llx "
1789 	    "full pkt_buf_addr_pp $%p l2_len %d",
1790 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1791 
1792 	rx_rbr_p = rcr_p->rx_rbr_p;
1793 	rx_msg_ring_p = rx_rbr_p->rx_msg_ring;
1794 
1795 	if (first_entry) {
1796 		hdr_size = (rcr_p->full_hdr_flag ? RXDMA_HDR_SIZE_FULL :
1797 		    RXDMA_HDR_SIZE_DEFAULT);
1798 
1799 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1800 		    "==> hxge_receive_packet: first entry 0x%016llx "
1801 		    "pkt_buf_addr_pp $%p l2_len %d hdr %d",
1802 		    rcr_entry, pkt_buf_addr_pp, l2_len, hdr_size));
1803 	}
1804 
1805 	MUTEX_ENTER(&rx_rbr_p->lock);
1806 
1807 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1808 	    "==> (rbr 1) hxge_receive_packet: entry 0x%0llx "
1809 	    "full pkt_buf_addr_pp $%p l2_len %d",
1810 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1811 
1812 	/*
1813 	 * Packet buffer address in the completion entry points to the starting
1814 	 * buffer address (offset 0). Use the starting buffer address to locate
1815 	 * the corresponding kernel address.
1816 	 */
1817 	status = hxge_rxbuf_pp_to_vp(hxgep, rx_rbr_p,
1818 	    pktbufsz_type, pkt_buf_addr_pp, &pkt_buf_addr_p,
1819 	    &buf_offset, &msg_index);
1820 
1821 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1822 	    "==> (rbr 2) hxge_receive_packet: entry 0x%0llx "
1823 	    "full pkt_buf_addr_pp $%p l2_len %d",
1824 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1825 
1826 	if (status != HXGE_OK) {
1827 		MUTEX_EXIT(&rx_rbr_p->lock);
1828 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1829 		    "<== hxge_receive_packet: found vaddr failed %d", status));
1830 		return;
1831 	}
1832 
1833 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1834 	    "==> (rbr 3) hxge_receive_packet: entry 0x%0llx "
1835 	    "full pkt_buf_addr_pp $%p l2_len %d",
1836 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1837 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1838 	    "==> (rbr 4 msgindex %d) hxge_receive_packet: entry 0x%0llx "
1839 	    "full pkt_buf_addr_pp $%p l2_len %d",
1840 	    msg_index, rcr_entry, pkt_buf_addr_pp, l2_len));
1841 
1842 	if (msg_index >= rx_rbr_p->tnblocks) {
1843 		MUTEX_EXIT(&rx_rbr_p->lock);
1844 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1845 		    "==> hxge_receive_packet: FATAL msg_index (%d) "
1846 		    "should be smaller than tnblocks (%d)\n",
1847 		    msg_index, rx_rbr_p->tnblocks));
1848 		return;
1849 	}
1850 
1851 	rx_msg_p = rx_msg_ring_p[msg_index];
1852 
1853 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1854 	    "==> (rbr 4 msgindex %d) hxge_receive_packet: entry 0x%0llx "
1855 	    "full pkt_buf_addr_pp $%p l2_len %d",
1856 	    msg_index, rcr_entry, pkt_buf_addr_pp, l2_len));
1857 
1858 	switch (pktbufsz_type) {
1859 	case RCR_PKTBUFSZ_0:
1860 		bsize = rx_rbr_p->pkt_buf_size0_bytes;
1861 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1862 		    "==> hxge_receive_packet: 0 buf %d", bsize));
1863 		break;
1864 	case RCR_PKTBUFSZ_1:
1865 		bsize = rx_rbr_p->pkt_buf_size1_bytes;
1866 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1867 		    "==> hxge_receive_packet: 1 buf %d", bsize));
1868 		break;
1869 	case RCR_PKTBUFSZ_2:
1870 		bsize = rx_rbr_p->pkt_buf_size2_bytes;
1871 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1872 		    "==> hxge_receive_packet: 2 buf %d", bsize));
1873 		break;
1874 	case RCR_SINGLE_BLOCK:
1875 		bsize = rx_msg_p->block_size;
1876 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1877 		    "==> hxge_receive_packet: single %d", bsize));
1878 
1879 		break;
1880 	default:
1881 		MUTEX_EXIT(&rx_rbr_p->lock);
1882 		return;
1883 	}
1884 
1885 	DMA_COMMON_SYNC_OFFSET(rx_msg_p->buf_dma,
1886 	    (buf_offset + sw_offset_bytes), (hdr_size + l2_len),
1887 	    DDI_DMA_SYNC_FORCPU);
1888 
1889 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1890 	    "==> hxge_receive_packet: after first dump:usage count"));
1891 
1892 	if (rx_msg_p->cur_usage_cnt == 0) {
1893 		atomic_inc_32(&rx_rbr_p->rbr_used);
1894 		if (rx_rbr_p->rbr_use_bcopy) {
1895 			atomic_inc_32(&rx_rbr_p->rbr_consumed);
1896 			if (rx_rbr_p->rbr_consumed <
1897 			    rx_rbr_p->rbr_threshold_hi) {
1898 				if (rx_rbr_p->rbr_threshold_lo == 0 ||
1899 				    ((rx_rbr_p->rbr_consumed >=
1900 				    rx_rbr_p->rbr_threshold_lo) &&
1901 				    (rx_rbr_p->rbr_bufsize_type >=
1902 				    pktbufsz_type))) {
1903 					rx_msg_p->rx_use_bcopy = B_TRUE;
1904 				}
1905 			} else {
1906 				rx_msg_p->rx_use_bcopy = B_TRUE;
1907 			}
1908 		}
1909 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1910 		    "==> hxge_receive_packet: buf %d (new block) ", bsize));
1911 
1912 		rx_msg_p->pkt_buf_size_code = pktbufsz_type;
1913 		rx_msg_p->pkt_buf_size = bsize;
1914 		rx_msg_p->cur_usage_cnt = 1;
1915 		if (pktbufsz_type == RCR_SINGLE_BLOCK) {
1916 			HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1917 			    "==> hxge_receive_packet: buf %d (single block) ",
1918 			    bsize));
1919 			/*
1920 			 * Buffer can be reused once the free function is
1921 			 * called.
1922 			 */
1923 			rx_msg_p->max_usage_cnt = 1;
1924 			buffer_free = B_TRUE;
1925 		} else {
1926 			rx_msg_p->max_usage_cnt = rx_msg_p->block_size / bsize;
1927 			if (rx_msg_p->max_usage_cnt == 1) {
1928 				buffer_free = B_TRUE;
1929 			}
1930 		}
1931 	} else {
1932 		rx_msg_p->cur_usage_cnt++;
1933 		if (rx_msg_p->cur_usage_cnt == rx_msg_p->max_usage_cnt) {
1934 			buffer_free = B_TRUE;
1935 		}
1936 	}
1937 
1938 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1939 	    "msgbuf index = %d l2len %d bytes usage %d max_usage %d ",
1940 	    msg_index, l2_len,
1941 	    rx_msg_p->cur_usage_cnt, rx_msg_p->max_usage_cnt));
1942 
1943 	if (error_type) {
1944 		rdc_stats->ierrors++;
1945 		/* Update error stats */
1946 		rdc_stats->errlog.compl_err_type = error_type;
1947 		HXGE_FM_REPORT_ERROR(hxgep, NULL, HXGE_FM_EREPORT_RDMC_RCR_ERR);
1948 
1949 		if (error_type & RCR_CTRL_FIFO_DED) {
1950 			rdc_stats->ctrl_fifo_ecc_err++;
1951 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
1952 			    " hxge_receive_packet: "
1953 			    " channel %d RCR ctrl_fifo_ded error", channel));
1954 		} else if (error_type & RCR_DATA_FIFO_DED) {
1955 			rdc_stats->data_fifo_ecc_err++;
1956 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
1957 			    " hxge_receive_packet: channel %d"
1958 			    " RCR data_fifo_ded error", channel));
1959 		}
1960 
1961 		/*
1962 		 * Update and repost buffer block if max usage count is
1963 		 * reached.
1964 		 */
1965 		if (error_send_up == B_FALSE) {
1966 			atomic_inc_32(&rx_msg_p->ref_cnt);
1967 			if (buffer_free == B_TRUE) {
1968 				rx_msg_p->free = B_TRUE;
1969 			}
1970 
1971 			MUTEX_EXIT(&rx_rbr_p->lock);
1972 			hxge_freeb(rx_msg_p);
1973 			return;
1974 		}
1975 	}
1976 
1977 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1978 	    "==> hxge_receive_packet: DMA sync second "));
1979 
1980 	bytes_read = rcr_p->rcvd_pkt_bytes;
1981 	skip_len = sw_offset_bytes + hdr_size;
1982 
1983 	if (first_entry) {
1984 		header0 = rx_msg_p->buffer[buf_offset];
1985 		no_port_bit = header0 & NO_PORT_BIT;
1986 		header1 = rx_msg_p->buffer[buf_offset + 1];
1987 		l4_cs_eq_bit = header1 & L4_CS_EQ_BIT;
1988 	}
1989 
1990 	if (!rx_msg_p->rx_use_bcopy) {
1991 		/*
1992 		 * For loaned up buffers, the driver reference count
1993 		 * will be incremented first and then the free state.
1994 		 */
1995 		if ((nmp = hxge_dupb(rx_msg_p, buf_offset, bsize)) != NULL) {
1996 			if (first_entry) {
1997 				nmp->b_rptr = &nmp->b_rptr[skip_len];
1998 				if (l2_len < bsize - skip_len) {
1999 					nmp->b_wptr = &nmp->b_rptr[l2_len];
2000 				} else {
2001 					nmp->b_wptr = &nmp->b_rptr[bsize
2002 					    - skip_len];
2003 				}
2004 			} else {
2005 				if (l2_len - bytes_read < bsize) {
2006 					nmp->b_wptr =
2007 					    &nmp->b_rptr[l2_len - bytes_read];
2008 				} else {
2009 					nmp->b_wptr = &nmp->b_rptr[bsize];
2010 				}
2011 			}
2012 		}
2013 	} else {
2014 		if (first_entry) {
2015 			nmp = hxge_dupb_bcopy(rx_msg_p, buf_offset + skip_len,
2016 			    l2_len < bsize - skip_len ?
2017 			    l2_len : bsize - skip_len);
2018 		} else {
2019 			nmp = hxge_dupb_bcopy(rx_msg_p, buf_offset,
2020 			    l2_len - bytes_read < bsize ?
2021 			    l2_len - bytes_read : bsize);
2022 		}
2023 	}
2024 
2025 	if (nmp != NULL) {
2026 		if (first_entry)
2027 			bytes_read  = nmp->b_wptr - nmp->b_rptr;
2028 		else
2029 			bytes_read += nmp->b_wptr - nmp->b_rptr;
2030 
2031 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
2032 		    "==> hxge_receive_packet after dupb: "
2033 		    "rbr consumed %d "
2034 		    "pktbufsz_type %d "
2035 		    "nmp $%p rptr $%p wptr $%p "
2036 		    "buf_offset %d bzise %d l2_len %d skip_len %d",
2037 		    rx_rbr_p->rbr_consumed,
2038 		    pktbufsz_type,
2039 		    nmp, nmp->b_rptr, nmp->b_wptr,
2040 		    buf_offset, bsize, l2_len, skip_len));
2041 	} else {
2042 		cmn_err(CE_WARN, "!hxge_receive_packet: update stats (error)");
2043 
2044 		atomic_inc_32(&rx_msg_p->ref_cnt);
2045 		if (buffer_free == B_TRUE) {
2046 			rx_msg_p->free = B_TRUE;
2047 		}
2048 
2049 		MUTEX_EXIT(&rx_rbr_p->lock);
2050 		hxge_freeb(rx_msg_p);
2051 		return;
2052 	}
2053 
2054 	if (buffer_free == B_TRUE) {
2055 		rx_msg_p->free = B_TRUE;
2056 	}
2057 
2058 	/*
2059 	 * ERROR, FRAG and PKT_TYPE are only reported in the first entry. If a
2060 	 * packet is not fragmented and no error bit is set, then L4 checksum
2061 	 * is OK.
2062 	 */
2063 	is_valid = (nmp != NULL);
2064 	if (first_entry) {
2065 		rdc_stats->ipackets++; /* count only 1st seg for jumbo */
2066 		if (l2_len > (STD_FRAME_SIZE - ETHERFCSL))
2067 			rdc_stats->jumbo_pkts++;
2068 		rdc_stats->ibytes += skip_len + l2_len < bsize ?
2069 		    l2_len : bsize;
2070 	} else {
2071 		/*
2072 		 * Add the current portion of the packet to the kstats.
2073 		 * The current portion of the packet is calculated by using
2074 		 * length of the packet and the previously received portion.
2075 		 */
2076 		rdc_stats->ibytes += l2_len - rcr_p->rcvd_pkt_bytes < bsize ?
2077 		    l2_len - rcr_p->rcvd_pkt_bytes : bsize;
2078 	}
2079 
2080 	rcr_p->rcvd_pkt_bytes = bytes_read;
2081 
2082 	if (rx_msg_p->free && rx_msg_p->rx_use_bcopy) {
2083 		atomic_inc_32(&rx_msg_p->ref_cnt);
2084 		MUTEX_EXIT(&rx_rbr_p->lock);
2085 		hxge_freeb(rx_msg_p);
2086 	} else
2087 		MUTEX_EXIT(&rx_rbr_p->lock);
2088 
2089 	if (is_valid) {
2090 		nmp->b_cont = NULL;
2091 		if (first_entry) {
2092 			*mp = nmp;
2093 			*mp_cont = NULL;
2094 		} else {
2095 			*mp_cont = nmp;
2096 		}
2097 	}
2098 
2099 	/*
2100 	 * Update stats and hardware checksuming.
2101 	 */
2102 	if (is_valid && !multi) {
2103 		is_tcp_udp = ((pkt_type == RCR_PKT_IS_TCP ||
2104 		    pkt_type == RCR_PKT_IS_UDP) ? B_TRUE : B_FALSE);
2105 
2106 		if (!no_port_bit && l4_cs_eq_bit && is_tcp_udp && !error_type) {
2107 			(void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0,
2108 			    HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0);
2109 
2110 			HXGE_DEBUG_MSG((hxgep, RX_CTL,
2111 			    "==> hxge_receive_packet: Full tcp/udp cksum "
2112 			    "is_valid 0x%x multi %d error %d",
2113 			    is_valid, multi, error_type));
2114 		}
2115 	}
2116 
2117 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
2118 	    "==> hxge_receive_packet: *mp 0x%016llx", *mp));
2119 
2120 	*multi_p = (multi == RCR_MULTI_MASK);
2121 
2122 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_receive_packet: "
2123 	    "multi %d nmp 0x%016llx *mp 0x%016llx *mp_cont 0x%016llx",
2124 	    *multi_p, nmp, *mp, *mp_cont));
2125 }
2126 
/*
 * hxge_rx_rbr_empty_recover
 *
 * Handle an RBR-empty condition reported for one receive DMA channel.
 *
 * The routine first waits for the channel to report quiescent (the return
 * value of that wait is ignored).  Then, holding the RBR post_lock, it
 * either calls hxge_rbr_empty_restore() immediately, when
 * (rbb_max - rbr_used) is at least HXGE_RBR_EMPTY_THRESHOLD, or sets
 * rbr_is_empty to B_TRUE and returns without restoring.
 *
 * NOTE(review): the deferred case is presumably completed later by the
 * buffer post path once enough blocks have accumulated -- confirm against
 * the code that tests rbr_is_empty.
 * NOTE(review): rcr_rings[] is indexed directly by the channel number here;
 * this assumes channel numbers and ring indices coincide -- confirm.
 *
 *	hxgep	- per-instance driver state
 *	channel	- receive DMA channel that reported RBR empty
 */
2127 static void
2128 hxge_rx_rbr_empty_recover(p_hxge_t hxgep, uint8_t channel)
2129 {
2130 	hpi_handle_t	handle;
2131 	p_rx_rcr_ring_t	rcrp;
2132 	p_rx_rbr_ring_t	rbrp;
2133 
2134 	rcrp = hxgep->rx_rcr_rings->rcr_rings[channel];
2135 	rbrp = rcrp->rx_rbr_p;
2136 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
2137 
2138 	/*
2139 	 * Wait for the channel to be quiet
2140 	 */
2141 	(void) hpi_rxdma_cfg_rdc_wait_for_qst(handle, channel);
2142 
2143 	/*
2144 	 * Post page will accumulate some buffers before re-enabling
2145 	 * the DMA channel.
2146 	 */
2147 
2148 	MUTEX_ENTER(&rbrp->post_lock);
2149 	if ((rbrp->rbb_max - rbrp->rbr_used) >= HXGE_RBR_EMPTY_THRESHOLD) {
2150 		hxge_rbr_empty_restore(hxgep, rbrp);
2151 	} else {
		/* Not enough blocks yet: only record the empty state. */
2152 		rbrp->rbr_is_empty = B_TRUE;
2153 	}
2154 	MUTEX_EXIT(&rbrp->post_lock);
2155 }
2156 
2157 
/*
 * hxge_rx_err_evnts
 *
 * Process the receive DMA status word for the channel behind ldvp.
 *
 * The status bits in cs (with pktread and ptrread zeroed and the value
 * masked by RDC_STAT_WR1C) are written back to the RDC_STAT register first.
 * Each status bit that is set then bumps its counter in the channel's
 * rdc_stats entry (indexed by ldvp->vdma_index).
 *
 * The following bits are treated as fatal: rbr_cpl_to, rcr_shadow_par_err,
 * rbr_prefetch_par_err, rbr_pre_empty, peu_resp_err, rcr_shadow_full,
 * rcr_full and rbr_full.  Each one is reported through
 * HXGE_FM_REPORT_ERROR and logged.  rcr_thres and rcr_to are only counted.
 * rbr_empty is counted and handed to hxge_rx_rbr_empty_recover() without
 * being marked fatal.
 *
 * If any fatal bit was seen, hxge_rxdma_fatal_err_recover() is called with
 * the RBR post_lock held, and FM_SERVICE_RESTORED() is invoked when that
 * recovery returns HXGE_OK.
 *
 *	hxgep	- per-instance driver state
 *	index	- not used in this function
 *	ldvp	- logical device; supplies channel and vdma_index
 *	cs	- RDC status value (passed by value; modified locally)
 *
 * Returns HXGE_OK when no fatal bit was set, otherwise the status returned
 * by hxge_rxdma_fatal_err_recover().
 */
2158 /*ARGSUSED*/
2159 static hxge_status_t
2160 hxge_rx_err_evnts(p_hxge_t hxgep, uint_t index, p_hxge_ldv_t ldvp,
2161     rdc_stat_t cs)
2162 {
2163 	p_hxge_rx_ring_stats_t	rdc_stats;
2164 	hpi_handle_t		handle;
2165 	boolean_t		rxchan_fatal = B_FALSE;
2166 	uint8_t			channel;
2167 	hxge_status_t		status = HXGE_OK;
2168 	uint64_t		cs_val;
2169 
2170 	HXGE_DEBUG_MSG((hxgep, INT_CTL, "==> hxge_rx_err_evnts"));
2171 
2172 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
2173 	channel = ldvp->channel;
2174 
2175 	/* Clear the interrupts */
2176 	cs.bits.pktread = 0;
2177 	cs.bits.ptrread = 0;
2178 	cs_val = cs.value & RDC_STAT_WR1C;
2179 	RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs_val);
2180 
2181 	rdc_stats = &hxgep->statsp->rdc_stats[ldvp->vdma_index];
2182 
2183 	if (cs.bits.rbr_cpl_to) {
2184 		rdc_stats->rbr_tmout++;
2185 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2186 		    HXGE_FM_EREPORT_RDMC_RBR_CPL_TO);
2187 		rxchan_fatal = B_TRUE;
2188 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2189 		    "==> hxge_rx_err_evnts(channel %d): "
2190 		    "fatal error: rx_rbr_timeout", channel));
2191 	}
2192 
	/*
	 * For either parity error, capture the parity error status into
	 * the error log before the individual bits are handled below.
	 */
2193 	if ((cs.bits.rcr_shadow_par_err) || (cs.bits.rbr_prefetch_par_err)) {
2194 		(void) hpi_rxdma_ring_perr_stat_get(handle,
2195 		    &rdc_stats->errlog.pre_par, &rdc_stats->errlog.sha_par);
2196 	}
2197 
2198 	if (cs.bits.rcr_shadow_par_err) {
2199 		rdc_stats->rcr_sha_par++;
2200 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2201 		    HXGE_FM_EREPORT_RDMC_RCR_SHA_PAR);
2202 		rxchan_fatal = B_TRUE;
2203 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2204 		    "==> hxge_rx_err_evnts(channel %d): "
2205 		    "fatal error: rcr_shadow_par_err", channel));
2206 	}
2207 
2208 	if (cs.bits.rbr_prefetch_par_err) {
2209 		rdc_stats->rbr_pre_par++;
2210 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2211 		    HXGE_FM_EREPORT_RDMC_RBR_PRE_PAR);
2212 		rxchan_fatal = B_TRUE;
2213 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2214 		    "==> hxge_rx_err_evnts(channel %d): "
2215 		    "fatal error: rbr_prefetch_par_err", channel));
2216 	}
2217 
2218 	if (cs.bits.rbr_pre_empty) {
2219 		rdc_stats->rbr_pre_empty++;
2220 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2221 		    HXGE_FM_EREPORT_RDMC_RBR_PRE_EMPTY);
2222 		rxchan_fatal = B_TRUE;
2223 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2224 		    "==> hxge_rx_err_evnts(channel %d): "
2225 		    "fatal error: rbr_pre_empty", channel));
2226 	}
2227 
2228 	if (cs.bits.peu_resp_err) {
2229 		rdc_stats->peu_resp_err++;
2230 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2231 		    HXGE_FM_EREPORT_RDMC_PEU_RESP_ERR);
2232 		rxchan_fatal = B_TRUE;
2233 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2234 		    "==> hxge_rx_err_evnts(channel %d): "
2235 		    "fatal error: peu_resp_err", channel));
2236 	}
2237 
	/* Threshold and timeout events are only counted. */
2238 	if (cs.bits.rcr_thres) {
2239 		rdc_stats->rcr_thres++;
2240 	}
2241 
2242 	if (cs.bits.rcr_to) {
2243 		rdc_stats->rcr_to++;
2244 	}
2245 
2246 	if (cs.bits.rcr_shadow_full) {
2247 		rdc_stats->rcr_shadow_full++;
2248 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2249 		    HXGE_FM_EREPORT_RDMC_RCR_SHA_FULL);
2250 		rxchan_fatal = B_TRUE;
2251 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2252 		    "==> hxge_rx_err_evnts(channel %d): "
2253 		    "fatal error: rcr_shadow_full", channel));
2254 	}
2255 
2256 	if (cs.bits.rcr_full) {
2257 		rdc_stats->rcrfull++;
2258 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2259 		    HXGE_FM_EREPORT_RDMC_RCRFULL);
2260 		rxchan_fatal = B_TRUE;
2261 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2262 		    "==> hxge_rx_err_evnts(channel %d): "
2263 		    "fatal error: rcrfull error", channel));
2264 	}
2265 
	/*
	 * RBR empty has its own recovery routine and does not set
	 * rxchan_fatal.
	 */
2266 	if (cs.bits.rbr_empty) {
2267 		rdc_stats->rbr_empty++;
2268 		hxge_rx_rbr_empty_recover(hxgep, channel);
2269 	}
2270 
2271 	if (cs.bits.rbr_full) {
2272 		rdc_stats->rbrfull++;
2273 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2274 		    HXGE_FM_EREPORT_RDMC_RBRFULL);
2275 		rxchan_fatal = B_TRUE;
2276 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2277 		    "==> hxge_rx_err_evnts(channel %d): "
2278 		    "fatal error: rbr_full error", channel));
2279 	}
2280 
2281 	if (rxchan_fatal) {
2282 		p_rx_rcr_ring_t	rcrp;
2283 		p_rx_rbr_ring_t rbrp;
2284 
		/*
		 * NOTE(review): rcr_rings[] is indexed by the channel number
		 * here, while the stats above use ldvp->vdma_index -- confirm
		 * the two always agree.
		 */
2285 		rcrp = hxgep->rx_rcr_rings->rcr_rings[channel];
2286 		rbrp = rcrp->rx_rbr_p;
2287 
2288 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2289 		    " hxge_rx_err_evnts: fatal error on Channel #%d\n",
2290 		    channel));
2291 
2292 		MUTEX_ENTER(&rbrp->post_lock);
2293 		/* This function needs to be inside the post_lock */
2294 		status = hxge_rxdma_fatal_err_recover(hxgep, channel);
2295 		MUTEX_EXIT(&rbrp->post_lock);
2296 		if (status == HXGE_OK) {
2297 			FM_SERVICE_RESTORED(hxgep);
2298 		}
2299 	}
2300 
2301 	HXGE_DEBUG_MSG((hxgep, INT_CTL, "<== hxge_rx_err_evnts"));
2302 	return (status);
2303 }
2304 
/*
 * hxge_map_rxdma
 *
 * Build the software receive rings (RBR, RCR and mailbox) for every receive
 * DMA channel described by the already-allocated DMA pools in hxgep.
 *
 * The function returns HXGE_ERROR without doing anything when any of the
 * four pools is not marked buf_allocated or when the buffer pool reports
 * zero channels.  Otherwise it allocates the three container structures and
 * their per-channel pointer arrays, sets the default interrupt threshold and
 * timeout in hxgep, and calls hxge_map_rxdma_channel() once per channel.
 *
 * On success the containers are published in hxgep->rx_rbr_rings,
 * hxgep->rx_rcr_rings and hxgep->rx_mbox_areas_p.  If a channel fails to
 * map, the channels mapped before it are unmapped in reverse order, all
 * containers are freed, nothing is published in hxgep, and the failing
 * status is returned.
 *
 *	hxgep	- per-instance driver state
 *
 * Returns HXGE_OK on success, HXGE_ERROR or the status from
 * hxge_map_rxdma_channel() on failure.
 */
2305 static hxge_status_t
2306 hxge_map_rxdma(p_hxge_t hxgep)
2307 {
2308 	int			i, ndmas;
2309 	uint16_t		channel;
2310 	p_rx_rbr_rings_t	rx_rbr_rings;
2311 	p_rx_rbr_ring_t		*rbr_rings;
2312 	p_rx_rcr_rings_t	rx_rcr_rings;
2313 	p_rx_rcr_ring_t		*rcr_rings;
2314 	p_rx_mbox_areas_t	rx_mbox_areas_p;
2315 	p_rx_mbox_t		*rx_mbox_p;
2316 	p_hxge_dma_pool_t	dma_buf_poolp;
2317 	p_hxge_dma_common_t	*dma_buf_p;
2318 	p_hxge_dma_pool_t	dma_rbr_cntl_poolp;
2319 	p_hxge_dma_common_t	*dma_rbr_cntl_p;
2320 	p_hxge_dma_pool_t	dma_rcr_cntl_poolp;
2321 	p_hxge_dma_common_t	*dma_rcr_cntl_p;
2322 	p_hxge_dma_pool_t	dma_mbox_cntl_poolp;
2323 	p_hxge_dma_common_t	*dma_mbox_cntl_p;
2324 	uint32_t		*num_chunks;
2325 	hxge_status_t		status = HXGE_OK;
2326 
2327 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_map_rxdma"));
2328 
2329 	dma_buf_poolp = hxgep->rx_buf_pool_p;
2330 	dma_rbr_cntl_poolp = hxgep->rx_rbr_cntl_pool_p;
2331 	dma_rcr_cntl_poolp = hxgep->rx_rcr_cntl_pool_p;
2332 	dma_mbox_cntl_poolp = hxgep->rx_mbox_cntl_pool_p;
2333 
	/* All four DMA pools must already have been allocated. */
2334 	if (!dma_buf_poolp->buf_allocated ||
2335 	    !dma_rbr_cntl_poolp->buf_allocated ||
2336 	    !dma_rcr_cntl_poolp->buf_allocated ||
2337 	    !dma_mbox_cntl_poolp->buf_allocated) {
2338 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2339 		    "<== hxge_map_rxdma: buf not allocated"));
2340 		return (HXGE_ERROR);
2341 	}
2342 
2343 	ndmas = dma_buf_poolp->ndmas;
2344 	if (!ndmas) {
2345 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
2346 		    "<== hxge_map_rxdma: no dma allocated"));
2347 		return (HXGE_ERROR);
2348 	}
2349 
2350 	num_chunks = dma_buf_poolp->num_chunks;
2351 	dma_buf_p = dma_buf_poolp->dma_buf_pool_p;
2352 	dma_rbr_cntl_p = dma_rbr_cntl_poolp->dma_buf_pool_p;
2353 	dma_rcr_cntl_p = dma_rcr_cntl_poolp->dma_buf_pool_p;
2354 	dma_mbox_cntl_p = dma_mbox_cntl_poolp->dma_buf_pool_p;
2355 
	/*
	 * One container plus one ndmas-sized pointer array for each of the
	 * RBR rings, the RCR rings and the mailboxes.
	 */
2356 	rx_rbr_rings = (p_rx_rbr_rings_t)
2357 	    KMEM_ZALLOC(sizeof (rx_rbr_rings_t), KM_SLEEP);
2358 	rbr_rings = (p_rx_rbr_ring_t *)KMEM_ZALLOC(
2359 	    sizeof (p_rx_rbr_ring_t) * ndmas, KM_SLEEP);
2360 
2361 	rx_rcr_rings = (p_rx_rcr_rings_t)
2362 	    KMEM_ZALLOC(sizeof (rx_rcr_rings_t), KM_SLEEP);
2363 	rcr_rings = (p_rx_rcr_ring_t *)KMEM_ZALLOC(
2364 	    sizeof (p_rx_rcr_ring_t) * ndmas, KM_SLEEP);
2365 
2366 	rx_mbox_areas_p = (p_rx_mbox_areas_t)
2367 	    KMEM_ZALLOC(sizeof (rx_mbox_areas_t), KM_SLEEP);
2368 	rx_mbox_p = (p_rx_mbox_t *)KMEM_ZALLOC(
2369 	    sizeof (p_rx_mbox_t) * ndmas, KM_SLEEP);
2370 
2371 	/*
2372 	 * Timeout should be set based on the system clock divider.
2373 	 * The following timeout value of 1 assumes that the
2374 	 * granularity (1000) is 3 microseconds running at 300MHz.
2375 	 */
2376 
2377 	hxgep->intr_threshold = RXDMA_RCR_PTHRES_DEFAULT;
2378 	hxgep->intr_timeout = RXDMA_RCR_TO_DEFAULT;
2379 
2380 	/*
2381 	 * Map descriptors from the buffer pools for each DMA channel.
2382 	 */
2383 	for (i = 0; i < ndmas; i++) {
2384 		/*
2385 		 * Set up and prepare buffer blocks, descriptors and mailbox.
2386 		 */
2387 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
2388 		status = hxge_map_rxdma_channel(hxgep, channel,
2389 		    (p_hxge_dma_common_t *)&dma_buf_p[i],
2390 		    (p_rx_rbr_ring_t *)&rbr_rings[i],
2391 		    num_chunks[i],
2392 		    (p_hxge_dma_common_t *)&dma_rbr_cntl_p[i],
2393 		    (p_hxge_dma_common_t *)&dma_rcr_cntl_p[i],
2394 		    (p_hxge_dma_common_t *)&dma_mbox_cntl_p[i],
2395 		    (p_rx_rcr_ring_t *)&rcr_rings[i],
2396 		    (p_rx_mbox_t *)&rx_mbox_p[i]);
2397 		if (status != HXGE_OK) {
2398 			goto hxge_map_rxdma_fail1;
2399 		}
2400 		rbr_rings[i]->index = (uint16_t)i;
2401 		rcr_rings[i]->index = (uint16_t)i;
2402 		rcr_rings[i]->rdc_stats = &hxgep->statsp->rdc_stats[i];
2403 	}
2404 
	/* Every channel mapped: publish the containers in hxgep. */
2405 	rx_rbr_rings->ndmas = rx_rcr_rings->ndmas = ndmas;
2406 	rx_rbr_rings->rbr_rings = rbr_rings;
2407 	hxgep->rx_rbr_rings = rx_rbr_rings;
2408 	rx_rcr_rings->rcr_rings = rcr_rings;
2409 	hxgep->rx_rcr_rings = rx_rcr_rings;
2410 
2411 	rx_mbox_areas_p->rxmbox_areas = rx_mbox_p;
2412 	hxgep->rx_mbox_areas_p = rx_mbox_areas_p;
2413 
2414 	goto hxge_map_rxdma_exit;
2415 
2416 hxge_map_rxdma_fail1:
2417 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2418 	    "==> hxge_map_rxdma: unmap rbr,rcr (status 0x%x channel %d i %d)",
2419 	    status, channel, i));
	/*
	 * Channel i itself failed to map, so start unwinding at i - 1 and
	 * walk back to channel 0.
	 */
2420 	i--;
2421 	for (; i >= 0; i--) {
2422 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
2423 		hxge_unmap_rxdma_channel(hxgep, channel,
2424 		    rbr_rings[i], rcr_rings[i], rx_mbox_p[i]);
2425 	}
2426 
2427 	KMEM_FREE(rbr_rings, sizeof (p_rx_rbr_ring_t) * ndmas);
2428 	KMEM_FREE(rx_rbr_rings, sizeof (rx_rbr_rings_t));
2429 	KMEM_FREE(rcr_rings, sizeof (p_rx_rcr_ring_t) * ndmas);
2430 	KMEM_FREE(rx_rcr_rings, sizeof (rx_rcr_rings_t));
2431 	KMEM_FREE(rx_mbox_p, sizeof (p_rx_mbox_t) * ndmas);
2432 	KMEM_FREE(rx_mbox_areas_p, sizeof (rx_mbox_areas_t));
2433 
2434 hxge_map_rxdma_exit:
2435 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2436 	    "<== hxge_map_rxdma: (status 0x%x channel %d)", status, channel));
2437 
2438 	return (status);
2439 }
2440 
/*
 * hxge_unmap_rxdma
 *
 * Tear down the software receive rings of every receive DMA channel.
 *
 * The function returns early (after logging) when any of the four DMA pools
 * is not marked buf_allocated, when hxgep->rx_rbr_rings or
 * hxgep->rx_rcr_rings is NULL, or when the RBR container reports zero
 * channels.  Otherwise it calls hxge_unmap_rxdma_channel() for each channel
 * and then frees the three containers and their per-channel pointer arrays.
 *
 * NOTE(review): hxgep->rx_rbr_rings, hxgep->rx_rcr_rings and
 * hxgep->rx_mbox_areas_p are not reset to NULL after the free here --
 * confirm that callers do not use them afterwards.
 * NOTE(review): hxgep->rx_mbox_areas_p is dereferenced without a NULL
 * check, unlike the two ring containers -- confirm it is always set
 * whenever they are.
 *
 *	hxgep	- per-instance driver state
 */
2441 static void
2442 hxge_unmap_rxdma(p_hxge_t hxgep)
2443 {
2444 	int			i, ndmas;
2445 	uint16_t		channel;
2446 	p_rx_rbr_rings_t	rx_rbr_rings;
2447 	p_rx_rbr_ring_t		*rbr_rings;
2448 	p_rx_rcr_rings_t	rx_rcr_rings;
2449 	p_rx_rcr_ring_t		*rcr_rings;
2450 	p_rx_mbox_areas_t	rx_mbox_areas_p;
2451 	p_rx_mbox_t		*rx_mbox_p;
2452 	p_hxge_dma_pool_t	dma_buf_poolp;
2453 	p_hxge_dma_pool_t	dma_rbr_cntl_poolp;
2454 	p_hxge_dma_pool_t	dma_rcr_cntl_poolp;
2455 	p_hxge_dma_pool_t	dma_mbox_cntl_poolp;
2456 	p_hxge_dma_common_t	*dma_buf_p;
2457 
2458 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_unmap_rxdma"));
2459 
2460 	dma_buf_poolp = hxgep->rx_buf_pool_p;
2461 	dma_rbr_cntl_poolp = hxgep->rx_rbr_cntl_pool_p;
2462 	dma_rcr_cntl_poolp = hxgep->rx_rcr_cntl_pool_p;
2463 	dma_mbox_cntl_poolp = hxgep->rx_mbox_cntl_pool_p;
2464 
2465 	if (!dma_buf_poolp->buf_allocated ||
2466 	    !dma_rbr_cntl_poolp->buf_allocated ||
2467 	    !dma_rcr_cntl_poolp->buf_allocated ||
2468 	    !dma_mbox_cntl_poolp->buf_allocated) {
2469 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2470 		    "<== hxge_unmap_rxdma: NULL buf pointers"));
2471 		return;
2472 	}
2473 
2474 	rx_rbr_rings = hxgep->rx_rbr_rings;
2475 	rx_rcr_rings = hxgep->rx_rcr_rings;
2476 	if (rx_rbr_rings == NULL || rx_rcr_rings == NULL) {
2477 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2478 		    "<== hxge_unmap_rxdma: NULL pointers"));
2479 		return;
2480 	}
2481 
2482 	ndmas = rx_rbr_rings->ndmas;
2483 	if (!ndmas) {
2484 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2485 		    "<== hxge_unmap_rxdma: no channel"));
2486 		return;
2487 	}
2488 
2489 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2490 	    "==> hxge_unmap_rxdma (ndmas %d)", ndmas));
2491 
2492 	rbr_rings = rx_rbr_rings->rbr_rings;
2493 	rcr_rings = rx_rcr_rings->rcr_rings;
2494 	rx_mbox_areas_p = hxgep->rx_mbox_areas_p;
2495 	rx_mbox_p = rx_mbox_areas_p->rxmbox_areas;
2496 	dma_buf_p = dma_buf_poolp->dma_buf_pool_p;
2497 
	/* Release the per-channel rings and mailboxes. */
2498 	for (i = 0; i < ndmas; i++) {
2499 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
2500 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2501 		    "==> hxge_unmap_rxdma (ndmas %d) channel %d",
2502 		    ndmas, channel));
2503 		(void) hxge_unmap_rxdma_channel(hxgep, channel,
2504 		    (p_rx_rbr_ring_t)rbr_rings[i],
2505 		    (p_rx_rcr_ring_t)rcr_rings[i],
2506 		    (p_rx_mbox_t)rx_mbox_p[i]);
2507 	}
2508 
	/* Release the containers and their pointer arrays. */
2509 	KMEM_FREE(rx_rbr_rings, sizeof (rx_rbr_rings_t));
2510 	KMEM_FREE(rbr_rings, sizeof (p_rx_rbr_ring_t) * ndmas);
2511 	KMEM_FREE(rx_rcr_rings, sizeof (rx_rcr_rings_t));
2512 	KMEM_FREE(rcr_rings, sizeof (p_rx_rcr_ring_t) * ndmas);
2513 	KMEM_FREE(rx_mbox_areas_p, sizeof (rx_mbox_areas_t));
2514 	KMEM_FREE(rx_mbox_p, sizeof (p_rx_mbox_t) * ndmas);
2515 
2516 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_unmap_rxdma"));
2517 }
2518 
/*
 * hxge_map_rxdma_channel
 *
 * Map the software state of a single receive DMA channel in two steps:
 *   1. hxge_map_rxdma_channel_buf_ring() builds the receive buffer ring
 *      and stores it through rbr_p;
 *   2. hxge_map_rxdma_channel_cfg_ring() sets up the block ring
 *      descriptors, the completion ring and the mailbox, storing the
 *      latter two through rcr_p and rx_mbox_p.
 *
 * If step 1 fails its status is returned unchanged.  If step 2 fails the
 * buffer ring from step 1 is unmapped again and HXGE_ERROR is returned.
 *
 *	hxgep		- per-instance driver state
 *	channel		- receive DMA channel number
 *	dma_buf_p	- buffer DMA area(s) for this channel
 *	rbr_p		- out: receive block ring
 *	num_chunks	- number of buffer chunks in dma_buf_p
 *	dma_rbr_cntl_p	- DMA area for the RBR descriptors
 *	dma_rcr_cntl_p	- DMA area for the RCR descriptors
 *	dma_mbox_cntl_p	- DMA area for the mailbox
 *	rcr_p		- out: receive completion ring
 *	rx_mbox_p	- out: mailbox
 *
 * Returns HXGE_OK on success.
 *
 * NOTE(review): status is declared int although the function returns
 * hxge_status_t.
 */
2519 hxge_status_t
2520 hxge_map_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
2521     p_hxge_dma_common_t *dma_buf_p, p_rx_rbr_ring_t *rbr_p,
2522     uint32_t num_chunks, p_hxge_dma_common_t *dma_rbr_cntl_p,
2523     p_hxge_dma_common_t *dma_rcr_cntl_p, p_hxge_dma_common_t *dma_mbox_cntl_p,
2524     p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p)
2525 {
2526 	int status = HXGE_OK;
2527 
2528 	/*
2529 	 * Set up and prepare buffer blocks, descriptors and mailbox.
2530 	 */
2531 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2532 	    "==> hxge_map_rxdma_channel (channel %d)", channel));
2533 
2534 	/*
2535 	 * Receive buffer blocks
2536 	 */
2537 	status = hxge_map_rxdma_channel_buf_ring(hxgep, channel,
2538 	    dma_buf_p, rbr_p, num_chunks);
2539 	if (status != HXGE_OK) {
2540 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2541 		    "==> hxge_map_rxdma_channel (channel %d): "
2542 		    "map buffer failed 0x%x", channel, status));
2543 		goto hxge_map_rxdma_channel_exit;
2544 	}
2545 
2546 	/*
2547 	 * Receive block ring, completion ring and mailbox.
2548 	 */
2549 	status = hxge_map_rxdma_channel_cfg_ring(hxgep, channel,
2550 	    dma_rbr_cntl_p, dma_rcr_cntl_p, dma_mbox_cntl_p,
2551 	    rbr_p, rcr_p, rx_mbox_p);
2552 	if (status != HXGE_OK) {
2553 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2554 		    "==> hxge_map_rxdma_channel (channel %d): "
2555 		    "map config failed 0x%x", channel, status));
2556 		goto hxge_map_rxdma_channel_fail2;
2557 	}
2558 	goto hxge_map_rxdma_channel_exit;
2559 
	/*
	 * NOTE(review): no goto in this function targets the fail3 label, so
	 * the code between here and the fail2 label is currently unreachable.
	 */
2560 hxge_map_rxdma_channel_fail3:
2561 	/* Free rbr, rcr */
2562 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2563 	    "==> hxge_map_rxdma_channel: free rbr/rcr (status 0x%x channel %d)",
2564 	    status, channel));
2565 	hxge_unmap_rxdma_channel_cfg_ring(hxgep, *rcr_p, *rx_mbox_p);
2566 
2567 hxge_map_rxdma_channel_fail2:
2568 	/* Free buffer blocks */
2569 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2570 	    "==> hxge_map_rxdma_channel: free rx buffers"
2571 	    "(hxgep 0x%x status 0x%x channel %d)",
2572 	    hxgep, status, channel));
2573 	hxge_unmap_rxdma_channel_buf_ring(hxgep, *rbr_p);
2574 
	/* The specific failure code is replaced by the generic error. */
2575 	status = HXGE_ERROR;
2576 
2577 hxge_map_rxdma_channel_exit:
2578 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2579 	    "<== hxge_map_rxdma_channel: (hxgep 0x%x status 0x%x channel %d)",
2580 	    hxgep, status, channel));
2581 
2582 	return (status);
2583 }
2584 
/*
 * hxge_unmap_rxdma_channel
 *
 * Undo hxge_map_rxdma_channel() for one receive DMA channel: first release
 * the completion ring and mailbox, then the receive buffer ring.
 *
 *	hxgep		- per-instance driver state
 *	channel		- channel number; used only in the debug message
 *	rbr_p		- receive block ring to unmap
 *	rcr_p		- receive completion ring to unmap
 *	rx_mbox_p	- mailbox to unmap
 */
2585 /*ARGSUSED*/
2586 static void
2587 hxge_unmap_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
2588     p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p)
2589 {
2590 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2591 	    "==> hxge_unmap_rxdma_channel (channel %d)", channel));
2592 
2593 	/*
2594 	 * unmap receive block ring, completion ring and mailbox.
2595 	 */
2596 	(void) hxge_unmap_rxdma_channel_cfg_ring(hxgep, rcr_p, rx_mbox_p);
2597 
2598 	/* unmap buffer blocks */
2599 	(void) hxge_unmap_rxdma_channel_buf_ring(hxgep, rbr_p);
2600 
2601 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_unmap_rxdma_channel"));
2602 }
2603 
/*
 * hxge_map_rxdma_channel_cfg_ring
 *
 * Prepare the software copies of the configuration for one receive DMA
 * channel.  No device register is written in this function; it only fills
 * in fields of the ring and mailbox structures.
 *
 * Work done, in order:
 *   - carve the RBR descriptor area out of *dma_rbr_cntl_p, zero it and
 *     compute the RBR config A/B and kick values in *rbr_p;
 *   - store the shifted DMA address of every receive buffer block into the
 *     RBR descriptor ring;
 *   - derive the bcopy thresholds of the RBR from the global tunables
 *     hxge_rx_threshold_hi, hxge_rx_threshold_lo and hxge_rx_buf_size_type
 *     (the latter two globals may be clamped, i.e. modified, here);
 *   - allocate the completion ring, carve its descriptor area out of
 *     *dma_rcr_cntl_p, zero it and compute the RCR config A/B values;
 *   - allocate the mailbox, carve its area out of *dma_mbox_cntl_p and
 *     compute the RX config 1/2 values holding the mailbox address;
 *   - cross-link the RBR and RCR and return the new objects.
 *
 *	hxgep		- per-instance driver state
 *	dma_channel	- receive DMA channel number
 *	dma_rbr_cntl_p	- DMA area for the RBR descriptors
 *	dma_rcr_cntl_p	- DMA area for the RCR descriptors
 *	dma_mbox_cntl_p	- DMA area for the mailbox
 *	rbr_p		- in: receive block ring built by the buf_ring step
 *	rcr_p		- out: newly allocated receive completion ring
 *	rx_mbox_p	- out: newly allocated mailbox
 *
 * Returns HXGE_OK; status is never changed after its initialization in this
 * function.
 */
2604 /*ARGSUSED*/
2605 static hxge_status_t
2606 hxge_map_rxdma_channel_cfg_ring(p_hxge_t hxgep, uint16_t dma_channel,
2607     p_hxge_dma_common_t *dma_rbr_cntl_p, p_hxge_dma_common_t *dma_rcr_cntl_p,
2608     p_hxge_dma_common_t *dma_mbox_cntl_p, p_rx_rbr_ring_t *rbr_p,
2609     p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p)
2610 {
2611 	p_rx_rbr_ring_t 	rbrp;
2612 	p_rx_rcr_ring_t 	rcrp;
2613 	p_rx_mbox_t 		mboxp;
2614 	p_hxge_dma_common_t 	cntl_dmap;
2615 	p_hxge_dma_common_t 	dmap;
2616 	p_rx_msg_t 		*rx_msg_ring;
2617 	p_rx_msg_t 		rx_msg_p;
2618 	rdc_rbr_cfg_a_t		*rcfga_p;
2619 	rdc_rbr_cfg_b_t		*rcfgb_p;
2620 	rdc_rcr_cfg_a_t		*cfga_p;
2621 	rdc_rcr_cfg_b_t		*cfgb_p;
2622 	rdc_rx_cfg1_t		*cfig1_p;
2623 	rdc_rx_cfg2_t		*cfig2_p;
2624 	rdc_rbr_kick_t		*kick_p;
2625 	uint32_t		dmaaddrp;
2626 	uint32_t		*rbr_vaddrp;
2627 	uint32_t		bkaddr;
2628 	hxge_status_t		status = HXGE_OK;
2629 	int			i;
2630 	uint32_t 		hxge_port_rcr_size;
2631 
2632 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2633 	    "==> hxge_map_rxdma_channel_cfg_ring"));
2634 
2635 	cntl_dmap = *dma_rbr_cntl_p;
2636 
2637 	/*
2638 	 * Map in the receive block ring
2639 	 */
	/*
	 * NOTE(review): the literal 4 is presumably the size in bytes of one
	 * RBR descriptor (they are written as uint32_t below) -- confirm
	 * against hxge_setup_dma_common().
	 */
2640 	rbrp = *rbr_p;
2641 	dmap = (p_hxge_dma_common_t)&rbrp->rbr_desc;
2642 	hxge_setup_dma_common(dmap, cntl_dmap, rbrp->rbb_max, 4);
2643 
2644 	/*
2645 	 * Zero out buffer block ring descriptors.
2646 	 */
2647 	bzero((caddr_t)dmap->kaddrp, dmap->alength);
2648 
	/* RBR config A carries the ring base address and the ring length. */
2649 	rcfga_p = &(rbrp->rbr_cfga);
2650 	rcfgb_p = &(rbrp->rbr_cfgb);
2651 	kick_p = &(rbrp->rbr_kick);
2652 	rcfga_p->value = 0;
2653 	rcfgb_p->value = 0;
2654 	kick_p->value = 0;
2655 	rbrp->rbr_addr = dmap->dma_cookie.dmac_laddress;
2656 	rcfga_p->value = (rbrp->rbr_addr &
2657 	    (RBR_CFIG_A_STDADDR_MASK | RBR_CFIG_A_STDADDR_BASE_MASK));
2658 	rcfga_p->value |= ((uint64_t)rbrp->rbb_max << RBR_CFIG_A_LEN_SHIFT);
2659 
2660 	/* XXXX: how to choose packet buffer sizes */
	/* All three packet buffer sizes are marked valid. */
2661 	rcfgb_p->bits.bufsz0 = rbrp->pkt_buf_size0;
2662 	rcfgb_p->bits.vld0 = 1;
2663 	rcfgb_p->bits.bufsz1 = rbrp->pkt_buf_size1;
2664 	rcfgb_p->bits.vld1 = 1;
2665 	rcfgb_p->bits.bufsz2 = rbrp->pkt_buf_size2;
2666 	rcfgb_p->bits.vld2 = 1;
2667 	rcfgb_p->bits.bksize = hxgep->rx_bksize_code;
2668 
2669 	/*
2670 	 * For each buffer block, enter receive block address to the ring.
2671 	 */
2672 	rbr_vaddrp = (uint32_t *)dmap->kaddrp;
2673 	rbrp->rbr_desc_vp = (uint32_t *)dmap->kaddrp;
2674 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2675 	    "==> hxge_map_rxdma_channel_cfg_ring: channel %d "
2676 	    "rbr_vaddrp $%p", dma_channel, rbr_vaddrp));
2677 
	/*
	 * Each descriptor is the block's DMA address shifted right by
	 * RBR_BKADDR_SHIFT and truncated to 32 bits.  Each message block is
	 * also tied back to this driver instance and ring.
	 */
2678 	rx_msg_ring = rbrp->rx_msg_ring;
2679 	for (i = 0; i < rbrp->tnblocks; i++) {
2680 		rx_msg_p = rx_msg_ring[i];
2681 		rx_msg_p->hxgep = hxgep;
2682 		rx_msg_p->rx_rbr_p = rbrp;
2683 		bkaddr = (uint32_t)
2684 		    ((rx_msg_p->buf_dma.dma_cookie.dmac_laddress >>
2685 		    RBR_BKADDR_SHIFT));
2686 		rx_msg_p->free = B_FALSE;
2687 		rx_msg_p->max_usage_cnt = 0xbaddcafe;
2688 
2689 		*rbr_vaddrp++ = bkaddr;
2690 	}
2691 
	/*
	 * The kick value covers all rbb_max blocks and the write index
	 * starts at the last ring entry.
	 */
2692 	kick_p->bits.bkadd = rbrp->rbb_max;
2693 	rbrp->rbr_wr_index = (rbrp->rbb_max - 1);
2694 
2695 	rbrp->rbr_rd_index = 0;
2696 
2697 	rbrp->rbr_consumed = 0;
2698 	rbrp->rbr_used = 0;
2699 	rbrp->rbr_use_bcopy = B_TRUE;
2700 	rbrp->rbr_bufsize_type = RCR_PKTBUFSZ_0;
2701 
2702 	/*
2703 	 * Do bcopy on packets greater than bcopy size once the lo threshold is
2704 	 * reached. This lo threshold should be less than the hi threshold.
2705 	 *
2706 	 * Do bcopy on every packet once the hi threshold is reached.
2707 	 */
	/* Note: the two clamps below modify the global tunables themselves. */
2708 	if (hxge_rx_threshold_lo >= hxge_rx_threshold_hi) {
2709 		/* default it to use hi */
2710 		hxge_rx_threshold_lo = hxge_rx_threshold_hi;
2711 	}
2712 	if (hxge_rx_buf_size_type > HXGE_RBR_TYPE2) {
2713 		hxge_rx_buf_size_type = HXGE_RBR_TYPE2;
2714 	}
2715 	rbrp->rbr_bufsize_type = hxge_rx_buf_size_type;
2716 
	/*
	 * Hi threshold: COPY_NONE (and any unrecognized value) disables
	 * bcopy, COPY_1..COPY_7 scale rbb_max by the tunable over
	 * HXGE_RX_BCOPY_SCALE, and COPY_ALL sets the threshold to 0.
	 */
2717 	switch (hxge_rx_threshold_hi) {
2718 	default:
2719 	case HXGE_RX_COPY_NONE:
2720 		/* Do not do bcopy at all */
2721 		rbrp->rbr_use_bcopy = B_FALSE;
2722 		rbrp->rbr_threshold_hi = rbrp->rbb_max;
2723 		break;
2724 
2725 	case HXGE_RX_COPY_1:
2726 	case HXGE_RX_COPY_2:
2727 	case HXGE_RX_COPY_3:
2728 	case HXGE_RX_COPY_4:
2729 	case HXGE_RX_COPY_5:
2730 	case HXGE_RX_COPY_6:
2731 	case HXGE_RX_COPY_7:
2732 		rbrp->rbr_threshold_hi =
2733 		    rbrp->rbb_max * (hxge_rx_threshold_hi) /
2734 		    HXGE_RX_BCOPY_SCALE;
2735 		break;
2736 
2737 	case HXGE_RX_COPY_ALL:
2738 		rbrp->rbr_threshold_hi = 0;
2739 		break;
2740 	}
2741 
	/*
	 * Lo threshold: same mapping as above.  COPY_NONE (or an
	 * unrecognized value) here also turns bcopy off, even if the hi
	 * threshold left it enabled.
	 */
2742 	switch (hxge_rx_threshold_lo) {
2743 	default:
2744 	case HXGE_RX_COPY_NONE:
2745 		/* Do not do bcopy at all */
2746 		if (rbrp->rbr_use_bcopy) {
2747 			rbrp->rbr_use_bcopy = B_FALSE;
2748 		}
2749 		rbrp->rbr_threshold_lo = rbrp->rbb_max;
2750 		break;
2751 
2752 	case HXGE_RX_COPY_1:
2753 	case HXGE_RX_COPY_2:
2754 	case HXGE_RX_COPY_3:
2755 	case HXGE_RX_COPY_4:
2756 	case HXGE_RX_COPY_5:
2757 	case HXGE_RX_COPY_6:
2758 	case HXGE_RX_COPY_7:
2759 		rbrp->rbr_threshold_lo =
2760 		    rbrp->rbb_max * (hxge_rx_threshold_lo) /
2761 		    HXGE_RX_BCOPY_SCALE;
2762 		break;
2763 
2764 	case HXGE_RX_COPY_ALL:
2765 		rbrp->rbr_threshold_lo = 0;
2766 		break;
2767 	}
2768 
2769 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
2770 	    "hxge_map_rxdma_channel_cfg_ring: channel %d rbb_max %d "
2771 	    "rbrp->rbr_bufsize_type %d rbb_threshold_hi %d "
2772 	    "rbb_threshold_lo %d",
2773 	    dma_channel, rbrp->rbb_max, rbrp->rbr_bufsize_type,
2774 	    rbrp->rbr_threshold_hi, rbrp->rbr_threshold_lo));
2775 
2776 	/* Map in the receive completion ring */
2777 	rcrp = (p_rx_rcr_ring_t)KMEM_ZALLOC(sizeof (rx_rcr_ring_t), KM_SLEEP);
2778 	MUTEX_INIT(&rcrp->lock, NULL, MUTEX_DRIVER,
2779 	    (void *) hxgep->interrupt_cookie);
2780 	rcrp->rdc = dma_channel;
2781 	rcrp->hxgep = hxgep;
2782 
	/*
	 * NOTE(review): comp_wrap_mask = size - 1 is only a valid wrap mask
	 * if hxge_port_rcr_size is a power of two -- confirm where the size
	 * is set.
	 */
2783 	hxge_port_rcr_size = hxgep->hxge_port_rcr_size;
2784 	rcrp->comp_size = hxge_port_rcr_size;
2785 	rcrp->comp_wrap_mask = hxge_port_rcr_size - 1;
2786 
2787 	cntl_dmap = *dma_rcr_cntl_p;
2788 
2789 	dmap = (p_hxge_dma_common_t)&rcrp->rcr_desc;
2790 	hxge_setup_dma_common(dmap, cntl_dmap, rcrp->comp_size,
2791 	    sizeof (rcr_entry_t));
2792 	rcrp->comp_rd_index = 0;
2793 	rcrp->comp_wt_index = 0;
	/*
	 * Head/first/last pointers are kept twice: the *_p fields come from
	 * DMA_COMMON_VPTR() and the *_pp fields from DMA_COMMON_IOADDR().
	 */
2794 	rcrp->rcr_desc_rd_head_p = rcrp->rcr_desc_first_p =
2795 	    (p_rcr_entry_t)DMA_COMMON_VPTR(rcrp->rcr_desc);
2796 #if defined(__i386)
2797 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
2798 	    (p_rcr_entry_t)(uint32_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
2799 #else
2800 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
2801 	    (p_rcr_entry_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
2802 #endif
2803 	rcrp->rcr_desc_last_p = rcrp->rcr_desc_rd_head_p +
2804 	    (hxge_port_rcr_size - 1);
2805 	rcrp->rcr_desc_last_pp = rcrp->rcr_desc_rd_head_pp +
2806 	    (hxge_port_rcr_size - 1);
2807 
	/*
	 * Keep the low 19 bits of the ring's I/O address, expressed in
	 * units of 8 bytes.
	 */
2808 	rcrp->rcr_tail_begin = DMA_COMMON_IOADDR(rcrp->rcr_desc);
2809 	rcrp->rcr_tail_begin = (rcrp->rcr_tail_begin & 0x7ffffULL) >> 3;
2810 
2811 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2812 	    "==> hxge_map_rxdma_channel_cfg_ring: channel %d "
2813 	    "rbr_vaddrp $%p rcr_desc_rd_head_p $%p "
2814 	    "rcr_desc_rd_head_pp $%p rcr_desc_rd_last_p $%p "
2815 	    "rcr_desc_rd_last_pp $%p ",
2816 	    dma_channel, rbr_vaddrp, rcrp->rcr_desc_rd_head_p,
2817 	    rcrp->rcr_desc_rd_head_pp, rcrp->rcr_desc_last_p,
2818 	    rcrp->rcr_desc_last_pp));
2819 
2820 	/*
2821 	 * Zero out completion ring descriptors.
2822 	 */
2823 	bzero((caddr_t)dmap->kaddrp, dmap->alength);
2824 	rcrp->intr_timeout = hxgep->intr_timeout;
2825 	rcrp->intr_threshold = hxgep->intr_threshold;
2826 	rcrp->full_hdr_flag = B_FALSE;
2827 	rcrp->sw_priv_hdr_len = 0;
2828 
	/* RCR config A carries the ring base address and the ring length. */
2829 	cfga_p = &(rcrp->rcr_cfga);
2830 	cfgb_p = &(rcrp->rcr_cfgb);
2831 	cfga_p->value = 0;
2832 	cfgb_p->value = 0;
2833 	rcrp->rcr_addr = dmap->dma_cookie.dmac_laddress;
2834 
2835 	cfga_p->value = (rcrp->rcr_addr &
2836 	    (RCRCFIG_A_STADDR_MASK | RCRCFIG_A_STADDR_BASE_MASK));
2837 
2838 	cfga_p->value |= ((uint64_t)rcrp->comp_size << RCRCFIG_A_LEN_SHIF);
2839 
2840 	/*
2841 	 * Timeout should be set based on the system clock divider. The
2842 	 * following timeout value of 1 assumes that the granularity (1000) is
2843 	 * 3 microseconds running at 300MHz.
2844 	 */
2845 	cfgb_p->bits.pthres = rcrp->intr_threshold;
2846 	cfgb_p->bits.timeout = rcrp->intr_timeout;
2847 	cfgb_p->bits.entout = 1;
2848 
2849 	/* Map in the mailbox */
2850 	cntl_dmap = *dma_mbox_cntl_p;
2851 	mboxp = (p_rx_mbox_t)KMEM_ZALLOC(sizeof (rx_mbox_t), KM_SLEEP);
2852 	dmap = (p_hxge_dma_common_t)&mboxp->rx_mbox;
2853 	hxge_setup_dma_common(dmap, cntl_dmap, 1, sizeof (rxdma_mailbox_t));
2854 	cfig1_p = (rdc_rx_cfg1_t *)&mboxp->rx_cfg1;
2855 	cfig2_p = (rdc_rx_cfg2_t *)&mboxp->rx_cfg2;
2856 	cfig1_p->value = cfig2_p->value = 0;
2857 
2858 	mboxp->mbox_addr = dmap->dma_cookie.dmac_laddress;
2859 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2860 	    "==> hxge_map_rxdma_channel_cfg_ring: "
2861 	    "channel %d cfg1 0x%016llx cfig2 0x%016llx cookie 0x%016llx",
2862 	    dma_channel, cfig1_p->value, cfig2_p->value,
2863 	    mboxp->mbox_addr));
2864 
	/* Bits 32..43 of the mailbox DMA address go into config 1. */
2865 	dmaaddrp = (uint32_t)((dmap->dma_cookie.dmac_laddress >> 32) & 0xfff);
2866 	cfig1_p->bits.mbaddr_h = dmaaddrp;
2867 
	/*
	 * NOTE(review): the first assignment below is immediately
	 * overwritten by the second one, so it has no effect.
	 */
2868 	dmaaddrp = (uint32_t)(dmap->dma_cookie.dmac_laddress & 0xffffffff);
2869 	dmaaddrp = (uint32_t)(dmap->dma_cookie.dmac_laddress &
2870 	    RXDMA_CFIG2_MBADDR_L_MASK);
2871 
2872 	cfig2_p->bits.mbaddr_l = (dmaaddrp >> RXDMA_CFIG2_MBADDR_L_SHIFT);
2873 
2874 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2875 	    "==> hxge_map_rxdma_channel_cfg_ring: channel %d damaddrp $%p "
2876 	    "cfg1 0x%016llx cfig2 0x%016llx",
2877 	    dma_channel, dmaaddrp, cfig1_p->value, cfig2_p->value));
2878 
2879 	cfig2_p->bits.full_hdr = rcrp->full_hdr_flag;
2880 	cfig2_p->bits.offset = rcrp->sw_priv_hdr_len;
2881 
	/* Cross-link the two rings and hand the new objects to the caller. */
2882 	rbrp->rx_rcr_p = rcrp;
2883 	rcrp->rx_rbr_p = rbrp;
2884 	*rcr_p = rcrp;
2885 	*rx_mbox_p = mboxp;
2886 
2887 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2888 	    "<== hxge_map_rxdma_channel_cfg_ring status 0x%08x", status));
2889 	return (status);
2890 }
2891 
/*
 * hxge_unmap_rxdma_channel_cfg_ring
 *
 * Release the completion ring and mailbox structures of one receive DMA
 * channel: destroy the completion ring's lock, then free both structures.
 * Only the two structures themselves are freed here.
 *
 * Neither pointer is checked for NULL; rcr_p is dereferenced by the entry
 * debug message and by MUTEX_DESTROY().
 *
 *	hxgep		- per-instance driver state (used for debug output)
 *	rcr_p		- receive completion ring to free
 *	rx_mbox_p	- mailbox to free
 */
2892 /*ARGSUSED*/
2893 static void
2894 hxge_unmap_rxdma_channel_cfg_ring(p_hxge_t hxgep,
2895     p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p)
2896 {
2897 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2898 	    "==> hxge_unmap_rxdma_channel_cfg_ring: channel %d", rcr_p->rdc));
2899 
2900 	MUTEX_DESTROY(&rcr_p->lock);
2901 	KMEM_FREE(rcr_p, sizeof (rx_rcr_ring_t));
2902 	KMEM_FREE(rx_mbox_p, sizeof (rx_mbox_t));
2903 
2904 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2905 	    "<== hxge_unmap_rxdma_channel_cfg_ring"));
2906 }
2907 
2908 static hxge_status_t
2909 hxge_map_rxdma_channel_buf_ring(p_hxge_t hxgep, uint16_t channel,
2910     p_hxge_dma_common_t *dma_buf_p,
2911     p_rx_rbr_ring_t *rbr_p, uint32_t num_chunks)
2912 {
2913 	p_rx_rbr_ring_t		rbrp;
2914 	p_hxge_dma_common_t	dma_bufp, tmp_bufp;
2915 	p_rx_msg_t		*rx_msg_ring;
2916 	p_rx_msg_t		rx_msg_p;
2917 	p_mblk_t		mblk_p;
2918 
2919 	rxring_info_t *ring_info;
2920 	hxge_status_t status = HXGE_OK;
2921 	int i, j, index;
2922 	uint32_t size, bsize, nblocks, nmsgs;
2923 
2924 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2925 	    "==> hxge_map_rxdma_channel_buf_ring: channel %d", channel));
2926 
2927 	dma_bufp = tmp_bufp = *dma_buf_p;
2928 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2929 	    " hxge_map_rxdma_channel_buf_ring: channel %d to map %d "
2930 	    "chunks bufp 0x%016llx", channel, num_chunks, dma_bufp));
2931 
2932 	nmsgs = 0;
2933 	for (i = 0; i < num_chunks; i++, tmp_bufp++) {
2934 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2935 		    "==> hxge_map_rxdma_channel_buf_ring: channel %d "
2936 		    "bufp 0x%016llx nblocks %d nmsgs %d",
2937 		    channel, tmp_bufp, tmp_bufp->nblocks, nmsgs));
2938 		nmsgs += tmp_bufp->nblocks;
2939 	}
2940 	if (!nmsgs) {
2941 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2942 		    "<== hxge_map_rxdma_channel_buf_ring: channel %d "
2943 		    "no msg blocks", channel));
2944 		status = HXGE_ERROR;
2945 		goto hxge_map_rxdma_channel_buf_ring_exit;
2946 	}
2947 	rbrp = (p_rx_rbr_ring_t)KMEM_ZALLOC(sizeof (rx_rbr_ring_t), KM_SLEEP);
2948 
2949 	size = nmsgs * sizeof (p_rx_msg_t);
2950 	rx_msg_ring = KMEM_ZALLOC(size, KM_SLEEP);
2951 	ring_info = (rxring_info_t *)KMEM_ZALLOC(sizeof (rxring_info_t),
2952 	    KM_SLEEP);
2953 
2954 	MUTEX_INIT(&rbrp->lock, NULL, MUTEX_DRIVER,
2955 	    (void *) hxgep->interrupt_cookie);
2956 	MUTEX_INIT(&rbrp->post_lock, NULL, MUTEX_DRIVER,
2957 	    (void *) hxgep->interrupt_cookie);
2958 
2959 	rbrp->rdc = channel;
2960 	rbrp->num_blocks = num_chunks;
2961 	rbrp->tnblocks = nmsgs;
2962 	rbrp->rbb_max = nmsgs;
2963 	rbrp->rbr_max_size = nmsgs;
2964 	rbrp->rbr_wrap_mask = (rbrp->rbb_max - 1);
2965 
2966 	/*
2967 	 * Buffer sizes suggested by NIU architect. 256, 512 and 2K.
2968 	 */
2969 
2970 	switch (hxgep->rx_bksize_code) {
2971 	case RBR_BKSIZE_4K:
2972 		rbrp->pkt_buf_size0 = RBR_BUFSZ0_256B;
2973 		rbrp->pkt_buf_size0_bytes = RBR_BUFSZ0_256_BYTES;
2974 		rbrp->hpi_pkt_buf_size0 = SIZE_256B;
2975 		break;
2976 	case RBR_BKSIZE_8K:
2977 		/* Use 512 to avoid possible rcr_full condition */
2978 		rbrp->pkt_buf_size0 = RBR_BUFSZ0_512B;
2979 		rbrp->pkt_buf_size0_bytes = RBR_BUFSZ0_512_BYTES;
2980 		rbrp->hpi_pkt_buf_size0 = SIZE_512B;
2981 		break;
2982 	}
2983 
2984 	rbrp->pkt_buf_size1 = RBR_BUFSZ1_1K;
2985 	rbrp->pkt_buf_size1_bytes = RBR_BUFSZ1_1K_BYTES;
2986 	rbrp->hpi_pkt_buf_size1 = SIZE_1KB;
2987 
2988 	rbrp->block_size = hxgep->rx_default_block_size;
2989 
2990 	if (!hxgep->param_arr[param_accept_jumbo].value) {
2991 		rbrp->pkt_buf_size2 = RBR_BUFSZ2_2K;
2992 		rbrp->pkt_buf_size2_bytes = RBR_BUFSZ2_2K_BYTES;
2993 		rbrp->hpi_pkt_buf_size2 = SIZE_2KB;
2994 	} else {
2995 		rbrp->hpi_pkt_buf_size2 = SIZE_4KB;
2996 		rbrp->pkt_buf_size2 = RBR_BUFSZ2_4K;
2997 		rbrp->pkt_buf_size2_bytes = RBR_BUFSZ2_4K_BYTES;
2998 	}
2999 
3000 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3001 	    "==> hxge_map_rxdma_channel_buf_ring: channel %d "
3002 	    "actual rbr max %d rbb_max %d nmsgs %d "
3003 	    "rbrp->block_size %d default_block_size %d "
3004 	    "(config hxge_rbr_size %d hxge_rbr_spare_size %d)",
3005 	    channel, rbrp->rbr_max_size, rbrp->rbb_max, nmsgs,
3006 	    rbrp->block_size, hxgep->rx_default_block_size,
3007 	    hxge_rbr_size, hxge_rbr_spare_size));
3008 
3009 	/*
3010 	 * Map in buffers from the buffer pool.
3011 	 * Note that num_blocks is the num_chunks. For Sparc, there is likely
3012 	 * only one chunk. For x86, there will be many chunks.
3013 	 * Loop over chunks.
3014 	 */
3015 	index = 0;
3016 	for (i = 0; i < rbrp->num_blocks; i++, dma_bufp++) {
3017 		bsize = dma_bufp->block_size;
3018 		nblocks = dma_bufp->nblocks;
3019 #if defined(__i386)
3020 		ring_info->buffer[i].dvma_addr = (uint32_t)dma_bufp->ioaddr_pp;
3021 #else
3022 		ring_info->buffer[i].dvma_addr = (uint64_t)dma_bufp->ioaddr_pp;
3023 #endif
3024 		ring_info->buffer[i].buf_index = i;
3025 		ring_info->buffer[i].buf_size = dma_bufp->alength;
3026 		ring_info->buffer[i].start_index = index;
3027 #if defined(__i386)
3028 		ring_info->buffer[i].kaddr = (uint32_t)dma_bufp->kaddrp;
3029 #else
3030 		ring_info->buffer[i].kaddr = (uint64_t)dma_bufp->kaddrp;
3031 #endif
3032 
3033 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3034 		    " hxge_map_rxdma_channel_buf_ring: map channel %d "
3035 		    "chunk %d nblocks %d chunk_size %x block_size 0x%x "
3036 		    "dma_bufp $%p dvma_addr $%p", channel, i,
3037 		    dma_bufp->nblocks,
3038 		    ring_info->buffer[i].buf_size, bsize, dma_bufp,
3039 		    ring_info->buffer[i].dvma_addr));
3040 
3041 		/* loop over blocks within a chunk */
3042 		for (j = 0; j < nblocks; j++) {
3043 			if ((rx_msg_p = hxge_allocb(bsize, BPRI_LO,
3044 			    dma_bufp)) == NULL) {
3045 				HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3046 				    "allocb failed (index %d i %d j %d)",
3047 				    index, i, j));
3048 				goto hxge_map_rxdma_channel_buf_ring_fail1;
3049 			}
3050 			rx_msg_ring[index] = rx_msg_p;
3051 			rx_msg_p->block_index = index;
3052 			rx_msg_p->shifted_addr = (uint32_t)
3053 			    ((rx_msg_p->buf_dma.dma_cookie.dmac_laddress >>
3054 			    RBR_BKADDR_SHIFT));
3055 			/*
3056 			 * Too much output
3057 			 * HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3058 			 *	"index %d j %d rx_msg_p $%p mblk %p",
3059 			 *	index, j, rx_msg_p, rx_msg_p->rx_mblk_p));
3060 			 */
3061 			mblk_p = rx_msg_p->rx_mblk_p;
3062 			mblk_p->b_wptr = mblk_p->b_rptr + bsize;
3063 
3064 			rbrp->rbr_ref_cnt++;
3065 			index++;
3066 			rx_msg_p->buf_dma.dma_channel = channel;
3067 		}
3068 	}
3069 	if (i < rbrp->num_blocks) {
3070 		goto hxge_map_rxdma_channel_buf_ring_fail1;
3071 	}
3072 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3073 	    "hxge_map_rxdma_channel_buf_ring: done buf init "
3074 	    "channel %d msg block entries %d", channel, index));
3075 	ring_info->block_size_mask = bsize - 1;
3076 	rbrp->rx_msg_ring = rx_msg_ring;
3077 	rbrp->dma_bufp = dma_buf_p;
3078 	rbrp->ring_info = ring_info;
3079 
3080 	status = hxge_rxbuf_index_info_init(hxgep, rbrp);
3081 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, " hxge_map_rxdma_channel_buf_ring: "
3082 	    "channel %d done buf info init", channel));
3083 
3084 	/*
3085 	 * Finally, permit hxge_freeb() to call hxge_post_page().
3086 	 */
3087 	rbrp->rbr_state = RBR_POSTING;
3088 
3089 	*rbr_p = rbrp;
3090 
3091 	goto hxge_map_rxdma_channel_buf_ring_exit;
3092 
3093 hxge_map_rxdma_channel_buf_ring_fail1:
3094 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3095 	    " hxge_map_rxdma_channel_buf_ring: failed channel (0x%x)",
3096 	    channel, status));
3097 
3098 	index--;
3099 	for (; index >= 0; index--) {
3100 		rx_msg_p = rx_msg_ring[index];
3101 		if (rx_msg_p != NULL) {
3102 			freeb(rx_msg_p->rx_mblk_p);
3103 			rx_msg_ring[index] = NULL;
3104 		}
3105 	}
3106 
3107 hxge_map_rxdma_channel_buf_ring_fail:
3108 	MUTEX_DESTROY(&rbrp->post_lock);
3109 	MUTEX_DESTROY(&rbrp->lock);
3110 	KMEM_FREE(ring_info, sizeof (rxring_info_t));
3111 	KMEM_FREE(rx_msg_ring, size);
3112 	KMEM_FREE(rbrp, sizeof (rx_rbr_ring_t));
3113 
3114 	status = HXGE_ERROR;
3115 
3116 hxge_map_rxdma_channel_buf_ring_exit:
3117 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3118 	    "<== hxge_map_rxdma_channel_buf_ring status 0x%08x", status));
3119 
3120 	return (status);
3121 }
3122 
3123 /*ARGSUSED*/
3124 static void
3125 hxge_unmap_rxdma_channel_buf_ring(p_hxge_t hxgep,
3126     p_rx_rbr_ring_t rbr_p)
3127 {
3128 	p_rx_msg_t	*rx_msg_ring;
3129 	p_rx_msg_t	rx_msg_p;
3130 	rxring_info_t	*ring_info;
3131 	int		i;
3132 	uint32_t	size;
3133 
3134 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3135 	    "==> hxge_unmap_rxdma_channel_buf_ring"));
3136 	if (rbr_p == NULL) {
3137 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3138 		    "<== hxge_unmap_rxdma_channel_buf_ring: NULL rbrp"));
3139 		return;
3140 	}
3141 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3142 	    "==> hxge_unmap_rxdma_channel_buf_ring: channel %d", rbr_p->rdc));
3143 
3144 	rx_msg_ring = rbr_p->rx_msg_ring;
3145 	ring_info = rbr_p->ring_info;
3146 
3147 	if (rx_msg_ring == NULL || ring_info == NULL) {
3148 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3149 		    "<== hxge_unmap_rxdma_channel_buf_ring: "
3150 		    "rx_msg_ring $%p ring_info $%p", rx_msg_p, ring_info));
3151 		return;
3152 	}
3153 
3154 	size = rbr_p->tnblocks * sizeof (p_rx_msg_t);
3155 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3156 	    " hxge_unmap_rxdma_channel_buf_ring: channel %d chunks %d "
3157 	    "tnblocks %d (max %d) size ptrs %d ", rbr_p->rdc, rbr_p->num_blocks,
3158 	    rbr_p->tnblocks, rbr_p->rbr_max_size, size));
3159 
3160 	for (i = 0; i < rbr_p->tnblocks; i++) {
3161 		rx_msg_p = rx_msg_ring[i];
3162 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3163 		    " hxge_unmap_rxdma_channel_buf_ring: "
3164 		    "rx_msg_p $%p", rx_msg_p));
3165 		if (rx_msg_p != NULL) {
3166 			freeb(rx_msg_p->rx_mblk_p);
3167 			rx_msg_ring[i] = NULL;
3168 		}
3169 	}
3170 
3171 	/*
3172 	 * We no longer may use the mutex <post_lock>. By setting
3173 	 * <rbr_state> to anything but POSTING, we prevent
3174 	 * hxge_post_page() from accessing a dead mutex.
3175 	 */
3176 	rbr_p->rbr_state = RBR_UNMAPPING;
3177 	MUTEX_DESTROY(&rbr_p->post_lock);
3178 
3179 	MUTEX_DESTROY(&rbr_p->lock);
3180 	KMEM_FREE(ring_info, sizeof (rxring_info_t));
3181 	KMEM_FREE(rx_msg_ring, size);
3182 
3183 	if (rbr_p->rbr_ref_cnt == 0) {
3184 		/* This is the normal state of affairs. */
3185 		KMEM_FREE(rbr_p, sizeof (*rbr_p));
3186 	} else {
3187 		/*
3188 		 * Some of our buffers are still being used.
3189 		 * Therefore, tell hxge_freeb() this ring is
3190 		 * unmapped, so it may free <rbr_p> for us.
3191 		 */
3192 		rbr_p->rbr_state = RBR_UNMAPPED;
3193 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3194 		    "unmap_rxdma_buf_ring: %d %s outstanding.",
3195 		    rbr_p->rbr_ref_cnt,
3196 		    rbr_p->rbr_ref_cnt == 1 ? "msg" : "msgs"));
3197 	}
3198 
3199 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3200 	    "<== hxge_unmap_rxdma_channel_buf_ring"));
3201 }
3202 
3203 static hxge_status_t
3204 hxge_rxdma_hw_start_common(p_hxge_t hxgep)
3205 {
3206 	hxge_status_t status = HXGE_OK;
3207 
3208 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start_common"));
3209 
3210 	/*
3211 	 * Load the sharable parameters by writing to the function zero control
3212 	 * registers. These FZC registers should be initialized only once for
3213 	 * the entire chip.
3214 	 */
3215 	(void) hxge_init_fzc_rx_common(hxgep);
3216 
3217 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start_common"));
3218 
3219 	return (status);
3220 }
3221 
3222 static hxge_status_t
3223 hxge_rxdma_hw_start(p_hxge_t hxgep)
3224 {
3225 	int			i, ndmas;
3226 	uint16_t		channel;
3227 	p_rx_rbr_rings_t	rx_rbr_rings;
3228 	p_rx_rbr_ring_t		*rbr_rings;
3229 	p_rx_rcr_rings_t	rx_rcr_rings;
3230 	p_rx_rcr_ring_t		*rcr_rings;
3231 	p_rx_mbox_areas_t	rx_mbox_areas_p;
3232 	p_rx_mbox_t		*rx_mbox_p;
3233 	hxge_status_t		status = HXGE_OK;
3234 
3235 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start"));
3236 
3237 	rx_rbr_rings = hxgep->rx_rbr_rings;
3238 	rx_rcr_rings = hxgep->rx_rcr_rings;
3239 	if (rx_rbr_rings == NULL || rx_rcr_rings == NULL) {
3240 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3241 		    "<== hxge_rxdma_hw_start: NULL ring pointers"));
3242 		return (HXGE_ERROR);
3243 	}
3244 
3245 	ndmas = rx_rbr_rings->ndmas;
3246 	if (ndmas == 0) {
3247 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3248 		    "<== hxge_rxdma_hw_start: no dma channel allocated"));
3249 		return (HXGE_ERROR);
3250 	}
3251 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3252 	    "==> hxge_rxdma_hw_start (ndmas %d)", ndmas));
3253 
3254 	/*
3255 	 * Scrub the RDC Rx DMA Prefetch Buffer Command.
3256 	 */
3257 	for (i = 0; i < 128; i++) {
3258 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_PREF_CMD, i);
3259 	}
3260 
3261 	/*
3262 	 * Scrub Rx DMA Shadow Tail Command.
3263 	 */
3264 	for (i = 0; i < 64; i++) {
3265 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_SHADOW_CMD, i);
3266 	}
3267 
3268 	/*
3269 	 * Scrub Rx DMA Control Fifo Command.
3270 	 */
3271 	for (i = 0; i < 512; i++) {
3272 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_CTRL_FIFO_CMD, i);
3273 	}
3274 
3275 	/*
3276 	 * Scrub Rx DMA Data Fifo Command.
3277 	 */
3278 	for (i = 0; i < 1536; i++) {
3279 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_DATA_FIFO_CMD, i);
3280 	}
3281 
3282 	/*
3283 	 * Reset the FIFO Error Stat.
3284 	 */
3285 	HXGE_REG_WR64(hxgep->hpi_handle, RDC_FIFO_ERR_STAT, 0xFF);
3286 
3287 	/* Set the error mask to receive interrupts */
3288 	HXGE_REG_WR64(hxgep->hpi_handle, RDC_FIFO_ERR_INT_MASK, 0x0);
3289 
3290 	rbr_rings = rx_rbr_rings->rbr_rings;
3291 	rcr_rings = rx_rcr_rings->rcr_rings;
3292 	rx_mbox_areas_p = hxgep->rx_mbox_areas_p;
3293 	if (rx_mbox_areas_p) {
3294 		rx_mbox_p = rx_mbox_areas_p->rxmbox_areas;
3295 	}
3296 
3297 	for (i = 0; i < ndmas; i++) {
3298 		channel = rbr_rings[i]->rdc;
3299 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3300 		    "==> hxge_rxdma_hw_start (ndmas %d) channel %d",
3301 		    ndmas, channel));
3302 		status = hxge_rxdma_start_channel(hxgep, channel,
3303 		    (p_rx_rbr_ring_t)rbr_rings[i],
3304 		    (p_rx_rcr_ring_t)rcr_rings[i],
3305 		    (p_rx_mbox_t)rx_mbox_p[i], rbr_rings[i]->rbb_max);
3306 		if (status != HXGE_OK) {
3307 			goto hxge_rxdma_hw_start_fail1;
3308 		}
3309 	}
3310 
3311 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start: "
3312 	    "rx_rbr_rings 0x%016llx rings 0x%016llx",
3313 	    rx_rbr_rings, rx_rcr_rings));
3314 	goto hxge_rxdma_hw_start_exit;
3315 
3316 hxge_rxdma_hw_start_fail1:
3317 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3318 	    "==> hxge_rxdma_hw_start: disable "
3319 	    "(status 0x%x channel %d i %d)", status, channel, i));
3320 	for (; i >= 0; i--) {
3321 		channel = rbr_rings[i]->rdc;
3322 		(void) hxge_rxdma_stop_channel(hxgep, channel);
3323 	}
3324 
3325 hxge_rxdma_hw_start_exit:
3326 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3327 	    "==> hxge_rxdma_hw_start: (status 0x%x)", status));
3328 	return (status);
3329 }
3330 
3331 static void
3332 hxge_rxdma_hw_stop(p_hxge_t hxgep)
3333 {
3334 	int			i, ndmas;
3335 	uint16_t		channel;
3336 	p_rx_rbr_rings_t	rx_rbr_rings;
3337 	p_rx_rbr_ring_t		*rbr_rings;
3338 	p_rx_rcr_rings_t	rx_rcr_rings;
3339 
3340 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_stop"));
3341 
3342 	rx_rbr_rings = hxgep->rx_rbr_rings;
3343 	rx_rcr_rings = hxgep->rx_rcr_rings;
3344 
3345 	if (rx_rbr_rings == NULL || rx_rcr_rings == NULL) {
3346 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3347 		    "<== hxge_rxdma_hw_stop: NULL ring pointers"));
3348 		return;
3349 	}
3350 
3351 	ndmas = rx_rbr_rings->ndmas;
3352 	if (!ndmas) {
3353 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3354 		    "<== hxge_rxdma_hw_stop: no dma channel allocated"));
3355 		return;
3356 	}
3357 
3358 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3359 	    "==> hxge_rxdma_hw_stop (ndmas %d)", ndmas));
3360 
3361 	rbr_rings = rx_rbr_rings->rbr_rings;
3362 	for (i = 0; i < ndmas; i++) {
3363 		channel = rbr_rings[i]->rdc;
3364 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3365 		    "==> hxge_rxdma_hw_stop (ndmas %d) channel %d",
3366 		    ndmas, channel));
3367 		(void) hxge_rxdma_stop_channel(hxgep, channel);
3368 	}
3369 
3370 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_stop: "
3371 	    "rx_rbr_rings 0x%016llx rings 0x%016llx",
3372 	    rx_rbr_rings, rx_rcr_rings));
3373 
3374 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_rxdma_hw_stop"));
3375 }
3376 
3377 static hxge_status_t
3378 hxge_rxdma_start_channel(p_hxge_t hxgep, uint16_t channel,
3379     p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t mbox_p,
3380     int n_init_kick)
3381 {
3382 	hpi_handle_t		handle;
3383 	hpi_status_t		rs = HPI_SUCCESS;
3384 	rdc_stat_t		cs;
3385 	rdc_int_mask_t		ent_mask;
3386 	hxge_status_t		status = HXGE_OK;
3387 
3388 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel"));
3389 
3390 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
3391 
3392 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "hxge_rxdma_start_channel: "
3393 	    "hpi handle addr $%p acc $%p",
3394 	    hxgep->hpi_handle.regp, hxgep->hpi_handle.regh));
3395 
3396 	/* Reset RXDMA channel */
3397 	rs = hpi_rxdma_cfg_rdc_reset(handle, channel);
3398 	if (rs != HPI_SUCCESS) {
3399 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3400 		    "==> hxge_rxdma_start_channel: "
3401 		    "reset rxdma failed (0x%08x channel %d)",
3402 		    status, channel));
3403 		return (HXGE_ERROR | rs);
3404 	}
3405 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3406 	    "==> hxge_rxdma_start_channel: reset done: channel %d", channel));
3407 
3408 	/*
3409 	 * Initialize the RXDMA channel specific FZC control configurations.
3410 	 * These FZC registers are pertaining to each RX channel (logical
3411 	 * pages).
3412 	 */
3413 	status = hxge_init_fzc_rxdma_channel(hxgep,
3414 	    channel, rbr_p, rcr_p, mbox_p);
3415 	if (status != HXGE_OK) {
3416 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3417 		    "==> hxge_rxdma_start_channel: "
3418 		    "init fzc rxdma failed (0x%08x channel %d)",
3419 		    status, channel));
3420 		return (status);
3421 	}
3422 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3423 	    "==> hxge_rxdma_start_channel: fzc done"));
3424 
3425 	/*
3426 	 * Zero out the shadow  and prefetch ram.
3427 	 */
3428 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3429 	    "==> hxge_rxdma_start_channel: ram done"));
3430 
3431 	/* Set up the interrupt event masks. */
3432 	ent_mask.value = 0;
3433 	rs = hpi_rxdma_event_mask(handle, OP_SET, channel, &ent_mask);
3434 	if (rs != HPI_SUCCESS) {
3435 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3436 		    "==> hxge_rxdma_start_channel: "
3437 		    "init rxdma event masks failed (0x%08x channel %d)",
3438 		    status, channel));
3439 		return (HXGE_ERROR | rs);
3440 	}
3441 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3442 	    "event done: channel %d (mask 0x%016llx)",
3443 	    channel, ent_mask.value));
3444 
3445 	/*
3446 	 * Load RXDMA descriptors, buffers, mailbox, initialise the receive DMA
3447 	 * channels and enable each DMA channel.
3448 	 */
3449 	status = hxge_enable_rxdma_channel(hxgep,
3450 	    channel, rbr_p, rcr_p, mbox_p, n_init_kick);
3451 	if (status != HXGE_OK) {
3452 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3453 		    " hxge_rxdma_start_channel: "
3454 		    " init enable rxdma failed (0x%08x channel %d)",
3455 		    status, channel));
3456 		return (status);
3457 	}
3458 
3459 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3460 	    "control done - channel %d cs 0x%016llx", channel, cs.value));
3461 
3462 	/*
3463 	 * Initialize the receive DMA control and status register
3464 	 * Note that rdc_stat HAS to be set after RBR and RCR rings are set
3465 	 */
3466 	cs.value = 0;
3467 	cs.bits.mex = 1;
3468 	cs.bits.rcr_thres = 1;
3469 	cs.bits.rcr_to = 1;
3470 	cs.bits.rbr_empty = 1;
3471 	status = hxge_init_rxdma_channel_cntl_stat(hxgep, channel, &cs);
3472 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3473 	    "channel %d rx_dma_cntl_stat 0x%0016llx", channel, cs.value));
3474 	if (status != HXGE_OK) {
3475 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3476 		    "==> hxge_rxdma_start_channel: "
3477 		    "init rxdma control register failed (0x%08x channel %d",
3478 		    status, channel));
3479 		return (status);
3480 	}
3481 
3482 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3483 	    "control done - channel %d cs 0x%016llx", channel, cs.value));
3484 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3485 	    "==> hxge_rxdma_start_channel: enable done"));
3486 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_rxdma_start_channel"));
3487 	return (HXGE_OK);
3488 }
3489 
3490 static hxge_status_t
3491 hxge_rxdma_stop_channel(p_hxge_t hxgep, uint16_t channel)
3492 {
3493 	hpi_handle_t		handle;
3494 	hpi_status_t		rs = HPI_SUCCESS;
3495 	rdc_stat_t		cs;
3496 	rdc_int_mask_t		ent_mask;
3497 	hxge_status_t		status = HXGE_OK;
3498 
3499 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop_channel"));
3500 
3501 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
3502 
3503 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "hxge_rxdma_stop_channel: "
3504 	    "hpi handle addr $%p acc $%p",
3505 	    hxgep->hpi_handle.regp, hxgep->hpi_handle.regh));
3506 
3507 	/* Reset RXDMA channel */
3508 	rs = hpi_rxdma_cfg_rdc_reset(handle, channel);
3509 	if (rs != HPI_SUCCESS) {
3510 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3511 		    " hxge_rxdma_stop_channel: "
3512 		    " reset rxdma failed (0x%08x channel %d)",
3513 		    rs, channel));
3514 		return (HXGE_ERROR | rs);
3515 	}
3516 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3517 	    "==> hxge_rxdma_stop_channel: reset done"));
3518 
3519 	/* Set up the interrupt event masks. */
3520 	ent_mask.value = RDC_INT_MASK_ALL;
3521 	rs = hpi_rxdma_event_mask(handle, OP_SET, channel, &ent_mask);
3522 	if (rs != HPI_SUCCESS) {
3523 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3524 		    "==> hxge_rxdma_stop_channel: "
3525 		    "set rxdma event masks failed (0x%08x channel %d)",
3526 		    rs, channel));
3527 		return (HXGE_ERROR | rs);
3528 	}
3529 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3530 	    "==> hxge_rxdma_stop_channel: event done"));
3531 
3532 	/* Initialize the receive DMA control and status register */
3533 	cs.value = 0;
3534 	status = hxge_init_rxdma_channel_cntl_stat(hxgep, channel, &cs);
3535 
3536 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop_channel: control "
3537 	    " to default (all 0s) 0x%08x", cs.value));
3538 
3539 	if (status != HXGE_OK) {
3540 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3541 		    " hxge_rxdma_stop_channel: init rxdma"
3542 		    " control register failed (0x%08x channel %d",
3543 		    status, channel));
3544 		return (status);
3545 	}
3546 
3547 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3548 	    "==> hxge_rxdma_stop_channel: control done"));
3549 
3550 	/* disable dma channel */
3551 	status = hxge_disable_rxdma_channel(hxgep, channel);
3552 
3553 	if (status != HXGE_OK) {
3554 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3555 		    " hxge_rxdma_stop_channel: "
3556 		    " init enable rxdma failed (0x%08x channel %d)",
3557 		    status, channel));
3558 		return (status);
3559 	}
3560 
3561 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3562 	    "==> hxge_rxdma_stop_channel: disable done"));
3563 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_stop_channel"));
3564 
3565 	return (HXGE_OK);
3566 }
3567 
3568 hxge_status_t
3569 hxge_rxdma_handle_sys_errors(p_hxge_t hxgep)
3570 {
3571 	hpi_handle_t		handle;
3572 	p_hxge_rdc_sys_stats_t	statsp;
3573 	rdc_fifo_err_stat_t	stat;
3574 	hxge_status_t		status = HXGE_OK;
3575 
3576 	handle = hxgep->hpi_handle;
3577 	statsp = (p_hxge_rdc_sys_stats_t)&hxgep->statsp->rdc_sys_stats;
3578 
3579 	/* Clear the int_dbg register in case it is an injected err */
3580 	HXGE_REG_WR64(handle, RDC_FIFO_ERR_INT_DBG, 0x0);
3581 
3582 	/* Get the error status and clear the register */
3583 	HXGE_REG_RD64(handle, RDC_FIFO_ERR_STAT, &stat.value);
3584 	HXGE_REG_WR64(handle, RDC_FIFO_ERR_STAT, stat.value);
3585 
3586 	if (stat.bits.rx_ctrl_fifo_sec) {
3587 		statsp->ctrl_fifo_sec++;
3588 		if (statsp->ctrl_fifo_sec == 1)
3589 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3590 			    "==> hxge_rxdma_handle_sys_errors: "
3591 			    "rx_ctrl_fifo_sec"));
3592 	}
3593 
3594 	if (stat.bits.rx_ctrl_fifo_ded) {
3595 		/* Global fatal error encountered */
3596 		statsp->ctrl_fifo_ded++;
3597 		HXGE_FM_REPORT_ERROR(hxgep, NULL,
3598 		    HXGE_FM_EREPORT_RDMC_CTRL_FIFO_DED);
3599 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3600 		    "==> hxge_rxdma_handle_sys_errors: "
3601 		    "fatal error: rx_ctrl_fifo_ded error"));
3602 	}
3603 
3604 	if (stat.bits.rx_data_fifo_sec) {
3605 		statsp->data_fifo_sec++;
3606 		if (statsp->data_fifo_sec == 1)
3607 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3608 			    "==> hxge_rxdma_handle_sys_errors: "
3609 			    "rx_data_fifo_sec"));
3610 	}
3611 
3612 	if (stat.bits.rx_data_fifo_ded) {
3613 		/* Global fatal error encountered */
3614 		statsp->data_fifo_ded++;
3615 		HXGE_FM_REPORT_ERROR(hxgep, NULL,
3616 		    HXGE_FM_EREPORT_RDMC_DATA_FIFO_DED);
3617 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3618 		    "==> hxge_rxdma_handle_sys_errors: "
3619 		    "fatal error: rx_data_fifo_ded error"));
3620 	}
3621 
3622 	if (stat.bits.rx_ctrl_fifo_ded || stat.bits.rx_data_fifo_ded) {
3623 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3624 		    " hxge_rxdma_handle_sys_errors: fatal error\n"));
3625 		status = hxge_rx_port_fatal_err_recover(hxgep);
3626 		if (status == HXGE_OK) {
3627 			FM_SERVICE_RESTORED(hxgep);
3628 		}
3629 	}
3630 
3631 	return (HXGE_OK);
3632 }
3633 
3634 static hxge_status_t
3635 hxge_rxdma_fatal_err_recover(p_hxge_t hxgep, uint16_t channel)
3636 {
3637 	hpi_handle_t		handle;
3638 	hpi_status_t 		rs = HPI_SUCCESS;
3639 	p_rx_rbr_ring_t		rbrp;
3640 	p_rx_rcr_ring_t		rcrp;
3641 	p_rx_mbox_t		mboxp;
3642 	rdc_int_mask_t		ent_mask;
3643 	p_hxge_dma_common_t	dmap;
3644 	int			ring_idx;
3645 	p_rx_msg_t		rx_msg_p;
3646 	int			i;
3647 	uint32_t		hxge_port_rcr_size;
3648 	uint64_t		tmp;
3649 	int			n_init_kick = 0;
3650 
3651 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_fatal_err_recover"));
3652 
3653 	/*
3654 	 * Stop the dma channel waits for the stop done. If the stop done bit
3655 	 * is not set, then create an error.
3656 	 */
3657 
3658 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
3659 
3660 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Rx DMA stop..."));
3661 
3662 	ring_idx = hxge_rxdma_get_ring_index(hxgep, channel);
3663 	if (ring_idx < 0) {
3664 		return (HXGE_ERROR);
3665 	}
3666 
3667 	rbrp = (p_rx_rbr_ring_t)hxgep->rx_rbr_rings->rbr_rings[ring_idx];
3668 	rcrp = (p_rx_rcr_ring_t)hxgep->rx_rcr_rings->rcr_rings[ring_idx];
3669 
3670 	MUTEX_ENTER(&rcrp->lock);
3671 	MUTEX_ENTER(&rbrp->lock);
3672 
3673 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Disable RxDMA channel..."));
3674 
3675 	rs = hpi_rxdma_cfg_rdc_disable(handle, channel);
3676 	if (rs != HPI_SUCCESS) {
3677 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3678 		    "hxge_disable_rxdma_channel:failed"));
3679 		goto fail;
3680 	}
3681 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Disable RxDMA interrupt..."));
3682 
3683 	/* Disable interrupt */
3684 	ent_mask.value = RDC_INT_MASK_ALL;
3685 	rs = hpi_rxdma_event_mask(handle, OP_SET, channel, &ent_mask);
3686 	if (rs != HPI_SUCCESS) {
3687 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3688 		    "Set rxdma event masks failed (channel %d)", channel));
3689 	}
3690 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "RxDMA channel reset..."));
3691 
3692 	/* Reset RXDMA channel */
3693 	rs = hpi_rxdma_cfg_rdc_reset(handle, channel);
3694 	if (rs != HPI_SUCCESS) {
3695 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3696 		    "Reset rxdma failed (channel %d)", channel));
3697 		goto fail;
3698 	}
3699 	hxge_port_rcr_size = hxgep->hxge_port_rcr_size;
3700 	mboxp = (p_rx_mbox_t)hxgep->rx_mbox_areas_p->rxmbox_areas[ring_idx];
3701 
3702 	rbrp->rbr_wr_index = (rbrp->rbb_max - 1);
3703 	rbrp->rbr_rd_index = 0;
3704 
3705 	rcrp->comp_rd_index = 0;
3706 	rcrp->comp_wt_index = 0;
3707 	rcrp->rcr_desc_rd_head_p = rcrp->rcr_desc_first_p =
3708 	    (p_rcr_entry_t)DMA_COMMON_VPTR(rcrp->rcr_desc);
3709 #if defined(__i386)
3710 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
3711 	    (p_rcr_entry_t)(uint32_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
3712 #else
3713 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
3714 	    (p_rcr_entry_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
3715 #endif
3716 
3717 	rcrp->rcr_desc_last_p = rcrp->rcr_desc_rd_head_p +
3718 	    (hxge_port_rcr_size - 1);
3719 	rcrp->rcr_desc_last_pp = rcrp->rcr_desc_rd_head_pp +
3720 	    (hxge_port_rcr_size - 1);
3721 
3722 	rcrp->rcr_tail_begin = DMA_COMMON_IOADDR(rcrp->rcr_desc);
3723 	rcrp->rcr_tail_begin = (rcrp->rcr_tail_begin & 0x7ffffULL) >> 3;
3724 
3725 	dmap = (p_hxge_dma_common_t)&rcrp->rcr_desc;
3726 	bzero((caddr_t)dmap->kaddrp, dmap->alength);
3727 
3728 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "rbr entries = %d\n",
3729 	    rbrp->rbr_max_size));
3730 
3731 	/* Count the number of buffers owned by the hardware at this moment */
3732 	for (i = 0; i < rbrp->rbr_max_size; i++) {
3733 		rx_msg_p = rbrp->rx_msg_ring[i];
3734 		if (rx_msg_p->ref_cnt == 1) {
3735 			n_init_kick++;
3736 		}
3737 	}
3738 
3739 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "RxDMA channel re-start..."));
3740 
3741 	/*
3742 	 * This is error recover! Some buffers are owned by the hardware and
3743 	 * the rest are owned by the apps. We should only kick in those
3744 	 * owned by the hardware initially. The apps will post theirs
3745 	 * eventually.
3746 	 */
3747 	(void) hxge_rxdma_start_channel(hxgep, channel, rbrp, rcrp, mboxp,
3748 	    n_init_kick);
3749 
3750 	/*
3751 	 * The DMA channel may disable itself automatically.
3752 	 * The following is a work-around.
3753 	 */
3754 	HXGE_REG_RD64(handle, RDC_RX_CFG1, &tmp);
3755 	rs = hpi_rxdma_cfg_rdc_enable(handle, channel);
3756 	if (rs != HPI_SUCCESS) {
3757 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3758 		    "hpi_rxdma_cfg_rdc_enable (channel %d)", channel));
3759 	}
3760 
3761 	/*
3762 	 * Delay a bit of time by doing reads.
3763 	 */
3764 	for (i = 0; i < 1024; i++) {
3765 		uint64_t value;
3766 		RXDMA_REG_READ64(HXGE_DEV_HPI_HANDLE(hxgep),
3767 		    RDC_INT_MASK, i & 3, &value);
3768 	}
3769 
3770 	MUTEX_EXIT(&rbrp->lock);
3771 	MUTEX_EXIT(&rcrp->lock);
3772 
3773 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_fatal_err_recover"));
3774 	return (HXGE_OK);
3775 
3776 fail:
3777 	MUTEX_EXIT(&rbrp->lock);
3778 	MUTEX_EXIT(&rcrp->lock);
3779 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3780 	    "Error Recovery failed for channel(%d)", channel));
3781 	return (HXGE_ERROR | rs);
3782 }
3783 
3784 static hxge_status_t
3785 hxge_rx_port_fatal_err_recover(p_hxge_t hxgep)
3786 {
3787 	hxge_status_t		status = HXGE_OK;
3788 	p_hxge_dma_common_t	*dma_buf_p;
3789 	uint16_t		channel;
3790 	int			ndmas;
3791 	int			i;
3792 	block_reset_t		reset_reg;
3793 	p_rx_rcr_ring_t	rcrp;
3794 	p_rx_rbr_ring_t rbrp;
3795 
3796 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rx_port_fatal_err_recover"));
3797 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL, "Recovering from RDC error ..."));
3798 
3799 	/* Reset RDC block from PEU for this fatal error */
3800 	reset_reg.value = 0;
3801 	reset_reg.bits.rdc_rst = 1;
3802 	HXGE_REG_WR32(hxgep->hpi_handle, BLOCK_RESET, reset_reg.value);
3803 
3804 	/* Disable RxMAC */
3805 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Disable RxMAC...\n"));
3806 	MUTEX_ENTER(&hxgep->vmac_lock);
3807 	if (hxge_rx_vmac_disable(hxgep) != HXGE_OK)
3808 		goto fail;
3809 
3810 	HXGE_DELAY(1000);
3811 
3812 	/* Restore any common settings after PEU reset */
3813 	if (hxge_rxdma_hw_start_common(hxgep) != HXGE_OK)
3814 		goto fail;
3815 
3816 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Stop all RxDMA channels..."));
3817 
3818 	ndmas = hxgep->rx_buf_pool_p->ndmas;
3819 	dma_buf_p = hxgep->rx_buf_pool_p->dma_buf_pool_p;
3820 
3821 	for (i = 0; i < ndmas; i++) {
3822 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
3823 		rcrp = hxgep->rx_rcr_rings->rcr_rings[channel];
3824 		rbrp = rcrp->rx_rbr_p;
3825 
3826 		MUTEX_ENTER(&rbrp->post_lock);
3827 		/* This function needs to be inside the post_lock */
3828 		if (hxge_rxdma_fatal_err_recover(hxgep, channel) != HXGE_OK) {
3829 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3830 			    "Could not recover channel %d", channel));
3831 		}
3832 		MUTEX_EXIT(&rbrp->post_lock);
3833 	}
3834 
3835 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Reset RxMAC..."));
3836 
3837 	/* Reset RxMAC */
3838 	if (hxge_rx_vmac_reset(hxgep) != HXGE_OK) {
3839 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3840 		    "hxge_rx_port_fatal_err_recover: Failed to reset RxMAC"));
3841 		goto fail;
3842 	}
3843 
3844 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Re-initialize RxMAC..."));
3845 
3846 	/* Re-Initialize RxMAC */
3847 	if ((status = hxge_rx_vmac_init(hxgep)) != HXGE_OK) {
3848 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3849 		    "hxge_rx_port_fatal_err_recover: Failed to reset RxMAC"));
3850 		goto fail;
3851 	}
3852 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Re-enable RxMAC..."));
3853 
3854 	/* Re-enable RxMAC */
3855 	if ((status = hxge_rx_vmac_enable(hxgep)) != HXGE_OK) {
3856 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3857 		    "hxge_rx_port_fatal_err_recover: Failed to enable RxMAC"));
3858 		goto fail;
3859 	}
3860 	MUTEX_EXIT(&hxgep->vmac_lock);
3861 
3862 	/* Reset the error mask since PEU reset cleared it */
3863 	HXGE_REG_WR64(hxgep->hpi_handle, RDC_FIFO_ERR_INT_MASK, 0x0);
3864 
3865 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3866 	    "Recovery Successful, RxPort Restored"));
3867 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rx_port_fatal_err_recover"));
3868 	return (HXGE_OK);
3869 
3870 fail:
3871 	MUTEX_EXIT(&hxgep->vmac_lock);
3872 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3873 	    "Error Recovery failed for hxge(%d)", hxgep->instance));
3874 	return (status);
3875 }
3876 
3877 static void
3878 hxge_rbr_empty_restore(p_hxge_t hxgep, p_rx_rbr_ring_t rx_rbr_p)
3879 {
3880 	hpi_status_t		hpi_status;
3881 	hxge_status_t		status;
3882 	p_hxge_rx_ring_stats_t	rdc_stats;
3883 
3884 	rdc_stats = &hxgep->statsp->rdc_stats[rx_rbr_p->rdc];
3885 	rdc_stats->rbr_empty_restore++;
3886 	rx_rbr_p->rbr_is_empty = B_FALSE;
3887 
3888 	/*
3889 	 * Complete the processing for the RBR Empty by:
3890 	 *	0) kicking back HXGE_RBR_EMPTY_THRESHOLD
3891 	 *	   packets.
3892 	 *	1) Disable the RX vmac.
3893 	 *	2) Re-enable the affected DMA channel.
3894 	 *	3) Re-enable the RX vmac.
3895 	 */
3896 
3897 	/*
3898 	 * Disable the RX VMAC, but setting the framelength
3899 	 * to 0, since there is a hardware bug when disabling
3900 	 * the vmac.
3901 	 */
3902 	MUTEX_ENTER(&hxgep->vmac_lock);
3903 	(void) hxge_rx_vmac_disable(hxgep);
3904 
3905 	hpi_status = hpi_rxdma_cfg_rdc_enable(
3906 	    HXGE_DEV_HPI_HANDLE(hxgep), rx_rbr_p->rdc);
3907 	if (hpi_status != HPI_SUCCESS) {
3908 		rdc_stats->rbr_empty_fail++;
3909 
3910 		/* Assume we are already inside the post_lock */
3911 		status = hxge_rxdma_fatal_err_recover(hxgep, rx_rbr_p->rdc);
3912 		if (status != HXGE_OK) {
3913 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3914 			    "hxge(%d): channel(%d) is empty.",
3915 			    hxgep->instance, rx_rbr_p->rdc));
3916 		}
3917 	}
3918 
3919 	/*
3920 	 * Re-enable the RX VMAC.
3921 	 */
3922 	(void) hxge_rx_vmac_enable(hxgep);
3923 	MUTEX_EXIT(&hxgep->vmac_lock);
3924 }
3925