xref: /illumos-gate/usr/src/uts/common/io/hxge/hxge_rxdma.c (revision e0e638160d72f8685f1481f6308bc368cd233c3f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <hxge_impl.h>
27 #include <hxge_rxdma.h>
28 
29 /*
30  * Number of blocks to accumulate before re-enabling DMA
31  * when we get RBR empty.
32  */
33 #define	HXGE_RBR_EMPTY_THRESHOLD	64
34 
35 /*
36  * Globals: tunable parameters (/etc/system or adb)
37  *
38  */
39 extern uint32_t hxge_rbr_size;
40 extern uint32_t hxge_rcr_size;
41 extern uint32_t hxge_rbr_spare_size;
42 extern uint32_t hxge_mblks_pending;
43 
/*
 * Tunables to manage the receive buffer blocks.
 *
 * hxge_rx_threshold_hi: copy all buffers.
 * hxge_rx_buf_size_type: receive buffer block size type.
 * hxge_rx_threshold_lo: copy only up to tunable block size type.
 */
51 extern hxge_rxbuf_threshold_t hxge_rx_threshold_hi;
52 extern hxge_rxbuf_type_t hxge_rx_buf_size_type;
53 extern hxge_rxbuf_threshold_t hxge_rx_threshold_lo;
54 
55 /*
56  * Static local functions.
57  */
58 static hxge_status_t hxge_map_rxdma(p_hxge_t hxgep);
59 static void hxge_unmap_rxdma(p_hxge_t hxgep);
60 static hxge_status_t hxge_rxdma_hw_start_common(p_hxge_t hxgep);
61 static hxge_status_t hxge_rxdma_hw_start(p_hxge_t hxgep);
62 static void hxge_rxdma_hw_stop(p_hxge_t hxgep);
63 static hxge_status_t hxge_map_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
64     p_hxge_dma_common_t *dma_buf_p, p_rx_rbr_ring_t *rbr_p,
65     uint32_t num_chunks, p_hxge_dma_common_t *dma_rbr_cntl_p,
66     p_hxge_dma_common_t *dma_rcr_cntl_p, p_hxge_dma_common_t *dma_mbox_cntl_p,
67     p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p);
68 static void hxge_unmap_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
69 	p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p);
70 static hxge_status_t hxge_map_rxdma_channel_cfg_ring(p_hxge_t hxgep,
71     uint16_t dma_channel, p_hxge_dma_common_t *dma_rbr_cntl_p,
72     p_hxge_dma_common_t *dma_rcr_cntl_p, p_hxge_dma_common_t *dma_mbox_cntl_p,
73     p_rx_rbr_ring_t *rbr_p, p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p);
74 static void hxge_unmap_rxdma_channel_cfg_ring(p_hxge_t hxgep,
75 	p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p);
76 static hxge_status_t hxge_map_rxdma_channel_buf_ring(p_hxge_t hxgep,
77 	uint16_t channel, p_hxge_dma_common_t *dma_buf_p,
78 	p_rx_rbr_ring_t *rbr_p, uint32_t num_chunks);
79 static void hxge_unmap_rxdma_channel_buf_ring(p_hxge_t hxgep,
80 	p_rx_rbr_ring_t rbr_p);
81 static hxge_status_t hxge_rxdma_start_channel(p_hxge_t hxgep, uint16_t channel,
82 	p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t mbox_p,
83 	int n_init_kick);
84 static hxge_status_t hxge_rxdma_stop_channel(p_hxge_t hxgep, uint16_t channel);
85 static mblk_t *hxge_rx_pkts(p_hxge_t hxgep, uint_t vindex, p_hxge_ldv_t ldvp,
86 	p_rx_rcr_ring_t	rcr_p, rdc_stat_t cs, int bytes_to_read);
87 static uint32_t hxge_scan_for_last_eop(p_rx_rcr_ring_t rcr_p,
88     p_rcr_entry_t rcr_desc_rd_head_p, uint32_t num_rcrs);
89 static void hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p,
90 	p_rcr_entry_t rcr_desc_rd_head_p, boolean_t *multi_p,
91 	mblk_t ** mp, mblk_t ** mp_cont, uint32_t *invalid_rcr_entry);
92 static hxge_status_t hxge_disable_rxdma_channel(p_hxge_t hxgep,
93 	uint16_t channel);
94 static p_rx_msg_t hxge_allocb(size_t, uint32_t, p_hxge_dma_common_t);
95 static void hxge_freeb(p_rx_msg_t);
96 static hxge_status_t hxge_rx_err_evnts(p_hxge_t hxgep, uint_t index,
97 	p_hxge_ldv_t ldvp, rdc_stat_t cs);
98 static hxge_status_t hxge_rxbuf_index_info_init(p_hxge_t hxgep,
99 	p_rx_rbr_ring_t rx_dmap);
100 static hxge_status_t hxge_rxdma_fatal_err_recover(p_hxge_t hxgep,
101 	uint16_t channel);
102 static hxge_status_t hxge_rx_port_fatal_err_recover(p_hxge_t hxgep);
103 static void hxge_rbr_empty_restore(p_hxge_t hxgep,
104 	p_rx_rbr_ring_t rx_rbr_p);
105 
106 hxge_status_t
107 hxge_init_rxdma_channels(p_hxge_t hxgep)
108 {
109 	hxge_status_t		status = HXGE_OK;
110 	block_reset_t		reset_reg;
111 	int			i;
112 
113 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_init_rxdma_channels"));
114 
115 	for (i = 0; i < HXGE_MAX_RDCS; i++)
116 		hxgep->rdc_first_intr[i] = B_TRUE;
117 
118 	/* Reset RDC block from PEU to clear any previous state */
119 	reset_reg.value = 0;
120 	reset_reg.bits.rdc_rst = 1;
121 	HXGE_REG_WR32(hxgep->hpi_handle, BLOCK_RESET, reset_reg.value);
122 	HXGE_DELAY(1000);
123 
124 	status = hxge_map_rxdma(hxgep);
125 	if (status != HXGE_OK) {
126 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
127 		    "<== hxge_init_rxdma: status 0x%x", status));
128 		return (status);
129 	}
130 
131 	status = hxge_rxdma_hw_start_common(hxgep);
132 	if (status != HXGE_OK) {
133 		hxge_unmap_rxdma(hxgep);
134 	}
135 
136 	status = hxge_rxdma_hw_start(hxgep);
137 	if (status != HXGE_OK) {
138 		hxge_unmap_rxdma(hxgep);
139 	}
140 
141 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
142 	    "<== hxge_init_rxdma_channels: status 0x%x", status));
143 	return (status);
144 }
145 
146 void
147 hxge_uninit_rxdma_channels(p_hxge_t hxgep)
148 {
149 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_uninit_rxdma_channels"));
150 
151 	hxge_rxdma_hw_stop(hxgep);
152 	hxge_unmap_rxdma(hxgep);
153 
154 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_uinit_rxdma_channels"));
155 }
156 
157 hxge_status_t
158 hxge_init_rxdma_channel_cntl_stat(p_hxge_t hxgep, uint16_t channel,
159     rdc_stat_t *cs_p)
160 {
161 	hpi_handle_t	handle;
162 	hpi_status_t	rs = HPI_SUCCESS;
163 	hxge_status_t	status = HXGE_OK;
164 
165 	HXGE_DEBUG_MSG((hxgep, DMA_CTL,
166 	    "<== hxge_init_rxdma_channel_cntl_stat"));
167 
168 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
169 	rs = hpi_rxdma_control_status(handle, OP_SET, channel, cs_p);
170 
171 	if (rs != HPI_SUCCESS) {
172 		status = HXGE_ERROR | rs;
173 	}
174 	return (status);
175 }
176 
177 
178 hxge_status_t
179 hxge_enable_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
180     p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t mbox_p,
181     int n_init_kick)
182 {
183 	hpi_handle_t		handle;
184 	rdc_desc_cfg_t 		rdc_desc;
185 	rdc_rcr_cfg_b_t		*cfgb_p;
186 	hpi_status_t		rs = HPI_SUCCESS;
187 
188 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_enable_rxdma_channel"));
189 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
190 
191 	/*
192 	 * Use configuration data composed at init time. Write to hardware the
193 	 * receive ring configurations.
194 	 */
195 	rdc_desc.mbox_enable = 1;
196 	rdc_desc.mbox_addr = mbox_p->mbox_addr;
197 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
198 	    "==> hxge_enable_rxdma_channel: mboxp $%p($%p)",
199 	    mbox_p->mbox_addr, rdc_desc.mbox_addr));
200 
201 	rdc_desc.rbr_len = rbr_p->rbb_max;
202 	rdc_desc.rbr_addr = rbr_p->rbr_addr;
203 
204 	switch (hxgep->rx_bksize_code) {
205 	case RBR_BKSIZE_4K:
206 		rdc_desc.page_size = SIZE_4KB;
207 		break;
208 	case RBR_BKSIZE_8K:
209 		rdc_desc.page_size = SIZE_8KB;
210 		break;
211 	}
212 
213 	rdc_desc.size0 = rbr_p->hpi_pkt_buf_size0;
214 	rdc_desc.valid0 = 1;
215 
216 	rdc_desc.size1 = rbr_p->hpi_pkt_buf_size1;
217 	rdc_desc.valid1 = 1;
218 
219 	rdc_desc.size2 = rbr_p->hpi_pkt_buf_size2;
220 	rdc_desc.valid2 = 1;
221 
222 	rdc_desc.full_hdr = rcr_p->full_hdr_flag;
223 	rdc_desc.offset = rcr_p->sw_priv_hdr_len;
224 
225 	rdc_desc.rcr_len = rcr_p->comp_size;
226 	rdc_desc.rcr_addr = rcr_p->rcr_addr;
227 
228 	cfgb_p = &(rcr_p->rcr_cfgb);
229 	rdc_desc.rcr_threshold = cfgb_p->bits.pthres;
230 	rdc_desc.rcr_timeout = cfgb_p->bits.timeout;
231 	rdc_desc.rcr_timeout_enable = cfgb_p->bits.entout;
232 
233 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_enable_rxdma_channel: "
234 	    "rbr_len qlen %d pagesize code %d rcr_len %d",
235 	    rdc_desc.rbr_len, rdc_desc.page_size, rdc_desc.rcr_len));
236 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_enable_rxdma_channel: "
237 	    "size 0 %d size 1 %d size 2 %d",
238 	    rbr_p->hpi_pkt_buf_size0, rbr_p->hpi_pkt_buf_size1,
239 	    rbr_p->hpi_pkt_buf_size2));
240 
241 	rs = hpi_rxdma_cfg_rdc_ring(handle, rbr_p->rdc, &rdc_desc);
242 	if (rs != HPI_SUCCESS) {
243 		return (HXGE_ERROR | rs);
244 	}
245 
246 	/*
247 	 * Enable the timeout and threshold.
248 	 */
249 	rs = hpi_rxdma_cfg_rdc_rcr_threshold(handle, channel,
250 	    rdc_desc.rcr_threshold);
251 	if (rs != HPI_SUCCESS) {
252 		return (HXGE_ERROR | rs);
253 	}
254 
255 	rs = hpi_rxdma_cfg_rdc_rcr_timeout(handle, channel,
256 	    rdc_desc.rcr_timeout);
257 	if (rs != HPI_SUCCESS) {
258 		return (HXGE_ERROR | rs);
259 	}
260 
261 	/* Enable the DMA */
262 	rs = hpi_rxdma_cfg_rdc_enable(handle, channel);
263 	if (rs != HPI_SUCCESS) {
264 		return (HXGE_ERROR | rs);
265 	}
266 
267 	/* Kick the DMA engine */
268 	hpi_rxdma_rdc_rbr_kick(handle, channel, n_init_kick);
269 
270 	/* Clear the rbr empty bit */
271 	(void) hpi_rxdma_channel_rbr_empty_clear(handle, channel);
272 
273 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "<== hxge_enable_rxdma_channel"));
274 
275 	return (HXGE_OK);
276 }
277 
278 static hxge_status_t
279 hxge_disable_rxdma_channel(p_hxge_t hxgep, uint16_t channel)
280 {
281 	hpi_handle_t handle;
282 	hpi_status_t rs = HPI_SUCCESS;
283 
284 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_disable_rxdma_channel"));
285 
286 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
287 
288 	/* disable the DMA */
289 	rs = hpi_rxdma_cfg_rdc_disable(handle, channel);
290 	if (rs != HPI_SUCCESS) {
291 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
292 		    "<== hxge_disable_rxdma_channel:failed (0x%x)", rs));
293 		return (HXGE_ERROR | rs);
294 	}
295 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "<== hxge_disable_rxdma_channel"));
296 	return (HXGE_OK);
297 }
298 
299 hxge_status_t
300 hxge_rxdma_channel_rcrflush(p_hxge_t hxgep, uint8_t channel)
301 {
302 	hpi_handle_t	handle;
303 	hxge_status_t	status = HXGE_OK;
304 
305 	HXGE_DEBUG_MSG((hxgep, DMA_CTL,
306 	    "==> hxge_rxdma_channel_rcrflush"));
307 
308 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
309 	hpi_rxdma_rdc_rcr_flush(handle, channel);
310 
311 	HXGE_DEBUG_MSG((hxgep, DMA_CTL,
312 	    "<== hxge_rxdma_channel_rcrflush"));
313 	return (status);
314 
315 }
316 
/*
 * Helpers for the chunk search in hxge_rxbuf_pp_to_vp().
 *
 * MID_INDEX picks the probe position between two indices, rounding up.
 * Arguments and expansions are fully parenthesized so the macros stay
 * correct when used inside larger expressions.
 */
#define	MID_INDEX(l, r) (((r) + (l) + 1) >> 1)

/*
 * Each end of a chunk votes TO_LEFT or TO_RIGHT relative to the address
 * being looked up; the sum of the two votes classifies the probe.
 */
#define	TO_LEFT (-1)
#define	TO_RIGHT 1
#define	BOTH_RIGHT (TO_RIGHT + TO_RIGHT)
#define	BOTH_LEFT (TO_LEFT + TO_LEFT)
#define	IN_MIDDLE (TO_RIGHT + TO_LEFT)
/* value stored in a hint slot when no chunk is remembered */
#define	NO_HINT 0xffffffff
325 
326 /*ARGSUSED*/
327 hxge_status_t
328 hxge_rxbuf_pp_to_vp(p_hxge_t hxgep, p_rx_rbr_ring_t rbr_p,
329     uint8_t pktbufsz_type, uint64_t *pkt_buf_addr_pp,
330     uint64_t **pkt_buf_addr_p, uint32_t *bufoffset, uint32_t *msg_index)
331 {
332 	int			bufsize;
333 	uint64_t		pktbuf_pp;
334 	uint64_t		dvma_addr;
335 	rxring_info_t		*ring_info;
336 	int			base_side, end_side;
337 	int			r_index, l_index, anchor_index;
338 	int			found, search_done;
339 	uint32_t		offset, chunk_size, block_size, page_size_mask;
340 	uint32_t		chunk_index, block_index, total_index;
341 	int			max_iterations, iteration;
342 	rxbuf_index_info_t	*bufinfo;
343 
344 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "==> hxge_rxbuf_pp_to_vp"));
345 
346 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
347 	    "==> hxge_rxbuf_pp_to_vp: buf_pp $%p btype %d",
348 	    pkt_buf_addr_pp, pktbufsz_type));
349 
350 #if defined(__i386)
351 	pktbuf_pp = (uint64_t)(uint32_t)pkt_buf_addr_pp;
352 #else
353 	pktbuf_pp = (uint64_t)pkt_buf_addr_pp;
354 #endif
355 
356 	switch (pktbufsz_type) {
357 	case 0:
358 		bufsize = rbr_p->pkt_buf_size0;
359 		break;
360 	case 1:
361 		bufsize = rbr_p->pkt_buf_size1;
362 		break;
363 	case 2:
364 		bufsize = rbr_p->pkt_buf_size2;
365 		break;
366 	case RCR_SINGLE_BLOCK:
367 		bufsize = 0;
368 		anchor_index = 0;
369 		break;
370 	default:
371 		return (HXGE_ERROR);
372 	}
373 
374 	if (rbr_p->num_blocks == 1) {
375 		anchor_index = 0;
376 		ring_info = rbr_p->ring_info;
377 		bufinfo = (rxbuf_index_info_t *)ring_info->buffer;
378 
379 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
380 		    "==> hxge_rxbuf_pp_to_vp: (found, 1 block) "
381 		    "buf_pp $%p btype %d anchor_index %d bufinfo $%p",
382 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index, bufinfo));
383 
384 		goto found_index;
385 	}
386 
387 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
388 	    "==> hxge_rxbuf_pp_to_vp: buf_pp $%p btype %d anchor_index %d",
389 	    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
390 
391 	ring_info = rbr_p->ring_info;
392 	found = B_FALSE;
393 	bufinfo = (rxbuf_index_info_t *)ring_info->buffer;
394 	iteration = 0;
395 	max_iterations = ring_info->max_iterations;
396 
397 	/*
398 	 * First check if this block have been seen recently. This is indicated
399 	 * by a hint which is initialized when the first buffer of the block is
400 	 * seen. The hint is reset when the last buffer of the block has been
401 	 * processed. As three block sizes are supported, three hints are kept.
402 	 * The idea behind the hints is that once the hardware  uses a block
403 	 * for a buffer  of that size, it will use it exclusively for that size
404 	 * and will use it until it is exhausted. It is assumed that there
405 	 * would a single block being used for the same buffer sizes at any
406 	 * given time.
407 	 */
408 	if (ring_info->hint[pktbufsz_type] != NO_HINT) {
409 		anchor_index = ring_info->hint[pktbufsz_type];
410 		dvma_addr = bufinfo[anchor_index].dvma_addr;
411 		chunk_size = bufinfo[anchor_index].buf_size;
412 		if ((pktbuf_pp >= dvma_addr) &&
413 		    (pktbuf_pp < (dvma_addr + chunk_size))) {
414 			found = B_TRUE;
415 			/*
416 			 * check if this is the last buffer in the block If so,
417 			 * then reset the hint for the size;
418 			 */
419 
420 			if ((pktbuf_pp + bufsize) >= (dvma_addr + chunk_size))
421 				ring_info->hint[pktbufsz_type] = NO_HINT;
422 		}
423 	}
424 
425 	if (found == B_FALSE) {
426 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
427 		    "==> hxge_rxbuf_pp_to_vp: (!found)"
428 		    "buf_pp $%p btype %d anchor_index %d",
429 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
430 
431 		/*
432 		 * This is the first buffer of the block of this size. Need to
433 		 * search the whole information array. the search algorithm
434 		 * uses a binary tree search algorithm. It assumes that the
435 		 * information is already sorted with increasing order info[0]
436 		 * < info[1] < info[2]  .... < info[n-1] where n is the size of
437 		 * the information array
438 		 */
439 		r_index = rbr_p->num_blocks - 1;
440 		l_index = 0;
441 		search_done = B_FALSE;
442 		anchor_index = MID_INDEX(r_index, l_index);
443 		while (search_done == B_FALSE) {
444 			if ((r_index == l_index) ||
445 			    (iteration >= max_iterations))
446 				search_done = B_TRUE;
447 
448 			end_side = TO_RIGHT;	/* to the right */
449 			base_side = TO_LEFT;	/* to the left */
450 			/* read the DVMA address information and sort it */
451 			dvma_addr = bufinfo[anchor_index].dvma_addr;
452 			chunk_size = bufinfo[anchor_index].buf_size;
453 
454 			HXGE_DEBUG_MSG((hxgep, RX2_CTL,
455 			    "==> hxge_rxbuf_pp_to_vp: (searching)"
456 			    "buf_pp $%p btype %d "
457 			    "anchor_index %d chunk_size %d dvmaaddr $%p",
458 			    pkt_buf_addr_pp, pktbufsz_type, anchor_index,
459 			    chunk_size, dvma_addr));
460 
461 			if (pktbuf_pp >= dvma_addr)
462 				base_side = TO_RIGHT;	/* to the right */
463 			if (pktbuf_pp < (dvma_addr + chunk_size))
464 				end_side = TO_LEFT;	/* to the left */
465 
466 			switch (base_side + end_side) {
467 			case IN_MIDDLE:
468 				/* found */
469 				found = B_TRUE;
470 				search_done = B_TRUE;
471 				if ((pktbuf_pp + bufsize) <
472 				    (dvma_addr + chunk_size))
473 					ring_info->hint[pktbufsz_type] =
474 					    bufinfo[anchor_index].buf_index;
475 				break;
476 			case BOTH_RIGHT:
477 				/* not found: go to the right */
478 				l_index = anchor_index + 1;
479 				anchor_index = MID_INDEX(r_index, l_index);
480 				break;
481 
482 			case BOTH_LEFT:
483 				/* not found: go to the left */
484 				r_index = anchor_index - 1;
485 				anchor_index = MID_INDEX(r_index, l_index);
486 				break;
487 			default:	/* should not come here */
488 				return (HXGE_ERROR);
489 			}
490 			iteration++;
491 		}
492 
493 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
494 		    "==> hxge_rxbuf_pp_to_vp: (search done)"
495 		    "buf_pp $%p btype %d anchor_index %d",
496 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
497 	}
498 
499 	if (found == B_FALSE) {
500 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
501 		    "==> hxge_rxbuf_pp_to_vp: (search failed)"
502 		    "buf_pp $%p btype %d anchor_index %d",
503 		    pkt_buf_addr_pp, pktbufsz_type, anchor_index));
504 		return (HXGE_ERROR);
505 	}
506 
507 found_index:
508 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
509 	    "==> hxge_rxbuf_pp_to_vp: (FOUND1)"
510 	    "buf_pp $%p btype %d bufsize %d anchor_index %d",
511 	    pkt_buf_addr_pp, pktbufsz_type, bufsize, anchor_index));
512 
513 	/* index of the first block in this chunk */
514 	chunk_index = bufinfo[anchor_index].start_index;
515 	dvma_addr = bufinfo[anchor_index].dvma_addr;
516 	page_size_mask = ring_info->block_size_mask;
517 
518 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
519 	    "==> hxge_rxbuf_pp_to_vp: (FOUND3), get chunk)"
520 	    "buf_pp $%p btype %d bufsize %d "
521 	    "anchor_index %d chunk_index %d dvma $%p",
522 	    pkt_buf_addr_pp, pktbufsz_type, bufsize,
523 	    anchor_index, chunk_index, dvma_addr));
524 
525 	offset = pktbuf_pp - dvma_addr;	/* offset within the chunk */
526 	block_size = rbr_p->block_size;	/* System  block(page) size */
527 
528 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
529 	    "==> hxge_rxbuf_pp_to_vp: (FOUND4), get chunk)"
530 	    "buf_pp $%p btype %d bufsize %d "
531 	    "anchor_index %d chunk_index %d dvma $%p "
532 	    "offset %d block_size %d",
533 	    pkt_buf_addr_pp, pktbufsz_type, bufsize, anchor_index,
534 	    chunk_index, dvma_addr, offset, block_size));
535 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "==> getting total index"));
536 
537 	block_index = (offset / block_size);	/* index within chunk */
538 	total_index = chunk_index + block_index;
539 
540 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
541 	    "==> hxge_rxbuf_pp_to_vp: "
542 	    "total_index %d dvma_addr $%p "
543 	    "offset %d block_size %d "
544 	    "block_index %d ",
545 	    total_index, dvma_addr, offset, block_size, block_index));
546 
547 #if defined(__i386)
548 	*pkt_buf_addr_p = (uint64_t *)((uint32_t)bufinfo[anchor_index].kaddr +
549 	    (uint32_t)offset);
550 #else
551 	*pkt_buf_addr_p = (uint64_t *)((uint64_t)bufinfo[anchor_index].kaddr +
552 	    offset);
553 #endif
554 
555 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
556 	    "==> hxge_rxbuf_pp_to_vp: "
557 	    "total_index %d dvma_addr $%p "
558 	    "offset %d block_size %d "
559 	    "block_index %d "
560 	    "*pkt_buf_addr_p $%p",
561 	    total_index, dvma_addr, offset, block_size,
562 	    block_index, *pkt_buf_addr_p));
563 
564 	*msg_index = total_index;
565 	*bufoffset = (offset & page_size_mask);
566 
567 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
568 	    "==> hxge_rxbuf_pp_to_vp: get msg index: "
569 	    "msg_index %d bufoffset_index %d",
570 	    *msg_index, *bufoffset));
571 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "<== hxge_rxbuf_pp_to_vp"));
572 
573 	return (HXGE_OK);
574 }
575 
576 
577 /*
578  * used by quick sort (qsort) function
579  * to perform comparison
580  */
581 static int
582 hxge_sort_compare(const void *p1, const void *p2)
583 {
584 
585 	rxbuf_index_info_t *a, *b;
586 
587 	a = (rxbuf_index_info_t *)p1;
588 	b = (rxbuf_index_info_t *)p2;
589 
590 	if (a->dvma_addr > b->dvma_addr)
591 		return (1);
592 	if (a->dvma_addr < b->dvma_addr)
593 		return (-1);
594 	return (0);
595 }
596 
597 /*
598  * Grabbed this sort implementation from common/syscall/avl.c
599  *
600  * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified.
601  * v = Ptr to array/vector of objs
602  * n = # objs in the array
603  * s = size of each obj (must be multiples of a word size)
604  * f = ptr to function to compare two objs
605  *	returns (-1 = less than, 0 = equal, 1 = greater than
606  */
607 void
608 hxge_ksort(caddr_t v, int n, int s, int (*f) ())
609 {
610 	int		g, i, j, ii;
611 	unsigned int	*p1, *p2;
612 	unsigned int	tmp;
613 
614 	/* No work to do */
615 	if (v == NULL || n <= 1)
616 		return;
617 	/* Sanity check on arguments */
618 	ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0);
619 	ASSERT(s > 0);
620 
621 	for (g = n / 2; g > 0; g /= 2) {
622 		for (i = g; i < n; i++) {
623 			for (j = i - g; j >= 0 &&
624 			    (*f) (v + j * s, v + (j + g) * s) == 1; j -= g) {
625 				p1 = (unsigned *)(v + j * s);
626 				p2 = (unsigned *)(v + (j + g) * s);
627 				for (ii = 0; ii < s / 4; ii++) {
628 					tmp = *p1;
629 					*p1++ = *p2;
630 					*p2++ = tmp;
631 				}
632 			}
633 		}
634 	}
635 }
636 
637 /*
638  * Initialize data structures required for rxdma
639  * buffer dvma->vmem address lookup
640  */
641 /*ARGSUSED*/
642 static hxge_status_t
643 hxge_rxbuf_index_info_init(p_hxge_t hxgep, p_rx_rbr_ring_t rbrp)
644 {
645 	int		index;
646 	rxring_info_t	*ring_info;
647 	int		max_iteration = 0, max_index = 0;
648 
649 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "==> hxge_rxbuf_index_info_init"));
650 
651 	ring_info = rbrp->ring_info;
652 	ring_info->hint[0] = NO_HINT;
653 	ring_info->hint[1] = NO_HINT;
654 	ring_info->hint[2] = NO_HINT;
655 	max_index = rbrp->num_blocks;
656 
657 	/* read the DVMA address information and sort it */
658 	/* do init of the information array */
659 
660 	HXGE_DEBUG_MSG((hxgep, DMA2_CTL,
661 	    " hxge_rxbuf_index_info_init Sort ptrs"));
662 
663 	/* sort the array */
664 	hxge_ksort((void *) ring_info->buffer, max_index,
665 	    sizeof (rxbuf_index_info_t), hxge_sort_compare);
666 
667 	for (index = 0; index < max_index; index++) {
668 		HXGE_DEBUG_MSG((hxgep, DMA2_CTL,
669 		    " hxge_rxbuf_index_info_init: sorted chunk %d "
670 		    " ioaddr $%p kaddr $%p size %x",
671 		    index, ring_info->buffer[index].dvma_addr,
672 		    ring_info->buffer[index].kaddr,
673 		    ring_info->buffer[index].buf_size));
674 	}
675 
676 	max_iteration = 0;
677 	while (max_index >= (1ULL << max_iteration))
678 		max_iteration++;
679 	ring_info->max_iterations = max_iteration + 1;
680 
681 	HXGE_DEBUG_MSG((hxgep, DMA2_CTL,
682 	    " hxge_rxbuf_index_info_init Find max iter %d",
683 	    ring_info->max_iterations));
684 	HXGE_DEBUG_MSG((hxgep, DMA_CTL, "<== hxge_rxbuf_index_info_init"));
685 
686 	return (HXGE_OK);
687 }
688 
689 /*ARGSUSED*/
690 void
691 hxge_dump_rcr_entry(p_hxge_t hxgep, p_rcr_entry_t entry_p)
692 {
693 #ifdef	HXGE_DEBUG
694 
695 	uint32_t bptr;
696 	uint64_t pp;
697 
698 	bptr = entry_p->bits.pkt_buf_addr;
699 
700 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
701 	    "\trcr entry $%p "
702 	    "\trcr entry 0x%0llx "
703 	    "\trcr entry 0x%08x "
704 	    "\trcr entry 0x%08x "
705 	    "\tvalue 0x%0llx\n"
706 	    "\tmulti = %d\n"
707 	    "\tpkt_type = 0x%x\n"
708 	    "\terror = 0x%04x\n"
709 	    "\tl2_len = %d\n"
710 	    "\tpktbufsize = %d\n"
711 	    "\tpkt_buf_addr = $%p\n"
712 	    "\tpkt_buf_addr (<< 6) = $%p\n",
713 	    entry_p,
714 	    *(int64_t *)entry_p,
715 	    *(int32_t *)entry_p,
716 	    *(int32_t *)((char *)entry_p + 32),
717 	    entry_p->value,
718 	    entry_p->bits.multi,
719 	    entry_p->bits.pkt_type,
720 	    entry_p->bits.error,
721 	    entry_p->bits.l2_len,
722 	    entry_p->bits.pktbufsz,
723 	    bptr,
724 	    entry_p->bits.pkt_buf_addr_l));
725 
726 	pp = (entry_p->value & RCR_PKT_BUF_ADDR_MASK) <<
727 	    RCR_PKT_BUF_ADDR_SHIFT;
728 
729 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "rcr pp 0x%llx l2 len %d",
730 	    pp, (*(int64_t *)entry_p >> 40) & 0x3fff));
731 #endif
732 }
733 
734 /*ARGSUSED*/
735 void
736 hxge_rxdma_stop(p_hxge_t hxgep)
737 {
738 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop"));
739 
740 	(void) hxge_rx_vmac_disable(hxgep);
741 	(void) hxge_rxdma_hw_mode(hxgep, HXGE_DMA_STOP);
742 
743 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_stop"));
744 }
745 
746 void
747 hxge_rxdma_stop_reinit(p_hxge_t hxgep)
748 {
749 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop_reinit"));
750 
751 	(void) hxge_rxdma_stop(hxgep);
752 	(void) hxge_uninit_rxdma_channels(hxgep);
753 	(void) hxge_init_rxdma_channels(hxgep);
754 
755 	(void) hxge_rx_vmac_enable(hxgep);
756 
757 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_stop_reinit"));
758 }
759 
760 hxge_status_t
761 hxge_rxdma_hw_mode(p_hxge_t hxgep, boolean_t enable)
762 {
763 	int			i, ndmas;
764 	uint16_t		channel;
765 	p_rx_rbr_rings_t	rx_rbr_rings;
766 	p_rx_rbr_ring_t		*rbr_rings;
767 	hpi_handle_t		handle;
768 	hpi_status_t		rs = HPI_SUCCESS;
769 	hxge_status_t		status = HXGE_OK;
770 
771 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
772 	    "==> hxge_rxdma_hw_mode: mode %d", enable));
773 
774 	if (!(hxgep->drv_state & STATE_HW_INITIALIZED)) {
775 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
776 		    "<== hxge_rxdma_mode: not initialized"));
777 		return (HXGE_ERROR);
778 	}
779 
780 	rx_rbr_rings = hxgep->rx_rbr_rings;
781 	if (rx_rbr_rings == NULL) {
782 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
783 		    "<== hxge_rxdma_mode: NULL ring pointer"));
784 		return (HXGE_ERROR);
785 	}
786 
787 	if (rx_rbr_rings->rbr_rings == NULL) {
788 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
789 		    "<== hxge_rxdma_mode: NULL rbr rings pointer"));
790 		return (HXGE_ERROR);
791 	}
792 
793 	ndmas = rx_rbr_rings->ndmas;
794 	if (!ndmas) {
795 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
796 		    "<== hxge_rxdma_mode: no channel"));
797 		return (HXGE_ERROR);
798 	}
799 
800 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
801 	    "==> hxge_rxdma_mode (ndmas %d)", ndmas));
802 
803 	rbr_rings = rx_rbr_rings->rbr_rings;
804 
805 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
806 
807 	for (i = 0; i < ndmas; i++) {
808 		if (rbr_rings == NULL || rbr_rings[i] == NULL) {
809 			continue;
810 		}
811 		channel = rbr_rings[i]->rdc;
812 		if (enable) {
813 			HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
814 			    "==> hxge_rxdma_hw_mode: channel %d (enable)",
815 			    channel));
816 			rs = hpi_rxdma_cfg_rdc_enable(handle, channel);
817 		} else {
818 			HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
819 			    "==> hxge_rxdma_hw_mode: channel %d (disable)",
820 			    channel));
821 			rs = hpi_rxdma_cfg_rdc_disable(handle, channel);
822 		}
823 	}
824 
825 	status = ((rs == HPI_SUCCESS) ? HXGE_OK : HXGE_ERROR | rs);
826 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
827 	    "<== hxge_rxdma_hw_mode: status 0x%x", status));
828 
829 	return (status);
830 }
831 
832 int
833 hxge_rxdma_get_ring_index(p_hxge_t hxgep, uint16_t channel)
834 {
835 	int			i, ndmas;
836 	uint16_t		rdc;
837 	p_rx_rbr_rings_t 	rx_rbr_rings;
838 	p_rx_rbr_ring_t		*rbr_rings;
839 
840 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
841 	    "==> hxge_rxdma_get_ring_index: channel %d", channel));
842 
843 	rx_rbr_rings = hxgep->rx_rbr_rings;
844 	if (rx_rbr_rings == NULL) {
845 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
846 		    "<== hxge_rxdma_get_ring_index: NULL ring pointer"));
847 		return (-1);
848 	}
849 
850 	ndmas = rx_rbr_rings->ndmas;
851 	if (!ndmas) {
852 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
853 		    "<== hxge_rxdma_get_ring_index: no channel"));
854 		return (-1);
855 	}
856 
857 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
858 	    "==> hxge_rxdma_get_ring_index (ndmas %d)", ndmas));
859 
860 	rbr_rings = rx_rbr_rings->rbr_rings;
861 	for (i = 0; i < ndmas; i++) {
862 		rdc = rbr_rings[i]->rdc;
863 		if (channel == rdc) {
864 			HXGE_DEBUG_MSG((hxgep, RX_CTL,
865 			    "==> hxge_rxdma_get_rbr_ring: "
866 			    "channel %d (index %d) "
867 			    "ring %d", channel, i, rbr_rings[i]));
868 
869 			return (i);
870 		}
871 	}
872 
873 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
874 	    "<== hxge_rxdma_get_rbr_ring_index: not found"));
875 
876 	return (-1);
877 }
878 
879 /*
880  * Static functions start here.
881  */
882 static p_rx_msg_t
883 hxge_allocb(size_t size, uint32_t pri, p_hxge_dma_common_t dmabuf_p)
884 {
885 	p_rx_msg_t		hxge_mp = NULL;
886 	p_hxge_dma_common_t	dmamsg_p;
887 	uchar_t			*buffer;
888 
889 	hxge_mp = KMEM_ZALLOC(sizeof (rx_msg_t), KM_NOSLEEP);
890 	if (hxge_mp == NULL) {
891 		HXGE_ERROR_MSG((NULL, HXGE_ERR_CTL,
892 		    "Allocation of a rx msg failed."));
893 		goto hxge_allocb_exit;
894 	}
895 
896 	hxge_mp->use_buf_pool = B_FALSE;
897 	if (dmabuf_p) {
898 		hxge_mp->use_buf_pool = B_TRUE;
899 
900 		dmamsg_p = (p_hxge_dma_common_t)&hxge_mp->buf_dma;
901 		*dmamsg_p = *dmabuf_p;
902 		dmamsg_p->nblocks = 1;
903 		dmamsg_p->block_size = size;
904 		dmamsg_p->alength = size;
905 		buffer = (uchar_t *)dmabuf_p->kaddrp;
906 
907 		dmabuf_p->kaddrp = (void *)((char *)dmabuf_p->kaddrp + size);
908 		dmabuf_p->ioaddr_pp = (void *)
909 		    ((char *)dmabuf_p->ioaddr_pp + size);
910 
911 		dmabuf_p->alength -= size;
912 		dmabuf_p->offset += size;
913 		dmabuf_p->dma_cookie.dmac_laddress += size;
914 		dmabuf_p->dma_cookie.dmac_size -= size;
915 	} else {
916 		buffer = KMEM_ALLOC(size, KM_NOSLEEP);
917 		if (buffer == NULL) {
918 			HXGE_ERROR_MSG((NULL, HXGE_ERR_CTL,
919 			    "Allocation of a receive page failed."));
920 			goto hxge_allocb_fail1;
921 		}
922 	}
923 
924 	hxge_mp->rx_mblk_p = desballoc(buffer, size, pri, &hxge_mp->freeb);
925 	if (hxge_mp->rx_mblk_p == NULL) {
926 		HXGE_ERROR_MSG((NULL, HXGE_ERR_CTL, "desballoc failed."));
927 		goto hxge_allocb_fail2;
928 	}
929 	hxge_mp->buffer = buffer;
930 	hxge_mp->block_size = size;
931 	hxge_mp->freeb.free_func = (void (*) ()) hxge_freeb;
932 	hxge_mp->freeb.free_arg = (caddr_t)hxge_mp;
933 	hxge_mp->ref_cnt = 1;
934 	hxge_mp->free = B_TRUE;
935 	hxge_mp->rx_use_bcopy = B_FALSE;
936 
937 	atomic_inc_32(&hxge_mblks_pending);
938 
939 	goto hxge_allocb_exit;
940 
941 hxge_allocb_fail2:
942 	if (!hxge_mp->use_buf_pool) {
943 		KMEM_FREE(buffer, size);
944 	}
945 hxge_allocb_fail1:
946 	KMEM_FREE(hxge_mp, sizeof (rx_msg_t));
947 	hxge_mp = NULL;
948 
949 hxge_allocb_exit:
950 	return (hxge_mp);
951 }
952 
953 p_mblk_t
954 hxge_dupb(p_rx_msg_t hxge_mp, uint_t offset, size_t size)
955 {
956 	p_mblk_t mp;
957 
958 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "==> hxge_dupb"));
959 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "hxge_mp = $%p "
960 	    "offset = 0x%08X " "size = 0x%08X", hxge_mp, offset, size));
961 
962 	mp = desballoc(&hxge_mp->buffer[offset], size, 0, &hxge_mp->freeb);
963 	if (mp == NULL) {
964 		HXGE_DEBUG_MSG((NULL, RX_CTL, "desballoc failed"));
965 		goto hxge_dupb_exit;
966 	}
967 
968 	atomic_inc_32(&hxge_mp->ref_cnt);
969 
970 hxge_dupb_exit:
971 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "<== hxge_dupb mp = $%p", hxge_mp));
972 	return (mp);
973 }
974 
975 p_mblk_t
976 hxge_dupb_bcopy(p_rx_msg_t hxge_mp, uint_t offset, size_t size)
977 {
978 	p_mblk_t	mp;
979 	uchar_t		*dp;
980 
981 	mp = allocb(size + HXGE_RXBUF_EXTRA, 0);
982 	if (mp == NULL) {
983 		HXGE_DEBUG_MSG((NULL, RX_CTL, "desballoc failed"));
984 		goto hxge_dupb_bcopy_exit;
985 	}
986 	dp = mp->b_rptr = mp->b_rptr + HXGE_RXBUF_EXTRA;
987 	bcopy((void *) &hxge_mp->buffer[offset], dp, size);
988 	mp->b_wptr = dp + size;
989 
990 hxge_dupb_bcopy_exit:
991 
992 	HXGE_DEBUG_MSG((NULL, MEM_CTL, "<== hxge_dupb mp = $%p", hxge_mp));
993 
994 	return (mp);
995 }
996 
997 void hxge_post_page(p_hxge_t hxgep, p_rx_rbr_ring_t rx_rbr_p,
998     p_rx_msg_t rx_msg_p);
999 
1000 void
1001 hxge_post_page(p_hxge_t hxgep, p_rx_rbr_ring_t rx_rbr_p, p_rx_msg_t rx_msg_p)
1002 {
1003 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_post_page"));
1004 
1005 	/* Reuse this buffer */
1006 	rx_msg_p->free = B_FALSE;
1007 	rx_msg_p->cur_usage_cnt = 0;
1008 	rx_msg_p->max_usage_cnt = 0;
1009 	rx_msg_p->pkt_buf_size = 0;
1010 
1011 	if (rx_rbr_p->rbr_use_bcopy) {
1012 		rx_msg_p->rx_use_bcopy = B_FALSE;
1013 		atomic_dec_32(&rx_rbr_p->rbr_consumed);
1014 	}
1015 	atomic_dec_32(&rx_rbr_p->rbr_used);
1016 
1017 	/*
1018 	 * Get the rbr header pointer and its offset index.
1019 	 */
1020 	rx_rbr_p->rbr_wr_index = ((rx_rbr_p->rbr_wr_index + 1) &
1021 	    rx_rbr_p->rbr_wrap_mask);
1022 	rx_rbr_p->rbr_desc_vp[rx_rbr_p->rbr_wr_index] = rx_msg_p->shifted_addr;
1023 
1024 	/*
1025 	 * Accumulate some buffers in the ring before re-enabling the
1026 	 * DMA channel, if rbr empty was signaled.
1027 	 */
1028 	hpi_rxdma_rdc_rbr_kick(HXGE_DEV_HPI_HANDLE(hxgep), rx_rbr_p->rdc, 1);
1029 	if (rx_rbr_p->rbr_is_empty &&
1030 	    (rx_rbr_p->rbb_max - rx_rbr_p->rbr_used) >=
1031 	    HXGE_RBR_EMPTY_THRESHOLD) {
1032 		hxge_rbr_empty_restore(hxgep, rx_rbr_p);
1033 	}
1034 
1035 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1036 	    "<== hxge_post_page (channel %d post_next_index %d)",
1037 	    rx_rbr_p->rdc, rx_rbr_p->rbr_wr_index));
1038 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_post_page"));
1039 }
1040 
1041 void
1042 hxge_freeb(p_rx_msg_t rx_msg_p)
1043 {
1044 	size_t		size;
1045 	uchar_t		*buffer = NULL;
1046 	int		ref_cnt;
1047 	boolean_t	free_state = B_FALSE;
1048 	rx_rbr_ring_t	*ring = rx_msg_p->rx_rbr_p;
1049 
1050 	HXGE_DEBUG_MSG((NULL, MEM2_CTL, "==> hxge_freeb"));
1051 	HXGE_DEBUG_MSG((NULL, MEM2_CTL,
1052 	    "hxge_freeb:rx_msg_p = $%p (block pending %d)",
1053 	    rx_msg_p, hxge_mblks_pending));
1054 
1055 	if (ring == NULL)
1056 		return;
1057 
1058 	/*
1059 	 * This is to prevent posting activities while we are recovering
1060 	 * from fatal errors. This should not be a performance drag since
1061 	 * ref_cnt != 0 most times.
1062 	 */
1063 	if (ring->rbr_state == RBR_POSTING)
1064 		MUTEX_ENTER(&ring->post_lock);
1065 
1066 	/*
1067 	 * First we need to get the free state, then
1068 	 * atomic decrement the reference count to prevent
1069 	 * the race condition with the interrupt thread that
1070 	 * is processing a loaned up buffer block.
1071 	 */
1072 	free_state = rx_msg_p->free;
1073 	ref_cnt = atomic_add_32_nv(&rx_msg_p->ref_cnt, -1);
1074 	if (!ref_cnt) {
1075 		atomic_dec_32(&hxge_mblks_pending);
1076 
1077 		buffer = rx_msg_p->buffer;
1078 		size = rx_msg_p->block_size;
1079 
1080 		HXGE_DEBUG_MSG((NULL, MEM2_CTL, "hxge_freeb: "
1081 		    "will free: rx_msg_p = $%p (block pending %d)",
1082 		    rx_msg_p, hxge_mblks_pending));
1083 
1084 		if (!rx_msg_p->use_buf_pool) {
1085 			KMEM_FREE(buffer, size);
1086 		}
1087 
1088 		KMEM_FREE(rx_msg_p, sizeof (rx_msg_t));
1089 		/*
1090 		 * Decrement the receive buffer ring's reference
1091 		 * count, too.
1092 		 */
1093 		atomic_dec_32(&ring->rbr_ref_cnt);
1094 
1095 		/*
1096 		 * Free the receive buffer ring, iff
1097 		 * 1. all the receive buffers have been freed
1098 		 * 2. and we are in the proper state (that is,
1099 		 *    we are not UNMAPPING).
1100 		 */
1101 		if (ring->rbr_ref_cnt == 0 &&
1102 		    ring->rbr_state == RBR_UNMAPPED) {
1103 			KMEM_FREE(ring, sizeof (*ring));
1104 			/* post_lock has been destroyed already */
1105 			return;
1106 		}
1107 	}
1108 
1109 	/*
1110 	 * Repost buffer.
1111 	 */
1112 	if (free_state && (ref_cnt == 1)) {
1113 		HXGE_DEBUG_MSG((NULL, RX_CTL,
1114 		    "hxge_freeb: post page $%p:", rx_msg_p));
1115 		if (ring->rbr_state == RBR_POSTING)
1116 			hxge_post_page(rx_msg_p->hxgep, ring, rx_msg_p);
1117 	}
1118 
1119 	if (ring->rbr_state == RBR_POSTING)
1120 		MUTEX_EXIT(&ring->post_lock);
1121 
1122 	HXGE_DEBUG_MSG((NULL, MEM2_CTL, "<== hxge_freeb"));
1123 }
1124 
1125 uint_t
1126 hxge_rx_intr(caddr_t arg1, caddr_t arg2)
1127 {
1128 	p_hxge_ring_handle_t	rhp;
1129 	p_hxge_ldv_t		ldvp = (p_hxge_ldv_t)arg1;
1130 	p_hxge_t		hxgep = (p_hxge_t)arg2;
1131 	p_hxge_ldg_t		ldgp;
1132 	uint8_t			channel;
1133 	hpi_handle_t		handle;
1134 	rdc_stat_t		cs;
1135 	uint_t			serviced = DDI_INTR_UNCLAIMED;
1136 	p_rx_rcr_ring_t		ring;
1137 	mblk_t			*mp = NULL;
1138 
1139 	if (ldvp == NULL) {
1140 		HXGE_DEBUG_MSG((NULL, RX_INT_CTL,
1141 		    "<== hxge_rx_intr: arg2 $%p arg1 $%p", hxgep, ldvp));
1142 		return (DDI_INTR_UNCLAIMED);
1143 	}
1144 
1145 	if (arg2 == NULL || (void *) ldvp->hxgep != arg2) {
1146 		hxgep = ldvp->hxgep;
1147 	}
1148 
1149 	/*
1150 	 * If the interface is not started, just swallow the interrupt
1151 	 * for the logical device and don't rearm it.
1152 	 */
1153 	if (hxgep->hxge_mac_state != HXGE_MAC_STARTED)
1154 		return (DDI_INTR_CLAIMED);
1155 
1156 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1157 	    "==> hxge_rx_intr: arg2 $%p arg1 $%p", hxgep, ldvp));
1158 
1159 	/*
1160 	 * This interrupt handler is for a specific receive dma channel.
1161 	 */
1162 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
1163 
1164 	/*
1165 	 * Get the control and status for this channel.
1166 	 */
1167 	channel = ldvp->vdma_index;
1168 	ring = hxgep->rx_rcr_rings->rcr_rings[channel];
1169 	rhp = &hxgep->rx_ring_handles[channel];
1170 	ldgp = ldvp->ldgp;
1171 
1172 	ASSERT(ring != NULL);
1173 	ASSERT(ring->ldgp == ldgp);
1174 	ASSERT(ring->ldvp == ldvp);
1175 
1176 	MUTEX_ENTER(&ring->lock);
1177 
1178 	/*
1179 	 * If the channel is not started, then we are not
1180 	 * ready to process packets.
1181 	 */
1182 	if (!rhp->started) {
1183 		MUTEX_EXIT(&ring->lock);
1184 		return (DDI_INTR_CLAIMED);
1185 	}
1186 
1187 	RXDMA_REG_READ64(handle, RDC_STAT, channel, &cs.value);
1188 	cs.bits.ptrread = 0;
1189 	cs.bits.pktread = 0;
1190 	RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1191 
1192 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "==> hxge_rx_intr:channel %d "
1193 	    "cs 0x%016llx rcrto 0x%x rcrthres %x",
1194 	    channel, cs.value, cs.bits.rcr_to, cs.bits.rcr_thres));
1195 
1196 	/*
1197 	 * Process packets, if we are not in polling mode. The MAC layer
1198 	 * under load will be operating in polling mode for RX traffic.
1199 	 */
1200 	if (ring->poll_flag == 0) {
1201 		mp = hxge_rx_pkts(hxgep, ldvp->vdma_index, ldvp, ring, cs, -1);
1202 	}
1203 	serviced = DDI_INTR_CLAIMED;
1204 
1205 	/* error events. */
1206 	if (cs.value & RDC_STAT_ERROR) {
1207 		(void) hxge_rx_err_evnts(hxgep, ldvp->vdma_index, ldvp, cs);
1208 	}
1209 
1210 	/*
1211 	 * Enable the mailbox update interrupt if we want to use mailbox. We
1212 	 * probably don't need to use mailbox as it only saves us one pio read.
1213 	 * Also write 1 to rcrthres and rcrto to clear these two edge triggered
1214 	 * bits.
1215 	 */
1216 	cs.value &= RDC_STAT_WR1C;
1217 	cs.bits.mex = 1;
1218 	cs.bits.ptrread = 0;
1219 	cs.bits.pktread = 0;
1220 	RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1221 
1222 	/*
1223 	 * Rearm this logical group if this is a single device group.
1224 	 */
1225 	if (ring->poll_flag) {
1226 		if (ldgp->nldvs == 1) {
1227 			ld_intr_mgmt_t mgm;
1228 
1229 			mgm.value = 0;
1230 			mgm.bits.arm = 0;
1231 			HXGE_REG_WR32(handle,
1232 			    LD_INTR_MGMT + LDSV_OFFSET(ldgp->ldg), mgm.value);
1233 		}
1234 	} else if (ldgp->nldvs == 1) {
1235 		ld_intr_mgmt_t mgm;
1236 
1237 		mgm.value = 0;
1238 		mgm.bits.arm = 1;
1239 		mgm.bits.timer = ldgp->ldg_timer;
1240 		HXGE_REG_WR32(handle,
1241 		    LD_INTR_MGMT + LDSV_OFFSET(ldgp->ldg), mgm.value);
1242 	}
1243 	MUTEX_EXIT(&ring->lock);
1244 
1245 	/*
1246 	 * Send the packets up the stack.
1247 	 */
1248 	if (mp != NULL) {
1249 		mac_rx_ring(hxgep->mach, ring->rcr_mac_handle, mp,
1250 		    ring->rcr_gen_num);
1251 	}
1252 
1253 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1254 	    "<== hxge_rx_intr: serviced %d", serviced));
1255 	return (serviced);
1256 }
1257 
1258 /*
1259  * Enable polling for a ring. Interrupt for the ring is disabled when
1260  * the hxge interrupt comes (see hxge_rx_intr).
1261  */
1262 int
1263 hxge_enable_poll(void *arg)
1264 {
1265 	p_hxge_ring_handle_t	ring_handle = (p_hxge_ring_handle_t)arg;
1266 	p_rx_rcr_ring_t		ringp;
1267 	p_hxge_t		hxgep;
1268 	p_hxge_ldg_t		ldgp;
1269 
1270 	if (ring_handle == NULL) {
1271 		return (0);
1272 	}
1273 
1274 	hxgep = ring_handle->hxgep;
1275 	ringp = hxgep->rx_rcr_rings->rcr_rings[ring_handle->index];
1276 
1277 	MUTEX_ENTER(&ringp->lock);
1278 
1279 	ldgp = ringp->ldgp;
1280 	if (ldgp == NULL) {
1281 		MUTEX_EXIT(&ringp->lock);
1282 		return (0);
1283 	}
1284 
1285 	/*
1286 	 * Enable polling
1287 	 */
1288 	if (ringp->poll_flag == 0) {
1289 		ringp->poll_flag = 1;
1290 	}
1291 
1292 	MUTEX_EXIT(&ringp->lock);
1293 	return (0);
1294 }
1295 
1296 /*
1297  * Disable polling for a ring and enable its interrupt.
1298  */
1299 int
1300 hxge_disable_poll(void *arg)
1301 {
1302 	p_hxge_ring_handle_t	ring_handle = (p_hxge_ring_handle_t)arg;
1303 	p_rx_rcr_ring_t		ringp;
1304 	p_hxge_t		hxgep;
1305 
1306 	if (ring_handle == NULL) {
1307 		return (0);
1308 	}
1309 
1310 	hxgep = ring_handle->hxgep;
1311 	ringp = hxgep->rx_rcr_rings->rcr_rings[ring_handle->index];
1312 
1313 	MUTEX_ENTER(&ringp->lock);
1314 
1315 	/*
1316 	 * Disable polling: enable interrupt
1317 	 */
1318 	if (ringp->poll_flag) {
1319 		hpi_handle_t		handle;
1320 		rdc_stat_t		cs;
1321 		uint8_t			channel;
1322 		p_hxge_ldg_t		ldgp;
1323 
1324 		/*
1325 		 * Get the control and status for this channel.
1326 		 */
1327 		handle = HXGE_DEV_HPI_HANDLE(hxgep);
1328 		channel = ringp->rdc;
1329 		RXDMA_REG_READ64(handle, RDC_STAT, channel, &cs.value);
1330 
1331 		/*
1332 		 * Enable mailbox update
1333 		 * Since packets were not read and the hardware uses
1334 		 * bits pktread and ptrread to update the queue
1335 		 * length, we need to set both bits to 0.
1336 		 */
1337 		cs.bits.pktread = 0;
1338 		cs.bits.ptrread = 0;
1339 		cs.bits.mex = 1;
1340 		RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1341 
1342 		/*
1343 		 * Rearm this logical group if this is a single device
1344 		 * group.
1345 		 */
1346 		ldgp = ringp->ldgp;
1347 		if (ldgp == NULL) {
1348 			ringp->poll_flag = 0;
1349 			MUTEX_EXIT(&ringp->lock);
1350 			return (0);
1351 		}
1352 
1353 		if (ldgp->nldvs == 1) {
1354 			ld_intr_mgmt_t mgm;
1355 
1356 			mgm.value = 0;
1357 			mgm.bits.arm = 1;
1358 			mgm.bits.timer = ldgp->ldg_timer;
1359 			HXGE_REG_WR32(handle,
1360 			    LD_INTR_MGMT + LDSV_OFFSET(ldgp->ldg), mgm.value);
1361 		}
1362 		ringp->poll_flag = 0;
1363 	}
1364 	MUTEX_EXIT(&ringp->lock);
1365 	return (0);
1366 }
1367 
1368 /*
1369  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
1370  */
1371 mblk_t *
1372 hxge_rx_poll(void *arg, int bytes_to_pickup)
1373 {
1374 	p_hxge_ring_handle_t	rhp = (p_hxge_ring_handle_t)arg;
1375 	p_rx_rcr_ring_t		ring;
1376 	p_hxge_t		hxgep;
1377 	hpi_handle_t		handle;
1378 	rdc_stat_t		cs;
1379 	mblk_t			*mblk;
1380 	p_hxge_ldv_t		ldvp;
1381 
1382 	hxgep = rhp->hxgep;
1383 
1384 	/*
1385 	 * Get the control and status for this channel.
1386 	 */
1387 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
1388 	ring = hxgep->rx_rcr_rings->rcr_rings[rhp->index];
1389 
1390 	MUTEX_ENTER(&ring->lock);
1391 	ASSERT(ring->poll_flag == 1);
1392 	ASSERT(rhp->started);
1393 
1394 	/*
1395 	 * Make sure the ring is started and polling is
1396 	 * started before processing packets.
1397 	 */
1398 	if ((!rhp->started) || (ring->poll_flag == 0)) {
1399 		MUTEX_EXIT(&ring->lock);
1400 		return ((mblk_t *)NULL);
1401 	}
1402 
1403 	RXDMA_REG_READ64(handle, RDC_STAT, rhp->index, &cs.value);
1404 	cs.bits.ptrread = 0;
1405 	cs.bits.pktread = 0;
1406 	RXDMA_REG_WRITE64(handle, RDC_STAT, rhp->index, cs.value);
1407 
1408 	mblk = hxge_rx_pkts(hxgep, ring->ldvp->vdma_index,
1409 	    ring->ldvp, ring, cs, bytes_to_pickup);
1410 	ldvp = ring->ldvp;
1411 
1412 	/*
1413 	 * Process Error Events.
1414 	 */
1415 	if (ldvp && (cs.value & RDC_STAT_ERROR)) {
1416 		(void) hxge_rx_err_evnts(hxgep, ldvp->vdma_index, ldvp, cs);
1417 	}
1418 
1419 	MUTEX_EXIT(&ring->lock);
1420 	return (mblk);
1421 }
1422 
1423 /*ARGSUSED*/
1424 mblk_t *
1425 hxge_rx_pkts(p_hxge_t hxgep, uint_t vindex, p_hxge_ldv_t ldvp,
1426     p_rx_rcr_ring_t rcrp, rdc_stat_t cs, int bytes_to_read)
1427 {
1428 	hpi_handle_t		handle;
1429 	uint8_t			channel;
1430 	uint32_t		comp_rd_index;
1431 	p_rcr_entry_t		rcr_desc_rd_head_p;
1432 	p_rcr_entry_t		rcr_desc_rd_head_pp;
1433 	p_mblk_t		nmp, mp_cont, head_mp, *tail_mp;
1434 	uint16_t		qlen, nrcr_read, npkt_read;
1435 	uint32_t		qlen_hw, qlen_sw, num_rcrs;
1436 	uint32_t		invalid_rcr_entry;
1437 	boolean_t		multi;
1438 	rdc_rcr_cfg_b_t		rcr_cfg_b;
1439 	uint64_t		rcr_head_index, rcr_tail_index;
1440 	uint64_t		rcr_tail;
1441 	rdc_rcr_tail_t		rcr_tail_reg;
1442 	p_hxge_rx_ring_stats_t	rdc_stats;
1443 	int			totallen = 0;
1444 
1445 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "==> hxge_rx_pkts:vindex %d "
1446 	    "channel %d", vindex, ldvp->channel));
1447 
1448 	if (!(hxgep->drv_state & STATE_HW_INITIALIZED)) {
1449 		return (NULL);
1450 	}
1451 
1452 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
1453 	channel = rcrp->rdc;
1454 	if (channel != ldvp->channel) {
1455 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "==> hxge_rx_pkts:index %d "
1456 		    "channel %d, and rcr channel %d not matched.",
1457 		    vindex, ldvp->channel, channel));
1458 		return (NULL);
1459 	}
1460 
1461 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1462 	    "==> hxge_rx_pkts: START: rcr channel %d "
1463 	    "head_p $%p head_pp $%p  index %d ",
1464 	    channel, rcrp->rcr_desc_rd_head_p,
1465 	    rcrp->rcr_desc_rd_head_pp, rcrp->comp_rd_index));
1466 
1467 	(void) hpi_rxdma_rdc_rcr_qlen_get(handle, channel, &qlen);
1468 	RXDMA_REG_READ64(handle, RDC_RCR_TAIL, channel, &rcr_tail_reg.value);
1469 	rcr_tail = rcr_tail_reg.bits.tail;
1470 
1471 	if (!qlen) {
1472 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1473 		    "<== hxge_rx_pkts:rcr channel %d qlen %d (no pkts)",
1474 		    channel, qlen));
1475 		return (NULL);
1476 	}
1477 
1478 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rx_pkts:rcr channel %d "
1479 	    "qlen %d", channel, qlen));
1480 
1481 	comp_rd_index = rcrp->comp_rd_index;
1482 
1483 	rcr_desc_rd_head_p = rcrp->rcr_desc_rd_head_p;
1484 	rcr_desc_rd_head_pp = rcrp->rcr_desc_rd_head_pp;
1485 	nrcr_read = npkt_read = 0;
1486 
1487 	if (hxgep->rdc_first_intr[channel])
1488 		qlen_hw = qlen;
1489 	else
1490 		qlen_hw = qlen - 1;
1491 
1492 	head_mp = NULL;
1493 	tail_mp = &head_mp;
1494 	nmp = mp_cont = NULL;
1495 	multi = B_FALSE;
1496 
1497 	rcr_head_index = rcrp->rcr_desc_rd_head_p - rcrp->rcr_desc_first_p;
1498 	rcr_tail_index = rcr_tail - rcrp->rcr_tail_begin;
1499 
1500 	if (rcr_tail_index >= rcr_head_index) {
1501 		num_rcrs = rcr_tail_index - rcr_head_index;
1502 	} else {
1503 		/* rcr_tail has wrapped around */
1504 		num_rcrs = (rcrp->comp_size - rcr_head_index) + rcr_tail_index;
1505 	}
1506 
1507 	qlen_sw = hxge_scan_for_last_eop(rcrp, rcr_desc_rd_head_p, num_rcrs);
1508 	if (!qlen_sw)
1509 		return (NULL);
1510 
1511 	if (qlen_hw > qlen_sw) {
1512 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1513 		    "Channel %d, rcr_qlen from reg %d and from rcr_tail %d\n",
1514 		    channel, qlen_hw, qlen_sw));
1515 		qlen_hw = qlen_sw;
1516 	}
1517 
1518 	while (qlen_hw) {
1519 #ifdef HXGE_DEBUG
1520 		hxge_dump_rcr_entry(hxgep, rcr_desc_rd_head_p);
1521 #endif
1522 		/*
1523 		 * Process one completion ring entry.
1524 		 */
1525 		invalid_rcr_entry = 0;
1526 		hxge_receive_packet(hxgep,
1527 		    rcrp, rcr_desc_rd_head_p, &multi, &nmp, &mp_cont,
1528 		    &invalid_rcr_entry);
1529 		if (invalid_rcr_entry != 0) {
1530 			rdc_stats = rcrp->rdc_stats;
1531 			rdc_stats->rcr_invalids++;
1532 			HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1533 			    "Channel %d could only read 0x%x packets, "
1534 			    "but 0x%x pending\n", channel, npkt_read, qlen_hw));
1535 			break;
1536 		}
1537 
1538 		/*
1539 		 * message chaining modes (nemo msg chaining)
1540 		 */
1541 		if (nmp) {
1542 			nmp->b_next = NULL;
1543 			if (!multi && !mp_cont) { /* frame fits a partition */
1544 				*tail_mp = nmp;
1545 				tail_mp = &nmp->b_next;
1546 				nmp = NULL;
1547 			} else if (multi && !mp_cont) { /* first segment */
1548 				*tail_mp = nmp;
1549 				tail_mp = &nmp->b_cont;
1550 			} else if (multi && mp_cont) {	/* mid of multi segs */
1551 				*tail_mp = mp_cont;
1552 				tail_mp = &mp_cont->b_cont;
1553 			} else if (!multi && mp_cont) { /* last segment */
1554 				*tail_mp = mp_cont;
1555 				tail_mp = &nmp->b_next;
1556 				totallen += MBLKL(mp_cont);
1557 				nmp = NULL;
1558 			}
1559 		}
1560 
1561 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1562 		    "==> hxge_rx_pkts: loop: rcr channel %d "
1563 		    "before updating: multi %d "
1564 		    "nrcr_read %d "
1565 		    "npk read %d "
1566 		    "head_pp $%p  index %d ",
1567 		    channel, multi,
1568 		    nrcr_read, npkt_read, rcr_desc_rd_head_pp, comp_rd_index));
1569 
1570 		if (!multi) {
1571 			qlen_hw--;
1572 			npkt_read++;
1573 		}
1574 
1575 		/*
1576 		 * Update the next read entry.
1577 		 */
1578 		comp_rd_index = NEXT_ENTRY(comp_rd_index,
1579 		    rcrp->comp_wrap_mask);
1580 
1581 		rcr_desc_rd_head_p = NEXT_ENTRY_PTR(rcr_desc_rd_head_p,
1582 		    rcrp->rcr_desc_first_p, rcrp->rcr_desc_last_p);
1583 
1584 		nrcr_read++;
1585 
1586 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1587 		    "<== hxge_rx_pkts: (SAM, process one packet) "
1588 		    "nrcr_read %d", nrcr_read));
1589 		HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1590 		    "==> hxge_rx_pkts: loop: rcr channel %d "
1591 		    "multi %d nrcr_read %d npk read %d head_pp $%p  index %d ",
1592 		    channel, multi, nrcr_read, npkt_read, rcr_desc_rd_head_pp,
1593 		    comp_rd_index));
1594 
1595 		if ((bytes_to_read != -1) &&
1596 		    (totallen >= bytes_to_read)) {
1597 			break;
1598 		}
1599 	}
1600 
1601 	rcrp->rcr_desc_rd_head_pp = rcr_desc_rd_head_pp;
1602 	rcrp->comp_rd_index = comp_rd_index;
1603 	rcrp->rcr_desc_rd_head_p = rcr_desc_rd_head_p;
1604 
1605 	if ((hxgep->intr_timeout != rcrp->intr_timeout) ||
1606 	    (hxgep->intr_threshold != rcrp->intr_threshold)) {
1607 		rcrp->intr_timeout = hxgep->intr_timeout;
1608 		rcrp->intr_threshold = hxgep->intr_threshold;
1609 		rcr_cfg_b.value = 0x0ULL;
1610 		if (rcrp->intr_timeout)
1611 			rcr_cfg_b.bits.entout = 1;
1612 		rcr_cfg_b.bits.timeout = rcrp->intr_timeout;
1613 		rcr_cfg_b.bits.pthres = rcrp->intr_threshold;
1614 		RXDMA_REG_WRITE64(handle, RDC_RCR_CFG_B,
1615 		    channel, rcr_cfg_b.value);
1616 	}
1617 
1618 	if (hxgep->rdc_first_intr[channel] && (npkt_read > 0)) {
1619 		hxgep->rdc_first_intr[channel] = B_FALSE;
1620 		cs.bits.pktread = npkt_read - 1;
1621 	} else
1622 		cs.bits.pktread = npkt_read;
1623 	cs.bits.ptrread = nrcr_read;
1624 	cs.value &= 0xffffffffULL;
1625 	RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs.value);
1626 
1627 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL,
1628 	    "==> hxge_rx_pkts: EXIT: rcr channel %d "
1629 	    "head_pp $%p  index %016llx ",
1630 	    channel, rcrp->rcr_desc_rd_head_pp, rcrp->comp_rd_index));
1631 
1632 	HXGE_DEBUG_MSG((hxgep, RX_INT_CTL, "<== hxge_rx_pkts"));
1633 
1634 	return (head_mp);
1635 }
1636 
1637 #define	RCR_ENTRY_PATTERN	0x5a5a6b6b7c7c8d8dULL
1638 #define	NO_PORT_BIT		0x20
1639 #define	L4_CS_EQ_BIT		0x40
1640 
1641 static uint32_t hxge_scan_for_last_eop(p_rx_rcr_ring_t rcrp,
1642     p_rcr_entry_t rcr_desc_rd_head_p, uint32_t num_rcrs)
1643 {
1644 	uint64_t	rcr_entry;
1645 	uint32_t	rcrs = 0;
1646 	uint32_t	pkts = 0;
1647 
1648 	while (rcrs++ < num_rcrs) {
1649 		rcr_entry = *((uint64_t *)rcr_desc_rd_head_p);
1650 
1651 		if ((rcr_entry == 0x0) || (rcr_entry == RCR_ENTRY_PATTERN))
1652 			break;
1653 
1654 		if (!(rcr_entry & RCR_MULTI_MASK))
1655 			pkts++;
1656 
1657 		rcr_desc_rd_head_p = NEXT_ENTRY_PTR(rcr_desc_rd_head_p,
1658 		    rcrp->rcr_desc_first_p, rcrp->rcr_desc_last_p);
1659 	}
1660 
1661 	return (pkts);
1662 }
1663 
1664 /*ARGSUSED*/
1665 void
1666 hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p,
1667     p_rcr_entry_t rcr_desc_rd_head_p, boolean_t *multi_p, mblk_t **mp,
1668     mblk_t **mp_cont, uint32_t *invalid_rcr_entry)
1669 {
1670 	p_mblk_t nmp = NULL;
1671 	uint64_t multi;
1672 	uint8_t channel;
1673 	boolean_t first_entry = B_TRUE;
1674 	boolean_t is_tcp_udp = B_FALSE;
1675 	boolean_t buffer_free = B_FALSE;
1676 	boolean_t error_send_up = B_FALSE;
1677 	uint8_t error_type;
1678 	uint16_t l2_len;
1679 	uint16_t skip_len;
1680 	uint8_t pktbufsz_type;
1681 	uint64_t rcr_entry;
1682 	uint64_t *pkt_buf_addr_pp;
1683 	uint64_t *pkt_buf_addr_p;
1684 	uint32_t buf_offset;
1685 	uint32_t bsize;
1686 	uint32_t msg_index;
1687 	p_rx_rbr_ring_t rx_rbr_p;
1688 	p_rx_msg_t *rx_msg_ring_p;
1689 	p_rx_msg_t rx_msg_p;
1690 	uint16_t sw_offset_bytes = 0, hdr_size = 0;
1691 	hxge_status_t status = HXGE_OK;
1692 	boolean_t is_valid = B_FALSE;
1693 	p_hxge_rx_ring_stats_t rdc_stats;
1694 	uint32_t bytes_read;
1695 	uint8_t header0 = 0;
1696 	uint8_t header1 = 0;
1697 	uint64_t pkt_type;
1698 	uint8_t no_port_bit = 0;
1699 	uint8_t l4_cs_eq_bit = 0;
1700 
1701 	channel = rcr_p->rdc;
1702 
1703 	HXGE_DEBUG_MSG((hxgep, RX2_CTL, "==> hxge_receive_packet"));
1704 
1705 	first_entry = (*mp == NULL) ? B_TRUE : B_FALSE;
1706 	rcr_entry = *((uint64_t *)rcr_desc_rd_head_p);
1707 
1708 	/* Verify the content of the rcr_entry for a hardware bug workaround */
1709 	if ((rcr_entry == 0x0) || (rcr_entry == RCR_ENTRY_PATTERN)) {
1710 		*invalid_rcr_entry = 1;
1711 		HXGE_DEBUG_MSG((hxgep, RX2_CTL, "hxge_receive_packet "
1712 		    "Channel %d invalid RCR entry 0x%llx found, returning\n",
1713 		    channel, (long long) rcr_entry));
1714 		return;
1715 	}
1716 	*((uint64_t *)rcr_desc_rd_head_p) = RCR_ENTRY_PATTERN;
1717 
1718 	multi = (rcr_entry & RCR_MULTI_MASK);
1719 	pkt_type = (rcr_entry & RCR_PKT_TYPE_MASK);
1720 
1721 	error_type = ((rcr_entry & RCR_ERROR_MASK) >> RCR_ERROR_SHIFT);
1722 	l2_len = ((rcr_entry & RCR_L2_LEN_MASK) >> RCR_L2_LEN_SHIFT);
1723 
1724 	/*
1725 	 * Hardware does not strip the CRC due bug ID 11451 where
1726 	 * the hardware mis handles minimum size packets.
1727 	 */
1728 	l2_len -= ETHERFCSL;
1729 
1730 	pktbufsz_type = ((rcr_entry & RCR_PKTBUFSZ_MASK) >>
1731 	    RCR_PKTBUFSZ_SHIFT);
1732 #if defined(__i386)
1733 	pkt_buf_addr_pp = (uint64_t *)(uint32_t)((rcr_entry &
1734 	    RCR_PKT_BUF_ADDR_MASK) << RCR_PKT_BUF_ADDR_SHIFT);
1735 #else
1736 	pkt_buf_addr_pp = (uint64_t *)((rcr_entry & RCR_PKT_BUF_ADDR_MASK) <<
1737 	    RCR_PKT_BUF_ADDR_SHIFT);
1738 #endif
1739 
1740 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1741 	    "==> hxge_receive_packet: entryp $%p entry 0x%0llx "
1742 	    "pkt_buf_addr_pp $%p l2_len %d multi %d "
1743 	    "error_type 0x%x pktbufsz_type %d ",
1744 	    rcr_desc_rd_head_p, rcr_entry, pkt_buf_addr_pp, l2_len,
1745 	    multi, error_type, pktbufsz_type));
1746 
1747 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1748 	    "==> hxge_receive_packet: entryp $%p entry 0x%0llx "
1749 	    "pkt_buf_addr_pp $%p l2_len %d multi %d "
1750 	    "error_type 0x%x ", rcr_desc_rd_head_p,
1751 	    rcr_entry, pkt_buf_addr_pp, l2_len, multi, error_type));
1752 
1753 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1754 	    "==> (rbr) hxge_receive_packet: entry 0x%0llx "
1755 	    "full pkt_buf_addr_pp $%p l2_len %d",
1756 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1757 
1758 	/* get the stats ptr */
1759 	rdc_stats = rcr_p->rdc_stats;
1760 
1761 	if (!l2_len) {
1762 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1763 		    "<== hxge_receive_packet: failed: l2 length is 0."));
1764 		return;
1765 	}
1766 
1767 	/* shift 6 bits to get the full io address */
1768 #if defined(__i386)
1769 	pkt_buf_addr_pp = (uint64_t *)((uint32_t)pkt_buf_addr_pp <<
1770 	    RCR_PKT_BUF_ADDR_SHIFT_FULL);
1771 #else
1772 	pkt_buf_addr_pp = (uint64_t *)((uint64_t)pkt_buf_addr_pp <<
1773 	    RCR_PKT_BUF_ADDR_SHIFT_FULL);
1774 #endif
1775 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1776 	    "==> (rbr) hxge_receive_packet: entry 0x%0llx "
1777 	    "full pkt_buf_addr_pp $%p l2_len %d",
1778 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1779 
1780 	rx_rbr_p = rcr_p->rx_rbr_p;
1781 	rx_msg_ring_p = rx_rbr_p->rx_msg_ring;
1782 
1783 	if (first_entry) {
1784 		hdr_size = (rcr_p->full_hdr_flag ? RXDMA_HDR_SIZE_FULL :
1785 		    RXDMA_HDR_SIZE_DEFAULT);
1786 
1787 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1788 		    "==> hxge_receive_packet: first entry 0x%016llx "
1789 		    "pkt_buf_addr_pp $%p l2_len %d hdr %d",
1790 		    rcr_entry, pkt_buf_addr_pp, l2_len, hdr_size));
1791 	}
1792 
1793 	MUTEX_ENTER(&rx_rbr_p->lock);
1794 
1795 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1796 	    "==> (rbr 1) hxge_receive_packet: entry 0x%0llx "
1797 	    "full pkt_buf_addr_pp $%p l2_len %d",
1798 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1799 
1800 	/*
1801 	 * Packet buffer address in the completion entry points to the starting
1802 	 * buffer address (offset 0). Use the starting buffer address to locate
1803 	 * the corresponding kernel address.
1804 	 */
1805 	status = hxge_rxbuf_pp_to_vp(hxgep, rx_rbr_p,
1806 	    pktbufsz_type, pkt_buf_addr_pp, &pkt_buf_addr_p,
1807 	    &buf_offset, &msg_index);
1808 
1809 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1810 	    "==> (rbr 2) hxge_receive_packet: entry 0x%0llx "
1811 	    "full pkt_buf_addr_pp $%p l2_len %d",
1812 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1813 
1814 	if (status != HXGE_OK) {
1815 		MUTEX_EXIT(&rx_rbr_p->lock);
1816 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1817 		    "<== hxge_receive_packet: found vaddr failed %d", status));
1818 		return;
1819 	}
1820 
1821 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1822 	    "==> (rbr 3) hxge_receive_packet: entry 0x%0llx "
1823 	    "full pkt_buf_addr_pp $%p l2_len %d",
1824 	    rcr_entry, pkt_buf_addr_pp, l2_len));
1825 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1826 	    "==> (rbr 4 msgindex %d) hxge_receive_packet: entry 0x%0llx "
1827 	    "full pkt_buf_addr_pp $%p l2_len %d",
1828 	    msg_index, rcr_entry, pkt_buf_addr_pp, l2_len));
1829 
1830 	if (msg_index >= rx_rbr_p->tnblocks) {
1831 		MUTEX_EXIT(&rx_rbr_p->lock);
1832 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1833 		    "==> hxge_receive_packet: FATAL msg_index (%d) "
1834 		    "should be smaller than tnblocks (%d)\n",
1835 		    msg_index, rx_rbr_p->tnblocks));
1836 		return;
1837 	}
1838 
1839 	rx_msg_p = rx_msg_ring_p[msg_index];
1840 
1841 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1842 	    "==> (rbr 4 msgindex %d) hxge_receive_packet: entry 0x%0llx "
1843 	    "full pkt_buf_addr_pp $%p l2_len %d",
1844 	    msg_index, rcr_entry, pkt_buf_addr_pp, l2_len));
1845 
1846 	switch (pktbufsz_type) {
1847 	case RCR_PKTBUFSZ_0:
1848 		bsize = rx_rbr_p->pkt_buf_size0_bytes;
1849 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1850 		    "==> hxge_receive_packet: 0 buf %d", bsize));
1851 		break;
1852 	case RCR_PKTBUFSZ_1:
1853 		bsize = rx_rbr_p->pkt_buf_size1_bytes;
1854 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1855 		    "==> hxge_receive_packet: 1 buf %d", bsize));
1856 		break;
1857 	case RCR_PKTBUFSZ_2:
1858 		bsize = rx_rbr_p->pkt_buf_size2_bytes;
1859 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
1860 		    "==> hxge_receive_packet: 2 buf %d", bsize));
1861 		break;
1862 	case RCR_SINGLE_BLOCK:
1863 		bsize = rx_msg_p->block_size;
1864 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1865 		    "==> hxge_receive_packet: single %d", bsize));
1866 
1867 		break;
1868 	default:
1869 		MUTEX_EXIT(&rx_rbr_p->lock);
1870 		return;
1871 	}
1872 
1873 	DMA_COMMON_SYNC_OFFSET(rx_msg_p->buf_dma,
1874 	    (buf_offset + sw_offset_bytes), (hdr_size + l2_len),
1875 	    DDI_DMA_SYNC_FORCPU);
1876 
1877 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1878 	    "==> hxge_receive_packet: after first dump:usage count"));
1879 
1880 	if (rx_msg_p->cur_usage_cnt == 0) {
1881 		atomic_inc_32(&rx_rbr_p->rbr_used);
1882 		if (rx_rbr_p->rbr_use_bcopy) {
1883 			atomic_inc_32(&rx_rbr_p->rbr_consumed);
1884 			if (rx_rbr_p->rbr_consumed <
1885 			    rx_rbr_p->rbr_threshold_hi) {
1886 				if (rx_rbr_p->rbr_threshold_lo == 0 ||
1887 				    ((rx_rbr_p->rbr_consumed >=
1888 				    rx_rbr_p->rbr_threshold_lo) &&
1889 				    (rx_rbr_p->rbr_bufsize_type >=
1890 				    pktbufsz_type))) {
1891 					rx_msg_p->rx_use_bcopy = B_TRUE;
1892 				}
1893 			} else {
1894 				rx_msg_p->rx_use_bcopy = B_TRUE;
1895 			}
1896 		}
1897 		HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1898 		    "==> hxge_receive_packet: buf %d (new block) ", bsize));
1899 
1900 		rx_msg_p->pkt_buf_size_code = pktbufsz_type;
1901 		rx_msg_p->pkt_buf_size = bsize;
1902 		rx_msg_p->cur_usage_cnt = 1;
1903 		if (pktbufsz_type == RCR_SINGLE_BLOCK) {
1904 			HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1905 			    "==> hxge_receive_packet: buf %d (single block) ",
1906 			    bsize));
1907 			/*
1908 			 * Buffer can be reused once the free function is
1909 			 * called.
1910 			 */
1911 			rx_msg_p->max_usage_cnt = 1;
1912 			buffer_free = B_TRUE;
1913 		} else {
1914 			rx_msg_p->max_usage_cnt = rx_msg_p->block_size / bsize;
1915 			if (rx_msg_p->max_usage_cnt == 1) {
1916 				buffer_free = B_TRUE;
1917 			}
1918 		}
1919 	} else {
1920 		rx_msg_p->cur_usage_cnt++;
1921 		if (rx_msg_p->cur_usage_cnt == rx_msg_p->max_usage_cnt) {
1922 			buffer_free = B_TRUE;
1923 		}
1924 	}
1925 
1926 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
1927 	    "msgbuf index = %d l2len %d bytes usage %d max_usage %d ",
1928 	    msg_index, l2_len,
1929 	    rx_msg_p->cur_usage_cnt, rx_msg_p->max_usage_cnt));
1930 
1931 	if (error_type) {
1932 		rdc_stats->ierrors++;
1933 		/* Update error stats */
1934 		rdc_stats->errlog.compl_err_type = error_type;
1935 		HXGE_FM_REPORT_ERROR(hxgep, NULL, HXGE_FM_EREPORT_RDMC_RCR_ERR);
1936 
1937 		if (error_type & RCR_CTRL_FIFO_DED) {
1938 			rdc_stats->ctrl_fifo_ecc_err++;
1939 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
1940 			    " hxge_receive_packet: "
1941 			    " channel %d RCR ctrl_fifo_ded error", channel));
1942 		} else if (error_type & RCR_DATA_FIFO_DED) {
1943 			rdc_stats->data_fifo_ecc_err++;
1944 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
1945 			    " hxge_receive_packet: channel %d"
1946 			    " RCR data_fifo_ded error", channel));
1947 		}
1948 
1949 		/*
1950 		 * Update and repost buffer block if max usage count is
1951 		 * reached.
1952 		 */
1953 		if (error_send_up == B_FALSE) {
1954 			atomic_inc_32(&rx_msg_p->ref_cnt);
1955 			if (buffer_free == B_TRUE) {
1956 				rx_msg_p->free = B_TRUE;
1957 			}
1958 
1959 			MUTEX_EXIT(&rx_rbr_p->lock);
1960 			hxge_freeb(rx_msg_p);
1961 			return;
1962 		}
1963 	}
1964 
1965 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
1966 	    "==> hxge_receive_packet: DMA sync second "));
1967 
1968 	bytes_read = rcr_p->rcvd_pkt_bytes;
1969 	skip_len = sw_offset_bytes + hdr_size;
1970 
1971 	if (first_entry) {
1972 		header0 = rx_msg_p->buffer[buf_offset];
1973 		no_port_bit = header0 & NO_PORT_BIT;
1974 		header1 = rx_msg_p->buffer[buf_offset + 1];
1975 		l4_cs_eq_bit = header1 & L4_CS_EQ_BIT;
1976 	}
1977 
1978 	if (!rx_msg_p->rx_use_bcopy) {
1979 		/*
1980 		 * For loaned up buffers, the driver reference count
1981 		 * will be incremented first and then the free state.
1982 		 */
1983 		if ((nmp = hxge_dupb(rx_msg_p, buf_offset, bsize)) != NULL) {
1984 			if (first_entry) {
1985 				nmp->b_rptr = &nmp->b_rptr[skip_len];
1986 				if (l2_len < bsize - skip_len) {
1987 					nmp->b_wptr = &nmp->b_rptr[l2_len];
1988 				} else {
1989 					nmp->b_wptr = &nmp->b_rptr[bsize
1990 					    - skip_len];
1991 				}
1992 			} else {
1993 				if (l2_len - bytes_read < bsize) {
1994 					nmp->b_wptr =
1995 					    &nmp->b_rptr[l2_len - bytes_read];
1996 				} else {
1997 					nmp->b_wptr = &nmp->b_rptr[bsize];
1998 				}
1999 			}
2000 		}
2001 	} else {
2002 		if (first_entry) {
2003 			nmp = hxge_dupb_bcopy(rx_msg_p, buf_offset + skip_len,
2004 			    l2_len < bsize - skip_len ?
2005 			    l2_len : bsize - skip_len);
2006 		} else {
2007 			nmp = hxge_dupb_bcopy(rx_msg_p, buf_offset,
2008 			    l2_len - bytes_read < bsize ?
2009 			    l2_len - bytes_read : bsize);
2010 		}
2011 	}
2012 
2013 	if (nmp != NULL) {
2014 		if (first_entry)
2015 			bytes_read  = nmp->b_wptr - nmp->b_rptr;
2016 		else
2017 			bytes_read += nmp->b_wptr - nmp->b_rptr;
2018 
2019 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
2020 		    "==> hxge_receive_packet after dupb: "
2021 		    "rbr consumed %d "
2022 		    "pktbufsz_type %d "
2023 		    "nmp $%p rptr $%p wptr $%p "
2024 		    "buf_offset %d bzise %d l2_len %d skip_len %d",
2025 		    rx_rbr_p->rbr_consumed,
2026 		    pktbufsz_type,
2027 		    nmp, nmp->b_rptr, nmp->b_wptr,
2028 		    buf_offset, bsize, l2_len, skip_len));
2029 	} else {
2030 		cmn_err(CE_WARN, "!hxge_receive_packet: update stats (error)");
2031 
2032 		atomic_inc_32(&rx_msg_p->ref_cnt);
2033 		if (buffer_free == B_TRUE) {
2034 			rx_msg_p->free = B_TRUE;
2035 		}
2036 
2037 		MUTEX_EXIT(&rx_rbr_p->lock);
2038 		hxge_freeb(rx_msg_p);
2039 		return;
2040 	}
2041 
2042 	if (buffer_free == B_TRUE) {
2043 		rx_msg_p->free = B_TRUE;
2044 	}
2045 
2046 	/*
2047 	 * ERROR, FRAG and PKT_TYPE are only reported in the first entry. If a
2048 	 * packet is not fragmented and no error bit is set, then L4 checksum
2049 	 * is OK.
2050 	 */
2051 	is_valid = (nmp != NULL);
2052 	if (first_entry) {
2053 		rdc_stats->ipackets++; /* count only 1st seg for jumbo */
2054 		if (l2_len > (STD_FRAME_SIZE - ETHERFCSL))
2055 			rdc_stats->jumbo_pkts++;
2056 		rdc_stats->ibytes += skip_len + l2_len < bsize ?
2057 		    l2_len : bsize;
2058 	} else {
2059 		/*
2060 		 * Add the current portion of the packet to the kstats.
2061 		 * The current portion of the packet is calculated by using
2062 		 * length of the packet and the previously received portion.
2063 		 */
2064 		rdc_stats->ibytes += l2_len - rcr_p->rcvd_pkt_bytes < bsize ?
2065 		    l2_len - rcr_p->rcvd_pkt_bytes : bsize;
2066 	}
2067 
2068 	rcr_p->rcvd_pkt_bytes = bytes_read;
2069 
2070 	if (rx_msg_p->free && rx_msg_p->rx_use_bcopy) {
2071 		atomic_inc_32(&rx_msg_p->ref_cnt);
2072 		MUTEX_EXIT(&rx_rbr_p->lock);
2073 		hxge_freeb(rx_msg_p);
2074 	} else
2075 		MUTEX_EXIT(&rx_rbr_p->lock);
2076 
2077 	if (is_valid) {
2078 		nmp->b_cont = NULL;
2079 		if (first_entry) {
2080 			*mp = nmp;
2081 			*mp_cont = NULL;
2082 		} else {
2083 			*mp_cont = nmp;
2084 		}
2085 	}
2086 
2087 	/*
2088 	 * Update stats and hardware checksuming.
2089 	 */
2090 	if (is_valid && !multi) {
2091 		is_tcp_udp = ((pkt_type == RCR_PKT_IS_TCP ||
2092 		    pkt_type == RCR_PKT_IS_UDP) ? B_TRUE : B_FALSE);
2093 
2094 		if (!no_port_bit && l4_cs_eq_bit && is_tcp_udp && !error_type) {
2095 			(void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0,
2096 			    HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0);
2097 
2098 			HXGE_DEBUG_MSG((hxgep, RX_CTL,
2099 			    "==> hxge_receive_packet: Full tcp/udp cksum "
2100 			    "is_valid 0x%x multi %d error %d",
2101 			    is_valid, multi, error_type));
2102 		}
2103 	}
2104 
2105 	HXGE_DEBUG_MSG((hxgep, RX2_CTL,
2106 	    "==> hxge_receive_packet: *mp 0x%016llx", *mp));
2107 
2108 	*multi_p = (multi == RCR_MULTI_MASK);
2109 
2110 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_receive_packet: "
2111 	    "multi %d nmp 0x%016llx *mp 0x%016llx *mp_cont 0x%016llx",
2112 	    *multi_p, nmp, *mp, *mp_cont));
2113 }
2114 
2115 static void
2116 hxge_rx_rbr_empty_recover(p_hxge_t hxgep, uint8_t channel)
2117 {
2118 	hpi_handle_t	handle;
2119 	p_rx_rcr_ring_t	rcrp;
2120 	p_rx_rbr_ring_t	rbrp;
2121 
2122 	rcrp = hxgep->rx_rcr_rings->rcr_rings[channel];
2123 	rbrp = rcrp->rx_rbr_p;
2124 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
2125 
2126 	/*
2127 	 * Wait for the channel to be quiet
2128 	 */
2129 	(void) hpi_rxdma_cfg_rdc_wait_for_qst(handle, channel);
2130 
2131 	/*
2132 	 * Post page will accumulate some buffers before re-enabling
2133 	 * the DMA channel.
2134 	 */
2135 
2136 	MUTEX_ENTER(&rbrp->post_lock);
2137 	if ((rbrp->rbb_max - rbrp->rbr_used) >= HXGE_RBR_EMPTY_THRESHOLD) {
2138 		hxge_rbr_empty_restore(hxgep, rbrp);
2139 	} else {
2140 		rbrp->rbr_is_empty = B_TRUE;
2141 	}
2142 	MUTEX_EXIT(&rbrp->post_lock);
2143 }
2144 
2145 
2146 /*ARGSUSED*/
2147 static hxge_status_t
2148 hxge_rx_err_evnts(p_hxge_t hxgep, uint_t index, p_hxge_ldv_t ldvp,
2149     rdc_stat_t cs)
2150 {
2151 	p_hxge_rx_ring_stats_t	rdc_stats;
2152 	hpi_handle_t		handle;
2153 	boolean_t		rxchan_fatal = B_FALSE;
2154 	uint8_t			channel;
2155 	hxge_status_t		status = HXGE_OK;
2156 	uint64_t		cs_val;
2157 
2158 	HXGE_DEBUG_MSG((hxgep, INT_CTL, "==> hxge_rx_err_evnts"));
2159 
2160 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
2161 	channel = ldvp->channel;
2162 
2163 	/* Clear the interrupts */
2164 	cs.bits.pktread = 0;
2165 	cs.bits.ptrread = 0;
2166 	cs_val = cs.value & RDC_STAT_WR1C;
2167 	RXDMA_REG_WRITE64(handle, RDC_STAT, channel, cs_val);
2168 
2169 	rdc_stats = &hxgep->statsp->rdc_stats[ldvp->vdma_index];
2170 
2171 	if (cs.bits.rbr_cpl_to) {
2172 		rdc_stats->rbr_tmout++;
2173 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2174 		    HXGE_FM_EREPORT_RDMC_RBR_CPL_TO);
2175 		rxchan_fatal = B_TRUE;
2176 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2177 		    "==> hxge_rx_err_evnts(channel %d): "
2178 		    "fatal error: rx_rbr_timeout", channel));
2179 	}
2180 
2181 	if ((cs.bits.rcr_shadow_par_err) || (cs.bits.rbr_prefetch_par_err)) {
2182 		(void) hpi_rxdma_ring_perr_stat_get(handle,
2183 		    &rdc_stats->errlog.pre_par, &rdc_stats->errlog.sha_par);
2184 	}
2185 
2186 	if (cs.bits.rcr_shadow_par_err) {
2187 		rdc_stats->rcr_sha_par++;
2188 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2189 		    HXGE_FM_EREPORT_RDMC_RCR_SHA_PAR);
2190 		rxchan_fatal = B_TRUE;
2191 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2192 		    "==> hxge_rx_err_evnts(channel %d): "
2193 		    "fatal error: rcr_shadow_par_err", channel));
2194 	}
2195 
2196 	if (cs.bits.rbr_prefetch_par_err) {
2197 		rdc_stats->rbr_pre_par++;
2198 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2199 		    HXGE_FM_EREPORT_RDMC_RBR_PRE_PAR);
2200 		rxchan_fatal = B_TRUE;
2201 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2202 		    "==> hxge_rx_err_evnts(channel %d): "
2203 		    "fatal error: rbr_prefetch_par_err", channel));
2204 	}
2205 
2206 	if (cs.bits.rbr_pre_empty) {
2207 		rdc_stats->rbr_pre_empty++;
2208 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2209 		    HXGE_FM_EREPORT_RDMC_RBR_PRE_EMPTY);
2210 		rxchan_fatal = B_TRUE;
2211 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2212 		    "==> hxge_rx_err_evnts(channel %d): "
2213 		    "fatal error: rbr_pre_empty", channel));
2214 	}
2215 
2216 	if (cs.bits.peu_resp_err) {
2217 		rdc_stats->peu_resp_err++;
2218 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2219 		    HXGE_FM_EREPORT_RDMC_PEU_RESP_ERR);
2220 		rxchan_fatal = B_TRUE;
2221 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2222 		    "==> hxge_rx_err_evnts(channel %d): "
2223 		    "fatal error: peu_resp_err", channel));
2224 	}
2225 
2226 	if (cs.bits.rcr_thres) {
2227 		rdc_stats->rcr_thres++;
2228 	}
2229 
2230 	if (cs.bits.rcr_to) {
2231 		rdc_stats->rcr_to++;
2232 	}
2233 
2234 	if (cs.bits.rcr_shadow_full) {
2235 		rdc_stats->rcr_shadow_full++;
2236 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2237 		    HXGE_FM_EREPORT_RDMC_RCR_SHA_FULL);
2238 		rxchan_fatal = B_TRUE;
2239 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2240 		    "==> hxge_rx_err_evnts(channel %d): "
2241 		    "fatal error: rcr_shadow_full", channel));
2242 	}
2243 
2244 	if (cs.bits.rcr_full) {
2245 		rdc_stats->rcrfull++;
2246 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2247 		    HXGE_FM_EREPORT_RDMC_RCRFULL);
2248 		rxchan_fatal = B_TRUE;
2249 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2250 		    "==> hxge_rx_err_evnts(channel %d): "
2251 		    "fatal error: rcrfull error", channel));
2252 	}
2253 
2254 	if (cs.bits.rbr_empty) {
2255 		rdc_stats->rbr_empty++;
2256 		hxge_rx_rbr_empty_recover(hxgep, channel);
2257 	}
2258 
2259 	if (cs.bits.rbr_full) {
2260 		rdc_stats->rbrfull++;
2261 		HXGE_FM_REPORT_ERROR(hxgep, channel,
2262 		    HXGE_FM_EREPORT_RDMC_RBRFULL);
2263 		rxchan_fatal = B_TRUE;
2264 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2265 		    "==> hxge_rx_err_evnts(channel %d): "
2266 		    "fatal error: rbr_full error", channel));
2267 	}
2268 
2269 	if (rxchan_fatal) {
2270 		p_rx_rcr_ring_t	rcrp;
2271 		p_rx_rbr_ring_t rbrp;
2272 
2273 		rcrp = hxgep->rx_rcr_rings->rcr_rings[channel];
2274 		rbrp = rcrp->rx_rbr_p;
2275 
2276 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2277 		    " hxge_rx_err_evnts: fatal error on Channel #%d\n",
2278 		    channel));
2279 		MUTEX_ENTER(&rbrp->post_lock);
2280 		/* This function needs to be inside the post_lock */
2281 		status = hxge_rxdma_fatal_err_recover(hxgep, channel);
2282 		MUTEX_EXIT(&rbrp->post_lock);
2283 		if (status == HXGE_OK) {
2284 			FM_SERVICE_RESTORED(hxgep);
2285 		}
2286 	}
2287 	HXGE_DEBUG_MSG((hxgep, INT_CTL, "<== hxge_rx_err_evnts"));
2288 
2289 	return (status);
2290 }
2291 
2292 static hxge_status_t
2293 hxge_map_rxdma(p_hxge_t hxgep)
2294 {
2295 	int			i, ndmas;
2296 	uint16_t		channel;
2297 	p_rx_rbr_rings_t	rx_rbr_rings;
2298 	p_rx_rbr_ring_t		*rbr_rings;
2299 	p_rx_rcr_rings_t	rx_rcr_rings;
2300 	p_rx_rcr_ring_t		*rcr_rings;
2301 	p_rx_mbox_areas_t	rx_mbox_areas_p;
2302 	p_rx_mbox_t		*rx_mbox_p;
2303 	p_hxge_dma_pool_t	dma_buf_poolp;
2304 	p_hxge_dma_common_t	*dma_buf_p;
2305 	p_hxge_dma_pool_t	dma_rbr_cntl_poolp;
2306 	p_hxge_dma_common_t	*dma_rbr_cntl_p;
2307 	p_hxge_dma_pool_t	dma_rcr_cntl_poolp;
2308 	p_hxge_dma_common_t	*dma_rcr_cntl_p;
2309 	p_hxge_dma_pool_t	dma_mbox_cntl_poolp;
2310 	p_hxge_dma_common_t	*dma_mbox_cntl_p;
2311 	uint32_t		*num_chunks;
2312 	hxge_status_t		status = HXGE_OK;
2313 
2314 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_map_rxdma"));
2315 
2316 	dma_buf_poolp = hxgep->rx_buf_pool_p;
2317 	dma_rbr_cntl_poolp = hxgep->rx_rbr_cntl_pool_p;
2318 	dma_rcr_cntl_poolp = hxgep->rx_rcr_cntl_pool_p;
2319 	dma_mbox_cntl_poolp = hxgep->rx_mbox_cntl_pool_p;
2320 
2321 	if (!dma_buf_poolp->buf_allocated ||
2322 	    !dma_rbr_cntl_poolp->buf_allocated ||
2323 	    !dma_rcr_cntl_poolp->buf_allocated ||
2324 	    !dma_mbox_cntl_poolp->buf_allocated) {
2325 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2326 		    "<== hxge_map_rxdma: buf not allocated"));
2327 		return (HXGE_ERROR);
2328 	}
2329 
2330 	ndmas = dma_buf_poolp->ndmas;
2331 	if (!ndmas) {
2332 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
2333 		    "<== hxge_map_rxdma: no dma allocated"));
2334 		return (HXGE_ERROR);
2335 	}
2336 
2337 	num_chunks = dma_buf_poolp->num_chunks;
2338 	dma_buf_p = dma_buf_poolp->dma_buf_pool_p;
2339 	dma_rbr_cntl_p = dma_rbr_cntl_poolp->dma_buf_pool_p;
2340 	dma_rcr_cntl_p = dma_rcr_cntl_poolp->dma_buf_pool_p;
2341 	dma_mbox_cntl_p = dma_mbox_cntl_poolp->dma_buf_pool_p;
2342 
2343 	rx_rbr_rings = (p_rx_rbr_rings_t)
2344 	    KMEM_ZALLOC(sizeof (rx_rbr_rings_t), KM_SLEEP);
2345 	rbr_rings = (p_rx_rbr_ring_t *)KMEM_ZALLOC(
2346 	    sizeof (p_rx_rbr_ring_t) * ndmas, KM_SLEEP);
2347 
2348 	rx_rcr_rings = (p_rx_rcr_rings_t)
2349 	    KMEM_ZALLOC(sizeof (rx_rcr_rings_t), KM_SLEEP);
2350 	rcr_rings = (p_rx_rcr_ring_t *)KMEM_ZALLOC(
2351 	    sizeof (p_rx_rcr_ring_t) * ndmas, KM_SLEEP);
2352 
2353 	rx_mbox_areas_p = (p_rx_mbox_areas_t)
2354 	    KMEM_ZALLOC(sizeof (rx_mbox_areas_t), KM_SLEEP);
2355 	rx_mbox_p = (p_rx_mbox_t *)KMEM_ZALLOC(
2356 	    sizeof (p_rx_mbox_t) * ndmas, KM_SLEEP);
2357 
2358 	/*
2359 	 * Timeout should be set based on the system clock divider.
2360 	 * The following timeout value of 1 assumes that the
2361 	 * granularity (1000) is 3 microseconds running at 300MHz.
2362 	 */
2363 
2364 	hxgep->intr_threshold = RXDMA_RCR_PTHRES_DEFAULT;
2365 	hxgep->intr_timeout = RXDMA_RCR_TO_DEFAULT;
2366 
2367 	/*
2368 	 * Map descriptors from the buffer polls for each dam channel.
2369 	 */
2370 	for (i = 0; i < ndmas; i++) {
2371 		/*
2372 		 * Set up and prepare buffer blocks, descriptors and mailbox.
2373 		 */
2374 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
2375 		status = hxge_map_rxdma_channel(hxgep, channel,
2376 		    (p_hxge_dma_common_t *)&dma_buf_p[i],
2377 		    (p_rx_rbr_ring_t *)&rbr_rings[i],
2378 		    num_chunks[i],
2379 		    (p_hxge_dma_common_t *)&dma_rbr_cntl_p[i],
2380 		    (p_hxge_dma_common_t *)&dma_rcr_cntl_p[i],
2381 		    (p_hxge_dma_common_t *)&dma_mbox_cntl_p[i],
2382 		    (p_rx_rcr_ring_t *)&rcr_rings[i],
2383 		    (p_rx_mbox_t *)&rx_mbox_p[i]);
2384 		if (status != HXGE_OK) {
2385 			goto hxge_map_rxdma_fail1;
2386 		}
2387 		rbr_rings[i]->index = (uint16_t)i;
2388 		rcr_rings[i]->index = (uint16_t)i;
2389 		rcr_rings[i]->rdc_stats = &hxgep->statsp->rdc_stats[i];
2390 	}
2391 
2392 	rx_rbr_rings->ndmas = rx_rcr_rings->ndmas = ndmas;
2393 	rx_rbr_rings->rbr_rings = rbr_rings;
2394 	hxgep->rx_rbr_rings = rx_rbr_rings;
2395 	rx_rcr_rings->rcr_rings = rcr_rings;
2396 	hxgep->rx_rcr_rings = rx_rcr_rings;
2397 
2398 	rx_mbox_areas_p->rxmbox_areas = rx_mbox_p;
2399 	hxgep->rx_mbox_areas_p = rx_mbox_areas_p;
2400 
2401 	goto hxge_map_rxdma_exit;
2402 
2403 hxge_map_rxdma_fail1:
2404 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2405 	    "==> hxge_map_rxdma: unmap rbr,rcr (status 0x%x channel %d i %d)",
2406 	    status, channel, i));
2407 	i--;
2408 	for (; i >= 0; i--) {
2409 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
2410 		hxge_unmap_rxdma_channel(hxgep, channel,
2411 		    rbr_rings[i], rcr_rings[i], rx_mbox_p[i]);
2412 	}
2413 
2414 	KMEM_FREE(rbr_rings, sizeof (p_rx_rbr_ring_t) * ndmas);
2415 	KMEM_FREE(rx_rbr_rings, sizeof (rx_rbr_rings_t));
2416 	KMEM_FREE(rcr_rings, sizeof (p_rx_rcr_ring_t) * ndmas);
2417 	KMEM_FREE(rx_rcr_rings, sizeof (rx_rcr_rings_t));
2418 	KMEM_FREE(rx_mbox_p, sizeof (p_rx_mbox_t) * ndmas);
2419 	KMEM_FREE(rx_mbox_areas_p, sizeof (rx_mbox_areas_t));
2420 
2421 hxge_map_rxdma_exit:
2422 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2423 	    "<== hxge_map_rxdma: (status 0x%x channel %d)", status, channel));
2424 
2425 	return (status);
2426 }
2427 
2428 static void
2429 hxge_unmap_rxdma(p_hxge_t hxgep)
2430 {
2431 	int			i, ndmas;
2432 	uint16_t		channel;
2433 	p_rx_rbr_rings_t	rx_rbr_rings;
2434 	p_rx_rbr_ring_t		*rbr_rings;
2435 	p_rx_rcr_rings_t	rx_rcr_rings;
2436 	p_rx_rcr_ring_t		*rcr_rings;
2437 	p_rx_mbox_areas_t	rx_mbox_areas_p;
2438 	p_rx_mbox_t		*rx_mbox_p;
2439 	p_hxge_dma_pool_t	dma_buf_poolp;
2440 	p_hxge_dma_pool_t	dma_rbr_cntl_poolp;
2441 	p_hxge_dma_pool_t	dma_rcr_cntl_poolp;
2442 	p_hxge_dma_pool_t	dma_mbox_cntl_poolp;
2443 	p_hxge_dma_common_t	*dma_buf_p;
2444 
2445 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_unmap_rxdma"));
2446 
2447 	dma_buf_poolp = hxgep->rx_buf_pool_p;
2448 	dma_rbr_cntl_poolp = hxgep->rx_rbr_cntl_pool_p;
2449 	dma_rcr_cntl_poolp = hxgep->rx_rcr_cntl_pool_p;
2450 	dma_mbox_cntl_poolp = hxgep->rx_mbox_cntl_pool_p;
2451 
2452 	if (!dma_buf_poolp->buf_allocated ||
2453 	    !dma_rbr_cntl_poolp->buf_allocated ||
2454 	    !dma_rcr_cntl_poolp->buf_allocated ||
2455 	    !dma_mbox_cntl_poolp->buf_allocated) {
2456 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2457 		    "<== hxge_unmap_rxdma: NULL buf pointers"));
2458 		return;
2459 	}
2460 
2461 	rx_rbr_rings = hxgep->rx_rbr_rings;
2462 	rx_rcr_rings = hxgep->rx_rcr_rings;
2463 	if (rx_rbr_rings == NULL || rx_rcr_rings == NULL) {
2464 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2465 		    "<== hxge_unmap_rxdma: NULL pointers"));
2466 		return;
2467 	}
2468 
2469 	ndmas = rx_rbr_rings->ndmas;
2470 	if (!ndmas) {
2471 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2472 		    "<== hxge_unmap_rxdma: no channel"));
2473 		return;
2474 	}
2475 
2476 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2477 	    "==> hxge_unmap_rxdma (ndmas %d)", ndmas));
2478 
2479 	rbr_rings = rx_rbr_rings->rbr_rings;
2480 	rcr_rings = rx_rcr_rings->rcr_rings;
2481 	rx_mbox_areas_p = hxgep->rx_mbox_areas_p;
2482 	rx_mbox_p = rx_mbox_areas_p->rxmbox_areas;
2483 	dma_buf_p = dma_buf_poolp->dma_buf_pool_p;
2484 
2485 	for (i = 0; i < ndmas; i++) {
2486 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
2487 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2488 		    "==> hxge_unmap_rxdma (ndmas %d) channel %d",
2489 		    ndmas, channel));
2490 		(void) hxge_unmap_rxdma_channel(hxgep, channel,
2491 		    (p_rx_rbr_ring_t)rbr_rings[i],
2492 		    (p_rx_rcr_ring_t)rcr_rings[i],
2493 		    (p_rx_mbox_t)rx_mbox_p[i]);
2494 	}
2495 
2496 	KMEM_FREE(rx_rbr_rings, sizeof (rx_rbr_rings_t));
2497 	KMEM_FREE(rbr_rings, sizeof (p_rx_rbr_ring_t) * ndmas);
2498 	KMEM_FREE(rx_rcr_rings, sizeof (rx_rcr_rings_t));
2499 	KMEM_FREE(rcr_rings, sizeof (p_rx_rcr_ring_t) * ndmas);
2500 	KMEM_FREE(rx_mbox_areas_p, sizeof (rx_mbox_areas_t));
2501 	KMEM_FREE(rx_mbox_p, sizeof (p_rx_mbox_t) * ndmas);
2502 
2503 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_unmap_rxdma"));
2504 }
2505 
2506 hxge_status_t
2507 hxge_map_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
2508     p_hxge_dma_common_t *dma_buf_p, p_rx_rbr_ring_t *rbr_p,
2509     uint32_t num_chunks, p_hxge_dma_common_t *dma_rbr_cntl_p,
2510     p_hxge_dma_common_t *dma_rcr_cntl_p, p_hxge_dma_common_t *dma_mbox_cntl_p,
2511     p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p)
2512 {
2513 	int status = HXGE_OK;
2514 
2515 	/*
2516 	 * Set up and prepare buffer blocks, descriptors and mailbox.
2517 	 */
2518 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2519 	    "==> hxge_map_rxdma_channel (channel %d)", channel));
2520 
2521 	/*
2522 	 * Receive buffer blocks
2523 	 */
2524 	status = hxge_map_rxdma_channel_buf_ring(hxgep, channel,
2525 	    dma_buf_p, rbr_p, num_chunks);
2526 	if (status != HXGE_OK) {
2527 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2528 		    "==> hxge_map_rxdma_channel (channel %d): "
2529 		    "map buffer failed 0x%x", channel, status));
2530 		goto hxge_map_rxdma_channel_exit;
2531 	}
2532 
2533 	/*
2534 	 * Receive block ring, completion ring and mailbox.
2535 	 */
2536 	status = hxge_map_rxdma_channel_cfg_ring(hxgep, channel,
2537 	    dma_rbr_cntl_p, dma_rcr_cntl_p, dma_mbox_cntl_p,
2538 	    rbr_p, rcr_p, rx_mbox_p);
2539 	if (status != HXGE_OK) {
2540 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2541 		    "==> hxge_map_rxdma_channel (channel %d): "
2542 		    "map config failed 0x%x", channel, status));
2543 		goto hxge_map_rxdma_channel_fail2;
2544 	}
2545 	goto hxge_map_rxdma_channel_exit;
2546 
2547 hxge_map_rxdma_channel_fail3:
2548 	/* Free rbr, rcr */
2549 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2550 	    "==> hxge_map_rxdma_channel: free rbr/rcr (status 0x%x channel %d)",
2551 	    status, channel));
2552 	hxge_unmap_rxdma_channel_cfg_ring(hxgep, *rcr_p, *rx_mbox_p);
2553 
2554 hxge_map_rxdma_channel_fail2:
2555 	/* Free buffer blocks */
2556 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2557 	    "==> hxge_map_rxdma_channel: free rx buffers"
2558 	    "(hxgep 0x%x status 0x%x channel %d)",
2559 	    hxgep, status, channel));
2560 	hxge_unmap_rxdma_channel_buf_ring(hxgep, *rbr_p);
2561 
2562 	status = HXGE_ERROR;
2563 
2564 hxge_map_rxdma_channel_exit:
2565 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2566 	    "<== hxge_map_rxdma_channel: (hxgep 0x%x status 0x%x channel %d)",
2567 	    hxgep, status, channel));
2568 
2569 	return (status);
2570 }
2571 
2572 /*ARGSUSED*/
2573 static void
2574 hxge_unmap_rxdma_channel(p_hxge_t hxgep, uint16_t channel,
2575     p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p)
2576 {
2577 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2578 	    "==> hxge_unmap_rxdma_channel (channel %d)", channel));
2579 
2580 	/*
2581 	 * unmap receive block ring, completion ring and mailbox.
2582 	 */
2583 	(void) hxge_unmap_rxdma_channel_cfg_ring(hxgep, rcr_p, rx_mbox_p);
2584 
2585 	/* unmap buffer blocks */
2586 	(void) hxge_unmap_rxdma_channel_buf_ring(hxgep, rbr_p);
2587 
2588 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_unmap_rxdma_channel"));
2589 }
2590 
2591 /*ARGSUSED*/
2592 static hxge_status_t
2593 hxge_map_rxdma_channel_cfg_ring(p_hxge_t hxgep, uint16_t dma_channel,
2594     p_hxge_dma_common_t *dma_rbr_cntl_p, p_hxge_dma_common_t *dma_rcr_cntl_p,
2595     p_hxge_dma_common_t *dma_mbox_cntl_p, p_rx_rbr_ring_t *rbr_p,
2596     p_rx_rcr_ring_t *rcr_p, p_rx_mbox_t *rx_mbox_p)
2597 {
2598 	p_rx_rbr_ring_t 	rbrp;
2599 	p_rx_rcr_ring_t 	rcrp;
2600 	p_rx_mbox_t 		mboxp;
2601 	p_hxge_dma_common_t 	cntl_dmap;
2602 	p_hxge_dma_common_t 	dmap;
2603 	p_rx_msg_t 		*rx_msg_ring;
2604 	p_rx_msg_t 		rx_msg_p;
2605 	rdc_rbr_cfg_a_t		*rcfga_p;
2606 	rdc_rbr_cfg_b_t		*rcfgb_p;
2607 	rdc_rcr_cfg_a_t		*cfga_p;
2608 	rdc_rcr_cfg_b_t		*cfgb_p;
2609 	rdc_rx_cfg1_t		*cfig1_p;
2610 	rdc_rx_cfg2_t		*cfig2_p;
2611 	rdc_rbr_kick_t		*kick_p;
2612 	uint32_t		dmaaddrp;
2613 	uint32_t		*rbr_vaddrp;
2614 	uint32_t		bkaddr;
2615 	hxge_status_t		status = HXGE_OK;
2616 	int			i;
2617 	uint32_t 		hxge_port_rcr_size;
2618 
2619 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2620 	    "==> hxge_map_rxdma_channel_cfg_ring"));
2621 
2622 	cntl_dmap = *dma_rbr_cntl_p;
2623 
2624 	/*
2625 	 * Map in the receive block ring
2626 	 */
2627 	rbrp = *rbr_p;
2628 	dmap = (p_hxge_dma_common_t)&rbrp->rbr_desc;
2629 	hxge_setup_dma_common(dmap, cntl_dmap, rbrp->rbb_max, 4);
2630 
2631 	/*
2632 	 * Zero out buffer block ring descriptors.
2633 	 */
2634 	bzero((caddr_t)dmap->kaddrp, dmap->alength);
2635 
2636 	rcfga_p = &(rbrp->rbr_cfga);
2637 	rcfgb_p = &(rbrp->rbr_cfgb);
2638 	kick_p = &(rbrp->rbr_kick);
2639 	rcfga_p->value = 0;
2640 	rcfgb_p->value = 0;
2641 	kick_p->value = 0;
2642 	rbrp->rbr_addr = dmap->dma_cookie.dmac_laddress;
2643 	rcfga_p->value = (rbrp->rbr_addr &
2644 	    (RBR_CFIG_A_STDADDR_MASK | RBR_CFIG_A_STDADDR_BASE_MASK));
2645 	rcfga_p->value |= ((uint64_t)rbrp->rbb_max << RBR_CFIG_A_LEN_SHIFT);
2646 
2647 	/* XXXX: how to choose packet buffer sizes */
2648 	rcfgb_p->bits.bufsz0 = rbrp->pkt_buf_size0;
2649 	rcfgb_p->bits.vld0 = 1;
2650 	rcfgb_p->bits.bufsz1 = rbrp->pkt_buf_size1;
2651 	rcfgb_p->bits.vld1 = 1;
2652 	rcfgb_p->bits.bufsz2 = rbrp->pkt_buf_size2;
2653 	rcfgb_p->bits.vld2 = 1;
2654 	rcfgb_p->bits.bksize = hxgep->rx_bksize_code;
2655 
2656 	/*
2657 	 * For each buffer block, enter receive block address to the ring.
2658 	 */
2659 	rbr_vaddrp = (uint32_t *)dmap->kaddrp;
2660 	rbrp->rbr_desc_vp = (uint32_t *)dmap->kaddrp;
2661 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2662 	    "==> hxge_map_rxdma_channel_cfg_ring: channel %d "
2663 	    "rbr_vaddrp $%p", dma_channel, rbr_vaddrp));
2664 
2665 	rx_msg_ring = rbrp->rx_msg_ring;
2666 	for (i = 0; i < rbrp->tnblocks; i++) {
2667 		rx_msg_p = rx_msg_ring[i];
2668 		rx_msg_p->hxgep = hxgep;
2669 		rx_msg_p->rx_rbr_p = rbrp;
2670 		bkaddr = (uint32_t)
2671 		    ((rx_msg_p->buf_dma.dma_cookie.dmac_laddress >>
2672 		    RBR_BKADDR_SHIFT));
2673 		rx_msg_p->free = B_FALSE;
2674 		rx_msg_p->max_usage_cnt = 0xbaddcafe;
2675 
2676 		*rbr_vaddrp++ = bkaddr;
2677 	}
2678 
2679 	kick_p->bits.bkadd = rbrp->rbb_max;
2680 	rbrp->rbr_wr_index = (rbrp->rbb_max - 1);
2681 
2682 	rbrp->rbr_rd_index = 0;
2683 
2684 	rbrp->rbr_consumed = 0;
2685 	rbrp->rbr_used = 0;
2686 	rbrp->rbr_use_bcopy = B_TRUE;
2687 	rbrp->rbr_bufsize_type = RCR_PKTBUFSZ_0;
2688 
2689 	/*
2690 	 * Do bcopy on packets greater than bcopy size once the lo threshold is
2691 	 * reached. This lo threshold should be less than the hi threshold.
2692 	 *
2693 	 * Do bcopy on every packet once the hi threshold is reached.
2694 	 */
2695 	if (hxge_rx_threshold_lo >= hxge_rx_threshold_hi) {
2696 		/* default it to use hi */
2697 		hxge_rx_threshold_lo = hxge_rx_threshold_hi;
2698 	}
2699 	if (hxge_rx_buf_size_type > HXGE_RBR_TYPE2) {
2700 		hxge_rx_buf_size_type = HXGE_RBR_TYPE2;
2701 	}
2702 	rbrp->rbr_bufsize_type = hxge_rx_buf_size_type;
2703 
2704 	switch (hxge_rx_threshold_hi) {
2705 	default:
2706 	case HXGE_RX_COPY_NONE:
2707 		/* Do not do bcopy at all */
2708 		rbrp->rbr_use_bcopy = B_FALSE;
2709 		rbrp->rbr_threshold_hi = rbrp->rbb_max;
2710 		break;
2711 
2712 	case HXGE_RX_COPY_1:
2713 	case HXGE_RX_COPY_2:
2714 	case HXGE_RX_COPY_3:
2715 	case HXGE_RX_COPY_4:
2716 	case HXGE_RX_COPY_5:
2717 	case HXGE_RX_COPY_6:
2718 	case HXGE_RX_COPY_7:
2719 		rbrp->rbr_threshold_hi =
2720 		    rbrp->rbb_max * (hxge_rx_threshold_hi) /
2721 		    HXGE_RX_BCOPY_SCALE;
2722 		break;
2723 
2724 	case HXGE_RX_COPY_ALL:
2725 		rbrp->rbr_threshold_hi = 0;
2726 		break;
2727 	}
2728 
2729 	switch (hxge_rx_threshold_lo) {
2730 	default:
2731 	case HXGE_RX_COPY_NONE:
2732 		/* Do not do bcopy at all */
2733 		if (rbrp->rbr_use_bcopy) {
2734 			rbrp->rbr_use_bcopy = B_FALSE;
2735 		}
2736 		rbrp->rbr_threshold_lo = rbrp->rbb_max;
2737 		break;
2738 
2739 	case HXGE_RX_COPY_1:
2740 	case HXGE_RX_COPY_2:
2741 	case HXGE_RX_COPY_3:
2742 	case HXGE_RX_COPY_4:
2743 	case HXGE_RX_COPY_5:
2744 	case HXGE_RX_COPY_6:
2745 	case HXGE_RX_COPY_7:
2746 		rbrp->rbr_threshold_lo =
2747 		    rbrp->rbb_max * (hxge_rx_threshold_lo) /
2748 		    HXGE_RX_BCOPY_SCALE;
2749 		break;
2750 
2751 	case HXGE_RX_COPY_ALL:
2752 		rbrp->rbr_threshold_lo = 0;
2753 		break;
2754 	}
2755 
2756 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
2757 	    "hxge_map_rxdma_channel_cfg_ring: channel %d rbb_max %d "
2758 	    "rbrp->rbr_bufsize_type %d rbb_threshold_hi %d "
2759 	    "rbb_threshold_lo %d",
2760 	    dma_channel, rbrp->rbb_max, rbrp->rbr_bufsize_type,
2761 	    rbrp->rbr_threshold_hi, rbrp->rbr_threshold_lo));
2762 
2763 	/* Map in the receive completion ring */
2764 	rcrp = (p_rx_rcr_ring_t)KMEM_ZALLOC(sizeof (rx_rcr_ring_t), KM_SLEEP);
2765 	rcrp->rdc = dma_channel;
2766 	rcrp->hxgep = hxgep;
2767 
2768 	hxge_port_rcr_size = hxgep->hxge_port_rcr_size;
2769 	rcrp->comp_size = hxge_port_rcr_size;
2770 	rcrp->comp_wrap_mask = hxge_port_rcr_size - 1;
2771 
2772 	cntl_dmap = *dma_rcr_cntl_p;
2773 
2774 	dmap = (p_hxge_dma_common_t)&rcrp->rcr_desc;
2775 	hxge_setup_dma_common(dmap, cntl_dmap, rcrp->comp_size,
2776 	    sizeof (rcr_entry_t));
2777 	rcrp->comp_rd_index = 0;
2778 	rcrp->comp_wt_index = 0;
2779 	rcrp->rcr_desc_rd_head_p = rcrp->rcr_desc_first_p =
2780 	    (p_rcr_entry_t)DMA_COMMON_VPTR(rcrp->rcr_desc);
2781 #if defined(__i386)
2782 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
2783 	    (p_rcr_entry_t)(uint32_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
2784 #else
2785 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
2786 	    (p_rcr_entry_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
2787 #endif
2788 	rcrp->rcr_desc_last_p = rcrp->rcr_desc_rd_head_p +
2789 	    (hxge_port_rcr_size - 1);
2790 	rcrp->rcr_desc_last_pp = rcrp->rcr_desc_rd_head_pp +
2791 	    (hxge_port_rcr_size - 1);
2792 
2793 	rcrp->rcr_tail_begin = DMA_COMMON_IOADDR(rcrp->rcr_desc);
2794 	rcrp->rcr_tail_begin = (rcrp->rcr_tail_begin & 0x7ffffULL) >> 3;
2795 
2796 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2797 	    "==> hxge_map_rxdma_channel_cfg_ring: channel %d "
2798 	    "rbr_vaddrp $%p rcr_desc_rd_head_p $%p "
2799 	    "rcr_desc_rd_head_pp $%p rcr_desc_rd_last_p $%p "
2800 	    "rcr_desc_rd_last_pp $%p ",
2801 	    dma_channel, rbr_vaddrp, rcrp->rcr_desc_rd_head_p,
2802 	    rcrp->rcr_desc_rd_head_pp, rcrp->rcr_desc_last_p,
2803 	    rcrp->rcr_desc_last_pp));
2804 
2805 	/*
2806 	 * Zero out buffer block ring descriptors.
2807 	 */
2808 	bzero((caddr_t)dmap->kaddrp, dmap->alength);
2809 	rcrp->intr_timeout = hxgep->intr_timeout;
2810 	rcrp->intr_threshold = hxgep->intr_threshold;
2811 	rcrp->full_hdr_flag = B_FALSE;
2812 	rcrp->sw_priv_hdr_len = 0;
2813 
2814 	cfga_p = &(rcrp->rcr_cfga);
2815 	cfgb_p = &(rcrp->rcr_cfgb);
2816 	cfga_p->value = 0;
2817 	cfgb_p->value = 0;
2818 	rcrp->rcr_addr = dmap->dma_cookie.dmac_laddress;
2819 
2820 	cfga_p->value = (rcrp->rcr_addr &
2821 	    (RCRCFIG_A_STADDR_MASK | RCRCFIG_A_STADDR_BASE_MASK));
2822 
2823 	cfga_p->value |= ((uint64_t)rcrp->comp_size << RCRCFIG_A_LEN_SHIF);
2824 
2825 	/*
2826 	 * Timeout should be set based on the system clock divider. The
2827 	 * following timeout value of 1 assumes that the granularity (1000) is
2828 	 * 3 microseconds running at 300MHz.
2829 	 */
2830 	cfgb_p->bits.pthres = rcrp->intr_threshold;
2831 	cfgb_p->bits.timeout = rcrp->intr_timeout;
2832 	cfgb_p->bits.entout = 1;
2833 
2834 	/* Map in the mailbox */
2835 	cntl_dmap = *dma_mbox_cntl_p;
2836 	mboxp = (p_rx_mbox_t)KMEM_ZALLOC(sizeof (rx_mbox_t), KM_SLEEP);
2837 	dmap = (p_hxge_dma_common_t)&mboxp->rx_mbox;
2838 	hxge_setup_dma_common(dmap, cntl_dmap, 1, sizeof (rxdma_mailbox_t));
2839 	cfig1_p = (rdc_rx_cfg1_t *)&mboxp->rx_cfg1;
2840 	cfig2_p = (rdc_rx_cfg2_t *)&mboxp->rx_cfg2;
2841 	cfig1_p->value = cfig2_p->value = 0;
2842 
2843 	mboxp->mbox_addr = dmap->dma_cookie.dmac_laddress;
2844 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2845 	    "==> hxge_map_rxdma_channel_cfg_ring: "
2846 	    "channel %d cfg1 0x%016llx cfig2 0x%016llx cookie 0x%016llx",
2847 	    dma_channel, cfig1_p->value, cfig2_p->value,
2848 	    mboxp->mbox_addr));
2849 
2850 	dmaaddrp = (uint32_t)((dmap->dma_cookie.dmac_laddress >> 32) & 0xfff);
2851 	cfig1_p->bits.mbaddr_h = dmaaddrp;
2852 
2853 	dmaaddrp = (uint32_t)(dmap->dma_cookie.dmac_laddress & 0xffffffff);
2854 	dmaaddrp = (uint32_t)(dmap->dma_cookie.dmac_laddress &
2855 	    RXDMA_CFIG2_MBADDR_L_MASK);
2856 
2857 	cfig2_p->bits.mbaddr_l = (dmaaddrp >> RXDMA_CFIG2_MBADDR_L_SHIFT);
2858 
2859 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2860 	    "==> hxge_map_rxdma_channel_cfg_ring: channel %d damaddrp $%p "
2861 	    "cfg1 0x%016llx cfig2 0x%016llx",
2862 	    dma_channel, dmaaddrp, cfig1_p->value, cfig2_p->value));
2863 
2864 	cfig2_p->bits.full_hdr = rcrp->full_hdr_flag;
2865 	cfig2_p->bits.offset = rcrp->sw_priv_hdr_len;
2866 
2867 	rbrp->rx_rcr_p = rcrp;
2868 	rcrp->rx_rbr_p = rbrp;
2869 	*rcr_p = rcrp;
2870 	*rx_mbox_p = mboxp;
2871 
2872 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2873 	    "<== hxge_map_rxdma_channel_cfg_ring status 0x%08x", status));
2874 	return (status);
2875 }
2876 
2877 /*ARGSUSED*/
2878 static void
2879 hxge_unmap_rxdma_channel_cfg_ring(p_hxge_t hxgep,
2880     p_rx_rcr_ring_t rcr_p, p_rx_mbox_t rx_mbox_p)
2881 {
2882 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2883 	    "==> hxge_unmap_rxdma_channel_cfg_ring: channel %d", rcr_p->rdc));
2884 
2885 	KMEM_FREE(rcr_p, sizeof (rx_rcr_ring_t));
2886 	KMEM_FREE(rx_mbox_p, sizeof (rx_mbox_t));
2887 
2888 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2889 	    "<== hxge_unmap_rxdma_channel_cfg_ring"));
2890 }
2891 
2892 static hxge_status_t
2893 hxge_map_rxdma_channel_buf_ring(p_hxge_t hxgep, uint16_t channel,
2894     p_hxge_dma_common_t *dma_buf_p,
2895     p_rx_rbr_ring_t *rbr_p, uint32_t num_chunks)
2896 {
2897 	p_rx_rbr_ring_t		rbrp;
2898 	p_hxge_dma_common_t	dma_bufp, tmp_bufp;
2899 	p_rx_msg_t		*rx_msg_ring;
2900 	p_rx_msg_t		rx_msg_p;
2901 	p_mblk_t		mblk_p;
2902 
2903 	rxring_info_t *ring_info;
2904 	hxge_status_t status = HXGE_OK;
2905 	int i, j, index;
2906 	uint32_t size, bsize, nblocks, nmsgs;
2907 
2908 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2909 	    "==> hxge_map_rxdma_channel_buf_ring: channel %d", channel));
2910 
2911 	dma_bufp = tmp_bufp = *dma_buf_p;
2912 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2913 	    " hxge_map_rxdma_channel_buf_ring: channel %d to map %d "
2914 	    "chunks bufp 0x%016llx", channel, num_chunks, dma_bufp));
2915 
2916 	nmsgs = 0;
2917 	for (i = 0; i < num_chunks; i++, tmp_bufp++) {
2918 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2919 		    "==> hxge_map_rxdma_channel_buf_ring: channel %d "
2920 		    "bufp 0x%016llx nblocks %d nmsgs %d",
2921 		    channel, tmp_bufp, tmp_bufp->nblocks, nmsgs));
2922 		nmsgs += tmp_bufp->nblocks;
2923 	}
2924 	if (!nmsgs) {
2925 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
2926 		    "<== hxge_map_rxdma_channel_buf_ring: channel %d "
2927 		    "no msg blocks", channel));
2928 		status = HXGE_ERROR;
2929 		goto hxge_map_rxdma_channel_buf_ring_exit;
2930 	}
2931 	rbrp = (p_rx_rbr_ring_t)KMEM_ZALLOC(sizeof (rx_rbr_ring_t), KM_SLEEP);
2932 
2933 	size = nmsgs * sizeof (p_rx_msg_t);
2934 	rx_msg_ring = KMEM_ZALLOC(size, KM_SLEEP);
2935 	ring_info = (rxring_info_t *)KMEM_ZALLOC(sizeof (rxring_info_t),
2936 	    KM_SLEEP);
2937 
2938 	MUTEX_INIT(&rbrp->lock, NULL, MUTEX_DRIVER,
2939 	    (void *) hxgep->interrupt_cookie);
2940 	MUTEX_INIT(&rbrp->post_lock, NULL, MUTEX_DRIVER,
2941 	    (void *) hxgep->interrupt_cookie);
2942 
2943 	rbrp->rdc = channel;
2944 	rbrp->num_blocks = num_chunks;
2945 	rbrp->tnblocks = nmsgs;
2946 	rbrp->rbb_max = nmsgs;
2947 	rbrp->rbr_max_size = nmsgs;
2948 	rbrp->rbr_wrap_mask = (rbrp->rbb_max - 1);
2949 
2950 	/*
2951 	 * Buffer sizes suggested by NIU architect. 256, 512 and 2K.
2952 	 */
2953 
2954 	switch (hxgep->rx_bksize_code) {
2955 	case RBR_BKSIZE_4K:
2956 		rbrp->pkt_buf_size0 = RBR_BUFSZ0_256B;
2957 		rbrp->pkt_buf_size0_bytes = RBR_BUFSZ0_256_BYTES;
2958 		rbrp->hpi_pkt_buf_size0 = SIZE_256B;
2959 		break;
2960 	case RBR_BKSIZE_8K:
2961 		/* Use 512 to avoid possible rcr_full condition */
2962 		rbrp->pkt_buf_size0 = RBR_BUFSZ0_512B;
2963 		rbrp->pkt_buf_size0_bytes = RBR_BUFSZ0_512_BYTES;
2964 		rbrp->hpi_pkt_buf_size0 = SIZE_512B;
2965 		break;
2966 	}
2967 
2968 	rbrp->pkt_buf_size1 = RBR_BUFSZ1_1K;
2969 	rbrp->pkt_buf_size1_bytes = RBR_BUFSZ1_1K_BYTES;
2970 	rbrp->hpi_pkt_buf_size1 = SIZE_1KB;
2971 
2972 	rbrp->block_size = hxgep->rx_default_block_size;
2973 
2974 	if (!hxgep->param_arr[param_accept_jumbo].value) {
2975 		rbrp->pkt_buf_size2 = RBR_BUFSZ2_2K;
2976 		rbrp->pkt_buf_size2_bytes = RBR_BUFSZ2_2K_BYTES;
2977 		rbrp->hpi_pkt_buf_size2 = SIZE_2KB;
2978 	} else {
2979 		rbrp->hpi_pkt_buf_size2 = SIZE_4KB;
2980 		rbrp->pkt_buf_size2 = RBR_BUFSZ2_4K;
2981 		rbrp->pkt_buf_size2_bytes = RBR_BUFSZ2_4K_BYTES;
2982 	}
2983 
2984 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
2985 	    "==> hxge_map_rxdma_channel_buf_ring: channel %d "
2986 	    "actual rbr max %d rbb_max %d nmsgs %d "
2987 	    "rbrp->block_size %d default_block_size %d "
2988 	    "(config hxge_rbr_size %d hxge_rbr_spare_size %d)",
2989 	    channel, rbrp->rbr_max_size, rbrp->rbb_max, nmsgs,
2990 	    rbrp->block_size, hxgep->rx_default_block_size,
2991 	    hxge_rbr_size, hxge_rbr_spare_size));
2992 
2993 	/*
2994 	 * Map in buffers from the buffer pool.
2995 	 * Note that num_blocks is the num_chunks. For Sparc, there is likely
2996 	 * only one chunk. For x86, there will be many chunks.
2997 	 * Loop over chunks.
2998 	 */
2999 	index = 0;
3000 	for (i = 0; i < rbrp->num_blocks; i++, dma_bufp++) {
3001 		bsize = dma_bufp->block_size;
3002 		nblocks = dma_bufp->nblocks;
3003 #if defined(__i386)
3004 		ring_info->buffer[i].dvma_addr = (uint32_t)dma_bufp->ioaddr_pp;
3005 #else
3006 		ring_info->buffer[i].dvma_addr = (uint64_t)dma_bufp->ioaddr_pp;
3007 #endif
3008 		ring_info->buffer[i].buf_index = i;
3009 		ring_info->buffer[i].buf_size = dma_bufp->alength;
3010 		ring_info->buffer[i].start_index = index;
3011 #if defined(__i386)
3012 		ring_info->buffer[i].kaddr = (uint32_t)dma_bufp->kaddrp;
3013 #else
3014 		ring_info->buffer[i].kaddr = (uint64_t)dma_bufp->kaddrp;
3015 #endif
3016 
3017 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3018 		    " hxge_map_rxdma_channel_buf_ring: map channel %d "
3019 		    "chunk %d nblocks %d chunk_size %x block_size 0x%x "
3020 		    "dma_bufp $%p dvma_addr $%p", channel, i,
3021 		    dma_bufp->nblocks,
3022 		    ring_info->buffer[i].buf_size, bsize, dma_bufp,
3023 		    ring_info->buffer[i].dvma_addr));
3024 
3025 		/* loop over blocks within a chunk */
3026 		for (j = 0; j < nblocks; j++) {
3027 			if ((rx_msg_p = hxge_allocb(bsize, BPRI_LO,
3028 			    dma_bufp)) == NULL) {
3029 				HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3030 				    "allocb failed (index %d i %d j %d)",
3031 				    index, i, j));
3032 				goto hxge_map_rxdma_channel_buf_ring_fail1;
3033 			}
3034 			rx_msg_ring[index] = rx_msg_p;
3035 			rx_msg_p->block_index = index;
3036 			rx_msg_p->shifted_addr = (uint32_t)
3037 			    ((rx_msg_p->buf_dma.dma_cookie.dmac_laddress >>
3038 			    RBR_BKADDR_SHIFT));
3039 			/*
3040 			 * Too much output
3041 			 * HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3042 			 *	"index %d j %d rx_msg_p $%p mblk %p",
3043 			 *	index, j, rx_msg_p, rx_msg_p->rx_mblk_p));
3044 			 */
3045 			mblk_p = rx_msg_p->rx_mblk_p;
3046 			mblk_p->b_wptr = mblk_p->b_rptr + bsize;
3047 
3048 			rbrp->rbr_ref_cnt++;
3049 			index++;
3050 			rx_msg_p->buf_dma.dma_channel = channel;
3051 		}
3052 	}
3053 	if (i < rbrp->num_blocks) {
3054 		goto hxge_map_rxdma_channel_buf_ring_fail1;
3055 	}
3056 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3057 	    "hxge_map_rxdma_channel_buf_ring: done buf init "
3058 	    "channel %d msg block entries %d", channel, index));
3059 	ring_info->block_size_mask = bsize - 1;
3060 	rbrp->rx_msg_ring = rx_msg_ring;
3061 	rbrp->dma_bufp = dma_buf_p;
3062 	rbrp->ring_info = ring_info;
3063 
3064 	status = hxge_rxbuf_index_info_init(hxgep, rbrp);
3065 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, " hxge_map_rxdma_channel_buf_ring: "
3066 	    "channel %d done buf info init", channel));
3067 
3068 	/*
3069 	 * Finally, permit hxge_freeb() to call hxge_post_page().
3070 	 */
3071 	rbrp->rbr_state = RBR_POSTING;
3072 
3073 	*rbr_p = rbrp;
3074 
3075 	goto hxge_map_rxdma_channel_buf_ring_exit;
3076 
3077 hxge_map_rxdma_channel_buf_ring_fail1:
3078 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3079 	    " hxge_map_rxdma_channel_buf_ring: failed channel (0x%x)",
3080 	    channel, status));
3081 
3082 	index--;
3083 	for (; index >= 0; index--) {
3084 		rx_msg_p = rx_msg_ring[index];
3085 		if (rx_msg_p != NULL) {
3086 			freeb(rx_msg_p->rx_mblk_p);
3087 			rx_msg_ring[index] = NULL;
3088 		}
3089 	}
3090 
3091 hxge_map_rxdma_channel_buf_ring_fail:
3092 	MUTEX_DESTROY(&rbrp->post_lock);
3093 	MUTEX_DESTROY(&rbrp->lock);
3094 	KMEM_FREE(ring_info, sizeof (rxring_info_t));
3095 	KMEM_FREE(rx_msg_ring, size);
3096 	KMEM_FREE(rbrp, sizeof (rx_rbr_ring_t));
3097 
3098 	status = HXGE_ERROR;
3099 
3100 hxge_map_rxdma_channel_buf_ring_exit:
3101 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3102 	    "<== hxge_map_rxdma_channel_buf_ring status 0x%08x", status));
3103 
3104 	return (status);
3105 }
3106 
3107 /*ARGSUSED*/
3108 static void
3109 hxge_unmap_rxdma_channel_buf_ring(p_hxge_t hxgep,
3110     p_rx_rbr_ring_t rbr_p)
3111 {
3112 	p_rx_msg_t	*rx_msg_ring;
3113 	p_rx_msg_t	rx_msg_p;
3114 	rxring_info_t	*ring_info;
3115 	int		i;
3116 	uint32_t	size;
3117 
3118 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3119 	    "==> hxge_unmap_rxdma_channel_buf_ring"));
3120 	if (rbr_p == NULL) {
3121 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3122 		    "<== hxge_unmap_rxdma_channel_buf_ring: NULL rbrp"));
3123 		return;
3124 	}
3125 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3126 	    "==> hxge_unmap_rxdma_channel_buf_ring: channel %d", rbr_p->rdc));
3127 
3128 	rx_msg_ring = rbr_p->rx_msg_ring;
3129 	ring_info = rbr_p->ring_info;
3130 
3131 	if (rx_msg_ring == NULL || ring_info == NULL) {
3132 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3133 		    "<== hxge_unmap_rxdma_channel_buf_ring: "
3134 		    "rx_msg_ring $%p ring_info $%p", rx_msg_p, ring_info));
3135 		return;
3136 	}
3137 
3138 	size = rbr_p->tnblocks * sizeof (p_rx_msg_t);
3139 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3140 	    " hxge_unmap_rxdma_channel_buf_ring: channel %d chunks %d "
3141 	    "tnblocks %d (max %d) size ptrs %d ", rbr_p->rdc, rbr_p->num_blocks,
3142 	    rbr_p->tnblocks, rbr_p->rbr_max_size, size));
3143 
3144 	for (i = 0; i < rbr_p->tnblocks; i++) {
3145 		rx_msg_p = rx_msg_ring[i];
3146 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3147 		    " hxge_unmap_rxdma_channel_buf_ring: "
3148 		    "rx_msg_p $%p", rx_msg_p));
3149 		if (rx_msg_p != NULL) {
3150 			freeb(rx_msg_p->rx_mblk_p);
3151 			rx_msg_ring[i] = NULL;
3152 		}
3153 	}
3154 
3155 	/*
3156 	 * We no longer may use the mutex <post_lock>. By setting
3157 	 * <rbr_state> to anything but POSTING, we prevent
3158 	 * hxge_post_page() from accessing a dead mutex.
3159 	 */
3160 	rbr_p->rbr_state = RBR_UNMAPPING;
3161 	MUTEX_DESTROY(&rbr_p->post_lock);
3162 
3163 	MUTEX_DESTROY(&rbr_p->lock);
3164 	KMEM_FREE(ring_info, sizeof (rxring_info_t));
3165 	KMEM_FREE(rx_msg_ring, size);
3166 
3167 	if (rbr_p->rbr_ref_cnt == 0) {
3168 		/* This is the normal state of affairs. */
3169 		KMEM_FREE(rbr_p, sizeof (*rbr_p));
3170 	} else {
3171 		/*
3172 		 * Some of our buffers are still being used.
3173 		 * Therefore, tell hxge_freeb() this ring is
3174 		 * unmapped, so it may free <rbr_p> for us.
3175 		 */
3176 		rbr_p->rbr_state = RBR_UNMAPPED;
3177 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3178 		    "unmap_rxdma_buf_ring: %d %s outstanding.",
3179 		    rbr_p->rbr_ref_cnt,
3180 		    rbr_p->rbr_ref_cnt == 1 ? "msg" : "msgs"));
3181 	}
3182 
3183 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3184 	    "<== hxge_unmap_rxdma_channel_buf_ring"));
3185 }
3186 
3187 static hxge_status_t
3188 hxge_rxdma_hw_start_common(p_hxge_t hxgep)
3189 {
3190 	hxge_status_t status = HXGE_OK;
3191 
3192 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start_common"));
3193 
3194 	/*
3195 	 * Load the sharable parameters by writing to the function zero control
3196 	 * registers. These FZC registers should be initialized only once for
3197 	 * the entire chip.
3198 	 */
3199 	(void) hxge_init_fzc_rx_common(hxgep);
3200 
3201 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start_common"));
3202 
3203 	return (status);
3204 }
3205 
3206 static hxge_status_t
3207 hxge_rxdma_hw_start(p_hxge_t hxgep)
3208 {
3209 	int			i, ndmas;
3210 	uint16_t		channel;
3211 	p_rx_rbr_rings_t	rx_rbr_rings;
3212 	p_rx_rbr_ring_t		*rbr_rings;
3213 	p_rx_rcr_rings_t	rx_rcr_rings;
3214 	p_rx_rcr_ring_t		*rcr_rings;
3215 	p_rx_mbox_areas_t	rx_mbox_areas_p;
3216 	p_rx_mbox_t		*rx_mbox_p;
3217 	hxge_status_t		status = HXGE_OK;
3218 
3219 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start"));
3220 
3221 	rx_rbr_rings = hxgep->rx_rbr_rings;
3222 	rx_rcr_rings = hxgep->rx_rcr_rings;
3223 	if (rx_rbr_rings == NULL || rx_rcr_rings == NULL) {
3224 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3225 		    "<== hxge_rxdma_hw_start: NULL ring pointers"));
3226 		return (HXGE_ERROR);
3227 	}
3228 
3229 	ndmas = rx_rbr_rings->ndmas;
3230 	if (ndmas == 0) {
3231 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3232 		    "<== hxge_rxdma_hw_start: no dma channel allocated"));
3233 		return (HXGE_ERROR);
3234 	}
3235 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3236 	    "==> hxge_rxdma_hw_start (ndmas %d)", ndmas));
3237 
3238 	/*
3239 	 * Scrub the RDC Rx DMA Prefetch Buffer Command.
3240 	 */
3241 	for (i = 0; i < 128; i++) {
3242 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_PREF_CMD, i);
3243 	}
3244 
3245 	/*
3246 	 * Scrub Rx DMA Shadow Tail Command.
3247 	 */
3248 	for (i = 0; i < 64; i++) {
3249 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_SHADOW_CMD, i);
3250 	}
3251 
3252 	/*
3253 	 * Scrub Rx DMA Control Fifo Command.
3254 	 */
3255 	for (i = 0; i < 512; i++) {
3256 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_CTRL_FIFO_CMD, i);
3257 	}
3258 
3259 	/*
3260 	 * Scrub Rx DMA Data Fifo Command.
3261 	 */
3262 	for (i = 0; i < 1536; i++) {
3263 		HXGE_REG_WR64(hxgep->hpi_handle, RDC_DATA_FIFO_CMD, i);
3264 	}
3265 
3266 	/*
3267 	 * Reset the FIFO Error Stat.
3268 	 */
3269 	HXGE_REG_WR64(hxgep->hpi_handle, RDC_FIFO_ERR_STAT, 0xFF);
3270 
3271 	/* Set the error mask to receive interrupts */
3272 	HXGE_REG_WR64(hxgep->hpi_handle, RDC_FIFO_ERR_INT_MASK, 0x0);
3273 
3274 	rbr_rings = rx_rbr_rings->rbr_rings;
3275 	rcr_rings = rx_rcr_rings->rcr_rings;
3276 	rx_mbox_areas_p = hxgep->rx_mbox_areas_p;
3277 	if (rx_mbox_areas_p) {
3278 		rx_mbox_p = rx_mbox_areas_p->rxmbox_areas;
3279 	}
3280 
3281 	for (i = 0; i < ndmas; i++) {
3282 		channel = rbr_rings[i]->rdc;
3283 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3284 		    "==> hxge_rxdma_hw_start (ndmas %d) channel %d",
3285 		    ndmas, channel));
3286 		status = hxge_rxdma_start_channel(hxgep, channel,
3287 		    (p_rx_rbr_ring_t)rbr_rings[i],
3288 		    (p_rx_rcr_ring_t)rcr_rings[i],
3289 		    (p_rx_mbox_t)rx_mbox_p[i], rbr_rings[i]->rbb_max);
3290 		if (status != HXGE_OK) {
3291 			goto hxge_rxdma_hw_start_fail1;
3292 		}
3293 	}
3294 
3295 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_start: "
3296 	    "rx_rbr_rings 0x%016llx rings 0x%016llx",
3297 	    rx_rbr_rings, rx_rcr_rings));
3298 	goto hxge_rxdma_hw_start_exit;
3299 
3300 hxge_rxdma_hw_start_fail1:
3301 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3302 	    "==> hxge_rxdma_hw_start: disable "
3303 	    "(status 0x%x channel %d i %d)", status, channel, i));
3304 	for (; i >= 0; i--) {
3305 		channel = rbr_rings[i]->rdc;
3306 		(void) hxge_rxdma_stop_channel(hxgep, channel);
3307 	}
3308 
3309 hxge_rxdma_hw_start_exit:
3310 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3311 	    "==> hxge_rxdma_hw_start: (status 0x%x)", status));
3312 	return (status);
3313 }
3314 
3315 static void
3316 hxge_rxdma_hw_stop(p_hxge_t hxgep)
3317 {
3318 	int			i, ndmas;
3319 	uint16_t		channel;
3320 	p_rx_rbr_rings_t	rx_rbr_rings;
3321 	p_rx_rbr_ring_t		*rbr_rings;
3322 	p_rx_rcr_rings_t	rx_rcr_rings;
3323 
3324 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_stop"));
3325 
3326 	rx_rbr_rings = hxgep->rx_rbr_rings;
3327 	rx_rcr_rings = hxgep->rx_rcr_rings;
3328 
3329 	if (rx_rbr_rings == NULL || rx_rcr_rings == NULL) {
3330 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3331 		    "<== hxge_rxdma_hw_stop: NULL ring pointers"));
3332 		return;
3333 	}
3334 
3335 	ndmas = rx_rbr_rings->ndmas;
3336 	if (!ndmas) {
3337 		HXGE_DEBUG_MSG((hxgep, RX_CTL,
3338 		    "<== hxge_rxdma_hw_stop: no dma channel allocated"));
3339 		return;
3340 	}
3341 
3342 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3343 	    "==> hxge_rxdma_hw_stop (ndmas %d)", ndmas));
3344 
3345 	rbr_rings = rx_rbr_rings->rbr_rings;
3346 	for (i = 0; i < ndmas; i++) {
3347 		channel = rbr_rings[i]->rdc;
3348 		HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3349 		    "==> hxge_rxdma_hw_stop (ndmas %d) channel %d",
3350 		    ndmas, channel));
3351 		(void) hxge_rxdma_stop_channel(hxgep, channel);
3352 	}
3353 
3354 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_hw_stop: "
3355 	    "rx_rbr_rings 0x%016llx rings 0x%016llx",
3356 	    rx_rbr_rings, rx_rcr_rings));
3357 
3358 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_rxdma_hw_stop"));
3359 }
3360 
3361 static hxge_status_t
3362 hxge_rxdma_start_channel(p_hxge_t hxgep, uint16_t channel,
3363     p_rx_rbr_ring_t rbr_p, p_rx_rcr_ring_t rcr_p, p_rx_mbox_t mbox_p,
3364     int n_init_kick)
3365 {
3366 	hpi_handle_t		handle;
3367 	hpi_status_t		rs = HPI_SUCCESS;
3368 	rdc_stat_t		cs;
3369 	rdc_int_mask_t		ent_mask;
3370 	hxge_status_t		status = HXGE_OK;
3371 
3372 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel"));
3373 
3374 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
3375 
3376 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "hxge_rxdma_start_channel: "
3377 	    "hpi handle addr $%p acc $%p",
3378 	    hxgep->hpi_handle.regp, hxgep->hpi_handle.regh));
3379 
3380 	/* Reset RXDMA channel */
3381 	rs = hpi_rxdma_cfg_rdc_reset(handle, channel);
3382 	if (rs != HPI_SUCCESS) {
3383 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3384 		    "==> hxge_rxdma_start_channel: "
3385 		    "reset rxdma failed (0x%08x channel %d)",
3386 		    status, channel));
3387 		return (HXGE_ERROR | rs);
3388 	}
3389 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3390 	    "==> hxge_rxdma_start_channel: reset done: channel %d", channel));
3391 
3392 	/*
3393 	 * Initialize the RXDMA channel specific FZC control configurations.
3394 	 * These FZC registers are pertaining to each RX channel (logical
3395 	 * pages).
3396 	 */
3397 	status = hxge_init_fzc_rxdma_channel(hxgep,
3398 	    channel, rbr_p, rcr_p, mbox_p);
3399 	if (status != HXGE_OK) {
3400 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3401 		    "==> hxge_rxdma_start_channel: "
3402 		    "init fzc rxdma failed (0x%08x channel %d)",
3403 		    status, channel));
3404 		return (status);
3405 	}
3406 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3407 	    "==> hxge_rxdma_start_channel: fzc done"));
3408 
3409 	/*
3410 	 * Zero out the shadow  and prefetch ram.
3411 	 */
3412 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3413 	    "==> hxge_rxdma_start_channel: ram done"));
3414 
3415 	/* Set up the interrupt event masks. */
3416 	ent_mask.value = 0;
3417 	rs = hpi_rxdma_event_mask(handle, OP_SET, channel, &ent_mask);
3418 	if (rs != HPI_SUCCESS) {
3419 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3420 		    "==> hxge_rxdma_start_channel: "
3421 		    "init rxdma event masks failed (0x%08x channel %d)",
3422 		    status, channel));
3423 		return (HXGE_ERROR | rs);
3424 	}
3425 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3426 	    "event done: channel %d (mask 0x%016llx)",
3427 	    channel, ent_mask.value));
3428 
3429 	/*
3430 	 * Load RXDMA descriptors, buffers, mailbox, initialise the receive DMA
3431 	 * channels and enable each DMA channel.
3432 	 */
3433 	status = hxge_enable_rxdma_channel(hxgep,
3434 	    channel, rbr_p, rcr_p, mbox_p, n_init_kick);
3435 	if (status != HXGE_OK) {
3436 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3437 		    " hxge_rxdma_start_channel: "
3438 		    " init enable rxdma failed (0x%08x channel %d)",
3439 		    status, channel));
3440 		return (status);
3441 	}
3442 
3443 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3444 	    "control done - channel %d cs 0x%016llx", channel, cs.value));
3445 
3446 	/*
3447 	 * Initialize the receive DMA control and status register
3448 	 * Note that rdc_stat HAS to be set after RBR and RCR rings are set
3449 	 */
3450 	cs.value = 0;
3451 	cs.bits.mex = 1;
3452 	cs.bits.rcr_thres = 1;
3453 	cs.bits.rcr_to = 1;
3454 	cs.bits.rbr_empty = 1;
3455 	status = hxge_init_rxdma_channel_cntl_stat(hxgep, channel, &cs);
3456 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3457 	    "channel %d rx_dma_cntl_stat 0x%0016llx", channel, cs.value));
3458 	if (status != HXGE_OK) {
3459 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3460 		    "==> hxge_rxdma_start_channel: "
3461 		    "init rxdma control register failed (0x%08x channel %d",
3462 		    status, channel));
3463 		return (status);
3464 	}
3465 
3466 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "==> hxge_rxdma_start_channel: "
3467 	    "control done - channel %d cs 0x%016llx", channel, cs.value));
3468 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL,
3469 	    "==> hxge_rxdma_start_channel: enable done"));
3470 	HXGE_DEBUG_MSG((hxgep, MEM2_CTL, "<== hxge_rxdma_start_channel"));
3471 
3472 	return (HXGE_OK);
3473 }
3474 
3475 static hxge_status_t
3476 hxge_rxdma_stop_channel(p_hxge_t hxgep, uint16_t channel)
3477 {
3478 	hpi_handle_t		handle;
3479 	hpi_status_t		rs = HPI_SUCCESS;
3480 	rdc_stat_t		cs;
3481 	rdc_int_mask_t		ent_mask;
3482 	hxge_status_t		status = HXGE_OK;
3483 
3484 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop_channel"));
3485 
3486 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
3487 
3488 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "hxge_rxdma_stop_channel: "
3489 	    "hpi handle addr $%p acc $%p",
3490 	    hxgep->hpi_handle.regp, hxgep->hpi_handle.regh));
3491 
3492 	/* Reset RXDMA channel */
3493 	rs = hpi_rxdma_cfg_rdc_reset(handle, channel);
3494 	if (rs != HPI_SUCCESS) {
3495 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3496 		    " hxge_rxdma_stop_channel: "
3497 		    " reset rxdma failed (0x%08x channel %d)",
3498 		    rs, channel));
3499 		return (HXGE_ERROR | rs);
3500 	}
3501 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3502 	    "==> hxge_rxdma_stop_channel: reset done"));
3503 
3504 	/* Set up the interrupt event masks. */
3505 	ent_mask.value = RDC_INT_MASK_ALL;
3506 	rs = hpi_rxdma_event_mask(handle, OP_SET, channel, &ent_mask);
3507 	if (rs != HPI_SUCCESS) {
3508 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3509 		    "==> hxge_rxdma_stop_channel: "
3510 		    "set rxdma event masks failed (0x%08x channel %d)",
3511 		    rs, channel));
3512 		return (HXGE_ERROR | rs);
3513 	}
3514 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3515 	    "==> hxge_rxdma_stop_channel: event done"));
3516 
3517 	/* Initialize the receive DMA control and status register */
3518 	cs.value = 0;
3519 	status = hxge_init_rxdma_channel_cntl_stat(hxgep, channel, &cs);
3520 
3521 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_stop_channel: control "
3522 	    " to default (all 0s) 0x%08x", cs.value));
3523 
3524 	if (status != HXGE_OK) {
3525 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3526 		    " hxge_rxdma_stop_channel: init rxdma"
3527 		    " control register failed (0x%08x channel %d",
3528 		    status, channel));
3529 		return (status);
3530 	}
3531 
3532 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3533 	    "==> hxge_rxdma_stop_channel: control done"));
3534 
3535 	/* disable dma channel */
3536 	status = hxge_disable_rxdma_channel(hxgep, channel);
3537 
3538 	if (status != HXGE_OK) {
3539 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3540 		    " hxge_rxdma_stop_channel: "
3541 		    " init enable rxdma failed (0x%08x channel %d)",
3542 		    status, channel));
3543 		return (status);
3544 	}
3545 
3546 	HXGE_DEBUG_MSG((hxgep, RX_CTL,
3547 	    "==> hxge_rxdma_stop_channel: disable done"));
3548 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_stop_channel"));
3549 
3550 	return (HXGE_OK);
3551 }
3552 
3553 hxge_status_t
3554 hxge_rxdma_handle_sys_errors(p_hxge_t hxgep)
3555 {
3556 	hpi_handle_t		handle;
3557 	p_hxge_rdc_sys_stats_t	statsp;
3558 	rdc_fifo_err_stat_t	stat;
3559 	hxge_status_t		status = HXGE_OK;
3560 
3561 	handle = hxgep->hpi_handle;
3562 	statsp = (p_hxge_rdc_sys_stats_t)&hxgep->statsp->rdc_sys_stats;
3563 
3564 	/* Clear the int_dbg register in case it is an injected err */
3565 	HXGE_REG_WR64(handle, RDC_FIFO_ERR_INT_DBG, 0x0);
3566 
3567 	/* Get the error status and clear the register */
3568 	HXGE_REG_RD64(handle, RDC_FIFO_ERR_STAT, &stat.value);
3569 	HXGE_REG_WR64(handle, RDC_FIFO_ERR_STAT, stat.value);
3570 
3571 	if (stat.bits.rx_ctrl_fifo_sec) {
3572 		statsp->ctrl_fifo_sec++;
3573 		if (statsp->ctrl_fifo_sec == 1)
3574 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3575 			    "==> hxge_rxdma_handle_sys_errors: "
3576 			    "rx_ctrl_fifo_sec"));
3577 	}
3578 
3579 	if (stat.bits.rx_ctrl_fifo_ded) {
3580 		/* Global fatal error encountered */
3581 		statsp->ctrl_fifo_ded++;
3582 		HXGE_FM_REPORT_ERROR(hxgep, NULL,
3583 		    HXGE_FM_EREPORT_RDMC_CTRL_FIFO_DED);
3584 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3585 		    "==> hxge_rxdma_handle_sys_errors: "
3586 		    "fatal error: rx_ctrl_fifo_ded error"));
3587 	}
3588 
3589 	if (stat.bits.rx_data_fifo_sec) {
3590 		statsp->data_fifo_sec++;
3591 		if (statsp->data_fifo_sec == 1)
3592 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3593 			    "==> hxge_rxdma_handle_sys_errors: "
3594 			    "rx_data_fifo_sec"));
3595 	}
3596 
3597 	if (stat.bits.rx_data_fifo_ded) {
3598 		/* Global fatal error encountered */
3599 		statsp->data_fifo_ded++;
3600 		HXGE_FM_REPORT_ERROR(hxgep, NULL,
3601 		    HXGE_FM_EREPORT_RDMC_DATA_FIFO_DED);
3602 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3603 		    "==> hxge_rxdma_handle_sys_errors: "
3604 		    "fatal error: rx_data_fifo_ded error"));
3605 	}
3606 
3607 	if (stat.bits.rx_ctrl_fifo_ded || stat.bits.rx_data_fifo_ded) {
3608 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3609 		    " hxge_rxdma_handle_sys_errors: fatal error\n"));
3610 		status = hxge_rx_port_fatal_err_recover(hxgep);
3611 		if (status == HXGE_OK) {
3612 			FM_SERVICE_RESTORED(hxgep);
3613 		}
3614 	}
3615 
3616 	return (HXGE_OK);
3617 }
3618 
3619 static hxge_status_t
3620 hxge_rxdma_fatal_err_recover(p_hxge_t hxgep, uint16_t channel)
3621 {
3622 	hpi_handle_t		handle;
3623 	hpi_status_t 		rs = HPI_SUCCESS;
3624 	hxge_status_t 		status = HXGE_OK;
3625 	p_rx_rbr_ring_t		rbrp;
3626 	p_rx_rcr_ring_t		rcrp;
3627 	p_rx_mbox_t		mboxp;
3628 	rdc_int_mask_t		ent_mask;
3629 	p_hxge_dma_common_t	dmap;
3630 	int			ring_idx;
3631 	p_rx_msg_t		rx_msg_p;
3632 	int			i;
3633 	uint32_t		hxge_port_rcr_size;
3634 	uint64_t		tmp;
3635 	int			n_init_kick = 0;
3636 
3637 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rxdma_fatal_err_recover"));
3638 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3639 	    "Recovering from RxDMAChannel#%d error...", channel));
3640 
3641 	/*
3642 	 * Stop the dma channel waits for the stop done. If the stop done bit
3643 	 * is not set, then create an error.
3644 	 */
3645 
3646 	handle = HXGE_DEV_HPI_HANDLE(hxgep);
3647 
3648 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Rx DMA stop..."));
3649 
3650 	ring_idx = hxge_rxdma_get_ring_index(hxgep, channel);
3651 	rbrp = (p_rx_rbr_ring_t)hxgep->rx_rbr_rings->rbr_rings[ring_idx];
3652 	rcrp = (p_rx_rcr_ring_t)hxgep->rx_rcr_rings->rcr_rings[ring_idx];
3653 
3654 	MUTEX_ENTER(&rcrp->lock);
3655 	MUTEX_ENTER(&rbrp->lock);
3656 
3657 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Disable RxDMA channel..."));
3658 
3659 	rs = hpi_rxdma_cfg_rdc_disable(handle, channel);
3660 	if (rs != HPI_SUCCESS) {
3661 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3662 		    "hxge_disable_rxdma_channel:failed"));
3663 		goto fail;
3664 	}
3665 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Disable RxDMA interrupt..."));
3666 
3667 	/* Disable interrupt */
3668 	ent_mask.value = RDC_INT_MASK_ALL;
3669 	rs = hpi_rxdma_event_mask(handle, OP_SET, channel, &ent_mask);
3670 	if (rs != HPI_SUCCESS) {
3671 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3672 		    "Set rxdma event masks failed (channel %d)", channel));
3673 	}
3674 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "RxDMA channel reset..."));
3675 
3676 	/* Reset RXDMA channel */
3677 	rs = hpi_rxdma_cfg_rdc_reset(handle, channel);
3678 	if (rs != HPI_SUCCESS) {
3679 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3680 		    "Reset rxdma failed (channel %d)", channel));
3681 		goto fail;
3682 	}
3683 	hxge_port_rcr_size = hxgep->hxge_port_rcr_size;
3684 	mboxp = (p_rx_mbox_t)hxgep->rx_mbox_areas_p->rxmbox_areas[ring_idx];
3685 
3686 	rbrp->rbr_wr_index = (rbrp->rbb_max - 1);
3687 	rbrp->rbr_rd_index = 0;
3688 
3689 	rcrp->comp_rd_index = 0;
3690 	rcrp->comp_wt_index = 0;
3691 	rcrp->rcr_desc_rd_head_p = rcrp->rcr_desc_first_p =
3692 	    (p_rcr_entry_t)DMA_COMMON_VPTR(rcrp->rcr_desc);
3693 #if defined(__i386)
3694 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
3695 	    (p_rcr_entry_t)(uint32_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
3696 #else
3697 	rcrp->rcr_desc_rd_head_pp = rcrp->rcr_desc_first_pp =
3698 	    (p_rcr_entry_t)DMA_COMMON_IOADDR(rcrp->rcr_desc);
3699 #endif
3700 
3701 	rcrp->rcr_desc_last_p = rcrp->rcr_desc_rd_head_p +
3702 	    (hxge_port_rcr_size - 1);
3703 	rcrp->rcr_desc_last_pp = rcrp->rcr_desc_rd_head_pp +
3704 	    (hxge_port_rcr_size - 1);
3705 
3706 	rcrp->rcr_tail_begin = DMA_COMMON_IOADDR(rcrp->rcr_desc);
3707 	rcrp->rcr_tail_begin = (rcrp->rcr_tail_begin & 0x7ffffULL) >> 3;
3708 
3709 	dmap = (p_hxge_dma_common_t)&rcrp->rcr_desc;
3710 	bzero((caddr_t)dmap->kaddrp, dmap->alength);
3711 
3712 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "rbr entries = %d\n",
3713 	    rbrp->rbr_max_size));
3714 
3715 	/* Count the number of buffers owned by the hardware at this moment */
3716 	for (i = 0; i < rbrp->rbr_max_size; i++) {
3717 		rx_msg_p = rbrp->rx_msg_ring[i];
3718 		if (rx_msg_p->ref_cnt == 1) {
3719 			n_init_kick++;
3720 		}
3721 	}
3722 
3723 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "RxDMA channel re-start..."));
3724 
3725 	/*
3726 	 * This is error recover! Some buffers are owned by the hardware and
3727 	 * the rest are owned by the apps. We should only kick in those
3728 	 * owned by the hardware initially. The apps will post theirs
3729 	 * eventually.
3730 	 */
3731 	status = hxge_rxdma_start_channel(hxgep, channel, rbrp, rcrp, mboxp,
3732 	    n_init_kick);
3733 	if (status != HXGE_OK) {
3734 		goto fail;
3735 	}
3736 
3737 	/*
3738 	 * The DMA channel may disable itself automatically.
3739 	 * The following is a work-around.
3740 	 */
3741 	HXGE_REG_RD64(handle, RDC_RX_CFG1, &tmp);
3742 	rs = hpi_rxdma_cfg_rdc_enable(handle, channel);
3743 	if (rs != HPI_SUCCESS) {
3744 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3745 		    "hpi_rxdma_cfg_rdc_enable (channel %d)", channel));
3746 	}
3747 
3748 	MUTEX_EXIT(&rbrp->lock);
3749 	MUTEX_EXIT(&rcrp->lock);
3750 
3751 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3752 	    "Recovery Successful, RxDMAChannel#%d Restored", channel));
3753 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rxdma_fatal_err_recover"));
3754 
3755 	return (HXGE_OK);
3756 
3757 fail:
3758 	MUTEX_EXIT(&rbrp->lock);
3759 	MUTEX_EXIT(&rcrp->lock);
3760 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL, "Recovery failed"));
3761 
3762 	return (HXGE_ERROR | rs);
3763 }
3764 
3765 static hxge_status_t
3766 hxge_rx_port_fatal_err_recover(p_hxge_t hxgep)
3767 {
3768 	hxge_status_t		status = HXGE_OK;
3769 	p_hxge_dma_common_t	*dma_buf_p;
3770 	uint16_t		channel;
3771 	int			ndmas;
3772 	int			i;
3773 	block_reset_t		reset_reg;
3774 	p_rx_rcr_ring_t	rcrp;
3775 	p_rx_rbr_ring_t rbrp;
3776 
3777 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_rx_port_fatal_err_recover"));
3778 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL, "Recovering from RDC error ..."));
3779 
3780 	/* Reset RDC block from PEU for this fatal error */
3781 	reset_reg.value = 0;
3782 	reset_reg.bits.rdc_rst = 1;
3783 	HXGE_REG_WR32(hxgep->hpi_handle, BLOCK_RESET, reset_reg.value);
3784 
3785 	/* Disable RxMAC */
3786 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Disable RxMAC...\n"));
3787 	if (hxge_rx_vmac_disable(hxgep) != HXGE_OK)
3788 		goto fail;
3789 
3790 	HXGE_DELAY(1000);
3791 
3792 	/* Restore any common settings after PEU reset */
3793 	if (hxge_rxdma_hw_start_common(hxgep) != HXGE_OK)
3794 		goto fail;
3795 
3796 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Stop all RxDMA channels..."));
3797 
3798 	ndmas = hxgep->rx_buf_pool_p->ndmas;
3799 	dma_buf_p = hxgep->rx_buf_pool_p->dma_buf_pool_p;
3800 
3801 	for (i = 0; i < ndmas; i++) {
3802 		channel = ((p_hxge_dma_common_t)dma_buf_p[i])->dma_channel;
3803 		rcrp = hxgep->rx_rcr_rings->rcr_rings[channel];
3804 		rbrp = rcrp->rx_rbr_p;
3805 
3806 		MUTEX_ENTER(&rbrp->post_lock);
3807 		/* This function needs to be inside the post_lock */
3808 		if (hxge_rxdma_fatal_err_recover(hxgep, channel) != HXGE_OK) {
3809 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3810 			    "Could not recover channel %d", channel));
3811 		}
3812 		MUTEX_EXIT(&rbrp->post_lock);
3813 	}
3814 
3815 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Reset RxMAC..."));
3816 
3817 	/* Reset RxMAC */
3818 	if (hxge_rx_vmac_reset(hxgep) != HXGE_OK) {
3819 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3820 		    "hxge_rx_port_fatal_err_recover: Failed to reset RxMAC"));
3821 		goto fail;
3822 	}
3823 
3824 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Re-initialize RxMAC..."));
3825 
3826 	/* Re-Initialize RxMAC */
3827 	if ((status = hxge_rx_vmac_init(hxgep)) != HXGE_OK) {
3828 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3829 		    "hxge_rx_port_fatal_err_recover: Failed to reset RxMAC"));
3830 		goto fail;
3831 	}
3832 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "Re-enable RxMAC..."));
3833 
3834 	/* Re-enable RxMAC */
3835 	if ((status = hxge_rx_vmac_enable(hxgep)) != HXGE_OK) {
3836 		HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3837 		    "hxge_rx_port_fatal_err_recover: Failed to enable RxMAC"));
3838 		goto fail;
3839 	}
3840 
3841 	/* Reset the error mask since PEU reset cleared it */
3842 	HXGE_REG_WR64(hxgep->hpi_handle, RDC_FIFO_ERR_INT_MASK, 0x0);
3843 
3844 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3845 	    "Recovery Successful, RxPort Restored"));
3846 	HXGE_DEBUG_MSG((hxgep, RX_CTL, "<== hxge_rx_port_fatal_err_recover"));
3847 
3848 	return (HXGE_OK);
3849 fail:
3850 	HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL, "Recovery failed"));
3851 	return (status);
3852 }
3853 
3854 static void
3855 hxge_rbr_empty_restore(p_hxge_t hxgep, p_rx_rbr_ring_t rx_rbr_p)
3856 {
3857 	hpi_status_t		hpi_status;
3858 	hxge_status_t		status;
3859 	int			i;
3860 	p_hxge_rx_ring_stats_t	rdc_stats;
3861 
3862 	rdc_stats = &hxgep->statsp->rdc_stats[rx_rbr_p->rdc];
3863 	rdc_stats->rbr_empty_restore++;
3864 	rx_rbr_p->rbr_is_empty = B_FALSE;
3865 
3866 	/*
3867 	 * Complete the processing for the RBR Empty by:
3868 	 *	0) kicking back HXGE_RBR_EMPTY_THRESHOLD
3869 	 *	   packets.
3870 	 *	1) Disable the RX vmac.
3871 	 *	2) Re-enable the affected DMA channel.
3872 	 *	3) Re-enable the RX vmac.
3873 	 */
3874 
3875 	/*
3876 	 * Disable the RX VMAC, but setting the framelength
3877 	 * to 0, since there is a hardware bug when disabling
3878 	 * the vmac.
3879 	 */
3880 	MUTEX_ENTER(hxgep->genlock);
3881 	(void) hpi_vmac_rx_set_framesize(
3882 	    HXGE_DEV_HPI_HANDLE(hxgep), (uint16_t)0);
3883 
3884 	hpi_status = hpi_rxdma_cfg_rdc_enable(
3885 	    HXGE_DEV_HPI_HANDLE(hxgep), rx_rbr_p->rdc);
3886 	if (hpi_status != HPI_SUCCESS) {
3887 		rdc_stats->rbr_empty_fail++;
3888 
3889 		/* Assume we are already inside the post_lock */
3890 		status = hxge_rxdma_fatal_err_recover(hxgep, rx_rbr_p->rdc);
3891 		if (status != HXGE_OK) {
3892 			HXGE_ERROR_MSG((hxgep, HXGE_ERR_CTL,
3893 			    "hxge(%d): channel(%d) is empty.",
3894 			    hxgep->instance, rx_rbr_p->rdc));
3895 		}
3896 	}
3897 
3898 	for (i = 0; i < 1024; i++) {
3899 		uint64_t value;
3900 		RXDMA_REG_READ64(HXGE_DEV_HPI_HANDLE(hxgep),
3901 		    RDC_STAT, i & 3, &value);
3902 	}
3903 
3904 	/*
3905 	 * Re-enable the RX VMAC.
3906 	 */
3907 	(void) hpi_vmac_rx_set_framesize(HXGE_DEV_HPI_HANDLE(hxgep),
3908 	    (uint16_t)hxgep->vmac.maxframesize);
3909 	MUTEX_EXIT(hxgep->genlock);
3910 }
3911