xref: /titanic_52/usr/src/uts/common/io/rge/rge_main.c (revision b6c3f7863936abeae522e48a13887dddeb691a45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "rge.h"
29 
/*
 * Driver identification string, displayed by modinfo, etc.
 *
 * NOTE(review): "%I%" looks like a source-control keyword that is
 * expanded to the file revision at checkout time -- confirm.  If it is
 * not expanded automatically, make sure the version ID is kept up to
 * date by hand.
 */
static char rge_ident[] = "Realtek 1Gb Ethernet v%I%";
35 
/*
 * DMA attributes used for packet data buffers allocated by
 * ddi_dma_mem_alloc().  The Tx, Rx and free-buffer ring init routines
 * in this file pass this structure to rge_alloc_dma_mem().
 *
 * The initializer is positional; the trailing comment on each line
 * names the ddi_dma_attr_t field being set.  A scatter/gather list
 * length of 1 matches the single-cookie requirement checked in
 * rge_alloc_dma_mem().
 */
static ddi_dma_attr_t dma_attr_buf = {
	DMA_ATTR_V0,		/* dma_attr version */
	(uint32_t)0,		/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_count_max */
	(uint32_t)16,		/* dma_attr_align */
	0xFFFFFFFF,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0,			/* dma_attr_flags */
};
53 
/*
 * DMA attributes used for buffer descriptors (BDs) allocated by
 * ddi_dma_mem_alloc().  rge_alloc_bufs() uses this structure for the
 * statistics dump area and for the Tx and Rx descriptor rings.
 *
 * Identical to dma_attr_buf except for the alignment, which is 256
 * bytes here instead of 16.
 */
static ddi_dma_attr_t dma_attr_desc = {
	DMA_ATTR_V0,		/* dma_attr version */
	(uint32_t)0,		/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_count_max */
	(uint32_t)256,		/* dma_attr_align */
	0xFFFFFFFF,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0,			/* dma_attr_flags */
};
71 
/*
 * PIO access attributes for registers: little-endian structure access
 * with strict ordering.  Not referenced in this part of the file;
 * presumably used when the register set is mapped -- verify.
 */
static ddi_device_acc_attr_t rge_reg_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC,
	DDI_DEFAULT_ACC
};
81 
/*
 * DMA access attributes for descriptors (statistics area and Tx/Rx
 * descriptor rings, see rge_alloc_bufs()).  No byte swapping is
 * requested here; the ring code in this file applies RGE_BSWAP_32()
 * itself when it writes descriptor fields.
 */
static ddi_device_acc_attr_t rge_desc_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC,
	DDI_DEFAULT_ACC
};
91 
/*
 * DMA access attributes for packet data buffers (used by the Tx, Rx
 * and free-buffer ring init routines).  No byte swapping is requested.
 */
static ddi_device_acc_attr_t rge_buf_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC,
	DDI_DEFAULT_ACC
};
101 
/*
 * Property names.
 *
 * NOTE(review): none of these is referenced in this part of the file;
 * presumably they are looked up as driver properties during attach --
 * verify against the rest of the file.
 */
static char debug_propname[] = "rge_debug_flags";
static char mtu_propname[] = "default_mtu";
static char msi_propname[] = "msi_enable";
108 
/*
 * Forward declarations of the MAC entry points that are defined in
 * this file and referenced by the rge_m_callbacks table.
 */
static int		rge_m_start(void *);
static void		rge_m_stop(void *);
static int		rge_m_promisc(void *, boolean_t);
static int		rge_m_multicst(void *, boolean_t, const uint8_t *);
static int		rge_m_unicst(void *, const uint8_t *);
static void		rge_m_resources(void *);
static void		rge_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t	rge_m_getcapab(void *, mac_capab_t, void *);

/*
 * Flags placed in the first slot of rge_m_callbacks.  They name the
 * resources, ioctl and getcapab entry points; presumably they tell the
 * MAC layer which optional callbacks are provided -- verify.
 */
#define	RGE_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)
119 
/*
 * MAC layer callback table for this driver.  The initializer is
 * positional, so the order of the entries must match the layout of
 * mac_callbacks_t.  rge_m_stat and rge_m_tx are not defined or
 * prototyped in this part of the file (presumably declared in rge.h).
 */
static mac_callbacks_t rge_m_callbacks = {
	RGE_M_CALLBACK_FLAGS,
	rge_m_stat,
	rge_m_start,
	rge_m_stop,
	rge_m_promisc,
	rge_m_multicst,
	rge_m_unicst,
	rge_m_tx,
	rge_m_resources,
	rge_m_ioctl,
	rge_m_getcapab
};
133 
134 /*
135  * Allocate an area of memory and a DMA handle for accessing it
136  */
137 static int
138 rge_alloc_dma_mem(rge_t *rgep, size_t memsize, ddi_dma_attr_t *dma_attr_p,
139 	ddi_device_acc_attr_t *acc_attr_p, uint_t dma_flags, dma_area_t *dma_p)
140 {
141 	caddr_t vaddr;
142 	int err;
143 
144 	/*
145 	 * Allocate handle
146 	 */
147 	err = ddi_dma_alloc_handle(rgep->devinfo, dma_attr_p,
148 	    DDI_DMA_SLEEP, NULL, &dma_p->dma_hdl);
149 	if (err != DDI_SUCCESS) {
150 		dma_p->dma_hdl = NULL;
151 		return (DDI_FAILURE);
152 	}
153 
154 	/*
155 	 * Allocate memory
156 	 */
157 	err = ddi_dma_mem_alloc(dma_p->dma_hdl, memsize, acc_attr_p,
158 	    dma_flags & (DDI_DMA_CONSISTENT | DDI_DMA_STREAMING),
159 	    DDI_DMA_SLEEP, NULL, &vaddr, &dma_p->alength, &dma_p->acc_hdl);
160 	if (err != DDI_SUCCESS) {
161 		ddi_dma_free_handle(&dma_p->dma_hdl);
162 		dma_p->dma_hdl = NULL;
163 		dma_p->acc_hdl = NULL;
164 		return (DDI_FAILURE);
165 	}
166 
167 	/*
168 	 * Bind the two together
169 	 */
170 	dma_p->mem_va = vaddr;
171 	err = ddi_dma_addr_bind_handle(dma_p->dma_hdl, NULL,
172 	    vaddr, dma_p->alength, dma_flags, DDI_DMA_SLEEP, NULL,
173 	    &dma_p->cookie, &dma_p->ncookies);
174 	if (err != DDI_DMA_MAPPED || dma_p->ncookies != 1) {
175 		ddi_dma_mem_free(&dma_p->acc_hdl);
176 		ddi_dma_free_handle(&dma_p->dma_hdl);
177 		dma_p->acc_hdl = NULL;
178 		dma_p->dma_hdl = NULL;
179 		return (DDI_FAILURE);
180 	}
181 
182 	dma_p->nslots = ~0U;
183 	dma_p->size = ~0U;
184 	dma_p->token = ~0U;
185 	dma_p->offset = 0;
186 	return (DDI_SUCCESS);
187 }
188 
189 /*
190  * Free one allocated area of DMAable memory
191  */
192 static void
193 rge_free_dma_mem(dma_area_t *dma_p)
194 {
195 	if (dma_p->dma_hdl != NULL) {
196 		if (dma_p->ncookies) {
197 			(void) ddi_dma_unbind_handle(dma_p->dma_hdl);
198 			dma_p->ncookies = 0;
199 		}
200 		ddi_dma_free_handle(&dma_p->dma_hdl);
201 		dma_p->dma_hdl = NULL;
202 	}
203 
204 	if (dma_p->acc_hdl != NULL) {
205 		ddi_dma_mem_free(&dma_p->acc_hdl);
206 		dma_p->acc_hdl = NULL;
207 	}
208 }
209 
210 /*
211  * Utility routine to carve a slice off a chunk of allocated memory,
212  * updating the chunk descriptor accordingly.  The size of the slice
213  * is given by the product of the <qty> and <size> parameters.
214  */
215 static void
216 rge_slice_chunk(dma_area_t *slice, dma_area_t *chunk,
217 	uint32_t qty, uint32_t size)
218 {
219 	static uint32_t sequence = 0xbcd5704a;
220 	size_t totsize;
221 
222 	totsize = qty*size;
223 	ASSERT(size >= 0);
224 	ASSERT(totsize <= chunk->alength);
225 
226 	*slice = *chunk;
227 	slice->nslots = qty;
228 	slice->size = size;
229 	slice->alength = totsize;
230 	slice->token = ++sequence;
231 
232 	chunk->mem_va = (caddr_t)chunk->mem_va + totsize;
233 	chunk->alength -= totsize;
234 	chunk->offset += totsize;
235 	chunk->cookie.dmac_laddress += totsize;
236 	chunk->cookie.dmac_size -= totsize;
237 }
238 
239 static int
240 rge_alloc_bufs(rge_t *rgep)
241 {
242 	size_t txdescsize;
243 	size_t rxdescsize;
244 	int err;
245 
246 	/*
247 	 * Allocate memory & handle for packet statistics
248 	 */
249 	err = rge_alloc_dma_mem(rgep,
250 	    RGE_STATS_DUMP_SIZE,
251 	    &dma_attr_desc,
252 	    &rge_desc_accattr,
253 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
254 	    &rgep->dma_area_stats);
255 	if (err != DDI_SUCCESS)
256 		return (DDI_FAILURE);
257 	rgep->hw_stats = DMA_VPTR(rgep->dma_area_stats);
258 
259 	/*
260 	 * Allocate memory & handle for Tx descriptor ring
261 	 */
262 	txdescsize = RGE_SEND_SLOTS * sizeof (rge_bd_t);
263 	err = rge_alloc_dma_mem(rgep,
264 	    txdescsize,
265 	    &dma_attr_desc,
266 	    &rge_desc_accattr,
267 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
268 	    &rgep->dma_area_txdesc);
269 	if (err != DDI_SUCCESS)
270 		return (DDI_FAILURE);
271 
272 	/*
273 	 * Allocate memory & handle for Rx descriptor ring
274 	 */
275 	rxdescsize = RGE_RECV_SLOTS * sizeof (rge_bd_t);
276 	err = rge_alloc_dma_mem(rgep,
277 	    rxdescsize,
278 	    &dma_attr_desc,
279 	    &rge_desc_accattr,
280 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
281 	    &rgep->dma_area_rxdesc);
282 	if (err != DDI_SUCCESS)
283 		return (DDI_FAILURE);
284 
285 	return (DDI_SUCCESS);
286 }
287 
288 /*
289  * rge_free_bufs() -- free descriptors/buffers allocated for this
290  * device instance.
291  */
292 static void
293 rge_free_bufs(rge_t *rgep)
294 {
295 	rge_free_dma_mem(&rgep->dma_area_stats);
296 	rge_free_dma_mem(&rgep->dma_area_txdesc);
297 	rge_free_dma_mem(&rgep->dma_area_rxdesc);
298 }
299 
300 /*
301  * ========== Transmit and receive ring reinitialisation ==========
302  */
303 
304 /*
305  * These <reinit> routines each reset the rx/tx rings to an initial
306  * state, assuming that the corresponding <init> routine has already
307  * been called exactly once.
308  */
309 static void
310 rge_reinit_send_ring(rge_t *rgep)
311 {
312 	sw_sbd_t *ssbdp;
313 	rge_bd_t *bdp;
314 	uint32_t slot;
315 
316 	/*
317 	 * re-init send ring
318 	 */
319 	DMA_ZERO(rgep->tx_desc);
320 	ssbdp = rgep->sw_sbds;
321 	bdp = rgep->tx_ring;
322 	for (slot = 0; slot < RGE_SEND_SLOTS; slot++) {
323 		bdp->host_buf_addr =
324 		    RGE_BSWAP_32(ssbdp->pbuf.cookie.dmac_laddress);
325 		bdp->host_buf_addr_hi =
326 		    RGE_BSWAP_32(ssbdp->pbuf.cookie.dmac_laddress >> 32);
327 		/* last BD in Tx ring */
328 		if (slot == (RGE_SEND_SLOTS - 1))
329 			bdp->flags_len = RGE_BSWAP_32(BD_FLAG_EOR);
330 		ssbdp++;
331 		bdp++;
332 	}
333 	DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
334 	rgep->tx_next = 0;
335 	rgep->tc_next = 0;
336 	rgep->tc_tail = 0;
337 	rgep->tx_flow = 0;
338 	rgep->tx_free = RGE_SEND_SLOTS;
339 }
340 
341 static void
342 rge_reinit_recv_ring(rge_t *rgep)
343 {
344 	rge_bd_t *bdp;
345 	sw_rbd_t *srbdp;
346 	dma_area_t *pbuf;
347 	uint32_t slot;
348 
349 	/*
350 	 * re-init receive ring
351 	 */
352 	DMA_ZERO(rgep->rx_desc);
353 	srbdp = rgep->sw_rbds;
354 	bdp = rgep->rx_ring;
355 	for (slot = 0; slot < RGE_RECV_SLOTS; slot++) {
356 		pbuf = &srbdp->rx_buf->pbuf;
357 		bdp->host_buf_addr =
358 		    RGE_BSWAP_32(pbuf->cookie.dmac_laddress + rgep->head_room);
359 		bdp->host_buf_addr_hi =
360 		    RGE_BSWAP_32(pbuf->cookie.dmac_laddress >> 32);
361 		bdp->flags_len = RGE_BSWAP_32(BD_FLAG_HW_OWN |
362 		    (rgep->rxbuf_size - rgep->head_room));
363 		/* last BD in Tx ring */
364 		if (slot == (RGE_RECV_SLOTS - 1))
365 			bdp->flags_len |= RGE_BSWAP_32(BD_FLAG_EOR);
366 		srbdp++;
367 		bdp++;
368 	}
369 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORDEV);
370 	rgep->watchdog = 0;
371 	rgep->rx_next = 0;
372 }
373 
374 static void
375 rge_reinit_buf_ring(rge_t *rgep)
376 {
377 
378 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)
379 		return;
380 
381 	/*
382 	 * If all the up-sending buffers haven't been returned to driver,
383 	 * use bcopy() only in rx process.
384 	 */
385 	if (rgep->rx_free != RGE_BUF_SLOTS)
386 		rgep->rx_bcopy = B_TRUE;
387 }
388 
/*
 * Reset all three rings (send, receive, free-buffer) to their initial
 * state.  Called from rge_reset().
 */
static void
rge_reinit_rings(rge_t *rgep)
{
	rge_reinit_send_ring(rgep);
	rge_reinit_recv_ring(rgep);
	rge_reinit_buf_ring(rgep);
}
396 
397 static void
398 rge_fini_send_ring(rge_t *rgep)
399 {
400 	sw_sbd_t *ssbdp;
401 	uint32_t slot;
402 
403 	ssbdp = rgep->sw_sbds;
404 	for (slot = 0; slot < RGE_SEND_SLOTS; ++slot) {
405 		rge_free_dma_mem(&ssbdp->pbuf);
406 		ssbdp++;
407 	}
408 
409 	kmem_free(rgep->sw_sbds, RGE_SEND_SLOTS * sizeof (sw_sbd_t));
410 	rgep->sw_sbds = NULL;
411 }
412 
413 static void
414 rge_fini_recv_ring(rge_t *rgep)
415 {
416 	sw_rbd_t *srbdp;
417 	uint32_t slot;
418 
419 	srbdp = rgep->sw_rbds;
420 	for (slot = 0; slot < RGE_RECV_SLOTS; ++srbdp, ++slot) {
421 		if (srbdp->rx_buf) {
422 			if (srbdp->rx_buf->mp != NULL) {
423 				freemsg(srbdp->rx_buf->mp);
424 				srbdp->rx_buf->mp = NULL;
425 			}
426 			rge_free_dma_mem(&srbdp->rx_buf->pbuf);
427 			kmem_free(srbdp->rx_buf, sizeof (dma_buf_t));
428 			srbdp->rx_buf = NULL;
429 		}
430 	}
431 
432 	kmem_free(rgep->sw_rbds, RGE_RECV_SLOTS * sizeof (sw_rbd_t));
433 	rgep->sw_rbds = NULL;
434 }
435 
436 static void
437 rge_fini_buf_ring(rge_t *rgep)
438 {
439 	sw_rbd_t *srbdp;
440 	uint32_t slot;
441 
442 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)
443 		return;
444 
445 	ASSERT(rgep->rx_free == RGE_BUF_SLOTS);
446 
447 	srbdp = rgep->free_srbds;
448 	for (slot = 0; slot < RGE_BUF_SLOTS; ++srbdp, ++slot) {
449 		if (srbdp->rx_buf != NULL) {
450 			if (srbdp->rx_buf->mp != NULL) {
451 				freemsg(srbdp->rx_buf->mp);
452 				srbdp->rx_buf->mp = NULL;
453 			}
454 			rge_free_dma_mem(&srbdp->rx_buf->pbuf);
455 			kmem_free(srbdp->rx_buf, sizeof (dma_buf_t));
456 			srbdp->rx_buf = NULL;
457 		}
458 	}
459 
460 	kmem_free(rgep->free_srbds, RGE_BUF_SLOTS * sizeof (sw_rbd_t));
461 	rgep->free_srbds = NULL;
462 }
463 
/*
 * Tear down all three rings (send, receive, free-buffer), releasing
 * the buffers and s/w descriptor arrays set up by rge_init_rings().
 */
static void
rge_fini_rings(rge_t *rgep)
{
	rge_fini_send_ring(rgep);
	rge_fini_recv_ring(rgep);
	rge_fini_buf_ring(rgep);
}
471 
472 static int
473 rge_init_send_ring(rge_t *rgep)
474 {
475 	uint32_t slot;
476 	sw_sbd_t *ssbdp;
477 	dma_area_t *pbuf;
478 	dma_area_t desc;
479 	int err;
480 
481 	/*
482 	 * Allocate the array of s/w Tx Buffer Descriptors
483 	 */
484 	ssbdp = kmem_zalloc(RGE_SEND_SLOTS*sizeof (*ssbdp), KM_SLEEP);
485 	rgep->sw_sbds = ssbdp;
486 
487 	/*
488 	 * Init send ring
489 	 */
490 	rgep->tx_desc = rgep->dma_area_txdesc;
491 	DMA_ZERO(rgep->tx_desc);
492 	rgep->tx_ring = rgep->tx_desc.mem_va;
493 
494 	desc = rgep->tx_desc;
495 	for (slot = 0; slot < RGE_SEND_SLOTS; slot++) {
496 		rge_slice_chunk(&ssbdp->desc, &desc, 1, sizeof (rge_bd_t));
497 
498 		/*
499 		 * Allocate memory & handle for Tx buffers
500 		 */
501 		pbuf = &ssbdp->pbuf;
502 		err = rge_alloc_dma_mem(rgep, rgep->txbuf_size,
503 		    &dma_attr_buf, &rge_buf_accattr,
504 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, pbuf);
505 		if (err != DDI_SUCCESS) {
506 			rge_error(rgep,
507 			    "rge_init_send_ring: alloc tx buffer failed");
508 			rge_fini_send_ring(rgep);
509 			return (DDI_FAILURE);
510 		}
511 		ssbdp++;
512 	}
513 	ASSERT(desc.alength == 0);
514 
515 	DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
516 	return (DDI_SUCCESS);
517 }
518 
/*
 * Set up the receive ring: allocate the s/w Rx descriptor array and,
 * for every slot, a buffer structure plus its DMA memory.  Unless the
 * chip is forced to bcopy mode, each buffer is also wrapped in a
 * message block by desballoc(), with rge_rx_recycle registered as its
 * free routine.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after undoing its own work via
 * rge_fini_recv_ring() when an allocation fails.
 */
static int
rge_init_recv_ring(rge_t *rgep)
{
	uint32_t slot;
	sw_rbd_t *srbdp;
	dma_buf_t *rx_buf;
	dma_area_t *pbuf;
	int err;

	/*
	 * Allocate the array of s/w Rx Buffer Descriptors
	 */
	srbdp = kmem_zalloc(RGE_RECV_SLOTS*sizeof (*srbdp), KM_SLEEP);
	rgep->sw_rbds = srbdp;

	/*
	 * Init receive ring
	 */
	rgep->rx_next = 0;
	rgep->rx_desc = rgep->dma_area_rxdesc;
	DMA_ZERO(rgep->rx_desc);
	rgep->rx_ring = rgep->rx_desc.mem_va;

	for (slot = 0; slot < RGE_RECV_SLOTS; slot++) {
		/*
		 * The buffer is hooked into the slot before anything
		 * else can fail, so rge_fini_recv_ring() will find and
		 * free it on the error paths below.
		 */
		srbdp->rx_buf = rx_buf =
		    kmem_zalloc(sizeof (dma_buf_t), KM_SLEEP);

		/*
		 * Allocate memory & handle for Rx buffers
		 */
		pbuf = &rx_buf->pbuf;
		err = rge_alloc_dma_mem(rgep, rgep->rxbuf_size,
		    &dma_attr_buf, &rge_buf_accattr,
		    DDI_DMA_READ | DDI_DMA_STREAMING, pbuf);
		if (err != DDI_SUCCESS) {
			rge_fini_recv_ring(rgep);
			rge_error(rgep,
			    "rge_init_recv_ring: alloc rx buffer failed");
			return (DDI_FAILURE);
		}

		/*
		 * Reserve head_room bytes at the front of the buffer:
		 * the usable length shrinks and the offset advances.
		 */
		pbuf->alength -= rgep->head_room;
		pbuf->offset += rgep->head_room;
		if (!(rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)) {
			rx_buf->rx_recycle.free_func = rge_rx_recycle;
			rx_buf->rx_recycle.free_arg = (caddr_t)rx_buf;
			rx_buf->private = (caddr_t)rgep;
			rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
			    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
			if (rx_buf->mp == NULL) {
				rge_fini_recv_ring(rgep);
				rge_problem(rgep,
				    "rge_init_recv_ring: desballoc() failed");
				return (DDI_FAILURE);
			}
		}
		srbdp++;
	}
	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORDEV);
	return (DDI_SUCCESS);
}
580 
/*
 * Set up the ring of spare ("free") receive buffers.
 *
 * When the chip is forced to bcopy mode no ring is allocated; the
 * routine just sets rx_bcopy and succeeds.  Otherwise it allocates the
 * s/w descriptor array and, for every slot, a buffer structure, its
 * DMA memory and a message block created by desballoc() with
 * rge_rx_recycle registered as its free routine.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after undoing its own work via
 * rge_fini_buf_ring() when an allocation fails.
 */
static int
rge_init_buf_ring(rge_t *rgep)
{
	uint32_t slot;
	sw_rbd_t *free_srbdp;
	dma_buf_t *rx_buf;
	dma_area_t *pbuf;
	int err;

	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY) {
		rgep->rx_bcopy = B_TRUE;
		return (DDI_SUCCESS);
	}

	/*
	 * Allocate the array of s/w free Buffer Descriptors
	 */
	free_srbdp = kmem_zalloc(RGE_BUF_SLOTS*sizeof (*free_srbdp), KM_SLEEP);
	rgep->free_srbds = free_srbdp;

	/*
	 * Init free buffer ring
	 */
	rgep->rc_next = 0;
	rgep->rf_next = 0;
	rgep->rx_bcopy = B_FALSE;
	/*
	 * rx_free is set before the loop, so the ASSERT in
	 * rge_fini_buf_ring() holds on the error paths below.
	 */
	rgep->rx_free = RGE_BUF_SLOTS;
	for (slot = 0; slot < RGE_BUF_SLOTS; slot++) {
		/*
		 * The buffer is hooked into the slot before anything
		 * else can fail, so rge_fini_buf_ring() will find and
		 * free it on the error paths below.
		 */
		free_srbdp->rx_buf = rx_buf =
		    kmem_zalloc(sizeof (dma_buf_t), KM_SLEEP);

		/*
		 * Allocate memory & handle for free Rx buffers
		 */
		pbuf = &rx_buf->pbuf;
		err = rge_alloc_dma_mem(rgep, rgep->rxbuf_size,
		    &dma_attr_buf, &rge_buf_accattr,
		    DDI_DMA_READ | DDI_DMA_STREAMING, pbuf);
		if (err != DDI_SUCCESS) {
			rge_fini_buf_ring(rgep);
			rge_error(rgep,
			    "rge_init_buf_ring: alloc rx free buffer failed");
			return (DDI_FAILURE);
		}
		/*
		 * Reserve head_room bytes at the front of the buffer:
		 * the usable length shrinks and the offset advances.
		 */
		pbuf->alength -= rgep->head_room;
		pbuf->offset += rgep->head_room;
		rx_buf->rx_recycle.free_func = rge_rx_recycle;
		rx_buf->rx_recycle.free_arg = (caddr_t)rx_buf;
		rx_buf->private = (caddr_t)rgep;
		rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
		    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
		if (rx_buf->mp == NULL) {
			rge_fini_buf_ring(rgep);
			rge_problem(rgep,
			    "rge_init_buf_ring: desballoc() failed");
			return (DDI_FAILURE);
		}
		free_srbdp++;
	}
	return (DDI_SUCCESS);
}
642 
643 static int
644 rge_init_rings(rge_t *rgep)
645 {
646 	int err;
647 
648 	err = rge_init_send_ring(rgep);
649 	if (err != DDI_SUCCESS)
650 		return (DDI_FAILURE);
651 
652 	err = rge_init_recv_ring(rgep);
653 	if (err != DDI_SUCCESS) {
654 		rge_fini_send_ring(rgep);
655 		return (DDI_FAILURE);
656 	}
657 
658 	err = rge_init_buf_ring(rgep);
659 	if (err != DDI_SUCCESS) {
660 		rge_fini_send_ring(rgep);
661 		rge_fini_recv_ring(rgep);
662 		return (DDI_FAILURE);
663 	}
664 
665 	return (DDI_SUCCESS);
666 }
667 
668 /*
669  * ========== Internal state management entry points ==========
670  */
671 
/*
 * Select the debug category for the code that follows.
 * NOTE(review): presumably consumed by RGE_DEBUG() -- verify in rge.h.
 */
#undef	RGE_DBG
#define	RGE_DBG		RGE_DBG_NEMO	/* debug flag for this code	*/
674 
675 /*
676  * These routines provide all the functionality required by the
677  * corresponding MAC layer entry points, but don't update the
678  * MAC state so they can be called internally without disturbing
679  * our record of what NEMO thinks we should be doing ...
680  */
681 
/*
 *	rge_reset() -- reset h/w & rings to initial state
 *
 * The caller must hold genlock.  The routine additionally takes
 * rx_lock, rc_lock and errlock (as writer), in that order, around the
 * chip reset, ring reinitialisation and chip init, and drops them in
 * the reverse order.  The return value of rge_chip_reset() is ignored.
 */
static void
rge_reset(rge_t *rgep)
{
	ASSERT(mutex_owned(rgep->genlock));

	/*
	 * Grab all the other mutexes in the world (this should
	 * ensure no other threads are manipulating driver state)
	 */
	mutex_enter(rgep->rx_lock);
	mutex_enter(rgep->rc_lock);
	rw_enter(rgep->errlock, RW_WRITER);

	(void) rge_chip_reset(rgep);
	rge_reinit_rings(rgep);
	rge_chip_init(rgep);

	/*
	 * Free the world ...
	 */
	rw_exit(rgep->errlock);
	mutex_exit(rgep->rc_lock);
	mutex_exit(rgep->rx_lock);

	RGE_DEBUG(("rge_reset($%p) done", (void *)rgep));
}
711 
/*
 *	rge_stop() -- stop processing, don't reset h/w or rings
 *
 * The caller must hold genlock.  Delegates to rge_chip_stop() with its
 * second argument set to B_FALSE.
 */
static void
rge_stop(rge_t *rgep)
{
	ASSERT(mutex_owned(rgep->genlock));

	rge_chip_stop(rgep, B_FALSE);

	RGE_DEBUG(("rge_stop($%p) done", (void *)rgep));
}
724 
/*
 *	rge_start() -- start transmitting/receiving
 *
 * The caller must hold genlock.  Starts the chip and clears the
 * watchdog counter.
 */
static void
rge_start(rge_t *rgep)
{
	ASSERT(mutex_owned(rgep->genlock));

	/*
	 * Start chip processing, including enabling interrupts
	 */
	rge_chip_start(rgep);
	rgep->watchdog = 0;
}
739 
740 /*
741  * rge_restart - restart transmitting/receiving after error or suspend
742  */
743 void
744 rge_restart(rge_t *rgep)
745 {
746 	uint32_t i;
747 
748 	ASSERT(mutex_owned(rgep->genlock));
749 	/*
750 	 * Wait for posted buffer to be freed...
751 	 */
752 	if (!rgep->rx_bcopy) {
753 		for (i = 0; i < RXBUFF_FREE_LOOP; i++) {
754 			if (rgep->rx_free == RGE_BUF_SLOTS)
755 				break;
756 			drv_usecwait(1000);
757 			RGE_DEBUG(("rge_restart: waiting for rx buf free..."));
758 		}
759 	}
760 	rge_reset(rgep);
761 	rgep->stats.chip_reset++;
762 	if (rgep->rge_mac_state == RGE_MAC_STARTED) {
763 		rge_start(rgep);
764 		rgep->resched_needed = B_TRUE;
765 		(void) ddi_intr_trigger_softint(rgep->resched_hdl, NULL);
766 	}
767 }
768 
769 
770 /*
771  * ========== Nemo-required management entry points ==========
772  */
773 
/*
 * Select the debug category for the MAC entry points that follow.
 * This is the same value (RGE_DBG_NEMO) already selected for the
 * internal state management routines earlier in the file.
 */
#undef	RGE_DBG
#define	RGE_DBG		RGE_DBG_NEMO	/* debug flag for this code	*/
776 
777 /*
778  *	rge_m_stop() -- stop transmitting/receiving
779  */
780 static void
781 rge_m_stop(void *arg)
782 {
783 	rge_t *rgep = arg;		/* private device info	*/
784 	uint32_t i;
785 
786 	/*
787 	 * Just stop processing, then record new MAC state
788 	 */
789 	mutex_enter(rgep->genlock);
790 	rge_stop(rgep);
791 	/*
792 	 * Wait for posted buffer to be freed...
793 	 */
794 	if (!rgep->rx_bcopy) {
795 		for (i = 0; i < RXBUFF_FREE_LOOP; i++) {
796 			if (rgep->rx_free == RGE_BUF_SLOTS)
797 				break;
798 			drv_usecwait(1000);
799 			RGE_DEBUG(("rge_m_stop: waiting for rx buf free..."));
800 		}
801 	}
802 	rgep->rge_mac_state = RGE_MAC_STOPPED;
803 	RGE_DEBUG(("rge_m_stop($%p) done", arg));
804 	mutex_exit(rgep->genlock);
805 }
806 
/*
 *	rge_m_start() -- start transmitting/receiving
 *
 * MAC entry point.  Under genlock: clears the hardware statistics
 * dump area and the software statistics, resets the chip and rings,
 * starts the chip and records RGE_MAC_STARTED.  Always returns 0.
 */
static int
rge_m_start(void *arg)
{
	rge_t *rgep = arg;		/* private device info	*/

	mutex_enter(rgep->genlock);

	/*
	 * Clear hw/sw statistics
	 */
	DMA_ZERO(rgep->dma_area_stats);
	bzero(&rgep->stats, sizeof (rge_stats_t));

	/*
	 * Start processing and record new MAC state
	 */
	rge_reset(rgep);
	rge_start(rgep);
	rgep->rge_mac_state = RGE_MAC_STARTED;
	RGE_DEBUG(("rge_m_start($%p) done", arg));

	mutex_exit(rgep->genlock);

	return (0);
}
835 
/*
 *	rge_m_unicst() -- set the physical network address
 *
 * MAC entry point.  Copies ETHERADDRL bytes from <macaddr> into the
 * driver state and asks the chip layer to program the new address,
 * all under genlock.  Always returns 0.
 */
static int
rge_m_unicst(void *arg, const uint8_t *macaddr)
{
	rge_t *rgep = arg;		/* private device info	*/

	/*
	 * Remember the new current address in the driver state
	 * Sync the chip's idea of the address too ...
	 */
	mutex_enter(rgep->genlock);
	bcopy(macaddr, rgep->netaddr, ETHERADDRL);
	rge_chip_sync(rgep, RGE_SET_MAC);
	mutex_exit(rgep->genlock);

	return (0);
}
855 
856 /*
857  * Compute the index of the required bit in the multicast hash map.
858  * This must mirror the way the hardware actually does it!
859  */
860 static uint32_t
861 rge_hash_index(const uint8_t *mca)
862 {
863 	uint32_t crc = (uint32_t)RGE_HASH_CRC;
864 	uint32_t const POLY = RGE_HASH_POLY;
865 	uint32_t msb;
866 	int bytes;
867 	uchar_t currentbyte;
868 	uint32_t index;
869 	int bit;
870 
871 	for (bytes = 0; bytes < ETHERADDRL; bytes++) {
872 		currentbyte = mca[bytes];
873 		for (bit = 0; bit < 8; bit++) {
874 			msb = crc >> 31;
875 			crc <<= 1;
876 			if (msb ^ (currentbyte & 1))
877 				crc ^= POLY;
878 			currentbyte >>= 1;
879 		}
880 	}
881 	index = crc >> 26;
882 		/* the index value is between 0 and 63(0x3f) */
883 
884 	return (index);
885 }
886 
887 /*
888  *	rge_m_multicst_add() -- enable/disable a multicast address
889  */
890 static int
891 rge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
892 {
893 	rge_t *rgep = arg;		/* private device info	*/
894 	struct ether_addr *addr;
895 	uint32_t index;
896 	uint32_t reg;
897 	uint8_t *hashp;
898 
899 	mutex_enter(rgep->genlock);
900 	hashp = rgep->mcast_hash;
901 	addr = (struct ether_addr *)mca;
902 	/*
903 	 * Calculate the Multicast address hash index value
904 	 *	Normally, the position of MAR0-MAR7 is
905 	 *	MAR0: offset 0x08, ..., MAR7: offset 0x0F.
906 	 *
907 	 *	For pcie chipset, the position of MAR0-MAR7 is
908 	 *	different from others:
909 	 *	MAR0: offset 0x0F, ..., MAR7: offset 0x08.
910 	 */
911 	index = rge_hash_index(addr->ether_addr_octet);
912 	if (rgep->chipid.is_pcie)
913 		reg = (~(index / RGE_MCAST_NUM)) & 0x7;
914 	else
915 		reg = index / RGE_MCAST_NUM;
916 
917 	if (add) {
918 		if (rgep->mcast_refs[index]++) {
919 			mutex_exit(rgep->genlock);
920 			return (0);
921 		}
922 		hashp[reg] |= 1 << (index % RGE_MCAST_NUM);
923 	} else {
924 		if (--rgep->mcast_refs[index]) {
925 			mutex_exit(rgep->genlock);
926 			return (0);
927 		}
928 		hashp[reg] &= ~ (1 << (index % RGE_MCAST_NUM));
929 	}
930 
931 	/*
932 	 * Set multicast register
933 	 */
934 	rge_chip_sync(rgep, RGE_SET_MUL);
935 
936 	mutex_exit(rgep->genlock);
937 	return (0);
938 }
939 
940 /*
941  * rge_m_promisc() -- set or reset promiscuous mode on the board
942  *
943  *	Program the hardware to enable/disable promiscuous and/or
944  *	receive-all-multicast modes.
945  */
946 static int
947 rge_m_promisc(void *arg, boolean_t on)
948 {
949 	rge_t *rgep = arg;
950 
951 	/*
952 	 * Store MAC layer specified mode and pass to chip layer to update h/w
953 	 */
954 	mutex_enter(rgep->genlock);
955 
956 	if (rgep->promisc == on) {
957 		mutex_exit(rgep->genlock);
958 		return (0);
959 	}
960 	rgep->promisc = on;
961 	rge_chip_sync(rgep, RGE_SET_PROMISC);
962 	RGE_DEBUG(("rge_m_promisc_set($%p) done", arg));
963 	mutex_exit(rgep->genlock);
964 	return (0);
965 }
966 
/*
 * Loopback ioctl code
 */

/*
 * Table of loopback modes supported by the driver.  It is copied out
 * verbatim for LB_GET_INFO and its size is reported for
 * LB_GET_INFO_SIZE (see rge_loop_ioctl()).  The last member of each
 * entry is one of the RGE_LOOP_* values accepted by
 * rge_set_loop_mode().
 */
static lb_property_t loopmodes[] = {
	{ normal,	"normal",	RGE_LOOP_NONE		},
	{ internal,	"PHY",		RGE_LOOP_INTERNAL_PHY	},
	{ internal,	"MAC",		RGE_LOOP_INTERNAL_MAC	}
};
976 
977 static enum ioc_reply
978 rge_set_loop_mode(rge_t *rgep, uint32_t mode)
979 {
980 	/*
981 	 * If the mode isn't being changed, there's nothing to do ...
982 	 */
983 	if (mode == rgep->param_loop_mode)
984 		return (IOC_ACK);
985 
986 	/*
987 	 * Validate the requested mode and prepare a suitable message
988 	 * to explain the link down/up cycle that the change will
989 	 * probably induce ...
990 	 */
991 	switch (mode) {
992 	default:
993 		return (IOC_INVAL);
994 
995 	case RGE_LOOP_NONE:
996 	case RGE_LOOP_INTERNAL_PHY:
997 	case RGE_LOOP_INTERNAL_MAC:
998 		break;
999 	}
1000 
1001 	/*
1002 	 * All OK; tell the caller to reprogram
1003 	 * the PHY and/or MAC for the new mode ...
1004 	 */
1005 	rgep->param_loop_mode = mode;
1006 	return (IOC_RESTART_ACK);
1007 }
1008 
1009 static enum ioc_reply
1010 rge_loop_ioctl(rge_t *rgep, queue_t *wq, mblk_t *mp, struct iocblk *iocp)
1011 {
1012 	lb_info_sz_t *lbsp;
1013 	lb_property_t *lbpp;
1014 	uint32_t *lbmp;
1015 	int cmd;
1016 
1017 	_NOTE(ARGUNUSED(wq))
1018 
1019 	/*
1020 	 * Validate format of ioctl
1021 	 */
1022 	if (mp->b_cont == NULL)
1023 		return (IOC_INVAL);
1024 
1025 	cmd = iocp->ioc_cmd;
1026 	switch (cmd) {
1027 	default:
1028 		/* NOTREACHED */
1029 		rge_error(rgep, "rge_loop_ioctl: invalid cmd 0x%x", cmd);
1030 		return (IOC_INVAL);
1031 
1032 	case LB_GET_INFO_SIZE:
1033 		if (iocp->ioc_count != sizeof (lb_info_sz_t))
1034 			return (IOC_INVAL);
1035 		lbsp = (lb_info_sz_t *)mp->b_cont->b_rptr;
1036 		*lbsp = sizeof (loopmodes);
1037 		return (IOC_REPLY);
1038 
1039 	case LB_GET_INFO:
1040 		if (iocp->ioc_count != sizeof (loopmodes))
1041 			return (IOC_INVAL);
1042 		lbpp = (lb_property_t *)mp->b_cont->b_rptr;
1043 		bcopy(loopmodes, lbpp, sizeof (loopmodes));
1044 		return (IOC_REPLY);
1045 
1046 	case LB_GET_MODE:
1047 		if (iocp->ioc_count != sizeof (uint32_t))
1048 			return (IOC_INVAL);
1049 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
1050 		*lbmp = rgep->param_loop_mode;
1051 		return (IOC_REPLY);
1052 
1053 	case LB_SET_MODE:
1054 		if (iocp->ioc_count != sizeof (uint32_t))
1055 			return (IOC_INVAL);
1056 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
1057 		return (rge_set_loop_mode(rgep, *lbmp));
1058 	}
1059 }
1060 
/*
 * Specific rge IOCTLs, the MAC layer handles the generic ones.
 *
 * MAC entry point.  Processing happens in four stages:
 *   1. classify the command and decide whether it needs privilege;
 *      unknown commands are NAKed with EINVAL straight away;
 *   2. for privileged commands, check the net_config privilege of the
 *      caller's credentials and NAK with the returned error on failure;
 *   3. under genlock, dispatch to the chip, loopback or ND handler and,
 *      if the handler asks for a restart, update the PHY;
 *   4. after dropping genlock, send the reply that matches the
 *      handler's status.
 *
 *	arg	driver soft state (rge_t *)
 *	wq	write queue the ioctl arrived on; replies go back on it
 *	mp	the ioctl message
 */
static void
rge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	rge_t *rgep = arg;
	struct iocblk *iocp;
	enum ioc_reply status;
	boolean_t need_privilege;
	int err;
	int cmd;

	/*
	 * Validate the command before bothering with the mutex ...
	 * Every command is treated as privileged unless one of the
	 * read-only cases below says otherwise.
	 */
	iocp = (struct iocblk *)mp->b_rptr;
	iocp->ioc_error = 0;
	need_privilege = B_TRUE;
	cmd = iocp->ioc_cmd;
	switch (cmd) {
	default:
		miocnak(wq, mp, 0, EINVAL);
		return;

	case RGE_MII_READ:
	case RGE_MII_WRITE:
	case RGE_DIAG:
	case RGE_PEEK:
	case RGE_POKE:
	case RGE_PHY_RESET:
	case RGE_SOFT_RESET:
	case RGE_HARD_RESET:
		break;

	case LB_GET_INFO_SIZE:
	case LB_GET_INFO:
	case LB_GET_MODE:
		need_privilege = B_FALSE;
		/* FALLTHRU */
	case LB_SET_MODE:
		break;

	case ND_GET:
		need_privilege = B_FALSE;
		/* FALLTHRU */
	case ND_SET:
		break;
	}

	if (need_privilege) {
		/*
		 * Check for specific net_config privilege
		 */
		err = secpolicy_net_config(iocp->ioc_cr, B_FALSE);
		if (err != 0) {
			miocnak(wq, mp, 0, err);
			return;
		}
	}

	mutex_enter(rgep->genlock);

	/*
	 * Dispatch to the handler for this family of commands.  The
	 * default case cannot be reached because unknown commands were
	 * already rejected by the first switch.
	 */
	switch (cmd) {
	default:
		_NOTE(NOTREACHED)
		status = IOC_INVAL;
		break;

	case RGE_MII_READ:
	case RGE_MII_WRITE:
	case RGE_DIAG:
	case RGE_PEEK:
	case RGE_POKE:
	case RGE_PHY_RESET:
	case RGE_SOFT_RESET:
	case RGE_HARD_RESET:
		status = rge_chip_ioctl(rgep, wq, mp, iocp);
		break;

	case LB_GET_INFO_SIZE:
	case LB_GET_INFO:
	case LB_GET_MODE:
	case LB_SET_MODE:
		status = rge_loop_ioctl(rgep, wq, mp, iocp);
		break;

	case ND_GET:
	case ND_SET:
		status = rge_nd_ioctl(rgep, wq, mp, iocp);
		break;
	}

	/*
	 * Do we need to reprogram the PHY and/or the MAC?
	 * Do it now, while we still have the mutex.
	 *
	 * Note: update the PHY first, 'cos it controls the
	 * speed/duplex parameters that the MAC code uses.
	 */
	switch (status) {
	case IOC_RESTART_REPLY:
	case IOC_RESTART_ACK:
		rge_phy_update(rgep);
		break;
	}

	mutex_exit(rgep->genlock);

	/*
	 * Finally, decide how to reply
	 */
	switch (status) {
	default:
	case IOC_INVAL:
		/*
		 * Error, reply with a NAK and EINVAL or the specified error
		 */
		miocnak(wq, mp, 0, iocp->ioc_error == 0 ?
		    EINVAL : iocp->ioc_error);
		break;

	case IOC_DONE:
		/*
		 * OK, reply already sent
		 */
		break;

	case IOC_RESTART_ACK:
	case IOC_ACK:
		/*
		 * OK, reply with an ACK
		 */
		miocack(wq, mp, 0, 0);
		break;

	case IOC_RESTART_REPLY:
	case IOC_REPLY:
		/*
		 * OK, send prepared reply as ACK or NAK
		 */
		mp->b_datap->db_type = iocp->ioc_error == 0 ?
		    M_IOCACK : M_IOCNAK;
		qreply(wq, mp);
		break;
	}
}
1208 
1209 static void
1210 rge_m_resources(void *arg)
1211 {
1212 	rge_t *rgep = arg;
1213 	mac_rx_fifo_t mrf;
1214 
1215 	mutex_enter(rgep->genlock);
1216 
1217 	/*
1218 	 * Register Rx rings as resources and save mac
1219 	 * resource id for future reference
1220 	 */
1221 	mrf.mrf_type = MAC_RX_FIFO;
1222 	mrf.mrf_blank = rge_chip_blank;
1223 	mrf.mrf_arg = (void *)rgep;
1224 	mrf.mrf_normal_blank_time = RGE_RX_INT_TIME;
1225 	mrf.mrf_normal_pkt_count = RGE_RX_INT_PKTS;
1226 	rgep->handle = mac_resource_add(rgep->mh, (mac_resource_t *)&mrf);
1227 
1228 	mutex_exit(rgep->genlock);
1229 }
1230 
1231 /* ARGSUSED */
1232 static boolean_t
1233 rge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1234 {
1235 	switch (cap) {
1236 	case MAC_CAPAB_HCKSUM: {
1237 		uint32_t *hcksum_txflags = cap_data;
1238 		*hcksum_txflags = HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM;
1239 		break;
1240 	}
1241 	case MAC_CAPAB_POLL:
1242 		/*
1243 		 * There's nothing for us to fill in, simply returning
1244 		 * B_TRUE stating that we support polling is sufficient.
1245 		 */
1246 		break;
1247 	default:
1248 		return (B_FALSE);
1249 	}
1250 	return (B_TRUE);
1251 }
1252 
1253 /*
1254  * ============ Init MSI/Fixed Interrupt routines ==============
1255  */
1256 
1257 /*
1258  * rge_add_intrs:
1259  *
1260  * Register FIXED or MSI interrupts.
1261  */
1262 static int
1263 rge_add_intrs(rge_t *rgep, int intr_type)
1264 {
1265 	dev_info_t *dip = rgep->devinfo;
1266 	int avail;
1267 	int actual;
1268 	int intr_size;
1269 	int count;
1270 	int i, j;
1271 	int ret;
1272 
1273 	/* Get number of interrupts */
1274 	ret = ddi_intr_get_nintrs(dip, intr_type, &count);
1275 	if ((ret != DDI_SUCCESS) || (count == 0)) {
1276 		rge_error(rgep, "ddi_intr_get_nintrs() failure, ret: %d, "
1277 		    "count: %d", ret, count);
1278 		return (DDI_FAILURE);
1279 	}
1280 
1281 	/* Get number of available interrupts */
1282 	ret = ddi_intr_get_navail(dip, intr_type, &avail);
1283 	if ((ret != DDI_SUCCESS) || (avail == 0)) {
1284 		rge_error(rgep, "ddi_intr_get_navail() failure, "
1285 		    "ret: %d, avail: %d\n", ret, avail);
1286 		return (DDI_FAILURE);
1287 	}
1288 
1289 	/* Allocate an array of interrupt handles */
1290 	intr_size = count * sizeof (ddi_intr_handle_t);
1291 	rgep->htable = kmem_alloc(intr_size, KM_SLEEP);
1292 	rgep->intr_rqst = count;
1293 
1294 	/* Call ddi_intr_alloc() */
1295 	ret = ddi_intr_alloc(dip, rgep->htable, intr_type, 0,
1296 	    count, &actual, DDI_INTR_ALLOC_NORMAL);
1297 	if (ret != DDI_SUCCESS || actual == 0) {
1298 		rge_error(rgep, "ddi_intr_alloc() failed %d\n", ret);
1299 		kmem_free(rgep->htable, intr_size);
1300 		return (DDI_FAILURE);
1301 	}
1302 	if (actual < count) {
1303 		rge_log(rgep, "ddi_intr_alloc() Requested: %d, Received: %d\n",
1304 		    count, actual);
1305 	}
1306 	rgep->intr_cnt = actual;
1307 
1308 	/*
1309 	 * Get priority for first msi, assume remaining are all the same
1310 	 */
1311 	if ((ret = ddi_intr_get_pri(rgep->htable[0], &rgep->intr_pri)) !=
1312 	    DDI_SUCCESS) {
1313 		rge_error(rgep, "ddi_intr_get_pri() failed %d\n", ret);
1314 		/* Free already allocated intr */
1315 		for (i = 0; i < actual; i++) {
1316 			(void) ddi_intr_free(rgep->htable[i]);
1317 		}
1318 		kmem_free(rgep->htable, intr_size);
1319 		return (DDI_FAILURE);
1320 	}
1321 
1322 	/* Test for high level mutex */
1323 	if (rgep->intr_pri >= ddi_intr_get_hilevel_pri()) {
1324 		rge_error(rgep, "rge_add_intrs:"
1325 		    "Hi level interrupt not supported");
1326 		for (i = 0; i < actual; i++)
1327 			(void) ddi_intr_free(rgep->htable[i]);
1328 		kmem_free(rgep->htable, intr_size);
1329 		return (DDI_FAILURE);
1330 	}
1331 
1332 	/* Call ddi_intr_add_handler() */
1333 	for (i = 0; i < actual; i++) {
1334 		if ((ret = ddi_intr_add_handler(rgep->htable[i], rge_intr,
1335 		    (caddr_t)rgep, (caddr_t)(uintptr_t)i)) != DDI_SUCCESS) {
1336 			rge_error(rgep, "ddi_intr_add_handler() "
1337 			    "failed %d\n", ret);
1338 			/* Remove already added intr */
1339 			for (j = 0; j < i; j++)
1340 				(void) ddi_intr_remove_handler(rgep->htable[j]);
1341 			/* Free already allocated intr */
1342 			for (i = 0; i < actual; i++) {
1343 				(void) ddi_intr_free(rgep->htable[i]);
1344 			}
1345 			kmem_free(rgep->htable, intr_size);
1346 			return (DDI_FAILURE);
1347 		}
1348 	}
1349 
1350 	if ((ret = ddi_intr_get_cap(rgep->htable[0], &rgep->intr_cap))
1351 	    != DDI_SUCCESS) {
1352 		rge_error(rgep, "ddi_intr_get_cap() failed %d\n", ret);
1353 		for (i = 0; i < actual; i++) {
1354 			(void) ddi_intr_remove_handler(rgep->htable[i]);
1355 			(void) ddi_intr_free(rgep->htable[i]);
1356 		}
1357 		kmem_free(rgep->htable, intr_size);
1358 		return (DDI_FAILURE);
1359 	}
1360 
1361 	return (DDI_SUCCESS);
1362 }
1363 
1364 /*
1365  * rge_rem_intrs:
1366  *
1367  * Unregister FIXED or MSI interrupts
1368  */
1369 static void
1370 rge_rem_intrs(rge_t *rgep)
1371 {
1372 	int i;
1373 
1374 	/* Disable all interrupts */
1375 	if (rgep->intr_cap & DDI_INTR_FLAG_BLOCK) {
1376 		/* Call ddi_intr_block_disable() */
1377 		(void) ddi_intr_block_disable(rgep->htable, rgep->intr_cnt);
1378 	} else {
1379 		for (i = 0; i < rgep->intr_cnt; i++) {
1380 			(void) ddi_intr_disable(rgep->htable[i]);
1381 		}
1382 	}
1383 
1384 	/* Call ddi_intr_remove_handler() */
1385 	for (i = 0; i < rgep->intr_cnt; i++) {
1386 		(void) ddi_intr_remove_handler(rgep->htable[i]);
1387 		(void) ddi_intr_free(rgep->htable[i]);
1388 	}
1389 
1390 	kmem_free(rgep->htable, rgep->intr_rqst * sizeof (ddi_intr_handle_t));
1391 }
1392 
1393 /*
1394  * ========== Per-instance setup/teardown code ==========
1395  */
1396 
1397 #undef	RGE_DBG
1398 #define	RGE_DBG		RGE_DBG_INIT	/* debug flag for this code	*/
1399 
1400 static void
1401 rge_unattach(rge_t *rgep)
1402 {
1403 	/*
1404 	 * Flag that no more activity may be initiated
1405 	 */
1406 	rgep->progress &= ~PROGRESS_READY;
1407 	rgep->rge_mac_state = RGE_MAC_UNATTACH;
1408 
1409 	/*
1410 	 * Quiesce the PHY and MAC (leave it reset but still powered).
1411 	 * Clean up and free all RGE data structures
1412 	 */
1413 	if (rgep->periodic_id != NULL) {
1414 		ddi_periodic_delete(rgep->periodic_id);
1415 		rgep->periodic_id = NULL;
1416 	}
1417 
1418 	if (rgep->progress & PROGRESS_KSTATS)
1419 		rge_fini_kstats(rgep);
1420 
1421 	if (rgep->progress & PROGRESS_PHY)
1422 		(void) rge_phy_reset(rgep);
1423 
1424 	if (rgep->progress & PROGRESS_INIT) {
1425 		mutex_enter(rgep->genlock);
1426 		(void) rge_chip_reset(rgep);
1427 		mutex_exit(rgep->genlock);
1428 		rge_fini_rings(rgep);
1429 	}
1430 
1431 	if (rgep->progress & PROGRESS_INTR) {
1432 		rge_rem_intrs(rgep);
1433 		mutex_destroy(rgep->rc_lock);
1434 		mutex_destroy(rgep->rx_lock);
1435 		mutex_destroy(rgep->tc_lock);
1436 		mutex_destroy(rgep->tx_lock);
1437 		rw_destroy(rgep->errlock);
1438 		mutex_destroy(rgep->genlock);
1439 	}
1440 
1441 	if (rgep->progress & PROGRESS_FACTOTUM)
1442 		(void) ddi_intr_remove_softint(rgep->factotum_hdl);
1443 
1444 	if (rgep->progress & PROGRESS_RESCHED)
1445 		(void) ddi_intr_remove_softint(rgep->resched_hdl);
1446 
1447 	if (rgep->progress & PROGRESS_NDD)
1448 		rge_nd_cleanup(rgep);
1449 
1450 	rge_free_bufs(rgep);
1451 
1452 	if (rgep->progress & PROGRESS_REGS)
1453 		ddi_regs_map_free(&rgep->io_handle);
1454 
1455 	if (rgep->progress & PROGRESS_CFG)
1456 		pci_config_teardown(&rgep->cfg_handle);
1457 
1458 	ddi_remove_minor_node(rgep->devinfo, NULL);
1459 	kmem_free(rgep, sizeof (*rgep));
1460 }
1461 
1462 static int
1463 rge_resume(dev_info_t *devinfo)
1464 {
1465 	rge_t *rgep;			/* Our private data	*/
1466 	chip_id_t *cidp;
1467 	chip_id_t chipid;
1468 
1469 	rgep = ddi_get_driver_private(devinfo);
1470 	if (rgep == NULL)
1471 		return (DDI_FAILURE);
1472 
1473 	/*
1474 	 * Refuse to resume if the data structures aren't consistent
1475 	 */
1476 	if (rgep->devinfo != devinfo)
1477 		return (DDI_FAILURE);
1478 
1479 	/*
1480 	 * Read chip ID & set up config space command register(s)
1481 	 * Refuse to resume if the chip has changed its identity!
1482 	 */
1483 	cidp = &rgep->chipid;
1484 	rge_chip_cfg_init(rgep, &chipid);
1485 	if (chipid.vendor != cidp->vendor)
1486 		return (DDI_FAILURE);
1487 	if (chipid.device != cidp->device)
1488 		return (DDI_FAILURE);
1489 	if (chipid.revision != cidp->revision)
1490 		return (DDI_FAILURE);
1491 
1492 	/*
1493 	 * All OK, reinitialise h/w & kick off NEMO scheduling
1494 	 */
1495 	mutex_enter(rgep->genlock);
1496 	rge_restart(rgep);
1497 	mutex_exit(rgep->genlock);
1498 	return (DDI_SUCCESS);
1499 }
1500 
1501 
1502 /*
1503  * attach(9E) -- Attach a device to the system
1504  *
1505  * Called once for each board successfully probed.
1506  */
1507 static int
1508 rge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1509 {
1510 	rge_t *rgep;			/* Our private data	*/
1511 	mac_register_t *macp;
1512 	chip_id_t *cidp;
1513 	int intr_types;
1514 	caddr_t regs;
1515 	int instance;
1516 	int i;
1517 	int err;
1518 
1519 	/*
1520 	 * we don't support high level interrupts in the driver
1521 	 */
1522 	if (ddi_intr_hilevel(devinfo, 0) != 0) {
1523 		cmn_err(CE_WARN,
1524 		    "rge_attach -- unsupported high level interrupt");
1525 		return (DDI_FAILURE);
1526 	}
1527 
1528 	instance = ddi_get_instance(devinfo);
1529 	RGE_GTRACE(("rge_attach($%p, %d) instance %d",
1530 	    (void *)devinfo, cmd, instance));
1531 	RGE_BRKPT(NULL, "rge_attach");
1532 
1533 	switch (cmd) {
1534 	default:
1535 		return (DDI_FAILURE);
1536 
1537 	case DDI_RESUME:
1538 		return (rge_resume(devinfo));
1539 
1540 	case DDI_ATTACH:
1541 		break;
1542 	}
1543 
1544 	rgep = kmem_zalloc(sizeof (*rgep), KM_SLEEP);
1545 	ddi_set_driver_private(devinfo, rgep);
1546 	rgep->devinfo = devinfo;
1547 
1548 	/*
1549 	 * Initialize more fields in RGE private data
1550 	 */
1551 	rgep->rge_mac_state = RGE_MAC_ATTACH;
1552 	rgep->debug = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1553 	    DDI_PROP_DONTPASS, debug_propname, rge_debug);
1554 	rgep->default_mtu = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1555 	    DDI_PROP_DONTPASS, mtu_propname, ETHERMTU);
1556 	rgep->msi_enable = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1557 	    DDI_PROP_DONTPASS, msi_propname, B_TRUE);
1558 	(void) snprintf(rgep->ifname, sizeof (rgep->ifname), "%s%d",
1559 	    RGE_DRIVER_NAME, instance);
1560 
1561 	/*
1562 	 * Map config space registers
1563 	 * Read chip ID & set up config space command register(s)
1564 	 *
1565 	 * Note: this leaves the chip accessible by Memory Space
1566 	 * accesses, but with interrupts and Bus Mastering off.
1567 	 * This should ensure that nothing untoward will happen
1568 	 * if it has been left active by the (net-)bootloader.
1569 	 * We'll re-enable Bus Mastering once we've reset the chip,
1570 	 * and allow interrupts only when everything else is set up.
1571 	 */
1572 	err = pci_config_setup(devinfo, &rgep->cfg_handle);
1573 	if (err != DDI_SUCCESS) {
1574 		rge_problem(rgep, "pci_config_setup() failed");
1575 		goto attach_fail;
1576 	}
1577 	rgep->progress |= PROGRESS_CFG;
1578 	cidp = &rgep->chipid;
1579 	bzero(cidp, sizeof (*cidp));
1580 	rge_chip_cfg_init(rgep, cidp);
1581 
1582 	/*
1583 	 * Map operating registers
1584 	 */
1585 	err = ddi_regs_map_setup(devinfo, 1, &regs,
1586 	    0, 0, &rge_reg_accattr, &rgep->io_handle);
1587 	if (err != DDI_SUCCESS) {
1588 		rge_problem(rgep, "ddi_regs_map_setup() failed");
1589 		goto attach_fail;
1590 	}
1591 	rgep->io_regs = regs;
1592 	rgep->progress |= PROGRESS_REGS;
1593 
1594 	/*
1595 	 * Characterise the device, so we know its requirements.
1596 	 * Then allocate the appropriate TX and RX descriptors & buffers.
1597 	 */
1598 	rge_chip_ident(rgep);
1599 	err = rge_alloc_bufs(rgep);
1600 	if (err != DDI_SUCCESS) {
1601 		rge_problem(rgep, "DMA buffer allocation failed");
1602 		goto attach_fail;
1603 	}
1604 
1605 	/*
1606 	 * Register NDD-tweakable parameters
1607 	 */
1608 	if (rge_nd_init(rgep)) {
1609 		rge_problem(rgep, "rge_nd_init() failed");
1610 		goto attach_fail;
1611 	}
1612 	rgep->progress |= PROGRESS_NDD;
1613 
1614 	/*
1615 	 * Add the softint handlers:
1616 	 *
1617 	 * Both of these handlers are used to avoid restrictions on the
1618 	 * context and/or mutexes required for some operations.  In
1619 	 * particular, the hardware interrupt handler and its subfunctions
1620 	 * can detect a number of conditions that we don't want to handle
1621 	 * in that context or with that set of mutexes held.  So, these
1622 	 * softints are triggered instead:
1623 	 *
1624 	 * the <resched> softint is triggered if if we have previously
1625 	 * had to refuse to send a packet because of resource shortage
1626 	 * (we've run out of transmit buffers), but the send completion
1627 	 * interrupt handler has now detected that more buffers have
1628 	 * become available.
1629 	 *
1630 	 * the <factotum> is triggered if the h/w interrupt handler
1631 	 * sees the <link state changed> or <error> bits in the status
1632 	 * block.  It's also triggered periodically to poll the link
1633 	 * state, just in case we aren't getting link status change
1634 	 * interrupts ...
1635 	 */
1636 	err = ddi_intr_add_softint(devinfo, &rgep->resched_hdl,
1637 	    DDI_INTR_SOFTPRI_MIN, rge_reschedule, (caddr_t)rgep);
1638 	if (err != DDI_SUCCESS) {
1639 		rge_problem(rgep, "ddi_intr_add_softint() failed");
1640 		goto attach_fail;
1641 	}
1642 	rgep->progress |= PROGRESS_RESCHED;
1643 	err = ddi_intr_add_softint(devinfo, &rgep->factotum_hdl,
1644 	    DDI_INTR_SOFTPRI_MIN, rge_chip_factotum, (caddr_t)rgep);
1645 	if (err != DDI_SUCCESS) {
1646 		rge_problem(rgep, "ddi_intr_add_softint() failed");
1647 		goto attach_fail;
1648 	}
1649 	rgep->progress |= PROGRESS_FACTOTUM;
1650 
1651 	/*
1652 	 * Get supported interrupt types
1653 	 */
1654 	if (ddi_intr_get_supported_types(devinfo, &intr_types)
1655 	    != DDI_SUCCESS) {
1656 		rge_error(rgep, "ddi_intr_get_supported_types failed\n");
1657 		goto attach_fail;
1658 	}
1659 
1660 	/*
1661 	 * Add the h/w interrupt handler and initialise mutexes
1662 	 */
1663 	if ((intr_types & DDI_INTR_TYPE_MSI) && rgep->msi_enable) {
1664 		if (rge_add_intrs(rgep, DDI_INTR_TYPE_MSI) != DDI_SUCCESS) {
1665 			rge_error(rgep, "MSI registration failed, "
1666 			    "trying FIXED interrupt type\n");
1667 		} else {
1668 			rge_log(rgep, "Using MSI interrupt type\n");
1669 			rgep->intr_type = DDI_INTR_TYPE_MSI;
1670 			rgep->progress |= PROGRESS_INTR;
1671 		}
1672 	}
1673 	if (!(rgep->progress & PROGRESS_INTR) &&
1674 	    (intr_types & DDI_INTR_TYPE_FIXED)) {
1675 		if (rge_add_intrs(rgep, DDI_INTR_TYPE_FIXED) != DDI_SUCCESS) {
1676 			rge_error(rgep, "FIXED interrupt "
1677 			    "registration failed\n");
1678 			goto attach_fail;
1679 		}
1680 		rge_log(rgep, "Using FIXED interrupt type\n");
1681 		rgep->intr_type = DDI_INTR_TYPE_FIXED;
1682 		rgep->progress |= PROGRESS_INTR;
1683 	}
1684 	if (!(rgep->progress & PROGRESS_INTR)) {
1685 		rge_error(rgep, "No interrupts registered\n");
1686 		goto attach_fail;
1687 	}
1688 	mutex_init(rgep->genlock, NULL, MUTEX_DRIVER,
1689 	    DDI_INTR_PRI(rgep->intr_pri));
1690 	rw_init(rgep->errlock, NULL, RW_DRIVER,
1691 	    DDI_INTR_PRI(rgep->intr_pri));
1692 	mutex_init(rgep->tx_lock, NULL, MUTEX_DRIVER,
1693 	    DDI_INTR_PRI(rgep->intr_pri));
1694 	mutex_init(rgep->tc_lock, NULL, MUTEX_DRIVER,
1695 	    DDI_INTR_PRI(rgep->intr_pri));
1696 	mutex_init(rgep->rx_lock, NULL, MUTEX_DRIVER,
1697 	    DDI_INTR_PRI(rgep->intr_pri));
1698 	mutex_init(rgep->rc_lock, NULL, MUTEX_DRIVER,
1699 	    DDI_INTR_PRI(rgep->intr_pri));
1700 
1701 	/*
1702 	 * Initialize rings
1703 	 */
1704 	err = rge_init_rings(rgep);
1705 	if (err != DDI_SUCCESS) {
1706 		rge_problem(rgep, "rge_init_rings() failed");
1707 		goto attach_fail;
1708 	}
1709 	rgep->progress |= PROGRESS_INIT;
1710 
1711 	/*
1712 	 * Now that mutex locks are initialized, enable interrupts.
1713 	 */
1714 	if (rgep->intr_cap & DDI_INTR_FLAG_BLOCK) {
1715 		/* Call ddi_intr_block_enable() for MSI interrupts */
1716 		(void) ddi_intr_block_enable(rgep->htable, rgep->intr_cnt);
1717 	} else {
1718 		/* Call ddi_intr_enable for MSI or FIXED interrupts */
1719 		for (i = 0; i < rgep->intr_cnt; i++) {
1720 			(void) ddi_intr_enable(rgep->htable[i]);
1721 		}
1722 	}
1723 
1724 	/*
1725 	 * Initialise link state variables
1726 	 * Stop, reset & reinitialise the chip.
1727 	 * Initialise the (internal) PHY.
1728 	 */
1729 	rgep->param_link_up = LINK_STATE_UNKNOWN;
1730 
1731 	/*
1732 	 * Reset chip & rings to initial state; also reset address
1733 	 * filtering, promiscuity, loopback mode.
1734 	 */
1735 	mutex_enter(rgep->genlock);
1736 	(void) rge_chip_reset(rgep);
1737 	rge_chip_sync(rgep, RGE_GET_MAC);
1738 	bzero(rgep->mcast_hash, sizeof (rgep->mcast_hash));
1739 	bzero(rgep->mcast_refs, sizeof (rgep->mcast_refs));
1740 	rgep->promisc = B_FALSE;
1741 	rgep->param_loop_mode = RGE_LOOP_NONE;
1742 	mutex_exit(rgep->genlock);
1743 	rge_phy_init(rgep);
1744 	rgep->progress |= PROGRESS_PHY;
1745 
1746 	/*
1747 	 * Create & initialise named kstats
1748 	 */
1749 	rge_init_kstats(rgep, instance);
1750 	rgep->progress |= PROGRESS_KSTATS;
1751 
1752 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1753 		goto attach_fail;
1754 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1755 	macp->m_driver = rgep;
1756 	macp->m_dip = devinfo;
1757 	macp->m_src_addr = rgep->netaddr;
1758 	macp->m_callbacks = &rge_m_callbacks;
1759 	macp->m_min_sdu = 0;
1760 	macp->m_max_sdu = rgep->default_mtu;
1761 	macp->m_margin = VLAN_TAGSZ;
1762 
1763 	/*
1764 	 * Finally, we're ready to register ourselves with the MAC layer
1765 	 * interface; if this succeeds, we're all ready to start()
1766 	 */
1767 	err = mac_register(macp, &rgep->mh);
1768 	mac_free(macp);
1769 	if (err != 0)
1770 		goto attach_fail;
1771 
1772 	/*
1773 	 * Register a periodical handler.
1774 	 * reg_chip_cyclic() is invoked in kernel context.
1775 	 */
1776 	rgep->periodic_id = ddi_periodic_add(rge_chip_cyclic, rgep,
1777 	    RGE_CYCLIC_PERIOD, DDI_IPL_0);
1778 
1779 	rgep->progress |= PROGRESS_READY;
1780 	return (DDI_SUCCESS);
1781 
1782 attach_fail:
1783 	rge_unattach(rgep);
1784 	return (DDI_FAILURE);
1785 }
1786 
1787 /*
1788  *	rge_suspend() -- suspend transmit/receive for powerdown
1789  */
1790 static int
1791 rge_suspend(rge_t *rgep)
1792 {
1793 	/*
1794 	 * Stop processing and idle (powerdown) the PHY ...
1795 	 */
1796 	mutex_enter(rgep->genlock);
1797 	rge_stop(rgep);
1798 	mutex_exit(rgep->genlock);
1799 
1800 	return (DDI_SUCCESS);
1801 }
1802 
1803 /*
1804  * detach(9E) -- Detach a device from the system
1805  */
1806 static int
1807 rge_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1808 {
1809 	rge_t *rgep;
1810 
1811 	RGE_GTRACE(("rge_detach($%p, %d)", (void *)devinfo, cmd));
1812 
1813 	rgep = ddi_get_driver_private(devinfo);
1814 
1815 	switch (cmd) {
1816 	default:
1817 		return (DDI_FAILURE);
1818 
1819 	case DDI_SUSPEND:
1820 		return (rge_suspend(rgep));
1821 
1822 	case DDI_DETACH:
1823 		break;
1824 	}
1825 
1826 	/*
1827 	 * If there is any posted buffer, the driver should reject to be
1828 	 * detached. Need notice upper layer to release them.
1829 	 */
1830 	if (!(rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY) &&
1831 	    rgep->rx_free != RGE_BUF_SLOTS)
1832 		return (DDI_FAILURE);
1833 
1834 	/*
1835 	 * Unregister from the MAC layer subsystem.  This can fail, in
1836 	 * particular if there are DLPI style-2 streams still open -
1837 	 * in which case we just return failure without shutting
1838 	 * down chip operations.
1839 	 */
1840 	if (mac_unregister(rgep->mh) != 0)
1841 		return (DDI_FAILURE);
1842 
1843 	/*
1844 	 * All activity stopped, so we can clean up & exit
1845 	 */
1846 	rge_unattach(rgep);
1847 	return (DDI_SUCCESS);
1848 }
1849 
1850 
1851 /*
1852  * ========== Module Loading Data & Entry Points ==========
1853  */
1854 
1855 #undef	RGE_DBG
1856 #define	RGE_DBG		RGE_DBG_INIT	/* debug flag for this code	*/
1857 DDI_DEFINE_STREAM_OPS(rge_dev_ops, nulldev, nulldev, rge_attach, rge_detach,
1858     nodev, NULL, D_MP, NULL);
1859 
1860 static struct modldrv rge_modldrv = {
1861 	&mod_driverops,		/* Type of module.  This one is a driver */
1862 	rge_ident,		/* short description */
1863 	&rge_dev_ops		/* driver specific ops */
1864 };
1865 
1866 static struct modlinkage modlinkage = {
1867 	MODREV_1, (void *)&rge_modldrv, NULL
1868 };
1869 
1870 
1871 int
1872 _info(struct modinfo *modinfop)
1873 {
1874 	return (mod_info(&modlinkage, modinfop));
1875 }
1876 
1877 int
1878 _init(void)
1879 {
1880 	int status;
1881 
1882 	mac_init_ops(&rge_dev_ops, "rge");
1883 	status = mod_install(&modlinkage);
1884 	if (status == DDI_SUCCESS)
1885 		mutex_init(rge_log_mutex, NULL, MUTEX_DRIVER, NULL);
1886 	else
1887 		mac_fini_ops(&rge_dev_ops);
1888 
1889 	return (status);
1890 }
1891 
1892 int
1893 _fini(void)
1894 {
1895 	int status;
1896 
1897 	status = mod_remove(&modlinkage);
1898 	if (status == DDI_SUCCESS) {
1899 		mac_fini_ops(&rge_dev_ops);
1900 		mutex_destroy(rge_log_mutex);
1901 	}
1902 	return (status);
1903 }
1904