xref: /titanic_52/usr/src/uts/common/io/rge/rge_main.c (revision a83cadce5d3331b64803bfc641036cec23602c74)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "rge.h"
29 
/*
 * This is the string displayed by modinfo, etc.
 * Make sure you keep the version ID up to date!
 *
 * NOTE(review): "%I%" looks like a source-control keyword that is expanded
 * to the file revision when the file is checked out -- confirm against the
 * build setup before editing the string by hand.
 */
static char rge_ident[] = "Realtek 1Gb Ethernet v%I%";
35 
/*
 * DMA attributes used for packet buffers allocated by ddi_dma_mem_alloc()
 * (the Tx, Rx and free-Rx buffer allocations in this file pass
 * &dma_attr_buf).  Addresses are limited to 32 bits, buffers are aligned
 * to 16 bytes and a binding must fit in one cookie (dma_attr_sgllen == 1).
 */
static ddi_dma_attr_t dma_attr_buf = {
	DMA_ATTR_V0,		/* dma_attr version */
	(uint32_t)0,		/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_count_max */
	(uint32_t)16,		/* dma_attr_align */
	0xFFFFFFFF,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0,			/* dma_attr_flags */
};
53 
/*
 * DMA attributes used for the areas the chip reads/writes directly:
 * the statistics dump area and the Tx/Rx buffer descriptor (BD) rings,
 * all allocated by ddi_dma_mem_alloc() via rge_alloc_bufs().
 * Same limits as the buffer attributes except for the stricter
 * 256-byte alignment.
 */
static ddi_dma_attr_t dma_attr_desc = {
	DMA_ATTR_V0,		/* dma_attr version */
	(uint32_t)0,		/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_count_max */
	(uint32_t)256,		/* dma_attr_align */
	0xFFFFFFFF,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0,			/* dma_attr_flags */
};
71 
/*
 * PIO access attributes for registers: little-endian structure access
 * with strict ordering.
 */
static ddi_device_acc_attr_t rge_reg_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC,
	DDI_DEFAULT_ACC
};
81 
/*
 * DMA access attributes for descriptors.  No byte swapping is requested
 * here; the ring code in this file converts descriptor fields explicitly
 * with RGE_BSWAP_32().
 */
static ddi_device_acc_attr_t rge_desc_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC,
	DDI_DEFAULT_ACC
};
91 
/*
 * DMA access attributes for packet data: never byte-swapped, strictly
 * ordered.
 */
static ddi_device_acc_attr_t rge_buf_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC,
	DDI_DEFAULT_ACC
};
101 
/*
 * Property names
 *
 * NOTE(review): these are presumably looked up as driver properties during
 * attach; the code that reads them is outside this part of the file.
 */
static char debug_propname[] = "rge_debug_flags";
static char mtu_propname[] = "default_mtu";
static char msi_propname[] = "msi_enable";
108 
/*
 * Forward declarations of the MAC-layer entry points defined later in this
 * file, so that they can be referenced from rge_m_callbacks below.
 */
static int		rge_m_start(void *);
static void		rge_m_stop(void *);
static int		rge_m_promisc(void *, boolean_t);
static int		rge_m_multicst(void *, boolean_t, const uint8_t *);
static int		rge_m_unicst(void *, const uint8_t *);
static void		rge_m_resources(void *);
static void		rge_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t	rge_m_getcapab(void *, mac_capab_t, void *);

/*
 * Flags stored in the first member of rge_m_callbacks; they name the
 * resources, ioctl and getcapab entry points that this driver supplies.
 */
#define	RGE_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)
119 
/*
 * Entry-point table handed to the MAC layer.  The initializer is
 * positional, so the order must match the member order of mac_callbacks_t
 * (declared outside this file).  rge_m_stat and rge_m_tx are not defined
 * in this part of the file.
 */
static mac_callbacks_t rge_m_callbacks = {
	RGE_M_CALLBACK_FLAGS,
	rge_m_stat,
	rge_m_start,
	rge_m_stop,
	rge_m_promisc,
	rge_m_multicst,
	rge_m_unicst,
	rge_m_tx,
	rge_m_resources,
	rge_m_ioctl,
	rge_m_getcapab
};
133 
134 /*
135  * Allocate an area of memory and a DMA handle for accessing it
136  */
137 static int
138 rge_alloc_dma_mem(rge_t *rgep, size_t memsize, ddi_dma_attr_t *dma_attr_p,
139 	ddi_device_acc_attr_t *acc_attr_p, uint_t dma_flags, dma_area_t *dma_p)
140 {
141 	caddr_t vaddr;
142 	int err;
143 
144 	/*
145 	 * Allocate handle
146 	 */
147 	err = ddi_dma_alloc_handle(rgep->devinfo, dma_attr_p,
148 		    DDI_DMA_SLEEP, NULL, &dma_p->dma_hdl);
149 	if (err != DDI_SUCCESS) {
150 		dma_p->dma_hdl = NULL;
151 		return (DDI_FAILURE);
152 	}
153 
154 	/*
155 	 * Allocate memory
156 	 */
157 	err = ddi_dma_mem_alloc(dma_p->dma_hdl, memsize, acc_attr_p,
158 	    dma_flags & (DDI_DMA_CONSISTENT | DDI_DMA_STREAMING),
159 	    DDI_DMA_SLEEP, NULL, &vaddr, &dma_p->alength, &dma_p->acc_hdl);
160 	if (err != DDI_SUCCESS) {
161 		ddi_dma_free_handle(&dma_p->dma_hdl);
162 		dma_p->dma_hdl = NULL;
163 		dma_p->acc_hdl = NULL;
164 		return (DDI_FAILURE);
165 	}
166 
167 	/*
168 	 * Bind the two together
169 	 */
170 	dma_p->mem_va = vaddr;
171 	err = ddi_dma_addr_bind_handle(dma_p->dma_hdl, NULL,
172 	    vaddr, dma_p->alength, dma_flags, DDI_DMA_SLEEP, NULL,
173 	    &dma_p->cookie, &dma_p->ncookies);
174 	if (err != DDI_DMA_MAPPED || dma_p->ncookies != 1) {
175 		ddi_dma_mem_free(&dma_p->acc_hdl);
176 		ddi_dma_free_handle(&dma_p->dma_hdl);
177 		dma_p->acc_hdl = NULL;
178 		dma_p->dma_hdl = NULL;
179 		return (DDI_FAILURE);
180 	}
181 
182 	dma_p->nslots = ~0U;
183 	dma_p->size = ~0U;
184 	dma_p->token = ~0U;
185 	dma_p->offset = 0;
186 	return (DDI_SUCCESS);
187 }
188 
189 /*
190  * Free one allocated area of DMAable memory
191  */
192 static void
193 rge_free_dma_mem(dma_area_t *dma_p)
194 {
195 	if (dma_p->dma_hdl != NULL) {
196 		if (dma_p->ncookies) {
197 			(void) ddi_dma_unbind_handle(dma_p->dma_hdl);
198 			dma_p->ncookies = 0;
199 		}
200 		ddi_dma_free_handle(&dma_p->dma_hdl);
201 		dma_p->dma_hdl = NULL;
202 	}
203 
204 	if (dma_p->acc_hdl != NULL) {
205 		ddi_dma_mem_free(&dma_p->acc_hdl);
206 		dma_p->acc_hdl = NULL;
207 	}
208 }
209 
210 /*
211  * Utility routine to carve a slice off a chunk of allocated memory,
212  * updating the chunk descriptor accordingly.  The size of the slice
213  * is given by the product of the <qty> and <size> parameters.
214  */
215 static void
216 rge_slice_chunk(dma_area_t *slice, dma_area_t *chunk,
217 	uint32_t qty, uint32_t size)
218 {
219 	static uint32_t sequence = 0xbcd5704a;
220 	size_t totsize;
221 
222 	totsize = qty*size;
223 	ASSERT(size >= 0);
224 	ASSERT(totsize <= chunk->alength);
225 
226 	*slice = *chunk;
227 	slice->nslots = qty;
228 	slice->size = size;
229 	slice->alength = totsize;
230 	slice->token = ++sequence;
231 
232 	chunk->mem_va = (caddr_t)chunk->mem_va + totsize;
233 	chunk->alength -= totsize;
234 	chunk->offset += totsize;
235 	chunk->cookie.dmac_laddress += totsize;
236 	chunk->cookie.dmac_size -= totsize;
237 }
238 
239 static int
240 rge_alloc_bufs(rge_t *rgep)
241 {
242 	size_t txdescsize;
243 	size_t rxdescsize;
244 	int err;
245 
246 	/*
247 	 * Allocate memory & handle for packet statistics
248 	 */
249 	err = rge_alloc_dma_mem(rgep,
250 	    RGE_STATS_DUMP_SIZE,
251 	    &dma_attr_desc,
252 	    &rge_desc_accattr,
253 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
254 	    &rgep->dma_area_stats);
255 	if (err != DDI_SUCCESS)
256 		return (DDI_FAILURE);
257 	rgep->hw_stats = DMA_VPTR(rgep->dma_area_stats);
258 
259 	/*
260 	 * Allocate memory & handle for Tx descriptor ring
261 	 */
262 	txdescsize = RGE_SEND_SLOTS * sizeof (rge_bd_t);
263 	err = rge_alloc_dma_mem(rgep,
264 	    txdescsize,
265 	    &dma_attr_desc,
266 	    &rge_desc_accattr,
267 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
268 	    &rgep->dma_area_txdesc);
269 	if (err != DDI_SUCCESS)
270 		return (DDI_FAILURE);
271 
272 	/*
273 	 * Allocate memory & handle for Rx descriptor ring
274 	 */
275 	rxdescsize = RGE_RECV_SLOTS * sizeof (rge_bd_t);
276 	err = rge_alloc_dma_mem(rgep,
277 	    rxdescsize,
278 	    &dma_attr_desc,
279 	    &rge_desc_accattr,
280 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
281 	    &rgep->dma_area_rxdesc);
282 	if (err != DDI_SUCCESS)
283 		return (DDI_FAILURE);
284 
285 	return (DDI_SUCCESS);
286 }
287 
288 /*
289  * rge_free_bufs() -- free descriptors/buffers allocated for this
290  * device instance.
291  */
292 static void
293 rge_free_bufs(rge_t *rgep)
294 {
295 	rge_free_dma_mem(&rgep->dma_area_stats);
296 	rge_free_dma_mem(&rgep->dma_area_txdesc);
297 	rge_free_dma_mem(&rgep->dma_area_rxdesc);
298 }
299 
300 /*
301  * ========== Transmit and receive ring reinitialisation ==========
302  */
303 
304 /*
305  * These <reinit> routines each reset the rx/tx rings to an initial
306  * state, assuming that the corresponding <init> routine has already
307  * been called exactly once.
308  */
309 static void
310 rge_reinit_send_ring(rge_t *rgep)
311 {
312 	sw_sbd_t *ssbdp;
313 	rge_bd_t *bdp;
314 	uint32_t slot;
315 
316 	/*
317 	 * re-init send ring
318 	 */
319 	DMA_ZERO(rgep->tx_desc);
320 	ssbdp = rgep->sw_sbds;
321 	bdp = rgep->tx_ring;
322 	for (slot = 0; slot < RGE_SEND_SLOTS; slot++) {
323 		bdp->host_buf_addr =
324 		    RGE_BSWAP_32(ssbdp->pbuf.cookie.dmac_laddress);
325 		bdp->host_buf_addr_hi =
326 		    RGE_BSWAP_32(ssbdp->pbuf.cookie.dmac_laddress >> 32);
327 		/* last BD in Tx ring */
328 		if (slot == (RGE_SEND_SLOTS - 1))
329 			bdp->flags_len = RGE_BSWAP_32(BD_FLAG_EOR);
330 		ssbdp++;
331 		bdp++;
332 	}
333 	DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
334 	rgep->tx_next = 0;
335 	rgep->tc_next = 0;
336 	rgep->tc_tail = 0;
337 	rgep->tx_flow = 0;
338 	rgep->tx_free = RGE_SEND_SLOTS;
339 }
340 
341 static void
342 rge_reinit_recv_ring(rge_t *rgep)
343 {
344 	rge_bd_t *bdp;
345 	sw_rbd_t *srbdp;
346 	dma_area_t *pbuf;
347 	uint32_t slot;
348 
349 	/*
350 	 * re-init receive ring
351 	 */
352 	DMA_ZERO(rgep->rx_desc);
353 	srbdp = rgep->sw_rbds;
354 	bdp = rgep->rx_ring;
355 	for (slot = 0; slot < RGE_RECV_SLOTS; slot++) {
356 		pbuf = &srbdp->rx_buf->pbuf;
357 		bdp->host_buf_addr =
358 		    RGE_BSWAP_32(pbuf->cookie.dmac_laddress + rgep->head_room);
359 		bdp->host_buf_addr_hi =
360 		    RGE_BSWAP_32(pbuf->cookie.dmac_laddress >> 32);
361 		bdp->flags_len = RGE_BSWAP_32(BD_FLAG_HW_OWN |
362 		    (rgep->rxbuf_size - rgep->head_room));
363 		/* last BD in Tx ring */
364 		if (slot == (RGE_RECV_SLOTS - 1))
365 			bdp->flags_len |= RGE_BSWAP_32(BD_FLAG_EOR);
366 		srbdp++;
367 		bdp++;
368 	}
369 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORDEV);
370 	rgep->watchdog = 0;
371 	rgep->rx_next = 0;
372 }
373 
374 static void
375 rge_reinit_buf_ring(rge_t *rgep)
376 {
377 
378 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)
379 		return;
380 
381 	/*
382 	 * If all the up-sending buffers haven't been returned to driver,
383 	 * use bcopy() only in rx process.
384 	 */
385 	if (rgep->rx_free != RGE_BUF_SLOTS)
386 		rgep->rx_bcopy = B_TRUE;
387 }
388 
389 static void
390 rge_reinit_rings(rge_t *rgep)
391 {
392 	rge_reinit_send_ring(rgep);
393 	rge_reinit_recv_ring(rgep);
394 	rge_reinit_buf_ring(rgep);
395 }
396 
397 static void
398 rge_fini_send_ring(rge_t *rgep)
399 {
400 	sw_sbd_t *ssbdp;
401 	uint32_t slot;
402 
403 	ssbdp = rgep->sw_sbds;
404 	for (slot = 0; slot < RGE_SEND_SLOTS; ++slot) {
405 		rge_free_dma_mem(&ssbdp->pbuf);
406 		ssbdp++;
407 	}
408 
409 	kmem_free(rgep->sw_sbds, RGE_SEND_SLOTS * sizeof (sw_sbd_t));
410 	rgep->sw_sbds = NULL;
411 }
412 
413 static void
414 rge_fini_recv_ring(rge_t *rgep)
415 {
416 	sw_rbd_t *srbdp;
417 	uint32_t slot;
418 
419 	srbdp = rgep->sw_rbds;
420 	for (slot = 0; slot < RGE_RECV_SLOTS; ++srbdp, ++slot) {
421 		if (srbdp->rx_buf) {
422 			if (srbdp->rx_buf->mp != NULL) {
423 				freemsg(srbdp->rx_buf->mp);
424 				srbdp->rx_buf->mp = NULL;
425 			}
426 			rge_free_dma_mem(&srbdp->rx_buf->pbuf);
427 			kmem_free(srbdp->rx_buf, sizeof (dma_buf_t));
428 			srbdp->rx_buf = NULL;
429 		}
430 	}
431 
432 	kmem_free(rgep->sw_rbds, RGE_RECV_SLOTS * sizeof (sw_rbd_t));
433 	rgep->sw_rbds = NULL;
434 }
435 
436 static void
437 rge_fini_buf_ring(rge_t *rgep)
438 {
439 	sw_rbd_t *srbdp;
440 	uint32_t slot;
441 
442 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)
443 		return;
444 
445 	ASSERT(rgep->rx_free == RGE_BUF_SLOTS);
446 
447 	srbdp = rgep->free_srbds;
448 	for (slot = 0; slot < RGE_BUF_SLOTS; ++srbdp, ++slot) {
449 		if (srbdp->rx_buf != NULL) {
450 			if (srbdp->rx_buf->mp != NULL) {
451 				freemsg(srbdp->rx_buf->mp);
452 				srbdp->rx_buf->mp = NULL;
453 			}
454 			rge_free_dma_mem(&srbdp->rx_buf->pbuf);
455 			kmem_free(srbdp->rx_buf, sizeof (dma_buf_t));
456 			srbdp->rx_buf = NULL;
457 		}
458 	}
459 
460 	kmem_free(rgep->free_srbds, RGE_BUF_SLOTS * sizeof (sw_rbd_t));
461 	rgep->free_srbds = NULL;
462 }
463 
464 static void
465 rge_fini_rings(rge_t *rgep)
466 {
467 	rge_fini_send_ring(rgep);
468 	rge_fini_recv_ring(rgep);
469 	rge_fini_buf_ring(rgep);
470 }
471 
472 static int
473 rge_init_send_ring(rge_t *rgep)
474 {
475 	uint32_t slot;
476 	sw_sbd_t *ssbdp;
477 	dma_area_t *pbuf;
478 	dma_area_t desc;
479 	int err;
480 
481 	/*
482 	 * Allocate the array of s/w Tx Buffer Descriptors
483 	 */
484 	ssbdp = kmem_zalloc(RGE_SEND_SLOTS*sizeof (*ssbdp), KM_SLEEP);
485 	rgep->sw_sbds = ssbdp;
486 
487 	/*
488 	 * Init send ring
489 	 */
490 	rgep->tx_desc = rgep->dma_area_txdesc;
491 	DMA_ZERO(rgep->tx_desc);
492 	rgep->tx_ring = rgep->tx_desc.mem_va;
493 
494 	desc = rgep->tx_desc;
495 	for (slot = 0; slot < RGE_SEND_SLOTS; slot++) {
496 		rge_slice_chunk(&ssbdp->desc, &desc, 1, sizeof (rge_bd_t));
497 
498 		/*
499 		 * Allocate memory & handle for Tx buffers
500 		 */
501 		pbuf = &ssbdp->pbuf;
502 		err = rge_alloc_dma_mem(rgep, rgep->txbuf_size,
503 		    &dma_attr_buf, &rge_buf_accattr,
504 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, pbuf);
505 		if (err != DDI_SUCCESS) {
506 			rge_error(rgep,
507 			    "rge_init_send_ring: alloc tx buffer failed");
508 			rge_fini_send_ring(rgep);
509 			return (DDI_FAILURE);
510 		}
511 		ssbdp++;
512 	}
513 	ASSERT(desc.alength == 0);
514 
515 	DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
516 	return (DDI_SUCCESS);
517 }
518 
519 static int
520 rge_init_recv_ring(rge_t *rgep)
521 {
522 	uint32_t slot;
523 	sw_rbd_t *srbdp;
524 	dma_buf_t *rx_buf;
525 	dma_area_t *pbuf;
526 	int err;
527 
528 	/*
529 	 * Allocate the array of s/w Rx Buffer Descriptors
530 	 */
531 	srbdp = kmem_zalloc(RGE_RECV_SLOTS*sizeof (*srbdp), KM_SLEEP);
532 	rgep->sw_rbds = srbdp;
533 
534 	/*
535 	 * Init receive ring
536 	 */
537 	rgep->rx_next = 0;
538 	rgep->rx_desc = rgep->dma_area_rxdesc;
539 	DMA_ZERO(rgep->rx_desc);
540 	rgep->rx_ring = rgep->rx_desc.mem_va;
541 
542 	for (slot = 0; slot < RGE_RECV_SLOTS; slot++) {
543 		srbdp->rx_buf = rx_buf =
544 		    kmem_zalloc(sizeof (dma_buf_t), KM_SLEEP);
545 
546 		/*
547 		 * Allocate memory & handle for Rx buffers
548 		 */
549 		pbuf = &rx_buf->pbuf;
550 		err = rge_alloc_dma_mem(rgep, rgep->rxbuf_size,
551 		    &dma_attr_buf, &rge_buf_accattr,
552 		    DDI_DMA_READ | DDI_DMA_STREAMING, pbuf);
553 		if (err != DDI_SUCCESS) {
554 			rge_fini_recv_ring(rgep);
555 			rge_error(rgep,
556 			    "rge_init_recv_ring: alloc rx buffer failed");
557 			return (DDI_FAILURE);
558 		}
559 
560 		pbuf->alength -= rgep->head_room;
561 		pbuf->offset += rgep->head_room;
562 		if (!(rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)) {
563 			rx_buf->rx_recycle.free_func = rge_rx_recycle;
564 			rx_buf->rx_recycle.free_arg = (caddr_t)rx_buf;
565 			rx_buf->private = (caddr_t)rgep;
566 			rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
567 			    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
568 			if (rx_buf->mp == NULL) {
569 				rge_fini_recv_ring(rgep);
570 				rge_problem(rgep,
571 				    "rge_init_recv_ring: desballoc() failed");
572 				return (DDI_FAILURE);
573 			}
574 		}
575 		srbdp++;
576 	}
577 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORDEV);
578 	return (DDI_SUCCESS);
579 }
580 
581 static int
582 rge_init_buf_ring(rge_t *rgep)
583 {
584 	uint32_t slot;
585 	sw_rbd_t *free_srbdp;
586 	dma_buf_t *rx_buf;
587 	dma_area_t *pbuf;
588 	int err;
589 
590 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY) {
591 		rgep->rx_bcopy = B_TRUE;
592 		return (DDI_SUCCESS);
593 	}
594 
595 	/*
596 	 * Allocate the array of s/w free Buffer Descriptors
597 	 */
598 	free_srbdp = kmem_zalloc(RGE_BUF_SLOTS*sizeof (*free_srbdp), KM_SLEEP);
599 	rgep->free_srbds = free_srbdp;
600 
601 	/*
602 	 * Init free buffer ring
603 	 */
604 	rgep->rc_next = 0;
605 	rgep->rf_next = 0;
606 	rgep->rx_bcopy = B_FALSE;
607 	rgep->rx_free = RGE_BUF_SLOTS;
608 	for (slot = 0; slot < RGE_BUF_SLOTS; slot++) {
609 		free_srbdp->rx_buf = rx_buf =
610 		    kmem_zalloc(sizeof (dma_buf_t), KM_SLEEP);
611 
612 		/*
613 		 * Allocate memory & handle for free Rx buffers
614 		 */
615 		pbuf = &rx_buf->pbuf;
616 		err = rge_alloc_dma_mem(rgep, rgep->rxbuf_size,
617 		    &dma_attr_buf, &rge_buf_accattr,
618 		    DDI_DMA_READ | DDI_DMA_STREAMING, pbuf);
619 		if (err != DDI_SUCCESS) {
620 			rge_fini_buf_ring(rgep);
621 			rge_error(rgep,
622 			    "rge_init_buf_ring: alloc rx free buffer failed");
623 			return (DDI_FAILURE);
624 		}
625 		pbuf->alength -= rgep->head_room;
626 		pbuf->offset += rgep->head_room;
627 		rx_buf->rx_recycle.free_func = rge_rx_recycle;
628 		rx_buf->rx_recycle.free_arg = (caddr_t)rx_buf;
629 		rx_buf->private = (caddr_t)rgep;
630 		rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
631 		    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
632 		if (rx_buf->mp == NULL) {
633 			rge_fini_buf_ring(rgep);
634 			rge_problem(rgep,
635 			    "rge_init_buf_ring: desballoc() failed");
636 			return (DDI_FAILURE);
637 		}
638 		free_srbdp++;
639 	}
640 	return (DDI_SUCCESS);
641 }
642 
643 static int
644 rge_init_rings(rge_t *rgep)
645 {
646 	int err;
647 
648 	err = rge_init_send_ring(rgep);
649 	if (err != DDI_SUCCESS)
650 		return (DDI_FAILURE);
651 
652 	err = rge_init_recv_ring(rgep);
653 	if (err != DDI_SUCCESS) {
654 		rge_fini_send_ring(rgep);
655 		return (DDI_FAILURE);
656 	}
657 
658 	err = rge_init_buf_ring(rgep);
659 	if (err != DDI_SUCCESS) {
660 		rge_fini_send_ring(rgep);
661 		rge_fini_recv_ring(rgep);
662 		return (DDI_FAILURE);
663 	}
664 
665 	return (DDI_SUCCESS);
666 }
667 
668 /*
669  * ========== Internal state management entry points ==========
670  */
671 
/*
 * Select the debug category for the code that follows.
 * NOTE(review): presumably consulted by RGE_DEBUG() -- confirm in rge.h.
 */
#undef	RGE_DBG
#define	RGE_DBG		RGE_DBG_NEMO	/* debug flag for this code	*/
674 
675 /*
676  * These routines provide all the functionality required by the
677  * corresponding MAC layer entry points, but don't update the
678  * MAC state so they can be called internally without disturbing
679  * our record of what NEMO thinks we should be doing ...
680  */
681 
/*
 *	rge_reset() -- reset h/w & rings to initial state
 *
 * The caller must already hold genlock (asserted below).  The receive
 * lock, the recycle lock and the error lock (as writer) are taken for the
 * duration of the reset and dropped again in the reverse order.
 * The return value of rge_chip_reset() is deliberately ignored.
 */
static void
rge_reset(rge_t *rgep)
{
	ASSERT(mutex_owned(rgep->genlock));

	/*
	 * Grab all the other mutexes in the world (this should
	 * ensure no other threads are manipulating driver state)
	 */
	mutex_enter(rgep->rx_lock);
	mutex_enter(rgep->rc_lock);
	rw_enter(rgep->errlock, RW_WRITER);

	/*
	 * Reset the chip, put the rings back into their initial state,
	 * then reprogram the chip.
	 */
	(void) rge_chip_reset(rgep);
	rge_reinit_rings(rgep);
	rge_chip_init(rgep);

	/*
	 * Free the world ...
	 */
	rw_exit(rgep->errlock);
	mutex_exit(rgep->rc_lock);
	mutex_exit(rgep->rx_lock);

	RGE_DEBUG(("rge_reset($%p) done", (void *)rgep));
}
711 
712 /*
713  *	rge_stop() -- stop processing, don't reset h/w or rings
714  */
715 static void
716 rge_stop(rge_t *rgep)
717 {
718 	ASSERT(mutex_owned(rgep->genlock));
719 
720 	rge_chip_stop(rgep, B_FALSE);
721 
722 	RGE_DEBUG(("rge_stop($%p) done", (void *)rgep));
723 }
724 
725 /*
726  *	rge_start() -- start transmitting/receiving
727  */
728 static void
729 rge_start(rge_t *rgep)
730 {
731 	ASSERT(mutex_owned(rgep->genlock));
732 
733 	/*
734 	 * Start chip processing, including enabling interrupts
735 	 */
736 	rge_chip_start(rgep);
737 	rgep->watchdog = 0;
738 }
739 
740 /*
741  * rge_restart - restart transmitting/receiving after error or suspend
742  */
743 void
744 rge_restart(rge_t *rgep)
745 {
746 	uint32_t i;
747 
748 	ASSERT(mutex_owned(rgep->genlock));
749 	/*
750 	 * Wait for posted buffer to be freed...
751 	 */
752 	if (!rgep->rx_bcopy) {
753 		for (i = 0; i < RXBUFF_FREE_LOOP; i++) {
754 			if (rgep->rx_free == RGE_BUF_SLOTS)
755 				break;
756 			drv_usecwait(1000);
757 			RGE_DEBUG(("rge_restart: waiting for rx buf free..."));
758 		}
759 	}
760 	rge_reset(rgep);
761 	rgep->stats.chip_reset++;
762 	if (rgep->rge_mac_state == RGE_MAC_STARTED) {
763 		rge_start(rgep);
764 		rgep->resched_needed = B_TRUE;
765 		(void) ddi_intr_trigger_softint(rgep->resched_hdl, NULL);
766 	}
767 }
768 
769 
770 /*
771  * ========== Nemo-required management entry points ==========
772  */
773 
/*
 * Select the debug category for the MAC entry points that follow.
 * NOTE(review): presumably consulted by RGE_DEBUG() -- confirm in rge.h.
 */
#undef	RGE_DBG
#define	RGE_DBG		RGE_DBG_NEMO	/* debug flag for this code	*/
776 
777 /*
778  *	rge_m_stop() -- stop transmitting/receiving
779  */
780 static void
781 rge_m_stop(void *arg)
782 {
783 	rge_t *rgep = arg;		/* private device info	*/
784 	uint32_t i;
785 
786 	/*
787 	 * Just stop processing, then record new MAC state
788 	 */
789 	mutex_enter(rgep->genlock);
790 	rge_stop(rgep);
791 	/*
792 	 * Wait for posted buffer to be freed...
793 	 */
794 	if (!rgep->rx_bcopy) {
795 		for (i = 0; i < RXBUFF_FREE_LOOP; i++) {
796 			if (rgep->rx_free == RGE_BUF_SLOTS)
797 				break;
798 			drv_usecwait(1000);
799 			RGE_DEBUG(("rge_m_stop: waiting for rx buf free..."));
800 		}
801 	}
802 	rgep->rge_mac_state = RGE_MAC_STOPPED;
803 	RGE_DEBUG(("rge_m_stop($%p) done", arg));
804 	mutex_exit(rgep->genlock);
805 }
806 
807 /*
808  *	rge_m_start() -- start transmitting/receiving
809  */
810 static int
811 rge_m_start(void *arg)
812 {
813 	rge_t *rgep = arg;		/* private device info	*/
814 
815 	mutex_enter(rgep->genlock);
816 
817 	/*
818 	 * Clear hw/sw statistics
819 	 */
820 	DMA_ZERO(rgep->dma_area_stats);
821 	bzero(&rgep->stats, sizeof (rge_stats_t));
822 
823 	/*
824 	 * Start processing and record new MAC state
825 	 */
826 	rge_reset(rgep);
827 	rge_start(rgep);
828 	rgep->rge_mac_state = RGE_MAC_STARTED;
829 	RGE_DEBUG(("rge_m_start($%p) done", arg));
830 
831 	mutex_exit(rgep->genlock);
832 
833 	return (0);
834 }
835 
836 /*
837  *	rge_m_unicst_set() -- set the physical network address
838  */
839 static int
840 rge_m_unicst(void *arg, const uint8_t *macaddr)
841 {
842 	rge_t *rgep = arg;		/* private device info	*/
843 
844 	/*
845 	 * Remember the new current address in the driver state
846 	 * Sync the chip's idea of the address too ...
847 	 */
848 	mutex_enter(rgep->genlock);
849 	bcopy(macaddr, rgep->netaddr, ETHERADDRL);
850 	rge_chip_sync(rgep, RGE_SET_MAC);
851 	mutex_exit(rgep->genlock);
852 
853 	return (0);
854 }
855 
856 /*
857  * Compute the index of the required bit in the multicast hash map.
858  * This must mirror the way the hardware actually does it!
859  */
860 static uint32_t
861 rge_hash_index(const uint8_t *mca)
862 {
863 	uint32_t crc = (uint32_t)RGE_HASH_CRC;
864 	uint32_t const POLY = RGE_HASH_POLY;
865 	uint32_t msb;
866 	int bytes;
867 	uchar_t currentbyte;
868 	uint32_t index;
869 	int bit;
870 
871 	for (bytes = 0; bytes < ETHERADDRL; bytes++) {
872 		currentbyte = mca[bytes];
873 		for (bit = 0; bit < 8; bit++) {
874 			msb = crc >> 31;
875 			crc <<= 1;
876 			if (msb ^ (currentbyte & 1))
877 				crc ^= POLY;
878 			currentbyte >>= 1;
879 		}
880 	}
881 	index = crc >> 26;
882 		/* the index value is between 0 and 63(0x3f) */
883 
884 	return (index);
885 }
886 
887 /*
888  *	rge_m_multicst_add() -- enable/disable a multicast address
889  */
890 static int
891 rge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
892 {
893 	rge_t *rgep = arg;		/* private device info	*/
894 	struct ether_addr *addr;
895 	uint32_t index;
896 	uint32_t reg;
897 	uint8_t *hashp;
898 
899 	mutex_enter(rgep->genlock);
900 	hashp = rgep->mcast_hash;
901 	addr = (struct ether_addr *)mca;
902 	/*
903 	 * Calculate the Multicast address hash index value
904 	 *	Normally, the position of MAR0-MAR7 is
905 	 *	MAR0: offset 0x08, ..., MAR7: offset 0x0F.
906 	 *
907 	 *	For pcie chipset, the position of MAR0-MAR7 is
908 	 *	different from others:
909 	 *	MAR0: offset 0x0F, ..., MAR7: offset 0x08.
910 	 */
911 	index = rge_hash_index(addr->ether_addr_octet);
912 	if (rgep->chipid.is_pcie)
913 		reg = (~(index / RGE_MCAST_NUM)) & 0x7;
914 	else
915 		reg = index / RGE_MCAST_NUM;
916 
917 	if (add) {
918 		if (rgep->mcast_refs[index]++) {
919 			mutex_exit(rgep->genlock);
920 			return (0);
921 		}
922 		hashp[reg] |= 1 << (index % RGE_MCAST_NUM);
923 	} else {
924 		if (--rgep->mcast_refs[index]) {
925 			mutex_exit(rgep->genlock);
926 			return (0);
927 		}
928 		hashp[reg] &= ~ (1 << (index % RGE_MCAST_NUM));
929 	}
930 
931 	/*
932 	 * Set multicast register
933 	 */
934 	rge_chip_sync(rgep, RGE_SET_MUL);
935 
936 	mutex_exit(rgep->genlock);
937 	return (0);
938 }
939 
940 /*
941  * rge_m_promisc() -- set or reset promiscuous mode on the board
942  *
943  *	Program the hardware to enable/disable promiscuous and/or
944  *	receive-all-multicast modes.
945  */
946 static int
947 rge_m_promisc(void *arg, boolean_t on)
948 {
949 	rge_t *rgep = arg;
950 
951 	/*
952 	 * Store MAC layer specified mode and pass to chip layer to update h/w
953 	 */
954 	mutex_enter(rgep->genlock);
955 
956 	if (rgep->promisc == on) {
957 		mutex_exit(rgep->genlock);
958 		return (0);
959 	}
960 	rgep->promisc = on;
961 	rge_chip_sync(rgep, RGE_SET_PROMISC);
962 	RGE_DEBUG(("rge_m_promisc_set($%p) done", arg));
963 	mutex_exit(rgep->genlock);
964 	return (0);
965 }
966 
967 /*
968  * Loopback ioctl code
969  */
970 
/*
 * Loopback modes offered by this driver.  The whole table is copied out
 * verbatim for LB_GET_INFO and its size is reported for LB_GET_INFO_SIZE;
 * the third column is the value accepted by rge_set_loop_mode().
 */
static lb_property_t loopmodes[] = {
	{ normal,	"normal",	RGE_LOOP_NONE		},
	{ internal,	"PHY",		RGE_LOOP_INTERNAL_PHY	},
	{ internal,	"MAC",		RGE_LOOP_INTERNAL_MAC	}
};
976 
977 static enum ioc_reply
978 rge_set_loop_mode(rge_t *rgep, uint32_t mode)
979 {
980 	/*
981 	 * If the mode isn't being changed, there's nothing to do ...
982 	 */
983 	if (mode == rgep->param_loop_mode)
984 		return (IOC_ACK);
985 
986 	/*
987 	 * Validate the requested mode and prepare a suitable message
988 	 * to explain the link down/up cycle that the change will
989 	 * probably induce ...
990 	 */
991 	switch (mode) {
992 	default:
993 		return (IOC_INVAL);
994 
995 	case RGE_LOOP_NONE:
996 	case RGE_LOOP_INTERNAL_PHY:
997 	case RGE_LOOP_INTERNAL_MAC:
998 		break;
999 	}
1000 
1001 	/*
1002 	 * All OK; tell the caller to reprogram
1003 	 * the PHY and/or MAC for the new mode ...
1004 	 */
1005 	rgep->param_loop_mode = mode;
1006 	return (IOC_RESTART_ACK);
1007 }
1008 
1009 static enum ioc_reply
1010 rge_loop_ioctl(rge_t *rgep, queue_t *wq, mblk_t *mp, struct iocblk *iocp)
1011 {
1012 	lb_info_sz_t *lbsp;
1013 	lb_property_t *lbpp;
1014 	uint32_t *lbmp;
1015 	int cmd;
1016 
1017 	_NOTE(ARGUNUSED(wq))
1018 
1019 	/*
1020 	 * Validate format of ioctl
1021 	 */
1022 	if (mp->b_cont == NULL)
1023 		return (IOC_INVAL);
1024 
1025 	cmd = iocp->ioc_cmd;
1026 	switch (cmd) {
1027 	default:
1028 		/* NOTREACHED */
1029 		rge_error(rgep, "rge_loop_ioctl: invalid cmd 0x%x", cmd);
1030 		return (IOC_INVAL);
1031 
1032 	case LB_GET_INFO_SIZE:
1033 		if (iocp->ioc_count != sizeof (lb_info_sz_t))
1034 			return (IOC_INVAL);
1035 		lbsp = (lb_info_sz_t *)mp->b_cont->b_rptr;
1036 		*lbsp = sizeof (loopmodes);
1037 		return (IOC_REPLY);
1038 
1039 	case LB_GET_INFO:
1040 		if (iocp->ioc_count != sizeof (loopmodes))
1041 			return (IOC_INVAL);
1042 		lbpp = (lb_property_t *)mp->b_cont->b_rptr;
1043 		bcopy(loopmodes, lbpp, sizeof (loopmodes));
1044 		return (IOC_REPLY);
1045 
1046 	case LB_GET_MODE:
1047 		if (iocp->ioc_count != sizeof (uint32_t))
1048 			return (IOC_INVAL);
1049 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
1050 		*lbmp = rgep->param_loop_mode;
1051 		return (IOC_REPLY);
1052 
1053 	case LB_SET_MODE:
1054 		if (iocp->ioc_count != sizeof (uint32_t))
1055 			return (IOC_INVAL);
1056 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
1057 		return (rge_set_loop_mode(rgep, *lbmp));
1058 	}
1059 }
1060 
/*
 * Specific rge IOCTLs, the MAC layer handles the generic ones.
 *
 * Processing happens in four steps:
 *  1. the command is validated and classified as privileged or not
 *     (unknown commands are NAKed with EINVAL straight away);
 *  2. privileged commands are checked with secpolicy_net_config();
 *  3. under genlock, the command is dispatched to the chip, loopback or
 *     ND handler, and the PHY is updated if the handler asks for a
 *     restart;
 *  4. after dropping genlock, the reply (ACK, NAK or prepared reply) is
 *     sent according to the handler's status.
 */
static void
rge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	rge_t *rgep = arg;
	struct iocblk *iocp;
	enum ioc_reply status;
	boolean_t need_privilege;
	int err;
	int cmd;

	/*
	 * Validate the command before bothering with the mutex ...
	 */
	iocp = (struct iocblk *)mp->b_rptr;
	iocp->ioc_error = 0;
	need_privilege = B_TRUE;
	cmd = iocp->ioc_cmd;
	switch (cmd) {
	default:
		miocnak(wq, mp, 0, EINVAL);
		return;

	case RGE_MII_READ:
	case RGE_MII_WRITE:
	case RGE_DIAG:
	case RGE_PEEK:
	case RGE_POKE:
	case RGE_PHY_RESET:
	case RGE_SOFT_RESET:
	case RGE_HARD_RESET:
		/* diagnostic/chip commands always need privilege */
		break;

	case LB_GET_INFO_SIZE:
	case LB_GET_INFO:
	case LB_GET_MODE:
		/* read-only loopback queries need no privilege */
		need_privilege = B_FALSE;
		/* FALLTHRU */
	case LB_SET_MODE:
		break;

	case ND_GET:
		/* reading a parameter needs no privilege, setting does */
		need_privilege = B_FALSE;
		/* FALLTHRU */
	case ND_SET:
		break;
	}

	if (need_privilege) {
		/*
		 * Check for specific net_config privilege
		 */
		err = secpolicy_net_config(iocp->ioc_cr, B_FALSE);
		if (err != 0) {
			miocnak(wq, mp, 0, err);
			return;
		}
	}

	mutex_enter(rgep->genlock);

	/*
	 * Dispatch to the handler for this family of commands
	 */
	switch (cmd) {
	default:
		_NOTE(NOTREACHED)
		status = IOC_INVAL;
		break;

	case RGE_MII_READ:
	case RGE_MII_WRITE:
	case RGE_DIAG:
	case RGE_PEEK:
	case RGE_POKE:
	case RGE_PHY_RESET:
	case RGE_SOFT_RESET:
	case RGE_HARD_RESET:
		status = rge_chip_ioctl(rgep, wq, mp, iocp);
		break;

	case LB_GET_INFO_SIZE:
	case LB_GET_INFO:
	case LB_GET_MODE:
	case LB_SET_MODE:
		status = rge_loop_ioctl(rgep, wq, mp, iocp);
		break;

	case ND_GET:
	case ND_SET:
		status = rge_nd_ioctl(rgep, wq, mp, iocp);
		break;
	}

	/*
	 * Do we need to reprogram the PHY and/or the MAC?
	 * Do it now, while we still have the mutex.
	 *
	 * Note: update the PHY first, 'cos it controls the
	 * speed/duplex parameters that the MAC code uses.
	 */
	switch (status) {
	case IOC_RESTART_REPLY:
	case IOC_RESTART_ACK:
		rge_phy_update(rgep);
		break;
	}

	mutex_exit(rgep->genlock);

	/*
	 * Finally, decide how to reply
	 */
	switch (status) {
	default:
	case IOC_INVAL:
		/*
		 * Error, reply with a NAK and EINVAL or the specified error
		 */
		miocnak(wq, mp, 0, iocp->ioc_error == 0 ?
			EINVAL : iocp->ioc_error);
		break;

	case IOC_DONE:
		/*
		 * OK, reply already sent
		 */
		break;

	case IOC_RESTART_ACK:
	case IOC_ACK:
		/*
		 * OK, reply with an ACK
		 */
		miocack(wq, mp, 0, 0);
		break;

	case IOC_RESTART_REPLY:
	case IOC_REPLY:
		/*
		 * OK, send prepared reply as ACK or NAK
		 */
		mp->b_datap->db_type = iocp->ioc_error == 0 ?
			M_IOCACK : M_IOCNAK;
		qreply(wq, mp);
		break;
	}
}
1208 
1209 static void
1210 rge_m_resources(void *arg)
1211 {
1212 	rge_t *rgep = arg;
1213 	mac_rx_fifo_t mrf;
1214 
1215 	mutex_enter(rgep->genlock);
1216 
1217 	/*
1218 	 * Register Rx rings as resources and save mac
1219 	 * resource id for future reference
1220 	 */
1221 	mrf.mrf_type = MAC_RX_FIFO;
1222 	mrf.mrf_blank = rge_chip_blank;
1223 	mrf.mrf_arg = (void *)rgep;
1224 	mrf.mrf_normal_blank_time = RGE_RX_INT_TIME;
1225 	mrf.mrf_normal_pkt_count = RGE_RX_INT_PKTS;
1226 	rgep->handle = mac_resource_add(rgep->mh, (mac_resource_t *)&mrf);
1227 
1228 	mutex_exit(rgep->genlock);
1229 }
1230 
1231 /* ARGSUSED */
1232 static boolean_t
1233 rge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1234 {
1235 	switch (cap) {
1236 	case MAC_CAPAB_HCKSUM: {
1237 		uint32_t *hcksum_txflags = cap_data;
1238 		*hcksum_txflags = HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM;
1239 		break;
1240 	}
1241 	case MAC_CAPAB_POLL:
1242 		/*
1243 		 * There's nothing for us to fill in, simply returning
1244 		 * B_TRUE stating that we support polling is sufficient.
1245 		 */
1246 		break;
1247 	default:
1248 		return (B_FALSE);
1249 	}
1250 	return (B_TRUE);
1251 }
1252 
1253 /*
1254  * ============ Init MSI/Fixed Interrupt routines ==============
1255  */
1256 
1257 /*
1258  * rge_add_intrs:
1259  *
1260  * Register FIXED or MSI interrupts.
1261  */
1262 static int
1263 rge_add_intrs(rge_t *rgep, int intr_type)
1264 {
1265 	dev_info_t *dip = rgep->devinfo;
1266 	int avail;
1267 	int actual;
1268 	int intr_size;
1269 	int count;
1270 	int i, j;
1271 	int ret;
1272 
1273 	/* Get number of interrupts */
1274 	ret = ddi_intr_get_nintrs(dip, intr_type, &count);
1275 	if ((ret != DDI_SUCCESS) || (count == 0)) {
1276 		rge_error(rgep, "ddi_intr_get_nintrs() failure, ret: %d, "
1277 		    "count: %d", ret, count);
1278 		return (DDI_FAILURE);
1279 	}
1280 
1281 	/* Get number of available interrupts */
1282 	ret = ddi_intr_get_navail(dip, intr_type, &avail);
1283 	if ((ret != DDI_SUCCESS) || (avail == 0)) {
1284 		rge_error(rgep, "ddi_intr_get_navail() failure, "
1285 		    "ret: %d, avail: %d\n", ret, avail);
1286 		return (DDI_FAILURE);
1287 	}
1288 
1289 	/* Allocate an array of interrupt handles */
1290 	intr_size = count * sizeof (ddi_intr_handle_t);
1291 	rgep->htable = kmem_alloc(intr_size, KM_SLEEP);
1292 	rgep->intr_rqst = count;
1293 
1294 	/* Call ddi_intr_alloc() */
1295 	ret = ddi_intr_alloc(dip, rgep->htable, intr_type, 0,
1296 	    count, &actual, DDI_INTR_ALLOC_NORMAL);
1297 	if (ret != DDI_SUCCESS || actual == 0) {
1298 		rge_error(rgep, "ddi_intr_alloc() failed %d\n", ret);
1299 		kmem_free(rgep->htable, intr_size);
1300 		return (DDI_FAILURE);
1301 	}
1302 	if (actual < count) {
1303 		rge_log(rgep, "ddi_intr_alloc() Requested: %d, Received: %d\n",
1304 		    count, actual);
1305 	}
1306 	rgep->intr_cnt = actual;
1307 
1308 	/*
1309 	 * Get priority for first msi, assume remaining are all the same
1310 	 */
1311 	if ((ret = ddi_intr_get_pri(rgep->htable[0], &rgep->intr_pri)) !=
1312 	    DDI_SUCCESS) {
1313 		rge_error(rgep, "ddi_intr_get_pri() failed %d\n", ret);
1314 		/* Free already allocated intr */
1315 		for (i = 0; i < actual; i++) {
1316 			(void) ddi_intr_free(rgep->htable[i]);
1317 		}
1318 		kmem_free(rgep->htable, intr_size);
1319 		return (DDI_FAILURE);
1320 	}
1321 
1322 	/* Test for high level mutex */
1323 	if (rgep->intr_pri >= ddi_intr_get_hilevel_pri()) {
1324 		rge_error(rgep, "rge_add_intrs:"
1325 		    "Hi level interrupt not supported");
1326 		for (i = 0; i < actual; i++)
1327 			(void) ddi_intr_free(rgep->htable[i]);
1328 		kmem_free(rgep->htable, intr_size);
1329 		return (DDI_FAILURE);
1330 	}
1331 
1332 	/* Call ddi_intr_add_handler() */
1333 	for (i = 0; i < actual; i++) {
1334 		if ((ret = ddi_intr_add_handler(rgep->htable[i], rge_intr,
1335 		    (caddr_t)rgep, (caddr_t)(uintptr_t)i)) != DDI_SUCCESS) {
1336 			rge_error(rgep, "ddi_intr_add_handler() "
1337 			    "failed %d\n", ret);
1338 			/* Remove already added intr */
1339 			for (j = 0; j < i; j++)
1340 				(void) ddi_intr_remove_handler(rgep->htable[j]);
1341 			/* Free already allocated intr */
1342 			for (i = 0; i < actual; i++) {
1343 				(void) ddi_intr_free(rgep->htable[i]);
1344 			}
1345 			kmem_free(rgep->htable, intr_size);
1346 			return (DDI_FAILURE);
1347 		}
1348 	}
1349 
1350 	if ((ret = ddi_intr_get_cap(rgep->htable[0], &rgep->intr_cap))
1351 	    != DDI_SUCCESS) {
1352 		rge_error(rgep, "ddi_intr_get_cap() failed %d\n", ret);
1353 		for (i = 0; i < actual; i++) {
1354 			(void) ddi_intr_remove_handler(rgep->htable[i]);
1355 			(void) ddi_intr_free(rgep->htable[i]);
1356 		}
1357 		kmem_free(rgep->htable, intr_size);
1358 		return (DDI_FAILURE);
1359 	}
1360 
1361 	return (DDI_SUCCESS);
1362 }
1363 
1364 /*
1365  * rge_rem_intrs:
1366  *
1367  * Unregister FIXED or MSI interrupts
1368  */
1369 static void
1370 rge_rem_intrs(rge_t *rgep)
1371 {
1372 	int i;
1373 
1374 	/* Disable all interrupts */
1375 	if (rgep->intr_cap & DDI_INTR_FLAG_BLOCK) {
1376 		/* Call ddi_intr_block_disable() */
1377 		(void) ddi_intr_block_disable(rgep->htable, rgep->intr_cnt);
1378 	} else {
1379 		for (i = 0; i < rgep->intr_cnt; i++) {
1380 			(void) ddi_intr_disable(rgep->htable[i]);
1381 		}
1382 	}
1383 
1384 	/* Call ddi_intr_remove_handler() */
1385 	for (i = 0; i < rgep->intr_cnt; i++) {
1386 		(void) ddi_intr_remove_handler(rgep->htable[i]);
1387 		(void) ddi_intr_free(rgep->htable[i]);
1388 	}
1389 
1390 	kmem_free(rgep->htable, rgep->intr_rqst * sizeof (ddi_intr_handle_t));
1391 }
1392 
1393 /*
1394  * ========== Per-instance setup/teardown code ==========
1395  */
1396 
1397 #undef	RGE_DBG
1398 #define	RGE_DBG		RGE_DBG_INIT	/* debug flag for this code	*/
1399 
1400 static void
1401 rge_unattach(rge_t *rgep)
1402 {
1403 	/*
1404 	 * Flag that no more activity may be initiated
1405 	 */
1406 	rgep->progress &= ~PROGRESS_READY;
1407 	rgep->rge_mac_state = RGE_MAC_UNATTACH;
1408 
1409 	/*
1410 	 * Quiesce the PHY and MAC (leave it reset but still powered).
1411 	 * Clean up and free all RGE data structures
1412 	 */
1413 	if (rgep->cyclic_id) {
1414 		mutex_enter(&cpu_lock);
1415 		cyclic_remove(rgep->cyclic_id);
1416 		mutex_exit(&cpu_lock);
1417 	}
1418 
1419 	if (rgep->progress & PROGRESS_KSTATS)
1420 		rge_fini_kstats(rgep);
1421 
1422 	if (rgep->progress & PROGRESS_PHY)
1423 		(void) rge_phy_reset(rgep);
1424 
1425 	if (rgep->progress & PROGRESS_INIT) {
1426 		mutex_enter(rgep->genlock);
1427 		(void) rge_chip_reset(rgep);
1428 		mutex_exit(rgep->genlock);
1429 		rge_fini_rings(rgep);
1430 	}
1431 
1432 	if (rgep->progress & PROGRESS_INTR) {
1433 		rge_rem_intrs(rgep);
1434 		mutex_destroy(rgep->rc_lock);
1435 		mutex_destroy(rgep->rx_lock);
1436 		mutex_destroy(rgep->tc_lock);
1437 		mutex_destroy(rgep->tx_lock);
1438 		rw_destroy(rgep->errlock);
1439 		mutex_destroy(rgep->genlock);
1440 	}
1441 
1442 	if (rgep->progress & PROGRESS_FACTOTUM)
1443 		(void) ddi_intr_remove_softint(rgep->factotum_hdl);
1444 
1445 	if (rgep->progress & PROGRESS_RESCHED)
1446 		(void) ddi_intr_remove_softint(rgep->resched_hdl);
1447 
1448 	rge_free_bufs(rgep);
1449 
1450 	if (rgep->progress & PROGRESS_NDD)
1451 		rge_nd_cleanup(rgep);
1452 
1453 	if (rgep->progress & PROGRESS_REGS)
1454 		ddi_regs_map_free(&rgep->io_handle);
1455 
1456 	if (rgep->progress & PROGRESS_CFG)
1457 		pci_config_teardown(&rgep->cfg_handle);
1458 
1459 	ddi_remove_minor_node(rgep->devinfo, NULL);
1460 	kmem_free(rgep, sizeof (*rgep));
1461 }
1462 
1463 static int
1464 rge_resume(dev_info_t *devinfo)
1465 {
1466 	rge_t *rgep;			/* Our private data	*/
1467 	chip_id_t *cidp;
1468 	chip_id_t chipid;
1469 
1470 	rgep = ddi_get_driver_private(devinfo);
1471 	if (rgep == NULL)
1472 		return (DDI_FAILURE);
1473 
1474 	/*
1475 	 * Refuse to resume if the data structures aren't consistent
1476 	 */
1477 	if (rgep->devinfo != devinfo)
1478 		return (DDI_FAILURE);
1479 
1480 	/*
1481 	 * Read chip ID & set up config space command register(s)
1482 	 * Refuse to resume if the chip has changed its identity!
1483 	 */
1484 	cidp = &rgep->chipid;
1485 	rge_chip_cfg_init(rgep, &chipid);
1486 	if (chipid.vendor != cidp->vendor)
1487 		return (DDI_FAILURE);
1488 	if (chipid.device != cidp->device)
1489 		return (DDI_FAILURE);
1490 	if (chipid.revision != cidp->revision)
1491 		return (DDI_FAILURE);
1492 
1493 	/*
1494 	 * All OK, reinitialise h/w & kick off NEMO scheduling
1495 	 */
1496 	mutex_enter(rgep->genlock);
1497 	rge_restart(rgep);
1498 	mutex_exit(rgep->genlock);
1499 	return (DDI_SUCCESS);
1500 }
1501 
1502 
1503 /*
1504  * attach(9E) -- Attach a device to the system
1505  *
1506  * Called once for each board successfully probed.
1507  */
1508 static int
1509 rge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1510 {
1511 	rge_t *rgep;			/* Our private data	*/
1512 	mac_register_t *macp;
1513 	chip_id_t *cidp;
1514 	cyc_handler_t cychand;
1515 	cyc_time_t cyctime;
1516 	int intr_types;
1517 	caddr_t regs;
1518 	int instance;
1519 	int i;
1520 	int err;
1521 
1522 	/*
1523 	 * we don't support high level interrupts in the driver
1524 	 */
1525 	if (ddi_intr_hilevel(devinfo, 0) != 0) {
1526 		cmn_err(CE_WARN,
1527 		    "rge_attach -- unsupported high level interrupt");
1528 		return (DDI_FAILURE);
1529 	}
1530 
1531 	instance = ddi_get_instance(devinfo);
1532 	RGE_GTRACE(("rge_attach($%p, %d) instance %d",
1533 		(void *)devinfo, cmd, instance));
1534 	RGE_BRKPT(NULL, "rge_attach");
1535 
1536 	switch (cmd) {
1537 	default:
1538 		return (DDI_FAILURE);
1539 
1540 	case DDI_RESUME:
1541 		return (rge_resume(devinfo));
1542 
1543 	case DDI_ATTACH:
1544 		break;
1545 	}
1546 
1547 	rgep = kmem_zalloc(sizeof (*rgep), KM_SLEEP);
1548 	ddi_set_driver_private(devinfo, rgep);
1549 	rgep->devinfo = devinfo;
1550 
1551 	/*
1552 	 * Initialize more fields in RGE private data
1553 	 */
1554 	rgep->rge_mac_state = RGE_MAC_ATTACH;
1555 	rgep->debug = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1556 		DDI_PROP_DONTPASS, debug_propname, rge_debug);
1557 	rgep->default_mtu = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1558 		DDI_PROP_DONTPASS, mtu_propname, ETHERMTU);
1559 	rgep->msi_enable = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1560 		DDI_PROP_DONTPASS, msi_propname, B_TRUE);
1561 	(void) snprintf(rgep->ifname, sizeof (rgep->ifname), "%s%d",
1562 		RGE_DRIVER_NAME, instance);
1563 
1564 	/*
1565 	 * Map config space registers
1566 	 * Read chip ID & set up config space command register(s)
1567 	 *
1568 	 * Note: this leaves the chip accessible by Memory Space
1569 	 * accesses, but with interrupts and Bus Mastering off.
1570 	 * This should ensure that nothing untoward will happen
1571 	 * if it has been left active by the (net-)bootloader.
1572 	 * We'll re-enable Bus Mastering once we've reset the chip,
1573 	 * and allow interrupts only when everything else is set up.
1574 	 */
1575 	err = pci_config_setup(devinfo, &rgep->cfg_handle);
1576 	if (err != DDI_SUCCESS) {
1577 		rge_problem(rgep, "pci_config_setup() failed");
1578 		goto attach_fail;
1579 	}
1580 	rgep->progress |= PROGRESS_CFG;
1581 	cidp = &rgep->chipid;
1582 	bzero(cidp, sizeof (*cidp));
1583 	rge_chip_cfg_init(rgep, cidp);
1584 
1585 	/*
1586 	 * Map operating registers
1587 	 */
1588 	err = ddi_regs_map_setup(devinfo, 1, &regs,
1589 	    0, 0, &rge_reg_accattr, &rgep->io_handle);
1590 	if (err != DDI_SUCCESS) {
1591 		rge_problem(rgep, "ddi_regs_map_setup() failed");
1592 		goto attach_fail;
1593 	}
1594 	rgep->io_regs = regs;
1595 	rgep->progress |= PROGRESS_REGS;
1596 
1597 	/*
1598 	 * Register NDD-tweakable parameters
1599 	 */
1600 	if (rge_nd_init(rgep)) {
1601 		rge_problem(rgep, "rge_nd_init() failed");
1602 		goto attach_fail;
1603 	}
1604 	rgep->progress |= PROGRESS_NDD;
1605 
1606 	/*
1607 	 * Characterise the device, so we know its requirements.
1608 	 * Then allocate the appropriate TX and RX descriptors & buffers.
1609 	 */
1610 	rge_chip_ident(rgep);
1611 	err = rge_alloc_bufs(rgep);
1612 	if (err != DDI_SUCCESS) {
1613 		rge_problem(rgep, "DMA buffer allocation failed");
1614 		goto attach_fail;
1615 	}
1616 
1617 	/*
1618 	 * Add the softint handlers:
1619 	 *
1620 	 * Both of these handlers are used to avoid restrictions on the
1621 	 * context and/or mutexes required for some operations.  In
1622 	 * particular, the hardware interrupt handler and its subfunctions
1623 	 * can detect a number of conditions that we don't want to handle
1624 	 * in that context or with that set of mutexes held.  So, these
1625 	 * softints are triggered instead:
1626 	 *
1627 	 * the <resched> softint is triggered if if we have previously
1628 	 * had to refuse to send a packet because of resource shortage
1629 	 * (we've run out of transmit buffers), but the send completion
1630 	 * interrupt handler has now detected that more buffers have
1631 	 * become available.
1632 	 *
1633 	 * the <factotum> is triggered if the h/w interrupt handler
1634 	 * sees the <link state changed> or <error> bits in the status
1635 	 * block.  It's also triggered periodically to poll the link
1636 	 * state, just in case we aren't getting link status change
1637 	 * interrupts ...
1638 	 */
1639 	err = ddi_intr_add_softint(devinfo, &rgep->resched_hdl,
1640 		DDI_INTR_SOFTPRI_MIN, rge_reschedule, (caddr_t)rgep);
1641 	if (err != DDI_SUCCESS) {
1642 		rge_problem(rgep, "ddi_intr_add_softint() failed");
1643 		goto attach_fail;
1644 	}
1645 	rgep->progress |= PROGRESS_RESCHED;
1646 	err = ddi_intr_add_softint(devinfo, &rgep->factotum_hdl,
1647 		DDI_INTR_SOFTPRI_MIN, rge_chip_factotum, (caddr_t)rgep);
1648 	if (err != DDI_SUCCESS) {
1649 		rge_problem(rgep, "ddi_intr_add_softint() failed");
1650 		goto attach_fail;
1651 	}
1652 	rgep->progress |= PROGRESS_FACTOTUM;
1653 
1654 	/*
1655 	 * Get supported interrupt types
1656 	 */
1657 	if (ddi_intr_get_supported_types(devinfo, &intr_types)
1658 	    != DDI_SUCCESS) {
1659 		rge_error(rgep, "ddi_intr_get_supported_types failed\n");
1660 		goto attach_fail;
1661 	}
1662 
1663 	/*
1664 	 * Add the h/w interrupt handler and initialise mutexes
1665 	 */
1666 	if ((intr_types & DDI_INTR_TYPE_MSI) && rgep->msi_enable) {
1667 		if (rge_add_intrs(rgep, DDI_INTR_TYPE_MSI) != DDI_SUCCESS) {
1668 			rge_error(rgep, "MSI registration failed, "
1669 			    "trying FIXED interrupt type\n");
1670 		} else {
1671 			rge_log(rgep, "Using MSI interrupt type\n");
1672 			rgep->intr_type = DDI_INTR_TYPE_MSI;
1673 			rgep->progress |= PROGRESS_INTR;
1674 		}
1675 	}
1676 	if (!(rgep->progress & PROGRESS_INTR) &&
1677 	    (intr_types & DDI_INTR_TYPE_FIXED)) {
1678 		if (rge_add_intrs(rgep, DDI_INTR_TYPE_FIXED) != DDI_SUCCESS) {
1679 			rge_error(rgep, "FIXED interrupt "
1680 			    "registration failed\n");
1681 			goto attach_fail;
1682 		}
1683 		rge_log(rgep, "Using FIXED interrupt type\n");
1684 		rgep->intr_type = DDI_INTR_TYPE_FIXED;
1685 		rgep->progress |= PROGRESS_INTR;
1686 	}
1687 	if (!(rgep->progress & PROGRESS_INTR)) {
1688 		rge_error(rgep, "No interrupts registered\n");
1689 		goto attach_fail;
1690 	}
1691 	mutex_init(rgep->genlock, NULL, MUTEX_DRIVER,
1692 	    DDI_INTR_PRI(rgep->intr_pri));
1693 	rw_init(rgep->errlock, NULL, RW_DRIVER,
1694 	    DDI_INTR_PRI(rgep->intr_pri));
1695 	mutex_init(rgep->tx_lock, NULL, MUTEX_DRIVER,
1696 	    DDI_INTR_PRI(rgep->intr_pri));
1697 	mutex_init(rgep->tc_lock, NULL, MUTEX_DRIVER,
1698 	    DDI_INTR_PRI(rgep->intr_pri));
1699 	mutex_init(rgep->rx_lock, NULL, MUTEX_DRIVER,
1700 	    DDI_INTR_PRI(rgep->intr_pri));
1701 	mutex_init(rgep->rc_lock, NULL, MUTEX_DRIVER,
1702 	    DDI_INTR_PRI(rgep->intr_pri));
1703 
1704 	/*
1705 	 * Initialize rings
1706 	 */
1707 	err = rge_init_rings(rgep);
1708 	if (err != DDI_SUCCESS) {
1709 		rge_problem(rgep, "rge_init_rings() failed");
1710 		goto attach_fail;
1711 	}
1712 	rgep->progress |= PROGRESS_INIT;
1713 
1714 	/*
1715 	 * Now that mutex locks are initialized, enable interrupts.
1716 	 */
1717 	if (rgep->intr_cap & DDI_INTR_FLAG_BLOCK) {
1718 		/* Call ddi_intr_block_enable() for MSI interrupts */
1719 		(void) ddi_intr_block_enable(rgep->htable, rgep->intr_cnt);
1720 	} else {
1721 		/* Call ddi_intr_enable for MSI or FIXED interrupts */
1722 		for (i = 0; i < rgep->intr_cnt; i++) {
1723 			(void) ddi_intr_enable(rgep->htable[i]);
1724 		}
1725 	}
1726 
1727 	/*
1728 	 * Initialise link state variables
1729 	 * Stop, reset & reinitialise the chip.
1730 	 * Initialise the (internal) PHY.
1731 	 */
1732 	rgep->param_link_up = LINK_STATE_UNKNOWN;
1733 
1734 	/*
1735 	 * Reset chip & rings to initial state; also reset address
1736 	 * filtering, promiscuity, loopback mode.
1737 	 */
1738 	mutex_enter(rgep->genlock);
1739 	(void) rge_chip_reset(rgep);
1740 	rge_chip_sync(rgep, RGE_GET_MAC);
1741 	bzero(rgep->mcast_hash, sizeof (rgep->mcast_hash));
1742 	bzero(rgep->mcast_refs, sizeof (rgep->mcast_refs));
1743 	rgep->promisc = B_FALSE;
1744 	rgep->param_loop_mode = RGE_LOOP_NONE;
1745 	mutex_exit(rgep->genlock);
1746 	rge_phy_init(rgep);
1747 	rgep->progress |= PROGRESS_PHY;
1748 
1749 	/*
1750 	 * Create & initialise named kstats
1751 	 */
1752 	rge_init_kstats(rgep, instance);
1753 	rgep->progress |= PROGRESS_KSTATS;
1754 
1755 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1756 		goto attach_fail;
1757 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1758 	macp->m_driver = rgep;
1759 	macp->m_dip = devinfo;
1760 	macp->m_src_addr = rgep->netaddr;
1761 	macp->m_callbacks = &rge_m_callbacks;
1762 	macp->m_min_sdu = 0;
1763 	macp->m_max_sdu = rgep->default_mtu;
1764 
1765 	/*
1766 	 * Finally, we're ready to register ourselves with the MAC layer
1767 	 * interface; if this succeeds, we're all ready to start()
1768 	 */
1769 	err = mac_register(macp, &rgep->mh);
1770 	mac_free(macp);
1771 	if (err != 0)
1772 		goto attach_fail;
1773 
1774 	cychand.cyh_func = rge_chip_cyclic;
1775 	cychand.cyh_arg = rgep;
1776 	cychand.cyh_level = CY_LOCK_LEVEL;
1777 	cyctime.cyt_when = 0;
1778 	cyctime.cyt_interval = RGE_CYCLIC_PERIOD;
1779 	mutex_enter(&cpu_lock);
1780 	rgep->cyclic_id = cyclic_add(&cychand, &cyctime);
1781 	mutex_exit(&cpu_lock);
1782 
1783 	rgep->progress |= PROGRESS_READY;
1784 	return (DDI_SUCCESS);
1785 
1786 attach_fail:
1787 	rge_unattach(rgep);
1788 	return (DDI_FAILURE);
1789 }
1790 
1791 /*
1792  *	rge_suspend() -- suspend transmit/receive for powerdown
1793  */
1794 static int
1795 rge_suspend(rge_t *rgep)
1796 {
1797 	/*
1798 	 * Stop processing and idle (powerdown) the PHY ...
1799 	 */
1800 	mutex_enter(rgep->genlock);
1801 	rge_stop(rgep);
1802 	mutex_exit(rgep->genlock);
1803 
1804 	return (DDI_SUCCESS);
1805 }
1806 
1807 /*
1808  * detach(9E) -- Detach a device from the system
1809  */
1810 static int
1811 rge_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1812 {
1813 	rge_t *rgep;
1814 
1815 	RGE_GTRACE(("rge_detach($%p, %d)", (void *)devinfo, cmd));
1816 
1817 	rgep = ddi_get_driver_private(devinfo);
1818 
1819 	switch (cmd) {
1820 	default:
1821 		return (DDI_FAILURE);
1822 
1823 	case DDI_SUSPEND:
1824 		return (rge_suspend(rgep));
1825 
1826 	case DDI_DETACH:
1827 		break;
1828 	}
1829 
1830 	/*
1831 	 * If there is any posted buffer, the driver should reject to be
1832 	 * detached. Need notice upper layer to release them.
1833 	 */
1834 	if (!(rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY) &&
1835 	    rgep->rx_free != RGE_BUF_SLOTS)
1836 		return (DDI_FAILURE);
1837 
1838 	/*
1839 	 * Unregister from the MAC layer subsystem.  This can fail, in
1840 	 * particular if there are DLPI style-2 streams still open -
1841 	 * in which case we just return failure without shutting
1842 	 * down chip operations.
1843 	 */
1844 	if (mac_unregister(rgep->mh) != 0)
1845 		return (DDI_FAILURE);
1846 
1847 	/*
1848 	 * All activity stopped, so we can clean up & exit
1849 	 */
1850 	rge_unattach(rgep);
1851 	return (DDI_SUCCESS);
1852 }
1853 
1854 
1855 /*
1856  * ========== Module Loading Data & Entry Points ==========
1857  */
1858 
1859 #undef	RGE_DBG
1860 #define	RGE_DBG		RGE_DBG_INIT	/* debug flag for this code	*/
1861 DDI_DEFINE_STREAM_OPS(rge_dev_ops, nulldev, nulldev, rge_attach, rge_detach,
1862     nodev, NULL, D_MP, NULL);
1863 
1864 static struct modldrv rge_modldrv = {
1865 	&mod_driverops,		/* Type of module.  This one is a driver */
1866 	rge_ident,		/* short description */
1867 	&rge_dev_ops		/* driver specific ops */
1868 };
1869 
1870 static struct modlinkage modlinkage = {
1871 	MODREV_1, (void *)&rge_modldrv, NULL
1872 };
1873 
1874 
1875 int
1876 _info(struct modinfo *modinfop)
1877 {
1878 	return (mod_info(&modlinkage, modinfop));
1879 }
1880 
1881 int
1882 _init(void)
1883 {
1884 	int status;
1885 
1886 	mac_init_ops(&rge_dev_ops, "rge");
1887 	status = mod_install(&modlinkage);
1888 	if (status == DDI_SUCCESS)
1889 		mutex_init(rge_log_mutex, NULL, MUTEX_DRIVER, NULL);
1890 	else
1891 		mac_fini_ops(&rge_dev_ops);
1892 
1893 	return (status);
1894 }
1895 
1896 int
1897 _fini(void)
1898 {
1899 	int status;
1900 
1901 	status = mod_remove(&modlinkage);
1902 	if (status == DDI_SUCCESS) {
1903 		mac_fini_ops(&rge_dev_ops);
1904 		mutex_destroy(rge_log_mutex);
1905 	}
1906 	return (status);
1907 }
1908