xref: /titanic_52/usr/src/uts/common/io/rge/rge_main.c (revision d2ec54f7875f7e05edd56195adbeb593c947763f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "rge.h"
29 
/*
 * This is the string displayed by modinfo, etc.
 * Make sure you keep the version ID up to date!
 *
 * NOTE(review): "%I%" looks like an SCCS keyword (the file's #pragma ident
 * uses the same notation), so the version shown is presumably substituted
 * by the source-control system at checkout -- confirm for non-SCCS builds.
 */
static char rge_ident[] = "Realtek 1Gb Ethernet v%I%";
35 
/*
 * DMA attributes used for buffers allocated by ddi_dma_mem_alloc().
 *
 * In this file they are passed to rge_alloc_dma_mem() for the Tx and Rx
 * packet buffers.  The values restrict a buffer to the low 32 bits of the
 * address space, require 16-byte alignment, and allow only a single
 * scatter/gather element (one cookie per buffer).
 */
static ddi_dma_attr_t dma_attr_buf = {
	DMA_ATTR_V0,		/* dma_attr version */
	(uint32_t)0,		/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_count_max */
	(uint32_t)16,		/* dma_attr_align */
	0xFFFFFFFF,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0,			/* dma_attr_flags */
};
53 
/*
 * DMA attributes used for BDs (buffer descriptors) allocated by
 * ddi_dma_mem_alloc().
 *
 * In this file they are passed to rge_alloc_dma_mem() for the statistics
 * dump area and for the Tx/Rx descriptor rings.  They are the same as the
 * packet-buffer attributes except for the stricter 256-byte alignment.
 */
static ddi_dma_attr_t dma_attr_desc = {
	DMA_ATTR_V0,		/* dma_attr version */
	(uint32_t)0,		/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_count_max */
	(uint32_t)256,		/* dma_attr_align */
	0xFFFFFFFF,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0,			/* dma_attr_flags */
};
71 
/*
 * PIO access attributes for registers: little-endian structure access
 * with strictly ordered loads and stores.
 * (Field names below follow ddi_device_acc_attr(9S).)
 */
static ddi_device_acc_attr_t rge_reg_accattr = {
	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_STRUCTURE_LE_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC,	/* devacc_attr_dataorder */
	DDI_DEFAULT_ACC		/* devacc_attr_access */
};
81 
/*
 * DMA access attributes for descriptors: no byte swapping on access
 * (the ring code in this file swaps fields explicitly via RGE_BSWAP_32)
 * and strictly ordered loads and stores.
 * (Field names below follow ddi_device_acc_attr(9S).)
 */
static ddi_device_acc_attr_t rge_desc_accattr = {
	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_NEVERSWAP_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC,	/* devacc_attr_dataorder */
	DDI_DEFAULT_ACC		/* devacc_attr_access */
};
91 
/*
 * DMA access attributes for packet data: no byte swapping on access and
 * strictly ordered loads and stores.
 * (Field names below follow ddi_device_acc_attr(9S).)
 */
static ddi_device_acc_attr_t rge_buf_accattr = {
	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_NEVERSWAP_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC,	/* devacc_attr_dataorder */
	DDI_DEFAULT_ACC		/* devacc_attr_access */
};
101 
/*
 * Property names
 *
 * NOTE(review): these are presumably the names of driver properties
 * looked up at attach time (debug flags, default MTU, MSI enable); the
 * code that consumes them is outside this section -- confirm there.
 */
static char debug_propname[] = "rge_debug_flags";
static char mtu_propname[] = "default_mtu";
static char msi_propname[] = "msi_enable";
108 
/*
 * Forward declarations of the MAC-layer entry points defined in this
 * file; they are needed by the rge_m_callbacks table, which precedes
 * their definitions.
 */
static int		rge_m_start(void *);
static void		rge_m_stop(void *);
static int		rge_m_promisc(void *, boolean_t);
static int		rge_m_multicst(void *, boolean_t, const uint8_t *);
static int		rge_m_unicst(void *, const uint8_t *);
static void		rge_m_resources(void *);
static void		rge_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t	rge_m_getcapab(void *, mac_capab_t, void *);
117 
/*
 * Flag word for the callback table below.  Going by the flag names it
 * advertises the optional entry points this driver supplies (resources,
 * ioctl and getcapab) -- see the mac_callbacks_t definition to confirm.
 */
#define	RGE_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)

/*
 * MAC-layer callback table for this driver.  rge_m_stat and rge_m_tx
 * are not defined in this section of the file; the remaining entry
 * points are the rge_m_*() routines declared above.
 */
static mac_callbacks_t rge_m_callbacks = {
	RGE_M_CALLBACK_FLAGS,
	rge_m_stat,
	rge_m_start,
	rge_m_stop,
	rge_m_promisc,
	rge_m_multicst,
	rge_m_unicst,
	rge_m_tx,
	rge_m_resources,
	rge_m_ioctl,
	rge_m_getcapab
};
133 
134 /*
135  * Allocate an area of memory and a DMA handle for accessing it
136  */
137 static int
138 rge_alloc_dma_mem(rge_t *rgep, size_t memsize, ddi_dma_attr_t *dma_attr_p,
139 	ddi_device_acc_attr_t *acc_attr_p, uint_t dma_flags, dma_area_t *dma_p)
140 {
141 	caddr_t vaddr;
142 	int err;
143 
144 	/*
145 	 * Allocate handle
146 	 */
147 	err = ddi_dma_alloc_handle(rgep->devinfo, dma_attr_p,
148 	    DDI_DMA_SLEEP, NULL, &dma_p->dma_hdl);
149 	if (err != DDI_SUCCESS) {
150 		dma_p->dma_hdl = NULL;
151 		return (DDI_FAILURE);
152 	}
153 
154 	/*
155 	 * Allocate memory
156 	 */
157 	err = ddi_dma_mem_alloc(dma_p->dma_hdl, memsize, acc_attr_p,
158 	    dma_flags & (DDI_DMA_CONSISTENT | DDI_DMA_STREAMING),
159 	    DDI_DMA_SLEEP, NULL, &vaddr, &dma_p->alength, &dma_p->acc_hdl);
160 	if (err != DDI_SUCCESS) {
161 		ddi_dma_free_handle(&dma_p->dma_hdl);
162 		dma_p->dma_hdl = NULL;
163 		dma_p->acc_hdl = NULL;
164 		return (DDI_FAILURE);
165 	}
166 
167 	/*
168 	 * Bind the two together
169 	 */
170 	dma_p->mem_va = vaddr;
171 	err = ddi_dma_addr_bind_handle(dma_p->dma_hdl, NULL,
172 	    vaddr, dma_p->alength, dma_flags, DDI_DMA_SLEEP, NULL,
173 	    &dma_p->cookie, &dma_p->ncookies);
174 	if (err != DDI_DMA_MAPPED || dma_p->ncookies != 1) {
175 		ddi_dma_mem_free(&dma_p->acc_hdl);
176 		ddi_dma_free_handle(&dma_p->dma_hdl);
177 		dma_p->acc_hdl = NULL;
178 		dma_p->dma_hdl = NULL;
179 		return (DDI_FAILURE);
180 	}
181 
182 	dma_p->nslots = ~0U;
183 	dma_p->size = ~0U;
184 	dma_p->token = ~0U;
185 	dma_p->offset = 0;
186 	return (DDI_SUCCESS);
187 }
188 
189 /*
190  * Free one allocated area of DMAable memory
191  */
192 static void
193 rge_free_dma_mem(dma_area_t *dma_p)
194 {
195 	if (dma_p->dma_hdl != NULL) {
196 		if (dma_p->ncookies) {
197 			(void) ddi_dma_unbind_handle(dma_p->dma_hdl);
198 			dma_p->ncookies = 0;
199 		}
200 		ddi_dma_free_handle(&dma_p->dma_hdl);
201 		dma_p->dma_hdl = NULL;
202 	}
203 
204 	if (dma_p->acc_hdl != NULL) {
205 		ddi_dma_mem_free(&dma_p->acc_hdl);
206 		dma_p->acc_hdl = NULL;
207 	}
208 }
209 
210 /*
211  * Utility routine to carve a slice off a chunk of allocated memory,
212  * updating the chunk descriptor accordingly.  The size of the slice
213  * is given by the product of the <qty> and <size> parameters.
214  */
215 static void
216 rge_slice_chunk(dma_area_t *slice, dma_area_t *chunk,
217 	uint32_t qty, uint32_t size)
218 {
219 	static uint32_t sequence = 0xbcd5704a;
220 	size_t totsize;
221 
222 	totsize = qty*size;
223 	ASSERT(size >= 0);
224 	ASSERT(totsize <= chunk->alength);
225 
226 	*slice = *chunk;
227 	slice->nslots = qty;
228 	slice->size = size;
229 	slice->alength = totsize;
230 	slice->token = ++sequence;
231 
232 	chunk->mem_va = (caddr_t)chunk->mem_va + totsize;
233 	chunk->alength -= totsize;
234 	chunk->offset += totsize;
235 	chunk->cookie.dmac_laddress += totsize;
236 	chunk->cookie.dmac_size -= totsize;
237 }
238 
239 static int
240 rge_alloc_bufs(rge_t *rgep)
241 {
242 	size_t txdescsize;
243 	size_t rxdescsize;
244 	int err;
245 
246 	/*
247 	 * Allocate memory & handle for packet statistics
248 	 */
249 	err = rge_alloc_dma_mem(rgep,
250 	    RGE_STATS_DUMP_SIZE,
251 	    &dma_attr_desc,
252 	    &rge_desc_accattr,
253 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
254 	    &rgep->dma_area_stats);
255 	if (err != DDI_SUCCESS)
256 		return (DDI_FAILURE);
257 	rgep->hw_stats = DMA_VPTR(rgep->dma_area_stats);
258 
259 	/*
260 	 * Allocate memory & handle for Tx descriptor ring
261 	 */
262 	txdescsize = RGE_SEND_SLOTS * sizeof (rge_bd_t);
263 	err = rge_alloc_dma_mem(rgep,
264 	    txdescsize,
265 	    &dma_attr_desc,
266 	    &rge_desc_accattr,
267 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
268 	    &rgep->dma_area_txdesc);
269 	if (err != DDI_SUCCESS)
270 		return (DDI_FAILURE);
271 
272 	/*
273 	 * Allocate memory & handle for Rx descriptor ring
274 	 */
275 	rxdescsize = RGE_RECV_SLOTS * sizeof (rge_bd_t);
276 	err = rge_alloc_dma_mem(rgep,
277 	    rxdescsize,
278 	    &dma_attr_desc,
279 	    &rge_desc_accattr,
280 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
281 	    &rgep->dma_area_rxdesc);
282 	if (err != DDI_SUCCESS)
283 		return (DDI_FAILURE);
284 
285 	return (DDI_SUCCESS);
286 }
287 
288 /*
289  * rge_free_bufs() -- free descriptors/buffers allocated for this
290  * device instance.
291  */
292 static void
293 rge_free_bufs(rge_t *rgep)
294 {
295 	rge_free_dma_mem(&rgep->dma_area_stats);
296 	rge_free_dma_mem(&rgep->dma_area_txdesc);
297 	rge_free_dma_mem(&rgep->dma_area_rxdesc);
298 }
299 
300 /*
301  * ========== Transmit and receive ring reinitialisation ==========
302  */
303 
304 /*
305  * These <reinit> routines each reset the rx/tx rings to an initial
306  * state, assuming that the corresponding <init> routine has already
307  * been called exactly once.
308  */
309 static void
310 rge_reinit_send_ring(rge_t *rgep)
311 {
312 	sw_sbd_t *ssbdp;
313 	rge_bd_t *bdp;
314 	uint32_t slot;
315 
316 	/*
317 	 * re-init send ring
318 	 */
319 	DMA_ZERO(rgep->tx_desc);
320 	ssbdp = rgep->sw_sbds;
321 	bdp = rgep->tx_ring;
322 	for (slot = 0; slot < RGE_SEND_SLOTS; slot++) {
323 		bdp->host_buf_addr =
324 		    RGE_BSWAP_32(ssbdp->pbuf.cookie.dmac_laddress);
325 		bdp->host_buf_addr_hi =
326 		    RGE_BSWAP_32(ssbdp->pbuf.cookie.dmac_laddress >> 32);
327 		/* last BD in Tx ring */
328 		if (slot == (RGE_SEND_SLOTS - 1))
329 			bdp->flags_len = RGE_BSWAP_32(BD_FLAG_EOR);
330 		ssbdp++;
331 		bdp++;
332 	}
333 	DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
334 	rgep->tx_next = 0;
335 	rgep->tc_next = 0;
336 	rgep->tc_tail = 0;
337 	rgep->tx_flow = 0;
338 	rgep->tx_free = RGE_SEND_SLOTS;
339 }
340 
341 static void
342 rge_reinit_recv_ring(rge_t *rgep)
343 {
344 	rge_bd_t *bdp;
345 	sw_rbd_t *srbdp;
346 	dma_area_t *pbuf;
347 	uint32_t slot;
348 
349 	/*
350 	 * re-init receive ring
351 	 */
352 	DMA_ZERO(rgep->rx_desc);
353 	srbdp = rgep->sw_rbds;
354 	bdp = rgep->rx_ring;
355 	for (slot = 0; slot < RGE_RECV_SLOTS; slot++) {
356 		pbuf = &srbdp->rx_buf->pbuf;
357 		bdp->host_buf_addr =
358 		    RGE_BSWAP_32(pbuf->cookie.dmac_laddress + rgep->head_room);
359 		bdp->host_buf_addr_hi =
360 		    RGE_BSWAP_32(pbuf->cookie.dmac_laddress >> 32);
361 		bdp->flags_len = RGE_BSWAP_32(BD_FLAG_HW_OWN |
362 		    (rgep->rxbuf_size - rgep->head_room));
363 		/* last BD in Tx ring */
364 		if (slot == (RGE_RECV_SLOTS - 1))
365 			bdp->flags_len |= RGE_BSWAP_32(BD_FLAG_EOR);
366 		srbdp++;
367 		bdp++;
368 	}
369 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORDEV);
370 	rgep->watchdog = 0;
371 	rgep->rx_next = 0;
372 }
373 
374 static void
375 rge_reinit_buf_ring(rge_t *rgep)
376 {
377 
378 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)
379 		return;
380 
381 	/*
382 	 * If all the up-sending buffers haven't been returned to driver,
383 	 * use bcopy() only in rx process.
384 	 */
385 	if (rgep->rx_free != RGE_BUF_SLOTS)
386 		rgep->rx_bcopy = B_TRUE;
387 }
388 
389 static void
390 rge_reinit_rings(rge_t *rgep)
391 {
392 	rge_reinit_send_ring(rgep);
393 	rge_reinit_recv_ring(rgep);
394 	rge_reinit_buf_ring(rgep);
395 }
396 
397 static void
398 rge_fini_send_ring(rge_t *rgep)
399 {
400 	sw_sbd_t *ssbdp;
401 	uint32_t slot;
402 
403 	ssbdp = rgep->sw_sbds;
404 	for (slot = 0; slot < RGE_SEND_SLOTS; ++slot) {
405 		rge_free_dma_mem(&ssbdp->pbuf);
406 		ssbdp++;
407 	}
408 
409 	kmem_free(rgep->sw_sbds, RGE_SEND_SLOTS * sizeof (sw_sbd_t));
410 	rgep->sw_sbds = NULL;
411 }
412 
413 static void
414 rge_fini_recv_ring(rge_t *rgep)
415 {
416 	sw_rbd_t *srbdp;
417 	uint32_t slot;
418 
419 	srbdp = rgep->sw_rbds;
420 	for (slot = 0; slot < RGE_RECV_SLOTS; ++srbdp, ++slot) {
421 		if (srbdp->rx_buf) {
422 			if (srbdp->rx_buf->mp != NULL) {
423 				freemsg(srbdp->rx_buf->mp);
424 				srbdp->rx_buf->mp = NULL;
425 			}
426 			rge_free_dma_mem(&srbdp->rx_buf->pbuf);
427 			kmem_free(srbdp->rx_buf, sizeof (dma_buf_t));
428 			srbdp->rx_buf = NULL;
429 		}
430 	}
431 
432 	kmem_free(rgep->sw_rbds, RGE_RECV_SLOTS * sizeof (sw_rbd_t));
433 	rgep->sw_rbds = NULL;
434 }
435 
436 static void
437 rge_fini_buf_ring(rge_t *rgep)
438 {
439 	sw_rbd_t *srbdp;
440 	uint32_t slot;
441 
442 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)
443 		return;
444 
445 	ASSERT(rgep->rx_free == RGE_BUF_SLOTS);
446 
447 	srbdp = rgep->free_srbds;
448 	for (slot = 0; slot < RGE_BUF_SLOTS; ++srbdp, ++slot) {
449 		if (srbdp->rx_buf != NULL) {
450 			if (srbdp->rx_buf->mp != NULL) {
451 				freemsg(srbdp->rx_buf->mp);
452 				srbdp->rx_buf->mp = NULL;
453 			}
454 			rge_free_dma_mem(&srbdp->rx_buf->pbuf);
455 			kmem_free(srbdp->rx_buf, sizeof (dma_buf_t));
456 			srbdp->rx_buf = NULL;
457 		}
458 	}
459 
460 	kmem_free(rgep->free_srbds, RGE_BUF_SLOTS * sizeof (sw_rbd_t));
461 	rgep->free_srbds = NULL;
462 }
463 
464 static void
465 rge_fini_rings(rge_t *rgep)
466 {
467 	rge_fini_send_ring(rgep);
468 	rge_fini_recv_ring(rgep);
469 	rge_fini_buf_ring(rgep);
470 }
471 
472 static int
473 rge_init_send_ring(rge_t *rgep)
474 {
475 	uint32_t slot;
476 	sw_sbd_t *ssbdp;
477 	dma_area_t *pbuf;
478 	dma_area_t desc;
479 	int err;
480 
481 	/*
482 	 * Allocate the array of s/w Tx Buffer Descriptors
483 	 */
484 	ssbdp = kmem_zalloc(RGE_SEND_SLOTS*sizeof (*ssbdp), KM_SLEEP);
485 	rgep->sw_sbds = ssbdp;
486 
487 	/*
488 	 * Init send ring
489 	 */
490 	rgep->tx_desc = rgep->dma_area_txdesc;
491 	DMA_ZERO(rgep->tx_desc);
492 	rgep->tx_ring = rgep->tx_desc.mem_va;
493 
494 	desc = rgep->tx_desc;
495 	for (slot = 0; slot < RGE_SEND_SLOTS; slot++) {
496 		rge_slice_chunk(&ssbdp->desc, &desc, 1, sizeof (rge_bd_t));
497 
498 		/*
499 		 * Allocate memory & handle for Tx buffers
500 		 */
501 		pbuf = &ssbdp->pbuf;
502 		err = rge_alloc_dma_mem(rgep, rgep->txbuf_size,
503 		    &dma_attr_buf, &rge_buf_accattr,
504 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, pbuf);
505 		if (err != DDI_SUCCESS) {
506 			rge_error(rgep,
507 			    "rge_init_send_ring: alloc tx buffer failed");
508 			rge_fini_send_ring(rgep);
509 			return (DDI_FAILURE);
510 		}
511 		ssbdp++;
512 	}
513 	ASSERT(desc.alength == 0);
514 
515 	DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
516 	return (DDI_SUCCESS);
517 }
518 
519 static int
520 rge_init_recv_ring(rge_t *rgep)
521 {
522 	uint32_t slot;
523 	sw_rbd_t *srbdp;
524 	dma_buf_t *rx_buf;
525 	dma_area_t *pbuf;
526 	int err;
527 
528 	/*
529 	 * Allocate the array of s/w Rx Buffer Descriptors
530 	 */
531 	srbdp = kmem_zalloc(RGE_RECV_SLOTS*sizeof (*srbdp), KM_SLEEP);
532 	rgep->sw_rbds = srbdp;
533 
534 	/*
535 	 * Init receive ring
536 	 */
537 	rgep->rx_next = 0;
538 	rgep->rx_desc = rgep->dma_area_rxdesc;
539 	DMA_ZERO(rgep->rx_desc);
540 	rgep->rx_ring = rgep->rx_desc.mem_va;
541 
542 	for (slot = 0; slot < RGE_RECV_SLOTS; slot++) {
543 		srbdp->rx_buf = rx_buf =
544 		    kmem_zalloc(sizeof (dma_buf_t), KM_SLEEP);
545 
546 		/*
547 		 * Allocate memory & handle for Rx buffers
548 		 */
549 		pbuf = &rx_buf->pbuf;
550 		err = rge_alloc_dma_mem(rgep, rgep->rxbuf_size,
551 		    &dma_attr_buf, &rge_buf_accattr,
552 		    DDI_DMA_READ | DDI_DMA_STREAMING, pbuf);
553 		if (err != DDI_SUCCESS) {
554 			rge_fini_recv_ring(rgep);
555 			rge_error(rgep,
556 			    "rge_init_recv_ring: alloc rx buffer failed");
557 			return (DDI_FAILURE);
558 		}
559 
560 		pbuf->alength -= rgep->head_room;
561 		pbuf->offset += rgep->head_room;
562 		if (!(rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY)) {
563 			rx_buf->rx_recycle.free_func = rge_rx_recycle;
564 			rx_buf->rx_recycle.free_arg = (caddr_t)rx_buf;
565 			rx_buf->private = (caddr_t)rgep;
566 			rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
567 			    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
568 			if (rx_buf->mp == NULL) {
569 				rge_fini_recv_ring(rgep);
570 				rge_problem(rgep,
571 				    "rge_init_recv_ring: desballoc() failed");
572 				return (DDI_FAILURE);
573 			}
574 		}
575 		srbdp++;
576 	}
577 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORDEV);
578 	return (DDI_SUCCESS);
579 }
580 
581 static int
582 rge_init_buf_ring(rge_t *rgep)
583 {
584 	uint32_t slot;
585 	sw_rbd_t *free_srbdp;
586 	dma_buf_t *rx_buf;
587 	dma_area_t *pbuf;
588 	int err;
589 
590 	if (rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY) {
591 		rgep->rx_bcopy = B_TRUE;
592 		return (DDI_SUCCESS);
593 	}
594 
595 	/*
596 	 * Allocate the array of s/w free Buffer Descriptors
597 	 */
598 	free_srbdp = kmem_zalloc(RGE_BUF_SLOTS*sizeof (*free_srbdp), KM_SLEEP);
599 	rgep->free_srbds = free_srbdp;
600 
601 	/*
602 	 * Init free buffer ring
603 	 */
604 	rgep->rc_next = 0;
605 	rgep->rf_next = 0;
606 	rgep->rx_bcopy = B_FALSE;
607 	rgep->rx_free = RGE_BUF_SLOTS;
608 	for (slot = 0; slot < RGE_BUF_SLOTS; slot++) {
609 		free_srbdp->rx_buf = rx_buf =
610 		    kmem_zalloc(sizeof (dma_buf_t), KM_SLEEP);
611 
612 		/*
613 		 * Allocate memory & handle for free Rx buffers
614 		 */
615 		pbuf = &rx_buf->pbuf;
616 		err = rge_alloc_dma_mem(rgep, rgep->rxbuf_size,
617 		    &dma_attr_buf, &rge_buf_accattr,
618 		    DDI_DMA_READ | DDI_DMA_STREAMING, pbuf);
619 		if (err != DDI_SUCCESS) {
620 			rge_fini_buf_ring(rgep);
621 			rge_error(rgep,
622 			    "rge_init_buf_ring: alloc rx free buffer failed");
623 			return (DDI_FAILURE);
624 		}
625 		pbuf->alength -= rgep->head_room;
626 		pbuf->offset += rgep->head_room;
627 		rx_buf->rx_recycle.free_func = rge_rx_recycle;
628 		rx_buf->rx_recycle.free_arg = (caddr_t)rx_buf;
629 		rx_buf->private = (caddr_t)rgep;
630 		rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
631 		    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
632 		if (rx_buf->mp == NULL) {
633 			rge_fini_buf_ring(rgep);
634 			rge_problem(rgep,
635 			    "rge_init_buf_ring: desballoc() failed");
636 			return (DDI_FAILURE);
637 		}
638 		free_srbdp++;
639 	}
640 	return (DDI_SUCCESS);
641 }
642 
643 static int
644 rge_init_rings(rge_t *rgep)
645 {
646 	int err;
647 
648 	err = rge_init_send_ring(rgep);
649 	if (err != DDI_SUCCESS)
650 		return (DDI_FAILURE);
651 
652 	err = rge_init_recv_ring(rgep);
653 	if (err != DDI_SUCCESS) {
654 		rge_fini_send_ring(rgep);
655 		return (DDI_FAILURE);
656 	}
657 
658 	err = rge_init_buf_ring(rgep);
659 	if (err != DDI_SUCCESS) {
660 		rge_fini_send_ring(rgep);
661 		rge_fini_recv_ring(rgep);
662 		return (DDI_FAILURE);
663 	}
664 
665 	return (DDI_SUCCESS);
666 }
667 
668 /*
669  * ========== Internal state management entry points ==========
670  */
671 
672 #undef	RGE_DBG
673 #define	RGE_DBG		RGE_DBG_NEMO	/* debug flag for this code	*/
674 
675 /*
676  * These routines provide all the functionality required by the
677  * corresponding MAC layer entry points, but don't update the
678  * MAC state so they can be called internally without disturbing
679  * our record of what NEMO thinks we should be doing ...
680  */
681 
682 /*
683  *	rge_reset() -- reset h/w & rings to initial state
684  */
685 static void
686 rge_reset(rge_t *rgep)
687 {
688 	ASSERT(mutex_owned(rgep->genlock));
689 
690 	/*
691 	 * Grab all the other mutexes in the world (this should
692 	 * ensure no other threads are manipulating driver state)
693 	 */
694 	mutex_enter(rgep->rx_lock);
695 	mutex_enter(rgep->rc_lock);
696 	rw_enter(rgep->errlock, RW_WRITER);
697 
698 	(void) rge_chip_reset(rgep);
699 	rge_reinit_rings(rgep);
700 	rge_chip_init(rgep);
701 
702 	/*
703 	 * Free the world ...
704 	 */
705 	rw_exit(rgep->errlock);
706 	mutex_exit(rgep->rc_lock);
707 	mutex_exit(rgep->rx_lock);
708 
709 	rgep->stats.rpackets = 0;
710 	rgep->stats.rbytes = 0;
711 	rgep->stats.opackets = 0;
712 	rgep->stats.obytes = 0;
713 	rgep->stats.tx_pre_ismax = B_FALSE;
714 	rgep->stats.tx_cur_ismax = B_FALSE;
715 
716 	RGE_DEBUG(("rge_reset($%p) done", (void *)rgep));
717 }
718 
719 /*
720  *	rge_stop() -- stop processing, don't reset h/w or rings
721  */
722 static void
723 rge_stop(rge_t *rgep)
724 {
725 	ASSERT(mutex_owned(rgep->genlock));
726 
727 	rge_chip_stop(rgep, B_FALSE);
728 
729 	RGE_DEBUG(("rge_stop($%p) done", (void *)rgep));
730 }
731 
732 /*
733  *	rge_start() -- start transmitting/receiving
734  */
735 static void
736 rge_start(rge_t *rgep)
737 {
738 	ASSERT(mutex_owned(rgep->genlock));
739 
740 	/*
741 	 * Start chip processing, including enabling interrupts
742 	 */
743 	rge_chip_start(rgep);
744 	rgep->watchdog = 0;
745 }
746 
747 /*
748  * rge_restart - restart transmitting/receiving after error or suspend
749  */
750 void
751 rge_restart(rge_t *rgep)
752 {
753 	uint32_t i;
754 
755 	ASSERT(mutex_owned(rgep->genlock));
756 	/*
757 	 * Wait for posted buffer to be freed...
758 	 */
759 	if (!rgep->rx_bcopy) {
760 		for (i = 0; i < RXBUFF_FREE_LOOP; i++) {
761 			if (rgep->rx_free == RGE_BUF_SLOTS)
762 				break;
763 			drv_usecwait(1000);
764 			RGE_DEBUG(("rge_restart: waiting for rx buf free..."));
765 		}
766 	}
767 	rge_reset(rgep);
768 	rgep->stats.chip_reset++;
769 	if (rgep->rge_mac_state == RGE_MAC_STARTED) {
770 		rge_start(rgep);
771 		rgep->resched_needed = B_TRUE;
772 		(void) ddi_intr_trigger_softint(rgep->resched_hdl, NULL);
773 	}
774 }
775 
776 
777 /*
778  * ========== Nemo-required management entry points ==========
779  */
780 
781 #undef	RGE_DBG
782 #define	RGE_DBG		RGE_DBG_NEMO	/* debug flag for this code	*/
783 
784 /*
785  *	rge_m_stop() -- stop transmitting/receiving
786  */
787 static void
788 rge_m_stop(void *arg)
789 {
790 	rge_t *rgep = arg;		/* private device info	*/
791 	uint32_t i;
792 
793 	/*
794 	 * Just stop processing, then record new MAC state
795 	 */
796 	mutex_enter(rgep->genlock);
797 	rge_stop(rgep);
798 	/*
799 	 * Wait for posted buffer to be freed...
800 	 */
801 	if (!rgep->rx_bcopy) {
802 		for (i = 0; i < RXBUFF_FREE_LOOP; i++) {
803 			if (rgep->rx_free == RGE_BUF_SLOTS)
804 				break;
805 			drv_usecwait(1000);
806 			RGE_DEBUG(("rge_m_stop: waiting for rx buf free..."));
807 		}
808 	}
809 	rgep->rge_mac_state = RGE_MAC_STOPPED;
810 	RGE_DEBUG(("rge_m_stop($%p) done", arg));
811 	mutex_exit(rgep->genlock);
812 }
813 
814 /*
815  *	rge_m_start() -- start transmitting/receiving
816  */
817 static int
818 rge_m_start(void *arg)
819 {
820 	rge_t *rgep = arg;		/* private device info	*/
821 
822 	mutex_enter(rgep->genlock);
823 
824 	/*
825 	 * Clear hw/sw statistics
826 	 */
827 	DMA_ZERO(rgep->dma_area_stats);
828 	bzero(&rgep->stats, sizeof (rge_stats_t));
829 
830 	/*
831 	 * Start processing and record new MAC state
832 	 */
833 	rge_reset(rgep);
834 	rge_start(rgep);
835 	rgep->rge_mac_state = RGE_MAC_STARTED;
836 	RGE_DEBUG(("rge_m_start($%p) done", arg));
837 
838 	mutex_exit(rgep->genlock);
839 
840 	return (0);
841 }
842 
843 /*
844  *	rge_m_unicst_set() -- set the physical network address
845  */
846 static int
847 rge_m_unicst(void *arg, const uint8_t *macaddr)
848 {
849 	rge_t *rgep = arg;		/* private device info	*/
850 
851 	/*
852 	 * Remember the new current address in the driver state
853 	 * Sync the chip's idea of the address too ...
854 	 */
855 	mutex_enter(rgep->genlock);
856 	bcopy(macaddr, rgep->netaddr, ETHERADDRL);
857 	rge_chip_sync(rgep, RGE_SET_MAC);
858 	mutex_exit(rgep->genlock);
859 
860 	return (0);
861 }
862 
863 /*
864  * Compute the index of the required bit in the multicast hash map.
865  * This must mirror the way the hardware actually does it!
866  */
867 static uint32_t
868 rge_hash_index(const uint8_t *mca)
869 {
870 	uint32_t crc = (uint32_t)RGE_HASH_CRC;
871 	uint32_t const POLY = RGE_HASH_POLY;
872 	uint32_t msb;
873 	int bytes;
874 	uchar_t currentbyte;
875 	uint32_t index;
876 	int bit;
877 
878 	for (bytes = 0; bytes < ETHERADDRL; bytes++) {
879 		currentbyte = mca[bytes];
880 		for (bit = 0; bit < 8; bit++) {
881 			msb = crc >> 31;
882 			crc <<= 1;
883 			if (msb ^ (currentbyte & 1))
884 				crc ^= POLY;
885 			currentbyte >>= 1;
886 		}
887 	}
888 	index = crc >> 26;
889 		/* the index value is between 0 and 63(0x3f) */
890 
891 	return (index);
892 }
893 
894 /*
895  *	rge_m_multicst_add() -- enable/disable a multicast address
896  */
897 static int
898 rge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
899 {
900 	rge_t *rgep = arg;		/* private device info	*/
901 	struct ether_addr *addr;
902 	uint32_t index;
903 	uint32_t reg;
904 	uint8_t *hashp;
905 
906 	mutex_enter(rgep->genlock);
907 	hashp = rgep->mcast_hash;
908 	addr = (struct ether_addr *)mca;
909 	/*
910 	 * Calculate the Multicast address hash index value
911 	 *	Normally, the position of MAR0-MAR7 is
912 	 *	MAR0: offset 0x08, ..., MAR7: offset 0x0F.
913 	 *
914 	 *	For pcie chipset, the position of MAR0-MAR7 is
915 	 *	different from others:
916 	 *	MAR0: offset 0x0F, ..., MAR7: offset 0x08.
917 	 */
918 	index = rge_hash_index(addr->ether_addr_octet);
919 	if (rgep->chipid.is_pcie)
920 		reg = (~(index / RGE_MCAST_NUM)) & 0x7;
921 	else
922 		reg = index / RGE_MCAST_NUM;
923 
924 	if (add) {
925 		if (rgep->mcast_refs[index]++) {
926 			mutex_exit(rgep->genlock);
927 			return (0);
928 		}
929 		hashp[reg] |= 1 << (index % RGE_MCAST_NUM);
930 	} else {
931 		if (--rgep->mcast_refs[index]) {
932 			mutex_exit(rgep->genlock);
933 			return (0);
934 		}
935 		hashp[reg] &= ~ (1 << (index % RGE_MCAST_NUM));
936 	}
937 
938 	/*
939 	 * Set multicast register
940 	 */
941 	rge_chip_sync(rgep, RGE_SET_MUL);
942 
943 	mutex_exit(rgep->genlock);
944 	return (0);
945 }
946 
947 /*
948  * rge_m_promisc() -- set or reset promiscuous mode on the board
949  *
950  *	Program the hardware to enable/disable promiscuous and/or
951  *	receive-all-multicast modes.
952  */
953 static int
954 rge_m_promisc(void *arg, boolean_t on)
955 {
956 	rge_t *rgep = arg;
957 
958 	/*
959 	 * Store MAC layer specified mode and pass to chip layer to update h/w
960 	 */
961 	mutex_enter(rgep->genlock);
962 
963 	if (rgep->promisc == on) {
964 		mutex_exit(rgep->genlock);
965 		return (0);
966 	}
967 	rgep->promisc = on;
968 	rge_chip_sync(rgep, RGE_SET_PROMISC);
969 	RGE_DEBUG(("rge_m_promisc_set($%p) done", arg));
970 	mutex_exit(rgep->genlock);
971 	return (0);
972 }
973 
/*
 * Loopback ioctl code
 */

/*
 * Table of supported loopback modes.  It is copied out verbatim by the
 * LB_GET_INFO ioctl, and its size is what LB_GET_INFO_SIZE reports
 * (see rge_loop_ioctl()).  The third member of each entry is the mode
 * value accepted by rge_set_loop_mode().
 */
static lb_property_t loopmodes[] = {
	{ normal,	"normal",	RGE_LOOP_NONE		},
	{ internal,	"PHY",		RGE_LOOP_INTERNAL_PHY	},
	{ internal,	"MAC",		RGE_LOOP_INTERNAL_MAC	}
};
983 
984 static enum ioc_reply
985 rge_set_loop_mode(rge_t *rgep, uint32_t mode)
986 {
987 	/*
988 	 * If the mode isn't being changed, there's nothing to do ...
989 	 */
990 	if (mode == rgep->param_loop_mode)
991 		return (IOC_ACK);
992 
993 	/*
994 	 * Validate the requested mode and prepare a suitable message
995 	 * to explain the link down/up cycle that the change will
996 	 * probably induce ...
997 	 */
998 	switch (mode) {
999 	default:
1000 		return (IOC_INVAL);
1001 
1002 	case RGE_LOOP_NONE:
1003 	case RGE_LOOP_INTERNAL_PHY:
1004 	case RGE_LOOP_INTERNAL_MAC:
1005 		break;
1006 	}
1007 
1008 	/*
1009 	 * All OK; tell the caller to reprogram
1010 	 * the PHY and/or MAC for the new mode ...
1011 	 */
1012 	rgep->param_loop_mode = mode;
1013 	return (IOC_RESTART_ACK);
1014 }
1015 
1016 static enum ioc_reply
1017 rge_loop_ioctl(rge_t *rgep, queue_t *wq, mblk_t *mp, struct iocblk *iocp)
1018 {
1019 	lb_info_sz_t *lbsp;
1020 	lb_property_t *lbpp;
1021 	uint32_t *lbmp;
1022 	int cmd;
1023 
1024 	_NOTE(ARGUNUSED(wq))
1025 
1026 	/*
1027 	 * Validate format of ioctl
1028 	 */
1029 	if (mp->b_cont == NULL)
1030 		return (IOC_INVAL);
1031 
1032 	cmd = iocp->ioc_cmd;
1033 	switch (cmd) {
1034 	default:
1035 		/* NOTREACHED */
1036 		rge_error(rgep, "rge_loop_ioctl: invalid cmd 0x%x", cmd);
1037 		return (IOC_INVAL);
1038 
1039 	case LB_GET_INFO_SIZE:
1040 		if (iocp->ioc_count != sizeof (lb_info_sz_t))
1041 			return (IOC_INVAL);
1042 		lbsp = (lb_info_sz_t *)mp->b_cont->b_rptr;
1043 		*lbsp = sizeof (loopmodes);
1044 		return (IOC_REPLY);
1045 
1046 	case LB_GET_INFO:
1047 		if (iocp->ioc_count != sizeof (loopmodes))
1048 			return (IOC_INVAL);
1049 		lbpp = (lb_property_t *)mp->b_cont->b_rptr;
1050 		bcopy(loopmodes, lbpp, sizeof (loopmodes));
1051 		return (IOC_REPLY);
1052 
1053 	case LB_GET_MODE:
1054 		if (iocp->ioc_count != sizeof (uint32_t))
1055 			return (IOC_INVAL);
1056 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
1057 		*lbmp = rgep->param_loop_mode;
1058 		return (IOC_REPLY);
1059 
1060 	case LB_SET_MODE:
1061 		if (iocp->ioc_count != sizeof (uint32_t))
1062 			return (IOC_INVAL);
1063 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
1064 		return (rge_set_loop_mode(rgep, *lbmp));
1065 	}
1066 }
1067 
1068 /*
1069  * Specific rge IOCTLs, the MAC layer handles the generic ones.
1070  */
1071 static void
1072 rge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
1073 {
1074 	rge_t *rgep = arg;
1075 	struct iocblk *iocp;
1076 	enum ioc_reply status;
1077 	boolean_t need_privilege;
1078 	int err;
1079 	int cmd;
1080 
1081 	/*
1082 	 * Validate the command before bothering with the mutex ...
1083 	 */
1084 	iocp = (struct iocblk *)mp->b_rptr;
1085 	iocp->ioc_error = 0;
1086 	need_privilege = B_TRUE;
1087 	cmd = iocp->ioc_cmd;
1088 	switch (cmd) {
1089 	default:
1090 		miocnak(wq, mp, 0, EINVAL);
1091 		return;
1092 
1093 	case RGE_MII_READ:
1094 	case RGE_MII_WRITE:
1095 	case RGE_DIAG:
1096 	case RGE_PEEK:
1097 	case RGE_POKE:
1098 	case RGE_PHY_RESET:
1099 	case RGE_SOFT_RESET:
1100 	case RGE_HARD_RESET:
1101 		break;
1102 
1103 	case LB_GET_INFO_SIZE:
1104 	case LB_GET_INFO:
1105 	case LB_GET_MODE:
1106 		need_privilege = B_FALSE;
1107 		/* FALLTHRU */
1108 	case LB_SET_MODE:
1109 		break;
1110 
1111 	case ND_GET:
1112 		need_privilege = B_FALSE;
1113 		/* FALLTHRU */
1114 	case ND_SET:
1115 		break;
1116 	}
1117 
1118 	if (need_privilege) {
1119 		/*
1120 		 * Check for specific net_config privilege
1121 		 */
1122 		err = secpolicy_net_config(iocp->ioc_cr, B_FALSE);
1123 		if (err != 0) {
1124 			miocnak(wq, mp, 0, err);
1125 			return;
1126 		}
1127 	}
1128 
1129 	mutex_enter(rgep->genlock);
1130 
1131 	switch (cmd) {
1132 	default:
1133 		_NOTE(NOTREACHED)
1134 		status = IOC_INVAL;
1135 		break;
1136 
1137 	case RGE_MII_READ:
1138 	case RGE_MII_WRITE:
1139 	case RGE_DIAG:
1140 	case RGE_PEEK:
1141 	case RGE_POKE:
1142 	case RGE_PHY_RESET:
1143 	case RGE_SOFT_RESET:
1144 	case RGE_HARD_RESET:
1145 		status = rge_chip_ioctl(rgep, wq, mp, iocp);
1146 		break;
1147 
1148 	case LB_GET_INFO_SIZE:
1149 	case LB_GET_INFO:
1150 	case LB_GET_MODE:
1151 	case LB_SET_MODE:
1152 		status = rge_loop_ioctl(rgep, wq, mp, iocp);
1153 		break;
1154 
1155 	case ND_GET:
1156 	case ND_SET:
1157 		status = rge_nd_ioctl(rgep, wq, mp, iocp);
1158 		break;
1159 	}
1160 
1161 	/*
1162 	 * Do we need to reprogram the PHY and/or the MAC?
1163 	 * Do it now, while we still have the mutex.
1164 	 *
1165 	 * Note: update the PHY first, 'cos it controls the
1166 	 * speed/duplex parameters that the MAC code uses.
1167 	 */
1168 	switch (status) {
1169 	case IOC_RESTART_REPLY:
1170 	case IOC_RESTART_ACK:
1171 		rge_phy_update(rgep);
1172 		break;
1173 	}
1174 
1175 	mutex_exit(rgep->genlock);
1176 
1177 	/*
1178 	 * Finally, decide how to reply
1179 	 */
1180 	switch (status) {
1181 	default:
1182 	case IOC_INVAL:
1183 		/*
1184 		 * Error, reply with a NAK and EINVAL or the specified error
1185 		 */
1186 		miocnak(wq, mp, 0, iocp->ioc_error == 0 ?
1187 		    EINVAL : iocp->ioc_error);
1188 		break;
1189 
1190 	case IOC_DONE:
1191 		/*
1192 		 * OK, reply already sent
1193 		 */
1194 		break;
1195 
1196 	case IOC_RESTART_ACK:
1197 	case IOC_ACK:
1198 		/*
1199 		 * OK, reply with an ACK
1200 		 */
1201 		miocack(wq, mp, 0, 0);
1202 		break;
1203 
1204 	case IOC_RESTART_REPLY:
1205 	case IOC_REPLY:
1206 		/*
1207 		 * OK, send prepared reply as ACK or NAK
1208 		 */
1209 		mp->b_datap->db_type = iocp->ioc_error == 0 ?
1210 		    M_IOCACK : M_IOCNAK;
1211 		qreply(wq, mp);
1212 		break;
1213 	}
1214 }
1215 
1216 static void
1217 rge_m_resources(void *arg)
1218 {
1219 	rge_t *rgep = arg;
1220 	mac_rx_fifo_t mrf;
1221 
1222 	mutex_enter(rgep->genlock);
1223 
1224 	/*
1225 	 * Register Rx rings as resources and save mac
1226 	 * resource id for future reference
1227 	 */
1228 	mrf.mrf_type = MAC_RX_FIFO;
1229 	mrf.mrf_blank = rge_chip_blank;
1230 	mrf.mrf_arg = (void *)rgep;
1231 	mrf.mrf_normal_blank_time = RGE_RX_INT_TIME;
1232 	mrf.mrf_normal_pkt_count = RGE_RX_INT_PKTS;
1233 	rgep->handle = mac_resource_add(rgep->mh, (mac_resource_t *)&mrf);
1234 
1235 	mutex_exit(rgep->genlock);
1236 }
1237 
1238 /* ARGSUSED */
1239 static boolean_t
1240 rge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1241 {
1242 	switch (cap) {
1243 	case MAC_CAPAB_HCKSUM: {
1244 		uint32_t *hcksum_txflags = cap_data;
1245 		*hcksum_txflags = HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM;
1246 		break;
1247 	}
1248 	case MAC_CAPAB_POLL:
1249 		/*
1250 		 * There's nothing for us to fill in, simply returning
1251 		 * B_TRUE stating that we support polling is sufficient.
1252 		 */
1253 		break;
1254 	default:
1255 		return (B_FALSE);
1256 	}
1257 	return (B_TRUE);
1258 }
1259 
1260 /*
1261  * ============ Init MSI/Fixed Interrupt routines ==============
1262  */
1263 
1264 /*
1265  * rge_add_intrs:
1266  *
1267  * Register FIXED or MSI interrupts.
1268  */
1269 static int
1270 rge_add_intrs(rge_t *rgep, int intr_type)
1271 {
1272 	dev_info_t *dip = rgep->devinfo;
1273 	int avail;
1274 	int actual;
1275 	int intr_size;
1276 	int count;
1277 	int i, j;
1278 	int ret;
1279 
1280 	/* Get number of interrupts */
1281 	ret = ddi_intr_get_nintrs(dip, intr_type, &count);
1282 	if ((ret != DDI_SUCCESS) || (count == 0)) {
1283 		rge_error(rgep, "ddi_intr_get_nintrs() failure, ret: %d, "
1284 		    "count: %d", ret, count);
1285 		return (DDI_FAILURE);
1286 	}
1287 
1288 	/* Get number of available interrupts */
1289 	ret = ddi_intr_get_navail(dip, intr_type, &avail);
1290 	if ((ret != DDI_SUCCESS) || (avail == 0)) {
1291 		rge_error(rgep, "ddi_intr_get_navail() failure, "
1292 		    "ret: %d, avail: %d\n", ret, avail);
1293 		return (DDI_FAILURE);
1294 	}
1295 
1296 	/* Allocate an array of interrupt handles */
1297 	intr_size = count * sizeof (ddi_intr_handle_t);
1298 	rgep->htable = kmem_alloc(intr_size, KM_SLEEP);
1299 	rgep->intr_rqst = count;
1300 
1301 	/* Call ddi_intr_alloc() */
1302 	ret = ddi_intr_alloc(dip, rgep->htable, intr_type, 0,
1303 	    count, &actual, DDI_INTR_ALLOC_NORMAL);
1304 	if (ret != DDI_SUCCESS || actual == 0) {
1305 		rge_error(rgep, "ddi_intr_alloc() failed %d\n", ret);
1306 		kmem_free(rgep->htable, intr_size);
1307 		return (DDI_FAILURE);
1308 	}
1309 	if (actual < count) {
1310 		rge_log(rgep, "ddi_intr_alloc() Requested: %d, Received: %d\n",
1311 		    count, actual);
1312 	}
1313 	rgep->intr_cnt = actual;
1314 
1315 	/*
1316 	 * Get priority for first msi, assume remaining are all the same
1317 	 */
1318 	if ((ret = ddi_intr_get_pri(rgep->htable[0], &rgep->intr_pri)) !=
1319 	    DDI_SUCCESS) {
1320 		rge_error(rgep, "ddi_intr_get_pri() failed %d\n", ret);
1321 		/* Free already allocated intr */
1322 		for (i = 0; i < actual; i++) {
1323 			(void) ddi_intr_free(rgep->htable[i]);
1324 		}
1325 		kmem_free(rgep->htable, intr_size);
1326 		return (DDI_FAILURE);
1327 	}
1328 
1329 	/* Test for high level mutex */
1330 	if (rgep->intr_pri >= ddi_intr_get_hilevel_pri()) {
1331 		rge_error(rgep, "rge_add_intrs:"
1332 		    "Hi level interrupt not supported");
1333 		for (i = 0; i < actual; i++)
1334 			(void) ddi_intr_free(rgep->htable[i]);
1335 		kmem_free(rgep->htable, intr_size);
1336 		return (DDI_FAILURE);
1337 	}
1338 
1339 	/* Call ddi_intr_add_handler() */
1340 	for (i = 0; i < actual; i++) {
1341 		if ((ret = ddi_intr_add_handler(rgep->htable[i], rge_intr,
1342 		    (caddr_t)rgep, (caddr_t)(uintptr_t)i)) != DDI_SUCCESS) {
1343 			rge_error(rgep, "ddi_intr_add_handler() "
1344 			    "failed %d\n", ret);
1345 			/* Remove already added intr */
1346 			for (j = 0; j < i; j++)
1347 				(void) ddi_intr_remove_handler(rgep->htable[j]);
1348 			/* Free already allocated intr */
1349 			for (i = 0; i < actual; i++) {
1350 				(void) ddi_intr_free(rgep->htable[i]);
1351 			}
1352 			kmem_free(rgep->htable, intr_size);
1353 			return (DDI_FAILURE);
1354 		}
1355 	}
1356 
1357 	if ((ret = ddi_intr_get_cap(rgep->htable[0], &rgep->intr_cap))
1358 	    != DDI_SUCCESS) {
1359 		rge_error(rgep, "ddi_intr_get_cap() failed %d\n", ret);
1360 		for (i = 0; i < actual; i++) {
1361 			(void) ddi_intr_remove_handler(rgep->htable[i]);
1362 			(void) ddi_intr_free(rgep->htable[i]);
1363 		}
1364 		kmem_free(rgep->htable, intr_size);
1365 		return (DDI_FAILURE);
1366 	}
1367 
1368 	return (DDI_SUCCESS);
1369 }
1370 
1371 /*
1372  * rge_rem_intrs:
1373  *
1374  * Unregister FIXED or MSI interrupts
1375  */
1376 static void
1377 rge_rem_intrs(rge_t *rgep)
1378 {
1379 	int i;
1380 
1381 	/* Disable all interrupts */
1382 	if (rgep->intr_cap & DDI_INTR_FLAG_BLOCK) {
1383 		/* Call ddi_intr_block_disable() */
1384 		(void) ddi_intr_block_disable(rgep->htable, rgep->intr_cnt);
1385 	} else {
1386 		for (i = 0; i < rgep->intr_cnt; i++) {
1387 			(void) ddi_intr_disable(rgep->htable[i]);
1388 		}
1389 	}
1390 
1391 	/* Call ddi_intr_remove_handler() */
1392 	for (i = 0; i < rgep->intr_cnt; i++) {
1393 		(void) ddi_intr_remove_handler(rgep->htable[i]);
1394 		(void) ddi_intr_free(rgep->htable[i]);
1395 	}
1396 
1397 	kmem_free(rgep->htable, rgep->intr_rqst * sizeof (ddi_intr_handle_t));
1398 }
1399 
1400 /*
1401  * ========== Per-instance setup/teardown code ==========
1402  */
1403 
1404 #undef	RGE_DBG
1405 #define	RGE_DBG		RGE_DBG_INIT	/* debug flag for this code	*/
1406 
1407 static void
1408 rge_unattach(rge_t *rgep)
1409 {
1410 	/*
1411 	 * Flag that no more activity may be initiated
1412 	 */
1413 	rgep->progress &= ~PROGRESS_READY;
1414 	rgep->rge_mac_state = RGE_MAC_UNATTACH;
1415 
1416 	/*
1417 	 * Quiesce the PHY and MAC (leave it reset but still powered).
1418 	 * Clean up and free all RGE data structures
1419 	 */
1420 	if (rgep->periodic_id != NULL) {
1421 		ddi_periodic_delete(rgep->periodic_id);
1422 		rgep->periodic_id = NULL;
1423 	}
1424 
1425 	if (rgep->progress & PROGRESS_KSTATS)
1426 		rge_fini_kstats(rgep);
1427 
1428 	if (rgep->progress & PROGRESS_PHY)
1429 		(void) rge_phy_reset(rgep);
1430 
1431 	if (rgep->progress & PROGRESS_INIT) {
1432 		mutex_enter(rgep->genlock);
1433 		(void) rge_chip_reset(rgep);
1434 		mutex_exit(rgep->genlock);
1435 		rge_fini_rings(rgep);
1436 	}
1437 
1438 	if (rgep->progress & PROGRESS_INTR) {
1439 		rge_rem_intrs(rgep);
1440 		mutex_destroy(rgep->rc_lock);
1441 		mutex_destroy(rgep->rx_lock);
1442 		mutex_destroy(rgep->tc_lock);
1443 		mutex_destroy(rgep->tx_lock);
1444 		rw_destroy(rgep->errlock);
1445 		mutex_destroy(rgep->genlock);
1446 	}
1447 
1448 	if (rgep->progress & PROGRESS_FACTOTUM)
1449 		(void) ddi_intr_remove_softint(rgep->factotum_hdl);
1450 
1451 	if (rgep->progress & PROGRESS_RESCHED)
1452 		(void) ddi_intr_remove_softint(rgep->resched_hdl);
1453 
1454 	if (rgep->progress & PROGRESS_NDD)
1455 		rge_nd_cleanup(rgep);
1456 
1457 	rge_free_bufs(rgep);
1458 
1459 	if (rgep->progress & PROGRESS_REGS)
1460 		ddi_regs_map_free(&rgep->io_handle);
1461 
1462 	if (rgep->progress & PROGRESS_CFG)
1463 		pci_config_teardown(&rgep->cfg_handle);
1464 
1465 	ddi_remove_minor_node(rgep->devinfo, NULL);
1466 	kmem_free(rgep, sizeof (*rgep));
1467 }
1468 
1469 static int
1470 rge_resume(dev_info_t *devinfo)
1471 {
1472 	rge_t *rgep;			/* Our private data	*/
1473 	chip_id_t *cidp;
1474 	chip_id_t chipid;
1475 
1476 	rgep = ddi_get_driver_private(devinfo);
1477 	if (rgep == NULL)
1478 		return (DDI_FAILURE);
1479 
1480 	/*
1481 	 * Refuse to resume if the data structures aren't consistent
1482 	 */
1483 	if (rgep->devinfo != devinfo)
1484 		return (DDI_FAILURE);
1485 
1486 	/*
1487 	 * Read chip ID & set up config space command register(s)
1488 	 * Refuse to resume if the chip has changed its identity!
1489 	 */
1490 	cidp = &rgep->chipid;
1491 	rge_chip_cfg_init(rgep, &chipid);
1492 	if (chipid.vendor != cidp->vendor)
1493 		return (DDI_FAILURE);
1494 	if (chipid.device != cidp->device)
1495 		return (DDI_FAILURE);
1496 	if (chipid.revision != cidp->revision)
1497 		return (DDI_FAILURE);
1498 
1499 	/*
1500 	 * All OK, reinitialise h/w & kick off NEMO scheduling
1501 	 */
1502 	mutex_enter(rgep->genlock);
1503 	rge_restart(rgep);
1504 	mutex_exit(rgep->genlock);
1505 	return (DDI_SUCCESS);
1506 }
1507 
1508 
1509 /*
1510  * attach(9E) -- Attach a device to the system
1511  *
1512  * Called once for each board successfully probed.
1513  */
1514 static int
1515 rge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1516 {
1517 	rge_t *rgep;			/* Our private data	*/
1518 	mac_register_t *macp;
1519 	chip_id_t *cidp;
1520 	int intr_types;
1521 	caddr_t regs;
1522 	int instance;
1523 	int i;
1524 	int err;
1525 
1526 	/*
1527 	 * we don't support high level interrupts in the driver
1528 	 */
1529 	if (ddi_intr_hilevel(devinfo, 0) != 0) {
1530 		cmn_err(CE_WARN,
1531 		    "rge_attach -- unsupported high level interrupt");
1532 		return (DDI_FAILURE);
1533 	}
1534 
1535 	instance = ddi_get_instance(devinfo);
1536 	RGE_GTRACE(("rge_attach($%p, %d) instance %d",
1537 	    (void *)devinfo, cmd, instance));
1538 	RGE_BRKPT(NULL, "rge_attach");
1539 
1540 	switch (cmd) {
1541 	default:
1542 		return (DDI_FAILURE);
1543 
1544 	case DDI_RESUME:
1545 		return (rge_resume(devinfo));
1546 
1547 	case DDI_ATTACH:
1548 		break;
1549 	}
1550 
1551 	rgep = kmem_zalloc(sizeof (*rgep), KM_SLEEP);
1552 	ddi_set_driver_private(devinfo, rgep);
1553 	rgep->devinfo = devinfo;
1554 
1555 	/*
1556 	 * Initialize more fields in RGE private data
1557 	 */
1558 	rgep->rge_mac_state = RGE_MAC_ATTACH;
1559 	rgep->debug = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1560 	    DDI_PROP_DONTPASS, debug_propname, rge_debug);
1561 	rgep->default_mtu = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1562 	    DDI_PROP_DONTPASS, mtu_propname, ETHERMTU);
1563 	rgep->msi_enable = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
1564 	    DDI_PROP_DONTPASS, msi_propname, B_TRUE);
1565 	(void) snprintf(rgep->ifname, sizeof (rgep->ifname), "%s%d",
1566 	    RGE_DRIVER_NAME, instance);
1567 
1568 	/*
1569 	 * Map config space registers
1570 	 * Read chip ID & set up config space command register(s)
1571 	 *
1572 	 * Note: this leaves the chip accessible by Memory Space
1573 	 * accesses, but with interrupts and Bus Mastering off.
1574 	 * This should ensure that nothing untoward will happen
1575 	 * if it has been left active by the (net-)bootloader.
1576 	 * We'll re-enable Bus Mastering once we've reset the chip,
1577 	 * and allow interrupts only when everything else is set up.
1578 	 */
1579 	err = pci_config_setup(devinfo, &rgep->cfg_handle);
1580 	if (err != DDI_SUCCESS) {
1581 		rge_problem(rgep, "pci_config_setup() failed");
1582 		goto attach_fail;
1583 	}
1584 	rgep->progress |= PROGRESS_CFG;
1585 	cidp = &rgep->chipid;
1586 	bzero(cidp, sizeof (*cidp));
1587 	rge_chip_cfg_init(rgep, cidp);
1588 
1589 	/*
1590 	 * Map operating registers
1591 	 */
1592 	err = ddi_regs_map_setup(devinfo, 1, &regs,
1593 	    0, 0, &rge_reg_accattr, &rgep->io_handle);
1594 	if (err != DDI_SUCCESS) {
1595 		rge_problem(rgep, "ddi_regs_map_setup() failed");
1596 		goto attach_fail;
1597 	}
1598 	rgep->io_regs = regs;
1599 	rgep->progress |= PROGRESS_REGS;
1600 
1601 	/*
1602 	 * Characterise the device, so we know its requirements.
1603 	 * Then allocate the appropriate TX and RX descriptors & buffers.
1604 	 */
1605 	rge_chip_ident(rgep);
1606 	err = rge_alloc_bufs(rgep);
1607 	if (err != DDI_SUCCESS) {
1608 		rge_problem(rgep, "DMA buffer allocation failed");
1609 		goto attach_fail;
1610 	}
1611 
1612 	/*
1613 	 * Register NDD-tweakable parameters
1614 	 */
1615 	if (rge_nd_init(rgep)) {
1616 		rge_problem(rgep, "rge_nd_init() failed");
1617 		goto attach_fail;
1618 	}
1619 	rgep->progress |= PROGRESS_NDD;
1620 
1621 	/*
1622 	 * Add the softint handlers:
1623 	 *
1624 	 * Both of these handlers are used to avoid restrictions on the
1625 	 * context and/or mutexes required for some operations.  In
1626 	 * particular, the hardware interrupt handler and its subfunctions
1627 	 * can detect a number of conditions that we don't want to handle
1628 	 * in that context or with that set of mutexes held.  So, these
1629 	 * softints are triggered instead:
1630 	 *
1631 	 * the <resched> softint is triggered if if we have previously
1632 	 * had to refuse to send a packet because of resource shortage
1633 	 * (we've run out of transmit buffers), but the send completion
1634 	 * interrupt handler has now detected that more buffers have
1635 	 * become available.
1636 	 *
1637 	 * the <factotum> is triggered if the h/w interrupt handler
1638 	 * sees the <link state changed> or <error> bits in the status
1639 	 * block.  It's also triggered periodically to poll the link
1640 	 * state, just in case we aren't getting link status change
1641 	 * interrupts ...
1642 	 */
1643 	err = ddi_intr_add_softint(devinfo, &rgep->resched_hdl,
1644 	    DDI_INTR_SOFTPRI_MIN, rge_reschedule, (caddr_t)rgep);
1645 	if (err != DDI_SUCCESS) {
1646 		rge_problem(rgep, "ddi_intr_add_softint() failed");
1647 		goto attach_fail;
1648 	}
1649 	rgep->progress |= PROGRESS_RESCHED;
1650 	err = ddi_intr_add_softint(devinfo, &rgep->factotum_hdl,
1651 	    DDI_INTR_SOFTPRI_MIN, rge_chip_factotum, (caddr_t)rgep);
1652 	if (err != DDI_SUCCESS) {
1653 		rge_problem(rgep, "ddi_intr_add_softint() failed");
1654 		goto attach_fail;
1655 	}
1656 	rgep->progress |= PROGRESS_FACTOTUM;
1657 
1658 	/*
1659 	 * Get supported interrupt types
1660 	 */
1661 	if (ddi_intr_get_supported_types(devinfo, &intr_types)
1662 	    != DDI_SUCCESS) {
1663 		rge_error(rgep, "ddi_intr_get_supported_types failed\n");
1664 		goto attach_fail;
1665 	}
1666 
1667 	/*
1668 	 * Add the h/w interrupt handler and initialise mutexes
1669 	 */
1670 	if ((intr_types & DDI_INTR_TYPE_MSI) && rgep->msi_enable) {
1671 		if (rge_add_intrs(rgep, DDI_INTR_TYPE_MSI) != DDI_SUCCESS) {
1672 			rge_error(rgep, "MSI registration failed, "
1673 			    "trying FIXED interrupt type\n");
1674 		} else {
1675 			rge_log(rgep, "Using MSI interrupt type\n");
1676 			rgep->intr_type = DDI_INTR_TYPE_MSI;
1677 			rgep->progress |= PROGRESS_INTR;
1678 		}
1679 	}
1680 	if (!(rgep->progress & PROGRESS_INTR) &&
1681 	    (intr_types & DDI_INTR_TYPE_FIXED)) {
1682 		if (rge_add_intrs(rgep, DDI_INTR_TYPE_FIXED) != DDI_SUCCESS) {
1683 			rge_error(rgep, "FIXED interrupt "
1684 			    "registration failed\n");
1685 			goto attach_fail;
1686 		}
1687 		rge_log(rgep, "Using FIXED interrupt type\n");
1688 		rgep->intr_type = DDI_INTR_TYPE_FIXED;
1689 		rgep->progress |= PROGRESS_INTR;
1690 	}
1691 	if (!(rgep->progress & PROGRESS_INTR)) {
1692 		rge_error(rgep, "No interrupts registered\n");
1693 		goto attach_fail;
1694 	}
1695 	mutex_init(rgep->genlock, NULL, MUTEX_DRIVER,
1696 	    DDI_INTR_PRI(rgep->intr_pri));
1697 	rw_init(rgep->errlock, NULL, RW_DRIVER,
1698 	    DDI_INTR_PRI(rgep->intr_pri));
1699 	mutex_init(rgep->tx_lock, NULL, MUTEX_DRIVER,
1700 	    DDI_INTR_PRI(rgep->intr_pri));
1701 	mutex_init(rgep->tc_lock, NULL, MUTEX_DRIVER,
1702 	    DDI_INTR_PRI(rgep->intr_pri));
1703 	mutex_init(rgep->rx_lock, NULL, MUTEX_DRIVER,
1704 	    DDI_INTR_PRI(rgep->intr_pri));
1705 	mutex_init(rgep->rc_lock, NULL, MUTEX_DRIVER,
1706 	    DDI_INTR_PRI(rgep->intr_pri));
1707 
1708 	/*
1709 	 * Initialize rings
1710 	 */
1711 	err = rge_init_rings(rgep);
1712 	if (err != DDI_SUCCESS) {
1713 		rge_problem(rgep, "rge_init_rings() failed");
1714 		goto attach_fail;
1715 	}
1716 	rgep->progress |= PROGRESS_INIT;
1717 
1718 	/*
1719 	 * Now that mutex locks are initialized, enable interrupts.
1720 	 */
1721 	if (rgep->intr_cap & DDI_INTR_FLAG_BLOCK) {
1722 		/* Call ddi_intr_block_enable() for MSI interrupts */
1723 		(void) ddi_intr_block_enable(rgep->htable, rgep->intr_cnt);
1724 	} else {
1725 		/* Call ddi_intr_enable for MSI or FIXED interrupts */
1726 		for (i = 0; i < rgep->intr_cnt; i++) {
1727 			(void) ddi_intr_enable(rgep->htable[i]);
1728 		}
1729 	}
1730 
1731 	/*
1732 	 * Initialise link state variables
1733 	 * Stop, reset & reinitialise the chip.
1734 	 * Initialise the (internal) PHY.
1735 	 */
1736 	rgep->param_link_up = LINK_STATE_UNKNOWN;
1737 
1738 	/*
1739 	 * Reset chip & rings to initial state; also reset address
1740 	 * filtering, promiscuity, loopback mode.
1741 	 */
1742 	mutex_enter(rgep->genlock);
1743 	(void) rge_chip_reset(rgep);
1744 	rge_chip_sync(rgep, RGE_GET_MAC);
1745 	bzero(rgep->mcast_hash, sizeof (rgep->mcast_hash));
1746 	bzero(rgep->mcast_refs, sizeof (rgep->mcast_refs));
1747 	rgep->promisc = B_FALSE;
1748 	rgep->param_loop_mode = RGE_LOOP_NONE;
1749 	mutex_exit(rgep->genlock);
1750 	rge_phy_init(rgep);
1751 	rgep->progress |= PROGRESS_PHY;
1752 
1753 	/*
1754 	 * Create & initialise named kstats
1755 	 */
1756 	rge_init_kstats(rgep, instance);
1757 	rgep->progress |= PROGRESS_KSTATS;
1758 
1759 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1760 		goto attach_fail;
1761 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1762 	macp->m_driver = rgep;
1763 	macp->m_dip = devinfo;
1764 	macp->m_src_addr = rgep->netaddr;
1765 	macp->m_callbacks = &rge_m_callbacks;
1766 	macp->m_min_sdu = 0;
1767 	macp->m_max_sdu = rgep->default_mtu;
1768 	macp->m_margin = VLAN_TAGSZ;
1769 
1770 	/*
1771 	 * Finally, we're ready to register ourselves with the MAC layer
1772 	 * interface; if this succeeds, we're all ready to start()
1773 	 */
1774 	err = mac_register(macp, &rgep->mh);
1775 	mac_free(macp);
1776 	if (err != 0)
1777 		goto attach_fail;
1778 
1779 	/*
1780 	 * Register a periodical handler.
1781 	 * reg_chip_cyclic() is invoked in kernel context.
1782 	 */
1783 	rgep->periodic_id = ddi_periodic_add(rge_chip_cyclic, rgep,
1784 	    RGE_CYCLIC_PERIOD, DDI_IPL_0);
1785 
1786 	rgep->progress |= PROGRESS_READY;
1787 	return (DDI_SUCCESS);
1788 
1789 attach_fail:
1790 	rge_unattach(rgep);
1791 	return (DDI_FAILURE);
1792 }
1793 
1794 /*
1795  *	rge_suspend() -- suspend transmit/receive for powerdown
1796  */
1797 static int
1798 rge_suspend(rge_t *rgep)
1799 {
1800 	/*
1801 	 * Stop processing and idle (powerdown) the PHY ...
1802 	 */
1803 	mutex_enter(rgep->genlock);
1804 	rge_stop(rgep);
1805 	mutex_exit(rgep->genlock);
1806 
1807 	return (DDI_SUCCESS);
1808 }
1809 
1810 /*
1811  * detach(9E) -- Detach a device from the system
1812  */
1813 static int
1814 rge_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1815 {
1816 	rge_t *rgep;
1817 
1818 	RGE_GTRACE(("rge_detach($%p, %d)", (void *)devinfo, cmd));
1819 
1820 	rgep = ddi_get_driver_private(devinfo);
1821 
1822 	switch (cmd) {
1823 	default:
1824 		return (DDI_FAILURE);
1825 
1826 	case DDI_SUSPEND:
1827 		return (rge_suspend(rgep));
1828 
1829 	case DDI_DETACH:
1830 		break;
1831 	}
1832 
1833 	/*
1834 	 * If there is any posted buffer, the driver should reject to be
1835 	 * detached. Need notice upper layer to release them.
1836 	 */
1837 	if (!(rgep->chip_flags & CHIP_FLAG_FORCE_BCOPY) &&
1838 	    rgep->rx_free != RGE_BUF_SLOTS)
1839 		return (DDI_FAILURE);
1840 
1841 	/*
1842 	 * Unregister from the MAC layer subsystem.  This can fail, in
1843 	 * particular if there are DLPI style-2 streams still open -
1844 	 * in which case we just return failure without shutting
1845 	 * down chip operations.
1846 	 */
1847 	if (mac_unregister(rgep->mh) != 0)
1848 		return (DDI_FAILURE);
1849 
1850 	/*
1851 	 * All activity stopped, so we can clean up & exit
1852 	 */
1853 	rge_unattach(rgep);
1854 	return (DDI_SUCCESS);
1855 }
1856 
1857 
1858 /*
1859  * ========== Module Loading Data & Entry Points ==========
1860  */
1861 
1862 #undef	RGE_DBG
1863 #define	RGE_DBG		RGE_DBG_INIT	/* debug flag for this code	*/
/*
 * Device operations for this driver, named rge_dev_ops, with
 * rge_attach() and rge_detach() supplied as the attach/detach
 * entry points.
 */
DDI_DEFINE_STREAM_OPS(rge_dev_ops,
    nulldev, nulldev,
    rge_attach, rge_detach,
    nodev, NULL, D_MP, NULL);
1866 
1867 static struct modldrv rge_modldrv = {
1868 	&mod_driverops,		/* Type of module.  This one is a driver */
1869 	rge_ident,		/* short description */
1870 	&rge_dev_ops		/* driver specific ops */
1871 };
1872 
1873 static struct modlinkage modlinkage = {
1874 	MODREV_1, (void *)&rge_modldrv, NULL
1875 };
1876 
1877 
1878 int
1879 _info(struct modinfo *modinfop)
1880 {
1881 	return (mod_info(&modlinkage, modinfop));
1882 }
1883 
1884 int
1885 _init(void)
1886 {
1887 	int status;
1888 
1889 	mac_init_ops(&rge_dev_ops, "rge");
1890 	status = mod_install(&modlinkage);
1891 	if (status == DDI_SUCCESS)
1892 		mutex_init(rge_log_mutex, NULL, MUTEX_DRIVER, NULL);
1893 	else
1894 		mac_fini_ops(&rge_dev_ops);
1895 
1896 	return (status);
1897 }
1898 
1899 int
1900 _fini(void)
1901 {
1902 	int status;
1903 
1904 	status = mod_remove(&modlinkage);
1905 	if (status == DDI_SUCCESS) {
1906 		mac_fini_ops(&rge_dev_ops);
1907 		mutex_destroy(rge_log_mutex);
1908 	}
1909 	return (status);
1910 }
1911