xref: /titanic_41/usr/src/uts/i86pc/os/ddi_impl.c (revision bbb1277b6ec1b0daad4e3ed1a2b891d3e2ece2eb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * PC specific DDI implementation
29  */
30 #include <sys/types.h>
31 #include <sys/autoconf.h>
32 #include <sys/avintr.h>
33 #include <sys/bootconf.h>
34 #include <sys/conf.h>
35 #include <sys/cpuvar.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ddi_subrdefs.h>
38 #include <sys/ethernet.h>
39 #include <sys/fp.h>
40 #include <sys/instance.h>
41 #include <sys/kmem.h>
42 #include <sys/machsystm.h>
43 #include <sys/modctl.h>
44 #include <sys/promif.h>
45 #include <sys/prom_plat.h>
46 #include <sys/sunndi.h>
47 #include <sys/ndi_impldefs.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/sysmacros.h>
50 #include <sys/systeminfo.h>
51 #include <sys/utsname.h>
52 #include <sys/atomic.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <vm/seg_kmem.h>
56 #include <sys/ontrap.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/ramdisk.h>
59 #include <sys/sunndi.h>
60 #include <sys/vmem.h>
61 #include <sys/pci_impl.h>
62 #if defined(__xpv)
63 #include <sys/hypervisor.h>
64 #endif
65 #include <sys/mach_intr.h>
66 #include <vm/hat_i86.h>
67 #include <sys/x86_archext.h>
68 
69 /*
70  * DDI Boot Configuration
71  */
72 
/*
 * Platform drivers on this platform.  The table is terminated by a
 * null pointer entry.
 */
char *platform_module_list[] = {
	"acpippm",
	"ppm",
	NULL
};
81 
/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/* Default is 0x101000 bytes. */
size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */

/*
 * NOTE(review): presumably the bounds of the boot ramdisk; these are
 * neither set nor read in this part of the file -- confirm.
 */
uint64_t ramdisk_start, ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" with
 * i_ddi_attach_pseudo_node() instead of i_ddi_attach_hw_nodes().
 */
int pseudo_isa = 0;
90 
91 /*
92  * Forward declarations
93  */
94 static int getlongprop_buf();
95 static void get_boot_properties(void);
96 static void impl_bus_initialprobe(void);
97 static void impl_bus_reprobe(void);
98 
99 static int poke_mem(peekpoke_ctlops_t *in_args);
100 static int peek_mem(peekpoke_ctlops_t *in_args);
101 
102 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
103 
/* Capacity of the address/size tables in each struct ctgas node. */
#define	CTGENTRIES	15

/*
 * Chained node holding up to CTGENTRIES (address, size) pairs; ctglist is
 * the statically allocated first node.
 * NOTE(review): presumably bookkeeping for contiguous memory allocations;
 * the users are not in this part of the file -- confirm.
 */
static struct ctgas {
	struct ctgas	*ctg_next;	/* next node in the chain */
	int		ctg_index;	/* NOTE(review): likely slot cursor -- confirm */
	void		*ctg_addr[CTGENTRIES];
	size_t		ctg_size[CTGENTRIES];
} ctglist;

/* Mutex and enter/exit wrappers; CTGLOCK/CTGUNLOCK operate on ctgmutex. */
static kmutex_t		ctgmutex;
#define	CTGLOCK()	mutex_enter(&ctgmutex)
#define	CTGUNLOCK()	mutex_exit(&ctgmutex)
116 
/*
 * Minimum pfn value of page_t's put on the free list.  This is to simplify
 * support of ddi dma memory requests which specify small, non-zero addr_lo
 * values.
 *
 * The default value of 2, which corresponds to the only known non-zero addr_lo
 * value used, means a single page will be sacrificed (pfn typically starts
 * at 1).  ddiphysmin can be set to 0 to disable this.  It cannot be set above
 * 0x100; otherwise mp startup panics.
 */
pfn_t	ddiphysmin = 2;
128 
129 static void
130 check_driver_disable(void)
131 {
132 	int proplen = 128;
133 	char *prop_name;
134 	char *drv_name, *propval;
135 	major_t major;
136 
137 	prop_name = kmem_alloc(proplen, KM_SLEEP);
138 	for (major = 0; major < devcnt; major++) {
139 		drv_name = ddi_major_to_name(major);
140 		if (drv_name == NULL)
141 			continue;
142 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
143 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
144 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
145 			if (strcmp(propval, "true") == 0) {
146 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
147 				cmn_err(CE_NOTE, "driver %s disabled",
148 				    drv_name);
149 			}
150 			ddi_prop_free(propval);
151 		}
152 	}
153 	kmem_free(prop_name, proplen);
154 }
155 
156 
157 /*
158  * Configure the hardware on the system.
159  * Called before the rootfs is mounted
160  */
161 void
162 configure(void)
163 {
164 	extern void i_ddi_init_root();
165 
166 #if defined(__i386)
167 	extern int fpu_pentium_fdivbug;
168 #endif	/* __i386 */
169 	extern int fpu_ignored;
170 
171 	/*
172 	 * Determine if an FPU is attached
173 	 */
174 
175 	fpu_probe();
176 
177 #if defined(__i386)
178 	if (fpu_pentium_fdivbug) {
179 		printf("\
180 FP hardware exhibits Pentium floating point divide problem\n");
181 	}
182 #endif	/* __i386 */
183 
184 	if (fpu_ignored) {
185 		printf("FP hardware will not be used\n");
186 	} else if (!fpu_exists) {
187 		printf("No FPU in configuration\n");
188 	}
189 
190 	/*
191 	 * Initialize devices on the machine.
192 	 * Uses configuration tree built by the PROMs to determine what
193 	 * is present, and builds a tree of prototype dev_info nodes
194 	 * corresponding to the hardware which identified itself.
195 	 */
196 
197 	/*
198 	 * Initialize root node.
199 	 */
200 	i_ddi_init_root();
201 
202 	/* reprogram devices not set up by firmware (BIOS) */
203 	impl_bus_reprobe();
204 
205 	/*
206 	 * attach the isa nexus to get ACPI resource usage
207 	 * isa is "kind of" a pseudo node
208 	 */
209 #if defined(__xpv)
210 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
211 		if (pseudo_isa)
212 			(void) i_ddi_attach_pseudo_node("isa");
213 		else
214 			(void) i_ddi_attach_hw_nodes("isa");
215 	}
216 #else
217 	if (pseudo_isa)
218 		(void) i_ddi_attach_pseudo_node("isa");
219 	else
220 		(void) i_ddi_attach_hw_nodes("isa");
221 #endif
222 }
223 
224 /*
225  * The "status" property indicates the operational status of a device.
226  * If this property is present, the value is a string indicating the
227  * status of the device as follows:
228  *
229  *	"okay"		operational.
230  *	"disabled"	not operational, but might become operational.
231  *	"fail"		not operational because a fault has been detected,
232  *			and it is unlikely that the device will become
233  *			operational without repair. no additional details
234  *			are available.
235  *	"fail-xxx"	not operational because a fault has been detected,
236  *			and it is unlikely that the device will become
237  *			operational without repair. "xxx" is additional
238  *			human-readable information about the particular
239  *			fault condition that was detected.
240  *
241  * The absence of this property means that the operational status is
242  * unknown or okay.
243  *
244  * This routine checks the status property of the specified device node
245  * and returns 0 if the operational status indicates failure, and 1 otherwise.
246  *
247  * The property may exist on plug-in cards the existed before IEEE 1275-1994.
248  * And, in that case, the property may not even be a string. So we carefully
249  * check for the value "fail", in the beginning of the string, noting
250  * the property length.
251  */
252 int
253 status_okay(int id, char *buf, int buflen)
254 {
255 	char status_buf[OBP_MAXPROPNAME];
256 	char *bufp = buf;
257 	int len = buflen;
258 	int proplen;
259 	static const char *status = "status";
260 	static const char *fail = "fail";
261 	int fail_len = (int)strlen(fail);
262 
263 	/*
264 	 * Get the proplen ... if it's smaller than "fail",
265 	 * or doesn't exist ... then we don't care, since
266 	 * the value can't begin with the char string "fail".
267 	 *
268 	 * NB: proplen, if it's a string, includes the NULL in the
269 	 * the size of the property, and fail_len does not.
270 	 */
271 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
272 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
273 		return (1);
274 
275 	/*
276 	 * if a buffer was provided, use it
277 	 */
278 	if ((buf == (char *)NULL) || (buflen <= 0)) {
279 		bufp = status_buf;
280 		len = sizeof (status_buf);
281 	}
282 	*bufp = (char)0;
283 
284 	/*
285 	 * Get the property into the buffer, to the extent of the buffer,
286 	 * and in case the buffer is smaller than the property size,
287 	 * NULL terminate the buffer. (This handles the case where
288 	 * a buffer was passed in and the caller wants to print the
289 	 * value, but the buffer was too small).
290 	 */
291 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
292 	    (caddr_t)bufp, len);
293 	*(bufp + len - 1) = (char)0;
294 
295 	/*
296 	 * If the value begins with the char string "fail",
297 	 * then it means the node is failed. We don't care
298 	 * about any other values. We assume the node is ok
299 	 * although it might be 'disabled'.
300 	 */
301 	if (strncmp(bufp, fail, fail_len) == 0)
302 		return (0);
303 
304 	return (1);
305 }
306 
307 /*
308  * Check the status of the device node passed as an argument.
309  *
310  *	if ((status is OKAY) || (status is DISABLED))
311  *		return DDI_SUCCESS
312  *	else
313  *		print a warning and return DDI_FAILURE
314  */
315 /*ARGSUSED1*/
316 int
317 check_status(int id, char *name, dev_info_t *parent)
318 {
319 	char status_buf[64];
320 	char devtype_buf[OBP_MAXPROPNAME];
321 	int retval = DDI_FAILURE;
322 
323 	/*
324 	 * is the status okay?
325 	 */
326 	if (status_okay(id, status_buf, sizeof (status_buf)))
327 		return (DDI_SUCCESS);
328 
329 	/*
330 	 * a status property indicating bad memory will be associated
331 	 * with a node which has a "device_type" property with a value of
332 	 * "memory-controller". in this situation, return DDI_SUCCESS
333 	 */
334 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
335 	    sizeof (devtype_buf)) > 0) {
336 		if (strcmp(devtype_buf, "memory-controller") == 0)
337 			retval = DDI_SUCCESS;
338 	}
339 
340 	/*
341 	 * print the status property information
342 	 */
343 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
344 	return (retval);
345 }
346 
347 /*ARGSUSED*/
348 uint_t
349 softlevel1(caddr_t arg1, caddr_t arg2)
350 {
351 	softint();
352 	return (1);
353 }
354 
355 /*
356  * Allow for implementation specific correction of PROM property values.
357  */
358 
359 /*ARGSUSED*/
360 void
361 impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
362     caddr_t buffer)
363 {
364 	/*
365 	 * There are no adjustments needed in this implementation.
366 	 */
367 }
368 
369 static int
370 getlongprop_buf(int id, char *name, char *buf, int maxlen)
371 {
372 	int size;
373 
374 	size = prom_getproplen((pnode_t)id, name);
375 	if (size <= 0 || (size > maxlen - 1))
376 		return (-1);
377 
378 	if (-1 == prom_getprop((pnode_t)id, name, buf))
379 		return (-1);
380 
381 	if (strcmp("name", name) == 0) {
382 		if (buf[size - 1] != '\0') {
383 			buf[size] = '\0';
384 			size += 1;
385 		}
386 	}
387 
388 	return (size);
389 }
390 
391 static int
392 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
393 {
394 	int ret;
395 
396 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
397 	    DDI_PROP_DONTPASS, pname, pval, plen))
398 	    == DDI_PROP_SUCCESS) {
399 		*plen = (*plen) * (sizeof (int));
400 	}
401 	return (ret);
402 }
403 
404 
/*
 * Node Configuration
 */

/*
 * One element of the old-form 'intr' property as consumed by
 * make_ddi_ppd(): a <priority, vector> pair.
 */
struct prop_ispec {
	uint_t	pri, vec;
};

/*
 * For the x86, we're prepared to claim that the interrupt string
 * is in the form of a list of <ipl,vec> specifications.
 */

/* Inclusive bounds on vec values accepted when translating 'interrupts'. */
#define	VEC_MIN	1
#define	VEC_MAX	255
420 
421 static int
422 impl_xlate_intrs(dev_info_t *child, int *in,
423     struct ddi_parent_private_data *pdptr)
424 {
425 	size_t size;
426 	int n;
427 	struct intrspec *new;
428 	caddr_t got_prop;
429 	int *inpri;
430 	int got_len;
431 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
432 
433 	static char bad_intr_fmt[] =
434 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
435 
436 	/*
437 	 * determine if the driver is expecting the new style "interrupts"
438 	 * property which just contains the IRQ, or the old style which
439 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
440 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
441 	 * in that case, the "interrupt-priorities" property contains the
442 	 * IPL values that match, one for one, the IRQ values in the
443 	 * "interrupts" property.
444 	 */
445 	inpri = NULL;
446 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
447 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
448 		/* the old style "interrupts" property... */
449 
450 		/*
451 		 * The list consists of <ipl,vec> elements
452 		 */
453 		if ((n = (*in++ >> 1)) < 1)
454 			return (DDI_FAILURE);
455 
456 		pdptr->par_nintr = n;
457 		size = n * sizeof (struct intrspec);
458 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
459 
460 		while (n--) {
461 			int level = *in++;
462 			int vec = *in++;
463 
464 			if (level < 1 || level > MAXIPL ||
465 			    vec < VEC_MIN || vec > VEC_MAX) {
466 				cmn_err(CE_CONT, bad_intr_fmt,
467 				    DEVI(child)->devi_name,
468 				    DEVI(child)->devi_instance, level, vec);
469 				goto broken;
470 			}
471 			new->intrspec_pri = level;
472 			if (vec != 2)
473 				new->intrspec_vec = vec;
474 			else
475 				/*
476 				 * irq 2 on the PC bus is tied to irq 9
477 				 * on ISA, EISA and MicroChannel
478 				 */
479 				new->intrspec_vec = 9;
480 			new++;
481 		}
482 
483 		return (DDI_SUCCESS);
484 	} else {
485 		/* the new style "interrupts" property... */
486 
487 		/*
488 		 * The list consists of <vec> elements
489 		 */
490 		if ((n = (*in++)) < 1)
491 			return (DDI_FAILURE);
492 
493 		pdptr->par_nintr = n;
494 		size = n * sizeof (struct intrspec);
495 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
496 
497 		/* XXX check for "interrupt-priorities" property... */
498 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
499 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
500 		    == DDI_PROP_SUCCESS) {
501 			if (n != (got_len / sizeof (int))) {
502 				cmn_err(CE_CONT,
503 				    "bad interrupt-priorities length"
504 				    " from %s%d: expected %d, got %d\n",
505 				    DEVI(child)->devi_name,
506 				    DEVI(child)->devi_instance, n,
507 				    (int)(got_len / sizeof (int)));
508 				goto broken;
509 			}
510 			inpri = (int *)got_prop;
511 		}
512 
513 		while (n--) {
514 			int level;
515 			int vec = *in++;
516 
517 			if (inpri == NULL)
518 				level = 5;
519 			else
520 				level = *inpri++;
521 
522 			if (level < 1 || level > MAXIPL ||
523 			    vec < VEC_MIN || vec > VEC_MAX) {
524 				cmn_err(CE_CONT, bad_intr_fmt,
525 				    DEVI(child)->devi_name,
526 				    DEVI(child)->devi_instance, level, vec);
527 				goto broken;
528 			}
529 			new->intrspec_pri = level;
530 			if (vec != 2)
531 				new->intrspec_vec = vec;
532 			else
533 				/*
534 				 * irq 2 on the PC bus is tied to irq 9
535 				 * on ISA, EISA and MicroChannel
536 				 */
537 				new->intrspec_vec = 9;
538 			new++;
539 		}
540 
541 		if (inpri != NULL)
542 			kmem_free(got_prop, got_len);
543 		return (DDI_SUCCESS);
544 	}
545 
546 broken:
547 	kmem_free(pdptr->par_intr, size);
548 	pdptr->par_intr = NULL;
549 	pdptr->par_nintr = 0;
550 	if (inpri != NULL)
551 		kmem_free(got_prop, got_len);
552 
553 	return (DDI_FAILURE);
554 }
555 
556 /*
557  * Create a ddi_parent_private_data structure from the ddi properties of
558  * the dev_info node.
559  *
560  * The "reg" and either an "intr" or "interrupts" properties are required
561  * if the driver wishes to create mappings or field interrupts on behalf
562  * of the device.
563  *
564  * The "reg" property is assumed to be a list of at least one triple
565  *
566  *	<bustype, address, size>*1
567  *
568  * The "intr" property is assumed to be a list of at least one duple
569  *
570  *	<SPARC ipl, vector#>*1
571  *
572  * The "interrupts" property is assumed to be a list of at least one
573  * n-tuples that describes the interrupt capabilities of the bus the device
574  * is connected to.  For SBus, this looks like
575  *
576  *	<SBus-level>*1
577  *
578  * (This property obsoletes the 'intr' property).
579  *
580  * The "ranges" property is optional.
581  */
582 void
583 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
584 {
585 	struct ddi_parent_private_data *pdptr;
586 	int n;
587 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
588 	uint_t reg_len, rng_len, intr_len, irupts_len;
589 
590 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
591 
592 	/*
593 	 * Handle the 'reg' property.
594 	 */
595 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
596 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
597 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
598 		pdptr->par_reg = (struct regspec *)reg_prop;
599 	}
600 
601 	/*
602 	 * See if I have a range (adding one where needed - this
603 	 * means to add one for sbus node in sun4c, when romvec > 0,
604 	 * if no range is already defined in the PROM node.
605 	 * (Currently no sun4c PROMS define range properties,
606 	 * but they should and may in the future.)  For the SBus
607 	 * node, the range is defined by the SBus reg property.
608 	 */
609 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
610 	    == DDI_PROP_SUCCESS) {
611 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
612 		pdptr->par_rng = (struct rangespec *)rng_prop;
613 	}
614 
615 	/*
616 	 * Handle the 'intr' and 'interrupts' properties
617 	 */
618 
619 	/*
620 	 * For backwards compatibility
621 	 * we first look for the 'intr' property for the device.
622 	 */
623 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
624 	    != DDI_PROP_SUCCESS) {
625 		intr_len = 0;
626 	}
627 
628 	/*
629 	 * If we're to support bus adapters and future platforms cleanly,
630 	 * we need to support the generalized 'interrupts' property.
631 	 */
632 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
633 	    &irupts_len) != DDI_PROP_SUCCESS) {
634 		irupts_len = 0;
635 	} else if (intr_len != 0) {
636 		/*
637 		 * If both 'intr' and 'interrupts' are defined,
638 		 * then 'interrupts' wins and we toss the 'intr' away.
639 		 */
640 		ddi_prop_free((void *)intr_prop);
641 		intr_len = 0;
642 	}
643 
644 	if (intr_len != 0) {
645 
646 		/*
647 		 * Translate the 'intr' property into an array
648 		 * an array of struct intrspec's.  There's not really
649 		 * very much to do here except copy what's out there.
650 		 */
651 
652 		struct intrspec *new;
653 		struct prop_ispec *l;
654 
655 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
656 		l = (struct prop_ispec *)intr_prop;
657 		pdptr->par_intr =
658 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
659 		while (n--) {
660 			new->intrspec_pri = l->pri;
661 			new->intrspec_vec = l->vec;
662 			new++;
663 			l++;
664 		}
665 		ddi_prop_free((void *)intr_prop);
666 
667 	} else if ((n = irupts_len) != 0) {
668 		size_t size;
669 		int *out;
670 
671 		/*
672 		 * Translate the 'interrupts' property into an array
673 		 * of intrspecs for the rest of the DDI framework to
674 		 * toy with.  Only our ancestors really know how to
675 		 * do this, so ask 'em.  We massage the 'interrupts'
676 		 * property so that it is pre-pended by a count of
677 		 * the number of integers in the argument.
678 		 */
679 		size = sizeof (int) + n;
680 		out = kmem_alloc(size, KM_SLEEP);
681 		*out = n / sizeof (int);
682 		bcopy(irupts_prop, out + 1, (size_t)n);
683 		ddi_prop_free((void *)irupts_prop);
684 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
685 			cmn_err(CE_CONT,
686 			    "Unable to translate 'interrupts' for %s%d\n",
687 			    DEVI(child)->devi_binding_name,
688 			    DEVI(child)->devi_instance);
689 		}
690 		kmem_free(out, size);
691 	}
692 }
693 
694 /*
695  * Name a child
696  */
697 static int
698 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
699 {
700 	/*
701 	 * Fill in parent-private data and this function returns to us
702 	 * an indication if it used "registers" to fill in the data.
703 	 */
704 	if (ddi_get_parent_data(child) == NULL) {
705 		struct ddi_parent_private_data *pdptr;
706 		make_ddi_ppd(child, &pdptr);
707 		ddi_set_parent_data(child, pdptr);
708 	}
709 
710 	name[0] = '\0';
711 	if (sparc_pd_getnreg(child) > 0) {
712 		(void) snprintf(name, namelen, "%x,%x",
713 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
714 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
715 	}
716 
717 	return (DDI_SUCCESS);
718 }
719 
720 /*
721  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
722  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
723  * the children of sun busses based on the reg spec.
724  *
725  * Handles the following properties (in make_ddi_ppd):
726  *	Property		value
727  *	  Name			type
728  *	reg		register spec
729  *	intr		old-form interrupt spec
730  *	interrupts	new (bus-oriented) interrupt spec
731  *	ranges		range spec
732  */
733 int
734 impl_ddi_sunbus_initchild(dev_info_t *child)
735 {
736 	char name[MAXNAMELEN];
737 	void impl_ddi_sunbus_removechild(dev_info_t *);
738 
739 	/*
740 	 * Name the child, also makes parent private data
741 	 */
742 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
743 	ddi_set_name_addr(child, name);
744 
745 	/*
746 	 * Attempt to merge a .conf node; if successful, remove the
747 	 * .conf node.
748 	 */
749 	if ((ndi_dev_is_persistent_node(child) == 0) &&
750 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
751 		/*
752 		 * Return failure to remove node
753 		 */
754 		impl_ddi_sunbus_removechild(child);
755 		return (DDI_FAILURE);
756 	}
757 	return (DDI_SUCCESS);
758 }
759 
760 void
761 impl_free_ddi_ppd(dev_info_t *dip)
762 {
763 	struct ddi_parent_private_data *pdptr;
764 	size_t n;
765 
766 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
767 		return;
768 
769 	if ((n = (size_t)pdptr->par_nintr) != 0)
770 		/*
771 		 * Note that kmem_free is used here (instead of
772 		 * ddi_prop_free) because the contents of the
773 		 * property were placed into a separate buffer and
774 		 * mucked with a bit before being stored in par_intr.
775 		 * The actual return value from the prop lookup
776 		 * was freed with ddi_prop_free previously.
777 		 */
778 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
779 
780 	if ((n = (size_t)pdptr->par_nrng) != 0)
781 		ddi_prop_free((void *)pdptr->par_rng);
782 
783 	if ((n = pdptr->par_nreg) != 0)
784 		ddi_prop_free((void *)pdptr->par_reg);
785 
786 	kmem_free(pdptr, sizeof (*pdptr));
787 	ddi_set_parent_data(dip, NULL);
788 }
789 
790 void
791 impl_ddi_sunbus_removechild(dev_info_t *dip)
792 {
793 	impl_free_ddi_ppd(dip);
794 	ddi_set_name_addr(dip, NULL);
795 	/*
796 	 * Strip the node to properly convert it back to prototype form
797 	 */
798 	impl_rem_dev_props(dip);
799 }
800 
/*
 * DDI Interrupt
 */

/*
 * Turn this on to force isa, eisa, and mca devices to ignore the new
 * hardware nodes in the device tree (normally turned on only for
 * drivers that need it by setting the property "ignore-hardware-nodes"
 * in their driver.conf file).
 *
 * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
 *		as safety valve.
 *
 * impl_xlate_intrs() treats a non-zero value as a request for the old
 * style <IPL,IRQ> interpretation of the "interrupts" property.
 */
int ignore_hardware_nodes = 0;

/*
 * Local data
 */
/* NOTE(review): impl_busp is not referenced in this part of the file. */
static struct impl_bus_promops *impl_busp;
820 
821 
822 /*
823  * New DDI interrupt framework
824  */
825 
826 /*
827  * i_ddi_intr_ops:
828  *
829  * This is the interrupt operator function wrapper for the bus function
830  * bus_intr_op.
831  */
832 int
833 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
834     ddi_intr_handle_impl_t *hdlp, void * result)
835 {
836 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
837 	int		ret = DDI_FAILURE;
838 
839 	/* request parent to process this interrupt op */
840 	if (NEXUS_HAS_INTR_OP(pdip))
841 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
842 		    pdip, rdip, op, hdlp, result);
843 	else
844 		cmn_err(CE_WARN, "Failed to process interrupt "
845 		    "for %s%d due to down-rev nexus driver %s%d",
846 		    ddi_get_name(rdip), ddi_get_instance(rdip),
847 		    ddi_get_name(pdip), ddi_get_instance(pdip));
848 	return (ret);
849 }
850 
851 /*
852  * i_ddi_add_softint - allocate and add a soft interrupt to the system
853  */
854 int
855 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
856 {
857 	int ret;
858 
859 	/* add soft interrupt handler */
860 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
861 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
862 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
863 }
864 
865 
866 void
867 i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
868 {
869 	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
870 }
871 
872 
873 extern void (*setsoftint)(int, struct av_softinfo *);
874 extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
875 
876 int
877 i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
878 {
879 	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
880 		return (DDI_EPENDING);
881 
882 	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);
883 
884 	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
885 	return (DDI_SUCCESS);
886 }
887 
888 /*
889  * i_ddi_set_softint_pri:
890  *
891  * The way this works is that it first tries to add a softint vector
892  * at the new priority in hdlp. If that succeeds; then it removes the
893  * existing softint vector at the old priority.
894  */
895 int
896 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
897 {
898 	int ret;
899 
900 	/*
901 	 * If a softint is pending at the old priority then fail the request.
902 	 */
903 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
904 		return (DDI_FAILURE);
905 
906 	ret = av_softint_movepri((void *)hdlp, old_pri);
907 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
908 }
909 
910 void
911 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
912 {
913 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
914 }
915 
916 void
917 i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
918 {
919 	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
920 	hdlp->ih_private = NULL;
921 }
922 
923 int
924 i_ddi_get_intx_nintrs(dev_info_t *dip)
925 {
926 	struct ddi_parent_private_data *pdp;
927 
928 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
929 		return (0);
930 
931 	return (pdp->par_nintr);
932 }
933 
/*
 * DDI Memory/DMA
 */

/*
 * Support for allocating DMAable memory to implement
 * ddi_dma_mem_alloc(9F) interface.
 */

/*
 * kmem_io_init() creates KA_NCACHE caches per memory range, with object
 * sizes KA_ALIGN << c for c = 0 .. KA_NCACHE - 1.
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)

/*
 * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
 * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
 *
 * NOTE(review): the unlabeled initializers follow the ddi_dma_attr(9S)
 * member order (count_max after addr_hi; burstsizes, minxfer, maxxfer,
 * seg, sgllen, granular, flags after align) -- confirm against the
 * structure definition.
 */

static ddi_dma_attr_t kmem_io_attr = {
	DMA_ATTR_V0,
	0x0000000000000000ULL,		/* dma_attr_addr_lo */
	0x0000000000000000ULL,		/* dma_attr_addr_hi */
	0x00ffffff,
	0x1000,				/* dma_attr_align */
	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
};

/*
 * kmem io memory ranges and indices.  The order matches the rows of
 * io_arena_params[], from the largest address limit to the smallest.
 */
enum {
	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};

/* Per-range state: the vmem arena, its caches, and its DMA attributes. */
static struct {
	vmem_t		*kmem_io_arena;
	kmem_cache_t	*kmem_io_cache[KA_NCACHE];
	ddi_dma_attr_t	kmem_io_attr;
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;		/* index of first populated kmem_io[] */
974 
975 static page_t *
976 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
977 {
978 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
979 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
980 
981 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
982 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
983 }
984 
#ifdef __xpv
/*
 * vmem free routine for the kmem_io arenas under the hypervisor: the
 * pages are released through page_destroy_io().
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
	extern void page_destroy_io(page_t *);

	segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
993 
994 static void *
995 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
996 {
997 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
998 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
999 }
1000 
1001 static void *
1002 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1003 {
1004 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1005 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1006 }
1007 
1008 static void *
1009 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1010 {
1011 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1012 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1013 }
1014 
1015 static void *
1016 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1017 {
1018 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1019 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1020 }
1021 
1022 static void *
1023 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1024 {
1025 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1026 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1027 }
1028 
1029 static void *
1030 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1031 {
1032 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1033 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1034 }
1035 
1036 static void *
1037 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1038 {
1039 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1040 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1041 }
1042 
1043 static void *
1044 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1045 {
1046 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1047 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1048 }
1049 
1050 static void *
1051 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1052 {
1053 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1054 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1055 }
1056 
1057 static void *
1058 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1059 {
1060 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1061 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1062 }
1063 
1064 static void *
1065 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1066 {
1067 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1068 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1069 }
1070 
/*
 * Static description of each kmem_io memory range, indexed by the IO_*
 * enum values: the address limit, the arena name, the vmem allocation
 * routine for the range, and whether the arena is flagged for creation
 * during startup.
 */
struct {
	uint64_t	io_limit;
	char		*io_name;
	void		*(*io_alloc)(vmem_t *, size_t, int);
	int		io_initial;	/* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
};
1089 
1090 void
1091 kmem_io_init(int a)
1092 {
1093 	int	c;
1094 	char name[40];
1095 
1096 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1097 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1098 #ifdef __xpv
1099 	    segkmem_free_io,
1100 #else
1101 	    segkmem_free,
1102 #endif
1103 	    heap_arena, 0, VM_SLEEP);
1104 
1105 	for (c = 0; c < KA_NCACHE; c++) {
1106 		size_t size = KA_ALIGN << c;
1107 		(void) sprintf(name, "%s_%lu",
1108 		    io_arena_params[a].io_name, size);
1109 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1110 		    size, size, NULL, NULL, NULL, NULL,
1111 		    kmem_io[a].kmem_io_arena, 0);
1112 	}
1113 }
1114 
1115 /*
1116  * Return the index of the highest memory range for addr.
1117  */
1118 static int
1119 kmem_io_index(uint64_t addr)
1120 {
1121 	int n;
1122 
1123 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1124 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1125 			if (kmem_io[n].kmem_io_arena == NULL)
1126 				kmem_io_init(n);
1127 			return (n);
1128 		}
1129 	}
1130 	panic("kmem_io_index: invalid addr - must be at least 16m");
1131 
1132 	/*NOTREACHED*/
1133 }
1134 
1135 /*
1136  * Return the index of the next kmem_io populated memory range
1137  * after curindex.
1138  */
1139 static int
1140 kmem_io_index_next(int curindex)
1141 {
1142 	int n;
1143 
1144 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1145 		if (kmem_io[n].kmem_io_arena)
1146 			return (n);
1147 	}
1148 	return (-1);
1149 }
1150 
1151 /*
1152  * allow kmem to be mapped in with different PTE cache attribute settings.
1153  * Used by i_ddi_mem_alloc()
1154  */
1155 int
1156 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1157 {
1158 	uint_t hat_flags;
1159 	caddr_t kva_end;
1160 	uint_t hat_attr;
1161 	pfn_t pfn;
1162 
1163 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1164 		return (-1);
1165 	}
1166 
1167 	hat_attr &= ~HAT_ORDER_MASK;
1168 	hat_attr |= order | HAT_NOSYNC;
1169 	hat_flags = HAT_LOAD_LOCK;
1170 
1171 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1172 	    (uintptr_t)PAGEMASK);
1173 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1174 
1175 	while (kva < kva_end) {
1176 		pfn = hat_getpfnum(kas.a_hat, kva);
1177 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1178 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1179 		kva += MMU_PAGESIZE;
1180 	}
1181 
1182 	return (0);
1183 }
1184 
1185 void
1186 ka_init(void)
1187 {
1188 	int a;
1189 	paddr_t maxphysaddr;
1190 #if !defined(__xpv)
1191 	extern pfn_t physmax;
1192 
1193 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1194 #else
1195 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1196 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1197 #endif
1198 
1199 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1200 
1201 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1202 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1203 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1204 				io_arena_params[a].io_limit = maxphysaddr;
1205 			else
1206 				a++;
1207 			break;
1208 		}
1209 	}
1210 	kmem_io_idx = a;
1211 
1212 	for (; a < MAX_MEM_RANGES; a++) {
1213 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1214 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1215 		    io_arena_params[a].io_limit;
1216 		/*
1217 		 * initialize kmem_io[] arena/cache corresponding to
1218 		 * maxphysaddr and to the "common" io memory ranges that
1219 		 * have io_initial set to a non-zero value.
1220 		 */
1221 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1222 			kmem_io_init(a);
1223 	}
1224 }
1225 
1226 /*
1227  * put contig address/size
1228  */
1229 static void *
1230 putctgas(void *addr, size_t size)
1231 {
1232 	struct ctgas	*ctgp = &ctglist;
1233 	int		i;
1234 
1235 	CTGLOCK();
1236 	do {
1237 		if ((i = ctgp->ctg_index) < CTGENTRIES) {
1238 			ctgp->ctg_addr[i] = addr;
1239 			ctgp->ctg_size[i] = size;
1240 			ctgp->ctg_index++;
1241 			break;
1242 		}
1243 		if (!ctgp->ctg_next)
1244 			ctgp->ctg_next = kmem_zalloc(sizeof (struct ctgas),
1245 			    KM_NOSLEEP);
1246 		ctgp = ctgp->ctg_next;
1247 	} while (ctgp);
1248 
1249 	CTGUNLOCK();
1250 	return (ctgp);
1251 }
1252 
1253 /*
1254  * get contig size by addr
1255  */
1256 static size_t
1257 getctgsz(void *addr)
1258 {
1259 	struct ctgas	*ctgp = &ctglist;
1260 	int		i, j;
1261 	size_t		sz;
1262 
1263 	ASSERT(addr);
1264 	CTGLOCK();
1265 
1266 	while (ctgp) {
1267 		for (i = 0; i < ctgp->ctg_index; i++) {
1268 			if (addr != ctgp->ctg_addr[i])
1269 				continue;
1270 
1271 			sz = ctgp->ctg_size[i];
1272 			j = --ctgp->ctg_index;
1273 			if (i != j) {
1274 				ctgp->ctg_size[i] = ctgp->ctg_size[j];
1275 				ctgp->ctg_addr[i] = ctgp->ctg_addr[j];
1276 			}
1277 			CTGUNLOCK();
1278 			return (sz);
1279 		}
1280 		ctgp = ctgp->ctg_next;
1281 	}
1282 
1283 	CTGUNLOCK();
1284 	return (0);
1285 }
1286 
1287 /*
1288  * contig_alloc:
1289  *
1290  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1291  *	specified in 'attr'.
1292  *
1293  *	Not all of memory need to be physically contiguous if the
1294  *	scatter-gather list length is greater than 1.
1295  */
1296 
1297 /*ARGSUSED*/
1298 void *
1299 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1300 {
1301 	pgcnt_t		pgcnt = btopr(size);
1302 	size_t		asize = pgcnt * PAGESIZE;
1303 	page_t		*ppl;
1304 	int		pflag;
1305 	void		*addr;
1306 
1307 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1308 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1309 
1310 	/* segkmem_xalloc */
1311 
1312 	if (align <= PAGESIZE)
1313 		addr = vmem_alloc(heap_arena, asize,
1314 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1315 	else
1316 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1317 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1318 	if (addr) {
1319 		ASSERT(!((uintptr_t)addr & (align - 1)));
1320 
1321 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1322 			vmem_free(heap_arena, addr, asize);
1323 			return (NULL);
1324 		}
1325 		pflag = PG_EXCL;
1326 
1327 		if (cansleep)
1328 			pflag |= PG_WAIT;
1329 
1330 		/* 4k req gets from freelists rather than pfn search */
1331 		if (pgcnt > 1 || align > PAGESIZE)
1332 			pflag |= PG_PHYSCONTIG;
1333 
1334 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1335 		    asize, pflag, &kas, (caddr_t)addr, attr);
1336 
1337 		if (!ppl) {
1338 			vmem_free(heap_arena, addr, asize);
1339 			page_unresv(pgcnt);
1340 			return (NULL);
1341 		}
1342 
1343 		while (ppl != NULL) {
1344 			page_t	*pp = ppl;
1345 			page_sub(&ppl, pp);
1346 			ASSERT(page_iolock_assert(pp));
1347 			page_io_unlock(pp);
1348 			page_downgrade(pp);
1349 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1350 			    pp, (PROT_ALL & ~PROT_USER) |
1351 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1352 		}
1353 	}
1354 	return (addr);
1355 }
1356 
1357 void
1358 contig_free(void *addr, size_t size)
1359 {
1360 	pgcnt_t	pgcnt = btopr(size);
1361 	size_t	asize = pgcnt * PAGESIZE;
1362 	caddr_t	a, ea;
1363 	page_t	*pp;
1364 
1365 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1366 
1367 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1368 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1369 		if (!pp)
1370 			panic("contig_free: contig pp not found");
1371 
1372 		if (!page_tryupgrade(pp)) {
1373 			page_unlock(pp);
1374 			pp = page_lookup(&kvp,
1375 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1376 			if (pp == NULL)
1377 				panic("contig_free: page freed");
1378 		}
1379 		page_destroy(pp, 0);
1380 	}
1381 
1382 	page_unresv(pgcnt);
1383 	vmem_free(heap_arena, addr, asize);
1384 }
1385 
1386 /*
1387  * Allocate from the system, aligned on a specific boundary.
1388  * The alignment, if non-zero, must be a power of 2.
1389  */
1390 static void *
1391 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1392 	ddi_dma_attr_t *attr)
1393 {
1394 	size_t *addr, *raddr, rsize;
1395 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
1396 	int a, i, c;
1397 	vmem_t *vmp;
1398 	kmem_cache_t *cp = NULL;
1399 
1400 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1401 		return (NULL);
1402 
1403 	align = MAX(align, hdrsize);
1404 	ASSERT((align & (align - 1)) == 0);
1405 
1406 	/*
1407 	 * All of our allocators guarantee 16-byte alignment, so we don't
1408 	 * need to reserve additional space for the header.
1409 	 * To simplify picking the correct kmem_io_cache, we round up to
1410 	 * a multiple of KA_ALIGN.
1411 	 */
1412 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1413 
1414 	if (physcontig && rsize > PAGESIZE) {
1415 		if (addr = contig_alloc(size, attr, align, cansleep)) {
1416 			if (!putctgas(addr, size))
1417 				contig_free(addr, size);
1418 			else
1419 				return (addr);
1420 		}
1421 		return (NULL);
1422 	}
1423 
1424 	a = kmem_io_index(attr->dma_attr_addr_hi);
1425 
1426 	if (rsize > PAGESIZE) {
1427 		vmp = kmem_io[a].kmem_io_arena;
1428 		raddr = vmem_alloc(vmp, rsize,
1429 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1430 	} else {
1431 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1432 		cp = kmem_io[a].kmem_io_cache[c];
1433 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1434 		    KM_NOSLEEP);
1435 	}
1436 
1437 	if (raddr == NULL) {
1438 		int	na;
1439 
1440 		ASSERT(cansleep == 0);
1441 		if (rsize > PAGESIZE)
1442 			return (NULL);
1443 		/*
1444 		 * System does not have memory in the requested range.
1445 		 * Try smaller kmem io ranges and larger cache sizes
1446 		 * to see if there might be memory available in
1447 		 * these other caches.
1448 		 */
1449 
1450 		for (na = kmem_io_index_next(a); na >= 0;
1451 		    na = kmem_io_index_next(na)) {
1452 			ASSERT(kmem_io[na].kmem_io_arena);
1453 			cp = kmem_io[na].kmem_io_cache[c];
1454 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1455 			if (raddr)
1456 				goto kallocdone;
1457 		}
1458 		/* now try the larger kmem io cache sizes */
1459 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1460 			for (i = c + 1; i < KA_NCACHE; i++) {
1461 				cp = kmem_io[na].kmem_io_cache[i];
1462 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1463 				if (raddr)
1464 					goto kallocdone;
1465 			}
1466 		}
1467 		return (NULL);
1468 	}
1469 
1470 kallocdone:
1471 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1472 	    rsize > PAGESIZE);
1473 
1474 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1475 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1476 
1477 	addr[-4] = (size_t)cp;
1478 	addr[-3] = (size_t)vmp;
1479 	addr[-2] = (size_t)raddr;
1480 	addr[-1] = rsize;
1481 
1482 	return (addr);
1483 }
1484 
1485 static void
1486 kfreea(void *addr)
1487 {
1488 	size_t		size;
1489 
1490 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1491 		contig_free(addr, size);
1492 	} else {
1493 		size_t	*saddr = addr;
1494 		if (saddr[-4] == 0)
1495 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1496 			    saddr[-1]);
1497 		else
1498 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1499 			    (void *)saddr[-2]);
1500 	}
1501 }
1502 
/*
 * Intentionally empty on this platform: neither devaccp nor *hataccp is
 * read or modified.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1508 
1509 /*
1510  * Check if the specified cache attribute is supported on the platform.
1511  * This function must be called before i_ddi_cacheattr_to_hatacc().
1512  */
1513 boolean_t
1514 i_ddi_check_cache_attr(uint_t flags)
1515 {
1516 	/*
1517 	 * The cache attributes are mutually exclusive. Any combination of
1518 	 * the attributes leads to a failure.
1519 	 */
1520 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1521 	if ((cache_attr != 0) && ((cache_attr & (cache_attr - 1)) != 0))
1522 		return (B_FALSE);
1523 
1524 	/* All cache attributes are supported on X86/X64 */
1525 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1526 	    IOMEM_DATA_UC_WR_COMBINE))
1527 		return (B_TRUE);
1528 
1529 	/* undefined attributes */
1530 	return (B_FALSE);
1531 }
1532 
1533 /* set HAT cache attributes from the cache attributes */
1534 void
1535 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1536 {
1537 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1538 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1539 
1540 	/*
1541 	 * If write-combining is not supported, then it falls back
1542 	 * to uncacheable.
1543 	 */
1544 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE && !(x86_feature & X86_PAT))
1545 		cache_attr = IOMEM_DATA_UNCACHED;
1546 
1547 	/*
1548 	 * set HAT attrs according to the cache attrs.
1549 	 */
1550 	switch (cache_attr) {
1551 	case IOMEM_DATA_UNCACHED:
1552 		*hataccp &= ~HAT_ORDER_MASK;
1553 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1554 		break;
1555 	case IOMEM_DATA_UC_WR_COMBINE:
1556 		*hataccp &= ~HAT_ORDER_MASK;
1557 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1558 		break;
1559 	case IOMEM_DATA_CACHED:
1560 		*hataccp &= ~HAT_ORDER_MASK;
1561 		*hataccp |= HAT_UNORDERED_OK;
1562 		break;
1563 	/*
1564 	 * This case must not occur because the cache attribute is scrutinized
1565 	 * before this function is called.
1566 	 */
1567 	default:
1568 		/*
1569 		 * set cacheable to hat attrs.
1570 		 */
1571 		*hataccp &= ~HAT_ORDER_MASK;
1572 		*hataccp |= HAT_UNORDERED_OK;
1573 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1574 		    fname, cache_attr);
1575 	}
1576 }
1577 
1578 /*
1579  * This should actually be called i_ddi_dma_mem_alloc. There should
1580  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1581  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1582  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1583  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1584  * so far which is used for both, DMA and PIO, we have to use the DMA
1585  * ctl ops to make everybody happy.
1586  */
1587 /*ARGSUSED*/
1588 int
1589 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1590 	size_t length, int cansleep, int flags,
1591 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1592 	size_t *real_length, ddi_acc_hdl_t *ap)
1593 {
1594 	caddr_t a;
1595 	int iomin;
1596 	ddi_acc_impl_t *iap;
1597 	int physcontig = 0;
1598 	pgcnt_t npages;
1599 	pgcnt_t minctg;
1600 	uint_t order;
1601 	int e;
1602 
1603 	/*
1604 	 * Check legality of arguments
1605 	 */
1606 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1607 		return (DDI_FAILURE);
1608 	}
1609 
1610 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1611 	    (attr->dma_attr_align & (attr->dma_attr_align - 1)) ||
1612 	    (attr->dma_attr_minxfer & (attr->dma_attr_minxfer - 1))) {
1613 			return (DDI_FAILURE);
1614 	}
1615 
1616 	/*
1617 	 * figure out most restrictive alignment requirement
1618 	 */
1619 	iomin = attr->dma_attr_minxfer;
1620 	iomin = maxbit(iomin, attr->dma_attr_align);
1621 	if (iomin == 0)
1622 		return (DDI_FAILURE);
1623 
1624 	ASSERT((iomin & (iomin - 1)) == 0);
1625 
1626 	/*
1627 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1628 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1629 	 * memory that ends on a page boundry.
1630 	 * Don't want to have to different cache mappings to the same
1631 	 * physical page.
1632 	 */
1633 	if (OVERRIDE_CACHE_ATTR(flags)) {
1634 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1635 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1636 	}
1637 
1638 	/*
1639 	 * Determine if we need to satisfy the request for physically
1640 	 * contiguous memory or alignments larger than pagesize.
1641 	 */
1642 	npages = btopr(length + attr->dma_attr_align);
1643 	minctg = howmany(npages, attr->dma_attr_sgllen);
1644 
1645 	if (minctg > 1) {
1646 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1647 		/*
1648 		 * verify that the minimum contig requirement for the
1649 		 * actual length does not cross segment boundary.
1650 		 */
1651 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1652 		    size_t);
1653 		npages = btopr(length);
1654 		minctg = howmany(npages, attr->dma_attr_sgllen);
1655 		if (minctg > pfnseg + 1)
1656 			return (DDI_FAILURE);
1657 		physcontig = 1;
1658 	} else {
1659 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1660 	}
1661 
1662 	/*
1663 	 * Allocate the requested amount from the system.
1664 	 */
1665 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1666 
1667 	if ((*kaddrp = a) == NULL)
1668 		return (DDI_FAILURE);
1669 
1670 	/*
1671 	 * if we to modify the cache attributes, go back and muck with the
1672 	 * mappings.
1673 	 */
1674 	if (OVERRIDE_CACHE_ATTR(flags)) {
1675 		order = 0;
1676 		i_ddi_cacheattr_to_hatacc(flags, &order);
1677 		e = kmem_override_cache_attrs(a, length, order);
1678 		if (e != 0) {
1679 			kfreea(a);
1680 			return (DDI_FAILURE);
1681 		}
1682 	}
1683 
1684 	if (real_length) {
1685 		*real_length = length;
1686 	}
1687 	if (ap) {
1688 		/*
1689 		 * initialize access handle
1690 		 */
1691 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1692 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1693 		impl_acc_hdl_init(ap);
1694 	}
1695 
1696 	return (DDI_SUCCESS);
1697 }
1698 
1699 /*
1700  * covert old DMA limits structure to DMA attribute structure
1701  * and continue
1702  */
1703 int
1704 i_ddi_mem_alloc_lim(dev_info_t *dip, ddi_dma_lim_t *limits,
1705 	size_t length, int cansleep, int streaming,
1706 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1707 	uint_t *real_length, ddi_acc_hdl_t *ap)
1708 {
1709 	ddi_dma_attr_t dma_attr, *attrp;
1710 	size_t rlen;
1711 	int ret;
1712 
1713 	if (limits == NULL) {
1714 		return (DDI_FAILURE);
1715 	}
1716 
1717 	/*
1718 	 * set up DMA attribute structure to pass to i_ddi_mem_alloc()
1719 	 */
1720 	attrp = &dma_attr;
1721 	attrp->dma_attr_version = DMA_ATTR_V0;
1722 	attrp->dma_attr_addr_lo = (uint64_t)limits->dlim_addr_lo;
1723 	attrp->dma_attr_addr_hi = (uint64_t)limits->dlim_addr_hi;
1724 	attrp->dma_attr_count_max = (uint64_t)limits->dlim_ctreg_max;
1725 	attrp->dma_attr_align = 1;
1726 	attrp->dma_attr_burstsizes = (uint_t)limits->dlim_burstsizes;
1727 	attrp->dma_attr_minxfer = (uint32_t)limits->dlim_minxfer;
1728 	attrp->dma_attr_maxxfer = (uint64_t)limits->dlim_reqsize;
1729 	attrp->dma_attr_seg = (uint64_t)limits->dlim_adreg_max;
1730 	attrp->dma_attr_sgllen = limits->dlim_sgllen;
1731 	attrp->dma_attr_granular = (uint32_t)limits->dlim_granular;
1732 	attrp->dma_attr_flags = 0;
1733 
1734 	ret = i_ddi_mem_alloc(dip, attrp, length, cansleep, streaming,
1735 	    accattrp, kaddrp, &rlen, ap);
1736 	if (ret == DDI_SUCCESS) {
1737 		if (real_length)
1738 			*real_length = (uint_t)rlen;
1739 	}
1740 	return (ret);
1741 }
1742 
1743 /* ARGSUSED */
1744 void
1745 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1746 {
1747 	if (ap != NULL) {
1748 		/*
1749 		 * if we modified the cache attributes on alloc, go back and
1750 		 * fix them since this memory could be returned to the
1751 		 * general pool.
1752 		 */
1753 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1754 			uint_t order = 0;
1755 			int e;
1756 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1757 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1758 			if (e != 0) {
1759 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1760 				    "override cache attrs, memory leaked\n");
1761 				return;
1762 			}
1763 		}
1764 	}
1765 	kfreea(kaddr);
1766 }
1767 
1768 /*
1769  * Access Barriers
1770  *
1771  */
/*
 * Stub: always returns DDI_FAILURE without looking at the handle.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
	return (DDI_FAILURE);
}
1778 
/*
 * Stub: does nothing; the handle is ignored.
 */
/*ARGSUSED*/
void
i_ddi_notrap(ddi_acc_handle_t hp)
{
}
1784 
1785 
1786 /*
1787  * Misc Functions
1788  */
1789 
1790 /*
1791  * Implementation instance override functions
1792  *
1793  * No override on i86pc
1794  */
/*
 * No instance assignment override: always returns (uint_t)-1 regardless
 * of dip.
 */
/*ARGSUSED*/
uint_t
impl_assign_instance(dev_info_t *dip)
{
	return ((uint_t)-1);
}
1801 
1802 /*ARGSUSED*/
1803 int
1804 impl_keep_instance(dev_info_t *dip)
1805 {
1806 
1807 #if defined(__xpv)
1808 	/*
1809 	 * Do not persist instance numbers assigned to devices in dom0
1810 	 */
1811 	dev_info_t *pdip;
1812 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1813 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1814 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1815 			return (DDI_SUCCESS);
1816 	}
1817 #endif
1818 	return (DDI_FAILURE);
1819 }
1820 
/*
 * No instance free override: always returns DDI_FAILURE regardless of
 * dip.
 */
/*ARGSUSED*/
int
impl_free_instance(dev_info_t *dip)
{
	return (DDI_FAILURE);
}
1827 
/*
 * Stub: always returns DDI_SUCCESS without examining devi.
 */
/*ARGSUSED*/
int
impl_check_cpu(dev_info_t *devi)
{
	return (DDI_SUCCESS);
}
1834 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this is a no-op.
 */
void
arch_power_down(void)
{
}
1842 
1843 /*
1844  * Copy name to property_name, since name
1845  * is in the low address range below kernelbase.
1846  */
1847 static void
1848 copy_boot_str(const char *boot_str, char *kern_str, int len)
1849 {
1850 	int i = 0;
1851 
1852 	while (i < len - 1 && boot_str[i] != '\0') {
1853 		kern_str[i] = boot_str[i];
1854 		i++;
1855 	}
1856 
1857 	kern_str[i] = 0;	/* null terminate */
1858 	if (boot_str[i] != '\0')
1859 		cmn_err(CE_WARN,
1860 		    "boot property string is truncated to %s", kern_str);
1861 }
1862 
1863 static void
1864 get_boot_properties(void)
1865 {
1866 	extern char hw_provider[];
1867 	dev_info_t *devi;
1868 	char *name;
1869 	int length;
1870 	char property_name[50], property_val[50];
1871 	void *bop_staging_area;
1872 
1873 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1874 
1875 	/*
1876 	 * Import "root" properties from the boot.
1877 	 *
1878 	 * We do this by invoking BOP_NEXTPROP until the list
1879 	 * is completely copied in.
1880 	 */
1881 
1882 	devi = ddi_root_node();
1883 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1884 	    name;					/* NULL => DONE */
1885 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1886 
1887 		/* copy string to memory above kernelbase */
1888 		copy_boot_str(name, property_name, 50);
1889 
1890 		/*
1891 		 * Skip vga properties. They will be picked up later
1892 		 * by get_vga_properties.
1893 		 */
1894 		if (strcmp(property_name, "display-edif-block") == 0 ||
1895 		    strcmp(property_name, "display-edif-id") == 0) {
1896 			continue;
1897 		}
1898 
1899 		length = BOP_GETPROPLEN(bootops, property_name);
1900 		if (length == 0)
1901 			continue;
1902 		if (length > MMU_PAGESIZE) {
1903 			cmn_err(CE_NOTE,
1904 			    "boot property %s longer than 0x%x, ignored\n",
1905 			    property_name, MMU_PAGESIZE);
1906 			continue;
1907 		}
1908 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1909 
1910 		/*
1911 		 * special properties:
1912 		 * si-machine, si-hw-provider
1913 		 *	goes to kernel data structures.
1914 		 * bios-boot-device and stdout
1915 		 *	goes to hardware property list so it may show up
1916 		 *	in the prtconf -vp output. This is needed by
1917 		 *	Install/Upgrade. Once we fix install upgrade,
1918 		 *	this can be taken out.
1919 		 */
1920 		if (strcmp(name, "si-machine") == 0) {
1921 			(void) strncpy(utsname.machine, bop_staging_area,
1922 			    SYS_NMLN);
1923 			utsname.machine[SYS_NMLN - 1] = (char)NULL;
1924 		} else if (strcmp(name, "si-hw-provider") == 0) {
1925 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1926 			hw_provider[SYS_NMLN - 1] = (char)NULL;
1927 		} else if (strcmp(name, "bios-boot-device") == 0) {
1928 			copy_boot_str(bop_staging_area, property_val, 50);
1929 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1930 			    property_name, property_val);
1931 		} else if (strcmp(name, "stdout") == 0) {
1932 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1933 			    property_name, *((int *)bop_staging_area));
1934 		} else {
1935 			/* Property type unknown, use old prop interface */
1936 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1937 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1938 			    length);
1939 		}
1940 	}
1941 
1942 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1943 }
1944 
1945 static void
1946 get_vga_properties(void)
1947 {
1948 	dev_info_t *devi;
1949 	major_t major;
1950 	char *name;
1951 	int length;
1952 	char property_val[50];
1953 	void *bop_staging_area;
1954 
1955 	/*
1956 	 * XXXX Hack Allert!
1957 	 * There really needs to be a better way for identifying various
1958 	 * console framebuffers and their related issues.  Till then,
1959 	 * check for this one as a replacement to vgatext.
1960 	 */
1961 	major = ddi_name_to_major("ragexl");
1962 	if (major == (major_t)-1) {
1963 		major = ddi_name_to_major("vgatext");
1964 		if (major == (major_t)-1)
1965 			return;
1966 	}
1967 	devi = devnamesp[major].dn_head;
1968 	if (devi == NULL)
1969 		return;
1970 
1971 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1972 
1973 	/*
1974 	 * Import "vga" properties from the boot.
1975 	 */
1976 	name = "display-edif-block";
1977 	length = BOP_GETPROPLEN(bootops, name);
1978 	if (length > 0 && length < MMU_PAGESIZE) {
1979 		BOP_GETPROP(bootops, name, bop_staging_area);
1980 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
1981 		    devi, name, bop_staging_area, length);
1982 	}
1983 
1984 	/*
1985 	 * kdmconfig is also looking for display-type and
1986 	 * video-adapter-type. We default to color and svga.
1987 	 *
1988 	 * Could it be "monochrome", "vga"?
1989 	 * Nah, you've got to come to the 21st century...
1990 	 * And you can set monitor type manually in kdmconfig
1991 	 * if you are really an old junky.
1992 	 */
1993 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1994 	    devi, "display-type", "color");
1995 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1996 	    devi, "video-adapter-type", "svga");
1997 
1998 	name = "display-edif-id";
1999 	length = BOP_GETPROPLEN(bootops, name);
2000 	if (length > 0 && length < MMU_PAGESIZE) {
2001 		BOP_GETPROP(bootops, name, bop_staging_area);
2002 		copy_boot_str(bop_staging_area, property_val, length);
2003 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2004 		    devi, name, property_val);
2005 	}
2006 
2007 	kmem_free(bop_staging_area, MMU_PAGESIZE);
2008 }
2009 
2010 
2011 /*
2012  * This is temporary, but absolutely necessary.  If we are being
2013  * booted with a device tree created by the DevConf project's bootconf
2014  * program, then we have device information nodes that reflect
2015  * reality.  At this point in time in the Solaris release schedule, the
2016  * kernel drivers aren't prepared for reality.  They still depend on their
2017  * own ad-hoc interpretations of the properties created when their .conf
2018  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2019  * property to prevent them from using the nodes passed up from the bootconf
2020  * device tree.
2021  *
2022  * Trying to assemble root file system drivers as we are booting from
2023  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
2025  * reality-based name_addrs.  We help the boot along in this case by
2026  * looking at the pre-bootconf bootpath and determining if we would have
2027  * successfully matched if that had been the bootpath we had chosen.
2028  *
2029  * Note that we only even perform this extra check if we've booted
2030  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2031  * we're trying to match the name_addr specified in the 1275 bootpath.
2032  */
2033 
2034 #define	MAXCOMPONENTLEN	32
2035 
2036 int
2037 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2038 {
2039 	/*
2040 	 *  There are multiple criteria to be met before we can even
2041 	 *  consider allowing a name_addr match here.
2042 	 *
2043 	 *  1) We must have been booted such that the bootconf program
2044 	 *	created device tree nodes and properties.  This can be
2045 	 *	determined by examining the 'bootpath' property.  This
2046 	 *	property will be a non-null string iff bootconf was
2047 	 *	involved in the boot.
2048 	 *
2049 	 *  2) The module that we want to match must be the boot device.
2050 	 *
2051 	 *  3) The instance of the module we are thinking of letting be
2052 	 *	our match must be ignoring hardware nodes.
2053 	 *
2054 	 *  4) The name_addr we want to match must be the name_addr
2055 	 *	specified in the 1275 bootpath.
2056 	 */
2057 	static char bootdev_module[MAXCOMPONENTLEN];
2058 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2059 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2060 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2061 	static int  quickexit;
2062 
2063 	char *daddr;
2064 	int dlen;
2065 
2066 	char	*lkupname;
2067 	int	rv = DDI_FAILURE;
2068 
2069 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2070 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2071 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2072 	    "ignore-hardware-nodes", -1) != -1)) {
2073 		if (strcmp(daddr, caddr) == 0) {
2074 			return (DDI_SUCCESS);
2075 		}
2076 	}
2077 
2078 	if (quickexit)
2079 		return (rv);
2080 
2081 	if (bootdev_module[0] == '\0') {
2082 		char *addrp, *eoaddrp;
2083 		char *busp, *modp, *atp;
2084 		char *bp1275, *bp;
2085 		int  bp1275len, bplen;
2086 
2087 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2088 
2089 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2090 		    ddi_root_node(), 0, "bootpath",
2091 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2092 		    bp1275len <= 1) {
2093 			/*
2094 			 * We didn't boot from bootconf so we never need to
2095 			 * do any special matches.
2096 			 */
2097 			quickexit = 1;
2098 			if (bp1275)
2099 				kmem_free(bp1275, bp1275len);
2100 			return (rv);
2101 		}
2102 
2103 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2104 		    ddi_root_node(), 0, "boot-path",
2105 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2106 			/*
2107 			 * No fallback position for matching. This is
2108 			 * certainly unexpected, but we'll handle it
2109 			 * just in case.
2110 			 */
2111 			quickexit = 1;
2112 			kmem_free(bp1275, bp1275len);
2113 			if (bp)
2114 				kmem_free(bp, bplen);
2115 			return (rv);
2116 		}
2117 
2118 		/*
2119 		 *  Determine boot device module and 1275 name_addr
2120 		 *
2121 		 *  bootpath assumed to be of the form /bus/module@name_addr
2122 		 */
2123 		if (busp = strchr(bp1275, '/')) {
2124 			if (modp = strchr(busp + 1, '/')) {
2125 				if (atp = strchr(modp + 1, '@')) {
2126 					*atp = '\0';
2127 					addrp = atp + 1;
2128 					if (eoaddrp = strchr(addrp, '/'))
2129 						*eoaddrp = '\0';
2130 				}
2131 			}
2132 		}
2133 
2134 		if (modp && addrp) {
2135 			(void) strncpy(bootdev_module, modp + 1,
2136 			    MAXCOMPONENTLEN);
2137 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2138 
2139 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2140 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2141 		} else {
2142 			quickexit = 1;
2143 			kmem_free(bp1275, bp1275len);
2144 			kmem_free(bp, bplen);
2145 			return (rv);
2146 		}
2147 
2148 		/*
2149 		 *  Determine fallback name_addr
2150 		 *
2151 		 *  10/3/96 - Also save fallback module name because it
2152 		 *  might actually be different than the current module
2153 		 *  name.  E.G., ISA pnp drivers have new names.
2154 		 *
2155 		 *  bootpath assumed to be of the form /bus/module@name_addr
2156 		 */
2157 		addrp = NULL;
2158 		if (busp = strchr(bp, '/')) {
2159 			if (modp = strchr(busp + 1, '/')) {
2160 				if (atp = strchr(modp + 1, '@')) {
2161 					*atp = '\0';
2162 					addrp = atp + 1;
2163 					if (eoaddrp = strchr(addrp, '/'))
2164 						*eoaddrp = '\0';
2165 				}
2166 			}
2167 		}
2168 
2169 		if (modp && addrp) {
2170 			(void) strncpy(bootdev_oldmod, modp + 1,
2171 			    MAXCOMPONENTLEN);
2172 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2173 
2174 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2175 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2176 		}
2177 
2178 		/* Free up the bootpath storage now that we're done with it. */
2179 		kmem_free(bp1275, bp1275len);
2180 		kmem_free(bp, bplen);
2181 
2182 		if (bootdev_oldaddr[0] == '\0') {
2183 			quickexit = 1;
2184 			return (rv);
2185 		}
2186 	}
2187 
2188 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2189 	    (strcmp(bootdev_module, lkupname) == 0 ||
2190 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2191 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2192 	    "ignore-hardware-nodes", -1) != -1) ||
2193 	    ignore_hardware_nodes) &&
2194 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2195 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2196 		rv = DDI_SUCCESS;
2197 	}
2198 
2199 	return (rv);
2200 }
2201 
2202 /*
2203  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2204  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2205  */
2206 /*ARGSUSED*/
2207 int
2208 e_ddi_copyfromdev(dev_info_t *devi,
2209     off_t off, const void *devaddr, void *kaddr, size_t len)
2210 {
2211 	bcopy(devaddr, kaddr, len);
2212 	return (0);
2213 }
2214 
2215 /*
2216  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2217  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2218  */
2219 /*ARGSUSED*/
2220 int
2221 e_ddi_copytodev(dev_info_t *devi,
2222     off_t off, const void *kaddr, void *devaddr, size_t len)
2223 {
2224 	bcopy(kaddr, devaddr, len);
2225 	return (0);
2226 }
2227 
2228 
2229 static int
2230 poke_mem(peekpoke_ctlops_t *in_args)
2231 {
2232 	int err = DDI_SUCCESS;
2233 	on_trap_data_t otd;
2234 
2235 	/* Set up protected environment. */
2236 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2237 		switch (in_args->size) {
2238 		case sizeof (uint8_t):
2239 			*(uint8_t *)(in_args->dev_addr) =
2240 			    *(uint8_t *)in_args->host_addr;
2241 			break;
2242 
2243 		case sizeof (uint16_t):
2244 			*(uint16_t *)(in_args->dev_addr) =
2245 			    *(uint16_t *)in_args->host_addr;
2246 			break;
2247 
2248 		case sizeof (uint32_t):
2249 			*(uint32_t *)(in_args->dev_addr) =
2250 			    *(uint32_t *)in_args->host_addr;
2251 			break;
2252 
2253 		case sizeof (uint64_t):
2254 			*(uint64_t *)(in_args->dev_addr) =
2255 			    *(uint64_t *)in_args->host_addr;
2256 			break;
2257 
2258 		default:
2259 			err = DDI_FAILURE;
2260 			break;
2261 		}
2262 	} else
2263 		err = DDI_FAILURE;
2264 
2265 	/* Take down protected environment. */
2266 	no_trap();
2267 
2268 	return (err);
2269 }
2270 
2271 
2272 static int
2273 peek_mem(peekpoke_ctlops_t *in_args)
2274 {
2275 	int err = DDI_SUCCESS;
2276 	on_trap_data_t otd;
2277 
2278 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2279 		switch (in_args->size) {
2280 		case sizeof (uint8_t):
2281 			*(uint8_t *)in_args->host_addr =
2282 			    *(uint8_t *)in_args->dev_addr;
2283 			break;
2284 
2285 		case sizeof (uint16_t):
2286 			*(uint16_t *)in_args->host_addr =
2287 			    *(uint16_t *)in_args->dev_addr;
2288 			break;
2289 
2290 		case sizeof (uint32_t):
2291 			*(uint32_t *)in_args->host_addr =
2292 			    *(uint32_t *)in_args->dev_addr;
2293 			break;
2294 
2295 		case sizeof (uint64_t):
2296 			*(uint64_t *)in_args->host_addr =
2297 			    *(uint64_t *)in_args->dev_addr;
2298 			break;
2299 
2300 		default:
2301 			err = DDI_FAILURE;
2302 			break;
2303 		}
2304 	} else
2305 		err = DDI_FAILURE;
2306 
2307 	no_trap();
2308 	return (err);
2309 }
2310 
2311 
2312 /*
2313  * This is called only to process peek/poke when the DIP is NULL.
2314  * Assume that this is for memory, as nexi take care of device safe accesses.
2315  */
2316 int
2317 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2318 {
2319 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2320 }
2321 
2322 /*
2323  * we've just done a cautious put/get. Check if it was successful by
2324  * calling pci_ereport_post() on all puts and for any gets that return -1
2325  */
2326 static int
2327 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2328     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2329 {
2330 	int	rval = DDI_SUCCESS;
2331 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2332 	ddi_fm_error_t de;
2333 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2334 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2335 	int check_err = 0;
2336 	int repcount = in_args->repcount;
2337 
2338 	if (ctlop == DDI_CTLOPS_POKE &&
2339 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2340 		return (DDI_SUCCESS);
2341 
2342 	if (ctlop == DDI_CTLOPS_PEEK &&
2343 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2344 		for (; repcount; repcount--) {
2345 			switch (in_args->size) {
2346 			case sizeof (uint8_t):
2347 				if (*(uint8_t *)in_args->host_addr == 0xff)
2348 					check_err = 1;
2349 				break;
2350 			case sizeof (uint16_t):
2351 				if (*(uint16_t *)in_args->host_addr == 0xffff)
2352 					check_err = 1;
2353 				break;
2354 			case sizeof (uint32_t):
2355 				if (*(uint32_t *)in_args->host_addr ==
2356 				    0xffffffff)
2357 					check_err = 1;
2358 				break;
2359 			case sizeof (uint64_t):
2360 				if (*(uint64_t *)in_args->host_addr ==
2361 				    0xffffffffffffffff)
2362 					check_err = 1;
2363 				break;
2364 			}
2365 		}
2366 		if (check_err == 0)
2367 			return (DDI_SUCCESS);
2368 	}
2369 	/*
2370 	 * for a cautious put or get or a non-cautious get that returned -1 call
2371 	 * io framework to see if there really was an error
2372 	 */
2373 	bzero(&de, sizeof (ddi_fm_error_t));
2374 	de.fme_version = DDI_FME_VERSION;
2375 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2376 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2377 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2378 		de.fme_acc_handle = in_args->handle;
2379 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2380 		/*
2381 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
2382 		 * Non-hardened drivers may be probing the hardware and
2383 		 * expecting -1 returned. So need to treat errors on
2384 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2385 		 */
2386 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2387 		de.fme_acc_handle = in_args->handle;
2388 	} else {
2389 		/*
2390 		 * Hardened driver doing protected accesses shouldn't
2391 		 * get errors unless there's a hardware problem. Treat
2392 		 * as nonfatal if there's an error, but set UNEXPECTED
2393 		 * so we raise ereports on any errors and potentially
2394 		 * fault the device
2395 		 */
2396 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2397 	}
2398 	(void) scan(dip, &de);
2399 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2400 	    de.fme_status != DDI_FM_OK) {
2401 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2402 		rval = DDI_FAILURE;
2403 		errp->err_ena = de.fme_ena;
2404 		errp->err_expected = de.fme_flag;
2405 		errp->err_status = DDI_FM_NONFATAL;
2406 	}
2407 	return (rval);
2408 }
2409 
2410 /*
2411  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2412  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2413  * recurse, so assume all puts are OK and gets have failed if they return -1
2414  */
2415 static int
2416 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2417 {
2418 	int rval = DDI_SUCCESS;
2419 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2420 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2421 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2422 	int repcount = in_args->repcount;
2423 
2424 	if (ctlop == DDI_CTLOPS_POKE)
2425 		return (rval);
2426 
2427 	for (; repcount; repcount--) {
2428 		switch (in_args->size) {
2429 		case sizeof (uint8_t):
2430 			if (*(uint8_t *)in_args->host_addr == 0xff)
2431 				rval = DDI_FAILURE;
2432 			break;
2433 		case sizeof (uint16_t):
2434 			if (*(uint16_t *)in_args->host_addr == 0xffff)
2435 				rval = DDI_FAILURE;
2436 			break;
2437 		case sizeof (uint32_t):
2438 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2439 				rval = DDI_FAILURE;
2440 			break;
2441 		case sizeof (uint64_t):
2442 			if (*(uint64_t *)in_args->host_addr ==
2443 			    0xffffffffffffffff)
2444 				rval = DDI_FAILURE;
2445 			break;
2446 		}
2447 	}
2448 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2449 	    rval == DDI_FAILURE) {
2450 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2451 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2452 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2453 		errp->err_status = DDI_FM_NONFATAL;
2454 	}
2455 	return (rval);
2456 }
2457 
2458 int
2459 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2460 	ddi_ctl_enum_t ctlop, void *arg, void *result,
2461 	int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2462 	void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2463 	void (*scan)(dev_info_t *, ddi_fm_error_t *))
2464 {
2465 	int rval;
2466 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2467 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2468 
2469 	/*
2470 	 * this function only supports cautious accesses, not peeks/pokes
2471 	 * which don't have a handle
2472 	 */
2473 	if (hp == NULL)
2474 		return (DDI_FAILURE);
2475 
2476 	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2477 		if (!mutex_tryenter(err_mutexp)) {
2478 			/*
2479 			 * As this may be a recursive call from within
2480 			 * pci_ereport_post() we can't wait for the mutexes.
2481 			 * Fortunately we know someone is already calling
2482 			 * pci_ereport_post() which will handle the error bits
2483 			 * for us, and as this is a config space access we can
2484 			 * just do the access and check return value for -1
2485 			 * using pci_peekpoke_check_nofma().
2486 			 */
2487 			rval = handler(dip, rdip, ctlop, arg, result);
2488 			if (rval == DDI_SUCCESS)
2489 				rval = pci_peekpoke_check_nofma(arg, ctlop);
2490 			return (rval);
2491 		}
2492 		/*
2493 		 * This can't be a recursive call. Drop the err_mutex and get
2494 		 * both mutexes in the right order. If an error hasn't already
2495 		 * been detected by the ontrap code, use pci_peekpoke_check_fma
2496 		 * which will call pci_ereport_post() to check error status.
2497 		 */
2498 		mutex_exit(err_mutexp);
2499 	}
2500 	mutex_enter(peek_poke_mutexp);
2501 	rval = handler(dip, rdip, ctlop, arg, result);
2502 	if (rval == DDI_SUCCESS) {
2503 		mutex_enter(err_mutexp);
2504 		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2505 		mutex_exit(err_mutexp);
2506 	}
2507 	mutex_exit(peek_poke_mutexp);
2508 	return (rval);
2509 }
2510 
2511 void
2512 impl_setup_ddi(void)
2513 {
2514 #if !defined(__xpv)
2515 	extern void startup_bios_disk(void);
2516 	extern int post_fastreboot;
2517 #endif
2518 	dev_info_t *xdip, *isa_dip;
2519 	rd_existing_t rd_mem_prop;
2520 	int err;
2521 
2522 	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2523 	    (pnode_t)DEVI_SID_NODEID, &xdip);
2524 
2525 	(void) BOP_GETPROP(bootops,
2526 	    "ramdisk_start", (void *)&ramdisk_start);
2527 	(void) BOP_GETPROP(bootops,
2528 	    "ramdisk_end", (void *)&ramdisk_end);
2529 
2530 #ifdef __xpv
2531 	ramdisk_start -= ONE_GIG;
2532 	ramdisk_end -= ONE_GIG;
2533 #endif
2534 	rd_mem_prop.phys = ramdisk_start;
2535 	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2536 
2537 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2538 	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2539 	    sizeof (rd_mem_prop));
2540 	err = ndi_devi_bind_driver(xdip, 0);
2541 	ASSERT(err == 0);
2542 
2543 	/* isa node */
2544 	if (pseudo_isa) {
2545 		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2546 		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
2547 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2548 		    "device_type", "isa");
2549 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2550 		    "bus-type", "isa");
2551 		(void) ndi_devi_bind_driver(isa_dip, 0);
2552 	}
2553 
2554 	/*
2555 	 * Read in the properties from the boot.
2556 	 */
2557 	get_boot_properties();
2558 
2559 	/* not framebuffer should be enumerated, if present */
2560 	get_vga_properties();
2561 
2562 	/*
2563 	 * Check for administratively disabled drivers.
2564 	 */
2565 	check_driver_disable();
2566 
2567 #if !defined(__xpv)
2568 	if (!post_fastreboot)
2569 		startup_bios_disk();
2570 #endif
2571 	/* do bus dependent probes. */
2572 	impl_bus_initialprobe();
2573 }
2574 
2575 dev_t
2576 getrootdev(void)
2577 {
2578 	/*
2579 	 * Precedence given to rootdev if set in /etc/system
2580 	 */
2581 	if (root_is_svm == B_TRUE) {
2582 		return (ddi_pathname_to_dev_t(svm_bootpath));
2583 	}
2584 
2585 	/*
2586 	 * Usually rootfs.bo_name is initialized by the
2587 	 * the bootpath property from bootenv.rc, but
2588 	 * defaults to "/ramdisk:a" otherwise.
2589 	 */
2590 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2591 }
2592 
/*
 * One registered bus probe callback.  Entries form a singly linked list
 * through 'next'; 'probe' is the callback, which takes an int argument.
 */
struct bus_probe {
	struct bus_probe *next;
	void (*probe)(int);
};

/* Head of the list of registered bus probes (NULL when empty). */
static struct bus_probe *bus_probes;
2597 
2598 void
2599 impl_bus_add_probe(void (*func)(int))
2600 {
2601 	struct bus_probe *probe;
2602 	struct bus_probe *lastprobe = NULL;
2603 
2604 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2605 	probe->probe = func;
2606 	probe->next = NULL;
2607 
2608 	if (!bus_probes) {
2609 		bus_probes = probe;
2610 		return;
2611 	}
2612 
2613 	lastprobe = bus_probes;
2614 	while (lastprobe->next)
2615 		lastprobe = lastprobe->next;
2616 	lastprobe->next = probe;
2617 }
2618 
2619 /*ARGSUSED*/
2620 void
2621 impl_bus_delete_probe(void (*func)(int))
2622 {
2623 	struct bus_probe *prev = NULL;
2624 	struct bus_probe *probe = bus_probes;
2625 
2626 	while (probe) {
2627 		if (probe->probe == func)
2628 			break;
2629 		prev = probe;
2630 		probe = probe->next;
2631 	}
2632 
2633 	if (probe == NULL)
2634 		return;
2635 
2636 	if (prev)
2637 		prev->next = probe->next;
2638 	else
2639 		bus_probes = probe->next;
2640 
2641 	kmem_free(probe, sizeof (struct bus_probe));
2642 }
2643 
2644 /*
2645  * impl_bus_initialprobe
2646  *	Modload the prom simulator, then let it probe to verify existence
2647  *	and type of PCI support.
2648  */
2649 static void
2650 impl_bus_initialprobe(void)
2651 {
2652 	struct bus_probe *probe;
2653 
2654 	/* load modules to install bus probes */
2655 #if defined(__xpv)
2656 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2657 		if (modload("misc", "pci_autoconfig") < 0) {
2658 			panic("failed to load misc/pci_autoconfig");
2659 		}
2660 
2661 		if (modload("drv", "isa") < 0)
2662 			panic("failed to load drv/isa");
2663 	}
2664 
2665 	(void) modload("misc", "xpv_autoconfig");
2666 #else
2667 	if (modload("misc", "pci_autoconfig") < 0) {
2668 		panic("failed to load misc/pci_autoconfig");
2669 	}
2670 
2671 	(void) modload("misc", "acpidev");
2672 
2673 	if (modload("drv", "isa") < 0)
2674 		panic("failed to load drv/isa");
2675 #endif
2676 
2677 	probe = bus_probes;
2678 	while (probe) {
2679 		/* run the probe functions */
2680 		(*probe->probe)(0);
2681 		probe = probe->next;
2682 	}
2683 }
2684 
2685 /*
2686  * impl_bus_reprobe
2687  *	Reprogram devices not set up by firmware.
2688  */
2689 static void
2690 impl_bus_reprobe(void)
2691 {
2692 	struct bus_probe *probe;
2693 
2694 	probe = bus_probes;
2695 	while (probe) {
2696 		/* run the probe function */
2697 		(*probe->probe)(1);
2698 		probe = probe->next;
2699 	}
2700 }
2701 
2702 
2703 /*
2704  * The following functions ready a cautious request to go up to the nexus
2705  * driver.  It is up to the nexus driver to decide how to process the request.
2706  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2707  * differently.
2708  */
2709 
2710 static void
2711 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2712     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2713     ddi_ctl_enum_t cmd)
2714 {
2715 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2716 
2717 	cautacc_ctlops_arg.size = size;
2718 	cautacc_ctlops_arg.dev_addr = dev_addr;
2719 	cautacc_ctlops_arg.host_addr = host_addr;
2720 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2721 	cautacc_ctlops_arg.repcount = repcount;
2722 	cautacc_ctlops_arg.flags = flags;
2723 
2724 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2725 	    &cautacc_ctlops_arg, NULL);
2726 }
2727 
2728 uint8_t
2729 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2730 {
2731 	uint8_t value;
2732 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2733 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2734 
2735 	return (value);
2736 }
2737 
2738 uint16_t
2739 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2740 {
2741 	uint16_t value;
2742 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2743 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2744 
2745 	return (value);
2746 }
2747 
2748 uint32_t
2749 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2750 {
2751 	uint32_t value;
2752 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2753 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2754 
2755 	return (value);
2756 }
2757 
2758 uint64_t
2759 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2760 {
2761 	uint64_t value;
2762 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2763 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2764 
2765 	return (value);
2766 }
2767 
2768 void
2769 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2770 {
2771 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2772 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2773 }
2774 
2775 void
2776 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2777 {
2778 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2779 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2780 }
2781 
2782 void
2783 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2784 {
2785 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2786 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2787 }
2788 
2789 void
2790 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2791 {
2792 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2793 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2794 }
2795 
2796 void
2797 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2798 	size_t repcount, uint_t flags)
2799 {
2800 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2801 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2802 }
2803 
2804 void
2805 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2806     uint16_t *dev_addr, size_t repcount, uint_t flags)
2807 {
2808 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2809 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2810 }
2811 
2812 void
2813 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2814     uint32_t *dev_addr, size_t repcount, uint_t flags)
2815 {
2816 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2817 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2818 }
2819 
2820 void
2821 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2822     uint64_t *dev_addr, size_t repcount, uint_t flags)
2823 {
2824 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2825 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2826 }
2827 
2828 void
2829 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2830 	size_t repcount, uint_t flags)
2831 {
2832 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2833 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2834 }
2835 
2836 void
2837 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2838     uint16_t *dev_addr, size_t repcount, uint_t flags)
2839 {
2840 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2841 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2842 }
2843 
2844 void
2845 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2846     uint32_t *dev_addr, size_t repcount, uint_t flags)
2847 {
2848 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2849 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2850 }
2851 
2852 void
2853 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2854     uint64_t *dev_addr, size_t repcount, uint_t flags)
2855 {
2856 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2857 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2858 }
2859 
2860 boolean_t
2861 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2862 {
2863 	uint64_t hi_pa;
2864 
2865 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2866 	if (attrp->dma_attr_addr_hi < hi_pa) {
2867 		return (B_TRUE);
2868 	}
2869 
2870 	return (B_FALSE);
2871 }
2872 
2873 size_t
2874 i_ddi_copybuf_size()
2875 {
2876 	return (dma_max_copybuf_size);
2877 }
2878 
2879 /*
2880  * i_ddi_dma_max()
2881  *    returns the maximum DMA size which can be performed in a single DMA
2882  *    window taking into account the devices DMA contraints (attrp), the
2883  *    maximum copy buffer size (if applicable), and the worse case buffer
2884  *    fragmentation.
2885  */
2886 /*ARGSUSED*/
2887 uint32_t
2888 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2889 {
2890 	uint64_t maxxfer;
2891 
2892 
2893 	/*
2894 	 * take the min of maxxfer and the the worse case fragementation
2895 	 * (e.g. every cookie <= 1 page)
2896 	 */
2897 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2898 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2899 
2900 	/*
2901 	 * If the DMA engine can't reach all off memory, we also need to take
2902 	 * the max size of the copybuf into consideration.
2903 	 */
2904 	if (i_ddi_copybuf_required(attrp)) {
2905 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2906 	}
2907 
2908 	/*
2909 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2910 	 * page so it won't be mistaken for an error value during debug.
2911 	 */
2912 	if (maxxfer >= 0xFFFFFFFF) {
2913 		maxxfer = 0xFFFFF000;
2914 	}
2915 
2916 	/*
2917 	 * make sure the value we return is a whole multiple of the
2918 	 * granlarity.
2919 	 */
2920 	if (attrp->dma_attr_granular > 1) {
2921 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2922 	}
2923 
2924 	return ((uint32_t)maxxfer);
2925 }
2926 
2927 pfn_t
2928 i_ddi_paddr_to_pfn(paddr_t paddr)
2929 {
2930 	pfn_t pfn;
2931 
2932 #ifdef __xpv
2933 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2934 		pfn = xen_assign_pfn(mmu_btop(paddr));
2935 	} else {
2936 		pfn = mmu_btop(paddr);
2937 	}
2938 #else
2939 	pfn = mmu_btop(paddr);
2940 #endif
2941 
2942 	return (pfn);
2943 }
2944