xref: /illumos-gate/usr/src/uts/i86pc/os/ddi_impl.c (revision 46b592853d0f4f11781b6b0a7533f267c6aee132)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * PC specific DDI implementation
29  */
30 #include <sys/types.h>
31 #include <sys/autoconf.h>
32 #include <sys/avintr.h>
33 #include <sys/bootconf.h>
34 #include <sys/conf.h>
35 #include <sys/cpuvar.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ddi_subrdefs.h>
38 #include <sys/ethernet.h>
39 #include <sys/fp.h>
40 #include <sys/instance.h>
41 #include <sys/kmem.h>
42 #include <sys/machsystm.h>
43 #include <sys/modctl.h>
44 #include <sys/promif.h>
45 #include <sys/prom_plat.h>
46 #include <sys/sunndi.h>
47 #include <sys/ndi_impldefs.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/sysmacros.h>
50 #include <sys/systeminfo.h>
51 #include <sys/utsname.h>
52 #include <sys/atomic.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <vm/seg_kmem.h>
56 #include <sys/ontrap.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/ramdisk.h>
59 #include <sys/sunndi.h>
60 #include <sys/vmem.h>
61 #include <sys/pci_impl.h>
62 #if defined(__xpv)
63 #include <sys/hypervisor.h>
64 #endif
65 #include <sys/mach_intr.h>
66 #include <vm/hat_i86.h>
67 #include <sys/x86_archext.h>
68 
69 /*
70  * DDI Boot Configuration
71  */
72 
73 /*
74  * Platform drivers on this platform
75  */
76 char *platform_module_list[] = {
77 	"acpippm",
78 	"ppm",
79 	(char *)0
80 };
81 
82 /* pci bus resource maps */
83 struct pci_bus_resource *pci_bus_res;
84 
85 size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */
86 
87 uint64_t ramdisk_start, ramdisk_end;
88 
89 int pseudo_isa = 0;
90 
91 /*
92  * Forward declarations
93  */
94 static int getlongprop_buf();
95 static void get_boot_properties(void);
96 static void impl_bus_initialprobe(void);
97 static void impl_bus_reprobe(void);
98 
99 static int poke_mem(peekpoke_ctlops_t *in_args);
100 static int peek_mem(peekpoke_ctlops_t *in_args);
101 
102 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
103 
104 #define	CTGENTRIES	15
105 
106 static struct ctgas {
107 	struct ctgas	*ctg_next;
108 	int		ctg_index;
109 	void		*ctg_addr[CTGENTRIES];
110 	size_t		ctg_size[CTGENTRIES];
111 } ctglist;
112 
113 static kmutex_t		ctgmutex;
114 #define	CTGLOCK()	mutex_enter(&ctgmutex)
115 #define	CTGUNLOCK()	mutex_exit(&ctgmutex)
116 
117 /*
118  * Minimum pfn value of page_t's put on the free list.  This is to simplify
119  * support of ddi dma memory requests which specify small, non-zero addr_lo
120  * values.
121  *
122  * The default value of 2, which corresponds to the only known non-zero addr_lo
123  * value used, means a single page will be sacrificed (pfn typically starts
124  * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
125  * otherwise mp startup panics.
126  */
127 pfn_t	ddiphysmin = 2;
128 
129 static void
130 check_driver_disable(void)
131 {
132 	int proplen = 128;
133 	char *prop_name;
134 	char *drv_name, *propval;
135 	major_t major;
136 
137 	prop_name = kmem_alloc(proplen, KM_SLEEP);
138 	for (major = 0; major < devcnt; major++) {
139 		drv_name = ddi_major_to_name(major);
140 		if (drv_name == NULL)
141 			continue;
142 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
143 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
144 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
145 			if (strcmp(propval, "true") == 0) {
146 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
147 				cmn_err(CE_NOTE, "driver %s disabled",
148 				    drv_name);
149 			}
150 			ddi_prop_free(propval);
151 		}
152 	}
153 	kmem_free(prop_name, proplen);
154 }
155 
156 
157 /*
158  * Configure the hardware on the system.
159  * Called before the rootfs is mounted
160  */
161 void
162 configure(void)
163 {
164 	extern void i_ddi_init_root();
165 
166 #if defined(__i386)
167 	extern int fpu_pentium_fdivbug;
168 #endif	/* __i386 */
169 	extern int fpu_ignored;
170 
171 	/*
172 	 * Determine if an FPU is attached
173 	 */
174 
175 	fpu_probe();
176 
177 #if defined(__i386)
178 	if (fpu_pentium_fdivbug) {
179 		printf("\
180 FP hardware exhibits Pentium floating point divide problem\n");
181 	}
182 #endif	/* __i386 */
183 
184 	if (fpu_ignored) {
185 		printf("FP hardware will not be used\n");
186 	} else if (!fpu_exists) {
187 		printf("No FPU in configuration\n");
188 	}
189 
190 	/*
191 	 * Initialize devices on the machine.
192 	 * Uses configuration tree built by the PROMs to determine what
193 	 * is present, and builds a tree of prototype dev_info nodes
194 	 * corresponding to the hardware which identified itself.
195 	 */
196 #if !defined(SAS) && !defined(MPSAS)
197 	/*
198 	 * Check for disabled drivers and initialize root node.
199 	 */
200 	check_driver_disable();
201 	i_ddi_init_root();
202 
203 	/* reprogram devices not set up by firmware (BIOS) */
204 	impl_bus_reprobe();
205 
206 	/*
207 	 * attach the isa nexus to get ACPI resource usage
208 	 * isa is "kind of" a pseudo node
209 	 */
210 #if defined(__xpv)
211 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
212 		if (pseudo_isa)
213 			(void) i_ddi_attach_pseudo_node("isa");
214 		else
215 			(void) i_ddi_attach_hw_nodes("isa");
216 	}
217 #else
218 	if (pseudo_isa)
219 		(void) i_ddi_attach_pseudo_node("isa");
220 	else
221 		(void) i_ddi_attach_hw_nodes("isa");
222 #endif
223 
224 #endif	/* !SAS && !MPSAS */
225 }
226 
227 /*
228  * The "status" property indicates the operational status of a device.
229  * If this property is present, the value is a string indicating the
230  * status of the device as follows:
231  *
232  *	"okay"		operational.
233  *	"disabled"	not operational, but might become operational.
234  *	"fail"		not operational because a fault has been detected,
235  *			and it is unlikely that the device will become
236  *			operational without repair. no additional details
237  *			are available.
238  *	"fail-xxx"	not operational because a fault has been detected,
239  *			and it is unlikely that the device will become
240  *			operational without repair. "xxx" is additional
241  *			human-readable information about the particular
242  *			fault condition that was detected.
243  *
244  * The absence of this property means that the operational status is
245  * unknown or okay.
246  *
247  * This routine checks the status property of the specified device node
248  * and returns 0 if the operational status indicates failure, and 1 otherwise.
249  *
250  * The property may exist on plug-in cards that existed before IEEE 1275-1994.
251  * And, in that case, the property may not even be a string. So we carefully
252  * check for the value "fail", in the beginning of the string, noting
253  * the property length.
254  */
255 int
256 status_okay(int id, char *buf, int buflen)
257 {
258 	char status_buf[OBP_MAXPROPNAME];
259 	char *bufp = buf;
260 	int len = buflen;
261 	int proplen;
262 	static const char *status = "status";
263 	static const char *fail = "fail";
264 	int fail_len = (int)strlen(fail);
265 
266 	/*
267 	 * Get the proplen ... if it's smaller than "fail",
268 	 * or doesn't exist ... then we don't care, since
269 	 * the value can't begin with the char string "fail".
270 	 *
271 	 * NB: proplen, if it's a string, includes the NULL in the
272 	 * the size of the property, and fail_len does not.
273 	 */
274 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
275 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
276 		return (1);
277 
278 	/*
279 	 * if a buffer was provided, use it
280 	 */
281 	if ((buf == (char *)NULL) || (buflen <= 0)) {
282 		bufp = status_buf;
283 		len = sizeof (status_buf);
284 	}
285 	*bufp = (char)0;
286 
287 	/*
288 	 * Get the property into the buffer, to the extent of the buffer,
289 	 * and in case the buffer is smaller than the property size,
290 	 * NULL terminate the buffer. (This handles the case where
291 	 * a buffer was passed in and the caller wants to print the
292 	 * value, but the buffer was too small).
293 	 */
294 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
295 	    (caddr_t)bufp, len);
296 	*(bufp + len - 1) = (char)0;
297 
298 	/*
299 	 * If the value begins with the char string "fail",
300 	 * then it means the node is failed. We don't care
301 	 * about any other values. We assume the node is ok
302 	 * although it might be 'disabled'.
303 	 */
304 	if (strncmp(bufp, fail, fail_len) == 0)
305 		return (0);
306 
307 	return (1);
308 }
309 
310 /*
311  * Check the status of the device node passed as an argument.
312  *
313  *	if ((status is OKAY) || (status is DISABLED))
314  *		return DDI_SUCCESS
315  *	else
316  *		print a warning and return DDI_FAILURE
317  */
318 /*ARGSUSED1*/
319 int
320 check_status(int id, char *name, dev_info_t *parent)
321 {
322 	char status_buf[64];
323 	char devtype_buf[OBP_MAXPROPNAME];
324 	int retval = DDI_FAILURE;
325 
326 	/*
327 	 * is the status okay?
328 	 */
329 	if (status_okay(id, status_buf, sizeof (status_buf)))
330 		return (DDI_SUCCESS);
331 
332 	/*
333 	 * a status property indicating bad memory will be associated
334 	 * with a node which has a "device_type" property with a value of
335 	 * "memory-controller". in this situation, return DDI_SUCCESS
336 	 */
337 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
338 	    sizeof (devtype_buf)) > 0) {
339 		if (strcmp(devtype_buf, "memory-controller") == 0)
340 			retval = DDI_SUCCESS;
341 	}
342 
343 	/*
344 	 * print the status property information
345 	 */
346 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
347 	return (retval);
348 }
349 
350 /*ARGSUSED*/
351 uint_t
352 softlevel1(caddr_t arg1, caddr_t arg2)
353 {
354 	softint();
355 	return (1);
356 }
357 
358 /*
359  * Allow for implementation specific correction of PROM property values.
360  */
361 
362 /*ARGSUSED*/
363 void
364 impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
365     caddr_t buffer)
366 {
367 	/*
368 	 * There are no adjustments needed in this implementation.
369 	 */
370 }
371 
372 static int
373 getlongprop_buf(int id, char *name, char *buf, int maxlen)
374 {
375 	int size;
376 
377 	size = prom_getproplen((pnode_t)id, name);
378 	if (size <= 0 || (size > maxlen - 1))
379 		return (-1);
380 
381 	if (-1 == prom_getprop((pnode_t)id, name, buf))
382 		return (-1);
383 
384 	if (strcmp("name", name) == 0) {
385 		if (buf[size - 1] != '\0') {
386 			buf[size] = '\0';
387 			size += 1;
388 		}
389 	}
390 
391 	return (size);
392 }
393 
394 static int
395 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
396 {
397 	int ret;
398 
399 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
400 	    DDI_PROP_DONTPASS, pname, pval, plen))
401 	    == DDI_PROP_SUCCESS) {
402 		*plen = (*plen) * (sizeof (int));
403 	}
404 	return (ret);
405 }
406 
407 
408 /*
409  * Node Configuration
410  */
411 
412 struct prop_ispec {
413 	uint_t	pri, vec;
414 };
415 
416 /*
417  * For the x86, we're prepared to claim that the interrupt string
418  * is in the form of a list of <ipl,vec> specifications.
419  */
420 
421 #define	VEC_MIN	1
422 #define	VEC_MAX	255
423 
424 static int
425 impl_xlate_intrs(dev_info_t *child, int *in,
426     struct ddi_parent_private_data *pdptr)
427 {
428 	size_t size;
429 	int n;
430 	struct intrspec *new;
431 	caddr_t got_prop;
432 	int *inpri;
433 	int got_len;
434 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
435 
436 	static char bad_intr_fmt[] =
437 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
438 
439 	/*
440 	 * determine if the driver is expecting the new style "interrupts"
441 	 * property which just contains the IRQ, or the old style which
442 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
443 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
444 	 * in that case, the "interrupt-priorities" property contains the
445 	 * IPL values that match, one for one, the IRQ values in the
446 	 * "interrupts" property.
447 	 */
448 	inpri = NULL;
449 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
450 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
451 		/* the old style "interrupts" property... */
452 
453 		/*
454 		 * The list consists of <ipl,vec> elements
455 		 */
456 		if ((n = (*in++ >> 1)) < 1)
457 			return (DDI_FAILURE);
458 
459 		pdptr->par_nintr = n;
460 		size = n * sizeof (struct intrspec);
461 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
462 
463 		while (n--) {
464 			int level = *in++;
465 			int vec = *in++;
466 
467 			if (level < 1 || level > MAXIPL ||
468 			    vec < VEC_MIN || vec > VEC_MAX) {
469 				cmn_err(CE_CONT, bad_intr_fmt,
470 				    DEVI(child)->devi_name,
471 				    DEVI(child)->devi_instance, level, vec);
472 				goto broken;
473 			}
474 			new->intrspec_pri = level;
475 			if (vec != 2)
476 				new->intrspec_vec = vec;
477 			else
478 				/*
479 				 * irq 2 on the PC bus is tied to irq 9
480 				 * on ISA, EISA and MicroChannel
481 				 */
482 				new->intrspec_vec = 9;
483 			new++;
484 		}
485 
486 		return (DDI_SUCCESS);
487 	} else {
488 		/* the new style "interrupts" property... */
489 
490 		/*
491 		 * The list consists of <vec> elements
492 		 */
493 		if ((n = (*in++)) < 1)
494 			return (DDI_FAILURE);
495 
496 		pdptr->par_nintr = n;
497 		size = n * sizeof (struct intrspec);
498 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
499 
500 		/* XXX check for "interrupt-priorities" property... */
501 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
502 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
503 		    == DDI_PROP_SUCCESS) {
504 			if (n != (got_len / sizeof (int))) {
505 				cmn_err(CE_CONT,
506 				    "bad interrupt-priorities length"
507 				    " from %s%d: expected %d, got %d\n",
508 				    DEVI(child)->devi_name,
509 				    DEVI(child)->devi_instance, n,
510 				    (int)(got_len / sizeof (int)));
511 				goto broken;
512 			}
513 			inpri = (int *)got_prop;
514 		}
515 
516 		while (n--) {
517 			int level;
518 			int vec = *in++;
519 
520 			if (inpri == NULL)
521 				level = 5;
522 			else
523 				level = *inpri++;
524 
525 			if (level < 1 || level > MAXIPL ||
526 			    vec < VEC_MIN || vec > VEC_MAX) {
527 				cmn_err(CE_CONT, bad_intr_fmt,
528 				    DEVI(child)->devi_name,
529 				    DEVI(child)->devi_instance, level, vec);
530 				goto broken;
531 			}
532 			new->intrspec_pri = level;
533 			if (vec != 2)
534 				new->intrspec_vec = vec;
535 			else
536 				/*
537 				 * irq 2 on the PC bus is tied to irq 9
538 				 * on ISA, EISA and MicroChannel
539 				 */
540 				new->intrspec_vec = 9;
541 			new++;
542 		}
543 
544 		if (inpri != NULL)
545 			kmem_free(got_prop, got_len);
546 		return (DDI_SUCCESS);
547 	}
548 
549 broken:
550 	kmem_free(pdptr->par_intr, size);
551 	pdptr->par_intr = NULL;
552 	pdptr->par_nintr = 0;
553 	if (inpri != NULL)
554 		kmem_free(got_prop, got_len);
555 
556 	return (DDI_FAILURE);
557 }
558 
559 /*
560  * Create a ddi_parent_private_data structure from the ddi properties of
561  * the dev_info node.
562  *
563  * The "reg" and either an "intr" or "interrupts" properties are required
564  * if the driver wishes to create mappings or field interrupts on behalf
565  * of the device.
566  *
567  * The "reg" property is assumed to be a list of at least one triple
568  *
569  *	<bustype, address, size>*1
570  *
571  * The "intr" property is assumed to be a list of at least one duple
572  *
573  *	<SPARC ipl, vector#>*1
574  *
575  * The "interrupts" property is assumed to be a list of at least one
576  * n-tuples that describes the interrupt capabilities of the bus the device
577  * is connected to.  For SBus, this looks like
578  *
579  *	<SBus-level>*1
580  *
581  * (This property obsoletes the 'intr' property).
582  *
583  * The "ranges" property is optional.
584  */
585 void
586 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
587 {
588 	struct ddi_parent_private_data *pdptr;
589 	int n;
590 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
591 	uint_t reg_len, rng_len, intr_len, irupts_len;
592 
593 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
594 
595 	/*
596 	 * Handle the 'reg' property.
597 	 */
598 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
599 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
600 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
601 		pdptr->par_reg = (struct regspec *)reg_prop;
602 	}
603 
604 	/*
605 	 * See if I have a range (adding one where needed - this
606 	 * means to add one for sbus node in sun4c, when romvec > 0,
607 	 * if no range is already defined in the PROM node.
608 	 * (Currently no sun4c PROMS define range properties,
609 	 * but they should and may in the future.)  For the SBus
610 	 * node, the range is defined by the SBus reg property.
611 	 */
612 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
613 	    == DDI_PROP_SUCCESS) {
614 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
615 		pdptr->par_rng = (struct rangespec *)rng_prop;
616 	}
617 
618 	/*
619 	 * Handle the 'intr' and 'interrupts' properties
620 	 */
621 
622 	/*
623 	 * For backwards compatibility
624 	 * we first look for the 'intr' property for the device.
625 	 */
626 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
627 	    != DDI_PROP_SUCCESS) {
628 		intr_len = 0;
629 	}
630 
631 	/*
632 	 * If we're to support bus adapters and future platforms cleanly,
633 	 * we need to support the generalized 'interrupts' property.
634 	 */
635 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
636 	    &irupts_len) != DDI_PROP_SUCCESS) {
637 		irupts_len = 0;
638 	} else if (intr_len != 0) {
639 		/*
640 		 * If both 'intr' and 'interrupts' are defined,
641 		 * then 'interrupts' wins and we toss the 'intr' away.
642 		 */
643 		ddi_prop_free((void *)intr_prop);
644 		intr_len = 0;
645 	}
646 
647 	if (intr_len != 0) {
648 
649 		/*
650 		 * Translate the 'intr' property into an array
651 		 * an array of struct intrspec's.  There's not really
652 		 * very much to do here except copy what's out there.
653 		 */
654 
655 		struct intrspec *new;
656 		struct prop_ispec *l;
657 
658 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
659 		l = (struct prop_ispec *)intr_prop;
660 		pdptr->par_intr =
661 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
662 		while (n--) {
663 			new->intrspec_pri = l->pri;
664 			new->intrspec_vec = l->vec;
665 			new++;
666 			l++;
667 		}
668 		ddi_prop_free((void *)intr_prop);
669 
670 	} else if ((n = irupts_len) != 0) {
671 		size_t size;
672 		int *out;
673 
674 		/*
675 		 * Translate the 'interrupts' property into an array
676 		 * of intrspecs for the rest of the DDI framework to
677 		 * toy with.  Only our ancestors really know how to
678 		 * do this, so ask 'em.  We massage the 'interrupts'
679 		 * property so that it is pre-pended by a count of
680 		 * the number of integers in the argument.
681 		 */
682 		size = sizeof (int) + n;
683 		out = kmem_alloc(size, KM_SLEEP);
684 		*out = n / sizeof (int);
685 		bcopy(irupts_prop, out + 1, (size_t)n);
686 		ddi_prop_free((void *)irupts_prop);
687 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
688 			cmn_err(CE_CONT,
689 			    "Unable to translate 'interrupts' for %s%d\n",
690 			    DEVI(child)->devi_binding_name,
691 			    DEVI(child)->devi_instance);
692 		}
693 		kmem_free(out, size);
694 	}
695 }
696 
697 /*
698  * Name a child
699  */
700 static int
701 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
702 {
703 	/*
704 	 * Fill in parent-private data and this function returns to us
705 	 * an indication if it used "registers" to fill in the data.
706 	 */
707 	if (ddi_get_parent_data(child) == NULL) {
708 		struct ddi_parent_private_data *pdptr;
709 		make_ddi_ppd(child, &pdptr);
710 		ddi_set_parent_data(child, pdptr);
711 	}
712 
713 	name[0] = '\0';
714 	if (sparc_pd_getnreg(child) > 0) {
715 		(void) snprintf(name, namelen, "%x,%x",
716 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
717 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
718 	}
719 
720 	return (DDI_SUCCESS);
721 }
722 
723 /*
724  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
725  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
726  * the children of sun busses based on the reg spec.
727  *
728  * Handles the following properties (in make_ddi_ppd):
729  *	Property		value
730  *	  Name			type
731  *	reg		register spec
732  *	intr		old-form interrupt spec
733  *	interrupts	new (bus-oriented) interrupt spec
734  *	ranges		range spec
735  */
736 int
737 impl_ddi_sunbus_initchild(dev_info_t *child)
738 {
739 	char name[MAXNAMELEN];
740 	void impl_ddi_sunbus_removechild(dev_info_t *);
741 
742 	/*
743 	 * Name the child, also makes parent private data
744 	 */
745 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
746 	ddi_set_name_addr(child, name);
747 
748 	/*
749 	 * Attempt to merge a .conf node; if successful, remove the
750 	 * .conf node.
751 	 */
752 	if ((ndi_dev_is_persistent_node(child) == 0) &&
753 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
754 		/*
755 		 * Return failure to remove node
756 		 */
757 		impl_ddi_sunbus_removechild(child);
758 		return (DDI_FAILURE);
759 	}
760 	return (DDI_SUCCESS);
761 }
762 
763 void
764 impl_free_ddi_ppd(dev_info_t *dip)
765 {
766 	struct ddi_parent_private_data *pdptr;
767 	size_t n;
768 
769 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
770 		return;
771 
772 	if ((n = (size_t)pdptr->par_nintr) != 0)
773 		/*
774 		 * Note that kmem_free is used here (instead of
775 		 * ddi_prop_free) because the contents of the
776 		 * property were placed into a separate buffer and
777 		 * mucked with a bit before being stored in par_intr.
778 		 * The actual return value from the prop lookup
779 		 * was freed with ddi_prop_free previously.
780 		 */
781 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
782 
783 	if ((n = (size_t)pdptr->par_nrng) != 0)
784 		ddi_prop_free((void *)pdptr->par_rng);
785 
786 	if ((n = pdptr->par_nreg) != 0)
787 		ddi_prop_free((void *)pdptr->par_reg);
788 
789 	kmem_free(pdptr, sizeof (*pdptr));
790 	ddi_set_parent_data(dip, NULL);
791 }
792 
793 void
794 impl_ddi_sunbus_removechild(dev_info_t *dip)
795 {
796 	impl_free_ddi_ppd(dip);
797 	ddi_set_name_addr(dip, NULL);
798 	/*
799 	 * Strip the node to properly convert it back to prototype form
800 	 */
801 	impl_rem_dev_props(dip);
802 }
803 
804 /*
805  * DDI Interrupt
806  */
807 
/*
 * Setting this forces isa, eisa, and mca devices to ignore the new
 * hardware nodes in the device tree.  Normally that is requested per
 * driver instead, through an "ignore-hardware-nodes" property in the
 * driver.conf file.
 *
 * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
 *		as safety valve.
 */
int ignore_hardware_nodes = 0;

/*
 * Local data
 */
static struct impl_bus_promops *impl_busp;
823 
824 
825 /*
826  * New DDI interrupt framework
827  */
828 
829 /*
830  * i_ddi_intr_ops:
831  *
832  * This is the interrupt operator function wrapper for the bus function
833  * bus_intr_op.
834  */
835 int
836 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
837     ddi_intr_handle_impl_t *hdlp, void * result)
838 {
839 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
840 	int		ret = DDI_FAILURE;
841 
842 	/* request parent to process this interrupt op */
843 	if (NEXUS_HAS_INTR_OP(pdip))
844 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
845 		    pdip, rdip, op, hdlp, result);
846 	else
847 		cmn_err(CE_WARN, "Failed to process interrupt "
848 		    "for %s%d due to down-rev nexus driver %s%d",
849 		    ddi_get_name(rdip), ddi_get_instance(rdip),
850 		    ddi_get_name(pdip), ddi_get_instance(pdip));
851 	return (ret);
852 }
853 
854 /*
855  * i_ddi_add_softint - allocate and add a soft interrupt to the system
856  */
857 int
858 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
859 {
860 	int ret;
861 
862 	/* add soft interrupt handler */
863 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
864 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
865 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
866 }
867 
868 
869 void
870 i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
871 {
872 	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
873 }
874 
875 
876 extern void (*setsoftint)(int, struct av_softinfo *);
877 extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
878 
879 int
880 i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
881 {
882 	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
883 		return (DDI_EPENDING);
884 
885 	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);
886 
887 	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
888 	return (DDI_SUCCESS);
889 }
890 
891 /*
892  * i_ddi_set_softint_pri:
893  *
894  * The way this works is that it first tries to add a softint vector
895  * at the new priority in hdlp. If that succeeds; then it removes the
896  * existing softint vector at the old priority.
897  */
898 int
899 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
900 {
901 	int ret;
902 
903 	/*
904 	 * If a softint is pending at the old priority then fail the request.
905 	 */
906 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
907 		return (DDI_FAILURE);
908 
909 	ret = av_softint_movepri((void *)hdlp, old_pri);
910 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
911 }
912 
913 void
914 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
915 {
916 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
917 }
918 
919 void
920 i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
921 {
922 	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
923 	hdlp->ih_private = NULL;
924 }
925 
926 int
927 i_ddi_get_intx_nintrs(dev_info_t *dip)
928 {
929 	struct ddi_parent_private_data *pdp;
930 
931 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
932 		return (0);
933 
934 	return (pdp->par_nintr);
935 }
936 
937 /*
938  * DDI Memory/DMA
939  */
940 
941 /*
942  * Support for allocating DMAable memory to implement
943  * ddi_dma_mem_alloc(9F) interface.
944  */
945 
/*
 * KA_ALIGN (1 << KA_ALIGN_SHIFT, i.e. 128) is the smallest kmem_io
 * cache object size; KA_NCACHE is the number of caches per memory
 * range, one per power of two from KA_ALIGN upward.
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
949 
950 /*
951  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
952  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
953  */
954 
955 static ddi_dma_attr_t kmem_io_attr = {
956 	DMA_ATTR_V0,
957 	0x0000000000000000ULL,		/* dma_attr_addr_lo */
958 	0x0000000000000000ULL,		/* dma_attr_addr_hi */
959 	0x00ffffff,
960 	0x1000,				/* dma_attr_align */
961 	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
962 };
963 
964 /* kmem io memory ranges and indices */
965 enum {
966 	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
967 	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
968 };
969 
970 static struct {
971 	vmem_t		*kmem_io_arena;
972 	kmem_cache_t	*kmem_io_cache[KA_NCACHE];
973 	ddi_dma_attr_t	kmem_io_attr;
974 } kmem_io[MAX_MEM_RANGES];
975 
976 static int kmem_io_idx;		/* index of first populated kmem_io[] */
977 
978 static page_t *
979 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
980 {
981 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
982 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
983 
984 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
985 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
986 }
987 
#ifdef __xpv
/*
 * Free routine for the kmem_io arenas under the hypervisor: releases
 * the range through segkmem_xfree() using page_destroy_io().
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
	extern void page_destroy_io(page_t *);

	segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
996 
997 static void *
998 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
999 {
1000 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1001 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1002 }
1003 
1004 static void *
1005 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1006 {
1007 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1008 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1009 }
1010 
1011 static void *
1012 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1013 {
1014 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1015 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1016 }
1017 
1018 static void *
1019 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1020 {
1021 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1022 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1023 }
1024 
1025 static void *
1026 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1027 {
1028 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1029 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1030 }
1031 
1032 static void *
1033 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1034 {
1035 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1036 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1037 }
1038 
1039 static void *
1040 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1041 {
1042 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1043 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1044 }
1045 
1046 static void *
1047 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1048 {
1049 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1050 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1051 }
1052 
1053 static void *
1054 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1055 {
1056 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1057 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1058 }
1059 
1060 static void *
1061 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1062 {
1063 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1064 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1065 }
1066 
1067 static void *
1068 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1069 {
1070 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1071 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1072 }
1073 
1074 struct {
1075 	uint64_t	io_limit;
1076 	char		*io_name;
1077 	void		*(*io_alloc)(vmem_t *, size_t, int);
1078 	int		io_initial;	/* kmem_io_init during startup */
1079 } io_arena_params[MAX_MEM_RANGES] = {
1080 	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
1081 	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
1082 	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
1083 	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
1084 	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
1085 	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
1086 	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
1087 	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
1088 	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
1089 	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
1090 	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
1091 };
1092 
1093 void
1094 kmem_io_init(int a)
1095 {
1096 	int	c;
1097 	char name[40];
1098 
1099 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1100 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1101 #ifdef __xpv
1102 	    segkmem_free_io,
1103 #else
1104 	    segkmem_free,
1105 #endif
1106 	    heap_arena, 0, VM_SLEEP);
1107 
1108 	for (c = 0; c < KA_NCACHE; c++) {
1109 		size_t size = KA_ALIGN << c;
1110 		(void) sprintf(name, "%s_%lu",
1111 		    io_arena_params[a].io_name, size);
1112 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1113 		    size, size, NULL, NULL, NULL, NULL,
1114 		    kmem_io[a].kmem_io_arena, 0);
1115 	}
1116 }
1117 
1118 /*
1119  * Return the index of the highest memory range for addr.
1120  */
1121 static int
1122 kmem_io_index(uint64_t addr)
1123 {
1124 	int n;
1125 
1126 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1127 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1128 			if (kmem_io[n].kmem_io_arena == NULL)
1129 				kmem_io_init(n);
1130 			return (n);
1131 		}
1132 	}
1133 	panic("kmem_io_index: invalid addr - must be at least 16m");
1134 
1135 	/*NOTREACHED*/
1136 }
1137 
1138 /*
1139  * Return the index of the next kmem_io populated memory range
1140  * after curindex.
1141  */
1142 static int
1143 kmem_io_index_next(int curindex)
1144 {
1145 	int n;
1146 
1147 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1148 		if (kmem_io[n].kmem_io_arena)
1149 			return (n);
1150 	}
1151 	return (-1);
1152 }
1153 
1154 /*
1155  * allow kmem to be mapped in with different PTE cache attribute settings.
1156  * Used by i_ddi_mem_alloc()
1157  */
1158 int
1159 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1160 {
1161 	uint_t hat_flags;
1162 	caddr_t kva_end;
1163 	uint_t hat_attr;
1164 	pfn_t pfn;
1165 
1166 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1167 		return (-1);
1168 	}
1169 
1170 	hat_attr &= ~HAT_ORDER_MASK;
1171 	hat_attr |= order | HAT_NOSYNC;
1172 	hat_flags = HAT_LOAD_LOCK;
1173 
1174 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1175 	    (uintptr_t)PAGEMASK);
1176 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1177 
1178 	while (kva < kva_end) {
1179 		pfn = hat_getpfnum(kas.a_hat, kva);
1180 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1181 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1182 		kva += MMU_PAGESIZE;
1183 	}
1184 
1185 	return (0);
1186 }
1187 
1188 void
1189 ka_init(void)
1190 {
1191 	int a;
1192 	paddr_t maxphysaddr;
1193 #if !defined(__xpv)
1194 	extern pfn_t physmax;
1195 
1196 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1197 #else
1198 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1199 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1200 #endif
1201 
1202 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1203 
1204 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1205 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1206 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1207 				io_arena_params[a].io_limit = maxphysaddr;
1208 			else
1209 				a++;
1210 			break;
1211 		}
1212 	}
1213 	kmem_io_idx = a;
1214 
1215 	for (; a < MAX_MEM_RANGES; a++) {
1216 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1217 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1218 		    io_arena_params[a].io_limit;
1219 		/*
1220 		 * initialize kmem_io[] arena/cache corresponding to
1221 		 * maxphysaddr and to the "common" io memory ranges that
1222 		 * have io_initial set to a non-zero value.
1223 		 */
1224 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1225 			kmem_io_init(a);
1226 	}
1227 }
1228 
1229 /*
1230  * put contig address/size
1231  */
1232 static void *
1233 putctgas(void *addr, size_t size)
1234 {
1235 	struct ctgas	*ctgp = &ctglist;
1236 	int		i;
1237 
1238 	CTGLOCK();
1239 	do {
1240 		if ((i = ctgp->ctg_index) < CTGENTRIES) {
1241 			ctgp->ctg_addr[i] = addr;
1242 			ctgp->ctg_size[i] = size;
1243 			ctgp->ctg_index++;
1244 			break;
1245 		}
1246 		if (!ctgp->ctg_next)
1247 			ctgp->ctg_next = kmem_zalloc(sizeof (struct ctgas),
1248 			    KM_NOSLEEP);
1249 		ctgp = ctgp->ctg_next;
1250 	} while (ctgp);
1251 
1252 	CTGUNLOCK();
1253 	return (ctgp);
1254 }
1255 
1256 /*
1257  * get contig size by addr
1258  */
1259 static size_t
1260 getctgsz(void *addr)
1261 {
1262 	struct ctgas	*ctgp = &ctglist;
1263 	int		i, j;
1264 	size_t		sz;
1265 
1266 	ASSERT(addr);
1267 	CTGLOCK();
1268 
1269 	while (ctgp) {
1270 		for (i = 0; i < ctgp->ctg_index; i++) {
1271 			if (addr != ctgp->ctg_addr[i])
1272 				continue;
1273 
1274 			sz = ctgp->ctg_size[i];
1275 			j = --ctgp->ctg_index;
1276 			if (i != j) {
1277 				ctgp->ctg_size[i] = ctgp->ctg_size[j];
1278 				ctgp->ctg_addr[i] = ctgp->ctg_addr[j];
1279 			}
1280 			CTGUNLOCK();
1281 			return (sz);
1282 		}
1283 		ctgp = ctgp->ctg_next;
1284 	}
1285 
1286 	CTGUNLOCK();
1287 	return (0);
1288 }
1289 
1290 /*
1291  * contig_alloc:
1292  *
1293  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1294  *	specified in 'attr'.
1295  *
1296  *	Not all of memory need to be physically contiguous if the
1297  *	scatter-gather list length is greater than 1.
1298  */
1299 
1300 /*ARGSUSED*/
1301 void *
1302 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1303 {
1304 	pgcnt_t		pgcnt = btopr(size);
1305 	size_t		asize = pgcnt * PAGESIZE;
1306 	page_t		*ppl;
1307 	int		pflag;
1308 	void		*addr;
1309 
1310 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1311 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1312 
1313 	/* segkmem_xalloc */
1314 
1315 	if (align <= PAGESIZE)
1316 		addr = vmem_alloc(heap_arena, asize,
1317 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1318 	else
1319 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1320 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1321 	if (addr) {
1322 		ASSERT(!((uintptr_t)addr & (align - 1)));
1323 
1324 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1325 			vmem_free(heap_arena, addr, asize);
1326 			return (NULL);
1327 		}
1328 		pflag = PG_EXCL;
1329 
1330 		if (cansleep)
1331 			pflag |= PG_WAIT;
1332 
1333 		/* 4k req gets from freelists rather than pfn search */
1334 		if (pgcnt > 1 || align > PAGESIZE)
1335 			pflag |= PG_PHYSCONTIG;
1336 
1337 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1338 		    asize, pflag, &kas, (caddr_t)addr, attr);
1339 
1340 		if (!ppl) {
1341 			vmem_free(heap_arena, addr, asize);
1342 			page_unresv(pgcnt);
1343 			return (NULL);
1344 		}
1345 
1346 		while (ppl != NULL) {
1347 			page_t	*pp = ppl;
1348 			page_sub(&ppl, pp);
1349 			ASSERT(page_iolock_assert(pp));
1350 			page_io_unlock(pp);
1351 			page_downgrade(pp);
1352 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1353 			    pp, (PROT_ALL & ~PROT_USER) |
1354 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1355 		}
1356 	}
1357 	return (addr);
1358 }
1359 
1360 void
1361 contig_free(void *addr, size_t size)
1362 {
1363 	pgcnt_t	pgcnt = btopr(size);
1364 	size_t	asize = pgcnt * PAGESIZE;
1365 	caddr_t	a, ea;
1366 	page_t	*pp;
1367 
1368 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1369 
1370 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1371 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1372 		if (!pp)
1373 			panic("contig_free: contig pp not found");
1374 
1375 		if (!page_tryupgrade(pp)) {
1376 			page_unlock(pp);
1377 			pp = page_lookup(&kvp,
1378 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1379 			if (pp == NULL)
1380 				panic("contig_free: page freed");
1381 		}
1382 		page_destroy(pp, 0);
1383 	}
1384 
1385 	page_unresv(pgcnt);
1386 	vmem_free(heap_arena, addr, asize);
1387 }
1388 
1389 /*
1390  * Allocate from the system, aligned on a specific boundary.
1391  * The alignment, if non-zero, must be a power of 2.
1392  */
1393 static void *
1394 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1395 	ddi_dma_attr_t *attr)
1396 {
1397 	size_t *addr, *raddr, rsize;
1398 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
1399 	int a, i, c;
1400 	vmem_t *vmp;
1401 	kmem_cache_t *cp = NULL;
1402 
1403 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1404 		return (NULL);
1405 
1406 	align = MAX(align, hdrsize);
1407 	ASSERT((align & (align - 1)) == 0);
1408 
1409 	/*
1410 	 * All of our allocators guarantee 16-byte alignment, so we don't
1411 	 * need to reserve additional space for the header.
1412 	 * To simplify picking the correct kmem_io_cache, we round up to
1413 	 * a multiple of KA_ALIGN.
1414 	 */
1415 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1416 
1417 	if (physcontig && rsize > PAGESIZE) {
1418 		if (addr = contig_alloc(size, attr, align, cansleep)) {
1419 			if (!putctgas(addr, size))
1420 				contig_free(addr, size);
1421 			else
1422 				return (addr);
1423 		}
1424 		return (NULL);
1425 	}
1426 
1427 	a = kmem_io_index(attr->dma_attr_addr_hi);
1428 
1429 	if (rsize > PAGESIZE) {
1430 		vmp = kmem_io[a].kmem_io_arena;
1431 		raddr = vmem_alloc(vmp, rsize,
1432 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1433 	} else {
1434 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1435 		cp = kmem_io[a].kmem_io_cache[c];
1436 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1437 		    KM_NOSLEEP);
1438 	}
1439 
1440 	if (raddr == NULL) {
1441 		int	na;
1442 
1443 		ASSERT(cansleep == 0);
1444 		if (rsize > PAGESIZE)
1445 			return (NULL);
1446 		/*
1447 		 * System does not have memory in the requested range.
1448 		 * Try smaller kmem io ranges and larger cache sizes
1449 		 * to see if there might be memory available in
1450 		 * these other caches.
1451 		 */
1452 
1453 		for (na = kmem_io_index_next(a); na >= 0;
1454 		    na = kmem_io_index_next(na)) {
1455 			ASSERT(kmem_io[na].kmem_io_arena);
1456 			cp = kmem_io[na].kmem_io_cache[c];
1457 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1458 			if (raddr)
1459 				goto kallocdone;
1460 		}
1461 		/* now try the larger kmem io cache sizes */
1462 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1463 			for (i = c + 1; i < KA_NCACHE; i++) {
1464 				cp = kmem_io[na].kmem_io_cache[i];
1465 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1466 				if (raddr)
1467 					goto kallocdone;
1468 			}
1469 		}
1470 		return (NULL);
1471 	}
1472 
1473 kallocdone:
1474 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1475 	    rsize > PAGESIZE);
1476 
1477 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1478 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1479 
1480 	addr[-4] = (size_t)cp;
1481 	addr[-3] = (size_t)vmp;
1482 	addr[-2] = (size_t)raddr;
1483 	addr[-1] = rsize;
1484 
1485 	return (addr);
1486 }
1487 
1488 static void
1489 kfreea(void *addr)
1490 {
1491 	size_t		size;
1492 
1493 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1494 		contig_free(addr, size);
1495 	} else {
1496 		size_t	*saddr = addr;
1497 		if (saddr[-4] == 0)
1498 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1499 			    saddr[-1]);
1500 		else
1501 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1502 			    (void *)saddr[-2]);
1503 	}
1504 }
1505 
1506 /*ARGSUSED*/
1507 void
1508 i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
1509 {
1510 }
1511 
1512 /*
1513  * Check if the specified cache attribute is supported on the platform.
1514  * This function must be called before i_ddi_cacheattr_to_hatacc().
1515  */
1516 boolean_t
1517 i_ddi_check_cache_attr(uint_t flags)
1518 {
1519 	/*
1520 	 * The cache attributes are mutually exclusive. Any combination of
1521 	 * the attributes leads to a failure.
1522 	 */
1523 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1524 	if ((cache_attr != 0) && ((cache_attr & (cache_attr - 1)) != 0))
1525 		return (B_FALSE);
1526 
1527 	/* All cache attributes are supported on X86/X64 */
1528 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1529 	    IOMEM_DATA_UC_WR_COMBINE))
1530 		return (B_TRUE);
1531 
1532 	/* undefined attributes */
1533 	return (B_FALSE);
1534 }
1535 
1536 /* set HAT cache attributes from the cache attributes */
1537 void
1538 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1539 {
1540 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1541 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1542 
1543 	/*
1544 	 * If write-combining is not supported, then it falls back
1545 	 * to uncacheable.
1546 	 */
1547 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE && !(x86_feature & X86_PAT))
1548 		cache_attr = IOMEM_DATA_UNCACHED;
1549 
1550 	/*
1551 	 * set HAT attrs according to the cache attrs.
1552 	 */
1553 	switch (cache_attr) {
1554 	case IOMEM_DATA_UNCACHED:
1555 		*hataccp &= ~HAT_ORDER_MASK;
1556 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1557 		break;
1558 	case IOMEM_DATA_UC_WR_COMBINE:
1559 		*hataccp &= ~HAT_ORDER_MASK;
1560 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1561 		break;
1562 	case IOMEM_DATA_CACHED:
1563 		*hataccp &= ~HAT_ORDER_MASK;
1564 		*hataccp |= HAT_UNORDERED_OK;
1565 		break;
1566 	/*
1567 	 * This case must not occur because the cache attribute is scrutinized
1568 	 * before this function is called.
1569 	 */
1570 	default:
1571 		/*
1572 		 * set cacheable to hat attrs.
1573 		 */
1574 		*hataccp &= ~HAT_ORDER_MASK;
1575 		*hataccp |= HAT_UNORDERED_OK;
1576 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1577 		    fname, cache_attr);
1578 	}
1579 }
1580 
1581 /*
1582  * This should actually be called i_ddi_dma_mem_alloc. There should
1583  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1584  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1585  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1586  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1587  * so far which is used for both, DMA and PIO, we have to use the DMA
1588  * ctl ops to make everybody happy.
1589  */
1590 /*ARGSUSED*/
1591 int
1592 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1593 	size_t length, int cansleep, int flags,
1594 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1595 	size_t *real_length, ddi_acc_hdl_t *ap)
1596 {
1597 	caddr_t a;
1598 	int iomin;
1599 	ddi_acc_impl_t *iap;
1600 	int physcontig = 0;
1601 	pgcnt_t npages;
1602 	pgcnt_t minctg;
1603 	uint_t order;
1604 	int e;
1605 
1606 	/*
1607 	 * Check legality of arguments
1608 	 */
1609 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1610 		return (DDI_FAILURE);
1611 	}
1612 
1613 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1614 	    (attr->dma_attr_align & (attr->dma_attr_align - 1)) ||
1615 	    (attr->dma_attr_minxfer & (attr->dma_attr_minxfer - 1))) {
1616 			return (DDI_FAILURE);
1617 	}
1618 
1619 	/*
1620 	 * figure out most restrictive alignment requirement
1621 	 */
1622 	iomin = attr->dma_attr_minxfer;
1623 	iomin = maxbit(iomin, attr->dma_attr_align);
1624 	if (iomin == 0)
1625 		return (DDI_FAILURE);
1626 
1627 	ASSERT((iomin & (iomin - 1)) == 0);
1628 
1629 	/*
1630 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1631 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1632 	 * memory that ends on a page boundry.
1633 	 * Don't want to have to different cache mappings to the same
1634 	 * physical page.
1635 	 */
1636 	if (OVERRIDE_CACHE_ATTR(flags)) {
1637 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1638 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1639 	}
1640 
1641 	/*
1642 	 * Determine if we need to satisfy the request for physically
1643 	 * contiguous memory or alignments larger than pagesize.
1644 	 */
1645 	npages = btopr(length + attr->dma_attr_align);
1646 	minctg = howmany(npages, attr->dma_attr_sgllen);
1647 
1648 	if (minctg > 1) {
1649 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1650 		/*
1651 		 * verify that the minimum contig requirement for the
1652 		 * actual length does not cross segment boundary.
1653 		 */
1654 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1655 		    size_t);
1656 		npages = btopr(length);
1657 		minctg = howmany(npages, attr->dma_attr_sgllen);
1658 		if (minctg > pfnseg + 1)
1659 			return (DDI_FAILURE);
1660 		physcontig = 1;
1661 	} else {
1662 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1663 	}
1664 
1665 	/*
1666 	 * Allocate the requested amount from the system.
1667 	 */
1668 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1669 
1670 	if ((*kaddrp = a) == NULL)
1671 		return (DDI_FAILURE);
1672 
1673 	/*
1674 	 * if we to modify the cache attributes, go back and muck with the
1675 	 * mappings.
1676 	 */
1677 	if (OVERRIDE_CACHE_ATTR(flags)) {
1678 		order = 0;
1679 		i_ddi_cacheattr_to_hatacc(flags, &order);
1680 		e = kmem_override_cache_attrs(a, length, order);
1681 		if (e != 0) {
1682 			kfreea(a);
1683 			return (DDI_FAILURE);
1684 		}
1685 	}
1686 
1687 	if (real_length) {
1688 		*real_length = length;
1689 	}
1690 	if (ap) {
1691 		/*
1692 		 * initialize access handle
1693 		 */
1694 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1695 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1696 		impl_acc_hdl_init(ap);
1697 	}
1698 
1699 	return (DDI_SUCCESS);
1700 }
1701 
1702 /*
1703  * covert old DMA limits structure to DMA attribute structure
1704  * and continue
1705  */
1706 int
1707 i_ddi_mem_alloc_lim(dev_info_t *dip, ddi_dma_lim_t *limits,
1708 	size_t length, int cansleep, int streaming,
1709 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1710 	uint_t *real_length, ddi_acc_hdl_t *ap)
1711 {
1712 	ddi_dma_attr_t dma_attr, *attrp;
1713 	size_t rlen;
1714 	int ret;
1715 
1716 	if (limits == NULL) {
1717 		return (DDI_FAILURE);
1718 	}
1719 
1720 	/*
1721 	 * set up DMA attribute structure to pass to i_ddi_mem_alloc()
1722 	 */
1723 	attrp = &dma_attr;
1724 	attrp->dma_attr_version = DMA_ATTR_V0;
1725 	attrp->dma_attr_addr_lo = (uint64_t)limits->dlim_addr_lo;
1726 	attrp->dma_attr_addr_hi = (uint64_t)limits->dlim_addr_hi;
1727 	attrp->dma_attr_count_max = (uint64_t)limits->dlim_ctreg_max;
1728 	attrp->dma_attr_align = 1;
1729 	attrp->dma_attr_burstsizes = (uint_t)limits->dlim_burstsizes;
1730 	attrp->dma_attr_minxfer = (uint32_t)limits->dlim_minxfer;
1731 	attrp->dma_attr_maxxfer = (uint64_t)limits->dlim_reqsize;
1732 	attrp->dma_attr_seg = (uint64_t)limits->dlim_adreg_max;
1733 	attrp->dma_attr_sgllen = limits->dlim_sgllen;
1734 	attrp->dma_attr_granular = (uint32_t)limits->dlim_granular;
1735 	attrp->dma_attr_flags = 0;
1736 
1737 	ret = i_ddi_mem_alloc(dip, attrp, length, cansleep, streaming,
1738 	    accattrp, kaddrp, &rlen, ap);
1739 	if (ret == DDI_SUCCESS) {
1740 		if (real_length)
1741 			*real_length = (uint_t)rlen;
1742 	}
1743 	return (ret);
1744 }
1745 
1746 /* ARGSUSED */
1747 void
1748 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1749 {
1750 	if (ap != NULL) {
1751 		/*
1752 		 * if we modified the cache attributes on alloc, go back and
1753 		 * fix them since this memory could be returned to the
1754 		 * general pool.
1755 		 */
1756 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1757 			uint_t order = 0;
1758 			int e;
1759 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1760 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1761 			if (e != 0) {
1762 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1763 				    "override cache attrs, memory leaked\n");
1764 				return;
1765 			}
1766 		}
1767 	}
1768 	kfreea(kaddr);
1769 }
1770 
1771 /*
1772  * Access Barriers
1773  *
1774  */
1775 /*ARGSUSED*/
1776 int
1777 i_ddi_ontrap(ddi_acc_handle_t hp)
1778 {
1779 	return (DDI_FAILURE);
1780 }
1781 
1782 /*ARGSUSED*/
1783 void
1784 i_ddi_notrap(ddi_acc_handle_t hp)
1785 {
1786 }
1787 
1788 
1789 /*
1790  * Misc Functions
1791  */
1792 
1793 /*
1794  * Implementation instance override functions
1795  *
1796  * No override on i86pc
1797  */
1798 /*ARGSUSED*/
1799 uint_t
1800 impl_assign_instance(dev_info_t *dip)
1801 {
1802 	return ((uint_t)-1);
1803 }
1804 
1805 /*ARGSUSED*/
1806 int
1807 impl_keep_instance(dev_info_t *dip)
1808 {
1809 
1810 #if defined(__xpv)
1811 	/*
1812 	 * Do not persist instance numbers assigned to devices in dom0
1813 	 */
1814 	dev_info_t *pdip;
1815 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1816 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1817 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1818 			return (DDI_SUCCESS);
1819 	}
1820 #endif
1821 	return (DDI_FAILURE);
1822 }
1823 
1824 /*ARGSUSED*/
1825 int
1826 impl_free_instance(dev_info_t *dip)
1827 {
1828 	return (DDI_FAILURE);
1829 }
1830 
1831 /*ARGSUSED*/
1832 int
1833 impl_check_cpu(dev_info_t *devi)
1834 {
1835 	return (DDI_SUCCESS);
1836 }
1837 
/*
 * Power off the machine; referenced from common/cpr_driver.c.
 * There is no known way to power off i86pc, so this does nothing.
 */
void
arch_power_down()
{
}
1845 
1846 /*
1847  * Copy name to property_name, since name
1848  * is in the low address range below kernelbase.
1849  */
1850 static void
1851 copy_boot_str(const char *boot_str, char *kern_str, int len)
1852 {
1853 	int i = 0;
1854 
1855 	while (i < len - 1 && boot_str[i] != '\0') {
1856 		kern_str[i] = boot_str[i];
1857 		i++;
1858 	}
1859 
1860 	kern_str[i] = 0;	/* null terminate */
1861 	if (boot_str[i] != '\0')
1862 		cmn_err(CE_WARN,
1863 		    "boot property string is truncated to %s", kern_str);
1864 }
1865 
1866 static void
1867 get_boot_properties(void)
1868 {
1869 	extern char hw_provider[];
1870 	dev_info_t *devi;
1871 	char *name;
1872 	int length;
1873 	char property_name[50], property_val[50];
1874 	void *bop_staging_area;
1875 
1876 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1877 
1878 	/*
1879 	 * Import "root" properties from the boot.
1880 	 *
1881 	 * We do this by invoking BOP_NEXTPROP until the list
1882 	 * is completely copied in.
1883 	 */
1884 
1885 	devi = ddi_root_node();
1886 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1887 	    name;					/* NULL => DONE */
1888 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1889 
1890 		/* copy string to memory above kernelbase */
1891 		copy_boot_str(name, property_name, 50);
1892 
1893 		/*
1894 		 * Skip vga properties. They will be picked up later
1895 		 * by get_vga_properties.
1896 		 */
1897 		if (strcmp(property_name, "display-edif-block") == 0 ||
1898 		    strcmp(property_name, "display-edif-id") == 0) {
1899 			continue;
1900 		}
1901 
1902 		length = BOP_GETPROPLEN(bootops, property_name);
1903 		if (length == 0)
1904 			continue;
1905 		if (length > MMU_PAGESIZE) {
1906 			cmn_err(CE_NOTE,
1907 			    "boot property %s longer than 0x%x, ignored\n",
1908 			    property_name, MMU_PAGESIZE);
1909 			continue;
1910 		}
1911 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1912 
1913 		/*
1914 		 * special properties:
1915 		 * si-machine, si-hw-provider
1916 		 *	goes to kernel data structures.
1917 		 * bios-boot-device and stdout
1918 		 *	goes to hardware property list so it may show up
1919 		 *	in the prtconf -vp output. This is needed by
1920 		 *	Install/Upgrade. Once we fix install upgrade,
1921 		 *	this can be taken out.
1922 		 */
1923 		if (strcmp(name, "si-machine") == 0) {
1924 			(void) strncpy(utsname.machine, bop_staging_area,
1925 			    SYS_NMLN);
1926 			utsname.machine[SYS_NMLN - 1] = (char)NULL;
1927 		} else if (strcmp(name, "si-hw-provider") == 0) {
1928 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1929 			hw_provider[SYS_NMLN - 1] = (char)NULL;
1930 		} else if (strcmp(name, "bios-boot-device") == 0) {
1931 			copy_boot_str(bop_staging_area, property_val, 50);
1932 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1933 			    property_name, property_val);
1934 		} else if (strcmp(name, "stdout") == 0) {
1935 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1936 			    property_name, *((int *)bop_staging_area));
1937 		} else {
1938 			/* Property type unknown, use old prop interface */
1939 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1940 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1941 			    length);
1942 		}
1943 	}
1944 
1945 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1946 }
1947 
1948 static void
1949 get_vga_properties(void)
1950 {
1951 	dev_info_t *devi;
1952 	major_t major;
1953 	char *name;
1954 	int length;
1955 	char property_val[50];
1956 	void *bop_staging_area;
1957 
1958 	/*
1959 	 * XXXX Hack Allert!
1960 	 * There really needs to be a better way for identifying various
1961 	 * console framebuffers and their related issues.  Till then,
1962 	 * check for this one as a replacement to vgatext.
1963 	 */
1964 	major = ddi_name_to_major("ragexl");
1965 	if (major == (major_t)-1) {
1966 		major = ddi_name_to_major("vgatext");
1967 		if (major == (major_t)-1)
1968 			return;
1969 	}
1970 	devi = devnamesp[major].dn_head;
1971 	if (devi == NULL)
1972 		return;
1973 
1974 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1975 
1976 	/*
1977 	 * Import "vga" properties from the boot.
1978 	 */
1979 	name = "display-edif-block";
1980 	length = BOP_GETPROPLEN(bootops, name);
1981 	if (length > 0 && length < MMU_PAGESIZE) {
1982 		BOP_GETPROP(bootops, name, bop_staging_area);
1983 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
1984 		    devi, name, bop_staging_area, length);
1985 	}
1986 
1987 	/*
1988 	 * kdmconfig is also looking for display-type and
1989 	 * video-adapter-type. We default to color and svga.
1990 	 *
1991 	 * Could it be "monochrome", "vga"?
1992 	 * Nah, you've got to come to the 21st century...
1993 	 * And you can set monitor type manually in kdmconfig
1994 	 * if you are really an old junky.
1995 	 */
1996 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1997 	    devi, "display-type", "color");
1998 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1999 	    devi, "video-adapter-type", "svga");
2000 
2001 	name = "display-edif-id";
2002 	length = BOP_GETPROPLEN(bootops, name);
2003 	if (length > 0 && length < MMU_PAGESIZE) {
2004 		BOP_GETPROP(bootops, name, bop_staging_area);
2005 		copy_boot_str(bop_staging_area, property_val, length);
2006 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2007 		    devi, name, property_val);
2008 	}
2009 
2010 	kmem_free(bop_staging_area, MMU_PAGESIZE);
2011 }
2012 
2013 
2014 /*
2015  * This is temporary, but absolutely necessary.  If we are being
2016  * booted with a device tree created by the DevConf project's bootconf
2017  * program, then we have device information nodes that reflect
2018  * reality.  At this point in time in the Solaris release schedule, the
2019  * kernel drivers aren't prepared for reality.  They still depend on their
2020  * own ad-hoc interpretations of the properties created when their .conf
2021  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2022  * property to prevent them from using the nodes passed up from the bootconf
2023  * device tree.
2024  *
2025  * Trying to assemble root file system drivers as we are booting from
2026  * devconf will fail if the kernel driver is basing its name_addr's on the
2027  * psuedo-node device info while the bootpath passed up from bootconf is using
2028  * reality-based name_addrs.  We help the boot along in this case by
2029  * looking at the pre-bootconf bootpath and determining if we would have
2030  * successfully matched if that had been the bootpath we had chosen.
2031  *
2032  * Note that we only even perform this extra check if we've booted
2033  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2034  * we're trying to match the name_addr specified in the 1275 bootpath.
2035  */
2036 
2037 #define	MAXCOMPONENTLEN	32
2038 
2039 int
2040 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2041 {
2042 	/*
2043 	 *  There are multiple criteria to be met before we can even
2044 	 *  consider allowing a name_addr match here.
2045 	 *
2046 	 *  1) We must have been booted such that the bootconf program
2047 	 *	created device tree nodes and properties.  This can be
2048 	 *	determined by examining the 'bootpath' property.  This
2049 	 *	property will be a non-null string iff bootconf was
2050 	 *	involved in the boot.
2051 	 *
2052 	 *  2) The module that we want to match must be the boot device.
2053 	 *
2054 	 *  3) The instance of the module we are thinking of letting be
2055 	 *	our match must be ignoring hardware nodes.
2056 	 *
2057 	 *  4) The name_addr we want to match must be the name_addr
2058 	 *	specified in the 1275 bootpath.
2059 	 */
2060 	static char bootdev_module[MAXCOMPONENTLEN];
2061 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2062 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2063 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2064 	static int  quickexit;
2065 
2066 	char *daddr;
2067 	int dlen;
2068 
2069 	char	*lkupname;
2070 	int	rv = DDI_FAILURE;
2071 
2072 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2073 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2074 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2075 	    "ignore-hardware-nodes", -1) != -1)) {
2076 		if (strcmp(daddr, caddr) == 0) {
2077 			return (DDI_SUCCESS);
2078 		}
2079 	}
2080 
2081 	if (quickexit)
2082 		return (rv);
2083 
2084 	if (bootdev_module[0] == '\0') {
2085 		char *addrp, *eoaddrp;
2086 		char *busp, *modp, *atp;
2087 		char *bp1275, *bp;
2088 		int  bp1275len, bplen;
2089 
2090 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2091 
2092 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2093 		    ddi_root_node(), 0, "bootpath",
2094 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2095 		    bp1275len <= 1) {
2096 			/*
2097 			 * We didn't boot from bootconf so we never need to
2098 			 * do any special matches.
2099 			 */
2100 			quickexit = 1;
2101 			if (bp1275)
2102 				kmem_free(bp1275, bp1275len);
2103 			return (rv);
2104 		}
2105 
2106 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2107 		    ddi_root_node(), 0, "boot-path",
2108 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2109 			/*
2110 			 * No fallback position for matching. This is
2111 			 * certainly unexpected, but we'll handle it
2112 			 * just in case.
2113 			 */
2114 			quickexit = 1;
2115 			kmem_free(bp1275, bp1275len);
2116 			if (bp)
2117 				kmem_free(bp, bplen);
2118 			return (rv);
2119 		}
2120 
2121 		/*
2122 		 *  Determine boot device module and 1275 name_addr
2123 		 *
2124 		 *  bootpath assumed to be of the form /bus/module@name_addr
2125 		 */
2126 		if (busp = strchr(bp1275, '/')) {
2127 			if (modp = strchr(busp + 1, '/')) {
2128 				if (atp = strchr(modp + 1, '@')) {
2129 					*atp = '\0';
2130 					addrp = atp + 1;
2131 					if (eoaddrp = strchr(addrp, '/'))
2132 						*eoaddrp = '\0';
2133 				}
2134 			}
2135 		}
2136 
2137 		if (modp && addrp) {
2138 			(void) strncpy(bootdev_module, modp + 1,
2139 			    MAXCOMPONENTLEN);
2140 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2141 
2142 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2143 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2144 		} else {
2145 			quickexit = 1;
2146 			kmem_free(bp1275, bp1275len);
2147 			kmem_free(bp, bplen);
2148 			return (rv);
2149 		}
2150 
2151 		/*
2152 		 *  Determine fallback name_addr
2153 		 *
2154 		 *  10/3/96 - Also save fallback module name because it
2155 		 *  might actually be different than the current module
2156 		 *  name.  E.G., ISA pnp drivers have new names.
2157 		 *
2158 		 *  bootpath assumed to be of the form /bus/module@name_addr
2159 		 */
2160 		addrp = NULL;
2161 		if (busp = strchr(bp, '/')) {
2162 			if (modp = strchr(busp + 1, '/')) {
2163 				if (atp = strchr(modp + 1, '@')) {
2164 					*atp = '\0';
2165 					addrp = atp + 1;
2166 					if (eoaddrp = strchr(addrp, '/'))
2167 						*eoaddrp = '\0';
2168 				}
2169 			}
2170 		}
2171 
2172 		if (modp && addrp) {
2173 			(void) strncpy(bootdev_oldmod, modp + 1,
2174 			    MAXCOMPONENTLEN);
2175 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2176 
2177 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2178 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2179 		}
2180 
2181 		/* Free up the bootpath storage now that we're done with it. */
2182 		kmem_free(bp1275, bp1275len);
2183 		kmem_free(bp, bplen);
2184 
2185 		if (bootdev_oldaddr[0] == '\0') {
2186 			quickexit = 1;
2187 			return (rv);
2188 		}
2189 	}
2190 
2191 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2192 	    (strcmp(bootdev_module, lkupname) == 0 ||
2193 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2194 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2195 	    "ignore-hardware-nodes", -1) != -1) ||
2196 	    ignore_hardware_nodes) &&
2197 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2198 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2199 		rv = DDI_SUCCESS;
2200 	}
2201 
2202 	return (rv);
2203 }
2204 
2205 /*
2206  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2207  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2208  */
2209 /*ARGSUSED*/
2210 int
2211 e_ddi_copyfromdev(dev_info_t *devi,
2212     off_t off, const void *devaddr, void *kaddr, size_t len)
2213 {
2214 	bcopy(devaddr, kaddr, len);
2215 	return (0);
2216 }
2217 
2218 /*
2219  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2220  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2221  */
2222 /*ARGSUSED*/
2223 int
2224 e_ddi_copytodev(dev_info_t *devi,
2225     off_t off, const void *kaddr, void *devaddr, size_t len)
2226 {
2227 	bcopy(kaddr, devaddr, len);
2228 	return (0);
2229 }
2230 
2231 
2232 static int
2233 poke_mem(peekpoke_ctlops_t *in_args)
2234 {
2235 	int err = DDI_SUCCESS;
2236 	on_trap_data_t otd;
2237 
2238 	/* Set up protected environment. */
2239 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2240 		switch (in_args->size) {
2241 		case sizeof (uint8_t):
2242 			*(uint8_t *)(in_args->dev_addr) =
2243 			    *(uint8_t *)in_args->host_addr;
2244 			break;
2245 
2246 		case sizeof (uint16_t):
2247 			*(uint16_t *)(in_args->dev_addr) =
2248 			    *(uint16_t *)in_args->host_addr;
2249 			break;
2250 
2251 		case sizeof (uint32_t):
2252 			*(uint32_t *)(in_args->dev_addr) =
2253 			    *(uint32_t *)in_args->host_addr;
2254 			break;
2255 
2256 		case sizeof (uint64_t):
2257 			*(uint64_t *)(in_args->dev_addr) =
2258 			    *(uint64_t *)in_args->host_addr;
2259 			break;
2260 
2261 		default:
2262 			err = DDI_FAILURE;
2263 			break;
2264 		}
2265 	} else
2266 		err = DDI_FAILURE;
2267 
2268 	/* Take down protected environment. */
2269 	no_trap();
2270 
2271 	return (err);
2272 }
2273 
2274 
2275 static int
2276 peek_mem(peekpoke_ctlops_t *in_args)
2277 {
2278 	int err = DDI_SUCCESS;
2279 	on_trap_data_t otd;
2280 
2281 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2282 		switch (in_args->size) {
2283 		case sizeof (uint8_t):
2284 			*(uint8_t *)in_args->host_addr =
2285 			    *(uint8_t *)in_args->dev_addr;
2286 			break;
2287 
2288 		case sizeof (uint16_t):
2289 			*(uint16_t *)in_args->host_addr =
2290 			    *(uint16_t *)in_args->dev_addr;
2291 			break;
2292 
2293 		case sizeof (uint32_t):
2294 			*(uint32_t *)in_args->host_addr =
2295 			    *(uint32_t *)in_args->dev_addr;
2296 			break;
2297 
2298 		case sizeof (uint64_t):
2299 			*(uint64_t *)in_args->host_addr =
2300 			    *(uint64_t *)in_args->dev_addr;
2301 			break;
2302 
2303 		default:
2304 			err = DDI_FAILURE;
2305 			break;
2306 		}
2307 	} else
2308 		err = DDI_FAILURE;
2309 
2310 	no_trap();
2311 	return (err);
2312 }
2313 
2314 
2315 /*
2316  * This is called only to process peek/poke when the DIP is NULL.
2317  * Assume that this is for memory, as nexi take care of device safe accesses.
2318  */
2319 int
2320 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2321 {
2322 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2323 }
2324 
2325 /*
2326  * we've just done a cautious put/get. Check if it was successful by
2327  * calling pci_ereport_post() on all puts and for any gets that return -1
2328  */
2329 static int
2330 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2331     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2332 {
2333 	int	rval = DDI_SUCCESS;
2334 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2335 	ddi_fm_error_t de;
2336 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2337 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2338 	int check_err = 0;
2339 	int repcount = in_args->repcount;
2340 
2341 	if (ctlop == DDI_CTLOPS_POKE &&
2342 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2343 		return (DDI_SUCCESS);
2344 
2345 	if (ctlop == DDI_CTLOPS_PEEK &&
2346 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2347 		for (; repcount; repcount--) {
2348 			switch (in_args->size) {
2349 			case sizeof (uint8_t):
2350 				if (*(uint8_t *)in_args->host_addr == 0xff)
2351 					check_err = 1;
2352 				break;
2353 			case sizeof (uint16_t):
2354 				if (*(uint16_t *)in_args->host_addr == 0xffff)
2355 					check_err = 1;
2356 				break;
2357 			case sizeof (uint32_t):
2358 				if (*(uint32_t *)in_args->host_addr ==
2359 				    0xffffffff)
2360 					check_err = 1;
2361 				break;
2362 			case sizeof (uint64_t):
2363 				if (*(uint64_t *)in_args->host_addr ==
2364 				    0xffffffffffffffff)
2365 					check_err = 1;
2366 				break;
2367 			}
2368 		}
2369 		if (check_err == 0)
2370 			return (DDI_SUCCESS);
2371 	}
2372 	/*
2373 	 * for a cautious put or get or a non-cautious get that returned -1 call
2374 	 * io framework to see if there really was an error
2375 	 */
2376 	bzero(&de, sizeof (ddi_fm_error_t));
2377 	de.fme_version = DDI_FME_VERSION;
2378 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2379 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2380 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2381 		de.fme_acc_handle = in_args->handle;
2382 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2383 		/*
2384 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
2385 		 * Non-hardened drivers may be probing the hardware and
2386 		 * expecting -1 returned. So need to treat errors on
2387 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2388 		 */
2389 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2390 		de.fme_acc_handle = in_args->handle;
2391 	} else {
2392 		/*
2393 		 * Hardened driver doing protected accesses shouldn't
2394 		 * get errors unless there's a hardware problem. Treat
2395 		 * as nonfatal if there's an error, but set UNEXPECTED
2396 		 * so we raise ereports on any errors and potentially
2397 		 * fault the device
2398 		 */
2399 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2400 	}
2401 	(void) scan(dip, &de);
2402 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2403 	    de.fme_status != DDI_FM_OK) {
2404 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2405 		rval = DDI_FAILURE;
2406 		errp->err_ena = de.fme_ena;
2407 		errp->err_expected = de.fme_flag;
2408 		errp->err_status = DDI_FM_NONFATAL;
2409 	}
2410 	return (rval);
2411 }
2412 
2413 /*
2414  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2415  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2416  * recurse, so assume all puts are OK and gets have failed if they return -1
2417  */
2418 static int
2419 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2420 {
2421 	int rval = DDI_SUCCESS;
2422 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2423 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2424 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2425 	int repcount = in_args->repcount;
2426 
2427 	if (ctlop == DDI_CTLOPS_POKE)
2428 		return (rval);
2429 
2430 	for (; repcount; repcount--) {
2431 		switch (in_args->size) {
2432 		case sizeof (uint8_t):
2433 			if (*(uint8_t *)in_args->host_addr == 0xff)
2434 				rval = DDI_FAILURE;
2435 			break;
2436 		case sizeof (uint16_t):
2437 			if (*(uint16_t *)in_args->host_addr == 0xffff)
2438 				rval = DDI_FAILURE;
2439 			break;
2440 		case sizeof (uint32_t):
2441 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2442 				rval = DDI_FAILURE;
2443 			break;
2444 		case sizeof (uint64_t):
2445 			if (*(uint64_t *)in_args->host_addr ==
2446 			    0xffffffffffffffff)
2447 				rval = DDI_FAILURE;
2448 			break;
2449 		}
2450 	}
2451 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2452 	    rval == DDI_FAILURE) {
2453 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2454 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2455 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2456 		errp->err_status = DDI_FM_NONFATAL;
2457 	}
2458 	return (rval);
2459 }
2460 
2461 int
2462 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2463 	ddi_ctl_enum_t ctlop, void *arg, void *result,
2464 	int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2465 	void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2466 	void (*scan)(dev_info_t *, ddi_fm_error_t *))
2467 {
2468 	int rval;
2469 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2470 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2471 
2472 	/*
2473 	 * this function only supports cautious accesses, not peeks/pokes
2474 	 * which don't have a handle
2475 	 */
2476 	if (hp == NULL)
2477 		return (DDI_FAILURE);
2478 
2479 	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2480 		if (!mutex_tryenter(err_mutexp)) {
2481 			/*
2482 			 * As this may be a recursive call from within
2483 			 * pci_ereport_post() we can't wait for the mutexes.
2484 			 * Fortunately we know someone is already calling
2485 			 * pci_ereport_post() which will handle the error bits
2486 			 * for us, and as this is a config space access we can
2487 			 * just do the access and check return value for -1
2488 			 * using pci_peekpoke_check_nofma().
2489 			 */
2490 			rval = handler(dip, rdip, ctlop, arg, result);
2491 			if (rval == DDI_SUCCESS)
2492 				rval = pci_peekpoke_check_nofma(arg, ctlop);
2493 			return (rval);
2494 		}
2495 		/*
2496 		 * This can't be a recursive call. Drop the err_mutex and get
2497 		 * both mutexes in the right order. If an error hasn't already
2498 		 * been detected by the ontrap code, use pci_peekpoke_check_fma
2499 		 * which will call pci_ereport_post() to check error status.
2500 		 */
2501 		mutex_exit(err_mutexp);
2502 	}
2503 	mutex_enter(peek_poke_mutexp);
2504 	rval = handler(dip, rdip, ctlop, arg, result);
2505 	if (rval == DDI_SUCCESS) {
2506 		mutex_enter(err_mutexp);
2507 		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2508 		mutex_exit(err_mutexp);
2509 	}
2510 	mutex_exit(peek_poke_mutexp);
2511 	return (rval);
2512 }
2513 
2514 void
2515 impl_setup_ddi(void)
2516 {
2517 #if !defined(__xpv)
2518 	extern void startup_bios_disk(void);
2519 	extern int post_fastreboot;
2520 #endif
2521 	dev_info_t *xdip, *isa_dip;
2522 	rd_existing_t rd_mem_prop;
2523 	int err;
2524 
2525 	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2526 	    (pnode_t)DEVI_SID_NODEID, &xdip);
2527 
2528 	(void) BOP_GETPROP(bootops,
2529 	    "ramdisk_start", (void *)&ramdisk_start);
2530 	(void) BOP_GETPROP(bootops,
2531 	    "ramdisk_end", (void *)&ramdisk_end);
2532 
2533 #ifdef __xpv
2534 	ramdisk_start -= ONE_GIG;
2535 	ramdisk_end -= ONE_GIG;
2536 #endif
2537 	rd_mem_prop.phys = ramdisk_start;
2538 	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2539 
2540 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2541 	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2542 	    sizeof (rd_mem_prop));
2543 	err = ndi_devi_bind_driver(xdip, 0);
2544 	ASSERT(err == 0);
2545 
2546 	/* isa node */
2547 	if (pseudo_isa) {
2548 		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2549 		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
2550 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2551 		    "device_type", "isa");
2552 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2553 		    "bus-type", "isa");
2554 		(void) ndi_devi_bind_driver(isa_dip, 0);
2555 	}
2556 
2557 	/*
2558 	 * Read in the properties from the boot.
2559 	 */
2560 	get_boot_properties();
2561 
2562 	/* not framebuffer should be enumerated, if present */
2563 	get_vga_properties();
2564 
2565 #if !defined(__xpv)
2566 	if (!post_fastreboot)
2567 		startup_bios_disk();
2568 #endif
2569 	/* do bus dependent probes. */
2570 	impl_bus_initialprobe();
2571 }
2572 
2573 dev_t
2574 getrootdev(void)
2575 {
2576 	/*
2577 	 * Precedence given to rootdev if set in /etc/system
2578 	 */
2579 	if (root_is_svm == B_TRUE) {
2580 		return (ddi_pathname_to_dev_t(svm_bootpath));
2581 	}
2582 
2583 	/*
2584 	 * Usually rootfs.bo_name is initialized by the
2585 	 * the bootpath property from bootenv.rc, but
2586 	 * defaults to "/ramdisk:a" otherwise.
2587 	 */
2588 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2589 }
2590 
/*
 * One registered bus probe callback.  Entries form a singly linked list
 * that is maintained by impl_bus_add_probe() and impl_bus_delete_probe().
 */
struct bus_probe {
	struct bus_probe *next;		/* following entry, NULL at the tail */
	void (*probe)(int);		/* callback to invoke */
};

/* Head of the list of registered probes; NULL while the list is empty. */
static struct bus_probe *bus_probes;
2595 
2596 void
2597 impl_bus_add_probe(void (*func)(int))
2598 {
2599 	struct bus_probe *probe;
2600 	struct bus_probe *lastprobe = NULL;
2601 
2602 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2603 	probe->probe = func;
2604 	probe->next = NULL;
2605 
2606 	if (!bus_probes) {
2607 		bus_probes = probe;
2608 		return;
2609 	}
2610 
2611 	lastprobe = bus_probes;
2612 	while (lastprobe->next)
2613 		lastprobe = lastprobe->next;
2614 	lastprobe->next = probe;
2615 }
2616 
2617 /*ARGSUSED*/
2618 void
2619 impl_bus_delete_probe(void (*func)(int))
2620 {
2621 	struct bus_probe *prev = NULL;
2622 	struct bus_probe *probe = bus_probes;
2623 
2624 	while (probe) {
2625 		if (probe->probe == func)
2626 			break;
2627 		prev = probe;
2628 		probe = probe->next;
2629 	}
2630 
2631 	if (probe == NULL)
2632 		return;
2633 
2634 	if (prev)
2635 		prev->next = probe->next;
2636 	else
2637 		bus_probes = probe->next;
2638 
2639 	kmem_free(probe, sizeof (struct bus_probe));
2640 }
2641 
2642 /*
2643  * impl_bus_initialprobe
2644  *	Modload the prom simulator, then let it probe to verify existence
2645  *	and type of PCI support.
2646  */
2647 static void
2648 impl_bus_initialprobe(void)
2649 {
2650 	struct bus_probe *probe;
2651 
2652 	/* load modules to install bus probes */
2653 #if defined(__xpv)
2654 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2655 		if (modload("misc", "pci_autoconfig") < 0) {
2656 			panic("failed to load misc/pci_autoconfig");
2657 		}
2658 
2659 		if (modload("drv", "isa") < 0)
2660 			panic("failed to load drv/isa");
2661 	}
2662 
2663 	(void) modload("misc", "xpv_autoconfig");
2664 #else
2665 	(void) modload("misc", "acpidev");
2666 
2667 	if (modload("misc", "pci_autoconfig") < 0) {
2668 		panic("failed to load misc/pci_autoconfig");
2669 	}
2670 
2671 	if (modload("drv", "isa") < 0)
2672 		panic("failed to load drv/isa");
2673 #endif
2674 
2675 	probe = bus_probes;
2676 	while (probe) {
2677 		/* run the probe functions */
2678 		(*probe->probe)(0);
2679 		probe = probe->next;
2680 	}
2681 }
2682 
2683 /*
2684  * impl_bus_reprobe
2685  *	Reprogram devices not set up by firmware.
2686  */
2687 static void
2688 impl_bus_reprobe(void)
2689 {
2690 	struct bus_probe *probe;
2691 
2692 	probe = bus_probes;
2693 	while (probe) {
2694 		/* run the probe function */
2695 		(*probe->probe)(1);
2696 		probe = probe->next;
2697 	}
2698 }
2699 
2700 
2701 /*
2702  * The following functions ready a cautious request to go up to the nexus
2703  * driver.  It is up to the nexus driver to decide how to process the request.
2704  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2705  * differently.
2706  */
2707 
2708 static void
2709 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2710     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2711     ddi_ctl_enum_t cmd)
2712 {
2713 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2714 
2715 	cautacc_ctlops_arg.size = size;
2716 	cautacc_ctlops_arg.dev_addr = dev_addr;
2717 	cautacc_ctlops_arg.host_addr = host_addr;
2718 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2719 	cautacc_ctlops_arg.repcount = repcount;
2720 	cautacc_ctlops_arg.flags = flags;
2721 
2722 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2723 	    &cautacc_ctlops_arg, NULL);
2724 }
2725 
2726 uint8_t
2727 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2728 {
2729 	uint8_t value;
2730 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2731 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2732 
2733 	return (value);
2734 }
2735 
2736 uint16_t
2737 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2738 {
2739 	uint16_t value;
2740 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2741 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2742 
2743 	return (value);
2744 }
2745 
2746 uint32_t
2747 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2748 {
2749 	uint32_t value;
2750 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2751 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2752 
2753 	return (value);
2754 }
2755 
2756 uint64_t
2757 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2758 {
2759 	uint64_t value;
2760 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2761 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2762 
2763 	return (value);
2764 }
2765 
2766 void
2767 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2768 {
2769 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2770 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2771 }
2772 
2773 void
2774 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2775 {
2776 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2777 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2778 }
2779 
2780 void
2781 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2782 {
2783 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2784 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2785 }
2786 
2787 void
2788 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2789 {
2790 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2791 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2792 }
2793 
2794 void
2795 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2796 	size_t repcount, uint_t flags)
2797 {
2798 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2799 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2800 }
2801 
2802 void
2803 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2804     uint16_t *dev_addr, size_t repcount, uint_t flags)
2805 {
2806 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2807 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2808 }
2809 
2810 void
2811 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2812     uint32_t *dev_addr, size_t repcount, uint_t flags)
2813 {
2814 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2815 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2816 }
2817 
2818 void
2819 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2820     uint64_t *dev_addr, size_t repcount, uint_t flags)
2821 {
2822 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2823 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2824 }
2825 
2826 void
2827 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2828 	size_t repcount, uint_t flags)
2829 {
2830 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2831 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2832 }
2833 
2834 void
2835 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2836     uint16_t *dev_addr, size_t repcount, uint_t flags)
2837 {
2838 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2839 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2840 }
2841 
2842 void
2843 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2844     uint32_t *dev_addr, size_t repcount, uint_t flags)
2845 {
2846 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2847 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2848 }
2849 
2850 void
2851 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2852     uint64_t *dev_addr, size_t repcount, uint_t flags)
2853 {
2854 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2855 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2856 }
2857 
2858 boolean_t
2859 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2860 {
2861 	uint64_t hi_pa;
2862 
2863 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2864 	if (attrp->dma_attr_addr_hi < hi_pa) {
2865 		return (B_TRUE);
2866 	}
2867 
2868 	return (B_FALSE);
2869 }
2870 
2871 size_t
2872 i_ddi_copybuf_size()
2873 {
2874 	return (dma_max_copybuf_size);
2875 }
2876 
2877 /*
2878  * i_ddi_dma_max()
2879  *    returns the maximum DMA size which can be performed in a single DMA
2880  *    window taking into account the devices DMA contraints (attrp), the
2881  *    maximum copy buffer size (if applicable), and the worse case buffer
2882  *    fragmentation.
2883  */
2884 /*ARGSUSED*/
2885 uint32_t
2886 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2887 {
2888 	uint64_t maxxfer;
2889 
2890 
2891 	/*
2892 	 * take the min of maxxfer and the the worse case fragementation
2893 	 * (e.g. every cookie <= 1 page)
2894 	 */
2895 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2896 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2897 
2898 	/*
2899 	 * If the DMA engine can't reach all off memory, we also need to take
2900 	 * the max size of the copybuf into consideration.
2901 	 */
2902 	if (i_ddi_copybuf_required(attrp)) {
2903 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2904 	}
2905 
2906 	/*
2907 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2908 	 * page so it won't be mistaken for an error value during debug.
2909 	 */
2910 	if (maxxfer >= 0xFFFFFFFF) {
2911 		maxxfer = 0xFFFFF000;
2912 	}
2913 
2914 	/*
2915 	 * make sure the value we return is a whole multiple of the
2916 	 * granlarity.
2917 	 */
2918 	if (attrp->dma_attr_granular > 1) {
2919 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2920 	}
2921 
2922 	return ((uint32_t)maxxfer);
2923 }
2924