xref: /titanic_50/usr/src/uts/i86pc/os/ddi_impl.c (revision b533f56bf95137d3de6666bd923e15ec373ea611)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
25  */
26 
27 /*
28  * PC specific DDI implementation
29  */
30 #include <sys/types.h>
31 #include <sys/autoconf.h>
32 #include <sys/avintr.h>
33 #include <sys/bootconf.h>
34 #include <sys/conf.h>
35 #include <sys/cpuvar.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ddi_subrdefs.h>
38 #include <sys/ethernet.h>
39 #include <sys/fp.h>
40 #include <sys/instance.h>
41 #include <sys/kmem.h>
42 #include <sys/machsystm.h>
43 #include <sys/modctl.h>
44 #include <sys/promif.h>
45 #include <sys/prom_plat.h>
46 #include <sys/sunndi.h>
47 #include <sys/ndi_impldefs.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/sysmacros.h>
50 #include <sys/systeminfo.h>
51 #include <sys/utsname.h>
52 #include <sys/atomic.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <vm/seg_kmem.h>
56 #include <sys/ontrap.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/ramdisk.h>
59 #include <sys/sunndi.h>
60 #include <sys/vmem.h>
61 #include <sys/pci_impl.h>
62 #if defined(__xpv)
63 #include <sys/hypervisor.h>
64 #endif
65 #include <sys/mach_intr.h>
66 #include <vm/hat_i86.h>
67 #include <sys/x86_archext.h>
68 
69 /*
70  * DDI Boot Configuration
71  */
72 
73 /*
74  * Platform drivers on this platform
75  */
/* NULL-terminated list of platform driver module names. */
char *platform_module_list[] = {
	"acpippm",
	"ppm",
	NULL
};
81 
82 /* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/*
 * NOTE(review): presumably the upper bound on a DMA copy (bounce) buffer;
 * the consumers are not visible in this part of the file -- confirm.
 */
size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */

/*
 * NOTE(review): presumably the bounds of the boot ramdisk; they are not
 * assigned or read in this part of the file -- confirm.
 */
uint64_t ramdisk_start, ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" with i_ddi_attach_pseudo_node()
 * instead of i_ddi_attach_hw_nodes().
 */
int pseudo_isa = 0;
90 
91 /*
92  * Forward declarations
93  */
94 static int getlongprop_buf();
95 static void get_boot_properties(void);
96 static void impl_bus_initialprobe(void);
97 static void impl_bus_reprobe(void);
98 
99 static int poke_mem(peekpoke_ctlops_t *in_args);
100 static int peek_mem(peekpoke_ctlops_t *in_args);
101 
102 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
103 
104 #if defined(__amd64) && !defined(__xpv)
105 extern void immu_init(void);
106 #endif
107 
#define	CTGENTRIES	15	/* (address, size) slots per ctgas node */

/*
 * List node holding up to CTGENTRIES address/size pairs; ctglist is the
 * statically allocated head of the chain.  The code that fills and walks
 * the list is not in this part of the file.
 * NOTE(review): presumably records contiguous allocations so their size can
 * be recovered when they are freed -- confirm against the users of ctglist.
 */
static struct ctgas {
	struct ctgas	*ctg_next;	/* next node in the chain */
	int		ctg_index;	/* presumably slots in use -- confirm */
	void		*ctg_addr[CTGENTRIES];
	size_t		ctg_size[CTGENTRIES];
} ctglist;

/* Mutex taken and dropped by CTGLOCK()/CTGUNLOCK(). */
static kmutex_t		ctgmutex;
#define	CTGLOCK()	mutex_enter(&ctgmutex)
#define	CTGUNLOCK()	mutex_exit(&ctgmutex)
120 
121 /*
122  * Minimum pfn value of page_t's put on the free list.  This is to simplify
123  * support of ddi dma memory requests which specify small, non-zero addr_lo
124  * values.
125  *
126  * The default value of 2, which corresponds to the only known non-zero addr_lo
127  * value used, means a single page will be sacrificed (pfn typically starts
128  * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
129  * otherwise mp startup panics.
130  */
pfn_t	ddiphysmin = 2;		/* 0 disables; must not exceed 0x100 */
132 
133 static void
134 check_driver_disable(void)
135 {
136 	int proplen = 128;
137 	char *prop_name;
138 	char *drv_name, *propval;
139 	major_t major;
140 
141 	prop_name = kmem_alloc(proplen, KM_SLEEP);
142 	for (major = 0; major < devcnt; major++) {
143 		drv_name = ddi_major_to_name(major);
144 		if (drv_name == NULL)
145 			continue;
146 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
147 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
148 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
149 			if (strcmp(propval, "true") == 0) {
150 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
151 				cmn_err(CE_NOTE, "driver %s disabled",
152 				    drv_name);
153 			}
154 			ddi_prop_free(propval);
155 		}
156 	}
157 	kmem_free(prop_name, proplen);
158 }
159 
160 
161 /*
162  * Configure the hardware on the system.
163  * Called before the rootfs is mounted
164  */
165 void
166 configure(void)
167 {
168 	extern void i_ddi_init_root();
169 
170 #if defined(__i386)
171 	extern int fpu_pentium_fdivbug;
172 #endif	/* __i386 */
173 	extern int fpu_ignored;
174 
175 	/*
176 	 * Determine if an FPU is attached
177 	 */
178 
179 	fpu_probe();
180 
181 #if defined(__i386)
182 	if (fpu_pentium_fdivbug) {
183 		printf("\
184 FP hardware exhibits Pentium floating point divide problem\n");
185 	}
186 #endif	/* __i386 */
187 
188 	if (fpu_ignored) {
189 		printf("FP hardware will not be used\n");
190 	} else if (!fpu_exists) {
191 		printf("No FPU in configuration\n");
192 	}
193 
194 	/*
195 	 * Initialize devices on the machine.
196 	 * Uses configuration tree built by the PROMs to determine what
197 	 * is present, and builds a tree of prototype dev_info nodes
198 	 * corresponding to the hardware which identified itself.
199 	 */
200 
201 	/*
202 	 * Initialize root node.
203 	 */
204 	i_ddi_init_root();
205 
206 	/* reprogram devices not set up by firmware (BIOS) */
207 	impl_bus_reprobe();
208 
209 #if defined(__amd64) && !defined(__xpv)
210 	/*
211 	 * Setup but don't startup the IOMMU
212 	 * Startup happens later via a direct call
213 	 * to IOMMU code by boot code.
214 	 * At this point, all PCI bus renumbering
215 	 * is done, so safe to init the IMMU
216 	 * AKA Intel IOMMU.
217 	 */
218 	immu_init();
219 #endif
220 
221 	/*
222 	 * attach the isa nexus to get ACPI resource usage
223 	 * isa is "kind of" a pseudo node
224 	 */
225 #if defined(__xpv)
226 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
227 		if (pseudo_isa)
228 			(void) i_ddi_attach_pseudo_node("isa");
229 		else
230 			(void) i_ddi_attach_hw_nodes("isa");
231 	}
232 #else
233 	if (pseudo_isa)
234 		(void) i_ddi_attach_pseudo_node("isa");
235 	else
236 		(void) i_ddi_attach_hw_nodes("isa");
237 #endif
238 }
239 
240 /*
241  * The "status" property indicates the operational status of a device.
242  * If this property is present, the value is a string indicating the
243  * status of the device as follows:
244  *
245  *	"okay"		operational.
246  *	"disabled"	not operational, but might become operational.
247  *	"fail"		not operational because a fault has been detected,
248  *			and it is unlikely that the device will become
249  *			operational without repair. no additional details
250  *			are available.
251  *	"fail-xxx"	not operational because a fault has been detected,
252  *			and it is unlikely that the device will become
253  *			operational without repair. "xxx" is additional
254  *			human-readable information about the particular
255  *			fault condition that was detected.
256  *
257  * The absence of this property means that the operational status is
258  * unknown or okay.
259  *
260  * This routine checks the status property of the specified device node
261  * and returns 0 if the operational status indicates failure, and 1 otherwise.
262  *
263  * The property may exist on plug-in cards that existed before IEEE 1275-1994.
264  * And, in that case, the property may not even be a string. So we carefully
265  * check for the value "fail", in the beginning of the string, noting
266  * the property length.
267  */
268 int
269 status_okay(int id, char *buf, int buflen)
270 {
271 	char status_buf[OBP_MAXPROPNAME];
272 	char *bufp = buf;
273 	int len = buflen;
274 	int proplen;
275 	static const char *status = "status";
276 	static const char *fail = "fail";
277 	int fail_len = (int)strlen(fail);
278 
279 	/*
280 	 * Get the proplen ... if it's smaller than "fail",
281 	 * or doesn't exist ... then we don't care, since
282 	 * the value can't begin with the char string "fail".
283 	 *
284 	 * NB: proplen, if it's a string, includes the NULL in the
285 	 * the size of the property, and fail_len does not.
286 	 */
287 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
288 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
289 		return (1);
290 
291 	/*
292 	 * if a buffer was provided, use it
293 	 */
294 	if ((buf == (char *)NULL) || (buflen <= 0)) {
295 		bufp = status_buf;
296 		len = sizeof (status_buf);
297 	}
298 	*bufp = (char)0;
299 
300 	/*
301 	 * Get the property into the buffer, to the extent of the buffer,
302 	 * and in case the buffer is smaller than the property size,
303 	 * NULL terminate the buffer. (This handles the case where
304 	 * a buffer was passed in and the caller wants to print the
305 	 * value, but the buffer was too small).
306 	 */
307 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
308 	    (caddr_t)bufp, len);
309 	*(bufp + len - 1) = (char)0;
310 
311 	/*
312 	 * If the value begins with the char string "fail",
313 	 * then it means the node is failed. We don't care
314 	 * about any other values. We assume the node is ok
315 	 * although it might be 'disabled'.
316 	 */
317 	if (strncmp(bufp, fail, fail_len) == 0)
318 		return (0);
319 
320 	return (1);
321 }
322 
323 /*
324  * Check the status of the device node passed as an argument.
325  *
326  *	if ((status is OKAY) || (status is DISABLED))
327  *		return DDI_SUCCESS
328  *	else
329  *		print a warning and return DDI_FAILURE
330  */
331 /*ARGSUSED1*/
332 int
333 check_status(int id, char *name, dev_info_t *parent)
334 {
335 	char status_buf[64];
336 	char devtype_buf[OBP_MAXPROPNAME];
337 	int retval = DDI_FAILURE;
338 
339 	/*
340 	 * is the status okay?
341 	 */
342 	if (status_okay(id, status_buf, sizeof (status_buf)))
343 		return (DDI_SUCCESS);
344 
345 	/*
346 	 * a status property indicating bad memory will be associated
347 	 * with a node which has a "device_type" property with a value of
348 	 * "memory-controller". in this situation, return DDI_SUCCESS
349 	 */
350 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
351 	    sizeof (devtype_buf)) > 0) {
352 		if (strcmp(devtype_buf, "memory-controller") == 0)
353 			retval = DDI_SUCCESS;
354 	}
355 
356 	/*
357 	 * print the status property information
358 	 */
359 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
360 	return (retval);
361 }
362 
/*
 * Soft interrupt entry point: ignores both arguments and runs softint().
 * NOTE(review): the constant 1 returned is presumably DDI_INTR_CLAIMED --
 * confirm.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
	softint();
	return (1);
}
370 
371 /*
372  * Allow for implementation specific correction of PROM property values.
373  */
374 
/*
 * Hook for platform-specific correction of a PROM property value.
 * All arguments are ignored and nothing is modified on this platform.
 */
/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
	/*
	 * There are no adjustments needed in this implementation.
	 */
}
384 
385 static int
386 getlongprop_buf(int id, char *name, char *buf, int maxlen)
387 {
388 	int size;
389 
390 	size = prom_getproplen((pnode_t)id, name);
391 	if (size <= 0 || (size > maxlen - 1))
392 		return (-1);
393 
394 	if (-1 == prom_getprop((pnode_t)id, name, buf))
395 		return (-1);
396 
397 	if (strcmp("name", name) == 0) {
398 		if (buf[size - 1] != '\0') {
399 			buf[size] = '\0';
400 			size += 1;
401 		}
402 	}
403 
404 	return (size);
405 }
406 
407 static int
408 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
409 {
410 	int ret;
411 
412 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
413 	    DDI_PROP_DONTPASS, pname, pval, plen))
414 	    == DDI_PROP_SUCCESS) {
415 		*plen = (*plen) * (sizeof (int));
416 	}
417 	return (ret);
418 }
419 
420 
421 /*
422  * Node Configuration
423  */
424 
/* Layout of one entry of the old-form 'intr' property (see make_ddi_ppd). */
struct prop_ispec {
	uint_t	pri, vec;
};

/*
 * For the x86, we're prepared to claim that the interrupt string
 * is in the form of a list of <ipl,vec> specifications.
 */

/* Inclusive range of vector values accepted by impl_xlate_intrs(). */
#define	VEC_MIN	1
#define	VEC_MAX	255
436 
437 static int
438 impl_xlate_intrs(dev_info_t *child, int *in,
439     struct ddi_parent_private_data *pdptr)
440 {
441 	size_t size;
442 	int n;
443 	struct intrspec *new;
444 	caddr_t got_prop;
445 	int *inpri;
446 	int got_len;
447 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
448 
449 	static char bad_intr_fmt[] =
450 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
451 
452 	/*
453 	 * determine if the driver is expecting the new style "interrupts"
454 	 * property which just contains the IRQ, or the old style which
455 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
456 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
457 	 * in that case, the "interrupt-priorities" property contains the
458 	 * IPL values that match, one for one, the IRQ values in the
459 	 * "interrupts" property.
460 	 */
461 	inpri = NULL;
462 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
463 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
464 		/* the old style "interrupts" property... */
465 
466 		/*
467 		 * The list consists of <ipl,vec> elements
468 		 */
469 		if ((n = (*in++ >> 1)) < 1)
470 			return (DDI_FAILURE);
471 
472 		pdptr->par_nintr = n;
473 		size = n * sizeof (struct intrspec);
474 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
475 
476 		while (n--) {
477 			int level = *in++;
478 			int vec = *in++;
479 
480 			if (level < 1 || level > MAXIPL ||
481 			    vec < VEC_MIN || vec > VEC_MAX) {
482 				cmn_err(CE_CONT, bad_intr_fmt,
483 				    DEVI(child)->devi_name,
484 				    DEVI(child)->devi_instance, level, vec);
485 				goto broken;
486 			}
487 			new->intrspec_pri = level;
488 			if (vec != 2)
489 				new->intrspec_vec = vec;
490 			else
491 				/*
492 				 * irq 2 on the PC bus is tied to irq 9
493 				 * on ISA, EISA and MicroChannel
494 				 */
495 				new->intrspec_vec = 9;
496 			new++;
497 		}
498 
499 		return (DDI_SUCCESS);
500 	} else {
501 		/* the new style "interrupts" property... */
502 
503 		/*
504 		 * The list consists of <vec> elements
505 		 */
506 		if ((n = (*in++)) < 1)
507 			return (DDI_FAILURE);
508 
509 		pdptr->par_nintr = n;
510 		size = n * sizeof (struct intrspec);
511 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
512 
513 		/* XXX check for "interrupt-priorities" property... */
514 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
515 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
516 		    == DDI_PROP_SUCCESS) {
517 			if (n != (got_len / sizeof (int))) {
518 				cmn_err(CE_CONT,
519 				    "bad interrupt-priorities length"
520 				    " from %s%d: expected %d, got %d\n",
521 				    DEVI(child)->devi_name,
522 				    DEVI(child)->devi_instance, n,
523 				    (int)(got_len / sizeof (int)));
524 				goto broken;
525 			}
526 			inpri = (int *)got_prop;
527 		}
528 
529 		while (n--) {
530 			int level;
531 			int vec = *in++;
532 
533 			if (inpri == NULL)
534 				level = 5;
535 			else
536 				level = *inpri++;
537 
538 			if (level < 1 || level > MAXIPL ||
539 			    vec < VEC_MIN || vec > VEC_MAX) {
540 				cmn_err(CE_CONT, bad_intr_fmt,
541 				    DEVI(child)->devi_name,
542 				    DEVI(child)->devi_instance, level, vec);
543 				goto broken;
544 			}
545 			new->intrspec_pri = level;
546 			if (vec != 2)
547 				new->intrspec_vec = vec;
548 			else
549 				/*
550 				 * irq 2 on the PC bus is tied to irq 9
551 				 * on ISA, EISA and MicroChannel
552 				 */
553 				new->intrspec_vec = 9;
554 			new++;
555 		}
556 
557 		if (inpri != NULL)
558 			kmem_free(got_prop, got_len);
559 		return (DDI_SUCCESS);
560 	}
561 
562 broken:
563 	kmem_free(pdptr->par_intr, size);
564 	pdptr->par_intr = NULL;
565 	pdptr->par_nintr = 0;
566 	if (inpri != NULL)
567 		kmem_free(got_prop, got_len);
568 
569 	return (DDI_FAILURE);
570 }
571 
572 /*
573  * Create a ddi_parent_private_data structure from the ddi properties of
574  * the dev_info node.
575  *
576  * The "reg" and either an "intr" or "interrupts" properties are required
577  * if the driver wishes to create mappings or field interrupts on behalf
578  * of the device.
579  *
580  * The "reg" property is assumed to be a list of at least one triple
581  *
582  *	<bustype, address, size>*1
583  *
584  * The "intr" property is assumed to be a list of at least one duple
585  *
586  *	<SPARC ipl, vector#>*1
587  *
588  * The "interrupts" property is assumed to be a list of at least one
589  * n-tuples that describes the interrupt capabilities of the bus the device
590  * is connected to.  For SBus, this looks like
591  *
592  *	<SBus-level>*1
593  *
594  * (This property obsoletes the 'intr' property).
595  *
596  * The "ranges" property is optional.
597  */
598 void
599 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
600 {
601 	struct ddi_parent_private_data *pdptr;
602 	int n;
603 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
604 	uint_t reg_len, rng_len, intr_len, irupts_len;
605 
606 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
607 
608 	/*
609 	 * Handle the 'reg' property.
610 	 */
611 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
612 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
613 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
614 		pdptr->par_reg = (struct regspec *)reg_prop;
615 	}
616 
617 	/*
618 	 * See if I have a range (adding one where needed - this
619 	 * means to add one for sbus node in sun4c, when romvec > 0,
620 	 * if no range is already defined in the PROM node.
621 	 * (Currently no sun4c PROMS define range properties,
622 	 * but they should and may in the future.)  For the SBus
623 	 * node, the range is defined by the SBus reg property.
624 	 */
625 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
626 	    == DDI_PROP_SUCCESS) {
627 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
628 		pdptr->par_rng = (struct rangespec *)rng_prop;
629 	}
630 
631 	/*
632 	 * Handle the 'intr' and 'interrupts' properties
633 	 */
634 
635 	/*
636 	 * For backwards compatibility
637 	 * we first look for the 'intr' property for the device.
638 	 */
639 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
640 	    != DDI_PROP_SUCCESS) {
641 		intr_len = 0;
642 	}
643 
644 	/*
645 	 * If we're to support bus adapters and future platforms cleanly,
646 	 * we need to support the generalized 'interrupts' property.
647 	 */
648 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
649 	    &irupts_len) != DDI_PROP_SUCCESS) {
650 		irupts_len = 0;
651 	} else if (intr_len != 0) {
652 		/*
653 		 * If both 'intr' and 'interrupts' are defined,
654 		 * then 'interrupts' wins and we toss the 'intr' away.
655 		 */
656 		ddi_prop_free((void *)intr_prop);
657 		intr_len = 0;
658 	}
659 
660 	if (intr_len != 0) {
661 
662 		/*
663 		 * Translate the 'intr' property into an array
664 		 * an array of struct intrspec's.  There's not really
665 		 * very much to do here except copy what's out there.
666 		 */
667 
668 		struct intrspec *new;
669 		struct prop_ispec *l;
670 
671 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
672 		l = (struct prop_ispec *)intr_prop;
673 		pdptr->par_intr =
674 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
675 		while (n--) {
676 			new->intrspec_pri = l->pri;
677 			new->intrspec_vec = l->vec;
678 			new++;
679 			l++;
680 		}
681 		ddi_prop_free((void *)intr_prop);
682 
683 	} else if ((n = irupts_len) != 0) {
684 		size_t size;
685 		int *out;
686 
687 		/*
688 		 * Translate the 'interrupts' property into an array
689 		 * of intrspecs for the rest of the DDI framework to
690 		 * toy with.  Only our ancestors really know how to
691 		 * do this, so ask 'em.  We massage the 'interrupts'
692 		 * property so that it is pre-pended by a count of
693 		 * the number of integers in the argument.
694 		 */
695 		size = sizeof (int) + n;
696 		out = kmem_alloc(size, KM_SLEEP);
697 		*out = n / sizeof (int);
698 		bcopy(irupts_prop, out + 1, (size_t)n);
699 		ddi_prop_free((void *)irupts_prop);
700 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
701 			cmn_err(CE_CONT,
702 			    "Unable to translate 'interrupts' for %s%d\n",
703 			    DEVI(child)->devi_binding_name,
704 			    DEVI(child)->devi_instance);
705 		}
706 		kmem_free(out, size);
707 	}
708 }
709 
710 /*
711  * Name a child
712  */
713 static int
714 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
715 {
716 	/*
717 	 * Fill in parent-private data and this function returns to us
718 	 * an indication if it used "registers" to fill in the data.
719 	 */
720 	if (ddi_get_parent_data(child) == NULL) {
721 		struct ddi_parent_private_data *pdptr;
722 		make_ddi_ppd(child, &pdptr);
723 		ddi_set_parent_data(child, pdptr);
724 	}
725 
726 	name[0] = '\0';
727 	if (sparc_pd_getnreg(child) > 0) {
728 		(void) snprintf(name, namelen, "%x,%x",
729 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
730 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
731 	}
732 
733 	return (DDI_SUCCESS);
734 }
735 
736 /*
737  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
738  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
739  * the children of sun busses based on the reg spec.
740  *
741  * Handles the following properties (in make_ddi_ppd):
742  *	Property		value
743  *	  Name			type
744  *	reg		register spec
745  *	intr		old-form interrupt spec
746  *	interrupts	new (bus-oriented) interrupt spec
747  *	ranges		range spec
748  */
749 int
750 impl_ddi_sunbus_initchild(dev_info_t *child)
751 {
752 	char name[MAXNAMELEN];
753 	void impl_ddi_sunbus_removechild(dev_info_t *);
754 
755 	/*
756 	 * Name the child, also makes parent private data
757 	 */
758 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
759 	ddi_set_name_addr(child, name);
760 
761 	/*
762 	 * Attempt to merge a .conf node; if successful, remove the
763 	 * .conf node.
764 	 */
765 	if ((ndi_dev_is_persistent_node(child) == 0) &&
766 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
767 		/*
768 		 * Return failure to remove node
769 		 */
770 		impl_ddi_sunbus_removechild(child);
771 		return (DDI_FAILURE);
772 	}
773 	return (DDI_SUCCESS);
774 }
775 
776 void
777 impl_free_ddi_ppd(dev_info_t *dip)
778 {
779 	struct ddi_parent_private_data *pdptr;
780 	size_t n;
781 
782 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
783 		return;
784 
785 	if ((n = (size_t)pdptr->par_nintr) != 0)
786 		/*
787 		 * Note that kmem_free is used here (instead of
788 		 * ddi_prop_free) because the contents of the
789 		 * property were placed into a separate buffer and
790 		 * mucked with a bit before being stored in par_intr.
791 		 * The actual return value from the prop lookup
792 		 * was freed with ddi_prop_free previously.
793 		 */
794 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
795 
796 	if ((n = (size_t)pdptr->par_nrng) != 0)
797 		ddi_prop_free((void *)pdptr->par_rng);
798 
799 	if ((n = pdptr->par_nreg) != 0)
800 		ddi_prop_free((void *)pdptr->par_reg);
801 
802 	kmem_free(pdptr, sizeof (*pdptr));
803 	ddi_set_parent_data(dip, NULL);
804 }
805 
/*
 * Tear down a child node: free its parent-private data, clear its unit
 * address, and strip its properties.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
	impl_free_ddi_ppd(dip);
	ddi_set_name_addr(dip, NULL);
	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	impl_rem_dev_props(dip);
}
816 
817 /*
818  * DDI Interrupt
819  */
820 
821 /*
822  * turn this on to force isa, eisa, and mca device to ignore the new
823  * hardware nodes in the device tree (normally turned on only for
824  * drivers that need it by setting the property "ignore-hardware-nodes"
825  * in their driver.conf file).
826  *
827  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
828  *		as safety valve.
829  */
int ignore_hardware_nodes = 0;	/* read by impl_xlate_intrs() */

/*
 * Local data
 */
/* NOTE(review): not referenced in this part of the file; usage unverified. */
static struct impl_bus_promops *impl_busp;
836 
837 
838 /*
839  * New DDI interrupt framework
840  */
841 
842 /*
843  * i_ddi_intr_ops:
844  *
845  * This is the interrupt operator function wrapper for the bus function
846  * bus_intr_op.
847  */
848 int
849 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
850     ddi_intr_handle_impl_t *hdlp, void * result)
851 {
852 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
853 	int		ret = DDI_FAILURE;
854 
855 	/* request parent to process this interrupt op */
856 	if (NEXUS_HAS_INTR_OP(pdip))
857 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
858 		    pdip, rdip, op, hdlp, result);
859 	else
860 		cmn_err(CE_WARN, "Failed to process interrupt "
861 		    "for %s%d due to down-rev nexus driver %s%d",
862 		    ddi_get_name(rdip), ddi_get_instance(rdip),
863 		    ddi_get_name(pdip), ddi_get_instance(pdip));
864 	return (ret);
865 }
866 
867 /*
868  * i_ddi_add_softint - allocate and add a soft interrupt to the system
869  */
870 int
871 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
872 {
873 	int ret;
874 
875 	/* add soft interrupt handler */
876 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
877 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
878 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
879 }
880 
881 
/*
 * Remove the soft interrupt described by hdlp; the result of
 * rem_avsoftintr() is deliberately discarded.
 */
void
i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
{
	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
}
887 
888 
/* Defined elsewhere; used by the softint trigger and set-priority code. */
extern void (*setsoftint)(int, struct av_softinfo *);
extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
891 
/*
 * Trigger the soft interrupt described by hdlp, passing arg2 as its second
 * handler argument.  Returns DDI_EPENDING, without updating the arguments,
 * if av_check_softint_pending() reports the softint as already pending;
 * otherwise DDI_SUCCESS.
 */
int
i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
{
	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
		return (DDI_EPENDING);

	/* record the new arg2 before posting the interrupt */
	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);

	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
	return (DDI_SUCCESS);
}
903 
904 /*
905  * i_ddi_set_softint_pri:
906  *
907  * The way this works is that it first tries to add a softint vector
908  * at the new priority in hdlp. If that succeeds; then it removes the
909  * existing softint vector at the old priority.
910  */
911 int
912 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
913 {
914 	int ret;
915 
916 	/*
917 	 * If a softint is pending at the old priority then fail the request.
918 	 */
919 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
920 		return (DDI_FAILURE);
921 
922 	ret = av_softint_movepri((void *)hdlp, old_pri);
923 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
924 }
925 
926 void
927 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
928 {
929 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
930 }
931 
/*
 * Free the handle's ih_private area (sized as an ihdl_plat_t, matching
 * i_ddi_alloc_intr_phdl) and clear the pointer.
 */
void
i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
	hdlp->ih_private = NULL;
}
938 
939 int
940 i_ddi_get_intx_nintrs(dev_info_t *dip)
941 {
942 	struct ddi_parent_private_data *pdp;
943 
944 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
945 		return (0);
946 
947 	return (pdp->par_nintr);
948 }
949 
950 /*
951  * DDI Memory/DMA
952  */
953 
954 /*
955  * Support for allocating DMAable memory to implement
956  * ddi_dma_mem_alloc(9F) interface.
957  */
958 
/*
 * kmem_io_init() creates KA_NCACHE caches per arena, with object sizes
 * KA_ALIGN << c for c in [0, KA_NCACHE): 128 bytes up to 1 << PAGESHIFT.
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
962 
963 /*
964  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
965  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
966  */
967 
/*
 * Initializers are positional, in ddi_dma_attr_t member order (see
 * ddi_dma_attr(9S)).  Per the comment above, only addr_lo, addr_hi and
 * align matter; the other values are placeholders.
 */
static ddi_dma_attr_t kmem_io_attr = {
	DMA_ATTR_V0,
	0x0000000000000000ULL,		/* dma_attr_addr_lo */
	0x0000000000000000ULL,		/* dma_attr_addr_hi */
	0x00ffffff,
	0x1000,				/* dma_attr_align */
	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
};
976 
977 /* kmem io memory ranges and indices */
/*
 * One index per supported address limit, ordered from the largest range
 * (IO_4P) to the smallest (IO_16M); the same order is used by
 * io_arena_params[].  MAX_MEM_RANGES is the number of ranges.
 */
enum {
	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};

/*
 * Per-range state: the vmem arena, the KA_NCACHE kmem caches layered on it
 * (both created by kmem_io_init()), and the DMA attributes handed to
 * page_create_io() when the arena imports memory.
 */
static struct {
	vmem_t		*kmem_io_arena;
	kmem_cache_t	*kmem_io_cache[KA_NCACHE];
	ddi_dma_attr_t	kmem_io_attr;
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;		/* index of first populated kmem_io[] */
990 
991 static page_t *
992 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
993 {
994 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
995 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
996 
997 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
998 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
999 }
1000 
#ifdef __xpv
/*
 * Free routine installed on the kmem_io arenas by kmem_io_init() under
 * __xpv: releases the range through segkmem_xfree(), using
 * page_destroy_io() to dispose of each page.
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
	extern void page_destroy_io(page_t *);
	segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
1009 
1010 static void *
1011 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1012 {
1013 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1014 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1015 }
1016 
1017 static void *
1018 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1019 {
1020 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1021 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1022 }
1023 
1024 static void *
1025 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1026 {
1027 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1028 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1029 }
1030 
1031 static void *
1032 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1033 {
1034 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1035 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1036 }
1037 
1038 static void *
1039 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1040 {
1041 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1042 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1043 }
1044 
1045 static void *
1046 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1047 {
1048 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1049 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1050 }
1051 
1052 static void *
1053 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1054 {
1055 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1056 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1057 }
1058 
1059 static void *
1060 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1061 {
1062 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1063 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1064 }
1065 
1066 static void *
1067 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1068 {
1069 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1070 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1071 }
1072 
1073 static void *
1074 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1075 {
1076 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1077 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1078 }
1079 
1080 static void *
1081 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1082 {
1083 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1084 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1085 }
1086 
/*
 * Static description of each kmem_io memory range, one entry per index of
 * kmem_io[].  Entries are listed from the largest address limit to the
 * smallest.  ka_init() copies io_limit into the range's dma_attr_addr_hi
 * (and may raise one entry's io_limit to the highest physical address);
 * kmem_io_init() uses io_name and io_alloc to create the arena and caches.
 */
struct {
	uint64_t	io_limit;	/* highest address of the range */
	char		*io_name;	/* arena name, also cache name prefix */
	/* vmem import function handed to vmem_create() for this range */
	void		*(*io_alloc)(vmem_t *, size_t, int);
	int		io_initial;	/* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
};
1105 
1106 void
1107 kmem_io_init(int a)
1108 {
1109 	int	c;
1110 	char name[40];
1111 
1112 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1113 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1114 #ifdef __xpv
1115 	    segkmem_free_io,
1116 #else
1117 	    segkmem_free,
1118 #endif
1119 	    heap_arena, 0, VM_SLEEP);
1120 
1121 	for (c = 0; c < KA_NCACHE; c++) {
1122 		size_t size = KA_ALIGN << c;
1123 		(void) sprintf(name, "%s_%lu",
1124 		    io_arena_params[a].io_name, size);
1125 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1126 		    size, size, NULL, NULL, NULL, NULL,
1127 		    kmem_io[a].kmem_io_arena, 0);
1128 	}
1129 }
1130 
1131 /*
1132  * Return the index of the highest memory range for addr.
1133  */
1134 static int
1135 kmem_io_index(uint64_t addr)
1136 {
1137 	int n;
1138 
1139 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1140 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1141 			if (kmem_io[n].kmem_io_arena == NULL)
1142 				kmem_io_init(n);
1143 			return (n);
1144 		}
1145 	}
1146 	panic("kmem_io_index: invalid addr - must be at least 16m");
1147 
1148 	/*NOTREACHED*/
1149 }
1150 
1151 /*
1152  * Return the index of the next kmem_io populated memory range
1153  * after curindex.
1154  */
1155 static int
1156 kmem_io_index_next(int curindex)
1157 {
1158 	int n;
1159 
1160 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1161 		if (kmem_io[n].kmem_io_arena)
1162 			return (n);
1163 	}
1164 	return (-1);
1165 }
1166 
1167 /*
1168  * allow kmem to be mapped in with different PTE cache attribute settings.
1169  * Used by i_ddi_mem_alloc()
1170  */
1171 int
1172 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1173 {
1174 	uint_t hat_flags;
1175 	caddr_t kva_end;
1176 	uint_t hat_attr;
1177 	pfn_t pfn;
1178 
1179 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1180 		return (-1);
1181 	}
1182 
1183 	hat_attr &= ~HAT_ORDER_MASK;
1184 	hat_attr |= order | HAT_NOSYNC;
1185 	hat_flags = HAT_LOAD_LOCK;
1186 
1187 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1188 	    (uintptr_t)PAGEMASK);
1189 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1190 
1191 	while (kva < kva_end) {
1192 		pfn = hat_getpfnum(kas.a_hat, kva);
1193 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1194 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1195 		kva += MMU_PAGESIZE;
1196 	}
1197 
1198 	return (0);
1199 }
1200 
1201 void
1202 ka_init(void)
1203 {
1204 	int a;
1205 	paddr_t maxphysaddr;
1206 #if !defined(__xpv)
1207 	extern pfn_t physmax;
1208 
1209 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1210 #else
1211 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1212 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1213 #endif
1214 
1215 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1216 
1217 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1218 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1219 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1220 				io_arena_params[a].io_limit = maxphysaddr;
1221 			else
1222 				a++;
1223 			break;
1224 		}
1225 	}
1226 	kmem_io_idx = a;
1227 
1228 	for (; a < MAX_MEM_RANGES; a++) {
1229 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1230 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1231 		    io_arena_params[a].io_limit;
1232 		/*
1233 		 * initialize kmem_io[] arena/cache corresponding to
1234 		 * maxphysaddr and to the "common" io memory ranges that
1235 		 * have io_initial set to a non-zero value.
1236 		 */
1237 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1238 			kmem_io_init(a);
1239 	}
1240 }
1241 
1242 /*
1243  * put contig address/size
1244  */
1245 static void *
1246 putctgas(void *addr, size_t size)
1247 {
1248 	struct ctgas	*ctgp = &ctglist;
1249 	int		i;
1250 
1251 	CTGLOCK();
1252 	do {
1253 		if ((i = ctgp->ctg_index) < CTGENTRIES) {
1254 			ctgp->ctg_addr[i] = addr;
1255 			ctgp->ctg_size[i] = size;
1256 			ctgp->ctg_index++;
1257 			break;
1258 		}
1259 		if (!ctgp->ctg_next)
1260 			ctgp->ctg_next = kmem_zalloc(sizeof (struct ctgas),
1261 			    KM_NOSLEEP);
1262 		ctgp = ctgp->ctg_next;
1263 	} while (ctgp);
1264 
1265 	CTGUNLOCK();
1266 	return (ctgp);
1267 }
1268 
1269 /*
1270  * get contig size by addr
1271  */
1272 static size_t
1273 getctgsz(void *addr)
1274 {
1275 	struct ctgas	*ctgp = &ctglist;
1276 	int		i, j;
1277 	size_t		sz;
1278 
1279 	ASSERT(addr);
1280 	CTGLOCK();
1281 
1282 	while (ctgp) {
1283 		for (i = 0; i < ctgp->ctg_index; i++) {
1284 			if (addr != ctgp->ctg_addr[i])
1285 				continue;
1286 
1287 			sz = ctgp->ctg_size[i];
1288 			j = --ctgp->ctg_index;
1289 			if (i != j) {
1290 				ctgp->ctg_size[i] = ctgp->ctg_size[j];
1291 				ctgp->ctg_addr[i] = ctgp->ctg_addr[j];
1292 			}
1293 			CTGUNLOCK();
1294 			return (sz);
1295 		}
1296 		ctgp = ctgp->ctg_next;
1297 	}
1298 
1299 	CTGUNLOCK();
1300 	return (0);
1301 }
1302 
1303 /*
1304  * contig_alloc:
1305  *
1306  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1307  *	specified in 'attr'.
1308  *
1309  *	Not all of memory need to be physically contiguous if the
1310  *	scatter-gather list length is greater than 1.
1311  */
1312 
1313 /*ARGSUSED*/
1314 void *
1315 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1316 {
1317 	pgcnt_t		pgcnt = btopr(size);
1318 	size_t		asize = pgcnt * PAGESIZE;
1319 	page_t		*ppl;
1320 	int		pflag;
1321 	void		*addr;
1322 
1323 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1324 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1325 
1326 	/* segkmem_xalloc */
1327 
1328 	if (align <= PAGESIZE)
1329 		addr = vmem_alloc(heap_arena, asize,
1330 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1331 	else
1332 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1333 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1334 	if (addr) {
1335 		ASSERT(!((uintptr_t)addr & (align - 1)));
1336 
1337 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1338 			vmem_free(heap_arena, addr, asize);
1339 			return (NULL);
1340 		}
1341 		pflag = PG_EXCL;
1342 
1343 		if (cansleep)
1344 			pflag |= PG_WAIT;
1345 
1346 		/* 4k req gets from freelists rather than pfn search */
1347 		if (pgcnt > 1 || align > PAGESIZE)
1348 			pflag |= PG_PHYSCONTIG;
1349 
1350 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1351 		    asize, pflag, &kas, (caddr_t)addr, attr);
1352 
1353 		if (!ppl) {
1354 			vmem_free(heap_arena, addr, asize);
1355 			page_unresv(pgcnt);
1356 			return (NULL);
1357 		}
1358 
1359 		while (ppl != NULL) {
1360 			page_t	*pp = ppl;
1361 			page_sub(&ppl, pp);
1362 			ASSERT(page_iolock_assert(pp));
1363 			page_io_unlock(pp);
1364 			page_downgrade(pp);
1365 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1366 			    pp, (PROT_ALL & ~PROT_USER) |
1367 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1368 		}
1369 	}
1370 	return (addr);
1371 }
1372 
1373 void
1374 contig_free(void *addr, size_t size)
1375 {
1376 	pgcnt_t	pgcnt = btopr(size);
1377 	size_t	asize = pgcnt * PAGESIZE;
1378 	caddr_t	a, ea;
1379 	page_t	*pp;
1380 
1381 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1382 
1383 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1384 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1385 		if (!pp)
1386 			panic("contig_free: contig pp not found");
1387 
1388 		if (!page_tryupgrade(pp)) {
1389 			page_unlock(pp);
1390 			pp = page_lookup(&kvp,
1391 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1392 			if (pp == NULL)
1393 				panic("contig_free: page freed");
1394 		}
1395 		page_destroy(pp, 0);
1396 	}
1397 
1398 	page_unresv(pgcnt);
1399 	vmem_free(heap_arena, addr, asize);
1400 }
1401 
1402 /*
1403  * Allocate from the system, aligned on a specific boundary.
1404  * The alignment, if non-zero, must be a power of 2.
1405  */
1406 static void *
1407 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1408 	ddi_dma_attr_t *attr)
1409 {
1410 	size_t *addr, *raddr, rsize;
1411 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
1412 	int a, i, c;
1413 	vmem_t *vmp;
1414 	kmem_cache_t *cp = NULL;
1415 
1416 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1417 		return (NULL);
1418 
1419 	align = MAX(align, hdrsize);
1420 	ASSERT((align & (align - 1)) == 0);
1421 
1422 	/*
1423 	 * All of our allocators guarantee 16-byte alignment, so we don't
1424 	 * need to reserve additional space for the header.
1425 	 * To simplify picking the correct kmem_io_cache, we round up to
1426 	 * a multiple of KA_ALIGN.
1427 	 */
1428 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1429 
1430 	if (physcontig && rsize > PAGESIZE) {
1431 		if (addr = contig_alloc(size, attr, align, cansleep)) {
1432 			if (!putctgas(addr, size))
1433 				contig_free(addr, size);
1434 			else
1435 				return (addr);
1436 		}
1437 		return (NULL);
1438 	}
1439 
1440 	a = kmem_io_index(attr->dma_attr_addr_hi);
1441 
1442 	if (rsize > PAGESIZE) {
1443 		vmp = kmem_io[a].kmem_io_arena;
1444 		raddr = vmem_alloc(vmp, rsize,
1445 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1446 	} else {
1447 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1448 		cp = kmem_io[a].kmem_io_cache[c];
1449 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1450 		    KM_NOSLEEP);
1451 	}
1452 
1453 	if (raddr == NULL) {
1454 		int	na;
1455 
1456 		ASSERT(cansleep == 0);
1457 		if (rsize > PAGESIZE)
1458 			return (NULL);
1459 		/*
1460 		 * System does not have memory in the requested range.
1461 		 * Try smaller kmem io ranges and larger cache sizes
1462 		 * to see if there might be memory available in
1463 		 * these other caches.
1464 		 */
1465 
1466 		for (na = kmem_io_index_next(a); na >= 0;
1467 		    na = kmem_io_index_next(na)) {
1468 			ASSERT(kmem_io[na].kmem_io_arena);
1469 			cp = kmem_io[na].kmem_io_cache[c];
1470 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1471 			if (raddr)
1472 				goto kallocdone;
1473 		}
1474 		/* now try the larger kmem io cache sizes */
1475 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1476 			for (i = c + 1; i < KA_NCACHE; i++) {
1477 				cp = kmem_io[na].kmem_io_cache[i];
1478 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1479 				if (raddr)
1480 					goto kallocdone;
1481 			}
1482 		}
1483 		return (NULL);
1484 	}
1485 
1486 kallocdone:
1487 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1488 	    rsize > PAGESIZE);
1489 
1490 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1491 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1492 
1493 	addr[-4] = (size_t)cp;
1494 	addr[-3] = (size_t)vmp;
1495 	addr[-2] = (size_t)raddr;
1496 	addr[-1] = rsize;
1497 
1498 	return (addr);
1499 }
1500 
1501 static void
1502 kfreea(void *addr)
1503 {
1504 	size_t		size;
1505 
1506 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1507 		contig_free(addr, size);
1508 	} else {
1509 		size_t	*saddr = addr;
1510 		if (saddr[-4] == 0)
1511 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1512 			    saddr[-1]);
1513 		else
1514 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1515 			    (void *)saddr[-2]);
1516 	}
1517 }
1518 
/*
 * Intentionally empty on this platform: neither devaccp nor hataccp is
 * read or written.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1524 
1525 /*
1526  * Check if the specified cache attribute is supported on the platform.
1527  * This function must be called before i_ddi_cacheattr_to_hatacc().
1528  */
1529 boolean_t
1530 i_ddi_check_cache_attr(uint_t flags)
1531 {
1532 	/*
1533 	 * The cache attributes are mutually exclusive. Any combination of
1534 	 * the attributes leads to a failure.
1535 	 */
1536 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1537 	if ((cache_attr != 0) && ((cache_attr & (cache_attr - 1)) != 0))
1538 		return (B_FALSE);
1539 
1540 	/* All cache attributes are supported on X86/X64 */
1541 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1542 	    IOMEM_DATA_UC_WR_COMBINE))
1543 		return (B_TRUE);
1544 
1545 	/* undefined attributes */
1546 	return (B_FALSE);
1547 }
1548 
1549 /* set HAT cache attributes from the cache attributes */
1550 void
1551 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1552 {
1553 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1554 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1555 
1556 	/*
1557 	 * If write-combining is not supported, then it falls back
1558 	 * to uncacheable.
1559 	 */
1560 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1561 	    !is_x86_feature(x86_featureset, X86FSET_PAT))
1562 		cache_attr = IOMEM_DATA_UNCACHED;
1563 
1564 	/*
1565 	 * set HAT attrs according to the cache attrs.
1566 	 */
1567 	switch (cache_attr) {
1568 	case IOMEM_DATA_UNCACHED:
1569 		*hataccp &= ~HAT_ORDER_MASK;
1570 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1571 		break;
1572 	case IOMEM_DATA_UC_WR_COMBINE:
1573 		*hataccp &= ~HAT_ORDER_MASK;
1574 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1575 		break;
1576 	case IOMEM_DATA_CACHED:
1577 		*hataccp &= ~HAT_ORDER_MASK;
1578 		*hataccp |= HAT_UNORDERED_OK;
1579 		break;
1580 	/*
1581 	 * This case must not occur because the cache attribute is scrutinized
1582 	 * before this function is called.
1583 	 */
1584 	default:
1585 		/*
1586 		 * set cacheable to hat attrs.
1587 		 */
1588 		*hataccp &= ~HAT_ORDER_MASK;
1589 		*hataccp |= HAT_UNORDERED_OK;
1590 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1591 		    fname, cache_attr);
1592 	}
1593 }
1594 
1595 /*
1596  * This should actually be called i_ddi_dma_mem_alloc. There should
1597  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1598  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1599  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1600  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1601  * so far which is used for both, DMA and PIO, we have to use the DMA
1602  * ctl ops to make everybody happy.
1603  */
1604 /*ARGSUSED*/
1605 int
1606 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1607 	size_t length, int cansleep, int flags,
1608 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1609 	size_t *real_length, ddi_acc_hdl_t *ap)
1610 {
1611 	caddr_t a;
1612 	int iomin;
1613 	ddi_acc_impl_t *iap;
1614 	int physcontig = 0;
1615 	pgcnt_t npages;
1616 	pgcnt_t minctg;
1617 	uint_t order;
1618 	int e;
1619 
1620 	/*
1621 	 * Check legality of arguments
1622 	 */
1623 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1624 		return (DDI_FAILURE);
1625 	}
1626 
1627 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1628 	    (attr->dma_attr_align & (attr->dma_attr_align - 1)) ||
1629 	    (attr->dma_attr_minxfer & (attr->dma_attr_minxfer - 1))) {
1630 			return (DDI_FAILURE);
1631 	}
1632 
1633 	/*
1634 	 * figure out most restrictive alignment requirement
1635 	 */
1636 	iomin = attr->dma_attr_minxfer;
1637 	iomin = maxbit(iomin, attr->dma_attr_align);
1638 	if (iomin == 0)
1639 		return (DDI_FAILURE);
1640 
1641 	ASSERT((iomin & (iomin - 1)) == 0);
1642 
1643 	/*
1644 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1645 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1646 	 * memory that ends on a page boundry.
1647 	 * Don't want to have to different cache mappings to the same
1648 	 * physical page.
1649 	 */
1650 	if (OVERRIDE_CACHE_ATTR(flags)) {
1651 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1652 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1653 	}
1654 
1655 	/*
1656 	 * Determine if we need to satisfy the request for physically
1657 	 * contiguous memory or alignments larger than pagesize.
1658 	 */
1659 	npages = btopr(length + attr->dma_attr_align);
1660 	minctg = howmany(npages, attr->dma_attr_sgllen);
1661 
1662 	if (minctg > 1) {
1663 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1664 		/*
1665 		 * verify that the minimum contig requirement for the
1666 		 * actual length does not cross segment boundary.
1667 		 */
1668 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1669 		    size_t);
1670 		npages = btopr(length);
1671 		minctg = howmany(npages, attr->dma_attr_sgllen);
1672 		if (minctg > pfnseg + 1)
1673 			return (DDI_FAILURE);
1674 		physcontig = 1;
1675 	} else {
1676 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1677 	}
1678 
1679 	/*
1680 	 * Allocate the requested amount from the system.
1681 	 */
1682 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1683 
1684 	if ((*kaddrp = a) == NULL)
1685 		return (DDI_FAILURE);
1686 
1687 	/*
1688 	 * if we to modify the cache attributes, go back and muck with the
1689 	 * mappings.
1690 	 */
1691 	if (OVERRIDE_CACHE_ATTR(flags)) {
1692 		order = 0;
1693 		i_ddi_cacheattr_to_hatacc(flags, &order);
1694 		e = kmem_override_cache_attrs(a, length, order);
1695 		if (e != 0) {
1696 			kfreea(a);
1697 			return (DDI_FAILURE);
1698 		}
1699 	}
1700 
1701 	if (real_length) {
1702 		*real_length = length;
1703 	}
1704 	if (ap) {
1705 		/*
1706 		 * initialize access handle
1707 		 */
1708 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1709 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1710 		impl_acc_hdl_init(ap);
1711 	}
1712 
1713 	return (DDI_SUCCESS);
1714 }
1715 
1716 /* ARGSUSED */
1717 void
1718 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1719 {
1720 	if (ap != NULL) {
1721 		/*
1722 		 * if we modified the cache attributes on alloc, go back and
1723 		 * fix them since this memory could be returned to the
1724 		 * general pool.
1725 		 */
1726 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1727 			uint_t order = 0;
1728 			int e;
1729 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1730 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1731 			if (e != 0) {
1732 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1733 				    "override cache attrs, memory leaked\n");
1734 				return;
1735 			}
1736 		}
1737 	}
1738 	kfreea(kaddr);
1739 }
1740 
1741 /*
1742  * Access Barriers
1743  *
1744  */
/*
 * Always returns DDI_FAILURE; the handle is not examined.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
	return (DDI_FAILURE);
}
1751 
/*
 * Intentionally empty; the handle is not examined.
 */
/*ARGSUSED*/
void
i_ddi_notrap(ddi_acc_handle_t hp)
{
}
1757 
1758 
1759 /*
1760  * Misc Functions
1761  */
1762 
1763 /*
1764  * Implementation instance override functions
1765  *
1766  * No override on i86pc
1767  */
/*
 * Always returns (uint_t)-1; dip is not examined.
 */
/*ARGSUSED*/
uint_t
impl_assign_instance(dev_info_t *dip)
{
	return ((uint_t)-1);
}
1774 
1775 /*ARGSUSED*/
1776 int
1777 impl_keep_instance(dev_info_t *dip)
1778 {
1779 
1780 #if defined(__xpv)
1781 	/*
1782 	 * Do not persist instance numbers assigned to devices in dom0
1783 	 */
1784 	dev_info_t *pdip;
1785 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1786 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1787 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1788 			return (DDI_SUCCESS);
1789 	}
1790 #endif
1791 	return (DDI_FAILURE);
1792 }
1793 
/*
 * Always returns DDI_FAILURE; dip is not examined.
 */
/*ARGSUSED*/
int
impl_free_instance(dev_info_t *dip)
{
	return (DDI_FAILURE);
}
1800 
/*
 * Always returns DDI_SUCCESS; devi is not examined.
 */
/*ARGSUSED*/
int
impl_check_cpu(dev_info_t *devi)
{
	return (DDI_SUCCESS);
}
1807 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this does nothing.
 */
void
arch_power_down(void)
{
}
1815 
1816 /*
1817  * Copy name to property_name, since name
1818  * is in the low address range below kernelbase.
1819  */
1820 static void
1821 copy_boot_str(const char *boot_str, char *kern_str, int len)
1822 {
1823 	int i = 0;
1824 
1825 	while (i < len - 1 && boot_str[i] != '\0') {
1826 		kern_str[i] = boot_str[i];
1827 		i++;
1828 	}
1829 
1830 	kern_str[i] = 0;	/* null terminate */
1831 	if (boot_str[i] != '\0')
1832 		cmn_err(CE_WARN,
1833 		    "boot property string is truncated to %s", kern_str);
1834 }
1835 
1836 static void
1837 get_boot_properties(void)
1838 {
1839 	extern char hw_provider[];
1840 	dev_info_t *devi;
1841 	char *name;
1842 	int length;
1843 	char property_name[50], property_val[50];
1844 	void *bop_staging_area;
1845 
1846 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1847 
1848 	/*
1849 	 * Import "root" properties from the boot.
1850 	 *
1851 	 * We do this by invoking BOP_NEXTPROP until the list
1852 	 * is completely copied in.
1853 	 */
1854 
1855 	devi = ddi_root_node();
1856 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1857 	    name;					/* NULL => DONE */
1858 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1859 
1860 		/* copy string to memory above kernelbase */
1861 		copy_boot_str(name, property_name, 50);
1862 
1863 		/*
1864 		 * Skip vga properties. They will be picked up later
1865 		 * by get_vga_properties.
1866 		 */
1867 		if (strcmp(property_name, "display-edif-block") == 0 ||
1868 		    strcmp(property_name, "display-edif-id") == 0) {
1869 			continue;
1870 		}
1871 
1872 		length = BOP_GETPROPLEN(bootops, property_name);
1873 		if (length == 0)
1874 			continue;
1875 		if (length > MMU_PAGESIZE) {
1876 			cmn_err(CE_NOTE,
1877 			    "boot property %s longer than 0x%x, ignored\n",
1878 			    property_name, MMU_PAGESIZE);
1879 			continue;
1880 		}
1881 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1882 
1883 		/*
1884 		 * special properties:
1885 		 * si-machine, si-hw-provider
1886 		 *	goes to kernel data structures.
1887 		 * bios-boot-device and stdout
1888 		 *	goes to hardware property list so it may show up
1889 		 *	in the prtconf -vp output. This is needed by
1890 		 *	Install/Upgrade. Once we fix install upgrade,
1891 		 *	this can be taken out.
1892 		 */
1893 		if (strcmp(name, "si-machine") == 0) {
1894 			(void) strncpy(utsname.machine, bop_staging_area,
1895 			    SYS_NMLN);
1896 			utsname.machine[SYS_NMLN - 1] = (char)NULL;
1897 		} else if (strcmp(name, "si-hw-provider") == 0) {
1898 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1899 			hw_provider[SYS_NMLN - 1] = (char)NULL;
1900 		} else if (strcmp(name, "bios-boot-device") == 0) {
1901 			copy_boot_str(bop_staging_area, property_val, 50);
1902 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1903 			    property_name, property_val);
1904 		} else if (strcmp(name, "stdout") == 0) {
1905 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1906 			    property_name, *((int *)bop_staging_area));
1907 		} else {
1908 			/* Property type unknown, use old prop interface */
1909 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1910 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1911 			    length);
1912 		}
1913 	}
1914 
1915 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1916 }
1917 
1918 static void
1919 get_vga_properties(void)
1920 {
1921 	dev_info_t *devi;
1922 	major_t major;
1923 	char *name;
1924 	int length;
1925 	char property_val[50];
1926 	void *bop_staging_area;
1927 
1928 	/*
1929 	 * XXXX Hack Allert!
1930 	 * There really needs to be a better way for identifying various
1931 	 * console framebuffers and their related issues.  Till then,
1932 	 * check for this one as a replacement to vgatext.
1933 	 */
1934 	major = ddi_name_to_major("ragexl");
1935 	if (major == (major_t)-1) {
1936 		major = ddi_name_to_major("vgatext");
1937 		if (major == (major_t)-1)
1938 			return;
1939 	}
1940 	devi = devnamesp[major].dn_head;
1941 	if (devi == NULL)
1942 		return;
1943 
1944 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1945 
1946 	/*
1947 	 * Import "vga" properties from the boot.
1948 	 */
1949 	name = "display-edif-block";
1950 	length = BOP_GETPROPLEN(bootops, name);
1951 	if (length > 0 && length < MMU_PAGESIZE) {
1952 		BOP_GETPROP(bootops, name, bop_staging_area);
1953 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
1954 		    devi, name, bop_staging_area, length);
1955 	}
1956 
1957 	/*
1958 	 * kdmconfig is also looking for display-type and
1959 	 * video-adapter-type. We default to color and svga.
1960 	 *
1961 	 * Could it be "monochrome", "vga"?
1962 	 * Nah, you've got to come to the 21st century...
1963 	 * And you can set monitor type manually in kdmconfig
1964 	 * if you are really an old junky.
1965 	 */
1966 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1967 	    devi, "display-type", "color");
1968 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1969 	    devi, "video-adapter-type", "svga");
1970 
1971 	name = "display-edif-id";
1972 	length = BOP_GETPROPLEN(bootops, name);
1973 	if (length > 0 && length < MMU_PAGESIZE) {
1974 		BOP_GETPROP(bootops, name, bop_staging_area);
1975 		copy_boot_str(bop_staging_area, property_val, length);
1976 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
1977 		    devi, name, property_val);
1978 	}
1979 
1980 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1981 }
1982 
1983 
1984 /*
1985  * This is temporary, but absolutely necessary.  If we are being
1986  * booted with a device tree created by the DevConf project's bootconf
1987  * program, then we have device information nodes that reflect
1988  * reality.  At this point in time in the Solaris release schedule, the
1989  * kernel drivers aren't prepared for reality.  They still depend on their
1990  * own ad-hoc interpretations of the properties created when their .conf
1991  * files were interpreted. These drivers use an "ignore-hardware-nodes"
1992  * property to prevent them from using the nodes passed up from the bootconf
1993  * device tree.
1994  *
1995  * Trying to assemble root file system drivers as we are booting from
1996  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
1998  * reality-based name_addrs.  We help the boot along in this case by
1999  * looking at the pre-bootconf bootpath and determining if we would have
2000  * successfully matched if that had been the bootpath we had chosen.
2001  *
2002  * Note that we only even perform this extra check if we've booted
2003  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2004  * we're trying to match the name_addr specified in the 1275 bootpath.
2005  */
2006 
2007 #define	MAXCOMPONENTLEN	32
2008 
2009 int
2010 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2011 {
2012 	/*
2013 	 *  There are multiple criteria to be met before we can even
2014 	 *  consider allowing a name_addr match here.
2015 	 *
2016 	 *  1) We must have been booted such that the bootconf program
2017 	 *	created device tree nodes and properties.  This can be
2018 	 *	determined by examining the 'bootpath' property.  This
2019 	 *	property will be a non-null string iff bootconf was
2020 	 *	involved in the boot.
2021 	 *
2022 	 *  2) The module that we want to match must be the boot device.
2023 	 *
2024 	 *  3) The instance of the module we are thinking of letting be
2025 	 *	our match must be ignoring hardware nodes.
2026 	 *
2027 	 *  4) The name_addr we want to match must be the name_addr
2028 	 *	specified in the 1275 bootpath.
2029 	 */
2030 	static char bootdev_module[MAXCOMPONENTLEN];
2031 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2032 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2033 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2034 	static int  quickexit;
2035 
2036 	char *daddr;
2037 	int dlen;
2038 
2039 	char	*lkupname;
2040 	int	rv = DDI_FAILURE;
2041 
2042 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2043 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2044 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2045 	    "ignore-hardware-nodes", -1) != -1)) {
2046 		if (strcmp(daddr, caddr) == 0) {
2047 			return (DDI_SUCCESS);
2048 		}
2049 	}
2050 
2051 	if (quickexit)
2052 		return (rv);
2053 
2054 	if (bootdev_module[0] == '\0') {
2055 		char *addrp, *eoaddrp;
2056 		char *busp, *modp, *atp;
2057 		char *bp1275, *bp;
2058 		int  bp1275len, bplen;
2059 
2060 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2061 
2062 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2063 		    ddi_root_node(), 0, "bootpath",
2064 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2065 		    bp1275len <= 1) {
2066 			/*
2067 			 * We didn't boot from bootconf so we never need to
2068 			 * do any special matches.
2069 			 */
2070 			quickexit = 1;
2071 			if (bp1275)
2072 				kmem_free(bp1275, bp1275len);
2073 			return (rv);
2074 		}
2075 
2076 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2077 		    ddi_root_node(), 0, "boot-path",
2078 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2079 			/*
2080 			 * No fallback position for matching. This is
2081 			 * certainly unexpected, but we'll handle it
2082 			 * just in case.
2083 			 */
2084 			quickexit = 1;
2085 			kmem_free(bp1275, bp1275len);
2086 			if (bp)
2087 				kmem_free(bp, bplen);
2088 			return (rv);
2089 		}
2090 
2091 		/*
2092 		 *  Determine boot device module and 1275 name_addr
2093 		 *
2094 		 *  bootpath assumed to be of the form /bus/module@name_addr
2095 		 */
2096 		if (busp = strchr(bp1275, '/')) {
2097 			if (modp = strchr(busp + 1, '/')) {
2098 				if (atp = strchr(modp + 1, '@')) {
2099 					*atp = '\0';
2100 					addrp = atp + 1;
2101 					if (eoaddrp = strchr(addrp, '/'))
2102 						*eoaddrp = '\0';
2103 				}
2104 			}
2105 		}
2106 
2107 		if (modp && addrp) {
2108 			(void) strncpy(bootdev_module, modp + 1,
2109 			    MAXCOMPONENTLEN);
2110 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2111 
2112 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2113 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2114 		} else {
2115 			quickexit = 1;
2116 			kmem_free(bp1275, bp1275len);
2117 			kmem_free(bp, bplen);
2118 			return (rv);
2119 		}
2120 
2121 		/*
2122 		 *  Determine fallback name_addr
2123 		 *
2124 		 *  10/3/96 - Also save fallback module name because it
2125 		 *  might actually be different than the current module
2126 		 *  name.  E.G., ISA pnp drivers have new names.
2127 		 *
2128 		 *  bootpath assumed to be of the form /bus/module@name_addr
2129 		 */
2130 		addrp = NULL;
2131 		if (busp = strchr(bp, '/')) {
2132 			if (modp = strchr(busp + 1, '/')) {
2133 				if (atp = strchr(modp + 1, '@')) {
2134 					*atp = '\0';
2135 					addrp = atp + 1;
2136 					if (eoaddrp = strchr(addrp, '/'))
2137 						*eoaddrp = '\0';
2138 				}
2139 			}
2140 		}
2141 
2142 		if (modp && addrp) {
2143 			(void) strncpy(bootdev_oldmod, modp + 1,
2144 			    MAXCOMPONENTLEN);
2145 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2146 
2147 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2148 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2149 		}
2150 
2151 		/* Free up the bootpath storage now that we're done with it. */
2152 		kmem_free(bp1275, bp1275len);
2153 		kmem_free(bp, bplen);
2154 
2155 		if (bootdev_oldaddr[0] == '\0') {
2156 			quickexit = 1;
2157 			return (rv);
2158 		}
2159 	}
2160 
2161 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2162 	    (strcmp(bootdev_module, lkupname) == 0 ||
2163 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2164 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2165 	    "ignore-hardware-nodes", -1) != -1) ||
2166 	    ignore_hardware_nodes) &&
2167 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2168 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2169 		rv = DDI_SUCCESS;
2170 	}
2171 
2172 	return (rv);
2173 }
2174 
2175 /*
2176  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2177  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2178  */
2179 /*ARGSUSED*/
2180 int
2181 e_ddi_copyfromdev(dev_info_t *devi,
2182     off_t off, const void *devaddr, void *kaddr, size_t len)
2183 {
2184 	bcopy(devaddr, kaddr, len);
2185 	return (0);
2186 }
2187 
2188 /*
2189  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2190  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2191  */
2192 /*ARGSUSED*/
2193 int
2194 e_ddi_copytodev(dev_info_t *devi,
2195     off_t off, const void *kaddr, void *devaddr, size_t len)
2196 {
2197 	bcopy(kaddr, devaddr, len);
2198 	return (0);
2199 }
2200 
2201 
2202 static int
2203 poke_mem(peekpoke_ctlops_t *in_args)
2204 {
2205 	int err = DDI_SUCCESS;
2206 	on_trap_data_t otd;
2207 
2208 	/* Set up protected environment. */
2209 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2210 		switch (in_args->size) {
2211 		case sizeof (uint8_t):
2212 			*(uint8_t *)(in_args->dev_addr) =
2213 			    *(uint8_t *)in_args->host_addr;
2214 			break;
2215 
2216 		case sizeof (uint16_t):
2217 			*(uint16_t *)(in_args->dev_addr) =
2218 			    *(uint16_t *)in_args->host_addr;
2219 			break;
2220 
2221 		case sizeof (uint32_t):
2222 			*(uint32_t *)(in_args->dev_addr) =
2223 			    *(uint32_t *)in_args->host_addr;
2224 			break;
2225 
2226 		case sizeof (uint64_t):
2227 			*(uint64_t *)(in_args->dev_addr) =
2228 			    *(uint64_t *)in_args->host_addr;
2229 			break;
2230 
2231 		default:
2232 			err = DDI_FAILURE;
2233 			break;
2234 		}
2235 	} else
2236 		err = DDI_FAILURE;
2237 
2238 	/* Take down protected environment. */
2239 	no_trap();
2240 
2241 	return (err);
2242 }
2243 
2244 
2245 static int
2246 peek_mem(peekpoke_ctlops_t *in_args)
2247 {
2248 	int err = DDI_SUCCESS;
2249 	on_trap_data_t otd;
2250 
2251 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2252 		switch (in_args->size) {
2253 		case sizeof (uint8_t):
2254 			*(uint8_t *)in_args->host_addr =
2255 			    *(uint8_t *)in_args->dev_addr;
2256 			break;
2257 
2258 		case sizeof (uint16_t):
2259 			*(uint16_t *)in_args->host_addr =
2260 			    *(uint16_t *)in_args->dev_addr;
2261 			break;
2262 
2263 		case sizeof (uint32_t):
2264 			*(uint32_t *)in_args->host_addr =
2265 			    *(uint32_t *)in_args->dev_addr;
2266 			break;
2267 
2268 		case sizeof (uint64_t):
2269 			*(uint64_t *)in_args->host_addr =
2270 			    *(uint64_t *)in_args->dev_addr;
2271 			break;
2272 
2273 		default:
2274 			err = DDI_FAILURE;
2275 			break;
2276 		}
2277 	} else
2278 		err = DDI_FAILURE;
2279 
2280 	no_trap();
2281 	return (err);
2282 }
2283 
2284 
2285 /*
2286  * This is called only to process peek/poke when the DIP is NULL.
2287  * Assume that this is for memory, as nexi take care of device safe accesses.
2288  */
2289 int
2290 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2291 {
2292 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2293 }
2294 
2295 /*
2296  * we've just done a cautious put/get. Check if it was successful by
2297  * calling pci_ereport_post() on all puts and for any gets that return -1
2298  */
2299 static int
2300 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2301     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2302 {
2303 	int	rval = DDI_SUCCESS;
2304 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2305 	ddi_fm_error_t de;
2306 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2307 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2308 	int check_err = 0;
2309 	int repcount = in_args->repcount;
2310 
2311 	if (ctlop == DDI_CTLOPS_POKE &&
2312 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2313 		return (DDI_SUCCESS);
2314 
2315 	if (ctlop == DDI_CTLOPS_PEEK &&
2316 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2317 		for (; repcount; repcount--) {
2318 			switch (in_args->size) {
2319 			case sizeof (uint8_t):
2320 				if (*(uint8_t *)in_args->host_addr == 0xff)
2321 					check_err = 1;
2322 				break;
2323 			case sizeof (uint16_t):
2324 				if (*(uint16_t *)in_args->host_addr == 0xffff)
2325 					check_err = 1;
2326 				break;
2327 			case sizeof (uint32_t):
2328 				if (*(uint32_t *)in_args->host_addr ==
2329 				    0xffffffff)
2330 					check_err = 1;
2331 				break;
2332 			case sizeof (uint64_t):
2333 				if (*(uint64_t *)in_args->host_addr ==
2334 				    0xffffffffffffffff)
2335 					check_err = 1;
2336 				break;
2337 			}
2338 		}
2339 		if (check_err == 0)
2340 			return (DDI_SUCCESS);
2341 	}
2342 	/*
2343 	 * for a cautious put or get or a non-cautious get that returned -1 call
2344 	 * io framework to see if there really was an error
2345 	 */
2346 	bzero(&de, sizeof (ddi_fm_error_t));
2347 	de.fme_version = DDI_FME_VERSION;
2348 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2349 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2350 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2351 		de.fme_acc_handle = in_args->handle;
2352 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2353 		/*
2354 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
2355 		 * Non-hardened drivers may be probing the hardware and
2356 		 * expecting -1 returned. So need to treat errors on
2357 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2358 		 */
2359 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2360 		de.fme_acc_handle = in_args->handle;
2361 	} else {
2362 		/*
2363 		 * Hardened driver doing protected accesses shouldn't
2364 		 * get errors unless there's a hardware problem. Treat
2365 		 * as nonfatal if there's an error, but set UNEXPECTED
2366 		 * so we raise ereports on any errors and potentially
2367 		 * fault the device
2368 		 */
2369 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2370 	}
2371 	(void) scan(dip, &de);
2372 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2373 	    de.fme_status != DDI_FM_OK) {
2374 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2375 		rval = DDI_FAILURE;
2376 		errp->err_ena = de.fme_ena;
2377 		errp->err_expected = de.fme_flag;
2378 		errp->err_status = DDI_FM_NONFATAL;
2379 	}
2380 	return (rval);
2381 }
2382 
2383 /*
2384  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2385  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2386  * recurse, so assume all puts are OK and gets have failed if they return -1
2387  */
2388 static int
2389 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2390 {
2391 	int rval = DDI_SUCCESS;
2392 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2393 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2394 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2395 	int repcount = in_args->repcount;
2396 
2397 	if (ctlop == DDI_CTLOPS_POKE)
2398 		return (rval);
2399 
2400 	for (; repcount; repcount--) {
2401 		switch (in_args->size) {
2402 		case sizeof (uint8_t):
2403 			if (*(uint8_t *)in_args->host_addr == 0xff)
2404 				rval = DDI_FAILURE;
2405 			break;
2406 		case sizeof (uint16_t):
2407 			if (*(uint16_t *)in_args->host_addr == 0xffff)
2408 				rval = DDI_FAILURE;
2409 			break;
2410 		case sizeof (uint32_t):
2411 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2412 				rval = DDI_FAILURE;
2413 			break;
2414 		case sizeof (uint64_t):
2415 			if (*(uint64_t *)in_args->host_addr ==
2416 			    0xffffffffffffffff)
2417 				rval = DDI_FAILURE;
2418 			break;
2419 		}
2420 	}
2421 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2422 	    rval == DDI_FAILURE) {
2423 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2424 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2425 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2426 		errp->err_status = DDI_FM_NONFATAL;
2427 	}
2428 	return (rval);
2429 }
2430 
2431 int
2432 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2433 	ddi_ctl_enum_t ctlop, void *arg, void *result,
2434 	int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2435 	void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2436 	void (*scan)(dev_info_t *, ddi_fm_error_t *))
2437 {
2438 	int rval;
2439 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2440 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2441 
2442 	/*
2443 	 * this function only supports cautious accesses, not peeks/pokes
2444 	 * which don't have a handle
2445 	 */
2446 	if (hp == NULL)
2447 		return (DDI_FAILURE);
2448 
2449 	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2450 		if (!mutex_tryenter(err_mutexp)) {
2451 			/*
2452 			 * As this may be a recursive call from within
2453 			 * pci_ereport_post() we can't wait for the mutexes.
2454 			 * Fortunately we know someone is already calling
2455 			 * pci_ereport_post() which will handle the error bits
2456 			 * for us, and as this is a config space access we can
2457 			 * just do the access and check return value for -1
2458 			 * using pci_peekpoke_check_nofma().
2459 			 */
2460 			rval = handler(dip, rdip, ctlop, arg, result);
2461 			if (rval == DDI_SUCCESS)
2462 				rval = pci_peekpoke_check_nofma(arg, ctlop);
2463 			return (rval);
2464 		}
2465 		/*
2466 		 * This can't be a recursive call. Drop the err_mutex and get
2467 		 * both mutexes in the right order. If an error hasn't already
2468 		 * been detected by the ontrap code, use pci_peekpoke_check_fma
2469 		 * which will call pci_ereport_post() to check error status.
2470 		 */
2471 		mutex_exit(err_mutexp);
2472 	}
2473 	mutex_enter(peek_poke_mutexp);
2474 	rval = handler(dip, rdip, ctlop, arg, result);
2475 	if (rval == DDI_SUCCESS) {
2476 		mutex_enter(err_mutexp);
2477 		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2478 		mutex_exit(err_mutexp);
2479 	}
2480 	mutex_exit(peek_poke_mutexp);
2481 	return (rval);
2482 }
2483 
2484 void
2485 impl_setup_ddi(void)
2486 {
2487 #if !defined(__xpv)
2488 	extern void startup_bios_disk(void);
2489 	extern int post_fastreboot;
2490 #endif
2491 	dev_info_t *xdip, *isa_dip;
2492 	rd_existing_t rd_mem_prop;
2493 	int err;
2494 
2495 	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2496 	    (pnode_t)DEVI_SID_NODEID, &xdip);
2497 
2498 	(void) BOP_GETPROP(bootops,
2499 	    "ramdisk_start", (void *)&ramdisk_start);
2500 	(void) BOP_GETPROP(bootops,
2501 	    "ramdisk_end", (void *)&ramdisk_end);
2502 
2503 #ifdef __xpv
2504 	ramdisk_start -= ONE_GIG;
2505 	ramdisk_end -= ONE_GIG;
2506 #endif
2507 	rd_mem_prop.phys = ramdisk_start;
2508 	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2509 
2510 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2511 	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2512 	    sizeof (rd_mem_prop));
2513 	err = ndi_devi_bind_driver(xdip, 0);
2514 	ASSERT(err == 0);
2515 
2516 	/* isa node */
2517 	if (pseudo_isa) {
2518 		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2519 		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
2520 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2521 		    "device_type", "isa");
2522 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2523 		    "bus-type", "isa");
2524 		(void) ndi_devi_bind_driver(isa_dip, 0);
2525 	}
2526 
2527 	/*
2528 	 * Read in the properties from the boot.
2529 	 */
2530 	get_boot_properties();
2531 
2532 	/* not framebuffer should be enumerated, if present */
2533 	get_vga_properties();
2534 
2535 	/*
2536 	 * Check for administratively disabled drivers.
2537 	 */
2538 	check_driver_disable();
2539 
2540 #if !defined(__xpv)
2541 	if (!post_fastreboot)
2542 		startup_bios_disk();
2543 #endif
2544 	/* do bus dependent probes. */
2545 	impl_bus_initialprobe();
2546 }
2547 
2548 dev_t
2549 getrootdev(void)
2550 {
2551 	/*
2552 	 * Precedence given to rootdev if set in /etc/system
2553 	 */
2554 	if (root_is_svm == B_TRUE) {
2555 		return (ddi_pathname_to_dev_t(svm_bootpath));
2556 	}
2557 
2558 	/*
2559 	 * Usually rootfs.bo_name is initialized by the
2560 	 * the bootpath property from bootenv.rc, but
2561 	 * defaults to "/ramdisk:a" otherwise.
2562 	 */
2563 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2564 }
2565 
/*
 * One registered bus probe routine.  Entries form a singly linked list
 * through next; probe is called with an int argument.
 */
struct bus_probe {
	struct bus_probe *next;
	void (*probe)(int);
};

/* Head of the list of registered bus probes; NULL when the list is empty. */
static struct bus_probe *bus_probes;
2570 
2571 void
2572 impl_bus_add_probe(void (*func)(int))
2573 {
2574 	struct bus_probe *probe;
2575 	struct bus_probe *lastprobe = NULL;
2576 
2577 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2578 	probe->probe = func;
2579 	probe->next = NULL;
2580 
2581 	if (!bus_probes) {
2582 		bus_probes = probe;
2583 		return;
2584 	}
2585 
2586 	lastprobe = bus_probes;
2587 	while (lastprobe->next)
2588 		lastprobe = lastprobe->next;
2589 	lastprobe->next = probe;
2590 }
2591 
2592 /*ARGSUSED*/
2593 void
2594 impl_bus_delete_probe(void (*func)(int))
2595 {
2596 	struct bus_probe *prev = NULL;
2597 	struct bus_probe *probe = bus_probes;
2598 
2599 	while (probe) {
2600 		if (probe->probe == func)
2601 			break;
2602 		prev = probe;
2603 		probe = probe->next;
2604 	}
2605 
2606 	if (probe == NULL)
2607 		return;
2608 
2609 	if (prev)
2610 		prev->next = probe->next;
2611 	else
2612 		bus_probes = probe->next;
2613 
2614 	kmem_free(probe, sizeof (struct bus_probe));
2615 }
2616 
2617 /*
2618  * impl_bus_initialprobe
2619  *	Modload the prom simulator, then let it probe to verify existence
2620  *	and type of PCI support.
2621  */
2622 static void
2623 impl_bus_initialprobe(void)
2624 {
2625 	struct bus_probe *probe;
2626 
2627 	/* load modules to install bus probes */
2628 #if defined(__xpv)
2629 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2630 		if (modload("misc", "pci_autoconfig") < 0) {
2631 			panic("failed to load misc/pci_autoconfig");
2632 		}
2633 
2634 		if (modload("drv", "isa") < 0)
2635 			panic("failed to load drv/isa");
2636 	}
2637 
2638 	(void) modload("misc", "xpv_autoconfig");
2639 #else
2640 	if (modload("misc", "pci_autoconfig") < 0) {
2641 		panic("failed to load misc/pci_autoconfig");
2642 	}
2643 
2644 	(void) modload("misc", "acpidev");
2645 
2646 	if (modload("drv", "isa") < 0)
2647 		panic("failed to load drv/isa");
2648 #endif
2649 
2650 	probe = bus_probes;
2651 	while (probe) {
2652 		/* run the probe functions */
2653 		(*probe->probe)(0);
2654 		probe = probe->next;
2655 	}
2656 }
2657 
2658 /*
2659  * impl_bus_reprobe
2660  *	Reprogram devices not set up by firmware.
2661  */
2662 static void
2663 impl_bus_reprobe(void)
2664 {
2665 	struct bus_probe *probe;
2666 
2667 	probe = bus_probes;
2668 	while (probe) {
2669 		/* run the probe function */
2670 		(*probe->probe)(1);
2671 		probe = probe->next;
2672 	}
2673 }
2674 
2675 
2676 /*
2677  * The following functions ready a cautious request to go up to the nexus
2678  * driver.  It is up to the nexus driver to decide how to process the request.
2679  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2680  * differently.
2681  */
2682 
2683 static void
2684 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2685     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2686     ddi_ctl_enum_t cmd)
2687 {
2688 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2689 
2690 	cautacc_ctlops_arg.size = size;
2691 	cautacc_ctlops_arg.dev_addr = dev_addr;
2692 	cautacc_ctlops_arg.host_addr = host_addr;
2693 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2694 	cautacc_ctlops_arg.repcount = repcount;
2695 	cautacc_ctlops_arg.flags = flags;
2696 
2697 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2698 	    &cautacc_ctlops_arg, NULL);
2699 }
2700 
2701 uint8_t
2702 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2703 {
2704 	uint8_t value;
2705 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2706 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2707 
2708 	return (value);
2709 }
2710 
2711 uint16_t
2712 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2713 {
2714 	uint16_t value;
2715 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2716 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2717 
2718 	return (value);
2719 }
2720 
2721 uint32_t
2722 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2723 {
2724 	uint32_t value;
2725 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2726 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2727 
2728 	return (value);
2729 }
2730 
2731 uint64_t
2732 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2733 {
2734 	uint64_t value;
2735 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2736 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2737 
2738 	return (value);
2739 }
2740 
2741 void
2742 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2743 {
2744 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2745 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2746 }
2747 
2748 void
2749 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2750 {
2751 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2752 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2753 }
2754 
2755 void
2756 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2757 {
2758 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2759 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2760 }
2761 
2762 void
2763 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2764 {
2765 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2766 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2767 }
2768 
2769 void
2770 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2771 	size_t repcount, uint_t flags)
2772 {
2773 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2774 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2775 }
2776 
2777 void
2778 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2779     uint16_t *dev_addr, size_t repcount, uint_t flags)
2780 {
2781 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2782 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2783 }
2784 
2785 void
2786 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2787     uint32_t *dev_addr, size_t repcount, uint_t flags)
2788 {
2789 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2790 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2791 }
2792 
2793 void
2794 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2795     uint64_t *dev_addr, size_t repcount, uint_t flags)
2796 {
2797 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2798 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2799 }
2800 
2801 void
2802 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2803 	size_t repcount, uint_t flags)
2804 {
2805 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2806 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2807 }
2808 
2809 void
2810 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2811     uint16_t *dev_addr, size_t repcount, uint_t flags)
2812 {
2813 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2814 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2815 }
2816 
2817 void
2818 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2819     uint32_t *dev_addr, size_t repcount, uint_t flags)
2820 {
2821 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2822 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2823 }
2824 
2825 void
2826 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2827     uint64_t *dev_addr, size_t repcount, uint_t flags)
2828 {
2829 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2830 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2831 }
2832 
2833 boolean_t
2834 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2835 {
2836 	uint64_t hi_pa;
2837 
2838 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2839 	if (attrp->dma_attr_addr_hi < hi_pa) {
2840 		return (B_TRUE);
2841 	}
2842 
2843 	return (B_FALSE);
2844 }
2845 
2846 size_t
2847 i_ddi_copybuf_size()
2848 {
2849 	return (dma_max_copybuf_size);
2850 }
2851 
2852 /*
2853  * i_ddi_dma_max()
2854  *    returns the maximum DMA size which can be performed in a single DMA
2855  *    window taking into account the devices DMA contraints (attrp), the
2856  *    maximum copy buffer size (if applicable), and the worse case buffer
2857  *    fragmentation.
2858  */
2859 /*ARGSUSED*/
2860 uint32_t
2861 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2862 {
2863 	uint64_t maxxfer;
2864 
2865 
2866 	/*
2867 	 * take the min of maxxfer and the the worse case fragementation
2868 	 * (e.g. every cookie <= 1 page)
2869 	 */
2870 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2871 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2872 
2873 	/*
2874 	 * If the DMA engine can't reach all off memory, we also need to take
2875 	 * the max size of the copybuf into consideration.
2876 	 */
2877 	if (i_ddi_copybuf_required(attrp)) {
2878 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2879 	}
2880 
2881 	/*
2882 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2883 	 * page so it won't be mistaken for an error value during debug.
2884 	 */
2885 	if (maxxfer >= 0xFFFFFFFF) {
2886 		maxxfer = 0xFFFFF000;
2887 	}
2888 
2889 	/*
2890 	 * make sure the value we return is a whole multiple of the
2891 	 * granlarity.
2892 	 */
2893 	if (attrp->dma_attr_granular > 1) {
2894 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2895 	}
2896 
2897 	return ((uint32_t)maxxfer);
2898 }
2899 
2900 /*ARGSUSED*/
2901 void
2902 translate_devid(dev_info_t *dip)
2903 {
2904 }
2905 
2906 pfn_t
2907 i_ddi_paddr_to_pfn(paddr_t paddr)
2908 {
2909 	pfn_t pfn;
2910 
2911 #ifdef __xpv
2912 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2913 		pfn = xen_assign_pfn(mmu_btop(paddr));
2914 	} else {
2915 		pfn = mmu_btop(paddr);
2916 	}
2917 #else
2918 	pfn = mmu_btop(paddr);
2919 #endif
2920 
2921 	return (pfn);
2922 }
2923