xref: /illumos-gate/usr/src/uts/i86pc/os/ddi_impl.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
25  * Copyright 2014 Pluribus Networks, Inc.
26  * Copyright 2016 Nexenta Systems, Inc.
27  * Copyright 2018 Joyent, Inc.
28  */
29 
30 /*
31  * PC specific DDI implementation
32  */
33 #include <sys/types.h>
34 #include <sys/autoconf.h>
35 #include <sys/avintr.h>
36 #include <sys/bootconf.h>
37 #include <sys/conf.h>
38 #include <sys/cpuvar.h>
39 #include <sys/ddi_impldefs.h>
40 #include <sys/ddi_subrdefs.h>
41 #include <sys/ethernet.h>
42 #include <sys/fp.h>
43 #include <sys/instance.h>
44 #include <sys/kmem.h>
45 #include <sys/machsystm.h>
46 #include <sys/modctl.h>
47 #include <sys/promif.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sunndi.h>
50 #include <sys/ndi_impldefs.h>
51 #include <sys/ddi_impldefs.h>
52 #include <sys/sysmacros.h>
53 #include <sys/systeminfo.h>
54 #include <sys/utsname.h>
55 #include <sys/atomic.h>
56 #include <sys/spl.h>
57 #include <sys/archsystm.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/ontrap.h>
60 #include <sys/fm/protocol.h>
61 #include <sys/ramdisk.h>
62 #include <sys/sunndi.h>
63 #include <sys/vmem.h>
64 #include <sys/pci_impl.h>
65 #if defined(__xpv)
66 #include <sys/hypervisor.h>
67 #endif
68 #include <sys/mach_intr.h>
69 #include <vm/hat_i86.h>
70 #include <sys/x86_archext.h>
71 #include <sys/avl.h>
72 #include <sys/font.h>
73 
74 /*
75  * DDI Boot Configuration
76  */
77 
78 /*
79  * Platform drivers on this platform
80  */
/* NULL-terminated list of platform driver module names. */
char *platform_module_list[] = {
	"acpippm",
	"ppm",
	NULL
};
86 
/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/* 1M + 4K */
size_t dma_max_copybuf_size = 0x100000 + 0x1000;

/* Ramdisk bounds; NOTE(review): set outside this part of the file. */
uint64_t ramdisk_start;
uint64_t ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" as a pseudo node rather
 * than through its hardware nodes.
 */
int pseudo_isa = 0;
95 
96 /*
97  * Forward declarations
98  */
99 static int getlongprop_buf();
100 static void get_boot_properties(void);
101 static void impl_bus_initialprobe(void);
102 static void impl_bus_reprobe(void);
103 
104 static int poke_mem(peekpoke_ctlops_t *in_args);
105 static int peek_mem(peekpoke_ctlops_t *in_args);
106 
107 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
108 
109 #if defined(__amd64) && !defined(__xpv)
110 extern void immu_init(void);
111 #endif
112 
113 /*
114  * We use an AVL tree to store contiguous address allocations made with the
115  * kalloca() routine, so that we can return the size to free with kfreea().
116  * Note that in the future it would be vastly faster if we could eliminate
117  * this lookup by insisting that all callers keep track of their own sizes,
118  * just as for kmem_alloc().
119  */
120 struct ctgas {
121 	avl_node_t ctg_link;
122 	void *ctg_addr;
123 	size_t ctg_size;
124 };
125 
126 static avl_tree_t ctgtree;
127 
128 static kmutex_t		ctgmutex;
129 #define	CTGLOCK()	mutex_enter(&ctgmutex)
130 #define	CTGUNLOCK()	mutex_exit(&ctgmutex)
131 
132 /*
133  * Minimum pfn value of page_t's put on the free list.  This is to simplify
134  * support of ddi dma memory requests which specify small, non-zero addr_lo
135  * values.
136  *
137  * The default value of 2, which corresponds to the only known non-zero addr_lo
138  * value used, means a single page will be sacrificed (pfn typically starts
139  * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
140  * otherwise mp startup panics.
141  */
142 pfn_t	ddiphysmin = 2;
143 
144 static void
145 check_driver_disable(void)
146 {
147 	int proplen = 128;
148 	char *prop_name;
149 	char *drv_name, *propval;
150 	major_t major;
151 
152 	prop_name = kmem_alloc(proplen, KM_SLEEP);
153 	for (major = 0; major < devcnt; major++) {
154 		drv_name = ddi_major_to_name(major);
155 		if (drv_name == NULL)
156 			continue;
157 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
158 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
159 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
160 			if (strcmp(propval, "true") == 0) {
161 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
162 				cmn_err(CE_NOTE, "driver %s disabled",
163 				    drv_name);
164 			}
165 			ddi_prop_free(propval);
166 		}
167 	}
168 	kmem_free(prop_name, proplen);
169 }
170 
171 
172 /*
173  * Configure the hardware on the system.
174  * Called before the rootfs is mounted
175  */
176 void
177 configure(void)
178 {
179 	extern void i_ddi_init_root();
180 
181 #if defined(__i386)
182 	extern int fpu_pentium_fdivbug;
183 #endif	/* __i386 */
184 	extern int fpu_ignored;
185 
186 	/*
187 	 * Determine if an FPU is attached
188 	 */
189 
190 	fpu_probe();
191 
192 #if defined(__i386)
193 	if (fpu_pentium_fdivbug) {
194 		printf("\
195 FP hardware exhibits Pentium floating point divide problem\n");
196 	}
197 #endif	/* __i386 */
198 
199 	if (fpu_ignored) {
200 		printf("FP hardware will not be used\n");
201 	} else if (!fpu_exists) {
202 		printf("No FPU in configuration\n");
203 	}
204 
205 	/*
206 	 * Initialize devices on the machine.
207 	 * Uses configuration tree built by the PROMs to determine what
208 	 * is present, and builds a tree of prototype dev_info nodes
209 	 * corresponding to the hardware which identified itself.
210 	 */
211 
212 	/*
213 	 * Initialize root node.
214 	 */
215 	i_ddi_init_root();
216 
217 	/* reprogram devices not set up by firmware (BIOS) */
218 	impl_bus_reprobe();
219 
220 #if defined(__amd64) && !defined(__xpv)
221 	/*
222 	 * Setup but don't startup the IOMMU
223 	 * Startup happens later via a direct call
224 	 * to IOMMU code by boot code.
225 	 * At this point, all PCI bus renumbering
226 	 * is done, so safe to init the IMMU
227 	 * AKA Intel IOMMU.
228 	 */
229 	immu_init();
230 #endif
231 
232 	/*
233 	 * attach the isa nexus to get ACPI resource usage
234 	 * isa is "kind of" a pseudo node
235 	 */
236 #if defined(__xpv)
237 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
238 		if (pseudo_isa)
239 			(void) i_ddi_attach_pseudo_node("isa");
240 		else
241 			(void) i_ddi_attach_hw_nodes("isa");
242 	}
243 #else
244 	if (pseudo_isa)
245 		(void) i_ddi_attach_pseudo_node("isa");
246 	else
247 		(void) i_ddi_attach_hw_nodes("isa");
248 #endif
249 }
250 
251 /*
252  * The "status" property indicates the operational status of a device.
253  * If this property is present, the value is a string indicating the
254  * status of the device as follows:
255  *
256  *	"okay"		operational.
257  *	"disabled"	not operational, but might become operational.
258  *	"fail"		not operational because a fault has been detected,
259  *			and it is unlikely that the device will become
260  *			operational without repair. no additional details
261  *			are available.
262  *	"fail-xxx"	not operational because a fault has been detected,
263  *			and it is unlikely that the device will become
264  *			operational without repair. "xxx" is additional
265  *			human-readable information about the particular
266  *			fault condition that was detected.
267  *
268  * The absence of this property means that the operational status is
269  * unknown or okay.
270  *
271  * This routine checks the status property of the specified device node
272  * and returns 0 if the operational status indicates failure, and 1 otherwise.
273  *
274  * The property may exist on plug-in cards that existed before IEEE 1275-1994.
275  * And, in that case, the property may not even be a string. So we carefully
276  * check for the value "fail", in the beginning of the string, noting
277  * the property length.
278  */
279 int
280 status_okay(int id, char *buf, int buflen)
281 {
282 	char status_buf[OBP_MAXPROPNAME];
283 	char *bufp = buf;
284 	int len = buflen;
285 	int proplen;
286 	static const char *status = "status";
287 	static const char *fail = "fail";
288 	int fail_len = (int)strlen(fail);
289 
290 	/*
291 	 * Get the proplen ... if it's smaller than "fail",
292 	 * or doesn't exist ... then we don't care, since
293 	 * the value can't begin with the char string "fail".
294 	 *
295 	 * NB: proplen, if it's a string, includes the NULL in the
296 	 * the size of the property, and fail_len does not.
297 	 */
298 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
299 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
300 		return (1);
301 
302 	/*
303 	 * if a buffer was provided, use it
304 	 */
305 	if ((buf == (char *)NULL) || (buflen <= 0)) {
306 		bufp = status_buf;
307 		len = sizeof (status_buf);
308 	}
309 	*bufp = (char)0;
310 
311 	/*
312 	 * Get the property into the buffer, to the extent of the buffer,
313 	 * and in case the buffer is smaller than the property size,
314 	 * NULL terminate the buffer. (This handles the case where
315 	 * a buffer was passed in and the caller wants to print the
316 	 * value, but the buffer was too small).
317 	 */
318 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
319 	    (caddr_t)bufp, len);
320 	*(bufp + len - 1) = (char)0;
321 
322 	/*
323 	 * If the value begins with the char string "fail",
324 	 * then it means the node is failed. We don't care
325 	 * about any other values. We assume the node is ok
326 	 * although it might be 'disabled'.
327 	 */
328 	if (strncmp(bufp, fail, fail_len) == 0)
329 		return (0);
330 
331 	return (1);
332 }
333 
334 /*
335  * Check the status of the device node passed as an argument.
336  *
337  *	if ((status is OKAY) || (status is DISABLED))
338  *		return DDI_SUCCESS
339  *	else
340  *		print a warning and return DDI_FAILURE
341  */
342 /*ARGSUSED1*/
343 int
344 check_status(int id, char *name, dev_info_t *parent)
345 {
346 	char status_buf[64];
347 	char devtype_buf[OBP_MAXPROPNAME];
348 	int retval = DDI_FAILURE;
349 
350 	/*
351 	 * is the status okay?
352 	 */
353 	if (status_okay(id, status_buf, sizeof (status_buf)))
354 		return (DDI_SUCCESS);
355 
356 	/*
357 	 * a status property indicating bad memory will be associated
358 	 * with a node which has a "device_type" property with a value of
359 	 * "memory-controller". in this situation, return DDI_SUCCESS
360 	 */
361 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
362 	    sizeof (devtype_buf)) > 0) {
363 		if (strcmp(devtype_buf, "memory-controller") == 0)
364 			retval = DDI_SUCCESS;
365 	}
366 
367 	/*
368 	 * print the status property information
369 	 */
370 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
371 	return (retval);
372 }
373 
374 /*ARGSUSED*/
375 uint_t
376 softlevel1(caddr_t arg1, caddr_t arg2)
377 {
378 	softint();
379 	return (1);
380 }
381 
382 /*
383  * Allow for implementation specific correction of PROM property values.
384  */
385 
386 /*ARGSUSED*/
387 void
388 impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
389     caddr_t buffer)
390 {
391 	/*
392 	 * There are no adjustments needed in this implementation.
393 	 */
394 }
395 
396 static int
397 getlongprop_buf(int id, char *name, char *buf, int maxlen)
398 {
399 	int size;
400 
401 	size = prom_getproplen((pnode_t)id, name);
402 	if (size <= 0 || (size > maxlen - 1))
403 		return (-1);
404 
405 	if (-1 == prom_getprop((pnode_t)id, name, buf))
406 		return (-1);
407 
408 	if (strcmp("name", name) == 0) {
409 		if (buf[size - 1] != '\0') {
410 			buf[size] = '\0';
411 			size += 1;
412 		}
413 	}
414 
415 	return (size);
416 }
417 
418 static int
419 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
420 {
421 	int ret;
422 
423 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
424 	    DDI_PROP_DONTPASS, pname, pval, plen))
425 	    == DDI_PROP_SUCCESS) {
426 		*plen = (*plen) * (sizeof (int));
427 	}
428 	return (ret);
429 }
430 
431 
432 /*
433  * Node Configuration
434  */
435 
436 struct prop_ispec {
437 	uint_t	pri, vec;
438 };
439 
440 /*
441  * For the x86, we're prepared to claim that the interrupt string
442  * is in the form of a list of <ipl,vec> specifications.
443  */
444 
445 #define	VEC_MIN	1
446 #define	VEC_MAX	255
447 
448 static int
449 impl_xlate_intrs(dev_info_t *child, int *in,
450     struct ddi_parent_private_data *pdptr)
451 {
452 	size_t size;
453 	int n;
454 	struct intrspec *new;
455 	caddr_t got_prop;
456 	int *inpri;
457 	int got_len;
458 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
459 
460 	static char bad_intr_fmt[] =
461 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
462 
463 	/*
464 	 * determine if the driver is expecting the new style "interrupts"
465 	 * property which just contains the IRQ, or the old style which
466 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
467 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
468 	 * in that case, the "interrupt-priorities" property contains the
469 	 * IPL values that match, one for one, the IRQ values in the
470 	 * "interrupts" property.
471 	 */
472 	inpri = NULL;
473 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
474 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
475 		/* the old style "interrupts" property... */
476 
477 		/*
478 		 * The list consists of <ipl,vec> elements
479 		 */
480 		if ((n = (*in++ >> 1)) < 1)
481 			return (DDI_FAILURE);
482 
483 		pdptr->par_nintr = n;
484 		size = n * sizeof (struct intrspec);
485 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
486 
487 		while (n--) {
488 			int level = *in++;
489 			int vec = *in++;
490 
491 			if (level < 1 || level > MAXIPL ||
492 			    vec < VEC_MIN || vec > VEC_MAX) {
493 				cmn_err(CE_CONT, bad_intr_fmt,
494 				    DEVI(child)->devi_name,
495 				    DEVI(child)->devi_instance, level, vec);
496 				goto broken;
497 			}
498 			new->intrspec_pri = level;
499 			if (vec != 2)
500 				new->intrspec_vec = vec;
501 			else
502 				/*
503 				 * irq 2 on the PC bus is tied to irq 9
504 				 * on ISA, EISA and MicroChannel
505 				 */
506 				new->intrspec_vec = 9;
507 			new++;
508 		}
509 
510 		return (DDI_SUCCESS);
511 	} else {
512 		/* the new style "interrupts" property... */
513 
514 		/*
515 		 * The list consists of <vec> elements
516 		 */
517 		if ((n = (*in++)) < 1)
518 			return (DDI_FAILURE);
519 
520 		pdptr->par_nintr = n;
521 		size = n * sizeof (struct intrspec);
522 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
523 
524 		/* XXX check for "interrupt-priorities" property... */
525 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
526 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
527 		    == DDI_PROP_SUCCESS) {
528 			if (n != (got_len / sizeof (int))) {
529 				cmn_err(CE_CONT,
530 				    "bad interrupt-priorities length"
531 				    " from %s%d: expected %d, got %d\n",
532 				    DEVI(child)->devi_name,
533 				    DEVI(child)->devi_instance, n,
534 				    (int)(got_len / sizeof (int)));
535 				goto broken;
536 			}
537 			inpri = (int *)got_prop;
538 		}
539 
540 		while (n--) {
541 			int level;
542 			int vec = *in++;
543 
544 			if (inpri == NULL)
545 				level = 5;
546 			else
547 				level = *inpri++;
548 
549 			if (level < 1 || level > MAXIPL ||
550 			    vec < VEC_MIN || vec > VEC_MAX) {
551 				cmn_err(CE_CONT, bad_intr_fmt,
552 				    DEVI(child)->devi_name,
553 				    DEVI(child)->devi_instance, level, vec);
554 				goto broken;
555 			}
556 			new->intrspec_pri = level;
557 			if (vec != 2)
558 				new->intrspec_vec = vec;
559 			else
560 				/*
561 				 * irq 2 on the PC bus is tied to irq 9
562 				 * on ISA, EISA and MicroChannel
563 				 */
564 				new->intrspec_vec = 9;
565 			new++;
566 		}
567 
568 		if (inpri != NULL)
569 			kmem_free(got_prop, got_len);
570 		return (DDI_SUCCESS);
571 	}
572 
573 broken:
574 	kmem_free(pdptr->par_intr, size);
575 	pdptr->par_intr = NULL;
576 	pdptr->par_nintr = 0;
577 	if (inpri != NULL)
578 		kmem_free(got_prop, got_len);
579 
580 	return (DDI_FAILURE);
581 }
582 
583 /*
584  * Create a ddi_parent_private_data structure from the ddi properties of
585  * the dev_info node.
586  *
587  * The "reg" and either an "intr" or "interrupts" properties are required
588  * if the driver wishes to create mappings or field interrupts on behalf
589  * of the device.
590  *
591  * The "reg" property is assumed to be a list of at least one triple
592  *
593  *	<bustype, address, size>*1
594  *
595  * The "intr" property is assumed to be a list of at least one duple
596  *
597  *	<SPARC ipl, vector#>*1
598  *
599  * The "interrupts" property is assumed to be a list of at least one
600  * n-tuples that describes the interrupt capabilities of the bus the device
601  * is connected to.  For SBus, this looks like
602  *
603  *	<SBus-level>*1
604  *
605  * (This property obsoletes the 'intr' property).
606  *
607  * The "ranges" property is optional.
608  */
609 void
610 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
611 {
612 	struct ddi_parent_private_data *pdptr;
613 	int n;
614 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
615 	uint_t reg_len, rng_len, intr_len, irupts_len;
616 
617 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
618 
619 	/*
620 	 * Handle the 'reg' property.
621 	 */
622 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
623 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
624 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
625 		pdptr->par_reg = (struct regspec *)reg_prop;
626 	}
627 
628 	/*
629 	 * See if I have a range (adding one where needed - this
630 	 * means to add one for sbus node in sun4c, when romvec > 0,
631 	 * if no range is already defined in the PROM node.
632 	 * (Currently no sun4c PROMS define range properties,
633 	 * but they should and may in the future.)  For the SBus
634 	 * node, the range is defined by the SBus reg property.
635 	 */
636 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
637 	    == DDI_PROP_SUCCESS) {
638 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
639 		pdptr->par_rng = (struct rangespec *)rng_prop;
640 	}
641 
642 	/*
643 	 * Handle the 'intr' and 'interrupts' properties
644 	 */
645 
646 	/*
647 	 * For backwards compatibility
648 	 * we first look for the 'intr' property for the device.
649 	 */
650 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
651 	    != DDI_PROP_SUCCESS) {
652 		intr_len = 0;
653 	}
654 
655 	/*
656 	 * If we're to support bus adapters and future platforms cleanly,
657 	 * we need to support the generalized 'interrupts' property.
658 	 */
659 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
660 	    &irupts_len) != DDI_PROP_SUCCESS) {
661 		irupts_len = 0;
662 	} else if (intr_len != 0) {
663 		/*
664 		 * If both 'intr' and 'interrupts' are defined,
665 		 * then 'interrupts' wins and we toss the 'intr' away.
666 		 */
667 		ddi_prop_free((void *)intr_prop);
668 		intr_len = 0;
669 	}
670 
671 	if (intr_len != 0) {
672 
673 		/*
674 		 * Translate the 'intr' property into an array
675 		 * an array of struct intrspec's.  There's not really
676 		 * very much to do here except copy what's out there.
677 		 */
678 
679 		struct intrspec *new;
680 		struct prop_ispec *l;
681 
682 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
683 		l = (struct prop_ispec *)intr_prop;
684 		pdptr->par_intr =
685 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
686 		while (n--) {
687 			new->intrspec_pri = l->pri;
688 			new->intrspec_vec = l->vec;
689 			new++;
690 			l++;
691 		}
692 		ddi_prop_free((void *)intr_prop);
693 
694 	} else if ((n = irupts_len) != 0) {
695 		size_t size;
696 		int *out;
697 
698 		/*
699 		 * Translate the 'interrupts' property into an array
700 		 * of intrspecs for the rest of the DDI framework to
701 		 * toy with.  Only our ancestors really know how to
702 		 * do this, so ask 'em.  We massage the 'interrupts'
703 		 * property so that it is pre-pended by a count of
704 		 * the number of integers in the argument.
705 		 */
706 		size = sizeof (int) + n;
707 		out = kmem_alloc(size, KM_SLEEP);
708 		*out = n / sizeof (int);
709 		bcopy(irupts_prop, out + 1, (size_t)n);
710 		ddi_prop_free((void *)irupts_prop);
711 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
712 			cmn_err(CE_CONT,
713 			    "Unable to translate 'interrupts' for %s%d\n",
714 			    DEVI(child)->devi_binding_name,
715 			    DEVI(child)->devi_instance);
716 		}
717 		kmem_free(out, size);
718 	}
719 }
720 
721 /*
722  * Name a child
723  */
724 static int
725 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
726 {
727 	/*
728 	 * Fill in parent-private data and this function returns to us
729 	 * an indication if it used "registers" to fill in the data.
730 	 */
731 	if (ddi_get_parent_data(child) == NULL) {
732 		struct ddi_parent_private_data *pdptr;
733 		make_ddi_ppd(child, &pdptr);
734 		ddi_set_parent_data(child, pdptr);
735 	}
736 
737 	name[0] = '\0';
738 	if (sparc_pd_getnreg(child) > 0) {
739 		(void) snprintf(name, namelen, "%x,%x",
740 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
741 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
742 	}
743 
744 	return (DDI_SUCCESS);
745 }
746 
747 /*
748  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
749  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
750  * the children of sun busses based on the reg spec.
751  *
752  * Handles the following properties (in make_ddi_ppd):
753  *	Property		value
754  *	  Name			type
755  *	reg		register spec
756  *	intr		old-form interrupt spec
757  *	interrupts	new (bus-oriented) interrupt spec
758  *	ranges		range spec
759  */
760 int
761 impl_ddi_sunbus_initchild(dev_info_t *child)
762 {
763 	char name[MAXNAMELEN];
764 	void impl_ddi_sunbus_removechild(dev_info_t *);
765 
766 	/*
767 	 * Name the child, also makes parent private data
768 	 */
769 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
770 	ddi_set_name_addr(child, name);
771 
772 	/*
773 	 * Attempt to merge a .conf node; if successful, remove the
774 	 * .conf node.
775 	 */
776 	if ((ndi_dev_is_persistent_node(child) == 0) &&
777 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
778 		/*
779 		 * Return failure to remove node
780 		 */
781 		impl_ddi_sunbus_removechild(child);
782 		return (DDI_FAILURE);
783 	}
784 	return (DDI_SUCCESS);
785 }
786 
787 void
788 impl_free_ddi_ppd(dev_info_t *dip)
789 {
790 	struct ddi_parent_private_data *pdptr;
791 	size_t n;
792 
793 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
794 		return;
795 
796 	if ((n = (size_t)pdptr->par_nintr) != 0)
797 		/*
798 		 * Note that kmem_free is used here (instead of
799 		 * ddi_prop_free) because the contents of the
800 		 * property were placed into a separate buffer and
801 		 * mucked with a bit before being stored in par_intr.
802 		 * The actual return value from the prop lookup
803 		 * was freed with ddi_prop_free previously.
804 		 */
805 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
806 
807 	if ((n = (size_t)pdptr->par_nrng) != 0)
808 		ddi_prop_free((void *)pdptr->par_rng);
809 
810 	if ((n = pdptr->par_nreg) != 0)
811 		ddi_prop_free((void *)pdptr->par_reg);
812 
813 	kmem_free(pdptr, sizeof (*pdptr));
814 	ddi_set_parent_data(dip, NULL);
815 }
816 
817 void
818 impl_ddi_sunbus_removechild(dev_info_t *dip)
819 {
820 	impl_free_ddi_ppd(dip);
821 	ddi_set_name_addr(dip, NULL);
822 	/*
823 	 * Strip the node to properly convert it back to prototype form
824 	 */
825 	impl_rem_dev_props(dip);
826 }
827 
828 /*
829  * DDI Interrupt
830  */
831 
832 /*
833  * turn this on to force isa, eisa, and mca device to ignore the new
834  * hardware nodes in the device tree (normally turned on only for
835  * drivers that need it by setting the property "ignore-hardware-nodes"
836  * in their driver.conf file).
837  *
838  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
839  *		as safety valve.
840  */
int ignore_hardware_nodes = 0;	/* non-zero forces old-style "interrupts" */

/*
 * Local data
 */
static struct impl_bus_promops	*impl_busp;
847 
848 
849 /*
850  * New DDI interrupt framework
851  */
852 
853 /*
854  * i_ddi_intr_ops:
855  *
856  * This is the interrupt operator function wrapper for the bus function
857  * bus_intr_op.
858  */
859 int
860 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
861     ddi_intr_handle_impl_t *hdlp, void * result)
862 {
863 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
864 	int		ret = DDI_FAILURE;
865 
866 	/* request parent to process this interrupt op */
867 	if (NEXUS_HAS_INTR_OP(pdip))
868 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
869 		    pdip, rdip, op, hdlp, result);
870 	else
871 		cmn_err(CE_WARN, "Failed to process interrupt "
872 		    "for %s%d due to down-rev nexus driver %s%d",
873 		    ddi_get_name(rdip), ddi_get_instance(rdip),
874 		    ddi_get_name(pdip), ddi_get_instance(pdip));
875 	return (ret);
876 }
877 
878 /*
879  * i_ddi_add_softint - allocate and add a soft interrupt to the system
880  */
881 int
882 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
883 {
884 	int ret;
885 
886 	/* add soft interrupt handler */
887 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
888 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
889 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
890 }
891 
892 
893 void
894 i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
895 {
896 	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
897 }
898 
899 
900 extern void (*setsoftint)(int, struct av_softinfo *);
901 extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
902 
903 int
904 i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
905 {
906 	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
907 		return (DDI_EPENDING);
908 
909 	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);
910 
911 	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
912 	return (DDI_SUCCESS);
913 }
914 
915 /*
916  * i_ddi_set_softint_pri:
917  *
918  * The way this works is that it first tries to add a softint vector
919  * at the new priority in hdlp. If that succeeds; then it removes the
920  * existing softint vector at the old priority.
921  */
922 int
923 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
924 {
925 	int ret;
926 
927 	/*
928 	 * If a softint is pending at the old priority then fail the request.
929 	 */
930 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
931 		return (DDI_FAILURE);
932 
933 	ret = av_softint_movepri((void *)hdlp, old_pri);
934 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
935 }
936 
937 void
938 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
939 {
940 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
941 }
942 
943 void
944 i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
945 {
946 	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
947 	hdlp->ih_private = NULL;
948 }
949 
950 int
951 i_ddi_get_intx_nintrs(dev_info_t *dip)
952 {
953 	struct ddi_parent_private_data *pdp;
954 
955 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
956 		return (0);
957 
958 	return (pdp->par_nintr);
959 }
960 
961 /*
962  * DDI Memory/DMA
963  */
964 
965 /*
966  * Support for allocating DMAable memory to implement
967  * ddi_dma_mem_alloc(9F) interface.
968  */
969 
/*
 * KA_ALIGN (1 << 7 = 128) is the smallest kmem_io cache object size.
 * kmem_io_init() creates KA_NCACHE caches per arena with object sizes
 * KA_ALIGN << c for c in [0, KA_NCACHE).
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
973 
974 /*
975  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
976  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
977  */
978 
979 static ddi_dma_attr_t kmem_io_attr = {
980 	DMA_ATTR_V0,
981 	0x0000000000000000ULL,		/* dma_attr_addr_lo */
982 	0x0000000000000000ULL,		/* dma_attr_addr_hi */
983 	0x00ffffff,
984 	0x1000,				/* dma_attr_align */
985 	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
986 };
987 
988 /* kmem io memory ranges and indices */
989 enum {
990 	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
991 	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
992 };
993 
994 static struct {
995 	vmem_t		*kmem_io_arena;
996 	kmem_cache_t	*kmem_io_cache[KA_NCACHE];
997 	ddi_dma_attr_t	kmem_io_attr;
998 } kmem_io[MAX_MEM_RANGES];
999 
1000 static int kmem_io_idx;		/* index of first populated kmem_io[] */
1001 
1002 static page_t *
1003 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
1004 {
1005 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1006 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1007 
1008 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1009 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1010 }
1011 
#ifdef __xpv
/*
 * Free routine used for the kmem_io arenas when built for the
 * hypervisor: pages are released through page_destroy_io().
 */
static void
segkmem_free_io(vmem_t *vmp, void *ptr, size_t size)
{
	extern void page_destroy_io(page_t *);

	segkmem_xfree(vmp, ptr, size, &kvp, page_destroy_io);
}
#endif	/* __xpv */
1020 
1021 static void *
1022 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1023 {
1024 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1025 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1026 }
1027 
1028 static void *
1029 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1030 {
1031 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1032 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1033 }
1034 
1035 static void *
1036 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1037 {
1038 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1039 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1040 }
1041 
1042 static void *
1043 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1044 {
1045 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1046 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1047 }
1048 
1049 static void *
1050 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1051 {
1052 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1053 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1054 }
1055 
1056 static void *
1057 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1058 {
1059 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1060 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1061 }
1062 
1063 static void *
1064 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1065 {
1066 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1067 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1068 }
1069 
1070 static void *
1071 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1072 {
1073 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1074 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1075 }
1076 
1077 static void *
1078 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1079 {
1080 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1081 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1082 }
1083 
1084 static void *
1085 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1086 {
1087 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1088 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1089 }
1090 
1091 static void *
1092 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1093 {
1094 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1095 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1096 }
1097 
/*
 * Static description of each kmem_io memory range, listed from the
 * highest address limit down to the lowest.  kmem_io_init() uses io_name
 * and io_alloc to create the arena for a range; ka_init() copies io_limit
 * into the range's dma_attr_addr_hi and may raise the limit of the top
 * populated range to the actual maximum physical address.
 */
struct {
	/* highest physical address allowed for memory from this range */
	uint64_t	io_limit;
	/* arena name; also the prefix of the per-size cache names */
	char		*io_name;
	/* import routine handed to vmem_create() for this range */
	void		*(*io_alloc)(vmem_t *, size_t, int);
	int		io_initial;	/* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
};
1116 
1117 void
1118 kmem_io_init(int a)
1119 {
1120 	int	c;
1121 	char name[40];
1122 
1123 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1124 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1125 #ifdef __xpv
1126 	    segkmem_free_io,
1127 #else
1128 	    segkmem_free,
1129 #endif
1130 	    heap_arena, 0, VM_SLEEP);
1131 
1132 	for (c = 0; c < KA_NCACHE; c++) {
1133 		size_t size = KA_ALIGN << c;
1134 		(void) sprintf(name, "%s_%lu",
1135 		    io_arena_params[a].io_name, size);
1136 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1137 		    size, size, NULL, NULL, NULL, NULL,
1138 		    kmem_io[a].kmem_io_arena, 0);
1139 	}
1140 }
1141 
1142 /*
1143  * Return the index of the highest memory range for addr.
1144  */
1145 static int
1146 kmem_io_index(uint64_t addr)
1147 {
1148 	int n;
1149 
1150 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1151 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1152 			if (kmem_io[n].kmem_io_arena == NULL)
1153 				kmem_io_init(n);
1154 			return (n);
1155 		}
1156 	}
1157 	panic("kmem_io_index: invalid addr - must be at least 16m");
1158 
1159 	/*NOTREACHED*/
1160 }
1161 
1162 /*
1163  * Return the index of the next kmem_io populated memory range
1164  * after curindex.
1165  */
1166 static int
1167 kmem_io_index_next(int curindex)
1168 {
1169 	int n;
1170 
1171 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1172 		if (kmem_io[n].kmem_io_arena)
1173 			return (n);
1174 	}
1175 	return (-1);
1176 }
1177 
1178 /*
1179  * allow kmem to be mapped in with different PTE cache attribute settings.
1180  * Used by i_ddi_mem_alloc()
1181  */
1182 int
1183 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1184 {
1185 	uint_t hat_flags;
1186 	caddr_t kva_end;
1187 	uint_t hat_attr;
1188 	pfn_t pfn;
1189 
1190 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1191 		return (-1);
1192 	}
1193 
1194 	hat_attr &= ~HAT_ORDER_MASK;
1195 	hat_attr |= order | HAT_NOSYNC;
1196 	hat_flags = HAT_LOAD_LOCK;
1197 
1198 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1199 	    (uintptr_t)PAGEMASK);
1200 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1201 
1202 	while (kva < kva_end) {
1203 		pfn = hat_getpfnum(kas.a_hat, kva);
1204 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1205 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1206 		kva += MMU_PAGESIZE;
1207 	}
1208 
1209 	return (0);
1210 }
1211 
1212 static int
1213 ctgcompare(const void *a1, const void *a2)
1214 {
1215 	/* we just want to compare virtual addresses */
1216 	a1 = ((struct ctgas *)a1)->ctg_addr;
1217 	a2 = ((struct ctgas *)a2)->ctg_addr;
1218 	return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1219 }
1220 
1221 void
1222 ka_init(void)
1223 {
1224 	int a;
1225 	paddr_t maxphysaddr;
1226 #if !defined(__xpv)
1227 	extern pfn_t physmax;
1228 
1229 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1230 #else
1231 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1232 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1233 #endif
1234 
1235 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1236 
1237 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1238 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1239 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1240 				io_arena_params[a].io_limit = maxphysaddr;
1241 			else
1242 				a++;
1243 			break;
1244 		}
1245 	}
1246 	kmem_io_idx = a;
1247 
1248 	for (; a < MAX_MEM_RANGES; a++) {
1249 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1250 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1251 		    io_arena_params[a].io_limit;
1252 		/*
1253 		 * initialize kmem_io[] arena/cache corresponding to
1254 		 * maxphysaddr and to the "common" io memory ranges that
1255 		 * have io_initial set to a non-zero value.
1256 		 */
1257 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1258 			kmem_io_init(a);
1259 	}
1260 
1261 	/* initialize ctgtree */
1262 	avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1263 	    offsetof(struct ctgas, ctg_link));
1264 }
1265 
1266 /*
1267  * put contig address/size
1268  */
1269 static void *
1270 putctgas(void *addr, size_t size)
1271 {
1272 	struct ctgas    *ctgp;
1273 	if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1274 		ctgp->ctg_addr = addr;
1275 		ctgp->ctg_size = size;
1276 		CTGLOCK();
1277 		avl_add(&ctgtree, ctgp);
1278 		CTGUNLOCK();
1279 	}
1280 	return (ctgp);
1281 }
1282 
1283 /*
1284  * get contig size by addr
1285  */
1286 static size_t
1287 getctgsz(void *addr)
1288 {
1289 	struct ctgas    *ctgp;
1290 	struct ctgas    find;
1291 	size_t		sz = 0;
1292 
1293 	find.ctg_addr = addr;
1294 	CTGLOCK();
1295 	if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1296 		avl_remove(&ctgtree, ctgp);
1297 	}
1298 	CTGUNLOCK();
1299 
1300 	if (ctgp != NULL) {
1301 		sz = ctgp->ctg_size;
1302 		kmem_free(ctgp, sizeof (*ctgp));
1303 	}
1304 
1305 	return (sz);
1306 }
1307 
1308 /*
1309  * contig_alloc:
1310  *
1311  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1312  *	specified in 'attr'.
1313  *
1314  *	Not all of memory need to be physically contiguous if the
1315  *	scatter-gather list length is greater than 1.
1316  */
1317 
1318 /*ARGSUSED*/
1319 void *
1320 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1321 {
1322 	pgcnt_t		pgcnt = btopr(size);
1323 	size_t		asize = pgcnt * PAGESIZE;
1324 	page_t		*ppl;
1325 	int		pflag;
1326 	void		*addr;
1327 
1328 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1329 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1330 
1331 	/* segkmem_xalloc */
1332 
1333 	if (align <= PAGESIZE)
1334 		addr = vmem_alloc(heap_arena, asize,
1335 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1336 	else
1337 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1338 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1339 	if (addr) {
1340 		ASSERT(!((uintptr_t)addr & (align - 1)));
1341 
1342 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1343 			vmem_free(heap_arena, addr, asize);
1344 			return (NULL);
1345 		}
1346 		pflag = PG_EXCL;
1347 
1348 		if (cansleep)
1349 			pflag |= PG_WAIT;
1350 
1351 		/* 4k req gets from freelists rather than pfn search */
1352 		if (pgcnt > 1 || align > PAGESIZE)
1353 			pflag |= PG_PHYSCONTIG;
1354 
1355 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1356 		    asize, pflag, &kas, (caddr_t)addr, attr);
1357 
1358 		if (!ppl) {
1359 			vmem_free(heap_arena, addr, asize);
1360 			page_unresv(pgcnt);
1361 			return (NULL);
1362 		}
1363 
1364 		while (ppl != NULL) {
1365 			page_t	*pp = ppl;
1366 			page_sub(&ppl, pp);
1367 			ASSERT(page_iolock_assert(pp));
1368 			page_io_unlock(pp);
1369 			page_downgrade(pp);
1370 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1371 			    pp, (PROT_ALL & ~PROT_USER) |
1372 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1373 		}
1374 	}
1375 	return (addr);
1376 }
1377 
1378 void
1379 contig_free(void *addr, size_t size)
1380 {
1381 	pgcnt_t	pgcnt = btopr(size);
1382 	size_t	asize = pgcnt * PAGESIZE;
1383 	caddr_t	a, ea;
1384 	page_t	*pp;
1385 
1386 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1387 
1388 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1389 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1390 		if (!pp)
1391 			panic("contig_free: contig pp not found");
1392 
1393 		if (!page_tryupgrade(pp)) {
1394 			page_unlock(pp);
1395 			pp = page_lookup(&kvp,
1396 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1397 			if (pp == NULL)
1398 				panic("contig_free: page freed");
1399 		}
1400 		page_destroy(pp, 0);
1401 	}
1402 
1403 	page_unresv(pgcnt);
1404 	vmem_free(heap_arena, addr, asize);
1405 }
1406 
/*
 * Allocate from the system, aligned on a specific boundary.
 * The alignment, if non-zero, must be a power of 2.
 *
 * Two kinds of memory are handed back:
 *
 *  - When physcontig is set and the rounded size exceeds PAGESIZE, the
 *    address comes straight from contig_alloc() and its size is recorded
 *    with putctgas().  No header precedes such an address.
 *
 *  - Otherwise the memory comes from the kmem_io arena (sizes above
 *    PAGESIZE) or one of the kmem_io caches selected for
 *    attr->dma_attr_addr_hi, and four size_t words are stored immediately
 *    below the returned address: the cache pointer, the arena pointer,
 *    the raw allocation address and the rounded size.  kfreea() reads
 *    these words to free the memory.
 *
 * Returns NULL if attr->dma_attr_addr_lo lies above mmu_ptob(ddiphysmin)
 * or if no memory could be obtained.
 */
static void *
kalloca(size_t size, size_t align, int cansleep, int physcontig,
    ddi_dma_attr_t *attr)
{
	size_t *addr, *raddr, rsize;
	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
	int a, i, c;
	vmem_t *vmp = NULL;
	kmem_cache_t *cp = NULL;

	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
		return (NULL);

	/* the alignment also has to leave room for the header */
	align = MAX(align, hdrsize);
	ASSERT((align & (align - 1)) == 0);

	/*
	 * All of our allocators guarantee 16-byte alignment, so we don't
	 * need to reserve additional space for the header.
	 * To simplify picking the correct kmem_io_cache, we round up to
	 * a multiple of KA_ALIGN.
	 */
	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);

	if (physcontig && rsize > PAGESIZE) {
		if (addr = contig_alloc(size, attr, align, cansleep)) {
			/*
			 * The size must be recorded so kfreea() can find it;
			 * if that fails the allocation is given back.
			 */
			if (!putctgas(addr, size))
				contig_free(addr, size);
			else
				return (addr);
		}
		return (NULL);
	}

	a = kmem_io_index(attr->dma_attr_addr_hi);

	if (rsize > PAGESIZE) {
		vmp = kmem_io[a].kmem_io_arena;
		raddr = vmem_alloc(vmp, rsize,
		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
	} else {
		/*
		 * Pick the cache for rsize; kmem_io_init() sizes cache n
		 * at KA_ALIGN << n.
		 */
		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
		cp = kmem_io[a].kmem_io_cache[c];
		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
		    KM_NOSLEEP);
	}

	if (raddr == NULL) {
		int	na;

		ASSERT(cansleep == 0);
		if (rsize > PAGESIZE)
			return (NULL);
		/*
		 * System does not have memory in the requested range.
		 * Try smaller kmem io ranges and larger cache sizes
		 * to see if there might be memory available in
		 * these other caches.
		 */

		for (na = kmem_io_index_next(a); na >= 0;
		    na = kmem_io_index_next(na)) {
			ASSERT(kmem_io[na].kmem_io_arena);
			cp = kmem_io[na].kmem_io_cache[c];
			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
			if (raddr)
				goto kallocdone;
		}
		/* now try the larger kmem io cache sizes */
		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
			for (i = c + 1; i < KA_NCACHE; i++) {
				cp = kmem_io[na].kmem_io_cache[i];
				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
				if (raddr)
					goto kallocdone;
			}
		}
		return (NULL);
	}

kallocdone:
	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
	    rsize > PAGESIZE);

	/* first aligned address that leaves hdrsize bytes below it */
	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);

	/* header consumed by kfreea(); cp is NULL for arena allocations */
	addr[-4] = (size_t)cp;
	addr[-3] = (size_t)vmp;
	addr[-2] = (size_t)raddr;
	addr[-1] = rsize;

	return (addr);
}
1505 
1506 static void
1507 kfreea(void *addr)
1508 {
1509 	size_t		size;
1510 
1511 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1512 		contig_free(addr, size);
1513 	} else {
1514 		size_t	*saddr = addr;
1515 		if (saddr[-4] == 0)
1516 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1517 			    saddr[-1]);
1518 		else
1519 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1520 			    (void *)saddr[-2]);
1521 	}
1522 }
1523 
/*
 * Intentionally empty in this file: neither argument is examined and
 * *hataccp is left untouched.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1529 
1530 /*
1531  * Check if the specified cache attribute is supported on the platform.
1532  * This function must be called before i_ddi_cacheattr_to_hatacc().
1533  */
1534 boolean_t
1535 i_ddi_check_cache_attr(uint_t flags)
1536 {
1537 	/*
1538 	 * The cache attributes are mutually exclusive. Any combination of
1539 	 * the attributes leads to a failure.
1540 	 */
1541 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1542 	if ((cache_attr != 0) && !ISP2(cache_attr))
1543 		return (B_FALSE);
1544 
1545 	/* All cache attributes are supported on X86/X64 */
1546 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1547 	    IOMEM_DATA_UC_WR_COMBINE))
1548 		return (B_TRUE);
1549 
1550 	/* undefined attributes */
1551 	return (B_FALSE);
1552 }
1553 
1554 /* set HAT cache attributes from the cache attributes */
1555 void
1556 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1557 {
1558 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1559 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1560 
1561 	/*
1562 	 * If write-combining is not supported, then it falls back
1563 	 * to uncacheable.
1564 	 */
1565 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1566 	    !is_x86_feature(x86_featureset, X86FSET_PAT))
1567 		cache_attr = IOMEM_DATA_UNCACHED;
1568 
1569 	/*
1570 	 * set HAT attrs according to the cache attrs.
1571 	 */
1572 	switch (cache_attr) {
1573 	case IOMEM_DATA_UNCACHED:
1574 		*hataccp &= ~HAT_ORDER_MASK;
1575 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1576 		break;
1577 	case IOMEM_DATA_UC_WR_COMBINE:
1578 		*hataccp &= ~HAT_ORDER_MASK;
1579 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1580 		break;
1581 	case IOMEM_DATA_CACHED:
1582 		*hataccp &= ~HAT_ORDER_MASK;
1583 		*hataccp |= HAT_UNORDERED_OK;
1584 		break;
1585 	/*
1586 	 * This case must not occur because the cache attribute is scrutinized
1587 	 * before this function is called.
1588 	 */
1589 	default:
1590 		/*
1591 		 * set cacheable to hat attrs.
1592 		 */
1593 		*hataccp &= ~HAT_ORDER_MASK;
1594 		*hataccp |= HAT_UNORDERED_OK;
1595 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1596 		    fname, cache_attr);
1597 	}
1598 }
1599 
1600 /*
1601  * This should actually be called i_ddi_dma_mem_alloc. There should
1602  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1603  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1604  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1605  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1606  * so far which is used for both, DMA and PIO, we have to use the DMA
1607  * ctl ops to make everybody happy.
1608  */
1609 /*ARGSUSED*/
1610 int
1611 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1612     size_t length, int cansleep, int flags,
1613     ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1614     size_t *real_length, ddi_acc_hdl_t *ap)
1615 {
1616 	caddr_t a;
1617 	int iomin;
1618 	ddi_acc_impl_t *iap;
1619 	int physcontig = 0;
1620 	pgcnt_t npages;
1621 	pgcnt_t minctg;
1622 	uint_t order;
1623 	int e;
1624 
1625 	/*
1626 	 * Check legality of arguments
1627 	 */
1628 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1629 		return (DDI_FAILURE);
1630 	}
1631 
1632 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1633 	    !ISP2(attr->dma_attr_align) || !ISP2(attr->dma_attr_minxfer)) {
1634 		return (DDI_FAILURE);
1635 	}
1636 
1637 	/*
1638 	 * figure out most restrictive alignment requirement
1639 	 */
1640 	iomin = attr->dma_attr_minxfer;
1641 	iomin = maxbit(iomin, attr->dma_attr_align);
1642 	if (iomin == 0)
1643 		return (DDI_FAILURE);
1644 
1645 	ASSERT((iomin & (iomin - 1)) == 0);
1646 
1647 	/*
1648 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1649 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1650 	 * memory that ends on a page boundry.
1651 	 * Don't want to have to different cache mappings to the same
1652 	 * physical page.
1653 	 */
1654 	if (OVERRIDE_CACHE_ATTR(flags)) {
1655 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1656 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1657 	}
1658 
1659 	/*
1660 	 * Determine if we need to satisfy the request for physically
1661 	 * contiguous memory or alignments larger than pagesize.
1662 	 */
1663 	npages = btopr(length + attr->dma_attr_align);
1664 	minctg = howmany(npages, attr->dma_attr_sgllen);
1665 
1666 	if (minctg > 1) {
1667 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1668 		/*
1669 		 * verify that the minimum contig requirement for the
1670 		 * actual length does not cross segment boundary.
1671 		 */
1672 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1673 		    size_t);
1674 		npages = btopr(length);
1675 		minctg = howmany(npages, attr->dma_attr_sgllen);
1676 		if (minctg > pfnseg + 1)
1677 			return (DDI_FAILURE);
1678 		physcontig = 1;
1679 	} else {
1680 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1681 	}
1682 
1683 	/*
1684 	 * Allocate the requested amount from the system.
1685 	 */
1686 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1687 
1688 	if ((*kaddrp = a) == NULL)
1689 		return (DDI_FAILURE);
1690 
1691 	/*
1692 	 * if we to modify the cache attributes, go back and muck with the
1693 	 * mappings.
1694 	 */
1695 	if (OVERRIDE_CACHE_ATTR(flags)) {
1696 		order = 0;
1697 		i_ddi_cacheattr_to_hatacc(flags, &order);
1698 		e = kmem_override_cache_attrs(a, length, order);
1699 		if (e != 0) {
1700 			kfreea(a);
1701 			return (DDI_FAILURE);
1702 		}
1703 	}
1704 
1705 	if (real_length) {
1706 		*real_length = length;
1707 	}
1708 	if (ap) {
1709 		/*
1710 		 * initialize access handle
1711 		 */
1712 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1713 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1714 		impl_acc_hdl_init(ap);
1715 	}
1716 
1717 	return (DDI_SUCCESS);
1718 }
1719 
1720 /* ARGSUSED */
1721 void
1722 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1723 {
1724 	if (ap != NULL) {
1725 		/*
1726 		 * if we modified the cache attributes on alloc, go back and
1727 		 * fix them since this memory could be returned to the
1728 		 * general pool.
1729 		 */
1730 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1731 			uint_t order = 0;
1732 			int e;
1733 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1734 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1735 			if (e != 0) {
1736 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1737 				    "override cache attrs, memory leaked\n");
1738 				return;
1739 			}
1740 		}
1741 	}
1742 	kfreea(kaddr);
1743 }
1744 
/*
 * Access Barriers
 *
 */

/*
 * Always reports DDI_FAILURE; the handle is not examined.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
	return (DDI_FAILURE);
}
1755 
/*
 * Counterpart of i_ddi_ontrap(); intentionally does nothing.
 */
/*ARGSUSED*/
void
i_ddi_notrap(ddi_acc_handle_t hp)
{
}
1761 
1762 
1763 /*
1764  * Misc Functions
1765  */
1766 
1767 /*
1768  * Implementation instance override functions
1769  *
1770  * No override on i86pc
1771  */
/*
 * Instance assignment override.  Always returns (uint_t)-1 and ignores
 * dip; presumably this value tells the caller that no override was made --
 * confirm against the caller.
 */
/*ARGSUSED*/
uint_t
impl_assign_instance(dev_info_t *dip)
{
	return ((uint_t)-1);
}
1778 
1779 /*ARGSUSED*/
1780 int
1781 impl_keep_instance(dev_info_t *dip)
1782 {
1783 
1784 #if defined(__xpv)
1785 	/*
1786 	 * Do not persist instance numbers assigned to devices in dom0
1787 	 */
1788 	dev_info_t *pdip;
1789 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1790 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1791 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1792 			return (DDI_SUCCESS);
1793 	}
1794 #endif
1795 	return (DDI_FAILURE);
1796 }
1797 
/*
 * Instance free override.  Always returns DDI_FAILURE; dip is ignored.
 */
/*ARGSUSED*/
int
impl_free_instance(dev_info_t *dip)
{
	return (DDI_FAILURE);
}
1804 
/*
 * Always returns DDI_SUCCESS; devi is not examined.
 */
/*ARGSUSED*/
int
impl_check_cpu(dev_info_t *devi)
{
	return (DDI_SUCCESS);
}
1811 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this is a no-op.
 */
void
arch_power_down(void)
{
}
1819 
1820 /*
1821  * Copy name to property_name, since name
1822  * is in the low address range below kernelbase.
1823  */
1824 static void
1825 copy_boot_str(const char *boot_str, char *kern_str, int len)
1826 {
1827 	int i = 0;
1828 
1829 	while (i < len - 1 && boot_str[i] != '\0') {
1830 		kern_str[i] = boot_str[i];
1831 		i++;
1832 	}
1833 
1834 	kern_str[i] = 0;	/* null terminate */
1835 	if (boot_str[i] != '\0')
1836 		cmn_err(CE_WARN,
1837 		    "boot property string is truncated to %s", kern_str);
1838 }
1839 
1840 static void
1841 get_boot_properties(void)
1842 {
1843 	extern char hw_provider[];
1844 	dev_info_t *devi;
1845 	char *name;
1846 	int length, flags;
1847 	char property_name[50], property_val[50];
1848 	void *bop_staging_area;
1849 
1850 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1851 
1852 	/*
1853 	 * Import "root" properties from the boot.
1854 	 *
1855 	 * We do this by invoking BOP_NEXTPROP until the list
1856 	 * is completely copied in.
1857 	 */
1858 
1859 	devi = ddi_root_node();
1860 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1861 	    name;					/* NULL => DONE */
1862 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1863 
1864 		/* copy string to memory above kernelbase */
1865 		copy_boot_str(name, property_name, 50);
1866 
1867 		/*
1868 		 * Skip vga properties. They will be picked up later
1869 		 * by get_vga_properties.
1870 		 */
1871 		if (strcmp(property_name, "display-edif-block") == 0 ||
1872 		    strcmp(property_name, "display-edif-id") == 0) {
1873 			continue;
1874 		}
1875 
1876 		length = BOP_GETPROPLEN(bootops, property_name);
1877 		if (length < 0)
1878 			continue;
1879 		if (length > MMU_PAGESIZE) {
1880 			cmn_err(CE_NOTE,
1881 			    "boot property %s longer than 0x%x, ignored\n",
1882 			    property_name, MMU_PAGESIZE);
1883 			continue;
1884 		}
1885 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1886 		flags = do_bsys_getproptype(bootops, property_name);
1887 
1888 		/*
1889 		 * special properties:
1890 		 * si-machine, si-hw-provider
1891 		 *	goes to kernel data structures.
1892 		 * bios-boot-device and stdout
1893 		 *	goes to hardware property list so it may show up
1894 		 *	in the prtconf -vp output. This is needed by
1895 		 *	Install/Upgrade. Once we fix install upgrade,
1896 		 *	this can be taken out.
1897 		 */
1898 		if (strcmp(name, "si-machine") == 0) {
1899 			(void) strncpy(utsname.machine, bop_staging_area,
1900 			    SYS_NMLN);
1901 			utsname.machine[SYS_NMLN - 1] = '\0';
1902 			continue;
1903 		}
1904 		if (strcmp(name, "si-hw-provider") == 0) {
1905 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1906 			hw_provider[SYS_NMLN - 1] = '\0';
1907 			continue;
1908 		}
1909 		if (strcmp(name, "bios-boot-device") == 0) {
1910 			copy_boot_str(bop_staging_area, property_val, 50);
1911 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1912 			    property_name, property_val);
1913 			continue;
1914 		}
1915 		if (strcmp(name, "stdout") == 0) {
1916 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1917 			    property_name, *((int *)bop_staging_area));
1918 			continue;
1919 		}
1920 
1921 		/* Boolean property */
1922 		if (length == 0) {
1923 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1924 			    DDI_PROP_CANSLEEP, property_name, NULL, 0);
1925 			continue;
1926 		}
1927 
1928 		/* Now anything else based on type. */
1929 		switch (flags) {
1930 		case DDI_PROP_TYPE_INT:
1931 			if (length == sizeof (int)) {
1932 				(void) e_ddi_prop_update_int(DDI_DEV_T_NONE,
1933 				    devi, property_name,
1934 				    *((int *)bop_staging_area));
1935 			} else {
1936 				(void) e_ddi_prop_update_int_array(
1937 				    DDI_DEV_T_NONE, devi, property_name,
1938 				    bop_staging_area, length / sizeof (int));
1939 			}
1940 			break;
1941 		case DDI_PROP_TYPE_STRING:
1942 			(void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1943 			    property_name, bop_staging_area);
1944 			break;
1945 		case DDI_PROP_TYPE_BYTE:
1946 			(void) e_ddi_prop_update_byte_array(DDI_DEV_T_NONE,
1947 			    devi, property_name, bop_staging_area, length);
1948 			break;
1949 		case DDI_PROP_TYPE_INT64:
1950 			if (length == sizeof (int64_t)) {
1951 				(void) e_ddi_prop_update_int64(DDI_DEV_T_NONE,
1952 				    devi, property_name,
1953 				    *((int64_t *)bop_staging_area));
1954 			} else {
1955 				(void) e_ddi_prop_update_int64_array(
1956 				    DDI_DEV_T_NONE, devi, property_name,
1957 				    bop_staging_area,
1958 				    length / sizeof (int64_t));
1959 			}
1960 			break;
1961 		default:
1962 			/* Property type unknown, use old prop interface */
1963 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1964 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1965 			    length);
1966 		}
1967 	}
1968 
1969 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1970 }
1971 
1972 static void
1973 get_vga_properties(void)
1974 {
1975 	dev_info_t *devi;
1976 	major_t major;
1977 	char *name;
1978 	int length;
1979 	char property_val[50];
1980 	void *bop_staging_area;
1981 
1982 	/*
1983 	 * XXXX Hack Allert!
1984 	 * There really needs to be a better way for identifying various
1985 	 * console framebuffers and their related issues.  Till then,
1986 	 * check for this one as a replacement to vgatext.
1987 	 */
1988 	major = ddi_name_to_major("ragexl");
1989 	if (major == (major_t)-1) {
1990 		major = ddi_name_to_major("vgatext");
1991 		if (major == (major_t)-1)
1992 			return;
1993 	}
1994 	devi = devnamesp[major].dn_head;
1995 	if (devi == NULL)
1996 		return;
1997 
1998 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1999 
2000 	/*
2001 	 * Import "vga" properties from the boot.
2002 	 */
2003 	name = "display-edif-block";
2004 	length = BOP_GETPROPLEN(bootops, name);
2005 	if (length > 0 && length < MMU_PAGESIZE) {
2006 		BOP_GETPROP(bootops, name, bop_staging_area);
2007 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
2008 		    devi, name, bop_staging_area, length);
2009 	}
2010 
2011 	/*
2012 	 * kdmconfig is also looking for display-type and
2013 	 * video-adapter-type. We default to color and svga.
2014 	 *
2015 	 * Could it be "monochrome", "vga"?
2016 	 * Nah, you've got to come to the 21st century...
2017 	 * And you can set monitor type manually in kdmconfig
2018 	 * if you are really an old junky.
2019 	 */
2020 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2021 	    devi, "display-type", "color");
2022 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2023 	    devi, "video-adapter-type", "svga");
2024 
2025 	name = "display-edif-id";
2026 	length = BOP_GETPROPLEN(bootops, name);
2027 	if (length > 0 && length < MMU_PAGESIZE) {
2028 		BOP_GETPROP(bootops, name, bop_staging_area);
2029 		copy_boot_str(bop_staging_area, property_val, length);
2030 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2031 		    devi, name, property_val);
2032 	}
2033 
2034 	kmem_free(bop_staging_area, MMU_PAGESIZE);
2035 }
2036 
2037 /*
2038  * Copy console font to kernel memory. The temporary font setup
2039  * to use font module was done in early console setup, using low
2040  * memory and data from font module. Now we need to allocate
2041  * kernel memory and copy data over, so the low memory can be freed.
2042  * We can have at most one entry in font list from early boot.
2043  */
2044 static void
2045 get_console_font(void)
2046 {
2047 	struct fontlist *fp, *fl;
2048 	bitmap_data_t *bd;
2049 	struct font *fd, *tmp;
2050 	int i;
2051 
2052 	if (STAILQ_EMPTY(&fonts))
2053 		return;
2054 
2055 	fl = STAILQ_FIRST(&fonts);
2056 	STAILQ_REMOVE_HEAD(&fonts, font_next);
2057 	fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
2058 	bd = kmem_zalloc(sizeof (*bd), KM_SLEEP);
2059 	fd = kmem_zalloc(sizeof (*fd), KM_SLEEP);
2060 
2061 	fp->font_name = NULL;
2062 	fp->font_flags = FONT_BOOT;
2063 	fp->font_data = bd;
2064 
2065 	bd->width = fl->font_data->width;
2066 	bd->height = fl->font_data->height;
2067 	bd->uncompressed_size = fl->font_data->uncompressed_size;
2068 	bd->font = fd;
2069 
2070 	tmp = fl->font_data->font;
2071 	fd->vf_width = tmp->vf_width;
2072 	fd->vf_height = tmp->vf_height;
2073 	for (i = 0; i < VFNT_MAPS; i++) {
2074 		if (tmp->vf_map_count[i] == 0)
2075 			continue;
2076 		fd->vf_map_count[i] = tmp->vf_map_count[i];
2077 		fd->vf_map[i] = kmem_alloc(fd->vf_map_count[i] *
2078 		    sizeof (*fd->vf_map[i]), KM_SLEEP);
2079 		bcopy(tmp->vf_map[i], fd->vf_map[i], fd->vf_map_count[i] *
2080 		    sizeof (*fd->vf_map[i]));
2081 	}
2082 	fd->vf_bytes = kmem_alloc(bd->uncompressed_size, KM_SLEEP);
2083 	bcopy(tmp->vf_bytes, fd->vf_bytes, bd->uncompressed_size);
2084 	STAILQ_INSERT_HEAD(&fonts, fp, font_next);
2085 }
2086 
2087 /*
2088  * This is temporary, but absolutely necessary.  If we are being
2089  * booted with a device tree created by the DevConf project's bootconf
2090  * program, then we have device information nodes that reflect
2091  * reality.  At this point in time in the Solaris release schedule, the
2092  * kernel drivers aren't prepared for reality.  They still depend on their
2093  * own ad-hoc interpretations of the properties created when their .conf
2094  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2095  * property to prevent them from using the nodes passed up from the bootconf
2096  * device tree.
2097  *
2098  * Trying to assemble root file system drivers as we are booting from
2099  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
2101  * reality-based name_addrs.  We help the boot along in this case by
2102  * looking at the pre-bootconf bootpath and determining if we would have
2103  * successfully matched if that had been the bootpath we had chosen.
2104  *
2105  * Note that we only even perform this extra check if we've booted
2106  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2107  * we're trying to match the name_addr specified in the 1275 bootpath.
2108  */
2109 
2110 #define	MAXCOMPONENTLEN	32
2111 
2112 int
2113 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2114 {
2115 	/*
2116 	 *  There are multiple criteria to be met before we can even
2117 	 *  consider allowing a name_addr match here.
2118 	 *
2119 	 *  1) We must have been booted such that the bootconf program
2120 	 *	created device tree nodes and properties.  This can be
2121 	 *	determined by examining the 'bootpath' property.  This
2122 	 *	property will be a non-null string iff bootconf was
2123 	 *	involved in the boot.
2124 	 *
2125 	 *  2) The module that we want to match must be the boot device.
2126 	 *
2127 	 *  3) The instance of the module we are thinking of letting be
2128 	 *	our match must be ignoring hardware nodes.
2129 	 *
2130 	 *  4) The name_addr we want to match must be the name_addr
2131 	 *	specified in the 1275 bootpath.
2132 	 */
2133 	static char bootdev_module[MAXCOMPONENTLEN];
2134 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2135 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2136 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2137 	static int  quickexit;
2138 
2139 	char *daddr;
2140 	int dlen;
2141 
2142 	char	*lkupname;
2143 	int	rv = DDI_FAILURE;
2144 
2145 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2146 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2147 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2148 	    "ignore-hardware-nodes", -1) != -1)) {
2149 		if (strcmp(daddr, caddr) == 0) {
2150 			return (DDI_SUCCESS);
2151 		}
2152 	}
2153 
2154 	if (quickexit)
2155 		return (rv);
2156 
2157 	if (bootdev_module[0] == '\0') {
2158 		char *addrp, *eoaddrp;
2159 		char *busp, *modp, *atp;
2160 		char *bp1275, *bp;
2161 		int  bp1275len, bplen;
2162 
2163 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2164 
2165 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2166 		    ddi_root_node(), 0, "bootpath",
2167 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2168 		    bp1275len <= 1) {
2169 			/*
2170 			 * We didn't boot from bootconf so we never need to
2171 			 * do any special matches.
2172 			 */
2173 			quickexit = 1;
2174 			if (bp1275)
2175 				kmem_free(bp1275, bp1275len);
2176 			return (rv);
2177 		}
2178 
2179 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2180 		    ddi_root_node(), 0, "boot-path",
2181 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2182 			/*
2183 			 * No fallback position for matching. This is
2184 			 * certainly unexpected, but we'll handle it
2185 			 * just in case.
2186 			 */
2187 			quickexit = 1;
2188 			kmem_free(bp1275, bp1275len);
2189 			if (bp)
2190 				kmem_free(bp, bplen);
2191 			return (rv);
2192 		}
2193 
2194 		/*
2195 		 *  Determine boot device module and 1275 name_addr
2196 		 *
2197 		 *  bootpath assumed to be of the form /bus/module@name_addr
2198 		 */
2199 		if (busp = strchr(bp1275, '/')) {
2200 			if (modp = strchr(busp + 1, '/')) {
2201 				if (atp = strchr(modp + 1, '@')) {
2202 					*atp = '\0';
2203 					addrp = atp + 1;
2204 					if (eoaddrp = strchr(addrp, '/'))
2205 						*eoaddrp = '\0';
2206 				}
2207 			}
2208 		}
2209 
2210 		if (modp && addrp) {
2211 			(void) strncpy(bootdev_module, modp + 1,
2212 			    MAXCOMPONENTLEN);
2213 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2214 
2215 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2216 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2217 		} else {
2218 			quickexit = 1;
2219 			kmem_free(bp1275, bp1275len);
2220 			kmem_free(bp, bplen);
2221 			return (rv);
2222 		}
2223 
2224 		/*
2225 		 *  Determine fallback name_addr
2226 		 *
2227 		 *  10/3/96 - Also save fallback module name because it
2228 		 *  might actually be different than the current module
2229 		 *  name.  E.G., ISA pnp drivers have new names.
2230 		 *
2231 		 *  bootpath assumed to be of the form /bus/module@name_addr
2232 		 */
2233 		addrp = NULL;
2234 		if (busp = strchr(bp, '/')) {
2235 			if (modp = strchr(busp + 1, '/')) {
2236 				if (atp = strchr(modp + 1, '@')) {
2237 					*atp = '\0';
2238 					addrp = atp + 1;
2239 					if (eoaddrp = strchr(addrp, '/'))
2240 						*eoaddrp = '\0';
2241 				}
2242 			}
2243 		}
2244 
2245 		if (modp && addrp) {
2246 			(void) strncpy(bootdev_oldmod, modp + 1,
2247 			    MAXCOMPONENTLEN);
2248 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2249 
2250 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2251 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2252 		}
2253 
2254 		/* Free up the bootpath storage now that we're done with it. */
2255 		kmem_free(bp1275, bp1275len);
2256 		kmem_free(bp, bplen);
2257 
2258 		if (bootdev_oldaddr[0] == '\0') {
2259 			quickexit = 1;
2260 			return (rv);
2261 		}
2262 	}
2263 
2264 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2265 	    (strcmp(bootdev_module, lkupname) == 0 ||
2266 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2267 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2268 	    "ignore-hardware-nodes", -1) != -1) ||
2269 	    ignore_hardware_nodes) &&
2270 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2271 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2272 		rv = DDI_SUCCESS;
2273 	}
2274 
2275 	return (rv);
2276 }
2277 
2278 /*
2279  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2280  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2281  */
2282 /*ARGSUSED*/
2283 int
2284 e_ddi_copyfromdev(dev_info_t *devi,
2285     off_t off, const void *devaddr, void *kaddr, size_t len)
2286 {
2287 	bcopy(devaddr, kaddr, len);
2288 	return (0);
2289 }
2290 
2291 /*
2292  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2293  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2294  */
2295 /*ARGSUSED*/
2296 int
2297 e_ddi_copytodev(dev_info_t *devi,
2298     off_t off, const void *kaddr, void *devaddr, size_t len)
2299 {
2300 	bcopy(kaddr, devaddr, len);
2301 	return (0);
2302 }
2303 
2304 
2305 static int
2306 poke_mem(peekpoke_ctlops_t *in_args)
2307 {
2308 	int err = DDI_SUCCESS;
2309 	on_trap_data_t otd;
2310 
2311 	/* Set up protected environment. */
2312 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2313 		switch (in_args->size) {
2314 		case sizeof (uint8_t):
2315 			*(uint8_t *)(in_args->dev_addr) =
2316 			    *(uint8_t *)in_args->host_addr;
2317 			break;
2318 
2319 		case sizeof (uint16_t):
2320 			*(uint16_t *)(in_args->dev_addr) =
2321 			    *(uint16_t *)in_args->host_addr;
2322 			break;
2323 
2324 		case sizeof (uint32_t):
2325 			*(uint32_t *)(in_args->dev_addr) =
2326 			    *(uint32_t *)in_args->host_addr;
2327 			break;
2328 
2329 		case sizeof (uint64_t):
2330 			*(uint64_t *)(in_args->dev_addr) =
2331 			    *(uint64_t *)in_args->host_addr;
2332 			break;
2333 
2334 		default:
2335 			err = DDI_FAILURE;
2336 			break;
2337 		}
2338 	} else
2339 		err = DDI_FAILURE;
2340 
2341 	/* Take down protected environment. */
2342 	no_trap();
2343 
2344 	return (err);
2345 }
2346 
2347 
2348 static int
2349 peek_mem(peekpoke_ctlops_t *in_args)
2350 {
2351 	int err = DDI_SUCCESS;
2352 	on_trap_data_t otd;
2353 
2354 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2355 		switch (in_args->size) {
2356 		case sizeof (uint8_t):
2357 			*(uint8_t *)in_args->host_addr =
2358 			    *(uint8_t *)in_args->dev_addr;
2359 			break;
2360 
2361 		case sizeof (uint16_t):
2362 			*(uint16_t *)in_args->host_addr =
2363 			    *(uint16_t *)in_args->dev_addr;
2364 			break;
2365 
2366 		case sizeof (uint32_t):
2367 			*(uint32_t *)in_args->host_addr =
2368 			    *(uint32_t *)in_args->dev_addr;
2369 			break;
2370 
2371 		case sizeof (uint64_t):
2372 			*(uint64_t *)in_args->host_addr =
2373 			    *(uint64_t *)in_args->dev_addr;
2374 			break;
2375 
2376 		default:
2377 			err = DDI_FAILURE;
2378 			break;
2379 		}
2380 	} else
2381 		err = DDI_FAILURE;
2382 
2383 	no_trap();
2384 	return (err);
2385 }
2386 
2387 
2388 /*
2389  * This is called only to process peek/poke when the DIP is NULL.
2390  * Assume that this is for memory, as nexi take care of device safe accesses.
2391  */
2392 int
2393 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2394 {
2395 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2396 }
2397 
2398 /*
2399  * we've just done a cautious put/get. Check if it was successful by
2400  * calling pci_ereport_post() on all puts and for any gets that return -1
2401  */
2402 static int
2403 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2404     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2405 {
2406 	int	rval = DDI_SUCCESS;
2407 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2408 	ddi_fm_error_t de;
2409 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2410 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2411 	int check_err = 0;
2412 	int repcount = in_args->repcount;
2413 
2414 	if (ctlop == DDI_CTLOPS_POKE &&
2415 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2416 		return (DDI_SUCCESS);
2417 
2418 	if (ctlop == DDI_CTLOPS_PEEK &&
2419 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2420 		for (; repcount; repcount--) {
2421 			switch (in_args->size) {
2422 			case sizeof (uint8_t):
2423 				if (*(uint8_t *)in_args->host_addr == 0xff)
2424 					check_err = 1;
2425 				break;
2426 			case sizeof (uint16_t):
2427 				if (*(uint16_t *)in_args->host_addr == 0xffff)
2428 					check_err = 1;
2429 				break;
2430 			case sizeof (uint32_t):
2431 				if (*(uint32_t *)in_args->host_addr ==
2432 				    0xffffffff)
2433 					check_err = 1;
2434 				break;
2435 			case sizeof (uint64_t):
2436 				if (*(uint64_t *)in_args->host_addr ==
2437 				    0xffffffffffffffff)
2438 					check_err = 1;
2439 				break;
2440 			}
2441 		}
2442 		if (check_err == 0)
2443 			return (DDI_SUCCESS);
2444 	}
2445 	/*
2446 	 * for a cautious put or get or a non-cautious get that returned -1 call
2447 	 * io framework to see if there really was an error
2448 	 */
2449 	bzero(&de, sizeof (ddi_fm_error_t));
2450 	de.fme_version = DDI_FME_VERSION;
2451 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2452 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2453 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2454 		de.fme_acc_handle = in_args->handle;
2455 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2456 		/*
2457 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
2458 		 * Non-hardened drivers may be probing the hardware and
2459 		 * expecting -1 returned. So need to treat errors on
2460 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2461 		 */
2462 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2463 		de.fme_acc_handle = in_args->handle;
2464 	} else {
2465 		/*
2466 		 * Hardened driver doing protected accesses shouldn't
2467 		 * get errors unless there's a hardware problem. Treat
2468 		 * as nonfatal if there's an error, but set UNEXPECTED
2469 		 * so we raise ereports on any errors and potentially
2470 		 * fault the device
2471 		 */
2472 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2473 	}
2474 	(void) scan(dip, &de);
2475 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2476 	    de.fme_status != DDI_FM_OK) {
2477 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2478 		rval = DDI_FAILURE;
2479 		errp->err_ena = de.fme_ena;
2480 		errp->err_expected = de.fme_flag;
2481 		errp->err_status = DDI_FM_NONFATAL;
2482 	}
2483 	return (rval);
2484 }
2485 
2486 /*
2487  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2488  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2489  * recurse, so assume all puts are OK and gets have failed if they return -1
2490  */
2491 static int
2492 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2493 {
2494 	int rval = DDI_SUCCESS;
2495 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2496 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2497 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2498 	int repcount = in_args->repcount;
2499 
2500 	if (ctlop == DDI_CTLOPS_POKE)
2501 		return (rval);
2502 
2503 	for (; repcount; repcount--) {
2504 		switch (in_args->size) {
2505 		case sizeof (uint8_t):
2506 			if (*(uint8_t *)in_args->host_addr == 0xff)
2507 				rval = DDI_FAILURE;
2508 			break;
2509 		case sizeof (uint16_t):
2510 			if (*(uint16_t *)in_args->host_addr == 0xffff)
2511 				rval = DDI_FAILURE;
2512 			break;
2513 		case sizeof (uint32_t):
2514 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2515 				rval = DDI_FAILURE;
2516 			break;
2517 		case sizeof (uint64_t):
2518 			if (*(uint64_t *)in_args->host_addr ==
2519 			    0xffffffffffffffff)
2520 				rval = DDI_FAILURE;
2521 			break;
2522 		}
2523 	}
2524 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2525 	    rval == DDI_FAILURE) {
2526 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2527 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2528 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2529 		errp->err_status = DDI_FM_NONFATAL;
2530 	}
2531 	return (rval);
2532 }
2533 
2534 int
2535 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2536     ddi_ctl_enum_t ctlop, void *arg, void *result,
2537     int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2538     void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2539     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2540 {
2541 	int rval;
2542 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2543 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2544 
2545 	/*
2546 	 * this function only supports cautious accesses, not peeks/pokes
2547 	 * which don't have a handle
2548 	 */
2549 	if (hp == NULL)
2550 		return (DDI_FAILURE);
2551 
2552 	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2553 		if (!mutex_tryenter(err_mutexp)) {
2554 			/*
2555 			 * As this may be a recursive call from within
2556 			 * pci_ereport_post() we can't wait for the mutexes.
2557 			 * Fortunately we know someone is already calling
2558 			 * pci_ereport_post() which will handle the error bits
2559 			 * for us, and as this is a config space access we can
2560 			 * just do the access and check return value for -1
2561 			 * using pci_peekpoke_check_nofma().
2562 			 */
2563 			rval = handler(dip, rdip, ctlop, arg, result);
2564 			if (rval == DDI_SUCCESS)
2565 				rval = pci_peekpoke_check_nofma(arg, ctlop);
2566 			return (rval);
2567 		}
2568 		/*
2569 		 * This can't be a recursive call. Drop the err_mutex and get
2570 		 * both mutexes in the right order. If an error hasn't already
2571 		 * been detected by the ontrap code, use pci_peekpoke_check_fma
2572 		 * which will call pci_ereport_post() to check error status.
2573 		 */
2574 		mutex_exit(err_mutexp);
2575 	}
2576 	mutex_enter(peek_poke_mutexp);
2577 	rval = handler(dip, rdip, ctlop, arg, result);
2578 	if (rval == DDI_SUCCESS) {
2579 		mutex_enter(err_mutexp);
2580 		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2581 		mutex_exit(err_mutexp);
2582 	}
2583 	mutex_exit(peek_poke_mutexp);
2584 	return (rval);
2585 }
2586 
2587 void
2588 impl_setup_ddi(void)
2589 {
2590 #if !defined(__xpv)
2591 	extern void startup_bios_disk(void);
2592 	extern int post_fastreboot;
2593 #endif
2594 	dev_info_t *xdip, *isa_dip;
2595 	rd_existing_t rd_mem_prop;
2596 	int err;
2597 
2598 	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2599 	    (pnode_t)DEVI_SID_NODEID, &xdip);
2600 
2601 	(void) BOP_GETPROP(bootops,
2602 	    "ramdisk_start", (void *)&ramdisk_start);
2603 	(void) BOP_GETPROP(bootops,
2604 	    "ramdisk_end", (void *)&ramdisk_end);
2605 
2606 #ifdef __xpv
2607 	ramdisk_start -= ONE_GIG;
2608 	ramdisk_end -= ONE_GIG;
2609 #endif
2610 	rd_mem_prop.phys = ramdisk_start;
2611 	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2612 
2613 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2614 	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2615 	    sizeof (rd_mem_prop));
2616 	err = ndi_devi_bind_driver(xdip, 0);
2617 	ASSERT(err == 0);
2618 
2619 	/* isa node */
2620 	if (pseudo_isa) {
2621 		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2622 		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
2623 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2624 		    "device_type", "isa");
2625 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2626 		    "bus-type", "isa");
2627 		(void) ndi_devi_bind_driver(isa_dip, 0);
2628 	}
2629 
2630 	/*
2631 	 * Read in the properties from the boot.
2632 	 */
2633 	get_boot_properties();
2634 
2635 	/* not framebuffer should be enumerated, if present */
2636 	get_vga_properties();
2637 
2638 	/* Copy console font if provided by boot. */
2639 	get_console_font();
2640 
2641 	/*
2642 	 * Check for administratively disabled drivers.
2643 	 */
2644 	check_driver_disable();
2645 
2646 #if !defined(__xpv)
2647 	if (!post_fastreboot && BOP_GETPROPLEN(bootops, "efi-systab") < 0)
2648 		startup_bios_disk();
2649 #endif
2650 	/* do bus dependent probes. */
2651 	impl_bus_initialprobe();
2652 }
2653 
2654 dev_t
2655 getrootdev(void)
2656 {
2657 	/*
2658 	 * Usually rootfs.bo_name is initialized by the
2659 	 * the bootpath property from bootenv.rc, but
2660 	 * defaults to "/ramdisk:a" otherwise.
2661 	 */
2662 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2663 }
2664 
/*
 * One registered bus probe routine.  Entries form a singly linked list.
 */
struct bus_probe {
	struct bus_probe *next;		/* next entry, NULL at the tail */
	void (*probe)(int);		/* routine to invoke */
};

/* Head of the registered probe list; NULL while the list is empty. */
static struct bus_probe *bus_probes;
2669 
2670 void
2671 impl_bus_add_probe(void (*func)(int))
2672 {
2673 	struct bus_probe *probe;
2674 	struct bus_probe *lastprobe = NULL;
2675 
2676 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2677 	probe->probe = func;
2678 	probe->next = NULL;
2679 
2680 	if (!bus_probes) {
2681 		bus_probes = probe;
2682 		return;
2683 	}
2684 
2685 	lastprobe = bus_probes;
2686 	while (lastprobe->next)
2687 		lastprobe = lastprobe->next;
2688 	lastprobe->next = probe;
2689 }
2690 
2691 /*ARGSUSED*/
2692 void
2693 impl_bus_delete_probe(void (*func)(int))
2694 {
2695 	struct bus_probe *prev = NULL;
2696 	struct bus_probe *probe = bus_probes;
2697 
2698 	while (probe) {
2699 		if (probe->probe == func)
2700 			break;
2701 		prev = probe;
2702 		probe = probe->next;
2703 	}
2704 
2705 	if (probe == NULL)
2706 		return;
2707 
2708 	if (prev)
2709 		prev->next = probe->next;
2710 	else
2711 		bus_probes = probe->next;
2712 
2713 	kmem_free(probe, sizeof (struct bus_probe));
2714 }
2715 
2716 /*
2717  * impl_bus_initialprobe
2718  *	Modload the prom simulator, then let it probe to verify existence
2719  *	and type of PCI support.
2720  */
2721 static void
2722 impl_bus_initialprobe(void)
2723 {
2724 	struct bus_probe *probe;
2725 
2726 	/* load modules to install bus probes */
2727 #if defined(__xpv)
2728 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2729 		if (modload("misc", "pci_autoconfig") < 0) {
2730 			panic("failed to load misc/pci_autoconfig");
2731 		}
2732 
2733 		if (modload("drv", "isa") < 0)
2734 			panic("failed to load drv/isa");
2735 	}
2736 
2737 	(void) modload("misc", "xpv_autoconfig");
2738 #else
2739 	if (modload("misc", "pci_autoconfig") < 0) {
2740 		panic("failed to load misc/pci_autoconfig");
2741 	}
2742 
2743 	(void) modload("misc", "acpidev");
2744 
2745 	if (modload("drv", "isa") < 0)
2746 		panic("failed to load drv/isa");
2747 #endif
2748 
2749 	probe = bus_probes;
2750 	while (probe) {
2751 		/* run the probe functions */
2752 		(*probe->probe)(0);
2753 		probe = probe->next;
2754 	}
2755 }
2756 
2757 /*
2758  * impl_bus_reprobe
2759  *	Reprogram devices not set up by firmware.
2760  */
2761 static void
2762 impl_bus_reprobe(void)
2763 {
2764 	struct bus_probe *probe;
2765 
2766 	probe = bus_probes;
2767 	while (probe) {
2768 		/* run the probe function */
2769 		(*probe->probe)(1);
2770 		probe = probe->next;
2771 	}
2772 }
2773 
2774 
2775 /*
2776  * The following functions ready a cautious request to go up to the nexus
2777  * driver.  It is up to the nexus driver to decide how to process the request.
2778  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2779  * differently.
2780  */
2781 
2782 static void
2783 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2784     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2785     ddi_ctl_enum_t cmd)
2786 {
2787 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2788 
2789 	cautacc_ctlops_arg.size = size;
2790 	cautacc_ctlops_arg.dev_addr = dev_addr;
2791 	cautacc_ctlops_arg.host_addr = host_addr;
2792 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2793 	cautacc_ctlops_arg.repcount = repcount;
2794 	cautacc_ctlops_arg.flags = flags;
2795 
2796 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2797 	    &cautacc_ctlops_arg, NULL);
2798 }
2799 
2800 uint8_t
2801 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2802 {
2803 	uint8_t value;
2804 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2805 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2806 
2807 	return (value);
2808 }
2809 
2810 uint16_t
2811 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2812 {
2813 	uint16_t value;
2814 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2815 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2816 
2817 	return (value);
2818 }
2819 
2820 uint32_t
2821 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2822 {
2823 	uint32_t value;
2824 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2825 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2826 
2827 	return (value);
2828 }
2829 
2830 uint64_t
2831 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2832 {
2833 	uint64_t value;
2834 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2835 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2836 
2837 	return (value);
2838 }
2839 
2840 void
2841 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2842 {
2843 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2844 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2845 }
2846 
2847 void
2848 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2849 {
2850 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2851 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2852 }
2853 
2854 void
2855 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2856 {
2857 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2858 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2859 }
2860 
2861 void
2862 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2863 {
2864 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2865 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2866 }
2867 
2868 void
2869 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2870     size_t repcount, uint_t flags)
2871 {
2872 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2873 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2874 }
2875 
2876 void
2877 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2878     uint16_t *dev_addr, size_t repcount, uint_t flags)
2879 {
2880 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2881 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2882 }
2883 
2884 void
2885 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2886     uint32_t *dev_addr, size_t repcount, uint_t flags)
2887 {
2888 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2889 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2890 }
2891 
2892 void
2893 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2894     uint64_t *dev_addr, size_t repcount, uint_t flags)
2895 {
2896 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2897 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2898 }
2899 
2900 void
2901 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2902     size_t repcount, uint_t flags)
2903 {
2904 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2905 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2906 }
2907 
2908 void
2909 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2910     uint16_t *dev_addr, size_t repcount, uint_t flags)
2911 {
2912 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2913 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2914 }
2915 
2916 void
2917 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2918     uint32_t *dev_addr, size_t repcount, uint_t flags)
2919 {
2920 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2921 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2922 }
2923 
2924 void
2925 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2926     uint64_t *dev_addr, size_t repcount, uint_t flags)
2927 {
2928 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2929 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2930 }
2931 
2932 boolean_t
2933 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2934 {
2935 	uint64_t hi_pa;
2936 
2937 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2938 	if (attrp->dma_attr_addr_hi < hi_pa) {
2939 		return (B_TRUE);
2940 	}
2941 
2942 	return (B_FALSE);
2943 }
2944 
2945 size_t
2946 i_ddi_copybuf_size()
2947 {
2948 	return (dma_max_copybuf_size);
2949 }
2950 
2951 /*
2952  * i_ddi_dma_max()
2953  *    returns the maximum DMA size which can be performed in a single DMA
2954  *    window taking into account the devices DMA contraints (attrp), the
2955  *    maximum copy buffer size (if applicable), and the worse case buffer
2956  *    fragmentation.
2957  */
2958 /*ARGSUSED*/
2959 uint32_t
2960 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2961 {
2962 	uint64_t maxxfer;
2963 
2964 
2965 	/*
2966 	 * take the min of maxxfer and the the worse case fragementation
2967 	 * (e.g. every cookie <= 1 page)
2968 	 */
2969 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2970 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2971 
2972 	/*
2973 	 * If the DMA engine can't reach all off memory, we also need to take
2974 	 * the max size of the copybuf into consideration.
2975 	 */
2976 	if (i_ddi_copybuf_required(attrp)) {
2977 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2978 	}
2979 
2980 	/*
2981 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2982 	 * page so it won't be mistaken for an error value during debug.
2983 	 */
2984 	if (maxxfer >= 0xFFFFFFFF) {
2985 		maxxfer = 0xFFFFF000;
2986 	}
2987 
2988 	/*
2989 	 * make sure the value we return is a whole multiple of the
2990 	 * granlarity.
2991 	 */
2992 	if (attrp->dma_attr_granular > 1) {
2993 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2994 	}
2995 
2996 	return ((uint32_t)maxxfer);
2997 }
2998 
2999 /*ARGSUSED*/
3000 void
3001 translate_devid(dev_info_t *dip)
3002 {
3003 }
3004 
3005 pfn_t
3006 i_ddi_paddr_to_pfn(paddr_t paddr)
3007 {
3008 	pfn_t pfn;
3009 
3010 #ifdef __xpv
3011 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
3012 		pfn = xen_assign_pfn(mmu_btop(paddr));
3013 	} else {
3014 		pfn = mmu_btop(paddr);
3015 	}
3016 #else
3017 	pfn = mmu_btop(paddr);
3018 #endif
3019 
3020 	return (pfn);
3021 }
3022