xref: /illumos-gate/usr/src/uts/i86pc/os/ddi_impl.c (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
25  * Copyright 2014 Pluribus Networks, Inc.
26  * Copyright 2016 Nexenta Systems, Inc.
27  */
28 
29 /*
30  * PC specific DDI implementation
31  */
32 #include <sys/types.h>
33 #include <sys/autoconf.h>
34 #include <sys/avintr.h>
35 #include <sys/bootconf.h>
36 #include <sys/conf.h>
37 #include <sys/cpuvar.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/ddi_subrdefs.h>
40 #include <sys/ethernet.h>
41 #include <sys/fp.h>
42 #include <sys/instance.h>
43 #include <sys/kmem.h>
44 #include <sys/machsystm.h>
45 #include <sys/modctl.h>
46 #include <sys/promif.h>
47 #include <sys/prom_plat.h>
48 #include <sys/sunndi.h>
49 #include <sys/ndi_impldefs.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/sysmacros.h>
52 #include <sys/systeminfo.h>
53 #include <sys/utsname.h>
54 #include <sys/atomic.h>
55 #include <sys/spl.h>
56 #include <sys/archsystm.h>
57 #include <vm/seg_kmem.h>
58 #include <sys/ontrap.h>
59 #include <sys/fm/protocol.h>
60 #include <sys/ramdisk.h>
61 #include <sys/sunndi.h>
62 #include <sys/vmem.h>
63 #include <sys/pci_impl.h>
64 #if defined(__xpv)
65 #include <sys/hypervisor.h>
66 #endif
67 #include <sys/mach_intr.h>
68 #include <vm/hat_i86.h>
69 #include <sys/x86_archext.h>
70 #include <sys/avl.h>
71 #include <sys/font.h>
72 
73 /*
74  * DDI Boot Configuration
75  */
76 
/*
 * Platform drivers on this platform.
 *
 * Array of platform module names.  The final null pointer entry marks the
 * end of the list.
 */
char *platform_module_list[] = {
	"acpippm",
	"ppm",
	(char *)0
};
85 
/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/*
 * NOTE(review): presumably the upper bound on a DMA copy buffer; it is not
 * referenced in this part of the file -- confirm against its users.
 */
size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */

/*
 * NOTE(review): presumably the bounds of the boot ramdisk; neither value is
 * set or read in this part of the file -- confirm.
 */
uint64_t ramdisk_start, ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" with i_ddi_attach_pseudo_node()
 * rather than i_ddi_attach_hw_nodes().
 */
int pseudo_isa = 0;
94 
95 /*
96  * Forward declarations
97  */
98 static int getlongprop_buf();
99 static void get_boot_properties(void);
100 static void impl_bus_initialprobe(void);
101 static void impl_bus_reprobe(void);
102 
103 static int poke_mem(peekpoke_ctlops_t *in_args);
104 static int peek_mem(peekpoke_ctlops_t *in_args);
105 
106 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
107 
108 #if defined(__amd64) && !defined(__xpv)
109 extern void immu_init(void);
110 #endif
111 
/*
 * We use an AVL tree to store contiguous address allocations made with the
 * kalloca() routine, so that we can return the size to free with kfreea().
 * Note that in the future it would be vastly faster if we could eliminate
 * this lookup by insisting that all callers keep track of their own sizes,
 * just as for kmem_alloc().
 */
struct ctgas {
	avl_node_t ctg_link;	/* AVL linkage for this record */
	void *ctg_addr;		/* address of the allocation */
	size_t ctg_size;	/* size to hand back when it is freed */
};

/* Tree of struct ctgas records described above. */
static avl_tree_t ctgtree;

/*
 * NOTE(review): presumably serializes access to ctgtree; the users of
 * CTGLOCK()/CTGUNLOCK() are not in this part of the file -- confirm.
 */
static kmutex_t		ctgmutex;
#define	CTGLOCK()	mutex_enter(&ctgmutex)
#define	CTGUNLOCK()	mutex_exit(&ctgmutex)
130 
/*
 * Minimum pfn value of page_t's put on the free list.  This is to simplify
 * support of ddi dma memory requests which specify small, non-zero addr_lo
 * values.
 *
 * The default value of 2, which corresponds to the only known non-zero addr_lo
 * value used, means a single page will be sacrificed (pfn typically starts
 * at 1).  ddiphysmin can be set to 0 to disable this.  It cannot be set
 * above 0x100; otherwise mp startup panics.
 */
pfn_t	ddiphysmin = 2;
142 
143 static void
144 check_driver_disable(void)
145 {
146 	int proplen = 128;
147 	char *prop_name;
148 	char *drv_name, *propval;
149 	major_t major;
150 
151 	prop_name = kmem_alloc(proplen, KM_SLEEP);
152 	for (major = 0; major < devcnt; major++) {
153 		drv_name = ddi_major_to_name(major);
154 		if (drv_name == NULL)
155 			continue;
156 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
157 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
158 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
159 			if (strcmp(propval, "true") == 0) {
160 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
161 				cmn_err(CE_NOTE, "driver %s disabled",
162 				    drv_name);
163 			}
164 			ddi_prop_free(propval);
165 		}
166 	}
167 	kmem_free(prop_name, proplen);
168 }
169 
170 
171 /*
172  * Configure the hardware on the system.
173  * Called before the rootfs is mounted
174  */
175 void
176 configure(void)
177 {
178 	extern void i_ddi_init_root();
179 
180 #if defined(__i386)
181 	extern int fpu_pentium_fdivbug;
182 #endif	/* __i386 */
183 	extern int fpu_ignored;
184 
185 	/*
186 	 * Determine if an FPU is attached
187 	 */
188 
189 	fpu_probe();
190 
191 #if defined(__i386)
192 	if (fpu_pentium_fdivbug) {
193 		printf("\
194 FP hardware exhibits Pentium floating point divide problem\n");
195 	}
196 #endif	/* __i386 */
197 
198 	if (fpu_ignored) {
199 		printf("FP hardware will not be used\n");
200 	} else if (!fpu_exists) {
201 		printf("No FPU in configuration\n");
202 	}
203 
204 	/*
205 	 * Initialize devices on the machine.
206 	 * Uses configuration tree built by the PROMs to determine what
207 	 * is present, and builds a tree of prototype dev_info nodes
208 	 * corresponding to the hardware which identified itself.
209 	 */
210 
211 	/*
212 	 * Initialize root node.
213 	 */
214 	i_ddi_init_root();
215 
216 	/* reprogram devices not set up by firmware (BIOS) */
217 	impl_bus_reprobe();
218 
219 #if defined(__amd64) && !defined(__xpv)
220 	/*
221 	 * Setup but don't startup the IOMMU
222 	 * Startup happens later via a direct call
223 	 * to IOMMU code by boot code.
224 	 * At this point, all PCI bus renumbering
225 	 * is done, so safe to init the IMMU
226 	 * AKA Intel IOMMU.
227 	 */
228 	immu_init();
229 #endif
230 
231 	/*
232 	 * attach the isa nexus to get ACPI resource usage
233 	 * isa is "kind of" a pseudo node
234 	 */
235 #if defined(__xpv)
236 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
237 		if (pseudo_isa)
238 			(void) i_ddi_attach_pseudo_node("isa");
239 		else
240 			(void) i_ddi_attach_hw_nodes("isa");
241 	}
242 #else
243 	if (pseudo_isa)
244 		(void) i_ddi_attach_pseudo_node("isa");
245 	else
246 		(void) i_ddi_attach_hw_nodes("isa");
247 #endif
248 }
249 
250 /*
251  * The "status" property indicates the operational status of a device.
252  * If this property is present, the value is a string indicating the
253  * status of the device as follows:
254  *
255  *	"okay"		operational.
256  *	"disabled"	not operational, but might become operational.
257  *	"fail"		not operational because a fault has been detected,
258  *			and it is unlikely that the device will become
259  *			operational without repair. no additional details
260  *			are available.
261  *	"fail-xxx"	not operational because a fault has been detected,
262  *			and it is unlikely that the device will become
263  *			operational without repair. "xxx" is additional
264  *			human-readable information about the particular
265  *			fault condition that was detected.
266  *
267  * The absence of this property means that the operational status is
268  * unknown or okay.
269  *
270  * This routine checks the status property of the specified device node
271  * and returns 0 if the operational status indicates failure, and 1 otherwise.
272  *
273  * The property may exist on plug-in cards the existed before IEEE 1275-1994.
274  * And, in that case, the property may not even be a string. So we carefully
275  * check for the value "fail", in the beginning of the string, noting
276  * the property length.
277  */
278 int
279 status_okay(int id, char *buf, int buflen)
280 {
281 	char status_buf[OBP_MAXPROPNAME];
282 	char *bufp = buf;
283 	int len = buflen;
284 	int proplen;
285 	static const char *status = "status";
286 	static const char *fail = "fail";
287 	int fail_len = (int)strlen(fail);
288 
289 	/*
290 	 * Get the proplen ... if it's smaller than "fail",
291 	 * or doesn't exist ... then we don't care, since
292 	 * the value can't begin with the char string "fail".
293 	 *
294 	 * NB: proplen, if it's a string, includes the NULL in the
295 	 * the size of the property, and fail_len does not.
296 	 */
297 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
298 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
299 		return (1);
300 
301 	/*
302 	 * if a buffer was provided, use it
303 	 */
304 	if ((buf == (char *)NULL) || (buflen <= 0)) {
305 		bufp = status_buf;
306 		len = sizeof (status_buf);
307 	}
308 	*bufp = (char)0;
309 
310 	/*
311 	 * Get the property into the buffer, to the extent of the buffer,
312 	 * and in case the buffer is smaller than the property size,
313 	 * NULL terminate the buffer. (This handles the case where
314 	 * a buffer was passed in and the caller wants to print the
315 	 * value, but the buffer was too small).
316 	 */
317 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
318 	    (caddr_t)bufp, len);
319 	*(bufp + len - 1) = (char)0;
320 
321 	/*
322 	 * If the value begins with the char string "fail",
323 	 * then it means the node is failed. We don't care
324 	 * about any other values. We assume the node is ok
325 	 * although it might be 'disabled'.
326 	 */
327 	if (strncmp(bufp, fail, fail_len) == 0)
328 		return (0);
329 
330 	return (1);
331 }
332 
333 /*
334  * Check the status of the device node passed as an argument.
335  *
336  *	if ((status is OKAY) || (status is DISABLED))
337  *		return DDI_SUCCESS
338  *	else
339  *		print a warning and return DDI_FAILURE
340  */
341 /*ARGSUSED1*/
342 int
343 check_status(int id, char *name, dev_info_t *parent)
344 {
345 	char status_buf[64];
346 	char devtype_buf[OBP_MAXPROPNAME];
347 	int retval = DDI_FAILURE;
348 
349 	/*
350 	 * is the status okay?
351 	 */
352 	if (status_okay(id, status_buf, sizeof (status_buf)))
353 		return (DDI_SUCCESS);
354 
355 	/*
356 	 * a status property indicating bad memory will be associated
357 	 * with a node which has a "device_type" property with a value of
358 	 * "memory-controller". in this situation, return DDI_SUCCESS
359 	 */
360 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
361 	    sizeof (devtype_buf)) > 0) {
362 		if (strcmp(devtype_buf, "memory-controller") == 0)
363 			retval = DDI_SUCCESS;
364 	}
365 
366 	/*
367 	 * print the status property information
368 	 */
369 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
370 	return (retval);
371 }
372 
/*
 * Soft interrupt entry point: runs softint() and always returns 1.
 * Both arguments are ignored.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
	softint();
	return (1);
}
380 
/*
 * Allow for implementation specific correction of PROM property values.
 *
 * This implementation is a no-op: none of the arguments are examined and
 * `buffer' is left untouched.
 */

/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
	/*
	 * There are no adjustments needed in this implementation.
	 */
}
394 
395 static int
396 getlongprop_buf(int id, char *name, char *buf, int maxlen)
397 {
398 	int size;
399 
400 	size = prom_getproplen((pnode_t)id, name);
401 	if (size <= 0 || (size > maxlen - 1))
402 		return (-1);
403 
404 	if (-1 == prom_getprop((pnode_t)id, name, buf))
405 		return (-1);
406 
407 	if (strcmp("name", name) == 0) {
408 		if (buf[size - 1] != '\0') {
409 			buf[size] = '\0';
410 			size += 1;
411 		}
412 	}
413 
414 	return (size);
415 }
416 
417 static int
418 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
419 {
420 	int ret;
421 
422 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
423 	    DDI_PROP_DONTPASS, pname, pval, plen))
424 	    == DDI_PROP_SUCCESS) {
425 		*plen = (*plen) * (sizeof (int));
426 	}
427 	return (ret);
428 }
429 
430 
431 /*
432  * Node Configuration
433  */
434 
/*
 * One element of the legacy "intr" property as make_ddi_ppd() reads it:
 * `pri' is copied to intrspec_pri and `vec' to intrspec_vec.
 */
struct prop_ispec {
	uint_t	pri, vec;
};

/*
 * For the x86, we're prepared to claim that the interrupt string
 * is in the form of a list of <ipl,vec> specifications.
 */

/* Inclusive range of vector values accepted by impl_xlate_intrs(). */
#define	VEC_MIN	1
#define	VEC_MAX	255
446 
447 static int
448 impl_xlate_intrs(dev_info_t *child, int *in,
449     struct ddi_parent_private_data *pdptr)
450 {
451 	size_t size;
452 	int n;
453 	struct intrspec *new;
454 	caddr_t got_prop;
455 	int *inpri;
456 	int got_len;
457 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
458 
459 	static char bad_intr_fmt[] =
460 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
461 
462 	/*
463 	 * determine if the driver is expecting the new style "interrupts"
464 	 * property which just contains the IRQ, or the old style which
465 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
466 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
467 	 * in that case, the "interrupt-priorities" property contains the
468 	 * IPL values that match, one for one, the IRQ values in the
469 	 * "interrupts" property.
470 	 */
471 	inpri = NULL;
472 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
473 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
474 		/* the old style "interrupts" property... */
475 
476 		/*
477 		 * The list consists of <ipl,vec> elements
478 		 */
479 		if ((n = (*in++ >> 1)) < 1)
480 			return (DDI_FAILURE);
481 
482 		pdptr->par_nintr = n;
483 		size = n * sizeof (struct intrspec);
484 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
485 
486 		while (n--) {
487 			int level = *in++;
488 			int vec = *in++;
489 
490 			if (level < 1 || level > MAXIPL ||
491 			    vec < VEC_MIN || vec > VEC_MAX) {
492 				cmn_err(CE_CONT, bad_intr_fmt,
493 				    DEVI(child)->devi_name,
494 				    DEVI(child)->devi_instance, level, vec);
495 				goto broken;
496 			}
497 			new->intrspec_pri = level;
498 			if (vec != 2)
499 				new->intrspec_vec = vec;
500 			else
501 				/*
502 				 * irq 2 on the PC bus is tied to irq 9
503 				 * on ISA, EISA and MicroChannel
504 				 */
505 				new->intrspec_vec = 9;
506 			new++;
507 		}
508 
509 		return (DDI_SUCCESS);
510 	} else {
511 		/* the new style "interrupts" property... */
512 
513 		/*
514 		 * The list consists of <vec> elements
515 		 */
516 		if ((n = (*in++)) < 1)
517 			return (DDI_FAILURE);
518 
519 		pdptr->par_nintr = n;
520 		size = n * sizeof (struct intrspec);
521 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
522 
523 		/* XXX check for "interrupt-priorities" property... */
524 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
525 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
526 		    == DDI_PROP_SUCCESS) {
527 			if (n != (got_len / sizeof (int))) {
528 				cmn_err(CE_CONT,
529 				    "bad interrupt-priorities length"
530 				    " from %s%d: expected %d, got %d\n",
531 				    DEVI(child)->devi_name,
532 				    DEVI(child)->devi_instance, n,
533 				    (int)(got_len / sizeof (int)));
534 				goto broken;
535 			}
536 			inpri = (int *)got_prop;
537 		}
538 
539 		while (n--) {
540 			int level;
541 			int vec = *in++;
542 
543 			if (inpri == NULL)
544 				level = 5;
545 			else
546 				level = *inpri++;
547 
548 			if (level < 1 || level > MAXIPL ||
549 			    vec < VEC_MIN || vec > VEC_MAX) {
550 				cmn_err(CE_CONT, bad_intr_fmt,
551 				    DEVI(child)->devi_name,
552 				    DEVI(child)->devi_instance, level, vec);
553 				goto broken;
554 			}
555 			new->intrspec_pri = level;
556 			if (vec != 2)
557 				new->intrspec_vec = vec;
558 			else
559 				/*
560 				 * irq 2 on the PC bus is tied to irq 9
561 				 * on ISA, EISA and MicroChannel
562 				 */
563 				new->intrspec_vec = 9;
564 			new++;
565 		}
566 
567 		if (inpri != NULL)
568 			kmem_free(got_prop, got_len);
569 		return (DDI_SUCCESS);
570 	}
571 
572 broken:
573 	kmem_free(pdptr->par_intr, size);
574 	pdptr->par_intr = NULL;
575 	pdptr->par_nintr = 0;
576 	if (inpri != NULL)
577 		kmem_free(got_prop, got_len);
578 
579 	return (DDI_FAILURE);
580 }
581 
582 /*
583  * Create a ddi_parent_private_data structure from the ddi properties of
584  * the dev_info node.
585  *
586  * The "reg" and either an "intr" or "interrupts" properties are required
587  * if the driver wishes to create mappings or field interrupts on behalf
588  * of the device.
589  *
590  * The "reg" property is assumed to be a list of at least one triple
591  *
592  *	<bustype, address, size>*1
593  *
594  * The "intr" property is assumed to be a list of at least one duple
595  *
596  *	<SPARC ipl, vector#>*1
597  *
598  * The "interrupts" property is assumed to be a list of at least one
599  * n-tuples that describes the interrupt capabilities of the bus the device
600  * is connected to.  For SBus, this looks like
601  *
602  *	<SBus-level>*1
603  *
604  * (This property obsoletes the 'intr' property).
605  *
606  * The "ranges" property is optional.
607  */
608 void
609 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
610 {
611 	struct ddi_parent_private_data *pdptr;
612 	int n;
613 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
614 	uint_t reg_len, rng_len, intr_len, irupts_len;
615 
616 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
617 
618 	/*
619 	 * Handle the 'reg' property.
620 	 */
621 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
622 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
623 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
624 		pdptr->par_reg = (struct regspec *)reg_prop;
625 	}
626 
627 	/*
628 	 * See if I have a range (adding one where needed - this
629 	 * means to add one for sbus node in sun4c, when romvec > 0,
630 	 * if no range is already defined in the PROM node.
631 	 * (Currently no sun4c PROMS define range properties,
632 	 * but they should and may in the future.)  For the SBus
633 	 * node, the range is defined by the SBus reg property.
634 	 */
635 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
636 	    == DDI_PROP_SUCCESS) {
637 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
638 		pdptr->par_rng = (struct rangespec *)rng_prop;
639 	}
640 
641 	/*
642 	 * Handle the 'intr' and 'interrupts' properties
643 	 */
644 
645 	/*
646 	 * For backwards compatibility
647 	 * we first look for the 'intr' property for the device.
648 	 */
649 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
650 	    != DDI_PROP_SUCCESS) {
651 		intr_len = 0;
652 	}
653 
654 	/*
655 	 * If we're to support bus adapters and future platforms cleanly,
656 	 * we need to support the generalized 'interrupts' property.
657 	 */
658 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
659 	    &irupts_len) != DDI_PROP_SUCCESS) {
660 		irupts_len = 0;
661 	} else if (intr_len != 0) {
662 		/*
663 		 * If both 'intr' and 'interrupts' are defined,
664 		 * then 'interrupts' wins and we toss the 'intr' away.
665 		 */
666 		ddi_prop_free((void *)intr_prop);
667 		intr_len = 0;
668 	}
669 
670 	if (intr_len != 0) {
671 
672 		/*
673 		 * Translate the 'intr' property into an array
674 		 * an array of struct intrspec's.  There's not really
675 		 * very much to do here except copy what's out there.
676 		 */
677 
678 		struct intrspec *new;
679 		struct prop_ispec *l;
680 
681 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
682 		l = (struct prop_ispec *)intr_prop;
683 		pdptr->par_intr =
684 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
685 		while (n--) {
686 			new->intrspec_pri = l->pri;
687 			new->intrspec_vec = l->vec;
688 			new++;
689 			l++;
690 		}
691 		ddi_prop_free((void *)intr_prop);
692 
693 	} else if ((n = irupts_len) != 0) {
694 		size_t size;
695 		int *out;
696 
697 		/*
698 		 * Translate the 'interrupts' property into an array
699 		 * of intrspecs for the rest of the DDI framework to
700 		 * toy with.  Only our ancestors really know how to
701 		 * do this, so ask 'em.  We massage the 'interrupts'
702 		 * property so that it is pre-pended by a count of
703 		 * the number of integers in the argument.
704 		 */
705 		size = sizeof (int) + n;
706 		out = kmem_alloc(size, KM_SLEEP);
707 		*out = n / sizeof (int);
708 		bcopy(irupts_prop, out + 1, (size_t)n);
709 		ddi_prop_free((void *)irupts_prop);
710 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
711 			cmn_err(CE_CONT,
712 			    "Unable to translate 'interrupts' for %s%d\n",
713 			    DEVI(child)->devi_binding_name,
714 			    DEVI(child)->devi_instance);
715 		}
716 		kmem_free(out, size);
717 	}
718 }
719 
720 /*
721  * Name a child
722  */
723 static int
724 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
725 {
726 	/*
727 	 * Fill in parent-private data and this function returns to us
728 	 * an indication if it used "registers" to fill in the data.
729 	 */
730 	if (ddi_get_parent_data(child) == NULL) {
731 		struct ddi_parent_private_data *pdptr;
732 		make_ddi_ppd(child, &pdptr);
733 		ddi_set_parent_data(child, pdptr);
734 	}
735 
736 	name[0] = '\0';
737 	if (sparc_pd_getnreg(child) > 0) {
738 		(void) snprintf(name, namelen, "%x,%x",
739 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
740 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
741 	}
742 
743 	return (DDI_SUCCESS);
744 }
745 
746 /*
747  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
748  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
749  * the children of sun busses based on the reg spec.
750  *
751  * Handles the following properties (in make_ddi_ppd):
752  *	Property		value
753  *	  Name			type
754  *	reg		register spec
755  *	intr		old-form interrupt spec
756  *	interrupts	new (bus-oriented) interrupt spec
757  *	ranges		range spec
758  */
759 int
760 impl_ddi_sunbus_initchild(dev_info_t *child)
761 {
762 	char name[MAXNAMELEN];
763 	void impl_ddi_sunbus_removechild(dev_info_t *);
764 
765 	/*
766 	 * Name the child, also makes parent private data
767 	 */
768 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
769 	ddi_set_name_addr(child, name);
770 
771 	/*
772 	 * Attempt to merge a .conf node; if successful, remove the
773 	 * .conf node.
774 	 */
775 	if ((ndi_dev_is_persistent_node(child) == 0) &&
776 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
777 		/*
778 		 * Return failure to remove node
779 		 */
780 		impl_ddi_sunbus_removechild(child);
781 		return (DDI_FAILURE);
782 	}
783 	return (DDI_SUCCESS);
784 }
785 
786 void
787 impl_free_ddi_ppd(dev_info_t *dip)
788 {
789 	struct ddi_parent_private_data *pdptr;
790 	size_t n;
791 
792 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
793 		return;
794 
795 	if ((n = (size_t)pdptr->par_nintr) != 0)
796 		/*
797 		 * Note that kmem_free is used here (instead of
798 		 * ddi_prop_free) because the contents of the
799 		 * property were placed into a separate buffer and
800 		 * mucked with a bit before being stored in par_intr.
801 		 * The actual return value from the prop lookup
802 		 * was freed with ddi_prop_free previously.
803 		 */
804 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
805 
806 	if ((n = (size_t)pdptr->par_nrng) != 0)
807 		ddi_prop_free((void *)pdptr->par_rng);
808 
809 	if ((n = pdptr->par_nreg) != 0)
810 		ddi_prop_free((void *)pdptr->par_reg);
811 
812 	kmem_free(pdptr, sizeof (*pdptr));
813 	ddi_set_parent_data(dip, NULL);
814 }
815 
/*
 * Tear down what impl_ddi_sunbus_initchild() set up for `dip': free the
 * parent-private data, clear the unit address and remove the node's
 * properties.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
	impl_free_ddi_ppd(dip);
	ddi_set_name_addr(dip, NULL);
	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	impl_rem_dev_props(dip);
}
826 
827 /*
828  * DDI Interrupt
829  */
830 
831 /*
832  * turn this on to force isa, eisa, and mca device to ignore the new
833  * hardware nodes in the device tree (normally turned on only for
834  * drivers that need it by setting the property "ignore-hardware-nodes"
835  * in their driver.conf file).
836  *
837  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
838  *		as safety valve.
839  */
/*
 * Read by impl_xlate_intrs(): a non-zero value forces the old-style
 * <ipl,irq> parsing of "interrupts" for every device.
 */
int ignore_hardware_nodes = 0;

/*
 * Local data
 */
/* NOTE(review): not referenced in this part of the file -- confirm usage. */
static struct impl_bus_promops *impl_busp;
846 
847 
848 /*
849  * New DDI interrupt framework
850  */
851 
852 /*
853  * i_ddi_intr_ops:
854  *
855  * This is the interrupt operator function wrapper for the bus function
856  * bus_intr_op.
857  */
858 int
859 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
860     ddi_intr_handle_impl_t *hdlp, void * result)
861 {
862 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
863 	int		ret = DDI_FAILURE;
864 
865 	/* request parent to process this interrupt op */
866 	if (NEXUS_HAS_INTR_OP(pdip))
867 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
868 		    pdip, rdip, op, hdlp, result);
869 	else
870 		cmn_err(CE_WARN, "Failed to process interrupt "
871 		    "for %s%d due to down-rev nexus driver %s%d",
872 		    ddi_get_name(rdip), ddi_get_instance(rdip),
873 		    ddi_get_name(pdip), ddi_get_instance(pdip));
874 	return (ret);
875 }
876 
877 /*
878  * i_ddi_add_softint - allocate and add a soft interrupt to the system
879  */
880 int
881 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
882 {
883 	int ret;
884 
885 	/* add soft interrupt handler */
886 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
887 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
888 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
889 }
890 
891 
/*
 * i_ddi_remove_softint - remove the soft interrupt described by `hdlp'.
 * The result of rem_avsoftintr() is deliberately ignored.
 */
void
i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
{
	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
}
897 
898 
extern void (*setsoftint)(int, struct av_softinfo *);
extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);

/*
 * i_ddi_trigger_softint - post the soft interrupt described by `hdlp'.
 *
 * Returns DDI_EPENDING, without changing anything, if
 * av_check_softint_pending() reports the interrupt as already pending.
 * Otherwise `arg2' is installed as the handler's second argument, the
 * interrupt is posted through the setsoftint hook, and DDI_SUCCESS is
 * returned.
 */
int
i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
{
	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
		return (DDI_EPENDING);

	/* arguments must be updated before the interrupt is posted */
	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);

	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
	return (DDI_SUCCESS);
}
913 
914 /*
915  * i_ddi_set_softint_pri:
916  *
917  * The way this works is that it first tries to add a softint vector
918  * at the new priority in hdlp. If that succeeds; then it removes the
919  * existing softint vector at the old priority.
920  */
921 int
922 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
923 {
924 	int ret;
925 
926 	/*
927 	 * If a softint is pending at the old priority then fail the request.
928 	 */
929 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
930 		return (DDI_FAILURE);
931 
932 	ret = av_softint_movepri((void *)hdlp, old_pri);
933 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
934 }
935 
936 void
937 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
938 {
939 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
940 }
941 
/*
 * Free the ihdl_plat_t installed by i_ddi_alloc_intr_phdl() and clear the
 * handle's pointer to it.
 */
void
i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
	hdlp->ih_private = NULL;
}
948 
949 int
950 i_ddi_get_intx_nintrs(dev_info_t *dip)
951 {
952 	struct ddi_parent_private_data *pdp;
953 
954 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
955 		return (0);
956 
957 	return (pdp->par_nintr);
958 }
959 
960 /*
961  * DDI Memory/DMA
962  */
963 
964 /*
965  * Support for allocating DMAable memory to implement
966  * ddi_dma_mem_alloc(9F) interface.
967  */
968 
/*
 * kmem_io_init() creates KA_NCACHE caches per arena, with object sizes
 * KA_ALIGN << c for c in [0, KA_NCACHE).
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
972 
/*
 * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
 * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
 *
 * NOTE(review): the unlabelled initializers below are positional; their
 * meaning follows the member order of ddi_dma_attr_t, which is not
 * visible in this file -- confirm before editing any of them.
 */

static ddi_dma_attr_t kmem_io_attr = {
	DMA_ATTR_V0,
	0x0000000000000000ULL,		/* dma_attr_addr_lo */
	0x0000000000000000ULL,		/* dma_attr_addr_hi */
	0x00ffffff,
	0x1000,				/* dma_attr_align */
	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
};
986 
/*
 * kmem io memory ranges and indices.
 *
 * Each IO_* value indexes both kmem_io[] and io_arena_params[]; the
 * entries run from the largest address limit (IO_4P) down to the smallest
 * (IO_16M).  MAX_MEM_RANGES is the number of ranges.
 */
enum {
	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};
992 
/*
 * Per-range allocation state, indexed by the IO_* constants.
 * kmem_io_init() fills in the arena and the KA_NCACHE caches; the
 * segkmem_alloc_io_*() functions pass the range's kmem_io_attr on when
 * allocating pages.
 */
static struct {
	vmem_t		*kmem_io_arena;
	kmem_cache_t	*kmem_io_cache[KA_NCACHE];
	ddi_dma_attr_t	kmem_io_attr;
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;		/* index of first populated kmem_io[] */
1000 
1001 static page_t *
1002 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
1003 {
1004 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1005 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1006 
1007 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1008 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1009 }
1010 
1011 #ifdef __xpv
/*
 * Free routine installed for the kmem_io arenas by kmem_io_init() when
 * building for __xpv: frees through segkmem_xfree() with
 * page_destroy_io() as the page destructor.
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
	extern void page_destroy_io(page_t *);
	segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
1018 #endif
1019 
1020 static void *
1021 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1022 {
1023 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1024 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1025 }
1026 
1027 static void *
1028 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1029 {
1030 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1031 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1032 }
1033 
1034 static void *
1035 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1036 {
1037 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1038 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1039 }
1040 
1041 static void *
1042 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1043 {
1044 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1045 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1046 }
1047 
1048 static void *
1049 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1050 {
1051 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1052 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1053 }
1054 
1055 static void *
1056 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1057 {
1058 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1059 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1060 }
1061 
1062 static void *
1063 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1064 {
1065 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1066 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1067 }
1068 
1069 static void *
1070 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1071 {
1072 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1073 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1074 }
1075 
1076 static void *
1077 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1078 {
1079 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1080 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1081 }
1082 
1083 static void *
1084 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1085 {
1086 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1087 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1088 }
1089 
1090 static void *
1091 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1092 {
1093 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1094 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1095 }
1096 
1097 struct {
1098 	uint64_t	io_limit;
1099 	char		*io_name;
1100 	void		*(*io_alloc)(vmem_t *, size_t, int);
1101 	int		io_initial;	/* kmem_io_init during startup */
1102 } io_arena_params[MAX_MEM_RANGES] = {
1103 	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
1104 	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
1105 	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
1106 	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
1107 	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
1108 	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
1109 	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
1110 	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
1111 	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
1112 	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
1113 	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
1114 };
1115 
1116 void
1117 kmem_io_init(int a)
1118 {
1119 	int	c;
1120 	char name[40];
1121 
1122 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1123 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1124 #ifdef __xpv
1125 	    segkmem_free_io,
1126 #else
1127 	    segkmem_free,
1128 #endif
1129 	    heap_arena, 0, VM_SLEEP);
1130 
1131 	for (c = 0; c < KA_NCACHE; c++) {
1132 		size_t size = KA_ALIGN << c;
1133 		(void) sprintf(name, "%s_%lu",
1134 		    io_arena_params[a].io_name, size);
1135 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1136 		    size, size, NULL, NULL, NULL, NULL,
1137 		    kmem_io[a].kmem_io_arena, 0);
1138 	}
1139 }
1140 
1141 /*
1142  * Return the index of the highest memory range for addr.
1143  */
1144 static int
1145 kmem_io_index(uint64_t addr)
1146 {
1147 	int n;
1148 
1149 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1150 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1151 			if (kmem_io[n].kmem_io_arena == NULL)
1152 				kmem_io_init(n);
1153 			return (n);
1154 		}
1155 	}
1156 	panic("kmem_io_index: invalid addr - must be at least 16m");
1157 
1158 	/*NOTREACHED*/
1159 }
1160 
1161 /*
1162  * Return the index of the next kmem_io populated memory range
1163  * after curindex.
1164  */
1165 static int
1166 kmem_io_index_next(int curindex)
1167 {
1168 	int n;
1169 
1170 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1171 		if (kmem_io[n].kmem_io_arena)
1172 			return (n);
1173 	}
1174 	return (-1);
1175 }
1176 
1177 /*
1178  * allow kmem to be mapped in with different PTE cache attribute settings.
1179  * Used by i_ddi_mem_alloc()
1180  */
1181 int
1182 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1183 {
1184 	uint_t hat_flags;
1185 	caddr_t kva_end;
1186 	uint_t hat_attr;
1187 	pfn_t pfn;
1188 
1189 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1190 		return (-1);
1191 	}
1192 
1193 	hat_attr &= ~HAT_ORDER_MASK;
1194 	hat_attr |= order | HAT_NOSYNC;
1195 	hat_flags = HAT_LOAD_LOCK;
1196 
1197 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1198 	    (uintptr_t)PAGEMASK);
1199 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1200 
1201 	while (kva < kva_end) {
1202 		pfn = hat_getpfnum(kas.a_hat, kva);
1203 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1204 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1205 		kva += MMU_PAGESIZE;
1206 	}
1207 
1208 	return (0);
1209 }
1210 
1211 static int
1212 ctgcompare(const void *a1, const void *a2)
1213 {
1214 	/* we just want to compare virtual addresses */
1215 	a1 = ((struct ctgas *)a1)->ctg_addr;
1216 	a2 = ((struct ctgas *)a2)->ctg_addr;
1217 	return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1218 }
1219 
1220 void
1221 ka_init(void)
1222 {
1223 	int a;
1224 	paddr_t maxphysaddr;
1225 #if !defined(__xpv)
1226 	extern pfn_t physmax;
1227 
1228 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1229 #else
1230 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1231 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1232 #endif
1233 
1234 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1235 
1236 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1237 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1238 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1239 				io_arena_params[a].io_limit = maxphysaddr;
1240 			else
1241 				a++;
1242 			break;
1243 		}
1244 	}
1245 	kmem_io_idx = a;
1246 
1247 	for (; a < MAX_MEM_RANGES; a++) {
1248 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1249 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1250 		    io_arena_params[a].io_limit;
1251 		/*
1252 		 * initialize kmem_io[] arena/cache corresponding to
1253 		 * maxphysaddr and to the "common" io memory ranges that
1254 		 * have io_initial set to a non-zero value.
1255 		 */
1256 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1257 			kmem_io_init(a);
1258 	}
1259 
1260 	/* initialize ctgtree */
1261 	avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1262 	    offsetof(struct ctgas, ctg_link));
1263 }
1264 
1265 /*
1266  * put contig address/size
1267  */
1268 static void *
1269 putctgas(void *addr, size_t size)
1270 {
1271 	struct ctgas    *ctgp;
1272 	if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1273 		ctgp->ctg_addr = addr;
1274 		ctgp->ctg_size = size;
1275 		CTGLOCK();
1276 		avl_add(&ctgtree, ctgp);
1277 		CTGUNLOCK();
1278 	}
1279 	return (ctgp);
1280 }
1281 
1282 /*
1283  * get contig size by addr
1284  */
1285 static size_t
1286 getctgsz(void *addr)
1287 {
1288 	struct ctgas    *ctgp;
1289 	struct ctgas    find;
1290 	size_t		sz = 0;
1291 
1292 	find.ctg_addr = addr;
1293 	CTGLOCK();
1294 	if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1295 		avl_remove(&ctgtree, ctgp);
1296 	}
1297 	CTGUNLOCK();
1298 
1299 	if (ctgp != NULL) {
1300 		sz = ctgp->ctg_size;
1301 		kmem_free(ctgp, sizeof (*ctgp));
1302 	}
1303 
1304 	return (sz);
1305 }
1306 
1307 /*
1308  * contig_alloc:
1309  *
1310  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1311  *	specified in 'attr'.
1312  *
1313  *	Not all of memory need to be physically contiguous if the
1314  *	scatter-gather list length is greater than 1.
1315  */
1316 
1317 /*ARGSUSED*/
1318 void *
1319 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1320 {
1321 	pgcnt_t		pgcnt = btopr(size);
1322 	size_t		asize = pgcnt * PAGESIZE;
1323 	page_t		*ppl;
1324 	int		pflag;
1325 	void		*addr;
1326 
1327 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1328 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1329 
1330 	/* segkmem_xalloc */
1331 
1332 	if (align <= PAGESIZE)
1333 		addr = vmem_alloc(heap_arena, asize,
1334 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1335 	else
1336 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1337 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1338 	if (addr) {
1339 		ASSERT(!((uintptr_t)addr & (align - 1)));
1340 
1341 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1342 			vmem_free(heap_arena, addr, asize);
1343 			return (NULL);
1344 		}
1345 		pflag = PG_EXCL;
1346 
1347 		if (cansleep)
1348 			pflag |= PG_WAIT;
1349 
1350 		/* 4k req gets from freelists rather than pfn search */
1351 		if (pgcnt > 1 || align > PAGESIZE)
1352 			pflag |= PG_PHYSCONTIG;
1353 
1354 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1355 		    asize, pflag, &kas, (caddr_t)addr, attr);
1356 
1357 		if (!ppl) {
1358 			vmem_free(heap_arena, addr, asize);
1359 			page_unresv(pgcnt);
1360 			return (NULL);
1361 		}
1362 
1363 		while (ppl != NULL) {
1364 			page_t	*pp = ppl;
1365 			page_sub(&ppl, pp);
1366 			ASSERT(page_iolock_assert(pp));
1367 			page_io_unlock(pp);
1368 			page_downgrade(pp);
1369 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1370 			    pp, (PROT_ALL & ~PROT_USER) |
1371 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1372 		}
1373 	}
1374 	return (addr);
1375 }
1376 
1377 void
1378 contig_free(void *addr, size_t size)
1379 {
1380 	pgcnt_t	pgcnt = btopr(size);
1381 	size_t	asize = pgcnt * PAGESIZE;
1382 	caddr_t	a, ea;
1383 	page_t	*pp;
1384 
1385 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1386 
1387 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1388 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1389 		if (!pp)
1390 			panic("contig_free: contig pp not found");
1391 
1392 		if (!page_tryupgrade(pp)) {
1393 			page_unlock(pp);
1394 			pp = page_lookup(&kvp,
1395 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1396 			if (pp == NULL)
1397 				panic("contig_free: page freed");
1398 		}
1399 		page_destroy(pp, 0);
1400 	}
1401 
1402 	page_unresv(pgcnt);
1403 	vmem_free(heap_arena, addr, asize);
1404 }
1405 
1406 /*
1407  * Allocate from the system, aligned on a specific boundary.
1408  * The alignment, if non-zero, must be a power of 2.
1409  */
1410 static void *
1411 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1412     ddi_dma_attr_t *attr)
1413 {
1414 	size_t *addr, *raddr, rsize;
1415 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
1416 	int a, i, c;
1417 	vmem_t *vmp;
1418 	kmem_cache_t *cp = NULL;
1419 
1420 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1421 		return (NULL);
1422 
1423 	align = MAX(align, hdrsize);
1424 	ASSERT((align & (align - 1)) == 0);
1425 
1426 	/*
1427 	 * All of our allocators guarantee 16-byte alignment, so we don't
1428 	 * need to reserve additional space for the header.
1429 	 * To simplify picking the correct kmem_io_cache, we round up to
1430 	 * a multiple of KA_ALIGN.
1431 	 */
1432 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1433 
1434 	if (physcontig && rsize > PAGESIZE) {
1435 		if (addr = contig_alloc(size, attr, align, cansleep)) {
1436 			if (!putctgas(addr, size))
1437 				contig_free(addr, size);
1438 			else
1439 				return (addr);
1440 		}
1441 		return (NULL);
1442 	}
1443 
1444 	a = kmem_io_index(attr->dma_attr_addr_hi);
1445 
1446 	if (rsize > PAGESIZE) {
1447 		vmp = kmem_io[a].kmem_io_arena;
1448 		raddr = vmem_alloc(vmp, rsize,
1449 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1450 	} else {
1451 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1452 		cp = kmem_io[a].kmem_io_cache[c];
1453 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1454 		    KM_NOSLEEP);
1455 	}
1456 
1457 	if (raddr == NULL) {
1458 		int	na;
1459 
1460 		ASSERT(cansleep == 0);
1461 		if (rsize > PAGESIZE)
1462 			return (NULL);
1463 		/*
1464 		 * System does not have memory in the requested range.
1465 		 * Try smaller kmem io ranges and larger cache sizes
1466 		 * to see if there might be memory available in
1467 		 * these other caches.
1468 		 */
1469 
1470 		for (na = kmem_io_index_next(a); na >= 0;
1471 		    na = kmem_io_index_next(na)) {
1472 			ASSERT(kmem_io[na].kmem_io_arena);
1473 			cp = kmem_io[na].kmem_io_cache[c];
1474 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1475 			if (raddr)
1476 				goto kallocdone;
1477 		}
1478 		/* now try the larger kmem io cache sizes */
1479 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1480 			for (i = c + 1; i < KA_NCACHE; i++) {
1481 				cp = kmem_io[na].kmem_io_cache[i];
1482 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1483 				if (raddr)
1484 					goto kallocdone;
1485 			}
1486 		}
1487 		return (NULL);
1488 	}
1489 
1490 kallocdone:
1491 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1492 	    rsize > PAGESIZE);
1493 
1494 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1495 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1496 
1497 	addr[-4] = (size_t)cp;
1498 	addr[-3] = (size_t)vmp;
1499 	addr[-2] = (size_t)raddr;
1500 	addr[-1] = rsize;
1501 
1502 	return (addr);
1503 }
1504 
1505 static void
1506 kfreea(void *addr)
1507 {
1508 	size_t		size;
1509 
1510 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1511 		contig_free(addr, size);
1512 	} else {
1513 		size_t	*saddr = addr;
1514 		if (saddr[-4] == 0)
1515 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1516 			    saddr[-1]);
1517 		else
1518 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1519 			    (void *)saddr[-2]);
1520 	}
1521 }
1522 
1523 /*ARGSUSED*/
1524 void
1525 i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
1526 {
1527 }
1528 
1529 /*
1530  * Check if the specified cache attribute is supported on the platform.
1531  * This function must be called before i_ddi_cacheattr_to_hatacc().
1532  */
1533 boolean_t
1534 i_ddi_check_cache_attr(uint_t flags)
1535 {
1536 	/*
1537 	 * The cache attributes are mutually exclusive. Any combination of
1538 	 * the attributes leads to a failure.
1539 	 */
1540 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1541 	if ((cache_attr != 0) && !ISP2(cache_attr))
1542 		return (B_FALSE);
1543 
1544 	/* All cache attributes are supported on X86/X64 */
1545 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1546 	    IOMEM_DATA_UC_WR_COMBINE))
1547 		return (B_TRUE);
1548 
1549 	/* undefined attributes */
1550 	return (B_FALSE);
1551 }
1552 
1553 /* set HAT cache attributes from the cache attributes */
1554 void
1555 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1556 {
1557 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1558 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1559 
1560 	/*
1561 	 * If write-combining is not supported, then it falls back
1562 	 * to uncacheable.
1563 	 */
1564 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1565 	    !is_x86_feature(x86_featureset, X86FSET_PAT))
1566 		cache_attr = IOMEM_DATA_UNCACHED;
1567 
1568 	/*
1569 	 * set HAT attrs according to the cache attrs.
1570 	 */
1571 	switch (cache_attr) {
1572 	case IOMEM_DATA_UNCACHED:
1573 		*hataccp &= ~HAT_ORDER_MASK;
1574 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1575 		break;
1576 	case IOMEM_DATA_UC_WR_COMBINE:
1577 		*hataccp &= ~HAT_ORDER_MASK;
1578 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1579 		break;
1580 	case IOMEM_DATA_CACHED:
1581 		*hataccp &= ~HAT_ORDER_MASK;
1582 		*hataccp |= HAT_UNORDERED_OK;
1583 		break;
1584 	/*
1585 	 * This case must not occur because the cache attribute is scrutinized
1586 	 * before this function is called.
1587 	 */
1588 	default:
1589 		/*
1590 		 * set cacheable to hat attrs.
1591 		 */
1592 		*hataccp &= ~HAT_ORDER_MASK;
1593 		*hataccp |= HAT_UNORDERED_OK;
1594 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1595 		    fname, cache_attr);
1596 	}
1597 }
1598 
1599 /*
1600  * This should actually be called i_ddi_dma_mem_alloc. There should
1601  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1602  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1603  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1604  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1605  * so far which is used for both, DMA and PIO, we have to use the DMA
1606  * ctl ops to make everybody happy.
1607  */
1608 /*ARGSUSED*/
1609 int
1610 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1611     size_t length, int cansleep, int flags,
1612     ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1613     size_t *real_length, ddi_acc_hdl_t *ap)
1614 {
1615 	caddr_t a;
1616 	int iomin;
1617 	ddi_acc_impl_t *iap;
1618 	int physcontig = 0;
1619 	pgcnt_t npages;
1620 	pgcnt_t minctg;
1621 	uint_t order;
1622 	int e;
1623 
1624 	/*
1625 	 * Check legality of arguments
1626 	 */
1627 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1628 		return (DDI_FAILURE);
1629 	}
1630 
1631 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1632 	    !ISP2(attr->dma_attr_align) || !ISP2(attr->dma_attr_minxfer)) {
1633 		return (DDI_FAILURE);
1634 	}
1635 
1636 	/*
1637 	 * figure out most restrictive alignment requirement
1638 	 */
1639 	iomin = attr->dma_attr_minxfer;
1640 	iomin = maxbit(iomin, attr->dma_attr_align);
1641 	if (iomin == 0)
1642 		return (DDI_FAILURE);
1643 
1644 	ASSERT((iomin & (iomin - 1)) == 0);
1645 
1646 	/*
1647 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1648 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1649 	 * memory that ends on a page boundry.
1650 	 * Don't want to have to different cache mappings to the same
1651 	 * physical page.
1652 	 */
1653 	if (OVERRIDE_CACHE_ATTR(flags)) {
1654 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1655 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1656 	}
1657 
1658 	/*
1659 	 * Determine if we need to satisfy the request for physically
1660 	 * contiguous memory or alignments larger than pagesize.
1661 	 */
1662 	npages = btopr(length + attr->dma_attr_align);
1663 	minctg = howmany(npages, attr->dma_attr_sgllen);
1664 
1665 	if (minctg > 1) {
1666 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1667 		/*
1668 		 * verify that the minimum contig requirement for the
1669 		 * actual length does not cross segment boundary.
1670 		 */
1671 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1672 		    size_t);
1673 		npages = btopr(length);
1674 		minctg = howmany(npages, attr->dma_attr_sgllen);
1675 		if (minctg > pfnseg + 1)
1676 			return (DDI_FAILURE);
1677 		physcontig = 1;
1678 	} else {
1679 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1680 	}
1681 
1682 	/*
1683 	 * Allocate the requested amount from the system.
1684 	 */
1685 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1686 
1687 	if ((*kaddrp = a) == NULL)
1688 		return (DDI_FAILURE);
1689 
1690 	/*
1691 	 * if we to modify the cache attributes, go back and muck with the
1692 	 * mappings.
1693 	 */
1694 	if (OVERRIDE_CACHE_ATTR(flags)) {
1695 		order = 0;
1696 		i_ddi_cacheattr_to_hatacc(flags, &order);
1697 		e = kmem_override_cache_attrs(a, length, order);
1698 		if (e != 0) {
1699 			kfreea(a);
1700 			return (DDI_FAILURE);
1701 		}
1702 	}
1703 
1704 	if (real_length) {
1705 		*real_length = length;
1706 	}
1707 	if (ap) {
1708 		/*
1709 		 * initialize access handle
1710 		 */
1711 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1712 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1713 		impl_acc_hdl_init(ap);
1714 	}
1715 
1716 	return (DDI_SUCCESS);
1717 }
1718 
1719 /* ARGSUSED */
1720 void
1721 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1722 {
1723 	if (ap != NULL) {
1724 		/*
1725 		 * if we modified the cache attributes on alloc, go back and
1726 		 * fix them since this memory could be returned to the
1727 		 * general pool.
1728 		 */
1729 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1730 			uint_t order = 0;
1731 			int e;
1732 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1733 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1734 			if (e != 0) {
1735 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1736 				    "override cache attrs, memory leaked\n");
1737 				return;
1738 			}
1739 		}
1740 	}
1741 	kfreea(kaddr);
1742 }
1743 
1744 /*
1745  * Access Barriers
1746  *
1747  */
1748 /*ARGSUSED*/
1749 int
1750 i_ddi_ontrap(ddi_acc_handle_t hp)
1751 {
1752 	return (DDI_FAILURE);
1753 }
1754 
1755 /*ARGSUSED*/
1756 void
1757 i_ddi_notrap(ddi_acc_handle_t hp)
1758 {
1759 }
1760 
1761 
1762 /*
1763  * Misc Functions
1764  */
1765 
1766 /*
1767  * Implementation instance override functions
1768  *
1769  * No override on i86pc
1770  */
1771 /*ARGSUSED*/
1772 uint_t
1773 impl_assign_instance(dev_info_t *dip)
1774 {
1775 	return ((uint_t)-1);
1776 }
1777 
1778 /*ARGSUSED*/
1779 int
1780 impl_keep_instance(dev_info_t *dip)
1781 {
1782 
1783 #if defined(__xpv)
1784 	/*
1785 	 * Do not persist instance numbers assigned to devices in dom0
1786 	 */
1787 	dev_info_t *pdip;
1788 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1789 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1790 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1791 			return (DDI_SUCCESS);
1792 	}
1793 #endif
1794 	return (DDI_FAILURE);
1795 }
1796 
1797 /*ARGSUSED*/
1798 int
1799 impl_free_instance(dev_info_t *dip)
1800 {
1801 	return (DDI_FAILURE);
1802 }
1803 
1804 /*ARGSUSED*/
1805 int
1806 impl_check_cpu(dev_info_t *devi)
1807 {
1808 	return (DDI_SUCCESS);
1809 }
1810 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this does nothing.
 */
void
arch_power_down(void)
{
}
1818 
1819 /*
1820  * Copy name to property_name, since name
1821  * is in the low address range below kernelbase.
1822  */
1823 static void
1824 copy_boot_str(const char *boot_str, char *kern_str, int len)
1825 {
1826 	int i = 0;
1827 
1828 	while (i < len - 1 && boot_str[i] != '\0') {
1829 		kern_str[i] = boot_str[i];
1830 		i++;
1831 	}
1832 
1833 	kern_str[i] = 0;	/* null terminate */
1834 	if (boot_str[i] != '\0')
1835 		cmn_err(CE_WARN,
1836 		    "boot property string is truncated to %s", kern_str);
1837 }
1838 
1839 static void
1840 get_boot_properties(void)
1841 {
1842 	extern char hw_provider[];
1843 	dev_info_t *devi;
1844 	char *name;
1845 	int length, flags;
1846 	char property_name[50], property_val[50];
1847 	void *bop_staging_area;
1848 
1849 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1850 
1851 	/*
1852 	 * Import "root" properties from the boot.
1853 	 *
1854 	 * We do this by invoking BOP_NEXTPROP until the list
1855 	 * is completely copied in.
1856 	 */
1857 
1858 	devi = ddi_root_node();
1859 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1860 	    name;					/* NULL => DONE */
1861 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1862 
1863 		/* copy string to memory above kernelbase */
1864 		copy_boot_str(name, property_name, 50);
1865 
1866 		/*
1867 		 * Skip vga properties. They will be picked up later
1868 		 * by get_vga_properties.
1869 		 */
1870 		if (strcmp(property_name, "display-edif-block") == 0 ||
1871 		    strcmp(property_name, "display-edif-id") == 0) {
1872 			continue;
1873 		}
1874 
1875 		length = BOP_GETPROPLEN(bootops, property_name);
1876 		if (length < 0)
1877 			continue;
1878 		if (length > MMU_PAGESIZE) {
1879 			cmn_err(CE_NOTE,
1880 			    "boot property %s longer than 0x%x, ignored\n",
1881 			    property_name, MMU_PAGESIZE);
1882 			continue;
1883 		}
1884 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1885 		flags = do_bsys_getproptype(bootops, property_name);
1886 
1887 		/*
1888 		 * special properties:
1889 		 * si-machine, si-hw-provider
1890 		 *	goes to kernel data structures.
1891 		 * bios-boot-device and stdout
1892 		 *	goes to hardware property list so it may show up
1893 		 *	in the prtconf -vp output. This is needed by
1894 		 *	Install/Upgrade. Once we fix install upgrade,
1895 		 *	this can be taken out.
1896 		 */
1897 		if (strcmp(name, "si-machine") == 0) {
1898 			(void) strncpy(utsname.machine, bop_staging_area,
1899 			    SYS_NMLN);
1900 			utsname.machine[SYS_NMLN - 1] = '\0';
1901 			continue;
1902 		}
1903 		if (strcmp(name, "si-hw-provider") == 0) {
1904 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1905 			hw_provider[SYS_NMLN - 1] = '\0';
1906 			continue;
1907 		}
1908 		if (strcmp(name, "bios-boot-device") == 0) {
1909 			copy_boot_str(bop_staging_area, property_val, 50);
1910 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1911 			    property_name, property_val);
1912 			continue;
1913 		}
1914 		if (strcmp(name, "stdout") == 0) {
1915 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1916 			    property_name, *((int *)bop_staging_area));
1917 			continue;
1918 		}
1919 
1920 		/* Boolean property */
1921 		if (length == 0) {
1922 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1923 			    DDI_PROP_CANSLEEP, property_name, NULL, 0);
1924 			continue;
1925 		}
1926 
1927 		/* Now anything else based on type. */
1928 		switch (flags) {
1929 		case DDI_PROP_TYPE_INT:
1930 			if (length == sizeof (int)) {
1931 				(void) e_ddi_prop_update_int(DDI_DEV_T_NONE,
1932 				    devi, property_name,
1933 				    *((int *)bop_staging_area));
1934 			} else {
1935 				(void) e_ddi_prop_update_int_array(
1936 				    DDI_DEV_T_NONE, devi, property_name,
1937 				    bop_staging_area, length / sizeof (int));
1938 			}
1939 			break;
1940 		case DDI_PROP_TYPE_STRING:
1941 			(void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1942 			    property_name, bop_staging_area);
1943 			break;
1944 		case DDI_PROP_TYPE_BYTE:
1945 			(void) e_ddi_prop_update_byte_array(DDI_DEV_T_NONE,
1946 			    devi, property_name, bop_staging_area, length);
1947 			break;
1948 		case DDI_PROP_TYPE_INT64:
1949 			if (length == sizeof (int64_t)) {
1950 				(void) e_ddi_prop_update_int64(DDI_DEV_T_NONE,
1951 				    devi, property_name,
1952 				    *((int64_t *)bop_staging_area));
1953 			} else {
1954 				(void) e_ddi_prop_update_int64_array(
1955 				    DDI_DEV_T_NONE, devi, property_name,
1956 				    bop_staging_area,
1957 				    length / sizeof (int64_t));
1958 			}
1959 			break;
1960 		default:
1961 			/* Property type unknown, use old prop interface */
1962 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1963 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1964 			    length);
1965 		}
1966 	}
1967 
1968 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1969 }
1970 
1971 static void
1972 get_vga_properties(void)
1973 {
1974 	dev_info_t *devi;
1975 	major_t major;
1976 	char *name;
1977 	int length;
1978 	char property_val[50];
1979 	void *bop_staging_area;
1980 
1981 	/*
1982 	 * XXXX Hack Allert!
1983 	 * There really needs to be a better way for identifying various
1984 	 * console framebuffers and their related issues.  Till then,
1985 	 * check for this one as a replacement to vgatext.
1986 	 */
1987 	major = ddi_name_to_major("ragexl");
1988 	if (major == (major_t)-1) {
1989 		major = ddi_name_to_major("vgatext");
1990 		if (major == (major_t)-1)
1991 			return;
1992 	}
1993 	devi = devnamesp[major].dn_head;
1994 	if (devi == NULL)
1995 		return;
1996 
1997 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1998 
1999 	/*
2000 	 * Import "vga" properties from the boot.
2001 	 */
2002 	name = "display-edif-block";
2003 	length = BOP_GETPROPLEN(bootops, name);
2004 	if (length > 0 && length < MMU_PAGESIZE) {
2005 		BOP_GETPROP(bootops, name, bop_staging_area);
2006 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
2007 		    devi, name, bop_staging_area, length);
2008 	}
2009 
2010 	/*
2011 	 * kdmconfig is also looking for display-type and
2012 	 * video-adapter-type. We default to color and svga.
2013 	 *
2014 	 * Could it be "monochrome", "vga"?
2015 	 * Nah, you've got to come to the 21st century...
2016 	 * And you can set monitor type manually in kdmconfig
2017 	 * if you are really an old junky.
2018 	 */
2019 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2020 	    devi, "display-type", "color");
2021 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2022 	    devi, "video-adapter-type", "svga");
2023 
2024 	name = "display-edif-id";
2025 	length = BOP_GETPROPLEN(bootops, name);
2026 	if (length > 0 && length < MMU_PAGESIZE) {
2027 		BOP_GETPROP(bootops, name, bop_staging_area);
2028 		copy_boot_str(bop_staging_area, property_val, length);
2029 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2030 		    devi, name, property_val);
2031 	}
2032 
2033 	kmem_free(bop_staging_area, MMU_PAGESIZE);
2034 }
2035 
2036 /*
2037  * Copy console font to kernel memory. The temporary font setup
2038  * to use font module was done in early console setup, using low
2039  * memory and data from font module. Now we need to allocate
2040  * kernel memory and copy data over, so the low memory can be freed.
2041  * We can have at most one entry in font list from early boot.
2042  */
2043 static void
2044 get_console_font(void)
2045 {
2046 	struct fontlist *fp, *fl;
2047 	bitmap_data_t *bd;
2048 	struct font *fd, *tmp;
2049 	int i;
2050 
2051 	if (STAILQ_EMPTY(&fonts))
2052 		return;
2053 
2054 	fl = STAILQ_FIRST(&fonts);
2055 	STAILQ_REMOVE_HEAD(&fonts, font_next);
2056 	fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
2057 	bd = kmem_zalloc(sizeof (*bd), KM_SLEEP);
2058 	fd = kmem_zalloc(sizeof (*fd), KM_SLEEP);
2059 
2060 	fp->font_name = NULL;
2061 	fp->font_flags = FONT_BOOT;
2062 	fp->font_data = bd;
2063 
2064 	bd->width = fl->font_data->width;
2065 	bd->height = fl->font_data->height;
2066 	bd->uncompressed_size = fl->font_data->uncompressed_size;
2067 	bd->font = fd;
2068 
2069 	tmp = fl->font_data->font;
2070 	fd->vf_width = tmp->vf_width;
2071 	fd->vf_height = tmp->vf_height;
2072 	for (i = 0; i < VFNT_MAPS; i++) {
2073 		if (tmp->vf_map_count[i] == 0)
2074 			continue;
2075 		fd->vf_map_count[i] = tmp->vf_map_count[i];
2076 		fd->vf_map[i] = kmem_alloc(fd->vf_map_count[i] *
2077 		    sizeof (*fd->vf_map[i]), KM_SLEEP);
2078 		bcopy(tmp->vf_map[i], fd->vf_map[i], fd->vf_map_count[i] *
2079 		    sizeof (*fd->vf_map[i]));
2080 	}
2081 	fd->vf_bytes = kmem_alloc(bd->uncompressed_size, KM_SLEEP);
2082 	bcopy(tmp->vf_bytes, fd->vf_bytes, bd->uncompressed_size);
2083 	STAILQ_INSERT_HEAD(&fonts, fp, font_next);
2084 }
2085 
2086 /*
2087  * This is temporary, but absolutely necessary.  If we are being
2088  * booted with a device tree created by the DevConf project's bootconf
2089  * program, then we have device information nodes that reflect
2090  * reality.  At this point in time in the Solaris release schedule, the
2091  * kernel drivers aren't prepared for reality.  They still depend on their
2092  * own ad-hoc interpretations of the properties created when their .conf
2093  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2094  * property to prevent them from using the nodes passed up from the bootconf
2095  * device tree.
2096  *
2097  * Trying to assemble root file system drivers as we are booting from
2098  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
2100  * reality-based name_addrs.  We help the boot along in this case by
2101  * looking at the pre-bootconf bootpath and determining if we would have
2102  * successfully matched if that had been the bootpath we had chosen.
2103  *
2104  * Note that we only even perform this extra check if we've booted
2105  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2106  * we're trying to match the name_addr specified in the 1275 bootpath.
2107  */
2108 
2109 #define	MAXCOMPONENTLEN	32
2110 
2111 int
2112 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2113 {
2114 	/*
2115 	 *  There are multiple criteria to be met before we can even
2116 	 *  consider allowing a name_addr match here.
2117 	 *
2118 	 *  1) We must have been booted such that the bootconf program
2119 	 *	created device tree nodes and properties.  This can be
2120 	 *	determined by examining the 'bootpath' property.  This
2121 	 *	property will be a non-null string iff bootconf was
2122 	 *	involved in the boot.
2123 	 *
2124 	 *  2) The module that we want to match must be the boot device.
2125 	 *
2126 	 *  3) The instance of the module we are thinking of letting be
2127 	 *	our match must be ignoring hardware nodes.
2128 	 *
2129 	 *  4) The name_addr we want to match must be the name_addr
2130 	 *	specified in the 1275 bootpath.
2131 	 */
2132 	static char bootdev_module[MAXCOMPONENTLEN];
2133 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2134 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2135 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2136 	static int  quickexit;
2137 
2138 	char *daddr;
2139 	int dlen;
2140 
2141 	char	*lkupname;
2142 	int	rv = DDI_FAILURE;
2143 
2144 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2145 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2146 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2147 	    "ignore-hardware-nodes", -1) != -1)) {
2148 		if (strcmp(daddr, caddr) == 0) {
2149 			return (DDI_SUCCESS);
2150 		}
2151 	}
2152 
2153 	if (quickexit)
2154 		return (rv);
2155 
2156 	if (bootdev_module[0] == '\0') {
2157 		char *addrp, *eoaddrp;
2158 		char *busp, *modp, *atp;
2159 		char *bp1275, *bp;
2160 		int  bp1275len, bplen;
2161 
2162 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2163 
2164 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2165 		    ddi_root_node(), 0, "bootpath",
2166 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2167 		    bp1275len <= 1) {
2168 			/*
2169 			 * We didn't boot from bootconf so we never need to
2170 			 * do any special matches.
2171 			 */
2172 			quickexit = 1;
2173 			if (bp1275)
2174 				kmem_free(bp1275, bp1275len);
2175 			return (rv);
2176 		}
2177 
2178 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2179 		    ddi_root_node(), 0, "boot-path",
2180 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2181 			/*
2182 			 * No fallback position for matching. This is
2183 			 * certainly unexpected, but we'll handle it
2184 			 * just in case.
2185 			 */
2186 			quickexit = 1;
2187 			kmem_free(bp1275, bp1275len);
2188 			if (bp)
2189 				kmem_free(bp, bplen);
2190 			return (rv);
2191 		}
2192 
2193 		/*
2194 		 *  Determine boot device module and 1275 name_addr
2195 		 *
2196 		 *  bootpath assumed to be of the form /bus/module@name_addr
2197 		 */
2198 		if (busp = strchr(bp1275, '/')) {
2199 			if (modp = strchr(busp + 1, '/')) {
2200 				if (atp = strchr(modp + 1, '@')) {
2201 					*atp = '\0';
2202 					addrp = atp + 1;
2203 					if (eoaddrp = strchr(addrp, '/'))
2204 						*eoaddrp = '\0';
2205 				}
2206 			}
2207 		}
2208 
2209 		if (modp && addrp) {
2210 			(void) strncpy(bootdev_module, modp + 1,
2211 			    MAXCOMPONENTLEN);
2212 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2213 
2214 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2215 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2216 		} else {
2217 			quickexit = 1;
2218 			kmem_free(bp1275, bp1275len);
2219 			kmem_free(bp, bplen);
2220 			return (rv);
2221 		}
2222 
2223 		/*
2224 		 *  Determine fallback name_addr
2225 		 *
2226 		 *  10/3/96 - Also save fallback module name because it
2227 		 *  might actually be different than the current module
2228 		 *  name.  E.G., ISA pnp drivers have new names.
2229 		 *
2230 		 *  bootpath assumed to be of the form /bus/module@name_addr
2231 		 */
2232 		addrp = NULL;
2233 		if (busp = strchr(bp, '/')) {
2234 			if (modp = strchr(busp + 1, '/')) {
2235 				if (atp = strchr(modp + 1, '@')) {
2236 					*atp = '\0';
2237 					addrp = atp + 1;
2238 					if (eoaddrp = strchr(addrp, '/'))
2239 						*eoaddrp = '\0';
2240 				}
2241 			}
2242 		}
2243 
2244 		if (modp && addrp) {
2245 			(void) strncpy(bootdev_oldmod, modp + 1,
2246 			    MAXCOMPONENTLEN);
2247 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2248 
2249 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2250 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2251 		}
2252 
2253 		/* Free up the bootpath storage now that we're done with it. */
2254 		kmem_free(bp1275, bp1275len);
2255 		kmem_free(bp, bplen);
2256 
2257 		if (bootdev_oldaddr[0] == '\0') {
2258 			quickexit = 1;
2259 			return (rv);
2260 		}
2261 	}
2262 
2263 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2264 	    (strcmp(bootdev_module, lkupname) == 0 ||
2265 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2266 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2267 	    "ignore-hardware-nodes", -1) != -1) ||
2268 	    ignore_hardware_nodes) &&
2269 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2270 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2271 		rv = DDI_SUCCESS;
2272 	}
2273 
2274 	return (rv);
2275 }
2276 
2277 /*
2278  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2279  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2280  */
2281 /*ARGSUSED*/
2282 int
2283 e_ddi_copyfromdev(dev_info_t *devi,
2284     off_t off, const void *devaddr, void *kaddr, size_t len)
2285 {
2286 	bcopy(devaddr, kaddr, len);
2287 	return (0);
2288 }
2289 
2290 /*
2291  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2292  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2293  */
2294 /*ARGSUSED*/
2295 int
2296 e_ddi_copytodev(dev_info_t *devi,
2297     off_t off, const void *kaddr, void *devaddr, size_t len)
2298 {
2299 	bcopy(kaddr, devaddr, len);
2300 	return (0);
2301 }
2302 
2303 
2304 static int
2305 poke_mem(peekpoke_ctlops_t *in_args)
2306 {
2307 	int err = DDI_SUCCESS;
2308 	on_trap_data_t otd;
2309 
2310 	/* Set up protected environment. */
2311 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2312 		switch (in_args->size) {
2313 		case sizeof (uint8_t):
2314 			*(uint8_t *)(in_args->dev_addr) =
2315 			    *(uint8_t *)in_args->host_addr;
2316 			break;
2317 
2318 		case sizeof (uint16_t):
2319 			*(uint16_t *)(in_args->dev_addr) =
2320 			    *(uint16_t *)in_args->host_addr;
2321 			break;
2322 
2323 		case sizeof (uint32_t):
2324 			*(uint32_t *)(in_args->dev_addr) =
2325 			    *(uint32_t *)in_args->host_addr;
2326 			break;
2327 
2328 		case sizeof (uint64_t):
2329 			*(uint64_t *)(in_args->dev_addr) =
2330 			    *(uint64_t *)in_args->host_addr;
2331 			break;
2332 
2333 		default:
2334 			err = DDI_FAILURE;
2335 			break;
2336 		}
2337 	} else
2338 		err = DDI_FAILURE;
2339 
2340 	/* Take down protected environment. */
2341 	no_trap();
2342 
2343 	return (err);
2344 }
2345 
2346 
2347 static int
2348 peek_mem(peekpoke_ctlops_t *in_args)
2349 {
2350 	int err = DDI_SUCCESS;
2351 	on_trap_data_t otd;
2352 
2353 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2354 		switch (in_args->size) {
2355 		case sizeof (uint8_t):
2356 			*(uint8_t *)in_args->host_addr =
2357 			    *(uint8_t *)in_args->dev_addr;
2358 			break;
2359 
2360 		case sizeof (uint16_t):
2361 			*(uint16_t *)in_args->host_addr =
2362 			    *(uint16_t *)in_args->dev_addr;
2363 			break;
2364 
2365 		case sizeof (uint32_t):
2366 			*(uint32_t *)in_args->host_addr =
2367 			    *(uint32_t *)in_args->dev_addr;
2368 			break;
2369 
2370 		case sizeof (uint64_t):
2371 			*(uint64_t *)in_args->host_addr =
2372 			    *(uint64_t *)in_args->dev_addr;
2373 			break;
2374 
2375 		default:
2376 			err = DDI_FAILURE;
2377 			break;
2378 		}
2379 	} else
2380 		err = DDI_FAILURE;
2381 
2382 	no_trap();
2383 	return (err);
2384 }
2385 
2386 
2387 /*
2388  * This is called only to process peek/poke when the DIP is NULL.
2389  * Assume that this is for memory, as nexi take care of device safe accesses.
2390  */
2391 int
2392 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2393 {
2394 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2395 }
2396 
2397 /*
2398  * we've just done a cautious put/get. Check if it was successful by
2399  * calling pci_ereport_post() on all puts and for any gets that return -1
2400  */
2401 static int
2402 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2403     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2404 {
2405 	int	rval = DDI_SUCCESS;
2406 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2407 	ddi_fm_error_t de;
2408 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2409 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2410 	int check_err = 0;
2411 	int repcount = in_args->repcount;
2412 
2413 	if (ctlop == DDI_CTLOPS_POKE &&
2414 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2415 		return (DDI_SUCCESS);
2416 
2417 	if (ctlop == DDI_CTLOPS_PEEK &&
2418 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2419 		for (; repcount; repcount--) {
2420 			switch (in_args->size) {
2421 			case sizeof (uint8_t):
2422 				if (*(uint8_t *)in_args->host_addr == 0xff)
2423 					check_err = 1;
2424 				break;
2425 			case sizeof (uint16_t):
2426 				if (*(uint16_t *)in_args->host_addr == 0xffff)
2427 					check_err = 1;
2428 				break;
2429 			case sizeof (uint32_t):
2430 				if (*(uint32_t *)in_args->host_addr ==
2431 				    0xffffffff)
2432 					check_err = 1;
2433 				break;
2434 			case sizeof (uint64_t):
2435 				if (*(uint64_t *)in_args->host_addr ==
2436 				    0xffffffffffffffff)
2437 					check_err = 1;
2438 				break;
2439 			}
2440 		}
2441 		if (check_err == 0)
2442 			return (DDI_SUCCESS);
2443 	}
2444 	/*
2445 	 * for a cautious put or get or a non-cautious get that returned -1 call
2446 	 * io framework to see if there really was an error
2447 	 */
2448 	bzero(&de, sizeof (ddi_fm_error_t));
2449 	de.fme_version = DDI_FME_VERSION;
2450 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2451 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2452 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2453 		de.fme_acc_handle = in_args->handle;
2454 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2455 		/*
2456 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
2457 		 * Non-hardened drivers may be probing the hardware and
2458 		 * expecting -1 returned. So need to treat errors on
2459 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2460 		 */
2461 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2462 		de.fme_acc_handle = in_args->handle;
2463 	} else {
2464 		/*
2465 		 * Hardened driver doing protected accesses shouldn't
2466 		 * get errors unless there's a hardware problem. Treat
2467 		 * as nonfatal if there's an error, but set UNEXPECTED
2468 		 * so we raise ereports on any errors and potentially
2469 		 * fault the device
2470 		 */
2471 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2472 	}
2473 	(void) scan(dip, &de);
2474 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2475 	    de.fme_status != DDI_FM_OK) {
2476 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2477 		rval = DDI_FAILURE;
2478 		errp->err_ena = de.fme_ena;
2479 		errp->err_expected = de.fme_flag;
2480 		errp->err_status = DDI_FM_NONFATAL;
2481 	}
2482 	return (rval);
2483 }
2484 
2485 /*
2486  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2487  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2488  * recurse, so assume all puts are OK and gets have failed if they return -1
2489  */
2490 static int
2491 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2492 {
2493 	int rval = DDI_SUCCESS;
2494 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2495 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2496 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2497 	int repcount = in_args->repcount;
2498 
2499 	if (ctlop == DDI_CTLOPS_POKE)
2500 		return (rval);
2501 
2502 	for (; repcount; repcount--) {
2503 		switch (in_args->size) {
2504 		case sizeof (uint8_t):
2505 			if (*(uint8_t *)in_args->host_addr == 0xff)
2506 				rval = DDI_FAILURE;
2507 			break;
2508 		case sizeof (uint16_t):
2509 			if (*(uint16_t *)in_args->host_addr == 0xffff)
2510 				rval = DDI_FAILURE;
2511 			break;
2512 		case sizeof (uint32_t):
2513 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2514 				rval = DDI_FAILURE;
2515 			break;
2516 		case sizeof (uint64_t):
2517 			if (*(uint64_t *)in_args->host_addr ==
2518 			    0xffffffffffffffff)
2519 				rval = DDI_FAILURE;
2520 			break;
2521 		}
2522 	}
2523 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2524 	    rval == DDI_FAILURE) {
2525 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2526 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2527 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2528 		errp->err_status = DDI_FM_NONFATAL;
2529 	}
2530 	return (rval);
2531 }
2532 
2533 int
2534 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2535     ddi_ctl_enum_t ctlop, void *arg, void *result,
2536     int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2537     void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2538     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2539 {
2540 	int rval;
2541 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2542 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2543 
2544 	/*
2545 	 * this function only supports cautious accesses, not peeks/pokes
2546 	 * which don't have a handle
2547 	 */
2548 	if (hp == NULL)
2549 		return (DDI_FAILURE);
2550 
2551 	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2552 		if (!mutex_tryenter(err_mutexp)) {
2553 			/*
2554 			 * As this may be a recursive call from within
2555 			 * pci_ereport_post() we can't wait for the mutexes.
2556 			 * Fortunately we know someone is already calling
2557 			 * pci_ereport_post() which will handle the error bits
2558 			 * for us, and as this is a config space access we can
2559 			 * just do the access and check return value for -1
2560 			 * using pci_peekpoke_check_nofma().
2561 			 */
2562 			rval = handler(dip, rdip, ctlop, arg, result);
2563 			if (rval == DDI_SUCCESS)
2564 				rval = pci_peekpoke_check_nofma(arg, ctlop);
2565 			return (rval);
2566 		}
2567 		/*
2568 		 * This can't be a recursive call. Drop the err_mutex and get
2569 		 * both mutexes in the right order. If an error hasn't already
2570 		 * been detected by the ontrap code, use pci_peekpoke_check_fma
2571 		 * which will call pci_ereport_post() to check error status.
2572 		 */
2573 		mutex_exit(err_mutexp);
2574 	}
2575 	mutex_enter(peek_poke_mutexp);
2576 	rval = handler(dip, rdip, ctlop, arg, result);
2577 	if (rval == DDI_SUCCESS) {
2578 		mutex_enter(err_mutexp);
2579 		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2580 		mutex_exit(err_mutexp);
2581 	}
2582 	mutex_exit(peek_poke_mutexp);
2583 	return (rval);
2584 }
2585 
2586 void
2587 impl_setup_ddi(void)
2588 {
2589 #if !defined(__xpv)
2590 	extern void startup_bios_disk(void);
2591 	extern int post_fastreboot;
2592 #endif
2593 	dev_info_t *xdip, *isa_dip;
2594 	rd_existing_t rd_mem_prop;
2595 	int err;
2596 
2597 	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2598 	    (pnode_t)DEVI_SID_NODEID, &xdip);
2599 
2600 	(void) BOP_GETPROP(bootops,
2601 	    "ramdisk_start", (void *)&ramdisk_start);
2602 	(void) BOP_GETPROP(bootops,
2603 	    "ramdisk_end", (void *)&ramdisk_end);
2604 
2605 #ifdef __xpv
2606 	ramdisk_start -= ONE_GIG;
2607 	ramdisk_end -= ONE_GIG;
2608 #endif
2609 	rd_mem_prop.phys = ramdisk_start;
2610 	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2611 
2612 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2613 	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2614 	    sizeof (rd_mem_prop));
2615 	err = ndi_devi_bind_driver(xdip, 0);
2616 	ASSERT(err == 0);
2617 
2618 	/* isa node */
2619 	if (pseudo_isa) {
2620 		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2621 		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
2622 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2623 		    "device_type", "isa");
2624 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2625 		    "bus-type", "isa");
2626 		(void) ndi_devi_bind_driver(isa_dip, 0);
2627 	}
2628 
2629 	/*
2630 	 * Read in the properties from the boot.
2631 	 */
2632 	get_boot_properties();
2633 
2634 	/* not framebuffer should be enumerated, if present */
2635 	get_vga_properties();
2636 
2637 	/* Copy console font if provided by boot. */
2638 	get_console_font();
2639 
2640 	/*
2641 	 * Check for administratively disabled drivers.
2642 	 */
2643 	check_driver_disable();
2644 
2645 #if !defined(__xpv)
2646 	if (!post_fastreboot && BOP_GETPROPLEN(bootops, "efi-systab") < 0)
2647 		startup_bios_disk();
2648 #endif
2649 	/* do bus dependent probes. */
2650 	impl_bus_initialprobe();
2651 }
2652 
2653 dev_t
2654 getrootdev(void)
2655 {
2656 	/*
2657 	 * Usually rootfs.bo_name is initialized by the
2658 	 * the bootpath property from bootenv.rc, but
2659 	 * defaults to "/ramdisk:a" otherwise.
2660 	 */
2661 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2662 }
2663 
/*
 * One registered bus probe callback.  Entries form a singly linked list
 * through next; bus_probes is the head of that list.
 */
struct bus_probe {
	struct bus_probe *next;
	void (*probe)(int);
};

static struct bus_probe *bus_probes;
2668 
2669 void
2670 impl_bus_add_probe(void (*func)(int))
2671 {
2672 	struct bus_probe *probe;
2673 	struct bus_probe *lastprobe = NULL;
2674 
2675 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2676 	probe->probe = func;
2677 	probe->next = NULL;
2678 
2679 	if (!bus_probes) {
2680 		bus_probes = probe;
2681 		return;
2682 	}
2683 
2684 	lastprobe = bus_probes;
2685 	while (lastprobe->next)
2686 		lastprobe = lastprobe->next;
2687 	lastprobe->next = probe;
2688 }
2689 
2690 /*ARGSUSED*/
2691 void
2692 impl_bus_delete_probe(void (*func)(int))
2693 {
2694 	struct bus_probe *prev = NULL;
2695 	struct bus_probe *probe = bus_probes;
2696 
2697 	while (probe) {
2698 		if (probe->probe == func)
2699 			break;
2700 		prev = probe;
2701 		probe = probe->next;
2702 	}
2703 
2704 	if (probe == NULL)
2705 		return;
2706 
2707 	if (prev)
2708 		prev->next = probe->next;
2709 	else
2710 		bus_probes = probe->next;
2711 
2712 	kmem_free(probe, sizeof (struct bus_probe));
2713 }
2714 
2715 /*
2716  * impl_bus_initialprobe
2717  *	Modload the prom simulator, then let it probe to verify existence
2718  *	and type of PCI support.
2719  */
2720 static void
2721 impl_bus_initialprobe(void)
2722 {
2723 	struct bus_probe *probe;
2724 
2725 	/* load modules to install bus probes */
2726 #if defined(__xpv)
2727 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2728 		if (modload("misc", "pci_autoconfig") < 0) {
2729 			panic("failed to load misc/pci_autoconfig");
2730 		}
2731 
2732 		if (modload("drv", "isa") < 0)
2733 			panic("failed to load drv/isa");
2734 	}
2735 
2736 	(void) modload("misc", "xpv_autoconfig");
2737 #else
2738 	if (modload("misc", "pci_autoconfig") < 0) {
2739 		panic("failed to load misc/pci_autoconfig");
2740 	}
2741 
2742 	(void) modload("misc", "acpidev");
2743 
2744 	if (modload("drv", "isa") < 0)
2745 		panic("failed to load drv/isa");
2746 #endif
2747 
2748 	probe = bus_probes;
2749 	while (probe) {
2750 		/* run the probe functions */
2751 		(*probe->probe)(0);
2752 		probe = probe->next;
2753 	}
2754 }
2755 
2756 /*
2757  * impl_bus_reprobe
2758  *	Reprogram devices not set up by firmware.
2759  */
2760 static void
2761 impl_bus_reprobe(void)
2762 {
2763 	struct bus_probe *probe;
2764 
2765 	probe = bus_probes;
2766 	while (probe) {
2767 		/* run the probe function */
2768 		(*probe->probe)(1);
2769 		probe = probe->next;
2770 	}
2771 }
2772 
2773 
2774 /*
2775  * The following functions ready a cautious request to go up to the nexus
2776  * driver.  It is up to the nexus driver to decide how to process the request.
2777  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2778  * differently.
2779  */
2780 
2781 static void
2782 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2783     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2784     ddi_ctl_enum_t cmd)
2785 {
2786 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2787 
2788 	cautacc_ctlops_arg.size = size;
2789 	cautacc_ctlops_arg.dev_addr = dev_addr;
2790 	cautacc_ctlops_arg.host_addr = host_addr;
2791 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2792 	cautacc_ctlops_arg.repcount = repcount;
2793 	cautacc_ctlops_arg.flags = flags;
2794 
2795 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2796 	    &cautacc_ctlops_arg, NULL);
2797 }
2798 
2799 uint8_t
2800 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2801 {
2802 	uint8_t value;
2803 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2804 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2805 
2806 	return (value);
2807 }
2808 
2809 uint16_t
2810 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2811 {
2812 	uint16_t value;
2813 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2814 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2815 
2816 	return (value);
2817 }
2818 
2819 uint32_t
2820 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2821 {
2822 	uint32_t value;
2823 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2824 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2825 
2826 	return (value);
2827 }
2828 
2829 uint64_t
2830 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2831 {
2832 	uint64_t value;
2833 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2834 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2835 
2836 	return (value);
2837 }
2838 
2839 void
2840 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2841 {
2842 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2843 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2844 }
2845 
2846 void
2847 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2848 {
2849 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2850 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2851 }
2852 
2853 void
2854 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2855 {
2856 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2857 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2858 }
2859 
2860 void
2861 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2862 {
2863 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2864 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2865 }
2866 
2867 void
2868 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2869     size_t repcount, uint_t flags)
2870 {
2871 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2872 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2873 }
2874 
2875 void
2876 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2877     uint16_t *dev_addr, size_t repcount, uint_t flags)
2878 {
2879 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2880 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2881 }
2882 
2883 void
2884 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2885     uint32_t *dev_addr, size_t repcount, uint_t flags)
2886 {
2887 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2888 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2889 }
2890 
2891 void
2892 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2893     uint64_t *dev_addr, size_t repcount, uint_t flags)
2894 {
2895 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2896 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2897 }
2898 
2899 void
2900 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2901     size_t repcount, uint_t flags)
2902 {
2903 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2904 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2905 }
2906 
2907 void
2908 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2909     uint16_t *dev_addr, size_t repcount, uint_t flags)
2910 {
2911 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2912 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2913 }
2914 
2915 void
2916 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2917     uint32_t *dev_addr, size_t repcount, uint_t flags)
2918 {
2919 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2920 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2921 }
2922 
2923 void
2924 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2925     uint64_t *dev_addr, size_t repcount, uint_t flags)
2926 {
2927 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2928 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2929 }
2930 
2931 boolean_t
2932 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2933 {
2934 	uint64_t hi_pa;
2935 
2936 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2937 	if (attrp->dma_attr_addr_hi < hi_pa) {
2938 		return (B_TRUE);
2939 	}
2940 
2941 	return (B_FALSE);
2942 }
2943 
2944 size_t
2945 i_ddi_copybuf_size()
2946 {
2947 	return (dma_max_copybuf_size);
2948 }
2949 
2950 /*
2951  * i_ddi_dma_max()
2952  *    returns the maximum DMA size which can be performed in a single DMA
2953  *    window taking into account the devices DMA contraints (attrp), the
2954  *    maximum copy buffer size (if applicable), and the worse case buffer
2955  *    fragmentation.
2956  */
2957 /*ARGSUSED*/
2958 uint32_t
2959 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2960 {
2961 	uint64_t maxxfer;
2962 
2963 
2964 	/*
2965 	 * take the min of maxxfer and the the worse case fragementation
2966 	 * (e.g. every cookie <= 1 page)
2967 	 */
2968 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2969 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2970 
2971 	/*
2972 	 * If the DMA engine can't reach all off memory, we also need to take
2973 	 * the max size of the copybuf into consideration.
2974 	 */
2975 	if (i_ddi_copybuf_required(attrp)) {
2976 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2977 	}
2978 
2979 	/*
2980 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2981 	 * page so it won't be mistaken for an error value during debug.
2982 	 */
2983 	if (maxxfer >= 0xFFFFFFFF) {
2984 		maxxfer = 0xFFFFF000;
2985 	}
2986 
2987 	/*
2988 	 * make sure the value we return is a whole multiple of the
2989 	 * granlarity.
2990 	 */
2991 	if (attrp->dma_attr_granular > 1) {
2992 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2993 	}
2994 
2995 	return ((uint32_t)maxxfer);
2996 }
2997 
2998 /*ARGSUSED*/
2999 void
3000 translate_devid(dev_info_t *dip)
3001 {
3002 }
3003 
3004 pfn_t
3005 i_ddi_paddr_to_pfn(paddr_t paddr)
3006 {
3007 	pfn_t pfn;
3008 
3009 #ifdef __xpv
3010 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
3011 		pfn = xen_assign_pfn(mmu_btop(paddr));
3012 	} else {
3013 		pfn = mmu_btop(paddr);
3014 	}
3015 #else
3016 	pfn = mmu_btop(paddr);
3017 #endif
3018 
3019 	return (pfn);
3020 }
3021