xref: /illumos-gate/usr/src/uts/i86pc/os/ddi_impl.c (revision dd891561fb3e50f856d7d730f22a12cc1db51788)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
25  * Copyright 2014 Pluribus Networks, Inc.
26  * Copyright 2016 Nexenta Systems, Inc.
27  */
28 
29 /*
30  * PC specific DDI implementation
31  */
32 #include <sys/types.h>
33 #include <sys/autoconf.h>
34 #include <sys/avintr.h>
35 #include <sys/bootconf.h>
36 #include <sys/conf.h>
37 #include <sys/cpuvar.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/ddi_subrdefs.h>
40 #include <sys/ethernet.h>
41 #include <sys/fp.h>
42 #include <sys/instance.h>
43 #include <sys/kmem.h>
44 #include <sys/machsystm.h>
45 #include <sys/modctl.h>
46 #include <sys/promif.h>
47 #include <sys/prom_plat.h>
48 #include <sys/sunndi.h>
49 #include <sys/ndi_impldefs.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/sysmacros.h>
52 #include <sys/systeminfo.h>
53 #include <sys/utsname.h>
54 #include <sys/atomic.h>
55 #include <sys/spl.h>
56 #include <sys/archsystm.h>
57 #include <vm/seg_kmem.h>
58 #include <sys/ontrap.h>
59 #include <sys/fm/protocol.h>
60 #include <sys/ramdisk.h>
61 #include <sys/sunndi.h>
62 #include <sys/vmem.h>
63 #include <sys/pci_impl.h>
64 #if defined(__xpv)
65 #include <sys/hypervisor.h>
66 #endif
67 #include <sys/mach_intr.h>
68 #include <vm/hat_i86.h>
69 #include <sys/x86_archext.h>
70 #include <sys/avl.h>
71 
72 /*
73  * DDI Boot Configuration
74  */
75 
/*
 * Platform driver modules for this platform.  The list is terminated
 * by a NULL entry.
 */
char *platform_module_list[] = {
	"acpippm",
	"ppm",
	NULL
};
84 
/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/* 1M + 4K */
size_t dma_max_copybuf_size = (1024 * 1024) + (4 * 1024);

uint64_t ramdisk_start;
uint64_t ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" as a pseudo node instead of
 * attaching it through its hardware nodes.
 */
int pseudo_isa = 0;
93 
94 /*
95  * Forward declarations
96  */
97 static int getlongprop_buf();
98 static void get_boot_properties(void);
99 static void impl_bus_initialprobe(void);
100 static void impl_bus_reprobe(void);
101 
102 static int poke_mem(peekpoke_ctlops_t *in_args);
103 static int peek_mem(peekpoke_ctlops_t *in_args);
104 
105 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
106 
107 #if defined(__amd64) && !defined(__xpv)
108 extern void immu_init(void);
109 #endif
110 
111 /*
112  * We use an AVL tree to store contiguous address allocations made with the
113  * kalloca() routine, so that we can return the size to free with kfreea().
114  * Note that in the future it would be vastly faster if we could eliminate
115  * this lookup by insisting that all callers keep track of their own sizes,
116  * just as for kmem_alloc().
117  */
/*
 * Record describing one tracked contiguous allocation.
 */
struct ctgas {
	avl_node_t ctg_link;	/* AVL tree linkage */
	void *ctg_addr;		/* address of the allocation */
	size_t ctg_size;	/* size recorded for the allocation */
};

/* AVL tree head; presumably holds struct ctgas records (users not shown) */
static avl_tree_t ctgtree;

static kmutex_t		ctgmutex;
/* Enter/exit ctgmutex; presumably guards ctgtree -- confirm at call sites */
#define	CTGLOCK()	mutex_enter(&ctgmutex)
#define	CTGUNLOCK()	mutex_exit(&ctgmutex)
129 
/*
 * Minimum pfn value of page_t's put on the free list.  This is to simplify
 * support of ddi dma memory requests which specify small, non-zero addr_lo
 * values.
 *
 * The default value of 2, which corresponds to the only known non-zero addr_lo
 * value used, means a single page will be sacrificed (pfn typically starts
 * at 1).  ddiphysmin can be set to 0 to disable this.  It must not be set
 * above 0x100; otherwise mp startup panics.
 */
pfn_t	ddiphysmin = 2;
141 
142 static void
143 check_driver_disable(void)
144 {
145 	int proplen = 128;
146 	char *prop_name;
147 	char *drv_name, *propval;
148 	major_t major;
149 
150 	prop_name = kmem_alloc(proplen, KM_SLEEP);
151 	for (major = 0; major < devcnt; major++) {
152 		drv_name = ddi_major_to_name(major);
153 		if (drv_name == NULL)
154 			continue;
155 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
156 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
157 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
158 			if (strcmp(propval, "true") == 0) {
159 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
160 				cmn_err(CE_NOTE, "driver %s disabled",
161 				    drv_name);
162 			}
163 			ddi_prop_free(propval);
164 		}
165 	}
166 	kmem_free(prop_name, proplen);
167 }
168 
169 
170 /*
171  * Configure the hardware on the system.
172  * Called before the rootfs is mounted
173  */
174 void
175 configure(void)
176 {
177 	extern void i_ddi_init_root();
178 
179 #if defined(__i386)
180 	extern int fpu_pentium_fdivbug;
181 #endif	/* __i386 */
182 	extern int fpu_ignored;
183 
184 	/*
185 	 * Determine if an FPU is attached
186 	 */
187 
188 	fpu_probe();
189 
190 #if defined(__i386)
191 	if (fpu_pentium_fdivbug) {
192 		printf("\
193 FP hardware exhibits Pentium floating point divide problem\n");
194 	}
195 #endif	/* __i386 */
196 
197 	if (fpu_ignored) {
198 		printf("FP hardware will not be used\n");
199 	} else if (!fpu_exists) {
200 		printf("No FPU in configuration\n");
201 	}
202 
203 	/*
204 	 * Initialize devices on the machine.
205 	 * Uses configuration tree built by the PROMs to determine what
206 	 * is present, and builds a tree of prototype dev_info nodes
207 	 * corresponding to the hardware which identified itself.
208 	 */
209 
210 	/*
211 	 * Initialize root node.
212 	 */
213 	i_ddi_init_root();
214 
215 	/* reprogram devices not set up by firmware (BIOS) */
216 	impl_bus_reprobe();
217 
218 #if defined(__amd64) && !defined(__xpv)
219 	/*
220 	 * Setup but don't startup the IOMMU
221 	 * Startup happens later via a direct call
222 	 * to IOMMU code by boot code.
223 	 * At this point, all PCI bus renumbering
224 	 * is done, so safe to init the IMMU
225 	 * AKA Intel IOMMU.
226 	 */
227 	immu_init();
228 #endif
229 
230 	/*
231 	 * attach the isa nexus to get ACPI resource usage
232 	 * isa is "kind of" a pseudo node
233 	 */
234 #if defined(__xpv)
235 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
236 		if (pseudo_isa)
237 			(void) i_ddi_attach_pseudo_node("isa");
238 		else
239 			(void) i_ddi_attach_hw_nodes("isa");
240 	}
241 #else
242 	if (pseudo_isa)
243 		(void) i_ddi_attach_pseudo_node("isa");
244 	else
245 		(void) i_ddi_attach_hw_nodes("isa");
246 #endif
247 }
248 
249 /*
250  * The "status" property indicates the operational status of a device.
251  * If this property is present, the value is a string indicating the
252  * status of the device as follows:
253  *
254  *	"okay"		operational.
255  *	"disabled"	not operational, but might become operational.
256  *	"fail"		not operational because a fault has been detected,
257  *			and it is unlikely that the device will become
258  *			operational without repair. no additional details
259  *			are available.
260  *	"fail-xxx"	not operational because a fault has been detected,
261  *			and it is unlikely that the device will become
262  *			operational without repair. "xxx" is additional
263  *			human-readable information about the particular
264  *			fault condition that was detected.
265  *
266  * The absence of this property means that the operational status is
267  * unknown or okay.
268  *
269  * This routine checks the status property of the specified device node
270  * and returns 0 if the operational status indicates failure, and 1 otherwise.
271  *
 * The property may exist on plug-in cards that existed before IEEE 1275-1994.
273  * And, in that case, the property may not even be a string. So we carefully
274  * check for the value "fail", in the beginning of the string, noting
275  * the property length.
276  */
277 int
278 status_okay(int id, char *buf, int buflen)
279 {
280 	char status_buf[OBP_MAXPROPNAME];
281 	char *bufp = buf;
282 	int len = buflen;
283 	int proplen;
284 	static const char *status = "status";
285 	static const char *fail = "fail";
286 	int fail_len = (int)strlen(fail);
287 
288 	/*
289 	 * Get the proplen ... if it's smaller than "fail",
290 	 * or doesn't exist ... then we don't care, since
291 	 * the value can't begin with the char string "fail".
292 	 *
293 	 * NB: proplen, if it's a string, includes the NULL in the
294 	 * the size of the property, and fail_len does not.
295 	 */
296 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
297 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
298 		return (1);
299 
300 	/*
301 	 * if a buffer was provided, use it
302 	 */
303 	if ((buf == (char *)NULL) || (buflen <= 0)) {
304 		bufp = status_buf;
305 		len = sizeof (status_buf);
306 	}
307 	*bufp = (char)0;
308 
309 	/*
310 	 * Get the property into the buffer, to the extent of the buffer,
311 	 * and in case the buffer is smaller than the property size,
312 	 * NULL terminate the buffer. (This handles the case where
313 	 * a buffer was passed in and the caller wants to print the
314 	 * value, but the buffer was too small).
315 	 */
316 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
317 	    (caddr_t)bufp, len);
318 	*(bufp + len - 1) = (char)0;
319 
320 	/*
321 	 * If the value begins with the char string "fail",
322 	 * then it means the node is failed. We don't care
323 	 * about any other values. We assume the node is ok
324 	 * although it might be 'disabled'.
325 	 */
326 	if (strncmp(bufp, fail, fail_len) == 0)
327 		return (0);
328 
329 	return (1);
330 }
331 
332 /*
333  * Check the status of the device node passed as an argument.
334  *
335  *	if ((status is OKAY) || (status is DISABLED))
336  *		return DDI_SUCCESS
337  *	else
338  *		print a warning and return DDI_FAILURE
339  */
340 /*ARGSUSED1*/
341 int
342 check_status(int id, char *name, dev_info_t *parent)
343 {
344 	char status_buf[64];
345 	char devtype_buf[OBP_MAXPROPNAME];
346 	int retval = DDI_FAILURE;
347 
348 	/*
349 	 * is the status okay?
350 	 */
351 	if (status_okay(id, status_buf, sizeof (status_buf)))
352 		return (DDI_SUCCESS);
353 
354 	/*
355 	 * a status property indicating bad memory will be associated
356 	 * with a node which has a "device_type" property with a value of
357 	 * "memory-controller". in this situation, return DDI_SUCCESS
358 	 */
359 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
360 	    sizeof (devtype_buf)) > 0) {
361 		if (strcmp(devtype_buf, "memory-controller") == 0)
362 			retval = DDI_SUCCESS;
363 	}
364 
365 	/*
366 	 * print the status property information
367 	 */
368 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
369 	return (retval);
370 }
371 
/*
 * Invokes softint() and always returns 1.  Neither argument is used.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
	softint();
	return (1);
}
379 
380 /*
381  * Allow for implementation specific correction of PROM property values.
382  */
383 
/*
 * Intentionally empty: every argument is ignored and nothing is modified.
 */
/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
	/*
	 * There are no adjustments needed in this implementation.
	 */
}
393 
394 static int
395 getlongprop_buf(int id, char *name, char *buf, int maxlen)
396 {
397 	int size;
398 
399 	size = prom_getproplen((pnode_t)id, name);
400 	if (size <= 0 || (size > maxlen - 1))
401 		return (-1);
402 
403 	if (-1 == prom_getprop((pnode_t)id, name, buf))
404 		return (-1);
405 
406 	if (strcmp("name", name) == 0) {
407 		if (buf[size - 1] != '\0') {
408 			buf[size] = '\0';
409 			size += 1;
410 		}
411 	}
412 
413 	return (size);
414 }
415 
416 static int
417 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
418 {
419 	int ret;
420 
421 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
422 	    DDI_PROP_DONTPASS, pname, pval, plen))
423 	    == DDI_PROP_SUCCESS) {
424 		*plen = (*plen) * (sizeof (int));
425 	}
426 	return (ret);
427 }
428 
429 
430 /*
431  * Node Configuration
432  */
433 
434 struct prop_ispec {
435 	uint_t	pri, vec;
436 };
437 
438 /*
439  * For the x86, we're prepared to claim that the interrupt string
440  * is in the form of a list of <ipl,vec> specifications.
441  */
442 
443 #define	VEC_MIN	1
444 #define	VEC_MAX	255
445 
446 static int
447 impl_xlate_intrs(dev_info_t *child, int *in,
448     struct ddi_parent_private_data *pdptr)
449 {
450 	size_t size;
451 	int n;
452 	struct intrspec *new;
453 	caddr_t got_prop;
454 	int *inpri;
455 	int got_len;
456 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
457 
458 	static char bad_intr_fmt[] =
459 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
460 
461 	/*
462 	 * determine if the driver is expecting the new style "interrupts"
463 	 * property which just contains the IRQ, or the old style which
464 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
465 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
466 	 * in that case, the "interrupt-priorities" property contains the
467 	 * IPL values that match, one for one, the IRQ values in the
468 	 * "interrupts" property.
469 	 */
470 	inpri = NULL;
471 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
472 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
473 		/* the old style "interrupts" property... */
474 
475 		/*
476 		 * The list consists of <ipl,vec> elements
477 		 */
478 		if ((n = (*in++ >> 1)) < 1)
479 			return (DDI_FAILURE);
480 
481 		pdptr->par_nintr = n;
482 		size = n * sizeof (struct intrspec);
483 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
484 
485 		while (n--) {
486 			int level = *in++;
487 			int vec = *in++;
488 
489 			if (level < 1 || level > MAXIPL ||
490 			    vec < VEC_MIN || vec > VEC_MAX) {
491 				cmn_err(CE_CONT, bad_intr_fmt,
492 				    DEVI(child)->devi_name,
493 				    DEVI(child)->devi_instance, level, vec);
494 				goto broken;
495 			}
496 			new->intrspec_pri = level;
497 			if (vec != 2)
498 				new->intrspec_vec = vec;
499 			else
500 				/*
501 				 * irq 2 on the PC bus is tied to irq 9
502 				 * on ISA, EISA and MicroChannel
503 				 */
504 				new->intrspec_vec = 9;
505 			new++;
506 		}
507 
508 		return (DDI_SUCCESS);
509 	} else {
510 		/* the new style "interrupts" property... */
511 
512 		/*
513 		 * The list consists of <vec> elements
514 		 */
515 		if ((n = (*in++)) < 1)
516 			return (DDI_FAILURE);
517 
518 		pdptr->par_nintr = n;
519 		size = n * sizeof (struct intrspec);
520 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
521 
522 		/* XXX check for "interrupt-priorities" property... */
523 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
524 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
525 		    == DDI_PROP_SUCCESS) {
526 			if (n != (got_len / sizeof (int))) {
527 				cmn_err(CE_CONT,
528 				    "bad interrupt-priorities length"
529 				    " from %s%d: expected %d, got %d\n",
530 				    DEVI(child)->devi_name,
531 				    DEVI(child)->devi_instance, n,
532 				    (int)(got_len / sizeof (int)));
533 				goto broken;
534 			}
535 			inpri = (int *)got_prop;
536 		}
537 
538 		while (n--) {
539 			int level;
540 			int vec = *in++;
541 
542 			if (inpri == NULL)
543 				level = 5;
544 			else
545 				level = *inpri++;
546 
547 			if (level < 1 || level > MAXIPL ||
548 			    vec < VEC_MIN || vec > VEC_MAX) {
549 				cmn_err(CE_CONT, bad_intr_fmt,
550 				    DEVI(child)->devi_name,
551 				    DEVI(child)->devi_instance, level, vec);
552 				goto broken;
553 			}
554 			new->intrspec_pri = level;
555 			if (vec != 2)
556 				new->intrspec_vec = vec;
557 			else
558 				/*
559 				 * irq 2 on the PC bus is tied to irq 9
560 				 * on ISA, EISA and MicroChannel
561 				 */
562 				new->intrspec_vec = 9;
563 			new++;
564 		}
565 
566 		if (inpri != NULL)
567 			kmem_free(got_prop, got_len);
568 		return (DDI_SUCCESS);
569 	}
570 
571 broken:
572 	kmem_free(pdptr->par_intr, size);
573 	pdptr->par_intr = NULL;
574 	pdptr->par_nintr = 0;
575 	if (inpri != NULL)
576 		kmem_free(got_prop, got_len);
577 
578 	return (DDI_FAILURE);
579 }
580 
581 /*
582  * Create a ddi_parent_private_data structure from the ddi properties of
583  * the dev_info node.
584  *
585  * The "reg" and either an "intr" or "interrupts" properties are required
586  * if the driver wishes to create mappings or field interrupts on behalf
587  * of the device.
588  *
589  * The "reg" property is assumed to be a list of at least one triple
590  *
591  *	<bustype, address, size>*1
592  *
593  * The "intr" property is assumed to be a list of at least one duple
594  *
595  *	<SPARC ipl, vector#>*1
596  *
597  * The "interrupts" property is assumed to be a list of at least one
598  * n-tuples that describes the interrupt capabilities of the bus the device
599  * is connected to.  For SBus, this looks like
600  *
601  *	<SBus-level>*1
602  *
603  * (This property obsoletes the 'intr' property).
604  *
605  * The "ranges" property is optional.
606  */
607 void
608 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
609 {
610 	struct ddi_parent_private_data *pdptr;
611 	int n;
612 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
613 	uint_t reg_len, rng_len, intr_len, irupts_len;
614 
615 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
616 
617 	/*
618 	 * Handle the 'reg' property.
619 	 */
620 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
621 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
622 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
623 		pdptr->par_reg = (struct regspec *)reg_prop;
624 	}
625 
626 	/*
627 	 * See if I have a range (adding one where needed - this
628 	 * means to add one for sbus node in sun4c, when romvec > 0,
629 	 * if no range is already defined in the PROM node.
630 	 * (Currently no sun4c PROMS define range properties,
631 	 * but they should and may in the future.)  For the SBus
632 	 * node, the range is defined by the SBus reg property.
633 	 */
634 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
635 	    == DDI_PROP_SUCCESS) {
636 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
637 		pdptr->par_rng = (struct rangespec *)rng_prop;
638 	}
639 
640 	/*
641 	 * Handle the 'intr' and 'interrupts' properties
642 	 */
643 
644 	/*
645 	 * For backwards compatibility
646 	 * we first look for the 'intr' property for the device.
647 	 */
648 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
649 	    != DDI_PROP_SUCCESS) {
650 		intr_len = 0;
651 	}
652 
653 	/*
654 	 * If we're to support bus adapters and future platforms cleanly,
655 	 * we need to support the generalized 'interrupts' property.
656 	 */
657 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
658 	    &irupts_len) != DDI_PROP_SUCCESS) {
659 		irupts_len = 0;
660 	} else if (intr_len != 0) {
661 		/*
662 		 * If both 'intr' and 'interrupts' are defined,
663 		 * then 'interrupts' wins and we toss the 'intr' away.
664 		 */
665 		ddi_prop_free((void *)intr_prop);
666 		intr_len = 0;
667 	}
668 
669 	if (intr_len != 0) {
670 
671 		/*
672 		 * Translate the 'intr' property into an array
673 		 * an array of struct intrspec's.  There's not really
674 		 * very much to do here except copy what's out there.
675 		 */
676 
677 		struct intrspec *new;
678 		struct prop_ispec *l;
679 
680 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
681 		l = (struct prop_ispec *)intr_prop;
682 		pdptr->par_intr =
683 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
684 		while (n--) {
685 			new->intrspec_pri = l->pri;
686 			new->intrspec_vec = l->vec;
687 			new++;
688 			l++;
689 		}
690 		ddi_prop_free((void *)intr_prop);
691 
692 	} else if ((n = irupts_len) != 0) {
693 		size_t size;
694 		int *out;
695 
696 		/*
697 		 * Translate the 'interrupts' property into an array
698 		 * of intrspecs for the rest of the DDI framework to
699 		 * toy with.  Only our ancestors really know how to
700 		 * do this, so ask 'em.  We massage the 'interrupts'
701 		 * property so that it is pre-pended by a count of
702 		 * the number of integers in the argument.
703 		 */
704 		size = sizeof (int) + n;
705 		out = kmem_alloc(size, KM_SLEEP);
706 		*out = n / sizeof (int);
707 		bcopy(irupts_prop, out + 1, (size_t)n);
708 		ddi_prop_free((void *)irupts_prop);
709 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
710 			cmn_err(CE_CONT,
711 			    "Unable to translate 'interrupts' for %s%d\n",
712 			    DEVI(child)->devi_binding_name,
713 			    DEVI(child)->devi_instance);
714 		}
715 		kmem_free(out, size);
716 	}
717 }
718 
719 /*
720  * Name a child
721  */
722 static int
723 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
724 {
725 	/*
726 	 * Fill in parent-private data and this function returns to us
727 	 * an indication if it used "registers" to fill in the data.
728 	 */
729 	if (ddi_get_parent_data(child) == NULL) {
730 		struct ddi_parent_private_data *pdptr;
731 		make_ddi_ppd(child, &pdptr);
732 		ddi_set_parent_data(child, pdptr);
733 	}
734 
735 	name[0] = '\0';
736 	if (sparc_pd_getnreg(child) > 0) {
737 		(void) snprintf(name, namelen, "%x,%x",
738 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
739 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
740 	}
741 
742 	return (DDI_SUCCESS);
743 }
744 
745 /*
746  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
747  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
748  * the children of sun busses based on the reg spec.
749  *
750  * Handles the following properties (in make_ddi_ppd):
751  *	Property		value
752  *	  Name			type
753  *	reg		register spec
754  *	intr		old-form interrupt spec
755  *	interrupts	new (bus-oriented) interrupt spec
756  *	ranges		range spec
757  */
758 int
759 impl_ddi_sunbus_initchild(dev_info_t *child)
760 {
761 	char name[MAXNAMELEN];
762 	void impl_ddi_sunbus_removechild(dev_info_t *);
763 
764 	/*
765 	 * Name the child, also makes parent private data
766 	 */
767 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
768 	ddi_set_name_addr(child, name);
769 
770 	/*
771 	 * Attempt to merge a .conf node; if successful, remove the
772 	 * .conf node.
773 	 */
774 	if ((ndi_dev_is_persistent_node(child) == 0) &&
775 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
776 		/*
777 		 * Return failure to remove node
778 		 */
779 		impl_ddi_sunbus_removechild(child);
780 		return (DDI_FAILURE);
781 	}
782 	return (DDI_SUCCESS);
783 }
784 
785 void
786 impl_free_ddi_ppd(dev_info_t *dip)
787 {
788 	struct ddi_parent_private_data *pdptr;
789 	size_t n;
790 
791 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
792 		return;
793 
794 	if ((n = (size_t)pdptr->par_nintr) != 0)
795 		/*
796 		 * Note that kmem_free is used here (instead of
797 		 * ddi_prop_free) because the contents of the
798 		 * property were placed into a separate buffer and
799 		 * mucked with a bit before being stored in par_intr.
800 		 * The actual return value from the prop lookup
801 		 * was freed with ddi_prop_free previously.
802 		 */
803 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
804 
805 	if ((n = (size_t)pdptr->par_nrng) != 0)
806 		ddi_prop_free((void *)pdptr->par_rng);
807 
808 	if ((n = pdptr->par_nreg) != 0)
809 		ddi_prop_free((void *)pdptr->par_reg);
810 
811 	kmem_free(pdptr, sizeof (*pdptr));
812 	ddi_set_parent_data(dip, NULL);
813 }
814 
/*
 * Undo child initialization: free the parent private data, clear the
 * unit address, and remove the node's properties.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
	impl_free_ddi_ppd(dip);
	ddi_set_name_addr(dip, NULL);
	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	impl_rem_dev_props(dip);
}
825 
826 /*
827  * DDI Interrupt
828  */
829 
830 /*
831  * turn this on to force isa, eisa, and mca device to ignore the new
832  * hardware nodes in the device tree (normally turned on only for
833  * drivers that need it by setting the property "ignore-hardware-nodes"
834  * in their driver.conf file).
835  *
836  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
837  *		as safety valve.
838  */
/*
 * When non-zero, impl_xlate_intrs() parses every "interrupts" property
 * in the old <ipl,vec> pair format.
 */
int ignore_hardware_nodes = 0;

/*
 * Local data
 */
static struct impl_bus_promops *impl_busp;
845 
846 
847 /*
848  * New DDI interrupt framework
849  */
850 
851 /*
852  * i_ddi_intr_ops:
853  *
854  * This is the interrupt operator function wrapper for the bus function
855  * bus_intr_op.
856  */
857 int
858 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
859     ddi_intr_handle_impl_t *hdlp, void * result)
860 {
861 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
862 	int		ret = DDI_FAILURE;
863 
864 	/* request parent to process this interrupt op */
865 	if (NEXUS_HAS_INTR_OP(pdip))
866 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
867 		    pdip, rdip, op, hdlp, result);
868 	else
869 		cmn_err(CE_WARN, "Failed to process interrupt "
870 		    "for %s%d due to down-rev nexus driver %s%d",
871 		    ddi_get_name(rdip), ddi_get_instance(rdip),
872 		    ddi_get_name(pdip), ddi_get_instance(pdip));
873 	return (ret);
874 }
875 
876 /*
877  * i_ddi_add_softint - allocate and add a soft interrupt to the system
878  */
879 int
880 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
881 {
882 	int ret;
883 
884 	/* add soft interrupt handler */
885 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
886 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
887 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
888 }
889 
890 
/*
 * i_ddi_remove_softint - remove a soft interrupt handler; the result of
 * rem_avsoftintr() is deliberately ignored.
 */
void
i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
{
	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
}
896 
897 
/* Function pointer called by i_ddi_trigger_softint() to post a softint. */
extern void (*setsoftint)(int, struct av_softinfo *);
/* Pending-state query used by the trigger and set-priority paths. */
extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
900 
/*
 * i_ddi_trigger_softint - post a soft interrupt for the given handle.
 *
 * Returns DDI_EPENDING when av_check_softint_pending() reports the handle
 * as pending; otherwise records arg2 as the handler's second argument,
 * posts the interrupt through setsoftint and returns DDI_SUCCESS.
 */
int
i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
{
	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
		return (DDI_EPENDING);

	/* arguments are updated before the interrupt is posted */
	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);

	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
	return (DDI_SUCCESS);
}
912 
913 /*
914  * i_ddi_set_softint_pri:
915  *
916  * The way this works is that it first tries to add a softint vector
917  * at the new priority in hdlp. If that succeeds; then it removes the
918  * existing softint vector at the old priority.
919  */
920 int
921 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
922 {
923 	int ret;
924 
925 	/*
926 	 * If a softint is pending at the old priority then fail the request.
927 	 */
928 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
929 		return (DDI_FAILURE);
930 
931 	ret = av_softint_movepri((void *)hdlp, old_pri);
932 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
933 }
934 
/*
 * Allocate a zeroed ihdl_plat_t and store it in hdlp->ih_private.
 * The allocation uses KM_SLEEP.
 */
void
i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
}
940 
/*
 * Free the ihdl_plat_t held in hdlp->ih_private and clear the pointer.
 */
void
i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
	hdlp->ih_private = NULL;
}
947 
948 int
949 i_ddi_get_intx_nintrs(dev_info_t *dip)
950 {
951 	struct ddi_parent_private_data *pdp;
952 
953 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
954 		return (0);
955 
956 	return (pdp->par_nintr);
957 }
958 
959 /*
960  * DDI Memory/DMA
961  */
962 
963 /*
964  * Support for allocating DMAable memory to implement
965  * ddi_dma_mem_alloc(9F) interface.
966  */
967 
/*
 * kmem_io_init() creates KA_NCACHE caches per arena; cache c has size and
 * alignment KA_ALIGN << c, so sizes run from KA_ALIGN (128 bytes) up to
 * 1 << PAGESHIFT.
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
971 
972 /*
973  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
974  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
975  */
976 
977 static ddi_dma_attr_t kmem_io_attr = {
978 	DMA_ATTR_V0,
979 	0x0000000000000000ULL,		/* dma_attr_addr_lo */
980 	0x0000000000000000ULL,		/* dma_attr_addr_hi */
981 	0x00ffffff,
982 	0x1000,				/* dma_attr_align */
983 	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
984 };
985 
986 /* kmem io memory ranges and indices */
enum {
	IO_4P,
	IO_64G,
	IO_4G,
	IO_2G,
	IO_1G,
	IO_512M,
	IO_256M,
	IO_128M,
	IO_64M,
	IO_32M,
	IO_16M,
	MAX_MEM_RANGES		/* number of ranges; must stay last */
};
991 
/*
 * Per-range state, indexed by the IO_* enumerators.
 */
static struct {
	vmem_t		*kmem_io_arena;	/* arena made by kmem_io_init() */
	kmem_cache_t	*kmem_io_cache[KA_NCACHE]; /* one per size class */
	ddi_dma_attr_t	kmem_io_attr;	/* passed on to page_create_io() */
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;		/* index of first populated kmem_io[] */
999 
1000 static page_t *
1001 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
1002 {
1003 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1004 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1005 
1006 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1007 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1008 }
1009 
#ifdef __xpv
/*
 * Arena free callback used by kmem_io_init() when built for __xpv:
 * releases the range via segkmem_xfree() with page_destroy_io() as the
 * page destructor.
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
	extern void page_destroy_io(page_t *);
	segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
1018 
1019 static void *
1020 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1021 {
1022 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1023 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1024 }
1025 
1026 static void *
1027 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1028 {
1029 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1030 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1031 }
1032 
1033 static void *
1034 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1035 {
1036 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1037 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1038 }
1039 
1040 static void *
1041 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1042 {
1043 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1044 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1045 }
1046 
1047 static void *
1048 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1049 {
1050 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1051 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1052 }
1053 
1054 static void *
1055 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1056 {
1057 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1058 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1059 }
1060 
1061 static void *
1062 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1063 {
1064 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1065 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1066 }
1067 
1068 static void *
1069 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1070 {
1071 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1072 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1073 }
1074 
1075 static void *
1076 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1077 {
1078 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1079 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1080 }
1081 
1082 static void *
1083 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1084 {
1085 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1086 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1087 }
1088 
1089 static void *
1090 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1091 {
1092 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1093 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1094 }
1095 
/*
 * Static parameters for each kmem_io[] memory range, one entry per range.
 * Entries are listed from the largest address limit down to the smallest.
 * ka_init() copies io_limit into the range's dma_attr_addr_hi and
 * kmem_io_init() uses io_name and io_alloc when it creates the arena.
 */
struct {
	uint64_t	io_limit;	/* upper address limit of the range */
	char		*io_name;	/* arena name, also cache name prefix */
	void		*(*io_alloc)(vmem_t *, size_t, int);	/* arena alloc fn */
	int		io_initial;	/* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
};
1114 
1115 void
1116 kmem_io_init(int a)
1117 {
1118 	int	c;
1119 	char name[40];
1120 
1121 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1122 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1123 #ifdef __xpv
1124 	    segkmem_free_io,
1125 #else
1126 	    segkmem_free,
1127 #endif
1128 	    heap_arena, 0, VM_SLEEP);
1129 
1130 	for (c = 0; c < KA_NCACHE; c++) {
1131 		size_t size = KA_ALIGN << c;
1132 		(void) sprintf(name, "%s_%lu",
1133 		    io_arena_params[a].io_name, size);
1134 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1135 		    size, size, NULL, NULL, NULL, NULL,
1136 		    kmem_io[a].kmem_io_arena, 0);
1137 	}
1138 }
1139 
1140 /*
1141  * Return the index of the highest memory range for addr.
1142  */
1143 static int
1144 kmem_io_index(uint64_t addr)
1145 {
1146 	int n;
1147 
1148 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1149 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1150 			if (kmem_io[n].kmem_io_arena == NULL)
1151 				kmem_io_init(n);
1152 			return (n);
1153 		}
1154 	}
1155 	panic("kmem_io_index: invalid addr - must be at least 16m");
1156 
1157 	/*NOTREACHED*/
1158 }
1159 
1160 /*
1161  * Return the index of the next kmem_io populated memory range
1162  * after curindex.
1163  */
1164 static int
1165 kmem_io_index_next(int curindex)
1166 {
1167 	int n;
1168 
1169 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1170 		if (kmem_io[n].kmem_io_arena)
1171 			return (n);
1172 	}
1173 	return (-1);
1174 }
1175 
1176 /*
1177  * allow kmem to be mapped in with different PTE cache attribute settings.
1178  * Used by i_ddi_mem_alloc()
1179  */
1180 int
1181 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1182 {
1183 	uint_t hat_flags;
1184 	caddr_t kva_end;
1185 	uint_t hat_attr;
1186 	pfn_t pfn;
1187 
1188 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1189 		return (-1);
1190 	}
1191 
1192 	hat_attr &= ~HAT_ORDER_MASK;
1193 	hat_attr |= order | HAT_NOSYNC;
1194 	hat_flags = HAT_LOAD_LOCK;
1195 
1196 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1197 	    (uintptr_t)PAGEMASK);
1198 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1199 
1200 	while (kva < kva_end) {
1201 		pfn = hat_getpfnum(kas.a_hat, kva);
1202 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1203 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1204 		kva += MMU_PAGESIZE;
1205 	}
1206 
1207 	return (0);
1208 }
1209 
1210 static int
1211 ctgcompare(const void *a1, const void *a2)
1212 {
1213 	/* we just want to compare virtual addresses */
1214 	a1 = ((struct ctgas *)a1)->ctg_addr;
1215 	a2 = ((struct ctgas *)a2)->ctg_addr;
1216 	return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1217 }
1218 
1219 void
1220 ka_init(void)
1221 {
1222 	int a;
1223 	paddr_t maxphysaddr;
1224 #if !defined(__xpv)
1225 	extern pfn_t physmax;
1226 
1227 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1228 #else
1229 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1230 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1231 #endif
1232 
1233 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1234 
1235 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1236 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1237 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1238 				io_arena_params[a].io_limit = maxphysaddr;
1239 			else
1240 				a++;
1241 			break;
1242 		}
1243 	}
1244 	kmem_io_idx = a;
1245 
1246 	for (; a < MAX_MEM_RANGES; a++) {
1247 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1248 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1249 		    io_arena_params[a].io_limit;
1250 		/*
1251 		 * initialize kmem_io[] arena/cache corresponding to
1252 		 * maxphysaddr and to the "common" io memory ranges that
1253 		 * have io_initial set to a non-zero value.
1254 		 */
1255 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1256 			kmem_io_init(a);
1257 	}
1258 
1259 	/* initialize ctgtree */
1260 	avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1261 	    offsetof(struct ctgas, ctg_link));
1262 }
1263 
1264 /*
1265  * put contig address/size
1266  */
1267 static void *
1268 putctgas(void *addr, size_t size)
1269 {
1270 	struct ctgas    *ctgp;
1271 	if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1272 		ctgp->ctg_addr = addr;
1273 		ctgp->ctg_size = size;
1274 		CTGLOCK();
1275 		avl_add(&ctgtree, ctgp);
1276 		CTGUNLOCK();
1277 	}
1278 	return (ctgp);
1279 }
1280 
1281 /*
1282  * get contig size by addr
1283  */
1284 static size_t
1285 getctgsz(void *addr)
1286 {
1287 	struct ctgas    *ctgp;
1288 	struct ctgas    find;
1289 	size_t		sz = 0;
1290 
1291 	find.ctg_addr = addr;
1292 	CTGLOCK();
1293 	if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1294 		avl_remove(&ctgtree, ctgp);
1295 	}
1296 	CTGUNLOCK();
1297 
1298 	if (ctgp != NULL) {
1299 		sz = ctgp->ctg_size;
1300 		kmem_free(ctgp, sizeof (*ctgp));
1301 	}
1302 
1303 	return (sz);
1304 }
1305 
1306 /*
1307  * contig_alloc:
1308  *
1309  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1310  *	specified in 'attr'.
1311  *
1312  *	Not all of memory need to be physically contiguous if the
1313  *	scatter-gather list length is greater than 1.
1314  */
1315 
1316 /*ARGSUSED*/
1317 void *
1318 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1319 {
1320 	pgcnt_t		pgcnt = btopr(size);
1321 	size_t		asize = pgcnt * PAGESIZE;
1322 	page_t		*ppl;
1323 	int		pflag;
1324 	void		*addr;
1325 
1326 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1327 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1328 
1329 	/* segkmem_xalloc */
1330 
1331 	if (align <= PAGESIZE)
1332 		addr = vmem_alloc(heap_arena, asize,
1333 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1334 	else
1335 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1336 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1337 	if (addr) {
1338 		ASSERT(!((uintptr_t)addr & (align - 1)));
1339 
1340 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1341 			vmem_free(heap_arena, addr, asize);
1342 			return (NULL);
1343 		}
1344 		pflag = PG_EXCL;
1345 
1346 		if (cansleep)
1347 			pflag |= PG_WAIT;
1348 
1349 		/* 4k req gets from freelists rather than pfn search */
1350 		if (pgcnt > 1 || align > PAGESIZE)
1351 			pflag |= PG_PHYSCONTIG;
1352 
1353 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1354 		    asize, pflag, &kas, (caddr_t)addr, attr);
1355 
1356 		if (!ppl) {
1357 			vmem_free(heap_arena, addr, asize);
1358 			page_unresv(pgcnt);
1359 			return (NULL);
1360 		}
1361 
1362 		while (ppl != NULL) {
1363 			page_t	*pp = ppl;
1364 			page_sub(&ppl, pp);
1365 			ASSERT(page_iolock_assert(pp));
1366 			page_io_unlock(pp);
1367 			page_downgrade(pp);
1368 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1369 			    pp, (PROT_ALL & ~PROT_USER) |
1370 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1371 		}
1372 	}
1373 	return (addr);
1374 }
1375 
1376 void
1377 contig_free(void *addr, size_t size)
1378 {
1379 	pgcnt_t	pgcnt = btopr(size);
1380 	size_t	asize = pgcnt * PAGESIZE;
1381 	caddr_t	a, ea;
1382 	page_t	*pp;
1383 
1384 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1385 
1386 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1387 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1388 		if (!pp)
1389 			panic("contig_free: contig pp not found");
1390 
1391 		if (!page_tryupgrade(pp)) {
1392 			page_unlock(pp);
1393 			pp = page_lookup(&kvp,
1394 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1395 			if (pp == NULL)
1396 				panic("contig_free: page freed");
1397 		}
1398 		page_destroy(pp, 0);
1399 	}
1400 
1401 	page_unresv(pgcnt);
1402 	vmem_free(heap_arena, addr, asize);
1403 }
1404 
1405 /*
1406  * Allocate from the system, aligned on a specific boundary.
1407  * The alignment, if non-zero, must be a power of 2.
1408  */
1409 static void *
1410 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1411     ddi_dma_attr_t *attr)
1412 {
1413 	size_t *addr, *raddr, rsize;
1414 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
1415 	int a, i, c;
1416 	vmem_t *vmp;
1417 	kmem_cache_t *cp = NULL;
1418 
1419 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1420 		return (NULL);
1421 
1422 	align = MAX(align, hdrsize);
1423 	ASSERT((align & (align - 1)) == 0);
1424 
1425 	/*
1426 	 * All of our allocators guarantee 16-byte alignment, so we don't
1427 	 * need to reserve additional space for the header.
1428 	 * To simplify picking the correct kmem_io_cache, we round up to
1429 	 * a multiple of KA_ALIGN.
1430 	 */
1431 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1432 
1433 	if (physcontig && rsize > PAGESIZE) {
1434 		if (addr = contig_alloc(size, attr, align, cansleep)) {
1435 			if (!putctgas(addr, size))
1436 				contig_free(addr, size);
1437 			else
1438 				return (addr);
1439 		}
1440 		return (NULL);
1441 	}
1442 
1443 	a = kmem_io_index(attr->dma_attr_addr_hi);
1444 
1445 	if (rsize > PAGESIZE) {
1446 		vmp = kmem_io[a].kmem_io_arena;
1447 		raddr = vmem_alloc(vmp, rsize,
1448 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1449 	} else {
1450 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1451 		cp = kmem_io[a].kmem_io_cache[c];
1452 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1453 		    KM_NOSLEEP);
1454 	}
1455 
1456 	if (raddr == NULL) {
1457 		int	na;
1458 
1459 		ASSERT(cansleep == 0);
1460 		if (rsize > PAGESIZE)
1461 			return (NULL);
1462 		/*
1463 		 * System does not have memory in the requested range.
1464 		 * Try smaller kmem io ranges and larger cache sizes
1465 		 * to see if there might be memory available in
1466 		 * these other caches.
1467 		 */
1468 
1469 		for (na = kmem_io_index_next(a); na >= 0;
1470 		    na = kmem_io_index_next(na)) {
1471 			ASSERT(kmem_io[na].kmem_io_arena);
1472 			cp = kmem_io[na].kmem_io_cache[c];
1473 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1474 			if (raddr)
1475 				goto kallocdone;
1476 		}
1477 		/* now try the larger kmem io cache sizes */
1478 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1479 			for (i = c + 1; i < KA_NCACHE; i++) {
1480 				cp = kmem_io[na].kmem_io_cache[i];
1481 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1482 				if (raddr)
1483 					goto kallocdone;
1484 			}
1485 		}
1486 		return (NULL);
1487 	}
1488 
1489 kallocdone:
1490 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1491 	    rsize > PAGESIZE);
1492 
1493 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1494 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1495 
1496 	addr[-4] = (size_t)cp;
1497 	addr[-3] = (size_t)vmp;
1498 	addr[-2] = (size_t)raddr;
1499 	addr[-1] = rsize;
1500 
1501 	return (addr);
1502 }
1503 
1504 static void
1505 kfreea(void *addr)
1506 {
1507 	size_t		size;
1508 
1509 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1510 		contig_free(addr, size);
1511 	} else {
1512 		size_t	*saddr = addr;
1513 		if (saddr[-4] == 0)
1514 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1515 			    saddr[-1]);
1516 		else
1517 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1518 			    (void *)saddr[-2]);
1519 	}
1520 }
1521 
/*
 * Intentionally empty in this file: neither argument is read and
 * *hataccp is left untouched.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1527 
1528 /*
1529  * Check if the specified cache attribute is supported on the platform.
1530  * This function must be called before i_ddi_cacheattr_to_hatacc().
1531  */
1532 boolean_t
1533 i_ddi_check_cache_attr(uint_t flags)
1534 {
1535 	/*
1536 	 * The cache attributes are mutually exclusive. Any combination of
1537 	 * the attributes leads to a failure.
1538 	 */
1539 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1540 	if ((cache_attr != 0) && !ISP2(cache_attr))
1541 		return (B_FALSE);
1542 
1543 	/* All cache attributes are supported on X86/X64 */
1544 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1545 	    IOMEM_DATA_UC_WR_COMBINE))
1546 		return (B_TRUE);
1547 
1548 	/* undefined attributes */
1549 	return (B_FALSE);
1550 }
1551 
1552 /* set HAT cache attributes from the cache attributes */
1553 void
1554 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1555 {
1556 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1557 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1558 
1559 	/*
1560 	 * If write-combining is not supported, then it falls back
1561 	 * to uncacheable.
1562 	 */
1563 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1564 	    !is_x86_feature(x86_featureset, X86FSET_PAT))
1565 		cache_attr = IOMEM_DATA_UNCACHED;
1566 
1567 	/*
1568 	 * set HAT attrs according to the cache attrs.
1569 	 */
1570 	switch (cache_attr) {
1571 	case IOMEM_DATA_UNCACHED:
1572 		*hataccp &= ~HAT_ORDER_MASK;
1573 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1574 		break;
1575 	case IOMEM_DATA_UC_WR_COMBINE:
1576 		*hataccp &= ~HAT_ORDER_MASK;
1577 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1578 		break;
1579 	case IOMEM_DATA_CACHED:
1580 		*hataccp &= ~HAT_ORDER_MASK;
1581 		*hataccp |= HAT_UNORDERED_OK;
1582 		break;
1583 	/*
1584 	 * This case must not occur because the cache attribute is scrutinized
1585 	 * before this function is called.
1586 	 */
1587 	default:
1588 		/*
1589 		 * set cacheable to hat attrs.
1590 		 */
1591 		*hataccp &= ~HAT_ORDER_MASK;
1592 		*hataccp |= HAT_UNORDERED_OK;
1593 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1594 		    fname, cache_attr);
1595 	}
1596 }
1597 
1598 /*
1599  * This should actually be called i_ddi_dma_mem_alloc. There should
1600  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1601  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1602  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1603  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1604  * so far which is used for both, DMA and PIO, we have to use the DMA
1605  * ctl ops to make everybody happy.
1606  */
1607 /*ARGSUSED*/
1608 int
1609 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1610     size_t length, int cansleep, int flags,
1611     ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1612     size_t *real_length, ddi_acc_hdl_t *ap)
1613 {
1614 	caddr_t a;
1615 	int iomin;
1616 	ddi_acc_impl_t *iap;
1617 	int physcontig = 0;
1618 	pgcnt_t npages;
1619 	pgcnt_t minctg;
1620 	uint_t order;
1621 	int e;
1622 
1623 	/*
1624 	 * Check legality of arguments
1625 	 */
1626 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1627 		return (DDI_FAILURE);
1628 	}
1629 
1630 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1631 	    !ISP2(attr->dma_attr_align) || !ISP2(attr->dma_attr_minxfer)) {
1632 		return (DDI_FAILURE);
1633 	}
1634 
1635 	/*
1636 	 * figure out most restrictive alignment requirement
1637 	 */
1638 	iomin = attr->dma_attr_minxfer;
1639 	iomin = maxbit(iomin, attr->dma_attr_align);
1640 	if (iomin == 0)
1641 		return (DDI_FAILURE);
1642 
1643 	ASSERT((iomin & (iomin - 1)) == 0);
1644 
1645 	/*
1646 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1647 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1648 	 * memory that ends on a page boundry.
1649 	 * Don't want to have to different cache mappings to the same
1650 	 * physical page.
1651 	 */
1652 	if (OVERRIDE_CACHE_ATTR(flags)) {
1653 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1654 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1655 	}
1656 
1657 	/*
1658 	 * Determine if we need to satisfy the request for physically
1659 	 * contiguous memory or alignments larger than pagesize.
1660 	 */
1661 	npages = btopr(length + attr->dma_attr_align);
1662 	minctg = howmany(npages, attr->dma_attr_sgllen);
1663 
1664 	if (minctg > 1) {
1665 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1666 		/*
1667 		 * verify that the minimum contig requirement for the
1668 		 * actual length does not cross segment boundary.
1669 		 */
1670 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1671 		    size_t);
1672 		npages = btopr(length);
1673 		minctg = howmany(npages, attr->dma_attr_sgllen);
1674 		if (minctg > pfnseg + 1)
1675 			return (DDI_FAILURE);
1676 		physcontig = 1;
1677 	} else {
1678 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1679 	}
1680 
1681 	/*
1682 	 * Allocate the requested amount from the system.
1683 	 */
1684 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1685 
1686 	if ((*kaddrp = a) == NULL)
1687 		return (DDI_FAILURE);
1688 
1689 	/*
1690 	 * if we to modify the cache attributes, go back and muck with the
1691 	 * mappings.
1692 	 */
1693 	if (OVERRIDE_CACHE_ATTR(flags)) {
1694 		order = 0;
1695 		i_ddi_cacheattr_to_hatacc(flags, &order);
1696 		e = kmem_override_cache_attrs(a, length, order);
1697 		if (e != 0) {
1698 			kfreea(a);
1699 			return (DDI_FAILURE);
1700 		}
1701 	}
1702 
1703 	if (real_length) {
1704 		*real_length = length;
1705 	}
1706 	if (ap) {
1707 		/*
1708 		 * initialize access handle
1709 		 */
1710 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1711 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1712 		impl_acc_hdl_init(ap);
1713 	}
1714 
1715 	return (DDI_SUCCESS);
1716 }
1717 
1718 /* ARGSUSED */
1719 void
1720 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1721 {
1722 	if (ap != NULL) {
1723 		/*
1724 		 * if we modified the cache attributes on alloc, go back and
1725 		 * fix them since this memory could be returned to the
1726 		 * general pool.
1727 		 */
1728 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1729 			uint_t order = 0;
1730 			int e;
1731 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1732 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1733 			if (e != 0) {
1734 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1735 				    "override cache attrs, memory leaked\n");
1736 				return;
1737 			}
1738 		}
1739 	}
1740 	kfreea(kaddr);
1741 }
1742 
1743 /*
1744  * Access Barriers
1745  *
1746  */
/*
 * Stub: the handle is not examined and DDI_FAILURE is always returned.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
	return (DDI_FAILURE);
}
1753 
/*
 * Stub: does nothing; the handle is not examined.
 */
/*ARGSUSED*/
void
i_ddi_notrap(ddi_acc_handle_t hp)
{
}
1759 
1760 
1761 /*
1762  * Misc Functions
1763  */
1764 
1765 /*
1766  * Implementation instance override functions
1767  *
1768  * No override on i86pc
1769  */
/*
 * No instance override here: dip is ignored and (uint_t)-1 is always
 * returned.
 */
/*ARGSUSED*/
uint_t
impl_assign_instance(dev_info_t *dip)
{
	return ((uint_t)-1);
}
1776 
1777 /*ARGSUSED*/
1778 int
1779 impl_keep_instance(dev_info_t *dip)
1780 {
1781 
1782 #if defined(__xpv)
1783 	/*
1784 	 * Do not persist instance numbers assigned to devices in dom0
1785 	 */
1786 	dev_info_t *pdip;
1787 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1788 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1789 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1790 			return (DDI_SUCCESS);
1791 	}
1792 #endif
1793 	return (DDI_FAILURE);
1794 }
1795 
/*
 * No instance override here: dip is ignored and DDI_FAILURE is always
 * returned.
 */
/*ARGSUSED*/
int
impl_free_instance(dev_info_t *dip)
{
	return (DDI_FAILURE);
}
1802 
/*
 * Stub: devi is ignored and DDI_SUCCESS is always returned.
 */
/*ARGSUSED*/
int
impl_check_cpu(dev_info_t *devi)
{
	return (DDI_SUCCESS);
}
1809 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this does nothing.
 */
void
arch_power_down(void)
{
}
1817 
1818 /*
1819  * Copy name to property_name, since name
1820  * is in the low address range below kernelbase.
1821  */
1822 static void
1823 copy_boot_str(const char *boot_str, char *kern_str, int len)
1824 {
1825 	int i = 0;
1826 
1827 	while (i < len - 1 && boot_str[i] != '\0') {
1828 		kern_str[i] = boot_str[i];
1829 		i++;
1830 	}
1831 
1832 	kern_str[i] = 0;	/* null terminate */
1833 	if (boot_str[i] != '\0')
1834 		cmn_err(CE_WARN,
1835 		    "boot property string is truncated to %s", kern_str);
1836 }
1837 
1838 static void
1839 get_boot_properties(void)
1840 {
1841 	extern char hw_provider[];
1842 	dev_info_t *devi;
1843 	char *name;
1844 	int length, flags;
1845 	char property_name[50], property_val[50];
1846 	void *bop_staging_area;
1847 
1848 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1849 
1850 	/*
1851 	 * Import "root" properties from the boot.
1852 	 *
1853 	 * We do this by invoking BOP_NEXTPROP until the list
1854 	 * is completely copied in.
1855 	 */
1856 
1857 	devi = ddi_root_node();
1858 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1859 	    name;					/* NULL => DONE */
1860 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1861 
1862 		/* copy string to memory above kernelbase */
1863 		copy_boot_str(name, property_name, 50);
1864 
1865 		/*
1866 		 * Skip vga properties. They will be picked up later
1867 		 * by get_vga_properties.
1868 		 */
1869 		if (strcmp(property_name, "display-edif-block") == 0 ||
1870 		    strcmp(property_name, "display-edif-id") == 0) {
1871 			continue;
1872 		}
1873 
1874 		length = BOP_GETPROPLEN(bootops, property_name);
1875 		if (length < 0)
1876 			continue;
1877 		if (length > MMU_PAGESIZE) {
1878 			cmn_err(CE_NOTE,
1879 			    "boot property %s longer than 0x%x, ignored\n",
1880 			    property_name, MMU_PAGESIZE);
1881 			continue;
1882 		}
1883 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1884 		flags = do_bsys_getproptype(bootops, property_name);
1885 
1886 		/*
1887 		 * special properties:
1888 		 * si-machine, si-hw-provider
1889 		 *	goes to kernel data structures.
1890 		 * bios-boot-device and stdout
1891 		 *	goes to hardware property list so it may show up
1892 		 *	in the prtconf -vp output. This is needed by
1893 		 *	Install/Upgrade. Once we fix install upgrade,
1894 		 *	this can be taken out.
1895 		 */
1896 		if (strcmp(name, "si-machine") == 0) {
1897 			(void) strncpy(utsname.machine, bop_staging_area,
1898 			    SYS_NMLN);
1899 			utsname.machine[SYS_NMLN - 1] = '\0';
1900 			continue;
1901 		}
1902 		if (strcmp(name, "si-hw-provider") == 0) {
1903 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1904 			hw_provider[SYS_NMLN - 1] = '\0';
1905 			continue;
1906 		}
1907 		if (strcmp(name, "bios-boot-device") == 0) {
1908 			copy_boot_str(bop_staging_area, property_val, 50);
1909 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1910 			    property_name, property_val);
1911 			continue;
1912 		}
1913 		if (strcmp(name, "stdout") == 0) {
1914 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1915 			    property_name, *((int *)bop_staging_area));
1916 			continue;
1917 		}
1918 
1919 		/* Boolean property */
1920 		if (length == 0) {
1921 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1922 			    DDI_PROP_CANSLEEP, property_name, NULL, 0);
1923 			continue;
1924 		}
1925 
1926 		/* Now anything else based on type. */
1927 		switch (flags) {
1928 		case DDI_PROP_TYPE_INT:
1929 			if (length == sizeof (int)) {
1930 				(void) e_ddi_prop_update_int(DDI_DEV_T_NONE,
1931 				    devi, property_name,
1932 				    *((int *)bop_staging_area));
1933 			} else {
1934 				(void) e_ddi_prop_update_int_array(
1935 				    DDI_DEV_T_NONE, devi, property_name,
1936 				    bop_staging_area, length / sizeof (int));
1937 			}
1938 			break;
1939 		case DDI_PROP_TYPE_STRING:
1940 			(void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1941 			    property_name, bop_staging_area);
1942 			break;
1943 		case DDI_PROP_TYPE_BYTE:
1944 			(void) e_ddi_prop_update_byte_array(DDI_DEV_T_NONE,
1945 			    devi, property_name, bop_staging_area, length);
1946 			break;
1947 		case DDI_PROP_TYPE_INT64:
1948 			if (length == sizeof (int64_t)) {
1949 				(void) e_ddi_prop_update_int64(DDI_DEV_T_NONE,
1950 				    devi, property_name,
1951 				    *((int64_t *)bop_staging_area));
1952 			} else {
1953 				(void) e_ddi_prop_update_int64_array(
1954 				    DDI_DEV_T_NONE, devi, property_name,
1955 				    bop_staging_area,
1956 				    length / sizeof (int64_t));
1957 			}
1958 			break;
1959 		default:
1960 			/* Property type unknown, use old prop interface */
1961 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1962 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1963 			    length);
1964 		}
1965 	}
1966 
1967 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1968 }
1969 
1970 static void
1971 get_vga_properties(void)
1972 {
1973 	dev_info_t *devi;
1974 	major_t major;
1975 	char *name;
1976 	int length;
1977 	char property_val[50];
1978 	void *bop_staging_area;
1979 
1980 	/*
1981 	 * XXXX Hack Allert!
1982 	 * There really needs to be a better way for identifying various
1983 	 * console framebuffers and their related issues.  Till then,
1984 	 * check for this one as a replacement to vgatext.
1985 	 */
1986 	major = ddi_name_to_major("ragexl");
1987 	if (major == (major_t)-1) {
1988 		major = ddi_name_to_major("vgatext");
1989 		if (major == (major_t)-1)
1990 			return;
1991 	}
1992 	devi = devnamesp[major].dn_head;
1993 	if (devi == NULL)
1994 		return;
1995 
1996 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1997 
1998 	/*
1999 	 * Import "vga" properties from the boot.
2000 	 */
2001 	name = "display-edif-block";
2002 	length = BOP_GETPROPLEN(bootops, name);
2003 	if (length > 0 && length < MMU_PAGESIZE) {
2004 		BOP_GETPROP(bootops, name, bop_staging_area);
2005 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
2006 		    devi, name, bop_staging_area, length);
2007 	}
2008 
2009 	/*
2010 	 * kdmconfig is also looking for display-type and
2011 	 * video-adapter-type. We default to color and svga.
2012 	 *
2013 	 * Could it be "monochrome", "vga"?
2014 	 * Nah, you've got to come to the 21st century...
2015 	 * And you can set monitor type manually in kdmconfig
2016 	 * if you are really an old junky.
2017 	 */
2018 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2019 	    devi, "display-type", "color");
2020 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2021 	    devi, "video-adapter-type", "svga");
2022 
2023 	name = "display-edif-id";
2024 	length = BOP_GETPROPLEN(bootops, name);
2025 	if (length > 0 && length < MMU_PAGESIZE) {
2026 		BOP_GETPROP(bootops, name, bop_staging_area);
2027 		copy_boot_str(bop_staging_area, property_val, length);
2028 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2029 		    devi, name, property_val);
2030 	}
2031 
2032 	kmem_free(bop_staging_area, MMU_PAGESIZE);
2033 }
2034 
2035 
2036 /*
2037  * This is temporary, but absolutely necessary.  If we are being
2038  * booted with a device tree created by the DevConf project's bootconf
2039  * program, then we have device information nodes that reflect
2040  * reality.  At this point in time in the Solaris release schedule, the
2041  * kernel drivers aren't prepared for reality.  They still depend on their
2042  * own ad-hoc interpretations of the properties created when their .conf
2043  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2044  * property to prevent them from using the nodes passed up from the bootconf
2045  * device tree.
2046  *
2047  * Trying to assemble root file system drivers as we are booting from
2048  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
2050  * reality-based name_addrs.  We help the boot along in this case by
2051  * looking at the pre-bootconf bootpath and determining if we would have
2052  * successfully matched if that had been the bootpath we had chosen.
2053  *
2054  * Note that we only even perform this extra check if we've booted
2055  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2056  * we're trying to match the name_addr specified in the 1275 bootpath.
2057  */
2058 
2059 #define	MAXCOMPONENTLEN	32
2060 
2061 int
2062 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2063 {
2064 	/*
2065 	 *  There are multiple criteria to be met before we can even
2066 	 *  consider allowing a name_addr match here.
2067 	 *
2068 	 *  1) We must have been booted such that the bootconf program
2069 	 *	created device tree nodes and properties.  This can be
2070 	 *	determined by examining the 'bootpath' property.  This
2071 	 *	property will be a non-null string iff bootconf was
2072 	 *	involved in the boot.
2073 	 *
2074 	 *  2) The module that we want to match must be the boot device.
2075 	 *
2076 	 *  3) The instance of the module we are thinking of letting be
2077 	 *	our match must be ignoring hardware nodes.
2078 	 *
2079 	 *  4) The name_addr we want to match must be the name_addr
2080 	 *	specified in the 1275 bootpath.
2081 	 */
2082 	static char bootdev_module[MAXCOMPONENTLEN];
2083 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2084 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2085 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2086 	static int  quickexit;
2087 
2088 	char *daddr;
2089 	int dlen;
2090 
2091 	char	*lkupname;
2092 	int	rv = DDI_FAILURE;
2093 
2094 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2095 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2096 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2097 	    "ignore-hardware-nodes", -1) != -1)) {
2098 		if (strcmp(daddr, caddr) == 0) {
2099 			return (DDI_SUCCESS);
2100 		}
2101 	}
2102 
2103 	if (quickexit)
2104 		return (rv);
2105 
2106 	if (bootdev_module[0] == '\0') {
2107 		char *addrp, *eoaddrp;
2108 		char *busp, *modp, *atp;
2109 		char *bp1275, *bp;
2110 		int  bp1275len, bplen;
2111 
2112 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2113 
2114 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2115 		    ddi_root_node(), 0, "bootpath",
2116 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2117 		    bp1275len <= 1) {
2118 			/*
2119 			 * We didn't boot from bootconf so we never need to
2120 			 * do any special matches.
2121 			 */
2122 			quickexit = 1;
2123 			if (bp1275)
2124 				kmem_free(bp1275, bp1275len);
2125 			return (rv);
2126 		}
2127 
2128 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2129 		    ddi_root_node(), 0, "boot-path",
2130 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2131 			/*
2132 			 * No fallback position for matching. This is
2133 			 * certainly unexpected, but we'll handle it
2134 			 * just in case.
2135 			 */
2136 			quickexit = 1;
2137 			kmem_free(bp1275, bp1275len);
2138 			if (bp)
2139 				kmem_free(bp, bplen);
2140 			return (rv);
2141 		}
2142 
2143 		/*
2144 		 *  Determine boot device module and 1275 name_addr
2145 		 *
2146 		 *  bootpath assumed to be of the form /bus/module@name_addr
2147 		 */
2148 		if (busp = strchr(bp1275, '/')) {
2149 			if (modp = strchr(busp + 1, '/')) {
2150 				if (atp = strchr(modp + 1, '@')) {
2151 					*atp = '\0';
2152 					addrp = atp + 1;
2153 					if (eoaddrp = strchr(addrp, '/'))
2154 						*eoaddrp = '\0';
2155 				}
2156 			}
2157 		}
2158 
2159 		if (modp && addrp) {
2160 			(void) strncpy(bootdev_module, modp + 1,
2161 			    MAXCOMPONENTLEN);
2162 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2163 
2164 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2165 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2166 		} else {
2167 			quickexit = 1;
2168 			kmem_free(bp1275, bp1275len);
2169 			kmem_free(bp, bplen);
2170 			return (rv);
2171 		}
2172 
2173 		/*
2174 		 *  Determine fallback name_addr
2175 		 *
2176 		 *  10/3/96 - Also save fallback module name because it
2177 		 *  might actually be different than the current module
2178 		 *  name.  E.G., ISA pnp drivers have new names.
2179 		 *
2180 		 *  bootpath assumed to be of the form /bus/module@name_addr
2181 		 */
2182 		addrp = NULL;
2183 		if (busp = strchr(bp, '/')) {
2184 			if (modp = strchr(busp + 1, '/')) {
2185 				if (atp = strchr(modp + 1, '@')) {
2186 					*atp = '\0';
2187 					addrp = atp + 1;
2188 					if (eoaddrp = strchr(addrp, '/'))
2189 						*eoaddrp = '\0';
2190 				}
2191 			}
2192 		}
2193 
2194 		if (modp && addrp) {
2195 			(void) strncpy(bootdev_oldmod, modp + 1,
2196 			    MAXCOMPONENTLEN);
2197 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2198 
2199 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2200 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2201 		}
2202 
2203 		/* Free up the bootpath storage now that we're done with it. */
2204 		kmem_free(bp1275, bp1275len);
2205 		kmem_free(bp, bplen);
2206 
2207 		if (bootdev_oldaddr[0] == '\0') {
2208 			quickexit = 1;
2209 			return (rv);
2210 		}
2211 	}
2212 
2213 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2214 	    (strcmp(bootdev_module, lkupname) == 0 ||
2215 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2216 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2217 	    "ignore-hardware-nodes", -1) != -1) ||
2218 	    ignore_hardware_nodes) &&
2219 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2220 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2221 		rv = DDI_SUCCESS;
2222 	}
2223 
2224 	return (rv);
2225 }
2226 
/*
 * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
 * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
 *
 * This implementation is a plain bcopy() of len bytes from devaddr to kaddr;
 * devi and off are not used here.  Always returns 0.
 */
/*ARGSUSED*/
int
e_ddi_copyfromdev(dev_info_t *devi,
    off_t off, const void *devaddr, void *kaddr, size_t len)
{
	bcopy(devaddr, kaddr, len);
	return (0);
}
2239 
/*
 * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
 * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
 *
 * This implementation is a plain bcopy() of len bytes from kaddr to devaddr;
 * devi and off are not used here.  Always returns 0.
 */
/*ARGSUSED*/
int
e_ddi_copytodev(dev_info_t *devi,
    off_t off, const void *kaddr, void *devaddr, size_t len)
{
	bcopy(kaddr, devaddr, len);
	return (0);
}
2252 
2253 
2254 static int
2255 poke_mem(peekpoke_ctlops_t *in_args)
2256 {
2257 	int err = DDI_SUCCESS;
2258 	on_trap_data_t otd;
2259 
2260 	/* Set up protected environment. */
2261 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2262 		switch (in_args->size) {
2263 		case sizeof (uint8_t):
2264 			*(uint8_t *)(in_args->dev_addr) =
2265 			    *(uint8_t *)in_args->host_addr;
2266 			break;
2267 
2268 		case sizeof (uint16_t):
2269 			*(uint16_t *)(in_args->dev_addr) =
2270 			    *(uint16_t *)in_args->host_addr;
2271 			break;
2272 
2273 		case sizeof (uint32_t):
2274 			*(uint32_t *)(in_args->dev_addr) =
2275 			    *(uint32_t *)in_args->host_addr;
2276 			break;
2277 
2278 		case sizeof (uint64_t):
2279 			*(uint64_t *)(in_args->dev_addr) =
2280 			    *(uint64_t *)in_args->host_addr;
2281 			break;
2282 
2283 		default:
2284 			err = DDI_FAILURE;
2285 			break;
2286 		}
2287 	} else
2288 		err = DDI_FAILURE;
2289 
2290 	/* Take down protected environment. */
2291 	no_trap();
2292 
2293 	return (err);
2294 }
2295 
2296 
2297 static int
2298 peek_mem(peekpoke_ctlops_t *in_args)
2299 {
2300 	int err = DDI_SUCCESS;
2301 	on_trap_data_t otd;
2302 
2303 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2304 		switch (in_args->size) {
2305 		case sizeof (uint8_t):
2306 			*(uint8_t *)in_args->host_addr =
2307 			    *(uint8_t *)in_args->dev_addr;
2308 			break;
2309 
2310 		case sizeof (uint16_t):
2311 			*(uint16_t *)in_args->host_addr =
2312 			    *(uint16_t *)in_args->dev_addr;
2313 			break;
2314 
2315 		case sizeof (uint32_t):
2316 			*(uint32_t *)in_args->host_addr =
2317 			    *(uint32_t *)in_args->dev_addr;
2318 			break;
2319 
2320 		case sizeof (uint64_t):
2321 			*(uint64_t *)in_args->host_addr =
2322 			    *(uint64_t *)in_args->dev_addr;
2323 			break;
2324 
2325 		default:
2326 			err = DDI_FAILURE;
2327 			break;
2328 		}
2329 	} else
2330 		err = DDI_FAILURE;
2331 
2332 	no_trap();
2333 	return (err);
2334 }
2335 
2336 
2337 /*
2338  * This is called only to process peek/poke when the DIP is NULL.
2339  * Assume that this is for memory, as nexi take care of device safe accesses.
2340  */
2341 int
2342 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2343 {
2344 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2345 }
2346 
/*
 * we've just done a cautious put/get. Check if it was successful by
 * calling pci_ereport_post() on all puts and for any gets that return -1
 *
 *	dip	- passed through to the scan callback
 *	arg	- the peekpoke_ctlops_t describing the access just performed
 *	ctlop	- DDI_CTLOPS_PEEK or DDI_CTLOPS_POKE
 *	scan	- callback invoked as scan(dip, &de) to look for errors
 *		  (presumably ends up in pci_ereport_post(); confirm against
 *		  the callers)
 *
 * Returns DDI_SUCCESS without scanning for a poke on a handle that is not
 * DDI_CAUTIOUS_ACC, and for a peek on such a handle whose data is not
 * all-ones.  Otherwise the scan callback is run; DDI_FAILURE is returned
 * (and the handle's error record filled in) only when the handle is not
 * DDI_DEFAULT_ACC and the scan left fme_status other than DDI_FM_OK.
 */
static int
pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
    void (*scan)(dev_info_t *, ddi_fm_error_t *))
{
	int	rval = DDI_SUCCESS;
	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
	ddi_fm_error_t de;
	/*
	 * The same handle is viewed both as the implementation structure
	 * (for ahi_err) and as the common handle (for the access attributes).
	 */
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
	int check_err = 0;
	int repcount = in_args->repcount;

	/* Non-cautious puts are never checked. */
	if (ctlop == DDI_CTLOPS_POKE &&
	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
		return (DDI_SUCCESS);

	/*
	 * Non-cautious gets are only checked further when the data read back
	 * is all-ones for the access size.
	 *
	 * NOTE(review): in_args->host_addr is not advanced inside this loop,
	 * so each of the repcount iterations inspects the same (first)
	 * element.  Confirm whether every element of a repeated get was
	 * meant to be examined.
	 */
	if (ctlop == DDI_CTLOPS_PEEK &&
	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
		for (; repcount; repcount--) {
			switch (in_args->size) {
			case sizeof (uint8_t):
				if (*(uint8_t *)in_args->host_addr == 0xff)
					check_err = 1;
				break;
			case sizeof (uint16_t):
				if (*(uint16_t *)in_args->host_addr == 0xffff)
					check_err = 1;
				break;
			case sizeof (uint32_t):
				if (*(uint32_t *)in_args->host_addr ==
				    0xffffffff)
					check_err = 1;
				break;
			case sizeof (uint64_t):
				if (*(uint64_t *)in_args->host_addr ==
				    0xffffffffffffffff)
					check_err = 1;
				break;
			}
		}
		if (check_err == 0)
			return (DDI_SUCCESS);
	}
	/*
	 * for a cautious put or get or a non-cautious get that returned -1 call
	 * io framework to see if there really was an error
	 */
	bzero(&de, sizeof (ddi_fm_error_t));
	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
		de.fme_flag = DDI_FM_ERR_EXPECTED;
		de.fme_acc_handle = in_args->handle;
	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
		/*
		 * We only get here with DDI_DEFAULT_ACC for config space gets.
		 * Non-hardened drivers may be probing the hardware and
		 * expecting -1 returned. So need to treat errors on
		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
		 */
		de.fme_flag = DDI_FM_ERR_EXPECTED;
		de.fme_acc_handle = in_args->handle;
	} else {
		/*
		 * Hardened driver doing protected accesses shouldn't
		 * get errors unless there's a hardware problem. Treat
		 * as nonfatal if there's an error, but set UNEXPECTED
		 * so we raise ereports on any errors and potentially
		 * fault the device
		 */
		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
	}
	(void) scan(dip, &de);
	/*
	 * DDI_DEFAULT_ACC handles always report success; for the others a
	 * non-OK scan status is recorded in the handle and reported as a
	 * nonfatal failure.
	 */
	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
	    de.fme_status != DDI_FM_OK) {
		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
		rval = DDI_FAILURE;
		errp->err_ena = de.fme_ena;
		errp->err_expected = de.fme_flag;
		errp->err_status = DDI_FM_NONFATAL;
	}
	return (rval);
}
2434 
/*
 * pci_peekpoke_check_nofma() is for when an error occurs on a register access
 * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
 * recurse, so assume all puts are OK and gets have failed if they return -1
 *
 *	arg	- the peekpoke_ctlops_t describing the access just performed
 *	ctlop	- DDI_CTLOPS_POKE always yields DDI_SUCCESS; any other value
 *		  is treated as a get and its data is inspected
 *
 * Returns DDI_FAILURE when the data read is all-ones for the access size,
 * DDI_SUCCESS otherwise.  On failure, for handles that are not
 * DDI_DEFAULT_ACC, the handle's error record is filled in with a newly
 * generated ENA, DDI_FM_ERR_UNEXPECTED and DDI_FM_NONFATAL.
 */
static int
pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
{
	int rval = DDI_SUCCESS;
	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
	/*
	 * The same handle is viewed both as the implementation structure
	 * (for ahi_err) and as the common handle (for the access attributes).
	 */
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
	int repcount = in_args->repcount;

	/* Puts are assumed to have succeeded. */
	if (ctlop == DDI_CTLOPS_POKE)
		return (rval);

	/*
	 * NOTE(review): in_args->host_addr is not advanced inside this loop,
	 * so each of the repcount iterations inspects the same (first)
	 * element.  Confirm whether every element of a repeated get was
	 * meant to be examined.
	 */
	for (; repcount; repcount--) {
		switch (in_args->size) {
		case sizeof (uint8_t):
			if (*(uint8_t *)in_args->host_addr == 0xff)
				rval = DDI_FAILURE;
			break;
		case sizeof (uint16_t):
			if (*(uint16_t *)in_args->host_addr == 0xffff)
				rval = DDI_FAILURE;
			break;
		case sizeof (uint32_t):
			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
				rval = DDI_FAILURE;
			break;
		case sizeof (uint64_t):
			if (*(uint64_t *)in_args->host_addr ==
			    0xffffffffffffffff)
				rval = DDI_FAILURE;
			break;
		}
	}
	/* Record the failure in the handle unless it is DDI_DEFAULT_ACC. */
	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
	    rval == DDI_FAILURE) {
		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
		errp->err_status = DDI_FM_NONFATAL;
	}
	return (rval);
}
2482 
/*
 * Perform a handle-based peek/poke through 'handler' and then check whether
 * the access hit an error.
 *
 *	dip, rdip, ctlop, arg, result
 *			- passed unchanged to handler(); arg is a
 *			  peekpoke_ctlops_t whose handle must be non-NULL
 *	handler		- routine that actually performs the access
 *	err_mutexp	- mutex held around the FMA error check
 *	peek_poke_mutexp - mutex held around the access and its check
 *	scan		- error scan callback handed to
 *			  pci_peekpoke_check_fma()
 *
 * Returns DDI_FAILURE when there is no access handle.  Otherwise returns the
 * handler's result when that is not DDI_SUCCESS, or else the result of the
 * follow-up check (pci_peekpoke_check_nofma() or pci_peekpoke_check_fma()).
 */
int
pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result,
    int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
    void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
    void (*scan)(dev_info_t *, ddi_fm_error_t *))
{
	int rval;
	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;

	/*
	 * this function only supports cautious accesses, not peeks/pokes
	 * which don't have a handle
	 */
	if (hp == NULL)
		return (DDI_FAILURE);

	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
		if (!mutex_tryenter(err_mutexp)) {
			/*
			 * As this may be a recursive call from within
			 * pci_ereport_post() we can't wait for the mutexes.
			 * Fortunately we know someone is already calling
			 * pci_ereport_post() which will handle the error bits
			 * for us, and as this is a config space access we can
			 * just do the access and check return value for -1
			 * using pci_peekpoke_check_nofma().
			 */
			rval = handler(dip, rdip, ctlop, arg, result);
			if (rval == DDI_SUCCESS)
				rval = pci_peekpoke_check_nofma(arg, ctlop);
			return (rval);
		}
		/*
		 * This can't be a recursive call. Drop the err_mutex and get
		 * both mutexes in the right order. If an error hasn't already
		 * been detected by the ontrap code, use pci_peekpoke_check_fma
		 * which will call pci_ereport_post() to check error status.
		 */
		mutex_exit(err_mutexp);
	}
	/*
	 * Lock order: peek_poke_mutexp first, then err_mutexp (only for the
	 * duration of the FMA check).
	 */
	mutex_enter(peek_poke_mutexp);
	rval = handler(dip, rdip, ctlop, arg, result);
	if (rval == DDI_SUCCESS) {
		mutex_enter(err_mutexp);
		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
		mutex_exit(err_mutexp);
	}
	mutex_exit(peek_poke_mutexp);
	return (rval);
}
2535 
/*
 * Platform DDI setup: create the "ramdisk" node (and, when pseudo_isa is
 * set, an "isa" node) under the root node, read the boot and VGA properties,
 * check for disabled drivers and finally run the initial bus probes.
 */
void
impl_setup_ddi(void)
{
#if !defined(__xpv)
	extern void startup_bios_disk(void);
	extern int post_fastreboot;
#endif
	dev_info_t *xdip, *isa_dip;
	rd_existing_t rd_mem_prop;
	int err;

	/* ramdisk node */
	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
	    (pnode_t)DEVI_SID_NODEID, &xdip);

	/* The results of these lookups are deliberately not checked. */
	(void) BOP_GETPROP(bootops,
	    "ramdisk_start", (void *)&ramdisk_start);
	(void) BOP_GETPROP(bootops,
	    "ramdisk_end", (void *)&ramdisk_end);

#ifdef __xpv
	/* Under __xpv both boundaries are shifted down by ONE_GIG. */
	ramdisk_start -= ONE_GIG;
	ramdisk_end -= ONE_GIG;
#endif
	/* ramdisk_end is treated as inclusive, hence the + 1. */
	rd_mem_prop.phys = ramdisk_start;
	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;

	/* Publish the range as a byte-array property on the ramdisk node. */
	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
	    sizeof (rd_mem_prop));
	err = ndi_devi_bind_driver(xdip, 0);
	ASSERT(err == 0);

	/* isa node */
	if (pseudo_isa) {
		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
		    "device_type", "isa");
		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
		    "bus-type", "isa");
		(void) ndi_devi_bind_driver(isa_dip, 0);
	}

	/*
	 * Read in the properties from the boot.
	 */
	get_boot_properties();

	/*
	 * Read the VGA properties.  The original note here read "not
	 * framebuffer should be enumerated, if present".
	 * NOTE(review): that wording looks garbled; confirm the intent.
	 */
	get_vga_properties();

	/*
	 * Check for administratively disabled drivers.
	 */
	check_driver_disable();

#if !defined(__xpv)
	/*
	 * startup_bios_disk() is skipped after a fast reboot and whenever
	 * the "efi-systab" boot property has a non-negative length
	 * (presumably meaning the system booted via EFI; confirm).
	 */
	if (!post_fastreboot && BOP_GETPROPLEN(bootops, "efi-systab") < 0)
		startup_bios_disk();
#endif
	/* do bus dependent probes. */
	impl_bus_initialprobe();
}
2599 
2600 dev_t
2601 getrootdev(void)
2602 {
2603 	/*
2604 	 * Usually rootfs.bo_name is initialized by the
2605 	 * the bootpath property from bootenv.rc, but
2606 	 * defaults to "/ramdisk:a" otherwise.
2607 	 */
2608 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2609 }
2610 
/*
 * One registered bus probe routine.  Entries form a singly linked list.
 */
struct bus_probe {
	struct bus_probe *next;		/* next entry, NULL at the tail */
	void (*probe)(int);		/* probe routine to invoke */
};

/* Head of the list of registered bus probes; NULL while the list is empty. */
static struct bus_probe *bus_probes;
2615 
2616 void
2617 impl_bus_add_probe(void (*func)(int))
2618 {
2619 	struct bus_probe *probe;
2620 	struct bus_probe *lastprobe = NULL;
2621 
2622 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2623 	probe->probe = func;
2624 	probe->next = NULL;
2625 
2626 	if (!bus_probes) {
2627 		bus_probes = probe;
2628 		return;
2629 	}
2630 
2631 	lastprobe = bus_probes;
2632 	while (lastprobe->next)
2633 		lastprobe = lastprobe->next;
2634 	lastprobe->next = probe;
2635 }
2636 
2637 /*ARGSUSED*/
2638 void
2639 impl_bus_delete_probe(void (*func)(int))
2640 {
2641 	struct bus_probe *prev = NULL;
2642 	struct bus_probe *probe = bus_probes;
2643 
2644 	while (probe) {
2645 		if (probe->probe == func)
2646 			break;
2647 		prev = probe;
2648 		probe = probe->next;
2649 	}
2650 
2651 	if (probe == NULL)
2652 		return;
2653 
2654 	if (prev)
2655 		prev->next = probe->next;
2656 	else
2657 		bus_probes = probe->next;
2658 
2659 	kmem_free(probe, sizeof (struct bus_probe));
2660 }
2661 
2662 /*
2663  * impl_bus_initialprobe
2664  *	Modload the prom simulator, then let it probe to verify existence
2665  *	and type of PCI support.
2666  */
2667 static void
2668 impl_bus_initialprobe(void)
2669 {
2670 	struct bus_probe *probe;
2671 
2672 	/* load modules to install bus probes */
2673 #if defined(__xpv)
2674 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2675 		if (modload("misc", "pci_autoconfig") < 0) {
2676 			panic("failed to load misc/pci_autoconfig");
2677 		}
2678 
2679 		if (modload("drv", "isa") < 0)
2680 			panic("failed to load drv/isa");
2681 	}
2682 
2683 	(void) modload("misc", "xpv_autoconfig");
2684 #else
2685 	if (modload("misc", "pci_autoconfig") < 0) {
2686 		panic("failed to load misc/pci_autoconfig");
2687 	}
2688 
2689 	(void) modload("misc", "acpidev");
2690 
2691 	if (modload("drv", "isa") < 0)
2692 		panic("failed to load drv/isa");
2693 #endif
2694 
2695 	probe = bus_probes;
2696 	while (probe) {
2697 		/* run the probe functions */
2698 		(*probe->probe)(0);
2699 		probe = probe->next;
2700 	}
2701 }
2702 
2703 /*
2704  * impl_bus_reprobe
2705  *	Reprogram devices not set up by firmware.
2706  */
2707 static void
2708 impl_bus_reprobe(void)
2709 {
2710 	struct bus_probe *probe;
2711 
2712 	probe = bus_probes;
2713 	while (probe) {
2714 		/* run the probe function */
2715 		(*probe->probe)(1);
2716 		probe = probe->next;
2717 	}
2718 }
2719 
2720 
2721 /*
2722  * The following functions ready a cautious request to go up to the nexus
2723  * driver.  It is up to the nexus driver to decide how to process the request.
2724  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2725  * differently.
2726  */
2727 
2728 static void
2729 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2730     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2731     ddi_ctl_enum_t cmd)
2732 {
2733 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2734 
2735 	cautacc_ctlops_arg.size = size;
2736 	cautacc_ctlops_arg.dev_addr = dev_addr;
2737 	cautacc_ctlops_arg.host_addr = host_addr;
2738 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2739 	cautacc_ctlops_arg.repcount = repcount;
2740 	cautacc_ctlops_arg.flags = flags;
2741 
2742 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2743 	    &cautacc_ctlops_arg, NULL);
2744 }
2745 
2746 uint8_t
2747 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2748 {
2749 	uint8_t value;
2750 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2751 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2752 
2753 	return (value);
2754 }
2755 
2756 uint16_t
2757 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2758 {
2759 	uint16_t value;
2760 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2761 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2762 
2763 	return (value);
2764 }
2765 
2766 uint32_t
2767 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2768 {
2769 	uint32_t value;
2770 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2771 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2772 
2773 	return (value);
2774 }
2775 
2776 uint64_t
2777 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2778 {
2779 	uint64_t value;
2780 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2781 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2782 
2783 	return (value);
2784 }
2785 
2786 void
2787 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2788 {
2789 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2790 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2791 }
2792 
2793 void
2794 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2795 {
2796 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2797 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2798 }
2799 
2800 void
2801 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2802 {
2803 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2804 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2805 }
2806 
2807 void
2808 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2809 {
2810 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2811 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2812 }
2813 
2814 void
2815 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2816     size_t repcount, uint_t flags)
2817 {
2818 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2819 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2820 }
2821 
2822 void
2823 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2824     uint16_t *dev_addr, size_t repcount, uint_t flags)
2825 {
2826 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2827 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2828 }
2829 
2830 void
2831 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2832     uint32_t *dev_addr, size_t repcount, uint_t flags)
2833 {
2834 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2835 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2836 }
2837 
2838 void
2839 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2840     uint64_t *dev_addr, size_t repcount, uint_t flags)
2841 {
2842 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2843 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2844 }
2845 
2846 void
2847 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2848     size_t repcount, uint_t flags)
2849 {
2850 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2851 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2852 }
2853 
2854 void
2855 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2856     uint16_t *dev_addr, size_t repcount, uint_t flags)
2857 {
2858 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2859 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2860 }
2861 
2862 void
2863 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2864     uint32_t *dev_addr, size_t repcount, uint_t flags)
2865 {
2866 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2867 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2868 }
2869 
2870 void
2871 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2872     uint64_t *dev_addr, size_t repcount, uint_t flags)
2873 {
2874 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2875 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2876 }
2877 
2878 boolean_t
2879 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2880 {
2881 	uint64_t hi_pa;
2882 
2883 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2884 	if (attrp->dma_attr_addr_hi < hi_pa) {
2885 		return (B_TRUE);
2886 	}
2887 
2888 	return (B_FALSE);
2889 }
2890 
2891 size_t
2892 i_ddi_copybuf_size()
2893 {
2894 	return (dma_max_copybuf_size);
2895 }
2896 
2897 /*
2898  * i_ddi_dma_max()
2899  *    returns the maximum DMA size which can be performed in a single DMA
2900  *    window taking into account the devices DMA contraints (attrp), the
2901  *    maximum copy buffer size (if applicable), and the worse case buffer
2902  *    fragmentation.
2903  */
2904 /*ARGSUSED*/
2905 uint32_t
2906 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2907 {
2908 	uint64_t maxxfer;
2909 
2910 
2911 	/*
2912 	 * take the min of maxxfer and the the worse case fragementation
2913 	 * (e.g. every cookie <= 1 page)
2914 	 */
2915 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2916 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2917 
2918 	/*
2919 	 * If the DMA engine can't reach all off memory, we also need to take
2920 	 * the max size of the copybuf into consideration.
2921 	 */
2922 	if (i_ddi_copybuf_required(attrp)) {
2923 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2924 	}
2925 
2926 	/*
2927 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2928 	 * page so it won't be mistaken for an error value during debug.
2929 	 */
2930 	if (maxxfer >= 0xFFFFFFFF) {
2931 		maxxfer = 0xFFFFF000;
2932 	}
2933 
2934 	/*
2935 	 * make sure the value we return is a whole multiple of the
2936 	 * granlarity.
2937 	 */
2938 	if (attrp->dma_attr_granular > 1) {
2939 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2940 	}
2941 
2942 	return ((uint32_t)maxxfer);
2943 }
2944 
/*
 * Device ID translation hook.  The body is empty here: dip is ignored and
 * nothing is done.
 */
/*ARGSUSED*/
void
translate_devid(dev_info_t *dip)
{
}
2950 
2951 pfn_t
2952 i_ddi_paddr_to_pfn(paddr_t paddr)
2953 {
2954 	pfn_t pfn;
2955 
2956 #ifdef __xpv
2957 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2958 		pfn = xen_assign_pfn(mmu_btop(paddr));
2959 	} else {
2960 		pfn = mmu_btop(paddr);
2961 	}
2962 #else
2963 	pfn = mmu_btop(paddr);
2964 #endif
2965 
2966 	return (pfn);
2967 }
2968