xref: /titanic_52/usr/src/uts/i86pc/io/pci/pci_tools.c (revision e23347b1b88ce2c0847fad6e9467a1f953597aa7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/mkdev.h>
28 #include <sys/stat.h>
29 #include <sys/sunddi.h>
30 #include <vm/seg_kmem.h>
31 #include <sys/machparam.h>
32 #include <sys/sunndi.h>
33 #include <sys/ontrap.h>
34 #include <sys/psm.h>
35 #include <sys/pcie.h>
36 #include <sys/hotplug/pci/pcihp.h>
37 #include <sys/pci_cfgspace.h>
38 #include <sys/pci_tools.h>
39 #include <io/pci/pci_tools_ext.h>
40 #include <sys/apic.h>
41 #include <io/pci/pci_var.h>
42 #include <sys/promif.h>
43 #include <sys/x86_archext.h>
44 #include <sys/cpuvar.h>
45 
46 #ifdef __xpv
47 #include <sys/hypervisor.h>
48 #endif
49 
50 #define	PCIEX_BDF_OFFSET_DELTA	4
51 #define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
52 #define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
53 #define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)
54 
55 #define	SUCCESS	0
56 
57 int pcitool_debug = 0;
58 
59 /*
60  * Offsets of BARS in config space.  First entry of 0 means config space.
61  * Entries here correlate to pcitool_bars_t enumerated type.
62  */
63 static uint8_t pci_bars[] = {
64 	0x0,
65 	PCI_CONF_BASE0,
66 	PCI_CONF_BASE1,
67 	PCI_CONF_BASE2,
68 	PCI_CONF_BASE3,
69 	PCI_CONF_BASE4,
70 	PCI_CONF_BASE5,
71 	PCI_CONF_ROM
72 };
73 
74 /* Max offset allowed into config space for a particular device. */
75 static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;
76 
77 static uint64_t pcitool_swap_endian(uint64_t data, int size);
78 static int pcitool_pciex_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
79     boolean_t write_flag);
80 static int pcitool_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
81     boolean_t write_flag);
82 static int pcitool_io_access(dev_info_t *dip, pcitool_reg_t *prg,
83     boolean_t write_flag);
84 static int pcitool_mem_access(dev_info_t *dip, pcitool_reg_t *prg,
85     uint64_t virt_addr, boolean_t write_flag);
86 static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
87 static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);
88 
89 /* Extern declarations */
90 extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
91 		    psm_intr_op_t, int *);
92 
93 int
94 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
95 {
96 	int instance = ddi_get_instance(dip);
97 
98 	/* Create pcitool nodes for register access and interrupt routing. */
99 
100 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
101 	    PCIHP_AP_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
102 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
103 		return (DDI_FAILURE);
104 	}
105 
106 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
107 	    PCIHP_AP_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
108 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
109 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
110 		return (DDI_FAILURE);
111 	}
112 
113 	if (is_pciex)
114 		max_cfg_size = PCIE_CONF_HDR_SIZE;
115 
116 	return (DDI_SUCCESS);
117 }
118 
119 void
120 pcitool_uninit(dev_info_t *dip)
121 {
122 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
123 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
124 }
125 
126 /*ARGSUSED*/
127 static int
128 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
129 {
130 	ddi_intr_handle_impl_t info_hdl;
131 	pcitool_intr_set_t iset;
132 	uint32_t old_cpu;
133 	int ret, result;
134 	size_t copyinout_size;
135 	int rval = SUCCESS;
136 
137 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
138 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
139 
140 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
141 		return (EFAULT);
142 
143 	switch (iset.user_version) {
144 	case PCITOOL_V1:
145 		break;
146 
147 	case PCITOOL_V2:
148 		copyinout_size = sizeof (pcitool_intr_set_t);
149 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
150 			return (EFAULT);
151 		break;
152 
153 	default:
154 		iset.status = PCITOOL_OUT_OF_RANGE;
155 		rval = ENOTSUP;
156 		goto done_set_intr;
157 	}
158 
159 	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
160 		rval = ENOTSUP;
161 		iset.status = PCITOOL_IO_ERROR;
162 		goto done_set_intr;
163 	}
164 
165 	if (iset.ino > APIC_MAX_VECTOR) {
166 		rval = EINVAL;
167 		iset.status = PCITOOL_INVALID_INO;
168 		goto done_set_intr;
169 	}
170 
171 	iset.status = PCITOOL_SUCCESS;
172 
173 	if ((old_cpu = pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
174 		iset.status = PCITOOL_IO_ERROR;
175 		rval = EINVAL;
176 		goto done_set_intr;
177 	}
178 
179 
180 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
181 
182 	/*
183 	 * For this locally-declared and used handle, ih_private will contain a
184 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
185 	 */
186 	info_hdl.ih_vector = iset.ino;
187 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
188 	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
189 	if (pcitool_debug)
190 		prom_printf("user version:%d, flags:0x%x\n",
191 		    iset.user_version, iset.flags);
192 
193 	result = ENOTSUP;
194 	if ((iset.user_version >= PCITOOL_V2) &&
195 	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
196 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
197 		    &result);
198 	} else {
199 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
200 		    &result);
201 	}
202 
203 	if (ret != PSM_SUCCESS) {
204 		switch (result) {
205 		case EIO:		/* Error making the change */
206 			rval = EIO;
207 			iset.status = PCITOOL_IO_ERROR;
208 			break;
209 		case ENXIO:		/* Couldn't convert vector to irq */
210 			rval = EINVAL;
211 			iset.status = PCITOOL_INVALID_INO;
212 			break;
213 		case EINVAL:		/* CPU out of range */
214 			rval = EINVAL;
215 			iset.status = PCITOOL_INVALID_CPUID;
216 			break;
217 		case ENOTSUP:		/* Requested PSM intr ops missing */
218 			rval = ENOTSUP;
219 			iset.status = PCITOOL_IO_ERROR;
220 			break;
221 		}
222 	}
223 
224 	/* Return original CPU. */
225 	iset.cpu_id = old_cpu;
226 
227 done_set_intr:
228 	iset.drvr_version = PCITOOL_VERSION;
229 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
230 		rval = EFAULT;
231 	return (rval);
232 }
233 
234 
235 /* It is assumed that dip != NULL */
236 static void
237 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
238 {
239 	(void) strncpy(devs->driver_name,
240 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
241 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
242 	(void) ddi_pathname(dip, devs->path);
243 	devs->dev_inst = ddi_get_instance(dip);
244 }
245 
246 
247 /*ARGSUSED*/
248 static int
249 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
250 {
251 	/* Array part isn't used here, but oh well... */
252 	pcitool_intr_get_t partial_iget;
253 	pcitool_intr_get_t *iget = &partial_iget;
254 	size_t	iget_kmem_alloc_size = 0;
255 	uint8_t num_devs_ret;
256 	int copyout_rval;
257 	int rval = SUCCESS;
258 	int circ;
259 	int i;
260 
261 	ddi_intr_handle_impl_t info_hdl;
262 	apic_get_intr_t intr_info;
263 
264 	/* Read in just the header part, no array section. */
265 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
266 	    DDI_SUCCESS)
267 		return (EFAULT);
268 
269 	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
270 		partial_iget.status = PCITOOL_IO_ERROR;
271 		partial_iget.num_devs_ret = 0;
272 		rval = ENOTSUP;
273 		goto done_get_intr;
274 	}
275 
276 	/* Validate argument. */
277 	if (partial_iget.ino > APIC_MAX_VECTOR) {
278 		partial_iget.status = PCITOOL_INVALID_INO;
279 		partial_iget.num_devs_ret = 0;
280 		rval = EINVAL;
281 		goto done_get_intr;
282 	}
283 
284 	num_devs_ret = partial_iget.num_devs_ret;
285 	intr_info.avgi_dip_list = NULL;
286 	intr_info.avgi_req_flags =
287 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
288 	/*
289 	 * For this locally-declared and used handle, ih_private will contain a
290 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
291 	 * global interrupt handling.
292 	 */
293 	info_hdl.ih_private = &intr_info;
294 	info_hdl.ih_vector = partial_iget.ino;
295 
296 	/* Caller wants device information returned. */
297 	if (num_devs_ret > 0) {
298 
299 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
300 
301 		/*
302 		 * Allocate room.
303 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
304 		 */
305 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
306 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
307 
308 		/* Read in whole structure to verify there's room. */
309 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
310 		    SUCCESS) {
311 
312 			/* Be consistent and just return EFAULT here. */
313 			kmem_free(iget, iget_kmem_alloc_size);
314 
315 			return (EFAULT);
316 		}
317 	}
318 
319 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
320 	iget->ino = info_hdl.ih_vector;
321 
322 	/*
323 	 * Lock device tree branch from the pci root nexus on down if info will
324 	 * be extracted from dips returned from the tree.
325 	 */
326 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
327 		ndi_devi_enter(dip, &circ);
328 	}
329 
330 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
331 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
332 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
333 		iget->status = PCITOOL_IO_ERROR;
334 		iget->num_devs_ret = 0;
335 		rval = EINVAL;
336 		goto done_get_intr;
337 	}
338 
339 	/*
340 	 * Fill in the pcitool_intr_get_t to be returned,
341 	 * with the CPU, num_devs_ret and num_devs.
342 	 */
343 	iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
344 
345 	/* Number of devices returned by apic. */
346 	iget->num_devs = intr_info.avgi_num_devs;
347 
348 	/* Device info was returned. */
349 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
350 
351 		/*
352 		 * num devs returned is num devs ret by apic,
353 		 * space permitting.
354 		 */
355 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
356 
357 		/*
358 		 * Loop thru list of dips and extract driver, name and instance.
359 		 * Fill in the pcitool_intr_dev_t's with this info.
360 		 */
361 		for (i = 0; i < iget->num_devs_ret; i++)
362 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
363 			    &iget->dev[i]);
364 
365 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
366 		kmem_free(intr_info.avgi_dip_list,
367 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
368 	}
369 
370 done_get_intr:
371 
372 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
373 		ndi_devi_exit(dip, circ);
374 	}
375 
376 	iget->drvr_version = PCITOOL_VERSION;
377 	copyout_rval = ddi_copyout(iget, arg,
378 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
379 
380 	if (iget_kmem_alloc_size > 0)
381 		kmem_free(iget, iget_kmem_alloc_size);
382 
383 	if (copyout_rval != DDI_SUCCESS)
384 		rval = EFAULT;
385 
386 	return (rval);
387 }
388 
389 /*ARGSUSED*/
390 static int
391 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
392 {
393 	pcitool_intr_info_t intr_info;
394 	ddi_intr_handle_impl_t info_hdl;
395 	int rval = SUCCESS;
396 
397 	/* If we need user_version, and to ret same user version as passed in */
398 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
399 	    DDI_SUCCESS) {
400 		if (pcitool_debug)
401 			prom_printf("Error reading arguments\n");
402 		return (EFAULT);
403 	}
404 
405 	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
406 		return (ENOTSUP);
407 
408 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
409 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
410 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
411 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
412 		intr_info.ctlr_version = 0;
413 
414 	} else {
415 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
416 		if (strcmp((char *)info_hdl.ih_private,
417 		    APIC_PCPLUSMP_NAME) == 0)
418 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
419 		else
420 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
421 	}
422 
423 	intr_info.num_intr = APIC_MAX_VECTOR;
424 	intr_info.drvr_version = PCITOOL_VERSION;
425 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
426 	    DDI_SUCCESS) {
427 		if (pcitool_debug)
428 			prom_printf("Error returning arguments.\n");
429 		rval = EFAULT;
430 	}
431 
432 	return (rval);
433 }
434 
435 
436 
437 /*
438  * Main function for handling interrupt CPU binding requests and queries.
439  * Need to implement later
440  */
441 /*ARGSUSED*/
442 int
443 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
444 {
445 	int rval;
446 
447 	switch (cmd) {
448 
449 	/* Associate a new CPU with a given vector */
450 	case PCITOOL_DEVICE_SET_INTR:
451 		rval = pcitool_set_intr(dip, arg, mode);
452 		break;
453 
454 	case PCITOOL_DEVICE_GET_INTR:
455 		rval = pcitool_get_intr(dip, arg, mode);
456 		break;
457 
458 	case PCITOOL_SYSTEM_INTR_INFO:
459 		rval = pcitool_intr_info(dip, arg, mode);
460 		break;
461 
462 	default:
463 		rval = ENOTSUP;
464 	}
465 
466 	return (rval);
467 }
468 
469 
470 /*
471  * A note about ontrap handling:
472  *
473  * X86 systems on which this module was tested return FFs instead of bus errors
474  * when accessing devices with invalid addresses.  Ontrap handling, which
475  * gracefully handles kernel bus errors, is installed anyway, in case future
476  * X86 platforms require it.
477  */
478 
479 /*
480  * Perform register accesses on the nexus device itself.
481  * No explicit PCI nexus device for X86, so not applicable.
482  */
483 
484 /*ARGSUSED*/
485 int
486 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
487 {
488 	return (ENOTSUP);
489 }
490 
/*
 * Swap endianness: reverse the order of the `size' least significant bytes
 * of data and return them in the low bytes of the result; all higher bytes
 * of the result are zero.
 *
 * A size of zero or less yields 0.  A size larger than a uint64_t is
 * treated as 8 so no access ever strays outside the value.  The result is
 * computed with shifts, so it does not depend on the byte order of the
 * host.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	uint64_t swapped = 0;
	uint64_t byte;
	int i;

	if (size > (int)sizeof (uint64_t))
		size = (int)sizeof (uint64_t);

	for (i = 0; i < size; i++) {
		/* Byte i (counted from the low end) moves to size - 1 - i. */
		byte = (data >> (8 * i)) & 0xFF;
		swapped |= byte << (8 * (size - 1 - i));
	}

	return (swapped);
}
513 
514 
515 /*
516  * Access device.  prg is modified.
517  *
518  * Extended config space is available only through memory-mapped access.
519  * Standard config space on pci express devices is available either way,
520  * so do it memory-mapped here too, for simplicity, if allowed by MCFG.
521  * If anything fails, return EINVAL so caller can try I/O access.
522  */
523 /*ARGSUSED*/
524 static int
525 pcitool_pciex_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
526     boolean_t write_flag)
527 {
528 	int rval = SUCCESS;
529 	uint64_t virt_addr;
530 	size_t	num_virt_pages;
531 	int first_bus, last_bus;
532 	int64_t *ecfginfo;
533 	uint_t nelem;
534 
535 	prg->status = PCITOOL_SUCCESS;
536 
537 	if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, dip, 0,
538 	    "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) {
539 
540 		/*
541 		 * We must have a four-element property; base addr [0] must
542 		 * be nonzero.  Also, segment [1] must be 0 for now; we don't
543 		 * handle nonzero segments (or create a property containing
544 		 * them)
545 		 */
546 		if ((nelem != 4) || (ecfginfo[0] == 0) || (ecfginfo[1] != 0)) {
547 			ddi_prop_free(ecfginfo);
548 			return (EINVAL);
549 		}
550 
551 		prg->phys_addr = ecfginfo[0];
552 		first_bus = ecfginfo[2];
553 		last_bus = ecfginfo[3];
554 
555 		ddi_prop_free(ecfginfo);
556 
557 		if (prg->bus_no < first_bus || prg->bus_no > last_bus)
558 			return (EINVAL);
559 	} else {
560 		return (EINVAL);
561 	}
562 
563 	prg->phys_addr += prg->offset +
564 	    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
565 	    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
566 	    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
567 
568 	virt_addr = pcitool_map(prg->phys_addr,
569 	    PCITOOL_ACC_ATTR_SIZE(prg->acc_attr), &num_virt_pages);
570 
571 	if (virt_addr == NULL)
572 		return (EINVAL);
573 
574 	rval = pcitool_mem_access(dip, prg, virt_addr, write_flag);
575 	pcitool_unmap(virt_addr, num_virt_pages);
576 	return (rval);
577 }
578 
579 /* Access device.  prg is modified. */
580 /*ARGSUSED*/
581 static int
582 pcitool_cfg_access(dev_info_t *dip, pcitool_reg_t *prg, boolean_t write_flag)
583 {
584 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
585 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
586 	int rval = SUCCESS;
587 	uint64_t local_data;
588 
589 	/*
590 	 * NOTE: there is no way to verify whether or not the address is
591 	 * valid other than that it is within the maximum offset.  The
592 	 * put functions return void and the get functions return ff on
593 	 * error.
594 	 */
595 
596 	if (prg->offset + size - 1 > 0xFF) {
597 		prg->status = PCITOOL_INVALID_ADDRESS;
598 		return (ENOTSUP);
599 	}
600 
601 	prg->status = PCITOOL_SUCCESS;
602 
603 	if (write_flag) {
604 
605 		if (big_endian) {
606 			local_data = pcitool_swap_endian(prg->data, size);
607 		} else {
608 			local_data = prg->data;
609 		}
610 
611 		switch (size) {
612 		case 1:
613 			(*pci_putb_func)(prg->bus_no, prg->dev_no,
614 			    prg->func_no, prg->offset, local_data);
615 			break;
616 		case 2:
617 			(*pci_putw_func)(prg->bus_no, prg->dev_no,
618 			    prg->func_no, prg->offset, local_data);
619 			break;
620 		case 4:
621 			(*pci_putl_func)(prg->bus_no, prg->dev_no,
622 			    prg->func_no, prg->offset, local_data);
623 			break;
624 		default:
625 			rval = ENOTSUP;
626 			prg->status = PCITOOL_INVALID_SIZE;
627 			break;
628 		}
629 	} else {
630 		switch (size) {
631 		case 1:
632 			local_data = (*pci_getb_func)(prg->bus_no, prg->dev_no,
633 			    prg->func_no, prg->offset);
634 			break;
635 		case 2:
636 			local_data = (*pci_getw_func)(prg->bus_no, prg->dev_no,
637 			    prg->func_no, prg->offset);
638 			break;
639 		case 4:
640 			local_data = (*pci_getl_func)(prg->bus_no, prg->dev_no,
641 			    prg->func_no, prg->offset);
642 			break;
643 		default:
644 			rval = ENOTSUP;
645 			prg->status = PCITOOL_INVALID_SIZE;
646 			break;
647 		}
648 
649 		if (rval == SUCCESS) {
650 			if (big_endian) {
651 				prg->data =
652 				    pcitool_swap_endian(local_data, size);
653 			} else {
654 				prg->data = local_data;
655 			}
656 		}
657 	}
658 	prg->phys_addr = 0;	/* Config space is not memory mapped on X86. */
659 	return (rval);
660 }
661 
662 
663 /*ARGSUSED*/
664 static int
665 pcitool_io_access(dev_info_t *dip, pcitool_reg_t *prg, boolean_t write_flag)
666 {
667 	int port = (int)prg->phys_addr;
668 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
669 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
670 	int rval = SUCCESS;
671 	on_trap_data_t otd;
672 	uint64_t local_data;
673 
674 
675 	/*
676 	 * on_trap works like setjmp.
677 	 *
678 	 * A non-zero return here means on_trap has returned from an error.
679 	 *
680 	 * A zero return here means that on_trap has just returned from setup.
681 	 */
682 	if (on_trap(&otd, OT_DATA_ACCESS)) {
683 		no_trap();
684 		if (pcitool_debug)
685 			prom_printf(
686 			    "pcitool_io_access: on_trap caught an error...\n");
687 		prg->status = PCITOOL_INVALID_ADDRESS;
688 		return (EFAULT);
689 	}
690 
691 	if (write_flag) {
692 
693 		if (big_endian) {
694 			local_data = pcitool_swap_endian(prg->data, size);
695 		} else {
696 			local_data = prg->data;
697 		}
698 
699 		if (pcitool_debug)
700 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
701 			    size, port);
702 
703 		switch (size) {
704 		case 1:
705 			outb(port, (uint8_t)local_data);
706 			break;
707 		case 2:
708 			outw(port, (uint16_t)local_data);
709 			break;
710 		case 4:
711 			outl(port, (uint32_t)local_data);
712 			break;
713 		default:
714 			rval = ENOTSUP;
715 			prg->status = PCITOOL_INVALID_SIZE;
716 			break;
717 		}
718 	} else {
719 		if (pcitool_debug)
720 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
721 			    size, port);
722 
723 		switch (size) {
724 		case 1:
725 			local_data = inb(port);
726 			break;
727 		case 2:
728 			local_data = inw(port);
729 			break;
730 		case 4:
731 			local_data = inl(port);
732 			break;
733 		default:
734 			rval = ENOTSUP;
735 			prg->status = PCITOOL_INVALID_SIZE;
736 			break;
737 		}
738 
739 		if (rval == SUCCESS) {
740 			if (big_endian) {
741 				prg->data =
742 				    pcitool_swap_endian(local_data, size);
743 			} else {
744 				prg->data = local_data;
745 			}
746 		}
747 	}
748 
749 	no_trap();
750 	return (rval);
751 }
752 
753 /*ARGSUSED*/
754 static int
755 pcitool_mem_access(dev_info_t *dip, pcitool_reg_t *prg, uint64_t virt_addr,
756 	boolean_t write_flag)
757 {
758 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
759 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
760 	int rval = DDI_SUCCESS;
761 	on_trap_data_t otd;
762 	uint64_t local_data;
763 
764 	/*
765 	 * on_trap works like setjmp.
766 	 *
767 	 * A non-zero return here means on_trap has returned from an error.
768 	 *
769 	 * A zero return here means that on_trap has just returned from setup.
770 	 */
771 	if (on_trap(&otd, OT_DATA_ACCESS)) {
772 		no_trap();
773 		if (pcitool_debug)
774 			prom_printf(
775 			    "pcitool_mem_access: on_trap caught an error...\n");
776 		prg->status = PCITOOL_INVALID_ADDRESS;
777 		return (EFAULT);
778 	}
779 
780 	if (write_flag) {
781 
782 		if (big_endian) {
783 			local_data = pcitool_swap_endian(prg->data, size);
784 		} else {
785 			local_data = prg->data;
786 		}
787 
788 		switch (size) {
789 		case 1:
790 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
791 			break;
792 		case 2:
793 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
794 			break;
795 		case 4:
796 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
797 			break;
798 		case 8:
799 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
800 			break;
801 		default:
802 			rval = ENOTSUP;
803 			prg->status = PCITOOL_INVALID_SIZE;
804 			break;
805 		}
806 	} else {
807 		switch (size) {
808 		case 1:
809 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
810 			break;
811 		case 2:
812 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
813 			break;
814 		case 4:
815 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
816 			break;
817 		case 8:
818 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
819 			break;
820 		default:
821 			rval = ENOTSUP;
822 			prg->status = PCITOOL_INVALID_SIZE;
823 			break;
824 		}
825 
826 		if (rval == SUCCESS) {
827 			if (big_endian) {
828 				prg->data =
829 				    pcitool_swap_endian(local_data, size);
830 			} else {
831 				prg->data = local_data;
832 			}
833 		}
834 	}
835 
836 	no_trap();
837 	return (rval);
838 }
839 
840 /*
841  * Map up to 2 pages which contain the address we want to access.
842  *
843  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
844  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
845  * We'll never have to map more than two pages.
846  */
847 
848 static uint64_t
849 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
850 {
851 
852 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
853 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
854 	void *virt_base;
855 	uint64_t returned_addr;
856 	pfn_t pfn;
857 
858 	if (pcitool_debug)
859 		prom_printf("pcitool_map: Called with PA:0x%p\n",
860 		    (void *)(uintptr_t)phys_addr);
861 
862 	*num_pages = 1;
863 
864 	/* Desired mapping would span more than two pages. */
865 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
866 		if (pcitool_debug)
867 			prom_printf("boundary violation: "
868 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
869 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
870 		return (NULL);
871 
872 	} else if ((offset + size) > MMU_PAGESIZE) {
873 		(*num_pages)++;
874 	}
875 
876 	/* Get page(s) of virtual space. */
877 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
878 	if (virt_base == NULL) {
879 		if (pcitool_debug)
880 			prom_printf("Couldn't get virtual base address.\n");
881 		return (NULL);
882 	}
883 
884 	if (pcitool_debug)
885 		prom_printf("Got base virtual address:0x%p\n", virt_base);
886 
887 #ifdef __xpv
888 	/*
889 	 * We should only get here if we are dom0.
890 	 * We're using a real device so we need to translate the MA to a PFN.
891 	 */
892 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
893 	pfn = xen_assign_pfn(mmu_btop(page_base));
894 #else
895 	pfn = btop(page_base);
896 #endif
897 
898 	/* Now map the allocated virtual space to the physical address. */
899 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
900 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
901 	    HAT_LOAD_LOCK);
902 
903 	returned_addr = ((uintptr_t)(virt_base)) + offset;
904 
905 	if (pcitool_debug)
906 		prom_printf("pcitool_map: returning VA:0x%p\n",
907 		    (void *)(uintptr_t)returned_addr);
908 
909 	return (returned_addr);
910 }
911 
912 /* Unmap the mapped page(s). */
913 static void
914 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
915 {
916 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
917 
918 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
919 	    HAT_UNLOAD_UNLOCK);
920 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
921 }
922 
923 
924 /* Perform register accesses on PCI leaf devices. */
925 int
926 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
927 {
928 	boolean_t	write_flag = B_FALSE;
929 	int		rval = 0;
930 	pcitool_reg_t	prg;
931 	uint8_t		size;
932 
933 	uint64_t	base_addr;
934 	uint64_t	virt_addr;
935 	size_t		num_virt_pages;
936 
937 	switch (cmd) {
938 	case (PCITOOL_DEVICE_SET_REG):
939 		write_flag = B_TRUE;
940 
941 	/*FALLTHRU*/
942 	case (PCITOOL_DEVICE_GET_REG):
943 		if (pcitool_debug)
944 			prom_printf("pci_dev_reg_ops set/get reg\n");
945 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
946 		    DDI_SUCCESS) {
947 			if (pcitool_debug)
948 				prom_printf("Error reading arguments\n");
949 			return (EFAULT);
950 		}
951 
952 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
953 			prg.status = PCITOOL_OUT_OF_RANGE;
954 			rval = EINVAL;
955 			goto done_reg;
956 		}
957 
958 		if (pcitool_debug)
959 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
960 			    prg.bus_no, prg.dev_no, prg.func_no);
961 		/* Validate address arguments of bus / dev / func */
962 		if (((prg.bus_no &
963 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
964 		    prg.bus_no) ||
965 		    ((prg.dev_no &
966 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
967 		    prg.dev_no) ||
968 		    ((prg.func_no &
969 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
970 		    prg.func_no)) {
971 			prg.status = PCITOOL_INVALID_ADDRESS;
972 			rval = EINVAL;
973 			goto done_reg;
974 		}
975 
976 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
977 
978 		/* Proper config space desired. */
979 		if (prg.barnum == 0) {
980 
981 			if (pcitool_debug)
982 				prom_printf(
983 				    "config access: offset:0x%" PRIx64 ", "
984 				    "phys_addr:0x%" PRIx64 "\n",
985 				    prg.offset, prg.phys_addr);
986 
987 			if (prg.offset >= max_cfg_size) {
988 				prg.status = PCITOOL_OUT_OF_RANGE;
989 				rval = EINVAL;
990 				goto done_reg;
991 			}
992 
993 			/*
994 			 * Access device.  prg is modified.
995 			 * First, check for AMD K8 northbridges for I/O access
996 			 * (This fix will move in future to pcitool user-land)
997 			 * Next, check for PCIe devices and do
998 			 * memory-mapped access
999 			 * Lastly, check for PCI devices and do I/O access
1000 			 */
1001 			if ((prg.bus_no == 0) &&
1002 			    (prg.dev_no >= 0x18) &&
1003 			    (prg.dev_no <
1004 			    (0x18 + ncpus/cpuid_get_ncpu_per_chip(CPU))) &&
1005 			    (cpuid_getvendor(CPU) == X86_VENDOR_AMD) &&
1006 			    (cpuid_getfamily(CPU) == 0xf)) {
1007 				rval = pcitool_cfg_access(dip, &prg,
1008 				    write_flag);
1009 			} else if (max_cfg_size == PCIE_CONF_HDR_SIZE) {
1010 				rval = pcitool_pciex_cfg_access(dip, &prg,
1011 				    write_flag);
1012 				if (rval == EINVAL) {
1013 					/* Not valid for MMIO; try IO */
1014 					rval = pcitool_cfg_access(dip, &prg,
1015 					    write_flag);
1016 				}
1017 			} else {
1018 				rval = pcitool_cfg_access(dip, &prg,
1019 				    write_flag);
1020 			}
1021 
1022 			if (pcitool_debug)
1023 				prom_printf(
1024 				    "config access: data:0x%" PRIx64 "\n",
1025 				    prg.data);
1026 
1027 		/* IO/ MEM/ MEM64 space. */
1028 		} else {
1029 
1030 			pcitool_reg_t	prg2;
1031 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1032 
1033 			/*
1034 			 * Translate BAR number into offset of the BAR in
1035 			 * the device's config space.
1036 			 */
1037 			prg2.offset = pci_bars[prg2.barnum];
1038 			prg2.acc_attr =
1039 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1040 
1041 			if (pcitool_debug)
1042 				prom_printf(
1043 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1044 				    prg2.barnum, prg2.offset);
1045 			/*
1046 			 * Get Bus Address Register (BAR) from config space.
1047 			 * prg2.offset is the offset into config space of the
1048 			 * BAR desired.  prg.status is modified on error.
1049 			 */
1050 			rval = pcitool_cfg_access(dip, &prg2, B_FALSE);
1051 			if (rval != SUCCESS) {
1052 				if (pcitool_debug)
1053 					prom_printf("BAR access failed\n");
1054 				prg.status = prg2.status;
1055 				goto done_reg;
1056 			}
1057 			/*
1058 			 * Reference proper PCI space based on the BAR.
1059 			 * If 64 bit MEM space, need to load other half of the
1060 			 * BAR first.
1061 			 */
1062 
1063 			if (pcitool_debug)
1064 				prom_printf("bar returned is 0x%" PRIx64 "\n",
1065 				    prg2.data);
1066 			if (!prg2.data) {
1067 				if (pcitool_debug)
1068 					prom_printf("BAR data == 0\n");
1069 				rval = EINVAL;
1070 				prg.status = PCITOOL_INVALID_ADDRESS;
1071 				goto done_reg;
1072 			}
1073 			if (prg2.data == 0xffffffff) {
1074 				if (pcitool_debug)
1075 					prom_printf("BAR data == -1\n");
1076 				rval = EINVAL;
1077 				prg.status = PCITOOL_INVALID_ADDRESS;
1078 				goto done_reg;
1079 			}
1080 
1081 			/*
1082 			 * BAR has bits saying this space is IO space, unless
1083 			 * this is the ROM address register.
1084 			 */
1085 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1086 			    PCI_BASE_SPACE_IO) &&
1087 			    (prg2.offset != PCI_CONF_ROM)) {
1088 				if (pcitool_debug)
1089 					prom_printf("IO space\n");
1090 
1091 				prg2.data &= PCI_BASE_IO_ADDR_M;
1092 				prg.phys_addr = prg2.data + prg.offset;
1093 
1094 				rval = pcitool_io_access(dip, &prg, write_flag);
1095 				if ((rval != SUCCESS) && (pcitool_debug))
1096 					prom_printf("IO access failed\n");
1097 
1098 				goto done_reg;
1099 
1100 
1101 			/*
1102 			 * BAR has bits saying this space is 64 bit memory
1103 			 * space, unless this is the ROM address register.
1104 			 *
1105 			 * The 64 bit address stored in two BAR cells is not
1106 			 * necessarily aligned on an 8-byte boundary.
1107 			 * Need to keep the first 4 bytes read,
1108 			 * and do a separate read of the high 4 bytes.
1109 			 */
1110 
1111 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1112 			    (prg2.offset != PCI_CONF_ROM)) {
1113 
1114 				uint32_t low_bytes =
1115 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1116 
1117 				/*
1118 				 * Don't try to read the next 4 bytes
1119 				 * past the end of BARs.
1120 				 */
1121 				if (prg2.offset >= PCI_CONF_BASE5) {
1122 					prg.status = PCITOOL_OUT_OF_RANGE;
1123 					rval = EIO;
1124 					goto done_reg;
1125 				}
1126 
1127 				/*
1128 				 * Access device.
1129 				 * prg2.status is modified on error.
1130 				 */
1131 				prg2.offset += 4;
1132 				rval = pcitool_cfg_access(dip, &prg2, B_FALSE);
1133 				if (rval != SUCCESS) {
1134 					prg.status = prg2.status;
1135 					goto done_reg;
1136 				}
1137 
1138 				if (prg2.data == 0xffffffff) {
1139 					prg.status = PCITOOL_INVALID_ADDRESS;
1140 					prg.status = EFAULT;
1141 					goto done_reg;
1142 				}
1143 
1144 				prg2.data = (prg2.data << 32) + low_bytes;
1145 				if (pcitool_debug)
1146 					prom_printf(
1147 					    "64 bit mem space.  "
1148 					    "64-bit bar is 0x%" PRIx64 "\n",
1149 					    prg2.data);
1150 
1151 			/* Mem32 space, including ROM */
1152 			} else {
1153 
1154 				if (prg2.offset == PCI_CONF_ROM) {
1155 					if (pcitool_debug)
1156 						prom_printf(
1157 						    "Additional ROM "
1158 						    "checking\n");
1159 					/* Can't write to ROM */
1160 					if (write_flag) {
1161 						prg.status = PCITOOL_ROM_WRITE;
1162 						rval = EIO;
1163 						goto done_reg;
1164 
1165 					/* ROM disabled for reading */
1166 					} else if (!(prg2.data & 0x00000001)) {
1167 						prg.status =
1168 						    PCITOOL_ROM_DISABLED;
1169 						rval = EIO;
1170 						goto done_reg;
1171 					}
1172 				}
1173 
1174 				if (pcitool_debug)
1175 					prom_printf("32 bit mem space\n");
1176 			}
1177 
1178 			/* Common code for all IO/MEM range spaces. */
1179 
1180 			base_addr = prg2.data;
1181 			if (pcitool_debug)
1182 				prom_printf(
1183 				    "addr portion of bar is 0x%" PRIx64 ", "
1184 				    "base=0x%" PRIx64 ", "
1185 				    "offset:0x%" PRIx64 "\n",
1186 				    prg2.data, base_addr, prg.offset);
1187 			/*
1188 			 * Use offset provided by caller to index into
1189 			 * desired space, then access.
1190 			 * Note that prg.status is modified on error.
1191 			 */
1192 			prg.phys_addr = base_addr + prg.offset;
1193 
1194 			virt_addr = pcitool_map(prg.phys_addr, size,
1195 			    &num_virt_pages);
1196 			if (virt_addr == NULL) {
1197 				prg.status = PCITOOL_IO_ERROR;
1198 				rval = EIO;
1199 				goto done_reg;
1200 			}
1201 
1202 			rval = pcitool_mem_access(dip, &prg, virt_addr,
1203 			    write_flag);
1204 			pcitool_unmap(virt_addr, num_virt_pages);
1205 		}
1206 done_reg:
1207 		prg.drvr_version = PCITOOL_VERSION;
1208 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1209 		    DDI_SUCCESS) {
1210 			if (pcitool_debug)
1211 				prom_printf("Error returning arguments.\n");
1212 			rval = EFAULT;
1213 		}
1214 		break;
1215 	default:
1216 		rval = ENOTTY;
1217 		break;
1218 	}
1219 	return (rval);
1220 }
1221