xref: /titanic_41/usr/src/uts/i86pc/io/pci/pci_tools.c (revision 4a2e944d74dafc80c85d74c8b11430bbecd98824)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/mkdev.h>
27 #include <sys/stat.h>
28 #include <sys/sunddi.h>
29 #include <vm/seg_kmem.h>
30 #include <sys/machparam.h>
31 #include <sys/sunndi.h>
32 #include <sys/ontrap.h>
33 #include <sys/psm.h>
34 #include <sys/pcie.h>
35 #include <sys/pci_cfgspace.h>
36 #include <sys/pci_tools.h>
37 #include <io/pci/pci_tools_ext.h>
38 #include <sys/apic.h>
39 #include <io/pci/pci_var.h>
40 #include <sys/pci_impl.h>
41 #include <sys/promif.h>
42 #include <sys/x86_archext.h>
43 #include <sys/cpuvar.h>
44 #include <sys/pci_cfgacc.h>
45 
46 #ifdef __xpv
47 #include <sys/hypervisor.h>
48 #endif
49 
/*
 * Bit positions of the bus, device and function fields within a PCI Express
 * config address.  Each is the corresponding PCI_REG_*_SHIFT moved left by
 * PCIEX_BDF_OFFSET_DELTA bits.
 */
#define	PCIEX_BDF_OFFSET_DELTA	4
#define	PCIEX_REG_BUS_SHIFT	(PCIEX_BDF_OFFSET_DELTA + PCI_REG_BUS_SHIFT)
#define	PCIEX_REG_DEV_SHIFT	(PCIEX_BDF_OFFSET_DELTA + PCI_REG_DEV_SHIFT)
#define	PCIEX_REG_FUNC_SHIFT	(PCIEX_BDF_OFFSET_DELTA + PCI_REG_FUNC_SHIFT)

/* Value of rval when a handler in this file completes without error. */
#define	SUCCESS	0

/*
 * Defined outside this file.  pcitool_cfg_access() adds it to the encoded
 * bus/dev/func/offset to report a physical address, and skips that step when
 * it is zero.
 */
extern uint64_t mcfg_mem_base;

/* When non-zero, the routines in this file trace through prom_printf(). */
int pcitool_debug = 0;
59 
60 /*
61  * Offsets of BARS in config space.  First entry of 0 means config space.
62  * Entries here correlate to pcitool_bars_t enumerated type.
63  */
64 static uint8_t pci_bars[] = {
65 	0x0,
66 	PCI_CONF_BASE0,
67 	PCI_CONF_BASE1,
68 	PCI_CONF_BASE2,
69 	PCI_CONF_BASE3,
70 	PCI_CONF_BASE4,
71 	PCI_CONF_BASE5,
72 	PCI_CONF_ROM
73 };
74 
75 /* Max offset allowed into config space for a particular device. */
76 static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;
77 
78 static uint64_t pcitool_swap_endian(uint64_t data, int size);
79 static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
80     boolean_t io_access);
81 static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
82 static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
83     boolean_t write_flag);
84 static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
85 static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);
86 
87 /* Extern declarations */
88 extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
89 		    psm_intr_op_t, int *);
90 
91 int
92 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
93 {
94 	int instance = ddi_get_instance(dip);
95 
96 	/* Create pcitool nodes for register access and interrupt routing. */
97 
98 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
99 	    PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
100 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
101 		return (DDI_FAILURE);
102 	}
103 
104 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
105 	    PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
106 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
107 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
108 		return (DDI_FAILURE);
109 	}
110 
111 	if (is_pciex)
112 		max_cfg_size = PCIE_CONF_HDR_SIZE;
113 
114 	return (DDI_SUCCESS);
115 }
116 
117 void
118 pcitool_uninit(dev_info_t *dip)
119 {
120 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
121 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
122 }
123 
124 /*ARGSUSED*/
125 static int
126 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
127 {
128 	ddi_intr_handle_impl_t info_hdl;
129 	pcitool_intr_set_t iset;
130 	uint32_t old_cpu;
131 	int ret, result;
132 	size_t copyinout_size;
133 	int rval = SUCCESS;
134 
135 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
136 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
137 
138 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
139 		return (EFAULT);
140 
141 	switch (iset.user_version) {
142 	case PCITOOL_V1:
143 		break;
144 
145 	case PCITOOL_V2:
146 		copyinout_size = sizeof (pcitool_intr_set_t);
147 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
148 			return (EFAULT);
149 		break;
150 
151 	default:
152 		iset.status = PCITOOL_OUT_OF_RANGE;
153 		rval = ENOTSUP;
154 		goto done_set_intr;
155 	}
156 
157 	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
158 		rval = ENOTSUP;
159 		iset.status = PCITOOL_IO_ERROR;
160 		goto done_set_intr;
161 	}
162 
163 	if (iset.ino > APIC_MAX_VECTOR) {
164 		rval = EINVAL;
165 		iset.status = PCITOOL_INVALID_INO;
166 		goto done_set_intr;
167 	}
168 
169 	iset.status = PCITOOL_SUCCESS;
170 
171 	if ((old_cpu = pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
172 		iset.status = PCITOOL_IO_ERROR;
173 		rval = EINVAL;
174 		goto done_set_intr;
175 	}
176 
177 
178 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
179 
180 	/*
181 	 * For this locally-declared and used handle, ih_private will contain a
182 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
183 	 */
184 	info_hdl.ih_vector = iset.ino;
185 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
186 	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
187 	if (pcitool_debug)
188 		prom_printf("user version:%d, flags:0x%x\n",
189 		    iset.user_version, iset.flags);
190 
191 	result = ENOTSUP;
192 	if ((iset.user_version >= PCITOOL_V2) &&
193 	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
194 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
195 		    &result);
196 	} else {
197 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
198 		    &result);
199 	}
200 
201 	if (ret != PSM_SUCCESS) {
202 		switch (result) {
203 		case EIO:		/* Error making the change */
204 			rval = EIO;
205 			iset.status = PCITOOL_IO_ERROR;
206 			break;
207 		case ENXIO:		/* Couldn't convert vector to irq */
208 			rval = EINVAL;
209 			iset.status = PCITOOL_INVALID_INO;
210 			break;
211 		case EINVAL:		/* CPU out of range */
212 			rval = EINVAL;
213 			iset.status = PCITOOL_INVALID_CPUID;
214 			break;
215 		case ENOTSUP:		/* Requested PSM intr ops missing */
216 			rval = ENOTSUP;
217 			iset.status = PCITOOL_IO_ERROR;
218 			break;
219 		}
220 	}
221 
222 	/* Return original CPU. */
223 	iset.cpu_id = old_cpu;
224 
225 done_set_intr:
226 	iset.drvr_version = PCITOOL_VERSION;
227 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
228 		rval = EFAULT;
229 	return (rval);
230 }
231 
232 
233 /* It is assumed that dip != NULL */
234 static void
235 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
236 {
237 	(void) strncpy(devs->driver_name,
238 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
239 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
240 	(void) ddi_pathname(dip, devs->path);
241 	devs->dev_inst = ddi_get_instance(dip);
242 }
243 
244 static int
245 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
246 {
247 	/* Array part isn't used here, but oh well... */
248 	pcitool_intr_get_t partial_iget;
249 	pcitool_intr_get_t *iget = &partial_iget;
250 	size_t	iget_kmem_alloc_size = 0;
251 	uint8_t num_devs_ret;
252 	int copyout_rval;
253 	int rval = SUCCESS;
254 	int circ;
255 	int i;
256 
257 	ddi_intr_handle_impl_t info_hdl;
258 	apic_get_intr_t intr_info;
259 
260 	/* Read in just the header part, no array section. */
261 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
262 	    DDI_SUCCESS)
263 		return (EFAULT);
264 
265 	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
266 		partial_iget.status = PCITOOL_IO_ERROR;
267 		partial_iget.num_devs_ret = 0;
268 		rval = ENOTSUP;
269 		goto done_get_intr;
270 	}
271 
272 	/* Validate argument. */
273 	if (partial_iget.ino > APIC_MAX_VECTOR) {
274 		partial_iget.status = PCITOOL_INVALID_INO;
275 		partial_iget.num_devs_ret = 0;
276 		rval = EINVAL;
277 		goto done_get_intr;
278 	}
279 
280 	num_devs_ret = partial_iget.num_devs_ret;
281 	intr_info.avgi_dip_list = NULL;
282 	intr_info.avgi_req_flags =
283 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
284 	/*
285 	 * For this locally-declared and used handle, ih_private will contain a
286 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
287 	 * global interrupt handling.
288 	 */
289 	info_hdl.ih_private = &intr_info;
290 	info_hdl.ih_vector = partial_iget.ino;
291 
292 	/* Caller wants device information returned. */
293 	if (num_devs_ret > 0) {
294 
295 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
296 
297 		/*
298 		 * Allocate room.
299 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
300 		 */
301 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
302 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
303 
304 		/* Read in whole structure to verify there's room. */
305 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
306 		    SUCCESS) {
307 
308 			/* Be consistent and just return EFAULT here. */
309 			kmem_free(iget, iget_kmem_alloc_size);
310 
311 			return (EFAULT);
312 		}
313 	}
314 
315 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
316 	iget->ino = info_hdl.ih_vector;
317 
318 	/*
319 	 * Lock device tree branch from the pci root nexus on down if info will
320 	 * be extracted from dips returned from the tree.
321 	 */
322 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
323 		ndi_devi_enter(dip, &circ);
324 	}
325 
326 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
327 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
328 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
329 		iget->status = PCITOOL_IO_ERROR;
330 		iget->num_devs_ret = 0;
331 		rval = EINVAL;
332 		goto done_get_intr;
333 	}
334 
335 	/*
336 	 * Fill in the pcitool_intr_get_t to be returned,
337 	 * with the CPU, num_devs_ret and num_devs.
338 	 */
339 	if (intr_info.avgi_cpu_id == IRQ_UNBOUND ||
340 	    intr_info.avgi_cpu_id == IRQ_UNINIT)
341 		iget->cpu_id = 0;
342 	else
343 		iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
344 
345 	/* Number of devices returned by apic. */
346 	iget->num_devs = intr_info.avgi_num_devs;
347 
348 	/* Device info was returned. */
349 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
350 
351 		/*
352 		 * num devs returned is num devs ret by apic,
353 		 * space permitting.
354 		 */
355 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
356 
357 		/*
358 		 * Loop thru list of dips and extract driver, name and instance.
359 		 * Fill in the pcitool_intr_dev_t's with this info.
360 		 */
361 		for (i = 0; i < iget->num_devs_ret; i++)
362 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
363 			    &iget->dev[i]);
364 
365 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
366 		kmem_free(intr_info.avgi_dip_list,
367 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
368 	}
369 
370 done_get_intr:
371 
372 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
373 		ndi_devi_exit(dip, circ);
374 	}
375 
376 	iget->drvr_version = PCITOOL_VERSION;
377 	copyout_rval = ddi_copyout(iget, arg,
378 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
379 
380 	if (iget_kmem_alloc_size > 0)
381 		kmem_free(iget, iget_kmem_alloc_size);
382 
383 	if (copyout_rval != DDI_SUCCESS)
384 		rval = EFAULT;
385 
386 	return (rval);
387 }
388 
389 /*ARGSUSED*/
390 static int
391 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
392 {
393 	pcitool_intr_info_t intr_info;
394 	ddi_intr_handle_impl_t info_hdl;
395 	int rval = SUCCESS;
396 
397 	/* If we need user_version, and to ret same user version as passed in */
398 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
399 	    DDI_SUCCESS) {
400 		if (pcitool_debug)
401 			prom_printf("Error reading arguments\n");
402 		return (EFAULT);
403 	}
404 
405 	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
406 		return (ENOTSUP);
407 
408 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
409 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
410 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
411 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
412 		intr_info.ctlr_version = 0;
413 
414 	} else {
415 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
416 		if (strcmp((char *)info_hdl.ih_private,
417 		    APIC_PCPLUSMP_NAME) == 0)
418 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
419 		else
420 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
421 	}
422 
423 	intr_info.num_intr = APIC_MAX_VECTOR;
424 	intr_info.drvr_version = PCITOOL_VERSION;
425 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
426 	    DDI_SUCCESS) {
427 		if (pcitool_debug)
428 			prom_printf("Error returning arguments.\n");
429 		rval = EFAULT;
430 	}
431 
432 	return (rval);
433 }
434 
435 
436 
437 /*
438  * Main function for handling interrupt CPU binding requests and queries.
439  * Need to implement later
440  */
441 int
442 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
443 {
444 	int rval;
445 
446 	switch (cmd) {
447 
448 	/* Associate a new CPU with a given vector */
449 	case PCITOOL_DEVICE_SET_INTR:
450 		rval = pcitool_set_intr(dip, arg, mode);
451 		break;
452 
453 	case PCITOOL_DEVICE_GET_INTR:
454 		rval = pcitool_get_intr(dip, arg, mode);
455 		break;
456 
457 	case PCITOOL_SYSTEM_INTR_INFO:
458 		rval = pcitool_intr_info(dip, arg, mode);
459 		break;
460 
461 	default:
462 		rval = ENOTSUP;
463 	}
464 
465 	return (rval);
466 }
467 
468 /*
469  * Perform register accesses on the nexus device itself.
470  * No explicit PCI nexus device for X86, so not applicable.
471  */
472 
473 /*ARGSUSED*/
474 int
475 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
476 {
477 	return (ENOTSUP);
478 }
479 
/*
 * Swap endianness.
 *
 * Reverses the order of the low `size' bytes of `data' and returns them in
 * the low `size' bytes of the result; all higher result bytes are zero.
 *
 * The swap is done with shifts, so the result does not depend on the byte
 * order of the machine running it.  A size of zero or less yields 0.  A size
 * larger than a uint64_t is treated as a full 8-byte swap instead of reading
 * and writing beyond the value.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	uint64_t swapped = 0;
	int i;

	if (size > (int)sizeof (uint64_t))
		size = (int)sizeof (uint64_t);

	/* Peel bytes off the bottom of data and push them onto swapped. */
	for (i = 0; i < size; i++) {
		swapped = (swapped << 8) | (data & 0xFF);
		data >>= 8;
	}

	return (swapped);
}
502 
503 /*
504  * A note about ontrap handling:
505  *
506  * X86 systems on which this module was tested return FFs instead of bus errors
507  * when accessing devices with invalid addresses.  Ontrap handling, which
508  * gracefully handles kernel bus errors, is installed anyway for I/O and mem
509  * space accessing (not for pci config space), in case future X86 platforms
510  * require it.
511  */
512 
513 /* Access device.  prg is modified. */
514 static int
515 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
516     boolean_t io_access)
517 {
518 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
519 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
520 	int rval = SUCCESS;
521 	uint64_t local_data;
522 	pci_cfgacc_req_t req;
523 	uint32_t max_offset;
524 
525 	if ((size <= 0) || (size > 8) || ((size & (size - 1)) != 0)) {
526 		prg->status = PCITOOL_INVALID_SIZE;
527 		return (ENOTSUP);
528 	}
529 
530 	/*
531 	 * NOTE: there is no way to verify whether or not the address is
532 	 * valid other than that it is within the maximum offset.  The
533 	 * put functions return void and the get functions return -1 on error.
534 	 */
535 
536 	if (io_access)
537 		max_offset = 0xFF;
538 	else
539 		max_offset = 0xFFF;
540 	if (prg->offset + size - 1 > max_offset) {
541 		prg->status = PCITOOL_INVALID_ADDRESS;
542 		return (ENOTSUP);
543 	}
544 
545 	prg->status = PCITOOL_SUCCESS;
546 
547 	req.rcdip = NULL;
548 	req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
549 	req.offset = prg->offset;
550 	req.size = size;
551 	req.write = write_flag;
552 	req.ioacc = io_access;
553 	if (write_flag) {
554 		if (big_endian) {
555 			local_data = pcitool_swap_endian(prg->data, size);
556 		} else {
557 			local_data = prg->data;
558 		}
559 		VAL64(&req) = local_data;
560 		pci_cfgacc_acc(&req);
561 	} else {
562 		pci_cfgacc_acc(&req);
563 		switch (size) {
564 		case 1:
565 			local_data = VAL8(&req);
566 			break;
567 		case 2:
568 			local_data = VAL16(&req);
569 			break;
570 		case 4:
571 			local_data = VAL32(&req);
572 			break;
573 		case 8:
574 			local_data = VAL64(&req);
575 			break;
576 		}
577 		if (big_endian) {
578 			prg->data =
579 			    pcitool_swap_endian(local_data, size);
580 		} else {
581 			prg->data = local_data;
582 		}
583 	}
584 	/*
585 	 * Check if legacy IO config access is used, in which case
586 	 * only first 256 bytes are valid.
587 	 */
588 	if (req.ioacc && (prg->offset + size - 1 > 0xFF)) {
589 		prg->status = PCITOOL_INVALID_ADDRESS;
590 		return (ENOTSUP);
591 	}
592 
593 	/* Set phys_addr only if MMIO is used */
594 	prg->phys_addr = 0;
595 	if (!req.ioacc && mcfg_mem_base != 0) {
596 		prg->phys_addr = mcfg_mem_base + prg->offset +
597 		    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
598 		    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
599 		    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
600 	}
601 
602 	return (rval);
603 }
604 
605 static int
606 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
607 {
608 	int port = (int)prg->phys_addr;
609 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
610 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
611 	int rval = SUCCESS;
612 	on_trap_data_t otd;
613 	uint64_t local_data;
614 
615 
616 	/*
617 	 * on_trap works like setjmp.
618 	 *
619 	 * A non-zero return here means on_trap has returned from an error.
620 	 *
621 	 * A zero return here means that on_trap has just returned from setup.
622 	 */
623 	if (on_trap(&otd, OT_DATA_ACCESS)) {
624 		no_trap();
625 		if (pcitool_debug)
626 			prom_printf(
627 			    "pcitool_io_access: on_trap caught an error...\n");
628 		prg->status = PCITOOL_INVALID_ADDRESS;
629 		return (EFAULT);
630 	}
631 
632 	if (write_flag) {
633 
634 		if (big_endian) {
635 			local_data = pcitool_swap_endian(prg->data, size);
636 		} else {
637 			local_data = prg->data;
638 		}
639 
640 		if (pcitool_debug)
641 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
642 			    size, port);
643 
644 		switch (size) {
645 		case 1:
646 			outb(port, (uint8_t)local_data);
647 			break;
648 		case 2:
649 			outw(port, (uint16_t)local_data);
650 			break;
651 		case 4:
652 			outl(port, (uint32_t)local_data);
653 			break;
654 		default:
655 			rval = ENOTSUP;
656 			prg->status = PCITOOL_INVALID_SIZE;
657 			break;
658 		}
659 	} else {
660 		if (pcitool_debug)
661 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
662 			    size, port);
663 
664 		switch (size) {
665 		case 1:
666 			local_data = inb(port);
667 			break;
668 		case 2:
669 			local_data = inw(port);
670 			break;
671 		case 4:
672 			local_data = inl(port);
673 			break;
674 		default:
675 			rval = ENOTSUP;
676 			prg->status = PCITOOL_INVALID_SIZE;
677 			break;
678 		}
679 
680 		if (rval == SUCCESS) {
681 			if (big_endian) {
682 				prg->data =
683 				    pcitool_swap_endian(local_data, size);
684 			} else {
685 				prg->data = local_data;
686 			}
687 		}
688 	}
689 
690 	no_trap();
691 	return (rval);
692 }
693 
694 static int
695 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
696 {
697 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
698 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
699 	int rval = DDI_SUCCESS;
700 	on_trap_data_t otd;
701 	uint64_t local_data;
702 
703 	/*
704 	 * on_trap works like setjmp.
705 	 *
706 	 * A non-zero return here means on_trap has returned from an error.
707 	 *
708 	 * A zero return here means that on_trap has just returned from setup.
709 	 */
710 	if (on_trap(&otd, OT_DATA_ACCESS)) {
711 		no_trap();
712 		if (pcitool_debug)
713 			prom_printf(
714 			    "pcitool_mem_access: on_trap caught an error...\n");
715 		prg->status = PCITOOL_INVALID_ADDRESS;
716 		return (EFAULT);
717 	}
718 
719 	if (write_flag) {
720 
721 		if (big_endian) {
722 			local_data = pcitool_swap_endian(prg->data, size);
723 		} else {
724 			local_data = prg->data;
725 		}
726 
727 		switch (size) {
728 		case 1:
729 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
730 			break;
731 		case 2:
732 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
733 			break;
734 		case 4:
735 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
736 			break;
737 		case 8:
738 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
739 			break;
740 		default:
741 			rval = ENOTSUP;
742 			prg->status = PCITOOL_INVALID_SIZE;
743 			break;
744 		}
745 	} else {
746 		switch (size) {
747 		case 1:
748 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
749 			break;
750 		case 2:
751 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
752 			break;
753 		case 4:
754 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
755 			break;
756 		case 8:
757 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
758 			break;
759 		default:
760 			rval = ENOTSUP;
761 			prg->status = PCITOOL_INVALID_SIZE;
762 			break;
763 		}
764 
765 		if (rval == SUCCESS) {
766 			if (big_endian) {
767 				prg->data =
768 				    pcitool_swap_endian(local_data, size);
769 			} else {
770 				prg->data = local_data;
771 			}
772 		}
773 	}
774 
775 	no_trap();
776 	return (rval);
777 }
778 
779 /*
780  * Map up to 2 pages which contain the address we want to access.
781  *
782  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
783  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
784  * We'll never have to map more than two pages.
785  */
786 
787 static uint64_t
788 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
789 {
790 
791 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
792 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
793 	void *virt_base;
794 	uint64_t returned_addr;
795 	pfn_t pfn;
796 
797 	if (pcitool_debug)
798 		prom_printf("pcitool_map: Called with PA:0x%p\n",
799 		    (void *)(uintptr_t)phys_addr);
800 
801 	*num_pages = 1;
802 
803 	/* Desired mapping would span more than two pages. */
804 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
805 		if (pcitool_debug)
806 			prom_printf("boundary violation: "
807 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
808 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
809 		return (NULL);
810 
811 	} else if ((offset + size) > MMU_PAGESIZE) {
812 		(*num_pages)++;
813 	}
814 
815 	/* Get page(s) of virtual space. */
816 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
817 	if (virt_base == NULL) {
818 		if (pcitool_debug)
819 			prom_printf("Couldn't get virtual base address.\n");
820 		return (NULL);
821 	}
822 
823 	if (pcitool_debug)
824 		prom_printf("Got base virtual address:0x%p\n", virt_base);
825 
826 #ifdef __xpv
827 	/*
828 	 * We should only get here if we are dom0.
829 	 * We're using a real device so we need to translate the MA to a PFN.
830 	 */
831 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
832 	pfn = xen_assign_pfn(mmu_btop(page_base));
833 #else
834 	pfn = btop(page_base);
835 #endif
836 
837 	/* Now map the allocated virtual space to the physical address. */
838 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
839 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
840 	    HAT_LOAD_LOCK);
841 
842 	returned_addr = ((uintptr_t)(virt_base)) + offset;
843 
844 	if (pcitool_debug)
845 		prom_printf("pcitool_map: returning VA:0x%p\n",
846 		    (void *)(uintptr_t)returned_addr);
847 
848 	return (returned_addr);
849 }
850 
851 /* Unmap the mapped page(s). */
852 static void
853 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
854 {
855 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
856 
857 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
858 	    HAT_UNLOAD_UNLOCK);
859 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
860 }
861 
862 
863 /* Perform register accesses on PCI leaf devices. */
864 /*ARGSUSED*/
865 int
866 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
867 {
868 	boolean_t	write_flag = B_FALSE;
869 	boolean_t	io_access = B_TRUE;
870 	int		rval = 0;
871 	pcitool_reg_t	prg;
872 	uint8_t		size;
873 
874 	uint64_t	base_addr;
875 	uint64_t	virt_addr;
876 	size_t		num_virt_pages;
877 
878 	switch (cmd) {
879 	case (PCITOOL_DEVICE_SET_REG):
880 		write_flag = B_TRUE;
881 
882 	/*FALLTHRU*/
883 	case (PCITOOL_DEVICE_GET_REG):
884 		if (pcitool_debug)
885 			prom_printf("pci_dev_reg_ops set/get reg\n");
886 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
887 		    DDI_SUCCESS) {
888 			if (pcitool_debug)
889 				prom_printf("Error reading arguments\n");
890 			return (EFAULT);
891 		}
892 
893 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
894 			prg.status = PCITOOL_OUT_OF_RANGE;
895 			rval = EINVAL;
896 			goto done_reg;
897 		}
898 
899 		if (pcitool_debug)
900 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
901 			    prg.bus_no, prg.dev_no, prg.func_no);
902 		/* Validate address arguments of bus / dev / func */
903 		if (((prg.bus_no &
904 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
905 		    prg.bus_no) ||
906 		    ((prg.dev_no &
907 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
908 		    prg.dev_no) ||
909 		    ((prg.func_no &
910 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
911 		    prg.func_no)) {
912 			prg.status = PCITOOL_INVALID_ADDRESS;
913 			rval = EINVAL;
914 			goto done_reg;
915 		}
916 
917 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
918 
919 		/* Proper config space desired. */
920 		if (prg.barnum == 0) {
921 
922 			if (pcitool_debug)
923 				prom_printf(
924 				    "config access: offset:0x%" PRIx64 ", "
925 				    "phys_addr:0x%" PRIx64 "\n",
926 				    prg.offset, prg.phys_addr);
927 
928 			if (prg.offset >= max_cfg_size) {
929 				prg.status = PCITOOL_OUT_OF_RANGE;
930 				rval = EINVAL;
931 				goto done_reg;
932 			}
933 			if (max_cfg_size == PCIE_CONF_HDR_SIZE)
934 				io_access = B_FALSE;
935 
936 			rval = pcitool_cfg_access(&prg, write_flag, io_access);
937 			if (pcitool_debug)
938 				prom_printf(
939 				    "config access: data:0x%" PRIx64 "\n",
940 				    prg.data);
941 
942 		/* IO/ MEM/ MEM64 space. */
943 		} else {
944 
945 			pcitool_reg_t	prg2;
946 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
947 
948 			/*
949 			 * Translate BAR number into offset of the BAR in
950 			 * the device's config space.
951 			 */
952 			prg2.offset = pci_bars[prg2.barnum];
953 			prg2.acc_attr =
954 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
955 
956 			if (pcitool_debug)
957 				prom_printf(
958 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
959 				    prg2.barnum, prg2.offset);
960 			/*
961 			 * Get Bus Address Register (BAR) from config space.
962 			 * prg2.offset is the offset into config space of the
963 			 * BAR desired.  prg.status is modified on error.
964 			 */
965 			rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
966 			if (rval != SUCCESS) {
967 				if (pcitool_debug)
968 					prom_printf("BAR access failed\n");
969 				prg.status = prg2.status;
970 				goto done_reg;
971 			}
972 			/*
973 			 * Reference proper PCI space based on the BAR.
974 			 * If 64 bit MEM space, need to load other half of the
975 			 * BAR first.
976 			 */
977 
978 			if (pcitool_debug)
979 				prom_printf("bar returned is 0x%" PRIx64 "\n",
980 				    prg2.data);
981 			if (!prg2.data) {
982 				if (pcitool_debug)
983 					prom_printf("BAR data == 0\n");
984 				rval = EINVAL;
985 				prg.status = PCITOOL_INVALID_ADDRESS;
986 				goto done_reg;
987 			}
988 			if (prg2.data == 0xffffffff) {
989 				if (pcitool_debug)
990 					prom_printf("BAR data == -1\n");
991 				rval = EINVAL;
992 				prg.status = PCITOOL_INVALID_ADDRESS;
993 				goto done_reg;
994 			}
995 
996 			/*
997 			 * BAR has bits saying this space is IO space, unless
998 			 * this is the ROM address register.
999 			 */
1000 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1001 			    PCI_BASE_SPACE_IO) &&
1002 			    (prg2.offset != PCI_CONF_ROM)) {
1003 				if (pcitool_debug)
1004 					prom_printf("IO space\n");
1005 
1006 				prg2.data &= PCI_BASE_IO_ADDR_M;
1007 				prg.phys_addr = prg2.data + prg.offset;
1008 
1009 				rval = pcitool_io_access(&prg, write_flag);
1010 				if ((rval != SUCCESS) && (pcitool_debug))
1011 					prom_printf("IO access failed\n");
1012 
1013 				goto done_reg;
1014 
1015 
1016 			/*
1017 			 * BAR has bits saying this space is 64 bit memory
1018 			 * space, unless this is the ROM address register.
1019 			 *
1020 			 * The 64 bit address stored in two BAR cells is not
1021 			 * necessarily aligned on an 8-byte boundary.
1022 			 * Need to keep the first 4 bytes read,
1023 			 * and do a separate read of the high 4 bytes.
1024 			 */
1025 
1026 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1027 			    (prg2.offset != PCI_CONF_ROM)) {
1028 
1029 				uint32_t low_bytes =
1030 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1031 
1032 				/*
1033 				 * Don't try to read the next 4 bytes
1034 				 * past the end of BARs.
1035 				 */
1036 				if (prg2.offset >= PCI_CONF_BASE5) {
1037 					prg.status = PCITOOL_OUT_OF_RANGE;
1038 					rval = EIO;
1039 					goto done_reg;
1040 				}
1041 
1042 				/*
1043 				 * Access device.
1044 				 * prg2.status is modified on error.
1045 				 */
1046 				prg2.offset += 4;
1047 				rval = pcitool_cfg_access(&prg2,
1048 				    B_FALSE, B_TRUE);
1049 				if (rval != SUCCESS) {
1050 					prg.status = prg2.status;
1051 					goto done_reg;
1052 				}
1053 
1054 				if (prg2.data == 0xffffffff) {
1055 					prg.status = PCITOOL_INVALID_ADDRESS;
1056 					prg.status = EFAULT;
1057 					goto done_reg;
1058 				}
1059 
1060 				prg2.data = (prg2.data << 32) + low_bytes;
1061 				if (pcitool_debug)
1062 					prom_printf(
1063 					    "64 bit mem space.  "
1064 					    "64-bit bar is 0x%" PRIx64 "\n",
1065 					    prg2.data);
1066 
1067 			/* Mem32 space, including ROM */
1068 			} else {
1069 
1070 				if (prg2.offset == PCI_CONF_ROM) {
1071 					if (pcitool_debug)
1072 						prom_printf(
1073 						    "Additional ROM "
1074 						    "checking\n");
1075 					/* Can't write to ROM */
1076 					if (write_flag) {
1077 						prg.status = PCITOOL_ROM_WRITE;
1078 						rval = EIO;
1079 						goto done_reg;
1080 
1081 					/* ROM disabled for reading */
1082 					} else if (!(prg2.data & 0x00000001)) {
1083 						prg.status =
1084 						    PCITOOL_ROM_DISABLED;
1085 						rval = EIO;
1086 						goto done_reg;
1087 					}
1088 				}
1089 
1090 				if (pcitool_debug)
1091 					prom_printf("32 bit mem space\n");
1092 			}
1093 
1094 			/* Common code for all IO/MEM range spaces. */
1095 
1096 			base_addr = prg2.data;
1097 			if (pcitool_debug)
1098 				prom_printf(
1099 				    "addr portion of bar is 0x%" PRIx64 ", "
1100 				    "base=0x%" PRIx64 ", "
1101 				    "offset:0x%" PRIx64 "\n",
1102 				    prg2.data, base_addr, prg.offset);
1103 			/*
1104 			 * Use offset provided by caller to index into
1105 			 * desired space, then access.
1106 			 * Note that prg.status is modified on error.
1107 			 */
1108 			prg.phys_addr = base_addr + prg.offset;
1109 
1110 			virt_addr = pcitool_map(prg.phys_addr, size,
1111 			    &num_virt_pages);
1112 			if (virt_addr == NULL) {
1113 				prg.status = PCITOOL_IO_ERROR;
1114 				rval = EIO;
1115 				goto done_reg;
1116 			}
1117 
1118 			rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1119 			pcitool_unmap(virt_addr, num_virt_pages);
1120 		}
1121 done_reg:
1122 		prg.drvr_version = PCITOOL_VERSION;
1123 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1124 		    DDI_SUCCESS) {
1125 			if (pcitool_debug)
1126 				prom_printf("Error returning arguments.\n");
1127 			rval = EFAULT;
1128 		}
1129 		break;
1130 	default:
1131 		rval = ENOTTY;
1132 		break;
1133 	}
1134 	return (rval);
1135 }
1136