xref: /illumos-gate/usr/src/uts/i86pc/io/pci/pci_tools.c (revision 4d8d108f42a089b7b4441353f2ad7a75e1c7b31d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Copyright 2023 Oxide Computer Company
27  */
28 
29 #include <sys/sysmacros.h>
30 #include <sys/types.h>
31 #include <sys/mkdev.h>
32 #include <sys/stat.h>
33 #include <sys/sunddi.h>
34 #include <vm/seg_kmem.h>
35 #include <sys/machparam.h>
36 #include <sys/sunndi.h>
37 #include <sys/ontrap.h>
38 #include <sys/psm.h>
39 #include <sys/pcie.h>
40 #include <sys/pci_cfgspace.h>
41 #include <sys/pci_tools.h>
42 #include <io/pci/pci_tools_ext.h>
43 #include <sys/apic.h>
44 #include <sys/apix.h>
45 #include <io/pci/pci_var.h>
46 #include <sys/pci_impl.h>
47 #include <sys/promif.h>
48 #include <sys/x86_archext.h>
49 #include <sys/cpuvar.h>
50 #include <sys/pci_cfgacc.h>
51 
52 #ifdef __xpv
53 #include <sys/hypervisor.h>
54 #endif
55 
56 #define	PCIEX_BDF_OFFSET_DELTA	4
57 #define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
58 #define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
59 #define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)
60 
61 #define	SUCCESS	0
62 
63 extern uint64_t mcfg_mem_base;
64 extern uint_t pci_iocfg_max_offset;
65 int pcitool_debug = 0;
66 
67 /*
68  * Offsets of BARS in config space.  First entry of 0 means config space.
69  * Entries here correlate to pcitool_bars_t enumerated type.
70  */
71 static uint8_t pci_bars[] = {
72 	0x0,
73 	PCI_CONF_BASE0,
74 	PCI_CONF_BASE1,
75 	PCI_CONF_BASE2,
76 	PCI_CONF_BASE3,
77 	PCI_CONF_BASE4,
78 	PCI_CONF_BASE5,
79 	PCI_CONF_ROM
80 };
81 
82 /* Max offset allowed into config space for a particular device. */
83 static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;
84 
85 static uint64_t pcitool_swap_endian(uint64_t data, int size);
86 static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
87     boolean_t io_access);
88 static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
89 static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
90     boolean_t write_flag);
91 static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
92 static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);
93 
94 /* Extern declarations */
95 extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
96 		    psm_intr_op_t, int *);
97 
98 int
99 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
100 {
101 	int instance = ddi_get_instance(dip);
102 
103 	/* Create pcitool nodes for register access and interrupt routing. */
104 
105 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
106 	    PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
107 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
108 		return (DDI_FAILURE);
109 	}
110 
111 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
112 	    PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
113 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
114 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
115 		return (DDI_FAILURE);
116 	}
117 
118 	if (is_pciex)
119 		max_cfg_size = PCIE_CONF_HDR_SIZE;
120 
121 	return (DDI_SUCCESS);
122 }
123 
124 void
125 pcitool_uninit(dev_info_t *dip)
126 {
127 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
128 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
129 }
130 
131 /*ARGSUSED*/
132 static int
133 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
134 {
135 	ddi_intr_handle_impl_t info_hdl;
136 	pcitool_intr_set_t iset;
137 	uint32_t old_cpu;
138 	int ret, result;
139 	size_t copyinout_size;
140 	int rval = SUCCESS;
141 	apic_get_type_t type_info;
142 
143 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
144 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
145 
146 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
147 		return (EFAULT);
148 
149 	switch (iset.user_version) {
150 	case PCITOOL_V1:
151 		break;
152 
153 	case PCITOOL_V2:
154 		copyinout_size = sizeof (pcitool_intr_set_t);
155 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
156 			return (EFAULT);
157 		break;
158 
159 	default:
160 		iset.status = PCITOOL_OUT_OF_RANGE;
161 		rval = ENOTSUP;
162 		goto done_set_intr;
163 	}
164 
165 	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
166 		rval = ENOTSUP;
167 		iset.status = PCITOOL_IO_ERROR;
168 		goto done_set_intr;
169 	}
170 
171 	info_hdl.ih_private = &type_info;
172 
173 	if ((*psm_intr_ops)(NULL, &info_hdl,
174 	    PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
175 		rval = ENOTSUP;
176 		iset.status = PCITOOL_IO_ERROR;
177 		goto done_set_intr;
178 	}
179 
180 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
181 		if (iset.old_cpu > type_info.avgi_num_cpu) {
182 			rval = EINVAL;
183 			iset.status = PCITOOL_INVALID_CPUID;
184 			goto done_set_intr;
185 		}
186 		old_cpu = iset.old_cpu;
187 	} else {
188 		if ((old_cpu =
189 		    pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
190 			iset.status = PCITOOL_IO_ERROR;
191 			rval = EINVAL;
192 			goto done_set_intr;
193 		}
194 	}
195 
196 	if (iset.ino > type_info.avgi_num_intr) {
197 		rval = EINVAL;
198 		iset.status = PCITOOL_INVALID_INO;
199 		goto done_set_intr;
200 	}
201 
202 	iset.status = PCITOOL_SUCCESS;
203 
204 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
205 
206 	/*
207 	 * For this locally-declared and used handle, ih_private will contain a
208 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
209 	 */
210 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
211 		info_hdl.ih_vector = APIX_VIRTVECTOR(old_cpu, iset.ino);
212 	} else {
213 		info_hdl.ih_vector = iset.ino;
214 	}
215 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
216 	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
217 	if (pcitool_debug)
218 		prom_printf("user version:%d, flags:0x%x\n",
219 		    iset.user_version, iset.flags);
220 
221 	result = ENOTSUP;
222 	if ((iset.user_version >= PCITOOL_V2) &&
223 	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
224 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
225 		    &result);
226 	} else {
227 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
228 		    &result);
229 	}
230 
231 	if (ret != PSM_SUCCESS) {
232 		switch (result) {
233 		case EIO:		/* Error making the change */
234 			rval = EIO;
235 			iset.status = PCITOOL_IO_ERROR;
236 			break;
237 		case ENXIO:		/* Couldn't convert vector to irq */
238 			rval = EINVAL;
239 			iset.status = PCITOOL_INVALID_INO;
240 			break;
241 		case EINVAL:		/* CPU out of range */
242 			rval = EINVAL;
243 			iset.status = PCITOOL_INVALID_CPUID;
244 			break;
245 		case ENOTSUP:		/* Requested PSM intr ops missing */
246 			rval = ENOTSUP;
247 			iset.status = PCITOOL_IO_ERROR;
248 			break;
249 		}
250 	}
251 
252 	/* Return original CPU. */
253 	iset.cpu_id = old_cpu;
254 
255 	/* Return new vector */
256 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
257 		iset.ino = APIX_VIRTVEC_VECTOR(info_hdl.ih_vector);
258 	}
259 
260 done_set_intr:
261 	iset.drvr_version = PCITOOL_VERSION;
262 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
263 		rval = EFAULT;
264 	return (rval);
265 }
266 
267 
268 /* It is assumed that dip != NULL */
269 static void
270 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
271 {
272 	(void) strncpy(devs->driver_name,
273 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
274 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
275 	(void) ddi_pathname(dip, devs->path);
276 	devs->dev_inst = ddi_get_instance(dip);
277 }
278 
279 static int
280 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
281 {
282 	/* Array part isn't used here, but oh well... */
283 	pcitool_intr_get_t partial_iget;
284 	pcitool_intr_get_t *iget = &partial_iget;
285 	size_t	iget_kmem_alloc_size = 0;
286 	uint8_t num_devs_ret = 0;
287 	int copyout_rval;
288 	int rval = SUCCESS;
289 	int i;
290 	ddi_intr_handle_impl_t info_hdl;
291 	apic_get_intr_t intr_info;
292 	apic_get_type_t type_info;
293 
294 	/* Read in just the header part, no array section. */
295 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
296 	    DDI_SUCCESS)
297 		return (EFAULT);
298 
299 	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
300 		partial_iget.status = PCITOOL_IO_ERROR;
301 		partial_iget.num_devs_ret = 0;
302 		rval = ENOTSUP;
303 		goto done_get_intr;
304 	}
305 
306 	info_hdl.ih_private = &type_info;
307 
308 	if ((*psm_intr_ops)(NULL, &info_hdl,
309 	    PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
310 		iget->status = PCITOOL_IO_ERROR;
311 		iget->num_devs_ret = 0;
312 		rval = EINVAL;
313 		goto done_get_intr;
314 	}
315 
316 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
317 		if (partial_iget.cpu_id > type_info.avgi_num_cpu) {
318 			partial_iget.status = PCITOOL_INVALID_CPUID;
319 			partial_iget.num_devs_ret = 0;
320 			rval = EINVAL;
321 			goto done_get_intr;
322 		}
323 	}
324 
325 	/* Validate argument. */
326 	if ((partial_iget.ino & APIX_VIRTVEC_VECMASK) >
327 	    type_info.avgi_num_intr) {
328 		partial_iget.status = PCITOOL_INVALID_INO;
329 		partial_iget.num_devs_ret = 0;
330 		rval = EINVAL;
331 		goto done_get_intr;
332 	}
333 
334 	num_devs_ret = partial_iget.num_devs_ret;
335 	intr_info.avgi_dip_list = NULL;
336 	intr_info.avgi_req_flags =
337 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
338 	/*
339 	 * For this locally-declared and used handle, ih_private will contain a
340 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
341 	 * global interrupt handling.
342 	 */
343 	info_hdl.ih_private = &intr_info;
344 
345 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
346 		info_hdl.ih_vector =
347 		    APIX_VIRTVECTOR(partial_iget.cpu_id, partial_iget.ino);
348 	} else {
349 		info_hdl.ih_vector = partial_iget.ino;
350 	}
351 
352 	/* Caller wants device information returned. */
353 	if (num_devs_ret > 0) {
354 
355 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
356 
357 		/*
358 		 * Allocate room.
359 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
360 		 */
361 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
362 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
363 
364 		/* Read in whole structure to verify there's room. */
365 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
366 		    SUCCESS) {
367 
368 			/* Be consistent and just return EFAULT here. */
369 			kmem_free(iget, iget_kmem_alloc_size);
370 
371 			return (EFAULT);
372 		}
373 	}
374 
375 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
376 	iget->ino = info_hdl.ih_vector;
377 
378 	/*
379 	 * Lock device tree branch from the pci root nexus on down if info will
380 	 * be extracted from dips returned from the tree.
381 	 */
382 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
383 		ndi_devi_enter(dip);
384 	}
385 
386 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
387 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
388 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
389 		iget->status = PCITOOL_IO_ERROR;
390 		iget->num_devs_ret = 0;
391 		rval = EINVAL;
392 		goto done_get_intr;
393 	}
394 
395 	/*
396 	 * Fill in the pcitool_intr_get_t to be returned,
397 	 * with the CPU, num_devs_ret and num_devs.
398 	 */
399 	if (intr_info.avgi_cpu_id == IRQ_UNBOUND ||
400 	    intr_info.avgi_cpu_id == IRQ_UNINIT)
401 		iget->cpu_id = 0;
402 	else
403 		iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
404 
405 	/* Number of devices returned by apic. */
406 	iget->num_devs = intr_info.avgi_num_devs;
407 
408 	/* Device info was returned. */
409 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
410 
411 		/*
412 		 * num devs returned is num devs ret by apic,
413 		 * space permitting.
414 		 */
415 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
416 
417 		/*
418 		 * Loop thru list of dips and extract driver, name and instance.
419 		 * Fill in the pcitool_intr_dev_t's with this info.
420 		 */
421 		for (i = 0; i < iget->num_devs_ret; i++)
422 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
423 			    &iget->dev[i]);
424 
425 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
426 		kmem_free(intr_info.avgi_dip_list,
427 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
428 	}
429 
430 done_get_intr:
431 
432 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
433 		ndi_devi_exit(dip);
434 	}
435 
436 	iget->drvr_version = PCITOOL_VERSION;
437 	copyout_rval = ddi_copyout(iget, arg,
438 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
439 
440 	if (iget_kmem_alloc_size > 0)
441 		kmem_free(iget, iget_kmem_alloc_size);
442 
443 	if (copyout_rval != DDI_SUCCESS)
444 		rval = EFAULT;
445 
446 	return (rval);
447 }
448 
449 /*ARGSUSED*/
450 static int
451 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
452 {
453 	pcitool_intr_info_t intr_info;
454 	ddi_intr_handle_impl_t info_hdl;
455 	int rval = SUCCESS;
456 	apic_get_type_t type_info;
457 
458 	/* If we need user_version, and to ret same user version as passed in */
459 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
460 	    DDI_SUCCESS) {
461 		if (pcitool_debug)
462 			prom_printf("Error reading arguments\n");
463 		return (EFAULT);
464 	}
465 
466 	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
467 		return (ENOTSUP);
468 
469 	info_hdl.ih_private = &type_info;
470 
471 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
472 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
473 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
474 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
475 		intr_info.ctlr_version = 0;
476 		intr_info.num_intr = APIC_MAX_VECTOR;
477 	} else {
478 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
479 		intr_info.num_cpu = type_info.avgi_num_cpu;
480 		if (strcmp(type_info.avgi_type,
481 		    APIC_PCPLUSMP_NAME) == 0) {
482 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
483 			intr_info.num_intr = type_info.avgi_num_intr;
484 		} else if (strcmp(type_info.avgi_type,
485 		    APIC_APIX_NAME) == 0) {
486 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_APIX;
487 			intr_info.num_intr = type_info.avgi_num_intr;
488 		} else {
489 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
490 			intr_info.num_intr = APIC_MAX_VECTOR;
491 		}
492 	}
493 
494 	intr_info.drvr_version = PCITOOL_VERSION;
495 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
496 	    DDI_SUCCESS) {
497 		if (pcitool_debug)
498 			prom_printf("Error returning arguments.\n");
499 		rval = EFAULT;
500 	}
501 
502 	return (rval);
503 }
504 
505 
506 
507 /*
508  * Main function for handling interrupt CPU binding requests and queries.
509  * Need to implement later
510  */
511 int
512 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
513 {
514 	int rval;
515 
516 	switch (cmd) {
517 
518 	/* Associate a new CPU with a given vector */
519 	case PCITOOL_DEVICE_SET_INTR:
520 		rval = pcitool_set_intr(dip, arg, mode);
521 		break;
522 
523 	case PCITOOL_DEVICE_GET_INTR:
524 		rval = pcitool_get_intr(dip, arg, mode);
525 		break;
526 
527 	case PCITOOL_SYSTEM_INTR_INFO:
528 		rval = pcitool_intr_info(dip, arg, mode);
529 		break;
530 
531 	default:
532 		rval = ENOTSUP;
533 	}
534 
535 	return (rval);
536 }
537 
538 /*
539  * Perform register accesses on the nexus device itself.
540  * No explicit PCI nexus device for X86, so not applicable.
541  */
542 
543 /*ARGSUSED*/
544 int
545 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
546 {
547 	return (ENOTSUP);
548 }
549 
/*
 * Reverse the order of the first `size' bytes (in memory order) of `data'.
 * Bytes beyond `size' in the result are zero.  Callers pass a size of
 * 1, 2, 4 or 8.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	union {
		uint64_t whole;
		uint8_t bytes[sizeof (uint64_t)];
	} src, dst;
	int lo, hi;

	src.whole = data;
	dst.whole = 0;

	/* Walk the source from its last byte down while filling from 0 up. */
	for (lo = 0, hi = size - 1; hi >= 0; lo++, hi--)
		dst.bytes[lo] = src.bytes[hi];

	return (dst.whole);
}
572 
573 /*
574  * A note about ontrap handling:
575  *
576  * X86 systems on which this module was tested return FFs instead of bus errors
577  * when accessing devices with invalid addresses.  Ontrap handling, which
578  * gracefully handles kernel bus errors, is installed anyway for I/O and mem
579  * space accessing (not for pci config space), in case future X86 platforms
580  * require it.
581  */
582 
583 /* Access device.  prg is modified. */
584 static int
585 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
586     boolean_t io_access)
587 {
588 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
589 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
590 	int rval = SUCCESS;
591 	uint64_t local_data;
592 	pci_cfgacc_req_t req;
593 	uint32_t max_offset;
594 
595 	if ((size <= 0) || (size > 8) || !ISP2(size)) {
596 		prg->status = PCITOOL_INVALID_SIZE;
597 		return (ENOTSUP);
598 	}
599 
600 	/*
601 	 * NOTE: there is no way to verify whether or not the address is
602 	 * valid other than that it is within the maximum offset.  The
603 	 * put functions return void and the get functions return -1 on error.
604 	 */
605 
606 	if (io_access)
607 		max_offset = pci_iocfg_max_offset;
608 	else
609 		max_offset = 0xFFF;
610 	if (prg->offset + size - 1 > max_offset) {
611 		prg->status = PCITOOL_INVALID_ADDRESS;
612 		return (ENOTSUP);
613 	}
614 
615 	prg->status = PCITOOL_SUCCESS;
616 
617 	req.rcdip = NULL;
618 	req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
619 	req.offset = prg->offset;
620 	req.size = size;
621 	req.write = write_flag;
622 	req.ioacc = io_access;
623 	if (write_flag) {
624 		if (big_endian) {
625 			local_data = pcitool_swap_endian(prg->data, size);
626 		} else {
627 			local_data = prg->data;
628 		}
629 		VAL64(&req) = local_data;
630 		pci_cfgacc_acc(&req);
631 	} else {
632 		pci_cfgacc_acc(&req);
633 		switch (size) {
634 		case 1:
635 			local_data = VAL8(&req);
636 			break;
637 		case 2:
638 			local_data = VAL16(&req);
639 			break;
640 		case 4:
641 			local_data = VAL32(&req);
642 			break;
643 		case 8:
644 			local_data = VAL64(&req);
645 			break;
646 		default:
647 			prg->status = PCITOOL_INVALID_ADDRESS;
648 			return (ENOTSUP);
649 		}
650 		if (big_endian) {
651 			prg->data =
652 			    pcitool_swap_endian(local_data, size);
653 		} else {
654 			prg->data = local_data;
655 		}
656 	}
657 	/*
658 	 * Check if legacy I/O config access is used, in which case the valid
659 	 * range varies with the I/O space mechanism used.
660 	 */
661 	if (req.ioacc && (prg->offset + size - 1 > pci_iocfg_max_offset)) {
662 		prg->status = PCITOOL_INVALID_ADDRESS;
663 		return (ENOTSUP);
664 	}
665 
666 	/* Set phys_addr only if MMIO is used */
667 	prg->phys_addr = 0;
668 	if (!req.ioacc && mcfg_mem_base != 0) {
669 		prg->phys_addr = mcfg_mem_base + prg->offset +
670 		    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
671 		    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
672 		    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
673 	}
674 
675 	return (rval);
676 }
677 
678 static int
679 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
680 {
681 	int port = (int)prg->phys_addr;
682 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
683 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
684 	volatile int rval = SUCCESS;
685 	on_trap_data_t otd;
686 	volatile uint64_t local_data;
687 
688 
689 	/*
690 	 * on_trap works like setjmp.
691 	 *
692 	 * A non-zero return here means on_trap has returned from an error.
693 	 *
694 	 * A zero return here means that on_trap has just returned from setup.
695 	 */
696 	if (on_trap(&otd, OT_DATA_ACCESS)) {
697 		no_trap();
698 		if (pcitool_debug)
699 			prom_printf(
700 			    "pcitool_io_access: on_trap caught an error...\n");
701 		prg->status = PCITOOL_INVALID_ADDRESS;
702 		return (EFAULT);
703 	}
704 
705 	if (write_flag) {
706 
707 		if (big_endian) {
708 			local_data = pcitool_swap_endian(prg->data, size);
709 		} else {
710 			local_data = prg->data;
711 		}
712 
713 		if (pcitool_debug)
714 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
715 			    size, port);
716 
717 		switch (size) {
718 		case 1:
719 			outb(port, (uint8_t)local_data);
720 			break;
721 		case 2:
722 			outw(port, (uint16_t)local_data);
723 			break;
724 		case 4:
725 			outl(port, (uint32_t)local_data);
726 			break;
727 		default:
728 			rval = ENOTSUP;
729 			prg->status = PCITOOL_INVALID_SIZE;
730 			break;
731 		}
732 	} else {
733 		if (pcitool_debug)
734 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
735 			    size, port);
736 
737 		switch (size) {
738 		case 1:
739 			local_data = inb(port);
740 			break;
741 		case 2:
742 			local_data = inw(port);
743 			break;
744 		case 4:
745 			local_data = inl(port);
746 			break;
747 		default:
748 			rval = ENOTSUP;
749 			prg->status = PCITOOL_INVALID_SIZE;
750 			break;
751 		}
752 
753 		if (rval == SUCCESS) {
754 			if (big_endian) {
755 				prg->data =
756 				    pcitool_swap_endian(local_data, size);
757 			} else {
758 				prg->data = local_data;
759 			}
760 		}
761 	}
762 
763 	no_trap();
764 	return (rval);
765 }
766 
767 static int
768 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
769 {
770 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
771 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
772 	volatile int rval = DDI_SUCCESS;
773 	on_trap_data_t otd;
774 	volatile uint64_t local_data;
775 
776 	/*
777 	 * on_trap works like setjmp.
778 	 *
779 	 * A non-zero return here means on_trap has returned from an error.
780 	 *
781 	 * A zero return here means that on_trap has just returned from setup.
782 	 */
783 	if (on_trap(&otd, OT_DATA_ACCESS)) {
784 		no_trap();
785 		if (pcitool_debug)
786 			prom_printf(
787 			    "pcitool_mem_access: on_trap caught an error...\n");
788 		prg->status = PCITOOL_INVALID_ADDRESS;
789 		return (EFAULT);
790 	}
791 
792 	if (write_flag) {
793 
794 		if (big_endian) {
795 			local_data = pcitool_swap_endian(prg->data, size);
796 		} else {
797 			local_data = prg->data;
798 		}
799 
800 		switch (size) {
801 		case 1:
802 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
803 			break;
804 		case 2:
805 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
806 			break;
807 		case 4:
808 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
809 			break;
810 		case 8:
811 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
812 			break;
813 		default:
814 			rval = ENOTSUP;
815 			prg->status = PCITOOL_INVALID_SIZE;
816 			break;
817 		}
818 	} else {
819 		switch (size) {
820 		case 1:
821 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
822 			break;
823 		case 2:
824 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
825 			break;
826 		case 4:
827 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
828 			break;
829 		case 8:
830 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
831 			break;
832 		default:
833 			rval = ENOTSUP;
834 			prg->status = PCITOOL_INVALID_SIZE;
835 			break;
836 		}
837 
838 		if (rval == SUCCESS) {
839 			if (big_endian) {
840 				prg->data =
841 				    pcitool_swap_endian(local_data, size);
842 			} else {
843 				prg->data = local_data;
844 			}
845 		}
846 	}
847 
848 	no_trap();
849 	return (rval);
850 }
851 
852 /*
853  * Map up to 2 pages which contain the address we want to access.
854  *
855  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
856  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
857  * We'll never have to map more than two pages.
858  */
859 
860 static uint64_t
861 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
862 {
863 
864 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
865 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
866 	void *virt_base;
867 	uint64_t returned_addr;
868 	pfn_t pfn;
869 
870 	if (pcitool_debug)
871 		prom_printf("pcitool_map: Called with PA:0x%p\n",
872 		    (void *)(uintptr_t)phys_addr);
873 
874 	*num_pages = 1;
875 
876 	/* Desired mapping would span more than two pages. */
877 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
878 		if (pcitool_debug)
879 			prom_printf("boundary violation: "
880 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
881 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
882 		return (0);
883 
884 	} else if ((offset + size) > MMU_PAGESIZE) {
885 		(*num_pages)++;
886 	}
887 
888 	/* Get page(s) of virtual space. */
889 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
890 	if (virt_base == NULL) {
891 		if (pcitool_debug)
892 			prom_printf("Couldn't get virtual base address.\n");
893 		return (0);
894 	}
895 
896 	if (pcitool_debug)
897 		prom_printf("Got base virtual address:0x%p\n", virt_base);
898 
899 #ifdef __xpv
900 	/*
901 	 * We should only get here if we are dom0.
902 	 * We're using a real device so we need to translate the MA to a PFN.
903 	 */
904 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
905 	pfn = xen_assign_pfn(mmu_btop(page_base));
906 #else
907 	pfn = btop(page_base);
908 #endif
909 
910 	/* Now map the allocated virtual space to the physical address. */
911 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
912 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
913 	    HAT_LOAD_LOCK);
914 
915 	returned_addr = ((uintptr_t)(virt_base)) + offset;
916 
917 	if (pcitool_debug)
918 		prom_printf("pcitool_map: returning VA:0x%p\n",
919 		    (void *)(uintptr_t)returned_addr);
920 
921 	return (returned_addr);
922 }
923 
924 /* Unmap the mapped page(s). */
925 static void
926 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
927 {
928 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
929 
930 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
931 	    HAT_UNLOAD_UNLOCK);
932 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
933 }
934 
935 
936 /* Perform register accesses on PCI leaf devices. */
937 /*ARGSUSED*/
938 int
939 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
940 {
941 	boolean_t	write_flag = B_FALSE;
942 	boolean_t	io_access = B_TRUE;
943 	int		rval = 0;
944 	pcitool_reg_t	prg;
945 	uint8_t		size;
946 
947 	uint64_t	base_addr;
948 	uint64_t	virt_addr;
949 	size_t		num_virt_pages;
950 
951 	switch (cmd) {
952 	case (PCITOOL_DEVICE_SET_REG):
953 		write_flag = B_TRUE;
954 
955 	/*FALLTHRU*/
956 	case (PCITOOL_DEVICE_GET_REG):
957 		if (pcitool_debug)
958 			prom_printf("pci_dev_reg_ops set/get reg\n");
959 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
960 		    DDI_SUCCESS) {
961 			if (pcitool_debug)
962 				prom_printf("Error reading arguments\n");
963 			return (EFAULT);
964 		}
965 
966 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
967 			prg.status = PCITOOL_OUT_OF_RANGE;
968 			rval = EINVAL;
969 			goto done_reg;
970 		}
971 
972 		if (pcitool_debug)
973 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
974 			    prg.bus_no, prg.dev_no, prg.func_no);
975 		/* Validate address arguments of bus / dev / func */
976 		if (((prg.bus_no &
977 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
978 		    prg.bus_no) ||
979 		    ((prg.dev_no &
980 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
981 		    prg.dev_no) ||
982 		    ((prg.func_no &
983 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
984 		    prg.func_no)) {
985 			prg.status = PCITOOL_INVALID_ADDRESS;
986 			rval = EINVAL;
987 			goto done_reg;
988 		}
989 
990 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
991 
992 		/* Proper config space desired. */
993 		if (prg.barnum == 0) {
994 
995 			if (pcitool_debug)
996 				prom_printf(
997 				    "config access: offset:0x%" PRIx64 ", "
998 				    "phys_addr:0x%" PRIx64 "\n",
999 				    prg.offset, prg.phys_addr);
1000 
1001 			if (prg.offset >= max_cfg_size) {
1002 				prg.status = PCITOOL_OUT_OF_RANGE;
1003 				rval = EINVAL;
1004 				goto done_reg;
1005 			}
1006 			if (max_cfg_size == PCIE_CONF_HDR_SIZE)
1007 				io_access = B_FALSE;
1008 
1009 			rval = pcitool_cfg_access(&prg, write_flag, io_access);
1010 			if (pcitool_debug)
1011 				prom_printf(
1012 				    "config access: data:0x%" PRIx64 "\n",
1013 				    prg.data);
1014 
1015 		/* IO/ MEM/ MEM64 space. */
1016 		} else {
1017 
1018 			pcitool_reg_t	prg2;
1019 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1020 
1021 			/*
1022 			 * Translate BAR number into offset of the BAR in
1023 			 * the device's config space.
1024 			 */
1025 			prg2.offset = pci_bars[prg2.barnum];
1026 			prg2.acc_attr =
1027 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1028 
1029 			if (pcitool_debug)
1030 				prom_printf(
1031 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1032 				    prg2.barnum, prg2.offset);
1033 			/*
1034 			 * Get Bus Address Register (BAR) from config space.
1035 			 * prg2.offset is the offset into config space of the
1036 			 * BAR desired.  prg.status is modified on error.
1037 			 */
1038 			rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
1039 			if (rval != SUCCESS) {
1040 				if (pcitool_debug)
1041 					prom_printf("BAR access failed\n");
1042 				prg.status = prg2.status;
1043 				goto done_reg;
1044 			}
1045 			/*
1046 			 * Reference proper PCI space based on the BAR.
1047 			 * If 64 bit MEM space, need to load other half of the
1048 			 * BAR first.
1049 			 */
1050 
1051 			if (pcitool_debug)
1052 				prom_printf("bar returned is 0x%" PRIx64 "\n",
1053 				    prg2.data);
1054 			if (!prg2.data) {
1055 				if (pcitool_debug)
1056 					prom_printf("BAR data == 0\n");
1057 				rval = EINVAL;
1058 				prg.status = PCITOOL_INVALID_ADDRESS;
1059 				goto done_reg;
1060 			}
1061 			if (prg2.data == 0xffffffff) {
1062 				if (pcitool_debug)
1063 					prom_printf("BAR data == -1\n");
1064 				rval = EINVAL;
1065 				prg.status = PCITOOL_INVALID_ADDRESS;
1066 				goto done_reg;
1067 			}
1068 
1069 			/*
1070 			 * BAR has bits saying this space is IO space, unless
1071 			 * this is the ROM address register.
1072 			 */
1073 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1074 			    PCI_BASE_SPACE_IO) &&
1075 			    (prg2.offset != PCI_CONF_ROM)) {
1076 				if (pcitool_debug)
1077 					prom_printf("IO space\n");
1078 
1079 				prg2.data &= PCI_BASE_IO_ADDR_M;
1080 				prg.phys_addr = prg2.data + prg.offset;
1081 
1082 				rval = pcitool_io_access(&prg, write_flag);
1083 				if ((rval != SUCCESS) && (pcitool_debug))
1084 					prom_printf("IO access failed\n");
1085 
1086 				goto done_reg;
1087 
1088 
1089 			/*
1090 			 * BAR has bits saying this space is 64 bit memory
1091 			 * space, unless this is the ROM address register.
1092 			 *
1093 			 * The 64 bit address stored in two BAR cells is not
1094 			 * necessarily aligned on an 8-byte boundary.
1095 			 * Need to keep the first 4 bytes read,
1096 			 * and do a separate read of the high 4 bytes.
1097 			 */
1098 
1099 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1100 			    (prg2.offset != PCI_CONF_ROM)) {
1101 
1102 				uint32_t low_bytes =
1103 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1104 
1105 				/*
1106 				 * Don't try to read the next 4 bytes
1107 				 * past the end of BARs.
1108 				 */
1109 				if (prg2.offset >= PCI_CONF_BASE5) {
1110 					prg.status = PCITOOL_OUT_OF_RANGE;
1111 					rval = EIO;
1112 					goto done_reg;
1113 				}
1114 
1115 				/*
1116 				 * Access device.
1117 				 * prg2.status is modified on error.
1118 				 */
1119 				prg2.offset += 4;
1120 				rval = pcitool_cfg_access(&prg2,
1121 				    B_FALSE, B_TRUE);
1122 				if (rval != SUCCESS) {
1123 					prg.status = prg2.status;
1124 					goto done_reg;
1125 				}
1126 
1127 				if (prg2.data == 0xffffffff) {
1128 					prg.status = PCITOOL_INVALID_ADDRESS;
1129 					prg.status = EFAULT;
1130 					goto done_reg;
1131 				}
1132 
1133 				prg2.data = (prg2.data << 32) + low_bytes;
1134 				if (pcitool_debug)
1135 					prom_printf(
1136 					    "64 bit mem space.  "
1137 					    "64-bit bar is 0x%" PRIx64 "\n",
1138 					    prg2.data);
1139 
1140 			/* Mem32 space, including ROM */
1141 			} else {
1142 
1143 				if (prg2.offset == PCI_CONF_ROM) {
1144 					if (pcitool_debug)
1145 						prom_printf(
1146 						    "Additional ROM "
1147 						    "checking\n");
1148 					/* Can't write to ROM */
1149 					if (write_flag) {
1150 						prg.status = PCITOOL_ROM_WRITE;
1151 						rval = EIO;
1152 						goto done_reg;
1153 
1154 					/* ROM disabled for reading */
1155 					} else if (!(prg2.data & 0x00000001)) {
1156 						prg.status =
1157 						    PCITOOL_ROM_DISABLED;
1158 						rval = EIO;
1159 						goto done_reg;
1160 					}
1161 				}
1162 
1163 				if (pcitool_debug)
1164 					prom_printf("32 bit mem space\n");
1165 			}
1166 
1167 			/* Common code for all IO/MEM range spaces. */
1168 
1169 			base_addr = prg2.data;
1170 			if (pcitool_debug)
1171 				prom_printf(
1172 				    "addr portion of bar is 0x%" PRIx64 ", "
1173 				    "base=0x%" PRIx64 ", "
1174 				    "offset:0x%" PRIx64 "\n",
1175 				    prg2.data, base_addr, prg.offset);
1176 			/*
1177 			 * Use offset provided by caller to index into
1178 			 * desired space, then access.
1179 			 * Note that prg.status is modified on error.
1180 			 */
1181 			prg.phys_addr = base_addr + prg.offset;
1182 
1183 			virt_addr = pcitool_map(prg.phys_addr, size,
1184 			    &num_virt_pages);
1185 			if (virt_addr == 0) {
1186 				prg.status = PCITOOL_IO_ERROR;
1187 				rval = EIO;
1188 				goto done_reg;
1189 			}
1190 
1191 			rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1192 			pcitool_unmap(virt_addr, num_virt_pages);
1193 		}
1194 done_reg:
1195 		prg.drvr_version = PCITOOL_VERSION;
1196 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1197 		    DDI_SUCCESS) {
1198 			if (pcitool_debug)
1199 				prom_printf("Error returning arguments.\n");
1200 			rval = EFAULT;
1201 		}
1202 		break;
1203 	default:
1204 		rval = ENOTTY;
1205 		break;
1206 	}
1207 	return (rval);
1208 }
1209