xref: /titanic_52/usr/src/uts/i86pc/io/pci/pci_tools.c (revision 5e298e8fe759ff38ee8bb74a75a43cf80f57d057)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/mkdev.h>
28 #include <sys/stat.h>
29 #include <sys/sunddi.h>
30 #include <vm/seg_kmem.h>
31 #include <sys/machparam.h>
32 #include <sys/sunndi.h>
33 #include <sys/ontrap.h>
34 #include <sys/psm.h>
35 #include <sys/pcie.h>
36 #include <sys/pci_cfgspace.h>
37 #include <sys/pci_tools.h>
38 #include <io/pci/pci_tools_ext.h>
39 #include <sys/apic.h>
40 #include <io/pci/pci_var.h>
41 #include <sys/pci_impl.h>
42 #include <sys/promif.h>
43 #include <sys/x86_archext.h>
44 #include <sys/cpuvar.h>
45 #include <sys/pci_cfgacc.h>
46 
47 #ifdef __xpv
48 #include <sys/hypervisor.h>
49 #endif
50 
51 #define	PCIEX_BDF_OFFSET_DELTA	4
52 #define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
53 #define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
54 #define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)
55 
56 #define	SUCCESS	0
57 
58 extern uint64_t mcfg_mem_base;
59 int pcitool_debug = 0;
60 
61 /*
62  * Offsets of BARS in config space.  First entry of 0 means config space.
63  * Entries here correlate to pcitool_bars_t enumerated type.
64  */
65 static uint8_t pci_bars[] = {
66 	0x0,
67 	PCI_CONF_BASE0,
68 	PCI_CONF_BASE1,
69 	PCI_CONF_BASE2,
70 	PCI_CONF_BASE3,
71 	PCI_CONF_BASE4,
72 	PCI_CONF_BASE5,
73 	PCI_CONF_ROM
74 };
75 
76 /* Max offset allowed into config space for a particular device. */
77 static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;
78 
79 static uint64_t pcitool_swap_endian(uint64_t data, int size);
80 static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
81     boolean_t io_access);
82 static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
83 static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
84     boolean_t write_flag);
85 static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
86 static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);
87 
88 /* Extern declarations */
89 extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
90 		    psm_intr_op_t, int *);
91 
92 int
93 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
94 {
95 	int instance = ddi_get_instance(dip);
96 
97 	/* Create pcitool nodes for register access and interrupt routing. */
98 
99 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
100 	    PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
101 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
102 		return (DDI_FAILURE);
103 	}
104 
105 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
106 	    PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
107 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
108 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
109 		return (DDI_FAILURE);
110 	}
111 
112 	if (is_pciex)
113 		max_cfg_size = PCIE_CONF_HDR_SIZE;
114 
115 	return (DDI_SUCCESS);
116 }
117 
118 void
119 pcitool_uninit(dev_info_t *dip)
120 {
121 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
122 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
123 }
124 
125 /*ARGSUSED*/
126 static int
127 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
128 {
129 	ddi_intr_handle_impl_t info_hdl;
130 	pcitool_intr_set_t iset;
131 	uint32_t old_cpu;
132 	int ret, result;
133 	size_t copyinout_size;
134 	int rval = SUCCESS;
135 
136 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
137 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
138 
139 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
140 		return (EFAULT);
141 
142 	switch (iset.user_version) {
143 	case PCITOOL_V1:
144 		break;
145 
146 	case PCITOOL_V2:
147 		copyinout_size = sizeof (pcitool_intr_set_t);
148 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
149 			return (EFAULT);
150 		break;
151 
152 	default:
153 		iset.status = PCITOOL_OUT_OF_RANGE;
154 		rval = ENOTSUP;
155 		goto done_set_intr;
156 	}
157 
158 	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
159 		rval = ENOTSUP;
160 		iset.status = PCITOOL_IO_ERROR;
161 		goto done_set_intr;
162 	}
163 
164 	if (iset.ino > APIC_MAX_VECTOR) {
165 		rval = EINVAL;
166 		iset.status = PCITOOL_INVALID_INO;
167 		goto done_set_intr;
168 	}
169 
170 	iset.status = PCITOOL_SUCCESS;
171 
172 	if ((old_cpu = pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
173 		iset.status = PCITOOL_IO_ERROR;
174 		rval = EINVAL;
175 		goto done_set_intr;
176 	}
177 
178 
179 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
180 
181 	/*
182 	 * For this locally-declared and used handle, ih_private will contain a
183 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
184 	 */
185 	info_hdl.ih_vector = iset.ino;
186 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
187 	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
188 	if (pcitool_debug)
189 		prom_printf("user version:%d, flags:0x%x\n",
190 		    iset.user_version, iset.flags);
191 
192 	result = ENOTSUP;
193 	if ((iset.user_version >= PCITOOL_V2) &&
194 	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
195 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
196 		    &result);
197 	} else {
198 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
199 		    &result);
200 	}
201 
202 	if (ret != PSM_SUCCESS) {
203 		switch (result) {
204 		case EIO:		/* Error making the change */
205 			rval = EIO;
206 			iset.status = PCITOOL_IO_ERROR;
207 			break;
208 		case ENXIO:		/* Couldn't convert vector to irq */
209 			rval = EINVAL;
210 			iset.status = PCITOOL_INVALID_INO;
211 			break;
212 		case EINVAL:		/* CPU out of range */
213 			rval = EINVAL;
214 			iset.status = PCITOOL_INVALID_CPUID;
215 			break;
216 		case ENOTSUP:		/* Requested PSM intr ops missing */
217 			rval = ENOTSUP;
218 			iset.status = PCITOOL_IO_ERROR;
219 			break;
220 		}
221 	}
222 
223 	/* Return original CPU. */
224 	iset.cpu_id = old_cpu;
225 
226 done_set_intr:
227 	iset.drvr_version = PCITOOL_VERSION;
228 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
229 		rval = EFAULT;
230 	return (rval);
231 }
232 
233 
234 /* It is assumed that dip != NULL */
235 static void
236 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
237 {
238 	(void) strncpy(devs->driver_name,
239 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
240 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
241 	(void) ddi_pathname(dip, devs->path);
242 	devs->dev_inst = ddi_get_instance(dip);
243 }
244 
245 static int
246 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
247 {
248 	/* Array part isn't used here, but oh well... */
249 	pcitool_intr_get_t partial_iget;
250 	pcitool_intr_get_t *iget = &partial_iget;
251 	size_t	iget_kmem_alloc_size = 0;
252 	uint8_t num_devs_ret;
253 	int copyout_rval;
254 	int rval = SUCCESS;
255 	int circ;
256 	int i;
257 
258 	ddi_intr_handle_impl_t info_hdl;
259 	apic_get_intr_t intr_info;
260 
261 	/* Read in just the header part, no array section. */
262 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
263 	    DDI_SUCCESS)
264 		return (EFAULT);
265 
266 	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
267 		partial_iget.status = PCITOOL_IO_ERROR;
268 		partial_iget.num_devs_ret = 0;
269 		rval = ENOTSUP;
270 		goto done_get_intr;
271 	}
272 
273 	/* Validate argument. */
274 	if (partial_iget.ino > APIC_MAX_VECTOR) {
275 		partial_iget.status = PCITOOL_INVALID_INO;
276 		partial_iget.num_devs_ret = 0;
277 		rval = EINVAL;
278 		goto done_get_intr;
279 	}
280 
281 	num_devs_ret = partial_iget.num_devs_ret;
282 	intr_info.avgi_dip_list = NULL;
283 	intr_info.avgi_req_flags =
284 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
285 	/*
286 	 * For this locally-declared and used handle, ih_private will contain a
287 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
288 	 * global interrupt handling.
289 	 */
290 	info_hdl.ih_private = &intr_info;
291 	info_hdl.ih_vector = partial_iget.ino;
292 
293 	/* Caller wants device information returned. */
294 	if (num_devs_ret > 0) {
295 
296 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
297 
298 		/*
299 		 * Allocate room.
300 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
301 		 */
302 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
303 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
304 
305 		/* Read in whole structure to verify there's room. */
306 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
307 		    SUCCESS) {
308 
309 			/* Be consistent and just return EFAULT here. */
310 			kmem_free(iget, iget_kmem_alloc_size);
311 
312 			return (EFAULT);
313 		}
314 	}
315 
316 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
317 	iget->ino = info_hdl.ih_vector;
318 
319 	/*
320 	 * Lock device tree branch from the pci root nexus on down if info will
321 	 * be extracted from dips returned from the tree.
322 	 */
323 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
324 		ndi_devi_enter(dip, &circ);
325 	}
326 
327 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
328 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
329 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
330 		iget->status = PCITOOL_IO_ERROR;
331 		iget->num_devs_ret = 0;
332 		rval = EINVAL;
333 		goto done_get_intr;
334 	}
335 
336 	/*
337 	 * Fill in the pcitool_intr_get_t to be returned,
338 	 * with the CPU, num_devs_ret and num_devs.
339 	 */
340 	iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
341 
342 	/* Number of devices returned by apic. */
343 	iget->num_devs = intr_info.avgi_num_devs;
344 
345 	/* Device info was returned. */
346 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
347 
348 		/*
349 		 * num devs returned is num devs ret by apic,
350 		 * space permitting.
351 		 */
352 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
353 
354 		/*
355 		 * Loop thru list of dips and extract driver, name and instance.
356 		 * Fill in the pcitool_intr_dev_t's with this info.
357 		 */
358 		for (i = 0; i < iget->num_devs_ret; i++)
359 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
360 			    &iget->dev[i]);
361 
362 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
363 		kmem_free(intr_info.avgi_dip_list,
364 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
365 	}
366 
367 done_get_intr:
368 
369 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
370 		ndi_devi_exit(dip, circ);
371 	}
372 
373 	iget->drvr_version = PCITOOL_VERSION;
374 	copyout_rval = ddi_copyout(iget, arg,
375 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
376 
377 	if (iget_kmem_alloc_size > 0)
378 		kmem_free(iget, iget_kmem_alloc_size);
379 
380 	if (copyout_rval != DDI_SUCCESS)
381 		rval = EFAULT;
382 
383 	return (rval);
384 }
385 
386 /*ARGSUSED*/
387 static int
388 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
389 {
390 	pcitool_intr_info_t intr_info;
391 	ddi_intr_handle_impl_t info_hdl;
392 	int rval = SUCCESS;
393 
394 	/* If we need user_version, and to ret same user version as passed in */
395 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
396 	    DDI_SUCCESS) {
397 		if (pcitool_debug)
398 			prom_printf("Error reading arguments\n");
399 		return (EFAULT);
400 	}
401 
402 	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
403 		return (ENOTSUP);
404 
405 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
406 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
407 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
408 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
409 		intr_info.ctlr_version = 0;
410 
411 	} else {
412 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
413 		if (strcmp((char *)info_hdl.ih_private,
414 		    APIC_PCPLUSMP_NAME) == 0)
415 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
416 		else
417 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
418 	}
419 
420 	intr_info.num_intr = APIC_MAX_VECTOR;
421 	intr_info.drvr_version = PCITOOL_VERSION;
422 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
423 	    DDI_SUCCESS) {
424 		if (pcitool_debug)
425 			prom_printf("Error returning arguments.\n");
426 		rval = EFAULT;
427 	}
428 
429 	return (rval);
430 }
431 
432 
433 
434 /*
435  * Main function for handling interrupt CPU binding requests and queries.
436  * Need to implement later
437  */
438 int
439 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
440 {
441 	int rval;
442 
443 	switch (cmd) {
444 
445 	/* Associate a new CPU with a given vector */
446 	case PCITOOL_DEVICE_SET_INTR:
447 		rval = pcitool_set_intr(dip, arg, mode);
448 		break;
449 
450 	case PCITOOL_DEVICE_GET_INTR:
451 		rval = pcitool_get_intr(dip, arg, mode);
452 		break;
453 
454 	case PCITOOL_SYSTEM_INTR_INFO:
455 		rval = pcitool_intr_info(dip, arg, mode);
456 		break;
457 
458 	default:
459 		rval = ENOTSUP;
460 	}
461 
462 	return (rval);
463 }
464 
465 /*
466  * Perform register accesses on the nexus device itself.
467  * No explicit PCI nexus device for X86, so not applicable.
468  */
469 
470 /*ARGSUSED*/
471 int
472 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
473 {
474 	return (ENOTSUP);
475 }
476 
/*
 * Reverse the order of the first `size' bytes (in memory order) of data.
 * Bytes of the result beyond `size' are zero.  A size of zero or less
 * yields 0.  Callers in this file pass sizes of at most 8.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	union {
		uint64_t whole;
		uint8_t bytes[sizeof (uint64_t)];
	} src, dst;
	int from;

	src.whole = data;
	dst.whole = 0;

	/* Walk the source from byte size-1 down to 0, filling dst upward. */
	for (from = size; from > 0; from--)
		dst.bytes[size - from] = src.bytes[from - 1];

	return (dst.whole);
}
499 
500 /*
501  * A note about ontrap handling:
502  *
503  * X86 systems on which this module was tested return FFs instead of bus errors
504  * when accessing devices with invalid addresses.  Ontrap handling, which
505  * gracefully handles kernel bus errors, is installed anyway for I/O and mem
506  * space accessing (not for pci config space), in case future X86 platforms
507  * require it.
508  */
509 
510 /* Access device.  prg is modified. */
511 static int
512 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
513     boolean_t io_access)
514 {
515 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
516 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
517 	int rval = SUCCESS;
518 	uint64_t local_data;
519 	pci_cfgacc_req_t req;
520 	uint32_t max_offset;
521 
522 	if ((size <= 0) || (size > 8) || ((size & (size - 1)) != 0)) {
523 		prg->status = PCITOOL_INVALID_SIZE;
524 		return (ENOTSUP);
525 	}
526 
527 	/*
528 	 * NOTE: there is no way to verify whether or not the address is
529 	 * valid other than that it is within the maximum offset.  The
530 	 * put functions return void and the get functions return -1 on error.
531 	 */
532 
533 	if (io_access)
534 		max_offset = 0xFF;
535 	else
536 		max_offset = 0xFFF;
537 	if (prg->offset + size - 1 > max_offset) {
538 		prg->status = PCITOOL_INVALID_ADDRESS;
539 		return (ENOTSUP);
540 	}
541 
542 	prg->status = PCITOOL_SUCCESS;
543 
544 	req.rcdip = NULL;
545 	req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
546 	req.offset = prg->offset;
547 	req.size = size;
548 	req.write = write_flag;
549 	req.ioacc = io_access;
550 	if (write_flag) {
551 		if (big_endian) {
552 			local_data = pcitool_swap_endian(prg->data, size);
553 		} else {
554 			local_data = prg->data;
555 		}
556 		VAL64(&req) = local_data;
557 		pci_cfgacc_acc(&req);
558 	} else {
559 		pci_cfgacc_acc(&req);
560 		local_data = VAL64(&req);
561 		if (big_endian) {
562 			prg->data =
563 			    pcitool_swap_endian(local_data, size);
564 		} else {
565 			prg->data = local_data;
566 		}
567 	}
568 	/*
569 	 * Check if legacy IO config access is used, in which case
570 	 * only first 256 bytes are valid.
571 	 */
572 	if (req.ioacc && (prg->offset + size - 1 > 0xFF)) {
573 		prg->status = PCITOOL_INVALID_ADDRESS;
574 		return (ENOTSUP);
575 	}
576 
577 	/* Set phys_addr only if MMIO is used */
578 	prg->phys_addr = 0;
579 	if (!req.ioacc && mcfg_mem_base != 0) {
580 		prg->phys_addr = mcfg_mem_base + prg->offset +
581 		    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
582 		    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
583 		    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
584 	}
585 
586 	return (rval);
587 }
588 
589 static int
590 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
591 {
592 	int port = (int)prg->phys_addr;
593 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
594 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
595 	int rval = SUCCESS;
596 	on_trap_data_t otd;
597 	uint64_t local_data;
598 
599 
600 	/*
601 	 * on_trap works like setjmp.
602 	 *
603 	 * A non-zero return here means on_trap has returned from an error.
604 	 *
605 	 * A zero return here means that on_trap has just returned from setup.
606 	 */
607 	if (on_trap(&otd, OT_DATA_ACCESS)) {
608 		no_trap();
609 		if (pcitool_debug)
610 			prom_printf(
611 			    "pcitool_io_access: on_trap caught an error...\n");
612 		prg->status = PCITOOL_INVALID_ADDRESS;
613 		return (EFAULT);
614 	}
615 
616 	if (write_flag) {
617 
618 		if (big_endian) {
619 			local_data = pcitool_swap_endian(prg->data, size);
620 		} else {
621 			local_data = prg->data;
622 		}
623 
624 		if (pcitool_debug)
625 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
626 			    size, port);
627 
628 		switch (size) {
629 		case 1:
630 			outb(port, (uint8_t)local_data);
631 			break;
632 		case 2:
633 			outw(port, (uint16_t)local_data);
634 			break;
635 		case 4:
636 			outl(port, (uint32_t)local_data);
637 			break;
638 		default:
639 			rval = ENOTSUP;
640 			prg->status = PCITOOL_INVALID_SIZE;
641 			break;
642 		}
643 	} else {
644 		if (pcitool_debug)
645 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
646 			    size, port);
647 
648 		switch (size) {
649 		case 1:
650 			local_data = inb(port);
651 			break;
652 		case 2:
653 			local_data = inw(port);
654 			break;
655 		case 4:
656 			local_data = inl(port);
657 			break;
658 		default:
659 			rval = ENOTSUP;
660 			prg->status = PCITOOL_INVALID_SIZE;
661 			break;
662 		}
663 
664 		if (rval == SUCCESS) {
665 			if (big_endian) {
666 				prg->data =
667 				    pcitool_swap_endian(local_data, size);
668 			} else {
669 				prg->data = local_data;
670 			}
671 		}
672 	}
673 
674 	no_trap();
675 	return (rval);
676 }
677 
678 static int
679 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
680 {
681 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
682 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
683 	int rval = DDI_SUCCESS;
684 	on_trap_data_t otd;
685 	uint64_t local_data;
686 
687 	/*
688 	 * on_trap works like setjmp.
689 	 *
690 	 * A non-zero return here means on_trap has returned from an error.
691 	 *
692 	 * A zero return here means that on_trap has just returned from setup.
693 	 */
694 	if (on_trap(&otd, OT_DATA_ACCESS)) {
695 		no_trap();
696 		if (pcitool_debug)
697 			prom_printf(
698 			    "pcitool_mem_access: on_trap caught an error...\n");
699 		prg->status = PCITOOL_INVALID_ADDRESS;
700 		return (EFAULT);
701 	}
702 
703 	if (write_flag) {
704 
705 		if (big_endian) {
706 			local_data = pcitool_swap_endian(prg->data, size);
707 		} else {
708 			local_data = prg->data;
709 		}
710 
711 		switch (size) {
712 		case 1:
713 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
714 			break;
715 		case 2:
716 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
717 			break;
718 		case 4:
719 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
720 			break;
721 		case 8:
722 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
723 			break;
724 		default:
725 			rval = ENOTSUP;
726 			prg->status = PCITOOL_INVALID_SIZE;
727 			break;
728 		}
729 	} else {
730 		switch (size) {
731 		case 1:
732 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
733 			break;
734 		case 2:
735 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
736 			break;
737 		case 4:
738 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
739 			break;
740 		case 8:
741 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
742 			break;
743 		default:
744 			rval = ENOTSUP;
745 			prg->status = PCITOOL_INVALID_SIZE;
746 			break;
747 		}
748 
749 		if (rval == SUCCESS) {
750 			if (big_endian) {
751 				prg->data =
752 				    pcitool_swap_endian(local_data, size);
753 			} else {
754 				prg->data = local_data;
755 			}
756 		}
757 	}
758 
759 	no_trap();
760 	return (rval);
761 }
762 
763 /*
764  * Map up to 2 pages which contain the address we want to access.
765  *
766  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
767  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
768  * We'll never have to map more than two pages.
769  */
770 
771 static uint64_t
772 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
773 {
774 
775 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
776 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
777 	void *virt_base;
778 	uint64_t returned_addr;
779 	pfn_t pfn;
780 
781 	if (pcitool_debug)
782 		prom_printf("pcitool_map: Called with PA:0x%p\n",
783 		    (void *)(uintptr_t)phys_addr);
784 
785 	*num_pages = 1;
786 
787 	/* Desired mapping would span more than two pages. */
788 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
789 		if (pcitool_debug)
790 			prom_printf("boundary violation: "
791 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
792 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
793 		return (NULL);
794 
795 	} else if ((offset + size) > MMU_PAGESIZE) {
796 		(*num_pages)++;
797 	}
798 
799 	/* Get page(s) of virtual space. */
800 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
801 	if (virt_base == NULL) {
802 		if (pcitool_debug)
803 			prom_printf("Couldn't get virtual base address.\n");
804 		return (NULL);
805 	}
806 
807 	if (pcitool_debug)
808 		prom_printf("Got base virtual address:0x%p\n", virt_base);
809 
810 #ifdef __xpv
811 	/*
812 	 * We should only get here if we are dom0.
813 	 * We're using a real device so we need to translate the MA to a PFN.
814 	 */
815 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
816 	pfn = xen_assign_pfn(mmu_btop(page_base));
817 #else
818 	pfn = btop(page_base);
819 #endif
820 
821 	/* Now map the allocated virtual space to the physical address. */
822 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
823 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
824 	    HAT_LOAD_LOCK);
825 
826 	returned_addr = ((uintptr_t)(virt_base)) + offset;
827 
828 	if (pcitool_debug)
829 		prom_printf("pcitool_map: returning VA:0x%p\n",
830 		    (void *)(uintptr_t)returned_addr);
831 
832 	return (returned_addr);
833 }
834 
835 /* Unmap the mapped page(s). */
836 static void
837 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
838 {
839 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
840 
841 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
842 	    HAT_UNLOAD_UNLOCK);
843 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
844 }
845 
846 
847 /* Perform register accesses on PCI leaf devices. */
848 /*ARGSUSED*/
849 int
850 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
851 {
852 	boolean_t	write_flag = B_FALSE;
853 	boolean_t	io_access = B_TRUE;
854 	int		rval = 0;
855 	pcitool_reg_t	prg;
856 	uint8_t		size;
857 
858 	uint64_t	base_addr;
859 	uint64_t	virt_addr;
860 	size_t		num_virt_pages;
861 
862 	switch (cmd) {
863 	case (PCITOOL_DEVICE_SET_REG):
864 		write_flag = B_TRUE;
865 
866 	/*FALLTHRU*/
867 	case (PCITOOL_DEVICE_GET_REG):
868 		if (pcitool_debug)
869 			prom_printf("pci_dev_reg_ops set/get reg\n");
870 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
871 		    DDI_SUCCESS) {
872 			if (pcitool_debug)
873 				prom_printf("Error reading arguments\n");
874 			return (EFAULT);
875 		}
876 
877 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
878 			prg.status = PCITOOL_OUT_OF_RANGE;
879 			rval = EINVAL;
880 			goto done_reg;
881 		}
882 
883 		if (pcitool_debug)
884 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
885 			    prg.bus_no, prg.dev_no, prg.func_no);
886 		/* Validate address arguments of bus / dev / func */
887 		if (((prg.bus_no &
888 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
889 		    prg.bus_no) ||
890 		    ((prg.dev_no &
891 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
892 		    prg.dev_no) ||
893 		    ((prg.func_no &
894 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
895 		    prg.func_no)) {
896 			prg.status = PCITOOL_INVALID_ADDRESS;
897 			rval = EINVAL;
898 			goto done_reg;
899 		}
900 
901 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
902 
903 		/* Proper config space desired. */
904 		if (prg.barnum == 0) {
905 
906 			if (pcitool_debug)
907 				prom_printf(
908 				    "config access: offset:0x%" PRIx64 ", "
909 				    "phys_addr:0x%" PRIx64 "\n",
910 				    prg.offset, prg.phys_addr);
911 
912 			if (prg.offset >= max_cfg_size) {
913 				prg.status = PCITOOL_OUT_OF_RANGE;
914 				rval = EINVAL;
915 				goto done_reg;
916 			}
917 			if (max_cfg_size == PCIE_CONF_HDR_SIZE)
918 				io_access = B_FALSE;
919 
920 			rval = pcitool_cfg_access(&prg, write_flag, io_access);
921 			if (pcitool_debug)
922 				prom_printf(
923 				    "config access: data:0x%" PRIx64 "\n",
924 				    prg.data);
925 
926 		/* IO/ MEM/ MEM64 space. */
927 		} else {
928 
929 			pcitool_reg_t	prg2;
930 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
931 
932 			/*
933 			 * Translate BAR number into offset of the BAR in
934 			 * the device's config space.
935 			 */
936 			prg2.offset = pci_bars[prg2.barnum];
937 			prg2.acc_attr =
938 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
939 
940 			if (pcitool_debug)
941 				prom_printf(
942 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
943 				    prg2.barnum, prg2.offset);
944 			/*
945 			 * Get Bus Address Register (BAR) from config space.
946 			 * prg2.offset is the offset into config space of the
947 			 * BAR desired.  prg.status is modified on error.
948 			 */
949 			rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
950 			if (rval != SUCCESS) {
951 				if (pcitool_debug)
952 					prom_printf("BAR access failed\n");
953 				prg.status = prg2.status;
954 				goto done_reg;
955 			}
956 			/*
957 			 * Reference proper PCI space based on the BAR.
958 			 * If 64 bit MEM space, need to load other half of the
959 			 * BAR first.
960 			 */
961 
962 			if (pcitool_debug)
963 				prom_printf("bar returned is 0x%" PRIx64 "\n",
964 				    prg2.data);
965 			if (!prg2.data) {
966 				if (pcitool_debug)
967 					prom_printf("BAR data == 0\n");
968 				rval = EINVAL;
969 				prg.status = PCITOOL_INVALID_ADDRESS;
970 				goto done_reg;
971 			}
972 			if (prg2.data == 0xffffffff) {
973 				if (pcitool_debug)
974 					prom_printf("BAR data == -1\n");
975 				rval = EINVAL;
976 				prg.status = PCITOOL_INVALID_ADDRESS;
977 				goto done_reg;
978 			}
979 
980 			/*
981 			 * BAR has bits saying this space is IO space, unless
982 			 * this is the ROM address register.
983 			 */
984 			if (((PCI_BASE_SPACE_M & prg2.data) ==
985 			    PCI_BASE_SPACE_IO) &&
986 			    (prg2.offset != PCI_CONF_ROM)) {
987 				if (pcitool_debug)
988 					prom_printf("IO space\n");
989 
990 				prg2.data &= PCI_BASE_IO_ADDR_M;
991 				prg.phys_addr = prg2.data + prg.offset;
992 
993 				rval = pcitool_io_access(&prg, write_flag);
994 				if ((rval != SUCCESS) && (pcitool_debug))
995 					prom_printf("IO access failed\n");
996 
997 				goto done_reg;
998 
999 
1000 			/*
1001 			 * BAR has bits saying this space is 64 bit memory
1002 			 * space, unless this is the ROM address register.
1003 			 *
1004 			 * The 64 bit address stored in two BAR cells is not
1005 			 * necessarily aligned on an 8-byte boundary.
1006 			 * Need to keep the first 4 bytes read,
1007 			 * and do a separate read of the high 4 bytes.
1008 			 */
1009 
1010 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1011 			    (prg2.offset != PCI_CONF_ROM)) {
1012 
1013 				uint32_t low_bytes =
1014 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1015 
1016 				/*
1017 				 * Don't try to read the next 4 bytes
1018 				 * past the end of BARs.
1019 				 */
1020 				if (prg2.offset >= PCI_CONF_BASE5) {
1021 					prg.status = PCITOOL_OUT_OF_RANGE;
1022 					rval = EIO;
1023 					goto done_reg;
1024 				}
1025 
1026 				/*
1027 				 * Access device.
1028 				 * prg2.status is modified on error.
1029 				 */
1030 				prg2.offset += 4;
1031 				rval = pcitool_cfg_access(&prg2,
1032 				    B_FALSE, B_TRUE);
1033 				if (rval != SUCCESS) {
1034 					prg.status = prg2.status;
1035 					goto done_reg;
1036 				}
1037 
1038 				if (prg2.data == 0xffffffff) {
1039 					prg.status = PCITOOL_INVALID_ADDRESS;
1040 					prg.status = EFAULT;
1041 					goto done_reg;
1042 				}
1043 
1044 				prg2.data = (prg2.data << 32) + low_bytes;
1045 				if (pcitool_debug)
1046 					prom_printf(
1047 					    "64 bit mem space.  "
1048 					    "64-bit bar is 0x%" PRIx64 "\n",
1049 					    prg2.data);
1050 
1051 			/* Mem32 space, including ROM */
1052 			} else {
1053 
1054 				if (prg2.offset == PCI_CONF_ROM) {
1055 					if (pcitool_debug)
1056 						prom_printf(
1057 						    "Additional ROM "
1058 						    "checking\n");
1059 					/* Can't write to ROM */
1060 					if (write_flag) {
1061 						prg.status = PCITOOL_ROM_WRITE;
1062 						rval = EIO;
1063 						goto done_reg;
1064 
1065 					/* ROM disabled for reading */
1066 					} else if (!(prg2.data & 0x00000001)) {
1067 						prg.status =
1068 						    PCITOOL_ROM_DISABLED;
1069 						rval = EIO;
1070 						goto done_reg;
1071 					}
1072 				}
1073 
1074 				if (pcitool_debug)
1075 					prom_printf("32 bit mem space\n");
1076 			}
1077 
1078 			/* Common code for all IO/MEM range spaces. */
1079 
1080 			base_addr = prg2.data;
1081 			if (pcitool_debug)
1082 				prom_printf(
1083 				    "addr portion of bar is 0x%" PRIx64 ", "
1084 				    "base=0x%" PRIx64 ", "
1085 				    "offset:0x%" PRIx64 "\n",
1086 				    prg2.data, base_addr, prg.offset);
1087 			/*
1088 			 * Use offset provided by caller to index into
1089 			 * desired space, then access.
1090 			 * Note that prg.status is modified on error.
1091 			 */
1092 			prg.phys_addr = base_addr + prg.offset;
1093 
1094 			virt_addr = pcitool_map(prg.phys_addr, size,
1095 			    &num_virt_pages);
1096 			if (virt_addr == NULL) {
1097 				prg.status = PCITOOL_IO_ERROR;
1098 				rval = EIO;
1099 				goto done_reg;
1100 			}
1101 
1102 			rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1103 			pcitool_unmap(virt_addr, num_virt_pages);
1104 		}
1105 done_reg:
1106 		prg.drvr_version = PCITOOL_VERSION;
1107 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1108 		    DDI_SUCCESS) {
1109 			if (pcitool_debug)
1110 				prom_printf("Error returning arguments.\n");
1111 			rval = EFAULT;
1112 		}
1113 		break;
1114 	default:
1115 		rval = ENOTTY;
1116 		break;
1117 	}
1118 	return (rval);
1119 }
1120