xref: /illumos-gate/usr/src/uts/i86pc/io/pci/pci_tools.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/mkdev.h>
28 #include <sys/stat.h>
29 #include <sys/sunddi.h>
30 #include <vm/seg_kmem.h>
31 #include <sys/machparam.h>
32 #include <sys/sunndi.h>
33 #include <sys/ontrap.h>
34 #include <sys/psm.h>
35 #include <sys/pcie.h>
36 #include <sys/pci_cfgspace.h>
37 #include <sys/pci_tools.h>
38 #include <io/pci/pci_tools_ext.h>
39 #include <sys/apic.h>
40 #include <sys/apix.h>
41 #include <io/pci/pci_var.h>
42 #include <sys/pci_impl.h>
43 #include <sys/promif.h>
44 #include <sys/x86_archext.h>
45 #include <sys/cpuvar.h>
46 #include <sys/pci_cfgacc.h>
47 
48 #ifdef __xpv
49 #include <sys/hypervisor.h>
50 #endif
51 
/*
 * Shifts for the function, device and bus fields of a PCI Express
 * memory-mapped config address.  Each one is the corresponding PCI_REG_*
 * shift moved up by PCIEX_BDF_OFFSET_DELTA bits.
 */
#define	PCIEX_BDF_OFFSET_DELTA	4
#define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)

/* File-local "no error" value used for errno-style return codes. */
#define	SUCCESS	0

/*
 * mcfg_mem_base: base used to compute the physical address of a
 * memory-mapped config register; a value of 0 means no address is reported.
 * pci_iocfg_max_offset: largest config offset accepted for I/O-style config
 * access.
 * pcitool_debug: when non-zero, routines in this file trace via prom_printf().
 */
extern uint64_t mcfg_mem_base;
extern uint_t pci_iocfg_max_offset;
int pcitool_debug = 0;
62 
/*
 * Offsets of BARS in config space.  First entry of 0 means config space.
 * Entries here correlate to pcitool_bars_t enumerated type.
 *
 * pcitool_dev_reg_ops() indexes this table with the caller-supplied barnum
 * after range-checking it against the number of entries.
 */
static uint8_t pci_bars[] = {
	0x0,
	PCI_CONF_BASE0,
	PCI_CONF_BASE1,
	PCI_CONF_BASE2,
	PCI_CONF_BASE3,
	PCI_CONF_BASE4,
	PCI_CONF_BASE5,
	PCI_CONF_ROM
};
77 
/*
 * Max offset allowed into config space for a particular device.
 * Defaults to PCI_CONF_HDR_SIZE; pcitool_init() switches it to
 * PCIE_CONF_HDR_SIZE when it is called with is_pciex set.
 */
static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;

/* Forward declarations of the file-local access and mapping helpers. */
static uint64_t pcitool_swap_endian(uint64_t data, int size);
static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
    boolean_t io_access);
static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
    boolean_t write_flag);
static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);

/* Extern declarations */
/* Entry point used below to query and reprogram interrupt routing. */
extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
		    psm_intr_op_t, int *);
93 
94 int
95 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
96 {
97 	int instance = ddi_get_instance(dip);
98 
99 	/* Create pcitool nodes for register access and interrupt routing. */
100 
101 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
102 	    PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
103 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
104 		return (DDI_FAILURE);
105 	}
106 
107 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
108 	    PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
109 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
110 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
111 		return (DDI_FAILURE);
112 	}
113 
114 	if (is_pciex)
115 		max_cfg_size = PCIE_CONF_HDR_SIZE;
116 
117 	return (DDI_SUCCESS);
118 }
119 
120 void
121 pcitool_uninit(dev_info_t *dip)
122 {
123 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
124 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
125 }
126 
127 /*ARGSUSED*/
128 static int
129 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
130 {
131 	ddi_intr_handle_impl_t info_hdl;
132 	pcitool_intr_set_t iset;
133 	uint32_t old_cpu;
134 	int ret, result;
135 	size_t copyinout_size;
136 	int rval = SUCCESS;
137 	apic_get_type_t type_info;
138 
139 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
140 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
141 
142 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
143 		return (EFAULT);
144 
145 	switch (iset.user_version) {
146 	case PCITOOL_V1:
147 		break;
148 
149 	case PCITOOL_V2:
150 		copyinout_size = sizeof (pcitool_intr_set_t);
151 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
152 			return (EFAULT);
153 		break;
154 
155 	default:
156 		iset.status = PCITOOL_OUT_OF_RANGE;
157 		rval = ENOTSUP;
158 		goto done_set_intr;
159 	}
160 
161 	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
162 		rval = ENOTSUP;
163 		iset.status = PCITOOL_IO_ERROR;
164 		goto done_set_intr;
165 	}
166 
167 	info_hdl.ih_private = &type_info;
168 
169 	if ((*psm_intr_ops)(NULL, &info_hdl,
170 	    PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
171 		rval = ENOTSUP;
172 		iset.status = PCITOOL_IO_ERROR;
173 		goto done_set_intr;
174 	}
175 
176 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
177 		if (iset.old_cpu > type_info.avgi_num_cpu) {
178 			rval = EINVAL;
179 			iset.status = PCITOOL_INVALID_CPUID;
180 			goto done_set_intr;
181 		}
182 		old_cpu = iset.old_cpu;
183 	} else {
184 		if ((old_cpu =
185 		    pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
186 			iset.status = PCITOOL_IO_ERROR;
187 			rval = EINVAL;
188 			goto done_set_intr;
189 		}
190 	}
191 
192 	if (iset.ino > type_info.avgi_num_intr) {
193 		rval = EINVAL;
194 		iset.status = PCITOOL_INVALID_INO;
195 		goto done_set_intr;
196 	}
197 
198 	iset.status = PCITOOL_SUCCESS;
199 
200 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
201 
202 	/*
203 	 * For this locally-declared and used handle, ih_private will contain a
204 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
205 	 */
206 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
207 		info_hdl.ih_vector = APIX_VIRTVECTOR(old_cpu, iset.ino);
208 	} else {
209 		info_hdl.ih_vector = iset.ino;
210 	}
211 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
212 	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
213 	if (pcitool_debug)
214 		prom_printf("user version:%d, flags:0x%x\n",
215 		    iset.user_version, iset.flags);
216 
217 	result = ENOTSUP;
218 	if ((iset.user_version >= PCITOOL_V2) &&
219 	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
220 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
221 		    &result);
222 	} else {
223 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
224 		    &result);
225 	}
226 
227 	if (ret != PSM_SUCCESS) {
228 		switch (result) {
229 		case EIO:		/* Error making the change */
230 			rval = EIO;
231 			iset.status = PCITOOL_IO_ERROR;
232 			break;
233 		case ENXIO:		/* Couldn't convert vector to irq */
234 			rval = EINVAL;
235 			iset.status = PCITOOL_INVALID_INO;
236 			break;
237 		case EINVAL:		/* CPU out of range */
238 			rval = EINVAL;
239 			iset.status = PCITOOL_INVALID_CPUID;
240 			break;
241 		case ENOTSUP:		/* Requested PSM intr ops missing */
242 			rval = ENOTSUP;
243 			iset.status = PCITOOL_IO_ERROR;
244 			break;
245 		}
246 	}
247 
248 	/* Return original CPU. */
249 	iset.cpu_id = old_cpu;
250 
251 	/* Return new vector */
252 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
253 		iset.ino = APIX_VIRTVEC_VECTOR(info_hdl.ih_vector);
254 	}
255 
256 done_set_intr:
257 	iset.drvr_version = PCITOOL_VERSION;
258 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
259 		rval = EFAULT;
260 	return (rval);
261 }
262 
263 
264 /* It is assumed that dip != NULL */
265 static void
266 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
267 {
268 	(void) strncpy(devs->driver_name,
269 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
270 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
271 	(void) ddi_pathname(dip, devs->path);
272 	devs->dev_inst = ddi_get_instance(dip);
273 }
274 
275 static int
276 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
277 {
278 	/* Array part isn't used here, but oh well... */
279 	pcitool_intr_get_t partial_iget;
280 	pcitool_intr_get_t *iget = &partial_iget;
281 	size_t	iget_kmem_alloc_size = 0;
282 	uint8_t num_devs_ret = 0;
283 	int copyout_rval;
284 	int rval = SUCCESS;
285 	int circ;
286 	int i;
287 	ddi_intr_handle_impl_t info_hdl;
288 	apic_get_intr_t intr_info;
289 	apic_get_type_t type_info;
290 
291 	/* Read in just the header part, no array section. */
292 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
293 	    DDI_SUCCESS)
294 		return (EFAULT);
295 
296 	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
297 		partial_iget.status = PCITOOL_IO_ERROR;
298 		partial_iget.num_devs_ret = 0;
299 		rval = ENOTSUP;
300 		goto done_get_intr;
301 	}
302 
303 	info_hdl.ih_private = &type_info;
304 
305 	if ((*psm_intr_ops)(NULL, &info_hdl,
306 	    PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
307 		iget->status = PCITOOL_IO_ERROR;
308 		iget->num_devs_ret = 0;
309 		rval = EINVAL;
310 		goto done_get_intr;
311 	}
312 
313 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
314 		if (partial_iget.cpu_id > type_info.avgi_num_cpu) {
315 			partial_iget.status = PCITOOL_INVALID_CPUID;
316 			partial_iget.num_devs_ret = 0;
317 			rval = EINVAL;
318 			goto done_get_intr;
319 		}
320 	}
321 
322 	/* Validate argument. */
323 	if ((partial_iget.ino & APIX_VIRTVEC_VECMASK) >
324 	    type_info.avgi_num_intr) {
325 		partial_iget.status = PCITOOL_INVALID_INO;
326 		partial_iget.num_devs_ret = 0;
327 		rval = EINVAL;
328 		goto done_get_intr;
329 	}
330 
331 	num_devs_ret = partial_iget.num_devs_ret;
332 	intr_info.avgi_dip_list = NULL;
333 	intr_info.avgi_req_flags =
334 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
335 	/*
336 	 * For this locally-declared and used handle, ih_private will contain a
337 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
338 	 * global interrupt handling.
339 	 */
340 	info_hdl.ih_private = &intr_info;
341 
342 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
343 		info_hdl.ih_vector =
344 		    APIX_VIRTVECTOR(partial_iget.cpu_id, partial_iget.ino);
345 	} else {
346 		info_hdl.ih_vector = partial_iget.ino;
347 	}
348 
349 	/* Caller wants device information returned. */
350 	if (num_devs_ret > 0) {
351 
352 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
353 
354 		/*
355 		 * Allocate room.
356 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
357 		 */
358 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
359 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
360 
361 		/* Read in whole structure to verify there's room. */
362 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
363 		    SUCCESS) {
364 
365 			/* Be consistent and just return EFAULT here. */
366 			kmem_free(iget, iget_kmem_alloc_size);
367 
368 			return (EFAULT);
369 		}
370 	}
371 
372 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
373 	iget->ino = info_hdl.ih_vector;
374 
375 	/*
376 	 * Lock device tree branch from the pci root nexus on down if info will
377 	 * be extracted from dips returned from the tree.
378 	 */
379 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
380 		ndi_devi_enter(dip, &circ);
381 	}
382 
383 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
384 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
385 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
386 		iget->status = PCITOOL_IO_ERROR;
387 		iget->num_devs_ret = 0;
388 		rval = EINVAL;
389 		goto done_get_intr;
390 	}
391 
392 	/*
393 	 * Fill in the pcitool_intr_get_t to be returned,
394 	 * with the CPU, num_devs_ret and num_devs.
395 	 */
396 	if (intr_info.avgi_cpu_id == IRQ_UNBOUND ||
397 	    intr_info.avgi_cpu_id == IRQ_UNINIT)
398 		iget->cpu_id = 0;
399 	else
400 		iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
401 
402 	/* Number of devices returned by apic. */
403 	iget->num_devs = intr_info.avgi_num_devs;
404 
405 	/* Device info was returned. */
406 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
407 
408 		/*
409 		 * num devs returned is num devs ret by apic,
410 		 * space permitting.
411 		 */
412 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
413 
414 		/*
415 		 * Loop thru list of dips and extract driver, name and instance.
416 		 * Fill in the pcitool_intr_dev_t's with this info.
417 		 */
418 		for (i = 0; i < iget->num_devs_ret; i++)
419 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
420 			    &iget->dev[i]);
421 
422 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
423 		kmem_free(intr_info.avgi_dip_list,
424 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
425 	}
426 
427 done_get_intr:
428 
429 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
430 		ndi_devi_exit(dip, circ);
431 	}
432 
433 	iget->drvr_version = PCITOOL_VERSION;
434 	copyout_rval = ddi_copyout(iget, arg,
435 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
436 
437 	if (iget_kmem_alloc_size > 0)
438 		kmem_free(iget, iget_kmem_alloc_size);
439 
440 	if (copyout_rval != DDI_SUCCESS)
441 		rval = EFAULT;
442 
443 	return (rval);
444 }
445 
446 /*ARGSUSED*/
447 static int
448 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
449 {
450 	pcitool_intr_info_t intr_info;
451 	ddi_intr_handle_impl_t info_hdl;
452 	int rval = SUCCESS;
453 	apic_get_type_t type_info;
454 
455 	/* If we need user_version, and to ret same user version as passed in */
456 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
457 	    DDI_SUCCESS) {
458 		if (pcitool_debug)
459 			prom_printf("Error reading arguments\n");
460 		return (EFAULT);
461 	}
462 
463 	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
464 		return (ENOTSUP);
465 
466 	info_hdl.ih_private = &type_info;
467 
468 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
469 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
470 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
471 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
472 		intr_info.ctlr_version = 0;
473 		intr_info.num_intr = APIC_MAX_VECTOR;
474 	} else {
475 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
476 		intr_info.num_cpu = type_info.avgi_num_cpu;
477 		if (strcmp(type_info.avgi_type,
478 		    APIC_PCPLUSMP_NAME) == 0) {
479 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
480 			intr_info.num_intr = type_info.avgi_num_intr;
481 		} else if (strcmp(type_info.avgi_type,
482 		    APIC_APIX_NAME) == 0) {
483 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_APIX;
484 			intr_info.num_intr = type_info.avgi_num_intr;
485 		} else {
486 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
487 			intr_info.num_intr = APIC_MAX_VECTOR;
488 		}
489 	}
490 
491 	intr_info.drvr_version = PCITOOL_VERSION;
492 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
493 	    DDI_SUCCESS) {
494 		if (pcitool_debug)
495 			prom_printf("Error returning arguments.\n");
496 		rval = EFAULT;
497 	}
498 
499 	return (rval);
500 }
501 
502 
503 
504 /*
505  * Main function for handling interrupt CPU binding requests and queries.
506  * Need to implement later
507  */
508 int
509 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
510 {
511 	int rval;
512 
513 	switch (cmd) {
514 
515 	/* Associate a new CPU with a given vector */
516 	case PCITOOL_DEVICE_SET_INTR:
517 		rval = pcitool_set_intr(dip, arg, mode);
518 		break;
519 
520 	case PCITOOL_DEVICE_GET_INTR:
521 		rval = pcitool_get_intr(dip, arg, mode);
522 		break;
523 
524 	case PCITOOL_SYSTEM_INTR_INFO:
525 		rval = pcitool_intr_info(dip, arg, mode);
526 		break;
527 
528 	default:
529 		rval = ENOTSUP;
530 	}
531 
532 	return (rval);
533 }
534 
535 /*
536  * Perform register accesses on the nexus device itself.
537  * No explicit PCI nexus device for X86, so not applicable.
538  */
539 
540 /*ARGSUSED*/
541 int
542 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
543 {
544 	return (ENOTSUP);
545 }
546 
/*
 * Reverse the order of the first `size' bytes (in memory order) of data.
 * Bytes beyond `size' come back as zero.  Callers pass sizes of 1 to 8.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	union {
		uint64_t	whole;
		uint8_t		bytes[8];
	} in, out;
	int src;
	int dst;

	in.whole = data;
	out.whole = 0;

	/* Walk the source backwards while filling the result forwards. */
	for (dst = 0, src = size - 1; src >= 0; dst++, src--)
		out.bytes[dst] = in.bytes[src];

	return (out.whole);
}
569 
570 /*
571  * A note about ontrap handling:
572  *
573  * X86 systems on which this module was tested return FFs instead of bus errors
574  * when accessing devices with invalid addresses.  Ontrap handling, which
575  * gracefully handles kernel bus errors, is installed anyway for I/O and mem
576  * space accessing (not for pci config space), in case future X86 platforms
577  * require it.
578  */
579 
580 /* Access device.  prg is modified. */
581 static int
582 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
583     boolean_t io_access)
584 {
585 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
586 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
587 	int rval = SUCCESS;
588 	uint64_t local_data;
589 	pci_cfgacc_req_t req;
590 	uint32_t max_offset;
591 
592 	if ((size <= 0) || (size > 8) || !ISP2(size)) {
593 		prg->status = PCITOOL_INVALID_SIZE;
594 		return (ENOTSUP);
595 	}
596 
597 	/*
598 	 * NOTE: there is no way to verify whether or not the address is
599 	 * valid other than that it is within the maximum offset.  The
600 	 * put functions return void and the get functions return -1 on error.
601 	 */
602 
603 	if (io_access)
604 		max_offset = pci_iocfg_max_offset;
605 	else
606 		max_offset = 0xFFF;
607 	if (prg->offset + size - 1 > max_offset) {
608 		prg->status = PCITOOL_INVALID_ADDRESS;
609 		return (ENOTSUP);
610 	}
611 
612 	prg->status = PCITOOL_SUCCESS;
613 
614 	req.rcdip = NULL;
615 	req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
616 	req.offset = prg->offset;
617 	req.size = size;
618 	req.write = write_flag;
619 	req.ioacc = io_access;
620 	if (write_flag) {
621 		if (big_endian) {
622 			local_data = pcitool_swap_endian(prg->data, size);
623 		} else {
624 			local_data = prg->data;
625 		}
626 		VAL64(&req) = local_data;
627 		pci_cfgacc_acc(&req);
628 	} else {
629 		pci_cfgacc_acc(&req);
630 		switch (size) {
631 		case 1:
632 			local_data = VAL8(&req);
633 			break;
634 		case 2:
635 			local_data = VAL16(&req);
636 			break;
637 		case 4:
638 			local_data = VAL32(&req);
639 			break;
640 		case 8:
641 			local_data = VAL64(&req);
642 			break;
643 		default:
644 			prg->status = PCITOOL_INVALID_ADDRESS;
645 			return (ENOTSUP);
646 		}
647 		if (big_endian) {
648 			prg->data =
649 			    pcitool_swap_endian(local_data, size);
650 		} else {
651 			prg->data = local_data;
652 		}
653 	}
654 	/*
655 	 * Check if legacy I/O config access is used, in which case the valid
656 	 * range varies with the I/O space mechanism used.
657 	 */
658 	if (req.ioacc && (prg->offset + size - 1 > pci_iocfg_max_offset)) {
659 		prg->status = PCITOOL_INVALID_ADDRESS;
660 		return (ENOTSUP);
661 	}
662 
663 	/* Set phys_addr only if MMIO is used */
664 	prg->phys_addr = 0;
665 	if (!req.ioacc && mcfg_mem_base != 0) {
666 		prg->phys_addr = mcfg_mem_base + prg->offset +
667 		    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
668 		    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
669 		    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
670 	}
671 
672 	return (rval);
673 }
674 
675 static int
676 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
677 {
678 	int port = (int)prg->phys_addr;
679 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
680 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
681 	volatile int rval = SUCCESS;
682 	on_trap_data_t otd;
683 	volatile uint64_t local_data;
684 
685 
686 	/*
687 	 * on_trap works like setjmp.
688 	 *
689 	 * A non-zero return here means on_trap has returned from an error.
690 	 *
691 	 * A zero return here means that on_trap has just returned from setup.
692 	 */
693 	if (on_trap(&otd, OT_DATA_ACCESS)) {
694 		no_trap();
695 		if (pcitool_debug)
696 			prom_printf(
697 			    "pcitool_io_access: on_trap caught an error...\n");
698 		prg->status = PCITOOL_INVALID_ADDRESS;
699 		return (EFAULT);
700 	}
701 
702 	if (write_flag) {
703 
704 		if (big_endian) {
705 			local_data = pcitool_swap_endian(prg->data, size);
706 		} else {
707 			local_data = prg->data;
708 		}
709 
710 		if (pcitool_debug)
711 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
712 			    size, port);
713 
714 		switch (size) {
715 		case 1:
716 			outb(port, (uint8_t)local_data);
717 			break;
718 		case 2:
719 			outw(port, (uint16_t)local_data);
720 			break;
721 		case 4:
722 			outl(port, (uint32_t)local_data);
723 			break;
724 		default:
725 			rval = ENOTSUP;
726 			prg->status = PCITOOL_INVALID_SIZE;
727 			break;
728 		}
729 	} else {
730 		if (pcitool_debug)
731 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
732 			    size, port);
733 
734 		switch (size) {
735 		case 1:
736 			local_data = inb(port);
737 			break;
738 		case 2:
739 			local_data = inw(port);
740 			break;
741 		case 4:
742 			local_data = inl(port);
743 			break;
744 		default:
745 			rval = ENOTSUP;
746 			prg->status = PCITOOL_INVALID_SIZE;
747 			break;
748 		}
749 
750 		if (rval == SUCCESS) {
751 			if (big_endian) {
752 				prg->data =
753 				    pcitool_swap_endian(local_data, size);
754 			} else {
755 				prg->data = local_data;
756 			}
757 		}
758 	}
759 
760 	no_trap();
761 	return (rval);
762 }
763 
764 static int
765 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
766 {
767 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
768 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
769 	volatile int rval = DDI_SUCCESS;
770 	on_trap_data_t otd;
771 	volatile uint64_t local_data;
772 
773 	/*
774 	 * on_trap works like setjmp.
775 	 *
776 	 * A non-zero return here means on_trap has returned from an error.
777 	 *
778 	 * A zero return here means that on_trap has just returned from setup.
779 	 */
780 	if (on_trap(&otd, OT_DATA_ACCESS)) {
781 		no_trap();
782 		if (pcitool_debug)
783 			prom_printf(
784 			    "pcitool_mem_access: on_trap caught an error...\n");
785 		prg->status = PCITOOL_INVALID_ADDRESS;
786 		return (EFAULT);
787 	}
788 
789 	if (write_flag) {
790 
791 		if (big_endian) {
792 			local_data = pcitool_swap_endian(prg->data, size);
793 		} else {
794 			local_data = prg->data;
795 		}
796 
797 		switch (size) {
798 		case 1:
799 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
800 			break;
801 		case 2:
802 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
803 			break;
804 		case 4:
805 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
806 			break;
807 		case 8:
808 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
809 			break;
810 		default:
811 			rval = ENOTSUP;
812 			prg->status = PCITOOL_INVALID_SIZE;
813 			break;
814 		}
815 	} else {
816 		switch (size) {
817 		case 1:
818 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
819 			break;
820 		case 2:
821 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
822 			break;
823 		case 4:
824 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
825 			break;
826 		case 8:
827 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
828 			break;
829 		default:
830 			rval = ENOTSUP;
831 			prg->status = PCITOOL_INVALID_SIZE;
832 			break;
833 		}
834 
835 		if (rval == SUCCESS) {
836 			if (big_endian) {
837 				prg->data =
838 				    pcitool_swap_endian(local_data, size);
839 			} else {
840 				prg->data = local_data;
841 			}
842 		}
843 	}
844 
845 	no_trap();
846 	return (rval);
847 }
848 
849 /*
850  * Map up to 2 pages which contain the address we want to access.
851  *
852  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
853  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
854  * We'll never have to map more than two pages.
855  */
856 
857 static uint64_t
858 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
859 {
860 
861 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
862 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
863 	void *virt_base;
864 	uint64_t returned_addr;
865 	pfn_t pfn;
866 
867 	if (pcitool_debug)
868 		prom_printf("pcitool_map: Called with PA:0x%p\n",
869 		    (void *)(uintptr_t)phys_addr);
870 
871 	*num_pages = 1;
872 
873 	/* Desired mapping would span more than two pages. */
874 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
875 		if (pcitool_debug)
876 			prom_printf("boundary violation: "
877 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
878 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
879 		return (0);
880 
881 	} else if ((offset + size) > MMU_PAGESIZE) {
882 		(*num_pages)++;
883 	}
884 
885 	/* Get page(s) of virtual space. */
886 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
887 	if (virt_base == NULL) {
888 		if (pcitool_debug)
889 			prom_printf("Couldn't get virtual base address.\n");
890 		return (0);
891 	}
892 
893 	if (pcitool_debug)
894 		prom_printf("Got base virtual address:0x%p\n", virt_base);
895 
896 #ifdef __xpv
897 	/*
898 	 * We should only get here if we are dom0.
899 	 * We're using a real device so we need to translate the MA to a PFN.
900 	 */
901 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
902 	pfn = xen_assign_pfn(mmu_btop(page_base));
903 #else
904 	pfn = btop(page_base);
905 #endif
906 
907 	/* Now map the allocated virtual space to the physical address. */
908 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
909 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
910 	    HAT_LOAD_LOCK);
911 
912 	returned_addr = ((uintptr_t)(virt_base)) + offset;
913 
914 	if (pcitool_debug)
915 		prom_printf("pcitool_map: returning VA:0x%p\n",
916 		    (void *)(uintptr_t)returned_addr);
917 
918 	return (returned_addr);
919 }
920 
921 /* Unmap the mapped page(s). */
922 static void
923 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
924 {
925 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
926 
927 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
928 	    HAT_UNLOAD_UNLOCK);
929 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
930 }
931 
932 
933 /* Perform register accesses on PCI leaf devices. */
934 /*ARGSUSED*/
935 int
936 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
937 {
938 	boolean_t	write_flag = B_FALSE;
939 	boolean_t	io_access = B_TRUE;
940 	int		rval = 0;
941 	pcitool_reg_t	prg;
942 	uint8_t		size;
943 
944 	uint64_t	base_addr;
945 	uint64_t	virt_addr;
946 	size_t		num_virt_pages;
947 
948 	switch (cmd) {
949 	case (PCITOOL_DEVICE_SET_REG):
950 		write_flag = B_TRUE;
951 
952 	/*FALLTHRU*/
953 	case (PCITOOL_DEVICE_GET_REG):
954 		if (pcitool_debug)
955 			prom_printf("pci_dev_reg_ops set/get reg\n");
956 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
957 		    DDI_SUCCESS) {
958 			if (pcitool_debug)
959 				prom_printf("Error reading arguments\n");
960 			return (EFAULT);
961 		}
962 
963 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
964 			prg.status = PCITOOL_OUT_OF_RANGE;
965 			rval = EINVAL;
966 			goto done_reg;
967 		}
968 
969 		if (pcitool_debug)
970 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
971 			    prg.bus_no, prg.dev_no, prg.func_no);
972 		/* Validate address arguments of bus / dev / func */
973 		if (((prg.bus_no &
974 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
975 		    prg.bus_no) ||
976 		    ((prg.dev_no &
977 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
978 		    prg.dev_no) ||
979 		    ((prg.func_no &
980 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
981 		    prg.func_no)) {
982 			prg.status = PCITOOL_INVALID_ADDRESS;
983 			rval = EINVAL;
984 			goto done_reg;
985 		}
986 
987 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
988 
989 		/* Proper config space desired. */
990 		if (prg.barnum == 0) {
991 
992 			if (pcitool_debug)
993 				prom_printf(
994 				    "config access: offset:0x%" PRIx64 ", "
995 				    "phys_addr:0x%" PRIx64 "\n",
996 				    prg.offset, prg.phys_addr);
997 
998 			if (prg.offset >= max_cfg_size) {
999 				prg.status = PCITOOL_OUT_OF_RANGE;
1000 				rval = EINVAL;
1001 				goto done_reg;
1002 			}
1003 			if (max_cfg_size == PCIE_CONF_HDR_SIZE)
1004 				io_access = B_FALSE;
1005 
1006 			rval = pcitool_cfg_access(&prg, write_flag, io_access);
1007 			if (pcitool_debug)
1008 				prom_printf(
1009 				    "config access: data:0x%" PRIx64 "\n",
1010 				    prg.data);
1011 
1012 		/* IO/ MEM/ MEM64 space. */
1013 		} else {
1014 
1015 			pcitool_reg_t	prg2;
1016 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1017 
1018 			/*
1019 			 * Translate BAR number into offset of the BAR in
1020 			 * the device's config space.
1021 			 */
1022 			prg2.offset = pci_bars[prg2.barnum];
1023 			prg2.acc_attr =
1024 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1025 
1026 			if (pcitool_debug)
1027 				prom_printf(
1028 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1029 				    prg2.barnum, prg2.offset);
1030 			/*
1031 			 * Get Bus Address Register (BAR) from config space.
1032 			 * prg2.offset is the offset into config space of the
1033 			 * BAR desired.  prg.status is modified on error.
1034 			 */
1035 			rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
1036 			if (rval != SUCCESS) {
1037 				if (pcitool_debug)
1038 					prom_printf("BAR access failed\n");
1039 				prg.status = prg2.status;
1040 				goto done_reg;
1041 			}
1042 			/*
1043 			 * Reference proper PCI space based on the BAR.
1044 			 * If 64 bit MEM space, need to load other half of the
1045 			 * BAR first.
1046 			 */
1047 
1048 			if (pcitool_debug)
1049 				prom_printf("bar returned is 0x%" PRIx64 "\n",
1050 				    prg2.data);
1051 			if (!prg2.data) {
1052 				if (pcitool_debug)
1053 					prom_printf("BAR data == 0\n");
1054 				rval = EINVAL;
1055 				prg.status = PCITOOL_INVALID_ADDRESS;
1056 				goto done_reg;
1057 			}
1058 			if (prg2.data == 0xffffffff) {
1059 				if (pcitool_debug)
1060 					prom_printf("BAR data == -1\n");
1061 				rval = EINVAL;
1062 				prg.status = PCITOOL_INVALID_ADDRESS;
1063 				goto done_reg;
1064 			}
1065 
1066 			/*
1067 			 * BAR has bits saying this space is IO space, unless
1068 			 * this is the ROM address register.
1069 			 */
1070 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1071 			    PCI_BASE_SPACE_IO) &&
1072 			    (prg2.offset != PCI_CONF_ROM)) {
1073 				if (pcitool_debug)
1074 					prom_printf("IO space\n");
1075 
1076 				prg2.data &= PCI_BASE_IO_ADDR_M;
1077 				prg.phys_addr = prg2.data + prg.offset;
1078 
1079 				rval = pcitool_io_access(&prg, write_flag);
1080 				if ((rval != SUCCESS) && (pcitool_debug))
1081 					prom_printf("IO access failed\n");
1082 
1083 				goto done_reg;
1084 
1085 
1086 			/*
1087 			 * BAR has bits saying this space is 64 bit memory
1088 			 * space, unless this is the ROM address register.
1089 			 *
1090 			 * The 64 bit address stored in two BAR cells is not
1091 			 * necessarily aligned on an 8-byte boundary.
1092 			 * Need to keep the first 4 bytes read,
1093 			 * and do a separate read of the high 4 bytes.
1094 			 */
1095 
1096 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1097 			    (prg2.offset != PCI_CONF_ROM)) {
1098 
1099 				uint32_t low_bytes =
1100 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1101 
1102 				/*
1103 				 * Don't try to read the next 4 bytes
1104 				 * past the end of BARs.
1105 				 */
1106 				if (prg2.offset >= PCI_CONF_BASE5) {
1107 					prg.status = PCITOOL_OUT_OF_RANGE;
1108 					rval = EIO;
1109 					goto done_reg;
1110 				}
1111 
1112 				/*
1113 				 * Access device.
1114 				 * prg2.status is modified on error.
1115 				 */
1116 				prg2.offset += 4;
1117 				rval = pcitool_cfg_access(&prg2,
1118 				    B_FALSE, B_TRUE);
1119 				if (rval != SUCCESS) {
1120 					prg.status = prg2.status;
1121 					goto done_reg;
1122 				}
1123 
1124 				if (prg2.data == 0xffffffff) {
1125 					prg.status = PCITOOL_INVALID_ADDRESS;
1126 					prg.status = EFAULT;
1127 					goto done_reg;
1128 				}
1129 
1130 				prg2.data = (prg2.data << 32) + low_bytes;
1131 				if (pcitool_debug)
1132 					prom_printf(
1133 					    "64 bit mem space.  "
1134 					    "64-bit bar is 0x%" PRIx64 "\n",
1135 					    prg2.data);
1136 
1137 			/* Mem32 space, including ROM */
1138 			} else {
1139 
1140 				if (prg2.offset == PCI_CONF_ROM) {
1141 					if (pcitool_debug)
1142 						prom_printf(
1143 						    "Additional ROM "
1144 						    "checking\n");
1145 					/* Can't write to ROM */
1146 					if (write_flag) {
1147 						prg.status = PCITOOL_ROM_WRITE;
1148 						rval = EIO;
1149 						goto done_reg;
1150 
1151 					/* ROM disabled for reading */
1152 					} else if (!(prg2.data & 0x00000001)) {
1153 						prg.status =
1154 						    PCITOOL_ROM_DISABLED;
1155 						rval = EIO;
1156 						goto done_reg;
1157 					}
1158 				}
1159 
1160 				if (pcitool_debug)
1161 					prom_printf("32 bit mem space\n");
1162 			}
1163 
1164 			/* Common code for all IO/MEM range spaces. */
1165 
1166 			base_addr = prg2.data;
1167 			if (pcitool_debug)
1168 				prom_printf(
1169 				    "addr portion of bar is 0x%" PRIx64 ", "
1170 				    "base=0x%" PRIx64 ", "
1171 				    "offset:0x%" PRIx64 "\n",
1172 				    prg2.data, base_addr, prg.offset);
1173 			/*
1174 			 * Use offset provided by caller to index into
1175 			 * desired space, then access.
1176 			 * Note that prg.status is modified on error.
1177 			 */
1178 			prg.phys_addr = base_addr + prg.offset;
1179 
1180 			virt_addr = pcitool_map(prg.phys_addr, size,
1181 			    &num_virt_pages);
1182 			if (virt_addr == 0) {
1183 				prg.status = PCITOOL_IO_ERROR;
1184 				rval = EIO;
1185 				goto done_reg;
1186 			}
1187 
1188 			rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1189 			pcitool_unmap(virt_addr, num_virt_pages);
1190 		}
1191 done_reg:
1192 		prg.drvr_version = PCITOOL_VERSION;
1193 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1194 		    DDI_SUCCESS) {
1195 			if (pcitool_debug)
1196 				prom_printf("Error returning arguments.\n");
1197 			rval = EFAULT;
1198 		}
1199 		break;
1200 	default:
1201 		rval = ENOTTY;
1202 		break;
1203 	}
1204 	return (rval);
1205 }
1206