xref: /illumos-gate/usr/src/uts/sun4v/io/px/px_tools_4v.c (revision adecd3c68045d04dc367d30faf2eb5cac1f45d5a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/sysmacros.h>
29 #include <sys/machsystm.h>
30 #include <sys/cpuvar.h>
31 #include <sys/ddi_implfuncs.h>
32 #include <sys/hypervisor_api.h>
33 #include <sys/hsvc.h>
34 #include <px_obj.h>
35 #include <sys/pci_tools.h>
36 #include <px_tools_var.h>
37 #include "px_lib4v.h"
38 #include <px_tools_ext.h>
39 
/*
 * How long (in microseconds) the protected-access environment is kept in
 * place after a config space access and after an IO/memory space access, so
 * that any error the access provokes surfaces while it can still be caught.
 * The more bridges and switches there are, the larger these values must be.
 *
 * Note: this is a workaround until a better solution is found.  Even though
 * the values are high, a device path with enough bridges and switches can
 * still defeat them.  Other PIL 15 interrupts, unrelated to the ones being
 * enveloped, could also delay processing of the interrupt we are trying to
 * protect.
 */
int pxtool_cfg_delay_usec = 2500;
int pxtool_iomem_delay_usec = 25000;

/* The hypervisor currently offers no way to query this value. */
#define	INTERRUPT_MAPPING_ENTRIES	64

/* Number of inos per root complex. */
int pxtool_num_inos = INTERRUPT_MAPPING_ENTRIES;

/*
 * Outcome of verifying the hypervisor version for the DIAG functions
 * ra2pa and hpriv: not checked yet, checked and unusable, or checked and
 * usable.
 */
enum {
	PXTOOL_HYP_VER_UNINIT = 0,
	PXTOOL_HYP_VER_BAD = 1,
	PXTOOL_HYP_VER_OK = 2
};

/* Cached verification outcome; starts out as "not checked yet". */
static int pxtool_hyp_version = PXTOOL_HYP_VER_UNINIT;
65 
/*
 * Reverse the byte order of the low-order `size' bytes of `data'.
 *
 * data: value whose least-significant `size' bytes are to be reversed.
 * size: number of bytes to reverse.  Values larger than 8 are treated as 8;
 *	 values smaller than 1 produce 0.
 *
 * Returns the reversed bytes in the low-order `size' bytes of the result.
 * All higher-order bytes of the result are zero.
 *
 * The reversal is done arithmetically on the value instead of through a
 * byte-array overlay, so the result does not depend on the byte order of
 * the machine running the code and no array index can go out of range.
 */
static uint64_t
pxtool_swap_endian(uint64_t data, int size)
{
	uint64_t swapped = 0;
	int i;

	/* Never consume more bytes than a uint64_t holds. */
	if (size > (int)sizeof (uint64_t))
		size = (int)sizeof (uint64_t);

	/*
	 * Peel bytes off the low end of the input and push them onto the
	 * low end of the result; the first byte taken ends up highest.
	 */
	for (i = 0; i < size; i++) {
		swapped = (swapped << 8) | (data & 0xff);
		data >>= 8;
	}

	return (swapped);
}
88 
89 static void
90 pxtool_validate_diag_hyp_svc(dev_info_t *dip, int *diag_svc_status_p)
91 {
92 	uint64_t pxtool_diag_maj_ver;
93 	uint64_t pxtool_diag_min_ver;
94 	int ret;
95 
96 	if (*diag_svc_status_p == PXTOOL_HYP_VER_UNINIT) {
97 
98 		*diag_svc_status_p = PXTOOL_HYP_VER_BAD;
99 
100 		/*
101 		 * Verify that hypervisor DIAG API has been
102 		 * negotiated (by unix).
103 		 */
104 		if ((ret = hsvc_version(HSVC_GROUP_DIAG,
105 		    &pxtool_diag_maj_ver, &pxtool_diag_min_ver)) != 0) {
106 			DBG(DBG_TOOLS, dip,
107 			    "diag hypervisor svc not negotiated: "
108 			    "grp:0x%lx, errno:%d\n", HSVC_GROUP_DIAG, ret);
109 
110 		} else if (pxtool_diag_maj_ver == 1) {
111 			/*
112 			 * Major version 1 is OK.
113 			 *
114 			 * Code maintainers: if the version changes, check for
115 			 * API changes in hv_ra2pa() and hv_hpriv() before
116 			 * accepting the new version.
117 			 */
118 			*diag_svc_status_p = PXTOOL_HYP_VER_OK;
119 
120 		} else {
121 			DBG(DBG_TOOLS, dip,
122 			    "diag hypervisor svc: bad major number: "
123 			    "grp:0x%lx, maj:0x%lx, min:0x%lx\n",
124 			    HSVC_GROUP_DIAG, pxtool_diag_maj_ver,
125 			    pxtool_diag_min_ver);
126 		}
127 	}
128 }
129 
130 static int
131 pxtool_phys_access(px_t *px_p, uintptr_t dev_addr,
132     uint64_t *data_p, boolean_t is_big_endian, boolean_t is_write)
133 {
134 	uint64_t rfunc, pfunc;
135 	uint64_t rdata_addr, pdata_addr;
136 	uint64_t to_addr, from_addr;
137 	uint64_t local_data;
138 	int rval;
139 	dev_info_t *dip = px_p->px_dip;
140 
141 	DBG(DBG_TOOLS, dip,
142 	    "pxtool_phys_access: dev_addr:0x%" PRIx64 "\n", dev_addr);
143 	DBG(DBG_TOOLS, dip, "    data_addr:0x%" PRIx64 ", is_write:%s\n",
144 	    data_p, (is_write ? "yes" : "no"));
145 
146 	if (pxtool_hyp_version != PXTOOL_HYP_VER_OK) {
147 		pxtool_validate_diag_hyp_svc(dip, &pxtool_hyp_version);
148 		if (pxtool_hyp_version != PXTOOL_HYP_VER_OK) {
149 			DBG(DBG_TOOLS, dip, "Couldn't validate diag hyp svc\n");
150 			return (EPERM);
151 		}
152 	}
153 
154 	if ((rfunc = va_to_pa((void *)px_phys_acc_4v))  == (uint64_t)-1) {
155 		DBG(DBG_TOOLS, dip, "Error getting real addr for function\n");
156 		return (EIO);
157 	}
158 
159 	if ((pfunc = hv_ra2pa(rfunc)) == -1) {
160 		DBG(DBG_TOOLS, dip, "Error getting phys addr for function\n");
161 		return (EIO);
162 	}
163 
164 	if ((rdata_addr = va_to_pa((void *)&local_data))  == (uint64_t)-1) {
165 		DBG(DBG_TOOLS, dip, "Error getting real addr for data_p\n");
166 		return (EIO);
167 	}
168 
169 	if ((pdata_addr = hv_ra2pa(rdata_addr)) == -1) {
170 		DBG(DBG_TOOLS, dip, "Error getting phys addr for data ptr\n");
171 		return (EIO);
172 	}
173 
174 	if (is_write) {
175 		to_addr = dev_addr;
176 		from_addr = pdata_addr;
177 
178 		if (is_big_endian)
179 			local_data = *data_p;
180 		else
181 			local_data =
182 			    pxtool_swap_endian(*data_p, sizeof (uint64_t));
183 	} else {
184 		to_addr = pdata_addr;
185 		from_addr = dev_addr;
186 	}
187 
188 	rval = hv_hpriv((void *)pfunc, from_addr, to_addr, NULL);
189 	switch (rval) {
190 	case H_ENOACCESS:	/* Returned by non-debug hypervisor. */
191 		rval = ENOTSUP;
192 		break;
193 	case H_EOK:
194 		rval = SUCCESS;
195 		break;
196 	default:
197 		rval = EIO;
198 		break;
199 	}
200 
201 	if ((rval == SUCCESS) && (!is_write)) {
202 		if (is_big_endian)
203 			*data_p = local_data;
204 		else
205 			*data_p =
206 			    pxtool_swap_endian(local_data, sizeof (uint64_t));
207 	}
208 
209 	return (rval);
210 }
211 
212 /*
213  * This function is for PCI config space access.
214  * It assumes that offset, bdf, acc_attr are valid in prg_p.
215  * This function modifies prg_p status and data.
216  *
217  * prg_p->phys_addr isn't used.
218  */
219 
220 /*ARGSUSED*/
221 int
222 pxtool_pcicfg_access(px_t *px_p, pcitool_reg_t *prg_p,
223     uint64_t *data_p, boolean_t is_write)
224 {
225 	pci_cfg_data_t data;
226 	on_trap_data_t otd;
227 	dev_info_t *dip = px_p->px_dip;
228 	px_pec_t *pec_p = px_p->px_pec_p;
229 	pci_device_t bdf = PX_GET_BDF(prg_p);
230 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
231 	int rval = 0;
232 
233 	/* Alignment checking. */
234 	if (!IS_P2ALIGNED(prg_p->offset, size)) {
235 		DBG(DBG_TOOLS, dip, "not aligned.\n");
236 		prg_p->status = PCITOOL_NOT_ALIGNED;
237 		return (EINVAL);
238 	}
239 
240 	mutex_enter(&pec_p->pec_pokefault_mutex);
241 	pec_p->pec_ontrap_data = &otd;
242 
243 	if (is_write) {
244 
245 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
246 			data.qw = pxtool_swap_endian(*data_p, size);
247 		else
248 			data.qw = *data_p;
249 
250 		switch (size) {
251 			case sizeof (uint8_t):
252 				data.b = (uint8_t)data.qw;
253 				break;
254 			case sizeof (uint16_t):
255 				data.w = (uint16_t)data.qw;
256 				break;
257 			case sizeof (uint32_t):
258 				data.dw = (uint32_t)data.qw;
259 				break;
260 			case sizeof (uint64_t):
261 				break;
262 		}
263 
264 		DBG(DBG_TOOLS, dip, "put: bdf:0x%x, off:0x%" PRIx64 ", size:"
265 		    "0x%" PRIx64 ", data:0x%" PRIx64 "\n",
266 		    bdf, prg_p->offset, size, data.qw);
267 
268 		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
269 
270 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
271 			otd.ot_trampoline = (uintptr_t)&poke_fault;
272 			rval = hvio_config_put(px_p->px_dev_hdl, bdf,
273 			    prg_p->offset, size, data);
274 		} else
275 			rval = H_EIO;
276 
277 		if (otd.ot_trap & OT_DATA_ACCESS)
278 			rval = H_EIO;
279 
280 	} else {
281 
282 		data.qw = 0;
283 
284 		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
285 
286 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
287 			otd.ot_trampoline = (uintptr_t)&peek_fault;
288 			rval = hvio_config_get(px_p->px_dev_hdl, bdf,
289 			    prg_p->offset, size, &data);
290 		} else
291 			rval = H_EIO;
292 
293 		DBG(DBG_TOOLS, dip, "get: bdf:0x%x, off:0x%" PRIx64 ", size:"
294 		    "0x%" PRIx64 ", data:0x%" PRIx64 "\n",
295 		    bdf, prg_p->offset, size, data.qw);
296 
297 		switch (size) {
298 			case sizeof (uint8_t):
299 				*data_p = data.b;
300 				break;
301 			case sizeof (uint16_t):
302 				*data_p = data.w;
303 				break;
304 			case sizeof (uint32_t):
305 				*data_p = data.dw;
306 				break;
307 			case sizeof (uint64_t):
308 				*data_p = data.qw;
309 				break;
310 			default:
311 				DBG(DBG_TOOLS, dip,
312 				    "bad size:0x%" PRIx64 "\n", size);
313 				break;
314 		}
315 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
316 			*data_p = pxtool_swap_endian(*data_p, size);
317 	}
318 
319 	/*
320 	 * Workaround: delay taking down safe access env.
321 	 * For more info, see comments where pxtool_cfg_delay_usec is declared.
322 	 */
323 	if (pxtool_cfg_delay_usec > 0)
324 		drv_usecwait(pxtool_cfg_delay_usec);
325 
326 	no_trap();
327 	pec_p->pec_ontrap_data = NULL;
328 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
329 	mutex_exit(&pec_p->pec_pokefault_mutex);
330 
331 	if (rval != SUCCESS) {
332 		prg_p->status = PCITOOL_INVALID_ADDRESS;
333 		rval = EINVAL;
334 	} else
335 		prg_p->status = PCITOOL_SUCCESS;
336 
337 	return (rval);
338 }
339 
340 
341 /*
342  * This function is for PCI IO space and memory space access.
343  * It assumes that offset, bdf, acc_attr are current in prg_p.
344  * It assumes that prg_p->phys_addr is the final phys addr (including offset).
345  * This function modifies prg_p status and data.
346  */
347 int
348 pxtool_pciiomem_access(px_t *px_p, pcitool_reg_t *prg_p,
349     uint64_t *data_p, boolean_t is_write)
350 {
351 	on_trap_data_t otd;
352 	uint32_t io_stat = 0;
353 	dev_info_t *dip = px_p->px_dip;
354 	px_pec_t *pec_p = px_p->px_pec_p;
355 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
356 	int rval = 0;
357 
358 	/* Alignment checking. */
359 	if (!IS_P2ALIGNED(prg_p->offset, size)) {
360 		DBG(DBG_TOOLS, dip, "not aligned.\n");
361 		prg_p->status = PCITOOL_NOT_ALIGNED;
362 		return (EINVAL);
363 	}
364 
365 	mutex_enter(&pec_p->pec_pokefault_mutex);
366 	pec_p->pec_ontrap_data = &otd;
367 
368 	if (is_write) {
369 		pci_device_t bdf = PX_GET_BDF(prg_p);
370 
371 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
372 			*data_p = pxtool_swap_endian(*data_p, size);
373 
374 		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
375 
376 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
377 			otd.ot_trampoline = (uintptr_t)&poke_fault;
378 			rval = hvio_poke(px_p->px_dev_hdl, prg_p->phys_addr,
379 			    size, *data_p, bdf, &io_stat);
380 		} else
381 			rval = H_EIO;
382 
383 		if (otd.ot_trap & OT_DATA_ACCESS)
384 			rval = H_EIO;
385 
386 		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", bdf:0x%x, "
387 		    "rval:%d, io_stat:%d\n", prg_p->phys_addr, bdf,
388 		    rval, io_stat);
389 	} else {
390 
391 		*data_p = 0;
392 
393 		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
394 
395 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
396 			otd.ot_trampoline = (uintptr_t)&peek_fault;
397 			rval = hvio_peek(px_p->px_dev_hdl, prg_p->phys_addr,
398 			    size, &io_stat, data_p);
399 		} else
400 			rval = H_EIO;
401 
402 		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", "
403 		    "size:0x%" PRIx64 ", hdl:0x%" PRIx64 ", "
404 		    "rval:%d, io_stat:%d\n", prg_p->phys_addr,
405 		    size, px_p->px_dev_hdl, rval, io_stat);
406 		DBG(DBG_TOOLS, dip, "read data:0x%" PRIx64 "\n", *data_p);
407 
408 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
409 			*data_p = pxtool_swap_endian(*data_p, size);
410 	}
411 
412 	/*
413 	 * Workaround: delay taking down safe access env.
414 	 * For more info, see comment where pxtool_iomem_delay_usec is declared.
415 	 */
416 	if (pxtool_iomem_delay_usec > 0)
417 		delay(drv_usectohz(pxtool_iomem_delay_usec));
418 
419 	no_trap();
420 	pec_p->pec_ontrap_data = NULL;
421 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
422 	mutex_exit(&pec_p->pec_pokefault_mutex);
423 
424 	if (rval != SUCCESS) {
425 		prg_p->status = PCITOOL_INVALID_ADDRESS;
426 		rval = EINVAL;
427 	} else if (io_stat != SUCCESS) {
428 		prg_p->status = PCITOOL_IO_ERROR;
429 		rval = EIO;
430 	} else
431 		prg_p->status = PCITOOL_SUCCESS;
432 
433 	return (rval);
434 }
435 
436 
437 /*ARGSUSED*/
438 int
439 pxtool_dev_reg_ops_platchk(dev_info_t *dip, pcitool_reg_t *prg_p)
440 {
441 	return (SUCCESS);
442 }
443 
444 
445 /*
446  * Perform register accesses on the nexus device itself.
447  */
448 int
449 pxtool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
450 {
451 
452 	pcitool_reg_t		prg;
453 	size_t			size;
454 	px_t			*px_p = DIP_TO_STATE(dip);
455 	boolean_t		is_write = B_FALSE;
456 	uint32_t		rval = 0;
457 
458 	if (cmd == PCITOOL_NEXUS_SET_REG)
459 		is_write = B_TRUE;
460 
461 	DBG(DBG_TOOLS, dip, "pxtool_bus_reg_ops set/get reg\n");
462 
463 	/* Read data from userland. */
464 	if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t),
465 	    mode) != DDI_SUCCESS) {
466 		DBG(DBG_TOOLS, dip, "Error reading arguments\n");
467 		return (EFAULT);
468 	}
469 
470 	size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
471 
472 	DBG(DBG_TOOLS, dip, "raw bus:0x%x, dev:0x%x, func:0x%x\n",
473 	    prg.bus_no, prg.dev_no, prg.func_no);
474 	DBG(DBG_TOOLS, dip, "barnum:0x%x, offset:0x%" PRIx64 ", acc:0x%x\n",
475 	    prg.barnum, prg.offset, prg.acc_attr);
476 	DBG(DBG_TOOLS, dip, "data:0x%" PRIx64 ", phys_addr:0x%" PRIx64 "\n",
477 	    prg.data, prg.phys_addr);
478 
479 	/*
480 	 * If bank num == ff, base phys addr passed in from userland.
481 	 *
482 	 * Normal bank specification is invalid, as there is no OBP property to
483 	 * back it up.
484 	 */
485 	if (prg.barnum != PCITOOL_BASE) {
486 		prg.status = PCITOOL_OUT_OF_RANGE;
487 		rval = EINVAL;
488 		goto done;
489 	}
490 
491 	/* Allow only size of 8-bytes. */
492 	if (size != sizeof (uint64_t)) {
493 		prg.status = PCITOOL_INVALID_SIZE;
494 		rval = EINVAL;
495 		goto done;
496 	}
497 
498 	/* Alignment checking. */
499 	if (!IS_P2ALIGNED(prg.offset, size)) {
500 		DBG(DBG_TOOLS, dip, "not aligned.\n");
501 		prg.status = PCITOOL_NOT_ALIGNED;
502 		rval = EINVAL;
503 		goto done;
504 	}
505 
506 	prg.phys_addr += prg.offset;
507 
508 	/*
509 	 * Only the hypervisor can access nexus registers.  As a result, there
510 	 * can be no error recovery in the OS.  If there is an error, the
511 	 * system will go down, but with a trap type 7f.  The OS cannot
512 	 * intervene with this kind of trap.
513 	 */
514 
515 	/* Access device.  prg.status is modified. */
516 	rval = pxtool_phys_access(px_p, prg.phys_addr, &prg.data,
517 	    PCITOOL_ACC_IS_BIG_ENDIAN(prg.acc_attr), is_write);
518 done:
519 	if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t),
520 	    mode) != DDI_SUCCESS) {
521 		DBG(DBG_TOOLS, dip, "Copyout failed.\n");
522 		return (EFAULT);
523 	}
524 
525 	return (rval);
526 }
527