xref: /illumos-gate/usr/src/uts/sun4v/io/px/px_tools_4v.c (revision 5bbb4db2c3f208d12bf0fd11769728f9e5ba66a2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/sysmacros.h>
27 #include <sys/machsystm.h>
28 #include <sys/cpuvar.h>
29 #include <sys/ddi_implfuncs.h>
30 #include <sys/hypervisor_api.h>
31 #include <sys/hsvc.h>
32 #include <px_obj.h>
33 #include <sys/pci_tools.h>
34 #include <px_tools_var.h>
35 #include "px_lib4v.h"
36 #include <px_tools_ext.h>
37 
/*
 * Time, in microseconds, that the protected ("safe access") environment is
 * kept in place after a config space access (pxtool_cfg_delay_usec) or an
 * IO/memory space access (pxtool_iomem_delay_usec), so that an error which
 * surfaces late is still caught inside it.  The more bridges and switches
 * there are in the device path, the larger these values need to be.
 *
 * Note: this is a workaround until a better solution is found.  However
 * high the number, a deep enough device path can still defeat it.  Other
 * PIL 15 interrupts may also delay processing of the interrupt being
 * protected.
 */
int pxtool_cfg_delay_usec = 2500;
int pxtool_iomem_delay_usec = 25000;

/* The hypervisor currently offers no way to query this value. */
#define	INTERRUPT_MAPPING_ENTRIES	64

/* Number of inos per root complex. */
int pxtool_num_inos = INTERRUPT_MAPPING_ENTRIES;

/*
 * State of the hypervisor DIAG API version check that guards use of the
 * DIAG functions ra2pa and hpriv.
 */
enum {
	PXTOOL_HYP_VER_UNINIT = 0,	/* not checked yet */
	PXTOOL_HYP_VER_BAD = 1,		/* checked, not usable */
	PXTOOL_HYP_VER_OK = 2		/* checked, usable */
};

static int pxtool_hyp_version = PXTOOL_HYP_VER_UNINIT;
63 
/*
 * Swap endianness.
 *
 * Reverses the order of the `size' bytes of `data' that sit at the highest
 * byte addresses of the 64-bit value (on a big-endian host these are its
 * low-order bytes).  Every byte outside that span is returned as zero.
 * A size of zero or less yields 0.  The size must not exceed
 * sizeof (uint64_t); no bounds check is made.
 */
static uint64_t
pxtool_swap_endian(uint64_t data, int size)
{
	uint64_t swapped = 0;
	const uint8_t *src = (const uint8_t *)&data;
	uint8_t *dst = (uint8_t *)&swapped;
	int from = (int)sizeof (uint64_t) - size;
	int to = (int)sizeof (uint64_t) - 1;

	/* Walk the span forwards in the source, backwards in the result. */
	while (from < (int)sizeof (uint64_t))
		dst[to--] = src[from++];

	return (swapped);
}
86 
87 static void
88 pxtool_validate_diag_hyp_svc(dev_info_t *dip, int *diag_svc_status_p)
89 {
90 	uint64_t pxtool_diag_maj_ver;
91 	uint64_t pxtool_diag_min_ver;
92 	int ret;
93 
94 	if (*diag_svc_status_p == PXTOOL_HYP_VER_UNINIT) {
95 
96 		*diag_svc_status_p = PXTOOL_HYP_VER_BAD;
97 
98 		/*
99 		 * Verify that hypervisor DIAG API has been
100 		 * negotiated (by unix).
101 		 */
102 		if ((ret = hsvc_version(HSVC_GROUP_DIAG,
103 		    &pxtool_diag_maj_ver, &pxtool_diag_min_ver)) != 0) {
104 			DBG(DBG_TOOLS, dip,
105 			    "diag hypervisor svc not negotiated: "
106 			    "grp:0x%lx, errno:%d\n", HSVC_GROUP_DIAG, ret);
107 
108 		} else if (pxtool_diag_maj_ver == 1) {
109 			/*
110 			 * Major version 1 is OK.
111 			 *
112 			 * Code maintainers: if the version changes, check for
113 			 * API changes in hv_ra2pa() and hv_hpriv() before
114 			 * accepting the new version.
115 			 */
116 			*diag_svc_status_p = PXTOOL_HYP_VER_OK;
117 
118 		} else {
119 			DBG(DBG_TOOLS, dip,
120 			    "diag hypervisor svc: bad major number: "
121 			    "grp:0x%lx, maj:0x%lx, min:0x%lx\n",
122 			    HSVC_GROUP_DIAG, pxtool_diag_maj_ver,
123 			    pxtool_diag_min_ver);
124 		}
125 	}
126 }
127 
128 static int
129 pxtool_phys_access(px_t *px_p, uintptr_t dev_addr,
130     uint64_t *data_p, boolean_t is_big_endian, boolean_t is_write)
131 {
132 	uint64_t rfunc, pfunc;
133 	uint64_t rdata_addr, pdata_addr;
134 	uint64_t to_addr, from_addr;
135 	uint64_t local_data;
136 	int rval;
137 	dev_info_t *dip = px_p->px_dip;
138 
139 	DBG(DBG_TOOLS, dip,
140 	    "pxtool_phys_access: dev_addr:0x%" PRIx64 "\n", dev_addr);
141 	DBG(DBG_TOOLS, dip, "    data_addr:0x%" PRIx64 ", is_write:%s\n",
142 	    data_p, (is_write ? "yes" : "no"));
143 
144 	if (pxtool_hyp_version != PXTOOL_HYP_VER_OK) {
145 		pxtool_validate_diag_hyp_svc(dip, &pxtool_hyp_version);
146 		if (pxtool_hyp_version != PXTOOL_HYP_VER_OK) {
147 			DBG(DBG_TOOLS, dip, "Couldn't validate diag hyp svc\n");
148 			return (EPERM);
149 		}
150 	}
151 
152 	if ((rfunc = va_to_pa((void *)px_phys_acc_4v))  == (uint64_t)-1) {
153 		DBG(DBG_TOOLS, dip, "Error getting real addr for function\n");
154 		return (EIO);
155 	}
156 
157 	if ((pfunc = hv_ra2pa(rfunc)) == -1) {
158 		DBG(DBG_TOOLS, dip, "Error getting phys addr for function\n");
159 		return (EIO);
160 	}
161 
162 	if ((rdata_addr = va_to_pa((void *)&local_data))  == (uint64_t)-1) {
163 		DBG(DBG_TOOLS, dip, "Error getting real addr for data_p\n");
164 		return (EIO);
165 	}
166 
167 	if ((pdata_addr = hv_ra2pa(rdata_addr)) == -1) {
168 		DBG(DBG_TOOLS, dip, "Error getting phys addr for data ptr\n");
169 		return (EIO);
170 	}
171 
172 	if (is_write) {
173 		to_addr = dev_addr;
174 		from_addr = pdata_addr;
175 
176 		if (is_big_endian)
177 			local_data = *data_p;
178 		else
179 			local_data =
180 			    pxtool_swap_endian(*data_p, sizeof (uint64_t));
181 	} else {
182 		to_addr = pdata_addr;
183 		from_addr = dev_addr;
184 	}
185 
186 	rval = hv_hpriv((void *)pfunc, from_addr, to_addr, NULL);
187 	switch (rval) {
188 	case H_ENOACCESS:	/* Returned by non-debug hypervisor. */
189 		rval = ENOTSUP;
190 		break;
191 	case H_EOK:
192 		rval = SUCCESS;
193 		break;
194 	default:
195 		rval = EIO;
196 		break;
197 	}
198 
199 	if ((rval == SUCCESS) && (!is_write)) {
200 		if (is_big_endian)
201 			*data_p = local_data;
202 		else
203 			*data_p =
204 			    pxtool_swap_endian(local_data, sizeof (uint64_t));
205 	}
206 
207 	return (rval);
208 }
209 
210 /*
211  * This function is for PCI config space access.
212  * It assumes that offset, bdf, acc_attr are valid in prg_p.
213  * This function modifies prg_p status and data.
214  *
215  * prg_p->phys_addr isn't used.
216  */
217 
218 /*ARGSUSED*/
219 int
220 pxtool_pcicfg_access(px_t *px_p, pcitool_reg_t *prg_p,
221     uint64_t *data_p, boolean_t is_write)
222 {
223 	pci_cfg_data_t data;
224 	on_trap_data_t otd;
225 	dev_info_t *dip = px_p->px_dip;
226 	px_pec_t *pec_p = px_p->px_pec_p;
227 	pci_device_t bdf = PX_GET_BDF(prg_p);
228 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
229 	int rval = 0;
230 
231 	/* Alignment checking. */
232 	if (!IS_P2ALIGNED(prg_p->offset, size)) {
233 		DBG(DBG_TOOLS, dip, "not aligned.\n");
234 		prg_p->status = PCITOOL_NOT_ALIGNED;
235 		return (EINVAL);
236 	}
237 
238 	mutex_enter(&pec_p->pec_pokefault_mutex);
239 	pec_p->pec_ontrap_data = &otd;
240 
241 	if (is_write) {
242 
243 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
244 			data.qw = pxtool_swap_endian(*data_p, size);
245 		else
246 			data.qw = *data_p;
247 
248 		switch (size) {
249 			case sizeof (uint8_t):
250 				data.b = (uint8_t)data.qw;
251 				break;
252 			case sizeof (uint16_t):
253 				data.w = (uint16_t)data.qw;
254 				break;
255 			case sizeof (uint32_t):
256 				data.dw = (uint32_t)data.qw;
257 				break;
258 			case sizeof (uint64_t):
259 				break;
260 		}
261 
262 		DBG(DBG_TOOLS, dip, "put: bdf:0x%x, off:0x%" PRIx64 ", size:"
263 		    "0x%" PRIx64 ", data:0x%" PRIx64 "\n",
264 		    bdf, prg_p->offset, size, data.qw);
265 
266 		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
267 
268 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
269 			otd.ot_trampoline = (uintptr_t)&poke_fault;
270 			rval = hvio_config_put(px_p->px_dev_hdl, bdf,
271 			    prg_p->offset, size, data);
272 		} else
273 			rval = H_EIO;
274 
275 		if (otd.ot_trap & OT_DATA_ACCESS)
276 			rval = H_EIO;
277 
278 	} else {
279 
280 		data.qw = 0;
281 
282 		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
283 
284 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
285 			otd.ot_trampoline = (uintptr_t)&peek_fault;
286 			rval = hvio_config_get(px_p->px_dev_hdl, bdf,
287 			    prg_p->offset, size, &data);
288 		} else
289 			rval = H_EIO;
290 
291 		DBG(DBG_TOOLS, dip, "get: bdf:0x%x, off:0x%" PRIx64 ", size:"
292 		    "0x%" PRIx64 ", data:0x%" PRIx64 "\n",
293 		    bdf, prg_p->offset, size, data.qw);
294 		*data_p = data.qw;
295 
296 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
297 			*data_p = pxtool_swap_endian(*data_p, size);
298 	}
299 
300 	/*
301 	 * Workaround: delay taking down safe access env.
302 	 * For more info, see comments where pxtool_cfg_delay_usec is declared.
303 	 */
304 	if (pxtool_cfg_delay_usec > 0)
305 		drv_usecwait(pxtool_cfg_delay_usec);
306 
307 	no_trap();
308 	pec_p->pec_ontrap_data = NULL;
309 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
310 	mutex_exit(&pec_p->pec_pokefault_mutex);
311 
312 	if (rval != SUCCESS) {
313 		prg_p->status = PCITOOL_INVALID_ADDRESS;
314 		rval = EINVAL;
315 	} else
316 		prg_p->status = PCITOOL_SUCCESS;
317 
318 	return (rval);
319 }
320 
321 
322 /*
323  * This function is for PCI IO space and memory space access.
324  * It assumes that offset, bdf, acc_attr are current in prg_p.
325  * It assumes that prg_p->phys_addr is the final phys addr (including offset).
326  * This function modifies prg_p status and data.
327  */
328 int
329 pxtool_pciiomem_access(px_t *px_p, pcitool_reg_t *prg_p,
330     uint64_t *data_p, boolean_t is_write)
331 {
332 	on_trap_data_t otd;
333 	uint32_t io_stat = 0;
334 	dev_info_t *dip = px_p->px_dip;
335 	px_pec_t *pec_p = px_p->px_pec_p;
336 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
337 	int rval = 0;
338 
339 	/* Alignment checking. */
340 	if (!IS_P2ALIGNED(prg_p->offset, size)) {
341 		DBG(DBG_TOOLS, dip, "not aligned.\n");
342 		prg_p->status = PCITOOL_NOT_ALIGNED;
343 		return (EINVAL);
344 	}
345 
346 	mutex_enter(&pec_p->pec_pokefault_mutex);
347 	pec_p->pec_ontrap_data = &otd;
348 
349 	if (is_write) {
350 		pci_device_t bdf = PX_GET_BDF(prg_p);
351 
352 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
353 			*data_p = pxtool_swap_endian(*data_p, size);
354 
355 		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
356 
357 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
358 			otd.ot_trampoline = (uintptr_t)&poke_fault;
359 			rval = hvio_poke(px_p->px_dev_hdl, prg_p->phys_addr,
360 			    size, *data_p, bdf, &io_stat);
361 		} else
362 			rval = H_EIO;
363 
364 		if (otd.ot_trap & OT_DATA_ACCESS)
365 			rval = H_EIO;
366 
367 		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", bdf:0x%x, "
368 		    "rval:%d, io_stat:%d\n", prg_p->phys_addr, bdf,
369 		    rval, io_stat);
370 	} else {
371 
372 		*data_p = 0;
373 
374 		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
375 
376 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
377 			otd.ot_trampoline = (uintptr_t)&peek_fault;
378 			rval = hvio_peek(px_p->px_dev_hdl, prg_p->phys_addr,
379 			    size, &io_stat, data_p);
380 		} else
381 			rval = H_EIO;
382 
383 		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", "
384 		    "size:0x%" PRIx64 ", hdl:0x%" PRIx64 ", "
385 		    "rval:%d, io_stat:%d\n", prg_p->phys_addr,
386 		    size, px_p->px_dev_hdl, rval, io_stat);
387 		DBG(DBG_TOOLS, dip, "read data:0x%" PRIx64 "\n", *data_p);
388 
389 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
390 			*data_p = pxtool_swap_endian(*data_p, size);
391 	}
392 
393 	/*
394 	 * Workaround: delay taking down safe access env.
395 	 * For more info, see comment where pxtool_iomem_delay_usec is declared.
396 	 */
397 	if (pxtool_iomem_delay_usec > 0)
398 		delay(drv_usectohz(pxtool_iomem_delay_usec));
399 
400 	no_trap();
401 	pec_p->pec_ontrap_data = NULL;
402 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
403 	mutex_exit(&pec_p->pec_pokefault_mutex);
404 
405 	if (rval != SUCCESS) {
406 		prg_p->status = PCITOOL_INVALID_ADDRESS;
407 		rval = EINVAL;
408 	} else if (io_stat != SUCCESS) {
409 		prg_p->status = PCITOOL_IO_ERROR;
410 		rval = EIO;
411 	} else
412 		prg_p->status = PCITOOL_SUCCESS;
413 
414 	return (rval);
415 }
416 
417 
418 /*ARGSUSED*/
419 int
420 pxtool_dev_reg_ops_platchk(dev_info_t *dip, pcitool_reg_t *prg_p)
421 {
422 	return (SUCCESS);
423 }
424 
425 
426 /*
427  * Perform register accesses on the nexus device itself.
428  */
429 int
430 pxtool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
431 {
432 
433 	pcitool_reg_t		prg;
434 	size_t			size;
435 	px_t			*px_p = DIP_TO_STATE(dip);
436 	boolean_t		is_write = B_FALSE;
437 	uint32_t		rval = 0;
438 
439 	if (cmd == PCITOOL_NEXUS_SET_REG)
440 		is_write = B_TRUE;
441 
442 	DBG(DBG_TOOLS, dip, "pxtool_bus_reg_ops set/get reg\n");
443 
444 	/* Read data from userland. */
445 	if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t),
446 	    mode) != DDI_SUCCESS) {
447 		DBG(DBG_TOOLS, dip, "Error reading arguments\n");
448 		return (EFAULT);
449 	}
450 
451 	size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
452 
453 	DBG(DBG_TOOLS, dip, "raw bus:0x%x, dev:0x%x, func:0x%x\n",
454 	    prg.bus_no, prg.dev_no, prg.func_no);
455 	DBG(DBG_TOOLS, dip, "barnum:0x%x, offset:0x%" PRIx64 ", acc:0x%x\n",
456 	    prg.barnum, prg.offset, prg.acc_attr);
457 	DBG(DBG_TOOLS, dip, "data:0x%" PRIx64 ", phys_addr:0x%" PRIx64 "\n",
458 	    prg.data, prg.phys_addr);
459 
460 	/*
461 	 * If bank num == ff, base phys addr passed in from userland.
462 	 *
463 	 * Normal bank specification is invalid, as there is no OBP property to
464 	 * back it up.
465 	 */
466 	if (prg.barnum != PCITOOL_BASE) {
467 		prg.status = PCITOOL_OUT_OF_RANGE;
468 		rval = EINVAL;
469 		goto done;
470 	}
471 
472 	/* Allow only size of 8-bytes. */
473 	if (size != sizeof (uint64_t)) {
474 		prg.status = PCITOOL_INVALID_SIZE;
475 		rval = EINVAL;
476 		goto done;
477 	}
478 
479 	/* Alignment checking. */
480 	if (!IS_P2ALIGNED(prg.offset, size)) {
481 		DBG(DBG_TOOLS, dip, "not aligned.\n");
482 		prg.status = PCITOOL_NOT_ALIGNED;
483 		rval = EINVAL;
484 		goto done;
485 	}
486 
487 	prg.phys_addr += prg.offset;
488 
489 	/*
490 	 * Only the hypervisor can access nexus registers.  As a result, there
491 	 * can be no error recovery in the OS.  If there is an error, the
492 	 * system will go down, but with a trap type 7f.  The OS cannot
493 	 * intervene with this kind of trap.
494 	 */
495 
496 	/* Access device.  prg.status is modified. */
497 	rval = pxtool_phys_access(px_p, prg.phys_addr, &prg.data,
498 	    PCITOOL_ACC_IS_BIG_ENDIAN(prg.acc_attr), is_write);
499 done:
500 	prg.drvr_version = PCITOOL_VERSION;
501 	if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t),
502 	    mode) != DDI_SUCCESS) {
503 		DBG(DBG_TOOLS, dip, "Copyout failed.\n");
504 		return (EFAULT);
505 	}
506 
507 	return (rval);
508 }
509