xref: /illumos-gate/usr/src/uts/sun4v/io/px/px_tools_4v.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/sysmacros.h>
27 #include <sys/machsystm.h>
28 #include <sys/cpuvar.h>
29 #include <sys/ddi_implfuncs.h>
30 #include <sys/hypervisor_api.h>
31 #include <sys/hsvc.h>
32 #include <px_obj.h>
33 #include <sys/pci_tools.h>
34 #include <sys/pci_cfgacc.h>
35 #include <px_tools_var.h>
36 #include "px_lib4v.h"
37 #include <px_tools_ext.h>
38 
/*
 * Delays, in microseconds, applied before the safe access environment is
 * taken down after a config space access (pxtool_cfg_delay_usec) or an
 * IO/memory space access (pxtool_iomem_delay_usec).  The delay gives any
 * error caused by the access time to surface while the environment still
 * envelops it.  The more bridges and switches there are in the device path,
 * the larger the delay has to be.
 *
 * Note: this is a workaround until a better solution is found.  Although the
 * values are high, a long enough chain of bridges and switches can still
 * defeat them.  In addition, PIL 15 interrupts other than the ones being
 * enveloped could delay processing of the interrupt being protected.
 */
int pxtool_cfg_delay_usec = 2500;
int pxtool_iomem_delay_usec = 25000;

/*
 * Number of inos per root complex.  The value is hard-coded because there is
 * currently no way of getting this info from hypervisor.
 */
#define	INTERRUPT_MAPPING_ENTRIES	64
int pxtool_num_inos = INTERRUPT_MAPPING_ENTRIES;

/*
 * Outcome of verifying the hypervisor version for the DIAG functions ra2pa
 * and hpriv.
 */
enum {
	PXTOOL_HYP_VER_UNINIT = 0,	/* not verified yet */
	PXTOOL_HYP_VER_BAD = 1,		/* verified, not usable */
	PXTOOL_HYP_VER_OK = 2		/* verified, usable */
};

static int pxtool_hyp_version = PXTOOL_HYP_VER_UNINIT;
64 
/*
 * Swap endianness of a value that is "size" bytes wide.
 *
 * The value is viewed as the eight bytes of a uint64_t in memory order.  The
 * last "size" bytes of that view are copied, in reverse order, into the last
 * "size" bytes of the result.  All other bytes of the result are zero.
 *
 * data: value to swap.
 * size: number of bytes to swap.  A size of zero or less yields 0.  Sizes
 *       larger than eight are not handled and would index outside the
 *       eight-byte views.
 *
 * Returns the swapped value.
 */
static uint64_t
pxtool_swap_endian(uint64_t data, int size)
{
	union {
		uint64_t whole;
		uint8_t bytes[8];
	} src, dst;
	int src_idx = 8 - size;
	int dst_idx = 7;

	src.whole = data;
	dst.whole = 0;

	/* Walk the source tail forward while filling the result backward. */
	while (src_idx < 8) {
		dst.bytes[dst_idx] = src.bytes[src_idx];
		dst_idx--;
		src_idx++;
	}

	return (dst.whole);
}
87 
88 static void
89 pxtool_validate_diag_hyp_svc(dev_info_t *dip, int *diag_svc_status_p)
90 {
91 	uint64_t pxtool_diag_maj_ver;
92 	uint64_t pxtool_diag_min_ver;
93 	int ret;
94 
95 	if (*diag_svc_status_p == PXTOOL_HYP_VER_UNINIT) {
96 
97 		*diag_svc_status_p = PXTOOL_HYP_VER_BAD;
98 
99 		/*
100 		 * Verify that hypervisor DIAG API has been
101 		 * negotiated (by unix).
102 		 */
103 		if ((ret = hsvc_version(HSVC_GROUP_DIAG,
104 		    &pxtool_diag_maj_ver, &pxtool_diag_min_ver)) != 0) {
105 			DBG(DBG_TOOLS, dip,
106 			    "diag hypervisor svc not negotiated: "
107 			    "grp:0x%lx, errno:%d\n", HSVC_GROUP_DIAG, ret);
108 
109 		} else if (pxtool_diag_maj_ver == 1) {
110 			/*
111 			 * Major version 1 is OK.
112 			 *
113 			 * Code maintainers: if the version changes, check for
114 			 * API changes in hv_ra2pa() and hv_hpriv() before
115 			 * accepting the new version.
116 			 */
117 			*diag_svc_status_p = PXTOOL_HYP_VER_OK;
118 
119 		} else {
120 			DBG(DBG_TOOLS, dip,
121 			    "diag hypervisor svc: bad major number: "
122 			    "grp:0x%lx, maj:0x%lx, min:0x%lx\n",
123 			    HSVC_GROUP_DIAG, pxtool_diag_maj_ver,
124 			    pxtool_diag_min_ver);
125 		}
126 	}
127 }
128 
129 static int
130 pxtool_phys_access(px_t *px_p, uintptr_t dev_addr,
131     uint64_t *data_p, boolean_t is_big_endian, boolean_t is_write)
132 {
133 	uint64_t rfunc, pfunc;
134 	uint64_t rdata_addr, pdata_addr;
135 	uint64_t to_addr, from_addr;
136 	uint64_t local_data;
137 	int rval;
138 	dev_info_t *dip = px_p->px_dip;
139 
140 	DBG(DBG_TOOLS, dip,
141 	    "pxtool_phys_access: dev_addr:0x%" PRIx64 "\n", dev_addr);
142 	DBG(DBG_TOOLS, dip, "    data_addr:0x%" PRIx64 ", is_write:%s\n",
143 	    data_p, (is_write ? "yes" : "no"));
144 
145 	if (pxtool_hyp_version != PXTOOL_HYP_VER_OK) {
146 		pxtool_validate_diag_hyp_svc(dip, &pxtool_hyp_version);
147 		if (pxtool_hyp_version != PXTOOL_HYP_VER_OK) {
148 			DBG(DBG_TOOLS, dip, "Couldn't validate diag hyp svc\n");
149 			return (EPERM);
150 		}
151 	}
152 
153 	if ((rfunc = va_to_pa((void *)px_phys_acc_4v))  == (uint64_t)-1) {
154 		DBG(DBG_TOOLS, dip, "Error getting real addr for function\n");
155 		return (EIO);
156 	}
157 
158 	if ((pfunc = hv_ra2pa(rfunc)) == -1) {
159 		DBG(DBG_TOOLS, dip, "Error getting phys addr for function\n");
160 		return (EIO);
161 	}
162 
163 	if ((rdata_addr = va_to_pa((void *)&local_data))  == (uint64_t)-1) {
164 		DBG(DBG_TOOLS, dip, "Error getting real addr for data_p\n");
165 		return (EIO);
166 	}
167 
168 	if ((pdata_addr = hv_ra2pa(rdata_addr)) == -1) {
169 		DBG(DBG_TOOLS, dip, "Error getting phys addr for data ptr\n");
170 		return (EIO);
171 	}
172 
173 	if (is_write) {
174 		to_addr = dev_addr;
175 		from_addr = pdata_addr;
176 
177 		if (is_big_endian)
178 			local_data = *data_p;
179 		else
180 			local_data =
181 			    pxtool_swap_endian(*data_p, sizeof (uint64_t));
182 	} else {
183 		to_addr = pdata_addr;
184 		from_addr = dev_addr;
185 	}
186 
187 	rval = hv_hpriv((void *)pfunc, from_addr, to_addr, 0);
188 	switch (rval) {
189 	case H_ENOACCESS:	/* Returned by non-debug hypervisor. */
190 		rval = ENOTSUP;
191 		break;
192 	case H_EOK:
193 		rval = SUCCESS;
194 		break;
195 	default:
196 		rval = EIO;
197 		break;
198 	}
199 
200 	if ((rval == SUCCESS) && (!is_write)) {
201 		if (is_big_endian)
202 			*data_p = local_data;
203 		else
204 			*data_p =
205 			    pxtool_swap_endian(local_data, sizeof (uint64_t));
206 	}
207 
208 	return (rval);
209 }
210 
211 /*
212  * This function is for PCI config space access.
213  * It assumes that offset, bdf, acc_attr are valid in prg_p.
214  * This function modifies prg_p status and data.
215  *
216  * prg_p->phys_addr isn't used.
217  */
218 
219 int
220 pxtool_pcicfg_access(px_t *px_p, pcitool_reg_t *prg_p,
221     uint64_t *data_p, boolean_t is_write)
222 {
223 	pci_cfg_data_t data;
224 	on_trap_data_t otd;
225 	dev_info_t *dip = px_p->px_dip;
226 	px_pec_t *pec_p = px_p->px_pec_p;
227 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
228 	int rval = 0;
229 	pci_cfgacc_req_t req;
230 
231 	if ((size <= 0) || (size > 8)) {
232 		DBG(DBG_TOOLS, dip, "not supported size.\n");
233 		prg_p->status = PCITOOL_INVALID_SIZE;
234 		return (ENOTSUP);
235 	}
236 
237 	/* Alignment checking. */
238 	if (!IS_P2ALIGNED(prg_p->offset, size)) {
239 		DBG(DBG_TOOLS, dip, "not aligned.\n");
240 		prg_p->status = PCITOOL_NOT_ALIGNED;
241 		return (EINVAL);
242 	}
243 
244 	mutex_enter(&pec_p->pec_pokefault_mutex);
245 	pec_p->pec_ontrap_data = &otd;
246 
247 	req.rcdip = dip;
248 	req.bdf = PCI_GETBDF(prg_p->bus_no, prg_p->dev_no, prg_p->func_no);
249 	req.offset = prg_p->offset;
250 	req.size = size;
251 	req.write = is_write;
252 	if (is_write) {
253 
254 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
255 			data.qw = pxtool_swap_endian(*data_p, size);
256 		else
257 			data.qw = *data_p;
258 
259 		switch (size) {
260 			case sizeof (uint8_t):
261 				data.b = (uint8_t)data.qw;
262 				break;
263 			case sizeof (uint16_t):
264 				data.w = (uint16_t)data.qw;
265 				break;
266 			case sizeof (uint32_t):
267 				data.dw = (uint32_t)data.qw;
268 				break;
269 			case sizeof (uint64_t):
270 				break;
271 		}
272 
273 		DBG(DBG_TOOLS, dip, "put: bdf:%d,%d,%d, off:0x%"PRIx64", size:"
274 		    "0x%"PRIx64", data:0x%"PRIx64"\n",
275 		    prg_p->bus_no, prg_p->dev_no, prg_p->func_no,
276 		    prg_p->offset, size, data.qw);
277 
278 		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
279 
280 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
281 			otd.ot_trampoline = (uintptr_t)&poke_fault;
282 			VAL64(&req) = data.qw;
283 			pci_cfgacc_acc(&req);
284 		} else
285 			rval = H_EIO;
286 
287 		if (otd.ot_trap & OT_DATA_ACCESS)
288 			rval = H_EIO;
289 
290 	} else {
291 
292 		data.qw = 0;
293 
294 		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
295 
296 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
297 			otd.ot_trampoline = (uintptr_t)&peek_fault;
298 			pci_cfgacc_acc(&req);
299 			data.qw = VAL64(&req);
300 		} else
301 			rval = H_EIO;
302 
303 		switch (size) {
304 			case sizeof (uint8_t):
305 				data.qw = (uint64_t)data.b;
306 				break;
307 			case sizeof (uint16_t):
308 				data.qw = (uint64_t)data.w;
309 				break;
310 			case sizeof (uint32_t):
311 				data.qw = (uint64_t)data.dw;
312 				break;
313 			case sizeof (uint64_t):
314 				break;
315 		}
316 
317 		DBG(DBG_TOOLS, dip, "get: bdf:%d,%d,%d, off:0x%"PRIx64", size:"
318 		    "0x%"PRIx64", data:0x%"PRIx64"\n",
319 		    prg_p->bus_no, prg_p->dev_no, prg_p->func_no,
320 		    prg_p->offset, size, data.qw);
321 		*data_p = data.qw;
322 
323 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
324 			*data_p = pxtool_swap_endian(*data_p, size);
325 	}
326 
327 	/*
328 	 * Workaround: delay taking down safe access env.
329 	 * For more info, see comments where pxtool_cfg_delay_usec is declared.
330 	 */
331 	if (pxtool_cfg_delay_usec > 0)
332 		drv_usecwait(pxtool_cfg_delay_usec);
333 
334 	no_trap();
335 	pec_p->pec_ontrap_data = NULL;
336 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
337 	mutex_exit(&pec_p->pec_pokefault_mutex);
338 
339 	if (rval != SUCCESS) {
340 		prg_p->status = PCITOOL_INVALID_ADDRESS;
341 		rval = EINVAL;
342 	} else
343 		prg_p->status = PCITOOL_SUCCESS;
344 
345 	return (rval);
346 }
347 
348 
349 /*
350  * This function is for PCI IO space and memory space access.
351  * It assumes that offset, bdf, acc_attr are current in prg_p.
352  * It assumes that prg_p->phys_addr is the final phys addr (including offset).
353  * This function modifies prg_p status and data.
354  */
355 int
356 pxtool_pciiomem_access(px_t *px_p, pcitool_reg_t *prg_p,
357     uint64_t *data_p, boolean_t is_write)
358 {
359 	on_trap_data_t otd;
360 	uint32_t io_stat = 0;
361 	dev_info_t *dip = px_p->px_dip;
362 	px_pec_t *pec_p = px_p->px_pec_p;
363 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg_p->acc_attr);
364 	int rval = 0;
365 
366 	/* Alignment checking. */
367 	if (!IS_P2ALIGNED(prg_p->offset, size)) {
368 		DBG(DBG_TOOLS, dip, "not aligned.\n");
369 		prg_p->status = PCITOOL_NOT_ALIGNED;
370 		return (EINVAL);
371 	}
372 
373 	mutex_enter(&pec_p->pec_pokefault_mutex);
374 	pec_p->pec_ontrap_data = &otd;
375 
376 	if (is_write) {
377 		pci_device_t bdf = PX_GET_BDF(prg_p);
378 
379 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
380 			*data_p = pxtool_swap_endian(*data_p, size);
381 
382 		pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;
383 
384 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
385 			otd.ot_trampoline = (uintptr_t)&poke_fault;
386 			rval = hvio_poke(px_p->px_dev_hdl, prg_p->phys_addr,
387 			    size, *data_p, bdf, &io_stat);
388 		} else
389 			rval = H_EIO;
390 
391 		if (otd.ot_trap & OT_DATA_ACCESS)
392 			rval = H_EIO;
393 
394 		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", bdf:0x%x, "
395 		    "rval:%d, io_stat:%d\n", prg_p->phys_addr, bdf,
396 		    rval, io_stat);
397 	} else {
398 
399 		*data_p = 0;
400 
401 		pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
402 
403 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
404 			otd.ot_trampoline = (uintptr_t)&peek_fault;
405 			rval = hvio_peek(px_p->px_dev_hdl, prg_p->phys_addr,
406 			    size, &io_stat, data_p);
407 		} else
408 			rval = H_EIO;
409 
410 		DBG(DBG_TOOLS, dip, "iomem:phys_addr:0x%" PRIx64 ", "
411 		    "size:0x%" PRIx64 ", hdl:0x%" PRIx64 ", "
412 		    "rval:%d, io_stat:%d\n", prg_p->phys_addr,
413 		    size, px_p->px_dev_hdl, rval, io_stat);
414 		DBG(DBG_TOOLS, dip, "read data:0x%" PRIx64 "\n", *data_p);
415 
416 		if (PCITOOL_ACC_IS_BIG_ENDIAN(prg_p->acc_attr))
417 			*data_p = pxtool_swap_endian(*data_p, size);
418 	}
419 
420 	/*
421 	 * Workaround: delay taking down safe access env.
422 	 * For more info, see comment where pxtool_iomem_delay_usec is declared.
423 	 */
424 	if (pxtool_iomem_delay_usec > 0)
425 		delay(drv_usectohz(pxtool_iomem_delay_usec));
426 
427 	no_trap();
428 	pec_p->pec_ontrap_data = NULL;
429 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
430 	mutex_exit(&pec_p->pec_pokefault_mutex);
431 
432 	if (rval != SUCCESS) {
433 		prg_p->status = PCITOOL_INVALID_ADDRESS;
434 		rval = EINVAL;
435 	} else if (io_stat != SUCCESS) {
436 		prg_p->status = PCITOOL_IO_ERROR;
437 		rval = EIO;
438 	} else
439 		prg_p->status = PCITOOL_SUCCESS;
440 
441 	return (rval);
442 }
443 
444 
445 /*ARGSUSED*/
446 int
447 pxtool_dev_reg_ops_platchk(dev_info_t *dip, pcitool_reg_t *prg_p)
448 {
449 	return (SUCCESS);
450 }
451 
452 
453 /*
454  * Perform register accesses on the nexus device itself.
455  */
456 int
457 pxtool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
458 {
459 
460 	pcitool_reg_t		prg;
461 	size_t			size;
462 	px_t			*px_p = DIP_TO_STATE(dip);
463 	boolean_t		is_write = B_FALSE;
464 	uint32_t		rval = 0;
465 
466 	if (cmd == PCITOOL_NEXUS_SET_REG)
467 		is_write = B_TRUE;
468 
469 	DBG(DBG_TOOLS, dip, "pxtool_bus_reg_ops set/get reg\n");
470 
471 	/* Read data from userland. */
472 	if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t),
473 	    mode) != DDI_SUCCESS) {
474 		DBG(DBG_TOOLS, dip, "Error reading arguments\n");
475 		return (EFAULT);
476 	}
477 
478 	size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
479 
480 	DBG(DBG_TOOLS, dip, "raw bus:0x%x, dev:0x%x, func:0x%x\n",
481 	    prg.bus_no, prg.dev_no, prg.func_no);
482 	DBG(DBG_TOOLS, dip, "barnum:0x%x, offset:0x%" PRIx64 ", acc:0x%x\n",
483 	    prg.barnum, prg.offset, prg.acc_attr);
484 	DBG(DBG_TOOLS, dip, "data:0x%" PRIx64 ", phys_addr:0x%" PRIx64 "\n",
485 	    prg.data, prg.phys_addr);
486 
487 	/*
488 	 * If bank num == ff, base phys addr passed in from userland.
489 	 *
490 	 * Normal bank specification is invalid, as there is no OBP property to
491 	 * back it up.
492 	 */
493 	if (prg.barnum != PCITOOL_BASE) {
494 		prg.status = PCITOOL_OUT_OF_RANGE;
495 		rval = EINVAL;
496 		goto done;
497 	}
498 
499 	/* Allow only size of 8-bytes. */
500 	if (size != sizeof (uint64_t)) {
501 		prg.status = PCITOOL_INVALID_SIZE;
502 		rval = EINVAL;
503 		goto done;
504 	}
505 
506 	/* Alignment checking. */
507 	if (!IS_P2ALIGNED(prg.offset, size)) {
508 		DBG(DBG_TOOLS, dip, "not aligned.\n");
509 		prg.status = PCITOOL_NOT_ALIGNED;
510 		rval = EINVAL;
511 		goto done;
512 	}
513 
514 	prg.phys_addr += prg.offset;
515 
516 	/*
517 	 * Only the hypervisor can access nexus registers.  As a result, there
518 	 * can be no error recovery in the OS.  If there is an error, the
519 	 * system will go down, but with a trap type 7f.  The OS cannot
520 	 * intervene with this kind of trap.
521 	 */
522 
523 	/* Access device.  prg.status is modified. */
524 	rval = pxtool_phys_access(px_p, prg.phys_addr, &prg.data,
525 	    PCITOOL_ACC_IS_BIG_ENDIAN(prg.acc_attr), is_write);
526 done:
527 	prg.drvr_version = PCITOOL_VERSION;
528 	if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t),
529 	    mode) != DDI_SUCCESS) {
530 		DBG(DBG_TOOLS, dip, "Copyout failed.\n");
531 		return (EFAULT);
532 	}
533 
534 	return (rval);
535 }
536