xref: /titanic_41/usr/src/uts/sparc/v9/os/simulator.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /* common code with bug fixes from original version in trap.c */
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/archsystm.h>
35 #include <sys/vmsystm.h>
36 #include <sys/fpu/fpusystm.h>
37 #include <sys/fpu/fpu_simulator.h>
38 #include <sys/inline.h>
39 #include <sys/debug.h>
40 #include <sys/privregs.h>
41 #include <sys/machpcb.h>
42 #include <sys/simulate.h>
43 #include <sys/proc.h>
44 #include <sys/cmn_err.h>
45 #include <sys/stack.h>
46 #include <sys/watchpoint.h>
47 #include <sys/trap.h>
48 #include <sys/machtrap.h>
49 #include <sys/mman.h>
50 #include <sys/asi.h>
51 #include <sys/copyops.h>
52 #include <vm/as.h>
53 #include <vm/page.h>
54 #include <sys/model.h>
55 #include <vm/seg_vn.h>
56 
/*
 * Instruction-decoding helpers.  Every macro argument and every expansion
 * is fully parenthesized so that callers may pass arbitrary expressions.
 */

/* Non-zero when bit 13 (the 'i' / immediate bit) of instruction x is set. */
#define	IS_IBIT_SET(x)	((x) & 0x2000)

/* True when op == 2 and op3 == 0x36, the encoding treated here as VIS1. */
#define	IS_VIS1(op, op3)	((op) == 2 && (op3) == 0x36)

/*
 * True for an lddfa/stdfa (op == 3) whose ASI is numerically above
 * ASI_SNFL, i.e. beyond the primary/secondary ASIs handled in this file.
 */
#define	IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)		\
		((op) == 3 && ((op3) == IOP_V8_LDDFA ||		\
		(op3) == IOP_V8_STDFA) && (asi) > ASI_SNFL)

/* When non-zero, do_unaligned() prints a trace of what it decodes. */
static int aligndebug = 0;
64 
65 /*
66  * For the sake of those who must be compatible with unaligned
67  * architectures, users can link their programs to use a
68  * corrective trap handler that will fix unaligned references
69  * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
70  * Returns 1 for success, 0 for failure.
71  */
72 
73 int
74 do_unaligned(struct regs *rp, caddr_t *badaddr)
75 {
76 	uint_t	inst, op3, asi = 0;
77 	uint_t	rd, rs1, rs2;
78 	int	sz, nf = 0, ltlend = 0;
79 	int	floatflg;
80 	int	fsrflg;
81 	int	immflg;
82 	int	lddstdflg;
83 	caddr_t	addr;
84 	uint64_t val;
85 	union {
86 		uint64_t	l[2];
87 		uint32_t	i[4];
88 		uint16_t	s[8];
89 		uint8_t		c[16];
90 	} data;
91 
92 	ASSERT(USERMODE(rp->r_tstate));
93 	inst = fetch_user_instr((caddr_t)rp->r_pc);
94 
95 	op3 = (inst >> 19) & 0x3f;
96 	rd = (inst >> 25) & 0x1f;
97 	rs1 = (inst >> 14) & 0x1f;
98 	rs2 = inst & 0x1f;
99 	floatflg = (inst >> 24) & 1;
100 	immflg = (inst >> 13) & 1;
101 	lddstdflg = fsrflg = 0;
102 
103 	/* if not load or store do nothing */
104 	if ((inst >> 30) != 3)
105 		return (0);
106 
107 	/* if ldstub or swap, do nothing */
108 	if ((inst & 0xc1680000) == 0xc0680000)
109 		return (0);
110 
111 	/* if cas/casx, do nothing */
112 	if ((inst & 0xc1e00000) == 0xc1e00000)
113 		return (0);
114 
115 	if (floatflg) {
116 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
117 		case 0: sz = 4;
118 			break;			/* ldf{a}/stf{a} */
119 		case 1: fsrflg = 1;
120 			if (rd == 0)
121 				sz = 4;		/* ldfsr/stfsr */
122 			else  if (rd == 1)
123 				sz = 8;		/* ldxfsr/stxfsr */
124 			else
125 				return (SIMU_ILLEGAL);
126 			break;
127 		case 2: sz = 16;
128 			break;		/* ldqf{a}/stqf{a} */
129 		case 3: sz = 8;
130 			break;		/* lddf{a}/stdf{a} */
131 		}
132 		/*
133 		 * Fix to access extra double register encoding plus
134 		 * compensate to access the correct fpu_dreg.
135 		 */
136 		if ((sz > 4) && (fsrflg == 0)) {
137 			if ((rd & 1) == 1)
138 				rd = (rd & 0x1e) | 0x20;
139 			rd = rd >> 1;
140 			if ((sz == 16) && ((rd & 0x1) != 0))
141 				return (SIMU_ILLEGAL);
142 		}
143 	} else {
144 		int sz_bits = (inst >> 19) & 0xf;
145 		switch (sz_bits) {		/* map size bits to a number */
146 		case 0:				/* lduw{a} */
147 		case 4:				/* stw{a} */
148 		case 8:				/* ldsw{a} */
149 		case 0xf:			/* swap */
150 			sz = 4; break;
151 		case 1:				/* ldub{a} */
152 		case 5:				/* stb{a} */
153 		case 9:				/* ldsb{a} */
154 		case 0xd:			/* ldstub */
155 			sz = 1; break;
156 		case 2:				/* lduh{a} */
157 		case 6:				/* sth{a} */
158 		case 0xa:			/* ldsh{a} */
159 			sz = 2; break;
160 		case 3:				/* ldd{a} */
161 		case 7:				/* std{a} */
162 			lddstdflg = 1;
163 			sz = 8; break;
164 		case 0xb:			/* ldx{a} */
165 		case 0xe:			/* stx{a} */
166 			sz = 8; break;
167 		}
168 	}
169 
170 
171 	/* only support primary and secondary asi's */
172 	if ((op3 >> 4) & 1) {
173 		if (immflg) {
174 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
175 					TSTATE_ASI_MASK;
176 		} else {
177 			asi = (inst >> 5) & 0xff;
178 		}
179 		switch (asi) {
180 		case ASI_P:
181 		case ASI_S:
182 			break;
183 		case ASI_PNF:
184 		case ASI_SNF:
185 			nf = 1;
186 			break;
187 		case ASI_PL:
188 		case ASI_SL:
189 			ltlend = 1;
190 			break;
191 		case ASI_PNFL:
192 		case ASI_SNFL:
193 			ltlend = 1;
194 			nf = 1;
195 			break;
196 		default:
197 			return (0);
198 		}
199 		/*
200 		 * Non-faulting stores generate a data_access_exception trap,
201 		 * according to the Spitfire manual, which should be signaled
202 		 * as an illegal instruction trap, because it can't be fixed.
203 		 */
204 		if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
205 			return (SIMU_ILLEGAL);
206 	}
207 
208 	if (aligndebug) {
209 		printf("unaligned access at %p, instruction: 0x%x\n",
210 		    (void *)rp->r_pc, inst);
211 		printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
212 		if (((inst >> 21) & 1) == 0)
213 		    printf(" %s", (((inst >> 22) & 1) ? "signed" : "unsigned"));
214 		printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
215 		printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
216 			rd, op3, rs1, rs2, (inst & 0x1fff));
217 	}
218 
219 	(void) flush_user_windows_to_stack(NULL);
220 	if (getreg(rp, rs1, &val, badaddr))
221 		return (SIMU_FAULT);
222 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
223 	if (aligndebug)
224 		printf("addr 1 = %p\n", (void *)addr);
225 
226 	/* check immediate bit and use immediate field or reg (rs2) */
227 	if (immflg) {
228 		int imm;
229 		imm  = inst & 0x1fff;		/* mask out immediate field */
230 		imm <<= 19;			/* sign extend it */
231 		imm >>= 19;
232 		addr += imm;			/* compute address */
233 	} else {
234 		if (getreg(rp, rs2, &val, badaddr))
235 			return (SIMU_FAULT);
236 		addr += val;
237 	}
238 
239 	/*
240 	 * If this is a 32-bit program, chop the address accordingly.
241 	 */
242 	if (curproc->p_model == DATAMODEL_ILP32)
243 		addr = (caddr_t)(caddr32_t)addr;
244 
245 	if (aligndebug)
246 		printf("addr 2 = %p\n", (void *)addr);
247 
248 	if (addr >= curproc->p_as->a_userlimit) {
249 		*badaddr = addr;
250 		goto badret;
251 	}
252 
253 	/* a single bit differentiates ld and st */
254 	if ((inst >> 21) & 1) {			/* store */
255 		if (floatflg) {
256 			klwp_id_t lwp = ttolwp(curthread);
257 			kfpu_t *fp = lwptofpu(lwp);
258 			/* Ensure fp has been enabled */
259 			if (fpu_exists) {
260 				if (!(_fp_read_fprs() & FPRS_FEF))
261 					fp_enable();
262 			} else {
263 				if (!fp->fpu_en)
264 					fp_enable();
265 			}
266 			/* if fpu_exists read fpu reg */
267 			if (fpu_exists) {
268 				if (fsrflg) {
269 					_fp_read_pfsr(&data.l[0]);
270 				} else {
271 					if (sz == 4) {
272 						data.i[0] = 0;
273 						_fp_read_pfreg(
274 						    (unsigned *)&data.i[1], rd);
275 					}
276 					if (sz >= 8)
277 						_fp_read_pdreg(
278 							&data.l[0], rd);
279 					if (sz == 16)
280 						_fp_read_pdreg(
281 							&data.l[1], rd+1);
282 				}
283 			} else {
284 				if (fsrflg) {
285 					/* Clear reserved bits, set version=7 */
286 					fp->fpu_fsr &= ~0x30301000;
287 					fp->fpu_fsr |= 0xE0000;
288 					data.l[0] = fp->fpu_fsr;
289 				} else {
290 					if (sz == 4) {
291 						data.i[0] = 0;
292 						data.i[1] =
293 					    (unsigned)fp->fpu_fr.fpu_regs[rd];
294 					}
295 					if (sz >= 8)
296 						data.l[0] =
297 						    fp->fpu_fr.fpu_dregs[rd];
298 					if (sz == 16)
299 						data.l[1] =
300 						    fp->fpu_fr.fpu_dregs[rd+1];
301 				}
302 			}
303 		} else {
304 			if (lddstdflg) {
305 				if (getreg(rp, rd, &data.l[0], badaddr))
306 					return (SIMU_FAULT);
307 				if (getreg(rp, rd+1, &data.l[1], badaddr))
308 					return (SIMU_FAULT);
309 				data.i[0] = data.i[1];	/* combine the data */
310 				data.i[1] = data.i[3];
311 			} else {
312 				if (getreg(rp, rd, &data.l[0], badaddr))
313 					return (SIMU_FAULT);
314 			}
315 		}
316 
317 		if (aligndebug) {
318 			if (sz == 16) {
319 				printf("data %x %x %x %x\n",
320 				    data.i[0], data.i[1], data.i[2], data.c[3]);
321 			} else {
322 				printf("data %x %x %x %x %x %x %x %x\n",
323 				    data.c[0], data.c[1], data.c[2], data.c[3],
324 				    data.c[4], data.c[5], data.c[6], data.c[7]);
325 			}
326 		}
327 
328 		if (ltlend) {
329 			if (sz == 1) {
330 				if (xcopyout_little(&data.c[7], addr,
331 				    (size_t)sz) != 0)
332 					goto badret;
333 			} else if (sz == 2) {
334 				if (xcopyout_little(&data.s[3], addr,
335 				    (size_t)sz) != 0)
336 					goto badret;
337 			} else if (sz == 4) {
338 				if (xcopyout_little(&data.i[1], addr,
339 				    (size_t)sz) != 0)
340 					goto badret;
341 			} else {
342 				if (xcopyout_little(&data.l[0], addr,
343 				    (size_t)sz) != 0)
344 					goto badret;
345 			}
346 		} else {
347 			if (sz == 1) {
348 				if (copyout(&data.c[7], addr, (size_t)sz) == -1)
349 					goto badret;
350 			} else if (sz == 2) {
351 				if (copyout(&data.s[3], addr, (size_t)sz) == -1)
352 					goto badret;
353 			} else if (sz == 4) {
354 				if (copyout(&data.i[1], addr, (size_t)sz) == -1)
355 					goto badret;
356 			} else {
357 				if (copyout(&data.l[0], addr, (size_t)sz) == -1)
358 					goto badret;
359 			}
360 		}
361 	} else {				/* load */
362 		if (sz == 1) {
363 			if (ltlend) {
364 				if (xcopyin_little(addr, &data.c[7],
365 				    (size_t)sz) != 0) {
366 					if (nf)
367 						data.c[7] = 0;
368 					else
369 						goto badret;
370 				}
371 			} else {
372 				if (copyin(addr, &data.c[7],
373 				    (size_t)sz) == -1) {
374 					if (nf)
375 						data.c[7] = 0;
376 					else
377 						goto badret;
378 				}
379 			}
380 			/* if signed and the sign bit is set extend it */
381 			if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
382 				data.i[0] = (uint_t)-1;	/* extend sign bit */
383 				data.s[2] = (ushort_t)-1;
384 				data.c[6] = (uchar_t)-1;
385 			} else {
386 				data.i[0] = 0;	/* clear upper 32+24 bits */
387 				data.s[2] = 0;
388 				data.c[6] = 0;
389 			}
390 		} else if (sz == 2) {
391 			if (ltlend) {
392 				if (xcopyin_little(addr, &data.s[3],
393 				    (size_t)sz) != 0) {
394 					if (nf)
395 						data.s[3] = 0;
396 					else
397 						goto badret;
398 				}
399 			} else {
400 				if (copyin(addr, &data.s[3],
401 				    (size_t)sz) == -1) {
402 					if (nf)
403 						data.s[3] = 0;
404 					else
405 						goto badret;
406 				}
407 			}
408 			/* if signed and the sign bit is set extend it */
409 			if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
410 				data.i[0] = (uint_t)-1;	/* extend sign bit */
411 				data.s[2] = (ushort_t)-1;
412 			} else {
413 				data.i[0] = 0;	/* clear upper 32+16 bits */
414 				data.s[2] = 0;
415 			}
416 		} else if (sz == 4) {
417 			if (ltlend) {
418 				if (xcopyin_little(addr, &data.i[1],
419 				    (size_t)sz) != 0) {
420 					if (!nf)
421 						goto badret;
422 					data.i[1] = 0;
423 				}
424 			} else {
425 				if (copyin(addr, &data.i[1],
426 				    (size_t)sz) == -1) {
427 					if (!nf)
428 						goto badret;
429 					data.i[1] = 0;
430 				}
431 			}
432 			/* if signed and the sign bit is set extend it */
433 			if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
434 				data.i[0] = (uint_t)-1;	/* extend sign bit */
435 			} else {
436 				data.i[0] = 0;	/* clear upper 32 bits */
437 			}
438 		} else {
439 			if (ltlend) {
440 				if (xcopyin_little(addr, &data.l[0],
441 				    (size_t)sz) != 0) {
442 					if (!nf)
443 						goto badret;
444 					data.l[0] = 0;
445 				}
446 			} else {
447 				if (copyin(addr, &data.l[0],
448 				    (size_t)sz) == -1) {
449 					if (!nf)
450 						goto badret;
451 					data.l[0] = 0;
452 				}
453 			}
454 		}
455 
456 		if (aligndebug) {
457 			if (sz == 16) {
458 				printf("data %x %x %x %x\n",
459 				    data.i[0], data.i[1], data.i[2], data.c[3]);
460 			} else {
461 				printf("data %x %x %x %x %x %x %x %x\n",
462 				    data.c[0], data.c[1], data.c[2], data.c[3],
463 				    data.c[4], data.c[5], data.c[6], data.c[7]);
464 			}
465 		}
466 
467 		if (floatflg) {		/* if fpu_exists write fpu reg */
468 			klwp_id_t lwp = ttolwp(curthread);
469 			kfpu_t *fp = lwptofpu(lwp);
470 			/* Ensure fp has been enabled */
471 			if (fpu_exists) {
472 				if (!(_fp_read_fprs() & FPRS_FEF))
473 					fp_enable();
474 			} else {
475 				if (!fp->fpu_en)
476 					fp_enable();
477 			}
478 			/* if fpu_exists read fpu reg */
479 			if (fpu_exists) {
480 				if (fsrflg) {
481 					_fp_write_pfsr(&data.l[0]);
482 				} else {
483 					if (sz == 4)
484 						_fp_write_pfreg(
485 						    (unsigned *)&data.i[1], rd);
486 					if (sz >= 8)
487 						_fp_write_pdreg(
488 							&data.l[0], rd);
489 					if (sz == 16)
490 						_fp_write_pdreg(
491 							&data.l[1], rd+1);
492 				}
493 			} else {
494 				if (fsrflg) {
495 					fp->fpu_fsr = data.l[0];
496 				} else {
497 					if (sz == 4)
498 						fp->fpu_fr.fpu_regs[rd] =
499 							(unsigned)data.i[1];
500 					if (sz >= 8)
501 						fp->fpu_fr.fpu_dregs[rd] =
502 							data.l[0];
503 					if (sz == 16)
504 						fp->fpu_fr.fpu_dregs[rd+1] =
505 							data.l[1];
506 				}
507 			}
508 		} else {
509 			if (lddstdflg) {		/* split the data */
510 				data.i[2] = 0;
511 				data.i[3] = data.i[1];
512 				data.i[1] = data.i[0];
513 				data.i[0] = 0;
514 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
515 					goto badret;
516 				if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
517 					goto badret;
518 			} else {
519 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
520 					goto badret;
521 			}
522 		}
523 	}
524 	return (SIMU_SUCCESS);
525 badret:
526 	return (SIMU_FAULT);
527 }
528 
529 /*
530  * simulate popc
531  */
532 static int
533 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
534 {
535 	uint_t	rd, rs2, rs1;
536 	uint_t	immflg;
537 	uint64_t val, cnt = 0;
538 
539 	rd = (inst >> 25) & 0x1f;
540 	rs1 = (inst >> 14) & 0x1f;
541 	rs2 = inst & 0x1f;
542 	immflg = (inst >> 13) & 1;
543 
544 	if (rs1 > 0)
545 		return (SIMU_ILLEGAL);
546 
547 	(void) flush_user_windows_to_stack(NULL);
548 
549 	/* check immediate bit and use immediate field or reg (rs2) */
550 	if (immflg) {
551 		int64_t imm;
552 		imm  = inst & 0x1fff;		/* mask out immediate field */
553 		imm <<= 51;			/* sign extend it */
554 		imm >>= 51;
555 		if (imm != 0) {
556 			for (cnt = 0; imm != 0; imm &= imm-1)
557 				cnt++;
558 		}
559 	} else {
560 		if (getreg(rp, rs2, &val, badaddr))
561 			return (SIMU_FAULT);
562 		if (val != 0) {
563 			for (cnt = 0; val != 0; val &= val-1)
564 				cnt++;
565 		}
566 	}
567 
568 	if (putreg(&cnt, rp, rd, badaddr) == -1)
569 		return (SIMU_FAULT);
570 
571 	return (SIMU_SUCCESS);
572 }
573 
574 /*
575  * simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
576  */
577 int
578 simulate_unimp(struct regs *rp, caddr_t *badaddr)
579 {
580 	uint_t	inst, optype, op3, asi;
581 	uint_t	rs1, rd;
582 	uint_t	ignor, i;
583 	machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
584 	int	nomatch = 0;
585 	caddr_t	addr = (caddr_t)rp->r_pc;
586 	struct as *as;
587 	caddr_t	ka;
588 	pfn_t	pfnum;
589 	page_t *pp;
590 	proc_t *p = ttoproc(curthread);
591 	struct seg *mapseg;
592 	struct segvn_data *svd;
593 
594 	ASSERT(USERMODE(rp->r_tstate));
595 	inst = fetch_user_instr(addr);
596 	if (inst == (uint_t)-1) {
597 		mpcb->mpcb_illexcaddr = addr;
598 		mpcb->mpcb_illexcinsn = (uint32_t)-1;
599 		return (SIMU_ILLEGAL);
600 	}
601 
602 	/*
603 	 * When fixing dirty v8 instructions there's a race if two processors
604 	 * are executing the dirty executable at the same time.  If one
605 	 * cleans the instruction as the other is executing it the second
606 	 * processor will see a clean instruction when it comes through this
607 	 * code and will return SIMU_ILLEGAL.  To work around the race
608 	 * this code will keep track of the last illegal instruction seen
609 	 * by each lwp and will only take action if the illegal instruction
610 	 * is repeatable.
611 	 */
612 	if (addr != mpcb->mpcb_illexcaddr ||
613 	    inst != mpcb->mpcb_illexcinsn)
614 		nomatch = 1;
615 	mpcb->mpcb_illexcaddr = addr;
616 	mpcb->mpcb_illexcinsn = inst;
617 
618 	/* instruction fields */
619 	i = (inst >> 13) & 0x1;
620 	rd = (inst >> 25) & 0x1f;
621 	optype = (inst >> 30) & 0x3;
622 	op3 = (inst >> 19) & 0x3f;
623 	ignor = (inst >> 5) & 0xff;
624 	if (IS_IBIT_SET(inst)) {
625 		asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
626 		    TSTATE_ASI_MASK);
627 	} else {
628 		asi = ignor;
629 	}
630 
631 	if (IS_VIS1(optype, op3) ||
632 	    IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi)) {
633 		klwp_t *lwp = ttolwp(curthread);
634 		kfpu_t *fp = lwptofpu(lwp);
635 		if (fpu_exists) {
636 			if (!(_fp_read_fprs() & FPRS_FEF))
637 				fp_enable();
638 			_fp_read_pfsr(&fp->fpu_fsr);
639 		} else {
640 			if (!fp->fpu_en)
641 				fp_enable();
642 		}
643 		fp_precise(rp);
644 		return (SIMU_RETRY);
645 	}
646 
647 	if (optype == 2 && op3 == IOP_V8_POPC) {
648 		return (simulate_popc(rp, badaddr, inst));
649 	} else if (optype == 3 && op3 == IOP_V8_POPC) {
650 		return (SIMU_ILLEGAL);
651 	}
652 
653 	if (optype == OP_V8_LDSTR) {
654 		if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
655 		    op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
656 			return (do_unaligned(rp, badaddr));
657 	}
658 
659 	if (nomatch)
660 		return (SIMU_RETRY);
661 
662 	/*
663 	 * The rest of the code handles v8 binaries with instructions
664 	 * that have dirty (non-zero) bits in reserved or 'ignored'
665 	 * fields; these will cause core dumps on v9 machines.
666 	 *
667 	 * We only clean dirty instructions in 32-bit programs (ie, v8)
668 	 * running on SPARCv9 processors.  True v9 programs are forced
669 	 * to use the instruction set as intended.
670 	 */
671 	if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
672 		return (SIMU_ILLEGAL);
673 	switch (optype) {
674 	case OP_V8_BRANCH:
675 	case OP_V8_CALL:
676 		return (SIMU_ILLEGAL);	/* these don't have ignored fields */
677 		/*NOTREACHED*/
678 	case OP_V8_ARITH:
679 		switch (op3) {
680 		case IOP_V8_RETT:
681 			if (rd == 0 && !(i == 0 && ignor))
682 				return (SIMU_ILLEGAL);
683 			if (rd)
684 				inst &= ~(0x1f << 25);
685 			if (i == 0 && ignor)
686 				inst &= ~(0xff << 5);
687 			break;
688 		case IOP_V8_TCC:
689 			if (i == 0 && ignor != 0) {
690 				inst &= ~(0xff << 5);
691 			} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
692 				inst &= ~(0x3f << 7);
693 			} else {
694 				return (SIMU_ILLEGAL);
695 			}
696 			break;
697 		case IOP_V8_JMPL:
698 		case IOP_V8_RESTORE:
699 		case IOP_V8_SAVE:
700 			if ((op3 == IOP_V8_RETT && rd) ||
701 			    (i == 0 && ignor)) {
702 				inst &= ~(0xff << 5);
703 			} else {
704 				return (SIMU_ILLEGAL);
705 			}
706 			break;
707 		case IOP_V8_FCMP:
708 			if (rd == 0)
709 				return (SIMU_ILLEGAL);
710 			inst &= ~(0x1f << 25);
711 			break;
712 		case IOP_V8_RDASR:
713 			rs1 = ((inst >> 14) & 0x1f);
714 			if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
715 				/*
716 				 * The instruction specifies an invalid
717 				 * state register - better bail out than
718 				 * "fix" it when we're not sure what was
719 				 * intended.
720 				 */
721 				return (SIMU_ILLEGAL);
722 			}
723 				/*
724 				 * Note: this case includes the 'stbar'
725 				 * instruction (rs1 == 15 && i == 0).
726 				 */
727 				if ((ignor = (inst & 0x3fff)) != 0)
728 					inst &= ~(0x3fff);
729 			break;
730 		case IOP_V8_SRA:
731 		case IOP_V8_SRL:
732 		case IOP_V8_SLL:
733 			if (ignor == 0)
734 				return (SIMU_ILLEGAL);
735 			inst &= ~(0xff << 5);
736 			break;
737 		case IOP_V8_ADD:
738 		case IOP_V8_AND:
739 		case IOP_V8_OR:
740 		case IOP_V8_XOR:
741 		case IOP_V8_SUB:
742 		case IOP_V8_ANDN:
743 		case IOP_V8_ORN:
744 		case IOP_V8_XNOR:
745 		case IOP_V8_ADDC:
746 		case IOP_V8_UMUL:
747 		case IOP_V8_SMUL:
748 		case IOP_V8_SUBC:
749 		case IOP_V8_UDIV:
750 		case IOP_V8_SDIV:
751 		case IOP_V8_ADDcc:
752 		case IOP_V8_ANDcc:
753 		case IOP_V8_ORcc:
754 		case IOP_V8_XORcc:
755 		case IOP_V8_SUBcc:
756 		case IOP_V8_ANDNcc:
757 		case IOP_V8_ORNcc:
758 		case IOP_V8_XNORcc:
759 		case IOP_V8_ADDCcc:
760 		case IOP_V8_UMULcc:
761 		case IOP_V8_SMULcc:
762 		case IOP_V8_SUBCcc:
763 		case IOP_V8_UDIVcc:
764 		case IOP_V8_SDIVcc:
765 		case IOP_V8_TADDcc:
766 		case IOP_V8_TSUBcc:
767 		case IOP_V8_TADDccTV:
768 		case IOP_V8_TSUBccTV:
769 		case IOP_V8_MULScc:
770 		case IOP_V8_WRASR:
771 		case IOP_V8_FLUSH:
772 			if (i != 0 || ignor == 0)
773 				return (SIMU_ILLEGAL);
774 			inst &= ~(0xff << 5);
775 			break;
776 		default:
777 			return (SIMU_ILLEGAL);
778 		}
779 		break;
780 	case OP_V8_LDSTR:
781 		switch (op3) {
782 		case IOP_V8_STFSR:
783 		case IOP_V8_LDFSR:
784 			if (rd == 0 && !(i == 0 && ignor))
785 				return (SIMU_ILLEGAL);
786 			if (rd)
787 				inst &= ~(0x1f << 25);
788 			if (i == 0 && ignor)
789 				inst &= ~(0xff << 5);
790 			break;
791 		default:
792 			if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
793 			    i == 0 && ignor)
794 				inst &= ~(0xff << 5);
795 			else
796 				return (SIMU_ILLEGAL);
797 			break;
798 		}
799 		break;
800 	default:
801 		return (SIMU_ILLEGAL);
802 	}
803 
804 	as = p->p_as;
805 
806 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
807 	mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
808 	ASSERT(mapseg != NULL);
809 	svd = (struct segvn_data *)mapseg->s_data;
810 
811 	/*
812 	 * We only create COW page for MAP_PRIVATE mappings.
813 	 */
814 	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
815 	if ((svd->type & MAP_TYPE) & MAP_SHARED) {
816 		SEGVN_LOCK_EXIT(as, &svd->lock);
817 		AS_LOCK_EXIT(as, &as->a_lock);
818 		return (SIMU_ILLEGAL);
819 	}
820 	SEGVN_LOCK_EXIT(as, &svd->lock);
821 	AS_LOCK_EXIT(as, &as->a_lock);
822 
823 	/*
824 	 * A "flush" instruction using the user PC's vaddr will not work
825 	 * here, at least on Spitfire. Instead we create a temporary kernel
826 	 * mapping to the user's text page, then modify and flush that.
827 	 * Break COW by locking user page.
828 	 */
829 	if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
830 	    F_SOFTLOCK, S_READ))
831 		return (SIMU_FAULT);
832 
833 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
834 	pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
835 	AS_LOCK_EXIT(as, &as->a_lock);
836 	if (pf_is_memory(pfnum)) {
837 		pp = page_numtopp_nolock(pfnum);
838 		ASSERT(pp == NULL || PAGE_LOCKED(pp));
839 	} else {
840 		(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
841 		    PAGESIZE, F_SOFTUNLOCK, S_READ);
842 		return (SIMU_FAULT);
843 	}
844 
845 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
846 	ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
847 	*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
848 	doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
849 	ppmapout(ka);
850 	AS_LOCK_EXIT(as, &as->a_lock);
851 
852 	(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
853 	    PAGESIZE, F_SOFTUNLOCK, S_READ);
854 	return (SIMU_RETRY);
855 }
856 
857 /*
858  * Get the value of a register for instruction simulation
859  * by using the regs or window structure pointers.
860  * Return 0 for success, and -1 for failure.  If there is a failure,
861  * save the faulting address using badaddr pointer.
862  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
863  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
864  */
865 int
866 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
867 {
868 	uint64_t *rgs, *sp;
869 	int rv = 0;
870 
871 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
872 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
873 	if (reg == 0) {
874 		*val = 0;
875 	} else if (reg < 16) {
876 		*val = rgs[reg];
877 	} else if (IS_V9STACK(sp)) {
878 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
879 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
880 		uint64_t res;
881 
882 		if (USERMODE(rp->r_tstate)) {
883 			if (fuword64_nowatch(addr, &res) == -1) {
884 				*badaddr = (caddr_t)addr;
885 				rv = -1;
886 			}
887 		} else {
888 			res = *addr;
889 		}
890 		*val = res;
891 	} else {
892 		uint32_t *rw = (uint32_t *)(caddr32_t)sp;
893 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
894 		uint32_t res;
895 
896 		if (USERMODE(rp->r_tstate)) {
897 			if (fuword32_nowatch(addr, &res) == -1) {
898 				*badaddr = (caddr_t)addr;
899 				rv = -1;
900 			}
901 		} else {
902 			res = *addr;
903 		}
904 		*val = (uint64_t)res;
905 	}
906 	return (rv);
907 }
908 
909 /*
910  * Set the value of a register after instruction simulation
911  * by using the regs or window structure pointers.
912  * Return 0 for succes -1 failure.
913  * save the faulting address using badaddr pointer.
914  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
915  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
916  */
917 int
918 putreg(uint64_t	*data, struct regs *rp, uint_t reg, caddr_t *badaddr)
919 {
920 	uint64_t *rgs, *sp;
921 	int rv = 0;
922 
923 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
924 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
925 	if (reg == 0) {
926 		return (0);
927 	} else if (reg < 16) {
928 		rgs[reg] = *data;
929 	} else if (IS_V9STACK(sp)) {
930 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
931 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
932 		uint64_t res;
933 
934 		if (USERMODE(rp->r_tstate)) {
935 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
936 
937 			res = *data;
938 			if (suword64_nowatch(addr, res) != 0) {
939 				*badaddr = (caddr_t)addr;
940 				rv = -1;
941 			}
942 			/*
943 			 * We have changed a local or in register;
944 			 * nuke the watchpoint return windows.
945 			 */
946 			mpcb->mpcb_rsp[0] = NULL;
947 			mpcb->mpcb_rsp[1] = NULL;
948 		} else {
949 			res = *data;
950 			*addr = res;
951 		}
952 	} else {
953 		uint32_t *rw = (uint32_t *)(caddr32_t)sp;
954 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
955 		uint32_t res;
956 
957 		if (USERMODE(rp->r_tstate)) {
958 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
959 
960 			res = (uint_t)*data;
961 			if (suword32_nowatch(addr, res) != 0) {
962 				*badaddr = (caddr_t)addr;
963 				rv = -1;
964 			}
965 			/*
966 			 * We have changed a local or in register;
967 			 * nuke the watchpoint return windows.
968 			 */
969 			mpcb->mpcb_rsp[0] = NULL;
970 			mpcb->mpcb_rsp[1] = NULL;
971 
972 		} else {
973 			res = (uint_t)*data;
974 			*addr = res;
975 		}
976 	}
977 	return (rv);
978 }
979 
980 /*
981  * Calculate a memory reference address from instruction
982  * operands, used to return the address of a fault, instead
983  * of the instruction when an error occurs.  This is code that is
984  * common with most of the routines that simulate instructions.
985  */
986 int
987 calc_memaddr(struct regs *rp, caddr_t *badaddr)
988 {
989 	uint_t	inst;
990 	uint_t	rd, rs1, rs2;
991 	int	sz;
992 	int	immflg;
993 	int	floatflg;
994 	caddr_t  addr;
995 	uint64_t val;
996 
997 	if (USERMODE(rp->r_tstate))
998 		inst = fetch_user_instr((caddr_t)rp->r_pc);
999 	else
1000 		inst = *(uint_t *)rp->r_pc;
1001 
1002 	rd = (inst >> 25) & 0x1f;
1003 	rs1 = (inst >> 14) & 0x1f;
1004 	rs2 = inst & 0x1f;
1005 	floatflg = (inst >> 24) & 1;
1006 	immflg = (inst >> 13) & 1;
1007 
1008 	if (floatflg) {
1009 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
1010 		case 0: sz = 4; break;		/* ldf/stf */
1011 		case 1: return (0);		/* ld[x]fsr/st[x]fsr */
1012 		case 2: sz = 16; break;		/* ldqf/stqf */
1013 		case 3: sz = 8; break;		/* lddf/stdf */
1014 		}
1015 		/*
1016 		 * Fix to access extra double register encoding plus
1017 		 * compensate to access the correct fpu_dreg.
1018 		 */
1019 		if (sz > 4) {
1020 			if ((rd & 1) == 1)
1021 				rd = (rd & 0x1e) | 0x20;
1022 			rd = rd >> 1;
1023 		}
1024 	} else {
1025 		switch ((inst >> 19) & 0xf) {	/* map size bits to a number */
1026 		case 0:				/* lduw */
1027 		case 4:				/* stw */
1028 		case 8:				/* ldsw */
1029 		case 0xf:			/* swap */
1030 			sz = 4; break;
1031 		case 1:				/* ldub */
1032 		case 5:				/* stb */
1033 		case 9:				/* ldsb */
1034 		case 0xd:			/* ldstub */
1035 			sz = 1; break;
1036 		case 2:				/* lduh */
1037 		case 6:				/* sth */
1038 		case 0xa:			/* ldsh */
1039 			sz = 2; break;
1040 		case 3:				/* ldd */
1041 		case 7:				/* std */
1042 		case 0xb:			/* ldx */
1043 		case 0xe:			/* stx */
1044 			sz = 8; break;
1045 		}
1046 	}
1047 
1048 	if (USERMODE(rp->r_tstate))
1049 		(void) flush_user_windows_to_stack(NULL);
1050 	else
1051 		flush_windows();
1052 
1053 	if (getreg(rp, rs1, &val, badaddr))
1054 		return (SIMU_FAULT);
1055 	addr = (caddr_t)val;
1056 
1057 	/* check immediate bit and use immediate field or reg (rs2) */
1058 	if (immflg) {
1059 		int imm;
1060 		imm = inst & 0x1fff;		/* mask out immediate field */
1061 		imm <<= 19;			/* sign extend it */
1062 		imm >>= 19;
1063 		addr += imm;			/* compute address */
1064 	} else {
1065 		if (getreg(rp, rs2, &val, badaddr))
1066 			return (SIMU_FAULT);
1067 		addr += val;
1068 	}
1069 
1070 	/*
1071 	 * If this is a 32-bit program, chop the address accordingly.
1072 	 */
1073 	if (curproc->p_model == DATAMODEL_ILP32 &&
1074 	    USERMODE(rp->r_tstate))
1075 		addr = (caddr_t)(caddr32_t)addr;
1076 
1077 	*badaddr = addr;
1078 	return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
1079 }
1080 
1081 /*
1082  * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
1083  * Also compute the precise address by instruction disassembly.
1084  * (v9 page faults only provide the page address via the hardware.)
1085  * Return 0 on failure (not a load or store instruction).
1086  */
1087 int
1088 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
1089 {
1090 	uint_t	inst, op3, asi;
1091 	uint_t	rd, rs1, rs2;
1092 	int	sz = 0;
1093 	int	immflg;
1094 	int	floatflg;
1095 	caddr_t	addr;
1096 	caddr_t badaddr;
1097 	uint64_t val;
1098 
1099 	if (rdwr == S_EXEC) {
1100 		*addrp = (caddr_t)rp->r_pc;
1101 		return (4);
1102 	}
1103 
1104 	/*
1105 	 * Fetch the instruction from user-level.
1106 	 * We would like to assert this:
1107 	 *   ASSERT(USERMODE(rp->r_tstate));
1108 	 * but we can't because we can reach this point from a
1109 	 * register window underflow/overflow and the v9 wbuf
1110 	 * traps call trap() with T_USER even though r_tstate
1111 	 * indicates a system trap, not a user trap.
1112 	 */
1113 	inst = fetch_user_instr((caddr_t)rp->r_pc);
1114 
1115 	op3 = (inst >> 19) & 0x3f;
1116 	rd = (inst >> 25) & 0x1f;
1117 	rs1 = (inst >> 14) & 0x1f;
1118 	rs2 = inst & 0x1f;
1119 	floatflg = (inst >> 24) & 1;
1120 	immflg = (inst >> 13) & 1;
1121 
1122 	/* if not load or store do nothing.  can't happen? */
1123 	if ((inst >> 30) != 3)
1124 		return (0);
1125 
1126 	if (immflg)
1127 		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
1128 				TSTATE_ASI_MASK);
1129 	else
1130 		asi = (inst >> 5) & 0xff;
1131 
1132 	if (floatflg) {
1133 		/* check for ld/st alternate and highest defined V9 asi */
1134 		if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
1135 			sz = extended_asi_size(asi);
1136 		} else {
1137 			switch (op3 & 3) {
1138 			case 0:
1139 				sz = 4;			/* ldf/stf/cas */
1140 				break;
1141 			case 1:
1142 				if (rd == 0)
1143 					sz = 4;		/* ldfsr/stfsr */
1144 				else
1145 					sz = 8;		/* ldxfsr/stxfsr */
1146 				break;
1147 			case 2:
1148 				if (op3 == 0x3e)
1149 					sz = 8;		/* casx */
1150 				else
1151 					sz = 16;	/* ldqf/stqf */
1152 				break;
1153 			case 3:
1154 				sz = 8;			/* lddf/stdf */
1155 				break;
1156 			}
1157 		}
1158 	} else {
1159 		switch (op3 & 0xf) {		/* map size bits to a number */
1160 		case 0:				/* lduw */
1161 		case 4:				/* stw */
1162 		case 8:				/* ldsw */
1163 		case 0xf:			/* swap */
1164 			sz = 4; break;
1165 		case 1:				/* ldub */
1166 		case 5:				/* stb */
1167 		case 9:				/* ldsb */
1168 		case 0xd:			/* ldstub */
1169 			sz = 1; break;
1170 		case 2:				/* lduh */
1171 		case 6:				/* sth */
1172 		case 0xa:			/* ldsh */
1173 			sz = 2; break;
1174 		case 3:				/* ldd */
1175 		case 7:				/* std */
1176 		case 0xb:			/* ldx */
1177 		case 0xe:			/* stx */
1178 			sz = 8; break;
1179 		}
1180 	}
1181 
1182 	if (sz == 0)	/* can't happen? */
1183 		return (0);
1184 	(void) flush_user_windows_to_stack(NULL);
1185 
1186 	if (getreg(rp, rs1, &val, &badaddr))
1187 		return (0);
1188 	addr = (caddr_t)val;
1189 
1190 	/* cas/casx don't use rs2 / simm13 to compute the address */
1191 	if ((op3 & 0x3d) != 0x3c) {
1192 		/* check immediate bit and use immediate field or reg (rs2) */
1193 		if (immflg) {
1194 			int imm;
1195 			imm  = inst & 0x1fff;	/* mask out immediate field */
1196 			imm <<= 19;		/* sign extend it */
1197 			imm >>= 19;
1198 			addr += imm;		/* compute address */
1199 		} else {
1200 			/*
1201 			 * asi's in the 0xCx range are partial store
1202 			 * instructions.  For these, rs2 is a mask, not part of
1203 			 * the address.
1204 			 */
1205 			if (!(floatflg && (asi & 0xf0) == 0xc0)) {
1206 				if (getreg(rp, rs2, &val, &badaddr))
1207 					return (0);
1208 				addr += val;
1209 			}
1210 		}
1211 	}
1212 
1213 	/*
1214 	 * If this is a 32-bit program, chop the address accordingly.
1215 	 */
1216 	if (curproc->p_model == DATAMODEL_ILP32)
1217 		addr = (caddr_t)(caddr32_t)addr;
1218 
1219 	*addrp = addr;
1220 	ASSERT(sz != 0);
1221 	return (sz);
1222 }
1223 
1224 /*
1225  * Fetch an instruction from user-level.
1226  * Deal with watchpoints, if they are in effect.
1227  */
1228 int32_t
1229 fetch_user_instr(caddr_t vaddr)
1230 {
1231 	proc_t *p = curproc;
1232 	int32_t instr;
1233 
1234 	/*
1235 	 * If this is a 32-bit program, chop the address accordingly.
1236 	 */
1237 	if (p->p_model == DATAMODEL_ILP32)
1238 		vaddr = (caddr_t)(caddr32_t)vaddr;
1239 
1240 	if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
1241 		instr = -1;
1242 
1243 	return (instr);
1244 }
1245