xref: /illumos-gate/usr/src/uts/sparc/v9/os/simulator.c (revision 032624d56c174c5c55126582b32e314a6af15522)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /* common code with bug fixes from original version in trap.c */
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/archsystm.h>
35 #include <sys/vmsystm.h>
36 #include <sys/fpu/fpusystm.h>
37 #include <sys/fpu/fpu_simulator.h>
38 #include <sys/inline.h>
39 #include <sys/debug.h>
40 #include <sys/privregs.h>
41 #include <sys/machpcb.h>
42 #include <sys/simulate.h>
43 #include <sys/proc.h>
44 #include <sys/cmn_err.h>
45 #include <sys/stack.h>
46 #include <sys/watchpoint.h>
47 #include <sys/trap.h>
48 #include <sys/machtrap.h>
49 #include <sys/mman.h>
50 #include <sys/asi.h>
51 #include <sys/copyops.h>
52 #include <vm/as.h>
53 #include <vm/page.h>
54 #include <sys/model.h>
55 #include <vm/seg_vn.h>
56 #include <sys/byteorder.h>
57 
/*
 * Instruction-decoding helpers.  Every argument and every expansion is
 * parenthesized so the macros stay correct when handed a compound
 * expression (e.g. "a | b" or "c ? x : y") or when used as an operand
 * of a larger expression (e.g. "!IS_IBIT_SET(inst)").
 */

/* Non-zero when bit 13 (the 'i' / immediate-form bit) of x is set. */
#define	IS_IBIT_SET(x)	((x) & 0x2000)

/* True for op == 2 with op3 == 0x36 (the encoding the name ties to VIS1). */
#define	IS_VIS1(op, op3)	((op) == 2 && (op3) == 0x36)

/*
 * True for an lddfa/stdfa (op == 3) whose ASI is numerically above
 * ASI_SNFL, i.e. beyond the ordinary primary/secondary ASIs.
 */
#define	IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)		\
		((op) == 3 && ((op3) == IOP_V8_LDDFA ||		\
		(op3) == IOP_V8_STDFA) && (asi) > ASI_SNFL)

/* Set non-zero (e.g. from a debugger) to trace do_unaligned() via printf. */
static int aligndebug = 0;
65 
66 /*
67  * For the sake of those who must be compatible with unaligned
68  * architectures, users can link their programs to use a
69  * corrective trap handler that will fix unaligned references
70  * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
71  * Returns 1 for success, 0 for failure.
72  */
73 
74 int
75 do_unaligned(struct regs *rp, caddr_t *badaddr)
76 {
77 	uint_t	inst, op3, asi = 0;
78 	uint_t	rd, rs1, rs2;
79 	int	sz, nf = 0, ltlend = 0;
80 	int	floatflg;
81 	int	fsrflg;
82 	int	immflg;
83 	int	lddstdflg;
84 	caddr_t	addr;
85 	uint64_t val;
86 	union {
87 		uint64_t	l[2];
88 		uint32_t	i[4];
89 		uint16_t	s[8];
90 		uint8_t		c[16];
91 	} data;
92 
93 	ASSERT(USERMODE(rp->r_tstate));
94 	inst = fetch_user_instr((caddr_t)rp->r_pc);
95 
96 	op3 = (inst >> 19) & 0x3f;
97 	rd = (inst >> 25) & 0x1f;
98 	rs1 = (inst >> 14) & 0x1f;
99 	rs2 = inst & 0x1f;
100 	floatflg = (inst >> 24) & 1;
101 	immflg = (inst >> 13) & 1;
102 	lddstdflg = fsrflg = 0;
103 
104 	/* if not load or store do nothing */
105 	if ((inst >> 30) != 3)
106 		return (0);
107 
108 	/* if ldstub or swap, do nothing */
109 	if ((inst & 0xc1680000) == 0xc0680000)
110 		return (0);
111 
112 	/* if cas/casx, do nothing */
113 	if ((inst & 0xc1e00000) == 0xc1e00000)
114 		return (0);
115 
116 	if (floatflg) {
117 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
118 		case 0: sz = 4;
119 			break;			/* ldf{a}/stf{a} */
120 		case 1: fsrflg = 1;
121 			if (rd == 0)
122 				sz = 4;		/* ldfsr/stfsr */
123 			else  if (rd == 1)
124 				sz = 8;		/* ldxfsr/stxfsr */
125 			else
126 				return (SIMU_ILLEGAL);
127 			break;
128 		case 2: sz = 16;
129 			break;		/* ldqf{a}/stqf{a} */
130 		case 3: sz = 8;
131 			break;		/* lddf{a}/stdf{a} */
132 		}
133 		/*
134 		 * Fix to access extra double register encoding plus
135 		 * compensate to access the correct fpu_dreg.
136 		 */
137 		if ((sz > 4) && (fsrflg == 0)) {
138 			if ((rd & 1) == 1)
139 				rd = (rd & 0x1e) | 0x20;
140 			rd = rd >> 1;
141 			if ((sz == 16) && ((rd & 0x1) != 0))
142 				return (SIMU_ILLEGAL);
143 		}
144 	} else {
145 		int sz_bits = (inst >> 19) & 0xf;
146 		switch (sz_bits) {		/* map size bits to a number */
147 		case 0:				/* lduw{a} */
148 		case 4:				/* stw{a} */
149 		case 8:				/* ldsw{a} */
150 		case 0xf:			/* swap */
151 			sz = 4; break;
152 		case 1:				/* ldub{a} */
153 		case 5:				/* stb{a} */
154 		case 9:				/* ldsb{a} */
155 		case 0xd:			/* ldstub */
156 			sz = 1; break;
157 		case 2:				/* lduh{a} */
158 		case 6:				/* sth{a} */
159 		case 0xa:			/* ldsh{a} */
160 			sz = 2; break;
161 		case 3:				/* ldd{a} */
162 		case 7:				/* std{a} */
163 			lddstdflg = 1;
164 			sz = 8; break;
165 		case 0xb:			/* ldx{a} */
166 		case 0xe:			/* stx{a} */
167 			sz = 8; break;
168 		}
169 	}
170 
171 
172 	/* only support primary and secondary asi's */
173 	if ((op3 >> 4) & 1) {
174 		if (immflg) {
175 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
176 					TSTATE_ASI_MASK;
177 		} else {
178 			asi = (inst >> 5) & 0xff;
179 		}
180 		switch (asi) {
181 		case ASI_P:
182 		case ASI_S:
183 			break;
184 		case ASI_PNF:
185 		case ASI_SNF:
186 			nf = 1;
187 			break;
188 		case ASI_PL:
189 		case ASI_SL:
190 			ltlend = 1;
191 			break;
192 		case ASI_PNFL:
193 		case ASI_SNFL:
194 			ltlend = 1;
195 			nf = 1;
196 			break;
197 		default:
198 			return (0);
199 		}
200 		/*
201 		 * Non-faulting stores generate a data_access_exception trap,
202 		 * according to the Spitfire manual, which should be signaled
203 		 * as an illegal instruction trap, because it can't be fixed.
204 		 */
205 		if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
206 			return (SIMU_ILLEGAL);
207 	}
208 
209 	if (aligndebug) {
210 		printf("unaligned access at %p, instruction: 0x%x\n",
211 		    (void *)rp->r_pc, inst);
212 		printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
213 		if (((inst >> 21) & 1) == 0)
214 		    printf(" %s", (((inst >> 22) & 1) ? "signed" : "unsigned"));
215 		printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
216 		printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
217 			rd, op3, rs1, rs2, (inst & 0x1fff));
218 	}
219 
220 	(void) flush_user_windows_to_stack(NULL);
221 	if (getreg(rp, rs1, &val, badaddr))
222 		return (SIMU_FAULT);
223 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
224 	if (aligndebug)
225 		printf("addr 1 = %p\n", (void *)addr);
226 
227 	/* check immediate bit and use immediate field or reg (rs2) */
228 	if (immflg) {
229 		int imm;
230 		imm  = inst & 0x1fff;		/* mask out immediate field */
231 		imm <<= 19;			/* sign extend it */
232 		imm >>= 19;
233 		addr += imm;			/* compute address */
234 	} else {
235 		if (getreg(rp, rs2, &val, badaddr))
236 			return (SIMU_FAULT);
237 		addr += val;
238 	}
239 
240 	/*
241 	 * If this is a 32-bit program, chop the address accordingly.
242 	 */
243 	if (curproc->p_model == DATAMODEL_ILP32)
244 		addr = (caddr_t)(caddr32_t)addr;
245 
246 	if (aligndebug)
247 		printf("addr 2 = %p\n", (void *)addr);
248 
249 	if (addr >= curproc->p_as->a_userlimit) {
250 		*badaddr = addr;
251 		goto badret;
252 	}
253 
254 	/* a single bit differentiates ld and st */
255 	if ((inst >> 21) & 1) {			/* store */
256 		if (floatflg) {
257 			klwp_id_t lwp = ttolwp(curthread);
258 			kfpu_t *fp = lwptofpu(lwp);
259 			/* Ensure fp has been enabled */
260 			if (fpu_exists) {
261 				if (!(_fp_read_fprs() & FPRS_FEF))
262 					fp_enable();
263 			} else {
264 				if (!fp->fpu_en)
265 					fp_enable();
266 			}
267 			/* if fpu_exists read fpu reg */
268 			if (fpu_exists) {
269 				if (fsrflg) {
270 					_fp_read_pfsr(&data.l[0]);
271 				} else {
272 					if (sz == 4) {
273 						data.i[0] = 0;
274 						_fp_read_pfreg(
275 						    (unsigned *)&data.i[1], rd);
276 					}
277 					if (sz >= 8)
278 						_fp_read_pdreg(
279 							&data.l[0], rd);
280 					if (sz == 16)
281 						_fp_read_pdreg(
282 							&data.l[1], rd+1);
283 				}
284 			} else {
285 				if (fsrflg) {
286 					/* Clear reserved bits, set version=7 */
287 					fp->fpu_fsr &= ~0x30301000;
288 					fp->fpu_fsr |= 0xE0000;
289 					data.l[0] = fp->fpu_fsr;
290 				} else {
291 					if (sz == 4) {
292 						data.i[0] = 0;
293 						data.i[1] =
294 					    (unsigned)fp->fpu_fr.fpu_regs[rd];
295 					}
296 					if (sz >= 8)
297 						data.l[0] =
298 						    fp->fpu_fr.fpu_dregs[rd];
299 					if (sz == 16)
300 						data.l[1] =
301 						    fp->fpu_fr.fpu_dregs[rd+1];
302 				}
303 			}
304 		} else {
305 			if (lddstdflg) {		/* combine the data */
306 				if (getreg(rp, rd, &data.l[0], badaddr))
307 					return (SIMU_FAULT);
308 				if (getreg(rp, rd+1, &data.l[1], badaddr))
309 					return (SIMU_FAULT);
310 				if (ltlend) {
311 					/*
312 					 * For STD, each 32-bit word is byte-
313 					 * swapped individually.  For
314 					 * simplicity we don't want to do that
315 					 * below, so we swap the words now to
316 					 * get the desired result in the end.
317 					 */
318 					data.i[0] = data.i[3];
319 				} else {
320 					data.i[0] = data.i[1];
321 					data.i[1] = data.i[3];
322 				}
323 			} else {
324 				if (getreg(rp, rd, &data.l[0], badaddr))
325 					return (SIMU_FAULT);
326 			}
327 		}
328 
329 		if (aligndebug) {
330 			if (sz == 16) {
331 				printf("data %x %x %x %x\n",
332 				    data.i[0], data.i[1], data.i[2], data.c[3]);
333 			} else {
334 				printf("data %x %x %x %x %x %x %x %x\n",
335 				    data.c[0], data.c[1], data.c[2], data.c[3],
336 				    data.c[4], data.c[5], data.c[6], data.c[7]);
337 			}
338 		}
339 
340 		if (ltlend) {
341 			if (sz == 1) {
342 				if (xcopyout_little(&data.c[7], addr,
343 				    (size_t)sz) != 0)
344 					goto badret;
345 			} else if (sz == 2) {
346 				if (xcopyout_little(&data.s[3], addr,
347 				    (size_t)sz) != 0)
348 					goto badret;
349 			} else if (sz == 4) {
350 				if (xcopyout_little(&data.i[1], addr,
351 				    (size_t)sz) != 0)
352 					goto badret;
353 			} else {
354 				if (xcopyout_little(&data.l[0], addr,
355 				    (size_t)sz) != 0)
356 					goto badret;
357 			}
358 		} else {
359 			if (sz == 1) {
360 				if (copyout(&data.c[7], addr, (size_t)sz) == -1)
361 					goto badret;
362 			} else if (sz == 2) {
363 				if (copyout(&data.s[3], addr, (size_t)sz) == -1)
364 					goto badret;
365 			} else if (sz == 4) {
366 				if (copyout(&data.i[1], addr, (size_t)sz) == -1)
367 					goto badret;
368 			} else {
369 				if (copyout(&data.l[0], addr, (size_t)sz) == -1)
370 					goto badret;
371 			}
372 		}
373 	} else {				/* load */
374 		if (sz == 1) {
375 			if (ltlend) {
376 				if (xcopyin_little(addr, &data.c[7],
377 				    (size_t)sz) != 0) {
378 					if (nf)
379 						data.c[7] = 0;
380 					else
381 						goto badret;
382 				}
383 			} else {
384 				if (copyin(addr, &data.c[7],
385 				    (size_t)sz) == -1) {
386 					if (nf)
387 						data.c[7] = 0;
388 					else
389 						goto badret;
390 				}
391 			}
392 			/* if signed and the sign bit is set extend it */
393 			if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
394 				data.i[0] = (uint_t)-1;	/* extend sign bit */
395 				data.s[2] = (ushort_t)-1;
396 				data.c[6] = (uchar_t)-1;
397 			} else {
398 				data.i[0] = 0;	/* clear upper 32+24 bits */
399 				data.s[2] = 0;
400 				data.c[6] = 0;
401 			}
402 		} else if (sz == 2) {
403 			if (ltlend) {
404 				if (xcopyin_little(addr, &data.s[3],
405 				    (size_t)sz) != 0) {
406 					if (nf)
407 						data.s[3] = 0;
408 					else
409 						goto badret;
410 				}
411 			} else {
412 				if (copyin(addr, &data.s[3],
413 				    (size_t)sz) == -1) {
414 					if (nf)
415 						data.s[3] = 0;
416 					else
417 						goto badret;
418 				}
419 			}
420 			/* if signed and the sign bit is set extend it */
421 			if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
422 				data.i[0] = (uint_t)-1;	/* extend sign bit */
423 				data.s[2] = (ushort_t)-1;
424 			} else {
425 				data.i[0] = 0;	/* clear upper 32+16 bits */
426 				data.s[2] = 0;
427 			}
428 		} else if (sz == 4) {
429 			if (ltlend) {
430 				if (xcopyin_little(addr, &data.i[1],
431 				    (size_t)sz) != 0) {
432 					if (!nf)
433 						goto badret;
434 					data.i[1] = 0;
435 				}
436 			} else {
437 				if (copyin(addr, &data.i[1],
438 				    (size_t)sz) == -1) {
439 					if (!nf)
440 						goto badret;
441 					data.i[1] = 0;
442 				}
443 			}
444 			/* if signed and the sign bit is set extend it */
445 			if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
446 				data.i[0] = (uint_t)-1;	/* extend sign bit */
447 			} else {
448 				data.i[0] = 0;	/* clear upper 32 bits */
449 			}
450 		} else {
451 			if (ltlend) {
452 				if (xcopyin_little(addr, &data.l[0],
453 				    (size_t)sz) != 0) {
454 					if (!nf)
455 						goto badret;
456 					data.l[0] = 0;
457 				}
458 			} else {
459 				if (copyin(addr, &data.l[0],
460 				    (size_t)sz) == -1) {
461 					if (!nf)
462 						goto badret;
463 					data.l[0] = 0;
464 				}
465 			}
466 		}
467 
468 		if (aligndebug) {
469 			if (sz == 16) {
470 				printf("data %x %x %x %x\n",
471 				    data.i[0], data.i[1], data.i[2], data.c[3]);
472 			} else {
473 				printf("data %x %x %x %x %x %x %x %x\n",
474 				    data.c[0], data.c[1], data.c[2], data.c[3],
475 				    data.c[4], data.c[5], data.c[6], data.c[7]);
476 			}
477 		}
478 
479 		if (floatflg) {		/* if fpu_exists write fpu reg */
480 			klwp_id_t lwp = ttolwp(curthread);
481 			kfpu_t *fp = lwptofpu(lwp);
482 			/* Ensure fp has been enabled */
483 			if (fpu_exists) {
484 				if (!(_fp_read_fprs() & FPRS_FEF))
485 					fp_enable();
486 			} else {
487 				if (!fp->fpu_en)
488 					fp_enable();
489 			}
490 			/* if fpu_exists read fpu reg */
491 			if (fpu_exists) {
492 				if (fsrflg) {
493 					_fp_write_pfsr(&data.l[0]);
494 				} else {
495 					if (sz == 4)
496 						_fp_write_pfreg(
497 						    (unsigned *)&data.i[1], rd);
498 					if (sz >= 8)
499 						_fp_write_pdreg(
500 							&data.l[0], rd);
501 					if (sz == 16)
502 						_fp_write_pdreg(
503 							&data.l[1], rd+1);
504 				}
505 			} else {
506 				if (fsrflg) {
507 					fp->fpu_fsr = data.l[0];
508 				} else {
509 					if (sz == 4)
510 						fp->fpu_fr.fpu_regs[rd] =
511 							(unsigned)data.i[1];
512 					if (sz >= 8)
513 						fp->fpu_fr.fpu_dregs[rd] =
514 							data.l[0];
515 					if (sz == 16)
516 						fp->fpu_fr.fpu_dregs[rd+1] =
517 							data.l[1];
518 				}
519 			}
520 		} else {
521 			if (lddstdflg) {		/* split the data */
522 				if (ltlend) {
523 					/*
524 					 * For LDD, each 32-bit word is byte-
525 					 * swapped individually.  We didn't
526 					 * do that above, but this will give
527 					 * us the desired result.
528 					 */
529 					data.i[3] = data.i[0];
530 				} else {
531 					data.i[3] = data.i[1];
532 					data.i[1] = data.i[0];
533 				}
534 				data.i[0] = 0;
535 				data.i[2] = 0;
536 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
537 					goto badret;
538 				if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
539 					goto badret;
540 			} else {
541 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
542 					goto badret;
543 			}
544 		}
545 	}
546 	return (SIMU_SUCCESS);
547 badret:
548 	return (SIMU_FAULT);
549 }
550 
551 
552 int
553 simulate_lddstd(struct regs *rp, caddr_t *badaddr)
554 {
555 	uint_t	inst, op3, asi = 0;
556 	uint_t	rd, rs1, rs2;
557 	int	rv = 0;
558 	int	nf = 0, ltlend = 0, usermode;
559 	int	immflg;
560 	uint64_t reven;
561 	uint64_t rodd;
562 	caddr_t	addr;
563 	uint64_t val;
564 	uint64_t data;
565 
566 	usermode = USERMODE(rp->r_tstate);
567 
568 	if (usermode)
569 		inst = fetch_user_instr((caddr_t)rp->r_pc);
570 	else
571 		inst = *(uint_t *)rp->r_pc;
572 
573 	op3 = (inst >> 19) & 0x3f;
574 	rd = (inst >> 25) & 0x1f;
575 	rs1 = (inst >> 14) & 0x1f;
576 	rs2 = inst & 0x1f;
577 	immflg = (inst >> 13) & 1;
578 
579 	if (USERMODE(rp->r_tstate))
580 		(void) flush_user_windows_to_stack(NULL);
581 	else
582 		flush_windows();
583 
584 	if ((op3 >> 4) & 1) {		/* is this LDDA/STDA? */
585 		if (immflg) {
586 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
587 					TSTATE_ASI_MASK;
588 		} else {
589 			asi = (inst >> 5) & 0xff;
590 		}
591 		switch (asi) {
592 		case ASI_P:
593 		case ASI_S:
594 			break;
595 		case ASI_PNF:
596 		case ASI_SNF:
597 			nf = 1;
598 			break;
599 		case ASI_PL:
600 		case ASI_SL:
601 			ltlend = 1;
602 			break;
603 		case ASI_PNFL:
604 		case ASI_SNFL:
605 			ltlend = 1;
606 			nf = 1;
607 			break;
608 		case ASI_AIUP:
609 		case ASI_AIUS:
610 			usermode = 1;
611 			break;
612 		case ASI_AIUPL:
613 		case ASI_AIUSL:
614 			usermode = 1;
615 			ltlend = 1;
616 			break;
617 		default:
618 			return (SIMU_ILLEGAL);
619 		}
620 	}
621 
622 	if (getreg(rp, rs1, &val, badaddr))
623 		return (SIMU_FAULT);
624 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
625 
626 	/* check immediate bit and use immediate field or reg (rs2) */
627 	if (immflg) {
628 		int imm;
629 		imm  = inst & 0x1fff;		/* mask out immediate field */
630 		imm <<= 19;			/* sign extend it */
631 		imm >>= 19;
632 		addr += imm;			/* compute address */
633 	} else {
634 		if (getreg(rp, rs2, &val, badaddr))
635 			return (SIMU_FAULT);
636 		addr += val;
637 	}
638 
639 	/*
640 	 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
641 	 * T_ALIGNMENT.  So we have to make sure that the address is
642 	 * kosher before trying to use it, because the hardware hasn't
643 	 * checked it for us yet.
644 	 */
645 	if (((uintptr_t)addr & 0x7) != 0) {
646 		if (curproc->p_fixalignment)
647 			return (do_unaligned(rp, badaddr));
648 		else
649 			return (SIMU_UNALIGN);
650 	}
651 
652 	/*
653 	 * If this is a 32-bit program, chop the address accordingly.
654 	 */
655 	if (curproc->p_model == DATAMODEL_ILP32 && usermode)
656 		addr = (caddr_t)(caddr32_t)addr;
657 
658 	if ((inst >> 21) & 1) {			/* store */
659 		if (getreg(rp, rd, &reven, badaddr))
660 			return (SIMU_FAULT);
661 		if (getreg(rp, rd+1, &rodd, badaddr))
662 			return (SIMU_FAULT);
663 		if (ltlend) {
664 			reven = BSWAP_32(reven);
665 			rodd  = BSWAP_32(rodd);
666 		}
667 		data = (reven << 32) | rodd;
668 		if (usermode) {
669 			if (suword64_nowatch(addr, data) == -1)
670 				return (SIMU_FAULT);
671 		} else {
672 			*(uint64_t *)addr = data;
673 		}
674 	} else {				/* load */
675 		if (usermode) {
676 			if (fuword64_nowatch(addr, &data)) {
677 				if (nf)
678 					data = 0;
679 				else
680 					return (SIMU_FAULT);
681 			}
682 		} else
683 			data = *(uint64_t *)addr;
684 
685 		reven = (data >> 32);
686 		rodd  = (uint64_t)(uint32_t)data;
687 		if (ltlend) {
688 			reven = BSWAP_32(reven);
689 			rodd  = BSWAP_32(rodd);
690 		}
691 
692 		if (putreg(&reven, rp, rd, badaddr) == -1)
693 			return (SIMU_FAULT);
694 		if (putreg(&rodd, rp, rd+1, badaddr) == -1)
695 			return (SIMU_FAULT);
696 	}
697 	return (SIMU_SUCCESS);
698 }
699 
700 
701 /*
702  * simulate popc
703  */
704 static int
705 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
706 {
707 	uint_t	rd, rs2, rs1;
708 	uint_t	immflg;
709 	uint64_t val, cnt = 0;
710 
711 	rd = (inst >> 25) & 0x1f;
712 	rs1 = (inst >> 14) & 0x1f;
713 	rs2 = inst & 0x1f;
714 	immflg = (inst >> 13) & 1;
715 
716 	if (rs1 > 0)
717 		return (SIMU_ILLEGAL);
718 
719 	(void) flush_user_windows_to_stack(NULL);
720 
721 	/* check immediate bit and use immediate field or reg (rs2) */
722 	if (immflg) {
723 		int64_t imm;
724 		imm  = inst & 0x1fff;		/* mask out immediate field */
725 		imm <<= 51;			/* sign extend it */
726 		imm >>= 51;
727 		if (imm != 0) {
728 			for (cnt = 0; imm != 0; imm &= imm-1)
729 				cnt++;
730 		}
731 	} else {
732 		if (getreg(rp, rs2, &val, badaddr))
733 			return (SIMU_FAULT);
734 		if (val != 0) {
735 			for (cnt = 0; val != 0; val &= val-1)
736 				cnt++;
737 		}
738 	}
739 
740 	if (putreg(&cnt, rp, rd, badaddr) == -1)
741 		return (SIMU_FAULT);
742 
743 	return (SIMU_SUCCESS);
744 }
745 
746 /*
747  * simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
748  */
749 int
750 simulate_unimp(struct regs *rp, caddr_t *badaddr)
751 {
752 	uint_t	inst, optype, op3, asi;
753 	uint_t	rs1, rd;
754 	uint_t	ignor, i;
755 	machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
756 	int	nomatch = 0;
757 	caddr_t	addr = (caddr_t)rp->r_pc;
758 	struct as *as;
759 	caddr_t	ka;
760 	pfn_t	pfnum;
761 	page_t *pp;
762 	proc_t *p = ttoproc(curthread);
763 	struct seg *mapseg;
764 	struct segvn_data *svd;
765 
766 	ASSERT(USERMODE(rp->r_tstate));
767 	inst = fetch_user_instr(addr);
768 	if (inst == (uint_t)-1) {
769 		mpcb->mpcb_illexcaddr = addr;
770 		mpcb->mpcb_illexcinsn = (uint32_t)-1;
771 		return (SIMU_ILLEGAL);
772 	}
773 
774 	/*
775 	 * When fixing dirty v8 instructions there's a race if two processors
776 	 * are executing the dirty executable at the same time.  If one
777 	 * cleans the instruction as the other is executing it the second
778 	 * processor will see a clean instruction when it comes through this
779 	 * code and will return SIMU_ILLEGAL.  To work around the race
780 	 * this code will keep track of the last illegal instruction seen
781 	 * by each lwp and will only take action if the illegal instruction
782 	 * is repeatable.
783 	 */
784 	if (addr != mpcb->mpcb_illexcaddr ||
785 	    inst != mpcb->mpcb_illexcinsn)
786 		nomatch = 1;
787 	mpcb->mpcb_illexcaddr = addr;
788 	mpcb->mpcb_illexcinsn = inst;
789 
790 	/* instruction fields */
791 	i = (inst >> 13) & 0x1;
792 	rd = (inst >> 25) & 0x1f;
793 	optype = (inst >> 30) & 0x3;
794 	op3 = (inst >> 19) & 0x3f;
795 	ignor = (inst >> 5) & 0xff;
796 	if (IS_IBIT_SET(inst)) {
797 		asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
798 		    TSTATE_ASI_MASK);
799 	} else {
800 		asi = ignor;
801 	}
802 
803 	if (IS_VIS1(optype, op3) ||
804 	    IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi)) {
805 		klwp_t *lwp = ttolwp(curthread);
806 		kfpu_t *fp = lwptofpu(lwp);
807 		if (fpu_exists) {
808 			if (!(_fp_read_fprs() & FPRS_FEF))
809 				fp_enable();
810 			_fp_read_pfsr(&fp->fpu_fsr);
811 		} else {
812 			if (!fp->fpu_en)
813 				fp_enable();
814 		}
815 		fp_precise(rp);
816 		return (SIMU_RETRY);
817 	}
818 
819 	if (optype == 2 && op3 == IOP_V8_POPC) {
820 		return (simulate_popc(rp, badaddr, inst));
821 	} else if (optype == 3 && op3 == IOP_V8_POPC) {
822 		return (SIMU_ILLEGAL);
823 	}
824 
825 	if (optype == OP_V8_LDSTR) {
826 		if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
827 		    op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
828 			return (do_unaligned(rp, badaddr));
829 	}
830 
831 	if (nomatch)
832 		return (SIMU_RETRY);
833 
834 	/*
835 	 * The rest of the code handles v8 binaries with instructions
836 	 * that have dirty (non-zero) bits in reserved or 'ignored'
837 	 * fields; these will cause core dumps on v9 machines.
838 	 *
839 	 * We only clean dirty instructions in 32-bit programs (ie, v8)
840 	 * running on SPARCv9 processors.  True v9 programs are forced
841 	 * to use the instruction set as intended.
842 	 */
843 	if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
844 		return (SIMU_ILLEGAL);
845 	switch (optype) {
846 	case OP_V8_BRANCH:
847 	case OP_V8_CALL:
848 		return (SIMU_ILLEGAL);	/* these don't have ignored fields */
849 		/*NOTREACHED*/
850 	case OP_V8_ARITH:
851 		switch (op3) {
852 		case IOP_V8_RETT:
853 			if (rd == 0 && !(i == 0 && ignor))
854 				return (SIMU_ILLEGAL);
855 			if (rd)
856 				inst &= ~(0x1f << 25);
857 			if (i == 0 && ignor)
858 				inst &= ~(0xff << 5);
859 			break;
860 		case IOP_V8_TCC:
861 			if (i == 0 && ignor != 0) {
862 				inst &= ~(0xff << 5);
863 			} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
864 				inst &= ~(0x3f << 7);
865 			} else {
866 				return (SIMU_ILLEGAL);
867 			}
868 			break;
869 		case IOP_V8_JMPL:
870 		case IOP_V8_RESTORE:
871 		case IOP_V8_SAVE:
872 			if ((op3 == IOP_V8_RETT && rd) ||
873 			    (i == 0 && ignor)) {
874 				inst &= ~(0xff << 5);
875 			} else {
876 				return (SIMU_ILLEGAL);
877 			}
878 			break;
879 		case IOP_V8_FCMP:
880 			if (rd == 0)
881 				return (SIMU_ILLEGAL);
882 			inst &= ~(0x1f << 25);
883 			break;
884 		case IOP_V8_RDASR:
885 			rs1 = ((inst >> 14) & 0x1f);
886 			if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
887 				/*
888 				 * The instruction specifies an invalid
889 				 * state register - better bail out than
890 				 * "fix" it when we're not sure what was
891 				 * intended.
892 				 */
893 				return (SIMU_ILLEGAL);
894 			}
895 				/*
896 				 * Note: this case includes the 'stbar'
897 				 * instruction (rs1 == 15 && i == 0).
898 				 */
899 				if ((ignor = (inst & 0x3fff)) != 0)
900 					inst &= ~(0x3fff);
901 			break;
902 		case IOP_V8_SRA:
903 		case IOP_V8_SRL:
904 		case IOP_V8_SLL:
905 			if (ignor == 0)
906 				return (SIMU_ILLEGAL);
907 			inst &= ~(0xff << 5);
908 			break;
909 		case IOP_V8_ADD:
910 		case IOP_V8_AND:
911 		case IOP_V8_OR:
912 		case IOP_V8_XOR:
913 		case IOP_V8_SUB:
914 		case IOP_V8_ANDN:
915 		case IOP_V8_ORN:
916 		case IOP_V8_XNOR:
917 		case IOP_V8_ADDC:
918 		case IOP_V8_UMUL:
919 		case IOP_V8_SMUL:
920 		case IOP_V8_SUBC:
921 		case IOP_V8_UDIV:
922 		case IOP_V8_SDIV:
923 		case IOP_V8_ADDcc:
924 		case IOP_V8_ANDcc:
925 		case IOP_V8_ORcc:
926 		case IOP_V8_XORcc:
927 		case IOP_V8_SUBcc:
928 		case IOP_V8_ANDNcc:
929 		case IOP_V8_ORNcc:
930 		case IOP_V8_XNORcc:
931 		case IOP_V8_ADDCcc:
932 		case IOP_V8_UMULcc:
933 		case IOP_V8_SMULcc:
934 		case IOP_V8_SUBCcc:
935 		case IOP_V8_UDIVcc:
936 		case IOP_V8_SDIVcc:
937 		case IOP_V8_TADDcc:
938 		case IOP_V8_TSUBcc:
939 		case IOP_V8_TADDccTV:
940 		case IOP_V8_TSUBccTV:
941 		case IOP_V8_MULScc:
942 		case IOP_V8_WRASR:
943 		case IOP_V8_FLUSH:
944 			if (i != 0 || ignor == 0)
945 				return (SIMU_ILLEGAL);
946 			inst &= ~(0xff << 5);
947 			break;
948 		default:
949 			return (SIMU_ILLEGAL);
950 		}
951 		break;
952 	case OP_V8_LDSTR:
953 		switch (op3) {
954 		case IOP_V8_STFSR:
955 		case IOP_V8_LDFSR:
956 			if (rd == 0 && !(i == 0 && ignor))
957 				return (SIMU_ILLEGAL);
958 			if (rd)
959 				inst &= ~(0x1f << 25);
960 			if (i == 0 && ignor)
961 				inst &= ~(0xff << 5);
962 			break;
963 		default:
964 			if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
965 			    i == 0 && ignor)
966 				inst &= ~(0xff << 5);
967 			else
968 				return (SIMU_ILLEGAL);
969 			break;
970 		}
971 		break;
972 	default:
973 		return (SIMU_ILLEGAL);
974 	}
975 
976 	as = p->p_as;
977 
978 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
979 	mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
980 	ASSERT(mapseg != NULL);
981 	svd = (struct segvn_data *)mapseg->s_data;
982 
983 	/*
984 	 * We only create COW page for MAP_PRIVATE mappings.
985 	 */
986 	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
987 	if ((svd->type & MAP_TYPE) & MAP_SHARED) {
988 		SEGVN_LOCK_EXIT(as, &svd->lock);
989 		AS_LOCK_EXIT(as, &as->a_lock);
990 		return (SIMU_ILLEGAL);
991 	}
992 	SEGVN_LOCK_EXIT(as, &svd->lock);
993 	AS_LOCK_EXIT(as, &as->a_lock);
994 
995 	/*
996 	 * A "flush" instruction using the user PC's vaddr will not work
997 	 * here, at least on Spitfire. Instead we create a temporary kernel
998 	 * mapping to the user's text page, then modify and flush that.
999 	 * Break COW by locking user page.
1000 	 */
1001 	if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
1002 	    F_SOFTLOCK, S_READ))
1003 		return (SIMU_FAULT);
1004 
1005 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1006 	pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
1007 	AS_LOCK_EXIT(as, &as->a_lock);
1008 	if (pf_is_memory(pfnum)) {
1009 		pp = page_numtopp_nolock(pfnum);
1010 		ASSERT(pp == NULL || PAGE_LOCKED(pp));
1011 	} else {
1012 		(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
1013 		    PAGESIZE, F_SOFTUNLOCK, S_READ);
1014 		return (SIMU_FAULT);
1015 	}
1016 
1017 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1018 	ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
1019 	*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
1020 	doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
1021 	ppmapout(ka);
1022 	AS_LOCK_EXIT(as, &as->a_lock);
1023 
1024 	(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
1025 	    PAGESIZE, F_SOFTUNLOCK, S_READ);
1026 	return (SIMU_RETRY);
1027 }
1028 
1029 /*
1030  * Get the value of a register for instruction simulation
1031  * by using the regs or window structure pointers.
1032  * Return 0 for success, and -1 for failure.  If there is a failure,
1033  * save the faulting address using badaddr pointer.
1034  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
1035  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
1036  */
1037 int
1038 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
1039 {
1040 	uint64_t *rgs, *sp;
1041 	int rv = 0;
1042 
1043 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
1044 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
1045 	if (reg == 0) {
1046 		*val = 0;
1047 	} else if (reg < 16) {
1048 		*val = rgs[reg];
1049 	} else if (IS_V9STACK(sp)) {
1050 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
1051 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
1052 		uint64_t res;
1053 
1054 		if (USERMODE(rp->r_tstate)) {
1055 			if (fuword64_nowatch(addr, &res) == -1) {
1056 				*badaddr = (caddr_t)addr;
1057 				rv = -1;
1058 			}
1059 		} else {
1060 			res = *addr;
1061 		}
1062 		*val = res;
1063 	} else {
1064 		uint32_t *rw = (uint32_t *)(caddr32_t)sp;
1065 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
1066 		uint32_t res;
1067 
1068 		if (USERMODE(rp->r_tstate)) {
1069 			if (fuword32_nowatch(addr, &res) == -1) {
1070 				*badaddr = (caddr_t)addr;
1071 				rv = -1;
1072 			}
1073 		} else {
1074 			res = *addr;
1075 		}
1076 		*val = (uint64_t)res;
1077 	}
1078 	return (rv);
1079 }
1080 
1081 /*
1082  * Set the value of a register after instruction simulation
1083  * by using the regs or window structure pointers.
1084  * Return 0 for succes -1 failure.
1085  * save the faulting address using badaddr pointer.
1086  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
1087  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
1088  */
1089 int
1090 putreg(uint64_t	*data, struct regs *rp, uint_t reg, caddr_t *badaddr)
1091 {
1092 	uint64_t *rgs, *sp;
1093 	int rv = 0;
1094 
1095 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
1096 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
1097 	if (reg == 0) {
1098 		return (0);
1099 	} else if (reg < 16) {
1100 		rgs[reg] = *data;
1101 	} else if (IS_V9STACK(sp)) {
1102 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
1103 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
1104 		uint64_t res;
1105 
1106 		if (USERMODE(rp->r_tstate)) {
1107 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
1108 
1109 			res = *data;
1110 			if (suword64_nowatch(addr, res) != 0) {
1111 				*badaddr = (caddr_t)addr;
1112 				rv = -1;
1113 			}
1114 			/*
1115 			 * We have changed a local or in register;
1116 			 * nuke the watchpoint return windows.
1117 			 */
1118 			mpcb->mpcb_rsp[0] = NULL;
1119 			mpcb->mpcb_rsp[1] = NULL;
1120 		} else {
1121 			res = *data;
1122 			*addr = res;
1123 		}
1124 	} else {
1125 		uint32_t *rw = (uint32_t *)(caddr32_t)sp;
1126 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
1127 		uint32_t res;
1128 
1129 		if (USERMODE(rp->r_tstate)) {
1130 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
1131 
1132 			res = (uint_t)*data;
1133 			if (suword32_nowatch(addr, res) != 0) {
1134 				*badaddr = (caddr_t)addr;
1135 				rv = -1;
1136 			}
1137 			/*
1138 			 * We have changed a local or in register;
1139 			 * nuke the watchpoint return windows.
1140 			 */
1141 			mpcb->mpcb_rsp[0] = NULL;
1142 			mpcb->mpcb_rsp[1] = NULL;
1143 
1144 		} else {
1145 			res = (uint_t)*data;
1146 			*addr = res;
1147 		}
1148 	}
1149 	return (rv);
1150 }
1151 
1152 /*
1153  * Calculate a memory reference address from instruction
1154  * operands, used to return the address of a fault, instead
1155  * of the instruction when an error occurs.  This is code that is
1156  * common with most of the routines that simulate instructions.
1157  */
1158 int
1159 calc_memaddr(struct regs *rp, caddr_t *badaddr)
1160 {
1161 	uint_t	inst;
1162 	uint_t	rd, rs1, rs2;
1163 	int	sz;
1164 	int	immflg;
1165 	int	floatflg;
1166 	caddr_t  addr;
1167 	uint64_t val;
1168 
1169 	if (USERMODE(rp->r_tstate))
1170 		inst = fetch_user_instr((caddr_t)rp->r_pc);
1171 	else
1172 		inst = *(uint_t *)rp->r_pc;
1173 
1174 	rd = (inst >> 25) & 0x1f;
1175 	rs1 = (inst >> 14) & 0x1f;
1176 	rs2 = inst & 0x1f;
1177 	floatflg = (inst >> 24) & 1;
1178 	immflg = (inst >> 13) & 1;
1179 
1180 	if (floatflg) {
1181 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
1182 		case 0: sz = 4; break;		/* ldf/stf */
1183 		case 1: return (0);		/* ld[x]fsr/st[x]fsr */
1184 		case 2: sz = 16; break;		/* ldqf/stqf */
1185 		case 3: sz = 8; break;		/* lddf/stdf */
1186 		}
1187 		/*
1188 		 * Fix to access extra double register encoding plus
1189 		 * compensate to access the correct fpu_dreg.
1190 		 */
1191 		if (sz > 4) {
1192 			if ((rd & 1) == 1)
1193 				rd = (rd & 0x1e) | 0x20;
1194 			rd = rd >> 1;
1195 		}
1196 	} else {
1197 		switch ((inst >> 19) & 0xf) {	/* map size bits to a number */
1198 		case 0:				/* lduw */
1199 		case 4:				/* stw */
1200 		case 8:				/* ldsw */
1201 		case 0xf:			/* swap */
1202 			sz = 4; break;
1203 		case 1:				/* ldub */
1204 		case 5:				/* stb */
1205 		case 9:				/* ldsb */
1206 		case 0xd:			/* ldstub */
1207 			sz = 1; break;
1208 		case 2:				/* lduh */
1209 		case 6:				/* sth */
1210 		case 0xa:			/* ldsh */
1211 			sz = 2; break;
1212 		case 3:				/* ldd */
1213 		case 7:				/* std */
1214 		case 0xb:			/* ldx */
1215 		case 0xe:			/* stx */
1216 			sz = 8; break;
1217 		}
1218 	}
1219 
1220 	if (USERMODE(rp->r_tstate))
1221 		(void) flush_user_windows_to_stack(NULL);
1222 	else
1223 		flush_windows();
1224 
1225 	if (getreg(rp, rs1, &val, badaddr))
1226 		return (SIMU_FAULT);
1227 	addr = (caddr_t)val;
1228 
1229 	/* check immediate bit and use immediate field or reg (rs2) */
1230 	if (immflg) {
1231 		int imm;
1232 		imm = inst & 0x1fff;		/* mask out immediate field */
1233 		imm <<= 19;			/* sign extend it */
1234 		imm >>= 19;
1235 		addr += imm;			/* compute address */
1236 	} else {
1237 		if (getreg(rp, rs2, &val, badaddr))
1238 			return (SIMU_FAULT);
1239 		addr += val;
1240 	}
1241 
1242 	/*
1243 	 * If this is a 32-bit program, chop the address accordingly.
1244 	 */
1245 	if (curproc->p_model == DATAMODEL_ILP32 &&
1246 	    USERMODE(rp->r_tstate))
1247 		addr = (caddr_t)(caddr32_t)addr;
1248 
1249 	*badaddr = addr;
1250 	return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
1251 }
1252 
1253 /*
1254  * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
1255  * Also compute the precise address by instruction disassembly.
1256  * (v9 page faults only provide the page address via the hardware.)
1257  * Return 0 on failure (not a load or store instruction).
1258  */
1259 int
1260 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
1261 {
1262 	uint_t	inst, op3, asi;
1263 	uint_t	rd, rs1, rs2;
1264 	int	sz = 0;
1265 	int	immflg;
1266 	int	floatflg;
1267 	caddr_t	addr;
1268 	caddr_t badaddr;
1269 	uint64_t val;
1270 
1271 	if (rdwr == S_EXEC) {
1272 		*addrp = (caddr_t)rp->r_pc;
1273 		return (4);
1274 	}
1275 
1276 	/*
1277 	 * Fetch the instruction from user-level.
1278 	 * We would like to assert this:
1279 	 *   ASSERT(USERMODE(rp->r_tstate));
1280 	 * but we can't because we can reach this point from a
1281 	 * register window underflow/overflow and the v9 wbuf
1282 	 * traps call trap() with T_USER even though r_tstate
1283 	 * indicates a system trap, not a user trap.
1284 	 */
1285 	inst = fetch_user_instr((caddr_t)rp->r_pc);
1286 
1287 	op3 = (inst >> 19) & 0x3f;
1288 	rd = (inst >> 25) & 0x1f;
1289 	rs1 = (inst >> 14) & 0x1f;
1290 	rs2 = inst & 0x1f;
1291 	floatflg = (inst >> 24) & 1;
1292 	immflg = (inst >> 13) & 1;
1293 
1294 	/* if not load or store do nothing.  can't happen? */
1295 	if ((inst >> 30) != 3)
1296 		return (0);
1297 
1298 	if (immflg)
1299 		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
1300 				TSTATE_ASI_MASK);
1301 	else
1302 		asi = (inst >> 5) & 0xff;
1303 
1304 	if (floatflg) {
1305 		/* check for ld/st alternate and highest defined V9 asi */
1306 		if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
1307 			sz = extended_asi_size(asi);
1308 		} else {
1309 			switch (op3 & 3) {
1310 			case 0:
1311 				sz = 4;			/* ldf/stf/cas */
1312 				break;
1313 			case 1:
1314 				if (rd == 0)
1315 					sz = 4;		/* ldfsr/stfsr */
1316 				else
1317 					sz = 8;		/* ldxfsr/stxfsr */
1318 				break;
1319 			case 2:
1320 				if (op3 == 0x3e)
1321 					sz = 8;		/* casx */
1322 				else
1323 					sz = 16;	/* ldqf/stqf */
1324 				break;
1325 			case 3:
1326 				sz = 8;			/* lddf/stdf */
1327 				break;
1328 			}
1329 		}
1330 	} else {
1331 		switch (op3 & 0xf) {		/* map size bits to a number */
1332 		case 0:				/* lduw */
1333 		case 4:				/* stw */
1334 		case 8:				/* ldsw */
1335 		case 0xf:			/* swap */
1336 			sz = 4; break;
1337 		case 1:				/* ldub */
1338 		case 5:				/* stb */
1339 		case 9:				/* ldsb */
1340 		case 0xd:			/* ldstub */
1341 			sz = 1; break;
1342 		case 2:				/* lduh */
1343 		case 6:				/* sth */
1344 		case 0xa:			/* ldsh */
1345 			sz = 2; break;
1346 		case 3:				/* ldd */
1347 		case 7:				/* std */
1348 		case 0xb:			/* ldx */
1349 		case 0xe:			/* stx */
1350 			sz = 8; break;
1351 		}
1352 	}
1353 
1354 	if (sz == 0)	/* can't happen? */
1355 		return (0);
1356 	(void) flush_user_windows_to_stack(NULL);
1357 
1358 	if (getreg(rp, rs1, &val, &badaddr))
1359 		return (0);
1360 	addr = (caddr_t)val;
1361 
1362 	/* cas/casx don't use rs2 / simm13 to compute the address */
1363 	if ((op3 & 0x3d) != 0x3c) {
1364 		/* check immediate bit and use immediate field or reg (rs2) */
1365 		if (immflg) {
1366 			int imm;
1367 			imm  = inst & 0x1fff;	/* mask out immediate field */
1368 			imm <<= 19;		/* sign extend it */
1369 			imm >>= 19;
1370 			addr += imm;		/* compute address */
1371 		} else {
1372 			/*
1373 			 * asi's in the 0xCx range are partial store
1374 			 * instructions.  For these, rs2 is a mask, not part of
1375 			 * the address.
1376 			 */
1377 			if (!(floatflg && (asi & 0xf0) == 0xc0)) {
1378 				if (getreg(rp, rs2, &val, &badaddr))
1379 					return (0);
1380 				addr += val;
1381 			}
1382 		}
1383 	}
1384 
1385 	/*
1386 	 * If this is a 32-bit program, chop the address accordingly.
1387 	 */
1388 	if (curproc->p_model == DATAMODEL_ILP32)
1389 		addr = (caddr_t)(caddr32_t)addr;
1390 
1391 	*addrp = addr;
1392 	ASSERT(sz != 0);
1393 	return (sz);
1394 }
1395 
1396 /*
1397  * Fetch an instruction from user-level.
1398  * Deal with watchpoints, if they are in effect.
1399  */
1400 int32_t
1401 fetch_user_instr(caddr_t vaddr)
1402 {
1403 	proc_t *p = curproc;
1404 	int32_t instr;
1405 
1406 	/*
1407 	 * If this is a 32-bit program, chop the address accordingly.
1408 	 */
1409 	if (p->p_model == DATAMODEL_ILP32)
1410 		vaddr = (caddr_t)(caddr32_t)vaddr;
1411 
1412 	if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
1413 		instr = -1;
1414 
1415 	return (instr);
1416 }
1417