1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* common code with bug fixes from original version in trap.c */
27
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/vmsystm.h>
33 #include <sys/fpu/fpusystm.h>
34 #include <sys/fpu/fpu_simulator.h>
35 #include <sys/inline.h>
36 #include <sys/debug.h>
37 #include <sys/privregs.h>
38 #include <sys/machpcb.h>
39 #include <sys/simulate.h>
40 #include <sys/proc.h>
41 #include <sys/cmn_err.h>
42 #include <sys/stack.h>
43 #include <sys/watchpoint.h>
44 #include <sys/trap.h>
45 #include <sys/machtrap.h>
46 #include <sys/mman.h>
47 #include <sys/asi.h>
48 #include <sys/copyops.h>
49 #include <vm/as.h>
50 #include <vm/page.h>
51 #include <sys/model.h>
52 #include <vm/seg_vn.h>
53 #include <sys/byteorder.h>
54 #include <sys/time.h>
55
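/*
 * Instruction-decode helpers: bit 13 is the i (immediate) bit of the
 * load/store and arithmetic formats; op == 2 with op3 == 0x36 is the
 * IMPDEP1 opcode used for VIS; op3 0x34/0x35 are FPop1/FPop2, which
 * carry the quad-precision float ops; LDDFA/STDFA with an ASI above
 * ASI_SNFL are the partial store and short floating-point loads/stores.
 */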
56 #define IS_IBIT_SET(x) (x & 0x2000)
57 #define IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
58 #define IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 || \
59 op3 == 0x35))
60 #define IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi) \
61 (op == 3 && (op3 == IOP_V8_LDDFA || \
62 op3 == IOP_V8_STDFA) && asi > ASI_SNFL)
63
64 static int aligndebug = 0;
65
66 /*
67 * For the sake of those who must be compatible with unaligned
68 * architectures, users can link their programs to use a
69 * corrective trap handler that will fix unaligned references;
70 * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
71 * Returns SIMU_SUCCESS, SIMU_FAULT, SIMU_ILLEGAL, or 0 (not handled).
72 */
73
74 int
75 do_unaligned(struct regs *rp, caddr_t *badaddr)
76 {
77 uint_t inst, op3, asi = 0;
78 uint_t rd, rs1, rs2;
79 int sz, nf = 0, ltlend = 0;
80 int floatflg;
81 int fsrflg;
82 int immflg;
83 int lddstdflg;
84 caddr_t addr;
85 uint64_t val;
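	/*
	 * Values are staged right-justified (big-endian) in this buffer:
	 * a byte lives in c[7], a halfword in s[3], a word in i[1], a
	 * doubleword in l[0], and a quad in l[0] and l[1].
	 */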
86 union {
87 uint64_t l[2];
88 uint32_t i[4];
89 uint16_t s[8];
90 uint8_t c[16];
91 } data;
92
93 ASSERT(USERMODE(rp->r_tstate));
94 inst = fetch_user_instr((caddr_t)rp->r_pc);
95
96 op3 = (inst >> 19) & 0x3f;
97 rd = (inst >> 25) & 0x1f;
98 rs1 = (inst >> 14) & 0x1f;
99 rs2 = inst & 0x1f;
100 floatflg = (inst >> 24) & 1;
101 immflg = (inst >> 13) & 1;
102 lddstdflg = fsrflg = 0;
103
104 /* if not load or store do nothing */
105 if ((inst >> 30) != 3)
106 return (0);
107
108 /* if ldstub or swap, do nothing */
109 if ((inst & 0xc1680000) == 0xc0680000)
110 return (0);
111
112 /* if cas/casx, do nothing */
113 if ((inst & 0xc1e00000) == 0xc1e00000)
114 return (0);
115
116 if (floatflg) {
117 switch ((inst >> 19) & 3) { /* map size bits to a number */
118 case 0: sz = 4;
119 break; /* ldf{a}/stf{a} */
120 case 1: fsrflg = 1;
121 if (rd == 0)
122 sz = 4; /* ldfsr/stfsr */
123 else if (rd == 1)
124 sz = 8; /* ldxfsr/stxfsr */
125 else
126 return (SIMU_ILLEGAL);
127 break;
128 case 2: sz = 16;
129 break; /* ldqf{a}/stqf{a} */
130 case 3: sz = 8;
131 break; /* lddf{a}/stdf{a} */
132 }
133 /*
134 * Fix to access extra double register encoding plus
135 * compensate to access the correct fpu_dreg.
136 */
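		/*
		 * SPARC V9 encodes double/quad registers %f32-%f62 by
		 * placing bit 5 of the register number in the low bit of
		 * rd; fpu_dregs[] is indexed by double-register number,
		 * hence the final shift right by one.
		 */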
137 if ((sz > 4) && (fsrflg == 0)) {
138 if ((rd & 1) == 1)
139 rd = (rd & 0x1e) | 0x20;
140 rd = rd >> 1;
141 if ((sz == 16) && ((rd & 0x1) != 0))
142 return (SIMU_ILLEGAL);
143 }
144 } else {
145 int sz_bits = (inst >> 19) & 0xf;
146 switch (sz_bits) { /* map size bits to a number */
147 case 0: /* lduw{a} */
148 case 4: /* stw{a} */
149 case 8: /* ldsw{a} */
150 case 0xf: /* swap */
151 sz = 4; break;
152 case 1: /* ldub{a} */
153 case 5: /* stb{a} */
154 case 9: /* ldsb{a} */
155 case 0xd: /* ldstub */
156 sz = 1; break;
157 case 2: /* lduh{a} */
158 case 6: /* sth{a} */
159 case 0xa: /* ldsh{a} */
160 sz = 2; break;
161 case 3: /* ldd{a} */
162 case 7: /* std{a} */
163 lddstdflg = 1;
164 sz = 8; break;
165 case 0xb: /* ldx{a} */
166 case 0xe: /* stx{a} */
167 sz = 8; break;
168 }
169 }
170
171
172 /* only support primary and secondary asi's */
173 if ((op3 >> 4) & 1) {
174 if (immflg) {
175 asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
176 TSTATE_ASI_MASK;
177 } else {
178 asi = (inst >> 5) & 0xff;
179 }
180 switch (asi) {
181 case ASI_P:
182 case ASI_S:
183 break;
184 case ASI_PNF:
185 case ASI_SNF:
186 nf = 1;
187 break;
188 case ASI_PL:
189 case ASI_SL:
190 ltlend = 1;
191 break;
192 case ASI_PNFL:
193 case ASI_SNFL:
194 ltlend = 1;
195 nf = 1;
196 break;
197 default:
198 return (0);
199 }
200 /*
201 * According to the Spitfire manual, non-faulting stores generate a
202 * data_access_exception trap.  Signal them as illegal instruction
203 * traps, since they cannot be fixed up.
204 */
205 if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
206 return (SIMU_ILLEGAL);
207 }
208
209 if (aligndebug) {
210 printf("unaligned access at %p, instruction: 0x%x\n",
211 (void *)rp->r_pc, inst);
212 printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
213 if (((inst >> 21) & 1) == 0)
214 printf(" %s", (((inst >> 22) & 1) ?
215 "signed" : "unsigned"));
216 printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
217 printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
218 rd, op3, rs1, rs2, (inst & 0x1fff));
219 }
220
221 (void) flush_user_windows_to_stack(NULL);
222 if (getreg(rp, rs1, &val, badaddr))
223 return (SIMU_FAULT);
224 addr = (caddr_t)val; /* convert to 32/64 bit address */
225 if (aligndebug)
226 printf("addr 1 = %p\n", (void *)addr);
227
228 /* check immediate bit and use immediate field or reg (rs2) */
229 if (immflg) {
230 int imm;
231 imm = inst & 0x1fff; /* mask out immediate field */
232 imm <<= 19; /* sign extend it */
233 imm >>= 19;
234 addr += imm; /* compute address */
235 } else {
236 if (getreg(rp, rs2, &val, badaddr))
237 return (SIMU_FAULT);
238 addr += val;
239 }
240
241 /*
242 * If this is a 32-bit program, chop the address accordingly. The
243 * intermediate uintptr_t casts prevent warnings under a certain
244 * compiler, and the temporary 32 bit storage is intended to force
245 * proper code generation and break up what would otherwise be a
246 * quadruple cast.
247 */
248 if (curproc->p_model == DATAMODEL_ILP32) {
249 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
250 addr = (caddr_t)(uintptr_t)addr32;
251 }
252
253 if (aligndebug)
254 printf("addr 2 = %p\n", (void *)addr);
255
256 if (addr >= curproc->p_as->a_userlimit) {
257 *badaddr = addr;
258 goto badret;
259 }
260
261 /* a single bit differentiates ld and st */
262 if ((inst >> 21) & 1) { /* store */
263 if (floatflg) {
264 klwp_id_t lwp = ttolwp(curthread);
265 kfpu_t *fp = lwptofpu(lwp);
266 /* Ensure fp has been enabled */
267 if (fpu_exists) {
268 if (!(_fp_read_fprs() & FPRS_FEF))
269 fp_enable();
270 } else {
271 if (!fp->fpu_en)
272 fp_enable();
273 }
274 /* if fpu_exists read fpu reg */
275 if (fpu_exists) {
276 if (fsrflg) {
277 _fp_read_pfsr(&data.l[0]);
278 } else {
279 if (sz == 4) {
280 data.i[0] = 0;
281 _fp_read_pfreg(
282 (unsigned *)&data.i[1], rd);
283 }
284 if (sz >= 8)
285 _fp_read_pdreg(
286 &data.l[0], rd);
287 if (sz == 16)
288 _fp_read_pdreg(
289 &data.l[1], rd+1);
290 }
291 } else {
292 if (fsrflg) {
293 /* Clear reserved bits, set version=7 */
294 fp->fpu_fsr &= ~0x30301000;
295 fp->fpu_fsr |= 0xE0000;
296 data.l[0] = fp->fpu_fsr;
297 } else {
298 if (sz == 4) {
299 data.i[0] = 0;
300 data.i[1] =
301 (unsigned)fp->
302 fpu_fr.fpu_regs[rd];
303 }
304 if (sz >= 8)
305 data.l[0] =
306 fp->fpu_fr.fpu_dregs[rd];
307 if (sz == 16)
308 data.l[1] =
309 fp->fpu_fr.fpu_dregs[rd+1];
310 }
311 }
312 } else {
313 if (lddstdflg) { /* combine the data */
314 if (getreg(rp, rd, &data.l[0], badaddr))
315 return (SIMU_FAULT);
316 if (getreg(rp, rd+1, &data.l[1], badaddr))
317 return (SIMU_FAULT);
318 if (ltlend) {
319 /*
320 * For STD, each 32-bit word is byte-
321 * swapped individually. For
322 * simplicity we don't want to do that
323 * below, so we swap the words now to
324 * get the desired result in the end.
325 */
326 data.i[0] = data.i[3];
327 } else {
328 data.i[0] = data.i[1];
329 data.i[1] = data.i[3];
330 }
331 } else {
332 if (getreg(rp, rd, &data.l[0], badaddr))
333 return (SIMU_FAULT);
334 }
335 }
336
337 if (aligndebug) {
338 if (sz == 16) {
339 printf("data %x %x %x %x\n",
340 data.i[0], data.i[1], data.i[2], data.i[3]);
341 } else {
342 printf("data %x %x %x %x %x %x %x %x\n",
343 data.c[0], data.c[1], data.c[2], data.c[3],
344 data.c[4], data.c[5], data.c[6], data.c[7]);
345 }
346 }
347
348 if (ltlend) {
349 if (sz == 1) {
350 if (xcopyout_little(&data.c[7], addr,
351 (size_t)sz) != 0)
352 goto badret;
353 } else if (sz == 2) {
354 if (xcopyout_little(&data.s[3], addr,
355 (size_t)sz) != 0)
356 goto badret;
357 } else if (sz == 4) {
358 if (xcopyout_little(&data.i[1], addr,
359 (size_t)sz) != 0)
360 goto badret;
361 } else {
362 if (xcopyout_little(&data.l[0], addr,
363 (size_t)sz) != 0)
364 goto badret;
365 }
366 } else {
367 if (sz == 1) {
368 if (copyout(&data.c[7], addr, (size_t)sz) == -1)
369 goto badret;
370 } else if (sz == 2) {
371 if (copyout(&data.s[3], addr, (size_t)sz) == -1)
372 goto badret;
373 } else if (sz == 4) {
374 if (copyout(&data.i[1], addr, (size_t)sz) == -1)
375 goto badret;
376 } else {
377 if (copyout(&data.l[0], addr, (size_t)sz) == -1)
378 goto badret;
379 }
380 }
381 } else { /* load */
382 if (sz == 1) {
383 if (ltlend) {
384 if (xcopyin_little(addr, &data.c[7],
385 (size_t)sz) != 0) {
386 if (nf)
387 data.c[7] = 0;
388 else
389 goto badret;
390 }
391 } else {
392 if (copyin(addr, &data.c[7],
393 (size_t)sz) == -1) {
394 if (nf)
395 data.c[7] = 0;
396 else
397 goto badret;
398 }
399 }
400 /* if signed and the sign bit is set extend it */
401 if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
402 data.i[0] = (uint_t)-1; /* extend sign bit */
403 data.s[2] = (ushort_t)-1;
404 data.c[6] = (uchar_t)-1;
405 } else {
406 data.i[0] = 0; /* clear upper 32+24 bits */
407 data.s[2] = 0;
408 data.c[6] = 0;
409 }
410 } else if (sz == 2) {
411 if (ltlend) {
412 if (xcopyin_little(addr, &data.s[3],
413 (size_t)sz) != 0) {
414 if (nf)
415 data.s[3] = 0;
416 else
417 goto badret;
418 }
419 } else {
420 if (copyin(addr, &data.s[3],
421 (size_t)sz) == -1) {
422 if (nf)
423 data.s[3] = 0;
424 else
425 goto badret;
426 }
427 }
428 /* if signed and the sign bit is set extend it */
429 if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
430 data.i[0] = (uint_t)-1; /* extend sign bit */
431 data.s[2] = (ushort_t)-1;
432 } else {
433 data.i[0] = 0; /* clear upper 32+16 bits */
434 data.s[2] = 0;
435 }
436 } else if (sz == 4) {
437 if (ltlend) {
438 if (xcopyin_little(addr, &data.i[1],
439 (size_t)sz) != 0) {
440 if (!nf)
441 goto badret;
442 data.i[1] = 0;
443 }
444 } else {
445 if (copyin(addr, &data.i[1],
446 (size_t)sz) == -1) {
447 if (!nf)
448 goto badret;
449 data.i[1] = 0;
450 }
451 }
452 /* if signed and the sign bit is set extend it */
453 if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
454 data.i[0] = (uint_t)-1; /* extend sign bit */
455 } else {
456 data.i[0] = 0; /* clear upper 32 bits */
457 }
458 } else {
459 if (ltlend) {
460 if (xcopyin_little(addr, &data.l[0],
461 (size_t)sz) != 0) {
462 if (!nf)
463 goto badret;
464 data.l[0] = 0;
465 }
466 } else {
467 if (copyin(addr, &data.l[0],
468 (size_t)sz) == -1) {
469 if (!nf)
470 goto badret;
471 data.l[0] = 0;
472 }
473 }
474 }
475
476 if (aligndebug) {
477 if (sz == 16) {
478 printf("data %x %x %x %x\n",
479 data.i[0], data.i[1], data.i[2], data.i[3]);
480 } else {
481 printf("data %x %x %x %x %x %x %x %x\n",
482 data.c[0], data.c[1], data.c[2], data.c[3],
483 data.c[4], data.c[5], data.c[6], data.c[7]);
484 }
485 }
486
487 if (floatflg) { /* if fpu_exists write fpu reg */
488 klwp_id_t lwp = ttolwp(curthread);
489 kfpu_t *fp = lwptofpu(lwp);
490 /* Ensure fp has been enabled */
491 if (fpu_exists) {
492 if (!(_fp_read_fprs() & FPRS_FEF))
493 fp_enable();
494 } else {
495 if (!fp->fpu_en)
496 fp_enable();
497 }
498 /* if fpu_exists write fpu reg */
499 if (fpu_exists) {
500 if (fsrflg) {
501 _fp_write_pfsr(&data.l[0]);
502 } else {
503 if (sz == 4)
504 _fp_write_pfreg(
505 (unsigned *)&data.i[1], rd);
506 if (sz >= 8)
507 _fp_write_pdreg(
508 &data.l[0], rd);
509 if (sz == 16)
510 _fp_write_pdreg(
511 &data.l[1], rd+1);
512 }
513 } else {
514 if (fsrflg) {
515 fp->fpu_fsr = data.l[0];
516 } else {
517 if (sz == 4)
518 fp->fpu_fr.fpu_regs[rd] =
519 (unsigned)data.i[1];
520 if (sz >= 8)
521 fp->fpu_fr.fpu_dregs[rd] =
522 data.l[0];
523 if (sz == 16)
524 fp->fpu_fr.fpu_dregs[rd+1] =
525 data.l[1];
526 }
527 }
528 } else {
529 if (lddstdflg) { /* split the data */
530 if (ltlend) {
531 /*
532 * For LDD, each 32-bit word is byte-
533 * swapped individually. We didn't
534 * do that above, but this will give
535 * us the desired result.
536 */
537 data.i[3] = data.i[0];
538 } else {
539 data.i[3] = data.i[1];
540 data.i[1] = data.i[0];
541 }
542 data.i[0] = 0;
543 data.i[2] = 0;
544 if (putreg(&data.l[0], rp, rd, badaddr) == -1)
545 goto badret;
546 if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
547 goto badret;
548 } else {
549 if (putreg(&data.l[0], rp, rd, badaddr) == -1)
550 goto badret;
551 }
552 }
553 }
554 return (SIMU_SUCCESS);
555 badret:
556 return (SIMU_FAULT);
557 }
558
559
560 int
561 simulate_lddstd(struct regs *rp, caddr_t *badaddr)
562 {
563 uint_t inst, op3, asi = 0;
564 uint_t rd, rs1, rs2;
565 int nf = 0, ltlend = 0, usermode;
566 int immflg;
567 uint64_t reven;
568 uint64_t rodd;
569 caddr_t addr;
570 uint64_t val;
571 uint64_t data;
572
573 usermode = USERMODE(rp->r_tstate);
574
575 if (usermode)
576 inst = fetch_user_instr((caddr_t)rp->r_pc);
577 else
578 inst = *(uint_t *)rp->r_pc;
579
580 op3 = (inst >> 19) & 0x3f;
581 rd = (inst >> 25) & 0x1f;
582 rs1 = (inst >> 14) & 0x1f;
583 rs2 = inst & 0x1f;
584 immflg = (inst >> 13) & 1;
585
586 if (USERMODE(rp->r_tstate))
587 (void) flush_user_windows_to_stack(NULL);
588 else
589 flush_windows();
590
591 if ((op3 >> 4) & 1) { /* is this LDDA/STDA? */
592 if (immflg) {
593 asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
594 TSTATE_ASI_MASK;
595 } else {
596 asi = (inst >> 5) & 0xff;
597 }
598 switch (asi) {
599 case ASI_P:
600 case ASI_S:
601 break;
602 case ASI_PNF:
603 case ASI_SNF:
604 nf = 1;
605 break;
606 case ASI_PL:
607 case ASI_SL:
608 ltlend = 1;
609 break;
610 case ASI_PNFL:
611 case ASI_SNFL:
612 ltlend = 1;
613 nf = 1;
614 break;
615 case ASI_AIUP:
616 case ASI_AIUS:
617 usermode = 1;
618 break;
619 case ASI_AIUPL:
620 case ASI_AIUSL:
621 usermode = 1;
622 ltlend = 1;
623 break;
624 default:
625 return (SIMU_ILLEGAL);
626 }
627 }
628
629 if (getreg(rp, rs1, &val, badaddr))
630 return (SIMU_FAULT);
631 addr = (caddr_t)val; /* convert to 32/64 bit address */
632
633 /* check immediate bit and use immediate field or reg (rs2) */
634 if (immflg) {
635 int imm;
636 imm = inst & 0x1fff; /* mask out immediate field */
637 imm <<= 19; /* sign extend it */
638 imm >>= 19;
639 addr += imm; /* compute address */
640 } else {
641 if (getreg(rp, rs2, &val, badaddr))
642 return (SIMU_FAULT);
643 addr += val;
644 }
645
646 /*
647 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
648 * T_ALIGNMENT. So we have to make sure that the address is
649 * kosher before trying to use it, because the hardware hasn't
650 * checked it for us yet.
651 */
652 if (((uintptr_t)addr & 0x7) != 0) {
653 if (curproc->p_fixalignment)
654 return (do_unaligned(rp, badaddr));
655 else
656 return (SIMU_UNALIGN);
657 }
658
659 /*
660 * If this is a 32-bit program, chop the address accordingly. The
661 * intermediate uintptr_t casts prevent warnings under a certain
662 * compiler, and the temporary 32 bit storage is intended to force
663 * proper code generation and break up what would otherwise be a
664 * quadruple cast.
665 */
666 if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
667 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
668 addr = (caddr_t)(uintptr_t)addr32;
669 }
670
671 if ((inst >> 21) & 1) { /* store */
672 if (getreg(rp, rd, &reven, badaddr))
673 return (SIMU_FAULT);
674 if (getreg(rp, rd+1, &rodd, badaddr))
675 return (SIMU_FAULT);
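		/*
		 * A little-endian ASI byte-swaps each 32-bit word of the
		 * STD pair separately, so swap the halves before merging.
		 */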
676 if (ltlend) {
677 reven = BSWAP_32(reven);
678 rodd = BSWAP_32(rodd);
679 }
680 data = (reven << 32) | rodd;
681 if (usermode) {
682 if (suword64_nowatch(addr, data) == -1)
683 return (SIMU_FAULT);
684 } else {
685 *(uint64_t *)addr = data;
686 }
687 } else { /* load */
688 if (usermode) {
689 if (fuword64_nowatch(addr, &data)) {
690 if (nf)
691 data = 0;
692 else
693 return (SIMU_FAULT);
694 }
695 } else
696 data = *(uint64_t *)addr;
697
698 reven = (data >> 32);
699 rodd = (uint64_t)(uint32_t)data;
700 if (ltlend) {
701 reven = BSWAP_32(reven);
702 rodd = BSWAP_32(rodd);
703 }
704
705 if (putreg(&reven, rp, rd, badaddr) == -1)
706 return (SIMU_FAULT);
707 if (putreg(&rodd, rp, rd+1, badaddr) == -1)
708 return (SIMU_FAULT);
709 }
710 return (SIMU_SUCCESS);
711 }
712
713
714 /*
715 * simulate popc
716 */
717 static int
718 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
719 {
720 uint_t rd, rs2, rs1;
721 uint_t immflg;
722 uint64_t val, cnt = 0;
723
724 rd = (inst >> 25) & 0x1f;
725 rs1 = (inst >> 14) & 0x1f;
726 rs2 = inst & 0x1f;
727 immflg = (inst >> 13) & 1;
728
729 if (rs1 > 0)
730 return (SIMU_ILLEGAL);
731
732 (void) flush_user_windows_to_stack(NULL);
733
734 /* check immediate bit and use immediate field or reg (rs2) */
735 if (immflg) {
736 int64_t imm;
737 imm = inst & 0x1fff; /* mask out immediate field */
738 imm <<= 51; /* sign extend it */
739 imm >>= 51;
740 if (imm != 0) {
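			/* count the set bits by repeatedly clearing the lowest set bit */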
741 for (cnt = 0; imm != 0; imm &= imm-1)
742 cnt++;
743 }
744 } else {
745 if (getreg(rp, rs2, &val, badaddr))
746 return (SIMU_FAULT);
747 if (val != 0) {
748 for (cnt = 0; val != 0; val &= val-1)
749 cnt++;
750 }
751 }
752
753 if (putreg(&cnt, rp, rd, badaddr) == -1)
754 return (SIMU_FAULT);
755
756 return (SIMU_SUCCESS);
757 }
758
759 /*
760 * simulate mulscc
761 */
762 static int
763 simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
764 {
765 uint32_t s1, s2;
766 uint32_t c, d, v;
767 uint_t rd, rs1;
768 int64_t d64;
769 uint64_t ud64;
770 uint64_t drs1;
771
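	/*
	 * MULScc performs one step of a shift-and-add multiply: r[rs1] is
	 * shifted right one bit with (icc.n xor icc.v) shifted into bit 31;
	 * if the low bit of %y is set, the multiplier (r[rs2] or simm13) is
	 * added to that value; the sum is written to r[rd] and sets the
	 * condition codes; finally %y is shifted right one bit with the old
	 * low bit of r[rs1] shifted into bit 31 of %y.
	 */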
772 (void) flush_user_windows_to_stack(NULL);
773
774 if ((inst >> 13) & 1) { /* immediate */
775 d64 = inst & 0x1fff;
776 d64 <<= 51; /* sign extend it */
777 d64 >>= 51;
778 } else {
779 uint_t rs2;
780 uint64_t drs2;
781
782 if (inst & 0x1fe0) {
783 return (SIMU_ILLEGAL);
784 }
785 rs2 = inst & 0x1f;
786 if (getreg(rp, rs2, &drs2, badaddr)) {
787 return (SIMU_FAULT);
788 }
789 d64 = (int64_t)drs2;
790 }
791
792 rs1 = (inst >> 14) & 0x1f;
793 if (getreg(rp, rs1, &drs1, badaddr)) {
794 return (SIMU_FAULT);
795 }
796 /* icc.n xor icc.v */
797 s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
798 ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
799 s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);
800
801 if (rp->r_y & 1) {
802 s2 = (uint32_t)d64;
803 } else {
804 s2 = 0;
805 }
806 d = s1 + s2;
807
808 ud64 = (uint64_t)d;
809
810 /* set the icc flags */
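	/* signed overflow and carry-out derived from the operand and result sign bits */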
811 v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
812 c = (s1 & s2) | (~d & (s1 | s2));
813 rp->r_tstate &= ~TSTATE_ICC;
814 rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
815 rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
816 rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
817 rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);
818
819 if (rp->r_tstate & TSTATE_IC) {
820 ud64 |= (1ULL << 32);
821 }
822
823 /* set the xcc flags */
824 rp->r_tstate &= ~TSTATE_XCC;
825 if (ud64 == 0) {
826 rp->r_tstate |= TSTATE_XZ;
827 }
828
829 rd = (inst >> 25) & 0x1f;
830 if (putreg(&ud64, rp, rd, badaddr)) {
831 return (SIMU_FAULT);
832 }
833
834 d64 = (drs1 << 32) | (uint32_t)rp->r_y;
835 d64 >>= 1;
836 rp->r_y = (uint32_t)d64;
837
838 return (SIMU_SUCCESS);
839 }
840
841 /*
842 * simulate unimplemented instructions (popc, mulscc, ldqf{a}, stqf{a})
843 */
844 int
845 simulate_unimp(struct regs *rp, caddr_t *badaddr)
846 {
847 uint_t inst, optype, op3, asi;
848 uint_t rs1, rd;
849 uint_t ignor, i;
850 machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
851 int nomatch = 0;
852 caddr_t addr = (caddr_t)rp->r_pc;
853 struct as *as;
854 caddr_t ka;
855 pfn_t pfnum;
856 page_t *pp;
857 proc_t *p = ttoproc(curthread);
858 struct seg *mapseg;
859 struct segvn_data *svd;
860
861 ASSERT(USERMODE(rp->r_tstate));
862 inst = fetch_user_instr(addr);
863 if (inst == (uint_t)-1) {
864 mpcb->mpcb_illexcaddr = addr;
865 mpcb->mpcb_illexcinsn = (uint32_t)-1;
866 return (SIMU_ILLEGAL);
867 }
868
869 /*
870 * When fixing dirty v8 instructions there's a race if two processors
871 * are executing the dirty executable at the same time. If one
872 * cleans the instruction as the other is executing it the second
873 * processor will see a clean instruction when it comes through this
874 * code and will return SIMU_ILLEGAL. To work around the race
875 * this code will keep track of the last illegal instruction seen
876 * by each lwp and will only take action if the illegal instruction
877 * is repeatable.
878 */
879 if (addr != mpcb->mpcb_illexcaddr ||
880 inst != mpcb->mpcb_illexcinsn)
881 nomatch = 1;
882 mpcb->mpcb_illexcaddr = addr;
883 mpcb->mpcb_illexcinsn = inst;
884
885 /* instruction fields */
886 i = (inst >> 13) & 0x1;
887 rd = (inst >> 25) & 0x1f;
888 optype = (inst >> 30) & 0x3;
889 op3 = (inst >> 19) & 0x3f;
890 ignor = (inst >> 5) & 0xff;
891 if (IS_IBIT_SET(inst)) {
892 asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
893 TSTATE_ASI_MASK);
894 } else {
895 asi = ignor;
896 }
897
898 if (IS_VIS1(optype, op3) ||
899 IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
900 IS_FLOAT_QUAD_OP(optype, op3)) {
901 klwp_t *lwp = ttolwp(curthread);
902 kfpu_t *fp = lwptofpu(lwp);
903 if (fpu_exists) {
904 if (!(_fp_read_fprs() & FPRS_FEF))
905 fp_enable();
906 _fp_read_pfsr(&fp->fpu_fsr);
907 } else {
908 if (!fp->fpu_en)
909 fp_enable();
910 }
911 fp_precise(rp);
912 return (SIMU_RETRY);
913 }
914
915 if (optype == 2 && op3 == IOP_V8_POPC) {
916 return (simulate_popc(rp, badaddr, inst));
917 } else if (optype == 3 && op3 == IOP_V8_POPC) {
918 return (SIMU_ILLEGAL);
919 } else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
920 return (simulate_mulscc(rp, badaddr, inst));
921 }
922
923 if (optype == OP_V8_LDSTR) {
924 if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
925 op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
926 return (do_unaligned(rp, badaddr));
927 }
928
929 /* This is a new illegal instruction, so reset illexccnt and retry. */
930 if (nomatch) {
931 mpcb->mpcb_illexccnt = 0;
932 return (SIMU_RETRY);
933 }
934
935 /*
936 * In order to keep us from entering into an infinite loop while
937 * attempting to clean up faulty instructions, we will return
938 * SIMU_ILLEGAL once we've cleaned up the instruction as much
939 * as we can, and still end up here.
940 */
941 if (mpcb->mpcb_illexccnt >= 3)
942 return (SIMU_ILLEGAL);
943
944 mpcb->mpcb_illexccnt += 1;
945
946 /*
947 * The rest of the code handles v8 binaries with instructions
948 * that have dirty (non-zero) bits in reserved or 'ignored'
949 * fields; these will cause core dumps on v9 machines.
950 *
951 * We only clean dirty instructions in 32-bit programs (ie, v8)
952 * running on SPARCv9 processors. True v9 programs are forced
953 * to use the instruction set as intended.
954 */
955 if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
956 return (SIMU_ILLEGAL);
957 switch (optype) {
958 case OP_V8_BRANCH:
959 case OP_V8_CALL:
960 return (SIMU_ILLEGAL); /* these don't have ignored fields */
961 /*NOTREACHED*/
962 case OP_V8_ARITH:
963 switch (op3) {
964 case IOP_V8_RETT:
965 if (rd == 0 && !(i == 0 && ignor))
966 return (SIMU_ILLEGAL);
967 if (rd)
968 inst &= ~(0x1f << 25);
969 if (i == 0 && ignor)
970 inst &= ~(0xff << 5);
971 break;
972 case IOP_V8_TCC:
973 if (i == 0 && ignor != 0) {
974 inst &= ~(0xff << 5);
975 } else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
976 inst &= ~(0x3f << 7);
977 } else {
978 return (SIMU_ILLEGAL);
979 }
980 break;
981 case IOP_V8_JMPL:
982 case IOP_V8_RESTORE:
983 case IOP_V8_SAVE:
984 if ((op3 == IOP_V8_RETT && rd) ||
985 (i == 0 && ignor)) {
986 inst &= ~(0xff << 5);
987 } else {
988 return (SIMU_ILLEGAL);
989 }
990 break;
991 case IOP_V8_FCMP:
992 if (rd == 0)
993 return (SIMU_ILLEGAL);
994 inst &= ~(0x1f << 25);
995 break;
996 case IOP_V8_RDASR:
997 rs1 = ((inst >> 14) & 0x1f);
998 if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
999 /*
1000 * The instruction specifies an invalid
1001 * state register - better bail out than
1002 * "fix" it when we're not sure what was
1003 * intended.
1004 */
1005 return (SIMU_ILLEGAL);
1006 }
1007 /*
1008 * Note: this case includes the 'stbar'
1009 * instruction (rs1 == 15 && i == 0).
1010 */
1011 if ((ignor = (inst & 0x3fff)) != 0)
1012 inst &= ~(0x3fff);
1013 break;
1014 case IOP_V8_SRA:
1015 case IOP_V8_SRL:
1016 case IOP_V8_SLL:
1017 if (ignor == 0)
1018 return (SIMU_ILLEGAL);
1019 inst &= ~(0xff << 5);
1020 break;
1021 case IOP_V8_ADD:
1022 case IOP_V8_AND:
1023 case IOP_V8_OR:
1024 case IOP_V8_XOR:
1025 case IOP_V8_SUB:
1026 case IOP_V8_ANDN:
1027 case IOP_V8_ORN:
1028 case IOP_V8_XNOR:
1029 case IOP_V8_ADDC:
1030 case IOP_V8_UMUL:
1031 case IOP_V8_SMUL:
1032 case IOP_V8_SUBC:
1033 case IOP_V8_UDIV:
1034 case IOP_V8_SDIV:
1035 case IOP_V8_ADDcc:
1036 case IOP_V8_ANDcc:
1037 case IOP_V8_ORcc:
1038 case IOP_V8_XORcc:
1039 case IOP_V8_SUBcc:
1040 case IOP_V8_ANDNcc:
1041 case IOP_V8_ORNcc:
1042 case IOP_V8_XNORcc:
1043 case IOP_V8_ADDCcc:
1044 case IOP_V8_UMULcc:
1045 case IOP_V8_SMULcc:
1046 case IOP_V8_SUBCcc:
1047 case IOP_V8_UDIVcc:
1048 case IOP_V8_SDIVcc:
1049 case IOP_V8_TADDcc:
1050 case IOP_V8_TSUBcc:
1051 case IOP_V8_TADDccTV:
1052 case IOP_V8_TSUBccTV:
1053 case IOP_V8_MULScc:
1054 case IOP_V8_WRASR:
1055 case IOP_V8_FLUSH:
1056 if (i != 0 || ignor == 0)
1057 return (SIMU_ILLEGAL);
1058 inst &= ~(0xff << 5);
1059 break;
1060 default:
1061 return (SIMU_ILLEGAL);
1062 }
1063 break;
1064 case OP_V8_LDSTR:
1065 switch (op3) {
1066 case IOP_V8_STFSR:
1067 case IOP_V8_LDFSR:
1068 if (rd == 0 && !(i == 0 && ignor))
1069 return (SIMU_ILLEGAL);
1070 if (rd)
1071 inst &= ~(0x1f << 25);
1072 if (i == 0 && ignor)
1073 inst &= ~(0xff << 5);
1074 break;
1075 default:
1076 if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
1077 i == 0 && ignor)
1078 inst &= ~(0xff << 5);
1079 else
1080 return (SIMU_ILLEGAL);
1081 break;
1082 }
1083 break;
1084 default:
1085 return (SIMU_ILLEGAL);
1086 }
1087
1088 as = p->p_as;
1089
1090 AS_LOCK_ENTER(as, RW_READER);
1091 mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
1092 ASSERT(mapseg != NULL);
1093 svd = (struct segvn_data *)mapseg->s_data;
1094
1095 /*
1096 * We only create a COW page for MAP_PRIVATE mappings.
1097 */
1098 SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
1099 if ((svd->type & MAP_TYPE) & MAP_SHARED) {
1100 SEGVN_LOCK_EXIT(as, &svd->lock);
1101 AS_LOCK_EXIT(as);
1102 return (SIMU_ILLEGAL);
1103 }
1104 SEGVN_LOCK_EXIT(as, &svd->lock);
1105 AS_LOCK_EXIT(as);
1106
1107 /*
1108 * A "flush" instruction using the user PC's vaddr will not work
1109 * here, at least on Spitfire. Instead we create a temporary kernel
1110 * mapping to the user's text page, then modify and flush that.
1111 * Break COW by locking user page.
1112 */
1113 if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
1114 F_SOFTLOCK, S_READ))
1115 return (SIMU_FAULT);
1116
1117 AS_LOCK_ENTER(as, RW_READER);
1118 pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
1119 AS_LOCK_EXIT(as);
1120 if (pf_is_memory(pfnum)) {
1121 pp = page_numtopp_nolock(pfnum);
1122 ASSERT(pp == NULL || PAGE_LOCKED(pp));
1123 } else {
1124 (void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
1125 PAGESIZE, F_SOFTUNLOCK, S_READ);
1126 return (SIMU_FAULT);
1127 }
1128
1129 AS_LOCK_ENTER(as, RW_READER);
1130 ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
1131 *(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
1132 doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
1133 ppmapout(ka);
1134 AS_LOCK_EXIT(as);
1135
1136 (void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
1137 PAGESIZE, F_SOFTUNLOCK, S_READ);
1138 return (SIMU_RETRY);
1139 }
1140
1141 /*
1142 * Simulate a "rd %tick" or "rd %stick" (%asr24) instruction.
1143 */
1144 int
1145 simulate_rdtick(struct regs *rp)
1146 {
1147 uint_t inst, op, op3, rd, rs1, i;
1148 caddr_t badaddr;
1149
1150 inst = fetch_user_instr((caddr_t)rp->r_pc);
1151 op = (inst >> 30) & 0x3;
1152 rd = (inst >> 25) & 0x1F;
1153 op3 = (inst >> 19) & 0x3F;
1154 i = (inst >> 13) & 0x1;
1155
1156 /*
1157 * Make sure this is either a %tick read (rs1 == 0x4) or
1158 * a %stick read (rs1 == 0x18) instruction.
1159 */
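	/* op == 2 with op3 == 0x28 is the RDASR/RD format */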
1160 if (op == 2 && op3 == 0x28 && i == 0) {
1161 rs1 = (inst >> 14) & 0x1F;
1162
1163 if (rs1 == 0x4) {
1164 uint64_t tick;
1165 (void) flush_user_windows_to_stack(NULL);
1166 tick = gettick_counter();
1167 if (putreg(&tick, rp, rd, &badaddr) == 0)
1168 return (SIMU_SUCCESS);
1169 } else if (rs1 == 0x18) {
1170 uint64_t stick;
1171 (void) flush_user_windows_to_stack(NULL);
1172 stick = gethrtime_unscaled();
1173 if (putreg(&stick, rp, rd, &badaddr) == 0)
1174 return (SIMU_SUCCESS);
1175 }
1176 }
1177
1178 return (SIMU_FAULT);
1179 }
1180
1181 /*
1182 * Get the value of a register for instruction simulation
1183 * by using the regs or window structure pointers.
1184 * Return 0 for success, and -1 for failure. If there is a failure,
1185 * save the faulting address using badaddr pointer.
1186 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
1187 * Don't truncate globals/outs for 32 bit programs, for v8+ support.
1188 */
1189 int
1190 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
1191 {
1192 uint64_t *rgs, *sp;
1193 int rv = 0;
1194
1195 rgs = (uint64_t *)&rp->r_ps; /* globals and outs */
1196 sp = (uint64_t *)rp->r_sp; /* ins and locals */
1197 if (reg == 0) {
1198 *val = 0;
1199 } else if (reg < 16) {
1200 *val = rgs[reg];
1201 } else if (IS_V9STACK(sp)) {
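		/*
		 * 64-bit frame: the ins and locals live in the register
		 * window saved at %sp plus the V9 stack bias.
		 */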
1202 uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
1203 uint64_t *addr = (uint64_t *)&rw[reg - 16];
1204 uint64_t res;
1205
1206 if (USERMODE(rp->r_tstate)) {
1207 if (fuword64_nowatch(addr, &res) == -1) {
1208 *badaddr = (caddr_t)addr;
1209 rv = -1;
1210 }
1211 } else {
1212 res = *addr;
1213 }
1214 *val = res;
1215 } else {
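		/* 32-bit (v8) frame: window registers are 32-bit words at the unbiased %sp */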
1216 caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
1217 uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
1218 uint32_t *addr = (uint32_t *)&rw[reg - 16];
1219 uint32_t res;
1220
1221 if (USERMODE(rp->r_tstate)) {
1222 if (fuword32_nowatch(addr, &res) == -1) {
1223 *badaddr = (caddr_t)addr;
1224 rv = -1;
1225 }
1226 } else {
1227 res = *addr;
1228 }
1229 *val = (uint64_t)res;
1230 }
1231 return (rv);
1232 }
1233
1234 /*
1235 * Set the value of a register after instruction simulation
1236 * by using the regs or window structure pointers.
1237 * Return 0 for success, and -1 for failure. If there is a failure,
1238 * save the faulting address using the badaddr pointer.
1239 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
1240 * Don't truncate globals/outs for 32 bit programs, for v8+ support.
1241 */
1242 int
1243 putreg(uint64_t *data, struct regs *rp, uint_t reg, caddr_t *badaddr)
1244 {
1245 uint64_t *rgs, *sp;
1246 int rv = 0;
1247
1248 rgs = (uint64_t *)&rp->r_ps; /* globals and outs */
1249 sp = (uint64_t *)rp->r_sp; /* ins and locals */
1250 if (reg == 0) {
1251 return (0);
1252 } else if (reg < 16) {
1253 rgs[reg] = *data;
1254 } else if (IS_V9STACK(sp)) {
1255 uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
1256 uint64_t *addr = (uint64_t *)&rw[reg - 16];
1257 uint64_t res;
1258
1259 if (USERMODE(rp->r_tstate)) {
1260 struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
1261
1262 res = *data;
1263 if (suword64_nowatch(addr, res) != 0) {
1264 *badaddr = (caddr_t)addr;
1265 rv = -1;
1266 }
1267 /*
1268 * We have changed a local or in register;
1269 * nuke the watchpoint return windows.
1270 */
1271 mpcb->mpcb_rsp[0] = NULL;
1272 mpcb->mpcb_rsp[1] = NULL;
1273 } else {
1274 res = *data;
1275 *addr = res;
1276 }
1277 } else {
1278 caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
1279 uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
1280 uint32_t *addr = (uint32_t *)&rw[reg - 16];
1281 uint32_t res;
1282
1283 if (USERMODE(rp->r_tstate)) {
1284 struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
1285
1286 res = (uint_t)*data;
1287 if (suword32_nowatch(addr, res) != 0) {
1288 *badaddr = (caddr_t)addr;
1289 rv = -1;
1290 }
1291 /*
1292 * We have changed a local or in register;
1293 * nuke the watchpoint return windows.
1294 */
1295 mpcb->mpcb_rsp[0] = NULL;
1296 mpcb->mpcb_rsp[1] = NULL;
1297
1298 } else {
1299 res = (uint_t)*data;
1300 *addr = res;
1301 }
1302 }
1303 return (rv);
1304 }
1305
1306 /*
1307 * Calculate a memory reference address from instruction
1308 * operands, used to return the address of a fault, instead
1309 * of the instruction when an error occurs. This is code that is
1310 * common with most of the routines that simulate instructions.
1311 */
1312 int
1313 calc_memaddr(struct regs *rp, caddr_t *badaddr)
1314 {
1315 uint_t inst;
1316 uint_t rd, rs1, rs2;
1317 int sz;
1318 int immflg;
1319 int floatflg;
1320 caddr_t addr;
1321 uint64_t val;
1322
1323 if (USERMODE(rp->r_tstate))
1324 inst = fetch_user_instr((caddr_t)rp->r_pc);
1325 else
1326 inst = *(uint_t *)rp->r_pc;
1327
1328 rd = (inst >> 25) & 0x1f;
1329 rs1 = (inst >> 14) & 0x1f;
1330 rs2 = inst & 0x1f;
1331 floatflg = (inst >> 24) & 1;
1332 immflg = (inst >> 13) & 1;
1333
1334 if (floatflg) {
1335 switch ((inst >> 19) & 3) { /* map size bits to a number */
1336 case 0: sz = 4; break; /* ldf/stf */
1337 case 1: return (0); /* ld[x]fsr/st[x]fsr */
1338 case 2: sz = 16; break; /* ldqf/stqf */
1339 case 3: sz = 8; break; /* lddf/stdf */
1340 }
1341 /*
1342 * Fix to access extra double register encoding plus
1343 * compensate to access the correct fpu_dreg.
1344 */
1345 if (sz > 4) {
1346 if ((rd & 1) == 1)
1347 rd = (rd & 0x1e) | 0x20;
1348 rd = rd >> 1;
1349 }
1350 } else {
1351 switch ((inst >> 19) & 0xf) { /* map size bits to a number */
1352 case 0: /* lduw */
1353 case 4: /* stw */
1354 case 8: /* ldsw */
1355 case 0xf: /* swap */
1356 sz = 4; break;
1357 case 1: /* ldub */
1358 case 5: /* stb */
1359 case 9: /* ldsb */
1360 case 0xd: /* ldstub */
1361 sz = 1; break;
1362 case 2: /* lduh */
1363 case 6: /* sth */
1364 case 0xa: /* ldsh */
1365 sz = 2; break;
1366 case 3: /* ldd */
1367 case 7: /* std */
1368 case 0xb: /* ldx */
1369 case 0xe: /* stx */
1370 sz = 8; break;
1371 }
1372 }
1373
1374 if (USERMODE(rp->r_tstate))
1375 (void) flush_user_windows_to_stack(NULL);
1376 else
1377 flush_windows();
1378
1379 if (getreg(rp, rs1, &val, badaddr))
1380 return (SIMU_FAULT);
1381 addr = (caddr_t)val;
1382
1383 /* check immediate bit and use immediate field or reg (rs2) */
1384 if (immflg) {
1385 int imm;
1386 imm = inst & 0x1fff; /* mask out immediate field */
1387 imm <<= 19; /* sign extend it */
1388 imm >>= 19;
1389 addr += imm; /* compute address */
1390 } else {
1391 if (getreg(rp, rs2, &val, badaddr))
1392 return (SIMU_FAULT);
1393 addr += val;
1394 }
1395
1396 /*
1397 * If this is a 32-bit program, chop the address accordingly. The
1398 * intermediate uintptr_t casts prevent warnings under a certain
1399 * compiler, and the temporary 32 bit storage is intended to force
1400 * proper code generation and break up what would otherwise be a
1401 * quadruple cast.
1402 */
1403 if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
1404 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
1405 addr = (caddr_t)(uintptr_t)addr32;
1406 }
1407
1408 *badaddr = addr;
1409 return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
1410 }
1411
1412 /*
1413 * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
1414 * Also compute the precise address by instruction disassembly.
1415 * (v9 page faults only provide the page address via the hardware.)
1416 * Return 0 on failure (not a load or store instruction).
1417 */
1418 int
1419 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
1420 {
1421 uint_t inst, op3, asi;
1422 uint_t rd, rs1, rs2;
1423 int sz = 0;
1424 int immflg;
1425 int floatflg;
1426 caddr_t addr;
1427 caddr_t badaddr;
1428 uint64_t val;
1429
1430 if (rdwr == S_EXEC) {
1431 *addrp = (caddr_t)rp->r_pc;
1432 return (4);
1433 }
1434
1435 /*
1436 * Fetch the instruction from user-level.
1437 * We would like to assert this:
1438 * ASSERT(USERMODE(rp->r_tstate));
1439 * but we can't because we can reach this point from a
1440 * register window underflow/overflow and the v9 wbuf
1441 * traps call trap() with T_USER even though r_tstate
1442 * indicates a system trap, not a user trap.
1443 */
1444 inst = fetch_user_instr((caddr_t)rp->r_pc);
1445
1446 op3 = (inst >> 19) & 0x3f;
1447 rd = (inst >> 25) & 0x1f;
1448 rs1 = (inst >> 14) & 0x1f;
1449 rs2 = inst & 0x1f;
1450 floatflg = (inst >> 24) & 1;
1451 immflg = (inst >> 13) & 1;
1452
1453 /* if not load or store do nothing. can't happen? */
1454 if ((inst >> 30) != 3)
1455 return (0);
1456
1457 if (immflg)
1458 asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
1459 TSTATE_ASI_MASK);
1460 else
1461 asi = (inst >> 5) & 0xff;
1462
1463 if (floatflg) {
1464 /* check for ld/st alternate and highest defined V9 asi */
1465 if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
1466 sz = extended_asi_size(asi);
1467 } else {
1468 switch (op3 & 3) {
1469 case 0:
1470 sz = 4; /* ldf/stf/cas */
1471 break;
1472 case 1:
1473 if (rd == 0)
1474 sz = 4; /* ldfsr/stfsr */
1475 else
1476 sz = 8; /* ldxfsr/stxfsr */
1477 break;
1478 case 2:
1479 if (op3 == 0x3e)
1480 sz = 8; /* casx */
1481 else
1482 sz = 16; /* ldqf/stqf */
1483 break;
1484 case 3:
1485 sz = 8; /* lddf/stdf */
1486 break;
1487 }
1488 }
1489 } else {
1490 switch (op3 & 0xf) { /* map size bits to a number */
1491 case 0: /* lduw */
1492 case 4: /* stw */
1493 case 8: /* ldsw */
1494 case 0xf: /* swap */
1495 sz = 4; break;
1496 case 1: /* ldub */
1497 case 5: /* stb */
1498 case 9: /* ldsb */
1499 case 0xd: /* ldstub */
1500 sz = 1; break;
1501 case 2: /* lduh */
1502 case 6: /* sth */
1503 case 0xa: /* ldsh */
1504 sz = 2; break;
1505 case 3: /* ldd */
1506 case 7: /* std */
1507 case 0xb: /* ldx */
1508 case 0xe: /* stx */
1509 sz = 8; break;
1510 }
1511 }
1512
1513 if (sz == 0) /* can't happen? */
1514 return (0);
1515 (void) flush_user_windows_to_stack(NULL);
1516
1517 if (getreg(rp, rs1, &val, &badaddr))
1518 return (0);
1519 addr = (caddr_t)val;
1520
1521 /* cas/casx don't use rs2 / simm13 to compute the address */
1522 if ((op3 & 0x3d) != 0x3c) {
1523 /* check immediate bit and use immediate field or reg (rs2) */
1524 if (immflg) {
1525 int imm;
1526 imm = inst & 0x1fff; /* mask out immediate field */
1527 imm <<= 19; /* sign extend it */
1528 imm >>= 19;
1529 addr += imm; /* compute address */
1530 } else {
1531 /*
1532 * asi's in the 0xCx range are partial store
1533 * instructions. For these, rs2 is a mask, not part of
1534 * the address.
1535 */
1536 if (!(floatflg && (asi & 0xf0) == 0xc0)) {
1537 if (getreg(rp, rs2, &val, &badaddr))
1538 return (0);
1539 addr += val;
1540 }
1541 }
1542 }
1543
1544 /*
1545 * If this is a 32-bit program, chop the address accordingly. The
1546 * intermediate uintptr_t casts prevent warnings under a certain
1547 * compiler, and the temporary 32 bit storage is intended to force
1548 * proper code generation and break up what would otherwise be a
1549 * quadruple cast.
1550 */
1551 if (curproc->p_model == DATAMODEL_ILP32) {
1552 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
1553 addr = (caddr_t)(uintptr_t)addr32;
1554 }
1555
1556 *addrp = addr;
1557 ASSERT(sz != 0);
1558 return (sz);
1559 }
1560
1561 /*
1562 * Fetch an instruction from user-level.
1563 * Deal with watchpoints, if they are in effect.
1564 */
1565 int32_t
1566 fetch_user_instr(caddr_t vaddr)
1567 {
1568 proc_t *p = curproc;
1569 int32_t instr;
1570
1571 /*
1572 * If this is a 32-bit program, chop the address accordingly. The
1573 * intermediate uintptr_t casts prevent warnings under a certain
1574 * compiler, and the temporary 32 bit storage is intended to force
1575 * proper code generation and break up what would otherwise be a
1576 * quadruple cast.
1577 */
1578 if (p->p_model == DATAMODEL_ILP32) {
1579 caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
1580 vaddr = (caddr_t)(uintptr_t)vaddr32;
1581 }
1582
1583 if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
1584 instr = -1;
1585
1586 return (instr);
1587 }
1588