/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* common code with bug fixes from original version in trap.c */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/fpu/fpusystm.h>
#include <sys/fpu/fpu_simulator.h>
#include <sys/inline.h>
#include <sys/debug.h>
#include <sys/privregs.h>
#include <sys/machpcb.h>
#include <sys/simulate.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/stack.h>
#include <sys/watchpoint.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/mman.h>
#include <sys/asi.h>
#include <sys/copyops.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/model.h>
#include <vm/seg_vn.h>
#include <sys/byteorder.h>
#include <sys/time.h>

#define	IS_IBIT_SET(x)	(x & 0x2000)
#define	IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
#define	IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 ||	\
		op3 == 0x35))
#define	IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)	\
		(op == 3 && (op3 == IOP_V8_LDDFA ||	\
		op3 == IOP_V8_STDFA) && asi > ASI_SNFL)

static int aligndebug = 0;
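
/*
 * Debug aid (a note on intended use, not behavior from the code itself):
 * aligndebug is a static tunable; when set non-zero (e.g. from a kernel
 * debugger) the handlers below print the decoded instruction fields and
 * the data moved for each simulated access.
 */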
/*
 * For the sake of those who must be compatible with unaligned
 * architectures, users can link their programs to use a
 * corrective trap handler that will fix unaligned references;
 * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
 * Returns 1 for success, 0 for failure.
 */

int
do_unaligned(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst, op3, asi = 0;
	uint_t	rd, rs1, rs2;
	int	sz, nf = 0, ltlend = 0;
	int	floatflg;
	int	fsrflg;
	int	immflg;
	int	lddstdflg;
	caddr_t	addr;
	uint64_t val;
	union {
		uint64_t	l[2];
		uint32_t	i[4];
		uint16_t	s[8];
		uint8_t		c[16];
	} data;

	ASSERT(USERMODE(rp->r_tstate));
	inst = fetch_user_instr((caddr_t)rp->r_pc);

	op3 = (inst >> 19) & 0x3f;
	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	floatflg = (inst >> 24) & 1;
	immflg = (inst >> 13) & 1;
	lddstdflg = fsrflg = 0;

	/* if not load or store do nothing */
	if ((inst >> 30) != 3)
		return (0);

	/* if ldstub or swap, do nothing */
	if ((inst & 0xc1680000) == 0xc0680000)
		return (0);

	/* if cas/casx, do nothing */
	if ((inst & 0xc1e00000) == 0xc1e00000)
		return (0);

	if (floatflg) {
		switch ((inst >> 19) & 3) {	/* map size bits to a number */
		case 0: sz = 4;
			break;			/* ldf{a}/stf{a} */
		case 1: fsrflg = 1;
			if (rd == 0)
				sz = 4;		/* ldfsr/stfsr */
			else if (rd == 1)
				sz = 8;		/* ldxfsr/stxfsr */
			else
				return (SIMU_ILLEGAL);
			break;
		case 2: sz = 16;
			break;			/* ldqf{a}/stqf{a} */
		case 3: sz = 8;
			break;			/* lddf{a}/stdf{a} */
		}
		/*
		 * Fix to access extra double register encoding plus
		 * compensate to access the correct fpu_dreg.
		 */
		if ((sz > 4) && (fsrflg == 0)) {
			if ((rd & 1) == 1)
				rd = (rd & 0x1e) | 0x20;
			rd = rd >> 1;
			if ((sz == 16) && ((rd & 0x1) != 0))
				return (SIMU_ILLEGAL);
		}
	} else {
		int sz_bits = (inst >> 19) & 0xf;
		switch (sz_bits) {		/* map size bits to a number */
		case 0:				/* lduw{a} */
		case 4:				/* stw{a} */
		case 8:				/* ldsw{a} */
		case 0xf:			/* swap */
			sz = 4; break;
		case 1:				/* ldub{a} */
		case 5:				/* stb{a} */
		case 9:				/* ldsb{a} */
		case 0xd:			/* ldstub */
			sz = 1; break;
		case 2:				/* lduh{a} */
		case 6:				/* sth{a} */
		case 0xa:			/* ldsh{a} */
			sz = 2; break;
		case 3:				/* ldd{a} */
		case 7:				/* std{a} */
			lddstdflg = 1;
			sz = 8; break;
		case 0xb:			/* ldx{a} */
		case 0xe:			/* stx{a} */
			sz = 8; break;
		}
	}


	/* only support primary and secondary asi's */
	if ((op3 >> 4) & 1) {
		if (immflg) {
			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
			    TSTATE_ASI_MASK;
		} else {
			asi = (inst >> 5) & 0xff;
		}
		switch (asi) {
		case ASI_P:
		case ASI_S:
			break;
		case ASI_PNF:
		case ASI_SNF:
			nf = 1;
			break;
		case ASI_PL:
		case ASI_SL:
			ltlend = 1;
			break;
		case ASI_PNFL:
		case ASI_SNFL:
			ltlend = 1;
			nf = 1;
			break;
		default:
			return (0);
		}
		/*
		 * Non-faulting stores generate a data_access_exception trap,
		 * according to the Spitfire manual, which should be signaled
		 * as an illegal instruction trap, because it can't be fixed.
		 */
		if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
			return (SIMU_ILLEGAL);
	}

	if (aligndebug) {
		printf("unaligned access at %p, instruction: 0x%x\n",
		    (void *)rp->r_pc, inst);
		printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
		if (((inst >> 21) & 1) == 0)
			printf(" %s", (((inst >> 22) & 1) ?
			    "signed" : "unsigned"));
		printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
		printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
		    rd, op3, rs1, rs2, (inst & 0x1fff));
	}

	(void) flush_user_windows_to_stack(NULL);
	if (getreg(rp, rs1, &val, badaddr))
		return (SIMU_FAULT);
	addr = (caddr_t)val;		/* convert to 32/64 bit address */
	if (aligndebug)
		printf("addr 1 = %p\n", (void *)addr);
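
	/*
	 * Note on the sign-extension idiom used below (illustrative values
	 * only): the 13-bit immediate occupies bits 12:0 of the instruction.
	 * Shifting it up so its sign bit lands in bit 31 of a 32-bit int and
	 * arithmetically shifting it back down sign-extends it, e.g.
	 *	0x1fff -> -1,  0x1000 -> -4096,  0x0004 -> 4.
	 */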
	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int imm;
		imm = inst & 0x1fff;	/* mask out immediate field */
		imm <<= 19;		/* sign extend it */
		imm >>= 19;
		addr += imm;		/* compute address */
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		addr += val;
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}

	if (aligndebug)
		printf("addr 2 = %p\n", (void *)addr);

	if (addr >= curproc->p_as->a_userlimit) {
		*badaddr = addr;
		goto badret;
	}
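
	/*
	 * A sketch of how the 'data' union is used below (assuming the
	 * big-endian byte order of the SPARC kernel): the value being
	 * loaded or stored is kept right-justified in data.l[0], so its
	 * least significant byte is data.c[7], its least significant
	 * halfword is data.s[3], and its least significant word is
	 * data.i[1].  That is why those offsets are the copyin/copyout
	 * endpoints for the 1-, 2- and 4-byte cases.
	 */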
	/* a single bit differentiates ld and st */
	if ((inst >> 21) & 1) {			/* store */
		if (floatflg) {
			klwp_id_t lwp = ttolwp(curthread);
			kfpu_t *fp = lwptofpu(lwp);
			/* Ensure fp has been enabled */
			if (fpu_exists) {
				if (!(_fp_read_fprs() & FPRS_FEF))
					fp_enable();
			} else {
				if (!fp->fpu_en)
					fp_enable();
			}
			/* if fpu_exists read fpu reg */
			if (fpu_exists) {
				if (fsrflg) {
					_fp_read_pfsr(&data.l[0]);
				} else {
					if (sz == 4) {
						data.i[0] = 0;
						_fp_read_pfreg(
						    (unsigned *)&data.i[1], rd);
					}
					if (sz >= 8)
						_fp_read_pdreg(
						    &data.l[0], rd);
					if (sz == 16)
						_fp_read_pdreg(
						    &data.l[1], rd+1);
				}
			} else {
				if (fsrflg) {
					/* Clear reserved bits, set version=7 */
					fp->fpu_fsr &= ~0x30301000;
					fp->fpu_fsr |= 0xE0000;
					data.l[0] = fp->fpu_fsr;
				} else {
					if (sz == 4) {
						data.i[0] = 0;
						data.i[1] =
						    (unsigned)fp->
						    fpu_fr.fpu_regs[rd];
					}
					if (sz >= 8)
						data.l[0] =
						    fp->fpu_fr.fpu_dregs[rd];
					if (sz == 16)
						data.l[1] =
						    fp->fpu_fr.fpu_dregs[rd+1];
				}
			}
		} else {
			if (lddstdflg) {		/* combine the data */
				if (getreg(rp, rd, &data.l[0], badaddr))
					return (SIMU_FAULT);
				if (getreg(rp, rd+1, &data.l[1], badaddr))
					return (SIMU_FAULT);
				if (ltlend) {
					/*
					 * For STD, each 32-bit word is byte-
					 * swapped individually.  For
					 * simplicity we don't want to do that
					 * below, so we swap the words now to
					 * get the desired result in the end.
					 */
					data.i[0] = data.i[3];
				} else {
					data.i[0] = data.i[1];
					data.i[1] = data.i[3];
				}
			} else {
				if (getreg(rp, rd, &data.l[0], badaddr))
					return (SIMU_FAULT);
			}
		}

		if (aligndebug) {
			if (sz == 16) {
				printf("data %x %x %x %x\n",
				    data.i[0], data.i[1], data.i[2], data.i[3]);
			} else {
				printf("data %x %x %x %x %x %x %x %x\n",
				    data.c[0], data.c[1], data.c[2], data.c[3],
				    data.c[4], data.c[5], data.c[6], data.c[7]);
			}
		}

		if (ltlend) {
			if (sz == 1) {
				if (xcopyout_little(&data.c[7], addr,
				    (size_t)sz) != 0)
					goto badret;
			} else if (sz == 2) {
				if (xcopyout_little(&data.s[3], addr,
				    (size_t)sz) != 0)
					goto badret;
			} else if (sz == 4) {
				if (xcopyout_little(&data.i[1], addr,
				    (size_t)sz) != 0)
					goto badret;
			} else {
				if (xcopyout_little(&data.l[0], addr,
				    (size_t)sz) != 0)
					goto badret;
			}
		} else {
			if (sz == 1) {
				if (copyout(&data.c[7], addr, (size_t)sz) == -1)
					goto badret;
			} else if (sz == 2) {
				if (copyout(&data.s[3], addr, (size_t)sz) == -1)
					goto badret;
			} else if (sz == 4) {
				if (copyout(&data.i[1], addr, (size_t)sz) == -1)
					goto badret;
			} else {
				if (copyout(&data.l[0], addr, (size_t)sz) == -1)
					goto badret;
			}
		}
	} else {				/* load */
		if (sz == 1) {
			if (ltlend) {
				if (xcopyin_little(addr, &data.c[7],
				    (size_t)sz) != 0) {
					if (nf)
						data.c[7] = 0;
					else
						goto badret;
				}
			} else {
				if (copyin(addr, &data.c[7],
				    (size_t)sz) == -1) {
					if (nf)
						data.c[7] = 0;
					else
						goto badret;
				}
			}
			/* if signed and the sign bit is set extend it */
			if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
				data.i[0] = (uint_t)-1;	/* extend sign bit */
				data.s[2] = (ushort_t)-1;
				data.c[6] = (uchar_t)-1;
			} else {
				data.i[0] = 0;	/* clear upper 32+24 bits */
				data.s[2] = 0;
				data.c[6] = 0;
			}
		} else if (sz == 2) {
			if (ltlend) {
				if (xcopyin_little(addr, &data.s[3],
				    (size_t)sz) != 0) {
					if (nf)
						data.s[3] = 0;
					else
						goto badret;
				}
			} else {
				if (copyin(addr, &data.s[3],
				    (size_t)sz) == -1) {
					if (nf)
						data.s[3] = 0;
					else
						goto badret;
				}
			}
			/* if signed and the sign bit is set extend it */
			if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
				data.i[0] = (uint_t)-1;	/* extend sign bit */
				data.s[2] = (ushort_t)-1;
			} else {
				data.i[0] = 0;	/* clear upper 32+16 bits */
				data.s[2] = 0;
			}
		} else if (sz == 4) {
			if (ltlend) {
				if (xcopyin_little(addr, &data.i[1],
				    (size_t)sz) != 0) {
					if (!nf)
						goto badret;
					data.i[1] = 0;
				}
			} else {
				if (copyin(addr, &data.i[1],
				    (size_t)sz) == -1) {
					if (!nf)
						goto badret;
					data.i[1] = 0;
				}
			}
			/* if signed and the sign bit is set extend it */
			if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
				data.i[0] = (uint_t)-1;	/* extend sign bit */
			} else {
				data.i[0] = 0;	/* clear upper 32 bits */
			}
		} else {
			if (ltlend) {
				if (xcopyin_little(addr, &data.l[0],
				    (size_t)sz) != 0) {
					if (!nf)
						goto badret;
					data.l[0] = 0;
				}
			} else {
				if (copyin(addr, &data.l[0],
				    (size_t)sz) == -1) {
					if (!nf)
						goto badret;
					data.l[0] = 0;
				}
			}
		}

		if (aligndebug) {
			if (sz == 16) {
				printf("data %x %x %x %x\n",
				    data.i[0], data.i[1], data.i[2], data.i[3]);
			} else {
				printf("data %x %x %x %x %x %x %x %x\n",
				    data.c[0], data.c[1], data.c[2], data.c[3],
				    data.c[4], data.c[5], data.c[6], data.c[7]);
			}
		}

		if (floatflg) {		/* if fpu_exists write fpu reg */
			klwp_id_t lwp = ttolwp(curthread);
			kfpu_t *fp = lwptofpu(lwp);
			/* Ensure fp has been enabled */
			if (fpu_exists) {
				if (!(_fp_read_fprs() & FPRS_FEF))
					fp_enable();
			} else {
				if (!fp->fpu_en)
					fp_enable();
			}
			/* if fpu_exists read fpu reg */
			if (fpu_exists) {
				if (fsrflg) {
					_fp_write_pfsr(&data.l[0]);
				} else {
					if (sz == 4)
						_fp_write_pfreg(
						    (unsigned *)&data.i[1], rd);
					if (sz >= 8)
						_fp_write_pdreg(
						    &data.l[0], rd);
					if (sz == 16)
						_fp_write_pdreg(
						    &data.l[1], rd+1);
				}
			} else {
				if (fsrflg) {
					fp->fpu_fsr = data.l[0];
				} else {
					if (sz == 4)
						fp->fpu_fr.fpu_regs[rd] =
						    (unsigned)data.i[1];
					if (sz >= 8)
						fp->fpu_fr.fpu_dregs[rd] =
						    data.l[0];
					if (sz == 16)
						fp->fpu_fr.fpu_dregs[rd+1] =
						    data.l[1];
				}
			}
		} else {
			if (lddstdflg) {		/* split the data */
				if (ltlend) {
					/*
					 * For LDD, each 32-bit word is byte-
					 * swapped individually.  We didn't
					 * do that above, but this will give
					 * us the desired result.
					 */
					data.i[3] = data.i[0];
				} else {
					data.i[3] = data.i[1];
					data.i[1] = data.i[0];
				}
				data.i[0] = 0;
				data.i[2] = 0;
				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
					goto badret;
				if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
					goto badret;
			} else {
				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
					goto badret;
			}
		}
	}
	return (SIMU_SUCCESS);
badret:
	return (SIMU_FAULT);
}


int
simulate_lddstd(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst, op3, asi = 0;
	uint_t	rd, rs1, rs2;
	int	nf = 0, ltlend = 0, usermode;
	int	immflg;
	uint64_t reven;
	uint64_t rodd;
	caddr_t	addr;
	uint64_t val;
	uint64_t data;

	usermode = USERMODE(rp->r_tstate);

	if (usermode)
		inst = fetch_user_instr((caddr_t)rp->r_pc);
	else
		inst = *(uint_t *)rp->r_pc;

	op3 = (inst >> 19) & 0x3f;
	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	immflg = (inst >> 13) & 1;

	if (USERMODE(rp->r_tstate))
		(void) flush_user_windows_to_stack(NULL);
	else
		flush_windows();

	if ((op3 >> 4) & 1) {		/* is this LDDA/STDA? */
		if (immflg) {
			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
			    TSTATE_ASI_MASK;
		} else {
			asi = (inst >> 5) & 0xff;
		}
		switch (asi) {
		case ASI_P:
		case ASI_S:
			break;
		case ASI_PNF:
		case ASI_SNF:
			nf = 1;
			break;
		case ASI_PL:
		case ASI_SL:
			ltlend = 1;
			break;
		case ASI_PNFL:
		case ASI_SNFL:
			ltlend = 1;
			nf = 1;
			break;
		case ASI_AIUP:
		case ASI_AIUS:
			usermode = 1;
			break;
		case ASI_AIUPL:
		case ASI_AIUSL:
			usermode = 1;
			ltlend = 1;
			break;
		default:
			return (SIMU_ILLEGAL);
		}
	}

	if (getreg(rp, rs1, &val, badaddr))
		return (SIMU_FAULT);
	addr = (caddr_t)val;		/* convert to 32/64 bit address */

	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int imm;
		imm = inst & 0x1fff;	/* mask out immediate field */
		imm <<= 19;		/* sign extend it */
		imm >>= 19;
		addr += imm;		/* compute address */
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		addr += val;
	}

	/*
	 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
	 * T_ALIGNMENT.  So we have to make sure that the address is
	 * kosher before trying to use it, because the hardware hasn't
	 * checked it for us yet.
	 */
	if (((uintptr_t)addr & 0x7) != 0) {
		if (curproc->p_fixalignment)
			return (do_unaligned(rp, badaddr));
		else
			return (SIMU_UNALIGN);
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}
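
	/*
	 * LDD/STD move two architectural 32-bit words.  For the
	 * little-endian ASIs each word is byte-swapped independently,
	 * which is why BSWAP_32 is applied to reven and rodd separately
	 * below rather than swapping the combined 64-bit value.
	 */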
665 */ 666 if (curproc->p_model == DATAMODEL_ILP32 && usermode) { 667 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr; 668 addr = (caddr_t)(uintptr_t)addr32; 669 } 670 671 if ((inst >> 21) & 1) { /* store */ 672 if (getreg(rp, rd, &reven, badaddr)) 673 return (SIMU_FAULT); 674 if (getreg(rp, rd+1, &rodd, badaddr)) 675 return (SIMU_FAULT); 676 if (ltlend) { 677 reven = BSWAP_32(reven); 678 rodd = BSWAP_32(rodd); 679 } 680 data = (reven << 32) | rodd; 681 if (usermode) { 682 if (suword64_nowatch(addr, data) == -1) 683 return (SIMU_FAULT); 684 } else { 685 *(uint64_t *)addr = data; 686 } 687 } else { /* load */ 688 if (usermode) { 689 if (fuword64_nowatch(addr, &data)) { 690 if (nf) 691 data = 0; 692 else 693 return (SIMU_FAULT); 694 } 695 } else 696 data = *(uint64_t *)addr; 697 698 reven = (data >> 32); 699 rodd = (uint64_t)(uint32_t)data; 700 if (ltlend) { 701 reven = BSWAP_32(reven); 702 rodd = BSWAP_32(rodd); 703 } 704 705 if (putreg(&reven, rp, rd, badaddr) == -1) 706 return (SIMU_FAULT); 707 if (putreg(&rodd, rp, rd+1, badaddr) == -1) 708 return (SIMU_FAULT); 709 } 710 return (SIMU_SUCCESS); 711 } 712 713 714 /* 715 * simulate popc 716 */ 717 static int 718 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst) 719 { 720 uint_t rd, rs2, rs1; 721 uint_t immflg; 722 uint64_t val, cnt = 0; 723 724 rd = (inst >> 25) & 0x1f; 725 rs1 = (inst >> 14) & 0x1f; 726 rs2 = inst & 0x1f; 727 immflg = (inst >> 13) & 1; 728 729 if (rs1 > 0) 730 return (SIMU_ILLEGAL); 731 732 (void) flush_user_windows_to_stack(NULL); 733 734 /* check immediate bit and use immediate field or reg (rs2) */ 735 if (immflg) { 736 int64_t imm; 737 imm = inst & 0x1fff; /* mask out immediate field */ 738 imm <<= 51; /* sign extend it */ 739 imm >>= 51; 740 if (imm != 0) { 741 for (cnt = 0; imm != 0; imm &= imm-1) 742 cnt++; 743 } 744 } else { 745 if (getreg(rp, rs2, &val, badaddr)) 746 return (SIMU_FAULT); 747 if (val != 0) { 748 for (cnt = 0; val != 0; val &= val-1) 749 cnt++; 750 } 751 } 752 753 if (putreg(&cnt, rp, rd, badaddr) == -1) 754 return (SIMU_FAULT); 755 756 return (SIMU_SUCCESS); 757 } 758 759 /* 760 * simulate mulscc 761 */ 762 static int 763 simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst) 764 { 765 uint32_t s1, s2; 766 uint32_t c, d, v; 767 uint_t rd, rs1; 768 int64_t d64; 769 uint64_t ud64; 770 uint64_t drs1; 771 772 (void) flush_user_windows_to_stack(NULL); 773 774 if ((inst >> 13) & 1) { /* immediate */ 775 d64 = inst & 0x1fff; 776 d64 <<= 51; /* sign extend it */ 777 d64 >>= 51; 778 } else { 779 uint_t rs2; 780 uint64_t drs2; 781 782 if (inst & 0x1fe0) { 783 return (SIMU_ILLEGAL); 784 } 785 rs2 = inst & 0x1f; 786 if (getreg(rp, rs2, &drs2, badaddr)) { 787 return (SIMU_FAULT); 788 } 789 d64 = (int64_t)drs2; 790 } 791 792 rs1 = (inst >> 14) & 0x1f; 793 if (getreg(rp, rs1, &drs1, badaddr)) { 794 return (SIMU_FAULT); 795 } 796 /* icc.n xor icc.v */ 797 s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^ 798 ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1)); 799 s1 = (s1 << 31) | (((uint32_t)drs1) >> 1); 800 801 if (rp->r_y & 1) { 802 s2 = (uint32_t)d64; 803 } else { 804 s2 = 0; 805 } 806 d = s1 + s2; 807 808 ud64 = (uint64_t)d; 809 810 /* set the icc flags */ 811 v = (s1 & s2 & ~d) | (~s1 & ~s2 & d); 812 c = (s1 & s2) | (~d & (s1 | s2)); 813 rp->r_tstate &= ~TSTATE_ICC; 814 rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0); 815 rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1); 816 rp->r_tstate |= (uint64_t)(d ? 
/*
 * simulate mulscc
 */
static int
simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
{
	uint32_t	s1, s2;
	uint32_t	c, d, v;
	uint_t		rd, rs1;
	int64_t		d64;
	uint64_t	ud64;
	uint64_t	drs1;

	(void) flush_user_windows_to_stack(NULL);

	if ((inst >> 13) & 1) {		/* immediate */
		d64 = inst & 0x1fff;
		d64 <<= 51;		/* sign extend it */
		d64 >>= 51;
	} else {
		uint_t		rs2;
		uint64_t	drs2;

		if (inst & 0x1fe0) {
			return (SIMU_ILLEGAL);
		}
		rs2 = inst & 0x1f;
		if (getreg(rp, rs2, &drs2, badaddr)) {
			return (SIMU_FAULT);
		}
		d64 = (int64_t)drs2;
	}

	rs1 = (inst >> 14) & 0x1f;
	if (getreg(rp, rs1, &drs1, badaddr)) {
		return (SIMU_FAULT);
	}
	/* icc.n xor icc.v */
	s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
	    ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
	s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);

	if (rp->r_y & 1) {
		s2 = (uint32_t)d64;
	} else {
		s2 = 0;
	}
	d = s1 + s2;

	ud64 = (uint64_t)d;

	/* set the icc flags */
	v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
	c = (s1 & s2) | (~d & (s1 | s2));
	rp->r_tstate &= ~TSTATE_ICC;
	rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
	rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
	rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
	rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);

	if (rp->r_tstate & TSTATE_IC) {
		ud64 |= (1ULL << 32);
	}

	/* set the xcc flags */
	rp->r_tstate &= ~TSTATE_XCC;
	if (ud64 == 0) {
		rp->r_tstate |= TSTATE_XZ;
	}

	rd = (inst >> 25) & 0x1f;
	if (putreg(&ud64, rp, rd, badaddr)) {
		return (SIMU_FAULT);
	}

	d64 = (drs1 << 32) | (uint32_t)rp->r_y;
	d64 >>= 1;
	rp->r_y = (uint32_t)d64;

	return (SIMU_SUCCESS);
}

/*
 * simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
 */
int
simulate_unimp(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst, optype, op3, asi;
	uint_t	rs1, rd;
	uint_t	ignor, i;
	machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
	int	nomatch = 0;
	caddr_t	addr = (caddr_t)rp->r_pc;
	struct as *as;
	caddr_t	ka;
	pfn_t	pfnum;
	page_t *pp;
	proc_t *p = ttoproc(curthread);
	struct	seg *mapseg;
	struct	segvn_data *svd;

	ASSERT(USERMODE(rp->r_tstate));
	inst = fetch_user_instr(addr);
	if (inst == (uint_t)-1) {
		mpcb->mpcb_illexcaddr = addr;
		mpcb->mpcb_illexcinsn = (uint32_t)-1;
		return (SIMU_ILLEGAL);
	}

	/*
	 * When fixing dirty v8 instructions there's a race if two processors
	 * are executing the dirty executable at the same time.  If one
	 * cleans the instruction as the other is executing it the second
	 * processor will see a clean instruction when it comes through this
	 * code and will return SIMU_ILLEGAL.  To work around the race
	 * this code will keep track of the last illegal instruction seen
	 * by each lwp and will only take action if the illegal instruction
	 * is repeatable.
	 */
	if (addr != mpcb->mpcb_illexcaddr ||
	    inst != mpcb->mpcb_illexcinsn)
		nomatch = 1;
	mpcb->mpcb_illexcaddr = addr;
	mpcb->mpcb_illexcinsn = inst;

	/* instruction fields */
	i = (inst >> 13) & 0x1;
	rd = (inst >> 25) & 0x1f;
	optype = (inst >> 30) & 0x3;
	op3 = (inst >> 19) & 0x3f;
	ignor = (inst >> 5) & 0xff;
	if (IS_IBIT_SET(inst)) {
		asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
		    TSTATE_ASI_MASK);
	} else {
		asi = ignor;
	}

	if (IS_VIS1(optype, op3) ||
	    IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
	    IS_FLOAT_QUAD_OP(optype, op3)) {
		klwp_t *lwp = ttolwp(curthread);
		kfpu_t *fp = lwptofpu(lwp);
		if (fpu_exists) {
			if (!(_fp_read_fprs() & FPRS_FEF))
				fp_enable();
			_fp_read_pfsr(&fp->fpu_fsr);
		} else {
			if (!fp->fpu_en)
				fp_enable();
		}
		fp_precise(rp);
		return (SIMU_RETRY);
	}

	if (optype == 2 && op3 == IOP_V8_POPC) {
		return (simulate_popc(rp, badaddr, inst));
	} else if (optype == 3 && op3 == IOP_V8_POPC) {
		return (SIMU_ILLEGAL);
	} else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
		return (simulate_mulscc(rp, badaddr, inst));
	}

	if (optype == OP_V8_LDSTR) {
		if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
		    op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
			return (do_unaligned(rp, badaddr));
	}

	/* This is a new instruction so illexccnt should also be set. */
	if (nomatch) {
		mpcb->mpcb_illexccnt = 0;
		return (SIMU_RETRY);
	}

	/*
	 * In order to keep us from entering into an infinite loop while
	 * attempting to clean up faulty instructions, we will return
	 * SIMU_ILLEGAL once we've cleaned up the instruction as much
	 * as we can, and still end up here.
	 */
	if (mpcb->mpcb_illexccnt >= 3)
		return (SIMU_ILLEGAL);

	mpcb->mpcb_illexccnt += 1;

	/*
	 * The rest of the code handles v8 binaries with instructions
	 * that have dirty (non-zero) bits in reserved or 'ignored'
	 * fields; these will cause core dumps on v9 machines.
	 *
	 * We only clean dirty instructions in 32-bit programs (ie, v8)
	 * running on SPARCv9 processors.  True v9 programs are forced
	 * to use the instruction set as intended.
	 */
	if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
		return (SIMU_ILLEGAL);
	switch (optype) {
	case OP_V8_BRANCH:
	case OP_V8_CALL:
		return (SIMU_ILLEGAL);	/* these don't have ignored fields */
		/*NOTREACHED*/
	case OP_V8_ARITH:
		switch (op3) {
		case IOP_V8_RETT:
			if (rd == 0 && !(i == 0 && ignor))
				return (SIMU_ILLEGAL);
			if (rd)
				inst &= ~(0x1f << 25);
			if (i == 0 && ignor)
				inst &= ~(0xff << 5);
			break;
		case IOP_V8_TCC:
			if (i == 0 && ignor != 0) {
				inst &= ~(0xff << 5);
			} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
				inst &= ~(0x3f << 7);
			} else {
				return (SIMU_ILLEGAL);
			}
			break;
		case IOP_V8_JMPL:
		case IOP_V8_RESTORE:
		case IOP_V8_SAVE:
			if ((op3 == IOP_V8_RETT && rd) ||
			    (i == 0 && ignor)) {
				inst &= ~(0xff << 5);
			} else {
				return (SIMU_ILLEGAL);
			}
			break;
		case IOP_V8_FCMP:
			if (rd == 0)
				return (SIMU_ILLEGAL);
			inst &= ~(0x1f << 25);
			break;
		case IOP_V8_RDASR:
			rs1 = ((inst >> 14) & 0x1f);
			if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
				/*
				 * The instruction specifies an invalid
				 * state register - better bail out than
				 * "fix" it when we're not sure what was
				 * intended.
				 */
				return (SIMU_ILLEGAL);
			}
			/*
			 * Note: this case includes the 'stbar'
			 * instruction (rs1 == 15 && i == 0).
			 */
			if ((ignor = (inst & 0x3fff)) != 0)
				inst &= ~(0x3fff);
			break;
		case IOP_V8_SRA:
		case IOP_V8_SRL:
		case IOP_V8_SLL:
			if (ignor == 0)
				return (SIMU_ILLEGAL);
			inst &= ~(0xff << 5);
			break;
		case IOP_V8_ADD:
		case IOP_V8_AND:
		case IOP_V8_OR:
		case IOP_V8_XOR:
		case IOP_V8_SUB:
		case IOP_V8_ANDN:
		case IOP_V8_ORN:
		case IOP_V8_XNOR:
		case IOP_V8_ADDC:
		case IOP_V8_UMUL:
		case IOP_V8_SMUL:
		case IOP_V8_SUBC:
		case IOP_V8_UDIV:
		case IOP_V8_SDIV:
		case IOP_V8_ADDcc:
		case IOP_V8_ANDcc:
		case IOP_V8_ORcc:
		case IOP_V8_XORcc:
		case IOP_V8_SUBcc:
		case IOP_V8_ANDNcc:
		case IOP_V8_ORNcc:
		case IOP_V8_XNORcc:
		case IOP_V8_ADDCcc:
		case IOP_V8_UMULcc:
		case IOP_V8_SMULcc:
		case IOP_V8_SUBCcc:
		case IOP_V8_UDIVcc:
		case IOP_V8_SDIVcc:
		case IOP_V8_TADDcc:
		case IOP_V8_TSUBcc:
		case IOP_V8_TADDccTV:
		case IOP_V8_TSUBccTV:
		case IOP_V8_MULScc:
		case IOP_V8_WRASR:
		case IOP_V8_FLUSH:
			if (i != 0 || ignor == 0)
				return (SIMU_ILLEGAL);
			inst &= ~(0xff << 5);
			break;
		default:
			return (SIMU_ILLEGAL);
		}
		break;
	case OP_V8_LDSTR:
		switch (op3) {
		case IOP_V8_STFSR:
		case IOP_V8_LDFSR:
			if (rd == 0 && !(i == 0 && ignor))
				return (SIMU_ILLEGAL);
			if (rd)
				inst &= ~(0x1f << 25);
			if (i == 0 && ignor)
				inst &= ~(0xff << 5);
			break;
		default:
			if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
			    i == 0 && ignor)
				inst &= ~(0xff << 5);
			else
				return (SIMU_ILLEGAL);
			break;
		}
		break;
	default:
		return (SIMU_ILLEGAL);
	}

	as = p->p_as;

	AS_LOCK_ENTER(as, RW_READER);
	mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
	ASSERT(mapseg != NULL);
	svd = (struct segvn_data *)mapseg->s_data;

	/*
	 * We only create COW page for MAP_PRIVATE mappings.
	 */
	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
	if ((svd->type & MAP_TYPE) & MAP_SHARED) {
		SEGVN_LOCK_EXIT(as, &svd->lock);
		AS_LOCK_EXIT(as);
		return (SIMU_ILLEGAL);
	}
	SEGVN_LOCK_EXIT(as, &svd->lock);
	AS_LOCK_EXIT(as);

	/*
	 * A "flush" instruction using the user PC's vaddr will not work
	 * here, at least on Spitfire.  Instead we create a temporary kernel
	 * mapping to the user's text page, then modify and flush that.
	 * Break COW by locking user page.
	 */
	if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
	    F_SOFTLOCK, S_READ))
		return (SIMU_FAULT);

	AS_LOCK_ENTER(as, RW_READER);
	pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
	AS_LOCK_EXIT(as);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		ASSERT(pp == NULL || PAGE_LOCKED(pp));
	} else {
		(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
		    PAGESIZE, F_SOFTUNLOCK, S_READ);
		return (SIMU_FAULT);
	}

	AS_LOCK_ENTER(as, RW_READER);
	ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
	*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
	doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
	ppmapout(ka);
	AS_LOCK_EXIT(as);

	(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
	    PAGESIZE, F_SOFTUNLOCK, S_READ);
	return (SIMU_RETRY);
}

/*
 * Simulate a "rd %tick" or "rd %stick" (%asr24) instruction.
 */
int
simulate_rdtick(struct regs *rp)
{
	uint_t	inst, op, op3, rd, rs1, i;
	caddr_t badaddr;

	inst = fetch_user_instr((caddr_t)rp->r_pc);
	op = (inst >> 30) & 0x3;
	rd = (inst >> 25) & 0x1F;
	op3 = (inst >> 19) & 0x3F;
	i = (inst >> 13) & 0x1;

	/*
	 * Make sure this is either a %tick read (rs1 == 0x4) or
	 * a %stick read (rs1 == 0x18) instruction.
	 */
	if (op == 2 && op3 == 0x28 && i == 0) {
		rs1 = (inst >> 14) & 0x1F;

		if (rs1 == 0x4) {
			uint64_t tick;
			(void) flush_user_windows_to_stack(NULL);
			tick = gettick_counter();
			if (putreg(&tick, rp, rd, &badaddr) == 0)
				return (SIMU_SUCCESS);
		} else if (rs1 == 0x18) {
			uint64_t stick;
			(void) flush_user_windows_to_stack(NULL);
			stick = gethrtime_unscaled();
			if (putreg(&stick, rp, rd, &badaddr) == 0)
				return (SIMU_SUCCESS);
		}
	}

	return (SIMU_FAULT);
}
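
/*
 * A note on register access in getreg()/putreg() below (a summary of the
 * code plus the standard v9 ABI convention as an assumption): %g0-%g7 and
 * %o0-%o7 live in the saved struct regs, while %l0-%l7 and %i0-%i7 live in
 * the register window that was flushed to the user stack.  For a 64-bit
 * (v9) stack frame the window save area sits at %sp + V9BIAS64 (the stack
 * bias); for a 32-bit frame it sits directly at %sp with 32-bit slots.
 */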
/*
 * Get the value of a register for instruction simulation
 * by using the regs or window structure pointers.
 * Return 0 for success, and -1 for failure.  If there is a failure,
 * save the faulting address using the badaddr pointer.
 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
 * Don't truncate globals/outs for 32 bit programs, for v8+ support.
 */
int
getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
{
	uint64_t *rgs, *sp;
	int rv = 0;

	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
	if (reg == 0) {
		*val = 0;
	} else if (reg < 16) {
		*val = rgs[reg];
	} else if (IS_V9STACK(sp)) {
		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
		uint64_t *addr = (uint64_t *)&rw[reg - 16];
		uint64_t res;

		if (USERMODE(rp->r_tstate)) {
			if (fuword64_nowatch(addr, &res) == -1) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
		} else {
			res = *addr;
		}
		*val = res;
	} else {
		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
		uint32_t *addr = (uint32_t *)&rw[reg - 16];
		uint32_t res;

		if (USERMODE(rp->r_tstate)) {
			if (fuword32_nowatch(addr, &res) == -1) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
		} else {
			res = *addr;
		}
		*val = (uint64_t)res;
	}
	return (rv);
}

/*
 * Set the value of a register after instruction simulation
 * by using the regs or window structure pointers.
 * Return 0 for success, -1 for failure.  If there is a failure,
 * save the faulting address using the badaddr pointer.
 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
 * Don't truncate globals/outs for 32 bit programs, for v8+ support.
 */
int
putreg(uint64_t *data, struct regs *rp, uint_t reg, caddr_t *badaddr)
{
	uint64_t *rgs, *sp;
	int rv = 0;

	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
	if (reg == 0) {
		return (0);
	} else if (reg < 16) {
		rgs[reg] = *data;
	} else if (IS_V9STACK(sp)) {
		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
		uint64_t *addr = (uint64_t *)&rw[reg - 16];
		uint64_t res;

		if (USERMODE(rp->r_tstate)) {
			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);

			res = *data;
			if (suword64_nowatch(addr, res) != 0) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
			/*
			 * We have changed a local or in register;
			 * nuke the watchpoint return windows.
			 */
			mpcb->mpcb_rsp[0] = NULL;
			mpcb->mpcb_rsp[1] = NULL;
		} else {
			res = *data;
			*addr = res;
		}
	} else {
		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
		uint32_t *addr = (uint32_t *)&rw[reg - 16];
		uint32_t res;

		if (USERMODE(rp->r_tstate)) {
			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);

			res = (uint_t)*data;
			if (suword32_nowatch(addr, res) != 0) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
			/*
			 * We have changed a local or in register;
			 * nuke the watchpoint return windows.
			 */
			mpcb->mpcb_rsp[0] = NULL;
			mpcb->mpcb_rsp[1] = NULL;

		} else {
			res = (uint_t)*data;
			*addr = res;
		}
	}
	return (rv);
}

/*
 * Calculate a memory reference address from instruction
 * operands, used to return the address of a fault, instead
 * of the instruction when an error occurs.  This is code that is
 * common with most of the routines that simulate instructions.
 */
int
calc_memaddr(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst;
	uint_t	rd, rs1, rs2;
	int	sz;
	int	immflg;
	int	floatflg;
	caddr_t	addr;
	uint64_t val;

	if (USERMODE(rp->r_tstate))
		inst = fetch_user_instr((caddr_t)rp->r_pc);
	else
		inst = *(uint_t *)rp->r_pc;

	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	floatflg = (inst >> 24) & 1;
	immflg = (inst >> 13) & 1;

	if (floatflg) {
		switch ((inst >> 19) & 3) {	/* map size bits to a number */
		case 0: sz = 4; break;		/* ldf/stf */
		case 1: return (0);		/* ld[x]fsr/st[x]fsr */
		case 2: sz = 16; break;		/* ldqf/stqf */
		case 3: sz = 8; break;		/* lddf/stdf */
		}
		/*
		 * Fix to access extra double register encoding plus
		 * compensate to access the correct fpu_dreg.
		 */
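		/*
		 * Illustration (a worked example, not from the original
		 * comments): v9 doubles %d32-%d62 are encoded by folding
		 * bit 5 of the register number into bit 0 of the 5-bit rd
		 * field.  An encoded rd of 3 therefore becomes
		 * (2 | 0x20) = 34, i.e. %d34, and the halved value 17 is
		 * the fpu_dregs index.
		 */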
		if (sz > 4) {
			if ((rd & 1) == 1)
				rd = (rd & 0x1e) | 0x20;
			rd = rd >> 1;
		}
	} else {
		switch ((inst >> 19) & 0xf) {	/* map size bits to a number */
		case 0:				/* lduw */
		case 4:				/* stw */
		case 8:				/* ldsw */
		case 0xf:			/* swap */
			sz = 4; break;
		case 1:				/* ldub */
		case 5:				/* stb */
		case 9:				/* ldsb */
		case 0xd:			/* ldstub */
			sz = 1; break;
		case 2:				/* lduh */
		case 6:				/* sth */
		case 0xa:			/* ldsh */
			sz = 2; break;
		case 3:				/* ldd */
		case 7:				/* std */
		case 0xb:			/* ldx */
		case 0xe:			/* stx */
			sz = 8; break;
		}
	}

	if (USERMODE(rp->r_tstate))
		(void) flush_user_windows_to_stack(NULL);
	else
		flush_windows();

	if (getreg(rp, rs1, &val, badaddr))
		return (SIMU_FAULT);
	addr = (caddr_t)val;

	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int imm;
		imm = inst & 0x1fff;	/* mask out immediate field */
		imm <<= 19;		/* sign extend it */
		imm >>= 19;
		addr += imm;		/* compute address */
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		addr += val;
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}

	*badaddr = addr;
	return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
}

/*
 * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
 * Also compute the precise address by instruction disassembly.
 * (v9 page faults only provide the page address via the hardware.)
 * Return 0 on failure (not a load or store instruction).
 */
int
instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
{
	uint_t	inst, op3, asi;
	uint_t	rd, rs1, rs2;
	int	sz = 0;
	int	immflg;
	int	floatflg;
	caddr_t	addr;
	caddr_t	badaddr;
	uint64_t val;

	if (rdwr == S_EXEC) {
		*addrp = (caddr_t)rp->r_pc;
		return (4);
	}

	/*
	 * Fetch the instruction from user-level.
	 * We would like to assert this:
	 *	ASSERT(USERMODE(rp->r_tstate));
	 * but we can't because we can reach this point from a
	 * register window underflow/overflow and the v9 wbuf
	 * traps call trap() with T_USER even though r_tstate
	 * indicates a system trap, not a user trap.
	 */
	inst = fetch_user_instr((caddr_t)rp->r_pc);

	op3 = (inst >> 19) & 0x3f;
	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	floatflg = (inst >> 24) & 1;
	immflg = (inst >> 13) & 1;

	/* if not load or store do nothing.  can't happen? */
	if ((inst >> 30) != 3)
		return (0);

	if (immflg)
		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
		    TSTATE_ASI_MASK);
	else
		asi = (inst >> 5) & 0xff;

	if (floatflg) {
		/* check for ld/st alternate and highest defined V9 asi */
		if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
			sz = extended_asi_size(asi);
		} else {
			switch (op3 & 3) {
			case 0:
				sz = 4;			/* ldf/stf/cas */
				break;
			case 1:
				if (rd == 0)
					sz = 4;		/* ldfsr/stfsr */
				else
					sz = 8;		/* ldxfsr/stxfsr */
				break;
			case 2:
				if (op3 == 0x3e)
					sz = 8;		/* casx */
				else
					sz = 16;	/* ldqf/stqf */
				break;
			case 3:
				sz = 8;			/* lddf/stdf */
				break;
			}
		}
	} else {
		switch (op3 & 0xf) {		/* map size bits to a number */
		case 0:				/* lduw */
		case 4:				/* stw */
		case 8:				/* ldsw */
		case 0xf:			/* swap */
			sz = 4; break;
		case 1:				/* ldub */
		case 5:				/* stb */
		case 9:				/* ldsb */
		case 0xd:			/* ldstub */
			sz = 1; break;
		case 2:				/* lduh */
		case 6:				/* sth */
		case 0xa:			/* ldsh */
			sz = 2; break;
		case 3:				/* ldd */
		case 7:				/* std */
		case 0xb:			/* ldx */
		case 0xe:			/* stx */
			sz = 8; break;
		}
	}

	if (sz == 0)	/* can't happen? */
		return (0);
	(void) flush_user_windows_to_stack(NULL);

	if (getreg(rp, rs1, &val, &badaddr))
		return (0);
	addr = (caddr_t)val;

	/* cas/casx don't use rs2 / simm13 to compute the address */
	if ((op3 & 0x3d) != 0x3c) {
		/* check immediate bit and use immediate field or reg (rs2) */
		if (immflg) {
			int imm;
			imm = inst & 0x1fff;	/* mask out immediate field */
			imm <<= 19;		/* sign extend it */
			imm >>= 19;
			addr += imm;		/* compute address */
		} else {
			/*
			 * asi's in the 0xCx range are partial store
			 * instructions.  For these, rs2 is a mask, not part of
			 * the address.
			 */
			if (!(floatflg && (asi & 0xf0) == 0xc0)) {
				if (getreg(rp, rs2, &val, &badaddr))
					return (0);
				addr += val;
			}
		}
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}

	*addrp = addr;
	ASSERT(sz != 0);
	return (sz);
}

/*
 * Fetch an instruction from user-level.
 * Deal with watchpoints, if they are in effect.
 */
int32_t
fetch_user_instr(caddr_t vaddr)
{
	proc_t *p = curproc;
	int32_t instr;

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (p->p_model == DATAMODEL_ILP32) {
		caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
		vaddr = (caddr_t)(uintptr_t)vaddr32;
	}

	if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
		instr = -1;

	return (instr);
}