1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* common code with bug fixes from original version in trap.c */ 27 28 #include <sys/param.h> 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/archsystm.h> 32 #include <sys/vmsystm.h> 33 #include <sys/fpu/fpusystm.h> 34 #include <sys/fpu/fpu_simulator.h> 35 #include <sys/inline.h> 36 #include <sys/debug.h> 37 #include <sys/privregs.h> 38 #include <sys/machpcb.h> 39 #include <sys/simulate.h> 40 #include <sys/proc.h> 41 #include <sys/cmn_err.h> 42 #include <sys/stack.h> 43 #include <sys/watchpoint.h> 44 #include <sys/trap.h> 45 #include <sys/machtrap.h> 46 #include <sys/mman.h> 47 #include <sys/asi.h> 48 #include <sys/copyops.h> 49 #include <vm/as.h> 50 #include <vm/page.h> 51 #include <sys/model.h> 52 #include <vm/seg_vn.h> 53 #include <sys/byteorder.h> 54 55 #define IS_IBIT_SET(x) (x & 0x2000) 56 #define IS_VIS1(op, op3)(op == 2 && op3 == 0x36) 57 #define IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 || \ 58 op3 == 0x35)) 59 #define IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi) \ 60 (op == 3 && (op3 == IOP_V8_LDDFA || \ 61 op3 == IOP_V8_STDFA) && asi > ASI_SNFL) 62 63 static int aligndebug = 0; 64 65 /* 66 * For the sake of those who must be compatible with unaligned 67 * architectures, users can link their programs to use a 68 * corrective trap handler that will fix unaligned references 69 * a special trap #6 (T_FIX_ALIGN) enables this 'feature'. 70 * Returns 1 for success, 0 for failure. 71 */ 72 73 int 74 do_unaligned(struct regs *rp, caddr_t *badaddr) 75 { 76 uint_t inst, op3, asi = 0; 77 uint_t rd, rs1, rs2; 78 int sz, nf = 0, ltlend = 0; 79 int floatflg; 80 int fsrflg; 81 int immflg; 82 int lddstdflg; 83 caddr_t addr; 84 uint64_t val; 85 union { 86 uint64_t l[2]; 87 uint32_t i[4]; 88 uint16_t s[8]; 89 uint8_t c[16]; 90 } data; 91 92 ASSERT(USERMODE(rp->r_tstate)); 93 inst = fetch_user_instr((caddr_t)rp->r_pc); 94 95 op3 = (inst >> 19) & 0x3f; 96 rd = (inst >> 25) & 0x1f; 97 rs1 = (inst >> 14) & 0x1f; 98 rs2 = inst & 0x1f; 99 floatflg = (inst >> 24) & 1; 100 immflg = (inst >> 13) & 1; 101 lddstdflg = fsrflg = 0; 102 103 /* if not load or store do nothing */ 104 if ((inst >> 30) != 3) 105 return (0); 106 107 /* if ldstub or swap, do nothing */ 108 if ((inst & 0xc1680000) == 0xc0680000) 109 return (0); 110 111 /* if cas/casx, do nothing */ 112 if ((inst & 0xc1e00000) == 0xc1e00000) 113 return (0); 114 115 if (floatflg) { 116 switch ((inst >> 19) & 3) { /* map size bits to a number */ 117 case 0: sz = 4; 118 break; /* ldf{a}/stf{a} */ 119 case 1: fsrflg = 1; 120 if (rd == 0) 121 sz = 4; /* ldfsr/stfsr */ 122 else if (rd == 1) 123 sz = 8; /* ldxfsr/stxfsr */ 124 else 125 return (SIMU_ILLEGAL); 126 break; 127 case 2: sz = 16; 128 break; /* ldqf{a}/stqf{a} */ 129 case 3: sz = 8; 130 break; /* lddf{a}/stdf{a} */ 131 } 132 /* 133 * Fix to access extra double register encoding plus 134 * compensate to access the correct fpu_dreg. 135 */ 136 if ((sz > 4) && (fsrflg == 0)) { 137 if ((rd & 1) == 1) 138 rd = (rd & 0x1e) | 0x20; 139 rd = rd >> 1; 140 if ((sz == 16) && ((rd & 0x1) != 0)) 141 return (SIMU_ILLEGAL); 142 } 143 } else { 144 int sz_bits = (inst >> 19) & 0xf; 145 switch (sz_bits) { /* map size bits to a number */ 146 case 0: /* lduw{a} */ 147 case 4: /* stw{a} */ 148 case 8: /* ldsw{a} */ 149 case 0xf: /* swap */ 150 sz = 4; break; 151 case 1: /* ldub{a} */ 152 case 5: /* stb{a} */ 153 case 9: /* ldsb{a} */ 154 case 0xd: /* ldstub */ 155 sz = 1; break; 156 case 2: /* lduh{a} */ 157 case 6: /* sth{a} */ 158 case 0xa: /* ldsh{a} */ 159 sz = 2; break; 160 case 3: /* ldd{a} */ 161 case 7: /* std{a} */ 162 lddstdflg = 1; 163 sz = 8; break; 164 case 0xb: /* ldx{a} */ 165 case 0xe: /* stx{a} */ 166 sz = 8; break; 167 } 168 } 169 170 171 /* only support primary and secondary asi's */ 172 if ((op3 >> 4) & 1) { 173 if (immflg) { 174 asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) & 175 TSTATE_ASI_MASK; 176 } else { 177 asi = (inst >> 5) & 0xff; 178 } 179 switch (asi) { 180 case ASI_P: 181 case ASI_S: 182 break; 183 case ASI_PNF: 184 case ASI_SNF: 185 nf = 1; 186 break; 187 case ASI_PL: 188 case ASI_SL: 189 ltlend = 1; 190 break; 191 case ASI_PNFL: 192 case ASI_SNFL: 193 ltlend = 1; 194 nf = 1; 195 break; 196 default: 197 return (0); 198 } 199 /* 200 * Non-faulting stores generate a data_access_exception trap, 201 * according to the Spitfire manual, which should be signaled 202 * as an illegal instruction trap, because it can't be fixed. 203 */ 204 if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA))) 205 return (SIMU_ILLEGAL); 206 } 207 208 if (aligndebug) { 209 printf("unaligned access at %p, instruction: 0x%x\n", 210 (void *)rp->r_pc, inst); 211 printf("type %s", (((inst >> 21) & 1) ? "st" : "ld")); 212 if (((inst >> 21) & 1) == 0) 213 printf(" %s", (((inst >> 22) & 1) ? 214 "signed" : "unsigned")); 215 printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg); 216 printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n", 217 rd, op3, rs1, rs2, (inst & 0x1fff)); 218 } 219 220 (void) flush_user_windows_to_stack(NULL); 221 if (getreg(rp, rs1, &val, badaddr)) 222 return (SIMU_FAULT); 223 addr = (caddr_t)val; /* convert to 32/64 bit address */ 224 if (aligndebug) 225 printf("addr 1 = %p\n", (void *)addr); 226 227 /* check immediate bit and use immediate field or reg (rs2) */ 228 if (immflg) { 229 int imm; 230 imm = inst & 0x1fff; /* mask out immediate field */ 231 imm <<= 19; /* sign extend it */ 232 imm >>= 19; 233 addr += imm; /* compute address */ 234 } else { 235 if (getreg(rp, rs2, &val, badaddr)) 236 return (SIMU_FAULT); 237 addr += val; 238 } 239 240 /* 241 * If this is a 32-bit program, chop the address accordingly. The 242 * intermediate uintptr_t casts prevent warnings under a certain 243 * compiler, and the temporary 32 bit storage is intended to force 244 * proper code generation and break up what would otherwise be a 245 * quadruple cast. 246 */ 247 if (curproc->p_model == DATAMODEL_ILP32) { 248 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr; 249 addr = (caddr_t)(uintptr_t)addr32; 250 } 251 252 if (aligndebug) 253 printf("addr 2 = %p\n", (void *)addr); 254 255 if (addr >= curproc->p_as->a_userlimit) { 256 *badaddr = addr; 257 goto badret; 258 } 259 260 /* a single bit differentiates ld and st */ 261 if ((inst >> 21) & 1) { /* store */ 262 if (floatflg) { 263 klwp_id_t lwp = ttolwp(curthread); 264 kfpu_t *fp = lwptofpu(lwp); 265 /* Ensure fp has been enabled */ 266 if (fpu_exists) { 267 if (!(_fp_read_fprs() & FPRS_FEF)) 268 fp_enable(); 269 } else { 270 if (!fp->fpu_en) 271 fp_enable(); 272 } 273 /* if fpu_exists read fpu reg */ 274 if (fpu_exists) { 275 if (fsrflg) { 276 _fp_read_pfsr(&data.l[0]); 277 } else { 278 if (sz == 4) { 279 data.i[0] = 0; 280 _fp_read_pfreg( 281 (unsigned *)&data.i[1], rd); 282 } 283 if (sz >= 8) 284 _fp_read_pdreg( 285 &data.l[0], rd); 286 if (sz == 16) 287 _fp_read_pdreg( 288 &data.l[1], rd+1); 289 } 290 } else { 291 if (fsrflg) { 292 /* Clear reserved bits, set version=7 */ 293 fp->fpu_fsr &= ~0x30301000; 294 fp->fpu_fsr |= 0xE0000; 295 data.l[0] = fp->fpu_fsr; 296 } else { 297 if (sz == 4) { 298 data.i[0] = 0; 299 data.i[1] = 300 (unsigned)fp-> 301 fpu_fr.fpu_regs[rd]; 302 } 303 if (sz >= 8) 304 data.l[0] = 305 fp->fpu_fr.fpu_dregs[rd]; 306 if (sz == 16) 307 data.l[1] = 308 fp->fpu_fr.fpu_dregs[rd+1]; 309 } 310 } 311 } else { 312 if (lddstdflg) { /* combine the data */ 313 if (getreg(rp, rd, &data.l[0], badaddr)) 314 return (SIMU_FAULT); 315 if (getreg(rp, rd+1, &data.l[1], badaddr)) 316 return (SIMU_FAULT); 317 if (ltlend) { 318 /* 319 * For STD, each 32-bit word is byte- 320 * swapped individually. For 321 * simplicity we don't want to do that 322 * below, so we swap the words now to 323 * get the desired result in the end. 324 */ 325 data.i[0] = data.i[3]; 326 } else { 327 data.i[0] = data.i[1]; 328 data.i[1] = data.i[3]; 329 } 330 } else { 331 if (getreg(rp, rd, &data.l[0], badaddr)) 332 return (SIMU_FAULT); 333 } 334 } 335 336 if (aligndebug) { 337 if (sz == 16) { 338 printf("data %x %x %x %x\n", 339 data.i[0], data.i[1], data.i[2], data.c[3]); 340 } else { 341 printf("data %x %x %x %x %x %x %x %x\n", 342 data.c[0], data.c[1], data.c[2], data.c[3], 343 data.c[4], data.c[5], data.c[6], data.c[7]); 344 } 345 } 346 347 if (ltlend) { 348 if (sz == 1) { 349 if (xcopyout_little(&data.c[7], addr, 350 (size_t)sz) != 0) 351 goto badret; 352 } else if (sz == 2) { 353 if (xcopyout_little(&data.s[3], addr, 354 (size_t)sz) != 0) 355 goto badret; 356 } else if (sz == 4) { 357 if (xcopyout_little(&data.i[1], addr, 358 (size_t)sz) != 0) 359 goto badret; 360 } else { 361 if (xcopyout_little(&data.l[0], addr, 362 (size_t)sz) != 0) 363 goto badret; 364 } 365 } else { 366 if (sz == 1) { 367 if (copyout(&data.c[7], addr, (size_t)sz) == -1) 368 goto badret; 369 } else if (sz == 2) { 370 if (copyout(&data.s[3], addr, (size_t)sz) == -1) 371 goto badret; 372 } else if (sz == 4) { 373 if (copyout(&data.i[1], addr, (size_t)sz) == -1) 374 goto badret; 375 } else { 376 if (copyout(&data.l[0], addr, (size_t)sz) == -1) 377 goto badret; 378 } 379 } 380 } else { /* load */ 381 if (sz == 1) { 382 if (ltlend) { 383 if (xcopyin_little(addr, &data.c[7], 384 (size_t)sz) != 0) { 385 if (nf) 386 data.c[7] = 0; 387 else 388 goto badret; 389 } 390 } else { 391 if (copyin(addr, &data.c[7], 392 (size_t)sz) == -1) { 393 if (nf) 394 data.c[7] = 0; 395 else 396 goto badret; 397 } 398 } 399 /* if signed and the sign bit is set extend it */ 400 if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) { 401 data.i[0] = (uint_t)-1; /* extend sign bit */ 402 data.s[2] = (ushort_t)-1; 403 data.c[6] = (uchar_t)-1; 404 } else { 405 data.i[0] = 0; /* clear upper 32+24 bits */ 406 data.s[2] = 0; 407 data.c[6] = 0; 408 } 409 } else if (sz == 2) { 410 if (ltlend) { 411 if (xcopyin_little(addr, &data.s[3], 412 (size_t)sz) != 0) { 413 if (nf) 414 data.s[3] = 0; 415 else 416 goto badret; 417 } 418 } else { 419 if (copyin(addr, &data.s[3], 420 (size_t)sz) == -1) { 421 if (nf) 422 data.s[3] = 0; 423 else 424 goto badret; 425 } 426 } 427 /* if signed and the sign bit is set extend it */ 428 if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) { 429 data.i[0] = (uint_t)-1; /* extend sign bit */ 430 data.s[2] = (ushort_t)-1; 431 } else { 432 data.i[0] = 0; /* clear upper 32+16 bits */ 433 data.s[2] = 0; 434 } 435 } else if (sz == 4) { 436 if (ltlend) { 437 if (xcopyin_little(addr, &data.i[1], 438 (size_t)sz) != 0) { 439 if (!nf) 440 goto badret; 441 data.i[1] = 0; 442 } 443 } else { 444 if (copyin(addr, &data.i[1], 445 (size_t)sz) == -1) { 446 if (!nf) 447 goto badret; 448 data.i[1] = 0; 449 } 450 } 451 /* if signed and the sign bit is set extend it */ 452 if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) { 453 data.i[0] = (uint_t)-1; /* extend sign bit */ 454 } else { 455 data.i[0] = 0; /* clear upper 32 bits */ 456 } 457 } else { 458 if (ltlend) { 459 if (xcopyin_little(addr, &data.l[0], 460 (size_t)sz) != 0) { 461 if (!nf) 462 goto badret; 463 data.l[0] = 0; 464 } 465 } else { 466 if (copyin(addr, &data.l[0], 467 (size_t)sz) == -1) { 468 if (!nf) 469 goto badret; 470 data.l[0] = 0; 471 } 472 } 473 } 474 475 if (aligndebug) { 476 if (sz == 16) { 477 printf("data %x %x %x %x\n", 478 data.i[0], data.i[1], data.i[2], data.c[3]); 479 } else { 480 printf("data %x %x %x %x %x %x %x %x\n", 481 data.c[0], data.c[1], data.c[2], data.c[3], 482 data.c[4], data.c[5], data.c[6], data.c[7]); 483 } 484 } 485 486 if (floatflg) { /* if fpu_exists write fpu reg */ 487 klwp_id_t lwp = ttolwp(curthread); 488 kfpu_t *fp = lwptofpu(lwp); 489 /* Ensure fp has been enabled */ 490 if (fpu_exists) { 491 if (!(_fp_read_fprs() & FPRS_FEF)) 492 fp_enable(); 493 } else { 494 if (!fp->fpu_en) 495 fp_enable(); 496 } 497 /* if fpu_exists read fpu reg */ 498 if (fpu_exists) { 499 if (fsrflg) { 500 _fp_write_pfsr(&data.l[0]); 501 } else { 502 if (sz == 4) 503 _fp_write_pfreg( 504 (unsigned *)&data.i[1], rd); 505 if (sz >= 8) 506 _fp_write_pdreg( 507 &data.l[0], rd); 508 if (sz == 16) 509 _fp_write_pdreg( 510 &data.l[1], rd+1); 511 } 512 } else { 513 if (fsrflg) { 514 fp->fpu_fsr = data.l[0]; 515 } else { 516 if (sz == 4) 517 fp->fpu_fr.fpu_regs[rd] = 518 (unsigned)data.i[1]; 519 if (sz >= 8) 520 fp->fpu_fr.fpu_dregs[rd] = 521 data.l[0]; 522 if (sz == 16) 523 fp->fpu_fr.fpu_dregs[rd+1] = 524 data.l[1]; 525 } 526 } 527 } else { 528 if (lddstdflg) { /* split the data */ 529 if (ltlend) { 530 /* 531 * For LDD, each 32-bit word is byte- 532 * swapped individually. We didn't 533 * do that above, but this will give 534 * us the desired result. 535 */ 536 data.i[3] = data.i[0]; 537 } else { 538 data.i[3] = data.i[1]; 539 data.i[1] = data.i[0]; 540 } 541 data.i[0] = 0; 542 data.i[2] = 0; 543 if (putreg(&data.l[0], rp, rd, badaddr) == -1) 544 goto badret; 545 if (putreg(&data.l[1], rp, rd+1, badaddr) == -1) 546 goto badret; 547 } else { 548 if (putreg(&data.l[0], rp, rd, badaddr) == -1) 549 goto badret; 550 } 551 } 552 } 553 return (SIMU_SUCCESS); 554 badret: 555 return (SIMU_FAULT); 556 } 557 558 559 int 560 simulate_lddstd(struct regs *rp, caddr_t *badaddr) 561 { 562 uint_t inst, op3, asi = 0; 563 uint_t rd, rs1, rs2; 564 int nf = 0, ltlend = 0, usermode; 565 int immflg; 566 uint64_t reven; 567 uint64_t rodd; 568 caddr_t addr; 569 uint64_t val; 570 uint64_t data; 571 572 usermode = USERMODE(rp->r_tstate); 573 574 if (usermode) 575 inst = fetch_user_instr((caddr_t)rp->r_pc); 576 else 577 inst = *(uint_t *)rp->r_pc; 578 579 op3 = (inst >> 19) & 0x3f; 580 rd = (inst >> 25) & 0x1f; 581 rs1 = (inst >> 14) & 0x1f; 582 rs2 = inst & 0x1f; 583 immflg = (inst >> 13) & 1; 584 585 if (USERMODE(rp->r_tstate)) 586 (void) flush_user_windows_to_stack(NULL); 587 else 588 flush_windows(); 589 590 if ((op3 >> 4) & 1) { /* is this LDDA/STDA? */ 591 if (immflg) { 592 asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) & 593 TSTATE_ASI_MASK; 594 } else { 595 asi = (inst >> 5) & 0xff; 596 } 597 switch (asi) { 598 case ASI_P: 599 case ASI_S: 600 break; 601 case ASI_PNF: 602 case ASI_SNF: 603 nf = 1; 604 break; 605 case ASI_PL: 606 case ASI_SL: 607 ltlend = 1; 608 break; 609 case ASI_PNFL: 610 case ASI_SNFL: 611 ltlend = 1; 612 nf = 1; 613 break; 614 case ASI_AIUP: 615 case ASI_AIUS: 616 usermode = 1; 617 break; 618 case ASI_AIUPL: 619 case ASI_AIUSL: 620 usermode = 1; 621 ltlend = 1; 622 break; 623 default: 624 return (SIMU_ILLEGAL); 625 } 626 } 627 628 if (getreg(rp, rs1, &val, badaddr)) 629 return (SIMU_FAULT); 630 addr = (caddr_t)val; /* convert to 32/64 bit address */ 631 632 /* check immediate bit and use immediate field or reg (rs2) */ 633 if (immflg) { 634 int imm; 635 imm = inst & 0x1fff; /* mask out immediate field */ 636 imm <<= 19; /* sign extend it */ 637 imm >>= 19; 638 addr += imm; /* compute address */ 639 } else { 640 if (getreg(rp, rs2, &val, badaddr)) 641 return (SIMU_FAULT); 642 addr += val; 643 } 644 645 /* 646 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than 647 * T_ALIGNMENT. So we have to make sure that the address is 648 * kosher before trying to use it, because the hardware hasn't 649 * checked it for us yet. 650 */ 651 if (((uintptr_t)addr & 0x7) != 0) { 652 if (curproc->p_fixalignment) 653 return (do_unaligned(rp, badaddr)); 654 else 655 return (SIMU_UNALIGN); 656 } 657 658 /* 659 * If this is a 32-bit program, chop the address accordingly. The 660 * intermediate uintptr_t casts prevent warnings under a certain 661 * compiler, and the temporary 32 bit storage is intended to force 662 * proper code generation and break up what would otherwise be a 663 * quadruple cast. 664 */ 665 if (curproc->p_model == DATAMODEL_ILP32 && usermode) { 666 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr; 667 addr = (caddr_t)(uintptr_t)addr32; 668 } 669 670 if ((inst >> 21) & 1) { /* store */ 671 if (getreg(rp, rd, &reven, badaddr)) 672 return (SIMU_FAULT); 673 if (getreg(rp, rd+1, &rodd, badaddr)) 674 return (SIMU_FAULT); 675 if (ltlend) { 676 reven = BSWAP_32(reven); 677 rodd = BSWAP_32(rodd); 678 } 679 data = (reven << 32) | rodd; 680 if (usermode) { 681 if (suword64_nowatch(addr, data) == -1) 682 return (SIMU_FAULT); 683 } else { 684 *(uint64_t *)addr = data; 685 } 686 } else { /* load */ 687 if (usermode) { 688 if (fuword64_nowatch(addr, &data)) { 689 if (nf) 690 data = 0; 691 else 692 return (SIMU_FAULT); 693 } 694 } else 695 data = *(uint64_t *)addr; 696 697 reven = (data >> 32); 698 rodd = (uint64_t)(uint32_t)data; 699 if (ltlend) { 700 reven = BSWAP_32(reven); 701 rodd = BSWAP_32(rodd); 702 } 703 704 if (putreg(&reven, rp, rd, badaddr) == -1) 705 return (SIMU_FAULT); 706 if (putreg(&rodd, rp, rd+1, badaddr) == -1) 707 return (SIMU_FAULT); 708 } 709 return (SIMU_SUCCESS); 710 } 711 712 713 /* 714 * simulate popc 715 */ 716 static int 717 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst) 718 { 719 uint_t rd, rs2, rs1; 720 uint_t immflg; 721 uint64_t val, cnt = 0; 722 723 rd = (inst >> 25) & 0x1f; 724 rs1 = (inst >> 14) & 0x1f; 725 rs2 = inst & 0x1f; 726 immflg = (inst >> 13) & 1; 727 728 if (rs1 > 0) 729 return (SIMU_ILLEGAL); 730 731 (void) flush_user_windows_to_stack(NULL); 732 733 /* check immediate bit and use immediate field or reg (rs2) */ 734 if (immflg) { 735 int64_t imm; 736 imm = inst & 0x1fff; /* mask out immediate field */ 737 imm <<= 51; /* sign extend it */ 738 imm >>= 51; 739 if (imm != 0) { 740 for (cnt = 0; imm != 0; imm &= imm-1) 741 cnt++; 742 } 743 } else { 744 if (getreg(rp, rs2, &val, badaddr)) 745 return (SIMU_FAULT); 746 if (val != 0) { 747 for (cnt = 0; val != 0; val &= val-1) 748 cnt++; 749 } 750 } 751 752 if (putreg(&cnt, rp, rd, badaddr) == -1) 753 return (SIMU_FAULT); 754 755 return (SIMU_SUCCESS); 756 } 757 758 /* 759 * simulate mulscc 760 */ 761 static int 762 simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst) 763 { 764 uint32_t s1, s2; 765 uint32_t c, d, v; 766 uint_t rd, rs1; 767 int64_t d64; 768 uint64_t ud64; 769 uint64_t drs1; 770 771 (void) flush_user_windows_to_stack(NULL); 772 773 if ((inst >> 13) & 1) { /* immediate */ 774 d64 = inst & 0x1fff; 775 d64 <<= 51; /* sign extend it */ 776 d64 >>= 51; 777 } else { 778 uint_t rs2; 779 uint64_t drs2; 780 781 if (inst & 0x1fe0) { 782 return (SIMU_ILLEGAL); 783 } 784 rs2 = inst & 0x1f; 785 if (getreg(rp, rs2, &drs2, badaddr)) { 786 return (SIMU_FAULT); 787 } 788 d64 = (int64_t)drs2; 789 } 790 791 rs1 = (inst >> 14) & 0x1f; 792 if (getreg(rp, rs1, &drs1, badaddr)) { 793 return (SIMU_FAULT); 794 } 795 /* icc.n xor icc.v */ 796 s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^ 797 ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1)); 798 s1 = (s1 << 31) | (((uint32_t)drs1) >> 1); 799 800 if (rp->r_y & 1) { 801 s2 = (uint32_t)d64; 802 } else { 803 s2 = 0; 804 } 805 d = s1 + s2; 806 807 ud64 = (uint64_t)d; 808 809 /* set the icc flags */ 810 v = (s1 & s2 & ~d) | (~s1 & ~s2 & d); 811 c = (s1 & s2) | (~d & (s1 | s2)); 812 rp->r_tstate &= ~TSTATE_ICC; 813 rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0); 814 rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1); 815 rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2); 816 rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3); 817 818 if (rp->r_tstate & TSTATE_IC) { 819 ud64 |= (1ULL << 32); 820 } 821 822 /* set the xcc flags */ 823 rp->r_tstate &= ~TSTATE_XCC; 824 if (ud64 == 0) { 825 rp->r_tstate |= TSTATE_XZ; 826 } 827 828 rd = (inst >> 25) & 0x1f; 829 if (putreg(&ud64, rp, rd, badaddr)) { 830 return (SIMU_FAULT); 831 } 832 833 d64 = (drs1 << 32) | (uint32_t)rp->r_y; 834 d64 >>= 1; 835 rp->r_y = (uint32_t)d64; 836 837 return (SIMU_SUCCESS); 838 } 839 840 /* 841 * simulate unimplemented instructions (popc, ldqf{a}, stqf{a}) 842 */ 843 int 844 simulate_unimp(struct regs *rp, caddr_t *badaddr) 845 { 846 uint_t inst, optype, op3, asi; 847 uint_t rs1, rd; 848 uint_t ignor, i; 849 machpcb_t *mpcb = lwptompcb(ttolwp(curthread)); 850 int nomatch = 0; 851 caddr_t addr = (caddr_t)rp->r_pc; 852 struct as *as; 853 caddr_t ka; 854 pfn_t pfnum; 855 page_t *pp; 856 proc_t *p = ttoproc(curthread); 857 struct seg *mapseg; 858 struct segvn_data *svd; 859 860 ASSERT(USERMODE(rp->r_tstate)); 861 inst = fetch_user_instr(addr); 862 if (inst == (uint_t)-1) { 863 mpcb->mpcb_illexcaddr = addr; 864 mpcb->mpcb_illexcinsn = (uint32_t)-1; 865 return (SIMU_ILLEGAL); 866 } 867 868 /* 869 * When fixing dirty v8 instructions there's a race if two processors 870 * are executing the dirty executable at the same time. If one 871 * cleans the instruction as the other is executing it the second 872 * processor will see a clean instruction when it comes through this 873 * code and will return SIMU_ILLEGAL. To work around the race 874 * this code will keep track of the last illegal instruction seen 875 * by each lwp and will only take action if the illegal instruction 876 * is repeatable. 877 */ 878 if (addr != mpcb->mpcb_illexcaddr || 879 inst != mpcb->mpcb_illexcinsn) 880 nomatch = 1; 881 mpcb->mpcb_illexcaddr = addr; 882 mpcb->mpcb_illexcinsn = inst; 883 884 /* instruction fields */ 885 i = (inst >> 13) & 0x1; 886 rd = (inst >> 25) & 0x1f; 887 optype = (inst >> 30) & 0x3; 888 op3 = (inst >> 19) & 0x3f; 889 ignor = (inst >> 5) & 0xff; 890 if (IS_IBIT_SET(inst)) { 891 asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) & 892 TSTATE_ASI_MASK); 893 } else { 894 asi = ignor; 895 } 896 897 if (IS_VIS1(optype, op3) || 898 IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) || 899 IS_FLOAT_QUAD_OP(optype, op3)) { 900 klwp_t *lwp = ttolwp(curthread); 901 kfpu_t *fp = lwptofpu(lwp); 902 if (fpu_exists) { 903 if (!(_fp_read_fprs() & FPRS_FEF)) 904 fp_enable(); 905 _fp_read_pfsr(&fp->fpu_fsr); 906 } else { 907 if (!fp->fpu_en) 908 fp_enable(); 909 } 910 fp_precise(rp); 911 return (SIMU_RETRY); 912 } 913 914 if (optype == 2 && op3 == IOP_V8_POPC) { 915 return (simulate_popc(rp, badaddr, inst)); 916 } else if (optype == 3 && op3 == IOP_V8_POPC) { 917 return (SIMU_ILLEGAL); 918 } else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) { 919 return (simulate_mulscc(rp, badaddr, inst)); 920 } 921 922 if (optype == OP_V8_LDSTR) { 923 if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA || 924 op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA) 925 return (do_unaligned(rp, badaddr)); 926 } 927 928 /* This is a new instruction so illexccnt should also be set. */ 929 if (nomatch) { 930 mpcb->mpcb_illexccnt = 0; 931 return (SIMU_RETRY); 932 } 933 934 /* 935 * In order to keep us from entering into an infinite loop while 936 * attempting to clean up faulty instructions, we will return 937 * SIMU_ILLEGAL once we've cleaned up the instruction as much 938 * as we can, and still end up here. 939 */ 940 if (mpcb->mpcb_illexccnt >= 3) 941 return (SIMU_ILLEGAL); 942 943 mpcb->mpcb_illexccnt += 1; 944 945 /* 946 * The rest of the code handles v8 binaries with instructions 947 * that have dirty (non-zero) bits in reserved or 'ignored' 948 * fields; these will cause core dumps on v9 machines. 949 * 950 * We only clean dirty instructions in 32-bit programs (ie, v8) 951 * running on SPARCv9 processors. True v9 programs are forced 952 * to use the instruction set as intended. 953 */ 954 if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32) 955 return (SIMU_ILLEGAL); 956 switch (optype) { 957 case OP_V8_BRANCH: 958 case OP_V8_CALL: 959 return (SIMU_ILLEGAL); /* these don't have ignored fields */ 960 /*NOTREACHED*/ 961 case OP_V8_ARITH: 962 switch (op3) { 963 case IOP_V8_RETT: 964 if (rd == 0 && !(i == 0 && ignor)) 965 return (SIMU_ILLEGAL); 966 if (rd) 967 inst &= ~(0x1f << 25); 968 if (i == 0 && ignor) 969 inst &= ~(0xff << 5); 970 break; 971 case IOP_V8_TCC: 972 if (i == 0 && ignor != 0) { 973 inst &= ~(0xff << 5); 974 } else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) { 975 inst &= ~(0x3f << 7); 976 } else { 977 return (SIMU_ILLEGAL); 978 } 979 break; 980 case IOP_V8_JMPL: 981 case IOP_V8_RESTORE: 982 case IOP_V8_SAVE: 983 if ((op3 == IOP_V8_RETT && rd) || 984 (i == 0 && ignor)) { 985 inst &= ~(0xff << 5); 986 } else { 987 return (SIMU_ILLEGAL); 988 } 989 break; 990 case IOP_V8_FCMP: 991 if (rd == 0) 992 return (SIMU_ILLEGAL); 993 inst &= ~(0x1f << 25); 994 break; 995 case IOP_V8_RDASR: 996 rs1 = ((inst >> 14) & 0x1f); 997 if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) { 998 /* 999 * The instruction specifies an invalid 1000 * state register - better bail out than 1001 * "fix" it when we're not sure what was 1002 * intended. 1003 */ 1004 return (SIMU_ILLEGAL); 1005 } 1006 /* 1007 * Note: this case includes the 'stbar' 1008 * instruction (rs1 == 15 && i == 0). 1009 */ 1010 if ((ignor = (inst & 0x3fff)) != 0) 1011 inst &= ~(0x3fff); 1012 break; 1013 case IOP_V8_SRA: 1014 case IOP_V8_SRL: 1015 case IOP_V8_SLL: 1016 if (ignor == 0) 1017 return (SIMU_ILLEGAL); 1018 inst &= ~(0xff << 5); 1019 break; 1020 case IOP_V8_ADD: 1021 case IOP_V8_AND: 1022 case IOP_V8_OR: 1023 case IOP_V8_XOR: 1024 case IOP_V8_SUB: 1025 case IOP_V8_ANDN: 1026 case IOP_V8_ORN: 1027 case IOP_V8_XNOR: 1028 case IOP_V8_ADDC: 1029 case IOP_V8_UMUL: 1030 case IOP_V8_SMUL: 1031 case IOP_V8_SUBC: 1032 case IOP_V8_UDIV: 1033 case IOP_V8_SDIV: 1034 case IOP_V8_ADDcc: 1035 case IOP_V8_ANDcc: 1036 case IOP_V8_ORcc: 1037 case IOP_V8_XORcc: 1038 case IOP_V8_SUBcc: 1039 case IOP_V8_ANDNcc: 1040 case IOP_V8_ORNcc: 1041 case IOP_V8_XNORcc: 1042 case IOP_V8_ADDCcc: 1043 case IOP_V8_UMULcc: 1044 case IOP_V8_SMULcc: 1045 case IOP_V8_SUBCcc: 1046 case IOP_V8_UDIVcc: 1047 case IOP_V8_SDIVcc: 1048 case IOP_V8_TADDcc: 1049 case IOP_V8_TSUBcc: 1050 case IOP_V8_TADDccTV: 1051 case IOP_V8_TSUBccTV: 1052 case IOP_V8_MULScc: 1053 case IOP_V8_WRASR: 1054 case IOP_V8_FLUSH: 1055 if (i != 0 || ignor == 0) 1056 return (SIMU_ILLEGAL); 1057 inst &= ~(0xff << 5); 1058 break; 1059 default: 1060 return (SIMU_ILLEGAL); 1061 } 1062 break; 1063 case OP_V8_LDSTR: 1064 switch (op3) { 1065 case IOP_V8_STFSR: 1066 case IOP_V8_LDFSR: 1067 if (rd == 0 && !(i == 0 && ignor)) 1068 return (SIMU_ILLEGAL); 1069 if (rd) 1070 inst &= ~(0x1f << 25); 1071 if (i == 0 && ignor) 1072 inst &= ~(0xff << 5); 1073 break; 1074 default: 1075 if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) && 1076 i == 0 && ignor) 1077 inst &= ~(0xff << 5); 1078 else 1079 return (SIMU_ILLEGAL); 1080 break; 1081 } 1082 break; 1083 default: 1084 return (SIMU_ILLEGAL); 1085 } 1086 1087 as = p->p_as; 1088 1089 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1090 mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0); 1091 ASSERT(mapseg != NULL); 1092 svd = (struct segvn_data *)mapseg->s_data; 1093 1094 /* 1095 * We only create COW page for MAP_PRIVATE mappings. 1096 */ 1097 SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER); 1098 if ((svd->type & MAP_TYPE) & MAP_SHARED) { 1099 SEGVN_LOCK_EXIT(as, &svd->lock); 1100 AS_LOCK_EXIT(as, &as->a_lock); 1101 return (SIMU_ILLEGAL); 1102 } 1103 SEGVN_LOCK_EXIT(as, &svd->lock); 1104 AS_LOCK_EXIT(as, &as->a_lock); 1105 1106 /* 1107 * A "flush" instruction using the user PC's vaddr will not work 1108 * here, at least on Spitfire. Instead we create a temporary kernel 1109 * mapping to the user's text page, then modify and flush that. 1110 * Break COW by locking user page. 1111 */ 1112 if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE, 1113 F_SOFTLOCK, S_READ)) 1114 return (SIMU_FAULT); 1115 1116 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1117 pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc); 1118 AS_LOCK_EXIT(as, &as->a_lock); 1119 if (pf_is_memory(pfnum)) { 1120 pp = page_numtopp_nolock(pfnum); 1121 ASSERT(pp == NULL || PAGE_LOCKED(pp)); 1122 } else { 1123 (void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), 1124 PAGESIZE, F_SOFTUNLOCK, S_READ); 1125 return (SIMU_FAULT); 1126 } 1127 1128 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1129 ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc); 1130 *(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst; 1131 doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE)); 1132 ppmapout(ka); 1133 AS_LOCK_EXIT(as, &as->a_lock); 1134 1135 (void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), 1136 PAGESIZE, F_SOFTUNLOCK, S_READ); 1137 return (SIMU_RETRY); 1138 } 1139 1140 /* 1141 * Get the value of a register for instruction simulation 1142 * by using the regs or window structure pointers. 1143 * Return 0 for success, and -1 for failure. If there is a failure, 1144 * save the faulting address using badaddr pointer. 1145 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals. 1146 * Don't truncate globals/outs for 32 bit programs, for v8+ support. 1147 */ 1148 int 1149 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr) 1150 { 1151 uint64_t *rgs, *sp; 1152 int rv = 0; 1153 1154 rgs = (uint64_t *)&rp->r_ps; /* globals and outs */ 1155 sp = (uint64_t *)rp->r_sp; /* ins and locals */ 1156 if (reg == 0) { 1157 *val = 0; 1158 } else if (reg < 16) { 1159 *val = rgs[reg]; 1160 } else if (IS_V9STACK(sp)) { 1161 uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64); 1162 uint64_t *addr = (uint64_t *)&rw[reg - 16]; 1163 uint64_t res; 1164 1165 if (USERMODE(rp->r_tstate)) { 1166 if (fuword64_nowatch(addr, &res) == -1) { 1167 *badaddr = (caddr_t)addr; 1168 rv = -1; 1169 } 1170 } else { 1171 res = *addr; 1172 } 1173 *val = res; 1174 } else { 1175 caddr32_t sp32 = (caddr32_t)(uintptr_t)sp; 1176 uint32_t *rw = (uint32_t *)(uintptr_t)sp32; 1177 uint32_t *addr = (uint32_t *)&rw[reg - 16]; 1178 uint32_t res; 1179 1180 if (USERMODE(rp->r_tstate)) { 1181 if (fuword32_nowatch(addr, &res) == -1) { 1182 *badaddr = (caddr_t)addr; 1183 rv = -1; 1184 } 1185 } else { 1186 res = *addr; 1187 } 1188 *val = (uint64_t)res; 1189 } 1190 return (rv); 1191 } 1192 1193 /* 1194 * Set the value of a register after instruction simulation 1195 * by using the regs or window structure pointers. 1196 * Return 0 for succes -1 failure. 1197 * save the faulting address using badaddr pointer. 1198 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals. 1199 * Don't truncate globals/outs for 32 bit programs, for v8+ support. 1200 */ 1201 int 1202 putreg(uint64_t *data, struct regs *rp, uint_t reg, caddr_t *badaddr) 1203 { 1204 uint64_t *rgs, *sp; 1205 int rv = 0; 1206 1207 rgs = (uint64_t *)&rp->r_ps; /* globals and outs */ 1208 sp = (uint64_t *)rp->r_sp; /* ins and locals */ 1209 if (reg == 0) { 1210 return (0); 1211 } else if (reg < 16) { 1212 rgs[reg] = *data; 1213 } else if (IS_V9STACK(sp)) { 1214 uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64); 1215 uint64_t *addr = (uint64_t *)&rw[reg - 16]; 1216 uint64_t res; 1217 1218 if (USERMODE(rp->r_tstate)) { 1219 struct machpcb *mpcb = lwptompcb(curthread->t_lwp); 1220 1221 res = *data; 1222 if (suword64_nowatch(addr, res) != 0) { 1223 *badaddr = (caddr_t)addr; 1224 rv = -1; 1225 } 1226 /* 1227 * We have changed a local or in register; 1228 * nuke the watchpoint return windows. 1229 */ 1230 mpcb->mpcb_rsp[0] = NULL; 1231 mpcb->mpcb_rsp[1] = NULL; 1232 } else { 1233 res = *data; 1234 *addr = res; 1235 } 1236 } else { 1237 caddr32_t sp32 = (caddr32_t)(uintptr_t)sp; 1238 uint32_t *rw = (uint32_t *)(uintptr_t)sp32; 1239 uint32_t *addr = (uint32_t *)&rw[reg - 16]; 1240 uint32_t res; 1241 1242 if (USERMODE(rp->r_tstate)) { 1243 struct machpcb *mpcb = lwptompcb(curthread->t_lwp); 1244 1245 res = (uint_t)*data; 1246 if (suword32_nowatch(addr, res) != 0) { 1247 *badaddr = (caddr_t)addr; 1248 rv = -1; 1249 } 1250 /* 1251 * We have changed a local or in register; 1252 * nuke the watchpoint return windows. 1253 */ 1254 mpcb->mpcb_rsp[0] = NULL; 1255 mpcb->mpcb_rsp[1] = NULL; 1256 1257 } else { 1258 res = (uint_t)*data; 1259 *addr = res; 1260 } 1261 } 1262 return (rv); 1263 } 1264 1265 /* 1266 * Calculate a memory reference address from instruction 1267 * operands, used to return the address of a fault, instead 1268 * of the instruction when an error occurs. This is code that is 1269 * common with most of the routines that simulate instructions. 1270 */ 1271 int 1272 calc_memaddr(struct regs *rp, caddr_t *badaddr) 1273 { 1274 uint_t inst; 1275 uint_t rd, rs1, rs2; 1276 int sz; 1277 int immflg; 1278 int floatflg; 1279 caddr_t addr; 1280 uint64_t val; 1281 1282 if (USERMODE(rp->r_tstate)) 1283 inst = fetch_user_instr((caddr_t)rp->r_pc); 1284 else 1285 inst = *(uint_t *)rp->r_pc; 1286 1287 rd = (inst >> 25) & 0x1f; 1288 rs1 = (inst >> 14) & 0x1f; 1289 rs2 = inst & 0x1f; 1290 floatflg = (inst >> 24) & 1; 1291 immflg = (inst >> 13) & 1; 1292 1293 if (floatflg) { 1294 switch ((inst >> 19) & 3) { /* map size bits to a number */ 1295 case 0: sz = 4; break; /* ldf/stf */ 1296 case 1: return (0); /* ld[x]fsr/st[x]fsr */ 1297 case 2: sz = 16; break; /* ldqf/stqf */ 1298 case 3: sz = 8; break; /* lddf/stdf */ 1299 } 1300 /* 1301 * Fix to access extra double register encoding plus 1302 * compensate to access the correct fpu_dreg. 1303 */ 1304 if (sz > 4) { 1305 if ((rd & 1) == 1) 1306 rd = (rd & 0x1e) | 0x20; 1307 rd = rd >> 1; 1308 } 1309 } else { 1310 switch ((inst >> 19) & 0xf) { /* map size bits to a number */ 1311 case 0: /* lduw */ 1312 case 4: /* stw */ 1313 case 8: /* ldsw */ 1314 case 0xf: /* swap */ 1315 sz = 4; break; 1316 case 1: /* ldub */ 1317 case 5: /* stb */ 1318 case 9: /* ldsb */ 1319 case 0xd: /* ldstub */ 1320 sz = 1; break; 1321 case 2: /* lduh */ 1322 case 6: /* sth */ 1323 case 0xa: /* ldsh */ 1324 sz = 2; break; 1325 case 3: /* ldd */ 1326 case 7: /* std */ 1327 case 0xb: /* ldx */ 1328 case 0xe: /* stx */ 1329 sz = 8; break; 1330 } 1331 } 1332 1333 if (USERMODE(rp->r_tstate)) 1334 (void) flush_user_windows_to_stack(NULL); 1335 else 1336 flush_windows(); 1337 1338 if (getreg(rp, rs1, &val, badaddr)) 1339 return (SIMU_FAULT); 1340 addr = (caddr_t)val; 1341 1342 /* check immediate bit and use immediate field or reg (rs2) */ 1343 if (immflg) { 1344 int imm; 1345 imm = inst & 0x1fff; /* mask out immediate field */ 1346 imm <<= 19; /* sign extend it */ 1347 imm >>= 19; 1348 addr += imm; /* compute address */ 1349 } else { 1350 if (getreg(rp, rs2, &val, badaddr)) 1351 return (SIMU_FAULT); 1352 addr += val; 1353 } 1354 1355 /* 1356 * If this is a 32-bit program, chop the address accordingly. The 1357 * intermediate uintptr_t casts prevent warnings under a certain 1358 * compiler, and the temporary 32 bit storage is intended to force 1359 * proper code generation and break up what would otherwise be a 1360 * quadruple cast. 1361 */ 1362 if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) { 1363 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr; 1364 addr = (caddr_t)(uintptr_t)addr32; 1365 } 1366 1367 *badaddr = addr; 1368 return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS); 1369 } 1370 1371 /* 1372 * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64). 1373 * Also compute the precise address by instruction disassembly. 1374 * (v9 page faults only provide the page address via the hardware.) 1375 * Return 0 on failure (not a load or store instruction). 1376 */ 1377 int 1378 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr) 1379 { 1380 uint_t inst, op3, asi; 1381 uint_t rd, rs1, rs2; 1382 int sz = 0; 1383 int immflg; 1384 int floatflg; 1385 caddr_t addr; 1386 caddr_t badaddr; 1387 uint64_t val; 1388 1389 if (rdwr == S_EXEC) { 1390 *addrp = (caddr_t)rp->r_pc; 1391 return (4); 1392 } 1393 1394 /* 1395 * Fetch the instruction from user-level. 1396 * We would like to assert this: 1397 * ASSERT(USERMODE(rp->r_tstate)); 1398 * but we can't because we can reach this point from a 1399 * register window underflow/overflow and the v9 wbuf 1400 * traps call trap() with T_USER even though r_tstate 1401 * indicates a system trap, not a user trap. 1402 */ 1403 inst = fetch_user_instr((caddr_t)rp->r_pc); 1404 1405 op3 = (inst >> 19) & 0x3f; 1406 rd = (inst >> 25) & 0x1f; 1407 rs1 = (inst >> 14) & 0x1f; 1408 rs2 = inst & 0x1f; 1409 floatflg = (inst >> 24) & 1; 1410 immflg = (inst >> 13) & 1; 1411 1412 /* if not load or store do nothing. can't happen? */ 1413 if ((inst >> 30) != 3) 1414 return (0); 1415 1416 if (immflg) 1417 asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) & 1418 TSTATE_ASI_MASK); 1419 else 1420 asi = (inst >> 5) & 0xff; 1421 1422 if (floatflg) { 1423 /* check for ld/st alternate and highest defined V9 asi */ 1424 if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) { 1425 sz = extended_asi_size(asi); 1426 } else { 1427 switch (op3 & 3) { 1428 case 0: 1429 sz = 4; /* ldf/stf/cas */ 1430 break; 1431 case 1: 1432 if (rd == 0) 1433 sz = 4; /* ldfsr/stfsr */ 1434 else 1435 sz = 8; /* ldxfsr/stxfsr */ 1436 break; 1437 case 2: 1438 if (op3 == 0x3e) 1439 sz = 8; /* casx */ 1440 else 1441 sz = 16; /* ldqf/stqf */ 1442 break; 1443 case 3: 1444 sz = 8; /* lddf/stdf */ 1445 break; 1446 } 1447 } 1448 } else { 1449 switch (op3 & 0xf) { /* map size bits to a number */ 1450 case 0: /* lduw */ 1451 case 4: /* stw */ 1452 case 8: /* ldsw */ 1453 case 0xf: /* swap */ 1454 sz = 4; break; 1455 case 1: /* ldub */ 1456 case 5: /* stb */ 1457 case 9: /* ldsb */ 1458 case 0xd: /* ldstub */ 1459 sz = 1; break; 1460 case 2: /* lduh */ 1461 case 6: /* sth */ 1462 case 0xa: /* ldsh */ 1463 sz = 2; break; 1464 case 3: /* ldd */ 1465 case 7: /* std */ 1466 case 0xb: /* ldx */ 1467 case 0xe: /* stx */ 1468 sz = 8; break; 1469 } 1470 } 1471 1472 if (sz == 0) /* can't happen? */ 1473 return (0); 1474 (void) flush_user_windows_to_stack(NULL); 1475 1476 if (getreg(rp, rs1, &val, &badaddr)) 1477 return (0); 1478 addr = (caddr_t)val; 1479 1480 /* cas/casx don't use rs2 / simm13 to compute the address */ 1481 if ((op3 & 0x3d) != 0x3c) { 1482 /* check immediate bit and use immediate field or reg (rs2) */ 1483 if (immflg) { 1484 int imm; 1485 imm = inst & 0x1fff; /* mask out immediate field */ 1486 imm <<= 19; /* sign extend it */ 1487 imm >>= 19; 1488 addr += imm; /* compute address */ 1489 } else { 1490 /* 1491 * asi's in the 0xCx range are partial store 1492 * instructions. For these, rs2 is a mask, not part of 1493 * the address. 1494 */ 1495 if (!(floatflg && (asi & 0xf0) == 0xc0)) { 1496 if (getreg(rp, rs2, &val, &badaddr)) 1497 return (0); 1498 addr += val; 1499 } 1500 } 1501 } 1502 1503 /* 1504 * If this is a 32-bit program, chop the address accordingly. The 1505 * intermediate uintptr_t casts prevent warnings under a certain 1506 * compiler, and the temporary 32 bit storage is intended to force 1507 * proper code generation and break up what would otherwise be a 1508 * quadruple cast. 1509 */ 1510 if (curproc->p_model == DATAMODEL_ILP32) { 1511 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr; 1512 addr = (caddr_t)(uintptr_t)addr32; 1513 } 1514 1515 *addrp = addr; 1516 ASSERT(sz != 0); 1517 return (sz); 1518 } 1519 1520 /* 1521 * Fetch an instruction from user-level. 1522 * Deal with watchpoints, if they are in effect. 1523 */ 1524 int32_t 1525 fetch_user_instr(caddr_t vaddr) 1526 { 1527 proc_t *p = curproc; 1528 int32_t instr; 1529 1530 /* 1531 * If this is a 32-bit program, chop the address accordingly. The 1532 * intermediate uintptr_t casts prevent warnings under a certain 1533 * compiler, and the temporary 32 bit storage is intended to force 1534 * proper code generation and break up what would otherwise be a 1535 * quadruple cast. 1536 */ 1537 if (p->p_model == DATAMODEL_ILP32) { 1538 caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr; 1539 vaddr = (caddr_t)(uintptr_t)vaddr32; 1540 } 1541 1542 if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1) 1543 instr = -1; 1544 1545 return (instr); 1546 } 1547