xref: /titanic_50/usr/src/uts/sun4/os/visinstr.c (revision e127a3e717f822eb855235fa3bd08235b2cf533d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /* VIS floating point instruction simulator for Sparc FPU simulator. */
29 
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/fpu/fpusystm.h>
33 #include <sys/fpu/fpu_simulator.h>
34 #include <sys/vis_simulator.h>
35 #include <sys/fpu/globals.h>
36 #include <sys/privregs.h>
37 #include <sys/sun4asi.h>
38 #include <sys/machasi.h>
39 #include <sys/debug.h>
40 #include <sys/cpu_module.h>
41 #include <sys/systm.h>
42 
43 #define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
44 #define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
45 #define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */
46 
47 extern	uint_t	get_subcc_ccr(uint64_t, uint64_t);
48 
49 static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
50 				void *);
51 static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
52 				struct regs *, void *, kfpu_t *);
53 static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
54 				void *);
55 static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
56 				kfpu_t *);
57 static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
58 				void *, kfpu_t *);
59 static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
60 				kfpu_t *);
61 static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
62 static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
63 				void *);
64 static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
65 static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
66 static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
67 static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type);
68 static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
69 				void *, uint_t);
70 static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
71 				struct regs *, void *, uint_t);
72 static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
73 				struct regs *, void *, uint_t);
74 
75 /*
76  * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
77  * traps.
78  */
79 enum ftt_type
80 vis_fpu_simulator(
81 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
82 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
83 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
84 	void		*prw,	/* Pointer to locals and ins. */
85 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
86 {
87 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
88 	uint_t	us1, us2, usr;
89 	uint64_t lus1, lus2, lusr;
90 	enum ftt_type ftt = ftt_none;
91 	union {
92 		vis_inst_type	inst;
93 		fp_inst_type	pinst;
94 	} f;
95 
96 	ASSERT(USERMODE(pregs->r_tstate));
97 	nrs1 = pinst.rs1;
98 	nrs2 = pinst.rs2;
99 	nrd = pinst.rd;
100 	f.pinst = pinst;
101 	if ((f.inst.opf & 1) == 0) {		/* double precision */
102 		if ((nrs1 & 1) == 1) 		/* fix register encoding */
103 			nrs1 = (nrs1 & 0x1e) | 0x20;
104 		if ((nrs2 & 1) == 1)
105 			nrs2 = (nrs2 & 0x1e) | 0x20;
106 		if ((nrd & 1) == 1)
107 			nrd = (nrd & 0x1e) | 0x20;
108 	}
109 
110 	switch (f.inst.opf) {
111 		/* these instr's do not use fp regs */
112 	case edge8:
113 	case edge8l:
114 	case edge8n:
115 	case edge8ln:
116 	case edge16:
117 	case edge16l:
118 	case edge16n:
119 	case edge16ln:
120 	case edge32:
121 	case edge32l:
122 	case edge32n:
123 	case edge32ln:
124 		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
125 		break;
126 	case array8:
127 	case array16:
128 	case array32:
129 		ftt = vis_array(pfpsd, f.inst, pregs, prw);
130 		break;
131 	case alignaddr:
132 	case alignaddrl:
133 		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
134 		break;
135 	case bmask:
136 		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
137 		break;
138 	case fcmple16:
139 	case fcmpne16:
140 	case fcmpgt16:
141 	case fcmpeq16:
142 	case fcmple32:
143 	case fcmpne32:
144 	case fcmpgt32:
145 	case fcmpeq32:
146 		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
147 		break;
148 	case fmul8x16:
149 	case fmul8x16au:
150 	case fmul8x16al:
151 	case fmul8sux16:
152 	case fmul8ulx16:
153 	case fmuld8sux16:
154 	case fmuld8ulx16:
155 		ftt = vis_fmul(pfpsd, f.inst);
156 		break;
157 	case fpack16:
158 	case fpack32:
159 	case fpackfix:
160 	case fexpand:
161 	case fpmerge:
162 		ftt = vis_fpixel(pfpsd, f.inst, fp);
163 		break;
164 	case pdist:
165 		ftt = vis_pdist(pfpsd, pinst);
166 		break;
167 	case faligndata:
168 		ftt = vis_faligndata(pfpsd, pinst, fp);
169 		break;
170 	case bshuffle:
171 		ftt = vis_bshuffle(pfpsd, pinst, fp);
172 		break;
173 	case fpadd16:
174 	case fpadd16s:
175 	case fpadd32:
176 	case fpadd32s:
177 	case fpsub16:
178 	case fpsub16s:
179 	case fpsub32:
180 	case fpsub32s:
181 		ftt = vis_fpaddsub(pfpsd, f.inst);
182 		break;
183 	case fzero:
184 		lusr = 0;
185 		_fp_pack_extword(pfpsd, &lusr, nrd);
186 		break;
187 	case fzeros:
188 		usr = 0;
189 		_fp_pack_word(pfpsd, &usr, nrd);
190 		break;
191 	case fnor:
192 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
193 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
194 		lusr = ~(lus1 | lus2);
195 		_fp_pack_extword(pfpsd, &lusr, nrd);
196 		break;
197 	case fnors:
198 		_fp_unpack_word(pfpsd, &us1, nrs1);
199 		_fp_unpack_word(pfpsd, &us2, nrs2);
200 		usr = ~(us1 | us2);
201 		_fp_pack_word(pfpsd, &usr, nrd);
202 		break;
203 	case fandnot2:
204 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
205 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
206 		lusr = (lus1 & ~lus2);
207 		_fp_pack_extword(pfpsd, &lusr, nrd);
208 		break;
209 	case fandnot2s:
210 		_fp_unpack_word(pfpsd, &us1, nrs1);
211 		_fp_unpack_word(pfpsd, &us2, nrs2);
212 		usr = (us1 & ~us2);
213 		_fp_pack_word(pfpsd, &usr, nrd);
214 		break;
215 	case fnot2:
216 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
217 		lusr = ~lus2;
218 		_fp_pack_extword(pfpsd, &lusr, nrd);
219 		break;
220 	case fnot2s:
221 		_fp_unpack_word(pfpsd, &us2, nrs2);
222 		usr = ~us2;
223 		_fp_pack_word(pfpsd, &usr, nrd);
224 		break;
225 	case fandnot1:
226 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
227 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
228 		lusr = (~lus1 & lus2);
229 		_fp_pack_extword(pfpsd, &lusr, nrd);
230 		break;
231 	case fandnot1s:
232 		_fp_unpack_word(pfpsd, &us1, nrs1);
233 		_fp_unpack_word(pfpsd, &us2, nrs2);
234 		usr = (~us1 & us2);
235 		_fp_pack_word(pfpsd, &usr, nrd);
236 		break;
237 	case fnot1:
238 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
239 		lusr = ~lus1;
240 		_fp_pack_extword(pfpsd, &lusr, nrd);
241 		break;
242 	case fnot1s:
243 		_fp_unpack_word(pfpsd, &us1, nrs1);
244 		usr = ~us1;
245 		_fp_pack_word(pfpsd, &usr, nrd);
246 		break;
247 	case fxor:
248 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
249 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
250 		lusr = (lus1 ^ lus2);
251 		_fp_pack_extword(pfpsd, &lusr, nrd);
252 		break;
253 	case fxors:
254 		_fp_unpack_word(pfpsd, &us1, nrs1);
255 		_fp_unpack_word(pfpsd, &us2, nrs2);
256 		usr = (us1 ^ us2);
257 		_fp_pack_word(pfpsd, &usr, nrd);
258 		break;
259 	case fnand:
260 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
261 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
262 		lusr = ~(lus1 & lus2);
263 		_fp_pack_extword(pfpsd, &lusr, nrd);
264 		break;
265 	case fnands:
266 		_fp_unpack_word(pfpsd, &us1, nrs1);
267 		_fp_unpack_word(pfpsd, &us2, nrs2);
268 		usr = ~(us1 & us2);
269 		_fp_pack_word(pfpsd, &usr, nrd);
270 		break;
271 	case fand:
272 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
273 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
274 		lusr = (lus1 & lus2);
275 		_fp_pack_extword(pfpsd, &lusr, nrd);
276 		break;
277 	case fands:
278 		_fp_unpack_word(pfpsd, &us1, nrs1);
279 		_fp_unpack_word(pfpsd, &us2, nrs2);
280 		usr = (us1 & us2);
281 		_fp_pack_word(pfpsd, &usr, nrd);
282 		break;
283 	case fxnor:
284 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
285 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
286 		lusr = ~(lus1 ^ lus2);
287 		_fp_pack_extword(pfpsd, &lusr, nrd);
288 		break;
289 	case fxnors:
290 		_fp_unpack_word(pfpsd, &us1, nrs1);
291 		_fp_unpack_word(pfpsd, &us2, nrs2);
292 		usr = ~(us1 ^ us2);
293 		_fp_pack_word(pfpsd, &usr, nrd);
294 		break;
295 	case fsrc1:
296 		_fp_unpack_extword(pfpsd, &lusr, nrs1);
297 		_fp_pack_extword(pfpsd, &lusr, nrd);
298 		break;
299 	case fsrc1s:
300 		_fp_unpack_word(pfpsd, &usr, nrs1);
301 		_fp_pack_word(pfpsd, &usr, nrd);
302 		break;
303 	case fornot2:
304 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
305 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
306 		lusr = (lus1 | ~lus2);
307 		_fp_pack_extword(pfpsd, &lusr, nrd);
308 		break;
309 	case fornot2s:
310 		_fp_unpack_word(pfpsd, &us1, nrs1);
311 		_fp_unpack_word(pfpsd, &us2, nrs2);
312 		usr = (us1 | ~us2);
313 		_fp_pack_word(pfpsd, &usr, nrd);
314 		break;
315 	case fsrc2:
316 		_fp_unpack_extword(pfpsd, &lusr, nrs2);
317 		_fp_pack_extword(pfpsd, &lusr, nrd);
318 		break;
319 	case fsrc2s:
320 		_fp_unpack_word(pfpsd, &usr, nrs2);
321 		_fp_pack_word(pfpsd, &usr, nrd);
322 		break;
323 	case fornot1:
324 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
325 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
326 		lusr = (~lus1 | lus2);
327 		_fp_pack_extword(pfpsd, &lusr, nrd);
328 		break;
329 	case fornot1s:
330 		_fp_unpack_word(pfpsd, &us1, nrs1);
331 		_fp_unpack_word(pfpsd, &us2, nrs2);
332 		usr = (~us1 | us2);
333 		_fp_pack_word(pfpsd, &usr, nrd);
334 		break;
335 	case for_op:
336 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
337 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
338 		lusr = (lus1 | lus2);
339 		_fp_pack_extword(pfpsd, &lusr, nrd);
340 		break;
341 	case fors_op:
342 		_fp_unpack_word(pfpsd, &us1, nrs1);
343 		_fp_unpack_word(pfpsd, &us2, nrs2);
344 		usr = (us1 | us2);
345 		_fp_pack_word(pfpsd, &usr, nrd);
346 		break;
347 	case fone:
348 		lusr = 0xffffffffffffffff;
349 		_fp_pack_extword(pfpsd, &lusr, nrd);
350 		break;
351 	case fones:
352 		usr = 0xffffffffUL;
353 		_fp_pack_word(pfpsd, &usr, nrd);
354 		break;
355 	case siam:
356 		ftt = vis_siam(pfpsd, f.inst, fp);
357 		break;
358 	default:
359 		return (ftt_unimplemented);
360 	}
361 
362 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
363 	pregs->r_npc += 4;
364 	return (ftt);
365 }
366 
367 /*
368  * Simulator for edge instructions
369  */
370 static enum ftt_type
371 vis_edge(
372 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
373 	vis_inst_type	inst,	/* FPU instruction to simulate. */
374 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
375 	void		*prw)	/* Pointer to locals and ins. */
376 
377 {
378 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
379 	enum ftt_type ftt;
380 	uint64_t addrl, addrr, mask;
381 	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
382 	int al3l, al3r;			/* Lower 3 bits of address */
383 	int am32;			/* Whether PSTATE.AM == 1 */
384 	uint_t	ccr;
385 
386 	nrs1 = inst.rs1;
387 	nrs2 = inst.rs2;
388 	nrd = inst.rd;
389 
390 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
391 	if (ftt != ftt_none)
392 		return (ftt);
393 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
394 	if (ftt != ftt_none)
395 		return (ftt);
396 
397 	/* Get PSTATE.AM to determine 32-bit vs 64-bit addressing */
398 	am32 =  pregs->r_tstate & TSTATE_AM;
399 	if (am32 == 1) {
400 		ah61l = addrl & 0xffffffff8;
401 		ah61r = addrr & 0xffffffff8;
402 	} else {
403 		ah61l = addrl & ~0x7;
404 		ah61r = addrr & ~0x7;
405 	}
406 
407 
408 	switch (inst.opf) {
409 	case edge8:
410 	case edge8n:
411 	case edge8l:
412 	case edge8ln:
413 		al3l = addrl & 0x7;
414 		switch (inst.opf) {
415 		case edge8:
416 		case edge8n:
417 			if (inst.opf == edge8) {
418 				VISINFO_KSTAT(vis_edge8);
419 			} else {
420 				VISINFO_KSTAT(vis_edge8n);
421 			}
422 			mask = 0xff >> al3l;
423 			if (ah61l == ah61r) {
424 				al3r = addrr & 0x7;
425 				mask &= (0xff << (0x7 - al3r)) & 0xff;
426 			}
427 			break;
428 		case edge8l:
429 		case edge8ln:
430 			if (inst.opf == edge8l) {
431 				VISINFO_KSTAT(vis_edge8l);
432 			} else {
433 				VISINFO_KSTAT(vis_edge8ln);
434 			}
435 			mask = (0xff << al3l) & 0xff;
436 			if (ah61l == ah61r) {
437 				al3r = addrr & 0x7;
438 				mask &= 0xff >> (0x7 - al3r);
439 			}
440 			break;
441 		}
442 		break;
443 	case edge16:
444 	case edge16l:
445 	case edge16n:
446 	case edge16ln:
447 		al3l = addrl & 0x6;
448 		al3l >>= 0x1;
449 		switch (inst.opf) {
450 		case edge16:
451 		case edge16n:
452 			if (inst.opf == edge16) {
453 				VISINFO_KSTAT(vis_edge16);
454 
455 			} else {
456 				VISINFO_KSTAT(vis_edge16n);
457 			}
458 			mask = 0xf >> al3l;
459 			if (ah61l == ah61r) {
460 				al3r = addrr & 0x6;
461 				al3r >>= 0x1;
462 				mask &= (0xf << (0x3 - al3r)) & 0xf;
463 			}
464 			break;
465 		case edge16l:
466 		case edge16ln:
467 			if (inst.opf == edge16l) {
468 				VISINFO_KSTAT(vis_edge16l);
469 
470 			} else {
471 				VISINFO_KSTAT(vis_edge16ln);
472 			}
473 
474 			mask = (0xf << al3l) & 0xf;
475 			if (ah61l == ah61r) {
476 				al3r = addrr & 0x6;
477 				al3r >>= 0x1;
478 				mask &= 0xf >> (0x3 - al3r);
479 			}
480 			break;
481 		}
482 		break;
483 	case edge32:
484 	case edge32l:
485 	case edge32n:
486 	case edge32ln:
487 		al3l = addrl & 0x4;
488 		al3l >>= 0x2;
489 
490 		switch (inst.opf) {
491 		case edge32:
492 		case edge32n:
493 			if (inst.opf == edge32) {
494 				VISINFO_KSTAT(vis_edge32);
495 
496 			} else {
497 				VISINFO_KSTAT(vis_edge32n);
498 			}
499 			mask = 0x3 >> al3l;
500 			if (ah61l == ah61r) {
501 				al3r = addrr & 0x4;
502 				al3r >>= 0x2;
503 				mask &= (0x3 << (0x1 - al3r)) & 0x3;
504 			}
505 			break;
506 		case edge32l:
507 		case edge32ln:
508 			if (inst.opf == edge32l) {
509 				VISINFO_KSTAT(vis_edge32l);
510 
511 			} else {
512 				VISINFO_KSTAT(vis_edge32ln);
513 			}
514 			mask = (0x3 << al3l) & 0x3;
515 			if (ah61l == ah61r) {
516 				al3r = addrr & 0x4;
517 				al3r >>= 0x2;
518 				mask &= 0x3 >> (0x1 - al3r);
519 			}
520 			break;
521 		}
522 		break;
523 	}
524 
525 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
526 
527 	switch (inst.opf) {
528 	case edge8:
529 	case edge8l:
530 	case edge16:
531 	case edge16l:
532 	case edge32:
533 	case edge32l:
534 
535 		/* Update flags per SUBcc outcome */
536 		pregs->r_tstate &= ~((uint64_t)TSTATE_CCR_MASK
537 					<< TSTATE_CCR_SHIFT);
538 		ccr = get_subcc_ccr(addrl, addrr);  /* get subcc cond. codes */
539 		pregs->r_tstate |= ((uint64_t)ccr << TSTATE_CCR_SHIFT);
540 
541 		break;
542 	}
543 	return (ftt);
544 }
545 
546 /*
547  * Simulator for three dimentional array addressing instructions.
548  */
549 static enum ftt_type
550 vis_array(
551 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
552 	vis_inst_type	inst,	/* FPU instruction to simulate. */
553 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
554 	void		*prw)	/* Pointer to locals and ins. */
555 
556 {
557 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
558 	enum ftt_type ftt;
559 	uint64_t laddr, bsize, baddr;
560 	uint64_t nbit;
561 	int oy, oz;
562 
563 	nrs1 = inst.rs1;
564 	nrs2 = inst.rs2;
565 	nrd = inst.rd;
566 
567 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
568 	if (ftt != ftt_none)
569 		return (ftt);
570 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
571 	if (ftt != ftt_none)
572 		return (ftt);
573 
574 	if (bsize > 5) {
575 		bsize = 5;
576 	}
577 	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
578 	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
579 	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
580 
581 	baddr = 0;
582 	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
583 	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
584 	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
585 	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
586 	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
587 	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
588 	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
589 	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
590 	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
591 
592 	switch (inst.opf) {
593 	case array8:
594 		VISINFO_KSTAT(vis_array8);
595 		break;
596 	case array16:
597 		VISINFO_KSTAT(vis_array16);
598 		baddr <<= 1;
599 		break;
600 	case array32:
601 		VISINFO_KSTAT(vis_array32);
602 		baddr <<= 2;
603 		break;
604 	}
605 
606 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
607 
608 	return (ftt);
609 }
610 
611 /*
612  * Simulator for alignaddr and alignaddrl instructions.
613  */
614 static enum ftt_type
615 vis_alignaddr(
616 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
617 	vis_inst_type	inst,	/* FPU instruction to simulate. */
618 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
619 	void		*prw,	/* Pointer to locals and ins. */
620 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
621 {
622 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
623 	enum ftt_type ftt;
624 	uint64_t ea, tea, g, r;
625 	short s;
626 
627 	nrs1 = inst.rs1;
628 	nrs2 = inst.rs2;
629 	nrd = inst.rd;
630 
631 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
632 	if (ftt != ftt_none)
633 		return (ftt);
634 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
635 	if (ftt != ftt_none)
636 		return (ftt);
637 	ea += tea;
638 	r = ea & ~0x7;	/* zero least 3 significant bits */
639 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
640 
641 
642 	g = pfpsd->fp_current_read_gsr(fp);
643 	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
644 	r = ea & 0x7;
645 	if (inst.opf == alignaddrl) {
646 		s = (short)(~r);	/* 2's complement for alignaddrl */
647 		if (s < 0)
648 			r = (uint64_t)((s + 1) & 0x7);
649 		else
650 			r = (uint64_t)(s & 0x7);
651 	}
652 	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
653 	pfpsd->fp_current_write_gsr(g, fp);
654 
655 	return (ftt);
656 }
657 
658 /*
659  * Simulator for bmask instruction.
660  */
661 static enum ftt_type
662 vis_bmask(
663 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
664 	vis_inst_type	inst,	/* FPU instruction to simulate. */
665 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
666 	void		*prw,	/* Pointer to locals and ins. */
667 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
668 {
669 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
670 	enum ftt_type ftt;
671 	uint64_t ea, tea, g;
672 
673 	VISINFO_KSTAT(vis_bmask);
674 	nrs1 = inst.rs1;
675 	nrs2 = inst.rs2;
676 	nrd = inst.rd;
677 
678 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
679 	if (ftt != ftt_none)
680 		return (ftt);
681 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
682 	if (ftt != ftt_none)
683 		return (ftt);
684 	ea += tea;
685 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
686 
687 	g = pfpsd->fp_current_read_gsr(fp);
688 	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
689 
690 	/* Put the least significant 32 bits of ea in GSR.mask */
691 	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
692 	pfpsd->fp_current_write_gsr(g, fp);
693 	return (ftt);
694 }
695 
696 /*
697  * Simulator for fp[add|sub]* instruction.
698  */
699 static enum ftt_type
700 vis_fpaddsub(
701 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
702 	vis_inst_type	inst)	/* FPU instruction to simulate. */
703 {
704 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
705 	union {
706 		uint64_t	ll;
707 		uint32_t	i[2];
708 		uint16_t	s[4];
709 	} lrs1, lrs2, lrd;
710 	union {
711 		uint32_t	i;
712 		uint16_t	s[2];
713 	} krs1, krs2, krd;
714 	int i;
715 
716 	nrs1 = inst.rs1;
717 	nrs2 = inst.rs2;
718 	nrd = inst.rd;
719 	if ((inst.opf & 1) == 0) {	/* double precision */
720 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
721 			nrs1 = (nrs1 & 0x1e) | 0x20;
722 		if ((nrs2 & 1) == 1)
723 			nrs2 = (nrs2 & 0x1e) | 0x20;
724 		if ((nrd & 1) == 1)
725 			nrd = (nrd & 0x1e) | 0x20;
726 	}
727 	switch (inst.opf) {
728 	case fpadd16:
729 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
730 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
731 		for (i = 0; i <= 3; i++) {
732 			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
733 		}
734 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
735 		break;
736 	case fpadd16s:
737 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
738 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
739 		for (i = 0; i <= 1; i++) {
740 			krd.s[i] = krs1.s[i] + krs2.s[i];
741 		}
742 		_fp_pack_word(pfpsd, &krd.i, nrd);
743 		break;
744 	case fpadd32:
745 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
746 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
747 		for (i = 0; i <= 1; i++) {
748 			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
749 		}
750 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
751 		break;
752 	case fpadd32s:
753 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
754 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
755 		krd.i = krs1.i + krs2.i;
756 		_fp_pack_word(pfpsd, &krd.i, nrd);
757 		break;
758 	case fpsub16:
759 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
760 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
761 		for (i = 0; i <= 3; i++) {
762 			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
763 		}
764 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
765 		break;
766 	case fpsub16s:
767 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
768 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
769 		for (i = 0; i <= 1; i++) {
770 			krd.s[i] = krs1.s[i] - krs2.s[i];
771 		}
772 		_fp_pack_word(pfpsd, &krd.i, nrd);
773 		break;
774 	case fpsub32:
775 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
776 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
777 		for (i = 0; i <= 1; i++) {
778 			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
779 		}
780 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
781 		break;
782 	case fpsub32s:
783 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
784 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
785 		krd.i = krs1.i - krs2.i;
786 		_fp_pack_word(pfpsd, &krd.i, nrd);
787 		break;
788 	}
789 	return (ftt_none);
790 }
791 
792 /*
793  * Simulator for fcmp* instruction.
794  */
795 static enum ftt_type
796 vis_fcmp(
797 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
798 	vis_inst_type	inst,	/* FPU instruction to simulate. */
799 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
800 	void		*prw)	/* Pointer to locals and ins. */
801 {
802 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
803 	union {
804 		uint64_t	ll;
805 		uint32_t	i[2];
806 		uint16_t	s[4];
807 	} krs1, krs2, krd;
808 	enum ftt_type ftt;
809 	short sr1, sr2;
810 	int i, ir1, ir2;
811 
812 	nrs1 = inst.rs1;
813 	nrs2 = inst.rs2;
814 	nrd = inst.rd;
815 	krd.ll = 0;
816 	if ((nrs1 & 1) == 1) 	/* fix register encoding */
817 		nrs1 = (nrs1 & 0x1e) | 0x20;
818 	if ((nrs2 & 1) == 1)
819 		nrs2 = (nrs2 & 0x1e) | 0x20;
820 
821 	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
822 	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
823 	switch (inst.opf) {
824 	case fcmple16:
825 		VISINFO_KSTAT(vis_fcmple16);
826 		for (i = 0; i <= 3; i++) {
827 			sr1 = (short)krs1.s[i];
828 			sr2 = (short)krs2.s[i];
829 			if (sr1 <= sr2)
830 				krd.ll += (0x8 >> i);
831 		}
832 		break;
833 	case fcmpne16:
834 		VISINFO_KSTAT(vis_fcmpne16);
835 		for (i = 0; i <= 3; i++) {
836 			sr1 = (short)krs1.s[i];
837 			sr2 = (short)krs2.s[i];
838 			if (sr1 != sr2)
839 				krd.ll += (0x8 >> i);
840 		}
841 		break;
842 	case fcmpgt16:
843 		VISINFO_KSTAT(vis_fcmpgt16);
844 		for (i = 0; i <= 3; i++) {
845 			sr1 = (short)krs1.s[i];
846 			sr2 = (short)krs2.s[i];
847 			if (sr1 > sr2)
848 				krd.ll += (0x8 >> i);
849 		}
850 		break;
851 	case fcmpeq16:
852 		VISINFO_KSTAT(vis_fcmpeq16);
853 		for (i = 0; i <= 3; i++) {
854 			sr1 = (short)krs1.s[i];
855 			sr2 = (short)krs2.s[i];
856 			if (sr1 == sr2)
857 				krd.ll += (0x8 >> i);
858 		}
859 		break;
860 	case fcmple32:
861 		VISINFO_KSTAT(vis_fcmple32);
862 		for (i = 0; i <= 1; i++) {
863 			ir1 = (int)krs1.i[i];
864 			ir2 = (int)krs2.i[i];
865 			if (ir1 <= ir2)
866 				krd.ll += (0x2 >> i);
867 		}
868 		break;
869 	case fcmpne32:
870 		VISINFO_KSTAT(vis_fcmpne32);
871 		for (i = 0; i <= 1; i++) {
872 			ir1 = (int)krs1.i[i];
873 			ir2 = (int)krs2.i[i];
874 			if (ir1 != ir2)
875 				krd.ll += (0x2 >> i);
876 		}
877 		break;
878 	case fcmpgt32:
879 		VISINFO_KSTAT(vis_fcmpgt32);
880 		for (i = 0; i <= 1; i++) {
881 			ir1 = (int)krs1.i[i];
882 			ir2 = (int)krs2.i[i];
883 			if (ir1 > ir2)
884 				krd.ll += (0x2 >> i);
885 		}
886 		break;
887 	case fcmpeq32:
888 		VISINFO_KSTAT(vis_fcmpeq32);
889 		for (i = 0; i <= 1; i++) {
890 			ir1 = (int)krs1.i[i];
891 			ir2 = (int)krs2.i[i];
892 			if (ir1 == ir2)
893 				krd.ll += (0x2 >> i);
894 		}
895 		break;
896 	}
897 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
898 	return (ftt);
899 }
900 
901 /*
902  * Simulator for fmul* instruction.
903  */
904 static enum ftt_type
905 vis_fmul(
906 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
907 	vis_inst_type	inst)	/* FPU instruction to simulate. */
908 {
909 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
910 	union {
911 		uint64_t	ll;
912 		uint32_t	i[2];
913 		uint16_t	s[4];
914 		uint8_t		c[8];
915 	} lrs1, lrs2, lrd;
916 	union {
917 		uint32_t	i;
918 		uint16_t	s[2];
919 		uint8_t		c[4];
920 	} krs1, krs2, kres;
921 	short s1, s2, sres;
922 	ushort_t us1;
923 	char c1;
924 	int i;
925 
926 	nrs1 = inst.rs1;
927 	nrs2 = inst.rs2;
928 	nrd = inst.rd;
929 	if ((inst.opf & 1) == 0) {	/* double precision */
930 		if ((nrd & 1) == 1) 	/* fix register encoding */
931 			nrd = (nrd & 0x1e) | 0x20;
932 	}
933 
934 	switch (inst.opf) {
935 	case fmul8x16:
936 		VISINFO_KSTAT(vis_fmul8x16);
937 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
938 		if ((nrs2 & 1) == 1)
939 			nrs2 = (nrs2 & 0x1e) | 0x20;
940 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
941 		for (i = 0; i <= 3; i++) {
942 			us1 = (ushort_t)krs1.c[i];
943 			s2 = (short)lrs2.s[i];
944 			kres.i = us1 * s2;
945 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
946 			if (kres.c[3] >= 0x80)
947 				sres++;
948 			lrd.s[i] = sres;
949 		}
950 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
951 		break;
952 	case fmul8x16au:
953 		VISINFO_KSTAT(vis_fmul8x16au);
954 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
955 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
956 		for (i = 0; i <= 3; i++) {
957 			us1 = (ushort_t)krs1.c[i];
958 			s2 = (short)krs2.s[0];
959 			kres.i = us1 * s2;
960 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
961 			if (kres.c[3] >= 0x80)
962 				sres++;
963 			lrd.s[i] = sres;
964 		}
965 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
966 		break;
967 	case fmul8x16al:
968 		VISINFO_KSTAT(vis_fmul8x16al);
969 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
970 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
971 		for (i = 0; i <= 3; i++) {
972 			us1 = (ushort_t)krs1.c[i];
973 			s2 = (short)krs2.s[1];
974 			kres.i = us1 * s2;
975 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
976 			if (kres.c[3] >= 0x80)
977 				sres++;
978 			lrd.s[i] = sres;
979 		}
980 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
981 		break;
982 	case fmul8sux16:
983 		VISINFO_KSTAT(vis_fmul8sux16);
984 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
985 			nrs1 = (nrs1 & 0x1e) | 0x20;
986 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
987 		if ((nrs2 & 1) == 1)
988 			nrs2 = (nrs2 & 0x1e) | 0x20;
989 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
990 		for (i = 0; i <= 3; i++) {
991 			c1 = lrs1.c[(i*2)];
992 			s1 = (short)c1;		/* keeps the sign alive */
993 			s2 = (short)lrs2.s[i];
994 			kres.i = s1 * s2;
995 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
996 			if (kres.c[3] >= 0x80)
997 				sres++;
998 			if (sres < 0)
999 				lrd.s[i] = (sres & 0xFFFF);
1000 			else
1001 				lrd.s[i] = sres;
1002 		}
1003 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1004 		break;
1005 	case fmul8ulx16:
1006 		VISINFO_KSTAT(vis_fmul8ulx16);
1007 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1008 			nrs1 = (nrs1 & 0x1e) | 0x20;
1009 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1010 		if ((nrs2 & 1) == 1)
1011 			nrs2 = (nrs2 & 0x1e) | 0x20;
1012 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1013 		for (i = 0; i <= 3; i++) {
1014 			us1 = (ushort_t)lrs1.c[(i*2)+1];
1015 			s2 = (short)lrs2.s[i];
1016 			kres.i = us1 * s2;
1017 			sres = (short)kres.s[0];
1018 			if (kres.s[1] >= 0x8000)
1019 				sres++;
1020 			lrd.s[i] = sres;
1021 		}
1022 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1023 		break;
1024 	case fmuld8sux16:
1025 		VISINFO_KSTAT(vis_fmuld8sux16);
1026 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1027 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1028 		for (i = 0; i <= 1; i++) {
1029 			c1 = krs1.c[(i*2)];
1030 			s1 = (short)c1;		/* keeps the sign alive */
1031 			s2 = (short)krs2.s[i];
1032 			kres.i = s1 * s2;
1033 			lrd.i[i] = kres.i << 8;
1034 		}
1035 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1036 		break;
1037 	case fmuld8ulx16:
1038 		VISINFO_KSTAT(vis_fmuld8ulx16);
1039 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1040 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1041 		for (i = 0; i <= 1; i++) {
1042 			us1 = (ushort_t)krs1.c[(i*2)+1];
1043 			s2 = (short)krs2.s[i];
1044 			lrd.i[i] = us1 * s2;
1045 		}
1046 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1047 		break;
1048 	}
1049 	return (ftt_none);
1050 }
1051 
1052 /*
1053  * Simulator for fpixel formatting instructions.
1054  */
1055 static enum ftt_type
1056 vis_fpixel(
1057 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1058 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1059 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1060 {
1061 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1062 	int	i, j, k, sf;
1063 	union {
1064 		uint64_t	ll;
1065 		uint32_t	i[2];
1066 		uint16_t	s[4];
1067 		uint8_t		c[8];
1068 	} lrs1, lrs2, lrd;
1069 	union {
1070 		uint32_t	i;
1071 		uint16_t	s[2];
1072 		uint8_t		c[4];
1073 	} krs1, krs2, krd;
1074 	uint64_t r;
1075 	int64_t l, m;
1076 	short s;
1077 	uchar_t uc;
1078 
1079 	nrs1 = inst.rs1;
1080 	nrs2 = inst.rs2;
1081 	nrd = inst.rd;
1082 	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1083 		if ((nrd & 1) == 1) 	/* fix register encoding */
1084 			nrd = (nrd & 0x1e) | 0x20;
1085 	}
1086 
1087 	switch (inst.opf) {
1088 	case fpack16:
1089 		VISINFO_KSTAT(vis_fpack16);
1090 		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1091 			nrs2 = (nrs2 & 0x1e) | 0x20;
1092 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1093 		r = pfpsd->fp_current_read_gsr(fp);
1094 		/* fpack16 ignores GSR.scale msb */
1095 		sf = (int)(GSR_SCALE(r) & 0xf);
1096 		for (i = 0; i <= 3; i++) {
1097 			s = (short)lrs2.s[i];	/* preserve the sign */
1098 			j = ((int)s << sf);
1099 			k = j >> 7;
1100 			if (k < 0) {
1101 				uc = 0;
1102 			} else if (k > 255) {
1103 				uc = 255;
1104 			} else {
1105 				uc = (uchar_t)k;
1106 			}
1107 			krd.c[i] = uc;
1108 		}
1109 		_fp_pack_word(pfpsd, &krd.i, nrd);
1110 		break;
1111 	case fpack32:
1112 		VISINFO_KSTAT(vis_fpack32);
1113 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1114 			nrs1 = (nrs1 & 0x1e) | 0x20;
1115 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1116 		if ((nrs2 & 1) == 1)
1117 			nrs2 = (nrs2 & 0x1e) | 0x20;
1118 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1119 
1120 		r = pfpsd->fp_current_read_gsr(fp);
1121 		sf = (int)GSR_SCALE(r);
1122 		lrd.ll = lrs1.ll << 8;
1123 		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1124 			j = (int)lrs2.i[i];	/* preserve the sign */
1125 			l = ((int64_t)j << sf);
1126 			m = l >> 23;
1127 			if (m < 0) {
1128 				uc = 0;
1129 			} else if (m > 255) {
1130 				uc = 255;
1131 			} else {
1132 				uc = (uchar_t)m;
1133 			}
1134 			lrd.c[k] = uc;
1135 		}
1136 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1137 		break;
1138 	case fpackfix:
1139 		VISINFO_KSTAT(vis_fpackfix);
1140 		if ((nrs2 & 1) == 1)
1141 			nrs2 = (nrs2 & 0x1e) | 0x20;
1142 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1143 
1144 		r = pfpsd->fp_current_read_gsr(fp);
1145 		sf = (int)GSR_SCALE(r);
1146 		for (i = 0; i <= 1; i++) {
1147 			j = (int)lrs2.i[i];	/* preserve the sign */
1148 			l = ((int64_t)j << sf);
1149 			m = l >> 16;
1150 			if (m < -32768) {
1151 				s = -32768;
1152 			} else if (m > 32767) {
1153 				s = 32767;
1154 			} else {
1155 				s = (short)m;
1156 			}
1157 			krd.s[i] = s;
1158 		}
1159 		_fp_pack_word(pfpsd, &krd.i, nrd);
1160 		break;
1161 	case fexpand:
1162 		VISINFO_KSTAT(vis_fexpand);
1163 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1164 		for (i = 0; i <= 3; i++) {
1165 			uc = krs2.c[i];
1166 			lrd.s[i] = (ushort_t)(uc << 4);
1167 		}
1168 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1169 		break;
1170 	case fpmerge:
1171 		VISINFO_KSTAT(vis_fpmerge);
1172 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1173 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1174 		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1175 			lrd.c[j] = krs1.c[i];
1176 			lrd.c[j+1] = krs2.c[i];
1177 		}
1178 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1179 		break;
1180 	}
1181 	return (ftt_none);
1182 }
1183 
1184 /*
1185  * Simulator for pdist instruction.
1186  */
1187 enum ftt_type
1188 vis_pdist(
1189 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1190 	fp_inst_type	pinst)	/* FPU instruction to simulate. */
1191 {
1192 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1193 	int	i;
1194 	short	s;
1195 	union {
1196 		uint64_t	ll;
1197 		uint8_t		c[8];
1198 	} lrs1, lrs2, lrd;
1199 
1200 	nrs1 = pinst.rs1;
1201 	nrs2 = pinst.rs2;
1202 	nrd = pinst.rd;
1203 	VISINFO_KSTAT(vis_pdist);
1204 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1205 		nrs1 = (nrs1 & 0x1e) | 0x20;
1206 	if ((nrs2 & 1) == 1)
1207 		nrs2 = (nrs2 & 0x1e) | 0x20;
1208 	if ((nrd & 1) == 1)
1209 		nrd = (nrd & 0x1e) | 0x20;
1210 
1211 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1212 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1213 	_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1214 
1215 	for (i = 0; i <= 7; i++) {
1216 		s = (short)(lrs1.c[i] - lrs2.c[i]);
1217 		if (s < 0)
1218 			s = ~s + 1;
1219 		lrd.ll += s;
1220 	}
1221 
1222 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1223 	return (ftt_none);
1224 }
1225 
1226 /*
1227  * Simulator for faligndata instruction.
1228  */
1229 static enum ftt_type
1230 vis_faligndata(
1231 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1232 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1233 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1234 {
1235 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1236 	int	i, j, k, ao;
1237 	union {
1238 		uint64_t	ll;
1239 		uint8_t		c[8];
1240 	} lrs1, lrs2, lrd;
1241 	uint64_t r;
1242 
1243 	nrs1 = pinst.rs1;
1244 	nrs2 = pinst.rs2;
1245 	nrd = pinst.rd;
1246 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1247 		nrs1 = (nrs1 & 0x1e) | 0x20;
1248 	if ((nrs2 & 1) == 1)
1249 		nrs2 = (nrs2 & 0x1e) | 0x20;
1250 	if ((nrd & 1) == 1)
1251 		nrd = (nrd & 0x1e) | 0x20;
1252 
1253 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1254 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1255 
1256 	r = pfpsd->fp_current_read_gsr(fp);
1257 	ao = (int)GSR_ALIGN(r);
1258 
1259 	for (i = 0, j = ao, k = 0; i <= 7; i++)
1260 		if (j <= 7) {
1261 			lrd.c[i] = lrs1.c[j++];
1262 		} else {
1263 			lrd.c[i] = lrs2.c[k++];
1264 		}
1265 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1266 
1267 	return (ftt_none);
1268 }
1269 
1270 /*
1271  * Simulator for bshuffle instruction.
1272  */
1273 static enum ftt_type
1274 vis_bshuffle(
1275 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1276 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1277 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1278 {
1279 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1280 	int	i, j, ao;
1281 	union {
1282 		uint64_t	ll;
1283 		uint8_t		c[8];
1284 	} lrs1, lrs2, lrd;
1285 	uint64_t r;
1286 
1287 	VISINFO_KSTAT(vis_bshuffle);
1288 	nrs1 = pinst.rs1;
1289 	nrs2 = pinst.rs2;
1290 	nrd = pinst.rd;
1291 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1292 		nrs1 = (nrs1 & 0x1e) | 0x20;
1293 	if ((nrs2 & 1) == 1)
1294 		nrs2 = (nrs2 & 0x1e) | 0x20;
1295 	if ((nrd & 1) == 1)
1296 		nrd = (nrd & 0x1e) | 0x20;
1297 
1298 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1299 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1300 
1301 	r = pfpsd->fp_current_read_gsr(fp);
1302 	ao = (int)GSR_MASK(r);
1303 
1304 	/*
1305 	 * BSHUFFLE Destination Byte Selection
1306 	 * rd Byte	Source
1307 	 * 0		rs byte[GSR.mask<31..28>]
1308 	 * 1		rs byte[GSR.mask<27..24>]
1309 	 * 2		rs byte[GSR.mask<23..20>]
1310 	 * 3		rs byte[GSR.mask<19..16>]
1311 	 * 4		rs byte[GSR.mask<15..12>]
1312 	 * 5		rs byte[GSR.mask<11..8>]
1313 	 * 6		rs byte[GSR.mask<7..4>]
1314 	 * 7		rs byte[GSR.mask<3..0>]
1315 	 * P.S. rs1 is the upper half and rs2 is the lower half
1316 	 * Bytes in the source value are numbered from most to
1317 	 * least significant
1318 	 */
1319 	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1320 		j = ao & 0xf;		/* get byte number */
1321 		if (j < 8) {
1322 			lrd.c[i] = lrs1.c[j];
1323 		} else {
1324 			lrd.c[i] = lrs2.c[j - 8];
1325 		}
1326 	}
1327 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1328 
1329 	return (ftt_none);
1330 }
1331 
1332 /*
1333  * Simulator for siam instruction.
1334  */
1335 static enum ftt_type
1336 vis_siam(
1337 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1338 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1339 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1340 {
1341 	uint_t	nrs2;			/* Register number fields. */
1342 	uint64_t g, r;
1343 	nrs2 = inst.rs2;
1344 
1345 	g = pfpsd->fp_current_read_gsr(fp);
1346 	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1347 	r = nrs2 & 0x7;			/* get mode(3 bit) */
1348 	g |= (r << GSR_IRND_SHIFT);
1349 	pfpsd->fp_current_write_gsr(g, fp);
1350 	return (ftt_none);
1351 }
1352 
1353 /*
1354  * Simulator for VIS loads and stores between floating-point unit and memory.
1355  */
1356 enum ftt_type
1357 vis_fldst(
1358 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1359 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1360 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1361 	void		*prw,	/* Pointer to locals and ins. */
1362 	uint_t		asi)	/* asi to emulate! */
1363 {
1364 	union {
1365 		vis_inst_type	inst;
1366 		fp_inst_type	pinst;
1367 	} i;
1368 
1369 	ASSERT(USERMODE(pregs->r_tstate));
1370 	i.pinst = pinst;
1371 	switch (asi) {
1372 		case ASI_PST8_P:
1373 		case ASI_PST8_S:
1374 		case ASI_PST16_P:
1375 		case ASI_PST16_S:
1376 		case ASI_PST32_P:
1377 		case ASI_PST32_S:
1378 		case ASI_PST8_PL:
1379 		case ASI_PST8_SL:
1380 		case ASI_PST16_PL:
1381 		case ASI_PST16_SL:
1382 		case ASI_PST32_PL:
1383 		case ASI_PST32_SL:
1384 			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1385 				prw, asi));
1386 		case ASI_FL8_P:
1387 		case ASI_FL8_S:
1388 		case ASI_FL8_PL:
1389 		case ASI_FL8_SL:
1390 		case ASI_FL16_P:
1391 		case ASI_FL16_S:
1392 		case ASI_FL16_PL:
1393 		case ASI_FL16_SL:
1394 			return (vis_short_fls(pfpsd, i.inst, pregs,
1395 				prw, asi));
1396 		case ASI_BLK_AIUP:
1397 		case ASI_BLK_AIUS:
1398 		case ASI_BLK_AIUPL:
1399 		case ASI_BLK_AIUSL:
1400 		case ASI_BLK_P:
1401 		case ASI_BLK_S:
1402 		case ASI_BLK_PL:
1403 		case ASI_BLK_SL:
1404 		case ASI_BLK_COMMIT_P:
1405 		case ASI_BLK_COMMIT_S:
1406 			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1407 				prw, asi));
1408 		default:
1409 			return (ftt_unimplemented);
1410 	}
1411 }
1412 
1413 /*
1414  * Simulator for partial stores between floating-point unit and memory.
1415  */
1416 static enum ftt_type
1417 vis_prtl_fst(
1418 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1419 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1420 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1421 	void		*prw,	/* Pointer to locals and ins. */
1422 	uint_t		asi)	/* asi to emulate! */
1423 {
1424 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1425 	uint_t	opf, msk;
1426 	int	h, i, j;
1427 	uint64_t ea, tmsk;
1428 	union {
1429 		freg_type	f;
1430 		uint64_t	ll;
1431 		uint32_t	i[2];
1432 		uint16_t	s[4];
1433 		uint8_t		c[8];
1434 	} k, l, res;
1435 	enum ftt_type   ftt;
1436 
1437 	nrs1 = inst.rs1;
1438 	nrs2 = inst.rs2;
1439 	nrd = inst.rd;
1440 	if ((nrd & 1) == 1) 		/* fix register encoding */
1441 		nrd = (nrd & 0x1e) | 0x20;
1442 	opf = inst.opf;
1443 	res.ll = 0;
1444 	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1445 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1446 		if (ftt != ftt_none)
1447 			return (ftt);
1448 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1449 		if (ftt != ftt_none)
1450 			return (ftt);
1451 		msk = (uint_t)tmsk;
1452 	} else {
1453 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1454 		return (ftt_unimplemented);
1455 	}
1456 
1457 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1458 	if ((ea & 0x3) != 0)
1459 		return (ftt_alignment);	/* Require 32 bit-alignment. */
1460 
1461 	switch (asi) {
1462 	case ASI_PST8_P:
1463 	case ASI_PST8_S:
1464 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1465 		if (ftt != ftt_none)
1466 			return (ftt);
1467 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1468 		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1469 			if ((msk & j) == j)
1470 				res.c[i] = k.c[i];
1471 			else
1472 				res.c[i] = l.c[i];
1473 		}
1474 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1475 		if (ftt != ftt_none)
1476 			return (ftt);
1477 		break;
1478 	case ASI_PST8_PL:	/* little-endian */
1479 	case ASI_PST8_SL:
1480 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1481 		if (ftt != ftt_none)
1482 			return (ftt);
1483 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1484 		for (h = 7, i = 0, j = 1; i <= 7; h--, i++, j <<= 1) {
1485 			if ((msk & j) == j)
1486 				res.c[i] = k.c[h];
1487 			else
1488 				res.c[i] = l.c[i];
1489 		}
1490 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1491 		if (ftt != ftt_none)
1492 			return (ftt);
1493 		break;
1494 	case ASI_PST16_P:
1495 	case ASI_PST16_S:
1496 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1497 		if (ftt != ftt_none)
1498 			return (ftt);
1499 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1500 		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1501 			if ((msk & j) == j)
1502 				res.s[i] = k.s[i];
1503 			else
1504 				res.s[i] = l.s[i];
1505 		}
1506 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1507 		if (ftt != ftt_none)
1508 			return (ftt);
1509 		break;
1510 	case ASI_PST16_PL:
1511 	case ASI_PST16_SL:
1512 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1513 		if (ftt != ftt_none)
1514 			return (ftt);
1515 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1516 		for (h = 7, i = 0, j = 1; i <= 6; h -= 2, i += 2, j <<= 1) {
1517 			if ((msk & j) == j) {
1518 				res.c[i] = k.c[h];
1519 				res.c[i+1] = k.c[h-1];
1520 			} else {
1521 				res.c[i] = l.c[i];
1522 				res.c[i+1] = l.c[i+1];
1523 			}
1524 		}
1525 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1526 		if (ftt != ftt_none)
1527 			return (ftt);
1528 		break;
1529 	case ASI_PST32_P:
1530 	case ASI_PST32_S:
1531 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1532 		if (ftt != ftt_none)
1533 			return (ftt);
1534 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1535 		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1536 			if ((msk & j) == j)
1537 				res.i[i] = k.i[i];
1538 			else
1539 				res.i[i] = l.i[i];
1540 		}
1541 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1542 		if (ftt != ftt_none)
1543 			return (ftt);
1544 		break;
1545 	case ASI_PST32_PL:
1546 	case ASI_PST32_SL:
1547 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1548 		if (ftt != ftt_none)
1549 			return (ftt);
1550 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1551 		for (h = 7, i = 0, j = 1; i <= 4; h -= 4, i += 4, j <<= 1) {
1552 			if ((msk & j) == j) {
1553 				res.c[i] = k.c[h];
1554 				res.c[i+1] = k.c[h-1];
1555 				res.c[i+2] = k.c[h-2];
1556 				res.c[i+3] = k.c[h-3];
1557 			} else {
1558 				res.c[i] = l.c[i];
1559 				res.c[i+1] = l.c[i+1];
1560 				res.c[i+2] = l.c[i+2];
1561 				res.c[i+3] = l.c[i+3];
1562 			}
1563 		}
1564 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1565 		if (ftt != ftt_none)
1566 			return (ftt);
1567 		break;
1568 	}
1569 
1570 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1571 	pregs->r_npc += 4;
1572 	return (ftt_none);
1573 }
1574 
1575 /*
1576  * Simulator for short load/stores between floating-point unit and memory.
1577  */
1578 static enum ftt_type
1579 vis_short_fls(
1580 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1581 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1582 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1583 	void		*prw,	/* Pointer to locals and ins. */
1584 	uint_t		asi)	/* asi to emulate! */
1585 {
1586 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1587 	uint_t	opf;
1588 	uint64_t ea, tea;
1589 	union {
1590 		freg_type	f;
1591 		uint64_t	ll;
1592 		uint32_t	i[2];
1593 		uint16_t	s[4];
1594 		uint8_t		c[8];
1595 	} k;
1596 	union {
1597 		vis_inst_type	inst;
1598 		int		i;
1599 	} fp;
1600 	enum ftt_type   ftt = ftt_none;
1601 	ushort_t us;
1602 	uchar_t uc;
1603 
1604 	nrs1 = inst.rs1;
1605 	nrs2 = inst.rs2;
1606 	nrd = inst.rd;
1607 	if ((nrd & 1) == 1) 		/* fix register encoding */
1608 		nrd = (nrd & 0x1e) | 0x20;
1609 	opf = inst.opf;
1610 	fp.inst = inst;
1611 	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1612 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1613 		if (ftt != ftt_none)
1614 			return (ftt);
1615 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1616 		if (ftt != ftt_none)
1617 			return (ftt);
1618 		ea += tea;
1619 	} else {	/* effective address = rs1 + imm13 */
1620 		fp.inst = inst;
1621 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1622 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1623 		if (ftt != ftt_none)
1624 			return (ftt);
1625 		ea += tea;
1626 	}
1627 	if (get_udatamodel() == DATAMODEL_ILP32)
1628 		ea = (uint64_t)(caddr32_t)ea;
1629 
1630 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1631 	switch (asi) {
1632 	case ASI_FL8_P:
1633 	case ASI_FL8_S:
1634 	case ASI_FL8_PL:		/* little-endian */
1635 	case ASI_FL8_SL:
1636 		if ((inst.op3 & 7) == 3) {	/* load byte */
1637 			if (fuword8((void *)ea, &uc) == -1)
1638 				return (ftt_fault);
1639 			k.ll = 0;
1640 			k.c[7] = uc;
1641 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1642 		} else {			/* store byte */
1643 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1644 			uc = k.c[7];
1645 			if (subyte((caddr_t)ea, uc) == -1)
1646 				return (ftt_fault);
1647 		}
1648 		break;
1649 	case ASI_FL16_P:
1650 	case ASI_FL16_S:
1651 		if ((ea & 1) == 1)
1652 			return (ftt_alignment);
1653 		if ((inst.op3 & 7) == 3) {	/* load short */
1654 			if (fuword16((void *)ea, &us) == -1)
1655 				return (ftt_fault);
1656 			k.ll = 0;
1657 			k.s[3] = us;
1658 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1659 		} else {			/* store short */
1660 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1661 			us = k.s[3];
1662 			if (suword16((caddr_t)ea, us) == -1)
1663 				return (ftt_fault);
1664 		}
1665 		break;
1666 	case ASI_FL16_PL:		/* little-endian */
1667 	case ASI_FL16_SL:
1668 		if ((ea & 1) == 1)
1669 			return (ftt_alignment);
1670 		if ((inst.op3 & 7) == 3) {	/* load short */
1671 			if (fuword16((void *)ea, &us) == -1)
1672 				return (ftt_fault);
1673 			k.ll = 0;
1674 			k.c[6] = (uchar_t)us;
1675 			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1676 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1677 		} else {			/* store short */
1678 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1679 			uc = k.c[7];
1680 			us = (ushort_t)((uc << 8) | k.c[6]);
1681 			if (suword16((void *)ea, us) == -1)
1682 				return (ftt_fault);
1683 		}
1684 		break;
1685 	}
1686 
1687 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1688 	pregs->r_npc += 4;
1689 	return (ftt_none);
1690 }
1691 
1692 /*
1693  * Simulator for block loads and stores between floating-point unit and memory.
1694  * We pass the addrees of ea to sync_data_memory() to flush the Ecache.
1695  * Sync_data_memory() calls platform dependent code to flush the Ecache.
1696  */
1697 static enum ftt_type
1698 vis_blk_fldst(
1699 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1700 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1701 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1702 	void		*prw,	/* Pointer to locals and ins. */
1703 	uint_t		asi)	/* asi to emulate! */
1704 {
1705 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1706 	uint_t	opf, h, i, j;
1707 	uint64_t ea, tea;
1708 	union {
1709 		freg_type	f;
1710 		uint64_t	ll;
1711 		uint8_t		c[8];
1712 	} k, l;
1713 	union {
1714 		vis_inst_type	inst;
1715 		int32_t		i;
1716 	} fp;
1717 	enum ftt_type   ftt;
1718 	boolean_t little_endian = B_FALSE;
1719 
1720 	nrs1 = inst.rs1;
1721 	nrs2 = inst.rs2;
1722 	nrd = inst.rd;
1723 	if ((nrd & 1) == 1) 		/* fix register encoding */
1724 		nrd = (nrd & 0x1e) | 0x20;
1725 
1726 	/* ensure register is 8-double precision aligned */
1727 	if ((nrd & 0xf) != 0)
1728 		return (ftt_unimplemented);
1729 
1730 	opf = inst.opf;
1731 	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1732 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1733 		if (ftt != ftt_none)
1734 			return (ftt);
1735 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1736 		if (ftt != ftt_none)
1737 			return (ftt);
1738 		ea += tea;
1739 	} else {			/* effective address = rs1 + imm13 */
1740 		fp.inst = inst;
1741 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1742 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1743 		if (ftt != ftt_none)
1744 			return (ftt);
1745 		ea += tea;
1746 	}
1747 	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1748 		return (ftt_alignment);
1749 
1750 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1751 	switch (asi) {
1752 	case ASI_BLK_AIUPL:
1753 	case ASI_BLK_AIUSL:
1754 	case ASI_BLK_PL:
1755 	case ASI_BLK_SL:
1756 		little_endian = B_TRUE;
1757 		/* FALLTHROUGH */
1758 	case ASI_BLK_AIUP:
1759 	case ASI_BLK_AIUS:
1760 	case ASI_BLK_P:
1761 	case ASI_BLK_S:
1762 	case ASI_BLK_COMMIT_P:
1763 	case ASI_BLK_COMMIT_S:
1764 		if ((inst.op3 & 7) == 3) {	/* lddf */
1765 		    for (i = 0; i < 8; i++, nrd += 2) {
1766 			ftt = _fp_read_extword((uint64_t *)ea, &k.ll, pfpsd);
1767 			if (ftt != ftt_none)
1768 				return (ftt);
1769 			if (little_endian) {
1770 				for (j = 0, h = 7; j < 8; j++, h--)
1771 					l.c[h] = k.c[j];
1772 				k.ll = l.ll;
1773 			}
1774 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1775 			ea += 8;
1776 		    }
1777 		} else {			/* stdf */
1778 		    for (i = 0; i < 8; i++, nrd += 2) {
1779 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1780 			if (little_endian) {
1781 				for (j = 0, h = 7; j < 8; j++, h--)
1782 					l.c[h] = k.c[j];
1783 				k.ll = l.ll;
1784 			}
1785 			ftt = _fp_write_extword((uint64_t *)ea, k.ll, pfpsd);
1786 			if (ftt != ftt_none)
1787 				return (ftt);
1788 			ea += 8;
1789 		    }
1790 		}
1791 		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1792 			sync_data_memory((caddr_t)(ea - 64), 64);
1793 		break;
1794 	default:
1795 		/* addr of unimp inst */
1796 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1797 		return (ftt_unimplemented);
1798 	}
1799 
1800 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1801 	pregs->r_npc += 4;
1802 	return (ftt_none);
1803 }
1804 
1805 /*
1806  * Simulator for rd %gsr instruction.
1807  */
1808 enum ftt_type
1809 vis_rdgsr(
1810 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1811 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1812 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1813 	void		*prw,	/* Pointer to locals and ins. */
1814 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1815 {
1816 	uint_t nrd;
1817 	uint64_t r;
1818 	enum ftt_type ftt = ftt_none;
1819 
1820 	nrd = pinst.rd;
1821 
1822 	r = pfpsd->fp_current_read_gsr(fp);
1823 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
1824 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1825 	pregs->r_npc += 4;
1826 	return (ftt);
1827 }
1828 
1829 /*
1830  * Simulator for wr %gsr instruction.
1831  */
1832 enum ftt_type
1833 vis_wrgsr(
1834 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1835 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1836 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1837 	void		*prw,	/* Pointer to locals and ins. */
1838 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1839 {
1840 	uint_t nrs1;
1841 	uint64_t r, r1, r2;
1842 	enum ftt_type ftt = ftt_none;
1843 
1844 	nrs1 = pinst.rs1;
1845 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
1846 	if (ftt != ftt_none)
1847 		return (ftt);
1848 	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
1849 		uint_t nrs2;
1850 
1851 		nrs2 = pinst.rs2;
1852 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
1853 		if (ftt != ftt_none)
1854 			return (ftt);
1855 	} else {	/* use sign_ext(simm13) */
1856 		union {
1857 			fp_inst_type	inst;
1858 			uint32_t	i;
1859 		} fp;
1860 
1861 		fp.inst = pinst;		/* Extract simm13 field */
1862 		r2 = (fp.i << 19) >> 19;
1863 	}
1864 	r = r1 ^ r2;
1865 	pfpsd->fp_current_write_gsr(r, fp);
1866 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1867 	pregs->r_npc += 4;
1868 	return (ftt);
1869 }
1870 
1871 /*
1872  * This is the loadable module wrapper.
1873  */
1874 #include <sys/errno.h>
1875 #include <sys/modctl.h>
1876 
1877 /*
1878  * Module linkage information for the kernel.
1879  */
1880 extern struct mod_ops mod_miscops;
1881 
1882 static struct modlmisc modlmisc = {
1883 	&mod_miscops,
1884 	"vis fp simulation",
1885 };
1886 
1887 static struct modlinkage modlinkage = {
1888 	MODREV_1, (void *)&modlmisc, NULL
1889 };
1890 
1891 int
1892 _init(void)
1893 {
1894 	return (mod_install(&modlinkage));
1895 }
1896 
1897 int
1898 _info(struct modinfo *modinfop)
1899 {
1900 	return (mod_info(&modlinkage, modinfop));
1901 }
1902