xref: /titanic_50/usr/src/uts/sun4/os/visinstr.c (revision 381a2a9a387f449fab7d0c7e97c4184c26963abf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /* VIS floating point instruction simulator for Sparc FPU simulator. */
29 
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/fpu/fpusystm.h>
33 #include <sys/fpu/fpu_simulator.h>
34 #include <sys/vis_simulator.h>
35 #include <sys/fpu/globals.h>
36 #include <sys/privregs.h>
37 #include <sys/sun4asi.h>
38 #include <sys/machasi.h>
39 #include <sys/debug.h>
40 #include <sys/cpu_module.h>
41 #include <sys/systm.h>
42 #include <sys/machsystm.h>
43 
44 #define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
45 #define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
46 #define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */
47 
48 extern	uint_t	get_subcc_ccr(uint64_t, uint64_t);
49 
50 static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
51 				void *);
52 static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
53 				struct regs *, void *, kfpu_t *);
54 static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
55 				void *);
56 static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
57 				kfpu_t *);
58 static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
59 				void *, kfpu_t *);
60 static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
61 				kfpu_t *);
62 static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
63 static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
64 				void *);
65 static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
66 static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
67 static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
68 static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type);
69 static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
70 				void *, uint_t);
71 static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
72 				struct regs *, void *, uint_t);
73 static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
74 				struct regs *, void *, uint_t);
75 
76 /*
77  * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
78  * traps.
79  */
80 enum ftt_type
81 vis_fpu_simulator(
82 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
83 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
84 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
85 	void		*prw,	/* Pointer to locals and ins. */
86 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
87 {
88 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
89 	uint_t	us1, us2, usr;
90 	uint64_t lus1, lus2, lusr;
91 	enum ftt_type ftt = ftt_none;
92 	union {
93 		vis_inst_type	inst;
94 		fp_inst_type	pinst;
95 	} f;
96 
97 	ASSERT(USERMODE(pregs->r_tstate));
98 	nrs1 = pinst.rs1;
99 	nrs2 = pinst.rs2;
100 	nrd = pinst.rd;
101 	f.pinst = pinst;
102 	if ((f.inst.opf & 1) == 0) {		/* double precision */
103 		if ((nrs1 & 1) == 1) 		/* fix register encoding */
104 			nrs1 = (nrs1 & 0x1e) | 0x20;
105 		if ((nrs2 & 1) == 1)
106 			nrs2 = (nrs2 & 0x1e) | 0x20;
107 		if ((nrd & 1) == 1)
108 			nrd = (nrd & 0x1e) | 0x20;
109 	}
110 
111 	switch (f.inst.opf) {
112 		/* these instr's do not use fp regs */
113 	case edge8:
114 	case edge8l:
115 	case edge8n:
116 	case edge8ln:
117 	case edge16:
118 	case edge16l:
119 	case edge16n:
120 	case edge16ln:
121 	case edge32:
122 	case edge32l:
123 	case edge32n:
124 	case edge32ln:
125 		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
126 		break;
127 	case array8:
128 	case array16:
129 	case array32:
130 		ftt = vis_array(pfpsd, f.inst, pregs, prw);
131 		break;
132 	case alignaddr:
133 	case alignaddrl:
134 		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
135 		break;
136 	case bmask:
137 		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
138 		break;
139 	case fcmple16:
140 	case fcmpne16:
141 	case fcmpgt16:
142 	case fcmpeq16:
143 	case fcmple32:
144 	case fcmpne32:
145 	case fcmpgt32:
146 	case fcmpeq32:
147 		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
148 		break;
149 	case fmul8x16:
150 	case fmul8x16au:
151 	case fmul8x16al:
152 	case fmul8sux16:
153 	case fmul8ulx16:
154 	case fmuld8sux16:
155 	case fmuld8ulx16:
156 		ftt = vis_fmul(pfpsd, f.inst);
157 		break;
158 	case fpack16:
159 	case fpack32:
160 	case fpackfix:
161 	case fexpand:
162 	case fpmerge:
163 		ftt = vis_fpixel(pfpsd, f.inst, fp);
164 		break;
165 	case pdist:
166 		ftt = vis_pdist(pfpsd, pinst);
167 		break;
168 	case faligndata:
169 		ftt = vis_faligndata(pfpsd, pinst, fp);
170 		break;
171 	case bshuffle:
172 		ftt = vis_bshuffle(pfpsd, pinst, fp);
173 		break;
174 	case fpadd16:
175 	case fpadd16s:
176 	case fpadd32:
177 	case fpadd32s:
178 	case fpsub16:
179 	case fpsub16s:
180 	case fpsub32:
181 	case fpsub32s:
182 		ftt = vis_fpaddsub(pfpsd, f.inst);
183 		break;
184 	case fzero:
185 		lusr = 0;
186 		_fp_pack_extword(pfpsd, &lusr, nrd);
187 		break;
188 	case fzeros:
189 		usr = 0;
190 		_fp_pack_word(pfpsd, &usr, nrd);
191 		break;
192 	case fnor:
193 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
194 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
195 		lusr = ~(lus1 | lus2);
196 		_fp_pack_extword(pfpsd, &lusr, nrd);
197 		break;
198 	case fnors:
199 		_fp_unpack_word(pfpsd, &us1, nrs1);
200 		_fp_unpack_word(pfpsd, &us2, nrs2);
201 		usr = ~(us1 | us2);
202 		_fp_pack_word(pfpsd, &usr, nrd);
203 		break;
204 	case fandnot2:
205 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
206 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
207 		lusr = (lus1 & ~lus2);
208 		_fp_pack_extword(pfpsd, &lusr, nrd);
209 		break;
210 	case fandnot2s:
211 		_fp_unpack_word(pfpsd, &us1, nrs1);
212 		_fp_unpack_word(pfpsd, &us2, nrs2);
213 		usr = (us1 & ~us2);
214 		_fp_pack_word(pfpsd, &usr, nrd);
215 		break;
216 	case fnot2:
217 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
218 		lusr = ~lus2;
219 		_fp_pack_extword(pfpsd, &lusr, nrd);
220 		break;
221 	case fnot2s:
222 		_fp_unpack_word(pfpsd, &us2, nrs2);
223 		usr = ~us2;
224 		_fp_pack_word(pfpsd, &usr, nrd);
225 		break;
226 	case fandnot1:
227 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
228 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
229 		lusr = (~lus1 & lus2);
230 		_fp_pack_extword(pfpsd, &lusr, nrd);
231 		break;
232 	case fandnot1s:
233 		_fp_unpack_word(pfpsd, &us1, nrs1);
234 		_fp_unpack_word(pfpsd, &us2, nrs2);
235 		usr = (~us1 & us2);
236 		_fp_pack_word(pfpsd, &usr, nrd);
237 		break;
238 	case fnot1:
239 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
240 		lusr = ~lus1;
241 		_fp_pack_extword(pfpsd, &lusr, nrd);
242 		break;
243 	case fnot1s:
244 		_fp_unpack_word(pfpsd, &us1, nrs1);
245 		usr = ~us1;
246 		_fp_pack_word(pfpsd, &usr, nrd);
247 		break;
248 	case fxor:
249 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
250 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
251 		lusr = (lus1 ^ lus2);
252 		_fp_pack_extword(pfpsd, &lusr, nrd);
253 		break;
254 	case fxors:
255 		_fp_unpack_word(pfpsd, &us1, nrs1);
256 		_fp_unpack_word(pfpsd, &us2, nrs2);
257 		usr = (us1 ^ us2);
258 		_fp_pack_word(pfpsd, &usr, nrd);
259 		break;
260 	case fnand:
261 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
262 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
263 		lusr = ~(lus1 & lus2);
264 		_fp_pack_extword(pfpsd, &lusr, nrd);
265 		break;
266 	case fnands:
267 		_fp_unpack_word(pfpsd, &us1, nrs1);
268 		_fp_unpack_word(pfpsd, &us2, nrs2);
269 		usr = ~(us1 & us2);
270 		_fp_pack_word(pfpsd, &usr, nrd);
271 		break;
272 	case fand:
273 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
274 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
275 		lusr = (lus1 & lus2);
276 		_fp_pack_extword(pfpsd, &lusr, nrd);
277 		break;
278 	case fands:
279 		_fp_unpack_word(pfpsd, &us1, nrs1);
280 		_fp_unpack_word(pfpsd, &us2, nrs2);
281 		usr = (us1 & us2);
282 		_fp_pack_word(pfpsd, &usr, nrd);
283 		break;
284 	case fxnor:
285 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
286 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
287 		lusr = ~(lus1 ^ lus2);
288 		_fp_pack_extword(pfpsd, &lusr, nrd);
289 		break;
290 	case fxnors:
291 		_fp_unpack_word(pfpsd, &us1, nrs1);
292 		_fp_unpack_word(pfpsd, &us2, nrs2);
293 		usr = ~(us1 ^ us2);
294 		_fp_pack_word(pfpsd, &usr, nrd);
295 		break;
296 	case fsrc1:
297 		_fp_unpack_extword(pfpsd, &lusr, nrs1);
298 		_fp_pack_extword(pfpsd, &lusr, nrd);
299 		break;
300 	case fsrc1s:
301 		_fp_unpack_word(pfpsd, &usr, nrs1);
302 		_fp_pack_word(pfpsd, &usr, nrd);
303 		break;
304 	case fornot2:
305 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
306 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
307 		lusr = (lus1 | ~lus2);
308 		_fp_pack_extword(pfpsd, &lusr, nrd);
309 		break;
310 	case fornot2s:
311 		_fp_unpack_word(pfpsd, &us1, nrs1);
312 		_fp_unpack_word(pfpsd, &us2, nrs2);
313 		usr = (us1 | ~us2);
314 		_fp_pack_word(pfpsd, &usr, nrd);
315 		break;
316 	case fsrc2:
317 		_fp_unpack_extword(pfpsd, &lusr, nrs2);
318 		_fp_pack_extword(pfpsd, &lusr, nrd);
319 		break;
320 	case fsrc2s:
321 		_fp_unpack_word(pfpsd, &usr, nrs2);
322 		_fp_pack_word(pfpsd, &usr, nrd);
323 		break;
324 	case fornot1:
325 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
326 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
327 		lusr = (~lus1 | lus2);
328 		_fp_pack_extword(pfpsd, &lusr, nrd);
329 		break;
330 	case fornot1s:
331 		_fp_unpack_word(pfpsd, &us1, nrs1);
332 		_fp_unpack_word(pfpsd, &us2, nrs2);
333 		usr = (~us1 | us2);
334 		_fp_pack_word(pfpsd, &usr, nrd);
335 		break;
336 	case for_op:
337 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
338 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
339 		lusr = (lus1 | lus2);
340 		_fp_pack_extword(pfpsd, &lusr, nrd);
341 		break;
342 	case fors_op:
343 		_fp_unpack_word(pfpsd, &us1, nrs1);
344 		_fp_unpack_word(pfpsd, &us2, nrs2);
345 		usr = (us1 | us2);
346 		_fp_pack_word(pfpsd, &usr, nrd);
347 		break;
348 	case fone:
349 		lusr = 0xffffffffffffffff;
350 		_fp_pack_extword(pfpsd, &lusr, nrd);
351 		break;
352 	case fones:
353 		usr = 0xffffffffUL;
354 		_fp_pack_word(pfpsd, &usr, nrd);
355 		break;
356 	case siam:
357 		ftt = vis_siam(pfpsd, f.inst, fp);
358 		break;
359 	default:
360 		return (ftt_unimplemented);
361 	}
362 
363 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
364 	pregs->r_npc += 4;
365 	return (ftt);
366 }
367 
368 /*
369  * Simulator for edge instructions
370  */
371 static enum ftt_type
372 vis_edge(
373 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
374 	vis_inst_type	inst,	/* FPU instruction to simulate. */
375 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
376 	void		*prw)	/* Pointer to locals and ins. */
377 
378 {
379 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
380 	enum ftt_type ftt;
381 	uint64_t addrl, addrr, mask;
382 	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
383 	int al3l, al3r;			/* Lower 3 bits of address */
384 	int am32;			/* Whether PSTATE.AM == 1 */
385 	uint_t	ccr;
386 
387 	nrs1 = inst.rs1;
388 	nrs2 = inst.rs2;
389 	nrd = inst.rd;
390 
391 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
392 	if (ftt != ftt_none)
393 		return (ftt);
394 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
395 	if (ftt != ftt_none)
396 		return (ftt);
397 
398 	/* Get PSTATE.AM to determine 32-bit vs 64-bit addressing */
399 	am32 =  pregs->r_tstate & TSTATE_AM;
400 	if (am32 == 1) {
401 		ah61l = addrl & 0xffffffff8;
402 		ah61r = addrr & 0xffffffff8;
403 	} else {
404 		ah61l = addrl & ~0x7;
405 		ah61r = addrr & ~0x7;
406 	}
407 
408 
409 	switch (inst.opf) {
410 	case edge8:
411 	case edge8n:
412 	case edge8l:
413 	case edge8ln:
414 		al3l = addrl & 0x7;
415 		switch (inst.opf) {
416 		case edge8:
417 		case edge8n:
418 			if (inst.opf == edge8) {
419 				VISINFO_KSTAT(vis_edge8);
420 			} else {
421 				VISINFO_KSTAT(vis_edge8n);
422 			}
423 			mask = 0xff >> al3l;
424 			if (ah61l == ah61r) {
425 				al3r = addrr & 0x7;
426 				mask &= (0xff << (0x7 - al3r)) & 0xff;
427 			}
428 			break;
429 		case edge8l:
430 		case edge8ln:
431 			if (inst.opf == edge8l) {
432 				VISINFO_KSTAT(vis_edge8l);
433 			} else {
434 				VISINFO_KSTAT(vis_edge8ln);
435 			}
436 			mask = (0xff << al3l) & 0xff;
437 			if (ah61l == ah61r) {
438 				al3r = addrr & 0x7;
439 				mask &= 0xff >> (0x7 - al3r);
440 			}
441 			break;
442 		}
443 		break;
444 	case edge16:
445 	case edge16l:
446 	case edge16n:
447 	case edge16ln:
448 		al3l = addrl & 0x6;
449 		al3l >>= 0x1;
450 		switch (inst.opf) {
451 		case edge16:
452 		case edge16n:
453 			if (inst.opf == edge16) {
454 				VISINFO_KSTAT(vis_edge16);
455 
456 			} else {
457 				VISINFO_KSTAT(vis_edge16n);
458 			}
459 			mask = 0xf >> al3l;
460 			if (ah61l == ah61r) {
461 				al3r = addrr & 0x6;
462 				al3r >>= 0x1;
463 				mask &= (0xf << (0x3 - al3r)) & 0xf;
464 			}
465 			break;
466 		case edge16l:
467 		case edge16ln:
468 			if (inst.opf == edge16l) {
469 				VISINFO_KSTAT(vis_edge16l);
470 
471 			} else {
472 				VISINFO_KSTAT(vis_edge16ln);
473 			}
474 
475 			mask = (0xf << al3l) & 0xf;
476 			if (ah61l == ah61r) {
477 				al3r = addrr & 0x6;
478 				al3r >>= 0x1;
479 				mask &= 0xf >> (0x3 - al3r);
480 			}
481 			break;
482 		}
483 		break;
484 	case edge32:
485 	case edge32l:
486 	case edge32n:
487 	case edge32ln:
488 		al3l = addrl & 0x4;
489 		al3l >>= 0x2;
490 
491 		switch (inst.opf) {
492 		case edge32:
493 		case edge32n:
494 			if (inst.opf == edge32) {
495 				VISINFO_KSTAT(vis_edge32);
496 
497 			} else {
498 				VISINFO_KSTAT(vis_edge32n);
499 			}
500 			mask = 0x3 >> al3l;
501 			if (ah61l == ah61r) {
502 				al3r = addrr & 0x4;
503 				al3r >>= 0x2;
504 				mask &= (0x3 << (0x1 - al3r)) & 0x3;
505 			}
506 			break;
507 		case edge32l:
508 		case edge32ln:
509 			if (inst.opf == edge32l) {
510 				VISINFO_KSTAT(vis_edge32l);
511 
512 			} else {
513 				VISINFO_KSTAT(vis_edge32ln);
514 			}
515 			mask = (0x3 << al3l) & 0x3;
516 			if (ah61l == ah61r) {
517 				al3r = addrr & 0x4;
518 				al3r >>= 0x2;
519 				mask &= 0x3 >> (0x1 - al3r);
520 			}
521 			break;
522 		}
523 		break;
524 	}
525 
526 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
527 
528 	switch (inst.opf) {
529 	case edge8:
530 	case edge8l:
531 	case edge16:
532 	case edge16l:
533 	case edge32:
534 	case edge32l:
535 
536 		/* Update flags per SUBcc outcome */
537 		pregs->r_tstate &= ~((uint64_t)TSTATE_CCR_MASK
538 					<< TSTATE_CCR_SHIFT);
539 		ccr = get_subcc_ccr(addrl, addrr);  /* get subcc cond. codes */
540 		pregs->r_tstate |= ((uint64_t)ccr << TSTATE_CCR_SHIFT);
541 
542 		break;
543 	}
544 	return (ftt);
545 }
546 
547 /*
548  * Simulator for three dimentional array addressing instructions.
549  */
550 static enum ftt_type
551 vis_array(
552 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
553 	vis_inst_type	inst,	/* FPU instruction to simulate. */
554 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
555 	void		*prw)	/* Pointer to locals and ins. */
556 
557 {
558 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
559 	enum ftt_type ftt;
560 	uint64_t laddr, bsize, baddr;
561 	uint64_t nbit;
562 	int oy, oz;
563 
564 	nrs1 = inst.rs1;
565 	nrs2 = inst.rs2;
566 	nrd = inst.rd;
567 
568 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
569 	if (ftt != ftt_none)
570 		return (ftt);
571 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
572 	if (ftt != ftt_none)
573 		return (ftt);
574 
575 	if (bsize > 5) {
576 		bsize = 5;
577 	}
578 	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
579 	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
580 	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
581 
582 	baddr = 0;
583 	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
584 	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
585 	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
586 	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
587 	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
588 	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
589 	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
590 	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
591 	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
592 
593 	switch (inst.opf) {
594 	case array8:
595 		VISINFO_KSTAT(vis_array8);
596 		break;
597 	case array16:
598 		VISINFO_KSTAT(vis_array16);
599 		baddr <<= 1;
600 		break;
601 	case array32:
602 		VISINFO_KSTAT(vis_array32);
603 		baddr <<= 2;
604 		break;
605 	}
606 
607 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
608 
609 	return (ftt);
610 }
611 
612 /*
613  * Simulator for alignaddr and alignaddrl instructions.
614  */
615 static enum ftt_type
616 vis_alignaddr(
617 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
618 	vis_inst_type	inst,	/* FPU instruction to simulate. */
619 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
620 	void		*prw,	/* Pointer to locals and ins. */
621 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
622 {
623 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
624 	enum ftt_type ftt;
625 	uint64_t ea, tea, g, r;
626 	short s;
627 
628 	nrs1 = inst.rs1;
629 	nrs2 = inst.rs2;
630 	nrd = inst.rd;
631 
632 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
633 	if (ftt != ftt_none)
634 		return (ftt);
635 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
636 	if (ftt != ftt_none)
637 		return (ftt);
638 	ea += tea;
639 	r = ea & ~0x7;	/* zero least 3 significant bits */
640 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
641 
642 
643 	g = pfpsd->fp_current_read_gsr(fp);
644 	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
645 	r = ea & 0x7;
646 	if (inst.opf == alignaddrl) {
647 		s = (short)(~r);	/* 2's complement for alignaddrl */
648 		if (s < 0)
649 			r = (uint64_t)((s + 1) & 0x7);
650 		else
651 			r = (uint64_t)(s & 0x7);
652 	}
653 	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
654 	pfpsd->fp_current_write_gsr(g, fp);
655 
656 	return (ftt);
657 }
658 
659 /*
660  * Simulator for bmask instruction.
661  */
662 static enum ftt_type
663 vis_bmask(
664 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
665 	vis_inst_type	inst,	/* FPU instruction to simulate. */
666 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
667 	void		*prw,	/* Pointer to locals and ins. */
668 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
669 {
670 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
671 	enum ftt_type ftt;
672 	uint64_t ea, tea, g;
673 
674 	VISINFO_KSTAT(vis_bmask);
675 	nrs1 = inst.rs1;
676 	nrs2 = inst.rs2;
677 	nrd = inst.rd;
678 
679 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
680 	if (ftt != ftt_none)
681 		return (ftt);
682 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
683 	if (ftt != ftt_none)
684 		return (ftt);
685 	ea += tea;
686 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
687 
688 	g = pfpsd->fp_current_read_gsr(fp);
689 	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
690 
691 	/* Put the least significant 32 bits of ea in GSR.mask */
692 	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
693 	pfpsd->fp_current_write_gsr(g, fp);
694 	return (ftt);
695 }
696 
697 /*
698  * Simulator for fp[add|sub]* instruction.
699  */
700 static enum ftt_type
701 vis_fpaddsub(
702 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
703 	vis_inst_type	inst)	/* FPU instruction to simulate. */
704 {
705 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
706 	union {
707 		uint64_t	ll;
708 		uint32_t	i[2];
709 		uint16_t	s[4];
710 	} lrs1, lrs2, lrd;
711 	union {
712 		uint32_t	i;
713 		uint16_t	s[2];
714 	} krs1, krs2, krd;
715 	int i;
716 
717 	nrs1 = inst.rs1;
718 	nrs2 = inst.rs2;
719 	nrd = inst.rd;
720 	if ((inst.opf & 1) == 0) {	/* double precision */
721 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
722 			nrs1 = (nrs1 & 0x1e) | 0x20;
723 		if ((nrs2 & 1) == 1)
724 			nrs2 = (nrs2 & 0x1e) | 0x20;
725 		if ((nrd & 1) == 1)
726 			nrd = (nrd & 0x1e) | 0x20;
727 	}
728 	switch (inst.opf) {
729 	case fpadd16:
730 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
731 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
732 		for (i = 0; i <= 3; i++) {
733 			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
734 		}
735 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
736 		break;
737 	case fpadd16s:
738 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
739 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
740 		for (i = 0; i <= 1; i++) {
741 			krd.s[i] = krs1.s[i] + krs2.s[i];
742 		}
743 		_fp_pack_word(pfpsd, &krd.i, nrd);
744 		break;
745 	case fpadd32:
746 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
747 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
748 		for (i = 0; i <= 1; i++) {
749 			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
750 		}
751 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
752 		break;
753 	case fpadd32s:
754 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
755 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
756 		krd.i = krs1.i + krs2.i;
757 		_fp_pack_word(pfpsd, &krd.i, nrd);
758 		break;
759 	case fpsub16:
760 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
761 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
762 		for (i = 0; i <= 3; i++) {
763 			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
764 		}
765 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
766 		break;
767 	case fpsub16s:
768 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
769 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
770 		for (i = 0; i <= 1; i++) {
771 			krd.s[i] = krs1.s[i] - krs2.s[i];
772 		}
773 		_fp_pack_word(pfpsd, &krd.i, nrd);
774 		break;
775 	case fpsub32:
776 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
777 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
778 		for (i = 0; i <= 1; i++) {
779 			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
780 		}
781 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
782 		break;
783 	case fpsub32s:
784 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
785 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
786 		krd.i = krs1.i - krs2.i;
787 		_fp_pack_word(pfpsd, &krd.i, nrd);
788 		break;
789 	}
790 	return (ftt_none);
791 }
792 
793 /*
794  * Simulator for fcmp* instruction.
795  */
796 static enum ftt_type
797 vis_fcmp(
798 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
799 	vis_inst_type	inst,	/* FPU instruction to simulate. */
800 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
801 	void		*prw)	/* Pointer to locals and ins. */
802 {
803 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
804 	union {
805 		uint64_t	ll;
806 		uint32_t	i[2];
807 		uint16_t	s[4];
808 	} krs1, krs2, krd;
809 	enum ftt_type ftt;
810 	short sr1, sr2;
811 	int i, ir1, ir2;
812 
813 	nrs1 = inst.rs1;
814 	nrs2 = inst.rs2;
815 	nrd = inst.rd;
816 	krd.ll = 0;
817 	if ((nrs1 & 1) == 1) 	/* fix register encoding */
818 		nrs1 = (nrs1 & 0x1e) | 0x20;
819 	if ((nrs2 & 1) == 1)
820 		nrs2 = (nrs2 & 0x1e) | 0x20;
821 
822 	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
823 	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
824 	switch (inst.opf) {
825 	case fcmple16:
826 		VISINFO_KSTAT(vis_fcmple16);
827 		for (i = 0; i <= 3; i++) {
828 			sr1 = (short)krs1.s[i];
829 			sr2 = (short)krs2.s[i];
830 			if (sr1 <= sr2)
831 				krd.ll += (0x8 >> i);
832 		}
833 		break;
834 	case fcmpne16:
835 		VISINFO_KSTAT(vis_fcmpne16);
836 		for (i = 0; i <= 3; i++) {
837 			sr1 = (short)krs1.s[i];
838 			sr2 = (short)krs2.s[i];
839 			if (sr1 != sr2)
840 				krd.ll += (0x8 >> i);
841 		}
842 		break;
843 	case fcmpgt16:
844 		VISINFO_KSTAT(vis_fcmpgt16);
845 		for (i = 0; i <= 3; i++) {
846 			sr1 = (short)krs1.s[i];
847 			sr2 = (short)krs2.s[i];
848 			if (sr1 > sr2)
849 				krd.ll += (0x8 >> i);
850 		}
851 		break;
852 	case fcmpeq16:
853 		VISINFO_KSTAT(vis_fcmpeq16);
854 		for (i = 0; i <= 3; i++) {
855 			sr1 = (short)krs1.s[i];
856 			sr2 = (short)krs2.s[i];
857 			if (sr1 == sr2)
858 				krd.ll += (0x8 >> i);
859 		}
860 		break;
861 	case fcmple32:
862 		VISINFO_KSTAT(vis_fcmple32);
863 		for (i = 0; i <= 1; i++) {
864 			ir1 = (int)krs1.i[i];
865 			ir2 = (int)krs2.i[i];
866 			if (ir1 <= ir2)
867 				krd.ll += (0x2 >> i);
868 		}
869 		break;
870 	case fcmpne32:
871 		VISINFO_KSTAT(vis_fcmpne32);
872 		for (i = 0; i <= 1; i++) {
873 			ir1 = (int)krs1.i[i];
874 			ir2 = (int)krs2.i[i];
875 			if (ir1 != ir2)
876 				krd.ll += (0x2 >> i);
877 		}
878 		break;
879 	case fcmpgt32:
880 		VISINFO_KSTAT(vis_fcmpgt32);
881 		for (i = 0; i <= 1; i++) {
882 			ir1 = (int)krs1.i[i];
883 			ir2 = (int)krs2.i[i];
884 			if (ir1 > ir2)
885 				krd.ll += (0x2 >> i);
886 		}
887 		break;
888 	case fcmpeq32:
889 		VISINFO_KSTAT(vis_fcmpeq32);
890 		for (i = 0; i <= 1; i++) {
891 			ir1 = (int)krs1.i[i];
892 			ir2 = (int)krs2.i[i];
893 			if (ir1 == ir2)
894 				krd.ll += (0x2 >> i);
895 		}
896 		break;
897 	}
898 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
899 	return (ftt);
900 }
901 
902 /*
903  * Simulator for fmul* instruction.
904  */
905 static enum ftt_type
906 vis_fmul(
907 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
908 	vis_inst_type	inst)	/* FPU instruction to simulate. */
909 {
910 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
911 	union {
912 		uint64_t	ll;
913 		uint32_t	i[2];
914 		uint16_t	s[4];
915 		uint8_t		c[8];
916 	} lrs1, lrs2, lrd;
917 	union {
918 		uint32_t	i;
919 		uint16_t	s[2];
920 		uint8_t		c[4];
921 	} krs1, krs2, kres;
922 	short s1, s2, sres;
923 	ushort_t us1;
924 	char c1;
925 	int i;
926 
927 	nrs1 = inst.rs1;
928 	nrs2 = inst.rs2;
929 	nrd = inst.rd;
930 	if ((inst.opf & 1) == 0) {	/* double precision */
931 		if ((nrd & 1) == 1) 	/* fix register encoding */
932 			nrd = (nrd & 0x1e) | 0x20;
933 	}
934 
935 	switch (inst.opf) {
936 	case fmul8x16:
937 		VISINFO_KSTAT(vis_fmul8x16);
938 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
939 		if ((nrs2 & 1) == 1)
940 			nrs2 = (nrs2 & 0x1e) | 0x20;
941 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
942 		for (i = 0; i <= 3; i++) {
943 			us1 = (ushort_t)krs1.c[i];
944 			s2 = (short)lrs2.s[i];
945 			kres.i = us1 * s2;
946 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
947 			if (kres.c[3] >= 0x80)
948 				sres++;
949 			lrd.s[i] = sres;
950 		}
951 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
952 		break;
953 	case fmul8x16au:
954 		VISINFO_KSTAT(vis_fmul8x16au);
955 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
956 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
957 		for (i = 0; i <= 3; i++) {
958 			us1 = (ushort_t)krs1.c[i];
959 			s2 = (short)krs2.s[0];
960 			kres.i = us1 * s2;
961 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
962 			if (kres.c[3] >= 0x80)
963 				sres++;
964 			lrd.s[i] = sres;
965 		}
966 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
967 		break;
968 	case fmul8x16al:
969 		VISINFO_KSTAT(vis_fmul8x16al);
970 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
971 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
972 		for (i = 0; i <= 3; i++) {
973 			us1 = (ushort_t)krs1.c[i];
974 			s2 = (short)krs2.s[1];
975 			kres.i = us1 * s2;
976 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
977 			if (kres.c[3] >= 0x80)
978 				sres++;
979 			lrd.s[i] = sres;
980 		}
981 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
982 		break;
983 	case fmul8sux16:
984 		VISINFO_KSTAT(vis_fmul8sux16);
985 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
986 			nrs1 = (nrs1 & 0x1e) | 0x20;
987 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
988 		if ((nrs2 & 1) == 1)
989 			nrs2 = (nrs2 & 0x1e) | 0x20;
990 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
991 		for (i = 0; i <= 3; i++) {
992 			c1 = lrs1.c[(i*2)];
993 			s1 = (short)c1;		/* keeps the sign alive */
994 			s2 = (short)lrs2.s[i];
995 			kres.i = s1 * s2;
996 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
997 			if (kres.c[3] >= 0x80)
998 				sres++;
999 			if (sres < 0)
1000 				lrd.s[i] = (sres & 0xFFFF);
1001 			else
1002 				lrd.s[i] = sres;
1003 		}
1004 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1005 		break;
1006 	case fmul8ulx16:
1007 		VISINFO_KSTAT(vis_fmul8ulx16);
1008 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1009 			nrs1 = (nrs1 & 0x1e) | 0x20;
1010 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1011 		if ((nrs2 & 1) == 1)
1012 			nrs2 = (nrs2 & 0x1e) | 0x20;
1013 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1014 		for (i = 0; i <= 3; i++) {
1015 			us1 = (ushort_t)lrs1.c[(i*2)+1];
1016 			s2 = (short)lrs2.s[i];
1017 			kres.i = us1 * s2;
1018 			sres = (short)kres.s[0];
1019 			if (kres.s[1] >= 0x8000)
1020 				sres++;
1021 			lrd.s[i] = sres;
1022 		}
1023 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1024 		break;
1025 	case fmuld8sux16:
1026 		VISINFO_KSTAT(vis_fmuld8sux16);
1027 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1028 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1029 		for (i = 0; i <= 1; i++) {
1030 			c1 = krs1.c[(i*2)];
1031 			s1 = (short)c1;		/* keeps the sign alive */
1032 			s2 = (short)krs2.s[i];
1033 			kres.i = s1 * s2;
1034 			lrd.i[i] = kres.i << 8;
1035 		}
1036 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1037 		break;
1038 	case fmuld8ulx16:
1039 		VISINFO_KSTAT(vis_fmuld8ulx16);
1040 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1041 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1042 		for (i = 0; i <= 1; i++) {
1043 			us1 = (ushort_t)krs1.c[(i*2)+1];
1044 			s2 = (short)krs2.s[i];
1045 			lrd.i[i] = us1 * s2;
1046 		}
1047 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1048 		break;
1049 	}
1050 	return (ftt_none);
1051 }
1052 
1053 /*
1054  * Simulator for fpixel formatting instructions.
1055  */
1056 static enum ftt_type
1057 vis_fpixel(
1058 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1059 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1060 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1061 {
1062 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1063 	int	i, j, k, sf;
1064 	union {
1065 		uint64_t	ll;
1066 		uint32_t	i[2];
1067 		uint16_t	s[4];
1068 		uint8_t		c[8];
1069 	} lrs1, lrs2, lrd;
1070 	union {
1071 		uint32_t	i;
1072 		uint16_t	s[2];
1073 		uint8_t		c[4];
1074 	} krs1, krs2, krd;
1075 	uint64_t r;
1076 	int64_t l, m;
1077 	short s;
1078 	uchar_t uc;
1079 
1080 	nrs1 = inst.rs1;
1081 	nrs2 = inst.rs2;
1082 	nrd = inst.rd;
1083 	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1084 		if ((nrd & 1) == 1) 	/* fix register encoding */
1085 			nrd = (nrd & 0x1e) | 0x20;
1086 	}
1087 
1088 	switch (inst.opf) {
1089 	case fpack16:
1090 		VISINFO_KSTAT(vis_fpack16);
1091 		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1092 			nrs2 = (nrs2 & 0x1e) | 0x20;
1093 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1094 		r = pfpsd->fp_current_read_gsr(fp);
1095 		/* fpack16 ignores GSR.scale msb */
1096 		sf = (int)(GSR_SCALE(r) & 0xf);
1097 		for (i = 0; i <= 3; i++) {
1098 			s = (short)lrs2.s[i];	/* preserve the sign */
1099 			j = ((int)s << sf);
1100 			k = j >> 7;
1101 			if (k < 0) {
1102 				uc = 0;
1103 			} else if (k > 255) {
1104 				uc = 255;
1105 			} else {
1106 				uc = (uchar_t)k;
1107 			}
1108 			krd.c[i] = uc;
1109 		}
1110 		_fp_pack_word(pfpsd, &krd.i, nrd);
1111 		break;
1112 	case fpack32:
1113 		VISINFO_KSTAT(vis_fpack32);
1114 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1115 			nrs1 = (nrs1 & 0x1e) | 0x20;
1116 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1117 		if ((nrs2 & 1) == 1)
1118 			nrs2 = (nrs2 & 0x1e) | 0x20;
1119 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1120 
1121 		r = pfpsd->fp_current_read_gsr(fp);
1122 		sf = (int)GSR_SCALE(r);
1123 		lrd.ll = lrs1.ll << 8;
1124 		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1125 			j = (int)lrs2.i[i];	/* preserve the sign */
1126 			l = ((int64_t)j << sf);
1127 			m = l >> 23;
1128 			if (m < 0) {
1129 				uc = 0;
1130 			} else if (m > 255) {
1131 				uc = 255;
1132 			} else {
1133 				uc = (uchar_t)m;
1134 			}
1135 			lrd.c[k] = uc;
1136 		}
1137 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1138 		break;
1139 	case fpackfix:
1140 		VISINFO_KSTAT(vis_fpackfix);
1141 		if ((nrs2 & 1) == 1)
1142 			nrs2 = (nrs2 & 0x1e) | 0x20;
1143 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1144 
1145 		r = pfpsd->fp_current_read_gsr(fp);
1146 		sf = (int)GSR_SCALE(r);
1147 		for (i = 0; i <= 1; i++) {
1148 			j = (int)lrs2.i[i];	/* preserve the sign */
1149 			l = ((int64_t)j << sf);
1150 			m = l >> 16;
1151 			if (m < -32768) {
1152 				s = -32768;
1153 			} else if (m > 32767) {
1154 				s = 32767;
1155 			} else {
1156 				s = (short)m;
1157 			}
1158 			krd.s[i] = s;
1159 		}
1160 		_fp_pack_word(pfpsd, &krd.i, nrd);
1161 		break;
1162 	case fexpand:
1163 		VISINFO_KSTAT(vis_fexpand);
1164 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1165 		for (i = 0; i <= 3; i++) {
1166 			uc = krs2.c[i];
1167 			lrd.s[i] = (ushort_t)(uc << 4);
1168 		}
1169 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1170 		break;
1171 	case fpmerge:
1172 		VISINFO_KSTAT(vis_fpmerge);
1173 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1174 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1175 		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1176 			lrd.c[j] = krs1.c[i];
1177 			lrd.c[j+1] = krs2.c[i];
1178 		}
1179 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1180 		break;
1181 	}
1182 	return (ftt_none);
1183 }
1184 
1185 /*
1186  * Simulator for pdist instruction.
1187  */
1188 enum ftt_type
1189 vis_pdist(
1190 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1191 	fp_inst_type	pinst)	/* FPU instruction to simulate. */
1192 {
1193 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1194 	int	i;
1195 	short	s;
1196 	union {
1197 		uint64_t	ll;
1198 		uint8_t		c[8];
1199 	} lrs1, lrs2, lrd;
1200 
1201 	nrs1 = pinst.rs1;
1202 	nrs2 = pinst.rs2;
1203 	nrd = pinst.rd;
1204 	VISINFO_KSTAT(vis_pdist);
1205 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1206 		nrs1 = (nrs1 & 0x1e) | 0x20;
1207 	if ((nrs2 & 1) == 1)
1208 		nrs2 = (nrs2 & 0x1e) | 0x20;
1209 	if ((nrd & 1) == 1)
1210 		nrd = (nrd & 0x1e) | 0x20;
1211 
1212 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1213 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1214 	_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1215 
1216 	for (i = 0; i <= 7; i++) {
1217 		s = (short)(lrs1.c[i] - lrs2.c[i]);
1218 		if (s < 0)
1219 			s = ~s + 1;
1220 		lrd.ll += s;
1221 	}
1222 
1223 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1224 	return (ftt_none);
1225 }
1226 
1227 /*
1228  * Simulator for faligndata instruction.
1229  */
1230 static enum ftt_type
1231 vis_faligndata(
1232 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1233 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1234 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1235 {
1236 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1237 	int	i, j, k, ao;
1238 	union {
1239 		uint64_t	ll;
1240 		uint8_t		c[8];
1241 	} lrs1, lrs2, lrd;
1242 	uint64_t r;
1243 
1244 	nrs1 = pinst.rs1;
1245 	nrs2 = pinst.rs2;
1246 	nrd = pinst.rd;
1247 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1248 		nrs1 = (nrs1 & 0x1e) | 0x20;
1249 	if ((nrs2 & 1) == 1)
1250 		nrs2 = (nrs2 & 0x1e) | 0x20;
1251 	if ((nrd & 1) == 1)
1252 		nrd = (nrd & 0x1e) | 0x20;
1253 
1254 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1255 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1256 
1257 	r = pfpsd->fp_current_read_gsr(fp);
1258 	ao = (int)GSR_ALIGN(r);
1259 
1260 	for (i = 0, j = ao, k = 0; i <= 7; i++)
1261 		if (j <= 7) {
1262 			lrd.c[i] = lrs1.c[j++];
1263 		} else {
1264 			lrd.c[i] = lrs2.c[k++];
1265 		}
1266 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1267 
1268 	return (ftt_none);
1269 }
1270 
1271 /*
1272  * Simulator for bshuffle instruction.
1273  */
1274 static enum ftt_type
1275 vis_bshuffle(
1276 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1277 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1278 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1279 {
1280 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1281 	int	i, j, ao;
1282 	union {
1283 		uint64_t	ll;
1284 		uint8_t		c[8];
1285 	} lrs1, lrs2, lrd;
1286 	uint64_t r;
1287 
1288 	VISINFO_KSTAT(vis_bshuffle);
1289 	nrs1 = pinst.rs1;
1290 	nrs2 = pinst.rs2;
1291 	nrd = pinst.rd;
1292 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1293 		nrs1 = (nrs1 & 0x1e) | 0x20;
1294 	if ((nrs2 & 1) == 1)
1295 		nrs2 = (nrs2 & 0x1e) | 0x20;
1296 	if ((nrd & 1) == 1)
1297 		nrd = (nrd & 0x1e) | 0x20;
1298 
1299 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1300 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1301 
1302 	r = pfpsd->fp_current_read_gsr(fp);
1303 	ao = (int)GSR_MASK(r);
1304 
1305 	/*
1306 	 * BSHUFFLE Destination Byte Selection
1307 	 * rd Byte	Source
1308 	 * 0		rs byte[GSR.mask<31..28>]
1309 	 * 1		rs byte[GSR.mask<27..24>]
1310 	 * 2		rs byte[GSR.mask<23..20>]
1311 	 * 3		rs byte[GSR.mask<19..16>]
1312 	 * 4		rs byte[GSR.mask<15..12>]
1313 	 * 5		rs byte[GSR.mask<11..8>]
1314 	 * 6		rs byte[GSR.mask<7..4>]
1315 	 * 7		rs byte[GSR.mask<3..0>]
1316 	 * P.S. rs1 is the upper half and rs2 is the lower half
1317 	 * Bytes in the source value are numbered from most to
1318 	 * least significant
1319 	 */
1320 	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1321 		j = ao & 0xf;		/* get byte number */
1322 		if (j < 8) {
1323 			lrd.c[i] = lrs1.c[j];
1324 		} else {
1325 			lrd.c[i] = lrs2.c[j - 8];
1326 		}
1327 	}
1328 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1329 
1330 	return (ftt_none);
1331 }
1332 
1333 /*
1334  * Simulator for siam instruction.
1335  */
1336 static enum ftt_type
1337 vis_siam(
1338 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1339 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1340 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1341 {
1342 	uint_t	nrs2;			/* Register number fields. */
1343 	uint64_t g, r;
1344 	nrs2 = inst.rs2;
1345 
1346 	g = pfpsd->fp_current_read_gsr(fp);
1347 	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1348 	r = nrs2 & 0x7;			/* get mode(3 bit) */
1349 	g |= (r << GSR_IRND_SHIFT);
1350 	pfpsd->fp_current_write_gsr(g, fp);
1351 	return (ftt_none);
1352 }
1353 
1354 /*
1355  * Simulator for VIS loads and stores between floating-point unit and memory.
1356  */
1357 enum ftt_type
1358 vis_fldst(
1359 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1360 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1361 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1362 	void		*prw,	/* Pointer to locals and ins. */
1363 	uint_t		asi)	/* asi to emulate! */
1364 {
1365 	union {
1366 		vis_inst_type	inst;
1367 		fp_inst_type	pinst;
1368 	} i;
1369 
1370 	ASSERT(USERMODE(pregs->r_tstate));
1371 	i.pinst = pinst;
1372 	switch (asi) {
1373 		case ASI_PST8_P:
1374 		case ASI_PST8_S:
1375 		case ASI_PST16_P:
1376 		case ASI_PST16_S:
1377 		case ASI_PST32_P:
1378 		case ASI_PST32_S:
1379 		case ASI_PST8_PL:
1380 		case ASI_PST8_SL:
1381 		case ASI_PST16_PL:
1382 		case ASI_PST16_SL:
1383 		case ASI_PST32_PL:
1384 		case ASI_PST32_SL:
1385 			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1386 				prw, asi));
1387 		case ASI_FL8_P:
1388 		case ASI_FL8_S:
1389 		case ASI_FL8_PL:
1390 		case ASI_FL8_SL:
1391 		case ASI_FL16_P:
1392 		case ASI_FL16_S:
1393 		case ASI_FL16_PL:
1394 		case ASI_FL16_SL:
1395 			return (vis_short_fls(pfpsd, i.inst, pregs,
1396 				prw, asi));
1397 		case ASI_BLK_AIUP:
1398 		case ASI_BLK_AIUS:
1399 		case ASI_BLK_AIUPL:
1400 		case ASI_BLK_AIUSL:
1401 		case ASI_BLK_P:
1402 		case ASI_BLK_S:
1403 		case ASI_BLK_PL:
1404 		case ASI_BLK_SL:
1405 		case ASI_BLK_COMMIT_P:
1406 		case ASI_BLK_COMMIT_S:
1407 			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1408 				prw, asi));
1409 		default:
1410 			return (ftt_unimplemented);
1411 	}
1412 }
1413 
1414 /*
1415  * Simulator for partial stores between floating-point unit and memory.
1416  */
1417 static enum ftt_type
1418 vis_prtl_fst(
1419 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1420 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1421 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1422 	void		*prw,	/* Pointer to locals and ins. */
1423 	uint_t		asi)	/* asi to emulate! */
1424 {
1425 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1426 	uint_t	opf, msk;
1427 	int	h, i, j;
1428 	uint64_t ea, tmsk;
1429 	union {
1430 		freg_type	f;
1431 		uint64_t	ll;
1432 		uint32_t	i[2];
1433 		uint16_t	s[4];
1434 		uint8_t		c[8];
1435 	} k, l, res;
1436 	enum ftt_type   ftt;
1437 
1438 	nrs1 = inst.rs1;
1439 	nrs2 = inst.rs2;
1440 	nrd = inst.rd;
1441 	if ((nrd & 1) == 1) 		/* fix register encoding */
1442 		nrd = (nrd & 0x1e) | 0x20;
1443 	opf = inst.opf;
1444 	res.ll = 0;
1445 	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1446 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1447 		if (ftt != ftt_none)
1448 			return (ftt);
1449 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1450 		if (ftt != ftt_none)
1451 			return (ftt);
1452 		msk = (uint_t)tmsk;
1453 	} else {
1454 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1455 		return (ftt_unimplemented);
1456 	}
1457 
1458 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1459 	if ((ea & 0x3) != 0)
1460 		return (ftt_alignment);	/* Require 32 bit-alignment. */
1461 
1462 	switch (asi) {
1463 	case ASI_PST8_P:
1464 	case ASI_PST8_S:
1465 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1466 		if (ftt != ftt_none)
1467 			return (ftt);
1468 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1469 		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1470 			if ((msk & j) == j)
1471 				res.c[i] = k.c[i];
1472 			else
1473 				res.c[i] = l.c[i];
1474 		}
1475 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1476 		if (ftt != ftt_none)
1477 			return (ftt);
1478 		break;
1479 	case ASI_PST8_PL:	/* little-endian */
1480 	case ASI_PST8_SL:
1481 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1482 		if (ftt != ftt_none)
1483 			return (ftt);
1484 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1485 		for (h = 7, i = 0, j = 1; i <= 7; h--, i++, j <<= 1) {
1486 			if ((msk & j) == j)
1487 				res.c[i] = k.c[h];
1488 			else
1489 				res.c[i] = l.c[i];
1490 		}
1491 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1492 		if (ftt != ftt_none)
1493 			return (ftt);
1494 		break;
1495 	case ASI_PST16_P:
1496 	case ASI_PST16_S:
1497 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1498 		if (ftt != ftt_none)
1499 			return (ftt);
1500 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1501 		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1502 			if ((msk & j) == j)
1503 				res.s[i] = k.s[i];
1504 			else
1505 				res.s[i] = l.s[i];
1506 		}
1507 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1508 		if (ftt != ftt_none)
1509 			return (ftt);
1510 		break;
1511 	case ASI_PST16_PL:
1512 	case ASI_PST16_SL:
1513 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1514 		if (ftt != ftt_none)
1515 			return (ftt);
1516 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1517 		for (h = 7, i = 0, j = 1; i <= 6; h -= 2, i += 2, j <<= 1) {
1518 			if ((msk & j) == j) {
1519 				res.c[i] = k.c[h];
1520 				res.c[i+1] = k.c[h-1];
1521 			} else {
1522 				res.c[i] = l.c[i];
1523 				res.c[i+1] = l.c[i+1];
1524 			}
1525 		}
1526 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1527 		if (ftt != ftt_none)
1528 			return (ftt);
1529 		break;
1530 	case ASI_PST32_P:
1531 	case ASI_PST32_S:
1532 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1533 		if (ftt != ftt_none)
1534 			return (ftt);
1535 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1536 		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1537 			if ((msk & j) == j)
1538 				res.i[i] = k.i[i];
1539 			else
1540 				res.i[i] = l.i[i];
1541 		}
1542 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1543 		if (ftt != ftt_none)
1544 			return (ftt);
1545 		break;
1546 	case ASI_PST32_PL:
1547 	case ASI_PST32_SL:
1548 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1549 		if (ftt != ftt_none)
1550 			return (ftt);
1551 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1552 		for (h = 7, i = 0, j = 1; i <= 4; h -= 4, i += 4, j <<= 1) {
1553 			if ((msk & j) == j) {
1554 				res.c[i] = k.c[h];
1555 				res.c[i+1] = k.c[h-1];
1556 				res.c[i+2] = k.c[h-2];
1557 				res.c[i+3] = k.c[h-3];
1558 			} else {
1559 				res.c[i] = l.c[i];
1560 				res.c[i+1] = l.c[i+1];
1561 				res.c[i+2] = l.c[i+2];
1562 				res.c[i+3] = l.c[i+3];
1563 			}
1564 		}
1565 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1566 		if (ftt != ftt_none)
1567 			return (ftt);
1568 		break;
1569 	}
1570 
1571 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1572 	pregs->r_npc += 4;
1573 	return (ftt_none);
1574 }
1575 
1576 /*
1577  * Simulator for short load/stores between floating-point unit and memory.
1578  */
1579 static enum ftt_type
1580 vis_short_fls(
1581 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1582 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1583 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1584 	void		*prw,	/* Pointer to locals and ins. */
1585 	uint_t		asi)	/* asi to emulate! */
1586 {
1587 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1588 	uint_t	opf;
1589 	uint64_t ea, tea;
1590 	union {
1591 		freg_type	f;
1592 		uint64_t	ll;
1593 		uint32_t	i[2];
1594 		uint16_t	s[4];
1595 		uint8_t		c[8];
1596 	} k;
1597 	union {
1598 		vis_inst_type	inst;
1599 		int		i;
1600 	} fp;
1601 	enum ftt_type   ftt = ftt_none;
1602 	ushort_t us;
1603 	uchar_t uc;
1604 
1605 	nrs1 = inst.rs1;
1606 	nrs2 = inst.rs2;
1607 	nrd = inst.rd;
1608 	if ((nrd & 1) == 1) 		/* fix register encoding */
1609 		nrd = (nrd & 0x1e) | 0x20;
1610 	opf = inst.opf;
1611 	fp.inst = inst;
1612 	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1613 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1614 		if (ftt != ftt_none)
1615 			return (ftt);
1616 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1617 		if (ftt != ftt_none)
1618 			return (ftt);
1619 		ea += tea;
1620 	} else {	/* effective address = rs1 + imm13 */
1621 		fp.inst = inst;
1622 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1623 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1624 		if (ftt != ftt_none)
1625 			return (ftt);
1626 		ea += tea;
1627 	}
1628 	if (get_udatamodel() == DATAMODEL_ILP32)
1629 		ea = (uint64_t)(caddr32_t)ea;
1630 
1631 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1632 	switch (asi) {
1633 	case ASI_FL8_P:
1634 	case ASI_FL8_S:
1635 	case ASI_FL8_PL:		/* little-endian */
1636 	case ASI_FL8_SL:
1637 		if ((inst.op3 & 7) == 3) {	/* load byte */
1638 			if (fuword8((void *)ea, &uc) == -1)
1639 				return (ftt_fault);
1640 			k.ll = 0;
1641 			k.c[7] = uc;
1642 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1643 		} else {			/* store byte */
1644 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1645 			uc = k.c[7];
1646 			if (subyte((caddr_t)ea, uc) == -1)
1647 				return (ftt_fault);
1648 		}
1649 		break;
1650 	case ASI_FL16_P:
1651 	case ASI_FL16_S:
1652 		if ((ea & 1) == 1)
1653 			return (ftt_alignment);
1654 		if ((inst.op3 & 7) == 3) {	/* load short */
1655 			if (fuword16((void *)ea, &us) == -1)
1656 				return (ftt_fault);
1657 			k.ll = 0;
1658 			k.s[3] = us;
1659 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1660 		} else {			/* store short */
1661 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1662 			us = k.s[3];
1663 			if (suword16((caddr_t)ea, us) == -1)
1664 				return (ftt_fault);
1665 		}
1666 		break;
1667 	case ASI_FL16_PL:		/* little-endian */
1668 	case ASI_FL16_SL:
1669 		if ((ea & 1) == 1)
1670 			return (ftt_alignment);
1671 		if ((inst.op3 & 7) == 3) {	/* load short */
1672 			if (fuword16((void *)ea, &us) == -1)
1673 				return (ftt_fault);
1674 			k.ll = 0;
1675 			k.c[6] = (uchar_t)us;
1676 			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1677 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1678 		} else {			/* store short */
1679 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1680 			uc = k.c[7];
1681 			us = (ushort_t)((uc << 8) | k.c[6]);
1682 			if (suword16((void *)ea, us) == -1)
1683 				return (ftt_fault);
1684 		}
1685 		break;
1686 	}
1687 
1688 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1689 	pregs->r_npc += 4;
1690 	return (ftt_none);
1691 }
1692 
1693 /*
1694  * Simulator for block loads and stores between floating-point unit and memory.
1695  * XXX - OK, so it is really gross to flush the whole Ecache for a block commit
1696  *	 store - but the circumstances under which this code actually gets
1697  *	 used in real life are so obscure that you can live with it!
1698  */
1699 static enum ftt_type
1700 vis_blk_fldst(
1701 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1702 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1703 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1704 	void		*prw,	/* Pointer to locals and ins. */
1705 	uint_t		asi)	/* asi to emulate! */
1706 {
1707 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1708 	uint_t	opf, h, i, j;
1709 	uint64_t ea, tea;
1710 	union {
1711 		freg_type	f;
1712 		uint64_t	ll;
1713 		uint8_t		c[8];
1714 	} k, l;
1715 	union {
1716 		vis_inst_type	inst;
1717 		int32_t		i;
1718 	} fp;
1719 	enum ftt_type   ftt;
1720 	boolean_t little_endian = B_FALSE;
1721 
1722 	nrs1 = inst.rs1;
1723 	nrs2 = inst.rs2;
1724 	nrd = inst.rd;
1725 	if ((nrd & 1) == 1) 		/* fix register encoding */
1726 		nrd = (nrd & 0x1e) | 0x20;
1727 
1728 	/* ensure register is 8-double precision aligned */
1729 	if ((nrd & 0xf) != 0)
1730 		return (ftt_unimplemented);
1731 
1732 	opf = inst.opf;
1733 	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1734 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1735 		if (ftt != ftt_none)
1736 			return (ftt);
1737 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1738 		if (ftt != ftt_none)
1739 			return (ftt);
1740 		ea += tea;
1741 	} else {			/* effective address = rs1 + imm13 */
1742 		fp.inst = inst;
1743 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1744 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1745 		if (ftt != ftt_none)
1746 			return (ftt);
1747 		ea += tea;
1748 	}
1749 	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1750 		return (ftt_alignment);
1751 
1752 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1753 	switch (asi) {
1754 	case ASI_BLK_AIUPL:
1755 	case ASI_BLK_AIUSL:
1756 	case ASI_BLK_PL:
1757 	case ASI_BLK_SL:
1758 		little_endian = B_TRUE;
1759 		/* FALLTHROUGH */
1760 	case ASI_BLK_AIUP:
1761 	case ASI_BLK_AIUS:
1762 	case ASI_BLK_P:
1763 	case ASI_BLK_S:
1764 	case ASI_BLK_COMMIT_P:
1765 	case ASI_BLK_COMMIT_S:
1766 		if ((inst.op3 & 7) == 3) {	/* lddf */
1767 		    for (i = 0; i < 8; i++, ea += 8, nrd += 2) {
1768 			ftt = _fp_read_extword((uint64_t *)ea, &k.ll, pfpsd);
1769 			if (ftt != ftt_none)
1770 				return (ftt);
1771 			if (little_endian) {
1772 				for (j = 0, h = 7; j < 8; j++, h--)
1773 					l.c[h] = k.c[j];
1774 				k.ll = l.ll;
1775 			}
1776 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1777 		    }
1778 		} else {			/* stdf */
1779 		    for (i = 0; i < 8; i++, ea += 8, nrd += 2) {
1780 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1781 			if (little_endian) {
1782 				for (j = 0, h = 7; j < 8; j++, h--)
1783 					l.c[h] = k.c[j];
1784 				k.ll = l.ll;
1785 			}
1786 			ftt = _fp_write_extword((uint64_t *)ea, k.ll, pfpsd);
1787 			if (ftt != ftt_none)
1788 				return (ftt);
1789 		    }
1790 		}
1791 		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1792 			cpu_flush_ecache();
1793 		break;
1794 	default:
1795 		/* addr of unimp inst */
1796 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1797 		return (ftt_unimplemented);
1798 	}
1799 
1800 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1801 	pregs->r_npc += 4;
1802 	return (ftt_none);
1803 }
1804 
1805 /*
1806  * Simulator for rd %gsr instruction.
1807  */
1808 enum ftt_type
1809 vis_rdgsr(
1810 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1811 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1812 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1813 	void		*prw,	/* Pointer to locals and ins. */
1814 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1815 {
1816 	uint_t nrd;
1817 	uint64_t r;
1818 	enum ftt_type ftt = ftt_none;
1819 
1820 	nrd = pinst.rd;
1821 
1822 	r = pfpsd->fp_current_read_gsr(fp);
1823 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
1824 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1825 	pregs->r_npc += 4;
1826 	return (ftt);
1827 }
1828 
1829 /*
1830  * Simulator for wr %gsr instruction.
1831  */
1832 enum ftt_type
1833 vis_wrgsr(
1834 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1835 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1836 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1837 	void		*prw,	/* Pointer to locals and ins. */
1838 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1839 {
1840 	uint_t nrs1;
1841 	uint64_t r, r1, r2;
1842 	enum ftt_type ftt = ftt_none;
1843 
1844 	nrs1 = pinst.rs1;
1845 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
1846 	if (ftt != ftt_none)
1847 		return (ftt);
1848 	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
1849 		uint_t nrs2;
1850 
1851 		nrs2 = pinst.rs2;
1852 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
1853 		if (ftt != ftt_none)
1854 			return (ftt);
1855 	} else {	/* use sign_ext(simm13) */
1856 		union {
1857 			fp_inst_type	inst;
1858 			uint32_t	i;
1859 		} fp;
1860 
1861 		fp.inst = pinst;		/* Extract simm13 field */
1862 		r2 = (fp.i << 19) >> 19;
1863 	}
1864 	r = r1 ^ r2;
1865 	pfpsd->fp_current_write_gsr(r, fp);
1866 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1867 	pregs->r_npc += 4;
1868 	return (ftt);
1869 }
1870 
1871 /*
1872  * This is the loadable module wrapper.
1873  */
1874 #include <sys/errno.h>
1875 #include <sys/modctl.h>
1876 
1877 /*
1878  * Module linkage information for the kernel.
1879  */
1880 extern struct mod_ops mod_miscops;
1881 
1882 static struct modlmisc modlmisc = {
1883 	&mod_miscops,
1884 	"vis fp simulation",
1885 };
1886 
1887 static struct modlinkage modlinkage = {
1888 	MODREV_1, (void *)&modlmisc, NULL
1889 };
1890 
1891 int
1892 _init(void)
1893 {
1894 	return (mod_install(&modlinkage));
1895 }
1896 
1897 int
1898 _info(struct modinfo *modinfop)
1899 {
1900 	return (mod_info(&modlinkage, modinfop));
1901 }
1902