xref: /illumos-gate/usr/src/uts/sun4/os/visinstr.c (revision 8b80e8cb6855118d46f605e91b5ed4ce83417395)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /* VIS floating point instruction simulator for Sparc FPU simulator. */
29 
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/fpu/fpusystm.h>
33 #include <sys/fpu/fpu_simulator.h>
34 #include <sys/vis_simulator.h>
35 #include <sys/fpu/globals.h>
36 #include <sys/privregs.h>
37 #include <sys/sun4asi.h>
38 #include <sys/machasi.h>
39 #include <sys/debug.h>
40 #include <sys/cpu_module.h>
41 #include <sys/systm.h>
42 
43 #define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
44 #define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
45 #define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */
46 
47 extern	uint_t	get_subcc_ccr(uint64_t, uint64_t);
48 
49 static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
50 				void *);
51 static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
52 				struct regs *, void *, kfpu_t *);
53 static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
54 				void *);
55 static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
56 				kfpu_t *);
57 static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
58 				void *, kfpu_t *);
59 static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
60 				kfpu_t *);
61 static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
62 static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
63 				void *);
64 static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
65 static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
66 static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
67 static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type);
68 static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
69 				void *, uint_t);
70 static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
71 				struct regs *, void *, uint_t);
72 static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
73 				struct regs *, void *, uint_t);
74 
75 /*
76  * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
77  * traps.
78  */
79 enum ftt_type
80 vis_fpu_simulator(
81 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
82 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
83 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
84 	void		*prw,	/* Pointer to locals and ins. */
85 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
86 {
87 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
88 	uint_t	us1, us2, usr;
89 	uint64_t lus1, lus2, lusr;
90 	enum ftt_type ftt = ftt_none;
91 	union {
92 		vis_inst_type	inst;
93 		fp_inst_type	pinst;
94 	} f;
95 
96 	ASSERT(USERMODE(pregs->r_tstate));
97 	nrs1 = pinst.rs1;
98 	nrs2 = pinst.rs2;
99 	nrd = pinst.rd;
100 	f.pinst = pinst;
101 	if ((f.inst.opf & 1) == 0) {		/* double precision */
102 		if ((nrs1 & 1) == 1) 		/* fix register encoding */
103 			nrs1 = (nrs1 & 0x1e) | 0x20;
104 		if ((nrs2 & 1) == 1)
105 			nrs2 = (nrs2 & 0x1e) | 0x20;
106 		if ((nrd & 1) == 1)
107 			nrd = (nrd & 0x1e) | 0x20;
108 	}
109 
110 	switch (f.inst.opf) {
111 		/* these instr's do not use fp regs */
112 	case edge8:
113 	case edge8l:
114 	case edge8n:
115 	case edge8ln:
116 	case edge16:
117 	case edge16l:
118 	case edge16n:
119 	case edge16ln:
120 	case edge32:
121 	case edge32l:
122 	case edge32n:
123 	case edge32ln:
124 		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
125 		break;
126 	case array8:
127 	case array16:
128 	case array32:
129 		ftt = vis_array(pfpsd, f.inst, pregs, prw);
130 		break;
131 	case alignaddr:
132 	case alignaddrl:
133 		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
134 		break;
135 	case bmask:
136 		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
137 		break;
138 	case fcmple16:
139 	case fcmpne16:
140 	case fcmpgt16:
141 	case fcmpeq16:
142 	case fcmple32:
143 	case fcmpne32:
144 	case fcmpgt32:
145 	case fcmpeq32:
146 		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
147 		break;
148 	case fmul8x16:
149 	case fmul8x16au:
150 	case fmul8x16al:
151 	case fmul8sux16:
152 	case fmul8ulx16:
153 	case fmuld8sux16:
154 	case fmuld8ulx16:
155 		ftt = vis_fmul(pfpsd, f.inst);
156 		break;
157 	case fpack16:
158 	case fpack32:
159 	case fpackfix:
160 	case fexpand:
161 	case fpmerge:
162 		ftt = vis_fpixel(pfpsd, f.inst, fp);
163 		break;
164 	case pdist:
165 		ftt = vis_pdist(pfpsd, pinst);
166 		break;
167 	case faligndata:
168 		ftt = vis_faligndata(pfpsd, pinst, fp);
169 		break;
170 	case bshuffle:
171 		ftt = vis_bshuffle(pfpsd, pinst, fp);
172 		break;
173 	case fpadd16:
174 	case fpadd16s:
175 	case fpadd32:
176 	case fpadd32s:
177 	case fpsub16:
178 	case fpsub16s:
179 	case fpsub32:
180 	case fpsub32s:
181 		ftt = vis_fpaddsub(pfpsd, f.inst);
182 		break;
183 	case fzero:
184 		lusr = 0;
185 		_fp_pack_extword(pfpsd, &lusr, nrd);
186 		break;
187 	case fzeros:
188 		usr = 0;
189 		_fp_pack_word(pfpsd, &usr, nrd);
190 		break;
191 	case fnor:
192 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
193 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
194 		lusr = ~(lus1 | lus2);
195 		_fp_pack_extword(pfpsd, &lusr, nrd);
196 		break;
197 	case fnors:
198 		_fp_unpack_word(pfpsd, &us1, nrs1);
199 		_fp_unpack_word(pfpsd, &us2, nrs2);
200 		usr = ~(us1 | us2);
201 		_fp_pack_word(pfpsd, &usr, nrd);
202 		break;
203 	case fandnot2:
204 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
205 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
206 		lusr = (lus1 & ~lus2);
207 		_fp_pack_extword(pfpsd, &lusr, nrd);
208 		break;
209 	case fandnot2s:
210 		_fp_unpack_word(pfpsd, &us1, nrs1);
211 		_fp_unpack_word(pfpsd, &us2, nrs2);
212 		usr = (us1 & ~us2);
213 		_fp_pack_word(pfpsd, &usr, nrd);
214 		break;
215 	case fnot2:
216 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
217 		lusr = ~lus2;
218 		_fp_pack_extword(pfpsd, &lusr, nrd);
219 		break;
220 	case fnot2s:
221 		_fp_unpack_word(pfpsd, &us2, nrs2);
222 		usr = ~us2;
223 		_fp_pack_word(pfpsd, &usr, nrd);
224 		break;
225 	case fandnot1:
226 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
227 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
228 		lusr = (~lus1 & lus2);
229 		_fp_pack_extword(pfpsd, &lusr, nrd);
230 		break;
231 	case fandnot1s:
232 		_fp_unpack_word(pfpsd, &us1, nrs1);
233 		_fp_unpack_word(pfpsd, &us2, nrs2);
234 		usr = (~us1 & us2);
235 		_fp_pack_word(pfpsd, &usr, nrd);
236 		break;
237 	case fnot1:
238 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
239 		lusr = ~lus1;
240 		_fp_pack_extword(pfpsd, &lusr, nrd);
241 		break;
242 	case fnot1s:
243 		_fp_unpack_word(pfpsd, &us1, nrs1);
244 		usr = ~us1;
245 		_fp_pack_word(pfpsd, &usr, nrd);
246 		break;
247 	case fxor:
248 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
249 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
250 		lusr = (lus1 ^ lus2);
251 		_fp_pack_extword(pfpsd, &lusr, nrd);
252 		break;
253 	case fxors:
254 		_fp_unpack_word(pfpsd, &us1, nrs1);
255 		_fp_unpack_word(pfpsd, &us2, nrs2);
256 		usr = (us1 ^ us2);
257 		_fp_pack_word(pfpsd, &usr, nrd);
258 		break;
259 	case fnand:
260 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
261 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
262 		lusr = ~(lus1 & lus2);
263 		_fp_pack_extword(pfpsd, &lusr, nrd);
264 		break;
265 	case fnands:
266 		_fp_unpack_word(pfpsd, &us1, nrs1);
267 		_fp_unpack_word(pfpsd, &us2, nrs2);
268 		usr = ~(us1 & us2);
269 		_fp_pack_word(pfpsd, &usr, nrd);
270 		break;
271 	case fand:
272 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
273 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
274 		lusr = (lus1 & lus2);
275 		_fp_pack_extword(pfpsd, &lusr, nrd);
276 		break;
277 	case fands:
278 		_fp_unpack_word(pfpsd, &us1, nrs1);
279 		_fp_unpack_word(pfpsd, &us2, nrs2);
280 		usr = (us1 & us2);
281 		_fp_pack_word(pfpsd, &usr, nrd);
282 		break;
283 	case fxnor:
284 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
285 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
286 		lusr = ~(lus1 ^ lus2);
287 		_fp_pack_extword(pfpsd, &lusr, nrd);
288 		break;
289 	case fxnors:
290 		_fp_unpack_word(pfpsd, &us1, nrs1);
291 		_fp_unpack_word(pfpsd, &us2, nrs2);
292 		usr = ~(us1 ^ us2);
293 		_fp_pack_word(pfpsd, &usr, nrd);
294 		break;
295 	case fsrc1:
296 		_fp_unpack_extword(pfpsd, &lusr, nrs1);
297 		_fp_pack_extword(pfpsd, &lusr, nrd);
298 		break;
299 	case fsrc1s:
300 		_fp_unpack_word(pfpsd, &usr, nrs1);
301 		_fp_pack_word(pfpsd, &usr, nrd);
302 		break;
303 	case fornot2:
304 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
305 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
306 		lusr = (lus1 | ~lus2);
307 		_fp_pack_extword(pfpsd, &lusr, nrd);
308 		break;
309 	case fornot2s:
310 		_fp_unpack_word(pfpsd, &us1, nrs1);
311 		_fp_unpack_word(pfpsd, &us2, nrs2);
312 		usr = (us1 | ~us2);
313 		_fp_pack_word(pfpsd, &usr, nrd);
314 		break;
315 	case fsrc2:
316 		_fp_unpack_extword(pfpsd, &lusr, nrs2);
317 		_fp_pack_extword(pfpsd, &lusr, nrd);
318 		break;
319 	case fsrc2s:
320 		_fp_unpack_word(pfpsd, &usr, nrs2);
321 		_fp_pack_word(pfpsd, &usr, nrd);
322 		break;
323 	case fornot1:
324 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
325 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
326 		lusr = (~lus1 | lus2);
327 		_fp_pack_extword(pfpsd, &lusr, nrd);
328 		break;
329 	case fornot1s:
330 		_fp_unpack_word(pfpsd, &us1, nrs1);
331 		_fp_unpack_word(pfpsd, &us2, nrs2);
332 		usr = (~us1 | us2);
333 		_fp_pack_word(pfpsd, &usr, nrd);
334 		break;
335 	case for_op:
336 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
337 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
338 		lusr = (lus1 | lus2);
339 		_fp_pack_extword(pfpsd, &lusr, nrd);
340 		break;
341 	case fors_op:
342 		_fp_unpack_word(pfpsd, &us1, nrs1);
343 		_fp_unpack_word(pfpsd, &us2, nrs2);
344 		usr = (us1 | us2);
345 		_fp_pack_word(pfpsd, &usr, nrd);
346 		break;
347 	case fone:
348 		lusr = 0xffffffffffffffff;
349 		_fp_pack_extword(pfpsd, &lusr, nrd);
350 		break;
351 	case fones:
352 		usr = 0xffffffffUL;
353 		_fp_pack_word(pfpsd, &usr, nrd);
354 		break;
355 	case siam:
356 		ftt = vis_siam(pfpsd, f.inst, fp);
357 		break;
358 	default:
359 		return (ftt_unimplemented);
360 	}
361 
362 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
363 	pregs->r_npc += 4;
364 	return (ftt);
365 }
366 
367 /*
368  * Simulator for edge instructions
369  */
370 static enum ftt_type
371 vis_edge(
372 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
373 	vis_inst_type	inst,	/* FPU instruction to simulate. */
374 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
375 	void		*prw)	/* Pointer to locals and ins. */
376 
377 {
378 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
379 	enum ftt_type ftt;
380 	uint64_t addrl, addrr, mask;
381 	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
382 	int al3l, al3r;			/* Lower 3 bits of address */
383 	uint_t	ccr;
384 
385 	nrs1 = inst.rs1;
386 	nrs2 = inst.rs2;
387 	nrd = inst.rd;
388 
389 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
390 	if (ftt != ftt_none)
391 		return (ftt);
392 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
393 	if (ftt != ftt_none)
394 		return (ftt);
395 
396 	/* Test PSTATE.AM to determine 32-bit vs 64-bit addressing */
397 	if ((pregs->r_tstate & TSTATE_AM) != 0) {
398 		ah61l = addrl & 0xfffffff8;
399 		ah61r = addrr & 0xfffffff8;
400 	} else {
401 		ah61l = addrl & ~0x7;
402 		ah61r = addrr & ~0x7;
403 	}
404 
405 
406 	switch (inst.opf) {
407 	case edge8:
408 	case edge8n:
409 	case edge8l:
410 	case edge8ln:
411 		al3l = addrl & 0x7;
412 		switch (inst.opf) {
413 		case edge8:
414 		case edge8n:
415 			if (inst.opf == edge8) {
416 				VISINFO_KSTAT(vis_edge8);
417 			} else {
418 				VISINFO_KSTAT(vis_edge8n);
419 			}
420 			mask = 0xff >> al3l;
421 			if (ah61l == ah61r) {
422 				al3r = addrr & 0x7;
423 				mask &= (0xff << (0x7 - al3r)) & 0xff;
424 			}
425 			break;
426 		case edge8l:
427 		case edge8ln:
428 			if (inst.opf == edge8l) {
429 				VISINFO_KSTAT(vis_edge8l);
430 			} else {
431 				VISINFO_KSTAT(vis_edge8ln);
432 			}
433 			mask = (0xff << al3l) & 0xff;
434 			if (ah61l == ah61r) {
435 				al3r = addrr & 0x7;
436 				mask &= 0xff >> (0x7 - al3r);
437 			}
438 			break;
439 		}
440 		break;
441 	case edge16:
442 	case edge16l:
443 	case edge16n:
444 	case edge16ln:
445 		al3l = addrl & 0x6;
446 		al3l >>= 0x1;
447 		switch (inst.opf) {
448 		case edge16:
449 		case edge16n:
450 			if (inst.opf == edge16) {
451 				VISINFO_KSTAT(vis_edge16);
452 
453 			} else {
454 				VISINFO_KSTAT(vis_edge16n);
455 			}
456 			mask = 0xf >> al3l;
457 			if (ah61l == ah61r) {
458 				al3r = addrr & 0x6;
459 				al3r >>= 0x1;
460 				mask &= (0xf << (0x3 - al3r)) & 0xf;
461 			}
462 			break;
463 		case edge16l:
464 		case edge16ln:
465 			if (inst.opf == edge16l) {
466 				VISINFO_KSTAT(vis_edge16l);
467 
468 			} else {
469 				VISINFO_KSTAT(vis_edge16ln);
470 			}
471 
472 			mask = (0xf << al3l) & 0xf;
473 			if (ah61l == ah61r) {
474 				al3r = addrr & 0x6;
475 				al3r >>= 0x1;
476 				mask &= 0xf >> (0x3 - al3r);
477 			}
478 			break;
479 		}
480 		break;
481 	case edge32:
482 	case edge32l:
483 	case edge32n:
484 	case edge32ln:
485 		al3l = addrl & 0x4;
486 		al3l >>= 0x2;
487 
488 		switch (inst.opf) {
489 		case edge32:
490 		case edge32n:
491 			if (inst.opf == edge32) {
492 				VISINFO_KSTAT(vis_edge32);
493 
494 			} else {
495 				VISINFO_KSTAT(vis_edge32n);
496 			}
497 			mask = 0x3 >> al3l;
498 			if (ah61l == ah61r) {
499 				al3r = addrr & 0x4;
500 				al3r >>= 0x2;
501 				mask &= (0x3 << (0x1 - al3r)) & 0x3;
502 			}
503 			break;
504 		case edge32l:
505 		case edge32ln:
506 			if (inst.opf == edge32l) {
507 				VISINFO_KSTAT(vis_edge32l);
508 
509 			} else {
510 				VISINFO_KSTAT(vis_edge32ln);
511 			}
512 			mask = (0x3 << al3l) & 0x3;
513 			if (ah61l == ah61r) {
514 				al3r = addrr & 0x4;
515 				al3r >>= 0x2;
516 				mask &= 0x3 >> (0x1 - al3r);
517 			}
518 			break;
519 		}
520 		break;
521 	}
522 
523 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
524 
525 	switch (inst.opf) {
526 	case edge8:
527 	case edge8l:
528 	case edge16:
529 	case edge16l:
530 	case edge32:
531 	case edge32l:
532 
533 		/* Update flags per SUBcc outcome */
534 		pregs->r_tstate &= ~((uint64_t)TSTATE_CCR_MASK
535 					<< TSTATE_CCR_SHIFT);
536 		ccr = get_subcc_ccr(addrl, addrr);  /* get subcc cond. codes */
537 		pregs->r_tstate |= ((uint64_t)ccr << TSTATE_CCR_SHIFT);
538 
539 		break;
540 	}
541 	return (ftt);
542 }
543 
544 /*
545  * Simulator for three dimentional array addressing instructions.
546  */
547 static enum ftt_type
548 vis_array(
549 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
550 	vis_inst_type	inst,	/* FPU instruction to simulate. */
551 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
552 	void		*prw)	/* Pointer to locals and ins. */
553 
554 {
555 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
556 	enum ftt_type ftt;
557 	uint64_t laddr, bsize, baddr;
558 	uint64_t nbit;
559 	int oy, oz;
560 
561 	nrs1 = inst.rs1;
562 	nrs2 = inst.rs2;
563 	nrd = inst.rd;
564 
565 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
566 	if (ftt != ftt_none)
567 		return (ftt);
568 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
569 	if (ftt != ftt_none)
570 		return (ftt);
571 
572 	if (bsize > 5) {
573 		bsize = 5;
574 	}
575 	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
576 	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
577 	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
578 
579 	baddr = 0;
580 	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
581 	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
582 	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
583 	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
584 	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
585 	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
586 	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
587 	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
588 	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
589 
590 	switch (inst.opf) {
591 	case array8:
592 		VISINFO_KSTAT(vis_array8);
593 		break;
594 	case array16:
595 		VISINFO_KSTAT(vis_array16);
596 		baddr <<= 1;
597 		break;
598 	case array32:
599 		VISINFO_KSTAT(vis_array32);
600 		baddr <<= 2;
601 		break;
602 	}
603 
604 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
605 
606 	return (ftt);
607 }
608 
609 /*
610  * Simulator for alignaddr and alignaddrl instructions.
611  */
612 static enum ftt_type
613 vis_alignaddr(
614 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
615 	vis_inst_type	inst,	/* FPU instruction to simulate. */
616 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
617 	void		*prw,	/* Pointer to locals and ins. */
618 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
619 {
620 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
621 	enum ftt_type ftt;
622 	uint64_t ea, tea, g, r;
623 	short s;
624 
625 	nrs1 = inst.rs1;
626 	nrs2 = inst.rs2;
627 	nrd = inst.rd;
628 
629 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
630 	if (ftt != ftt_none)
631 		return (ftt);
632 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
633 	if (ftt != ftt_none)
634 		return (ftt);
635 	ea += tea;
636 	r = ea & ~0x7;	/* zero least 3 significant bits */
637 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
638 
639 
640 	g = pfpsd->fp_current_read_gsr(fp);
641 	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
642 	r = ea & 0x7;
643 	if (inst.opf == alignaddrl) {
644 		s = (short)(~r);	/* 2's complement for alignaddrl */
645 		if (s < 0)
646 			r = (uint64_t)((s + 1) & 0x7);
647 		else
648 			r = (uint64_t)(s & 0x7);
649 	}
650 	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
651 	pfpsd->fp_current_write_gsr(g, fp);
652 
653 	return (ftt);
654 }
655 
656 /*
657  * Simulator for bmask instruction.
658  */
659 static enum ftt_type
660 vis_bmask(
661 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
662 	vis_inst_type	inst,	/* FPU instruction to simulate. */
663 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
664 	void		*prw,	/* Pointer to locals and ins. */
665 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
666 {
667 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
668 	enum ftt_type ftt;
669 	uint64_t ea, tea, g;
670 
671 	VISINFO_KSTAT(vis_bmask);
672 	nrs1 = inst.rs1;
673 	nrs2 = inst.rs2;
674 	nrd = inst.rd;
675 
676 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
677 	if (ftt != ftt_none)
678 		return (ftt);
679 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
680 	if (ftt != ftt_none)
681 		return (ftt);
682 	ea += tea;
683 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
684 
685 	g = pfpsd->fp_current_read_gsr(fp);
686 	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
687 
688 	/* Put the least significant 32 bits of ea in GSR.mask */
689 	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
690 	pfpsd->fp_current_write_gsr(g, fp);
691 	return (ftt);
692 }
693 
694 /*
695  * Simulator for fp[add|sub]* instruction.
696  */
697 static enum ftt_type
698 vis_fpaddsub(
699 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
700 	vis_inst_type	inst)	/* FPU instruction to simulate. */
701 {
702 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
703 	union {
704 		uint64_t	ll;
705 		uint32_t	i[2];
706 		uint16_t	s[4];
707 	} lrs1, lrs2, lrd;
708 	union {
709 		uint32_t	i;
710 		uint16_t	s[2];
711 	} krs1, krs2, krd;
712 	int i;
713 
714 	nrs1 = inst.rs1;
715 	nrs2 = inst.rs2;
716 	nrd = inst.rd;
717 	if ((inst.opf & 1) == 0) {	/* double precision */
718 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
719 			nrs1 = (nrs1 & 0x1e) | 0x20;
720 		if ((nrs2 & 1) == 1)
721 			nrs2 = (nrs2 & 0x1e) | 0x20;
722 		if ((nrd & 1) == 1)
723 			nrd = (nrd & 0x1e) | 0x20;
724 	}
725 	switch (inst.opf) {
726 	case fpadd16:
727 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
728 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
729 		for (i = 0; i <= 3; i++) {
730 			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
731 		}
732 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
733 		break;
734 	case fpadd16s:
735 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
736 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
737 		for (i = 0; i <= 1; i++) {
738 			krd.s[i] = krs1.s[i] + krs2.s[i];
739 		}
740 		_fp_pack_word(pfpsd, &krd.i, nrd);
741 		break;
742 	case fpadd32:
743 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
744 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
745 		for (i = 0; i <= 1; i++) {
746 			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
747 		}
748 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
749 		break;
750 	case fpadd32s:
751 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
752 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
753 		krd.i = krs1.i + krs2.i;
754 		_fp_pack_word(pfpsd, &krd.i, nrd);
755 		break;
756 	case fpsub16:
757 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
758 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
759 		for (i = 0; i <= 3; i++) {
760 			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
761 		}
762 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
763 		break;
764 	case fpsub16s:
765 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
766 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
767 		for (i = 0; i <= 1; i++) {
768 			krd.s[i] = krs1.s[i] - krs2.s[i];
769 		}
770 		_fp_pack_word(pfpsd, &krd.i, nrd);
771 		break;
772 	case fpsub32:
773 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
774 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
775 		for (i = 0; i <= 1; i++) {
776 			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
777 		}
778 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
779 		break;
780 	case fpsub32s:
781 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
782 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
783 		krd.i = krs1.i - krs2.i;
784 		_fp_pack_word(pfpsd, &krd.i, nrd);
785 		break;
786 	}
787 	return (ftt_none);
788 }
789 
790 /*
791  * Simulator for fcmp* instruction.
792  */
793 static enum ftt_type
794 vis_fcmp(
795 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
796 	vis_inst_type	inst,	/* FPU instruction to simulate. */
797 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
798 	void		*prw)	/* Pointer to locals and ins. */
799 {
800 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
801 	union {
802 		uint64_t	ll;
803 		uint32_t	i[2];
804 		uint16_t	s[4];
805 	} krs1, krs2, krd;
806 	enum ftt_type ftt;
807 	short sr1, sr2;
808 	int i, ir1, ir2;
809 
810 	nrs1 = inst.rs1;
811 	nrs2 = inst.rs2;
812 	nrd = inst.rd;
813 	krd.ll = 0;
814 	if ((nrs1 & 1) == 1) 	/* fix register encoding */
815 		nrs1 = (nrs1 & 0x1e) | 0x20;
816 	if ((nrs2 & 1) == 1)
817 		nrs2 = (nrs2 & 0x1e) | 0x20;
818 
819 	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
820 	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
821 	switch (inst.opf) {
822 	case fcmple16:
823 		VISINFO_KSTAT(vis_fcmple16);
824 		for (i = 0; i <= 3; i++) {
825 			sr1 = (short)krs1.s[i];
826 			sr2 = (short)krs2.s[i];
827 			if (sr1 <= sr2)
828 				krd.ll += (0x8 >> i);
829 		}
830 		break;
831 	case fcmpne16:
832 		VISINFO_KSTAT(vis_fcmpne16);
833 		for (i = 0; i <= 3; i++) {
834 			sr1 = (short)krs1.s[i];
835 			sr2 = (short)krs2.s[i];
836 			if (sr1 != sr2)
837 				krd.ll += (0x8 >> i);
838 		}
839 		break;
840 	case fcmpgt16:
841 		VISINFO_KSTAT(vis_fcmpgt16);
842 		for (i = 0; i <= 3; i++) {
843 			sr1 = (short)krs1.s[i];
844 			sr2 = (short)krs2.s[i];
845 			if (sr1 > sr2)
846 				krd.ll += (0x8 >> i);
847 		}
848 		break;
849 	case fcmpeq16:
850 		VISINFO_KSTAT(vis_fcmpeq16);
851 		for (i = 0; i <= 3; i++) {
852 			sr1 = (short)krs1.s[i];
853 			sr2 = (short)krs2.s[i];
854 			if (sr1 == sr2)
855 				krd.ll += (0x8 >> i);
856 		}
857 		break;
858 	case fcmple32:
859 		VISINFO_KSTAT(vis_fcmple32);
860 		for (i = 0; i <= 1; i++) {
861 			ir1 = (int)krs1.i[i];
862 			ir2 = (int)krs2.i[i];
863 			if (ir1 <= ir2)
864 				krd.ll += (0x2 >> i);
865 		}
866 		break;
867 	case fcmpne32:
868 		VISINFO_KSTAT(vis_fcmpne32);
869 		for (i = 0; i <= 1; i++) {
870 			ir1 = (int)krs1.i[i];
871 			ir2 = (int)krs2.i[i];
872 			if (ir1 != ir2)
873 				krd.ll += (0x2 >> i);
874 		}
875 		break;
876 	case fcmpgt32:
877 		VISINFO_KSTAT(vis_fcmpgt32);
878 		for (i = 0; i <= 1; i++) {
879 			ir1 = (int)krs1.i[i];
880 			ir2 = (int)krs2.i[i];
881 			if (ir1 > ir2)
882 				krd.ll += (0x2 >> i);
883 		}
884 		break;
885 	case fcmpeq32:
886 		VISINFO_KSTAT(vis_fcmpeq32);
887 		for (i = 0; i <= 1; i++) {
888 			ir1 = (int)krs1.i[i];
889 			ir2 = (int)krs2.i[i];
890 			if (ir1 == ir2)
891 				krd.ll += (0x2 >> i);
892 		}
893 		break;
894 	}
895 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
896 	return (ftt);
897 }
898 
899 /*
900  * Simulator for fmul* instruction.
901  */
902 static enum ftt_type
903 vis_fmul(
904 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
905 	vis_inst_type	inst)	/* FPU instruction to simulate. */
906 {
907 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
908 	union {
909 		uint64_t	ll;
910 		uint32_t	i[2];
911 		uint16_t	s[4];
912 		uint8_t		c[8];
913 	} lrs1, lrs2, lrd;
914 	union {
915 		uint32_t	i;
916 		uint16_t	s[2];
917 		uint8_t		c[4];
918 	} krs1, krs2, kres;
919 	short s1, s2, sres;
920 	ushort_t us1;
921 	char c1;
922 	int i;
923 
924 	nrs1 = inst.rs1;
925 	nrs2 = inst.rs2;
926 	nrd = inst.rd;
927 	if ((inst.opf & 1) == 0) {	/* double precision */
928 		if ((nrd & 1) == 1) 	/* fix register encoding */
929 			nrd = (nrd & 0x1e) | 0x20;
930 	}
931 
932 	switch (inst.opf) {
933 	case fmul8x16:
934 		VISINFO_KSTAT(vis_fmul8x16);
935 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
936 		if ((nrs2 & 1) == 1)
937 			nrs2 = (nrs2 & 0x1e) | 0x20;
938 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
939 		for (i = 0; i <= 3; i++) {
940 			us1 = (ushort_t)krs1.c[i];
941 			s2 = (short)lrs2.s[i];
942 			kres.i = us1 * s2;
943 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
944 			if (kres.c[3] >= 0x80)
945 				sres++;
946 			lrd.s[i] = sres;
947 		}
948 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
949 		break;
950 	case fmul8x16au:
951 		VISINFO_KSTAT(vis_fmul8x16au);
952 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
953 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
954 		for (i = 0; i <= 3; i++) {
955 			us1 = (ushort_t)krs1.c[i];
956 			s2 = (short)krs2.s[0];
957 			kres.i = us1 * s2;
958 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
959 			if (kres.c[3] >= 0x80)
960 				sres++;
961 			lrd.s[i] = sres;
962 		}
963 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
964 		break;
965 	case fmul8x16al:
966 		VISINFO_KSTAT(vis_fmul8x16al);
967 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
968 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
969 		for (i = 0; i <= 3; i++) {
970 			us1 = (ushort_t)krs1.c[i];
971 			s2 = (short)krs2.s[1];
972 			kres.i = us1 * s2;
973 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
974 			if (kres.c[3] >= 0x80)
975 				sres++;
976 			lrd.s[i] = sres;
977 		}
978 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
979 		break;
980 	case fmul8sux16:
981 		VISINFO_KSTAT(vis_fmul8sux16);
982 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
983 			nrs1 = (nrs1 & 0x1e) | 0x20;
984 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
985 		if ((nrs2 & 1) == 1)
986 			nrs2 = (nrs2 & 0x1e) | 0x20;
987 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
988 		for (i = 0; i <= 3; i++) {
989 			c1 = lrs1.c[(i*2)];
990 			s1 = (short)c1;		/* keeps the sign alive */
991 			s2 = (short)lrs2.s[i];
992 			kres.i = s1 * s2;
993 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
994 			if (kres.c[3] >= 0x80)
995 				sres++;
996 			if (sres < 0)
997 				lrd.s[i] = (sres & 0xFFFF);
998 			else
999 				lrd.s[i] = sres;
1000 		}
1001 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1002 		break;
1003 	case fmul8ulx16:
1004 		VISINFO_KSTAT(vis_fmul8ulx16);
1005 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1006 			nrs1 = (nrs1 & 0x1e) | 0x20;
1007 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1008 		if ((nrs2 & 1) == 1)
1009 			nrs2 = (nrs2 & 0x1e) | 0x20;
1010 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1011 		for (i = 0; i <= 3; i++) {
1012 			us1 = (ushort_t)lrs1.c[(i*2)+1];
1013 			s2 = (short)lrs2.s[i];
1014 			kres.i = us1 * s2;
1015 			sres = (short)kres.s[0];
1016 			if (kres.s[1] >= 0x8000)
1017 				sres++;
1018 			lrd.s[i] = sres;
1019 		}
1020 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1021 		break;
1022 	case fmuld8sux16:
1023 		VISINFO_KSTAT(vis_fmuld8sux16);
1024 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1025 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1026 		for (i = 0; i <= 1; i++) {
1027 			c1 = krs1.c[(i*2)];
1028 			s1 = (short)c1;		/* keeps the sign alive */
1029 			s2 = (short)krs2.s[i];
1030 			kres.i = s1 * s2;
1031 			lrd.i[i] = kres.i << 8;
1032 		}
1033 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1034 		break;
1035 	case fmuld8ulx16:
1036 		VISINFO_KSTAT(vis_fmuld8ulx16);
1037 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1038 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1039 		for (i = 0; i <= 1; i++) {
1040 			us1 = (ushort_t)krs1.c[(i*2)+1];
1041 			s2 = (short)krs2.s[i];
1042 			lrd.i[i] = us1 * s2;
1043 		}
1044 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1045 		break;
1046 	}
1047 	return (ftt_none);
1048 }
1049 
1050 /*
1051  * Simulator for fpixel formatting instructions.
1052  */
1053 static enum ftt_type
1054 vis_fpixel(
1055 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1056 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1057 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1058 {
1059 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1060 	int	i, j, k, sf;
1061 	union {
1062 		uint64_t	ll;
1063 		uint32_t	i[2];
1064 		uint16_t	s[4];
1065 		uint8_t		c[8];
1066 	} lrs1, lrs2, lrd;
1067 	union {
1068 		uint32_t	i;
1069 		uint16_t	s[2];
1070 		uint8_t		c[4];
1071 	} krs1, krs2, krd;
1072 	uint64_t r;
1073 	int64_t l, m;
1074 	short s;
1075 	uchar_t uc;
1076 
1077 	nrs1 = inst.rs1;
1078 	nrs2 = inst.rs2;
1079 	nrd = inst.rd;
1080 	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1081 		if ((nrd & 1) == 1) 	/* fix register encoding */
1082 			nrd = (nrd & 0x1e) | 0x20;
1083 	}
1084 
1085 	switch (inst.opf) {
1086 	case fpack16:
1087 		VISINFO_KSTAT(vis_fpack16);
1088 		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1089 			nrs2 = (nrs2 & 0x1e) | 0x20;
1090 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1091 		r = pfpsd->fp_current_read_gsr(fp);
1092 		/* fpack16 ignores GSR.scale msb */
1093 		sf = (int)(GSR_SCALE(r) & 0xf);
1094 		for (i = 0; i <= 3; i++) {
1095 			s = (short)lrs2.s[i];	/* preserve the sign */
1096 			j = ((int)s << sf);
1097 			k = j >> 7;
1098 			if (k < 0) {
1099 				uc = 0;
1100 			} else if (k > 255) {
1101 				uc = 255;
1102 			} else {
1103 				uc = (uchar_t)k;
1104 			}
1105 			krd.c[i] = uc;
1106 		}
1107 		_fp_pack_word(pfpsd, &krd.i, nrd);
1108 		break;
1109 	case fpack32:
1110 		VISINFO_KSTAT(vis_fpack32);
1111 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1112 			nrs1 = (nrs1 & 0x1e) | 0x20;
1113 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1114 		if ((nrs2 & 1) == 1)
1115 			nrs2 = (nrs2 & 0x1e) | 0x20;
1116 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1117 
1118 		r = pfpsd->fp_current_read_gsr(fp);
1119 		sf = (int)GSR_SCALE(r);
1120 		lrd.ll = lrs1.ll << 8;
1121 		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1122 			j = (int)lrs2.i[i];	/* preserve the sign */
1123 			l = ((int64_t)j << sf);
1124 			m = l >> 23;
1125 			if (m < 0) {
1126 				uc = 0;
1127 			} else if (m > 255) {
1128 				uc = 255;
1129 			} else {
1130 				uc = (uchar_t)m;
1131 			}
1132 			lrd.c[k] = uc;
1133 		}
1134 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1135 		break;
1136 	case fpackfix:
1137 		VISINFO_KSTAT(vis_fpackfix);
1138 		if ((nrs2 & 1) == 1)
1139 			nrs2 = (nrs2 & 0x1e) | 0x20;
1140 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1141 
1142 		r = pfpsd->fp_current_read_gsr(fp);
1143 		sf = (int)GSR_SCALE(r);
1144 		for (i = 0; i <= 1; i++) {
1145 			j = (int)lrs2.i[i];	/* preserve the sign */
1146 			l = ((int64_t)j << sf);
1147 			m = l >> 16;
1148 			if (m < -32768) {
1149 				s = -32768;
1150 			} else if (m > 32767) {
1151 				s = 32767;
1152 			} else {
1153 				s = (short)m;
1154 			}
1155 			krd.s[i] = s;
1156 		}
1157 		_fp_pack_word(pfpsd, &krd.i, nrd);
1158 		break;
1159 	case fexpand:
1160 		VISINFO_KSTAT(vis_fexpand);
1161 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1162 		for (i = 0; i <= 3; i++) {
1163 			uc = krs2.c[i];
1164 			lrd.s[i] = (ushort_t)(uc << 4);
1165 		}
1166 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1167 		break;
1168 	case fpmerge:
1169 		VISINFO_KSTAT(vis_fpmerge);
1170 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1171 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1172 		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1173 			lrd.c[j] = krs1.c[i];
1174 			lrd.c[j+1] = krs2.c[i];
1175 		}
1176 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1177 		break;
1178 	}
1179 	return (ftt_none);
1180 }
1181 
1182 /*
1183  * Simulator for pdist instruction.
1184  */
1185 enum ftt_type
1186 vis_pdist(
1187 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1188 	fp_inst_type	pinst)	/* FPU instruction to simulate. */
1189 {
1190 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1191 	int	i;
1192 	short	s;
1193 	union {
1194 		uint64_t	ll;
1195 		uint8_t		c[8];
1196 	} lrs1, lrs2, lrd;
1197 
1198 	nrs1 = pinst.rs1;
1199 	nrs2 = pinst.rs2;
1200 	nrd = pinst.rd;
1201 	VISINFO_KSTAT(vis_pdist);
1202 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1203 		nrs1 = (nrs1 & 0x1e) | 0x20;
1204 	if ((nrs2 & 1) == 1)
1205 		nrs2 = (nrs2 & 0x1e) | 0x20;
1206 	if ((nrd & 1) == 1)
1207 		nrd = (nrd & 0x1e) | 0x20;
1208 
1209 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1210 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1211 	_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1212 
1213 	for (i = 0; i <= 7; i++) {
1214 		s = (short)(lrs1.c[i] - lrs2.c[i]);
1215 		if (s < 0)
1216 			s = ~s + 1;
1217 		lrd.ll += s;
1218 	}
1219 
1220 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1221 	return (ftt_none);
1222 }
1223 
1224 /*
1225  * Simulator for faligndata instruction.
1226  */
1227 static enum ftt_type
1228 vis_faligndata(
1229 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1230 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1231 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1232 {
1233 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1234 	int	i, j, k, ao;
1235 	union {
1236 		uint64_t	ll;
1237 		uint8_t		c[8];
1238 	} lrs1, lrs2, lrd;
1239 	uint64_t r;
1240 
1241 	nrs1 = pinst.rs1;
1242 	nrs2 = pinst.rs2;
1243 	nrd = pinst.rd;
1244 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1245 		nrs1 = (nrs1 & 0x1e) | 0x20;
1246 	if ((nrs2 & 1) == 1)
1247 		nrs2 = (nrs2 & 0x1e) | 0x20;
1248 	if ((nrd & 1) == 1)
1249 		nrd = (nrd & 0x1e) | 0x20;
1250 
1251 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1252 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1253 
1254 	r = pfpsd->fp_current_read_gsr(fp);
1255 	ao = (int)GSR_ALIGN(r);
1256 
1257 	for (i = 0, j = ao, k = 0; i <= 7; i++)
1258 		if (j <= 7) {
1259 			lrd.c[i] = lrs1.c[j++];
1260 		} else {
1261 			lrd.c[i] = lrs2.c[k++];
1262 		}
1263 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1264 
1265 	return (ftt_none);
1266 }
1267 
1268 /*
1269  * Simulator for bshuffle instruction.
1270  */
1271 static enum ftt_type
1272 vis_bshuffle(
1273 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1274 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1275 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1276 {
1277 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1278 	int	i, j, ao;
1279 	union {
1280 		uint64_t	ll;
1281 		uint8_t		c[8];
1282 	} lrs1, lrs2, lrd;
1283 	uint64_t r;
1284 
1285 	VISINFO_KSTAT(vis_bshuffle);
1286 	nrs1 = pinst.rs1;
1287 	nrs2 = pinst.rs2;
1288 	nrd = pinst.rd;
1289 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1290 		nrs1 = (nrs1 & 0x1e) | 0x20;
1291 	if ((nrs2 & 1) == 1)
1292 		nrs2 = (nrs2 & 0x1e) | 0x20;
1293 	if ((nrd & 1) == 1)
1294 		nrd = (nrd & 0x1e) | 0x20;
1295 
1296 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1297 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1298 
1299 	r = pfpsd->fp_current_read_gsr(fp);
1300 	ao = (int)GSR_MASK(r);
1301 
1302 	/*
1303 	 * BSHUFFLE Destination Byte Selection
1304 	 * rd Byte	Source
1305 	 * 0		rs byte[GSR.mask<31..28>]
1306 	 * 1		rs byte[GSR.mask<27..24>]
1307 	 * 2		rs byte[GSR.mask<23..20>]
1308 	 * 3		rs byte[GSR.mask<19..16>]
1309 	 * 4		rs byte[GSR.mask<15..12>]
1310 	 * 5		rs byte[GSR.mask<11..8>]
1311 	 * 6		rs byte[GSR.mask<7..4>]
1312 	 * 7		rs byte[GSR.mask<3..0>]
1313 	 * P.S. rs1 is the upper half and rs2 is the lower half
1314 	 * Bytes in the source value are numbered from most to
1315 	 * least significant
1316 	 */
1317 	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1318 		j = ao & 0xf;		/* get byte number */
1319 		if (j < 8) {
1320 			lrd.c[i] = lrs1.c[j];
1321 		} else {
1322 			lrd.c[i] = lrs2.c[j - 8];
1323 		}
1324 	}
1325 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1326 
1327 	return (ftt_none);
1328 }
1329 
1330 /*
1331  * Simulator for siam instruction.
1332  */
1333 static enum ftt_type
1334 vis_siam(
1335 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1336 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1337 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1338 {
1339 	uint_t	nrs2;			/* Register number fields. */
1340 	uint64_t g, r;
1341 	nrs2 = inst.rs2;
1342 
1343 	g = pfpsd->fp_current_read_gsr(fp);
1344 	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1345 	r = nrs2 & 0x7;			/* get mode(3 bit) */
1346 	g |= (r << GSR_IRND_SHIFT);
1347 	pfpsd->fp_current_write_gsr(g, fp);
1348 	return (ftt_none);
1349 }
1350 
1351 /*
1352  * Simulator for VIS loads and stores between floating-point unit and memory.
1353  */
1354 enum ftt_type
1355 vis_fldst(
1356 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1357 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1358 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1359 	void		*prw,	/* Pointer to locals and ins. */
1360 	uint_t		asi)	/* asi to emulate! */
1361 {
1362 	union {
1363 		vis_inst_type	inst;
1364 		fp_inst_type	pinst;
1365 	} i;
1366 
1367 	ASSERT(USERMODE(pregs->r_tstate));
1368 	i.pinst = pinst;
1369 	switch (asi) {
1370 		case ASI_PST8_P:
1371 		case ASI_PST8_S:
1372 		case ASI_PST16_P:
1373 		case ASI_PST16_S:
1374 		case ASI_PST32_P:
1375 		case ASI_PST32_S:
1376 		case ASI_PST8_PL:
1377 		case ASI_PST8_SL:
1378 		case ASI_PST16_PL:
1379 		case ASI_PST16_SL:
1380 		case ASI_PST32_PL:
1381 		case ASI_PST32_SL:
1382 			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1383 			    prw, asi));
1384 		case ASI_FL8_P:
1385 		case ASI_FL8_S:
1386 		case ASI_FL8_PL:
1387 		case ASI_FL8_SL:
1388 		case ASI_FL16_P:
1389 		case ASI_FL16_S:
1390 		case ASI_FL16_PL:
1391 		case ASI_FL16_SL:
1392 			return (vis_short_fls(pfpsd, i.inst, pregs,
1393 			    prw, asi));
1394 		case ASI_BLK_AIUP:
1395 		case ASI_BLK_AIUS:
1396 		case ASI_BLK_AIUPL:
1397 		case ASI_BLK_AIUSL:
1398 		case ASI_BLK_P:
1399 		case ASI_BLK_S:
1400 		case ASI_BLK_PL:
1401 		case ASI_BLK_SL:
1402 		case ASI_BLK_COMMIT_P:
1403 		case ASI_BLK_COMMIT_S:
1404 			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1405 			    prw, asi));
1406 		default:
1407 			return (ftt_unimplemented);
1408 	}
1409 }
1410 
1411 /*
1412  * Simulator for partial stores between floating-point unit and memory.
1413  */
1414 static enum ftt_type
1415 vis_prtl_fst(
1416 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1417 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1418 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1419 	void		*prw,	/* Pointer to locals and ins. */
1420 	uint_t		asi)	/* asi to emulate! */
1421 {
1422 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1423 	uint_t	opf, msk;
1424 	int	h, i, j;
1425 	uint64_t ea, tmsk;
1426 	union {
1427 		freg_type	f;
1428 		uint64_t	ll;
1429 		uint32_t	i[2];
1430 		uint16_t	s[4];
1431 		uint8_t		c[8];
1432 	} k, l, res;
1433 	enum ftt_type   ftt;
1434 
1435 	nrs1 = inst.rs1;
1436 	nrs2 = inst.rs2;
1437 	nrd = inst.rd;
1438 	if ((nrd & 1) == 1) 		/* fix register encoding */
1439 		nrd = (nrd & 0x1e) | 0x20;
1440 	opf = inst.opf;
1441 	res.ll = 0;
1442 	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1443 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1444 		if (ftt != ftt_none)
1445 			return (ftt);
1446 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1447 		if (ftt != ftt_none)
1448 			return (ftt);
1449 		msk = (uint_t)tmsk;
1450 	} else {
1451 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1452 		return (ftt_unimplemented);
1453 	}
1454 
1455 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1456 	if ((ea & 0x3) != 0)
1457 		return (ftt_alignment);	/* Require 32 bit-alignment. */
1458 
1459 	switch (asi) {
1460 	case ASI_PST8_P:
1461 	case ASI_PST8_S:
1462 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1463 		if (ftt != ftt_none)
1464 			return (ftt);
1465 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1466 		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1467 			if ((msk & j) == j)
1468 				res.c[i] = k.c[i];
1469 			else
1470 				res.c[i] = l.c[i];
1471 		}
1472 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1473 		if (ftt != ftt_none)
1474 			return (ftt);
1475 		break;
1476 	case ASI_PST8_PL:	/* little-endian */
1477 	case ASI_PST8_SL:
1478 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1479 		if (ftt != ftt_none)
1480 			return (ftt);
1481 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1482 		for (h = 7, i = 0, j = 1; i <= 7; h--, i++, j <<= 1) {
1483 			if ((msk & j) == j)
1484 				res.c[i] = k.c[h];
1485 			else
1486 				res.c[i] = l.c[i];
1487 		}
1488 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1489 		if (ftt != ftt_none)
1490 			return (ftt);
1491 		break;
1492 	case ASI_PST16_P:
1493 	case ASI_PST16_S:
1494 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1495 		if (ftt != ftt_none)
1496 			return (ftt);
1497 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1498 		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1499 			if ((msk & j) == j)
1500 				res.s[i] = k.s[i];
1501 			else
1502 				res.s[i] = l.s[i];
1503 		}
1504 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1505 		if (ftt != ftt_none)
1506 			return (ftt);
1507 		break;
1508 	case ASI_PST16_PL:
1509 	case ASI_PST16_SL:
1510 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1511 		if (ftt != ftt_none)
1512 			return (ftt);
1513 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1514 		for (h = 7, i = 0, j = 1; i <= 6; h -= 2, i += 2, j <<= 1) {
1515 			if ((msk & j) == j) {
1516 				res.c[i] = k.c[h];
1517 				res.c[i+1] = k.c[h-1];
1518 			} else {
1519 				res.c[i] = l.c[i];
1520 				res.c[i+1] = l.c[i+1];
1521 			}
1522 		}
1523 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1524 		if (ftt != ftt_none)
1525 			return (ftt);
1526 		break;
1527 	case ASI_PST32_P:
1528 	case ASI_PST32_S:
1529 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1530 		if (ftt != ftt_none)
1531 			return (ftt);
1532 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1533 		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1534 			if ((msk & j) == j)
1535 				res.i[i] = k.i[i];
1536 			else
1537 				res.i[i] = l.i[i];
1538 		}
1539 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1540 		if (ftt != ftt_none)
1541 			return (ftt);
1542 		break;
1543 	case ASI_PST32_PL:
1544 	case ASI_PST32_SL:
1545 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1546 		if (ftt != ftt_none)
1547 			return (ftt);
1548 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1549 		for (h = 7, i = 0, j = 1; i <= 4; h -= 4, i += 4, j <<= 1) {
1550 			if ((msk & j) == j) {
1551 				res.c[i] = k.c[h];
1552 				res.c[i+1] = k.c[h-1];
1553 				res.c[i+2] = k.c[h-2];
1554 				res.c[i+3] = k.c[h-3];
1555 			} else {
1556 				res.c[i] = l.c[i];
1557 				res.c[i+1] = l.c[i+1];
1558 				res.c[i+2] = l.c[i+2];
1559 				res.c[i+3] = l.c[i+3];
1560 			}
1561 		}
1562 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1563 		if (ftt != ftt_none)
1564 			return (ftt);
1565 		break;
1566 	}
1567 
1568 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1569 	pregs->r_npc += 4;
1570 	return (ftt_none);
1571 }
1572 
1573 /*
1574  * Simulator for short load/stores between floating-point unit and memory.
1575  */
1576 static enum ftt_type
1577 vis_short_fls(
1578 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1579 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1580 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1581 	void		*prw,	/* Pointer to locals and ins. */
1582 	uint_t		asi)	/* asi to emulate! */
1583 {
1584 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1585 	uint_t	opf;
1586 	uint64_t ea, tea;
1587 	union {
1588 		freg_type	f;
1589 		uint64_t	ll;
1590 		uint32_t	i[2];
1591 		uint16_t	s[4];
1592 		uint8_t		c[8];
1593 	} k;
1594 	union {
1595 		vis_inst_type	inst;
1596 		int		i;
1597 	} fp;
1598 	enum ftt_type   ftt = ftt_none;
1599 	ushort_t us;
1600 	uchar_t uc;
1601 
1602 	nrs1 = inst.rs1;
1603 	nrs2 = inst.rs2;
1604 	nrd = inst.rd;
1605 	if ((nrd & 1) == 1) 		/* fix register encoding */
1606 		nrd = (nrd & 0x1e) | 0x20;
1607 	opf = inst.opf;
1608 	fp.inst = inst;
1609 	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1610 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1611 		if (ftt != ftt_none)
1612 			return (ftt);
1613 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1614 		if (ftt != ftt_none)
1615 			return (ftt);
1616 		ea += tea;
1617 	} else {	/* effective address = rs1 + imm13 */
1618 		fp.inst = inst;
1619 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1620 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1621 		if (ftt != ftt_none)
1622 			return (ftt);
1623 		ea += tea;
1624 	}
1625 	if (get_udatamodel() == DATAMODEL_ILP32)
1626 		ea = (uint64_t)(caddr32_t)ea;
1627 
1628 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1629 	switch (asi) {
1630 	case ASI_FL8_P:
1631 	case ASI_FL8_S:
1632 	case ASI_FL8_PL:		/* little-endian */
1633 	case ASI_FL8_SL:
1634 		if ((inst.op3 & 7) == 3) {	/* load byte */
1635 			if (fuword8((void *)ea, &uc) == -1)
1636 				return (ftt_fault);
1637 			k.ll = 0;
1638 			k.c[7] = uc;
1639 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1640 		} else {			/* store byte */
1641 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1642 			uc = k.c[7];
1643 			if (subyte((caddr_t)ea, uc) == -1)
1644 				return (ftt_fault);
1645 		}
1646 		break;
1647 	case ASI_FL16_P:
1648 	case ASI_FL16_S:
1649 		if ((ea & 1) == 1)
1650 			return (ftt_alignment);
1651 		if ((inst.op3 & 7) == 3) {	/* load short */
1652 			if (fuword16((void *)ea, &us) == -1)
1653 				return (ftt_fault);
1654 			k.ll = 0;
1655 			k.s[3] = us;
1656 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1657 		} else {			/* store short */
1658 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1659 			us = k.s[3];
1660 			if (suword16((caddr_t)ea, us) == -1)
1661 				return (ftt_fault);
1662 		}
1663 		break;
1664 	case ASI_FL16_PL:		/* little-endian */
1665 	case ASI_FL16_SL:
1666 		if ((ea & 1) == 1)
1667 			return (ftt_alignment);
1668 		if ((inst.op3 & 7) == 3) {	/* load short */
1669 			if (fuword16((void *)ea, &us) == -1)
1670 				return (ftt_fault);
1671 			k.ll = 0;
1672 			k.c[6] = (uchar_t)us;
1673 			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1674 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1675 		} else {			/* store short */
1676 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1677 			uc = k.c[7];
1678 			us = (ushort_t)((uc << 8) | k.c[6]);
1679 			if (suword16((void *)ea, us) == -1)
1680 				return (ftt_fault);
1681 		}
1682 		break;
1683 	}
1684 
1685 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1686 	pregs->r_npc += 4;
1687 	return (ftt_none);
1688 }
1689 
1690 /*
1691  * Simulator for block loads and stores between floating-point unit and memory.
1692  * We pass the addrees of ea to sync_data_memory() to flush the Ecache.
1693  * Sync_data_memory() calls platform dependent code to flush the Ecache.
1694  */
1695 static enum ftt_type
1696 vis_blk_fldst(
1697 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1698 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1699 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1700 	void		*prw,	/* Pointer to locals and ins. */
1701 	uint_t		asi)	/* asi to emulate! */
1702 {
1703 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1704 	uint_t	opf, h, i, j;
1705 	uint64_t ea, tea;
1706 	union {
1707 		freg_type	f;
1708 		uint64_t	ll;
1709 		uint8_t		c[8];
1710 	} k, l;
1711 	union {
1712 		vis_inst_type	inst;
1713 		int32_t		i;
1714 	} fp;
1715 	enum ftt_type   ftt;
1716 	boolean_t little_endian = B_FALSE;
1717 
1718 	nrs1 = inst.rs1;
1719 	nrs2 = inst.rs2;
1720 	nrd = inst.rd;
1721 	if ((nrd & 1) == 1) 		/* fix register encoding */
1722 		nrd = (nrd & 0x1e) | 0x20;
1723 
1724 	/* ensure register is 8-double precision aligned */
1725 	if ((nrd & 0xf) != 0)
1726 		return (ftt_unimplemented);
1727 
1728 	opf = inst.opf;
1729 	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1730 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1731 		if (ftt != ftt_none)
1732 			return (ftt);
1733 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1734 		if (ftt != ftt_none)
1735 			return (ftt);
1736 		ea += tea;
1737 	} else {			/* effective address = rs1 + imm13 */
1738 		fp.inst = inst;
1739 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1740 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1741 		if (ftt != ftt_none)
1742 			return (ftt);
1743 		ea += tea;
1744 	}
1745 	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1746 		return (ftt_alignment);
1747 
1748 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1749 	switch (asi) {
1750 	case ASI_BLK_AIUPL:
1751 	case ASI_BLK_AIUSL:
1752 	case ASI_BLK_PL:
1753 	case ASI_BLK_SL:
1754 		little_endian = B_TRUE;
1755 		/* FALLTHROUGH */
1756 	case ASI_BLK_AIUP:
1757 	case ASI_BLK_AIUS:
1758 	case ASI_BLK_P:
1759 	case ASI_BLK_S:
1760 	case ASI_BLK_COMMIT_P:
1761 	case ASI_BLK_COMMIT_S:
1762 		if ((inst.op3 & 7) == 3) {	/* lddf */
1763 			for (i = 0; i < 8; i++, nrd += 2) {
1764 				ftt = _fp_read_extword((uint64_t *)ea, &k.ll,
1765 				    pfpsd);
1766 				if (ftt != ftt_none)
1767 					return (ftt);
1768 				if (little_endian) {
1769 					for (j = 0, h = 7; j < 8; j++, h--)
1770 						l.c[h] = k.c[j];
1771 					k.ll = l.ll;
1772 				}
1773 				_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD,
1774 				    nrd);
1775 				ea += 8;
1776 			}
1777 		} else {			/* stdf */
1778 			for (i = 0; i < 8; i++, nrd += 2) {
1779 				_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD,
1780 				    nrd);
1781 				if (little_endian) {
1782 					for (j = 0, h = 7; j < 8; j++, h--)
1783 						l.c[h] = k.c[j];
1784 					k.ll = l.ll;
1785 				}
1786 				ftt = _fp_write_extword((uint64_t *)ea, k.ll,
1787 				    pfpsd);
1788 				if (ftt != ftt_none)
1789 					return (ftt);
1790 				ea += 8;
1791 			}
1792 		}
1793 		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1794 			sync_data_memory((caddr_t)(ea - 64), 64);
1795 		break;
1796 	default:
1797 		/* addr of unimp inst */
1798 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1799 		return (ftt_unimplemented);
1800 	}
1801 
1802 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1803 	pregs->r_npc += 4;
1804 	return (ftt_none);
1805 }
1806 
1807 /*
1808  * Simulator for rd %gsr instruction.
1809  */
1810 enum ftt_type
1811 vis_rdgsr(
1812 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1813 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1814 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1815 	void		*prw,	/* Pointer to locals and ins. */
1816 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1817 {
1818 	uint_t nrd;
1819 	uint64_t r;
1820 	enum ftt_type ftt = ftt_none;
1821 
1822 	nrd = pinst.rd;
1823 
1824 	r = pfpsd->fp_current_read_gsr(fp);
1825 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
1826 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1827 	pregs->r_npc += 4;
1828 	return (ftt);
1829 }
1830 
1831 /*
1832  * Simulator for wr %gsr instruction.
1833  */
1834 enum ftt_type
1835 vis_wrgsr(
1836 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1837 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1838 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1839 	void		*prw,	/* Pointer to locals and ins. */
1840 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1841 {
1842 	uint_t nrs1;
1843 	uint64_t r, r1, r2;
1844 	enum ftt_type ftt = ftt_none;
1845 
1846 	nrs1 = pinst.rs1;
1847 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
1848 	if (ftt != ftt_none)
1849 		return (ftt);
1850 	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
1851 		uint_t nrs2;
1852 
1853 		nrs2 = pinst.rs2;
1854 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
1855 		if (ftt != ftt_none)
1856 			return (ftt);
1857 	} else {	/* use sign_ext(simm13) */
1858 		union {
1859 			fp_inst_type	inst;
1860 			uint32_t	i;
1861 		} fp;
1862 
1863 		fp.inst = pinst;		/* Extract simm13 field */
1864 		r2 = (fp.i << 19) >> 19;
1865 	}
1866 	r = r1 ^ r2;
1867 	pfpsd->fp_current_write_gsr(r, fp);
1868 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1869 	pregs->r_npc += 4;
1870 	return (ftt);
1871 }
1872 
1873 /*
1874  * This is the loadable module wrapper.
1875  */
1876 #include <sys/errno.h>
1877 #include <sys/modctl.h>
1878 
1879 /*
1880  * Module linkage information for the kernel.
1881  */
1882 extern struct mod_ops mod_miscops;
1883 
1884 static struct modlmisc modlmisc = {
1885 	&mod_miscops,
1886 	"vis fp simulation",
1887 };
1888 
1889 static struct modlinkage modlinkage = {
1890 	MODREV_1, (void *)&modlmisc, NULL
1891 };
1892 
1893 int
1894 _init(void)
1895 {
1896 	return (mod_install(&modlinkage));
1897 }
1898 
1899 int
1900 _info(struct modinfo *modinfop)
1901 {
1902 	return (mod_info(&modlinkage, modinfop));
1903 }
1904