xref: /illumos-gate/usr/src/uts/sun4/os/visinstr.c (revision b1593d50e783f7d66722dde093752b74ffa95176)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* VIS floating point instruction simulator for Sparc FPU simulator. */
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/fpu/fpusystm.h>
31 #include <sys/fpu/fpu_simulator.h>
32 #include <sys/vis_simulator.h>
33 #include <sys/fpu/globals.h>
34 #include <sys/privregs.h>
35 #include <sys/sun4asi.h>
36 #include <sys/machasi.h>
37 #include <sys/debug.h>
38 #include <sys/cpu_module.h>
39 #include <sys/systm.h>
40 
41 #define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
42 #define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
43 #define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */
44 
45 extern	uint_t	get_subcc_ccr(uint64_t, uint64_t);
46 
47 static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
48 				void *);
49 static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
50 				struct regs *, void *, kfpu_t *);
51 static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
52 				void *);
53 static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
54 				kfpu_t *);
55 static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
56 				void *, kfpu_t *);
57 static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
58 				kfpu_t *);
59 static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
60 static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
61 				void *);
62 static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
63 static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
64 static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
65 static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type, struct regs *,
66 				void *, uint_t);
67 static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
68 				void *, uint_t);
69 static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
70 				struct regs *, void *, uint_t);
71 static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
72 				struct regs *, void *, uint_t);
73 
74 /*
75  * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
76  * traps.
77  */
78 enum ftt_type
79 vis_fpu_simulator(
80 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
81 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
82 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
83 	void		*prw,	/* Pointer to locals and ins. */
84 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
85 {
86 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
87 	uint_t	us1, us2, usr;
88 	uint64_t lus1, lus2, lusr;
89 	enum ftt_type ftt = ftt_none;
90 	union {
91 		vis_inst_type	inst;
92 		fp_inst_type	pinst;
93 	} f;
94 
95 	ASSERT(USERMODE(pregs->r_tstate));
96 	nrs1 = pinst.rs1;
97 	nrs2 = pinst.rs2;
98 	nrd = pinst.rd;
99 	f.pinst = pinst;
100 	if ((f.inst.opf & 1) == 0) {		/* double precision */
101 		if ((nrs1 & 1) == 1) 		/* fix register encoding */
102 			nrs1 = (nrs1 & 0x1e) | 0x20;
103 		if ((nrs2 & 1) == 1)
104 			nrs2 = (nrs2 & 0x1e) | 0x20;
105 		if ((nrd & 1) == 1)
106 			nrd = (nrd & 0x1e) | 0x20;
107 	}
108 
109 	switch (f.inst.opf) {
110 		/* these instr's do not use fp regs */
111 	case edge8:
112 	case edge8l:
113 	case edge8n:
114 	case edge8ln:
115 	case edge16:
116 	case edge16l:
117 	case edge16n:
118 	case edge16ln:
119 	case edge32:
120 	case edge32l:
121 	case edge32n:
122 	case edge32ln:
123 		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
124 		break;
125 	case array8:
126 	case array16:
127 	case array32:
128 		ftt = vis_array(pfpsd, f.inst, pregs, prw);
129 		break;
130 	case alignaddr:
131 	case alignaddrl:
132 		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
133 		break;
134 	case bmask:
135 		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
136 		break;
137 	case fcmple16:
138 	case fcmpne16:
139 	case fcmpgt16:
140 	case fcmpeq16:
141 	case fcmple32:
142 	case fcmpne32:
143 	case fcmpgt32:
144 	case fcmpeq32:
145 		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
146 		break;
147 	case fmul8x16:
148 	case fmul8x16au:
149 	case fmul8x16al:
150 	case fmul8sux16:
151 	case fmul8ulx16:
152 	case fmuld8sux16:
153 	case fmuld8ulx16:
154 		ftt = vis_fmul(pfpsd, f.inst);
155 		break;
156 	case fpack16:
157 	case fpack32:
158 	case fpackfix:
159 	case fexpand:
160 	case fpmerge:
161 		ftt = vis_fpixel(pfpsd, f.inst, fp);
162 		break;
163 	case pdist:
164 	case pdistn:
165 		ftt = vis_pdist(pfpsd, pinst, pregs, prw, f.inst.opf);
166 		break;
167 	case faligndata:
168 		ftt = vis_faligndata(pfpsd, pinst, fp);
169 		break;
170 	case bshuffle:
171 		ftt = vis_bshuffle(pfpsd, pinst, fp);
172 		break;
173 	case fpadd16:
174 	case fpadd16s:
175 	case fpadd32:
176 	case fpadd32s:
177 	case fpsub16:
178 	case fpsub16s:
179 	case fpsub32:
180 	case fpsub32s:
181 		ftt = vis_fpaddsub(pfpsd, f.inst);
182 		break;
183 	case fzero:
184 		lusr = 0;
185 		_fp_pack_extword(pfpsd, &lusr, nrd);
186 		break;
187 	case fzeros:
188 		usr = 0;
189 		_fp_pack_word(pfpsd, &usr, nrd);
190 		break;
191 	case fnor:
192 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
193 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
194 		lusr = ~(lus1 | lus2);
195 		_fp_pack_extword(pfpsd, &lusr, nrd);
196 		break;
197 	case fnors:
198 		_fp_unpack_word(pfpsd, &us1, nrs1);
199 		_fp_unpack_word(pfpsd, &us2, nrs2);
200 		usr = ~(us1 | us2);
201 		_fp_pack_word(pfpsd, &usr, nrd);
202 		break;
203 	case fandnot2:
204 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
205 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
206 		lusr = (lus1 & ~lus2);
207 		_fp_pack_extword(pfpsd, &lusr, nrd);
208 		break;
209 	case fandnot2s:
210 		_fp_unpack_word(pfpsd, &us1, nrs1);
211 		_fp_unpack_word(pfpsd, &us2, nrs2);
212 		usr = (us1 & ~us2);
213 		_fp_pack_word(pfpsd, &usr, nrd);
214 		break;
215 	case fnot2:
216 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
217 		lusr = ~lus2;
218 		_fp_pack_extword(pfpsd, &lusr, nrd);
219 		break;
220 	case fnot2s:
221 		_fp_unpack_word(pfpsd, &us2, nrs2);
222 		usr = ~us2;
223 		_fp_pack_word(pfpsd, &usr, nrd);
224 		break;
225 	case fandnot1:
226 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
227 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
228 		lusr = (~lus1 & lus2);
229 		_fp_pack_extword(pfpsd, &lusr, nrd);
230 		break;
231 	case fandnot1s:
232 		_fp_unpack_word(pfpsd, &us1, nrs1);
233 		_fp_unpack_word(pfpsd, &us2, nrs2);
234 		usr = (~us1 & us2);
235 		_fp_pack_word(pfpsd, &usr, nrd);
236 		break;
237 	case fnot1:
238 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
239 		lusr = ~lus1;
240 		_fp_pack_extword(pfpsd, &lusr, nrd);
241 		break;
242 	case fnot1s:
243 		_fp_unpack_word(pfpsd, &us1, nrs1);
244 		usr = ~us1;
245 		_fp_pack_word(pfpsd, &usr, nrd);
246 		break;
247 	case fxor:
248 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
249 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
250 		lusr = (lus1 ^ lus2);
251 		_fp_pack_extword(pfpsd, &lusr, nrd);
252 		break;
253 	case fxors:
254 		_fp_unpack_word(pfpsd, &us1, nrs1);
255 		_fp_unpack_word(pfpsd, &us2, nrs2);
256 		usr = (us1 ^ us2);
257 		_fp_pack_word(pfpsd, &usr, nrd);
258 		break;
259 	case fnand:
260 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
261 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
262 		lusr = ~(lus1 & lus2);
263 		_fp_pack_extword(pfpsd, &lusr, nrd);
264 		break;
265 	case fnands:
266 		_fp_unpack_word(pfpsd, &us1, nrs1);
267 		_fp_unpack_word(pfpsd, &us2, nrs2);
268 		usr = ~(us1 & us2);
269 		_fp_pack_word(pfpsd, &usr, nrd);
270 		break;
271 	case fand:
272 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
273 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
274 		lusr = (lus1 & lus2);
275 		_fp_pack_extword(pfpsd, &lusr, nrd);
276 		break;
277 	case fands:
278 		_fp_unpack_word(pfpsd, &us1, nrs1);
279 		_fp_unpack_word(pfpsd, &us2, nrs2);
280 		usr = (us1 & us2);
281 		_fp_pack_word(pfpsd, &usr, nrd);
282 		break;
283 	case fxnor:
284 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
285 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
286 		lusr = ~(lus1 ^ lus2);
287 		_fp_pack_extword(pfpsd, &lusr, nrd);
288 		break;
289 	case fxnors:
290 		_fp_unpack_word(pfpsd, &us1, nrs1);
291 		_fp_unpack_word(pfpsd, &us2, nrs2);
292 		usr = ~(us1 ^ us2);
293 		_fp_pack_word(pfpsd, &usr, nrd);
294 		break;
295 	case fsrc1:
296 		_fp_unpack_extword(pfpsd, &lusr, nrs1);
297 		_fp_pack_extword(pfpsd, &lusr, nrd);
298 		break;
299 	case fsrc1s:
300 		_fp_unpack_word(pfpsd, &usr, nrs1);
301 		_fp_pack_word(pfpsd, &usr, nrd);
302 		break;
303 	case fornot2:
304 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
305 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
306 		lusr = (lus1 | ~lus2);
307 		_fp_pack_extword(pfpsd, &lusr, nrd);
308 		break;
309 	case fornot2s:
310 		_fp_unpack_word(pfpsd, &us1, nrs1);
311 		_fp_unpack_word(pfpsd, &us2, nrs2);
312 		usr = (us1 | ~us2);
313 		_fp_pack_word(pfpsd, &usr, nrd);
314 		break;
315 	case fsrc2:
316 		_fp_unpack_extword(pfpsd, &lusr, nrs2);
317 		_fp_pack_extword(pfpsd, &lusr, nrd);
318 		break;
319 	case fsrc2s:
320 		_fp_unpack_word(pfpsd, &usr, nrs2);
321 		_fp_pack_word(pfpsd, &usr, nrd);
322 		break;
323 	case fornot1:
324 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
325 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
326 		lusr = (~lus1 | lus2);
327 		_fp_pack_extword(pfpsd, &lusr, nrd);
328 		break;
329 	case fornot1s:
330 		_fp_unpack_word(pfpsd, &us1, nrs1);
331 		_fp_unpack_word(pfpsd, &us2, nrs2);
332 		usr = (~us1 | us2);
333 		_fp_pack_word(pfpsd, &usr, nrd);
334 		break;
335 	case for_op:
336 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
337 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
338 		lusr = (lus1 | lus2);
339 		_fp_pack_extword(pfpsd, &lusr, nrd);
340 		break;
341 	case fors_op:
342 		_fp_unpack_word(pfpsd, &us1, nrs1);
343 		_fp_unpack_word(pfpsd, &us2, nrs2);
344 		usr = (us1 | us2);
345 		_fp_pack_word(pfpsd, &usr, nrd);
346 		break;
347 	case fone:
348 		lusr = 0xffffffffffffffff;
349 		_fp_pack_extword(pfpsd, &lusr, nrd);
350 		break;
351 	case fones:
352 		usr = 0xffffffffUL;
353 		_fp_pack_word(pfpsd, &usr, nrd);
354 		break;
355 	case siam:
356 		ftt = vis_siam(pfpsd, f.inst, fp);
357 		break;
358 	default:
359 		return (ftt_unimplemented);
360 	}
361 
362 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
363 	pregs->r_npc += 4;
364 	return (ftt);
365 }
366 
367 /*
368  * Simulator for edge instructions
369  */
370 static enum ftt_type
371 vis_edge(
372 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
373 	vis_inst_type	inst,	/* FPU instruction to simulate. */
374 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
375 	void		*prw)	/* Pointer to locals and ins. */
376 
377 {
378 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
379 	enum ftt_type ftt;
380 	uint64_t addrl, addrr, mask;
381 	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
382 	int al3l, al3r;			/* Lower 3 bits of address */
383 	uint_t	ccr;
384 
385 	nrs1 = inst.rs1;
386 	nrs2 = inst.rs2;
387 	nrd = inst.rd;
388 
389 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
390 	if (ftt != ftt_none)
391 		return (ftt);
392 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
393 	if (ftt != ftt_none)
394 		return (ftt);
395 
396 	/* Test PSTATE.AM to determine 32-bit vs 64-bit addressing */
397 	if ((pregs->r_tstate & TSTATE_AM) != 0) {
398 		ah61l = addrl & 0xfffffff8;
399 		ah61r = addrr & 0xfffffff8;
400 	} else {
401 		ah61l = addrl & ~0x7;
402 		ah61r = addrr & ~0x7;
403 	}
404 
405 
406 	switch (inst.opf) {
407 	case edge8:
408 	case edge8n:
409 	case edge8l:
410 	case edge8ln:
411 		al3l = addrl & 0x7;
412 		switch (inst.opf) {
413 		case edge8:
414 		case edge8n:
415 			if (inst.opf == edge8) {
416 				VISINFO_KSTAT(vis_edge8);
417 			} else {
418 				VISINFO_KSTAT(vis_edge8n);
419 			}
420 			mask = 0xff >> al3l;
421 			if (ah61l == ah61r) {
422 				al3r = addrr & 0x7;
423 				mask &= (0xff << (0x7 - al3r)) & 0xff;
424 			}
425 			break;
426 		case edge8l:
427 		case edge8ln:
428 			if (inst.opf == edge8l) {
429 				VISINFO_KSTAT(vis_edge8l);
430 			} else {
431 				VISINFO_KSTAT(vis_edge8ln);
432 			}
433 			mask = (0xff << al3l) & 0xff;
434 			if (ah61l == ah61r) {
435 				al3r = addrr & 0x7;
436 				mask &= 0xff >> (0x7 - al3r);
437 			}
438 			break;
439 		}
440 		break;
441 	case edge16:
442 	case edge16l:
443 	case edge16n:
444 	case edge16ln:
445 		al3l = addrl & 0x6;
446 		al3l >>= 0x1;
447 		switch (inst.opf) {
448 		case edge16:
449 		case edge16n:
450 			if (inst.opf == edge16) {
451 				VISINFO_KSTAT(vis_edge16);
452 
453 			} else {
454 				VISINFO_KSTAT(vis_edge16n);
455 			}
456 			mask = 0xf >> al3l;
457 			if (ah61l == ah61r) {
458 				al3r = addrr & 0x6;
459 				al3r >>= 0x1;
460 				mask &= (0xf << (0x3 - al3r)) & 0xf;
461 			}
462 			break;
463 		case edge16l:
464 		case edge16ln:
465 			if (inst.opf == edge16l) {
466 				VISINFO_KSTAT(vis_edge16l);
467 
468 			} else {
469 				VISINFO_KSTAT(vis_edge16ln);
470 			}
471 
472 			mask = (0xf << al3l) & 0xf;
473 			if (ah61l == ah61r) {
474 				al3r = addrr & 0x6;
475 				al3r >>= 0x1;
476 				mask &= 0xf >> (0x3 - al3r);
477 			}
478 			break;
479 		}
480 		break;
481 	case edge32:
482 	case edge32l:
483 	case edge32n:
484 	case edge32ln:
485 		al3l = addrl & 0x4;
486 		al3l >>= 0x2;
487 
488 		switch (inst.opf) {
489 		case edge32:
490 		case edge32n:
491 			if (inst.opf == edge32) {
492 				VISINFO_KSTAT(vis_edge32);
493 
494 			} else {
495 				VISINFO_KSTAT(vis_edge32n);
496 			}
497 			mask = 0x3 >> al3l;
498 			if (ah61l == ah61r) {
499 				al3r = addrr & 0x4;
500 				al3r >>= 0x2;
501 				mask &= (0x3 << (0x1 - al3r)) & 0x3;
502 			}
503 			break;
504 		case edge32l:
505 		case edge32ln:
506 			if (inst.opf == edge32l) {
507 				VISINFO_KSTAT(vis_edge32l);
508 
509 			} else {
510 				VISINFO_KSTAT(vis_edge32ln);
511 			}
512 			mask = (0x3 << al3l) & 0x3;
513 			if (ah61l == ah61r) {
514 				al3r = addrr & 0x4;
515 				al3r >>= 0x2;
516 				mask &= 0x3 >> (0x1 - al3r);
517 			}
518 			break;
519 		}
520 		break;
521 	}
522 
523 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
524 
525 	switch (inst.opf) {
526 	case edge8:
527 	case edge8l:
528 	case edge16:
529 	case edge16l:
530 	case edge32:
531 	case edge32l:
532 
533 		/* Update flags per SUBcc outcome */
534 		pregs->r_tstate &= ~((uint64_t)TSTATE_CCR_MASK
535 					<< TSTATE_CCR_SHIFT);
536 		ccr = get_subcc_ccr(addrl, addrr);  /* get subcc cond. codes */
537 		pregs->r_tstate |= ((uint64_t)ccr << TSTATE_CCR_SHIFT);
538 
539 		break;
540 	}
541 	return (ftt);
542 }
543 
544 /*
545  * Simulator for three dimentional array addressing instructions.
546  */
547 static enum ftt_type
548 vis_array(
549 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
550 	vis_inst_type	inst,	/* FPU instruction to simulate. */
551 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
552 	void		*prw)	/* Pointer to locals and ins. */
553 
554 {
555 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
556 	enum ftt_type ftt;
557 	uint64_t laddr, bsize, baddr;
558 	uint64_t nbit;
559 	int oy, oz;
560 
561 	nrs1 = inst.rs1;
562 	nrs2 = inst.rs2;
563 	nrd = inst.rd;
564 
565 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
566 	if (ftt != ftt_none)
567 		return (ftt);
568 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
569 	if (ftt != ftt_none)
570 		return (ftt);
571 
572 	if (bsize > 5) {
573 		bsize = 5;
574 	}
575 	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
576 	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
577 	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
578 
579 	baddr = 0;
580 	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
581 	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
582 	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
583 	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
584 	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
585 	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
586 	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
587 	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
588 	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
589 
590 	switch (inst.opf) {
591 	case array8:
592 		VISINFO_KSTAT(vis_array8);
593 		break;
594 	case array16:
595 		VISINFO_KSTAT(vis_array16);
596 		baddr <<= 1;
597 		break;
598 	case array32:
599 		VISINFO_KSTAT(vis_array32);
600 		baddr <<= 2;
601 		break;
602 	}
603 
604 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
605 
606 	return (ftt);
607 }
608 
609 /*
610  * Simulator for alignaddr and alignaddrl instructions.
611  */
612 static enum ftt_type
613 vis_alignaddr(
614 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
615 	vis_inst_type	inst,	/* FPU instruction to simulate. */
616 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
617 	void		*prw,	/* Pointer to locals and ins. */
618 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
619 {
620 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
621 	enum ftt_type ftt;
622 	uint64_t ea, tea, g, r;
623 	short s;
624 
625 	nrs1 = inst.rs1;
626 	nrs2 = inst.rs2;
627 	nrd = inst.rd;
628 
629 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
630 	if (ftt != ftt_none)
631 		return (ftt);
632 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
633 	if (ftt != ftt_none)
634 		return (ftt);
635 	ea += tea;
636 	r = ea & ~0x7;	/* zero least 3 significant bits */
637 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
638 
639 
640 	g = pfpsd->fp_current_read_gsr(fp);
641 	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
642 	r = ea & 0x7;
643 	if (inst.opf == alignaddrl) {
644 		s = (short)(~r);	/* 2's complement for alignaddrl */
645 		if (s < 0)
646 			r = (uint64_t)((s + 1) & 0x7);
647 		else
648 			r = (uint64_t)(s & 0x7);
649 	}
650 	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
651 	pfpsd->fp_current_write_gsr(g, fp);
652 
653 	return (ftt);
654 }
655 
656 /*
657  * Simulator for bmask instruction.
658  */
659 static enum ftt_type
660 vis_bmask(
661 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
662 	vis_inst_type	inst,	/* FPU instruction to simulate. */
663 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
664 	void		*prw,	/* Pointer to locals and ins. */
665 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
666 {
667 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
668 	enum ftt_type ftt;
669 	uint64_t ea, tea, g;
670 
671 	VISINFO_KSTAT(vis_bmask);
672 	nrs1 = inst.rs1;
673 	nrs2 = inst.rs2;
674 	nrd = inst.rd;
675 
676 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
677 	if (ftt != ftt_none)
678 		return (ftt);
679 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
680 	if (ftt != ftt_none)
681 		return (ftt);
682 	ea += tea;
683 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
684 
685 	g = pfpsd->fp_current_read_gsr(fp);
686 	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
687 
688 	/* Put the least significant 32 bits of ea in GSR.mask */
689 	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
690 	pfpsd->fp_current_write_gsr(g, fp);
691 	return (ftt);
692 }
693 
694 /*
695  * Simulator for fp[add|sub]* instruction.
696  */
697 static enum ftt_type
698 vis_fpaddsub(
699 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
700 	vis_inst_type	inst)	/* FPU instruction to simulate. */
701 {
702 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
703 	union {
704 		uint64_t	ll;
705 		uint32_t	i[2];
706 		uint16_t	s[4];
707 	} lrs1, lrs2, lrd;
708 	union {
709 		uint32_t	i;
710 		uint16_t	s[2];
711 	} krs1, krs2, krd;
712 	int i;
713 
714 	nrs1 = inst.rs1;
715 	nrs2 = inst.rs2;
716 	nrd = inst.rd;
717 	if ((inst.opf & 1) == 0) {	/* double precision */
718 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
719 			nrs1 = (nrs1 & 0x1e) | 0x20;
720 		if ((nrs2 & 1) == 1)
721 			nrs2 = (nrs2 & 0x1e) | 0x20;
722 		if ((nrd & 1) == 1)
723 			nrd = (nrd & 0x1e) | 0x20;
724 	}
725 	switch (inst.opf) {
726 	case fpadd16:
727 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
728 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
729 		for (i = 0; i <= 3; i++) {
730 			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
731 		}
732 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
733 		break;
734 	case fpadd16s:
735 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
736 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
737 		for (i = 0; i <= 1; i++) {
738 			krd.s[i] = krs1.s[i] + krs2.s[i];
739 		}
740 		_fp_pack_word(pfpsd, &krd.i, nrd);
741 		break;
742 	case fpadd32:
743 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
744 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
745 		for (i = 0; i <= 1; i++) {
746 			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
747 		}
748 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
749 		break;
750 	case fpadd32s:
751 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
752 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
753 		krd.i = krs1.i + krs2.i;
754 		_fp_pack_word(pfpsd, &krd.i, nrd);
755 		break;
756 	case fpsub16:
757 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
758 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
759 		for (i = 0; i <= 3; i++) {
760 			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
761 		}
762 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
763 		break;
764 	case fpsub16s:
765 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
766 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
767 		for (i = 0; i <= 1; i++) {
768 			krd.s[i] = krs1.s[i] - krs2.s[i];
769 		}
770 		_fp_pack_word(pfpsd, &krd.i, nrd);
771 		break;
772 	case fpsub32:
773 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
774 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
775 		for (i = 0; i <= 1; i++) {
776 			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
777 		}
778 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
779 		break;
780 	case fpsub32s:
781 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
782 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
783 		krd.i = krs1.i - krs2.i;
784 		_fp_pack_word(pfpsd, &krd.i, nrd);
785 		break;
786 	}
787 	return (ftt_none);
788 }
789 
790 /*
791  * Simulator for fcmp* instruction.
792  */
793 static enum ftt_type
794 vis_fcmp(
795 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
796 	vis_inst_type	inst,	/* FPU instruction to simulate. */
797 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
798 	void		*prw)	/* Pointer to locals and ins. */
799 {
800 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
801 	union {
802 		uint64_t	ll;
803 		uint32_t	i[2];
804 		uint16_t	s[4];
805 	} krs1, krs2, krd;
806 	enum ftt_type ftt;
807 	short sr1, sr2;
808 	int i, ir1, ir2;
809 
810 	nrs1 = inst.rs1;
811 	nrs2 = inst.rs2;
812 	nrd = inst.rd;
813 	krd.ll = 0;
814 	if ((nrs1 & 1) == 1) 	/* fix register encoding */
815 		nrs1 = (nrs1 & 0x1e) | 0x20;
816 	if ((nrs2 & 1) == 1)
817 		nrs2 = (nrs2 & 0x1e) | 0x20;
818 
819 	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
820 	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
821 	switch (inst.opf) {
822 	case fcmple16:
823 		VISINFO_KSTAT(vis_fcmple16);
824 		for (i = 0; i <= 3; i++) {
825 			sr1 = (short)krs1.s[i];
826 			sr2 = (short)krs2.s[i];
827 			if (sr1 <= sr2)
828 				krd.ll += (0x8 >> i);
829 		}
830 		break;
831 	case fcmpne16:
832 		VISINFO_KSTAT(vis_fcmpne16);
833 		for (i = 0; i <= 3; i++) {
834 			sr1 = (short)krs1.s[i];
835 			sr2 = (short)krs2.s[i];
836 			if (sr1 != sr2)
837 				krd.ll += (0x8 >> i);
838 		}
839 		break;
840 	case fcmpgt16:
841 		VISINFO_KSTAT(vis_fcmpgt16);
842 		for (i = 0; i <= 3; i++) {
843 			sr1 = (short)krs1.s[i];
844 			sr2 = (short)krs2.s[i];
845 			if (sr1 > sr2)
846 				krd.ll += (0x8 >> i);
847 		}
848 		break;
849 	case fcmpeq16:
850 		VISINFO_KSTAT(vis_fcmpeq16);
851 		for (i = 0; i <= 3; i++) {
852 			sr1 = (short)krs1.s[i];
853 			sr2 = (short)krs2.s[i];
854 			if (sr1 == sr2)
855 				krd.ll += (0x8 >> i);
856 		}
857 		break;
858 	case fcmple32:
859 		VISINFO_KSTAT(vis_fcmple32);
860 		for (i = 0; i <= 1; i++) {
861 			ir1 = (int)krs1.i[i];
862 			ir2 = (int)krs2.i[i];
863 			if (ir1 <= ir2)
864 				krd.ll += (0x2 >> i);
865 		}
866 		break;
867 	case fcmpne32:
868 		VISINFO_KSTAT(vis_fcmpne32);
869 		for (i = 0; i <= 1; i++) {
870 			ir1 = (int)krs1.i[i];
871 			ir2 = (int)krs2.i[i];
872 			if (ir1 != ir2)
873 				krd.ll += (0x2 >> i);
874 		}
875 		break;
876 	case fcmpgt32:
877 		VISINFO_KSTAT(vis_fcmpgt32);
878 		for (i = 0; i <= 1; i++) {
879 			ir1 = (int)krs1.i[i];
880 			ir2 = (int)krs2.i[i];
881 			if (ir1 > ir2)
882 				krd.ll += (0x2 >> i);
883 		}
884 		break;
885 	case fcmpeq32:
886 		VISINFO_KSTAT(vis_fcmpeq32);
887 		for (i = 0; i <= 1; i++) {
888 			ir1 = (int)krs1.i[i];
889 			ir2 = (int)krs2.i[i];
890 			if (ir1 == ir2)
891 				krd.ll += (0x2 >> i);
892 		}
893 		break;
894 	}
895 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
896 	return (ftt);
897 }
898 
899 /*
900  * Simulator for fmul* instruction.
901  */
902 static enum ftt_type
903 vis_fmul(
904 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
905 	vis_inst_type	inst)	/* FPU instruction to simulate. */
906 {
907 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
908 	union {
909 		uint64_t	ll;
910 		uint32_t	i[2];
911 		uint16_t	s[4];
912 		uint8_t		c[8];
913 	} lrs1, lrs2, lrd;
914 	union {
915 		uint32_t	i;
916 		uint16_t	s[2];
917 		uint8_t		c[4];
918 	} krs1, krs2, kres;
919 	short s1, s2, sres;
920 	ushort_t us1;
921 	char c1;
922 	int i;
923 
924 	nrs1 = inst.rs1;
925 	nrs2 = inst.rs2;
926 	nrd = inst.rd;
927 	if ((inst.opf & 1) == 0) {	/* double precision */
928 		if ((nrd & 1) == 1) 	/* fix register encoding */
929 			nrd = (nrd & 0x1e) | 0x20;
930 	}
931 
932 	switch (inst.opf) {
933 	case fmul8x16:
934 		VISINFO_KSTAT(vis_fmul8x16);
935 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
936 		if ((nrs2 & 1) == 1)
937 			nrs2 = (nrs2 & 0x1e) | 0x20;
938 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
939 		for (i = 0; i <= 3; i++) {
940 			us1 = (ushort_t)krs1.c[i];
941 			s2 = (short)lrs2.s[i];
942 			kres.i = us1 * s2;
943 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
944 			if (kres.c[3] >= 0x80)
945 				sres++;
946 			lrd.s[i] = sres;
947 		}
948 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
949 		break;
950 	case fmul8x16au:
951 		VISINFO_KSTAT(vis_fmul8x16au);
952 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
953 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
954 		for (i = 0; i <= 3; i++) {
955 			us1 = (ushort_t)krs1.c[i];
956 			s2 = (short)krs2.s[0];
957 			kres.i = us1 * s2;
958 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
959 			if (kres.c[3] >= 0x80)
960 				sres++;
961 			lrd.s[i] = sres;
962 		}
963 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
964 		break;
965 	case fmul8x16al:
966 		VISINFO_KSTAT(vis_fmul8x16al);
967 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
968 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
969 		for (i = 0; i <= 3; i++) {
970 			us1 = (ushort_t)krs1.c[i];
971 			s2 = (short)krs2.s[1];
972 			kres.i = us1 * s2;
973 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
974 			if (kres.c[3] >= 0x80)
975 				sres++;
976 			lrd.s[i] = sres;
977 		}
978 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
979 		break;
980 	case fmul8sux16:
981 		VISINFO_KSTAT(vis_fmul8sux16);
982 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
983 			nrs1 = (nrs1 & 0x1e) | 0x20;
984 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
985 		if ((nrs2 & 1) == 1)
986 			nrs2 = (nrs2 & 0x1e) | 0x20;
987 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
988 		for (i = 0; i <= 3; i++) {
989 			c1 = lrs1.c[(i*2)];
990 			s1 = (short)c1;		/* keeps the sign alive */
991 			s2 = (short)lrs2.s[i];
992 			kres.i = s1 * s2;
993 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
994 			if (kres.c[3] >= 0x80)
995 				sres++;
996 			if (sres < 0)
997 				lrd.s[i] = (sres & 0xFFFF);
998 			else
999 				lrd.s[i] = sres;
1000 		}
1001 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1002 		break;
1003 	case fmul8ulx16:
1004 		VISINFO_KSTAT(vis_fmul8ulx16);
1005 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1006 			nrs1 = (nrs1 & 0x1e) | 0x20;
1007 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1008 		if ((nrs2 & 1) == 1)
1009 			nrs2 = (nrs2 & 0x1e) | 0x20;
1010 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1011 		for (i = 0; i <= 3; i++) {
1012 			us1 = (ushort_t)lrs1.c[(i*2)+1];
1013 			s2 = (short)lrs2.s[i];
1014 			kres.i = us1 * s2;
1015 			sres = (short)kres.s[0];
1016 			if (kres.s[1] >= 0x8000)
1017 				sres++;
1018 			lrd.s[i] = sres;
1019 		}
1020 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1021 		break;
1022 	case fmuld8sux16:
1023 		VISINFO_KSTAT(vis_fmuld8sux16);
1024 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1025 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1026 		for (i = 0; i <= 1; i++) {
1027 			c1 = krs1.c[(i*2)];
1028 			s1 = (short)c1;		/* keeps the sign alive */
1029 			s2 = (short)krs2.s[i];
1030 			kres.i = s1 * s2;
1031 			lrd.i[i] = kres.i << 8;
1032 		}
1033 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1034 		break;
1035 	case fmuld8ulx16:
1036 		VISINFO_KSTAT(vis_fmuld8ulx16);
1037 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1038 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1039 		for (i = 0; i <= 1; i++) {
1040 			us1 = (ushort_t)krs1.c[(i*2)+1];
1041 			s2 = (short)krs2.s[i];
1042 			lrd.i[i] = us1 * s2;
1043 		}
1044 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1045 		break;
1046 	}
1047 	return (ftt_none);
1048 }
1049 
1050 /*
1051  * Simulator for fpixel formatting instructions.
1052  */
1053 static enum ftt_type
1054 vis_fpixel(
1055 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1056 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1057 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1058 {
1059 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1060 	int	i, j, k, sf;
1061 	union {
1062 		uint64_t	ll;
1063 		uint32_t	i[2];
1064 		uint16_t	s[4];
1065 		uint8_t		c[8];
1066 	} lrs1, lrs2, lrd;
1067 	union {
1068 		uint32_t	i;
1069 		uint16_t	s[2];
1070 		uint8_t		c[4];
1071 	} krs1, krs2, krd;
1072 	uint64_t r;
1073 	int64_t l, m;
1074 	short s;
1075 	uchar_t uc;
1076 
1077 	nrs1 = inst.rs1;
1078 	nrs2 = inst.rs2;
1079 	nrd = inst.rd;
1080 	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1081 		if ((nrd & 1) == 1) 	/* fix register encoding */
1082 			nrd = (nrd & 0x1e) | 0x20;
1083 	}
1084 
1085 	switch (inst.opf) {
1086 	case fpack16:
1087 		VISINFO_KSTAT(vis_fpack16);
1088 		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1089 			nrs2 = (nrs2 & 0x1e) | 0x20;
1090 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1091 		r = pfpsd->fp_current_read_gsr(fp);
1092 		/* fpack16 ignores GSR.scale msb */
1093 		sf = (int)(GSR_SCALE(r) & 0xf);
1094 		for (i = 0; i <= 3; i++) {
1095 			s = (short)lrs2.s[i];	/* preserve the sign */
1096 			j = ((int)s << sf);
1097 			k = j >> 7;
1098 			if (k < 0) {
1099 				uc = 0;
1100 			} else if (k > 255) {
1101 				uc = 255;
1102 			} else {
1103 				uc = (uchar_t)k;
1104 			}
1105 			krd.c[i] = uc;
1106 		}
1107 		_fp_pack_word(pfpsd, &krd.i, nrd);
1108 		break;
1109 	case fpack32:
1110 		VISINFO_KSTAT(vis_fpack32);
1111 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1112 			nrs1 = (nrs1 & 0x1e) | 0x20;
1113 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1114 		if ((nrs2 & 1) == 1)
1115 			nrs2 = (nrs2 & 0x1e) | 0x20;
1116 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1117 
1118 		r = pfpsd->fp_current_read_gsr(fp);
1119 		sf = (int)GSR_SCALE(r);
1120 		lrd.ll = lrs1.ll << 8;
1121 		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1122 			j = (int)lrs2.i[i];	/* preserve the sign */
1123 			l = ((int64_t)j << sf);
1124 			m = l >> 23;
1125 			if (m < 0) {
1126 				uc = 0;
1127 			} else if (m > 255) {
1128 				uc = 255;
1129 			} else {
1130 				uc = (uchar_t)m;
1131 			}
1132 			lrd.c[k] = uc;
1133 		}
1134 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1135 		break;
1136 	case fpackfix:
1137 		VISINFO_KSTAT(vis_fpackfix);
1138 		if ((nrs2 & 1) == 1)
1139 			nrs2 = (nrs2 & 0x1e) | 0x20;
1140 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1141 
1142 		r = pfpsd->fp_current_read_gsr(fp);
1143 		sf = (int)GSR_SCALE(r);
1144 		for (i = 0; i <= 1; i++) {
1145 			j = (int)lrs2.i[i];	/* preserve the sign */
1146 			l = ((int64_t)j << sf);
1147 			m = l >> 16;
1148 			if (m < -32768) {
1149 				s = -32768;
1150 			} else if (m > 32767) {
1151 				s = 32767;
1152 			} else {
1153 				s = (short)m;
1154 			}
1155 			krd.s[i] = s;
1156 		}
1157 		_fp_pack_word(pfpsd, &krd.i, nrd);
1158 		break;
1159 	case fexpand:
1160 		VISINFO_KSTAT(vis_fexpand);
1161 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1162 		for (i = 0; i <= 3; i++) {
1163 			uc = krs2.c[i];
1164 			lrd.s[i] = (ushort_t)(uc << 4);
1165 		}
1166 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1167 		break;
1168 	case fpmerge:
1169 		VISINFO_KSTAT(vis_fpmerge);
1170 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1171 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1172 		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1173 			lrd.c[j] = krs1.c[i];
1174 			lrd.c[j+1] = krs2.c[i];
1175 		}
1176 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1177 		break;
1178 	}
1179 	return (ftt_none);
1180 }
1181 
1182 /*
1183  * Simulator for pdist instruction.
1184  */
1185 enum ftt_type
1186 vis_pdist(
1187 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1188 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1189 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1190 	void		*prw,	/* Pointer to locals and ins. */
1191 	uint_t		op)	/* Opcode pdist or pdistn */
1192 {
1193 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1194 	int	i;
1195 	short	s;
1196 	union {
1197 		uint64_t	ll;
1198 		uint8_t		c[8];
1199 	} lrs1, lrs2, lrd;
1200 
1201 	nrs1 = pinst.rs1;
1202 	nrs2 = pinst.rs2;
1203 	nrd = pinst.rd;
1204 
1205 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1206 		nrs1 = (nrs1 & 0x1e) | 0x20;
1207 	if ((nrs2 & 1) == 1)
1208 		nrs2 = (nrs2 & 0x1e) | 0x20;
1209 	if ((nrd & 1) == 1)
1210 		nrd = (nrd & 0x1e) | 0x20;
1211 
1212 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1213 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1214 
1215 	if (op == pdist) {
1216 		VISINFO_KSTAT(vis_pdist);
1217 		_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1218 	} else {
1219 		/* pdistn */
1220 		VISINFO_KSTAT(vis_pdistn);
1221 		lrd.ll = 0;
1222 	}
1223 
1224 	for (i = 0; i <= 7; i++) {
1225 		s = (short)(lrs1.c[i] - lrs2.c[i]);
1226 		if (s < 0)
1227 			s = ~s + 1;
1228 		lrd.ll += s;
1229 	}
1230 
1231 	if (op == pdist)
1232 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1233 	else
1234 		/* pdistn */
1235 		(void) write_iureg(pfpsd, nrd, pregs, prw, &lrd.ll);
1236 	return (ftt_none);
1237 }
1238 
1239 /*
1240  * Simulator for faligndata instruction.
1241  */
1242 static enum ftt_type
1243 vis_faligndata(
1244 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1245 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1246 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1247 {
1248 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1249 	int	i, j, k, ao;
1250 	union {
1251 		uint64_t	ll;
1252 		uint8_t		c[8];
1253 	} lrs1, lrs2, lrd;
1254 	uint64_t r;
1255 
1256 	nrs1 = pinst.rs1;
1257 	nrs2 = pinst.rs2;
1258 	nrd = pinst.rd;
1259 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1260 		nrs1 = (nrs1 & 0x1e) | 0x20;
1261 	if ((nrs2 & 1) == 1)
1262 		nrs2 = (nrs2 & 0x1e) | 0x20;
1263 	if ((nrd & 1) == 1)
1264 		nrd = (nrd & 0x1e) | 0x20;
1265 
1266 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1267 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1268 
1269 	r = pfpsd->fp_current_read_gsr(fp);
1270 	ao = (int)GSR_ALIGN(r);
1271 
1272 	for (i = 0, j = ao, k = 0; i <= 7; i++)
1273 		if (j <= 7) {
1274 			lrd.c[i] = lrs1.c[j++];
1275 		} else {
1276 			lrd.c[i] = lrs2.c[k++];
1277 		}
1278 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1279 
1280 	return (ftt_none);
1281 }
1282 
1283 /*
1284  * Simulator for bshuffle instruction.
1285  */
1286 static enum ftt_type
1287 vis_bshuffle(
1288 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1289 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1290 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1291 {
1292 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1293 	int	i, j, ao;
1294 	union {
1295 		uint64_t	ll;
1296 		uint8_t		c[8];
1297 	} lrs1, lrs2, lrd;
1298 	uint64_t r;
1299 
1300 	VISINFO_KSTAT(vis_bshuffle);
1301 	nrs1 = pinst.rs1;
1302 	nrs2 = pinst.rs2;
1303 	nrd = pinst.rd;
1304 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1305 		nrs1 = (nrs1 & 0x1e) | 0x20;
1306 	if ((nrs2 & 1) == 1)
1307 		nrs2 = (nrs2 & 0x1e) | 0x20;
1308 	if ((nrd & 1) == 1)
1309 		nrd = (nrd & 0x1e) | 0x20;
1310 
1311 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1312 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1313 
1314 	r = pfpsd->fp_current_read_gsr(fp);
1315 	ao = (int)GSR_MASK(r);
1316 
1317 	/*
1318 	 * BSHUFFLE Destination Byte Selection
1319 	 * rd Byte	Source
1320 	 * 0		rs byte[GSR.mask<31..28>]
1321 	 * 1		rs byte[GSR.mask<27..24>]
1322 	 * 2		rs byte[GSR.mask<23..20>]
1323 	 * 3		rs byte[GSR.mask<19..16>]
1324 	 * 4		rs byte[GSR.mask<15..12>]
1325 	 * 5		rs byte[GSR.mask<11..8>]
1326 	 * 6		rs byte[GSR.mask<7..4>]
1327 	 * 7		rs byte[GSR.mask<3..0>]
1328 	 * P.S. rs1 is the upper half and rs2 is the lower half
1329 	 * Bytes in the source value are numbered from most to
1330 	 * least significant
1331 	 */
1332 	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1333 		j = ao & 0xf;		/* get byte number */
1334 		if (j < 8) {
1335 			lrd.c[i] = lrs1.c[j];
1336 		} else {
1337 			lrd.c[i] = lrs2.c[j - 8];
1338 		}
1339 	}
1340 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1341 
1342 	return (ftt_none);
1343 }
1344 
1345 /*
1346  * Simulator for siam instruction.
1347  */
1348 static enum ftt_type
1349 vis_siam(
1350 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1351 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1352 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1353 {
1354 	uint_t	nrs2;			/* Register number fields. */
1355 	uint64_t g, r;
1356 	nrs2 = inst.rs2;
1357 
1358 	g = pfpsd->fp_current_read_gsr(fp);
1359 	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1360 	r = nrs2 & 0x7;			/* get mode(3 bit) */
1361 	g |= (r << GSR_IRND_SHIFT);
1362 	pfpsd->fp_current_write_gsr(g, fp);
1363 	return (ftt_none);
1364 }
1365 
1366 /*
1367  * Simulator for VIS loads and stores between floating-point unit and memory.
1368  */
1369 enum ftt_type
1370 vis_fldst(
1371 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1372 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1373 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1374 	void		*prw,	/* Pointer to locals and ins. */
1375 	uint_t		asi)	/* asi to emulate! */
1376 {
1377 	union {
1378 		vis_inst_type	inst;
1379 		fp_inst_type	pinst;
1380 	} i;
1381 
1382 	ASSERT(USERMODE(pregs->r_tstate));
1383 	i.pinst = pinst;
1384 	switch (asi) {
1385 		case ASI_PST8_P:
1386 		case ASI_PST8_S:
1387 		case ASI_PST16_P:
1388 		case ASI_PST16_S:
1389 		case ASI_PST32_P:
1390 		case ASI_PST32_S:
1391 		case ASI_PST8_PL:
1392 		case ASI_PST8_SL:
1393 		case ASI_PST16_PL:
1394 		case ASI_PST16_SL:
1395 		case ASI_PST32_PL:
1396 		case ASI_PST32_SL:
1397 			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1398 			    prw, asi));
1399 		case ASI_FL8_P:
1400 		case ASI_FL8_S:
1401 		case ASI_FL8_PL:
1402 		case ASI_FL8_SL:
1403 		case ASI_FL16_P:
1404 		case ASI_FL16_S:
1405 		case ASI_FL16_PL:
1406 		case ASI_FL16_SL:
1407 			return (vis_short_fls(pfpsd, i.inst, pregs,
1408 			    prw, asi));
1409 		case ASI_BLK_AIUP:
1410 		case ASI_BLK_AIUS:
1411 		case ASI_BLK_AIUPL:
1412 		case ASI_BLK_AIUSL:
1413 		case ASI_BLK_P:
1414 		case ASI_BLK_S:
1415 		case ASI_BLK_PL:
1416 		case ASI_BLK_SL:
1417 		case ASI_BLK_COMMIT_P:
1418 		case ASI_BLK_COMMIT_S:
1419 			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1420 			    prw, asi));
1421 		default:
1422 			return (ftt_unimplemented);
1423 	}
1424 }
1425 
1426 /*
1427  * Simulator for partial stores between floating-point unit and memory.
1428  */
1429 static enum ftt_type
1430 vis_prtl_fst(
1431 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1432 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1433 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1434 	void		*prw,	/* Pointer to locals and ins. */
1435 	uint_t		asi)	/* asi to emulate! */
1436 {
1437 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1438 	uint_t	opf, msk;
1439 	int	h, i, j;
1440 	uint64_t ea, tmsk;
1441 	union {
1442 		freg_type	f;
1443 		uint64_t	ll;
1444 		uint32_t	i[2];
1445 		uint16_t	s[4];
1446 		uint8_t		c[8];
1447 	} k, l, res;
1448 	enum ftt_type   ftt;
1449 
1450 	nrs1 = inst.rs1;
1451 	nrs2 = inst.rs2;
1452 	nrd = inst.rd;
1453 	if ((nrd & 1) == 1) 		/* fix register encoding */
1454 		nrd = (nrd & 0x1e) | 0x20;
1455 	opf = inst.opf;
1456 	res.ll = 0;
1457 	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1458 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1459 		if (ftt != ftt_none)
1460 			return (ftt);
1461 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1462 		if (ftt != ftt_none)
1463 			return (ftt);
1464 		msk = (uint_t)tmsk;
1465 	} else {
1466 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1467 		return (ftt_unimplemented);
1468 	}
1469 
1470 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1471 	if ((ea & 0x3) != 0)
1472 		return (ftt_alignment);	/* Require 32 bit-alignment. */
1473 
1474 	switch (asi) {
1475 	case ASI_PST8_P:
1476 	case ASI_PST8_S:
1477 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1478 		if (ftt != ftt_none)
1479 			return (ftt);
1480 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1481 		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1482 			if ((msk & j) == j)
1483 				res.c[i] = k.c[i];
1484 			else
1485 				res.c[i] = l.c[i];
1486 		}
1487 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1488 		if (ftt != ftt_none)
1489 			return (ftt);
1490 		break;
1491 	case ASI_PST8_PL:	/* little-endian */
1492 	case ASI_PST8_SL:
1493 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1494 		if (ftt != ftt_none)
1495 			return (ftt);
1496 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1497 		for (h = 7, i = 0, j = 1; i <= 7; h--, i++, j <<= 1) {
1498 			if ((msk & j) == j)
1499 				res.c[i] = k.c[h];
1500 			else
1501 				res.c[i] = l.c[i];
1502 		}
1503 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1504 		if (ftt != ftt_none)
1505 			return (ftt);
1506 		break;
1507 	case ASI_PST16_P:
1508 	case ASI_PST16_S:
1509 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1510 		if (ftt != ftt_none)
1511 			return (ftt);
1512 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1513 		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1514 			if ((msk & j) == j)
1515 				res.s[i] = k.s[i];
1516 			else
1517 				res.s[i] = l.s[i];
1518 		}
1519 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1520 		if (ftt != ftt_none)
1521 			return (ftt);
1522 		break;
1523 	case ASI_PST16_PL:
1524 	case ASI_PST16_SL:
1525 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1526 		if (ftt != ftt_none)
1527 			return (ftt);
1528 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1529 		for (h = 7, i = 0, j = 1; i <= 6; h -= 2, i += 2, j <<= 1) {
1530 			if ((msk & j) == j) {
1531 				res.c[i] = k.c[h];
1532 				res.c[i+1] = k.c[h-1];
1533 			} else {
1534 				res.c[i] = l.c[i];
1535 				res.c[i+1] = l.c[i+1];
1536 			}
1537 		}
1538 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1539 		if (ftt != ftt_none)
1540 			return (ftt);
1541 		break;
1542 	case ASI_PST32_P:
1543 	case ASI_PST32_S:
1544 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1545 		if (ftt != ftt_none)
1546 			return (ftt);
1547 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1548 		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1549 			if ((msk & j) == j)
1550 				res.i[i] = k.i[i];
1551 			else
1552 				res.i[i] = l.i[i];
1553 		}
1554 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1555 		if (ftt != ftt_none)
1556 			return (ftt);
1557 		break;
1558 	case ASI_PST32_PL:
1559 	case ASI_PST32_SL:
1560 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1561 		if (ftt != ftt_none)
1562 			return (ftt);
1563 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1564 		for (h = 7, i = 0, j = 1; i <= 4; h -= 4, i += 4, j <<= 1) {
1565 			if ((msk & j) == j) {
1566 				res.c[i] = k.c[h];
1567 				res.c[i+1] = k.c[h-1];
1568 				res.c[i+2] = k.c[h-2];
1569 				res.c[i+3] = k.c[h-3];
1570 			} else {
1571 				res.c[i] = l.c[i];
1572 				res.c[i+1] = l.c[i+1];
1573 				res.c[i+2] = l.c[i+2];
1574 				res.c[i+3] = l.c[i+3];
1575 			}
1576 		}
1577 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1578 		if (ftt != ftt_none)
1579 			return (ftt);
1580 		break;
1581 	}
1582 
1583 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1584 	pregs->r_npc += 4;
1585 	return (ftt_none);
1586 }
1587 
1588 /*
1589  * Simulator for short load/stores between floating-point unit and memory.
1590  */
1591 static enum ftt_type
1592 vis_short_fls(
1593 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1594 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1595 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1596 	void		*prw,	/* Pointer to locals and ins. */
1597 	uint_t		asi)	/* asi to emulate! */
1598 {
1599 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1600 	uint_t	opf;
1601 	uint64_t ea, tea;
1602 	union {
1603 		freg_type	f;
1604 		uint64_t	ll;
1605 		uint32_t	i[2];
1606 		uint16_t	s[4];
1607 		uint8_t		c[8];
1608 	} k;
1609 	union {
1610 		vis_inst_type	inst;
1611 		int		i;
1612 	} fp;
1613 	enum ftt_type   ftt = ftt_none;
1614 	ushort_t us;
1615 	uchar_t uc;
1616 
1617 	nrs1 = inst.rs1;
1618 	nrs2 = inst.rs2;
1619 	nrd = inst.rd;
1620 	if ((nrd & 1) == 1) 		/* fix register encoding */
1621 		nrd = (nrd & 0x1e) | 0x20;
1622 	opf = inst.opf;
1623 	fp.inst = inst;
1624 	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1625 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1626 		if (ftt != ftt_none)
1627 			return (ftt);
1628 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1629 		if (ftt != ftt_none)
1630 			return (ftt);
1631 		ea += tea;
1632 	} else {	/* effective address = rs1 + imm13 */
1633 		fp.inst = inst;
1634 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1635 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1636 		if (ftt != ftt_none)
1637 			return (ftt);
1638 		ea += tea;
1639 	}
1640 	if (get_udatamodel() == DATAMODEL_ILP32)
1641 		ea = (uint64_t)(caddr32_t)ea;
1642 
1643 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1644 	switch (asi) {
1645 	case ASI_FL8_P:
1646 	case ASI_FL8_S:
1647 	case ASI_FL8_PL:		/* little-endian */
1648 	case ASI_FL8_SL:
1649 		if ((inst.op3 & 7) == 3) {	/* load byte */
1650 			if (fuword8((void *)ea, &uc) == -1)
1651 				return (ftt_fault);
1652 			k.ll = 0;
1653 			k.c[7] = uc;
1654 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1655 		} else {			/* store byte */
1656 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1657 			uc = k.c[7];
1658 			if (subyte((caddr_t)ea, uc) == -1)
1659 				return (ftt_fault);
1660 		}
1661 		break;
1662 	case ASI_FL16_P:
1663 	case ASI_FL16_S:
1664 		if ((ea & 1) == 1)
1665 			return (ftt_alignment);
1666 		if ((inst.op3 & 7) == 3) {	/* load short */
1667 			if (fuword16((void *)ea, &us) == -1)
1668 				return (ftt_fault);
1669 			k.ll = 0;
1670 			k.s[3] = us;
1671 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1672 		} else {			/* store short */
1673 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1674 			us = k.s[3];
1675 			if (suword16((caddr_t)ea, us) == -1)
1676 				return (ftt_fault);
1677 		}
1678 		break;
1679 	case ASI_FL16_PL:		/* little-endian */
1680 	case ASI_FL16_SL:
1681 		if ((ea & 1) == 1)
1682 			return (ftt_alignment);
1683 		if ((inst.op3 & 7) == 3) {	/* load short */
1684 			if (fuword16((void *)ea, &us) == -1)
1685 				return (ftt_fault);
1686 			k.ll = 0;
1687 			k.c[6] = (uchar_t)us;
1688 			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1689 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1690 		} else {			/* store short */
1691 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1692 			uc = k.c[7];
1693 			us = (ushort_t)((uc << 8) | k.c[6]);
1694 			if (suword16((void *)ea, us) == -1)
1695 				return (ftt_fault);
1696 		}
1697 		break;
1698 	}
1699 
1700 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1701 	pregs->r_npc += 4;
1702 	return (ftt_none);
1703 }
1704 
1705 /*
1706  * Simulator for block loads and stores between floating-point unit and memory.
1707  * We pass the addrees of ea to sync_data_memory() to flush the Ecache.
1708  * Sync_data_memory() calls platform dependent code to flush the Ecache.
1709  */
1710 static enum ftt_type
1711 vis_blk_fldst(
1712 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1713 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1714 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1715 	void		*prw,	/* Pointer to locals and ins. */
1716 	uint_t		asi)	/* asi to emulate! */
1717 {
1718 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1719 	uint_t	opf, h, i, j;
1720 	uint64_t ea, tea;
1721 	union {
1722 		freg_type	f;
1723 		uint64_t	ll;
1724 		uint8_t		c[8];
1725 	} k, l;
1726 	union {
1727 		vis_inst_type	inst;
1728 		int32_t		i;
1729 	} fp;
1730 	enum ftt_type   ftt;
1731 	boolean_t little_endian = B_FALSE;
1732 
1733 	nrs1 = inst.rs1;
1734 	nrs2 = inst.rs2;
1735 	nrd = inst.rd;
1736 	if ((nrd & 1) == 1) 		/* fix register encoding */
1737 		nrd = (nrd & 0x1e) | 0x20;
1738 
1739 	/* ensure register is 8-double precision aligned */
1740 	if ((nrd & 0xf) != 0)
1741 		return (ftt_unimplemented);
1742 
1743 	opf = inst.opf;
1744 	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1745 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1746 		if (ftt != ftt_none)
1747 			return (ftt);
1748 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1749 		if (ftt != ftt_none)
1750 			return (ftt);
1751 		ea += tea;
1752 	} else {			/* effective address = rs1 + imm13 */
1753 		fp.inst = inst;
1754 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1755 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1756 		if (ftt != ftt_none)
1757 			return (ftt);
1758 		ea += tea;
1759 	}
1760 	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1761 		return (ftt_alignment);
1762 
1763 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1764 	switch (asi) {
1765 	case ASI_BLK_AIUPL:
1766 	case ASI_BLK_AIUSL:
1767 	case ASI_BLK_PL:
1768 	case ASI_BLK_SL:
1769 		little_endian = B_TRUE;
1770 		/* FALLTHROUGH */
1771 	case ASI_BLK_AIUP:
1772 	case ASI_BLK_AIUS:
1773 	case ASI_BLK_P:
1774 	case ASI_BLK_S:
1775 	case ASI_BLK_COMMIT_P:
1776 	case ASI_BLK_COMMIT_S:
1777 		if ((inst.op3 & 7) == 3) {	/* lddf */
1778 			for (i = 0; i < 8; i++, nrd += 2) {
1779 				ftt = _fp_read_extword((uint64_t *)ea, &k.ll,
1780 				    pfpsd);
1781 				if (ftt != ftt_none)
1782 					return (ftt);
1783 				if (little_endian) {
1784 					for (j = 0, h = 7; j < 8; j++, h--)
1785 						l.c[h] = k.c[j];
1786 					k.ll = l.ll;
1787 				}
1788 				_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD,
1789 				    nrd);
1790 				ea += 8;
1791 			}
1792 		} else {			/* stdf */
1793 			for (i = 0; i < 8; i++, nrd += 2) {
1794 				_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD,
1795 				    nrd);
1796 				if (little_endian) {
1797 					for (j = 0, h = 7; j < 8; j++, h--)
1798 						l.c[h] = k.c[j];
1799 					k.ll = l.ll;
1800 				}
1801 				ftt = _fp_write_extword((uint64_t *)ea, k.ll,
1802 				    pfpsd);
1803 				if (ftt != ftt_none)
1804 					return (ftt);
1805 				ea += 8;
1806 			}
1807 		}
1808 		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1809 			sync_data_memory((caddr_t)(ea - 64), 64);
1810 		break;
1811 	default:
1812 		/* addr of unimp inst */
1813 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1814 		return (ftt_unimplemented);
1815 	}
1816 
1817 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1818 	pregs->r_npc += 4;
1819 	return (ftt_none);
1820 }
1821 
1822 /*
1823  * Simulator for rd %gsr instruction.
1824  */
1825 enum ftt_type
1826 vis_rdgsr(
1827 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1828 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1829 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1830 	void		*prw,	/* Pointer to locals and ins. */
1831 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1832 {
1833 	uint_t nrd;
1834 	uint64_t r;
1835 	enum ftt_type ftt = ftt_none;
1836 
1837 	nrd = pinst.rd;
1838 
1839 	r = pfpsd->fp_current_read_gsr(fp);
1840 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
1841 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1842 	pregs->r_npc += 4;
1843 	return (ftt);
1844 }
1845 
1846 /*
1847  * Simulator for wr %gsr instruction.
1848  */
1849 enum ftt_type
1850 vis_wrgsr(
1851 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1852 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1853 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1854 	void		*prw,	/* Pointer to locals and ins. */
1855 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1856 {
1857 	uint_t nrs1;
1858 	uint64_t r, r1, r2;
1859 	enum ftt_type ftt = ftt_none;
1860 
1861 	nrs1 = pinst.rs1;
1862 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
1863 	if (ftt != ftt_none)
1864 		return (ftt);
1865 	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
1866 		uint_t nrs2;
1867 
1868 		nrs2 = pinst.rs2;
1869 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
1870 		if (ftt != ftt_none)
1871 			return (ftt);
1872 	} else {	/* use sign_ext(simm13) */
1873 		union {
1874 			fp_inst_type	inst;
1875 			uint32_t	i;
1876 		} fp;
1877 
1878 		fp.inst = pinst;		/* Extract simm13 field */
1879 		r2 = (fp.i << 19) >> 19;
1880 	}
1881 	r = r1 ^ r2;
1882 	pfpsd->fp_current_write_gsr(r, fp);
1883 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1884 	pregs->r_npc += 4;
1885 	return (ftt);
1886 }
1887 
1888 /*
1889  * This is the loadable module wrapper.
1890  */
1891 #include <sys/errno.h>
1892 #include <sys/modctl.h>
1893 
1894 /*
1895  * Module linkage information for the kernel.
1896  */
1897 extern struct mod_ops mod_miscops;
1898 
1899 static struct modlmisc modlmisc = {
1900 	&mod_miscops,
1901 	"vis fp simulation",
1902 };
1903 
1904 static struct modlinkage modlinkage = {
1905 	MODREV_1, (void *)&modlmisc, NULL
1906 };
1907 
1908 int
1909 _init(void)
1910 {
1911 	return (mod_install(&modlinkage));
1912 }
1913 
1914 int
1915 _info(struct modinfo *modinfop)
1916 {
1917 	return (mod_info(&modlinkage, modinfop));
1918 }
1919