xref: /titanic_52/usr/src/uts/sun4/os/visinstr.c (revision 70025d765b044c6d8594bb965a2247a61e991a99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /* VIS floating point instruction simulator for Sparc FPU simulator. */
30 
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/fpu/fpusystm.h>
34 #include <sys/fpu/fpu_simulator.h>
35 #include <sys/vis_simulator.h>
36 #include <sys/fpu/globals.h>
37 #include <sys/privregs.h>
38 #include <sys/sun4asi.h>
39 #include <sys/machasi.h>
40 #include <sys/debug.h>
41 #include <sys/cpu_module.h>
42 #include <sys/systm.h>
43 #include <sys/machsystm.h>
44 
/*
 * Field-name aliases.  Each trailing comment names the type macro that the
 * alias has to be kept in step with.
 */
#define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
#define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
#define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */

/*
 * Forward declarations of the file-local VIS simulation routines.  Every
 * routine reports its outcome as an ftt_type trap code.
 */
static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
				void *);
static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
				struct regs *, void *, kfpu_t *);
static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
				void *);
static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
				kfpu_t *);
static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
				void *, kfpu_t *);
static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
				kfpu_t *);
static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
				void *);
static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type);
static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
				void *, uint_t);
static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
				struct regs *, void *, uint_t);
static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
				struct regs *, void *, uint_t);
74 
75 /*
76  * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
77  * traps.
78  */
79 enum ftt_type
80 vis_fpu_simulator(
81 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
82 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
83 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
84 	void		*prw,	/* Pointer to locals and ins. */
85 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
86 {
87 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
88 	uint_t	us1, us2, usr;
89 	uint64_t lus1, lus2, lusr;
90 	enum ftt_type ftt = ftt_none;
91 	union {
92 		vis_inst_type	inst;
93 		fp_inst_type	pinst;
94 	} f;
95 
96 	ASSERT(USERMODE(pregs->r_tstate));
97 	nrs1 = pinst.rs1;
98 	nrs2 = pinst.rs2;
99 	nrd = pinst.rd;
100 	f.pinst = pinst;
101 	if ((f.inst.opf & 1) == 0) {		/* double precision */
102 		if ((nrs1 & 1) == 1) 		/* fix register encoding */
103 			nrs1 = (nrs1 & 0x1e) | 0x20;
104 		if ((nrs2 & 1) == 1)
105 			nrs2 = (nrs2 & 0x1e) | 0x20;
106 		if ((nrd & 1) == 1)
107 			nrd = (nrd & 0x1e) | 0x20;
108 	}
109 
110 	switch (f.inst.opf) {
111 		/* these instr's do not use fp regs */
112 	case edge8:
113 	case edge8l:
114 	case edge8n:
115 	case edge8ln:
116 	case edge16:
117 	case edge16l:
118 	case edge16n:
119 	case edge16ln:
120 	case edge32:
121 	case edge32l:
122 	case edge32n:
123 	case edge32ln:
124 		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
125 		break;
126 	case array8:
127 	case array16:
128 	case array32:
129 		ftt = vis_array(pfpsd, f.inst, pregs, prw);
130 		break;
131 	case alignaddr:
132 	case alignaddrl:
133 		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
134 		break;
135 	case bmask:
136 		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
137 		break;
138 	case fcmple16:
139 	case fcmpne16:
140 	case fcmpgt16:
141 	case fcmpeq16:
142 	case fcmple32:
143 	case fcmpne32:
144 	case fcmpgt32:
145 	case fcmpeq32:
146 		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
147 		break;
148 	case fmul8x16:
149 	case fmul8x16au:
150 	case fmul8x16al:
151 	case fmul8sux16:
152 	case fmul8ulx16:
153 	case fmuld8sux16:
154 	case fmuld8ulx16:
155 		ftt = vis_fmul(pfpsd, f.inst);
156 		break;
157 	case fpack16:
158 	case fpack32:
159 	case fpackfix:
160 	case fexpand:
161 	case fpmerge:
162 		ftt = vis_fpixel(pfpsd, f.inst, fp);
163 		break;
164 	case pdist:
165 		ftt = vis_pdist(pfpsd, pinst);
166 		break;
167 	case faligndata:
168 		ftt = vis_faligndata(pfpsd, pinst, fp);
169 		break;
170 	case bshuffle:
171 		ftt = vis_bshuffle(pfpsd, pinst, fp);
172 		break;
173 	case fpadd16:
174 	case fpadd16s:
175 	case fpadd32:
176 	case fpadd32s:
177 	case fpsub16:
178 	case fpsub16s:
179 	case fpsub32:
180 	case fpsub32s:
181 		ftt = vis_fpaddsub(pfpsd, f.inst);
182 		break;
183 	case fzero:
184 		lusr = 0;
185 		_fp_pack_extword(pfpsd, &lusr, nrd);
186 		break;
187 	case fzeros:
188 		usr = 0;
189 		_fp_pack_word(pfpsd, &usr, nrd);
190 		break;
191 	case fnor:
192 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
193 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
194 		lusr = ~(lus1 | lus2);
195 		_fp_pack_extword(pfpsd, &lusr, nrd);
196 		break;
197 	case fnors:
198 		_fp_unpack_word(pfpsd, &us1, nrs1);
199 		_fp_unpack_word(pfpsd, &us2, nrs2);
200 		usr = ~(us1 | us2);
201 		_fp_pack_word(pfpsd, &usr, nrd);
202 		break;
203 	case fandnot2:
204 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
205 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
206 		lusr = (lus1 & ~lus2);
207 		_fp_pack_extword(pfpsd, &lusr, nrd);
208 		break;
209 	case fandnot2s:
210 		_fp_unpack_word(pfpsd, &us1, nrs1);
211 		_fp_unpack_word(pfpsd, &us2, nrs2);
212 		usr = (us1 & ~us2);
213 		_fp_pack_word(pfpsd, &usr, nrd);
214 		break;
215 	case fnot2:
216 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
217 		lusr = ~lus2;
218 		_fp_pack_extword(pfpsd, &lusr, nrd);
219 		break;
220 	case fnot2s:
221 		_fp_unpack_word(pfpsd, &us2, nrs2);
222 		usr = ~us2;
223 		_fp_pack_word(pfpsd, &usr, nrd);
224 		break;
225 	case fandnot1:
226 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
227 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
228 		lusr = (~lus1 & lus2);
229 		_fp_pack_extword(pfpsd, &lusr, nrd);
230 		break;
231 	case fandnot1s:
232 		_fp_unpack_word(pfpsd, &us1, nrs1);
233 		_fp_unpack_word(pfpsd, &us2, nrs2);
234 		usr = (~us1 & us2);
235 		_fp_pack_word(pfpsd, &usr, nrd);
236 		break;
237 	case fnot1:
238 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
239 		lusr = ~lus1;
240 		_fp_pack_extword(pfpsd, &lusr, nrd);
241 		break;
242 	case fnot1s:
243 		_fp_unpack_word(pfpsd, &us1, nrs1);
244 		usr = ~us1;
245 		_fp_pack_word(pfpsd, &usr, nrd);
246 		break;
247 	case fxor:
248 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
249 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
250 		lusr = (lus1 ^ lus2);
251 		_fp_pack_extword(pfpsd, &lusr, nrd);
252 		break;
253 	case fxors:
254 		_fp_unpack_word(pfpsd, &us1, nrs1);
255 		_fp_unpack_word(pfpsd, &us2, nrs2);
256 		usr = (us1 ^ us2);
257 		_fp_pack_word(pfpsd, &usr, nrd);
258 		break;
259 	case fnand:
260 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
261 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
262 		lusr = ~(lus1 & lus2);
263 		_fp_pack_extword(pfpsd, &lusr, nrd);
264 		break;
265 	case fnands:
266 		_fp_unpack_word(pfpsd, &us1, nrs1);
267 		_fp_unpack_word(pfpsd, &us2, nrs2);
268 		usr = ~(us1 & us2);
269 		_fp_pack_word(pfpsd, &usr, nrd);
270 		break;
271 	case fand:
272 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
273 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
274 		lusr = (lus1 & lus2);
275 		_fp_pack_extword(pfpsd, &lusr, nrd);
276 		break;
277 	case fands:
278 		_fp_unpack_word(pfpsd, &us1, nrs1);
279 		_fp_unpack_word(pfpsd, &us2, nrs2);
280 		usr = (us1 & us2);
281 		_fp_pack_word(pfpsd, &usr, nrd);
282 		break;
283 	case fxnor:
284 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
285 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
286 		lusr = ~(lus1 ^ lus2);
287 		_fp_pack_extword(pfpsd, &lusr, nrd);
288 		break;
289 	case fxnors:
290 		_fp_unpack_word(pfpsd, &us1, nrs1);
291 		_fp_unpack_word(pfpsd, &us2, nrs2);
292 		usr = ~(us1 ^ us2);
293 		_fp_pack_word(pfpsd, &usr, nrd);
294 		break;
295 	case fsrc1:
296 		_fp_unpack_extword(pfpsd, &lusr, nrs1);
297 		_fp_pack_extword(pfpsd, &lusr, nrd);
298 		break;
299 	case fsrc1s:
300 		_fp_unpack_word(pfpsd, &usr, nrs1);
301 		_fp_pack_word(pfpsd, &usr, nrd);
302 		break;
303 	case fornot2:
304 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
305 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
306 		lusr = (lus1 | ~lus2);
307 		_fp_pack_extword(pfpsd, &lusr, nrd);
308 		break;
309 	case fornot2s:
310 		_fp_unpack_word(pfpsd, &us1, nrs1);
311 		_fp_unpack_word(pfpsd, &us2, nrs2);
312 		usr = (us1 | ~us2);
313 		_fp_pack_word(pfpsd, &usr, nrd);
314 		break;
315 	case fsrc2:
316 		_fp_unpack_extword(pfpsd, &lusr, nrs2);
317 		_fp_pack_extword(pfpsd, &lusr, nrd);
318 		break;
319 	case fsrc2s:
320 		_fp_unpack_word(pfpsd, &usr, nrs2);
321 		_fp_pack_word(pfpsd, &usr, nrd);
322 		break;
323 	case fornot1:
324 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
325 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
326 		lusr = (~lus1 | lus2);
327 		_fp_pack_extword(pfpsd, &lusr, nrd);
328 		break;
329 	case fornot1s:
330 		_fp_unpack_word(pfpsd, &us1, nrs1);
331 		_fp_unpack_word(pfpsd, &us2, nrs2);
332 		usr = (~us1 | us2);
333 		_fp_pack_word(pfpsd, &usr, nrd);
334 		break;
335 	case for_op:
336 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
337 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
338 		lusr = (lus1 | lus2);
339 		_fp_pack_extword(pfpsd, &lusr, nrd);
340 		break;
341 	case fors_op:
342 		_fp_unpack_word(pfpsd, &us1, nrs1);
343 		_fp_unpack_word(pfpsd, &us2, nrs2);
344 		usr = (us1 | us2);
345 		_fp_pack_word(pfpsd, &usr, nrd);
346 		break;
347 	case fone:
348 		lusr = 0xffffffffffffffff;
349 		_fp_pack_extword(pfpsd, &lusr, nrd);
350 		break;
351 	case fones:
352 		usr = 0xffffffffUL;
353 		_fp_pack_word(pfpsd, &usr, nrd);
354 		break;
355 	case siam:
356 		ftt = vis_siam(pfpsd, f.inst, fp);
357 		break;
358 	default:
359 		return (ftt_unimplemented);
360 	}
361 
362 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
363 	pregs->r_npc += 4;
364 	return (ftt);
365 }
366 
367 /*
368  * Simulator for edge instructions
369  */
370 static enum ftt_type
371 vis_edge(
372 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
373 	vis_inst_type	inst,	/* FPU instruction to simulate. */
374 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
375 	void		*prw)	/* Pointer to locals and ins. */
376 
377 {
378 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
379 	enum ftt_type ftt;
380 	uint64_t addrl, addrr, mask;
381 	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
382 	int al3l, al3r;			/* Lower 3 bits of address */
383 	int am32;			/* Whether PSTATE.AM == 1 */
384 
385 	nrs1 = inst.rs1;
386 	nrs2 = inst.rs2;
387 	nrd = inst.rd;
388 
389 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
390 	if (ftt != ftt_none)
391 		return (ftt);
392 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
393 	if (ftt != ftt_none)
394 		return (ftt);
395 
396 	/* Get PSTATE.AM to determine 32-bit vs 64-bit addressing */
397 	am32 =  pregs->r_tstate & TSTATE_AM;
398 	if (am32 == 1) {
399 		ah61l = addrl & 0xffffffff8;
400 		ah61r = addrr & 0xffffffff8;
401 	} else {
402 		ah61l = addrl & ~0x7;
403 		ah61r = addrr & ~0x7;
404 	}
405 
406 
407 	switch (inst.opf) {
408 	case edge8:
409 	case edge8n:
410 	case edge8l:
411 	case edge8ln:
412 		al3l = addrl & 0x7;
413 		switch (inst.opf) {
414 		case edge8:
415 		case edge8n:
416 			if (inst.opf == edge8) {
417 				VISINFO_KSTAT(vis_edge8);
418 			} else {
419 				VISINFO_KSTAT(vis_edge8n);
420 			}
421 			mask = 0xff >> al3l;
422 			if (ah61l == ah61r) {
423 				al3r = addrr & 0x7;
424 				mask &= (0xff << (0x7 - al3r)) & 0xff;
425 			}
426 			break;
427 		case edge8l:
428 		case edge8ln:
429 			if (inst.opf == edge8l) {
430 				VISINFO_KSTAT(vis_edge8l);
431 			} else {
432 				VISINFO_KSTAT(vis_edge8ln);
433 			}
434 			mask = (0xff << al3l) & 0xff;
435 			if (ah61l == ah61r) {
436 				al3r = addrr & 0x7;
437 				mask &= 0xff >> (0x7 - al3r);
438 			}
439 			break;
440 		}
441 		break;
442 	case edge16:
443 	case edge16l:
444 	case edge16n:
445 	case edge16ln:
446 		al3l = addrl & 0x6;
447 		al3l >>= 0x1;
448 		switch (inst.opf) {
449 		case edge16:
450 		case edge16n:
451 			if (inst.opf == edge16) {
452 				VISINFO_KSTAT(vis_edge16);
453 
454 			} else {
455 				VISINFO_KSTAT(vis_edge16n);
456 			}
457 			mask = 0xf >> al3l;
458 			if (ah61l == ah61r) {
459 				al3r = addrr & 0x6;
460 				al3r >>= 0x1;
461 				mask &= (0xf << (0x3 - al3r)) & 0xf;
462 			}
463 			break;
464 		case edge16l:
465 		case edge16ln:
466 			if (inst.opf == edge16l) {
467 				VISINFO_KSTAT(vis_edge16l);
468 
469 			} else {
470 				VISINFO_KSTAT(vis_edge16ln);
471 			}
472 
473 			mask = (0xf << al3l) & 0xf;
474 			if (ah61l == ah61r) {
475 				al3r = addrr & 0x6;
476 				al3r >>= 0x1;
477 				mask &= 0xf >> (0x3 - al3r);
478 			}
479 			break;
480 		}
481 		break;
482 	case edge32:
483 	case edge32l:
484 	case edge32n:
485 	case edge32ln:
486 		al3l = addrl & 0x4;
487 		al3l >>= 0x2;
488 
489 		switch (inst.opf) {
490 		case edge32:
491 		case edge32n:
492 			if (inst.opf == edge32) {
493 				VISINFO_KSTAT(vis_edge32);
494 
495 			} else {
496 				VISINFO_KSTAT(vis_edge32n);
497 			}
498 			mask = 0x3 >> al3l;
499 			if (ah61l == ah61r) {
500 				al3r = addrr & 0x4;
501 				al3r >>= 0x2;
502 				mask &= (0x3 << (0x1 - al3r)) & 0x3;
503 			}
504 			break;
505 		case edge32l:
506 		case edge32ln:
507 			if (inst.opf == edge32l) {
508 				VISINFO_KSTAT(vis_edge32l);
509 
510 			} else {
511 				VISINFO_KSTAT(vis_edge32ln);
512 			}
513 			mask = (0x3 << al3l) & 0x3;
514 			if (ah61l == ah61r) {
515 				al3r = addrr & 0x4;
516 				al3r >>= 0x2;
517 				mask &= 0x3 >> (0x1 - al3r);
518 			}
519 			break;
520 		}
521 		break;
522 	}
523 
524 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
525 
526 	switch (inst.opf) {
527 	case edge8:
528 	case edge8l:
529 	case edge16:
530 	case edge16l:
531 	case edge32:
532 	case edge32l:
533 
534 		/* We need to set the CCR if we have a carry overflow */
535 		/* If this is a 64 bit app, we need to CCR.xcc.v */
536 		/* This is the same as the SUBcc instruction */
537 		if (addrl > addrr) {
538 			if (am32 == 1) {
539 				pregs->r_tstate |= TSTATE_IV;
540 			} else {
541 				pregs->r_tstate |= TSTATE_XV;
542 			}
543 		}
544 		break;
545 	}
546 	return (ftt);
547 	}
548 /*
549  * Simulator for three dimentional array addressing instructions.
550  */
551 static enum ftt_type
552 vis_array(
553 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
554 	vis_inst_type	inst,	/* FPU instruction to simulate. */
555 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
556 	void		*prw)	/* Pointer to locals and ins. */
557 
558 {
559 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
560 	enum ftt_type ftt;
561 	uint64_t laddr, bsize, baddr;
562 	uint64_t nbit;
563 	int oy, oz;
564 
565 	nrs1 = inst.rs1;
566 	nrs2 = inst.rs2;
567 	nrd = inst.rd;
568 
569 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
570 	if (ftt != ftt_none)
571 		return (ftt);
572 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
573 	if (ftt != ftt_none)
574 		return (ftt);
575 
576 	if (bsize > 5) {
577 		bsize = 5;
578 	}
579 	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
580 	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
581 	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
582 
583 	baddr = 0;
584 	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
585 	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
586 	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
587 	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
588 	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
589 	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
590 	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
591 	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
592 	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
593 
594 	switch (inst.opf) {
595 	case array8:
596 		VISINFO_KSTAT(vis_array8);
597 		break;
598 	case array16:
599 		VISINFO_KSTAT(vis_array16);
600 		baddr <<= 1;
601 		break;
602 	case array32:
603 		VISINFO_KSTAT(vis_array32);
604 		baddr <<= 2;
605 		break;
606 	}
607 
608 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
609 
610 	return (ftt);
611 }
612 
613 /*
614  * Simulator for alignaddr and alignaddrl instructions.
615  */
616 static enum ftt_type
617 vis_alignaddr(
618 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
619 	vis_inst_type	inst,	/* FPU instruction to simulate. */
620 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
621 	void		*prw,	/* Pointer to locals and ins. */
622 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
623 {
624 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
625 	enum ftt_type ftt;
626 	uint64_t ea, tea, g, r;
627 	short s;
628 
629 	nrs1 = inst.rs1;
630 	nrs2 = inst.rs2;
631 	nrd = inst.rd;
632 
633 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
634 	if (ftt != ftt_none)
635 		return (ftt);
636 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
637 	if (ftt != ftt_none)
638 		return (ftt);
639 	ea += tea;
640 	r = ea & ~0x7;	/* zero least 3 significant bits */
641 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
642 
643 
644 	g = pfpsd->fp_current_read_gsr(fp);
645 	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
646 	r = ea & 0x7;
647 	if (inst.opf == alignaddrl) {
648 		s = (short)(~r);	/* 2's complement for alignaddrl */
649 		if (s < 0)
650 			r = (uint64_t)((s + 1) & 0x7);
651 		else
652 			r = (uint64_t)(s & 0x7);
653 	}
654 	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
655 	pfpsd->fp_current_write_gsr(g, fp);
656 
657 	return (ftt);
658 }
659 
660 /*
661  * Simulator for bmask instruction.
662  */
663 static enum ftt_type
664 vis_bmask(
665 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
666 	vis_inst_type	inst,	/* FPU instruction to simulate. */
667 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
668 	void		*prw,	/* Pointer to locals and ins. */
669 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
670 {
671 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
672 	enum ftt_type ftt;
673 	uint64_t ea, tea, g;
674 
675 	VISINFO_KSTAT(vis_bmask);
676 	nrs1 = inst.rs1;
677 	nrs2 = inst.rs2;
678 	nrd = inst.rd;
679 
680 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
681 	if (ftt != ftt_none)
682 		return (ftt);
683 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
684 	if (ftt != ftt_none)
685 		return (ftt);
686 	ea += tea;
687 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
688 
689 	g = pfpsd->fp_current_read_gsr(fp);
690 	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
691 
692 	/* Put the least significant 32 bits of ea in GSR.mask */
693 	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
694 	pfpsd->fp_current_write_gsr(g, fp);
695 	return (ftt);
696 }
697 
698 /*
699  * Simulator for fp[add|sub]* instruction.
700  */
701 static enum ftt_type
702 vis_fpaddsub(
703 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
704 	vis_inst_type	inst)	/* FPU instruction to simulate. */
705 {
706 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
707 	union {
708 		uint64_t	ll;
709 		uint32_t	i[2];
710 		uint16_t	s[4];
711 	} lrs1, lrs2, lrd;
712 	union {
713 		uint32_t	i;
714 		uint16_t	s[2];
715 	} krs1, krs2, krd;
716 	int i;
717 
718 	nrs1 = inst.rs1;
719 	nrs2 = inst.rs2;
720 	nrd = inst.rd;
721 	if ((inst.opf & 1) == 0) {	/* double precision */
722 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
723 			nrs1 = (nrs1 & 0x1e) | 0x20;
724 		if ((nrs2 & 1) == 1)
725 			nrs2 = (nrs2 & 0x1e) | 0x20;
726 		if ((nrd & 1) == 1)
727 			nrd = (nrd & 0x1e) | 0x20;
728 	}
729 	switch (inst.opf) {
730 	case fpadd16:
731 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
732 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
733 		for (i = 0; i <= 3; i++) {
734 			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
735 		}
736 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
737 		break;
738 	case fpadd16s:
739 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
740 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
741 		for (i = 0; i <= 1; i++) {
742 			krd.s[i] = krs1.s[i] + krs2.s[i];
743 		}
744 		_fp_pack_word(pfpsd, &krd.i, nrd);
745 		break;
746 	case fpadd32:
747 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
748 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
749 		for (i = 0; i <= 1; i++) {
750 			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
751 		}
752 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
753 		break;
754 	case fpadd32s:
755 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
756 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
757 		krd.i = krs1.i + krs2.i;
758 		_fp_pack_word(pfpsd, &krd.i, nrd);
759 		break;
760 	case fpsub16:
761 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
762 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
763 		for (i = 0; i <= 3; i++) {
764 			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
765 		}
766 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
767 		break;
768 	case fpsub16s:
769 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
770 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
771 		for (i = 0; i <= 1; i++) {
772 			krd.s[i] = krs1.s[i] - krs2.s[i];
773 		}
774 		_fp_pack_word(pfpsd, &krd.i, nrd);
775 		break;
776 	case fpsub32:
777 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
778 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
779 		for (i = 0; i <= 1; i++) {
780 			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
781 		}
782 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
783 		break;
784 	case fpsub32s:
785 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
786 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
787 		krd.i = krs1.i - krs2.i;
788 		_fp_pack_word(pfpsd, &krd.i, nrd);
789 		break;
790 	}
791 	return (ftt_none);
792 }
793 
794 /*
795  * Simulator for fcmp* instruction.
796  */
797 static enum ftt_type
798 vis_fcmp(
799 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
800 	vis_inst_type	inst,	/* FPU instruction to simulate. */
801 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
802 	void		*prw)	/* Pointer to locals and ins. */
803 {
804 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
805 	union {
806 		uint64_t	ll;
807 		uint32_t	i[2];
808 		uint16_t	s[4];
809 	} krs1, krs2, krd;
810 	enum ftt_type ftt;
811 	short sr1, sr2;
812 	int i, ir1, ir2;
813 
814 	nrs1 = inst.rs1;
815 	nrs2 = inst.rs2;
816 	nrd = inst.rd;
817 	krd.ll = 0;
818 	if ((nrs1 & 1) == 1) 	/* fix register encoding */
819 		nrs1 = (nrs1 & 0x1e) | 0x20;
820 	if ((nrs2 & 1) == 1)
821 		nrs2 = (nrs2 & 0x1e) | 0x20;
822 
823 	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
824 	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
825 	switch (inst.opf) {
826 	case fcmple16:
827 		VISINFO_KSTAT(vis_fcmple16);
828 		for (i = 0; i <= 3; i++) {
829 			sr1 = (short)krs1.s[i];
830 			sr2 = (short)krs2.s[i];
831 			if (sr1 <= sr2)
832 				krd.ll += (0x8 >> i);
833 		}
834 		break;
835 	case fcmpne16:
836 		VISINFO_KSTAT(vis_fcmpne16);
837 		for (i = 0; i <= 3; i++) {
838 			sr1 = (short)krs1.s[i];
839 			sr2 = (short)krs2.s[i];
840 			if (sr1 != sr2)
841 				krd.ll += (0x8 >> i);
842 		}
843 		break;
844 	case fcmpgt16:
845 		VISINFO_KSTAT(vis_fcmpgt16);
846 		for (i = 0; i <= 3; i++) {
847 			sr1 = (short)krs1.s[i];
848 			sr2 = (short)krs2.s[i];
849 			if (sr1 > sr2)
850 				krd.ll += (0x8 >> i);
851 		}
852 		break;
853 	case fcmpeq16:
854 		VISINFO_KSTAT(vis_fcmpeq16);
855 		for (i = 0; i <= 3; i++) {
856 			sr1 = (short)krs1.s[i];
857 			sr2 = (short)krs2.s[i];
858 			if (sr1 == sr2)
859 				krd.ll += (0x8 >> i);
860 		}
861 		break;
862 	case fcmple32:
863 		VISINFO_KSTAT(vis_fcmple32);
864 		for (i = 0; i <= 1; i++) {
865 			ir1 = (int)krs1.i[i];
866 			ir2 = (int)krs2.i[i];
867 			if (ir1 <= ir2)
868 				krd.ll += (0x2 >> i);
869 		}
870 		break;
871 	case fcmpne32:
872 		VISINFO_KSTAT(vis_fcmpne32);
873 		for (i = 0; i <= 1; i++) {
874 			ir1 = (int)krs1.i[i];
875 			ir2 = (int)krs2.i[i];
876 			if (ir1 != ir2)
877 				krd.ll += (0x2 >> i);
878 		}
879 		break;
880 	case fcmpgt32:
881 		VISINFO_KSTAT(vis_fcmpgt32);
882 		for (i = 0; i <= 1; i++) {
883 			ir1 = (int)krs1.i[i];
884 			ir2 = (int)krs2.i[i];
885 			if (ir1 > ir2)
886 				krd.ll += (0x2 >> i);
887 		}
888 		break;
889 	case fcmpeq32:
890 		VISINFO_KSTAT(vis_fcmpeq32);
891 		for (i = 0; i <= 1; i++) {
892 			ir1 = (int)krs1.i[i];
893 			ir2 = (int)krs2.i[i];
894 			if (ir1 == ir2)
895 				krd.ll += (0x2 >> i);
896 		}
897 		break;
898 	}
899 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
900 	return (ftt);
901 }
902 
903 /*
904  * Simulator for fmul* instruction.
905  */
906 static enum ftt_type
907 vis_fmul(
908 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
909 	vis_inst_type	inst)	/* FPU instruction to simulate. */
910 {
911 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
912 	union {
913 		uint64_t	ll;
914 		uint32_t	i[2];
915 		uint16_t	s[4];
916 		uint8_t		c[8];
917 	} lrs1, lrs2, lrd;
918 	union {
919 		uint32_t	i;
920 		uint16_t	s[2];
921 		uint8_t		c[4];
922 	} krs1, krs2, kres;
923 	short s1, s2, sres;
924 	ushort_t us1;
925 	char c1;
926 	int i;
927 
928 	nrs1 = inst.rs1;
929 	nrs2 = inst.rs2;
930 	nrd = inst.rd;
931 	if ((inst.opf & 1) == 0) {	/* double precision */
932 		if ((nrd & 1) == 1) 	/* fix register encoding */
933 			nrd = (nrd & 0x1e) | 0x20;
934 	}
935 
936 	switch (inst.opf) {
937 	case fmul8x16:
938 		VISINFO_KSTAT(vis_fmul8x16);
939 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
940 		if ((nrs2 & 1) == 1)
941 			nrs2 = (nrs2 & 0x1e) | 0x20;
942 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
943 		for (i = 0; i <= 3; i++) {
944 			us1 = (ushort_t)krs1.c[i];
945 			s2 = (short)lrs2.s[i];
946 			kres.i = us1 * s2;
947 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
948 			if (kres.c[3] >= 0x80)
949 				sres++;
950 			lrd.s[i] = sres;
951 		}
952 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
953 		break;
954 	case fmul8x16au:
955 		VISINFO_KSTAT(vis_fmul8x16au);
956 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
957 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
958 		for (i = 0; i <= 3; i++) {
959 			us1 = (ushort_t)krs1.c[i];
960 			s2 = (short)krs2.s[0];
961 			kres.i = us1 * s2;
962 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
963 			if (kres.c[3] >= 0x80)
964 				sres++;
965 			lrd.s[i] = sres;
966 		}
967 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
968 		break;
969 	case fmul8x16al:
970 		VISINFO_KSTAT(vis_fmul8x16al);
971 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
972 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
973 		for (i = 0; i <= 3; i++) {
974 			us1 = (ushort_t)krs1.c[i];
975 			s2 = (short)krs2.s[1];
976 			kres.i = us1 * s2;
977 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
978 			if (kres.c[3] >= 0x80)
979 				sres++;
980 			lrd.s[i] = sres;
981 		}
982 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
983 		break;
984 	case fmul8sux16:
985 		VISINFO_KSTAT(vis_fmul8sux16);
986 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
987 			nrs1 = (nrs1 & 0x1e) | 0x20;
988 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
989 		if ((nrs2 & 1) == 1)
990 			nrs2 = (nrs2 & 0x1e) | 0x20;
991 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
992 		for (i = 0; i <= 3; i++) {
993 			c1 = lrs1.c[(i*2)];
994 			s1 = (short)c1;		/* keeps the sign alive */
995 			s2 = (short)lrs2.s[i];
996 			kres.i = s1 * s2;
997 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
998 			if (kres.c[3] >= 0x80)
999 				sres++;
1000 			if (sres < 0)
1001 				lrd.s[i] = (sres & 0xFFFF);
1002 			else
1003 				lrd.s[i] = sres;
1004 		}
1005 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1006 		break;
1007 	case fmul8ulx16:
1008 		VISINFO_KSTAT(vis_fmul8ulx16);
1009 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1010 			nrs1 = (nrs1 & 0x1e) | 0x20;
1011 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1012 		if ((nrs2 & 1) == 1)
1013 			nrs2 = (nrs2 & 0x1e) | 0x20;
1014 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1015 		for (i = 0; i <= 3; i++) {
1016 			us1 = (ushort_t)lrs1.c[(i*2)+1];
1017 			s2 = (short)lrs2.s[i];
1018 			kres.i = us1 * s2;
1019 			sres = (short)kres.s[0];
1020 			if (kres.s[1] >= 0x8000)
1021 				sres++;
1022 			lrd.s[i] = sres;
1023 		}
1024 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1025 		break;
1026 	case fmuld8sux16:
1027 		VISINFO_KSTAT(vis_fmuld8sux16);
1028 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1029 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1030 		for (i = 0; i <= 1; i++) {
1031 			c1 = krs1.c[(i*2)];
1032 			s1 = (short)c1;		/* keeps the sign alive */
1033 			s2 = (short)krs2.s[i];
1034 			kres.i = s1 * s2;
1035 			lrd.i[i] = kres.i << 8;
1036 		}
1037 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1038 		break;
1039 	case fmuld8ulx16:
1040 		VISINFO_KSTAT(vis_fmuld8ulx16);
1041 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1042 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1043 		for (i = 0; i <= 1; i++) {
1044 			us1 = (ushort_t)krs1.c[(i*2)+1];
1045 			s2 = (short)krs2.s[i];
1046 			lrd.i[i] = us1 * s2;
1047 		}
1048 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1049 		break;
1050 	}
1051 	return (ftt_none);
1052 }
1053 
1054 /*
1055  * Simulator for fpixel formatting instructions.
1056  */
1057 static enum ftt_type
1058 vis_fpixel(
1059 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1060 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1061 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1062 {
1063 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1064 	int	i, j, k, sf;
1065 	union {
1066 		uint64_t	ll;
1067 		uint32_t	i[2];
1068 		uint16_t	s[4];
1069 		uint8_t		c[8];
1070 	} lrs1, lrs2, lrd;
1071 	union {
1072 		uint32_t	i;
1073 		uint16_t	s[2];
1074 		uint8_t		c[4];
1075 	} krs1, krs2, krd;
1076 	uint64_t r;
1077 	int64_t l, m;
1078 	short s;
1079 	uchar_t uc;
1080 
1081 	nrs1 = inst.rs1;
1082 	nrs2 = inst.rs2;
1083 	nrd = inst.rd;
1084 	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1085 		if ((nrd & 1) == 1) 	/* fix register encoding */
1086 			nrd = (nrd & 0x1e) | 0x20;
1087 	}
1088 
1089 	switch (inst.opf) {
1090 	case fpack16:
1091 		VISINFO_KSTAT(vis_fpack16);
1092 		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1093 			nrs2 = (nrs2 & 0x1e) | 0x20;
1094 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1095 		r = pfpsd->fp_current_read_gsr(fp);
1096 		/* fpack16 ignores GSR.scale msb */
1097 		sf = (int)(GSR_SCALE(r) & 0xf);
1098 		for (i = 0; i <= 3; i++) {
1099 			s = (short)lrs2.s[i];	/* preserve the sign */
1100 			j = ((int)s << sf);
1101 			k = j >> 7;
1102 			if (k < 0) {
1103 				uc = 0;
1104 			} else if (k > 255) {
1105 				uc = 255;
1106 			} else {
1107 				uc = (uchar_t)k;
1108 			}
1109 			krd.c[i] = uc;
1110 		}
1111 		_fp_pack_word(pfpsd, &krd.i, nrd);
1112 		break;
1113 	case fpack32:
1114 		VISINFO_KSTAT(vis_fpack32);
1115 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1116 			nrs1 = (nrs1 & 0x1e) | 0x20;
1117 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1118 		if ((nrs2 & 1) == 1)
1119 			nrs2 = (nrs2 & 0x1e) | 0x20;
1120 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1121 
1122 		r = pfpsd->fp_current_read_gsr(fp);
1123 		sf = (int)GSR_SCALE(r);
1124 		lrd.ll = lrs1.ll << 8;
1125 		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1126 			j = (int)lrs2.i[i];	/* preserve the sign */
1127 			l = ((int64_t)j << sf);
1128 			m = l >> 23;
1129 			if (m < 0) {
1130 				uc = 0;
1131 			} else if (m > 255) {
1132 				uc = 255;
1133 			} else {
1134 				uc = (uchar_t)m;
1135 			}
1136 			lrd.c[k] = uc;
1137 		}
1138 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1139 		break;
1140 	case fpackfix:
1141 		VISINFO_KSTAT(vis_fpackfix);
1142 		if ((nrs2 & 1) == 1)
1143 			nrs2 = (nrs2 & 0x1e) | 0x20;
1144 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1145 
1146 		r = pfpsd->fp_current_read_gsr(fp);
1147 		sf = (int)GSR_SCALE(r);
1148 		for (i = 0; i <= 1; i++) {
1149 			j = (int)lrs2.i[i];	/* preserve the sign */
1150 			l = ((int64_t)j << sf);
1151 			m = l >> 16;
1152 			if (m < -32768) {
1153 				s = -32768;
1154 			} else if (m > 32767) {
1155 				s = 32767;
1156 			} else {
1157 				s = (short)m;
1158 			}
1159 			krd.s[i] = s;
1160 		}
1161 		_fp_pack_word(pfpsd, &krd.i, nrd);
1162 		break;
1163 	case fexpand:
1164 		VISINFO_KSTAT(vis_fexpand);
1165 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1166 		for (i = 0; i <= 3; i++) {
1167 			uc = krs2.c[i];
1168 			lrd.s[i] = (ushort_t)(uc << 4);
1169 		}
1170 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1171 		break;
1172 	case fpmerge:
1173 		VISINFO_KSTAT(vis_fpmerge);
1174 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1175 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1176 		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1177 			lrd.c[j] = krs1.c[i];
1178 			lrd.c[j+1] = krs2.c[i];
1179 		}
1180 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1181 		break;
1182 	}
1183 	return (ftt_none);
1184 }
1185 
1186 /*
1187  * Simulator for pdist instruction.
1188  */
1189 enum ftt_type
1190 vis_pdist(
1191 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1192 	fp_inst_type	pinst)	/* FPU instruction to simulate. */
1193 {
1194 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1195 	int	i;
1196 	short	s;
1197 	union {
1198 		uint64_t	ll;
1199 		uint8_t		c[8];
1200 	} lrs1, lrs2, lrd;
1201 
1202 	nrs1 = pinst.rs1;
1203 	nrs2 = pinst.rs2;
1204 	nrd = pinst.rd;
1205 	VISINFO_KSTAT(vis_pdist);
1206 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1207 		nrs1 = (nrs1 & 0x1e) | 0x20;
1208 	if ((nrs2 & 1) == 1)
1209 		nrs2 = (nrs2 & 0x1e) | 0x20;
1210 	if ((nrd & 1) == 1)
1211 		nrd = (nrd & 0x1e) | 0x20;
1212 
1213 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1214 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1215 	_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1216 
1217 	for (i = 0; i <= 7; i++) {
1218 		s = (short)(lrs1.c[i] - lrs2.c[i]);
1219 		if (s < 0)
1220 			s = ~s + 1;
1221 		lrd.ll += s;
1222 	}
1223 
1224 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1225 	return (ftt_none);
1226 }
1227 
1228 /*
1229  * Simulator for faligndata instruction.
1230  */
1231 static enum ftt_type
1232 vis_faligndata(
1233 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1234 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1235 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1236 {
1237 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1238 	int	i, j, k, ao;
1239 	union {
1240 		uint64_t	ll;
1241 		uint8_t		c[8];
1242 	} lrs1, lrs2, lrd;
1243 	uint64_t r;
1244 
1245 	nrs1 = pinst.rs1;
1246 	nrs2 = pinst.rs2;
1247 	nrd = pinst.rd;
1248 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1249 		nrs1 = (nrs1 & 0x1e) | 0x20;
1250 	if ((nrs2 & 1) == 1)
1251 		nrs2 = (nrs2 & 0x1e) | 0x20;
1252 	if ((nrd & 1) == 1)
1253 		nrd = (nrd & 0x1e) | 0x20;
1254 
1255 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1256 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1257 
1258 	r = pfpsd->fp_current_read_gsr(fp);
1259 	ao = (int)GSR_ALIGN(r);
1260 
1261 	for (i = 0, j = ao, k = 0; i <= 7; i++)
1262 		if (j <= 7) {
1263 			lrd.c[i] = lrs1.c[j++];
1264 		} else {
1265 			lrd.c[i] = lrs2.c[k++];
1266 		}
1267 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1268 
1269 	return (ftt_none);
1270 }
1271 
1272 /*
1273  * Simulator for bshuffle instruction.
1274  */
1275 static enum ftt_type
1276 vis_bshuffle(
1277 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1278 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1279 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1280 {
1281 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1282 	int	i, j, ao;
1283 	union {
1284 		uint64_t	ll;
1285 		uint8_t		c[8];
1286 	} lrs1, lrs2, lrd;
1287 	uint64_t r;
1288 
1289 	VISINFO_KSTAT(vis_bshuffle);
1290 	nrs1 = pinst.rs1;
1291 	nrs2 = pinst.rs2;
1292 	nrd = pinst.rd;
1293 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1294 		nrs1 = (nrs1 & 0x1e) | 0x20;
1295 	if ((nrs2 & 1) == 1)
1296 		nrs2 = (nrs2 & 0x1e) | 0x20;
1297 	if ((nrd & 1) == 1)
1298 		nrd = (nrd & 0x1e) | 0x20;
1299 
1300 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1301 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1302 
1303 	r = pfpsd->fp_current_read_gsr(fp);
1304 	ao = (int)GSR_MASK(r);
1305 
1306 	/*
1307 	 * BSHUFFLE Destination Byte Selection
1308 	 * rd Byte	Source
1309 	 * 0		rs byte[GSR.mask<31..28>]
1310 	 * 1		rs byte[GSR.mask<27..24>]
1311 	 * 2		rs byte[GSR.mask<23..20>]
1312 	 * 3		rs byte[GSR.mask<19..16>]
1313 	 * 4		rs byte[GSR.mask<15..12>]
1314 	 * 5		rs byte[GSR.mask<11..8>]
1315 	 * 6		rs byte[GSR.mask<7..4>]
1316 	 * 7		rs byte[GSR.mask<3..0>]
1317 	 * P.S. rs1 is the upper half and rs2 is the lower half
1318 	 * Bytes in the source value are numbered from most to
1319 	 * least significant
1320 	 */
1321 	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1322 		j = ao & 0xf;		/* get byte number */
1323 		if (j < 8) {
1324 			lrd.c[i] = lrs1.c[j];
1325 		} else {
1326 			lrd.c[i] = lrs2.c[j - 8];
1327 		}
1328 	}
1329 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1330 
1331 	return (ftt_none);
1332 }
1333 
1334 /*
1335  * Simulator for siam instruction.
1336  */
1337 static enum ftt_type
1338 vis_siam(
1339 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1340 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1341 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1342 {
1343 	uint_t	nrs2;			/* Register number fields. */
1344 	uint64_t g, r;
1345 	nrs2 = inst.rs2;
1346 
1347 	g = pfpsd->fp_current_read_gsr(fp);
1348 	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1349 	r = nrs2 & 0x7;			/* get mode(3 bit) */
1350 	g |= (r << GSR_IRND_SHIFT);
1351 	pfpsd->fp_current_write_gsr(g, fp);
1352 	return (ftt_none);
1353 }
1354 
1355 /*
1356  * Simulator for VIS loads and stores between floating-point unit and memory.
1357  */
1358 enum ftt_type
1359 vis_fldst(
1360 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1361 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1362 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1363 	void		*prw,	/* Pointer to locals and ins. */
1364 	uint_t		asi)	/* asi to emulate! */
1365 {
1366 	union {
1367 		vis_inst_type	inst;
1368 		fp_inst_type	pinst;
1369 	} i;
1370 
1371 	ASSERT(USERMODE(pregs->r_tstate));
1372 	i.pinst = pinst;
1373 	switch (asi) {
1374 		case ASI_PST8_P:
1375 		case ASI_PST8_S:
1376 		case ASI_PST16_P:
1377 		case ASI_PST16_S:
1378 		case ASI_PST32_P:
1379 		case ASI_PST32_S:
1380 		case ASI_PST8_PL:
1381 		case ASI_PST8_SL:
1382 		case ASI_PST16_PL:
1383 		case ASI_PST16_SL:
1384 		case ASI_PST32_PL:
1385 		case ASI_PST32_SL:
1386 			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1387 				prw, asi));
1388 		case ASI_FL8_P:
1389 		case ASI_FL8_S:
1390 		case ASI_FL8_PL:
1391 		case ASI_FL8_SL:
1392 		case ASI_FL16_P:
1393 		case ASI_FL16_S:
1394 		case ASI_FL16_PL:
1395 		case ASI_FL16_SL:
1396 			return (vis_short_fls(pfpsd, i.inst, pregs,
1397 				prw, asi));
1398 		case ASI_BLK_AIUP:
1399 		case ASI_BLK_AIUS:
1400 		case ASI_BLK_AIUPL:
1401 		case ASI_BLK_AIUSL:
1402 		case ASI_BLK_P:
1403 		case ASI_BLK_S:
1404 		case ASI_BLK_PL:
1405 		case ASI_BLK_SL:
1406 		case ASI_BLK_COMMIT_P:
1407 		case ASI_BLK_COMMIT_S:
1408 			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1409 				prw, asi));
1410 		default:
1411 			return (ftt_unimplemented);
1412 	}
1413 }
1414 
1415 /*
1416  * Simulator for partial stores between floating-point unit and memory.
1417  */
1418 static enum ftt_type
1419 vis_prtl_fst(
1420 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1421 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1422 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1423 	void		*prw,	/* Pointer to locals and ins. */
1424 	uint_t		asi)	/* asi to emulate! */
1425 {
1426 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1427 	uint_t	opf, msk;
1428 	int	h, i, j;
1429 	uint64_t ea, tmsk;
1430 	union {
1431 		freg_type	f;
1432 		uint64_t	ll;
1433 		uint32_t	i[2];
1434 		uint16_t	s[4];
1435 		uint8_t		c[8];
1436 	} k, l, res;
1437 	enum ftt_type   ftt;
1438 
1439 	nrs1 = inst.rs1;
1440 	nrs2 = inst.rs2;
1441 	nrd = inst.rd;
1442 	if ((nrd & 1) == 1) 		/* fix register encoding */
1443 		nrd = (nrd & 0x1e) | 0x20;
1444 	opf = inst.opf;
1445 	res.ll = 0;
1446 	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1447 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1448 		if (ftt != ftt_none)
1449 			return (ftt);
1450 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1451 		if (ftt != ftt_none)
1452 			return (ftt);
1453 		msk = (uint_t)tmsk;
1454 	} else {
1455 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1456 		return (ftt_unimplemented);
1457 	}
1458 
1459 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1460 	if ((ea & 0x3) != 0)
1461 		return (ftt_alignment);	/* Require 32 bit-alignment. */
1462 
1463 	switch (asi) {
1464 	case ASI_PST8_P:
1465 	case ASI_PST8_S:
1466 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1467 		if (ftt != ftt_none)
1468 			return (ftt);
1469 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1470 		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1471 			if ((msk & j) == j)
1472 				res.c[i] = k.c[i];
1473 			else
1474 				res.c[i] = l.c[i];
1475 		}
1476 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1477 		if (ftt != ftt_none)
1478 			return (ftt);
1479 		break;
1480 	case ASI_PST8_PL:	/* little-endian */
1481 	case ASI_PST8_SL:
1482 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1483 		if (ftt != ftt_none)
1484 			return (ftt);
1485 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1486 		for (h = 7, i = 0, j = 0x80; i <= 7; h--, i++, j >>= 1) {
1487 			if ((msk & j) == j)
1488 				res.c[h] = k.c[i];
1489 			else
1490 				res.c[h] = l.c[i];
1491 		}
1492 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1493 		if (ftt != ftt_none)
1494 			return (ftt);
1495 		break;
1496 	case ASI_PST16_P:
1497 	case ASI_PST16_S:
1498 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1499 		if (ftt != ftt_none)
1500 			return (ftt);
1501 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1502 		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1503 			if ((msk & j) == j)
1504 				res.s[i] = k.s[i];
1505 			else
1506 				res.s[i] = l.s[i];
1507 		}
1508 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1509 		if (ftt != ftt_none)
1510 			return (ftt);
1511 		break;
1512 	case ASI_PST16_PL:
1513 	case ASI_PST16_SL:
1514 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1515 		if (ftt != ftt_none)
1516 			return (ftt);
1517 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1518 		for (h = 7, i = 0, j = 0x8; i <= 6; h -= 2, i += 2, j >>= 1) {
1519 			if ((msk & j) == j) {
1520 				res.c[h] = k.c[i];
1521 				res.c[h-1] = k.c[i+1];
1522 			} else {
1523 				res.c[h] = l.c[i];
1524 				res.c[h-1] = l.c[i+1];
1525 			}
1526 		}
1527 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1528 		if (ftt != ftt_none)
1529 			return (ftt);
1530 		break;
1531 	case ASI_PST32_P:
1532 	case ASI_PST32_S:
1533 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1534 		if (ftt != ftt_none)
1535 			return (ftt);
1536 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1537 		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1538 			if ((msk & j) == j)
1539 				res.i[i] = k.i[i];
1540 			else
1541 				res.i[i] = l.i[i];
1542 		}
1543 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1544 		if (ftt != ftt_none)
1545 			return (ftt);
1546 		break;
1547 	case ASI_PST32_PL:
1548 	case ASI_PST32_SL:
1549 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1550 		if (ftt != ftt_none)
1551 			return (ftt);
1552 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1553 		for (h = 7, i = 0, j = 0x2; i <= 4; h -= 4, i += 4, j >>= 1) {
1554 			if ((msk & j) == j) {
1555 				res.c[h] = k.c[i];
1556 				res.c[h-1] = k.c[i+1];
1557 				res.c[h-2] = k.c[i+2];
1558 				res.c[h-3] = k.c[i+3];
1559 			} else {
1560 				res.c[h] = l.c[i];
1561 				res.c[h-1] = l.c[i+1];
1562 				res.c[h-2] = l.c[i+2];
1563 				res.c[h-3] = l.c[i+3];
1564 			}
1565 		}
1566 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1567 		if (ftt != ftt_none)
1568 			return (ftt);
1569 		break;
1570 	}
1571 
1572 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1573 	pregs->r_npc += 4;
1574 	return (ftt_none);
1575 }
1576 
1577 /*
1578  * Simulator for short load/stores between floating-point unit and memory.
1579  */
1580 static enum ftt_type
1581 vis_short_fls(
1582 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1583 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1584 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1585 	void		*prw,	/* Pointer to locals and ins. */
1586 	uint_t		asi)	/* asi to emulate! */
1587 {
1588 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1589 	uint_t	opf;
1590 	uint64_t ea, tea;
1591 	union {
1592 		freg_type	f;
1593 		uint64_t	ll;
1594 		uint32_t	i[2];
1595 		uint16_t	s[4];
1596 		uint8_t		c[8];
1597 	} k;
1598 	union {
1599 		vis_inst_type	inst;
1600 		int		i;
1601 	} fp;
1602 	enum ftt_type   ftt = ftt_none;
1603 	ushort_t us;
1604 	uchar_t uc;
1605 
1606 	nrs1 = inst.rs1;
1607 	nrs2 = inst.rs2;
1608 	nrd = inst.rd;
1609 	if ((nrd & 1) == 1) 		/* fix register encoding */
1610 		nrd = (nrd & 0x1e) | 0x20;
1611 	opf = inst.opf;
1612 	fp.inst = inst;
1613 	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1614 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1615 		if (ftt != ftt_none)
1616 			return (ftt);
1617 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1618 		if (ftt != ftt_none)
1619 			return (ftt);
1620 		ea += tea;
1621 	} else {	/* effective address = rs1 + imm13 */
1622 		fp.inst = inst;
1623 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1624 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1625 		if (ftt != ftt_none)
1626 			return (ftt);
1627 		ea += tea;
1628 	}
1629 	if (get_udatamodel() == DATAMODEL_ILP32)
1630 		ea = (uint64_t)(caddr32_t)ea;
1631 
1632 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1633 	switch (asi) {
1634 	case ASI_FL8_P:
1635 	case ASI_FL8_S:
1636 	case ASI_FL8_PL:		/* little-endian */
1637 	case ASI_FL8_SL:
1638 		if ((inst.op3 & 7) == 3) {	/* load byte */
1639 			if (fuword8((void *)ea, &uc) == -1)
1640 				return (ftt_fault);
1641 			k.ll = 0;
1642 			k.c[7] = uc;
1643 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1644 		} else {			/* store byte */
1645 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1646 			uc = k.c[7];
1647 			if (subyte((caddr_t)ea, uc) == -1)
1648 				return (ftt_fault);
1649 		}
1650 		break;
1651 	case ASI_FL16_P:
1652 	case ASI_FL16_S:
1653 		if ((ea & 1) == 1)
1654 			return (ftt_alignment);
1655 		if ((inst.op3 & 7) == 3) {	/* load short */
1656 			if (fuword16((void *)ea, &us) == -1)
1657 				return (ftt_fault);
1658 			k.ll = 0;
1659 			k.s[3] = us;
1660 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1661 		} else {			/* store short */
1662 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1663 			us = k.s[3];
1664 			if (suword16((caddr_t)ea, us) == -1)
1665 				return (ftt_fault);
1666 		}
1667 		break;
1668 	case ASI_FL16_PL:		/* little-endian */
1669 	case ASI_FL16_SL:
1670 		if ((ea & 1) == 1)
1671 			return (ftt_alignment);
1672 		if ((inst.op3 & 7) == 3) {	/* load short */
1673 			if (fuword16((void *)ea, &us) == -1)
1674 				return (ftt_fault);
1675 			k.ll = 0;
1676 			k.c[6] = (uchar_t)us;
1677 			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1678 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1679 		} else {			/* store short */
1680 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1681 			uc = k.c[7];
1682 			us = (ushort_t)((uc << 8) | k.c[6]);
1683 			if (suword16((void *)ea, us) == -1)
1684 				return (ftt_fault);
1685 		}
1686 		break;
1687 	}
1688 
1689 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1690 	pregs->r_npc += 4;
1691 	return (ftt_none);
1692 }
1693 
1694 /*
1695  * Simulator for block loads and stores between floating-point unit and memory.
1696  * XXX - OK, so it is really gross to flush the whole Ecache for a block commit
1697  *	 store - but the circumstances under which this code actually gets
1698  *	 used in real life are so obscure that you can live with it!
1699  */
1700 static enum ftt_type
1701 vis_blk_fldst(
1702 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1703 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1704 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1705 	void		*prw,	/* Pointer to locals and ins. */
1706 	uint_t		asi)	/* asi to emulate! */
1707 {
1708 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1709 	uint_t	opf, h, i, j;
1710 	uint64_t ea, tea;
1711 	union {
1712 		freg_type	f;
1713 		uint64_t	ll;
1714 		uint8_t		c[8];
1715 	} k, l;
1716 	union {
1717 		vis_inst_type	inst;
1718 		int32_t		i;
1719 	} fp;
1720 	enum ftt_type   ftt;
1721 	boolean_t little_endian = B_FALSE;
1722 
1723 	nrs1 = inst.rs1;
1724 	nrs2 = inst.rs2;
1725 	nrd = inst.rd;
1726 	if ((nrd & 1) == 1) 		/* fix register encoding */
1727 		nrd = (nrd & 0x1e) | 0x20;
1728 
1729 	/* ensure register is 8-double precision aligned */
1730 	if ((nrd & 0xf) != 0)
1731 		return (ftt_unimplemented);
1732 
1733 	opf = inst.opf;
1734 	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1735 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1736 		if (ftt != ftt_none)
1737 			return (ftt);
1738 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1739 		if (ftt != ftt_none)
1740 			return (ftt);
1741 		ea += tea;
1742 	} else {			/* effective address = rs1 + imm13 */
1743 		fp.inst = inst;
1744 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1745 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1746 		if (ftt != ftt_none)
1747 			return (ftt);
1748 		ea += tea;
1749 	}
1750 	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1751 		return (ftt_alignment);
1752 
1753 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1754 	switch (asi) {
1755 	case ASI_BLK_AIUPL:
1756 	case ASI_BLK_AIUSL:
1757 	case ASI_BLK_PL:
1758 	case ASI_BLK_SL:
1759 		little_endian = B_TRUE;
1760 		/* FALLTHROUGH */
1761 	case ASI_BLK_AIUP:
1762 	case ASI_BLK_AIUS:
1763 	case ASI_BLK_P:
1764 	case ASI_BLK_S:
1765 	case ASI_BLK_COMMIT_P:
1766 	case ASI_BLK_COMMIT_S:
1767 		if ((inst.op3 & 7) == 3) {	/* lddf */
1768 		    for (i = 0; i < 8; i++, ea += 8, nrd += 2) {
1769 			ftt = _fp_read_extword((uint64_t *)ea, &k.ll, pfpsd);
1770 			if (ftt != ftt_none)
1771 				return (ftt);
1772 			if (little_endian) {
1773 				for (j = 0, h = 7; j < 8; j++, h--)
1774 					l.c[h] = k.c[j];
1775 				k.ll = l.ll;
1776 			}
1777 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1778 		    }
1779 		} else {			/* stdf */
1780 		    for (i = 0; i < 8; i++, ea += 8, nrd += 2) {
1781 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1782 			if (little_endian) {
1783 				for (j = 0, h = 7; j < 8; j++, h--)
1784 					l.c[h] = k.c[j];
1785 				k.ll = l.ll;
1786 			}
1787 			ftt = _fp_write_extword((uint64_t *)ea, k.ll, pfpsd);
1788 			if (ftt != ftt_none)
1789 				return (ftt);
1790 		    }
1791 		}
1792 		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1793 			cpu_flush_ecache();
1794 		break;
1795 	default:
1796 		/* addr of unimp inst */
1797 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1798 		return (ftt_unimplemented);
1799 	}
1800 
1801 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1802 	pregs->r_npc += 4;
1803 	return (ftt_none);
1804 }
1805 
1806 /*
1807  * Simulator for rd %gsr instruction.
1808  */
1809 enum ftt_type
1810 vis_rdgsr(
1811 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1812 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1813 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1814 	void		*prw,	/* Pointer to locals and ins. */
1815 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1816 {
1817 	uint_t nrd;
1818 	uint64_t r;
1819 	enum ftt_type ftt = ftt_none;
1820 
1821 	nrd = pinst.rd;
1822 
1823 	r = pfpsd->fp_current_read_gsr(fp);
1824 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
1825 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1826 	pregs->r_npc += 4;
1827 	return (ftt);
1828 }
1829 
1830 /*
1831  * Simulator for wr %gsr instruction.
1832  */
1833 enum ftt_type
1834 vis_wrgsr(
1835 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1836 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1837 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1838 	void		*prw,	/* Pointer to locals and ins. */
1839 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1840 {
1841 	uint_t nrs1;
1842 	uint64_t r, r1, r2;
1843 	enum ftt_type ftt = ftt_none;
1844 
1845 	nrs1 = pinst.rs1;
1846 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
1847 	if (ftt != ftt_none)
1848 		return (ftt);
1849 	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
1850 		uint_t nrs2;
1851 
1852 		nrs2 = pinst.rs2;
1853 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
1854 		if (ftt != ftt_none)
1855 			return (ftt);
1856 	} else {	/* use sign_ext(simm13) */
1857 		union {
1858 			fp_inst_type	inst;
1859 			uint32_t	i;
1860 		} fp;
1861 
1862 		fp.inst = pinst;		/* Extract simm13 field */
1863 		r2 = (fp.i << 19) >> 19;
1864 	}
1865 	r = r1 ^ r2;
1866 	pfpsd->fp_current_write_gsr(r, fp);
1867 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1868 	pregs->r_npc += 4;
1869 	return (ftt);
1870 }
1871 
1872 /*
1873  * This is the loadable module wrapper.
1874  */
1875 #include <sys/errno.h>
1876 #include <sys/modctl.h>
1877 
/*
 * Module linkage information for the kernel.
 */
extern struct mod_ops mod_miscops;

/*
 * Descriptor for this module: it uses the miscellaneous-module ops
 * (mod_miscops) and carries the string "vis fp simulation".
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"vis fp simulation",
};

/*
 * Linkage structure passed to mod_install() by _init() and to mod_info()
 * by _info(); it points at the single modlmisc descriptor above and is
 * NULL-terminated.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};
1891 
1892 int
1893 _init(void)
1894 {
1895 	return (mod_install(&modlinkage));
1896 }
1897 
1898 int
1899 _info(struct modinfo *modinfop)
1900 {
1901 	return (mod_info(&modlinkage, modinfop));
1902 }
1903