1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
#include <ucontext.h>
#include <fenv.h>
#include <string.h>
#if defined(__SUNPRO_C)
#include <sunmath.h>
#else
#include <sys/ieeefp.h>
#endif
#include "fex_handler.h"
#include "fenv_inlines.h"
39
/*
 * Fall back to the EIP/EFL register index names when the generic
 * REG_PC/REG_PS names are not provided by the system headers.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * regno(X) converts the register number X taken from an instruction
 * encoding into an index into uc_mcontext.gregs[].
 *
 * On amd64, numbers 0-3 map to REG_RAX - X, number 4 maps to REG_RSP,
 * and numbers above 4 map to REG_RAX + 1 - X.  Otherwise the index is
 * simply EAX - X.
 *
 * The argument is fully parenthesized so that an expression such as
 * regno(a + b) expands correctly.  Note that X is still evaluated more
 * than once on amd64, so it must not have side effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4) ? REG_RAX - (X) : \
			(((X) > 4) ? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
54
55 /*
56 * Support for SSE instructions
57 */
58
59 /*
60 * Decode an SSE instruction. Fill in *inst and return the length of the
61 * instruction in bytes. Return 0 if the instruction is not recognized.
62 */
63 int
__fex_parse_sse(ucontext_t * uap,sseinst_t * inst)64 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
65 {
66 unsigned char *ip;
67 char *addr;
68 int i, dbl, simd, rex, modrm, sib, r;
69
70 i = 0;
71 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
72
73 /* look for pseudo-prefixes */
74 dbl = 0;
75 simd = SIMD;
76 if (ip[i] == 0xF3) {
77 simd = 0;
78 i++;
79 } else if (ip[i] == 0x66) {
80 dbl = DOUBLE;
81 i++;
82 } else if (ip[i] == 0xF2) {
83 dbl = DOUBLE;
84 simd = 0;
85 i++;
86 }
87
88 /* look for AMD64 REX prefix */
89 rex = 0;
90 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
91 rex = ip[i];
92 i++;
93 }
94
95 /* parse opcode */
96 if (ip[i++] != 0x0F)
97 return 0;
98 switch (ip[i++]) {
99 case 0x2A:
100 inst->op = (int)cvtsi2ss + simd + dbl;
101 if (!simd)
102 inst->op = (int)inst->op + (rex & 8);
103 break;
104
105 case 0x2C:
106 inst->op = (int)cvttss2si + simd + dbl;
107 if (!simd)
108 inst->op = (int)inst->op + (rex & 8);
109 break;
110
111 case 0x2D:
112 inst->op = (int)cvtss2si + simd + dbl;
113 if (!simd)
114 inst->op = (int)inst->op + (rex & 8);
115 break;
116
117 case 0x2E:
118 /* oddball: scalar instruction in a SIMD opcode group */
119 if (!simd)
120 return 0;
121 inst->op = (int)ucomiss + dbl;
122 break;
123
124 case 0x2F:
125 /* oddball: scalar instruction in a SIMD opcode group */
126 if (!simd)
127 return 0;
128 inst->op = (int)comiss + dbl;
129 break;
130
131 case 0x51:
132 inst->op = (int)sqrtss + simd + dbl;
133 break;
134
135 case 0x58:
136 inst->op = (int)addss + simd + dbl;
137 break;
138
139 case 0x59:
140 inst->op = (int)mulss + simd + dbl;
141 break;
142
143 case 0x5A:
144 inst->op = (int)cvtss2sd + simd + dbl;
145 break;
146
147 case 0x5B:
148 if (dbl) {
149 if (simd)
150 inst->op = cvtps2dq;
151 else
152 return 0;
153 } else {
154 inst->op = (simd)? cvtdq2ps : cvttps2dq;
155 }
156 break;
157
158 case 0x5C:
159 inst->op = (int)subss + simd + dbl;
160 break;
161
162 case 0x5D:
163 inst->op = (int)minss + simd + dbl;
164 break;
165
166 case 0x5E:
167 inst->op = (int)divss + simd + dbl;
168 break;
169
170 case 0x5F:
171 inst->op = (int)maxss + simd + dbl;
172 break;
173
174 case 0xC2:
175 inst->op = (int)cmpss + simd + dbl;
176 break;
177
178 case 0xE6:
179 if (simd) {
180 if (dbl)
181 inst->op = cvttpd2dq;
182 else
183 return 0;
184 } else {
185 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
186 }
187 break;
188
189 default:
190 return 0;
191 }
192
193 /* locate operands */
194 modrm = ip[i++];
195
196 if (inst->op == cvtss2si || inst->op == cvttss2si ||
197 inst->op == cvtsd2si || inst->op == cvttsd2si ||
198 inst->op == cvtss2siq || inst->op == cvttss2siq ||
199 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
200 /* op1 is a gp register */
201 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
202 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
203 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
204 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
205 /* op1 is a mmx register */
206 #ifdef __amd64
207 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
208 fpchip_state.st[(modrm >> 3) & 7];
209 #else
210 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
211 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
212 fpchip_state.state[7]);
213 #endif
214 } else {
215 /* op1 is a xmm register */
216 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
217 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
218 fp_reg_set.fpchip_state.xmm[r];
219 }
220
221 if ((modrm >> 6) == 3) {
222 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
223 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
224 /* op2 is a gp register */
225 r = ((rex & 1) << 3) | (modrm & 7);
226 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
227 gregs[regno(r)];
228 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
229 /* op2 is a mmx register */
230 #ifdef __amd64
231 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
232 fp_reg_set.fpchip_state.st[modrm & 7];
233 #else
234 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
235 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
236 fpchip_state.state[7]);
237 #endif
238 } else {
239 /* op2 is a xmm register */
240 r = ((rex & 1) << 3) | (modrm & 7);
241 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
242 fp_reg_set.fpchip_state.xmm[r];
243 }
244 } else if ((modrm & 0xc7) == 0x05) {
245 #ifdef __amd64
246 /* address of next instruction + offset */
247 r = i + 4;
248 if (inst->op == cmpss || inst->op == cmpps ||
249 inst->op == cmpsd || inst->op == cmppd)
250 r++;
251 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
252 #else
253 /* absolute address */
254 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
255 #endif
256 i += 4;
257 } else {
258 /* complex address */
259 if ((modrm & 7) == 4) {
260 /* parse sib byte */
261 sib = ip[i++];
262 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
263 /* start with absolute address */
264 addr = (char *)(uintptr_t)(*(int *)(ip + i));
265 i += 4;
266 } else {
267 /* start with base */
268 r = ((rex & 1) << 3) | (sib & 7);
269 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
270 }
271 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
272 if (r != 4) {
273 /* add scaled index */
274 addr += uap->uc_mcontext.gregs[regno(r)]
275 << (sib >> 6);
276 }
277 } else {
278 r = ((rex & 1) << 3) | (modrm & 7);
279 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
280 }
281
282 /* add displacement, if any */
283 if ((modrm >> 6) == 1) {
284 addr += (char)ip[i++];
285 } else if ((modrm >> 6) == 2) {
286 addr += *(int *)(ip + i);
287 i += 4;
288 }
289 inst->op2 = (sseoperand_t *)addr;
290 }
291
292 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
293 inst->op == cmppd) {
294 /* get the immediate operand */
295 inst->imm = ip[i++];
296 }
297
298 return i;
299 }
300
301 static enum fp_class_type
my_fp_classf(float * x)302 my_fp_classf(float *x)
303 {
304 int i = *(int *)x & ~0x80000000;
305
306 if (i < 0x7f800000) {
307 if (i < 0x00800000)
308 return ((i == 0)? fp_zero : fp_subnormal);
309 return fp_normal;
310 }
311 else if (i == 0x7f800000)
312 return fp_infinity;
313 else if (i & 0x400000)
314 return fp_quiet;
315 else
316 return fp_signaling;
317 }
318
319 static enum fp_class_type
my_fp_class(double * x)320 my_fp_class(double *x)
321 {
322 int i = *(1+(int *)x) & ~0x80000000;
323
324 if (i < 0x7ff00000) {
325 if (i < 0x00100000)
326 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
327 return fp_normal;
328 }
329 else if (i == 0x7ff00000 && *(int *)x == 0)
330 return fp_infinity;
331 else if (i & 0x80000)
332 return fp_quiet;
333 else
334 return fp_signaling;
335 }
336
337 /*
338 * Inspect a scalar SSE instruction that incurred an invalid operation
339 * exception to determine which type of exception it was.
340 */
341 static enum fex_exception
__fex_get_sse_invalid_type(sseinst_t * inst)342 __fex_get_sse_invalid_type(sseinst_t *inst)
343 {
344 enum fp_class_type t1, t2;
345
346 /* check op2 for signaling nan */
347 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
348 my_fp_classf(&inst->op2->f[0]);
349 if (t2 == fp_signaling)
350 return fex_inv_snan;
351
352 /* eliminate all single-operand instructions */
353 switch (inst->op) {
354 case cvtsd2ss:
355 case cvtss2sd:
356 /* hmm, this shouldn't have happened */
357 return (enum fex_exception) -1;
358
359 case sqrtss:
360 case sqrtsd:
361 return fex_inv_sqrt;
362
363 case cvtss2si:
364 case cvtsd2si:
365 case cvttss2si:
366 case cvttsd2si:
367 case cvtss2siq:
368 case cvtsd2siq:
369 case cvttss2siq:
370 case cvttsd2siq:
371 return fex_inv_int;
372 default:
373 break;
374 }
375
376 /* check op1 for signaling nan */
377 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
378 my_fp_classf(&inst->op1->f[0]);
379 if (t1 == fp_signaling)
380 return fex_inv_snan;
381
382 /* check two-operand instructions for other cases */
383 switch (inst->op) {
384 case cmpss:
385 case cmpsd:
386 case minss:
387 case minsd:
388 case maxss:
389 case maxsd:
390 case comiss:
391 case comisd:
392 return fex_inv_cmp;
393
394 case addss:
395 case addsd:
396 case subss:
397 case subsd:
398 if (t1 == fp_infinity && t2 == fp_infinity)
399 return fex_inv_isi;
400 break;
401
402 case mulss:
403 case mulsd:
404 if ((t1 == fp_zero && t2 == fp_infinity) ||
405 (t2 == fp_zero && t1 == fp_infinity))
406 return fex_inv_zmi;
407 break;
408
409 case divss:
410 case divsd:
411 if (t1 == fp_zero && t2 == fp_zero)
412 return fex_inv_zdz;
413 if (t1 == fp_infinity && t2 == fp_infinity)
414 return fex_inv_idi;
415 default:
416 break;
417 }
418
419 return (enum fex_exception)-1;
420 }
421
/*
 * Inline templates.  Each routine executes the SSE instruction it is
 * named after on the values its pointer arguments refer to; when there
 * is a result, it is stored through the last argument.
 */

/* single precision */
extern void sse_cmpeqss(float *a, float *b, int *res);
extern void sse_cmpltss(float *a, float *b, int *res);
extern void sse_cmpless(float *a, float *b, int *res);
extern void sse_cmpunordss(float *a, float *b, int *res);
extern void sse_minss(float *a, float *b, float *res);
extern void sse_maxss(float *a, float *b, float *res);
extern void sse_addss(float *a, float *b, float *res);
extern void sse_subss(float *a, float *b, float *res);
extern void sse_mulss(float *a, float *b, float *res);
extern void sse_divss(float *a, float *b, float *res);
extern void sse_sqrtss(float *a, float *res);
extern void sse_ucomiss(float *a, float *b);
extern void sse_comiss(float *a, float *b);
extern void sse_cvtss2sd(float *a, double *res);
extern void sse_cvtsi2ss(int *a, float *res);
extern void sse_cvttss2si(float *a, int *res);
extern void sse_cvtss2si(float *a, int *res);
#ifdef __amd64
extern void sse_cvtsi2ssq(long long *a, float *res);
extern void sse_cvttss2siq(float *a, long long *res);
extern void sse_cvtss2siq(float *a, long long *res);
#endif

/* double precision */
extern void sse_cmpeqsd(double *a, double *b, long long *res);
extern void sse_cmpltsd(double *a, double *b, long long *res);
extern void sse_cmplesd(double *a, double *b, long long *res);
extern void sse_cmpunordsd(double *a, double *b, long long *res);
extern void sse_minsd(double *a, double *b, double *res);
extern void sse_maxsd(double *a, double *b, double *res);
extern void sse_addsd(double *a, double *b, double *res);
extern void sse_subsd(double *a, double *b, double *res);
extern void sse_mulsd(double *a, double *b, double *res);
extern void sse_divsd(double *a, double *b, double *res);
extern void sse_sqrtsd(double *a, double *res);
extern void sse_ucomisd(double *a, double *b);
extern void sse_comisd(double *a, double *b);
extern void sse_cvtsd2ss(double *a, float *res);
extern void sse_cvtsi2sd(int *a, double *res);
extern void sse_cvttsd2si(double *a, int *res);
extern void sse_cvtsd2si(double *a, int *res);
#ifdef __amd64
extern void sse_cvtsi2sdq(long long *a, double *res);
extern void sse_cvttsd2siq(double *a, long long *res);
extern void sse_cvtsd2siq(double *a, long long *res);
#endif
467
468 /*
469 * Fill in *info with the operands, default untrapped result, and
470 * flags produced by a scalar SSE instruction, and return the type
471 * of trapped exception (if any). On entry, the mxcsr must have
472 * all exceptions masked and all flags clear. The same conditions
473 * will hold on exit.
474 *
475 * This routine does not work if the instruction specified by *inst
476 * is not a scalar instruction.
477 */
478 enum fex_exception
__fex_get_sse_op(ucontext_t * uap,sseinst_t * inst,fex_info_t * info)479 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
480 {
481 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
482
483 /*
484 * Perform the operation with traps disabled and check the
485 * exception flags. If the underflow trap was enabled, also
486 * check for an exact subnormal result.
487 */
488 __fenv_getmxcsr(&oldmxcsr);
489 subnorm = 0;
490 if ((int)inst->op & DOUBLE) {
491 if (inst->op == cvtsi2sd) {
492 info->op1.type = fex_int;
493 info->op1.val.i = inst->op2->i[0];
494 info->op2.type = fex_nodata;
495 } else if (inst->op == cvtsi2sdq) {
496 info->op1.type = fex_llong;
497 info->op1.val.l = inst->op2->l[0];
498 info->op2.type = fex_nodata;
499 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
500 inst->op == cvttsd2si || inst->op == cvtsd2si ||
501 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
502 info->op1.type = fex_double;
503 info->op1.val.d = inst->op2->d[0];
504 info->op2.type = fex_nodata;
505 } else {
506 info->op1.type = fex_double;
507 info->op1.val.d = inst->op1->d[0];
508 info->op2.type = fex_double;
509 info->op2.val.d = inst->op2->d[0];
510 }
511 info->res.type = fex_double;
512 switch (inst->op) {
513 case cmpsd:
514 info->op = fex_cmp;
515 info->res.type = fex_llong;
516 switch (inst->imm & 3) {
517 case 0:
518 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
519 &info->res.val.l);
520 break;
521
522 case 1:
523 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
524 &info->res.val.l);
525 break;
526
527 case 2:
528 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
529 &info->res.val.l);
530 break;
531
532 case 3:
533 sse_cmpunordsd(&info->op1.val.d,
534 &info->op2.val.d, &info->res.val.l);
535 }
536 if (inst->imm & 4)
537 info->res.val.l ^= 0xffffffffffffffffull;
538 break;
539
540 case minsd:
541 info->op = fex_other;
542 sse_minsd(&info->op1.val.d, &info->op2.val.d,
543 &info->res.val.d);
544 break;
545
546 case maxsd:
547 info->op = fex_other;
548 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
549 &info->res.val.d);
550 break;
551
552 case addsd:
553 info->op = fex_add;
554 sse_addsd(&info->op1.val.d, &info->op2.val.d,
555 &info->res.val.d);
556 if (my_fp_class(&info->res.val.d) == fp_subnormal)
557 subnorm = 1;
558 break;
559
560 case subsd:
561 info->op = fex_sub;
562 sse_subsd(&info->op1.val.d, &info->op2.val.d,
563 &info->res.val.d);
564 if (my_fp_class(&info->res.val.d) == fp_subnormal)
565 subnorm = 1;
566 break;
567
568 case mulsd:
569 info->op = fex_mul;
570 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
571 &info->res.val.d);
572 if (my_fp_class(&info->res.val.d) == fp_subnormal)
573 subnorm = 1;
574 break;
575
576 case divsd:
577 info->op = fex_div;
578 sse_divsd(&info->op1.val.d, &info->op2.val.d,
579 &info->res.val.d);
580 if (my_fp_class(&info->res.val.d) == fp_subnormal)
581 subnorm = 1;
582 break;
583
584 case sqrtsd:
585 info->op = fex_sqrt;
586 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
587 break;
588
589 case cvtsd2ss:
590 info->op = fex_cnvt;
591 info->res.type = fex_float;
592 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
593 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
594 subnorm = 1;
595 break;
596
597 case cvtsi2sd:
598 info->op = fex_cnvt;
599 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
600 break;
601
602 case cvttsd2si:
603 info->op = fex_cnvt;
604 info->res.type = fex_int;
605 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
606 break;
607
608 case cvtsd2si:
609 info->op = fex_cnvt;
610 info->res.type = fex_int;
611 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
612 break;
613
614 #ifdef __amd64
615 case cvtsi2sdq:
616 info->op = fex_cnvt;
617 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
618 break;
619
620 case cvttsd2siq:
621 info->op = fex_cnvt;
622 info->res.type = fex_llong;
623 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
624 break;
625
626 case cvtsd2siq:
627 info->op = fex_cnvt;
628 info->res.type = fex_llong;
629 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
630 break;
631 #endif
632
633 case ucomisd:
634 info->op = fex_cmp;
635 info->res.type = fex_nodata;
636 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
637 break;
638
639 case comisd:
640 info->op = fex_cmp;
641 info->res.type = fex_nodata;
642 sse_comisd(&info->op1.val.d, &info->op2.val.d);
643 break;
644 default:
645 break;
646 }
647 } else {
648 if (inst->op == cvtsi2ss) {
649 info->op1.type = fex_int;
650 info->op1.val.i = inst->op2->i[0];
651 info->op2.type = fex_nodata;
652 } else if (inst->op == cvtsi2ssq) {
653 info->op1.type = fex_llong;
654 info->op1.val.l = inst->op2->l[0];
655 info->op2.type = fex_nodata;
656 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
657 inst->op == cvttss2si || inst->op == cvtss2si ||
658 inst->op == cvttss2siq || inst->op == cvtss2siq) {
659 info->op1.type = fex_float;
660 info->op1.val.f = inst->op2->f[0];
661 info->op2.type = fex_nodata;
662 } else {
663 info->op1.type = fex_float;
664 info->op1.val.f = inst->op1->f[0];
665 info->op2.type = fex_float;
666 info->op2.val.f = inst->op2->f[0];
667 }
668 info->res.type = fex_float;
669 switch (inst->op) {
670 case cmpss:
671 info->op = fex_cmp;
672 info->res.type = fex_int;
673 switch (inst->imm & 3) {
674 case 0:
675 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
676 &info->res.val.i);
677 break;
678
679 case 1:
680 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
681 &info->res.val.i);
682 break;
683
684 case 2:
685 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
686 &info->res.val.i);
687 break;
688
689 case 3:
690 sse_cmpunordss(&info->op1.val.f,
691 &info->op2.val.f, &info->res.val.i);
692 }
693 if (inst->imm & 4)
694 info->res.val.i ^= 0xffffffffu;
695 break;
696
697 case minss:
698 info->op = fex_other;
699 sse_minss(&info->op1.val.f, &info->op2.val.f,
700 &info->res.val.f);
701 break;
702
703 case maxss:
704 info->op = fex_other;
705 sse_maxss(&info->op1.val.f, &info->op2.val.f,
706 &info->res.val.f);
707 break;
708
709 case addss:
710 info->op = fex_add;
711 sse_addss(&info->op1.val.f, &info->op2.val.f,
712 &info->res.val.f);
713 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
714 subnorm = 1;
715 break;
716
717 case subss:
718 info->op = fex_sub;
719 sse_subss(&info->op1.val.f, &info->op2.val.f,
720 &info->res.val.f);
721 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
722 subnorm = 1;
723 break;
724
725 case mulss:
726 info->op = fex_mul;
727 sse_mulss(&info->op1.val.f, &info->op2.val.f,
728 &info->res.val.f);
729 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
730 subnorm = 1;
731 break;
732
733 case divss:
734 info->op = fex_div;
735 sse_divss(&info->op1.val.f, &info->op2.val.f,
736 &info->res.val.f);
737 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
738 subnorm = 1;
739 break;
740
741 case sqrtss:
742 info->op = fex_sqrt;
743 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
744 break;
745
746 case cvtss2sd:
747 info->op = fex_cnvt;
748 info->res.type = fex_double;
749 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
750 break;
751
752 case cvtsi2ss:
753 info->op = fex_cnvt;
754 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
755 break;
756
757 case cvttss2si:
758 info->op = fex_cnvt;
759 info->res.type = fex_int;
760 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
761 break;
762
763 case cvtss2si:
764 info->op = fex_cnvt;
765 info->res.type = fex_int;
766 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
767 break;
768
769 #ifdef __amd64
770 case cvtsi2ssq:
771 info->op = fex_cnvt;
772 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
773 break;
774
775 case cvttss2siq:
776 info->op = fex_cnvt;
777 info->res.type = fex_llong;
778 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
779 break;
780
781 case cvtss2siq:
782 info->op = fex_cnvt;
783 info->res.type = fex_llong;
784 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
785 break;
786 #endif
787
788 case ucomiss:
789 info->op = fex_cmp;
790 info->res.type = fex_nodata;
791 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
792 break;
793
794 case comiss:
795 info->op = fex_cmp;
796 info->res.type = fex_nodata;
797 sse_comiss(&info->op1.val.f, &info->op2.val.f);
798 break;
799 default:
800 break;
801 }
802 }
803 __fenv_getmxcsr(&mxcsr);
804 info->flags = mxcsr & 0x3d;
805 __fenv_setmxcsr(&oldmxcsr);
806
807 /* determine which exception would have been trapped */
808 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
809 >> 7) & 0x3d;
810 e = mxcsr & te;
811 if (e & FE_INVALID)
812 return __fex_get_sse_invalid_type(inst);
813 if (e & FE_DIVBYZERO)
814 return fex_division;
815 if (e & FE_OVERFLOW)
816 return fex_overflow;
817 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
818 return fex_underflow;
819 if (e & FE_INEXACT)
820 return fex_inexact;
821 return (enum fex_exception)-1;
822 }
823
824 /*
825 * Emulate a SIMD SSE instruction to determine which exceptions occur
826 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
827 * trapped exception that would occur if the i-th part of the SIMD
828 * instruction were executed in isolation; set e[i] to -1 if no
829 * trapped exception would occur in this part. Also fill in info[i]
830 * with the corresponding operands, default untrapped result, and
831 * flags.
832 *
833 * This routine does not work if the instruction specified by *inst
834 * is not a SIMD instruction.
835 */
836 void
__fex_get_simd_op(ucontext_t * uap,sseinst_t * inst,enum fex_exception * e,fex_info_t * info)837 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
838 fex_info_t *info)
839 {
840 sseinst_t dummy;
841 int i;
842
843 e[0] = e[1] = e[2] = e[3] = -1;
844
845 /* perform each part of the SIMD operation */
846 switch (inst->op) {
847 case cmpps:
848 dummy.op = cmpss;
849 dummy.imm = inst->imm;
850 for (i = 0; i < 4; i++) {
851 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
852 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
853 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
854 }
855 break;
856
857 case minps:
858 dummy.op = minss;
859 for (i = 0; i < 4; i++) {
860 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
861 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
862 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
863 }
864 break;
865
866 case maxps:
867 dummy.op = maxss;
868 for (i = 0; i < 4; i++) {
869 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
870 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
871 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
872 }
873 break;
874
875 case addps:
876 dummy.op = addss;
877 for (i = 0; i < 4; i++) {
878 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
879 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
880 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
881 }
882 break;
883
884 case subps:
885 dummy.op = subss;
886 for (i = 0; i < 4; i++) {
887 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
888 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
889 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
890 }
891 break;
892
893 case mulps:
894 dummy.op = mulss;
895 for (i = 0; i < 4; i++) {
896 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
897 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
898 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
899 }
900 break;
901
902 case divps:
903 dummy.op = divss;
904 for (i = 0; i < 4; i++) {
905 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
906 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
907 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
908 }
909 break;
910
911 case sqrtps:
912 dummy.op = sqrtss;
913 for (i = 0; i < 4; i++) {
914 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
915 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
916 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
917 }
918 break;
919
920 case cvtdq2ps:
921 dummy.op = cvtsi2ss;
922 for (i = 0; i < 4; i++) {
923 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
924 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
925 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
926 }
927 break;
928
929 case cvttps2dq:
930 dummy.op = cvttss2si;
931 for (i = 0; i < 4; i++) {
932 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
933 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
934 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
935 }
936 break;
937
938 case cvtps2dq:
939 dummy.op = cvtss2si;
940 for (i = 0; i < 4; i++) {
941 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
942 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
943 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
944 }
945 break;
946
947 case cvtpi2ps:
948 dummy.op = cvtsi2ss;
949 for (i = 0; i < 2; i++) {
950 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
951 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
952 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
953 }
954 break;
955
956 case cvttps2pi:
957 dummy.op = cvttss2si;
958 for (i = 0; i < 2; i++) {
959 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
960 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
961 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
962 }
963 break;
964
965 case cvtps2pi:
966 dummy.op = cvtss2si;
967 for (i = 0; i < 2; i++) {
968 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
969 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
970 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
971 }
972 break;
973
974 case cmppd:
975 dummy.op = cmpsd;
976 dummy.imm = inst->imm;
977 for (i = 0; i < 2; i++) {
978 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
979 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
980 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
981 }
982 break;
983
984 case minpd:
985 dummy.op = minsd;
986 for (i = 0; i < 2; i++) {
987 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
988 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
989 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
990 }
991 break;
992
993 case maxpd:
994 dummy.op = maxsd;
995 for (i = 0; i < 2; i++) {
996 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
997 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
998 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
999 }
1000 break;
1001
1002 case addpd:
1003 dummy.op = addsd;
1004 for (i = 0; i < 2; i++) {
1005 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1006 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1007 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1008 }
1009 break;
1010
1011 case subpd:
1012 dummy.op = subsd;
1013 for (i = 0; i < 2; i++) {
1014 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1015 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1016 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1017 }
1018 break;
1019
1020 case mulpd:
1021 dummy.op = mulsd;
1022 for (i = 0; i < 2; i++) {
1023 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1024 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1025 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1026 }
1027 break;
1028
1029 case divpd:
1030 dummy.op = divsd;
1031 for (i = 0; i < 2; i++) {
1032 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1033 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1034 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1035 }
1036 break;
1037
1038 case sqrtpd:
1039 dummy.op = sqrtsd;
1040 for (i = 0; i < 2; i++) {
1041 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1042 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044 }
1045 break;
1046
1047 case cvtpi2pd:
1048 case cvtdq2pd:
1049 dummy.op = cvtsi2sd;
1050 for (i = 0; i < 2; i++) {
1051 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1052 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1053 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1054 }
1055 break;
1056
1057 case cvttpd2pi:
1058 case cvttpd2dq:
1059 dummy.op = cvttsd2si;
1060 for (i = 0; i < 2; i++) {
1061 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1062 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1063 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1064 }
1065 break;
1066
1067 case cvtpd2pi:
1068 case cvtpd2dq:
1069 dummy.op = cvtsd2si;
1070 for (i = 0; i < 2; i++) {
1071 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1072 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1073 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1074 }
1075 break;
1076
1077 case cvtps2pd:
1078 dummy.op = cvtss2sd;
1079 for (i = 0; i < 2; i++) {
1080 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1081 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1082 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1083 }
1084 break;
1085
1086 case cvtpd2ps:
1087 dummy.op = cvtsd2ss;
1088 for (i = 0; i < 2; i++) {
1089 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1090 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1091 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1092 }
1093 default:
1094 break;
1095 }
1096 }
1097
1098 /*
1099 * Store the result value from *info in the destination of the scalar
1100 * SSE instruction specified by *inst. If no result is given but the
1101 * exception is underflow or overflow, supply the default trapped result.
1102 *
1103 * This routine does not work if the instruction specified by *inst
1104 * is not a scalar instruction.
1105 */
1106 void
__fex_st_sse_result(ucontext_t * uap,sseinst_t * inst,enum fex_exception e,fex_info_t * info)1107 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1108 fex_info_t *info)
1109 {
1110 int i = 0;
1111 long long l = 0L;;
1112 float f = 0.0, fscl;
1113 double d = 0.0L, dscl;
1114
1115 /* for compares that write eflags, just set the flags
1116 to indicate "unordered" */
1117 if (inst->op == ucomiss || inst->op == comiss ||
1118 inst->op == ucomisd || inst->op == comisd) {
1119 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1120 return;
1121 }
1122
1123 /* if info doesn't specify a result value, try to generate
1124 the default trapped result */
1125 if (info->res.type == fex_nodata) {
1126 /* set scale factors for exponent wrapping */
1127 switch (e) {
1128 case fex_overflow:
1129 fscl = 1.262177448e-29f; /* 2^-96 */
1130 dscl = 6.441148769597133308e-232; /* 2^-768 */
1131 break;
1132
1133 case fex_underflow:
1134 fscl = 7.922816251e+28f; /* 2^96 */
1135 dscl = 1.552518092300708935e+231; /* 2^768 */
1136 break;
1137
1138 default:
1139 (void) __fex_get_sse_op(uap, inst, info);
1140 if (info->res.type == fex_nodata)
1141 return;
1142 goto stuff;
1143 }
1144
1145 /* generate the wrapped result */
1146 if (inst->op == cvtsd2ss) {
1147 info->op1.type = fex_double;
1148 info->op1.val.d = inst->op2->d[0];
1149 info->op2.type = fex_nodata;
1150 info->res.type = fex_float;
1151 info->res.val.f = (float)(fscl * (fscl *
1152 info->op1.val.d));
1153 } else if ((int)inst->op & DOUBLE) {
1154 info->op1.type = fex_double;
1155 info->op1.val.d = inst->op1->d[0];
1156 info->op2.type = fex_double;
1157 info->op2.val.d = inst->op2->d[0];
1158 info->res.type = fex_double;
1159 switch (inst->op) {
1160 case addsd:
1161 info->res.val.d = dscl * (dscl *
1162 info->op1.val.d + dscl * info->op2.val.d);
1163 break;
1164
1165 case subsd:
1166 info->res.val.d = dscl * (dscl *
1167 info->op1.val.d - dscl * info->op2.val.d);
1168 break;
1169
1170 case mulsd:
1171 info->res.val.d = (dscl * info->op1.val.d) *
1172 (dscl * info->op2.val.d);
1173 break;
1174
1175 case divsd:
1176 info->res.val.d = (dscl * info->op1.val.d) /
1177 (info->op2.val.d / dscl);
1178 break;
1179
1180 default:
1181 return;
1182 }
1183 } else {
1184 info->op1.type = fex_float;
1185 info->op1.val.f = inst->op1->f[0];
1186 info->op2.type = fex_float;
1187 info->op2.val.f = inst->op2->f[0];
1188 info->res.type = fex_float;
1189 switch (inst->op) {
1190 case addss:
1191 info->res.val.f = fscl * (fscl *
1192 info->op1.val.f + fscl * info->op2.val.f);
1193 break;
1194
1195 case subss:
1196 info->res.val.f = fscl * (fscl *
1197 info->op1.val.f - fscl * info->op2.val.f);
1198 break;
1199
1200 case mulss:
1201 info->res.val.f = (fscl * info->op1.val.f) *
1202 (fscl * info->op2.val.f);
1203 break;
1204
1205 case divss:
1206 info->res.val.f = (fscl * info->op1.val.f) /
1207 (info->op2.val.f / fscl);
1208 break;
1209
1210 default:
1211 return;
1212 }
1213 }
1214 }
1215
1216 /* put the result in the destination */
1217 stuff:
1218 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1219 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1220 switch (info->res.type) {
1221 case fex_int:
1222 i = info->res.val.i;
1223 break;
1224
1225 case fex_llong:
1226 i = info->res.val.l;
1227 break;
1228
1229 case fex_float:
1230 i = info->res.val.f;
1231 break;
1232
1233 case fex_double:
1234 i = info->res.val.d;
1235 break;
1236
1237 case fex_ldouble:
1238 i = info->res.val.q;
1239 break;
1240
1241 default:
1242 break;
1243 }
1244 inst->op1->i[0] = i;
1245 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1246 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1247 inst->op == cvtsd2siq) {
1248 switch (info->res.type) {
1249 case fex_int:
1250 l = info->res.val.i;
1251 break;
1252
1253 case fex_llong:
1254 l = info->res.val.l;
1255 break;
1256
1257 case fex_float:
1258 l = info->res.val.f;
1259 break;
1260
1261 case fex_double:
1262 l = info->res.val.d;
1263 break;
1264
1265 case fex_ldouble:
1266 l = info->res.val.q;
1267 break;
1268
1269 default:
1270 break;
1271 }
1272 inst->op1->l[0] = l;
1273 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1274 inst->op == cvtss2sd) {
1275 switch (info->res.type) {
1276 case fex_int:
1277 d = info->res.val.i;
1278 break;
1279
1280 case fex_llong:
1281 d = info->res.val.l;
1282 break;
1283
1284 case fex_float:
1285 d = info->res.val.f;
1286 break;
1287
1288 case fex_double:
1289 d = info->res.val.d;
1290 break;
1291
1292 case fex_ldouble:
1293 d = info->res.val.q;
1294 break;
1295
1296 default:
1297 break;
1298 }
1299 inst->op1->d[0] = d;
1300 } else {
1301 switch (info->res.type) {
1302 case fex_int:
1303 f = info->res.val.i;
1304 break;
1305
1306 case fex_llong:
1307 f = info->res.val.l;
1308 break;
1309
1310 case fex_float:
1311 f = info->res.val.f;
1312 break;
1313
1314 case fex_double:
1315 f = info->res.val.d;
1316 break;
1317
1318 case fex_ldouble:
1319 f = info->res.val.q;
1320 break;
1321
1322 default:
1323 break;
1324 }
1325 inst->op1->f[0] = f;
1326 }
1327 }
1328
1329 /*
1330 * Store the results from a SIMD instruction. For each i, store
1331 * the result value from info[i] in the i-th part of the destination
1332 * of the SIMD SSE instruction specified by *inst. If no result
1333 * is given but the exception indicated by e[i] is underflow or
1334 * overflow, supply the default trapped result.
1335 *
1336 * This routine does not work if the instruction specified by *inst
1337 * is not a SIMD instruction.
1338 */
1339 void
__fex_st_simd_result(ucontext_t * uap,sseinst_t * inst,enum fex_exception * e,fex_info_t * info)1340 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1341 fex_info_t *info)
1342 {
1343 sseinst_t dummy;
1344 int i;
1345
1346 /* store each part */
1347 switch (inst->op) {
1348 case cmpps:
1349 dummy.op = cmpss;
1350 dummy.imm = inst->imm;
1351 for (i = 0; i < 4; i++) {
1352 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1353 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1354 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1355 }
1356 break;
1357
1358 case minps:
1359 dummy.op = minss;
1360 for (i = 0; i < 4; i++) {
1361 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1362 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1363 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1364 }
1365 break;
1366
1367 case maxps:
1368 dummy.op = maxss;
1369 for (i = 0; i < 4; i++) {
1370 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1371 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1372 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1373 }
1374 break;
1375
1376 case addps:
1377 dummy.op = addss;
1378 for (i = 0; i < 4; i++) {
1379 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1380 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1381 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1382 }
1383 break;
1384
1385 case subps:
1386 dummy.op = subss;
1387 for (i = 0; i < 4; i++) {
1388 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1389 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1390 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1391 }
1392 break;
1393
1394 case mulps:
1395 dummy.op = mulss;
1396 for (i = 0; i < 4; i++) {
1397 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1398 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1399 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1400 }
1401 break;
1402
1403 case divps:
1404 dummy.op = divss;
1405 for (i = 0; i < 4; i++) {
1406 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1407 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1408 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1409 }
1410 break;
1411
1412 case sqrtps:
1413 dummy.op = sqrtss;
1414 for (i = 0; i < 4; i++) {
1415 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1416 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1417 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1418 }
1419 break;
1420
1421 case cvtdq2ps:
1422 dummy.op = cvtsi2ss;
1423 for (i = 0; i < 4; i++) {
1424 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1425 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1426 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1427 }
1428 break;
1429
1430 case cvttps2dq:
1431 dummy.op = cvttss2si;
1432 for (i = 0; i < 4; i++) {
1433 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1434 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1435 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1436 }
1437 break;
1438
1439 case cvtps2dq:
1440 dummy.op = cvtss2si;
1441 for (i = 0; i < 4; i++) {
1442 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1443 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1444 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1445 }
1446 break;
1447
1448 case cvtpi2ps:
1449 dummy.op = cvtsi2ss;
1450 for (i = 0; i < 2; i++) {
1451 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1452 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1453 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1454 }
1455 break;
1456
1457 case cvttps2pi:
1458 dummy.op = cvttss2si;
1459 for (i = 0; i < 2; i++) {
1460 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1461 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1462 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1463 }
1464 break;
1465
1466 case cvtps2pi:
1467 dummy.op = cvtss2si;
1468 for (i = 0; i < 2; i++) {
1469 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1470 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1471 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1472 }
1473 break;
1474
1475 case cmppd:
1476 dummy.op = cmpsd;
1477 dummy.imm = inst->imm;
1478 for (i = 0; i < 2; i++) {
1479 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1480 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1481 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1482 }
1483 break;
1484
1485 case minpd:
1486 dummy.op = minsd;
1487 for (i = 0; i < 2; i++) {
1488 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1489 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1490 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1491 }
1492 break;
1493
1494 case maxpd:
1495 dummy.op = maxsd;
1496 for (i = 0; i < 2; i++) {
1497 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1498 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500 }
1501 break;
1502
1503 case addpd:
1504 dummy.op = addsd;
1505 for (i = 0; i < 2; i++) {
1506 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1507 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1508 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1509 }
1510 break;
1511
1512 case subpd:
1513 dummy.op = subsd;
1514 for (i = 0; i < 2; i++) {
1515 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1516 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1517 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1518 }
1519 break;
1520
1521 case mulpd:
1522 dummy.op = mulsd;
1523 for (i = 0; i < 2; i++) {
1524 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1525 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1526 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1527 }
1528 break;
1529
1530 case divpd:
1531 dummy.op = divsd;
1532 for (i = 0; i < 2; i++) {
1533 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1534 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1535 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1536 }
1537 break;
1538
1539 case sqrtpd:
1540 dummy.op = sqrtsd;
1541 for (i = 0; i < 2; i++) {
1542 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1543 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1544 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1545 }
1546 break;
1547
1548 case cvtpi2pd:
1549 case cvtdq2pd:
1550 dummy.op = cvtsi2sd;
1551 for (i = 0; i < 2; i++) {
1552 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1553 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555 }
1556 break;
1557
1558 case cvttpd2pi:
1559 case cvttpd2dq:
1560 dummy.op = cvttsd2si;
1561 for (i = 0; i < 2; i++) {
1562 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1563 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1564 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1565 }
1566 /* for cvttpd2dq, zero the high 64 bits of the destination */
1567 if (inst->op == cvttpd2dq)
1568 inst->op1->l[1] = 0ll;
1569 break;
1570
1571 case cvtpd2pi:
1572 case cvtpd2dq:
1573 dummy.op = cvtsd2si;
1574 for (i = 0; i < 2; i++) {
1575 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 }
1579 /* for cvtpd2dq, zero the high 64 bits of the destination */
1580 if (inst->op == cvtpd2dq)
1581 inst->op1->l[1] = 0ll;
1582 break;
1583
1584 case cvtps2pd:
1585 dummy.op = cvtss2sd;
1586 for (i = 0; i < 2; i++) {
1587 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1588 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1589 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1590 }
1591 break;
1592
1593 case cvtpd2ps:
1594 dummy.op = cvtsd2ss;
1595 for (i = 0; i < 2; i++) {
1596 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1597 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599 }
1600 /* zero the high 64 bits of the destination */
1601 inst->op1->l[1] = 0ll;
1602
1603 default:
1604 break;
1605 }
1606 }
1607
1608