xref: /linux/arch/parisc/math-emu/fmpyfadd.c (revision 75bf465f0bc33e9b776a46d6a1b9b990f5fb7c37)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4  *
5  * Floating-point emulation code
6  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7  */
8 /*
9  * BEGIN_DESC
10  *
11  *  File:
12  *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1 $
13  *
14  *  Purpose:
15  *	Double Floating-point Multiply Fused Add
16  *	Double Floating-point Multiply Negate Fused Add
17  *	Single Floating-point Multiply Fused Add
18  *	Single Floating-point Multiply Negate Fused Add
19  *
20  *  External Interfaces:
21  *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22  *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23  *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24  *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
25  *
26  *  Internal Interfaces:
27  *
28  *  Theory:
29  *	<<please update with a overview of the operation of this file>>
30  *
31  * END_DESC
32 */
33 
34 
35 #include "float.h"
36 #include "sgl_float.h"
37 #include "dbl_float.h"
38 
39 
40 /*
41  *  Double Floating-point Multiply Fused Add
42  */
43 
44 int
dbl_fmpyfadd(dbl_floating_point * src1ptr,dbl_floating_point * src2ptr,dbl_floating_point * src3ptr,unsigned int * status,dbl_floating_point * dstptr)45 dbl_fmpyfadd(
46 	    dbl_floating_point *src1ptr,
47 	    dbl_floating_point *src2ptr,
48 	    dbl_floating_point *src3ptr,
49 	    unsigned int *status,
50 	    dbl_floating_point *dstptr)
51 {
52 	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53 	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54 	unsigned int rightp1, rightp2, rightp3, rightp4;
55 	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
56 	register int mpy_exponent, add_exponent, count;
57 	boolean inexact = FALSE, is_tiny = FALSE;
58 
59 	unsigned int signlessleft1, signlessright1, save;
60 	register int result_exponent, diff_exponent;
61 	int sign_save, jumpsize;
62 
63 	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64 	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65 	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
66 
67 	/*
68 	 * set sign bit of result of multiply
69 	 */
70 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
71 		Dbl_setnegativezerop1(resultp1);
72 	else Dbl_setzerop1(resultp1);
73 
74 	/*
75 	 * Generate multiply exponent
76 	 */
77 	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
78 
79 	/*
80 	 * check first operand for NaN's or infinity
81 	 */
82 	if (Dbl_isinfinity_exponent(opnd1p1)) {
83 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84 			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85 			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
86 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87 					/*
88 					 * invalid since operands are infinity
89 					 * and zero
90 					 */
91 					if (Is_invalidtrap_enabled())
92 						return(OPC_2E_INVALIDEXCEPTION);
93 					Set_invalidflag();
94 					Dbl_makequietnan(resultp1,resultp2);
95 					Dbl_copytoptr(resultp1,resultp2,dstptr);
96 					return(NOEXCEPTION);
97 				}
98 				/*
99 				 * Check third operand for infinity with a
100 				 *  sign opposite of the multiply result
101 				 */
102 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
104 					/*
105 					 * invalid since attempting a magnitude
106 					 * subtraction of infinities
107 					 */
108 					if (Is_invalidtrap_enabled())
109 						return(OPC_2E_INVALIDEXCEPTION);
110 					Set_invalidflag();
111 					Dbl_makequietnan(resultp1,resultp2);
112 					Dbl_copytoptr(resultp1,resultp2,dstptr);
113 					return(NOEXCEPTION);
114 				}
115 
116 				/*
117 			 	 * return infinity
118 			 	 */
119 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120 				Dbl_copytoptr(resultp1,resultp2,dstptr);
121 				return(NOEXCEPTION);
122 			}
123 		}
124 		else {
125 			/*
126 		 	 * is NaN; signaling or quiet?
127 		 	 */
128 			if (Dbl_isone_signaling(opnd1p1)) {
129 				/* trap if INVALIDTRAP enabled */
130 				if (Is_invalidtrap_enabled())
131 			    		return(OPC_2E_INVALIDEXCEPTION);
132 				/* make NaN quiet */
133 				Set_invalidflag();
134 				Dbl_set_quiet(opnd1p1);
135 			}
136 			/*
137 			 * is second operand a signaling NaN?
138 			 */
139 			else if (Dbl_is_signalingnan(opnd2p1)) {
140 				/* trap if INVALIDTRAP enabled */
141 				if (Is_invalidtrap_enabled())
142 			    		return(OPC_2E_INVALIDEXCEPTION);
143 				/* make NaN quiet */
144 				Set_invalidflag();
145 				Dbl_set_quiet(opnd2p1);
146 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147 				return(NOEXCEPTION);
148 			}
149 			/*
150 			 * is third operand a signaling NaN?
151 			 */
152 			else if (Dbl_is_signalingnan(opnd3p1)) {
153 				/* trap if INVALIDTRAP enabled */
154 				if (Is_invalidtrap_enabled())
155 			    		return(OPC_2E_INVALIDEXCEPTION);
156 				/* make NaN quiet */
157 				Set_invalidflag();
158 				Dbl_set_quiet(opnd3p1);
159 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160 				return(NOEXCEPTION);
161 			}
162 			/*
163 		 	 * return quiet NaN
164 		 	 */
165 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166 			return(NOEXCEPTION);
167 		}
168 	}
169 
170 	/*
171 	 * check second operand for NaN's or infinity
172 	 */
173 	if (Dbl_isinfinity_exponent(opnd2p1)) {
174 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175 			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176 				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
177 					/*
178 					 * invalid since multiply operands are
179 					 * zero & infinity
180 					 */
181 					if (Is_invalidtrap_enabled())
182 						return(OPC_2E_INVALIDEXCEPTION);
183 					Set_invalidflag();
184 					Dbl_makequietnan(opnd2p1,opnd2p2);
185 					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186 					return(NOEXCEPTION);
187 				}
188 
189 				/*
190 				 * Check third operand for infinity with a
191 				 *  sign opposite of the multiply result
192 				 */
193 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
195 					/*
196 					 * invalid since attempting a magnitude
197 					 * subtraction of infinities
198 					 */
199 					if (Is_invalidtrap_enabled())
200 				       		return(OPC_2E_INVALIDEXCEPTION);
201 				       	Set_invalidflag();
202 				       	Dbl_makequietnan(resultp1,resultp2);
203 					Dbl_copytoptr(resultp1,resultp2,dstptr);
204 					return(NOEXCEPTION);
205 				}
206 
207 				/*
208 				 * return infinity
209 				 */
210 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211 				Dbl_copytoptr(resultp1,resultp2,dstptr);
212 				return(NOEXCEPTION);
213 			}
214 		}
215 		else {
216 			/*
217 			 * is NaN; signaling or quiet?
218 			 */
219 			if (Dbl_isone_signaling(opnd2p1)) {
220 				/* trap if INVALIDTRAP enabled */
221 				if (Is_invalidtrap_enabled())
222 					return(OPC_2E_INVALIDEXCEPTION);
223 				/* make NaN quiet */
224 				Set_invalidflag();
225 				Dbl_set_quiet(opnd2p1);
226 			}
227 			/*
228 			 * is third operand a signaling NaN?
229 			 */
230 			else if (Dbl_is_signalingnan(opnd3p1)) {
231 			       	/* trap if INVALIDTRAP enabled */
232 			       	if (Is_invalidtrap_enabled())
233 				   		return(OPC_2E_INVALIDEXCEPTION);
234 			       	/* make NaN quiet */
235 			       	Set_invalidflag();
236 			       	Dbl_set_quiet(opnd3p1);
237 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238 		       		return(NOEXCEPTION);
239 			}
240 			/*
241 			 * return quiet NaN
242 			 */
243 			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244 			return(NOEXCEPTION);
245 		}
246 	}
247 
248 	/*
249 	 * check third operand for NaN's or infinity
250 	 */
251 	if (Dbl_isinfinity_exponent(opnd3p1)) {
252 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253 			/* return infinity */
254 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255 			return(NOEXCEPTION);
256 		} else {
257 			/*
258 			 * is NaN; signaling or quiet?
259 			 */
260 			if (Dbl_isone_signaling(opnd3p1)) {
261 				/* trap if INVALIDTRAP enabled */
262 				if (Is_invalidtrap_enabled())
263 					return(OPC_2E_INVALIDEXCEPTION);
264 				/* make NaN quiet */
265 				Set_invalidflag();
266 				Dbl_set_quiet(opnd3p1);
267 			}
268 			/*
269 			 * return quiet NaN
270  			 */
271 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272 			return(NOEXCEPTION);
273 		}
274     	}
275 
276 	/*
277 	 * Generate multiply mantissa
278 	 */
279 	if (Dbl_isnotzero_exponent(opnd1p1)) {
280 		/* set hidden bit */
281 		Dbl_clear_signexponent_set_hidden(opnd1p1);
282 	}
283 	else {
284 		/* check for zero */
285 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
286 			/*
287 			 * Perform the add opnd3 with zero here.
288 			 */
289 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290 				if (Is_rounding_mode(ROUNDMINUS)) {
291 					Dbl_or_signs(opnd3p1,resultp1);
292 				} else {
293 					Dbl_and_signs(opnd3p1,resultp1);
294 				}
295 			}
296 			/*
297 			 * Now let's check for trapped underflow case.
298 			 */
299 			else if (Dbl_iszero_exponent(opnd3p1) &&
300 			         Is_underflowtrap_enabled()) {
301                     		/* need to normalize results mantissa */
302                     		sign_save = Dbl_signextendedsign(opnd3p1);
303 				result_exponent = 0;
304                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
305                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
307                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308 							unfl);
309                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310                     		/* inexact = FALSE */
311                     		return(OPC_2E_UNDERFLOWEXCEPTION);
312 			}
313 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314 			return(NOEXCEPTION);
315 		}
316 		/* is denormalized, adjust exponent */
317 		Dbl_clear_signexponent(opnd1p1);
318 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
319 		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
320 	}
321 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
322 	if (Dbl_isnotzero_exponent(opnd2p1)) {
323 		Dbl_clear_signexponent_set_hidden(opnd2p1);
324 	}
325 	else {
326 		/* check for zero */
327 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
328 			/*
329 			 * Perform the add opnd3 with zero here.
330 			 */
331 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332 				if (Is_rounding_mode(ROUNDMINUS)) {
333 					Dbl_or_signs(opnd3p1,resultp1);
334 				} else {
335 					Dbl_and_signs(opnd3p1,resultp1);
336 				}
337 			}
338 			/*
339 			 * Now let's check for trapped underflow case.
340 			 */
341 			else if (Dbl_iszero_exponent(opnd3p1) &&
342 			    Is_underflowtrap_enabled()) {
343                     		/* need to normalize results mantissa */
344                     		sign_save = Dbl_signextendedsign(opnd3p1);
345 				result_exponent = 0;
346                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
347                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
349                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350 							unfl);
351                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352                     		/* inexact = FALSE */
353 				return(OPC_2E_UNDERFLOWEXCEPTION);
354 			}
355 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356 			return(NOEXCEPTION);
357 		}
358 		/* is denormalized; want to normalize */
359 		Dbl_clear_signexponent(opnd2p1);
360 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
361 		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
362 	}
363 
364 	/* Multiply the first two source mantissas together */
365 
366 	/*
367 	 * The intermediate result will be kept in tmpres,
368 	 * which needs enough room for 106 bits of mantissa,
369 	 * so lets call it a Double extended.
370 	 */
371 	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
372 
373 	/*
374 	 * Four bits at a time are inspected in each loop, and a
375 	 * simple shift and add multiply algorithm is used.
376 	 */
377 	for (count = DBL_P-1; count >= 0; count -= 4) {
378 		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379 		if (Dbit28p2(opnd1p2)) {
380 	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
381 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
382 			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
383 		}
384 		if (Dbit29p2(opnd1p2)) {
385 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386 			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
387 		}
388 		if (Dbit30p2(opnd1p2)) {
389 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390 			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
391 		}
392 		if (Dbit31p2(opnd1p2)) {
393 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394 			 opnd2p1, opnd2p2, 0, 0);
395 		}
396 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
397 	}
398 	if (Is_dexthiddenoverflow(tmpresp1)) {
399 		/* result mantissa >= 2 (mantissa overflow) */
400 		mpy_exponent++;
401 		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
402 	}
403 
404 	/*
405 	 * Restore the sign of the mpy result which was saved in resultp1.
406 	 * The exponent will continue to be kept in mpy_exponent.
407 	 */
408 	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
409 
410 	/*
411 	 * No rounding is required, since the result of the multiply
412 	 * is exact in the extended format.
413 	 */
414 
415 	/*
416 	 * Now we are ready to perform the add portion of the operation.
417 	 *
418 	 * The exponents need to be kept as integers for now, since the
419 	 * multiply result might not fit into the exponent field.  We
420 	 * can't overflow or underflow because of this yet, since the
421 	 * add could bring the final result back into range.
422 	 */
423 	add_exponent = Dbl_exponent(opnd3p1);
424 
425 	/*
426 	 * Check for denormalized or zero add operand.
427 	 */
428 	if (add_exponent == 0) {
429 		/* check for zero */
430 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431 			/* right is zero */
432 			/* Left can't be zero and must be result.
433 			 *
434 			 * The final result is now in tmpres and mpy_exponent,
435 			 * and needs to be rounded and squeezed back into
436 			 * double precision format from double extended.
437 			 */
438 			result_exponent = mpy_exponent;
439 			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440 				resultp1,resultp2,resultp3,resultp4);
441 			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
442 			goto round;
443 		}
444 
445 		/*
446 		 * Neither are zeroes.
447 		 * Adjust exponent and normalize add operand.
448 		 */
449 		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
450 		Dbl_clear_signexponent(opnd3p1);
451 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
452 		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453 		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
454 	} else {
455 		Dbl_clear_exponent_set_hidden(opnd3p1);
456 	}
457 	/*
458 	 * Copy opnd3 to the double extended variable called right.
459 	 */
460 	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
461 
462 	/*
463 	 * A zero "save" helps discover equal operands (for later),
464 	 * and is used in swapping operands (if needed).
465 	 */
466 	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
467 
468 	/*
469 	 * Compare magnitude of operands.
470 	 */
471 	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472 	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
474 	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
475 		/*
476 		 * Set the left operand to the larger one by XOR swap.
477 		 * First finish the first word "save".
478 		 */
479 		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
480 		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
481 		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482 			rightp2,rightp3,rightp4);
483 		/* also setup exponents used in rest of routine */
484 		diff_exponent = add_exponent - mpy_exponent;
485 		result_exponent = add_exponent;
486 	} else {
487 		/* also setup exponents used in rest of routine */
488 		diff_exponent = mpy_exponent - add_exponent;
489 		result_exponent = mpy_exponent;
490 	}
491 	/* Invariant: left is not smaller than right. */
492 
493 	/*
494 	 * Special case alignment of operands that would force alignment
495 	 * beyond the extent of the extension.  A further optimization
496 	 * could special case this but only reduces the path length for
497 	 * this infrequent case.
498 	 */
499 	if (diff_exponent > DBLEXT_THRESHOLD) {
500 		diff_exponent = DBLEXT_THRESHOLD;
501 	}
502 
503 	/* Align right operand by shifting it to the right */
504 	Dblext_clear_sign(rightp1);
505 	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506 		/*shifted by*/diff_exponent);
507 
508 	/* Treat sum and difference of the operands separately. */
509 	if ((int)save < 0) {
510 		/*
511 		 * Difference of the two operands.  Overflow can occur if the
512 		 * multiply overflowed.  A borrow can occur out of the hidden
513 		 * bit and force a post normalization phase.
514 		 */
515 		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516 			rightp1,rightp2,rightp3,rightp4,
517 			resultp1,resultp2,resultp3,resultp4);
518 		sign_save = Dbl_signextendedsign(resultp1);
519 		if (Dbl_iszero_hidden(resultp1)) {
520 			/* Handle normalization */
521 		/* A straightforward algorithm would now shift the
522 		 * result and extension left until the hidden bit
523 		 * becomes one.  Not all of the extension bits need
524 		 * participate in the shift.  Only the two most
525 		 * significant bits (round and guard) are needed.
526 		 * If only a single shift is needed then the guard
527 		 * bit becomes a significant low order bit and the
528 		 * extension must participate in the rounding.
529 		 * If more than a single shift is needed, then all
530 		 * bits to the right of the guard bit are zeros,
531 		 * and the guard bit may or may not be zero. */
532 			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533 				resultp4);
534 
535 			/* Need to check for a zero result.  The sign and
536 			 * exponent fields have already been zeroed.  The more
537 			 * efficient test of the full object can be used.
538 			 */
539 			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540 				/* Must have been "x-x" or "x+(-x)". */
541 				if (Is_rounding_mode(ROUNDMINUS))
542 					Dbl_setone_sign(resultp1);
543 				Dbl_copytoptr(resultp1,resultp2,dstptr);
544 				return(NOEXCEPTION);
545 			}
546 			result_exponent--;
547 
548 			/* Look to see if normalization is finished. */
549 			if (Dbl_isone_hidden(resultp1)) {
550 				/* No further normalization is needed */
551 				goto round;
552 			}
553 
554 			/* Discover first one bit to determine shift amount.
555 			 * Use a modified binary search.  We have already
556 			 * shifted the result one position right and still
557 			 * not found a one so the remainder of the extension
558 			 * must be zero and simplifies rounding. */
559 			/* Scan bytes */
560 			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561 				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562 				result_exponent -= 8;
563 			}
564 			/* Now narrow it down to the nibble */
565 			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566 				/* The lower nibble contains the
567 				 * normalizing one */
568 				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569 				result_exponent -= 4;
570 			}
571 			/* Select case where first bit is set (already
572 			 * normalized) otherwise select the proper shift. */
573 			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574 			if (jumpsize <= 7) switch(jumpsize) {
575 			case 1:
576 				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577 					resultp4);
578 				result_exponent -= 3;
579 				break;
580 			case 2:
581 			case 3:
582 				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583 					resultp4);
584 				result_exponent -= 2;
585 				break;
586 			case 4:
587 			case 5:
588 			case 6:
589 			case 7:
590 				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591 					resultp4);
592 				result_exponent -= 1;
593 				break;
594 			}
595 		} /* end if (hidden...)... */
596 	/* Fall through and round */
597 	} /* end if (save < 0)... */
598 	else {
599 		/* Add magnitudes */
600 		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601 			rightp1,rightp2,rightp3,rightp4,
602 			/*to*/resultp1,resultp2,resultp3,resultp4);
603 		sign_save = Dbl_signextendedsign(resultp1);
604 		if (Dbl_isone_hiddenoverflow(resultp1)) {
605 	    		/* Prenormalization required. */
606 	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607 				resultp4);
608 	    		result_exponent++;
609 		} /* end if hiddenoverflow... */
610 	} /* end else ...add magnitudes... */
611 
612 	/* Round the result.  If the extension and lower two words are
613 	 * all zeros, then the result is exact.  Otherwise round in the
614 	 * correct direction.  Underflow is possible. If a postnormalization
615 	 * is necessary, then the mantissa is all zeros so no shift is needed.
616 	 */
617   round:
618 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
619 		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620 			result_exponent,is_tiny);
621 	}
622 	Dbl_set_sign(resultp1,/*using*/sign_save);
623 	if (Dblext_isnotzero_mantissap3(resultp3) ||
624 	    Dblext_isnotzero_mantissap4(resultp4)) {
625 		inexact = TRUE;
626 		switch(Rounding_mode()) {
627 		case ROUNDNEAREST: /* The default. */
628 			if (Dblext_isone_highp3(resultp3)) {
629 				/* at least 1/2 ulp */
630 				if (Dblext_isnotzero_low31p3(resultp3) ||
631 				    Dblext_isnotzero_mantissap4(resultp4) ||
632 				    Dblext_isone_lowp2(resultp2)) {
633 					/* either exactly half way and odd or
634 					 * more than 1/2ulp */
635 					Dbl_increment(resultp1,resultp2);
636 				}
637 			}
638 	    		break;
639 
640 		case ROUNDPLUS:
641 	    		if (Dbl_iszero_sign(resultp1)) {
642 				/* Round up positive results */
643 				Dbl_increment(resultp1,resultp2);
644 			}
645 			break;
646 
647 		case ROUNDMINUS:
648 	    		if (Dbl_isone_sign(resultp1)) {
649 				/* Round down negative results */
650 				Dbl_increment(resultp1,resultp2);
651 			}
652 
653 		case ROUNDZERO:;
654 			/* truncate is simple */
655 		} /* end switch... */
656 		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
657 	}
658 	if (result_exponent >= DBL_INFINITY_EXPONENT) {
659                 /* trap if OVERFLOWTRAP enabled */
660                 if (Is_overflowtrap_enabled()) {
661                         /*
662                          * Adjust bias of result
663                          */
664                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665                         Dbl_copytoptr(resultp1,resultp2,dstptr);
666                         if (inexact)
667                             if (Is_inexacttrap_enabled())
668                                 return (OPC_2E_OVERFLOWEXCEPTION |
669 					OPC_2E_INEXACTEXCEPTION);
670                             else Set_inexactflag();
671                         return (OPC_2E_OVERFLOWEXCEPTION);
672                 }
673                 inexact = TRUE;
674                 Set_overflowflag();
675                 /* set result to infinity or largest number */
676                 Dbl_setoverflow(resultp1,resultp2);
677 
678 	} else if (result_exponent <= 0) {	/* underflow case */
679 		if (Is_underflowtrap_enabled()) {
680                         /*
681                          * Adjust bias of result
682                          */
683                 	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684 			Dbl_copytoptr(resultp1,resultp2,dstptr);
685                         if (inexact)
686                             if (Is_inexacttrap_enabled())
687                                 return (OPC_2E_UNDERFLOWEXCEPTION |
688 					OPC_2E_INEXACTEXCEPTION);
689                             else Set_inexactflag();
690 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
691 		}
692 		else if (inexact && is_tiny) Set_underflowflag();
693 	}
694 	else Dbl_set_exponent(resultp1,result_exponent);
695 	Dbl_copytoptr(resultp1,resultp2,dstptr);
696 	if (inexact)
697 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698 		else Set_inexactflag();
699     	return(NOEXCEPTION);
700 }
701 
702 /*
703  *  Double Floating-point Multiply Negate Fused Add
704  */
705 
dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)706 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
707 
708 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
709 unsigned int *status;
710 {
711 	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712 	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713 	unsigned int rightp1, rightp2, rightp3, rightp4;
714 	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
715 	register int mpy_exponent, add_exponent, count;
716 	boolean inexact = FALSE, is_tiny = FALSE;
717 
718 	unsigned int signlessleft1, signlessright1, save;
719 	register int result_exponent, diff_exponent;
720 	int sign_save, jumpsize;
721 
722 	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723 	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724 	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
725 
726 	/*
727 	 * set sign bit of result of multiply
728 	 */
729 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
730 		Dbl_setzerop1(resultp1);
731 	else
732 		Dbl_setnegativezerop1(resultp1);
733 
734 	/*
735 	 * Generate multiply exponent
736 	 */
737 	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
738 
739 	/*
740 	 * check first operand for NaN's or infinity
741 	 */
742 	if (Dbl_isinfinity_exponent(opnd1p1)) {
743 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744 			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745 			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
746 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
747 					/*
748 					 * invalid since operands are infinity
749 					 * and zero
750 					 */
751 					if (Is_invalidtrap_enabled())
752 						return(OPC_2E_INVALIDEXCEPTION);
753 					Set_invalidflag();
754 					Dbl_makequietnan(resultp1,resultp2);
755 					Dbl_copytoptr(resultp1,resultp2,dstptr);
756 					return(NOEXCEPTION);
757 				}
758 				/*
759 				 * Check third operand for infinity with a
760 				 *  sign opposite of the multiply result
761 				 */
762 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
764 					/*
765 					 * invalid since attempting a magnitude
766 					 * subtraction of infinities
767 					 */
768 					if (Is_invalidtrap_enabled())
769 						return(OPC_2E_INVALIDEXCEPTION);
770 					Set_invalidflag();
771 					Dbl_makequietnan(resultp1,resultp2);
772 					Dbl_copytoptr(resultp1,resultp2,dstptr);
773 					return(NOEXCEPTION);
774 				}
775 
776 				/*
777 			 	 * return infinity
778 			 	 */
779 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780 				Dbl_copytoptr(resultp1,resultp2,dstptr);
781 				return(NOEXCEPTION);
782 			}
783 		}
784 		else {
785 			/*
786 		 	 * is NaN; signaling or quiet?
787 		 	 */
788 			if (Dbl_isone_signaling(opnd1p1)) {
789 				/* trap if INVALIDTRAP enabled */
790 				if (Is_invalidtrap_enabled())
791 			    		return(OPC_2E_INVALIDEXCEPTION);
792 				/* make NaN quiet */
793 				Set_invalidflag();
794 				Dbl_set_quiet(opnd1p1);
795 			}
796 			/*
797 			 * is second operand a signaling NaN?
798 			 */
799 			else if (Dbl_is_signalingnan(opnd2p1)) {
800 				/* trap if INVALIDTRAP enabled */
801 				if (Is_invalidtrap_enabled())
802 			    		return(OPC_2E_INVALIDEXCEPTION);
803 				/* make NaN quiet */
804 				Set_invalidflag();
805 				Dbl_set_quiet(opnd2p1);
806 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807 				return(NOEXCEPTION);
808 			}
809 			/*
810 			 * is third operand a signaling NaN?
811 			 */
812 			else if (Dbl_is_signalingnan(opnd3p1)) {
813 				/* trap if INVALIDTRAP enabled */
814 				if (Is_invalidtrap_enabled())
815 			    		return(OPC_2E_INVALIDEXCEPTION);
816 				/* make NaN quiet */
817 				Set_invalidflag();
818 				Dbl_set_quiet(opnd3p1);
819 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820 				return(NOEXCEPTION);
821 			}
822 			/*
823 		 	 * return quiet NaN
824 		 	 */
825 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826 			return(NOEXCEPTION);
827 		}
828 	}
829 
830 	/*
831 	 * check second operand for NaN's or infinity
832 	 */
833 	if (Dbl_isinfinity_exponent(opnd2p1)) {
834 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835 			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836 				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
837 					/*
838 					 * invalid since multiply operands are
839 					 * zero & infinity
840 					 */
841 					if (Is_invalidtrap_enabled())
842 						return(OPC_2E_INVALIDEXCEPTION);
843 					Set_invalidflag();
844 					Dbl_makequietnan(opnd2p1,opnd2p2);
845 					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846 					return(NOEXCEPTION);
847 				}
848 
849 				/*
850 				 * Check third operand for infinity with a
851 				 *  sign opposite of the multiply result
852 				 */
853 				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854 				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
855 					/*
856 					 * invalid since attempting a magnitude
857 					 * subtraction of infinities
858 					 */
859 					if (Is_invalidtrap_enabled())
860 				       		return(OPC_2E_INVALIDEXCEPTION);
861 				       	Set_invalidflag();
862 				       	Dbl_makequietnan(resultp1,resultp2);
863 					Dbl_copytoptr(resultp1,resultp2,dstptr);
864 					return(NOEXCEPTION);
865 				}
866 
867 				/*
868 				 * return infinity
869 				 */
870 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871 				Dbl_copytoptr(resultp1,resultp2,dstptr);
872 				return(NOEXCEPTION);
873 			}
874 		}
875 		else {
876 			/*
877 			 * is NaN; signaling or quiet?
878 			 */
879 			if (Dbl_isone_signaling(opnd2p1)) {
880 				/* trap if INVALIDTRAP enabled */
881 				if (Is_invalidtrap_enabled())
882 					return(OPC_2E_INVALIDEXCEPTION);
883 				/* make NaN quiet */
884 				Set_invalidflag();
885 				Dbl_set_quiet(opnd2p1);
886 			}
887 			/*
888 			 * is third operand a signaling NaN?
889 			 */
890 			else if (Dbl_is_signalingnan(opnd3p1)) {
891 			       	/* trap if INVALIDTRAP enabled */
892 			       	if (Is_invalidtrap_enabled())
893 				   		return(OPC_2E_INVALIDEXCEPTION);
894 			       	/* make NaN quiet */
895 			       	Set_invalidflag();
896 			       	Dbl_set_quiet(opnd3p1);
897 				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898 		       		return(NOEXCEPTION);
899 			}
900 			/*
901 			 * return quiet NaN
902 			 */
903 			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904 			return(NOEXCEPTION);
905 		}
906 	}
907 
908 	/*
909 	 * check third operand for NaN's or infinity
910 	 */
911 	if (Dbl_isinfinity_exponent(opnd3p1)) {
912 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913 			/* return infinity */
914 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915 			return(NOEXCEPTION);
916 		} else {
917 			/*
918 			 * is NaN; signaling or quiet?
919 			 */
920 			if (Dbl_isone_signaling(opnd3p1)) {
921 				/* trap if INVALIDTRAP enabled */
922 				if (Is_invalidtrap_enabled())
923 					return(OPC_2E_INVALIDEXCEPTION);
924 				/* make NaN quiet */
925 				Set_invalidflag();
926 				Dbl_set_quiet(opnd3p1);
927 			}
928 			/*
929 			 * return quiet NaN
930  			 */
931 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932 			return(NOEXCEPTION);
933 		}
934     	}
935 
936 	/*
937 	 * Generate multiply mantissa
938 	 */
939 	if (Dbl_isnotzero_exponent(opnd1p1)) {
940 		/* set hidden bit */
941 		Dbl_clear_signexponent_set_hidden(opnd1p1);
942 	}
943 	else {
944 		/* check for zero */
945 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
946 			/*
947 			 * Perform the add opnd3 with zero here.
948 			 */
949 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950 				if (Is_rounding_mode(ROUNDMINUS)) {
951 					Dbl_or_signs(opnd3p1,resultp1);
952 				} else {
953 					Dbl_and_signs(opnd3p1,resultp1);
954 				}
955 			}
956 			/*
957 			 * Now let's check for trapped underflow case.
958 			 */
959 			else if (Dbl_iszero_exponent(opnd3p1) &&
960 			         Is_underflowtrap_enabled()) {
961                     		/* need to normalize results mantissa */
962                     		sign_save = Dbl_signextendedsign(opnd3p1);
963 				result_exponent = 0;
964                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
965                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
967                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968 							unfl);
969                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970                     		/* inexact = FALSE */
971                     		return(OPC_2E_UNDERFLOWEXCEPTION);
972 			}
973 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974 			return(NOEXCEPTION);
975 		}
976 		/* is denormalized, adjust exponent */
977 		Dbl_clear_signexponent(opnd1p1);
978 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
979 		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
980 	}
981 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
982 	if (Dbl_isnotzero_exponent(opnd2p1)) {
983 		Dbl_clear_signexponent_set_hidden(opnd2p1);
984 	}
985 	else {
986 		/* check for zero */
987 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
988 			/*
989 			 * Perform the add opnd3 with zero here.
990 			 */
991 			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992 				if (Is_rounding_mode(ROUNDMINUS)) {
993 					Dbl_or_signs(opnd3p1,resultp1);
994 				} else {
995 					Dbl_and_signs(opnd3p1,resultp1);
996 				}
997 			}
998 			/*
999 			 * Now let's check for trapped underflow case.
1000 			 */
1001 			else if (Dbl_iszero_exponent(opnd3p1) &&
1002 			    Is_underflowtrap_enabled()) {
1003                     		/* need to normalize results mantissa */
1004                     		sign_save = Dbl_signextendedsign(opnd3p1);
1005 				result_exponent = 0;
1006                     		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007                     		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008                     		Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009                     		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010 							unfl);
1011                     		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012                     		/* inexact = FALSE */
1013                     		return(OPC_2E_UNDERFLOWEXCEPTION);
1014 			}
1015 			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016 			return(NOEXCEPTION);
1017 		}
1018 		/* is denormalized; want to normalize */
1019 		Dbl_clear_signexponent(opnd2p1);
1020 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021 		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022 	}
1023 
1024 	/* Multiply the first two source mantissas together */
1025 
1026 	/*
1027 	 * The intermediate result will be kept in tmpres,
1028 	 * which needs enough room for 106 bits of mantissa,
1029 	 * so lets call it a Double extended.
1030 	 */
1031 	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032 
1033 	/*
1034 	 * Four bits at a time are inspected in each loop, and a
1035 	 * simple shift and add multiply algorithm is used.
1036 	 */
1037 	for (count = DBL_P-1; count >= 0; count -= 4) {
1038 		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039 		if (Dbit28p2(opnd1p2)) {
1040 	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
1041 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042 			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1043 		}
1044 		if (Dbit29p2(opnd1p2)) {
1045 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046 			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1047 		}
1048 		if (Dbit30p2(opnd1p2)) {
1049 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050 			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1051 		}
1052 		if (Dbit31p2(opnd1p2)) {
1053 			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054 			 opnd2p1, opnd2p2, 0, 0);
1055 		}
1056 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057 	}
1058 	if (Is_dexthiddenoverflow(tmpresp1)) {
1059 		/* result mantissa >= 2 (mantissa overflow) */
1060 		mpy_exponent++;
1061 		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062 	}
1063 
1064 	/*
1065 	 * Restore the sign of the mpy result which was saved in resultp1.
1066 	 * The exponent will continue to be kept in mpy_exponent.
1067 	 */
1068 	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069 
1070 	/*
1071 	 * No rounding is required, since the result of the multiply
1072 	 * is exact in the extended format.
1073 	 */
1074 
1075 	/*
1076 	 * Now we are ready to perform the add portion of the operation.
1077 	 *
1078 	 * The exponents need to be kept as integers for now, since the
1079 	 * multiply result might not fit into the exponent field.  We
1080 	 * can't overflow or underflow because of this yet, since the
1081 	 * add could bring the final result back into range.
1082 	 */
1083 	add_exponent = Dbl_exponent(opnd3p1);
1084 
1085 	/*
1086 	 * Check for denormalized or zero add operand.
1087 	 */
1088 	if (add_exponent == 0) {
1089 		/* check for zero */
1090 		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091 			/* right is zero */
1092 			/* Left can't be zero and must be result.
1093 			 *
1094 			 * The final result is now in tmpres and mpy_exponent,
1095 			 * and needs to be rounded and squeezed back into
1096 			 * double precision format from double extended.
1097 			 */
1098 			result_exponent = mpy_exponent;
1099 			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100 				resultp1,resultp2,resultp3,resultp4);
1101 			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102 			goto round;
1103 		}
1104 
1105 		/*
1106 		 * Neither are zeroes.
1107 		 * Adjust exponent and normalize add operand.
1108 		 */
1109 		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
1110 		Dbl_clear_signexponent(opnd3p1);
1111 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112 		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113 		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
1114 	} else {
1115 		Dbl_clear_exponent_set_hidden(opnd3p1);
1116 	}
1117 	/*
1118 	 * Copy opnd3 to the double extended variable called right.
1119 	 */
1120 	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121 
1122 	/*
1123 	 * A zero "save" helps discover equal operands (for later),
1124 	 * and is used in swapping operands (if needed).
1125 	 */
1126 	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1127 
1128 	/*
1129 	 * Compare magnitude of operands.
1130 	 */
1131 	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132 	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134 	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135 		/*
1136 		 * Set the left operand to the larger one by XOR swap.
1137 		 * First finish the first word "save".
1138 		 */
1139 		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140 		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141 		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142 			rightp2,rightp3,rightp4);
1143 		/* also setup exponents used in rest of routine */
1144 		diff_exponent = add_exponent - mpy_exponent;
1145 		result_exponent = add_exponent;
1146 	} else {
1147 		/* also setup exponents used in rest of routine */
1148 		diff_exponent = mpy_exponent - add_exponent;
1149 		result_exponent = mpy_exponent;
1150 	}
1151 	/* Invariant: left is not smaller than right. */
1152 
1153 	/*
1154 	 * Special case alignment of operands that would force alignment
1155 	 * beyond the extent of the extension.  A further optimization
1156 	 * could special case this but only reduces the path length for
1157 	 * this infrequent case.
1158 	 */
1159 	if (diff_exponent > DBLEXT_THRESHOLD) {
1160 		diff_exponent = DBLEXT_THRESHOLD;
1161 	}
1162 
1163 	/* Align right operand by shifting it to the right */
1164 	Dblext_clear_sign(rightp1);
1165 	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166 		/*shifted by*/diff_exponent);
1167 
1168 	/* Treat sum and difference of the operands separately. */
1169 	if ((int)save < 0) {
1170 		/*
1171 		 * Difference of the two operands.  Overflow can occur if the
1172 		 * multiply overflowed.  A borrow can occur out of the hidden
1173 		 * bit and force a post normalization phase.
1174 		 */
1175 		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176 			rightp1,rightp2,rightp3,rightp4,
1177 			resultp1,resultp2,resultp3,resultp4);
1178 		sign_save = Dbl_signextendedsign(resultp1);
1179 		if (Dbl_iszero_hidden(resultp1)) {
1180 			/* Handle normalization */
1181 		/* A straightforward algorithm would now shift the
1182 		 * result and extension left until the hidden bit
1183 		 * becomes one.  Not all of the extension bits need
1184 		 * participate in the shift.  Only the two most
1185 		 * significant bits (round and guard) are needed.
1186 		 * If only a single shift is needed then the guard
1187 		 * bit becomes a significant low order bit and the
1188 		 * extension must participate in the rounding.
1189 		 * If more than a single shift is needed, then all
1190 		 * bits to the right of the guard bit are zeros,
1191 		 * and the guard bit may or may not be zero. */
1192 			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193 				resultp4);
1194 
1195 			/* Need to check for a zero result.  The sign and
1196 			 * exponent fields have already been zeroed.  The more
1197 			 * efficient test of the full object can be used.
1198 			 */
1199 			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200 				/* Must have been "x-x" or "x+(-x)". */
1201 				if (Is_rounding_mode(ROUNDMINUS))
1202 					Dbl_setone_sign(resultp1);
1203 				Dbl_copytoptr(resultp1,resultp2,dstptr);
1204 				return(NOEXCEPTION);
1205 			}
1206 			result_exponent--;
1207 
1208 			/* Look to see if normalization is finished. */
1209 			if (Dbl_isone_hidden(resultp1)) {
1210 				/* No further normalization is needed */
1211 				goto round;
1212 			}
1213 
1214 			/* Discover first one bit to determine shift amount.
1215 			 * Use a modified binary search.  We have already
1216 			 * shifted the result one position right and still
1217 			 * not found a one so the remainder of the extension
1218 			 * must be zero and simplifies rounding. */
1219 			/* Scan bytes */
1220 			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221 				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222 				result_exponent -= 8;
1223 			}
1224 			/* Now narrow it down to the nibble */
1225 			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226 				/* The lower nibble contains the
1227 				 * normalizing one */
1228 				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229 				result_exponent -= 4;
1230 			}
1231 			/* Select case where first bit is set (already
1232 			 * normalized) otherwise select the proper shift. */
1233 			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234 			if (jumpsize <= 7) switch(jumpsize) {
1235 			case 1:
1236 				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237 					resultp4);
1238 				result_exponent -= 3;
1239 				break;
1240 			case 2:
1241 			case 3:
1242 				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243 					resultp4);
1244 				result_exponent -= 2;
1245 				break;
1246 			case 4:
1247 			case 5:
1248 			case 6:
1249 			case 7:
1250 				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251 					resultp4);
1252 				result_exponent -= 1;
1253 				break;
1254 			}
1255 		} /* end if (hidden...)... */
1256 	/* Fall through and round */
1257 	} /* end if (save < 0)... */
1258 	else {
1259 		/* Add magnitudes */
1260 		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261 			rightp1,rightp2,rightp3,rightp4,
1262 			/*to*/resultp1,resultp2,resultp3,resultp4);
1263 		sign_save = Dbl_signextendedsign(resultp1);
1264 		if (Dbl_isone_hiddenoverflow(resultp1)) {
1265 	    		/* Prenormalization required. */
1266 	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267 				resultp4);
1268 	    		result_exponent++;
1269 		} /* end if hiddenoverflow... */
1270 	} /* end else ...add magnitudes... */
1271 
1272 	/* Round the result.  If the extension and lower two words are
1273 	 * all zeros, then the result is exact.  Otherwise round in the
1274 	 * correct direction.  Underflow is possible. If a postnormalization
1275 	 * is necessary, then the mantissa is all zeros so no shift is needed.
1276 	 */
1277   round:
1278 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279 		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280 			result_exponent,is_tiny);
1281 	}
1282 	Dbl_set_sign(resultp1,/*using*/sign_save);
1283 	if (Dblext_isnotzero_mantissap3(resultp3) ||
1284 	    Dblext_isnotzero_mantissap4(resultp4)) {
1285 		inexact = TRUE;
1286 		switch(Rounding_mode()) {
1287 		case ROUNDNEAREST: /* The default. */
1288 			if (Dblext_isone_highp3(resultp3)) {
1289 				/* at least 1/2 ulp */
1290 				if (Dblext_isnotzero_low31p3(resultp3) ||
1291 				    Dblext_isnotzero_mantissap4(resultp4) ||
1292 				    Dblext_isone_lowp2(resultp2)) {
1293 					/* either exactly half way and odd or
1294 					 * more than 1/2ulp */
1295 					Dbl_increment(resultp1,resultp2);
1296 				}
1297 			}
1298 	    		break;
1299 
1300 		case ROUNDPLUS:
1301 	    		if (Dbl_iszero_sign(resultp1)) {
1302 				/* Round up positive results */
1303 				Dbl_increment(resultp1,resultp2);
1304 			}
1305 			break;
1306 
1307 		case ROUNDMINUS:
1308 	    		if (Dbl_isone_sign(resultp1)) {
1309 				/* Round down negative results */
1310 				Dbl_increment(resultp1,resultp2);
1311 			}
1312 
1313 		case ROUNDZERO:;
1314 			/* truncate is simple */
1315 		} /* end switch... */
1316 		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317 	}
1318 	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319 		/* Overflow */
1320 		if (Is_overflowtrap_enabled()) {
1321                         /*
1322                          * Adjust bias of result
1323                          */
1324                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1326                         if (inexact)
1327                             if (Is_inexacttrap_enabled())
1328                                 return (OPC_2E_OVERFLOWEXCEPTION |
1329 					OPC_2E_INEXACTEXCEPTION);
1330                             else Set_inexactflag();
1331                         return (OPC_2E_OVERFLOWEXCEPTION);
1332 		}
1333 		inexact = TRUE;
1334 		Set_overflowflag();
1335 		Dbl_setoverflow(resultp1,resultp2);
1336 	} else if (result_exponent <= 0) {	/* underflow case */
1337 		if (Is_underflowtrap_enabled()) {
1338                         /*
1339                          * Adjust bias of result
1340                          */
1341                 	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342 			Dbl_copytoptr(resultp1,resultp2,dstptr);
1343                         if (inexact)
1344                             if (Is_inexacttrap_enabled())
1345                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1346 					OPC_2E_INEXACTEXCEPTION);
1347                             else Set_inexactflag();
1348 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1349 		}
1350 		else if (inexact && is_tiny) Set_underflowflag();
1351 	}
1352 	else Dbl_set_exponent(resultp1,result_exponent);
1353 	Dbl_copytoptr(resultp1,resultp2,dstptr);
1354 	if (inexact)
1355 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356 		else Set_inexactflag();
1357     	return(NOEXCEPTION);
1358 }
1359 
1360 /*
1361  *  Single Floating-point Multiply Fused Add
1362  */
1363 
sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)1364 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365 
1366 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367 unsigned int *status;
1368 {
1369 	unsigned int opnd1, opnd2, opnd3;
1370 	register unsigned int tmpresp1, tmpresp2;
1371 	unsigned int rightp1, rightp2;
1372 	unsigned int resultp1, resultp2 = 0;
1373 	register int mpy_exponent, add_exponent, count;
1374 	boolean inexact = FALSE, is_tiny = FALSE;
1375 
1376 	unsigned int signlessleft1, signlessright1, save;
1377 	register int result_exponent, diff_exponent;
1378 	int sign_save, jumpsize;
1379 
1380 	Sgl_copyfromptr(src1ptr,opnd1);
1381 	Sgl_copyfromptr(src2ptr,opnd2);
1382 	Sgl_copyfromptr(src3ptr,opnd3);
1383 
1384 	/*
1385 	 * set sign bit of result of multiply
1386 	 */
1387 	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388 		Sgl_setnegativezero(resultp1);
1389 	else Sgl_setzero(resultp1);
1390 
1391 	/*
1392 	 * Generate multiply exponent
1393 	 */
1394 	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395 
1396 	/*
1397 	 * check first operand for NaN's or infinity
1398 	 */
1399 	if (Sgl_isinfinity_exponent(opnd1)) {
1400 		if (Sgl_iszero_mantissa(opnd1)) {
1401 			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402 				if (Sgl_iszero_exponentmantissa(opnd2)) {
1403 					/*
1404 					 * invalid since operands are infinity
1405 					 * and zero
1406 					 */
1407 					if (Is_invalidtrap_enabled())
1408 						return(OPC_2E_INVALIDEXCEPTION);
1409 					Set_invalidflag();
1410 					Sgl_makequietnan(resultp1);
1411 					Sgl_copytoptr(resultp1,dstptr);
1412 					return(NOEXCEPTION);
1413 				}
1414 				/*
1415 				 * Check third operand for infinity with a
1416 				 *  sign opposite of the multiply result
1417 				 */
1418 				if (Sgl_isinfinity(opnd3) &&
1419 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420 					/*
1421 					 * invalid since attempting a magnitude
1422 					 * subtraction of infinities
1423 					 */
1424 					if (Is_invalidtrap_enabled())
1425 						return(OPC_2E_INVALIDEXCEPTION);
1426 					Set_invalidflag();
1427 					Sgl_makequietnan(resultp1);
1428 					Sgl_copytoptr(resultp1,dstptr);
1429 					return(NOEXCEPTION);
1430 				}
1431 
1432 				/*
1433 			 	 * return infinity
1434 			 	 */
1435 				Sgl_setinfinity_exponentmantissa(resultp1);
1436 				Sgl_copytoptr(resultp1,dstptr);
1437 				return(NOEXCEPTION);
1438 			}
1439 		}
1440 		else {
1441 			/*
1442 		 	 * is NaN; signaling or quiet?
1443 		 	 */
1444 			if (Sgl_isone_signaling(opnd1)) {
1445 				/* trap if INVALIDTRAP enabled */
1446 				if (Is_invalidtrap_enabled())
1447 			    		return(OPC_2E_INVALIDEXCEPTION);
1448 				/* make NaN quiet */
1449 				Set_invalidflag();
1450 				Sgl_set_quiet(opnd1);
1451 			}
1452 			/*
1453 			 * is second operand a signaling NaN?
1454 			 */
1455 			else if (Sgl_is_signalingnan(opnd2)) {
1456 				/* trap if INVALIDTRAP enabled */
1457 				if (Is_invalidtrap_enabled())
1458 			    		return(OPC_2E_INVALIDEXCEPTION);
1459 				/* make NaN quiet */
1460 				Set_invalidflag();
1461 				Sgl_set_quiet(opnd2);
1462 				Sgl_copytoptr(opnd2,dstptr);
1463 				return(NOEXCEPTION);
1464 			}
1465 			/*
1466 			 * is third operand a signaling NaN?
1467 			 */
1468 			else if (Sgl_is_signalingnan(opnd3)) {
1469 				/* trap if INVALIDTRAP enabled */
1470 				if (Is_invalidtrap_enabled())
1471 			    		return(OPC_2E_INVALIDEXCEPTION);
1472 				/* make NaN quiet */
1473 				Set_invalidflag();
1474 				Sgl_set_quiet(opnd3);
1475 				Sgl_copytoptr(opnd3,dstptr);
1476 				return(NOEXCEPTION);
1477 			}
1478 			/*
1479 		 	 * return quiet NaN
1480 		 	 */
1481 			Sgl_copytoptr(opnd1,dstptr);
1482 			return(NOEXCEPTION);
1483 		}
1484 	}
1485 
1486 	/*
1487 	 * check second operand for NaN's or infinity
1488 	 */
1489 	if (Sgl_isinfinity_exponent(opnd2)) {
1490 		if (Sgl_iszero_mantissa(opnd2)) {
1491 			if (Sgl_isnotnan(opnd3)) {
1492 				if (Sgl_iszero_exponentmantissa(opnd1)) {
1493 					/*
1494 					 * invalid since multiply operands are
1495 					 * zero & infinity
1496 					 */
1497 					if (Is_invalidtrap_enabled())
1498 						return(OPC_2E_INVALIDEXCEPTION);
1499 					Set_invalidflag();
1500 					Sgl_makequietnan(opnd2);
1501 					Sgl_copytoptr(opnd2,dstptr);
1502 					return(NOEXCEPTION);
1503 				}
1504 
1505 				/*
1506 				 * Check third operand for infinity with a
1507 				 *  sign opposite of the multiply result
1508 				 */
1509 				if (Sgl_isinfinity(opnd3) &&
1510 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511 					/*
1512 					 * invalid since attempting a magnitude
1513 					 * subtraction of infinities
1514 					 */
1515 					if (Is_invalidtrap_enabled())
1516 				       		return(OPC_2E_INVALIDEXCEPTION);
1517 				       	Set_invalidflag();
1518 				       	Sgl_makequietnan(resultp1);
1519 					Sgl_copytoptr(resultp1,dstptr);
1520 					return(NOEXCEPTION);
1521 				}
1522 
1523 				/*
1524 				 * return infinity
1525 				 */
1526 				Sgl_setinfinity_exponentmantissa(resultp1);
1527 				Sgl_copytoptr(resultp1,dstptr);
1528 				return(NOEXCEPTION);
1529 			}
1530 		}
1531 		else {
1532 			/*
1533 			 * is NaN; signaling or quiet?
1534 			 */
1535 			if (Sgl_isone_signaling(opnd2)) {
1536 				/* trap if INVALIDTRAP enabled */
1537 				if (Is_invalidtrap_enabled())
1538 					return(OPC_2E_INVALIDEXCEPTION);
1539 				/* make NaN quiet */
1540 				Set_invalidflag();
1541 				Sgl_set_quiet(opnd2);
1542 			}
1543 			/*
1544 			 * is third operand a signaling NaN?
1545 			 */
1546 			else if (Sgl_is_signalingnan(opnd3)) {
1547 			       	/* trap if INVALIDTRAP enabled */
1548 			       	if (Is_invalidtrap_enabled())
1549 				   		return(OPC_2E_INVALIDEXCEPTION);
1550 			       	/* make NaN quiet */
1551 			       	Set_invalidflag();
1552 			       	Sgl_set_quiet(opnd3);
1553 				Sgl_copytoptr(opnd3,dstptr);
1554 		       		return(NOEXCEPTION);
1555 			}
1556 			/*
1557 			 * return quiet NaN
1558 			 */
1559 			Sgl_copytoptr(opnd2,dstptr);
1560 			return(NOEXCEPTION);
1561 		}
1562 	}
1563 
1564 	/*
1565 	 * check third operand for NaN's or infinity
1566 	 */
1567 	if (Sgl_isinfinity_exponent(opnd3)) {
1568 		if (Sgl_iszero_mantissa(opnd3)) {
1569 			/* return infinity */
1570 			Sgl_copytoptr(opnd3,dstptr);
1571 			return(NOEXCEPTION);
1572 		} else {
1573 			/*
1574 			 * is NaN; signaling or quiet?
1575 			 */
1576 			if (Sgl_isone_signaling(opnd3)) {
1577 				/* trap if INVALIDTRAP enabled */
1578 				if (Is_invalidtrap_enabled())
1579 					return(OPC_2E_INVALIDEXCEPTION);
1580 				/* make NaN quiet */
1581 				Set_invalidflag();
1582 				Sgl_set_quiet(opnd3);
1583 			}
1584 			/*
1585 			 * return quiet NaN
1586  			 */
1587 			Sgl_copytoptr(opnd3,dstptr);
1588 			return(NOEXCEPTION);
1589 		}
1590     	}
1591 
1592 	/*
1593 	 * Generate multiply mantissa
1594 	 */
1595 	if (Sgl_isnotzero_exponent(opnd1)) {
1596 		/* set hidden bit */
1597 		Sgl_clear_signexponent_set_hidden(opnd1);
1598 	}
1599 	else {
1600 		/* check for zero */
1601 		if (Sgl_iszero_mantissa(opnd1)) {
1602 			/*
1603 			 * Perform the add opnd3 with zero here.
1604 			 */
1605 			if (Sgl_iszero_exponentmantissa(opnd3)) {
1606 				if (Is_rounding_mode(ROUNDMINUS)) {
1607 					Sgl_or_signs(opnd3,resultp1);
1608 				} else {
1609 					Sgl_and_signs(opnd3,resultp1);
1610 				}
1611 			}
1612 			/*
1613 			 * Now let's check for trapped underflow case.
1614 			 */
1615 			else if (Sgl_iszero_exponent(opnd3) &&
1616 			         Is_underflowtrap_enabled()) {
1617                     		/* need to normalize results mantissa */
1618                     		sign_save = Sgl_signextendedsign(opnd3);
1619 				result_exponent = 0;
1620                     		Sgl_leftshiftby1(opnd3);
1621                     		Sgl_normalize(opnd3,result_exponent);
1622                     		Sgl_set_sign(opnd3,/*using*/sign_save);
1623                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
1624 							unfl);
1625                     		Sgl_copytoptr(opnd3,dstptr);
1626                     		/* inexact = FALSE */
1627                     		return(OPC_2E_UNDERFLOWEXCEPTION);
1628 			}
1629 			Sgl_copytoptr(opnd3,dstptr);
1630 			return(NOEXCEPTION);
1631 		}
1632 		/* is denormalized, adjust exponent */
1633 		Sgl_clear_signexponent(opnd1);
1634 		Sgl_leftshiftby1(opnd1);
1635 		Sgl_normalize(opnd1,mpy_exponent);
1636 	}
1637 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
1638 	if (Sgl_isnotzero_exponent(opnd2)) {
1639 		Sgl_clear_signexponent_set_hidden(opnd2);
1640 	}
1641 	else {
1642 		/* check for zero */
1643 		if (Sgl_iszero_mantissa(opnd2)) {
1644 			/*
1645 			 * Perform the add opnd3 with zero here.
1646 			 */
1647 			if (Sgl_iszero_exponentmantissa(opnd3)) {
1648 				if (Is_rounding_mode(ROUNDMINUS)) {
1649 					Sgl_or_signs(opnd3,resultp1);
1650 				} else {
1651 					Sgl_and_signs(opnd3,resultp1);
1652 				}
1653 			}
1654 			/*
1655 			 * Now let's check for trapped underflow case.
1656 			 */
1657 			else if (Sgl_iszero_exponent(opnd3) &&
1658 			    Is_underflowtrap_enabled()) {
1659                     		/* need to normalize results mantissa */
1660                     		sign_save = Sgl_signextendedsign(opnd3);
1661 				result_exponent = 0;
1662                     		Sgl_leftshiftby1(opnd3);
1663                     		Sgl_normalize(opnd3,result_exponent);
1664                     		Sgl_set_sign(opnd3,/*using*/sign_save);
1665                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
1666 							unfl);
1667                     		Sgl_copytoptr(opnd3,dstptr);
1668                     		/* inexact = FALSE */
1669                     		return(OPC_2E_UNDERFLOWEXCEPTION);
1670 			}
1671 			Sgl_copytoptr(opnd3,dstptr);
1672 			return(NOEXCEPTION);
1673 		}
1674 		/* is denormalized; want to normalize */
1675 		Sgl_clear_signexponent(opnd2);
1676 		Sgl_leftshiftby1(opnd2);
1677 		Sgl_normalize(opnd2,mpy_exponent);
1678 	}
1679 
1680 	/* Multiply the first two source mantissas together */
1681 
1682 	/*
1683 	 * The intermediate result will be kept in tmpres,
1684 	 * which needs enough room for 106 bits of mantissa,
1685 	 * so lets call it a Double extended.
1686 	 */
1687 	Sglext_setzero(tmpresp1,tmpresp2);
1688 
1689 	/*
1690 	 * Four bits at a time are inspected in each loop, and a
1691 	 * simple shift and add multiply algorithm is used.
1692 	 */
1693 	for (count = SGL_P-1; count >= 0; count -= 4) {
1694 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695 		if (Sbit28(opnd1)) {
1696 	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
1697 			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1698 		}
1699 		if (Sbit29(opnd1)) {
1700 			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1701 		}
1702 		if (Sbit30(opnd1)) {
1703 			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1704 		}
1705 		if (Sbit31(opnd1)) {
1706 			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1707 		}
1708 		Sgl_rightshiftby4(opnd1);
1709 	}
1710 	if (Is_sexthiddenoverflow(tmpresp1)) {
1711 		/* result mantissa >= 2 (mantissa overflow) */
1712 		mpy_exponent++;
1713 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714 	} else {
1715 		Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716 	}
1717 
1718 	/*
1719 	 * Restore the sign of the mpy result which was saved in resultp1.
1720 	 * The exponent will continue to be kept in mpy_exponent.
1721 	 */
1722 	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723 
1724 	/*
1725 	 * No rounding is required, since the result of the multiply
1726 	 * is exact in the extended format.
1727 	 */
1728 
1729 	/*
1730 	 * Now we are ready to perform the add portion of the operation.
1731 	 *
1732 	 * The exponents need to be kept as integers for now, since the
1733 	 * multiply result might not fit into the exponent field.  We
1734 	 * can't overflow or underflow because of this yet, since the
1735 	 * add could bring the final result back into range.
1736 	 */
1737 	add_exponent = Sgl_exponent(opnd3);
1738 
1739 	/*
1740 	 * Check for denormalized or zero add operand.
1741 	 */
1742 	if (add_exponent == 0) {
1743 		/* check for zero */
1744 		if (Sgl_iszero_mantissa(opnd3)) {
1745 			/* right is zero */
1746 			/* Left can't be zero and must be result.
1747 			 *
1748 			 * The final result is now in tmpres and mpy_exponent,
1749 			 * and needs to be rounded and squeezed back into
1750 			 * double precision format from double extended.
1751 			 */
1752 			result_exponent = mpy_exponent;
1753 			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754 			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755 			goto round;
1756 		}
1757 
1758 		/*
1759 		 * Neither are zeroes.
1760 		 * Adjust exponent and normalize add operand.
1761 		 */
1762 		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
1763 		Sgl_clear_signexponent(opnd3);
1764 		Sgl_leftshiftby1(opnd3);
1765 		Sgl_normalize(opnd3,add_exponent);
1766 		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
1767 	} else {
1768 		Sgl_clear_exponent_set_hidden(opnd3);
1769 	}
1770 	/*
1771 	 * Copy opnd3 to the double extended variable called right.
1772 	 */
1773 	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774 
1775 	/*
1776 	 * A zero "save" helps discover equal operands (for later),
1777 	 * and is used in swapping operands (if needed).
1778 	 */
1779 	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1780 
1781 	/*
1782 	 * Compare magnitude of operands.
1783 	 */
1784 	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785 	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787 	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788 		/*
1789 		 * Set the left operand to the larger one by XOR swap.
1790 		 * First finish the first word "save".
1791 		 */
1792 		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793 		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794 		Sglext_swap_lower(tmpresp2,rightp2);
1795 		/* also setup exponents used in rest of routine */
1796 		diff_exponent = add_exponent - mpy_exponent;
1797 		result_exponent = add_exponent;
1798 	} else {
1799 		/* also setup exponents used in rest of routine */
1800 		diff_exponent = mpy_exponent - add_exponent;
1801 		result_exponent = mpy_exponent;
1802 	}
1803 	/* Invariant: left is not smaller than right. */
1804 
1805 	/*
1806 	 * Special case alignment of operands that would force alignment
1807 	 * beyond the extent of the extension.  A further optimization
1808 	 * could special case this but only reduces the path length for
1809 	 * this infrequent case.
1810 	 */
1811 	if (diff_exponent > SGLEXT_THRESHOLD) {
1812 		diff_exponent = SGLEXT_THRESHOLD;
1813 	}
1814 
1815 	/* Align right operand by shifting it to the right */
1816 	Sglext_clear_sign(rightp1);
1817 	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1818 
1819 	/* Treat sum and difference of the operands separately. */
1820 	if ((int)save < 0) {
1821 		/*
1822 		 * Difference of the two operands.  Overflow can occur if the
1823 		 * multiply overflowed.  A borrow can occur out of the hidden
1824 		 * bit and force a post normalization phase.
1825 		 */
1826 		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827 			resultp1,resultp2);
1828 		sign_save = Sgl_signextendedsign(resultp1);
1829 		if (Sgl_iszero_hidden(resultp1)) {
1830 			/* Handle normalization */
1831 		/* A straightforward algorithm would now shift the
1832 		 * result and extension left until the hidden bit
1833 		 * becomes one.  Not all of the extension bits need
1834 		 * participate in the shift.  Only the two most
1835 		 * significant bits (round and guard) are needed.
1836 		 * If only a single shift is needed then the guard
1837 		 * bit becomes a significant low order bit and the
1838 		 * extension must participate in the rounding.
1839 		 * If more than a single shift is needed, then all
1840 		 * bits to the right of the guard bit are zeros,
1841 		 * and the guard bit may or may not be zero. */
1842 			Sglext_leftshiftby1(resultp1,resultp2);
1843 
1844 			/* Need to check for a zero result.  The sign and
1845 			 * exponent fields have already been zeroed.  The more
1846 			 * efficient test of the full object can be used.
1847 			 */
1848 			 if (Sglext_iszero(resultp1,resultp2)) {
1849 				/* Must have been "x-x" or "x+(-x)". */
1850 				if (Is_rounding_mode(ROUNDMINUS))
1851 					Sgl_setone_sign(resultp1);
1852 				Sgl_copytoptr(resultp1,dstptr);
1853 				return(NOEXCEPTION);
1854 			}
1855 			result_exponent--;
1856 
1857 			/* Look to see if normalization is finished. */
1858 			if (Sgl_isone_hidden(resultp1)) {
1859 				/* No further normalization is needed */
1860 				goto round;
1861 			}
1862 
1863 			/* Discover first one bit to determine shift amount.
1864 			 * Use a modified binary search.  We have already
1865 			 * shifted the result one position right and still
1866 			 * not found a one so the remainder of the extension
1867 			 * must be zero and simplifies rounding. */
1868 			/* Scan bytes */
1869 			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870 				Sglext_leftshiftby8(resultp1,resultp2);
1871 				result_exponent -= 8;
1872 			}
1873 			/* Now narrow it down to the nibble */
1874 			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875 				/* The lower nibble contains the
1876 				 * normalizing one */
1877 				Sglext_leftshiftby4(resultp1,resultp2);
1878 				result_exponent -= 4;
1879 			}
1880 			/* Select case where first bit is set (already
1881 			 * normalized) otherwise select the proper shift. */
1882 			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883 			if (jumpsize <= 7) switch(jumpsize) {
1884 			case 1:
1885 				Sglext_leftshiftby3(resultp1,resultp2);
1886 				result_exponent -= 3;
1887 				break;
1888 			case 2:
1889 			case 3:
1890 				Sglext_leftshiftby2(resultp1,resultp2);
1891 				result_exponent -= 2;
1892 				break;
1893 			case 4:
1894 			case 5:
1895 			case 6:
1896 			case 7:
1897 				Sglext_leftshiftby1(resultp1,resultp2);
1898 				result_exponent -= 1;
1899 				break;
1900 			}
1901 		} /* end if (hidden...)... */
1902 	/* Fall through and round */
1903 	} /* end if (save < 0)... */
1904 	else {
1905 		/* Add magnitudes */
1906 		Sglext_addition(tmpresp1,tmpresp2,
1907 			rightp1,rightp2, /*to*/resultp1,resultp2);
1908 		sign_save = Sgl_signextendedsign(resultp1);
1909 		if (Sgl_isone_hiddenoverflow(resultp1)) {
1910 	    		/* Prenormalization required. */
1911 	    		Sglext_arithrightshiftby1(resultp1,resultp2);
1912 	    		result_exponent++;
1913 		} /* end if hiddenoverflow... */
1914 	} /* end else ...add magnitudes... */
1915 
1916 	/* Round the result.  If the extension and lower two words are
1917 	 * all zeros, then the result is exact.  Otherwise round in the
1918 	 * correct direction.  Underflow is possible. If a postnormalization
1919 	 * is necessary, then the mantissa is all zeros so no shift is needed.
1920 	 */
1921   round:
1922 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923 		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924 	}
1925 	Sgl_set_sign(resultp1,/*using*/sign_save);
1926 	if (Sglext_isnotzero_mantissap2(resultp2)) {
1927 		inexact = TRUE;
1928 		switch(Rounding_mode()) {
1929 		case ROUNDNEAREST: /* The default. */
1930 			if (Sglext_isone_highp2(resultp2)) {
1931 				/* at least 1/2 ulp */
1932 				if (Sglext_isnotzero_low31p2(resultp2) ||
1933 				    Sglext_isone_lowp1(resultp1)) {
1934 					/* either exactly half way and odd or
1935 					 * more than 1/2ulp */
1936 					Sgl_increment(resultp1);
1937 				}
1938 			}
1939 	    		break;
1940 
1941 		case ROUNDPLUS:
1942 	    		if (Sgl_iszero_sign(resultp1)) {
1943 				/* Round up positive results */
1944 				Sgl_increment(resultp1);
1945 			}
1946 			break;
1947 
1948 		case ROUNDMINUS:
1949 	    		if (Sgl_isone_sign(resultp1)) {
1950 				/* Round down negative results */
1951 				Sgl_increment(resultp1);
1952 			}
1953 
1954 		case ROUNDZERO:;
1955 			/* truncate is simple */
1956 		} /* end switch... */
1957 		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958 	}
1959 	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960 		/* Overflow */
1961 		if (Is_overflowtrap_enabled()) {
1962                         /*
1963                          * Adjust bias of result
1964                          */
1965                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966                         Sgl_copytoptr(resultp1,dstptr);
1967                         if (inexact)
1968                             if (Is_inexacttrap_enabled())
1969                                 return (OPC_2E_OVERFLOWEXCEPTION |
1970 					OPC_2E_INEXACTEXCEPTION);
1971                             else Set_inexactflag();
1972                         return (OPC_2E_OVERFLOWEXCEPTION);
1973 		}
1974 		inexact = TRUE;
1975 		Set_overflowflag();
1976 		Sgl_setoverflow(resultp1);
1977 	} else if (result_exponent <= 0) {	/* underflow case */
1978 		if (Is_underflowtrap_enabled()) {
1979                         /*
1980                          * Adjust bias of result
1981                          */
1982                 	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983 			Sgl_copytoptr(resultp1,dstptr);
1984                         if (inexact)
1985                             if (Is_inexacttrap_enabled())
1986                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1987 					OPC_2E_INEXACTEXCEPTION);
1988                             else Set_inexactflag();
1989 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1990 		}
1991 		else if (inexact && is_tiny) Set_underflowflag();
1992 	}
1993 	else Sgl_set_exponent(resultp1,result_exponent);
1994 	Sgl_copytoptr(resultp1,dstptr);
1995 	if (inexact)
1996 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997 		else Set_inexactflag();
1998     	return(NOEXCEPTION);
1999 }
2000 
2001 /*
2002  *  Single Floating-point Multiply Negate Fused Add
2003  */
2004 
sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)2005 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006 
2007 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008 unsigned int *status;
2009 {
2010 	unsigned int opnd1, opnd2, opnd3;
2011 	register unsigned int tmpresp1, tmpresp2;
2012 	unsigned int rightp1, rightp2;
2013 	unsigned int resultp1, resultp2 = 0;
2014 	register int mpy_exponent, add_exponent, count;
2015 	boolean inexact = FALSE, is_tiny = FALSE;
2016 
2017 	unsigned int signlessleft1, signlessright1, save;
2018 	register int result_exponent, diff_exponent;
2019 	int sign_save, jumpsize;
2020 
2021 	Sgl_copyfromptr(src1ptr,opnd1);
2022 	Sgl_copyfromptr(src2ptr,opnd2);
2023 	Sgl_copyfromptr(src3ptr,opnd3);
2024 
2025 	/*
2026 	 * set sign bit of result of multiply
2027 	 */
2028 	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029 		Sgl_setzero(resultp1);
2030 	else
2031 		Sgl_setnegativezero(resultp1);
2032 
2033 	/*
2034 	 * Generate multiply exponent
2035 	 */
2036 	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037 
2038 	/*
2039 	 * check first operand for NaN's or infinity
2040 	 */
2041 	if (Sgl_isinfinity_exponent(opnd1)) {
2042 		if (Sgl_iszero_mantissa(opnd1)) {
2043 			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044 				if (Sgl_iszero_exponentmantissa(opnd2)) {
2045 					/*
2046 					 * invalid since operands are infinity
2047 					 * and zero
2048 					 */
2049 					if (Is_invalidtrap_enabled())
2050 						return(OPC_2E_INVALIDEXCEPTION);
2051 					Set_invalidflag();
2052 					Sgl_makequietnan(resultp1);
2053 					Sgl_copytoptr(resultp1,dstptr);
2054 					return(NOEXCEPTION);
2055 				}
2056 				/*
2057 				 * Check third operand for infinity with a
2058 				 *  sign opposite of the multiply result
2059 				 */
2060 				if (Sgl_isinfinity(opnd3) &&
2061 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062 					/*
2063 					 * invalid since attempting a magnitude
2064 					 * subtraction of infinities
2065 					 */
2066 					if (Is_invalidtrap_enabled())
2067 						return(OPC_2E_INVALIDEXCEPTION);
2068 					Set_invalidflag();
2069 					Sgl_makequietnan(resultp1);
2070 					Sgl_copytoptr(resultp1,dstptr);
2071 					return(NOEXCEPTION);
2072 				}
2073 
2074 				/*
2075 			 	 * return infinity
2076 			 	 */
2077 				Sgl_setinfinity_exponentmantissa(resultp1);
2078 				Sgl_copytoptr(resultp1,dstptr);
2079 				return(NOEXCEPTION);
2080 			}
2081 		}
2082 		else {
2083 			/*
2084 		 	 * is NaN; signaling or quiet?
2085 		 	 */
2086 			if (Sgl_isone_signaling(opnd1)) {
2087 				/* trap if INVALIDTRAP enabled */
2088 				if (Is_invalidtrap_enabled())
2089 			    		return(OPC_2E_INVALIDEXCEPTION);
2090 				/* make NaN quiet */
2091 				Set_invalidflag();
2092 				Sgl_set_quiet(opnd1);
2093 			}
2094 			/*
2095 			 * is second operand a signaling NaN?
2096 			 */
2097 			else if (Sgl_is_signalingnan(opnd2)) {
2098 				/* trap if INVALIDTRAP enabled */
2099 				if (Is_invalidtrap_enabled())
2100 			    		return(OPC_2E_INVALIDEXCEPTION);
2101 				/* make NaN quiet */
2102 				Set_invalidflag();
2103 				Sgl_set_quiet(opnd2);
2104 				Sgl_copytoptr(opnd2,dstptr);
2105 				return(NOEXCEPTION);
2106 			}
2107 			/*
2108 			 * is third operand a signaling NaN?
2109 			 */
2110 			else if (Sgl_is_signalingnan(opnd3)) {
2111 				/* trap if INVALIDTRAP enabled */
2112 				if (Is_invalidtrap_enabled())
2113 			    		return(OPC_2E_INVALIDEXCEPTION);
2114 				/* make NaN quiet */
2115 				Set_invalidflag();
2116 				Sgl_set_quiet(opnd3);
2117 				Sgl_copytoptr(opnd3,dstptr);
2118 				return(NOEXCEPTION);
2119 			}
2120 			/*
2121 		 	 * return quiet NaN
2122 		 	 */
2123 			Sgl_copytoptr(opnd1,dstptr);
2124 			return(NOEXCEPTION);
2125 		}
2126 	}
2127 
2128 	/*
2129 	 * check second operand for NaN's or infinity
2130 	 */
2131 	if (Sgl_isinfinity_exponent(opnd2)) {
2132 		if (Sgl_iszero_mantissa(opnd2)) {
2133 			if (Sgl_isnotnan(opnd3)) {
2134 				if (Sgl_iszero_exponentmantissa(opnd1)) {
2135 					/*
2136 					 * invalid since multiply operands are
2137 					 * zero & infinity
2138 					 */
2139 					if (Is_invalidtrap_enabled())
2140 						return(OPC_2E_INVALIDEXCEPTION);
2141 					Set_invalidflag();
2142 					Sgl_makequietnan(opnd2);
2143 					Sgl_copytoptr(opnd2,dstptr);
2144 					return(NOEXCEPTION);
2145 				}
2146 
2147 				/*
2148 				 * Check third operand for infinity with a
2149 				 *  sign opposite of the multiply result
2150 				 */
2151 				if (Sgl_isinfinity(opnd3) &&
2152 				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153 					/*
2154 					 * invalid since attempting a magnitude
2155 					 * subtraction of infinities
2156 					 */
2157 					if (Is_invalidtrap_enabled())
2158 				       		return(OPC_2E_INVALIDEXCEPTION);
2159 				       	Set_invalidflag();
2160 				       	Sgl_makequietnan(resultp1);
2161 					Sgl_copytoptr(resultp1,dstptr);
2162 					return(NOEXCEPTION);
2163 				}
2164 
2165 				/*
2166 				 * return infinity
2167 				 */
2168 				Sgl_setinfinity_exponentmantissa(resultp1);
2169 				Sgl_copytoptr(resultp1,dstptr);
2170 				return(NOEXCEPTION);
2171 			}
2172 		}
2173 		else {
2174 			/*
2175 			 * is NaN; signaling or quiet?
2176 			 */
2177 			if (Sgl_isone_signaling(opnd2)) {
2178 				/* trap if INVALIDTRAP enabled */
2179 				if (Is_invalidtrap_enabled())
2180 					return(OPC_2E_INVALIDEXCEPTION);
2181 				/* make NaN quiet */
2182 				Set_invalidflag();
2183 				Sgl_set_quiet(opnd2);
2184 			}
2185 			/*
2186 			 * is third operand a signaling NaN?
2187 			 */
2188 			else if (Sgl_is_signalingnan(opnd3)) {
2189 			       	/* trap if INVALIDTRAP enabled */
2190 			       	if (Is_invalidtrap_enabled())
2191 				   		return(OPC_2E_INVALIDEXCEPTION);
2192 			       	/* make NaN quiet */
2193 			       	Set_invalidflag();
2194 			       	Sgl_set_quiet(opnd3);
2195 				Sgl_copytoptr(opnd3,dstptr);
2196 		       		return(NOEXCEPTION);
2197 			}
2198 			/*
2199 			 * return quiet NaN
2200 			 */
2201 			Sgl_copytoptr(opnd2,dstptr);
2202 			return(NOEXCEPTION);
2203 		}
2204 	}
2205 
2206 	/*
2207 	 * check third operand for NaN's or infinity
2208 	 */
2209 	if (Sgl_isinfinity_exponent(opnd3)) {
2210 		if (Sgl_iszero_mantissa(opnd3)) {
2211 			/* return infinity */
2212 			Sgl_copytoptr(opnd3,dstptr);
2213 			return(NOEXCEPTION);
2214 		} else {
2215 			/*
2216 			 * is NaN; signaling or quiet?
2217 			 */
2218 			if (Sgl_isone_signaling(opnd3)) {
2219 				/* trap if INVALIDTRAP enabled */
2220 				if (Is_invalidtrap_enabled())
2221 					return(OPC_2E_INVALIDEXCEPTION);
2222 				/* make NaN quiet */
2223 				Set_invalidflag();
2224 				Sgl_set_quiet(opnd3);
2225 			}
2226 			/*
2227 			 * return quiet NaN
2228  			 */
2229 			Sgl_copytoptr(opnd3,dstptr);
2230 			return(NOEXCEPTION);
2231 		}
2232     	}
2233 
2234 	/*
2235 	 * Generate multiply mantissa
2236 	 */
2237 	if (Sgl_isnotzero_exponent(opnd1)) {
2238 		/* set hidden bit */
2239 		Sgl_clear_signexponent_set_hidden(opnd1);
2240 	}
2241 	else {
2242 		/* check for zero */
2243 		if (Sgl_iszero_mantissa(opnd1)) {
2244 			/*
2245 			 * Perform the add opnd3 with zero here.
2246 			 */
2247 			if (Sgl_iszero_exponentmantissa(opnd3)) {
2248 				if (Is_rounding_mode(ROUNDMINUS)) {
2249 					Sgl_or_signs(opnd3,resultp1);
2250 				} else {
2251 					Sgl_and_signs(opnd3,resultp1);
2252 				}
2253 			}
2254 			/*
2255 			 * Now let's check for trapped underflow case.
2256 			 */
2257 			else if (Sgl_iszero_exponent(opnd3) &&
2258 			         Is_underflowtrap_enabled()) {
2259                     		/* need to normalize results mantissa */
2260                     		sign_save = Sgl_signextendedsign(opnd3);
2261 				result_exponent = 0;
2262                     		Sgl_leftshiftby1(opnd3);
2263                     		Sgl_normalize(opnd3,result_exponent);
2264                     		Sgl_set_sign(opnd3,/*using*/sign_save);
2265                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
2266 							unfl);
2267                     		Sgl_copytoptr(opnd3,dstptr);
2268                     		/* inexact = FALSE */
2269                     		return(OPC_2E_UNDERFLOWEXCEPTION);
2270 			}
2271 			Sgl_copytoptr(opnd3,dstptr);
2272 			return(NOEXCEPTION);
2273 		}
2274 		/* is denormalized, adjust exponent */
2275 		Sgl_clear_signexponent(opnd1);
2276 		Sgl_leftshiftby1(opnd1);
2277 		Sgl_normalize(opnd1,mpy_exponent);
2278 	}
2279 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
2280 	if (Sgl_isnotzero_exponent(opnd2)) {
2281 		Sgl_clear_signexponent_set_hidden(opnd2);
2282 	}
2283 	else {
2284 		/* check for zero */
2285 		if (Sgl_iszero_mantissa(opnd2)) {
2286 			/*
2287 			 * Perform the add opnd3 with zero here.
2288 			 */
2289 			if (Sgl_iszero_exponentmantissa(opnd3)) {
2290 				if (Is_rounding_mode(ROUNDMINUS)) {
2291 					Sgl_or_signs(opnd3,resultp1);
2292 				} else {
2293 					Sgl_and_signs(opnd3,resultp1);
2294 				}
2295 			}
2296 			/*
2297 			 * Now let's check for trapped underflow case.
2298 			 */
2299 			else if (Sgl_iszero_exponent(opnd3) &&
2300 			    Is_underflowtrap_enabled()) {
2301                     		/* need to normalize results mantissa */
2302                     		sign_save = Sgl_signextendedsign(opnd3);
2303 				result_exponent = 0;
2304                     		Sgl_leftshiftby1(opnd3);
2305                     		Sgl_normalize(opnd3,result_exponent);
2306                     		Sgl_set_sign(opnd3,/*using*/sign_save);
2307                     		Sgl_setwrapped_exponent(opnd3,result_exponent,
2308 							unfl);
2309                     		Sgl_copytoptr(opnd3,dstptr);
2310                     		/* inexact = FALSE */
2311                     		return(OPC_2E_UNDERFLOWEXCEPTION);
2312 			}
2313 			Sgl_copytoptr(opnd3,dstptr);
2314 			return(NOEXCEPTION);
2315 		}
2316 		/* is denormalized; want to normalize */
2317 		Sgl_clear_signexponent(opnd2);
2318 		Sgl_leftshiftby1(opnd2);
2319 		Sgl_normalize(opnd2,mpy_exponent);
2320 	}
2321 
2322 	/* Multiply the first two source mantissas together */
2323 
2324 	/*
2325 	 * The intermediate result will be kept in tmpres,
2326 	 * which needs enough room for 106 bits of mantissa,
2327 	 * so lets call it a Double extended.
2328 	 */
2329 	Sglext_setzero(tmpresp1,tmpresp2);
2330 
2331 	/*
2332 	 * Four bits at a time are inspected in each loop, and a
2333 	 * simple shift and add multiply algorithm is used.
2334 	 */
2335 	for (count = SGL_P-1; count >= 0; count -= 4) {
2336 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337 		if (Sbit28(opnd1)) {
2338 	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
2339 			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2340 		}
2341 		if (Sbit29(opnd1)) {
2342 			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2343 		}
2344 		if (Sbit30(opnd1)) {
2345 			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2346 		}
2347 		if (Sbit31(opnd1)) {
2348 			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2349 		}
2350 		Sgl_rightshiftby4(opnd1);
2351 	}
2352 	if (Is_sexthiddenoverflow(tmpresp1)) {
2353 		/* result mantissa >= 2 (mantissa overflow) */
2354 		mpy_exponent++;
2355 		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356 	} else {
2357 		Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358 	}
2359 
2360 	/*
2361 	 * Restore the sign of the mpy result which was saved in resultp1.
2362 	 * The exponent will continue to be kept in mpy_exponent.
2363 	 */
2364 	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365 
2366 	/*
2367 	 * No rounding is required, since the result of the multiply
2368 	 * is exact in the extended format.
2369 	 */
2370 
2371 	/*
2372 	 * Now we are ready to perform the add portion of the operation.
2373 	 *
2374 	 * The exponents need to be kept as integers for now, since the
2375 	 * multiply result might not fit into the exponent field.  We
2376 	 * can't overflow or underflow because of this yet, since the
2377 	 * add could bring the final result back into range.
2378 	 */
2379 	add_exponent = Sgl_exponent(opnd3);
2380 
2381 	/*
2382 	 * Check for denormalized or zero add operand.
2383 	 */
2384 	if (add_exponent == 0) {
2385 		/* check for zero */
2386 		if (Sgl_iszero_mantissa(opnd3)) {
2387 			/* right is zero */
2388 			/* Left can't be zero and must be result.
2389 			 *
2390 			 * The final result is now in tmpres and mpy_exponent,
2391 			 * and needs to be rounded and squeezed back into
2392 			 * double precision format from double extended.
2393 			 */
2394 			result_exponent = mpy_exponent;
2395 			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396 			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397 			goto round;
2398 		}
2399 
2400 		/*
2401 		 * Neither are zeroes.
2402 		 * Adjust exponent and normalize add operand.
2403 		 */
2404 		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
2405 		Sgl_clear_signexponent(opnd3);
2406 		Sgl_leftshiftby1(opnd3);
2407 		Sgl_normalize(opnd3,add_exponent);
2408 		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
2409 	} else {
2410 		Sgl_clear_exponent_set_hidden(opnd3);
2411 	}
2412 	/*
2413 	 * Copy opnd3 to the double extended variable called right.
2414 	 */
2415 	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416 
2417 	/*
2418 	 * A zero "save" helps discover equal operands (for later),
2419 	 * and is used in swapping operands (if needed).
2420 	 */
2421 	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2422 
2423 	/*
2424 	 * Compare magnitude of operands.
2425 	 */
2426 	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427 	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428 	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429 	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430 		/*
2431 		 * Set the left operand to the larger one by XOR swap.
2432 		 * First finish the first word "save".
2433 		 */
2434 		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435 		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436 		Sglext_swap_lower(tmpresp2,rightp2);
2437 		/* also setup exponents used in rest of routine */
2438 		diff_exponent = add_exponent - mpy_exponent;
2439 		result_exponent = add_exponent;
2440 	} else {
2441 		/* also setup exponents used in rest of routine */
2442 		diff_exponent = mpy_exponent - add_exponent;
2443 		result_exponent = mpy_exponent;
2444 	}
2445 	/* Invariant: left is not smaller than right. */
2446 
2447 	/*
2448 	 * Special case alignment of operands that would force alignment
2449 	 * beyond the extent of the extension.  A further optimization
2450 	 * could special case this but only reduces the path length for
2451 	 * this infrequent case.
2452 	 */
2453 	if (diff_exponent > SGLEXT_THRESHOLD) {
2454 		diff_exponent = SGLEXT_THRESHOLD;
2455 	}
2456 
2457 	/* Align right operand by shifting it to the right */
2458 	Sglext_clear_sign(rightp1);
2459 	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2460 
2461 	/* Treat sum and difference of the operands separately. */
2462 	if ((int)save < 0) {
2463 		/*
2464 		 * Difference of the two operands.  Overflow can occur if the
2465 		 * multiply overflowed.  A borrow can occur out of the hidden
2466 		 * bit and force a post normalization phase.
2467 		 */
2468 		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469 			resultp1,resultp2);
2470 		sign_save = Sgl_signextendedsign(resultp1);
2471 		if (Sgl_iszero_hidden(resultp1)) {
2472 			/* Handle normalization */
2473 		/* A straightforward algorithm would now shift the
2474 		 * result and extension left until the hidden bit
2475 		 * becomes one.  Not all of the extension bits need
2476 		 * participate in the shift.  Only the two most
2477 		 * significant bits (round and guard) are needed.
2478 		 * If only a single shift is needed then the guard
2479 		 * bit becomes a significant low order bit and the
2480 		 * extension must participate in the rounding.
2481 		 * If more than a single shift is needed, then all
2482 		 * bits to the right of the guard bit are zeros,
2483 		 * and the guard bit may or may not be zero. */
2484 			Sglext_leftshiftby1(resultp1,resultp2);
2485 
2486 			/* Need to check for a zero result.  The sign and
2487 			 * exponent fields have already been zeroed.  The more
2488 			 * efficient test of the full object can be used.
2489 			 */
2490 			 if (Sglext_iszero(resultp1,resultp2)) {
2491 				/* Must have been "x-x" or "x+(-x)". */
2492 				if (Is_rounding_mode(ROUNDMINUS))
2493 					Sgl_setone_sign(resultp1);
2494 				Sgl_copytoptr(resultp1,dstptr);
2495 				return(NOEXCEPTION);
2496 			}
2497 			result_exponent--;
2498 
2499 			/* Look to see if normalization is finished. */
2500 			if (Sgl_isone_hidden(resultp1)) {
2501 				/* No further normalization is needed */
2502 				goto round;
2503 			}
2504 
2505 			/* Discover first one bit to determine shift amount.
2506 			 * Use a modified binary search.  We have already
2507 			 * shifted the result one position right and still
2508 			 * not found a one so the remainder of the extension
2509 			 * must be zero and simplifies rounding. */
2510 			/* Scan bytes */
2511 			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512 				Sglext_leftshiftby8(resultp1,resultp2);
2513 				result_exponent -= 8;
2514 			}
2515 			/* Now narrow it down to the nibble */
2516 			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517 				/* The lower nibble contains the
2518 				 * normalizing one */
2519 				Sglext_leftshiftby4(resultp1,resultp2);
2520 				result_exponent -= 4;
2521 			}
2522 			/* Select case where first bit is set (already
2523 			 * normalized) otherwise select the proper shift. */
2524 			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525 			if (jumpsize <= 7) switch(jumpsize) {
2526 			case 1:
2527 				Sglext_leftshiftby3(resultp1,resultp2);
2528 				result_exponent -= 3;
2529 				break;
2530 			case 2:
2531 			case 3:
2532 				Sglext_leftshiftby2(resultp1,resultp2);
2533 				result_exponent -= 2;
2534 				break;
2535 			case 4:
2536 			case 5:
2537 			case 6:
2538 			case 7:
2539 				Sglext_leftshiftby1(resultp1,resultp2);
2540 				result_exponent -= 1;
2541 				break;
2542 			}
2543 		} /* end if (hidden...)... */
2544 	/* Fall through and round */
2545 	} /* end if (save < 0)... */
2546 	else {
2547 		/* Add magnitudes */
2548 		Sglext_addition(tmpresp1,tmpresp2,
2549 			rightp1,rightp2, /*to*/resultp1,resultp2);
2550 		sign_save = Sgl_signextendedsign(resultp1);
2551 		if (Sgl_isone_hiddenoverflow(resultp1)) {
2552 	    		/* Prenormalization required. */
2553 	    		Sglext_arithrightshiftby1(resultp1,resultp2);
2554 	    		result_exponent++;
2555 		} /* end if hiddenoverflow... */
2556 	} /* end else ...add magnitudes... */
2557 
2558 	/* Round the result.  If the extension and lower two words are
2559 	 * all zeros, then the result is exact.  Otherwise round in the
2560 	 * correct direction.  Underflow is possible. If a postnormalization
2561 	 * is necessary, then the mantissa is all zeros so no shift is needed.
2562 	 */
2563   round:
2564 	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565 		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566 	}
2567 	Sgl_set_sign(resultp1,/*using*/sign_save);
2568 	if (Sglext_isnotzero_mantissap2(resultp2)) {
2569 		inexact = TRUE;
2570 		switch(Rounding_mode()) {
2571 		case ROUNDNEAREST: /* The default. */
2572 			if (Sglext_isone_highp2(resultp2)) {
2573 				/* at least 1/2 ulp */
2574 				if (Sglext_isnotzero_low31p2(resultp2) ||
2575 				    Sglext_isone_lowp1(resultp1)) {
2576 					/* either exactly half way and odd or
2577 					 * more than 1/2ulp */
2578 					Sgl_increment(resultp1);
2579 				}
2580 			}
2581 	    		break;
2582 
2583 		case ROUNDPLUS:
2584 	    		if (Sgl_iszero_sign(resultp1)) {
2585 				/* Round up positive results */
2586 				Sgl_increment(resultp1);
2587 			}
2588 			break;
2589 
2590 		case ROUNDMINUS:
2591 	    		if (Sgl_isone_sign(resultp1)) {
2592 				/* Round down negative results */
2593 				Sgl_increment(resultp1);
2594 			}
2595 
2596 		case ROUNDZERO:;
2597 			/* truncate is simple */
2598 		} /* end switch... */
2599 		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600 	}
2601 	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602 		/* Overflow */
2603 		if (Is_overflowtrap_enabled()) {
2604                         /*
2605                          * Adjust bias of result
2606                          */
2607                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608                         Sgl_copytoptr(resultp1,dstptr);
2609                         if (inexact)
2610                             if (Is_inexacttrap_enabled())
2611                                 return (OPC_2E_OVERFLOWEXCEPTION |
2612 					OPC_2E_INEXACTEXCEPTION);
2613                             else Set_inexactflag();
2614                         return (OPC_2E_OVERFLOWEXCEPTION);
2615 		}
2616 		inexact = TRUE;
2617 		Set_overflowflag();
2618 		Sgl_setoverflow(resultp1);
2619 	} else if (result_exponent <= 0) {	/* underflow case */
2620 		if (Is_underflowtrap_enabled()) {
2621                         /*
2622                          * Adjust bias of result
2623                          */
2624                 	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625 			Sgl_copytoptr(resultp1,dstptr);
2626                         if (inexact)
2627                             if (Is_inexacttrap_enabled())
2628                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2629 					OPC_2E_INEXACTEXCEPTION);
2630                             else Set_inexactflag();
2631 	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2632 		}
2633 		else if (inexact && is_tiny) Set_underflowflag();
2634 	}
2635 	else Sgl_set_exponent(resultp1,result_exponent);
2636 	Sgl_copytoptr(resultp1,dstptr);
2637 	if (inexact)
2638 		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639 		else Set_inexactflag();
2640     	return(NOEXCEPTION);
2641 }
2642 
2643