xref: /linux/arch/m68k/ifpsp060/src/pfpsp.S (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3M68000 Hi-Performance Microprocessor Division
4M68060 Software Package
5Production Release P1.00 -- October 10, 1994
6
7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
8
9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10To the maximum extent permitted by applicable law,
11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13and any warranty against infringement with regard to the SOFTWARE
14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16To the maximum extent permitted by applicable law,
17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24so long as this entire notice is retained without alteration in any modified and/or
25redistributed versions, and that such modified versions are clearly identified as such.
26No licenses are granted by implication, estoppel or otherwise under any patents
27or trademarks of Motorola, Inc.
28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29# freal.s:
30#	This file is appended to the top of the 060FPSP package
31# and contains the entry points into the package. The user, in
32# effect, branches to one of the branch table entries located
33# after _060FPSP_TABLE.
34#	Also, subroutine stubs exist in this file (_fpsp_done for
35# example) that are referenced by the FPSP package itself in order
36# to call a given routine. The stub routine actually performs the
37# callout. The FPSP code does a "bsr" to the stub routine. This
38# extra layer of hierarchy adds a slight performance penalty but
39# it makes the FPSP code easier to read and more maintainable.
40#
41
# Byte offsets of the callout slots used by the stubs in this file.
# Each stub reads the longword at (_060FPSP_TABLE-0x80+_off_<name>), so
# the offsets below are relative to a base 0x80 bytes before
# _060FPSP_TABLE; consecutive slots are 4 bytes apart.
#
# Slots 0x00-0x30: exception and exit callouts (_real_*, _fpsp_done).
42set	_off_bsun,	0x00
43set	_off_snan,	0x04
44set	_off_operr,	0x08
45set	_off_ovfl,	0x0c
46set	_off_unfl,	0x10
47set	_off_dz,	0x14
48set	_off_inex,	0x18
49set	_off_fline,	0x1c
50set	_off_fpu_dis,	0x20
51set	_off_trap,	0x24
52set	_off_trace,	0x28
53set	_off_access,	0x2c
54set	_off_done,	0x30
55
# Slots 0x40-0x68: instruction/data memory access callouts
# (_imem_read*, _dmem_read*, _dmem_write*).
56set	_off_imr,	0x40
57set	_off_dmr,	0x44
58set	_off_dmw,	0x48
59set	_off_irw,	0x4c
60set	_off_irl,	0x50
61set	_off_drb,	0x54
62set	_off_drw,	0x58
63set	_off_drl,	0x5c
64set	_off_dwb,	0x60
65set	_off_dww,	0x64
66set	_off_dwl,	0x68
67
# _060FPSP_TABLE: start of the package proper. The callout stubs address
# their slots relative to (_060FPSP_TABLE-0x80), i.e. the 0x80 bytes that
# precede this label.
68_060FPSP_TABLE:
69
70###############################################################
71
72# Here's the table of ENTRY POINTS for those linking the package.
# Each entry is a "bra.l" to a handler followed by a zero "short" pad.
# NOTE(review): presumably bra.l (6 bytes) + short (2 bytes) makes each
# entry 8 bytes, so 9 entries + "space 56" would total 0x80 bytes --
# confirm against an assembler listing.
73	bra.l		_fpsp_snan
74	short		0x0000
75	bra.l		_fpsp_operr
76	short		0x0000
77	bra.l		_fpsp_ovfl
78	short		0x0000
79	bra.l		_fpsp_unfl
80	short		0x0000
81	bra.l		_fpsp_dz
82	short		0x0000
83	bra.l		_fpsp_inex
84	short		0x0000
85	bra.l		_fpsp_fline
86	short		0x0000
87	bra.l		_fpsp_unsupp
88	short		0x0000
89	bra.l		_fpsp_effadd
90	short		0x0000
91
# Reserved room after the last entry.
92	space		56
93
94###############################################################
# _fpsp_done: callout stub for the _off_done slot (the package's
# "all work done" exit).
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_done), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
95	global		_fpsp_done
96_fpsp_done:
97	mov.l		%d0,-(%sp)
98	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
99	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
100	mov.l		0x4(%sp),%d0
101	rtd		&0x4
102
# _real_ovfl: callout stub for the _off_ovfl slot (overflow exception
# enabled code).
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_ovfl), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
103	global		_real_ovfl
104_real_ovfl:
105	mov.l		%d0,-(%sp)
106	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
107	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
108	mov.l		0x4(%sp),%d0
109	rtd		&0x4
110
# _real_unfl: callout stub for the _off_unfl slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_unfl), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
111	global		_real_unfl
112_real_unfl:
113	mov.l		%d0,-(%sp)
114	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
115	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
116	mov.l		0x4(%sp),%d0
117	rtd		&0x4
118
# _real_inex: callout stub for the _off_inex slot (inexact exception
# enabled code).
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_inex), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
119	global		_real_inex
120_real_inex:
121	mov.l		%d0,-(%sp)
122	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
123	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
124	mov.l		0x4(%sp),%d0
125	rtd		&0x4
126
# _real_bsun: callout stub for the _off_bsun slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_bsun), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
127	global		_real_bsun
128_real_bsun:
129	mov.l		%d0,-(%sp)
130	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
131	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
132	mov.l		0x4(%sp),%d0
133	rtd		&0x4
134
# _real_operr: callout stub for the _off_operr slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_operr), uses it
# as an offset from _060FPSP_TABLE-0x80, and transfers control there
# through pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops
# the saved copy, so the target receives the caller's %d0 and stack
# pointer.
135	global		_real_operr
136_real_operr:
137	mov.l		%d0,-(%sp)
138	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
139	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
140	mov.l		0x4(%sp),%d0
141	rtd		&0x4
142
# _real_snan: callout stub for the _off_snan slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_snan), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
143	global		_real_snan
144_real_snan:
145	mov.l		%d0,-(%sp)
146	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
147	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
148	mov.l		0x4(%sp),%d0
149	rtd		&0x4
150
# _real_dz: callout stub for the _off_dz slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_dz), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
151	global		_real_dz
152_real_dz:
153	mov.l		%d0,-(%sp)
154	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
155	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
156	mov.l		0x4(%sp),%d0
157	rtd		&0x4
158
# _real_fline: callout stub for the _off_fline slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_fline), uses it
# as an offset from _060FPSP_TABLE-0x80, and transfers control there
# through pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops
# the saved copy, so the target receives the caller's %d0 and stack
# pointer.
159	global		_real_fline
160_real_fline:
161	mov.l		%d0,-(%sp)
162	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
163	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
164	mov.l		0x4(%sp),%d0
165	rtd		&0x4
166
# _real_fpu_disabled: callout stub for the _off_fpu_dis slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_fpu_dis), uses it
# as an offset from _060FPSP_TABLE-0x80, and transfers control there
# through pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops
# the saved copy, so the target receives the caller's %d0 and stack
# pointer.
167	global		_real_fpu_disabled
168_real_fpu_disabled:
169	mov.l		%d0,-(%sp)
170	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
171	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
172	mov.l		0x4(%sp),%d0
173	rtd		&0x4
174
# _real_trap: callout stub for the _off_trap slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_trap), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
175	global		_real_trap
176_real_trap:
177	mov.l		%d0,-(%sp)
178	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
179	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
180	mov.l		0x4(%sp),%d0
181	rtd		&0x4
182
# _real_trace: callout stub for the _off_trace slot (trace exception
# code).
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_trace), uses it
# as an offset from _060FPSP_TABLE-0x80, and transfers control there
# through pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops
# the saved copy, so the target receives the caller's %d0 and stack
# pointer.
183	global		_real_trace
184_real_trace:
185	mov.l		%d0,-(%sp)
186	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
187	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
188	mov.l		0x4(%sp),%d0
189	rtd		&0x4
190
# _real_access: callout stub for the _off_access slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_access), uses it
# as an offset from _060FPSP_TABLE-0x80, and transfers control there
# through pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops
# the saved copy, so the target receives the caller's %d0 and stack
# pointer.
191	global		_real_access
192_real_access:
193	mov.l		%d0,-(%sp)
194	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
195	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
196	mov.l		0x4(%sp),%d0
197	rtd		&0x4
198
199#######################################
200
# _imem_read: callout stub for the _off_imr slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_imr), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
201	global		_imem_read
202_imem_read:
203	mov.l		%d0,-(%sp)
204	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
205	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
206	mov.l		0x4(%sp),%d0
207	rtd		&0x4
208
# _dmem_read: callout stub for the _off_dmr slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_dmr), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
209	global		_dmem_read
210_dmem_read:
211	mov.l		%d0,-(%sp)
212	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
213	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
214	mov.l		0x4(%sp),%d0
215	rtd		&0x4
216
# _dmem_write: callout stub for the _off_dmw slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_dmw), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
217	global		_dmem_write
218_dmem_write:
219	mov.l		%d0,-(%sp)
220	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
221	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
222	mov.l		0x4(%sp),%d0
223	rtd		&0x4
224
# _imem_read_word: callout stub for the _off_irw slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_irw), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
225	global		_imem_read_word
226_imem_read_word:
227	mov.l		%d0,-(%sp)
228	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
229	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
230	mov.l		0x4(%sp),%d0
231	rtd		&0x4
232
# _imem_read_long: callout stub for the _off_irl slot (read instruction
# longword; _fpsp_ovfl calls it with the instruction address in %a0 and
# takes the fetched longword from %d0).
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_irl), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
233	global		_imem_read_long
234_imem_read_long:
235	mov.l		%d0,-(%sp)
236	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
237	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
238	mov.l		0x4(%sp),%d0
239	rtd		&0x4
240
# _dmem_read_byte: callout stub for the _off_drb slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_drb), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
241	global		_dmem_read_byte
242_dmem_read_byte:
243	mov.l		%d0,-(%sp)
244	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
245	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
246	mov.l		0x4(%sp),%d0
247	rtd		&0x4
248
# _dmem_read_word: callout stub for the _off_drw slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_drw), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
249	global		_dmem_read_word
250_dmem_read_word:
251	mov.l		%d0,-(%sp)
252	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
253	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
254	mov.l		0x4(%sp),%d0
255	rtd		&0x4
256
# _dmem_read_long: callout stub for the _off_drl slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_drl), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
257	global		_dmem_read_long
258_dmem_read_long:
259	mov.l		%d0,-(%sp)
260	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
261	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
262	mov.l		0x4(%sp),%d0
263	rtd		&0x4
264
# _dmem_write_byte: callout stub for the _off_dwb slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_dwb), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
265	global		_dmem_write_byte
266_dmem_write_byte:
267	mov.l		%d0,-(%sp)
268	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
269	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
270	mov.l		0x4(%sp),%d0
271	rtd		&0x4
272
# _dmem_write_word: callout stub for the _off_dww slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_dww), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
273	global		_dmem_write_word
274_dmem_write_word:
275	mov.l		%d0,-(%sp)
276	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
277	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
278	mov.l		0x4(%sp),%d0
279	rtd		&0x4
280
# _dmem_write_long: callout stub for the _off_dwl slot.
# Reads the longword stored at (_060FPSP_TABLE-0x80+_off_dwl), uses it as
# an offset from _060FPSP_TABLE-0x80, and transfers control there through
# pea/rtd. %d0 is reloaded before the jump and "rtd &0x4" drops the saved
# copy, so the target receives the caller's %d0 and stack pointer.
281	global		_dmem_write_long
282_dmem_write_long:
283	mov.l		%d0,-(%sp)
284	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
285	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
286	mov.l		0x4(%sp),%d0
287	rtd		&0x4
288
289#
290# This file contains a set of define statements for constants
291# in order to promote readability within the corecode itself.
292#
293
# Stack frame layout. Every offset below is relative to %a6 as set up by
# "link.w %a6,&-LOCAL_SIZE" in the handlers (see _fpsp_ovfl).
294set LOCAL_SIZE,		192			# stack frame size(bytes)
295set LV,			-LOCAL_SIZE		# stack offset
296
# Positive offsets: fields of the exception stack frame that was on the
# stack when the handler was entered (it sits above the saved %a6).
297set EXC_SR,		0x4			# stack status register
298set EXC_PC,		0x6			# stack pc
299set EXC_VOFF,		0xa			# stacked vector offset
300set EXC_EA,		0xc			# stacked <ea>
301
302set EXC_FP,		0x0			# frame pointer
303
# Register save areas. With the values below they are contiguous:
#   EXC_DREGS  -100 .. -69   (8 longwords, d0-d7)
#   EXC_AREGS   -68 .. -37   (8 longwords, a0-a7)
#   EXC_FPREGS  -36 ..  -1   (3 slots of 12 bytes)
304set EXC_AREGS,		-68			# offset of all address regs
305set EXC_DREGS,		-100			# offset of all data regs
306set EXC_FPREGS,		-36			# offset of all fp regs
307
# NOTE(review): OLD_A7 evaluates to the same offset as EXC_A6
# (EXC_AREGS+(6*4)), not to a slot of its own -- confirm this aliasing is
# intended before relying on both at once.
308set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
309set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
310set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
311set EXC_A5,		EXC_AREGS+(5*4)
312set EXC_A4,		EXC_AREGS+(4*4)
313set EXC_A3,		EXC_AREGS+(3*4)
314set EXC_A2,		EXC_AREGS+(2*4)
315set EXC_A1,		EXC_AREGS+(1*4)
316set EXC_A0,		EXC_AREGS+(0*4)
317set EXC_D7,		EXC_DREGS+(7*4)
318set EXC_D6,		EXC_DREGS+(6*4)
319set EXC_D5,		EXC_DREGS+(5*4)
320set EXC_D4,		EXC_DREGS+(4*4)
321set EXC_D3,		EXC_DREGS+(3*4)
322set EXC_D2,		EXC_DREGS+(2*4)
323set EXC_D1,		EXC_DREGS+(1*4)
324set EXC_D0,		EXC_DREGS+(0*4)
325
# Saved FP registers, 12 bytes each.
326set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
327set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
328set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
329
# Local variables inside the frame, expressed as LV+n (LV = -LOCAL_SIZE),
# i.e. counted upward from the bottom of the frame.
#
# Four 12-byte FP operand slots, back to back from LV+44 to LV+91:
# FP_SRC, FP_DST, FP_SCR0, FP_SCR1. Each has _EX (+0), _SGN (+2),
# _HI (+4) and _LO (+8) sub-offsets.
330set FP_SCR1,		LV+80			# fp scratch 1
331set FP_SCR1_EX,		FP_SCR1+0
332set FP_SCR1_SGN,	FP_SCR1+2
333set FP_SCR1_HI,		FP_SCR1+4
334set FP_SCR1_LO,		FP_SCR1+8
335
336set FP_SCR0,		LV+68			# fp scratch 0
337set FP_SCR0_EX,		FP_SCR0+0
338set FP_SCR0_SGN,	FP_SCR0+2
339set FP_SCR0_HI,		FP_SCR0+4
340set FP_SCR0_LO,		FP_SCR0+8
341
342set FP_DST,		LV+56			# fp destination operand
343set FP_DST_EX,		FP_DST+0
344set FP_DST_SGN,		FP_DST+2
345set FP_DST_HI,		FP_DST+4
346set FP_DST_LO,		FP_DST+8
347
# FP_SRC is also where the handlers place the fsave frame
# ("fsave FP_SRC(%a6)" in _fpsp_ovfl).
348set FP_SRC,		LV+44			# fp source operand
349set FP_SRC_EX,		FP_SRC+0
350set FP_SRC_SGN,		FP_SRC+2
351set FP_SRC_HI,		FP_SRC+4
352set FP_SRC_LO,		FP_SRC+8
353
# Saved FP control registers: USER_FPCR, USER_FPSR, USER_FPIAR are three
# consecutive longwords starting at LV+32 (filled by a single fmovm.l in
# the handlers). The byte-sized names below address individual bytes of
# the saved FPSR/FPCR longwords.
354set USER_FPIAR,		LV+40			# FP instr address register
355
356set USER_FPSR,		LV+36			# FP status register
357set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
358set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
359set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
360set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
361
362set USER_FPCR,		LV+32			# FP control register
363set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
364set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
365
366set L_SCR3,		LV+28			# integer scratch 3
367set L_SCR2,		LV+24			# integer scratch 2
368set L_SCR1,		LV+20			# integer scratch 1
369
370set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
371
# Note: EXC_TEMP2 shares offset LV+24 with L_SCR2.
372set EXC_TEMP2,		LV+24			# temporary space
373set EXC_TEMP,		LV+16			# temporary space
374
375set DTAG,		LV+15			# destination operand type
376set STAG,		LV+14			# source operand type
377
378set SPCOND_FLG,		LV+10			# flag: special case (see below)
379
# EXC_OPWORD (LV+0) and EXC_EXTWORD/EXC_CMDREG (LV+2) together form one
# longword; the handlers store a fetched instruction longword at
# EXC_OPWORD. EXC_EXTWORD and EXC_CMDREG are two names for the same word.
380set EXC_CC,		LV+8			# saved condition codes
381set EXC_EXTWPTR,	LV+4			# saved current PC (active)
382set EXC_EXTWORD,	LV+2			# saved extension word
383set EXC_CMDREG,		LV+2			# saved extension word
384set EXC_OPWORD,		LV+0			# saved operation word
385
386################################
387
388# Helpful macros
389
# Field offsets inside an extended-precision value held in memory. The
# FTEMP/LOCAL/DST/SRC families are parallel names for the same layout:
# _EX at +0, _SGN at +2, _HI at +4, _LO at +8 (and _GRS at +12 for
# FTEMP and LOCAL).
390set FTEMP,		0			# offsets within an
391set FTEMP_EX,		0			# extended precision
392set FTEMP_SGN,		2			# value saved in memory.
393set FTEMP_HI,		4
394set FTEMP_LO,		8
395set FTEMP_GRS,		12
396
397set LOCAL,		0			# offsets within an
398set LOCAL_EX,		0			# extended precision
399set LOCAL_SGN,		2			# value saved in memory.
400set LOCAL_HI,		4
401set LOCAL_LO,		8
402set LOCAL_GRS,		12
403
404set DST,		0			# offsets within an
405set DST_EX,		0			# extended precision
406set DST_HI,		4			# value saved in memory.
407set DST_LO,		8
408
409set SRC,		0			# offsets within an
410set SRC_EX,		0			# extended precision
411set SRC_HI,		4			# value saved in memory.
412set SRC_LO,		8
413
# Exponent limits per precision.
# NOTE(review): the SGL/DBL values look like they are expressed on the
# extended-precision bias (EXT_BIAS) scale -- confirm against users.
414set SGL_LO,		0x3f81			# min sgl prec exponent
415set SGL_HI,		0x407e			# max sgl prec exponent
416set DBL_LO,		0x3c01			# min dbl prec exponent
417set DBL_HI,		0x43fe			# max dbl prec exponent
418set EXT_LO,		0x0			# min ext prec exponent
419set EXT_HI,		0x7ffe			# max ext prec exponent
420
421set EXT_BIAS,		0x3fff			# extended precision bias
422set SGL_BIAS,		0x007f			# single precision bias
423set DBL_BIAS,		0x03ff			# double precision bias
424
# Operand type codes stored in the STAG/DTAG frame bytes.
425set NORM,		0x00			# operand type for STAG/DTAG
426set ZERO,		0x01			# operand type for STAG/DTAG
427set INF,		0x02			# operand type for STAG/DTAG
428set QNAN,		0x03			# operand type for STAG/DTAG
429set DENORM,		0x04			# operand type for STAG/DTAG
430set SNAN,		0x05			# operand type for STAG/DTAG
431set UNNORM,		0x06			# operand type for STAG/DTAG
432
433##################
434# FPSR/FPCR bits #
435##################
# Bit numbers for use with bit instructions on a single byte. They line
# up with the byte/longword masks defined in this file (e.g. neg_bit 3 <->
# neg_bmask 0x08, bsun_bit 7 <-> bsun_mask 0x8000, aiop_bit 7 <->
# aiop_mask 0x80).
#
# Condition code byte:
436set neg_bit,		0x3			# negative result
437set z_bit,		0x2			# zero result
438set inf_bit,		0x1			# infinite result
439set nan_bit,		0x0			# NAN result
440
441set q_sn_bit,		0x7			# sign bit of quotient byte
442
# Exception status / exception enable byte:
443set bsun_bit,		7			# branch on unordered
444set snan_bit,		6			# signalling NAN
445set operr_bit,		5			# operand error
446set ovfl_bit,		4			# overflow
447set unfl_bit,		3			# underflow
448set dz_bit,		2			# divide by zero
449set inex2_bit,		1			# inexact result 2
450set inex1_bit,		0			# inexact result 1
451
# Accrued exception byte:
452set aiop_bit,		7			# accrued illegal operation bit
453set aovfl_bit,		6			# accrued overflow bit
454set aunfl_bit,		5			# accrued underflow bit
455set adz_bit,		4			# accrued dz bit
456set ainex_bit,		3			# accrued inexact bit
457
458#############################
459# FPSR individual bit masks #
460#############################
# Single-bit masks for the FPSR.
#
# Condition codes as positioned in the full 32-bit FPSR (top byte):
461set neg_mask,		0x08000000		# negative bit mask (lw)
462set inf_mask,		0x02000000		# infinity bit mask (lw)
463set z_mask,		0x04000000		# zero bit mask (lw)
464set nan_mask,		0x01000000		# nan bit mask (lw)
465
# The same condition codes as positioned within the condition code byte
# alone:
466set neg_bmask,		0x08			# negative bit mask (byte)
467set inf_bmask,		0x02			# infinity bit mask (byte)
468set z_bmask,		0x04			# zero bit mask (byte)
469set nan_bmask,		0x01			# nan bit mask (byte)
470
# Exception status bits (bits 15-8 of the longword):
471set bsun_mask,		0x00008000		# bsun exception mask
472set snan_mask,		0x00004000		# snan exception mask
473set operr_mask,		0x00002000		# operr exception mask
474set ovfl_mask,		0x00001000		# overflow exception mask
475set unfl_mask,		0x00000800		# underflow exception mask
476set dz_mask,		0x00000400		# dz exception mask
477set inex2_mask,		0x00000200		# inex2 exception mask
478set inex1_mask,		0x00000100		# inex1 exception mask
479
# Accrued exception bits (bits 7-3 of the longword):
480set aiop_mask,		0x00000080		# accrued illegal operation
481set aovfl_mask,		0x00000040		# accrued overflow
482set aunfl_mask,		0x00000020		# accrued underflow
483set adz_mask,		0x00000010		# accrued divide by zero
484set ainex_mask,		0x00000008		# accrued inexact
485
486######################################
487# FPSR combinations used in the FPSP #
488######################################
# Pre-summed combinations of the single-bit FPSR masks. The component
# masks are disjoint bits, so "+" here has the same effect as a bitwise OR.
489set dzinf_mask,		inf_mask+dz_mask+adz_mask
490set opnan_mask,		nan_mask+operr_mask+aiop_mask
# nzi_mask is an AND mask rather than a set of bits to raise: it keeps
# bits 24-0 (including the NAN condition code) and drops everything above.
491set nzi_mask,		0x01ffffff		#clears N, Z, and I
492set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
# unf2inx_mask is unfinx_mask without the accrued-underflow bit.
493set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
494set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
495set inx1a_mask,		inex1_mask+ainex_mask
496set inx2a_mask,		inex2_mask+ainex_mask
497set snaniop_mask,	nan_mask+snan_mask+aiop_mask
498set snaniop2_mask,	snan_mask+aiop_mask
499set naniop_mask,	nan_mask+aiop_mask
500set neginf_mask,	neg_mask+inf_mask
501set infaiop_mask,	inf_mask+aiop_mask
502set negz_mask,		neg_mask+z_mask
503set opaop_mask,		operr_mask+aiop_mask
# The two *_inx_mask values below do not include inex2_mask, unlike
# unfinx_mask/ovfinx_mask above.
504set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
505set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
506
507#########
508# misc. #
509#########
510set rnd_stky_bit,	29			# stky bit pos in longword
511
512set sign_bit,		0x7			# sign bit
513set signan_bit,		0x6			# signalling nan bit
514
# Same values as SGL_LO and DBL_LO.
515set sgl_thresh,		0x3f81			# minimum sgl exponent
516set dbl_thresh,		0x3c01			# minimum dbl exponent
517
# Rounding precision codes.
518set x_mode,		0x0			# extended precision
519set s_mode,		0x4			# single precision
520set d_mode,		0x8			# double precision
521
# Rounding mode codes.
522set rn_mode,		0x0			# round-to-nearest
523set rz_mode,		0x1			# round-to-zero
524set rm_mode,		0x2			# round-to-minus-infinity
525set rp_mode,		0x3			# round-to-plus-infinity
526
527set mantissalen,	64			# length of mantissa in bits
528
# Operand sizes in bytes.
529set BYTE,		1			# len(byte) == 1 byte
530set WORD,		2			# len(word) == 2 bytes
531set LONG,		4			# len(longword) == 4 bytes
532
# Vector offsets of the FP exceptions, 4 bytes apart from 0xc0 to 0xd8.
# _fpsp_ovfl writes the low byte 0xc4 (INEX_VEC) into the stacked vector
# offset when it reroutes an overflow to the inexact handler.
533set BSUN_VEC,		0xc0			# bsun    vector offset
534set INEX_VEC,		0xc4			# inexact vector offset
535set DZ_VEC,		0xc8			# dz      vector offset
536set UNFL_VEC,		0xcc			# unfl    vector offset
537set OPERR_VEC,		0xd0			# operr   vector offset
538set OVFL_VEC,		0xd4			# ovfl    vector offset
539set SNAN_VEC,		0xd8			# snan    vector offset
540
541###########################
542# SPecial CONDition FLaGs #
543###########################
# Values for the SPCOND_FLG frame byte. Each *_flg is a byte mask and the
# matching *_bit is the bit number of that same flag (0x01 <-> 0,
# 0x02 <-> 1, 0x04 <-> 2, 0x08 <-> 3, 0x80 <-> 7). fmovm_flg (0x40) has no
# *_bit counterpart in this list.
544set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
545set fbsun_flg,		0x02			# flag bit: bsun exception
546set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
547set mda7_flg,		0x08			# flag bit: -(a7) <ea>
548set fmovm_flg,		0x40			# flag bit: fmovm instruction
549set immed_flg,		0x80			# flag bit: &<data> <ea>
550
551set ftrapcc_bit,	0x0
552set fbsun_bit,		0x1
553set mia7_bit,		0x2
554set mda7_bit,		0x3
555set immed_bit,		0x7
556
557##################################
558# TRANSCENDENTAL "LAST-OP" FLAGS #
559##################################
# Codes identifying which basic FP operation was executed last.
# NOTE(review): the consumers of these codes are outside this part of the
# file -- confirm how they are stored and tested before changing values.
560set FMUL_OP,		0x0			# fmul instr performed last
561set FDIV_OP,		0x1			# fdiv performed last
562set FADD_OP,		0x2			# fadd performed last
563set FMOV_OP,		0x3			# fmov performed last
564
565#############
566# CONSTANTS #
567#############
# Constant data emitted in-line as longwords.
# NOTE(review): T1/T2 (two longwords each) and TWOBYPI look like IEEE
# double-precision values; PI/PIBY2 (four longwords each) look like an
# extended-precision exponent word plus 64-bit mantissa, padded with a
# zero longword -- confirm against the routines that load them.
568T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
569T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
570
# PI and PIBY2 share the same mantissa and differ only in the first
# longword (0x4000.... vs 0x3FFF....).
571PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
572PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
573
574TWOBYPI:
575	long		0x3FE45F30,0x6DC9C883
576
577#########################################################################
578# XDEF ****************************************************************	#
579#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
580#									#
581#	This handler should be the first code executed upon taking the	#
582#	FP Overflow exception in an operating system.			#
583#									#
584# XREF ****************************************************************	#
585#	_imem_read_long() - read instruction longword			#
586#	fix_skewed_ops() - adjust src operand in fsave frame		#
587#	set_tag_x() - determine optype of src/dst operands		#
588#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
589#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
590#	load_fpn2() - load dst operand from FP regfile			#
591#	fout() - emulate an opclass 3 instruction			#
592#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
593#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
594#	_real_ovfl() - "callout" for Overflow exception enabled code	#
595#	_real_inex() - "callout" for Inexact exception enabled code	#
596#	_real_trace() - "callout" for Trace exception code		#
597#									#
598# INPUT ***************************************************************	#
599#	- The system stack contains the FP Ovfl exception stack frame	#
600#	- The fsave frame contains the source operand			#
601#									#
602# OUTPUT **************************************************************	#
603#	Overflow Exception enabled:					#
604#	- The system stack is unchanged					#
605#	- The fsave frame contains the adjusted src op for opclass 0,2	#
606#	Overflow Exception disabled:					#
607#	- The system stack is unchanged					#
608#	- The "exception present" flag in the fsave frame is cleared	#
609#									#
610# ALGORITHM ***********************************************************	#
611#	On the 060, if an FP overflow is present as the result of any	#
612# instruction, the 060 will take an overflow exception whether the	#
613# exception is enabled or disabled in the FPCR. For the disabled case,	#
614# This handler emulates the instruction to determine what the correct	#
615# default result should be for the operation. This default result is	#
616# then stored in either the FP regfile, data regfile, or memory.	#
617# Finally, the handler exits through the "callout" _fpsp_done()		#
618# denoting that no exceptional conditions exist within the machine.	#
619#	If the exception is enabled, then this handler must create the	#
620# exceptional operand and place it in the fsave state frame, and store	#
621# the default result (only if the instruction is opclass 3). For	#
622# exceptions enabled, this handler must exit through the "callout"	#
623# _real_ovfl() so that the operating system enabled overflow handler	#
624# can handle this case.							#
625#	Two other conditions exist. First, if overflow was disabled	#
626# but the inexact exception was enabled, this handler must exit		#
627# through the "callout" _real_inex() regardless of whether the result	#
628# was inexact.								#
629#	Also, in the case of an opclass three instruction where		#
630# overflow was disabled and the trace exception was enabled, this	#
631# handler must exit through the "callout" _real_trace().		#
632#									#
633#########################################################################
634
# _fpsp_ovfl: FP Overflow exception entry point.
# Flow, as implemented below:
#   1. Build the local frame, fsave into FP_SRC, save d0-d1/a0-a1, the FP
#      control registers and fp0-fp1.
#   2. Fetch the faulting instruction longword (address taken from FPIAR).
#   3. If bit 5 of the first EXC_CMDREG byte is set -> fovfl_out (fmove
#      out), otherwise tag the operands, dispatch through tbl_unsupp and
#      store the result with store_fpreg.
#   4. Exit through _real_ovfl (overflow enabled), _real_inex (inexact
#      enabled), _real_trace (fmove out with trace bit set) or _fpsp_done.
635	global		_fpsp_ovfl
636_fpsp_ovfl:
637
638#$#	sub.l		&24,%sp			# make room for src/dst
639
640	link.w		%a6,&-LOCAL_SIZE	# init stack frame
641
642	fsave		FP_SRC(%a6)		# grab the "busy" frame
643
644	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
645	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
646	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
647
648# the FPIAR holds the "current PC" of the faulting instruction
# EXC_EXTWPTR is left pointing 4 bytes past the start of the instruction;
# the fetched longword fills EXC_OPWORD and EXC_CMDREG together.
649	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
650	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
651	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
652	bsr.l		_imem_read_long		# fetch the instruction words
653	mov.l		%d0,EXC_OPWORD(%a6)
654
655##############################################################################
656
# this btst looks at the FIRST (high) byte of the extension word; the
# btst further down uses 1+EXC_CMDREG, i.e. the second (low) byte.
657	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
658	bne.w		fovfl_out
659
660
661	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
662	bsr.l		fix_skewed_ops		# fix src op
663
664# since, I believe, only NORMs and DENORMs can come through here,
665# maybe we can avoid the subroutine call.
666	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
667	bsr.l		set_tag_x		# tag the operand type
668	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
669
670# bit five of the fp extension word separates the monadic and dyadic operations
671# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
672# will never take this exception.
673	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
674	beq.b		fovfl_extract		# monadic
675
# the 3-bit field at bit offset 6 of the extension word is passed in d0
# as the destination register selector.
676	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
677	bsr.l		load_fpn2		# load dst into FP_DST
678
679	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
680	bsr.l		set_tag_x		# tag the operand type
681	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
682	bne.b		fovfl_op2_done		# no
683	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
684fovfl_op2_done:
685	mov.b		%d0,DTAG(%a6)		# save dst optype tag
686
687fovfl_extract:
688
689#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
690#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
691#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
692#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
693#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
694#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
695
# arguments for the emulation routine: d0 = rounding precision/mode byte,
# a0 = ptr to src operand, a1 = ptr to dst operand.
696	clr.l		%d0
697	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
698
# d1 = low 7 bits of the second extension-word byte, used as the index
# into tbl_unsupp.
699	mov.b		1+EXC_CMDREG(%a6),%d1
700	andi.w		&0x007f,%d1		# extract extension
701
702	andi.l		&0x00ff01ff,USER_FPSR(%a6) # keep quotient/accrued bytes + inex1; clear the rest
703
704	fmov.l		&0x0,%fpcr		# zero current control regs
705	fmov.l		&0x0,%fpsr
706
707	lea		FP_SRC(%a6),%a0
708	lea		FP_DST(%a6),%a1
709
710# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# tbl_unsupp holds longword offsets relative to tbl_unsupp itself: the
# first instruction loads the entry, the jsr adds it back to tbl_unsupp.
711	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
712	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
713
714# the operation has been emulated. the result is in fp0.
715# the EXOP, if an exception occurred, is in fp1.
716# we must save the default result regardless of whether
717# traps are enabled or disabled.
718	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
719	bsr.l		store_fpreg
720
721# the exceptional possibilities we have left ourselves with are ONLY overflow
722# and inexact. and, the inexact is such that overflow occurred and was disabled
723# but inexact was enabled.
724	btst		&ovfl_bit,FPCR_ENABLE(%a6)
725	bne.b		fovfl_ovfl_on
726
727	btst		&inex2_bit,FPCR_ENABLE(%a6)
728	bne.b		fovfl_inex_on
729
# neither overflow nor inexact is enabled: restore everything that was
# saved on entry and leave through _fpsp_done. No frestore on this path.
730	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
731	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
732	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
733
734	unlk		%a6
735#$#	add.l		&24,%sp
736	bra.l		_fpsp_done
737
738# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
739# in fp1. now, simply jump to _real_ovfl()!
# (also reached from fovfl_out.) the EXOP overwrites the start of the
# fsave frame at FP_SRC, which is then handed back with frestore.
740fovfl_ovfl_on:
741	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
742
743	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
744
745	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
746	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
747	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
748
749	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
750
751	unlk		%a6
752
753	bra.l		_real_ovfl
754
755# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
756# we must jump to real_inex().
# (also reached from fovfl_out.) same as fovfl_ovfl_on except that the low
# byte of the stacked vector offset is rewritten to 0xc4 (INEX_VEC) and
# the status word written into the frame is 0xe001 instead of 0xe005.
757fovfl_inex_on:
758
759	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
760
761	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
762	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
763
764	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
765	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
766	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
767
768	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
769
770	unlk		%a6
771
772	bra.l		_real_inex
773
774########################################################################
# fmove out path: the instruction is emulated by fout() with
# d0 = rounding precision/mode and a0 = ptr to the src operand.
775fovfl_out:
776
777
778#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
779#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
780#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
781
782# the src operand is definitely a NORM(!), so tag it as such
783	mov.b		&NORM,STAG(%a6)		# set src optype tag
784
785	clr.l		%d0
786	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
787
788	and.l		&0xffff00ff,USER_FPSR(%a6) # clear only the exception status byte
789
790	fmov.l		&0x0,%fpcr		# zero current control regs
791	fmov.l		&0x0,%fpsr
792
793	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
794
795	bsr.l		fout
796
797	btst		&ovfl_bit,FPCR_ENABLE(%a6)
798	bne.w		fovfl_ovfl_on
799
800	btst		&inex2_bit,FPCR_ENABLE(%a6)
801	bne.w		fovfl_inex_on
802
803	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
804	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
805	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
806
807	unlk		%a6
808#$#	add.l		&24,%sp
809
# after unlk, %sp points at what the frame addressed as EXC_SR, so the
# %sp-relative offsets below are the EXC_* offsets minus 4:
# (%sp) = first byte of the stacked SR, 0x6(%sp) = EXC_VOFF word,
# 0x8(%sp) = EXC_EA longword.
810	btst		&0x7,(%sp)		# is trace on?
811	beq.l		_fpsp_done		# no
812
813	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
814	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
815	bra.l		_real_trace
816
817#########################################################################
818# XDEF ****************************************************************	#
819#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
820#									#
821#	This handler should be the first code executed upon taking the	#
822#	FP Underflow exception in an operating system.			#
823#									#
824# XREF ****************************************************************	#
825#	_imem_read_long() - read instruction longword			#
826#	fix_skewed_ops() - adjust src operand in fsave frame		#
827#	set_tag_x() - determine optype of src/dst operands		#
828#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
829#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
830#	load_fpn2() - load dst operand from FP regfile			#
831#	fout() - emulate an opclass 3 instruction			#
832#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
833#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
834#	_real_unfl() - "callout" for Underflow exception enabled code	#
835#	_real_inex() - "callout" for Inexact exception enabled code	#
836#	_real_trace() - "callout" for Trace exception code		#
837#									#
838# INPUT ***************************************************************	#
839#	- The system stack contains the FP Unfl exception stack frame	#
840#	- The fsave frame contains the source operand			#
841#									#
842# OUTPUT **************************************************************	#
843#	Underflow Exception enabled:					#
844#	- The system stack is unchanged					#
845#	- The fsave frame contains the adjusted src op for opclass 0,2	#
846#	Underflow Exception disabled:					#
847#	- The system stack is unchanged					#
848#	- The "exception present" flag in the fsave frame is cleared	#
849#									#
850# ALGORITHM ***********************************************************	#
851#	On the 060, if an FP underflow is present as the result of any	#
852# instruction, the 060 will take an underflow exception whether the	#
853# exception is enabled or disabled in the FPCR. For the disabled case,	#
854# This handler emulates the instruction to determine what the correct	#
855# default result should be for the operation. This default result is	#
856# then stored in either the FP regfile, data regfile, or memory.	#
857# Finally, the handler exits through the "callout" _fpsp_done()		#
858# denoting that no exceptional conditions exist within the machine.	#
859#	If the exception is enabled, then this handler must create the	#
860# exceptional operand and place it in the fsave state frame, and store	#
861# the default result (only if the instruction is opclass 3). For	#
862# exceptions enabled, this handler must exit through the "callout"	#
863# _real_unfl() so that the operating system enabled underflow handler	#
864# can handle this case.							#
865#	Two other conditions exist. First, if underflow was disabled	#
866# but the inexact exception was enabled and the result was inexact,	#
867# this handler must exit through the "callout" _real_inex().		#
868#									#
869#	Also, in the case of an opclass three instruction where		#
870# underflow was disabled and the trace exception was enabled, this	#
871# handler must exit through the "callout" _real_trace().		#
872#									#
873#########################################################################
874
# _fpsp_unfl: save machine state, re-emulate the faulting instruction,
# store the result, then leave through _fpsp_done (or branch to
# funfl_unfl_on / funfl_inex_on when underflow / inexact is enabled).
# opclass three ("fmove out") instructions are split off to funfl_out.
875	global		_fpsp_unfl
876_fpsp_unfl:
877
878#$#	sub.l		&24,%sp			# make room for src/dst
879
880	link.w		%a6,&-LOCAL_SIZE	# init stack frame
881
882	fsave		FP_SRC(%a6)		# grab the "busy" frame
883
884	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
885	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
886	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
887
888# the FPIAR holds the "current PC" of the faulting instruction
889	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
890	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
891	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
892	bsr.l		_imem_read_long		# fetch the instruction words
893	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword
894
895##############################################################################
896
897	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
898	bne.w		funfl_out		# yes
899
900
901	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
902	bsr.l		fix_skewed_ops		# fix src op
903
904	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
905	bsr.l		set_tag_x		# tag the operand type
906	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
907
908# bit five of the fp ext word separates the monadic and dyadic operations
909# that can pass through fpsp_unfl(). remember that fcmp, and ftst
910# will never take this exception.
911	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
912	beq.b		funfl_extract		# monadic
913
914# now, what's left that's not dyadic is fsincos. we can distinguish it
915# from all dyadics by the '0110xxx' pattern
916	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
917	bne.b		funfl_extract		# yes
918
919	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
920	bsr.l		load_fpn2		# load dst into FP_DST
921
922	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
923	bsr.l		set_tag_x		# tag the operand type
924	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
925	bne.b		funfl_op2_done		# no
926	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
927funfl_op2_done:
928	mov.b		%d0,DTAG(%a6)		# save dst optype tag
929
930funfl_extract:
931
932#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
933#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
934#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
935#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
936#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
937#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
938
939	clr.l		%d0
940	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
941
942	mov.b		1+EXC_CMDREG(%a6),%d1
943	andi.w		&0x007f,%d1		# extract extension
944
945	andi.l		&0x00ff01ff,USER_FPSR(%a6) # keep only bits 23-16, 8, 7-0 of saved FPSR
946
947	fmov.l		&0x0,%fpcr		# zero current control regs
948	fmov.l		&0x0,%fpsr
949
950	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
951	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand
952
953# maybe we can make these entry points ONLY the UNFL entry points of each routine.
954	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
955	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the routine (offset is relative to tbl_unsupp)
956
957	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # same field used above for the dst reg
958	bsr.l		store_fpreg		# store the result
959
960# The `060 FPU multiplier hardware is such that if the result of a
961# multiply operation is the smallest possible normalized number
962# (0x00000000_80000000_00000000), then the machine will take an
963# underflow exception. Since this is incorrect, we need to check
964# if our emulation, after re-doing the operation, decided that
965# no underflow was called for. We do these checks only in
966# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
967# special case will simply exit gracefully with the correct result.
968
969# the exceptional possibilities we have left ourselves with are ONLY underflow
970# and inexact. and, the inexact is such that underflow occurred and was disabled
971# but inexact was enabled.
972	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
973	bne.b		funfl_unfl_on		# yes
974
975funfl_chkinex:
976	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
977	bne.b		funfl_inex_on		# yes
978
979funfl_exit:
980	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
981	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
982	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
983
984	unlk		%a6
985#$#	add.l		&24,%sp
986	bra.l		_fpsp_done
987
988# underflow is enabled AND the machine took an underflow exception. so, we have
989# the EXOP in fp1 (don't forget to save fp0). what to do now?
990# well, we simply have to go to _real_unfl()!
991funfl_unfl_on:
992
993# The `060 FPU multiplier hardware is such that if the result of a
994# multiply operation is the smallest possible normalized number
995# (0x00000000_80000000_00000000), then the machine will take an
996# underflow exception. Since this is incorrect, we check here to see
997# if our emulation, after re-doing the operation, decided that
998# no underflow was called for.
999	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did emulation set underflow?
1000	beq.w		funfl_chkinex		# no; go check inexact instead
1001
# funfl_out also enters here directly, skipping the FPSR_EXCEPT check above.
1002funfl_unfl_on2:
1003	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1004
1005	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1006
1007	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1008	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1010
1011	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1012
1013	unlk		%a6
1014
1015	bra.l		_real_unfl
1016
1017# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018# we must jump to real_inex().
1019funfl_inex_on:
1020
1021# The `060 FPU multiplier hardware is such that if the result of a
1022# multiply operation is the smallest possible normalized number
1023# (0x00000000_80000000_00000000), then the machine will take an
1024# underflow exception.
1025# But, whether bogus or not, if inexact is enabled AND it occurred,
1026# then we have to branch to real_inex.
1027
1028	btst		&inex2_bit,FPSR_EXCEPT(%a6) # did inexact occur?
1029	beq.w		funfl_exit		# no; exit normally
1030
# funfl_out also enters here directly, skipping the FPSR_EXCEPT check above.
1031funfl_inex_on2:
1032
1033	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1034
1035	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1036	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1037
1038	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1039	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1041
1042	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1043
1044	unlk		%a6
1045
1046	bra.l		_real_inex
1047
1048#######################################################################
# opclass three (fmove out) path of _fpsp_unfl: call fout(), then exit via
# funfl_unfl_on2, funfl_inex_on2, _fpsp_done, or _real_trace.
1049funfl_out:
1050
1051
1052#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1055
1056# the src operand is definitely a NORM(!), so tag it as such
1057	mov.b		&NORM,STAG(%a6)		# set src optype tag
1058
1059	clr.l		%d0
1060	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1061
1062	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field only (bits 15-8)
1063
1064	fmov.l		&0x0,%fpcr		# zero current control regs
1065	fmov.l		&0x0,%fpsr
1066
1067	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1068
1069	bsr.l		fout			# call fmove out routine
1070
1071	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1072	bne.w		funfl_unfl_on2		# yes
1073
1074	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1075	bne.w		funfl_inex_on2		# yes
1076
1077	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1078	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1080
1081	unlk		%a6
1082#$#	add.l		&24,%sp
1083
1084	btst		&0x7,(%sp)		# is trace on?
1085	beq.l		_fpsp_done		# no
1086
1087	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1088	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1089	bra.l		_real_trace
1090
1091#########################################################################
1092# XDEF ****************************************************************	#
1093#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1094#		        Data Type" exception.				#
1095#									#
1096#	This handler should be the first code executed upon taking the	#
1097#	FP Unimplemented Data Type exception in an operating system.	#
1098#									#
1099# XREF ****************************************************************	#
1100#	_imem_read_{word,long}() - read instruction word/longword	#
1101#	fix_skewed_ops() - adjust src operand in fsave frame		#
1102#	set_tag_x() - determine optype of src/dst operands		#
1103#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1104#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1105#	load_fpn2() - load dst operand from FP regfile			#
1106#	load_fpn1() - load src operand from FP regfile			#
1107#	fout() - emulate an opclass 3 instruction			#
1108#	tbl_unsupp - addr of table of emulation routines, opclass 0,2	#
1109#	_real_inex() - "callout" to operating system inexact handler	#
1110#	_fpsp_done() - "callout" for exit; work all done		#
1111#	_real_trace() - "callout" for Trace enabled exception		#
1112#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1113#	_real_snan() - "callout" for SNAN exception			#
1114#	_real_operr() - "callout" for OPERR exception			#
1115#	_real_ovfl() - "callout" for OVFL exception			#
1116#	_real_unfl() - "callout" for UNFL exception			#
1117#	get_packed() - fetch packed operand from memory			#
1118#									#
1119# INPUT ***************************************************************	#
1120#	- The system stack contains the "Unimp Data Type" stk frame	#
1121#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
1122#									#
1123# OUTPUT **************************************************************	#
1124#	If Inexact exception (opclass 3):				#
1125#	- The system stack is changed to an Inexact exception stk frame	#
1126#	If SNAN exception (opclass 3):					#
1127#	- The system stack is changed to an SNAN exception stk frame	#
1128#	If OPERR exception (opclass 3):					#
1129#	- The system stack is changed to an OPERR exception stk frame	#
1130#	If OVFL exception (opclass 3):					#
1131#	- The system stack is changed to an OVFL exception stk frame	#
1132#	If UNFL exception (opclass 3):					#
1133#	- The system stack is changed to an UNFL exception stack frame	#
1134#	If Trace exception enabled:					#
1135#	- The system stack is changed to a Trace exception stack frame	#
1136#	Else: (normal case)						#
1137#	- Correct result has been stored as appropriate			#
1138#									#
1139# ALGORITHM ***********************************************************	#
1140#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1141# unimplemented data types. These can be either opclass 0,2 or 3	#
1142# instructions, and (2) PACKED unimplemented data format instructions	#
1143# also of opclasses 0,2, or 3.						#
1144#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1145# operand from the fsave state frame and the dst operand (if dyadic)	#
1146# from the FP register file. The instruction is then emulated by	#
1147# choosing an emulation routine from a table of routines indexed by	#
1148# instruction type. Once the instruction has been emulated and result	#
1149# saved, then we check to see if any enabled exceptions resulted from	#
1150# instruction emulation. If none, then we exit through the "callout"	#
1151# _fpsp_done(). If there is an enabled FP exception, then we insert	#
1152# this exception into the FPU in the fsave state frame and then exit	#
1153# through _fpsp_done().							#
1154#	PACKED opclass 0 and 2 is similar in how the instruction is	#
1155# emulated and exceptions handled. The differences occur in how the	#
1156# handler loads the packed op (by calling get_packed() routine) and	#
1157# by the fact that a Trace exception could be pending for PACKED ops.	#
1158# If a Trace exception is pending, then the current exception stack	#
1159# frame is changed to a Trace exception stack frame and an exit is	#
1160# made through _real_trace().						#
1161#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1162# performed by calling the routine fout(). If no exception should occur	#
1163# as the result of emulation, then an exit either occurs through	#
1164# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1165# (a Trace stack frame must be created here, too). If an FP exception	#
1166# should occur, then we must create an exception stack frame of that	#
1167# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1168# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
1169# emulation is performed in a similar manner.				#
1170#									#
1171#########################################################################
1172
1173#
1174# (1) DENORM and UNNORM (unimplemented) data types:
1175#
1176#				post-instruction
1177#				*****************
1178#				*      EA	*
1179#	 pre-instruction	*		*
1180#	*****************	*****************
1181#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1182#	*****************	*****************
1183#	*     Next	*	*     Next	*
1184#	*      PC	*	*      PC	*
1185#	*****************	*****************
1186#	*      SR	*	*      SR	*
1187#	*****************	*****************
1188#
1189# (2) PACKED format (unsupported) opclasses two and three:
1190#	*****************
1191#	*      EA	*
1192#	*		*
1193#	*****************
1194#	* 0x2 *  0x0dc	*
1195#	*****************
1196#	*     Next	*
1197#	*      PC	*
1198#	*****************
1199#	*      SR	*
1200#	*****************
1201#
# _fpsp_unsupp: save machine state, fetch the faulting instruction, then
# dispatch: opclass three -> fu_out, packed opclass two -> fu_in_pack,
# otherwise emulate here via tbl_unsupp and exit through _fpsp_done.
1202	global		_fpsp_unsupp
1203_fpsp_unsupp:
1204
1205	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1206
1207	fsave		FP_SRC(%a6)		# save fp state
1208
1209	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1210	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1211	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1212
1213	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1214	bne.b		fu_s			# supervisor
1215fu_u:
1216	mov.l		%usp,%a0		# fetch user stack pointer
1217	mov.l		%a0,EXC_A7(%a6)		# save on stack
1218	bra.b		fu_cont
1219# if the exception is an opclass zero or two unimplemented data type
1220# exception, then the a7' calculated here is wrong since it doesn't
1221# stack an ea. however, we don't need an a7' for this case anyways.
1222fu_s:
1223	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1224	mov.l		%a0,EXC_A7(%a6)		# save on stack
1225
1226fu_cont:
1227
1228# the FPIAR holds the "current PC" of the faulting instruction
1229# the FPIAR should be set correctly for ALL exceptions passing through
1230# this point.
1231	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1232	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1233	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1234	bsr.l		_imem_read_long		# fetch the instruction words
1235	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1236
1237############################
1238
1239	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1240
1241# Separate opclass three (fpn-to-mem) ops since they have a different
1242# stack frame and protocol.
1243	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1244	bne.w		fu_out			# yes
1245
1246# Separate packed opclass two instructions.
1247	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1248	cmpi.b		%d0,&0x13		# is class = 2 & fmt = packed?
1249	beq.w		fu_in_pack		# yes
1250
1251
1252# I'm not sure at this point what FPSR bits are valid for this instruction.
1253# so, since the emulation routines re-create them anyways, zero exception field
1254	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
1255
1256	fmov.l		&0x0,%fpcr		# zero current control regs
1257	fmov.l		&0x0,%fpsr
1258
1259# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1260# precision format if the src format was single or double and the
1261# source data type was an INF, NAN, DENORM, or UNNORM
1262	lea		FP_SRC(%a6),%a0		# pass ptr to input
1263	bsr.l		fix_skewed_ops
1264
1265# we don't know whether the src operand or the dst operand (or both) is the
1266# UNNORM or DENORM. call the function that tags the operand type. if the
1267# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1268	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1269	bsr.l		set_tag_x		# tag the operand type
1270	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1271	bne.b		fu_op2			# no
1272	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1273
1274fu_op2:
1275	mov.b		%d0,STAG(%a6)		# save src optype tag
1276
1277	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1278
1279# bit five of the fp extension word separates the monadic and dyadic operations
1280# at this point
1281	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1282	beq.b		fu_extract		# monadic
1283	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1284	beq.b		fu_extract		# yes, so it's monadic, too
1285
1286	bsr.l		load_fpn2		# load dst into FP_DST
1287
1288	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1289	bsr.l		set_tag_x		# tag the operand type
1290	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1291	bne.b		fu_op2_done		# no
1292	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1293fu_op2_done:
1294	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1295
1296fu_extract:
1297	clr.l		%d0
1298	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1299
1300	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1301
1302	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
1303	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand
1304
1305	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1306	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
1307
1308#
1309# Exceptions in order of precedence:
1310#	BSUN	: none
1311#	SNAN	: all dyadic ops
1312#	OPERR	: fsqrt(-NORM)
1313#	OVFL	: all except ftst,fcmp
1314#	UNFL	: all except ftst,fcmp
1315#	DZ	: fdiv
1316#	INEX2	: all except ftst,fcmp
1317#	INEX1	: none (packed doesn't go through here)
1318#
1319
1320# we determine the highest priority exception(if any) set by the
1321# emulation routine that has also been enabled by the user.
1322	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
1323	bne.b		fu_in_ena		# some are enabled
1324
1325fu_in_cont:
1326# fcmp and ftst do not store any result.
1327	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1328	andi.b		&0x38,%d0		# extract bits 3-5
1329	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1330	beq.b		fu_in_exit		# yes
1331
1332	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1333	bsr.l		store_fpreg		# store the result
1334
1335fu_in_exit:
1336
1337	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1338	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1339	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1340
1341	unlk		%a6
1342
1343	bra.l		_fpsp_done
1344
# at least one exception is enabled (d0 = FPCR_ENABLE byte on entry).
1345fu_in_ena:
1346	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1347	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1348	bne.b		fu_in_exc		# there is at least one set
1349
1350#
1351# No exceptions occurred that were also enabled. Now:
1352#
1353#	if (OVFL && ovfl_disabled && inexact_enabled) {
1354#	    branch to _real_inex() (even if the result was exact!);
1355#	} else {
1356#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1357#	    return;
1358#	}
1359#
# NOTE(review): the code below does not branch to _real_inex() itself; it goes
# to fu_in_exc_ovfl, which inserts the OVFL status into the fsave frame and
# exits through _fpsp_done().
1360	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1361	beq.b		fu_in_cont		# no
1362
1363fu_in_ovflchk:
1364	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1365	beq.b		fu_in_cont		# no
1366	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1367
1368#
1369# An exception occurred and that exception was enabled:
1370#
1371#	shift enabled exception field into lo byte of d0;
1372#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1374#		/*
1375#		 * this is the case where we must call _real_inex() now or else
1376#		 * there will be no other way to pass it the exceptional operand
1377#		 */
1378#		call _real_inex();
1379#	} else {
1380#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1381#	}
1382#
1383fu_in_exc:
1384	subi.l		&24,%d0			# fix offset to be 0-7
1385	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1386	bne.b		fu_in_exc_exit		# no
1387
1388# the enabled exception was inexact
1389	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390	bne.w		fu_in_exc_unfl		# yes
1391	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392	bne.w		fu_in_exc_ovfl		# yes
1393
1394# here, we insert the correct fsave status value into the fsave frame for the
1395# corresponding exception. the operand in the fsave frame should be the original
1396# src operand.
1397fu_in_exc_exit:
1398	mov.l		%d0,-(%sp)		# save d0
1399	bsr.l		funimp_skew		# skew sgl or dbl inputs
1400	mov.l		(%sp)+,%d0		# restore d0
1401
1402	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1403
1404	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1405	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1407
1408	frestore	FP_SRC(%a6)		# restore src op
1409
1410	unlk		%a6
1411
1412	bra.l		_fpsp_done
1413
# fsave status words, indexed by the exception number computed in fu_in_exc
# (0-7; fu_in_exc_unfl/fu_in_exc_ovfl below select entries 4 and 3).
1414tbl_except:
1415	short		0xe000,0xe006,0xe004,0xe005 # entries 0-3 (3 = OVFL)
1416	short		0xe003,0xe002,0xe001,0xe001 # entries 4-7 (4 = UNFL, 6 = INEX)
1417
1418fu_in_exc_unfl:
1419	mov.w		&0x4,%d0		# select tbl_except entry 4 (UNFL)
1420	bra.b		fu_in_exc_exit
1421fu_in_exc_ovfl:
1422	mov.w		&0x03,%d0		# select tbl_except entry 3 (OVFL)
1423	bra.b		fu_in_exc_exit
1424
1425# If the input operand to this operation was opclass two and a single
1426# or double precision denorm, inf, or nan, the operand needs to be
1427# "corrected" in order to have the proper equivalent extended precision
1428# number.
# in:	a0 = ptr to the operand (accessed via LOCAL_EX/LOCAL_HI/LOCAL_LO)
#	a6 = frame pointer (EXC_CMDREG(%a6) supplies opclass and src fmt)
# d0 is used as scratch; the operand is modified in place.
1429	global		fix_skewed_ops
1430fix_skewed_ops:
1431	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1432	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1433	beq.b		fso_sgl			# yes
1434	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1435	beq.b		fso_dbl			# yes
1436	rts					# no
1437
1438fso_sgl:
1439	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1440	andi.w		&0x7fff,%d0		# strip sign
1441	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1442	beq.b		fso_sgl_dnrm_zero	# yes
1443	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1444	beq.b		fso_infnan		# yes
1445	rts					# no
1446
1447fso_sgl_dnrm_zero:
1448	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1449	beq.b		fso_zero		# it's a skewed zero
1450fso_sgl_dnrm:
1451# here, we count on norm not to alter a0...
# (presumably norm returns the shift amount in d0 -- confirm against norm)
1452	bsr.l		norm			# normalize mantissa
1453	neg.w		%d0			# -shft amt
1454	addi.w		&0x3f81,%d0		# adjust new exponent
1455	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1456	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1457	rts
1458
1459fso_zero:
1460	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1461	rts
1462
1463fso_infnan:
1464	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
1465	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1466	rts
1467
1468fso_dbl:
1469	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1470	andi.w		&0x7fff,%d0		# strip sign
1471	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1472	beq.b		fso_dbl_dnrm_zero	# yes
1473	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1474	beq.b		fso_infnan		# yes
1475	rts					# no
1476
1477fso_dbl_dnrm_zero:
1478	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1479	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1480	tst.l		LOCAL_LO(%a0)		# is it a zero?
1481	beq.b		fso_zero		# yes
1482fso_dbl_dnrm:
1483# here, we count on norm not to alter a0...
# (presumably norm returns the shift amount in d0 -- confirm against norm)
1484	bsr.l		norm			# normalize mantissa
1485	neg.w		%d0			# -shft amt
1486	addi.w		&0x3c01,%d0		# adjust new exponent
1487	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1488	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1489	rts
1490
1491#################################################################
1492
1493# fmove out took an unimplemented data type exception.
1494# the src operand is in FP_SRC. Call _fout() to write out the result and
1495# to determine which exceptions, if any, to take.
1496fu_out:
1497
1498# Separate packed move outs from the UNNORM and DENORM move outs.
1499	bfextu		EXC_CMDREG(%a6){&3:&3},%d0 # extract 3-bit field at bit offset 3
1500	cmpi.b		%d0,&0x3		# is it 0x3? (handled as packed)
1501	beq.w		fu_out_pack		# yes
1502	cmpi.b		%d0,&0x7		# is it 0x7? (handled as packed)
1503	beq.w		fu_out_pack		# yes
1504
1505
1506# I'm not sure at this point what FPSR bits are valid for this instruction.
1507# so, since the emulation routines re-create them anyways, zero exception field.
1508# fmove out doesn't affect ccodes.
1509	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1510
1511	fmov.l		&0x0,%fpcr		# zero current control regs
1512	fmov.l		&0x0,%fpsr
1513
1514# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515# call here. just figure out what it is...
1516	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1517	andi.w		&0x7fff,%d0		# strip sign
1518	beq.b		fu_out_denorm		# it's a DENORM
1519
1520	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1521	bsr.l		unnorm_fix		# yes; fix it
1522
1523	mov.b		%d0,STAG(%a6)		# save src optype tag
1524
1525	bra.b		fu_out_cont
1526fu_out_denorm:
1527	mov.b		&DENORM,STAG(%a6)	# set src optype tag
1528fu_out_cont:
1529
1530	clr.l		%d0
1531	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1532
1533	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1534
1535	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1536	bsr.l		fout			# call fmove out routine
1537
1538# Exceptions in order of precedence:
1539#	BSUN	: none
1540#	SNAN	: none
1541#	OPERR	: fmove.{b,w,l} out of large UNNORM
1542#	OVFL	: fmove.{s,d}
1543#	UNFL	: fmove.{s,d,x}
1544#	DZ	: none
1545#	INEX2	: all
1546#	INEX1	: none (packed doesn't travel through here)
1547
1548# determine the highest priority exception(if any) set by the
1549# emulation routine that has also been enabled by the user.
1550	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1551	bne.w		fu_out_ena		# some are enabled
1552
# normal exit for an fmove out: no exception needs to be reported.
1553fu_out_done:
1554
1555	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1556
1557# on extended precision opclass three instructions using pre-decrement or
1558# post-increment addressing mode, the address register is not updated. if the
1559# address register was the stack pointer used from user mode, then let's update
1560# it here. if it was used from supervisor mode, then we have to handle this
1561# as a special case.
1562	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1563	bne.b		fu_out_done_s		# supervisor
1564
1565	mov.l		EXC_A7(%a6),%a0		# restore a7
1566	mov.l		%a0,%usp
1567
1568fu_out_done_cont:
1569	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1570	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1572
1573	unlk		%a6
1574
1575	btst		&0x7,(%sp)		# is trace on?
1576	bne.b		fu_out_trace		# yes
1577
1578	bra.l		_fpsp_done
1579
1580# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581# ("fmov.x fpm,-(a7)") if so, the exception frame is shifted down by 0xc bytes
# and the result is copied by hand into the space the frame used to occupy.
1582fu_out_done_s:
1583	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1584	bne.b		fu_out_done_cont	# no
1585
1586# the extended precision result is still in fp0. but, we need to save it
1587# somewhere on the stack until we can copy it to its final resting place.
1588# here, we're counting on the top of the stack to be the old place-holders
1589# for fp0/fp1 which have already been restored. that way, we can write
1590# over those destinations with the shifted stack frame.
1591	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1592
1593	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1594	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1596
1597	mov.l		(%a6),%a6		# restore frame pointer
1598
# sp was not unlinked, so frame offsets are now taken as LOCAL_SIZE+off(%sp).
1599	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy long at EXC_SR down 0xc bytes
1600	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy long at 2+EXC_PC down 0xc bytes
1601
1602# now, copy the result to the proper place on the stack
1603	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1606
1607	add.l		&LOCAL_SIZE-0x8,%sp	# drop locals; sp presumably -> shifted frame (confirm EXC_SR offset)
1608
1609	btst		&0x7,(%sp)		# is trace on?
1610	bne.b		fu_out_trace		# yes
1611
1612	bra.l		_fpsp_done
1613
# at least one exception is enabled (d0 = FPCR_ENABLE byte on entry).
1614fu_out_ena:
1615	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1616	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1617	bne.b		fu_out_exc		# there is at least one set
1618
1619# no enabled exceptions were set.
1620# if a disabled overflow occurred and inexact was enabled but the result
1621# was exact, then a branch to _real_inex() is made.
1622	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623	beq.w		fu_out_done		# no
1624
1625fu_out_ovflchk:
1626	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627	beq.w		fu_out_done		# no
1628	bra.w		fu_inex			# yes
1629
1630#
1631# The fp move out that took the "Unimplemented Data Type" exception was
1632# being traced. Since the stack frames are similar, get the "current" PC
1633# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634#
1635#		  UNSUPP FRAME		   TRACE FRAME
1636#		*****************	*****************
1637#		*      EA	*	*    Current	*
1638#		*		*	*      PC	*
1639#		*****************	*****************
1640#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1641#		*****************	*****************
1642#		*     Next	*	*     Next	*
1643#		*      PC	*	*      PC	*
1644#		*****************	*****************
1645#		*      SR	*	*      SR	*
1646#		*****************	*****************
1647#
# entered with the frame already unlinked, so offsets are relative to sp.
1648fu_out_trace:
1649	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1650	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1651	bra.l		_real_trace
1652
1653# an exception occurred and that exception was enabled.
# d0 holds the bfffo result from fu_out_ena; it is used as the table index.
1654fu_out_exc:
1655	subi.l		&24,%d0			# fix offset to be 0-7
1656
1657# we don't mess with the existing fsave frame. just re-insert it and
1658# jump to the "_real_{}()" handler...
1659	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset
1660	jmp		(tbl_fu_out.b,%pc,%d0.w*1) # jump to handler
1661
1662	swbeg		&0x8
# 16-bit offsets relative to tbl_fu_out, one per exception (index 0-7).
1663tbl_fu_out:
1664	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1665	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
1666	short		fu_operr	- tbl_fu_out	# OPERR
1667	short		fu_ovfl		- tbl_fu_out	# OVFL
1668	short		fu_unfl		- tbl_fu_out	# UNFL
1669	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1670	short		fu_inex		- tbl_fu_out	# INEX2
1671	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1672
1673# for snan and operr, src op is still in FP_SRC so just
1674# frestore it. (ovfl and unfl first store the EXOP from fp1 into FP_SRC.)
# NOTE(review): the SNAN slot of tbl_fu_out does not point here; whether
# fu_snan is reached from elsewhere in the file needs confirming.
1675fu_snan:
1676	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1677	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1679
1680	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1681	mov.w		&0xe006,2+FP_SRC(%a6)	# save exc status
1682
1683	frestore	FP_SRC(%a6)		# restore src op
1684
1685	unlk		%a6
1686
1687
1688	bra.l		_real_snan
1689
# enabled OPERR on an fmove out: the src op already in FP_SRC is frestore'd
# with the OPERR status and control passes to _real_operr().
1690fu_operr:
1691	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1692	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1694
1695	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1696	mov.w		&0xe004,2+FP_SRC(%a6)	# save exc status
1697
1698	frestore	FP_SRC(%a6)		# restore src op
1699
1700	unlk		%a6
1701
1702
1703	bra.l		_real_operr
1704
# enabled OVFL on an fmove out: the EXOP in fp1 replaces the src op in
# FP_SRC before the frame is frestore'd and control passes to _real_ovfl().
1705fu_ovfl:
1706	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1707
1708	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1709	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1711
1712	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1713	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
1714
1715	frestore	FP_SRC(%a6)		# restore EXOP
1716
1717	unlk		%a6
1718
1719	bra.l		_real_ovfl
1720
1721# underflow can happen for extended precision. extended precision opclass
1722# three instruction exceptions don't update the stack pointer. so, if the
1723# exception occurred from user mode, then simply update a7 and exit normally.
1724# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(sp); if so, the exception frame is shifted down by 0xc bytes and the result
# is copied by hand into the space the frame used to occupy.
1725fu_unfl:
1726	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1727
1728	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1729	bne.w		fu_unfl_s		# supervisor
1730
1731	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1732	mov.l		%a0,%usp		# to or not...
1733
1734fu_unfl_cont:
1735	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1736
1737	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1738	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1740
1741	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1742	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1743
1744	frestore	FP_SRC(%a6)		# restore EXOP
1745
1746	unlk		%a6
1747
1748	bra.l		_real_unfl
1749
1750fu_unfl_s:
1751	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752	bne.b		fu_unfl_cont		# no
1753
1754# the extended precision result is still in fp0. but, we need to save it
1755# somewhere on the stack until we can copy it to its final resting place
1756# (where the exc frame is currently). make sure it's not at the top of the
1757# frame or it will get overwritten when the exc stack frame is shifted "down".
1758	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1759	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1760
1761	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1762	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1764
1765	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1766	mov.w		&0xe003,2+FP_DST(%a6)	# save exc status
1767
1768	frestore	FP_DST(%a6)		# restore EXOP
1769
1770	mov.l		(%a6),%a6		# restore frame pointer
1771
# sp was not unlinked, so frame offsets are now taken as LOCAL_SIZE+off(%sp).
1772	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy long at EXC_SR down 0xc bytes
1773	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy long at 2+EXC_PC down 0xc bytes
1774	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # copy long at EXC_EA down 0xc bytes
1775
1776# now, copy the result to the proper place on the stack
1777	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780
1781	add.l		&LOCAL_SIZE-0x8,%sp	# drop locals; sp presumably -> shifted frame (confirm EXC_SR offset)
1782
1783	bra.l		_real_unfl
1784
1785# fmove in and out enter here.
# store the EXOP (fmovm mask &0x40) into the FP_SRC area, restore the saved
# user registers, stamp the frame with vector offset 0xc4 and fsave status
# 0xe001, frestore it and exit through _real_inex.
1786fu_inex:
1787	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1788
1789	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1790	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1792
1793	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1794	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
1795
1796	frestore	FP_SRC(%a6)		# restore EXOP
1797
1798	unlk		%a6
1799
1800
1801	bra.l		_real_inex
1802
1803#########################################################################
1804#########################################################################
# fu_in_pack: emulate an instruction whose source operand is packed.
# the packed source is fetched by get_packed() and tagged, the destination
# register is loaded and tagged for dyadic operations, and the emulation
# routine is called through tbl_unsupp. afterwards, control goes to
# fu_in_ena_p if any exception is enabled in the FPCR, else it falls
# through to fu_in_cont_p.
1805fu_in_pack:
1806
1807
1808# I'm not sure at this point what FPSR bits are valid for this instruction.
1809# so, since the emulation routines re-create them anyways, zero exception field
1810	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field
1811
1812	fmov.l		&0x0,%fpcr		# zero current control regs
1813	fmov.l		&0x0,%fpsr
1814
1815	bsr.l		get_packed		# fetch packed src operand
1816
1817	lea		FP_SRC(%a6),%a0		# pass ptr to src
1818	bsr.l		set_tag_x		# set src optype tag
1819
1820	mov.b		%d0,STAG(%a6)		# save src optype tag
1821
# the dst register number is extracted before the monadic/dyadic test; it
# is only consumed by load_fpn2 on the dyadic path.
1822	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823
1824# bit five of the fp extension word separates the monadic and dyadic operations
1825# at this point
1826	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1827	beq.b		fu_extract_p		# monadic
1828	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1829	beq.b		fu_extract_p		# yes, so it's monadic, too
1830
1831	bsr.l		load_fpn2		# load dst into FP_DST
1832
1833	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1834	bsr.l		set_tag_x		# tag the operand type
1835	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1836	bne.b		fu_op2_done_p		# no
1837	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1838fu_op2_done_p:
1839	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1840
1841fu_extract_p:
1842	clr.l		%d0
1843	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1844
1845	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846
1847	lea		FP_SRC(%a6),%a0		# pass ptr to src op
1848	lea		FP_DST(%a6),%a1		# pass ptr to dst op
1849
# tbl_unsupp holds one longword per extension value; the fetched longword is
# used as an offset from tbl_unsupp to reach the emulation routine.
1850	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1852
1853#
1854# Exceptions in order of precedence:
1855#	BSUN	: none
1856#	SNAN	: all dyadic ops
1857#	OPERR	: fsqrt(-NORM)
1858#	OVFL	: all except ftst,fcmp
1859#	UNFL	: all except ftst,fcmp
1860#	DZ	: fdiv
1861#	INEX2	: all except ftst,fcmp
1862#	INEX1	: all
1863#
1864
1865# we determine the highest priority exception(if any) set by the
1866# emulation routine that has also been enabled by the user.
1867	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1868	bne.w		fu_in_ena_p		# some are enabled
1869
# fu_in_cont_p: no enabled exception has to be reported. store the result
# (unless the operation is fcmp or ftst), update the user stack pointer when
# the exception came from user mode, restore the saved registers and exit
# through _fpsp_done, or through fu_trace_p when trace is on.
1870fu_in_cont_p:
1871# fcmp and ftst do not store any result.
1872	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1873	andi.b		&0x38,%d0		# extract bits 3-5
1874	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1875	beq.b		fu_in_exit_p		# yes
1876
1877	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878	bsr.l		store_fpreg		# store the result
1879
1880fu_in_exit_p:
1881
1882	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1883	bne.w		fu_in_exit_s_p		# supervisor
1884
1885	mov.l		EXC_A7(%a6),%a0		# update user a7
1886	mov.l		%a0,%usp
1887
1888fu_in_exit_cont_p:
1889	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1890	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1892
1893	unlk		%a6			# unravel stack frame
1894
# after unlk, (sp) is the first byte of the exception frame; its bit 7 is
# the trace bit tested here.
1895	btst		&0x7,(%sp)		# is trace on?
1896	bne.w		fu_trace_p		# yes
1897
1898	bra.l		_fpsp_done		# exit to os
1899
1900# the exception occurred in supervisor mode. check to see if the
1901# addressing mode was (a7)+. if so, we'll need to shift the
1902# stack frame "up".
1903fu_in_exit_s_p:
1904	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905	beq.b		fu_in_exit_cont_p	# no
1906
1907	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1908	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1910
1911	unlk		%a6			# unravel stack frame
1912
1913# shift the stack frame "up". we don't really care about the <ea> field.
# the two longwords at 0x0 and 0x4 are copied 0xc bytes higher, the upper
# one first so the source is not overwritten, then sp is advanced by 0xc.
1914	mov.l		0x4(%sp),0x10(%sp)
1915	mov.l		0x0(%sp),0xc(%sp)
1916	add.l		&0xc,%sp
1917
1918	btst		&0x7,(%sp)		# is trace on?
1919	bne.w		fu_trace_p		# yes
1920
1921	bra.l		_fpsp_done		# exit to os
1922
# fu_in_ena_p: entered with d0 = FPCR exception enable byte (non-zero).
# d0 is masked with the exceptions actually set; bfffo then returns the bit
# offset (24-31) of the highest priority one, and sets Z if none remain.
1923fu_in_ena_p:
1924	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
1925	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1926	bne.b		fu_in_exc_p		# at least one was set
1927
1928#
1929# No exceptions occurred that were also enabled. Now:
1930#
1931#	if (OVFL && ovfl_disabled && inexact_enabled) {
1932#	    branch to _real_inex() (even if the result was exact!);
1933#	} else {
1934#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1935#	    return;
1936#	}
1937#
1938	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939	beq.w		fu_in_cont_p		# no
1940
1941fu_in_ovflchk_p:
1942	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943	beq.w		fu_in_cont_p		# no
1944	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1945
1946#
1947# An exception occurred and that exception was enabled:
1948#
1949#	shift enabled exception field into lo byte of d0;
1950#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952#		/*
1953#		 * this is the case where we must call _real_inex() now or else
1954#		 * there will be no other way to pass it the exceptional operand
1955#		 */
1956#		call _real_inex();
1957#	} else {
1958#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959#	}
1960#
# on entry d0 holds the bfffo bit offset (24-31) of the highest priority
# exception that is both enabled and set.
1961fu_in_exc_p:
1962	subi.l		&24,%d0			# fix offset to be 0-7
1963	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1964	blt.b		fu_in_exc_exit_p	# no
1965
1966# the enabled exception was inexact
1967	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968	bne.w		fu_in_exc_unfl_p	# yes
1969	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970	bne.w		fu_in_exc_ovfl_p	# yes
1971
1972# here, we insert the correct fsave status value into the fsave frame for the
1973# corresponding exception. the operand in the fsave frame should be the original
1974# src operand.
1975# as a reminder for future predicted pain and agony, we are passing in fsave the
1976# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1978fu_in_exc_exit_p:
1979	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1980	bne.w		fu_in_exc_exit_s_p	# supervisor
1981
1982	mov.l		EXC_A7(%a6),%a0		# update user a7
1983	mov.l		%a0,%usp
1984
1985fu_in_exc_exit_cont_p:
# d0.w (0-7) indexes the word table tbl_except_p; the word fetched is
# written into the status field of the fsave frame at FP_SRC.
1986	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1987
1988	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1989	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1991
1992	frestore	FP_SRC(%a6)		# restore src op
1993
1994	unlk		%a6
1995
1996	btst		&0x7,(%sp)		# is trace enabled?
1997	bne.w		fu_trace_p		# yes
1998
1999	bra.l		_fpsp_done
2000
# tbl_except_p: fsave status words, indexed by the 0-7 exception number
# computed in fu_in_exc_p. the entry order presumably follows the precedence
# list BSUN,SNAN,OPERR,OVFL,UNFL,DZ,INEX2,INEX1 -- confirm against the bit
# layout of the FPSR exception byte. this table is addressed with a byte
# displacement (tbl_except_p.b), so it must stay close to its users.
2001tbl_except_p:
2002	short		0xe000,0xe006,0xe004,0xe005
2003	short		0xe003,0xe002,0xe001,0xe001
2004
# enabled inexact with a disabled overflow: force table index 3 (0xe005).
2005fu_in_exc_ovfl_p:
2006	mov.w		&0x3,%d0
2007	bra.w		fu_in_exc_exit_p
2008
# enabled inexact with a disabled underflow: force table index 4 (0xe003).
2009fu_in_exc_unfl_p:
2010	mov.w		&0x4,%d0
2011	bra.w		fu_in_exc_exit_p
2012
# supervisor-mode variant of fu_in_exc_exit_cont_p. if the <ea> mode was not
# (a7)+ the common path is used; otherwise the exception frame is also
# shifted "up" 0xc bytes after the fsave frame has been restored.
2013fu_in_exc_exit_s_p:
2014	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
2015	beq.b		fu_in_exc_exit_cont_p	# no
2016
2017	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
2018
2019	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2020	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2022
2023	frestore	FP_SRC(%a6)		# restore src op
2024
2025	unlk		%a6			# unravel stack frame
2026
2027# shift stack frame "up". who cares about <ea> field.
2028	mov.l		0x4(%sp),0x10(%sp)
2029	mov.l		0x0(%sp),0xc(%sp)
2030	add.l		&0xc,%sp
2031
2032	btst		&0x7,(%sp)		# is trace on?
2033	bne.b		fu_trace_p		# yes
2034
2035	bra.l		_fpsp_done		# exit to os
2036
2037#
2038# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039# exception was being traced. Make the "current" PC the FPIAR and put it in the
2040# trace stack frame then jump to _real_trace().
2041#
2042#		  UNSUPP FRAME		   TRACE FRAME
2043#		*****************	*****************
2044#		*      EA	*	*    Current	*
2045#		*		*	*      PC	*
2046#		*****************	*****************
2047#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
2048#		*****************	*****************
2049#		*     Next	*	*     Next	*
2050#		*      PC	*	*      PC	*
2051#		*****************	*****************
2052#		*      SR	*	*      SR	*
2053#		*****************	*****************
2054fu_trace_p:
2055	mov.w		&0x2024,0x6(%sp)	# format 0x2, vector offset 0x024
2056	fmov.l		%fpiar,0x8(%sp)		# "Current PC" = FPIAR
2057
2058	bra.l		_real_trace
2059
2060#########################################################
2061#########################################################
# fu_out_pack: emulate a packed "fmove out". the source FP register is
# loaded and tagged, then fout() is called to perform the move. afterwards,
# control goes to fu_out_ena_p if any exception is enabled in the FPCR,
# else it falls through to fu_out_exit_p.
2062fu_out_pack:
2063
2064
2065# I'm not sure at this point what FPSR bits are valid for this instruction.
2066# so, since the emulation routines re-create them anyways, zero exception field.
2067# fmove out doesn't affect ccodes.
2068	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2069
2070	fmov.l		&0x0,%fpcr		# zero current control regs
2071	fmov.l		&0x0,%fpsr
2072
2073	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract src reg number
2074	bsr.l		load_fpn1		# load src operand (tagged below via FP_SRC)
2075
2076# unlike other opclass 3, unimplemented data type exceptions, packed must be
2077# able to detect all operand types.
2078	lea		FP_SRC(%a6),%a0
2079	bsr.l		set_tag_x		# tag the operand type
2080	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2081	bne.b		fu_op2_p		# no
2082	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2083
2084fu_op2_p:
2085	mov.b		%d0,STAG(%a6)		# save src optype tag
2086
2087	clr.l		%d0
2088	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2089
2090	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2091
# the caller's a6 (saved at (a6) by link) is copied to EXC_A6 before fout()
# runs; the exit paths copy EXC_A6 back to (a6) afterwards.
2092	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2093	bsr.l		fout			# call fmove out routine
2094
2095# Exceptions in order of precedence:
2096#	BSUN	: no
2097#	SNAN	: yes
2098#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099#	OVFL	: no
2100#	UNFL	: no
2101#	DZ	: no
2102#	INEX2	: yes
2103#	INEX1	: no
2104
2105# determine the highest priority exception(if any) set by the
2106# emulation routine that has also been enabled by the user.
2107	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2108	bne.w		fu_out_ena_p		# some are enabled
2109
# fu_out_exit_p: normal exit for packed fmove out when no enabled exception
# has to be reported. exits through _fpsp_done, or fu_trace_p if tracing.
2110fu_out_exit_p:
2111	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2112
2113	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2114	bne.b		fu_out_exit_s_p		# supervisor
2115
2116	mov.l		EXC_A7(%a6),%a0		# update user a7
2117	mov.l		%a0,%usp
2118
2119fu_out_exit_cont_p:
2120	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2121	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2123
2124	unlk		%a6			# unravel stack frame
2125
2126	btst		&0x7,(%sp)		# is trace on?
2127	bne.w		fu_trace_p		# yes
2128
2129	bra.l		_fpsp_done		# exit to os
2130
2131# the exception occurred in supervisor mode. check to see if the
2132# addressing mode was -(a7). if so, we'll need to shift the
2133# stack frame "down".
2134fu_out_exit_s_p:
2135	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136	beq.b		fu_out_exit_cont_p	# no
2137
2138	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2139	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2141
2142	mov.l		(%a6),%a6		# restore frame pointer
2143
# no unlk was done, so the frame is addressed as LOCAL_SIZE+<offset> from
# sp. move SR and PC/vector offset "down" 0xc bytes; unlike the
# exception-reporting paths, the <ea> longword is not copied here.
2144	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146
2147# now, copy the result to the proper place on the stack
# (the packed result was left in FP_DST; see _mem_write2 for the supervisor
# mode store into FP_DST)
2148	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151
2152	add.l		&LOCAL_SIZE-0x8,%sp	# drop locals; sp -> shifted frame (presumably)
2153
2154	btst		&0x7,(%sp)		# is trace on?
2155	bne.w		fu_trace_p		# yes
2156
2157	bra.l		_fpsp_done		# exit to os
2158
# fu_out_ena_p: entered with d0 = FPCR exception enable byte (non-zero).
# bfffo leaves the bit offset (24-31) of the highest priority exception that
# is both enabled and set in d0, and sets Z if there is none.
2159fu_out_ena_p:
2160	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
2161	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2162	beq.w		fu_out_exit_p		# none enabled & set; normal exit
2163
2164	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2165
2166# an exception occurred and that exception was enabled.
2167# the only exception possible on packed move out are INEX, OPERR, and SNAN.
# d0 still holds the raw bfffo offset here (24 was not subtracted), so 0x1a
# is bit offset 26: above it is inexact, equal is operand error, and below
# falls through to fu_snan_p.
2168fu_out_exc_p:
2169	cmpi.b		%d0,&0x1a
2170	bgt.w		fu_inex_p2
2171	beq.w		fu_operr_p
2172
# fu_snan_p: enabled SNAN on packed fmove out. from user mode the user stack
# pointer is updated and the common fu_snan exit is used. from supervisor
# mode the common exit is used unless the <ea> mode was -(a7).
2173fu_snan_p:
2174	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2175	bne.b		fu_snan_s_p		# supervisor
2176
2177	mov.l		EXC_A7(%a6),%a0		# update user a7
2178	mov.l		%a0,%usp
2179	bra.w		fu_snan
2180
2181fu_snan_s_p:
2182	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2183	bne.w		fu_snan			# no
2184
2185# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186# the strategy is to move the exception frame "down" 12 bytes. then, we
2187# can store the default result where the exception frame was.
2188	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2189	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2191
2192	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
2193	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
2194
2195	frestore	FP_SRC(%a6)		# restore src operand
2196
2197	mov.l		(%a6),%a6		# restore frame pointer
2198
# shift SR, PC/vector offset and EA "down" 0xc bytes (sp-relative since no
# unlk was done).
2199	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202
2203# now, we copy the default result to its proper location
2204	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207
2208	add.l		&LOCAL_SIZE-0x8,%sp
2209
2210
2211	bra.l		_real_snan
2212
# fu_operr_p: enabled OPERR on packed fmove out. from user mode the user
# stack pointer is updated and the common fu_operr exit is used. from
# supervisor mode the common exit is used unless the <ea> mode was -(a7).
2213fu_operr_p:
2214	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2215	bne.w		fu_operr_p_s		# supervisor
2216
2217	mov.l		EXC_A7(%a6),%a0		# update user a7
2218	mov.l		%a0,%usp
2219	bra.w		fu_operr
2220
2221fu_operr_p_s:
2222	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2223	bne.w		fu_operr		# no
2224
2225# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226# the strategy is to move the exception frame "down" 12 bytes. then, we
2227# can store the default result where the exception frame was.
2228	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2229	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2231
2232	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
2233	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
2234
2235	frestore	FP_SRC(%a6)		# restore src operand
2236
2237	mov.l		(%a6),%a6		# restore frame pointer
2238
# shift SR, PC/vector offset and EA "down" 0xc bytes (sp-relative since no
# unlk was done).
2239	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242
2243# now, we copy the default result to its proper location
2244	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247
2248	add.l		&LOCAL_SIZE-0x8,%sp
2249
2250
2251	bra.l		_real_operr
2252
# fu_inex_p2: enabled INEX on packed fmove out. from user mode the user
# stack pointer is updated and the common fu_inex exit is used. from
# supervisor mode the common exit is used unless the <ea> mode was -(a7).
2253fu_inex_p2:
2254	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2255	bne.w		fu_inex_s_p2		# supervisor
2256
2257	mov.l		EXC_A7(%a6),%a0		# update user a7
2258	mov.l		%a0,%usp
2259	bra.w		fu_inex
2260
2261fu_inex_s_p2:
2262	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2263	bne.w		fu_inex			# no
2264
2265# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266# the strategy is to move the exception frame "down" 12 bytes. then, we
2267# can store the default result where the exception frame was.
2268	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2269	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2271
2272	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
2273	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
2274
2275	frestore	FP_SRC(%a6)		# restore src operand
2276
2277	mov.l		(%a6),%a6		# restore frame pointer
2278
# shift SR, PC/vector offset and EA "down" 0xc bytes (sp-relative since no
# unlk was done).
2279	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282
2283# now, we copy the default result to its proper location
2284	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287
2288	add.l		&LOCAL_SIZE-0x8,%sp
2289
2290
2291	bra.l		_real_inex
2292
2293#########################################################################
2294
2295#
2296# if we're stuffing a source operand back into an fsave frame then we
2297# have to make sure that for single or double source operands that the
2298# format stuffed is as weird as the hardware usually makes it.
2299#
# input:  a6 = local frame pointer; FP_SRC(a6) holds the source operand and
#	  EXC_EXTWORD(a6) the extension word.
# output: FP_SRC(a6) rewritten in place for small sgl/dbl exponents;
#	  otherwise left untouched.
# uses:   d0, d1 (and a0 plus whatever dnrm_lp uses on the dbl path).
2300	global		funimp_skew
2301funimp_skew:
2302	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303	cmpi.b		%d0,&0x1		# was src sgl?
2304	beq.b		funimp_skew_sgl		# yes
2305	cmpi.b		%d0,&0x5		# was src dbl?
2306	beq.b		funimp_skew_dbl		# yes
2307	rts
2308
# single precision source: only exponents in 1..0x3f80 are skewed. the hi
# mantissa is shifted right by (0x3f81 - exponent), the j-bit is forced on
# and the exponent is replaced by 0x3f80 (sign preserved).
2309funimp_skew_sgl:
2310	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2311	andi.w		&0x7fff,%d0		# strip sign
2312	beq.b		funimp_skew_sgl_not	# zero exponent; leave as is
2313	cmpi.w		%d0,&0x3f80
2314	bgt.b		funimp_skew_sgl_not	# exponent > 0x3f80; leave as is
2315	neg.w		%d0			# make exponent negative
2316	addi.w		&0x3f81,%d0		# find amt to shift
2317	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2318	lsr.l		%d0,%d1			# shift it
2319	bset		&31,%d1			# set j-bit
2320	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2321	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2322	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2323funimp_skew_sgl_not:
2324	rts
2325
# double precision source: only exponents in 1..0x3c00 are skewed. the sign
# is parked in the byte at 0x2+FP_SRC while dnrm_lp denormalizes the operand
# against threshold 0x3c01; then exponent 0x3c00 (plus sign) is inserted.
2326funimp_skew_dbl:
2327	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2328	andi.w		&0x7fff,%d0		# strip sign
2329	beq.b		funimp_skew_dbl_not	# zero exponent; leave as is
2330	cmpi.w		%d0,&0x3c00
2331	bgt.b		funimp_skew_dbl_not	# exponent > 0x3c00; leave as is
2332
2333	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2334	smi.b		0x2+FP_SRC(%a6)		# sign byte = 0xff if negative
2335	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2336	clr.l		%d0			# clear g,r,s
2337	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2338	mov.w		&0x3c01,%d1		# pass denorm threshold
2339	bsr.l		dnrm_lp			# denorm it
2340	mov.w		&0x3c00,%d0		# new exponent
2341	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2342	beq.b		fss_dbl_denorm_done	# no
2343	bset		&15,%d0			# set sign
2344fss_dbl_denorm_done:
# bit 7 of the first (most significant) byte of FP_SRC_HI, i.e. bit 31 of
# the hi mantissa longword.
2345	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2346	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2347funimp_skew_dbl_not:
2348	rts
2349
2350#########################################################################
# _mem_write2: store wrapper that depends on the mode the exception was
# taken from. in user mode (SR bit 5 clear) it branches to _dmem_write,
# which returns directly to the caller. in supervisor mode it copies three
# longwords (12 bytes) from (a0) into FP_DST and returns with d1 = 0
# (presumably the "no error" status callers test -- confirm).
2351	global		_mem_write2
2352_mem_write2:
2353	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2354	beq.l		_dmem_write		# user; do the real memory write
2355	mov.l		0x0(%a0),FP_DST_EX(%a6)
2356	mov.l		0x4(%a0),FP_DST_HI(%a6)
2357	mov.l		0x8(%a0),FP_DST_LO(%a6)
2358	clr.l		%d1
2359	rts
2360
2361#########################################################################
2362# XDEF ****************************************************************	#
2363#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2364#			effective address" exception.			#
2365#									#
2366#	This handler should be the first code executed upon taking the	#
2367#	FP Unimplemented Effective Address exception in an operating	#
2368#	system.								#
2369#									#
2370# XREF ****************************************************************	#
2371#	_imem_read_long() - read instruction longword			#
2372#	fix_skewed_ops() - adjust src operand in fsave frame		#
2373#	set_tag_x() - determine optype of src/dst operands		#
2374#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2375#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2376#	load_fpn2() - load dst operand from FP regfile			#
2377#	tbl_unsupp - address table of emulation routines, opclass 0,2	#
2378#	decbin() - convert packed data to FP binary data		#
2379#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2380#	_real_access() - "callout" for access error exception		#
2381#	_mem_read() - read extended immediate operand from memory	#
2382#	_fpsp_done() - "callout" for exit; work all done		#
2383#	_real_trace() - "callout" for Trace enabled exception		#
2384#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2385#	fmovm_ctrl() - emulate fmovm control instruction		#
2386#									#
2387# INPUT ***************************************************************	#
2388#	- The system stack contains the "Unimplemented <ea>" stk frame	#
2389#									#
2390# OUTPUT **************************************************************	#
2391#	If access error:						#
2392#	- The system stack is changed to an access error stack frame	#
2393#	If FPU disabled:						#
2394#	- The system stack is changed to an FPU disabled stack frame	#
2395#	If Trace exception enabled:					#
2396#	- The system stack is changed to a Trace exception stack frame	#
2397#	Else: (normal case)						#
2398#	- None (correct result has been stored as appropriate)		#
2399#									#
2400# ALGORITHM ***********************************************************	#
2401#	This exception handles 3 types of operations:			#
2402# (1) FP Instructions using extended precision or packed immediate	#
2403#     addressing mode.							#
2404# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2405# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2406#									#
2407#	For immediate data operations, the data is read in w/ a		#
2408# _mem_read() "callout", converted to FP binary (if packed), and used	#
2409# as the source operand to the instruction specified by the instruction	#
2410# word. If no FP exception should be reported as a result of the	#
2411# emulation, then the result is stored to the destination register and	#
2412# the handler exits through _fpsp_done(). If an enabled exc has been	#
2413# signalled as a result of emulation, then an fsave state frame		#
2414# corresponding to the FP exception type must be entered into the 060	#
2415# FPU before exiting. In either the enabled or disabled cases, we	#
2416# must also check if a Trace exception is pending, in which case, we	#
2417# must create a Trace exception stack frame from the current exception	#
2418# stack frame. If no Trace is pending, we simply exit through		#
2419# _fpsp_done().								#
2420#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
2421# decode and emulate the instruction. No FP exceptions can be pending	#
2422# as a result of this operation emulation. A Trace exception can be	#
2423# pending, though, which means the current stack frame must be changed	#
2424# to a Trace stack frame and an exit made through _real_trace().	#
2425# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2426# was executed from supervisor mode, this handler must store the FP	#
2427# register file values to the system stack by itself since		#
2428# fmovm_dynamic() can't handle this. A normal exit is made through	#
2429# _fpsp_done().								#
2430#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2431# Again, a Trace exception may be pending and an exit made through	#
2432# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2433#									#
2434#	Before any of the above is attempted, it must be checked to	#
2435# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2436# before the "FPU disabled" exception, but the "FPU disabled" exception	#
2437# has higher priority, we check the disabled bit in the PCR. If set,	#
2438# then we must create an 8 word "FPU disabled" exception stack frame	#
2439# from the current 4 word exception stack frame. This includes		#
2440# reproducing the effective address of the instruction to put on the	#
2441# new stack frame.							#
2442#									#
2443#	In the process of all emulation work, if a _mem_read()		#
2444# "callout" returns a failing result indicating an access error, then	#
2445# we must create an access error stack frame from the current stack	#
2446# frame. This information includes a faulting address and a fault-	#
2447# status-longword. These are created within this handler.		#
2448#									#
2449#########################################################################
2450
2451	global		_fpsp_effadd
2452_fpsp_effadd:
2453
2454# This exception type takes priority over the "Line F Emulator"
2455# exception. Therefore, the FPU could be disabled when entering here.
2456# So, we must check to see if it's disabled and handle that case separately.
# note: iea_disabled is entered with the saved d0 still on the stack.
2457	mov.l		%d0,-(%sp)		# save d0
2458	movc		%pcr,%d0		# load proc cr
2459	btst		&0x1,%d0		# is FPU disabled?
2460	bne.w		iea_disabled		# yes
2461	mov.l		(%sp)+,%d0		# restore d0
2462
2463	link		%a6,&-LOCAL_SIZE	# init stack frame
2464
2465	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2466	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
2468
2469# PC of instruction that took the exception is the PC in the frame
2470	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471
2472	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2473	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2474	bsr.l		_imem_read_long		# fetch the instruction words
2475	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2476
2477#########################################################################
2478
# the low word of d0 is the extension word; its sign bit (bit 15) set
# selects the fmovem path.
2479	tst.w		%d0			# is operation fmovem?
2480	bmi.w		iea_fmovm		# yes
2481
2482#
2483# here, we will have:
2484#	fabs	fdabs	fsabs		facos		fmod
2485#	fadd	fdadd	fsadd		fasin		frem
2486#	fcmp				fatan		fscale
2487#	fdiv	fddiv	fsdiv		fatanh		fsin
2488#	fint				fcos		fsincos
2489#	fintrz				fcosh		fsinh
2490#	fmove	fdmove	fsmove		fetox		ftan
2491#	fmul	fdmul	fsmul		fetoxm1		ftanh
2492#	fneg	fdneg	fsneg		fgetexp		ftentox
2493#	fsgldiv				fgetman		ftwotox
2494#	fsglmul				flog10
2495#	fsqrt				flog2
2496#	fsub	fdsub	fssub		flogn
2497#	ftst				flognp1
2498# which can all use f<op>.{x,p}
2499# so, now it's immediate data extended precision AND PACKED FORMAT!
2500#
# iea_op: immediate-operand arithmetic instruction. d0 still holds the
# opword/extension word pair read by the entry code. the 12-byte immediate
# that follows the instruction words is read into FP_SRC, either here
# (extended) or in iea_op_pack (packed).
2501iea_op:
2502	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep FPSR bytes 2 and 0; clear the rest
2503
2504	btst		&0xa,%d0		# is src fmt x or p?
2505	bne.b		iea_op_pack		# packed
2506
2507
2508	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2509	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2510	mov.l		&0xc,%d0		# pass: 12 bytes
2511	bsr.l		_imem_read		# read extended immediate
2512
2513	tst.l		%d1			# did ifetch fail?
2514	bne.w		iea_iacc		# yes
2515
2516	bra.b		iea_op_setsrc
2517
# iea_op_pack: read a 12-byte packed immediate into FP_SRC. INF/NAN and
# ZERO operands are passed on unchanged; anything else is converted to
# extended precision by decbin() and written back to FP_SRC.
2518iea_op_pack:
2519
2520	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2521	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
2522	mov.l		&0xc,%d0		# pass: 12 bytes
2523	bsr.l		_imem_read		# read packed operand
2524
2525	tst.l		%d1			# did ifetch fail?
2526	bne.w		iea_iacc		# yes
2527
2528# The packed operand is an INF or a NAN if the exponent field is all ones.
2529	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
2530	cmpi.w		%d0,&0x7fff		# INF or NAN?
2531	beq.b		iea_op_setsrc		# operand is an INF or NAN
2532
2533# The packed operand is a zero if the mantissa is all zero, else it's
2534# a normal packed op.
2535	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
2536	andi.b		&0x0f,%d0		# clear all but last nybble
2537	bne.b		iea_op_gp_not_spec	# not a zero
2538	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
2539	bne.b		iea_op_gp_not_spec	# not a zero
2540	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
2541	beq.b		iea_op_setsrc		# operand is a ZERO
2542iea_op_gp_not_spec:
2543	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
2544	bsr.l		decbin			# convert to extended
2545	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2546
# iea_op_setsrc: step the extension word pointer past the 12-byte immediate,
# tag the source operand, and fetch and tag the destination operand for
# dyadic operations. STORE_FLG is set non-zero for ftst and fcmp, which
# store no result.
2547iea_op_setsrc:
2548	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
2549
2550# FP_SRC now holds the src operand.
2551	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2552	bsr.l		set_tag_x		# tag the operand type
2553	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
2554	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2555	bne.b		iea_op_getdst		# no
2556	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2557	mov.b		%d0,STAG(%a6)		# set new optype tag
2558iea_op_getdst:
2559	clr.b		STORE_FLG(%a6)		# clear "store result" boolean
2560
2561	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
2562	beq.b		iea_op_extract		# monadic
2563	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
2564	bne.b		iea_op_spec		# yes
2565
2566iea_op_loaddst:
2567	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568	bsr.l		load_fpn2		# load dst operand
2569
2570	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
2571	bsr.l		set_tag_x		# tag the operand type
2572	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
2573	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2574	bne.b		iea_op_extract		# no
2575	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2576	mov.b		%d0,DTAG(%a6)		# set new optype tag
2577	bra.b		iea_op_extract
2578
2579# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580iea_op_spec:
2581	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
2582	beq.b		iea_op_extract		# yes
2583# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584# store a result. then, only fcmp will branch back and pick up a dst operand.
2585	st		STORE_FLG(%a6)		# don't store a final result
2586	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
2587	beq.b		iea_op_loaddst		# yes
# ftst falls through to iea_op_extract without loading a dst operand.
2588
# iea_op_extract: call the emulation routine selected by the low 7 bits of
# the command word through tbl_unsupp, passing d0 = rnd mode/prec,
# a0 = ptr to src and a1 = ptr to dst. then test whether any exception is
# enabled in the FPCR.
2589iea_op_extract:
2590	clr.l		%d0
2591	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
2592
2593	mov.b		1+EXC_CMDREG(%a6),%d1
2594	andi.w		&0x007f,%d1		# extract extension
2595
2596	fmov.l		&0x0,%fpcr		# zero current control regs
2597	fmov.l		&0x0,%fpsr
2598
2599	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2600	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
2601
# only d1.w is valid after the andi.w above, hence the word-sized index.
2602	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
2604
2605#
2606# Exceptions in order of precedence:
2607#	BSUN	: none
2608#	SNAN	: all operations
2609#	OPERR	: all reg-reg or mem-reg operations that can normally operr
2610#	OVFL	: same as OPERR
2611#	UNFL	: same as OPERR
2612#	DZ	: same as OPERR
2613#	INEX2	: same as OPERR
2614#	INEX1	: all packed immediate operations
2615#
2616
2617# we determine the highest priority exception(if any) set by the
2618# emulation routine that has also been enabled by the user.
2619	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2620	bne.b		iea_op_ena		# some are enabled
2621
2622# now, we save the result, unless, of course, the operation was ftst or fcmp.
2623# these don't save results.
2624iea_op_save:
2625	tst.b		STORE_FLG(%a6)		# does this op store a result?
2626	bne.b		iea_op_exit1		# exit with no frestore
2627
2628iea_op_store:
2629	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630	bsr.l		store_fpreg		# store the result
2631
# exit path used when no fsave frame has to be restored into the FPU.
2632iea_op_exit1:
2633	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635
2636	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2637	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2639
2640	unlk		%a6			# unravel the frame
2641
2642	btst		&0x7,(%sp)		# is trace on?
2643	bne.w		iea_op_trace		# yes
2644
2645	bra.l		_fpsp_done		# exit to os
2646
# iea_op_ena: at least one exception is enabled. on entry d0 holds the
# FPCR_ENABLE byte. AND it with the exceptions that were actually set and
# locate the first set bit (bfffo scans from the most significant bit of the
# low byte, so d0 becomes 24-31 when a bit is found).
2647iea_op_ena:
2648	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enable and set
2649	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2650	bne.b		iea_op_exc		# at least one was set
2651
2652# no exception occurred. now, did a disabled, exact overflow occur with inexact
2653# enabled? if so, then we have to stuff an overflow frame into the FPU.
2654	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655	beq.b		iea_op_save		# no; take the normal save path
2656
2657iea_op_ovfl:
2658	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659	beq.b		iea_op_store		# no
2660	bra.b		iea_op_exc_ovfl		# yes
2661
2662# an enabled exception occurred. we have to insert the exception type back into
2663# the machine.
# iea_op_exc: an enabled exception was found. on entry d0 holds the bfffo
# bit offset (24-31). a status word for that exception is written to
# 2+FP_SRC(%a6) and the frame at FP_SRC is then frestore'd in iea_op_exit2.
2664iea_op_exc:
2665	subi.l		&24,%d0			# fix offset to be 0-7
2666	cmpi.b		%d0,&0x6		# is exception INEX?
2667	bne.b		iea_op_exc_force	# no
2668
2669# the enabled exception was inexact. so, if it occurs with an overflow
2670# or underflow that was disabled, then we have to force an overflow or
2671# underflow frame.
2672	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673	bne.b		iea_op_exc_ovfl		# yes
2674	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675	bne.b		iea_op_exc_unfl		# yes
2676
2677iea_op_exc_force:
2678	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679	bra.b		iea_op_exit2		# exit with frestore
2680
# one word per exception, indexed by the 0-7 offset computed above
# (index 0 corresponds to the most significant bit of the exception byte).
2681tbl_iea_except:
2682	short		0xe002, 0xe006, 0xe004, 0xe005
2683	short		0xe003, 0xe002, 0xe001, 0xe001
2684
2685iea_op_exc_ovfl:
2686	mov.w		&0xe005,2+FP_SRC(%a6)	# force the overflow status word
2687	bra.b		iea_op_exit2
2688
2689iea_op_exc_unfl:
2690	mov.w		&0xe003,2+FP_SRC(%a6)	# force the underflow status word
2691
# iea_op_exit2: same unwinding as iea_op_exit1, but the frame at FP_SRC is
# frestore'd first so that the exceptional state is put back into the FPU.
2692iea_op_exit2:
2693	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695
2696	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2697	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2699
2700	frestore	FP_SRC(%a6)		# restore exceptional state
2701
2702	unlk		%a6			# unravel the frame
2703
2704	btst		&0x7,(%sp)		# is trace on?
2705	bne.b		iea_op_trace		# yes
2706
2707	bra.l		_fpsp_done		# exit to os
2708
2709#
2710# The opclass two instruction that took an "Unimplemented Effective Address"
2711# exception was being traced. Make the "current" PC the FPIAR and put it in
2712# the trace stack frame then jump to _real_trace().
2713#
2714#		 UNIMP EA FRAME		   TRACE FRAME
2715#		*****************	*****************
2716#		* 0x0 *  0x0f0	*	*    Current	*
2717#		*****************	*      PC	*
2718#		*    Current	*	*****************
2719#		*      PC	*	* 0x2 *  0x024	*
2720#		*****************	*****************
2721#		*      SR	*	*     Next	*
2722#		*****************	*      PC	*
2723#					*****************
2724#					*      SR	*
2725#					*****************
# iea_op_trace: grow the exception frame by one longword and rewrite it as a
# trace frame before exiting through _real_trace(). the "Current PC" comes
# from the FPIAR, which the exit paths loaded from the stacked PC.
2726iea_op_trace:
2727	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2728	mov.w		0x8(%sp),0x4(%sp)	# copy the remaining word of the old frame down
2729	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2730	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2731
2732	bra.l		_real_trace
2733
2734#########################################################################
# iea_fmovm: the instruction is an fmovm. bit 14 of d0 selects between the
# control register form (clear) and the data register form (set). for the
# data register form, the mode bit in the stacked SR selects the user or
# supervisor path.
2735iea_fmovm:
2736	btst		&14,%d0			# ctrl or data reg
2737	beq.w		iea_fmovm_ctrl
2738
2739iea_fmovm_data:
2740
2741	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2742	bne.b		iea_fmovm_data_s
2743
# user mode: a7 is the usp. hand it to fmovm_dynamic() through EXC_A7 and
# write back whatever value is in EXC_A7 afterwards.
2744iea_fmovm_data_u:
2745	mov.l		%usp,%a0
2746	mov.l		%a0,EXC_A7(%a6)		# store current a7
2747	bsr.l		fmovm_dynamic		# do dynamic fmovm
2748	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2749	mov.l		%a0,%usp		# update usp
2750	bra.w		iea_fmovm_exit
2751
# iea_fmovm_data_s: supervisor mode fmovm of data registers. EXC_A7 is set
# to the address just past the exception frame's vector offset word before
# fmovm_dynamic() is called. afterwards SPCOND_FLG is checked for the -(a7)
# (mda7_flg) and (a7)+ (mia7_flg) special cases; anything else takes the
# common exit.
2752iea_fmovm_data_s:
2753	clr.b		SPCOND_FLG(%a6)
2754	lea		0x2+EXC_VOFF(%a6),%a0
2755	mov.l		%a0,EXC_A7(%a6)
2756	bsr.l		fmovm_dynamic		# do dynamic fmovm
2757
2758	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2759	beq.w		iea_fmovm_data_predec
2760	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
2761	bne.w		iea_fmovm_exit
2762
2763# right now, d0 = the size.
2764# the data has been fetched from the supervisor stack, but we have not
2765# incremented the stack pointer by the appropriate number of bytes.
2766# do it here.
# iea_fmovm_data_postinc: supervisor (a7)+ case with d0 = size. a fresh
# exception frame (SR, Next PC, format/voff 0x00f0) is written d0 bytes
# above the current one and the stack pointer is moved up to it.
2767iea_fmovm_data_postinc:
2768	btst		&0x7,EXC_SR(%a6)	# is trace on?
2769	bne.b		iea_fmovm_data_pi_trace	# yes
2770
2771	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2772	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2773	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2774
# the address of the relocated frame is parked in the old EXC_SR slot and
# popped into sp after the unlk below.
# NOTE(review): this relies on EXC_SR being the longword directly above the
# saved a6 - confirm against the frame offset definitions.
2775	lea		(EXC_SR,%a6,%d0),%a0
2776	mov.l		%a0,EXC_SR(%a6)
2777
2778	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2779	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2781
2782	unlk		%a6
2783	mov.l		(%sp)+,%sp		# switch to the relocated frame
2784	bra.l		_fpsp_done
2785
# iea_fmovm_data_pi_trace: same as iea_fmovm_data_postinc, but tracing is
# on. the relocated frame is built 4 bytes lower and as a trace frame
# (format/voff 0x2024) holding both the Next PC (from EXC_EXTWPTR) and the
# Current PC (from EXC_PC). exit is through _real_trace().
2786iea_fmovm_data_pi_trace:
2787	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2788	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2789	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2790	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2791
# park the new frame address in the old EXC_SR slot; it is popped into sp
# after the unlk below.
2792	lea		(EXC_SR-0x4,%a6,%d0),%a0
2793	mov.l		%a0,EXC_SR(%a6)
2794
2795	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2796	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2798
2799	unlk		%a6
2800	mov.l		(%sp)+,%sp		# switch to the relocated frame
2801	bra.l		_real_trace
2802
# right now, d0 = the size and d1 = the strg.
# supervisor -(a7) case: the exception frame is rebuilt "size" bytes lower
# on the stack and the registers selected by strg are then stored, one at a
# time, into the space that opens up above the new frame.
2804iea_fmovm_data_predec:
2805	mov.b		%d1,EXC_VOFF(%a6)	# store strg
2806	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2807
2808	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2809	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2811
# d0/d1 now hold the restored user values. push them, together with the old
# a6 and the Next PC, so that they survive being used as scratch below.
2812	mov.l		(%a6),-(%sp)		# make a copy of a6
2813	mov.l		%d0,-(%sp)		# save d0
2814	mov.l		%d1,-(%sp)		# save d1
2815	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2816
2817	clr.l		%d0
2818	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2819	neg.l		%d0			# get negative of size
2820
2821	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2822	beq.b		iea_fmovm_data_p2
2823
# trace on: build a trace frame (format/voff 0x2024) 4 bytes lower than the
# plain frame would be. the Next PC is popped from the stack.
2824	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2826	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
2827	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2828
2829	pea		(%a6,%d0)		# create final sp
2830	bra.b		iea_fmovm_data_p3
2831
# trace off: build a plain frame (format/voff 0x00f0) "size" bytes lower.
2832iea_fmovm_data_p2:
2833	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
2835	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2836
2837	pea		(0x4,%a6,%d0)		# create final sp
2838
# walk the strg byte from its most significant bit down. for each set bit
# one extended precision register (12 bytes) is stored and d0 is advanced.
2839iea_fmovm_data_p3:
2840	clr.l		%d1
2841	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2842
2843	tst.b		%d1
2844	bpl.b		fm_1
2845	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
2846	addi.l		&0xc,%d0
2847fm_1:
2848	lsl.b		&0x1,%d1
2849	bpl.b		fm_2
2850	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
2851	addi.l		&0xc,%d0
2852fm_2:
2853	lsl.b		&0x1,%d1
2854	bpl.b		fm_3
2855	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
2856	addi.l		&0xc,%d0
2857fm_3:
2858	lsl.b		&0x1,%d1
2859	bpl.b		fm_4
2860	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
2861	addi.l		&0xc,%d0
2862fm_4:
2863	lsl.b		&0x1,%d1
2864	bpl.b		fm_5
2865	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
2866	addi.l		&0xc,%d0
2867fm_5:
2868	lsl.b		&0x1,%d1
2869	bpl.b		fm_6
2870	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
2871	addi.l		&0xc,%d0
2872fm_6:
2873	lsl.b		&0x1,%d1
2874	bpl.b		fm_7
2875	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
2876	addi.l		&0xc,%d0
2877fm_7:
2878	lsl.b		&0x1,%d1
2879	bpl.b		fm_end
2880	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
# the stack now holds, from the top: final sp, saved d1, saved d0, old a6.
2881fm_end:
2882	mov.l		0x4(%sp),%d1		# restore d1
2883	mov.l		0x8(%sp),%d0		# restore d0
2884	mov.l		0xc(%sp),%a6		# restore a6
2885	mov.l		(%sp)+,%sp		# switch to the final sp
2886
2887	btst		&0x7,(%sp)		# is trace enabled?
2888	beq.l		_fpsp_done
2889	bra.l		_real_trace
2890
2891#########################################################################
# iea_fmovm_ctrl: control register form of fmovm; fmovm_ctrl() does the work.
2892iea_fmovm_ctrl:
2893
2894	bsr.l		fmovm_ctrl		# load ctrl regs
2895
# iea_fmovm_exit: common exit for the fmovm paths that need no stack frame
# relocation. restore the saved registers and, unless tracing is on, put the
# Next PC into the exception frame and exit through _fpsp_done().
2896iea_fmovm_exit:
2897	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2898	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2900
2901	btst		&0x7,EXC_SR(%a6)	# is trace on?
2902	bne.b		iea_fmovm_trace		# yes
2903
2904	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905
2906	unlk		%a6			# unravel the frame
2907
2908	bra.l		_fpsp_done		# exit to os
2909
2910#
2911# The control reg instruction that took an "Unimplemented Effective Address"
2912# exception was being traced. The "Current PC" for the trace frame is the
2913# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914# After fixing the stack frame, jump to _real_trace().
2915#
2916#		 UNIMP EA FRAME		   TRACE FRAME
2917#		*****************	*****************
2918#		* 0x0 *  0x0f0	*	*    Current	*
2919#		*****************	*      PC	*
2920#		*    Current	*	*****************
2921#		*      PC	*	* 0x2 *  0x024	*
2922#		*****************	*****************
2923#		*      SR	*	*     Next	*
2924#		*****************	*      PC	*
2925#					*****************
2926#					*      SR	*
2927#					*****************
2928# this ain't a pretty solution, but it works:
2929# -restore a6 (not with unlk)
2930# -shift stack frame down over where old a6 used to be
2931# -add LOCAL_SIZE to stack pointer
# iea_fmovm_trace: convert the exception frame into a trace frame in place.
# a6 is reloaded by hand, so every access below is relative to sp, which
# still points at the bottom of the local frame (hence the +LOCAL_SIZE).
2932iea_fmovm_trace:
2933	mov.l		(%a6),%a6		# restore frame pointer
2934	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR
2935	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # "Current PC"
2936	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # "Next PC"
2937	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2939
2940	bra.l		_real_trace
2941
2942#########################################################################
2943# The FPU is disabled and so we should really have taken the "Line
2944# F Emulator" exception. So, here we create an 8-word stack frame
2945# from our 4-word stack frame. This means we must calculate the length
2946# of the faulting instruction to get the "next PC". This is trivial for
2947# immediate operands but requires some extra work for fmovm dynamic
2948# which can use most addressing modes.
# iea_disabled: work out the length of the faulting instruction, then turn
# the 4-word exception frame into an 8-word frame (format/voff 0x402c) that
# carries the Next PC, and exit through _real_fpu_disabled().
2949iea_disabled:
2950	mov.l		(%sp)+,%d0		# restore d0
2951
2952	link		%a6,&-LOCAL_SIZE	# init stack frame
2953
2954	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2955
2956# PC of instruction that took the exception is the PC in the frame
2957	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2959	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2960	bsr.l		_imem_read_long		# fetch the instruction words
2961	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2962
2963	tst.w		%d0			# is instr fmovm?
2964	bmi.b		iea_dis_fmovm		# yes
2965# instruction is using an extended precision immediate operand. Therefore,
2966# the total instruction length is 16 bytes.
2967iea_dis_immed:
2968	mov.l		&0x10,%d0		# 16 bytes of instruction
2969	bra.b		iea_dis_cont
2970iea_dis_fmovm:
2971	btst		&0xe,%d0		# is instr fmovm ctrl
2972	bne.b		iea_dis_fmovm_data	# no
2973# the instruction is a fmovm.l with 2 or 3 registers.
2974	bfextu		%d0{&19:&3},%d1		# extract the 3-bit register select field
2975	mov.l		&0xc,%d0		# assume 12 bytes of instruction
2976	cmpi.b		%d1,&0x7		# move all regs?
2977	bne.b		iea_dis_cont
2978	addq.l		&0x4,%d0		# all three regs: 16 bytes
2979	bra.b		iea_dis_cont
2980# the instruction is an fmovm.x dynamic which can use many addressing
2981# modes and thus can have several different total instruction lengths.
2982# call fmovm_calc_ea which will go through the ea calc process and,
2983# as a by-product, will tell us how long the instruction is.
2984iea_dis_fmovm_data:
2985	clr.l		%d0
2986	bsr.l		fmovm_calc_ea
2987	mov.l		EXC_EXTWPTR(%a6),%d0
2988	sub.l		EXC_PC(%a6),%d0		# length = ext word ptr - Current PC
2989iea_dis_cont:
# the instruction length is parked in the frame's vector offset word; it is
# read back (as 0x12(%sp)) while the 8-word frame is built below.
2990	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
2991
2992	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2993
2994	unlk		%a6
2995
2996# here, we actually create the 8-word frame from the 4-word frame,
2997# with the "next PC" as additional info.
2998# the <ea> field is left as undefined.
2999	subq.l		&0x8,%sp		# make room for new stack
3000	mov.l		%d0,-(%sp)		# save d0
3001	mov.w		0xc(%sp),0x4(%sp)	# move SR
3002	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3003	clr.l		%d0
3004	mov.w		0x12(%sp),%d0		# fetch the instruction length
3005	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3006	add.l		%d0,0x6(%sp)		# make Next PC
3007	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3008	mov.l		(%sp)+,%d0		# restore d0
3009
3010	bra.l		_real_fpu_disabled
3011
3012##########
3013
# iea_iacc: restore the saved state, then enlarge the exception frame by
# 8 bytes and rewrite it with format/voff 0x4008, using the stacked PC as
# the fault address and 0x09428001 as the fslw. falls into iea_acc_done.
3014iea_iacc:
# NOTE(review): bit 1 of the PCR is presumably the FPU-disable bit, in which
# case the FP registers must not be touched - confirm against the 060 manual.
3015	movc		%pcr,%d0
3016	btst		&0x1,%d0
3017	bne.b		iea_iacc_cont
3018	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
3020iea_iacc_cont:
3021	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3022
3023	unlk		%a6
3024
3025	subq.w		&0x8,%sp		# make stack frame bigger
3026	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3027	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3028	mov.w		&0x4008,0x6(%sp)	# store voff
3029	mov.l		0x2(%sp),0x8(%sp)	# store ea
3030	mov.l		&0x09428001,0xc(%sp)	# store fslw
3031
# iea_acc_done: shared tail for iea_iacc and iea_dacc. if the stacked SR says
# supervisor mode, set bit 2 in the second byte of the fslw, then exit
# through _real_access().
3032iea_acc_done:
3033	btst		&0x5,(%sp)		# user or supervisor mode?
3034	beq.b		iea_acc_done2		# user
3035	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3036
3037iea_acc_done2:
3038	bra.l		_real_access
3039
# iea_dacc: build a format/voff 0x4008 frame, 8 bytes larger than the
# current exception frame, with a0 as the fault address and d0 supplying the
# upper word of the fslw (the lower word is 0x0001). sp is first reset to
# the bottom of the local frame, so all accesses are sp relative.
3040iea_dacc:
3041	lea		-LOCAL_SIZE(%a6),%sp
3042
# NOTE(review): bit 1 of the PCR is presumably the FPU-disable bit - confirm.
3043	movc		%pcr,%d1
3044	btst		&0x1,%d1
3045	bne.b		iea_dacc_cont
3046	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
3047	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048iea_dacc_cont:
3049	mov.l		(%a6),%a6		# restore frame pointer
3050
# each destination below is the same slot in a frame that starts 8 bytes lower.
3051	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3052	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3053	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3054	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
3055	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
3056	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3057
3058	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059	add.w		&LOCAL_SIZE-0x4,%sp	# sp -> start of the new frame
3060
3061	bra.b		iea_acc_done
3062
3063#########################################################################
3064# XDEF ****************************************************************	#
3065#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3066#									#
3067#	This handler should be the first code executed upon taking the	#
3068#	FP Operand Error exception in an operating system.		#
3069#									#
3070# XREF ****************************************************************	#
3071#	_imem_read_long() - read instruction longword			#
3072#	fix_skewed_ops() - adjust src operand in fsave frame		#
3073#	_real_operr() - "callout" to operating system operr handler	#
3074#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3075#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3076#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3077#									#
3078# INPUT ***************************************************************	#
3079#	- The system stack contains the FP Operr exception frame	#
3080#	- The fsave frame contains the source operand			#
3081#									#
3082# OUTPUT **************************************************************	#
3083#	No access error:						#
3084#	- The system stack is unchanged					#
3085#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3086#									#
3087# ALGORITHM ***********************************************************	#
3088#	In a system where the FP Operr exception is enabled, the goal	#
3089# is to get to the handler specified at _real_operr(). But, on the 060,	#
3090# for opclass zero and two instruction taking this exception, the	#
3091# input operand in the fsave frame may be incorrect for some cases	#
3092# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3093# do just this and then exits through _real_operr().			#
3094#	For opclass 3 instructions, the 060 doesn't store the default	#
3095# operr result out to memory or data register file as it should.	#
3096# This code must emulate the move out before finally exiting through	#
3097# _real_operr(). The move out, if to memory, is performed using		#
3098# _mem_write() "callout" routines that may return a failing result.	#
3099# In this special case, the handler must exit through facc_out()	#
3100# which creates an access error stack frame from the current operr	#
3101# stack frame.								#
3102#									#
3103#########################################################################
3104
# _fpsp_operr: entry point. save the fsave frame and working registers, read
# the faulting instruction (its address is in the FPIAR), then either emulate
# the move out (foperr_out) or fix up the source operand in the fsave frame.
3105	global		_fpsp_operr
3106_fpsp_operr:
3107
3108	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3109
3110	fsave		FP_SRC(%a6)		# grab the "busy" frame
3111
3112	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3113	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3115
3116# the FPIAR holds the "current PC" of the faulting instruction
3117	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118
3119	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3120	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3121	bsr.l		_imem_read_long		# fetch the instruction words
3122	mov.l		%d0,EXC_OPWORD(%a6)
3123
3124##############################################################################
3125
3126	btst		&13,%d0			# is instr an fmove out?
3127	bne.b		foperr_out		# fmove out
3128
3129
3130# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131# this would be the case for opclass two operations with a source infinity or
3132# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133# cause an operr so we don't need to check for them here.
3134	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3135	bsr.l		fix_skewed_ops		# fix src op
3136
# foperr_exit: common exit. restore the saved registers, frestore the
# (possibly adjusted) fsave frame, and exit through _real_operr().
3137foperr_exit:
3138	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3139	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3141
3142	frestore	FP_SRC(%a6)
3143
3144	unlk		%a6
3145	bra.l		_real_operr
3146
3147########################################################################
3148
3149#
3150# the hardware does not save the default result to memory on enabled
3151# operand error exceptions. we do this here before passing control to
3152# the user operand error handler.
3153#
3154# byte, word, and long destination format operations can pass
3155# through here. we simply need to test the sign of the src
3156# operand and save the appropriate minimum or maximum integer value
3157# to the effective address as pointed to by the stacked effective address.
3158#
3159# although packed opclass three operations can take operand error
3160# exceptions, they won't pass through here since they are caught
3161# first by the unsupported data format exception handler. that handler
3162# sends them directly to _real_operr() if necessary.
3163#
# foperr_out: compute the default result into L_SCR1 and dispatch on the
# destination format. on entry d0 holds the instruction longword.
#	- exponent all ones and mantissa non-zero (ignoring the top bit of the
#	  upper longword): L_SCR1 = upper mantissa longword of the operand
#	- otherwise: L_SCR1 = 0x7fffffff if the sign is positive, or
#	  0x80000000 if it is negative
3164foperr_out:
3165
3166	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3167	andi.w		&0x7fff,%d1
3168	cmpi.w		%d1,&0x7fff
3169	bne.b		foperr_out_not_qnan
3170# the operand is either an infinity or a QNAN.
3171	tst.l		FP_SRC_LO(%a6)
3172	bne.b		foperr_out_qnan
3173	mov.l		FP_SRC_HI(%a6),%d1
3174	andi.l		&0x7fffffff,%d1
3175	beq.b		foperr_out_not_qnan
3176foperr_out_qnan:
3177	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
3178	bra.b		foperr_out_jmp
3179
3180foperr_out_not_qnan:
3181	mov.l		&0x7fffffff,%d1		# largest positive integer
3182	tst.b		FP_SRC_EX(%a6)		# is the operand negative?
3183	bpl.b		foperr_out_not_qnan2	# no
3184	addq.l		&0x1,%d1		# yes; make it 0x80000000
3185foperr_out_not_qnan2:
3186	mov.l		%d1,L_SCR1(%a6)
3187
3188foperr_out_jmp:
3189	bfextu		%d0{&19:&3},%d0		# extract dst format field
3190	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3191	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
3192	jmp		(tbl_operr.b,%pc,%a0)
3193
# word offsets from tbl_operr, indexed by the dst format field. an entry of
# "tbl_operr - tbl_operr" is offset zero, i.e. it points at the table itself.
3194tbl_operr:
3195	short		foperr_out_l - tbl_operr # long word integer
3196	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3197	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3198	short		foperr_exit  - tbl_operr # packed won't enter here
3199	short		foperr_out_w - tbl_operr # word integer
3200	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3201	short		foperr_out_b - tbl_operr # byte integer
3202	short		tbl_operr    - tbl_operr # packed won't enter here
3203
# foperr_out_b: store the upper byte of the default result in L_SCR1 to the
# destination. on entry d1 holds the <ea> mode,reg byte of the opword.
3204foperr_out_b:
3205	mov.b		L_SCR1(%a6),%d0		# load default result (upper byte)
3206	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3207	ble.b		foperr_out_b_save_dn	# yes
3208	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3209	bsr.l		_dmem_write_byte	# write the default result
3210
3211	tst.l		%d1			# did dstore fail?
3212	bne.l		facc_out_b		# yes
3213
3214	bra.w		foperr_exit
3215foperr_out_b_save_dn:
3216	andi.w		&0x0007,%d1		# keep only the register number
3217	bsr.l		store_dreg_b		# store result to regfile
3218	bra.w		foperr_exit
3219
# foperr_out_w: store the upper word of the default result in L_SCR1 to the
# destination. on entry d1 holds the <ea> mode,reg byte of the opword.
3220foperr_out_w:
3221	mov.w		L_SCR1(%a6),%d0		# load default result (upper word)
3222	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3223	ble.b		foperr_out_w_save_dn	# yes
3224	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3225	bsr.l		_dmem_write_word	# write the default result
3226
3227	tst.l		%d1			# did dstore fail?
3228	bne.l		facc_out_w		# yes
3229
3230	bra.w		foperr_exit
3231foperr_out_w_save_dn:
3232	andi.w		&0x0007,%d1		# keep only the register number
3233	bsr.l		store_dreg_w		# store result to regfile
3234	bra.w		foperr_exit
3235
# foperr_out_l: store the default result longword in L_SCR1 to the
# destination. on entry d1 holds the <ea> mode,reg byte of the opword.
3236foperr_out_l:
3237	mov.l		L_SCR1(%a6),%d0		# load default result
3238	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3239	ble.b		foperr_out_l_save_dn	# yes
3240	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3241	bsr.l		_dmem_write_long	# write the default result
3242
3243	tst.l		%d1			# did dstore fail?
3244	bne.l		facc_out_l		# yes
3245
3246	bra.w		foperr_exit
3247foperr_out_l_save_dn:
3248	andi.w		&0x0007,%d1		# keep only the register number
3249	bsr.l		store_dreg_l		# store result to regfile
3250	bra.w		foperr_exit
3251
3252#########################################################################
3253# XDEF ****************************************************************	#
3254#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3255#									#
3256#	This handler should be the first code executed upon taking the	#
3257#	FP Signalling NAN exception in an operating system.		#
3258#									#
3259# XREF ****************************************************************	#
3260#	_imem_read_long() - read instruction longword			#
3261#	fix_skewed_ops() - adjust src operand in fsave frame		#
3262#	_real_snan() - "callout" to operating system SNAN handler	#
3263#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3264#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3265#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3266#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3267#									#
3268# INPUT ***************************************************************	#
3269#	- The system stack contains the FP SNAN exception frame		#
3270#	- The fsave frame contains the source operand			#
3271#									#
3272# OUTPUT **************************************************************	#
3273#	No access error:						#
3274#	- The system stack is unchanged					#
3275#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3276#									#
3277# ALGORITHM ***********************************************************	#
3278#	In a system where the FP SNAN exception is enabled, the goal	#
3279# is to get to the handler specified at _real_snan(). But, on the 060,	#
3280# for opclass zero and two instructions taking this exception, the	#
3281# input operand in the fsave frame may be incorrect for some cases	#
3282# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3283# do just this and then exits through _real_snan().			#
3284#	For opclass 3 instructions, the 060 doesn't store the default	#
3285# SNAN result out to memory or data register file as it should.		#
3286# This code must emulate the move out before finally exiting through	#
3287# _real_snan(). The move out, if to memory, is performed using		#
3288# _mem_write() "callout" routines that may return a failing result.	#
3289# In this special case, the handler must exit through facc_out()	#
3290# which creates an access error stack frame from the current SNAN	#
3291# stack frame.								#
3292#	For the case of an extended precision opclass 3 instruction,	#
3293# if the effective addressing mode was -() or ()+, then the address	#
3294# register must get updated by calling _calc_ea_fout(). If the <ea>	#
3295# was -(a7) from supervisor mode, then the exception frame currently	#
3296# on the system stack must be carefully moved "down" to make room	#
3297# for the operand being moved.						#
3298#									#
3299#########################################################################
3300
# _fpsp_snan: entry point. save the fsave frame and working registers, read
# the faulting instruction (its address is in the FPIAR), then either emulate
# the move out (fsnan_out) or fix up the source operand in the fsave frame.
3301	global		_fpsp_snan
3302_fpsp_snan:
3303
3304	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3305
3306	fsave		FP_SRC(%a6)		# grab the "busy" frame
3307
3308	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3309	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3311
3312# the FPIAR holds the "current PC" of the faulting instruction
3313	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3314
3315	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3316	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3317	bsr.l		_imem_read_long		# fetch the instruction words
3318	mov.l		%d0,EXC_OPWORD(%a6)
3319
3320##############################################################################
3321
3322	btst		&13,%d0			# is instr an fmove out?
3323	bne.w		fsnan_out		# fmove out
3324
3325
3326# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327# this would be the case for opclass two operations with a source infinity or
3328# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329# fixed here.
3330	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3331	bsr.l		fix_skewed_ops		# fix src op
3332
# fsnan_exit: common exit. restore the saved registers, frestore the
# (possibly adjusted) fsave frame, and exit through _real_snan().
3333fsnan_exit:
3334	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3335	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3337
3338	frestore	FP_SRC(%a6)
3339
3340	unlk		%a6
3341	bra.l		_real_snan
3342
3343########################################################################
3344
3345#
3346# the hardware does not save the default result to memory on enabled
3347# snan exceptions. we do this here before passing control to
3348# the user snan handler.
3349#
3350# byte, word, long, sgl, dbl, and ext destination format operations can
3351# pass through here. packed format operations already were handled by
3352# fpsp_unsupp(), so we need to do nothing else for them here.
3353# for byte, word, and long, we store the upper bits of the SNAN's mantissa
3354# with the SNAN bit set. for sgl, dbl, and ext, we build a NAN in the
3355# destination format with the SNAN bit set and store that instead.
3356#
# fsnan_out: dispatch on the destination format of the fmove out. on entry
# d0 holds the instruction longword; on exit to the format routine d1 holds
# the <ea> mode,reg byte of the opword.
3357fsnan_out:
3358
3359	bfextu		%d0{&19:&3},%d0		# extract dst format field
3360	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3361	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
3362	jmp		(tbl_snan.b,%pc,%a0)
3363
# word offsets from tbl_snan, indexed by the dst format field.
# NOTE(review): the two packed entries are offset zero, i.e. they point at
# the table itself rather than at fsnan_exit; this is only safe if packed
# operations never reach this handler - confirm against fpsp_unsupp().
3364tbl_snan:
3365	short		fsnan_out_l - tbl_snan # long word integer
3366	short		fsnan_out_s - tbl_snan # sgl prec
3367	short		fsnan_out_x - tbl_snan # ext prec
3368	short		tbl_snan    - tbl_snan # packed needs no help
3369	short		fsnan_out_w - tbl_snan # word integer
3370	short		fsnan_out_d - tbl_snan # dbl prec
3371	short		fsnan_out_b - tbl_snan # byte integer
3372	short		tbl_snan    - tbl_snan # packed needs no help
3373
# fsnan_out_b: the default result is the upper byte of the SNAN's mantissa
# with bit 6 set. on entry d1 holds the <ea> mode,reg byte of the opword.
3374fsnan_out_b:
3375	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3376	bset		&6,%d0			# set SNAN bit
3377	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3378	ble.b		fsnan_out_b_dn		# yes
3379	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3380	bsr.l		_dmem_write_byte	# write the default result
3381
3382	tst.l		%d1			# did dstore fail?
3383	bne.l		facc_out_b		# yes
3384
3385	bra.w		fsnan_exit
3386fsnan_out_b_dn:
3387	andi.w		&0x0007,%d1		# keep only the register number
3388	bsr.l		store_dreg_b		# store result to regfile
3389	bra.w		fsnan_exit
3390
# fsnan_out_w: the default result is the upper word of the SNAN's mantissa
# with bit 14 set. on entry d1 holds the <ea> mode,reg byte of the opword.
3391fsnan_out_w:
3392	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3393	bset		&14,%d0			# set SNAN bit
3394	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3395	ble.b		fsnan_out_w_dn		# yes
3396	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3397	bsr.l		_dmem_write_word	# write the default result
3398
3399	tst.l		%d1			# did dstore fail?
3400	bne.l		facc_out_w		# yes
3401
3402	bra.w		fsnan_exit
3403fsnan_out_w_dn:
3404	andi.w		&0x0007,%d1		# keep only the register number
3405	bsr.l		store_dreg_w		# store result to regfile
3406	bra.w		fsnan_exit
3407
# fsnan_out_l: the default result is the upper longword of the SNAN's
# mantissa with bit 30 set. on entry d1 holds the <ea> mode,reg byte.
3408fsnan_out_l:
3409	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3410	bset		&30,%d0			# set SNAN bit
3411	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3412	ble.b		fsnan_out_l_dn		# yes
3413	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3414	bsr.l		_dmem_write_long	# write the default result
3415
3416	tst.l		%d1			# did dstore fail?
3417	bne.l		facc_out_l		# yes
3418
3419	bra.w		fsnan_exit
3420fsnan_out_l_dn:
3421	andi.w		&0x0007,%d1		# keep only the register number
3422	bsr.l		store_dreg_l		# store result to regfile
3423	bra.w		fsnan_exit
3424
# fsnan_out_s: build a single precision NAN from the source operand: keep
# the sign, force exponent and top mantissa bit with 0x7fc00000, and OR in
# the upper mantissa longword shifted right by 8. the result is written to
# memory, or to a data register through fsnan_out_d_dn (which, despite its
# name, is only reached from here within this code).
# on entry d1 holds the <ea> mode,reg byte of the opword.
3425fsnan_out_s:
3426	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3427	ble.b		fsnan_out_d_dn		# yes
3428	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3429	andi.l		&0x80000000,%d0		# keep sign
3430	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3431	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3432	lsr.l		&0x8,%d1		# shift mantissa for sgl
3433	or.l		%d1,%d0			# create sgl SNAN
3434	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3435	bsr.l		_dmem_write_long	# write the default result
3436
3437	tst.l		%d1			# did dstore fail?
3438	bne.l		facc_out_l		# yes
3439
3440	bra.w		fsnan_exit
3441fsnan_out_d_dn:
3442	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3443	andi.l		&0x80000000,%d0		# keep sign
3444	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3445	mov.l		%d1,-(%sp)		# save <ea> mode,reg; d1 is scratch below
3446	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3447	lsr.l		&0x8,%d1		# shift mantissa for sgl
3448	or.l		%d1,%d0			# create sgl SNAN
3449	mov.l		(%sp)+,%d1		# get <ea> mode,reg back
3450	andi.w		&0x0007,%d1		# keep only the register number
3451	bsr.l		store_dreg_l		# store result to regfile
3452	bra.w		fsnan_exit
3453
# fsnan_out_d: build a double precision NAN in FP_SCR0 and write its 8 bytes
# to the stacked <ea>.
#	hi longword = sign | 0x7ff80000 | (upper mantissa >> 11)
#	lo longword = (low 11 bits of upper mantissa, moved to the top 11
#		      bits) | (lower mantissa >> 11)
3454fsnan_out_d:
3455	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3456	andi.l		&0x80000000,%d0		# keep sign
3457	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
3458	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3459	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
3460	mov.l		&11,%d0			# load shift amt
3461	lsr.l		%d0,%d1
3462	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
3463	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3464	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
3465	ror.l		%d0,%d1			# rotate them to the top of the longword
3466	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
3467	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
3468	lsr.l		%d0,%d1
3469	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
3470	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3471	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
3472	movq.l		&0x8,%d0		# pass: size of 8 bytes
3473	bsr.l		_dmem_write		# write the default result
3474
3475	tst.l		%d1			# did dstore fail?
3476	bne.l		facc_out_d		# yes
3477
3478	bra.w		fsnan_exit
3479
3480# for extended precision, if the addressing mode is pre-decrement or
3481# post-increment, then the address register did not get updated.
3482# in addition, for pre-decrement, the stacked <ea> is incorrect.
# fsnan_out_x: build the extended precision result in FP_SCR0 (source
# operand with bit 30 of the upper mantissa longword set and the word after
# the exponent cleared), let _calc_ea_fout() compute the <ea> and update An,
# then write the 12 bytes out. this is the user mode path; supervisor mode
# branches to fsnan_out_x_s.
3483fsnan_out_x:
3484	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3485
3486	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3487	clr.w		2+FP_SCR0(%a6)
3488	mov.l		FP_SRC_HI(%a6),%d0
3489	bset		&30,%d0			# set SNAN bit
3490	mov.l		%d0,FP_SCR0_HI(%a6)
3491	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3492
3493	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3494	bne.b		fsnan_out_x_s		# yes
3495
# the user's a7 (usp) and a6 (saved at (%a6) by the link) are copied into
# EXC_A7/EXC_A6 before the call and copied back afterwards.
3496	mov.l		%usp,%a0		# fetch user stack pointer
3497	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3498	mov.l		(%a6),EXC_A6(%a6)
3499
3500	bsr.l		_calc_ea_fout		# find the correct ea,update An
3501	mov.l		%a0,%a1			# pass: dst addr
3502	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3503
3504	mov.l		EXC_A7(%a6),%a0
3505	mov.l		%a0,%usp		# restore user stack pointer
3506	mov.l		EXC_A6(%a6),(%a6)
3507
# fsnan_out_x_save: a1 = destination address on entry.
3508fsnan_out_x_save:
3509	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3510	movq.l		&0xc,%d0		# pass: size of extended
3511	bsr.l		_dmem_write		# write the default result
3512
3513	tst.l		%d1			# did dstore fail?
3514	bne.l		facc_out_x		# yes
3515
3516	bra.w		fsnan_exit
3517
# fsnan_out_x_s: supervisor-mode variant of fsnan_out_x. Unless the
# destination mode is -(a7), the result is stored through fsnan_out_x_save.
# For -(a7) the 12-byte result is placed directly on the system stack by
# moving part of the exception frame down and filling the gap with FP_SCR0.
3518fsnan_out_x_s:
3519	mov.l		(%a6),EXC_A6(%a6)	# mirror longword at (%a6) for calc_ea()
3520
3521	bsr.l		_calc_ea_fout		# find the correct ea,update An
3522	mov.l		%a0,%a1			# pass: dst addr for _dmem_write()
3523	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3524
3525	mov.l		EXC_A6(%a6),(%a6)	# write a6 image back to the frame
3526
3527	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528	bne.b		fsnan_out_x_save	# no
3529
3530# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3532	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3534
3535	frestore	FP_SRC(%a6)
3536
3537	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3538
# a6 is gone from here on, so everything is addressed off sp.
# first slide three longwords of the stacked frame down by 0xc bytes...
3539	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3542
# ...then drop the three FP_SCR0 longwords into the slots just vacated.
3543	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3546
# NOTE(review): assuming the frame was built with "link.w %a6,&-LOCAL_SIZE"
# as in the handlers that follow, this leaves sp 0xc bytes lower than an
# unlk would, i.e. pointing at the relocated frame.
3547	add.l		&LOCAL_SIZE-0x8,%sp
3548
3549	bra.l		_real_snan		# hand off to the OS SNAN handler
3550
3551#########################################################################
3552# XDEF ****************************************************************	#
3553#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3554#									#
3555#	This handler should be the first code executed upon taking the	#
3556#	FP Inexact exception in an operating system.			#
3557#									#
3558# XREF ****************************************************************	#
3559#	_imem_read_long() - read instruction longword			#
3560#	fix_skewed_ops() - adjust src operand in fsave frame		#
3561#	set_tag_x() - determine optype of src/dst operands		#
3562#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3563#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3564#	load_fpn2() - load dst operand from FP regfile			#
3565#	smovcr() - emulate an "fmovcr" instruction			#
3566#	fout() - emulate an opclass 3 instruction			#
3567#	tbl_unsupp - addr of table of emulation routines, opclass 0,2	#
3568#	_real_inex() - "callout" to operating system inexact handler	#
3569#									#
3570# INPUT ***************************************************************	#
3571#	- The system stack contains the FP Inexact exception frame	#
3572#	- The fsave frame contains the source operand			#
3573#									#
3574# OUTPUT **************************************************************	#
3575#	- The system stack is unchanged					#
3576#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3577#									#
3578# ALGORITHM ***********************************************************	#
3579#	In a system where the FP Inexact exception is enabled, the goal	#
3580# is to get to the handler specified at _real_inex(). But, on the 060,	#
3581# for opclass zero and two instruction taking this exception, the	#
3582# hardware doesn't store the correct result to the destination FP	#
3583# register as did the '040 and '881/2. This handler must emulate the	#
3584# instruction in order to get this value and then store it to the	#
3585# correct register before calling _real_inex().				#
3586#	For opclass 3 instructions, the 060 doesn't store the default	#
3587# inexact result out to memory or data register file as it should.	#
3588# This code must emulate the move out by calling fout() before finally	#
3589# exiting through _real_inex().						#
3590#									#
3591#########################################################################
3592
3593	global		_fpsp_inex
3594_fpsp_inex:
3595
3596	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3597
3598	fsave		FP_SRC(%a6)		# grab the "busy" frame
3599
3600	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3601	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3603
3604# the FPIAR holds the "current PC" of the faulting instruction
3605	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3606
3607	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3608	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3609	bsr.l		_imem_read_long		# fetch the instruction words
3610	mov.l		%d0,EXC_OPWORD(%a6)	# save fetched longword
3611
3612##############################################################################
3613
3614	btst		&13,%d0			# is instr an fmove out?
3615	bne.w		finex_out		# fmove out
3616
3617
3618# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619# longword integer directly into the upper longword of the mantissa along
3620# w/ an exponent value of 0x401e. we convert this to extended precision here.
3621	bfextu		%d0{&19:&3},%d0		# fetch instr size
3622	bne.b		finex_cont		# instr size is not long
3623	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
3624	bne.b		finex_cont		# no
3625	fmov.l		&0x0,%fpcr		# zero the FPCR before converting
3626	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
3627	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
# NOTE(review): writes 0xe001 into the word at offset 2 of FP_SRC; the
# meaning of that value is not visible in this part of the file.
3628	mov.w		&0xe001,0x2+FP_SRC(%a6)
3629
3630finex_cont:
3631	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3632	bsr.l		fix_skewed_ops		# fix src op
3633
3634# Here, we zero the ccode and exception byte field since we're going to
3635# emulate the whole instruction. Notice, though, that we don't kill the
3636# INEX1 bit. This is because a packed op has long since been converted
3637# to extended before arriving here. Therefore, we need to retain the
3638# INEX1 bit from when the operand was first converted.
3639	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3640
3641	fmov.l		&0x0,%fpcr		# zero current control regs
3642	fmov.l		&0x0,%fpsr
3643
3644	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645	cmpi.b		%d1,&0x17		# is op an fmovecr?
3646	beq.w		finex_fmovcr		# yes
3647
3648	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3649	bsr.l		set_tag_x		# tag the operand type
3650	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
3651
3652# bits four and five of the fp extension word separate the monadic and dyadic
3653# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654# will never take this exception, but fsincos will.
3655	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
3656	beq.b		finex_extract		# monadic
3657
3658	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
3659	bne.b		finex_extract		# yes
3660
3661	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662	bsr.l		load_fpn2		# load dst into FP_DST
3663
3664	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
3665	bsr.l		set_tag_x		# tag the operand type
3666	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
3667	bne.b		finex_op2_done		# no
3668	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
3669finex_op2_done:
3670	mov.b		%d0,DTAG(%a6)		# save dst optype tag
3671
3672finex_extract:
3673	clr.l		%d0
3674	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
3675
3676	mov.b		1+EXC_CMDREG(%a6),%d1
3677	andi.w		&0x007f,%d1		# extract extension
3678
3679	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3680	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
3681
# tbl_unsupp holds longword offsets relative to the table itself, so the
# entry is fetched first and then used as the index for the jsr.
3682	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
3684
3685# the operation has been emulated. the result is in fp0.
3686finex_save:
3687	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dst reg field of cmdreg
3688	bsr.l		store_fpreg		# store result to FP regfile
3689
3690finex_exit:
3691	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3692	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3694
3695	frestore	FP_SRC(%a6)
3696
3697	unlk		%a6			# tear down stack frame
3698	bra.l		_real_inex		# hand off to the OS inexact handler
3699
# finex_fmovcr: reached from _fpsp_inex when the upper 6 bits of the command
# word equal 0x17 (fmovecr). Calls smovcr() with d0 = FPCR_MODE byte and
# d1 = low 7 bits of the command word, then rejoins at finex_save.
3700finex_fmovcr:
3701	clr.l		%d0
3702	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3703	mov.b		1+EXC_CMDREG(%a6),%d1
3704	andi.l		&0x0000007f,%d1		# pass rom offset
3705	bsr.l		smovcr
3706	bra.b		finex_save		# store the result and exit
3707
3708########################################################################
3709
3710#
3711# the hardware does not save the default result to memory on enabled
3712# inexact exceptions. we do this here before passing control to
3713# the user inexact handler.
3714#
3715# byte, word, and long destination format operations can pass
3716# through here. so can double and single precision.
3717# although packed opclass three operations can take inexact
3718# exceptions, they won't pass through here since they are caught
3719# first by the unsupported data format exception handler. that handler
3720# sends them directly to _real_inex() if necessary.
3721#
# calls fout() with d0 = FPCR_MODE byte and a0 = ptr to FP_SRC, then leaves
# through finex_exit (register restore + _real_inex).
3722finex_out:
3723
3724	mov.b		&NORM,STAG(%a6)		# src is a NORM
3725
3726	clr.l		%d0
3727	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3728
3729	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
3730
3731	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
3732
3733	bsr.l		fout			# store the default result
3734
3735	bra.b		finex_exit		# restore state and exit
3736
3737#########################################################################
3738# XDEF ****************************************************************	#
3739#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3740#									#
3741#	This handler should be the first code executed upon taking	#
3742#	the FP DZ exception in an operating system.			#
3743#									#
3744# XREF ****************************************************************	#
3745#	_imem_read_long() - read instruction longword from memory	#
3746#	fix_skewed_ops() - adjust fsave operand				#
3747#	_real_dz() - "callout" exit point from FP DZ handler		#
3748#									#
3749# INPUT ***************************************************************	#
3750#	- The system stack contains the FP DZ exception stack.		#
3751#	- The fsave frame contains the source operand.			#
3752#									#
3753# OUTPUT **************************************************************	#
3754#	- The system stack contains the FP DZ exception stack.		#
3755#	- The fsave frame contains the adjusted source operand.		#
3756#									#
3757# ALGORITHM ***********************************************************	#
3758#	In a system where the DZ exception is enabled, the goal is to	#
3759# get to the handler specified at _real_dz(). But, on the 060, when the	#
3760# exception is taken, the input operand in the fsave state frame may	#
3761# be incorrect for some cases and need to be adjusted. So, this package	#
3762# adjusts the operand using fix_skewed_ops() and then branches to	#
3763# _real_dz().								#
3764#									#
3765#########################################################################
3766
3767	global		_fpsp_dz
3768_fpsp_dz:
3769
3770	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3771
3772	fsave		FP_SRC(%a6)		# grab the "busy" frame
3773
3774	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3775	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3777
3778# the FPIAR holds the "current PC" of the faulting instruction
3779	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3780
3781	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3782	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3783	bsr.l		_imem_read_long		# fetch the instruction words
3784	mov.l		%d0,EXC_OPWORD(%a6)	# save fetched longword
3785
3786##############################################################################
3787
3788
3789# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790# this would be the case for opclass two operations with a source zero
3791# in the sgl or dbl format.
3792	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3793	bsr.l		fix_skewed_ops		# fix src op
3794
# restore everything saved at entry; the (possibly adjusted) fsave frame in
# FP_SRC is reloaded with frestore before leaving.
3795fdz_exit:
3796	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3797	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3799
3800	frestore	FP_SRC(%a6)
3801
3802	unlk		%a6			# tear down stack frame
3803	bra.l		_real_dz		# hand off to the OS DZ handler
3804
3805#########################################################################
3806# XDEF ****************************************************************	#
3807#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
3808#		       exception when the "reduced" version of the	#
3809#		       FPSP is implemented that does not emulate	#
3810#		       FP unimplemented instructions.			#
3811#									#
3812#	This handler should be the first code executed upon taking a	#
3813#	"Line F Emulator" exception in an operating system integrating	#
3814#	the reduced version of 060FPSP.					#
3815#									#
3816# XREF ****************************************************************	#
3817#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
3818#	_real_fline() - Handle all other cases (treated equally)	#
3819#									#
3820# INPUT ***************************************************************	#
3821#	- The system stack contains a "Line F Emulator" exception	#
3822#	  stack frame.							#
3823#									#
3824# OUTPUT **************************************************************	#
3825#	- The system stack is unchanged.				#
3826#									#
3827# ALGORITHM ***********************************************************	#
3828#	When a "Line F Emulator" exception occurs in a system where	#
3829# "FPU Unimplemented" instructions will not be emulated, the exception	#
3830# can occur because the FPU is disabled or the instruction is to be	#
3831# classified as "Line F". This module determines which case exists and	#
3832# calls the appropriate "callout".					#
3833#									#
3834#########################################################################
3835
3836	global		_fpsp_fline
3837_fpsp_fline:
3838
3839# the word at 0x6(%sp) is compared against 0x402c, the value this handler
3840# treats as "FPU disabled"; anything else is passed on as a plain Line F.
3841	cmpi.w		0x6(%sp),&0x402c	# FPU-disabled case?
3842	bne.l		_real_fline		# no; ordinary Line F callout
3843
3844	bra.l		_real_fpu_disabled	# yes; FPU-disabled callout
3845
3846#########################################################################
3847# XDEF ****************************************************************	#
3848#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
3849#									#
3850# XREF ****************************************************************	#
3851#	inc_areg() - increment an address register			#
3852#	dec_areg() - decrement an address register			#
3853#									#
3854# INPUT ***************************************************************	#
3855#	d0 = number of bytes to adjust <ea> by				#
3856#									#
3857# OUTPUT **************************************************************	#
3858#	None								#
3859#									#
3860# ALGORITHM ***********************************************************	#
3861# "Dummy" CALCulate Effective Address:					#
3862#	The stacked <ea> for FP unimplemented instructions and opclass	#
3863#	two packed instructions is correct with the exception of...	#
3864#									#
3865#	1) -(An)   : The register is not updated regardless of size.	#
3866#		     Also, for extended precision and packed, the	#
3867#		     stacked <ea> value is 8 bytes too big		#
3868#	2) (An)+   : The register is not updated.			#
3869#	3) #<data> : The upper longword of the immediate operand is	#
3870#		     stacked. b,w,l and s sizes are completely stacked;	#
3871#		     d,x, and p are not.				#
3872#									#
3873#########################################################################
3874
3875	global		_dcalc_ea
# _dcalc_ea: d0 = byte count on entry. Decodes the mode/reg fields from the
# low byte of EXC_OPWORD and returns the effective address in a0. For (An)+
# and -(An) the address register is adjusted via inc_areg()/dec_areg(); for
# #<data> the immediate flag is set and a0 = saved FPIAR + 4.
3876_dcalc_ea:
3877	mov.l		%d0, %a0		# move # bytes to %a0
3878
3879	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch low byte of opcode word
3880	mov.l		%d0, %d1		# make a copy
3881
3882	andi.w		&0x38, %d0		# extract mode field
3883	andi.l		&0x7, %d1		# extract reg  field
3884
3885	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3886	beq.b		dcea_pi			# yes
3887
3888	cmpi.b		%d0,&0x20		# is mode -(An) ?
3889	beq.b		dcea_pd			# yes
3890
3891	or.w		%d1,%d0			# concat mode,reg
3892	cmpi.b		%d0,&0x3c		# is mode #<data>?
3893
3894	beq.b		dcea_imm		# yes
3895
3896	mov.l		EXC_EA(%a6),%a0		# return <ea>
3897	rts
3898
3899# need to set immediate data flag here since we'll need to do
3900# an imem_read to fetch this later.
3901dcea_imm:
3902	mov.b		&immed_flg,SPCOND_FLG(%a6)
3903	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = saved FPIAR + 4
3904	rts
3905
3906# here, the <ea> is stacked correctly. however, we must update the
3907# address register...
3908dcea_pi:
3909	mov.l		%a0,%d0			# pass amt to inc by
3910	bsr.l		inc_areg		# inc addr register
3911
3912	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3913	rts
3914
3915# the <ea> is stacked correctly for all but extended and packed which
3916# the <ea>s are 8 bytes too large.
3917# it would make no sense to have a pre-decrement to a7 in supervisor
3918# mode so we don't even worry about this tricky case here : )
3919dcea_pd:
3920	mov.l		%a0,%d0			# pass amt to dec by
3921	bsr.l		dec_areg		# dec addr register
3922
3923	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3924
# NOTE(review): the compare below assumes d0 still holds the byte count
# after dec_areg() returns - confirm that dec_areg() preserves d0.
3925	cmpi.b		%d0,&0xc		# is opsize ext or packed?
3926	beq.b		dcea_pd2		# yes
3927	rts
3928dcea_pd2:
3929	sub.l		&0x8,%a0		# correct <ea>
3930	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
3931	rts
3932
3933#########################################################################
3934# XDEF ****************************************************************	#
3935#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
3936#			 and packed data opclass 3 operations.		#
3937#									#
3938# XREF ****************************************************************	#
3939#	None								#
3940#									#
3941# INPUT ***************************************************************	#
3942#	None								#
3943#									#
3944# OUTPUT **************************************************************	#
3945#	a0 = return correct effective address				#
3946#									#
3947# ALGORITHM ***********************************************************	#
3948#	For opclass 3 extended and packed data operations, the <ea>	#
3949# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
3950# modes. Also, while we're at it, the index register itself must get	#
3951# updated.								#
3952#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
3953# and return that value as the correct <ea> and store that value in An.	#
3954# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
3955#									#
3956#########################################################################
3957
3958# This calc_ea is currently used to retrieve the correct <ea>
3959# for fmove outs of type extended and packed.
# Returns the effective address in a0. Modes other than (An)+ and -(An)
# return the stacked EXC_EA unchanged; those two dispatch to ceaf_pi and
# ceaf_pd with d1 = register number.
3960	global		_calc_ea_fout
3961_calc_ea_fout:
3962	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opcode word
3963	mov.l		%d0,%d1			# make a copy
3964
3965	andi.w		&0x38,%d0		# extract mode field
3966	andi.l		&0x7,%d1		# extract reg  field
3967
3968	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3969	beq.b		ceaf_pi			# yes
3970
3971	cmpi.b		%d0,&0x20		# is mode -(An) ?
3972	beq.w		ceaf_pd			# yes
3973
3974	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3975	rts
3976
3977# (An)+ : extended and packed fmove out
3978#	: stacked <ea> is correct
3979#	: "An" not updated
# entered with d1 = register number. a0 is loaded with the stacked <ea> and
# control jumps through tbl_ceaf_pi to the handler that adds 12 to An.
3980ceaf_pi:
3981	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# fetch 16-bit offset for An
3982	mov.l		EXC_EA(%a6),%a0		# return value: stacked <ea>
3983	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)	# jump to table + offset
3984
3985	swbeg		&0x8
# offsets are relative to tbl_ceaf_pi, one entry per address register.
3986tbl_ceaf_pi:
3987	short		ceaf_pi0 - tbl_ceaf_pi
3988	short		ceaf_pi1 - tbl_ceaf_pi
3989	short		ceaf_pi2 - tbl_ceaf_pi
3990	short		ceaf_pi3 - tbl_ceaf_pi
3991	short		ceaf_pi4 - tbl_ceaf_pi
3992	short		ceaf_pi5 - tbl_ceaf_pi
3993	short		ceaf_pi6 - tbl_ceaf_pi
3994	short		ceaf_pi7 - tbl_ceaf_pi
3995
# a0/a1 were saved to EXC_DREGS (after d0/d1) by the handlers, so their
# saved images are updated; a2-a5 are updated in the live registers;
# a6 and a7 are updated through their EXC_A6/EXC_A7 images.
3996ceaf_pi0:
3997	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# saved a0 += 12
3998	rts
3999ceaf_pi1:
4000	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# saved a1 += 12
4001	rts
4002ceaf_pi2:
4003	add.l		&0xc,%a2		# a2 += 12
4004	rts
4005ceaf_pi3:
4006	add.l		&0xc,%a3		# a3 += 12
4007	rts
4008ceaf_pi4:
4009	add.l		&0xc,%a4		# a4 += 12
4010	rts
4011ceaf_pi5:
4012	add.l		&0xc,%a5		# a5 += 12
4013	rts
4014ceaf_pi6:
4015	addi.l		&0xc,EXC_A6(%a6)	# a6 image += 12
4016	rts
4017ceaf_pi7:
4018	mov.b		&mia7_flg,SPCOND_FLG(%a6)	# flag the (a7)+ special case
4019	addi.l		&0xc,EXC_A7(%a6)	# a7 image += 12
4020	rts
4021
4022# -(An) : extended and packed fmove out
4023#	: stacked <ea> = actual <ea> + 8
4024#	: "An" not updated
# entered with d1 = register number. Both a0 and the stacked EXC_EA are
# reduced by 8, then control jumps through tbl_ceaf_pd to the handler that
# stores the corrected address into An.
4025ceaf_pd:
4026	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# fetch 16-bit offset for An
4027	mov.l		EXC_EA(%a6),%a0		# stacked <ea>
4028	sub.l		&0x8,%a0		# a0 = corrected <ea>
4029	sub.l		&0x8,EXC_EA(%a6)	# correct the stacked copy too
4030	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)	# jump to table + offset
4031
4032	swbeg		&0x8
# offsets are relative to tbl_ceaf_pd, one entry per address register.
4033tbl_ceaf_pd:
4034	short		ceaf_pd0 - tbl_ceaf_pd
4035	short		ceaf_pd1 - tbl_ceaf_pd
4036	short		ceaf_pd2 - tbl_ceaf_pd
4037	short		ceaf_pd3 - tbl_ceaf_pd
4038	short		ceaf_pd4 - tbl_ceaf_pd
4039	short		ceaf_pd5 - tbl_ceaf_pd
4040	short		ceaf_pd6 - tbl_ceaf_pd
4041	short		ceaf_pd7 - tbl_ceaf_pd
4042
4043ceaf_pd0:
4044	mov.l		%a0,EXC_DREGS+0x8(%a6)	# saved a0 = corrected <ea>
4045	rts
4046ceaf_pd1:
4047	mov.l		%a0,EXC_DREGS+0xc(%a6)	# saved a1 = corrected <ea>
4048	rts
4049ceaf_pd2:
4050	mov.l		%a0,%a2			# a2 = corrected <ea>
4051	rts
4052ceaf_pd3:
4053	mov.l		%a0,%a3			# a3 = corrected <ea>
4054	rts
4055ceaf_pd4:
4056	mov.l		%a0,%a4			# a4 = corrected <ea>
4057	rts
4058ceaf_pd5:
4059	mov.l		%a0,%a5			# a5 = corrected <ea>
4060	rts
4061ceaf_pd6:
4062	mov.l		%a0,EXC_A6(%a6)		# a6 image = corrected <ea>
4063	rts
4064ceaf_pd7:
4065	mov.l		%a0,EXC_A7(%a6)		# a7 image = corrected <ea>
4066	mov.b		&mda7_flg,SPCOND_FLG(%a6)	# flag the -(a7) special case
4067	rts
4068
4069#
4070# This table holds the offsets of the emulation routines for each individual
4071# math operation relative to the address of this table. Included are
4072# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073# this table is for the version of the 060FPSP without transcendentals.
4074# The location within the table is determined by the extension bits of the
4075# operation longword.
4076#
4077
# 109 longword entries (indices 0x00-0x6c), each an offset from tbl_unsupp.
# an entry written as "tbl_unsupp - tbl_unsupp" is a zero offset, i.e. no
# routine is supplied for that index in this table.
4078	swbeg		&109
4079tbl_unsupp:
4080	long		fin		- tbl_unsupp	# 00: fmove
4081	long		fint		- tbl_unsupp	# 01: fint
4082	long		tbl_unsupp	- tbl_unsupp	# 02: fsinh
4083	long		fintrz		- tbl_unsupp	# 03: fintrz
4084	long		fsqrt		- tbl_unsupp	# 04: fsqrt
4085	long		tbl_unsupp	- tbl_unsupp	# 05: (none)
4086	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
4087	long		tbl_unsupp	- tbl_unsupp	# 07: (none)
4088	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
4089	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
4090	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
4091	long		tbl_unsupp	- tbl_unsupp	# 0b: (none)
4092	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
4093	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
4094	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
4095	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
4096	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
4097	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
4098	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
4099	long		tbl_unsupp	- tbl_unsupp	# 13: (none)
4100	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
4101	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
4102	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
4103	long		tbl_unsupp	- tbl_unsupp	# 17: (none)
4104	long		fabs		- tbl_unsupp	# 18: fabs
4105	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
4106	long		fneg		- tbl_unsupp	# 1a: fneg
4107	long		tbl_unsupp	- tbl_unsupp	# 1b: (none)
4108	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
4109	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
4110	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
4111	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
4112	long		fdiv		- tbl_unsupp	# 20: fdiv
4113	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
4114	long		fadd		- tbl_unsupp	# 22: fadd
4115	long		fmul		- tbl_unsupp	# 23: fmul
4116	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
4117	long		tbl_unsupp	- tbl_unsupp	# 25: frem
4118	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
4119	long		fsglmul		- tbl_unsupp	# 27: fsglmul
4120	long		fsub		- tbl_unsupp	# 28: fsub
4121	long		tbl_unsupp	- tbl_unsupp	# 29: (none)
4122	long		tbl_unsupp	- tbl_unsupp	# 2a: (none)
4123	long		tbl_unsupp	- tbl_unsupp	# 2b: (none)
4124	long		tbl_unsupp	- tbl_unsupp	# 2c: (none)
4125	long		tbl_unsupp	- tbl_unsupp	# 2d: (none)
4126	long		tbl_unsupp	- tbl_unsupp	# 2e: (none)
4127	long		tbl_unsupp	- tbl_unsupp	# 2f: (none)
4128	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
4129	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
4130	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
4131	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
4132	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
4133	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
4134	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
4135	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
4136	long		fcmp		- tbl_unsupp	# 38: fcmp
4137	long		tbl_unsupp	- tbl_unsupp	# 39: (none)
4138	long		ftst		- tbl_unsupp	# 3a: ftst
4139	long		tbl_unsupp	- tbl_unsupp	# 3b: (none)
4140	long		tbl_unsupp	- tbl_unsupp	# 3c: (none)
4141	long		tbl_unsupp	- tbl_unsupp	# 3d: (none)
4142	long		tbl_unsupp	- tbl_unsupp	# 3e: (none)
4143	long		tbl_unsupp	- tbl_unsupp	# 3f: (none)
4144	long		fsin		- tbl_unsupp	# 40: fsmove
4145	long		fssqrt		- tbl_unsupp	# 41: fssqrt
4146	long		tbl_unsupp	- tbl_unsupp	# 42: (none)
4147	long		tbl_unsupp	- tbl_unsupp	# 43: (none)
4148	long		fdin		- tbl_unsupp	# 44: fdmove
4149	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
4150	long		tbl_unsupp	- tbl_unsupp	# 46: (none)
4151	long		tbl_unsupp	- tbl_unsupp	# 47: (none)
4152	long		tbl_unsupp	- tbl_unsupp	# 48: (none)
4153	long		tbl_unsupp	- tbl_unsupp	# 49: (none)
4154	long		tbl_unsupp	- tbl_unsupp	# 4a: (none)
4155	long		tbl_unsupp	- tbl_unsupp	# 4b: (none)
4156	long		tbl_unsupp	- tbl_unsupp	# 4c: (none)
4157	long		tbl_unsupp	- tbl_unsupp	# 4d: (none)
4158	long		tbl_unsupp	- tbl_unsupp	# 4e: (none)
4159	long		tbl_unsupp	- tbl_unsupp	# 4f: (none)
4160	long		tbl_unsupp	- tbl_unsupp	# 50: (none)
4161	long		tbl_unsupp	- tbl_unsupp	# 51: (none)
4162	long		tbl_unsupp	- tbl_unsupp	# 52: (none)
4163	long		tbl_unsupp	- tbl_unsupp	# 53: (none)
4164	long		tbl_unsupp	- tbl_unsupp	# 54: (none)
4165	long		tbl_unsupp	- tbl_unsupp	# 55: (none)
4166	long		tbl_unsupp	- tbl_unsupp	# 56: (none)
4167	long		tbl_unsupp	- tbl_unsupp	# 57: (none)
4168	long		fsabs		- tbl_unsupp	# 58: fsabs
4169	long		tbl_unsupp	- tbl_unsupp	# 59: (none)
4170	long		fsneg		- tbl_unsupp	# 5a: fsneg
4171	long		tbl_unsupp	- tbl_unsupp	# 5b: (none)
4172	long		fdabs		- tbl_unsupp	# 5c: fdabs
4173	long		tbl_unsupp	- tbl_unsupp	# 5d: (none)
4174	long		fdneg		- tbl_unsupp	# 5e: fdneg
4175	long		tbl_unsupp	- tbl_unsupp	# 5f: (none)
4176	long		fsdiv		- tbl_unsupp	# 60: fsdiv
4177	long		tbl_unsupp	- tbl_unsupp	# 61: (none)
4178	long		fsadd		- tbl_unsupp	# 62: fsadd
4179	long		fsmul		- tbl_unsupp	# 63: fsmul
4180	long		fddiv		- tbl_unsupp	# 64: fddiv
4181	long		tbl_unsupp	- tbl_unsupp	# 65: (none)
4182	long		fdadd		- tbl_unsupp	# 66: fdadd
4183	long		fdmul		- tbl_unsupp	# 67: fdmul
4184	long		fssub		- tbl_unsupp	# 68: fssub
4185	long		tbl_unsupp	- tbl_unsupp	# 69: (none)
4186	long		tbl_unsupp	- tbl_unsupp	# 6a: (none)
4187	long		tbl_unsupp	- tbl_unsupp	# 6b: (none)
4188	long		fdsub		- tbl_unsupp	# 6c: fdsub
4189
4190#################################################
4191# Add this here so non-fp modules can compile.
4192# (smovcr is called from fpsp_inex.)
# The stub only branches to itself, so a call that actually reaches it
# never returns.
4193	global		smovcr
4194smovcr:
4195	bra.b		smovcr			# placeholder: loop forever
4196
4197#########################################################################
4198# XDEF ****************************************************************	#
4199#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
4200#									#
4201# XREF ****************************************************************	#
4202#	fetch_dreg() - fetch data register				#
4203#	{i,d,}mem_read() - fetch data from memory			#
4204#	_mem_write() - write data to memory				#
4205#	iea_iacc() - instruction memory access error occurred		#
4206#	iea_dacc() - data memory access error occurred			#
4207#	restore() - restore An index regs if access error occurred	#
4208#									#
4209# INPUT ***************************************************************	#
4210#	None								#
4211#									#
4212# OUTPUT **************************************************************	#
4213#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
4214#		d0 = size of dump					#
4215#		d1 = Dn							#
4216#	Else if instruction access error,				#
4217#		d0 = FSLW						#
4218#	Else if data access error,					#
4219#		d0 = FSLW						#
4220#		a0 = address of fault					#
4221#	Else								#
4222#		none.							#
4223#									#
4224# ALGORITHM ***********************************************************	#
4225#	The effective address must be calculated since this is entered	#
4226# from an "Unimplemented Effective Address" exception handler. So, we	#
4227# have our own fcalc_ea() routine here. If an access error is flagged	#
4228# by a _{i,d,}mem_read() call, we must exit through the special		#
4229# handler.								#
4230#	The data register is determined and its value loaded to get the	#
4231# string of FP registers affected. This value is used as an index into	#
4232# a lookup table such that we can determine the number of bytes		#
4233# involved.								#
4234#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
4235# to read in all FP values. Again, _mem_read() may fail and require a	#
4236# special exit.								#
4237#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
4238# to write all FP values. _mem_write() may also fail.			#
4239#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
4240# then we return the size of the dump and the string to the caller	#
4241# so that the move can occur outside of this routine. This special	#
4242# case is required so that moves to the system stack are handled	#
4243# correctly.								#
4244#									#
4245# DYNAMIC:								#
4246#	fmovm.x	dn, <ea>						#
4247#	fmovm.x	<ea>, dn						#
4248#									#
4249#	      <WORD 1>		      <WORD2>				#
4250#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
4251#									#
4252#	& = (0): predecrement addressing mode				#
4253#	    (1): postincrement or control addressing mode		#
4254#	@ = (0): move listed regs from memory to the FPU		#
4255#	    (1): move listed regs from the FPU to memory		#
4256#	$$$    : index of data register holding reg select mask		#
4257#									#
4258# NOTES:								#
4259#	If the data register holds a zero, then the			#
4260#	instruction is a nop.						#
4261#									#
4262#########################################################################
4263
4264	global		fmovm_dynamic
4265fmovm_dynamic:
4266
4267# extract the data register in which the bit string resides...
4268	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch low byte of extword
4269	andi.w		&0x70,%d1		# extract reg bits
4270	lsr.b		&0x4,%d1		# shift into lo bits
4271
4272# fetch the bit string into d0...
4273	bsr.l		fetch_dreg		# fetch reg string
4274
4275	andi.l		&0x000000ff,%d0		# keep only lo byte
4276
4277	mov.l		%d0,-(%sp)		# save strg
4278	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0	# d0 = table byte for this strg
4279	mov.l		%d0,-(%sp)		# save size
4280	bsr.l		fmovm_calc_ea		# calculate <ea>
4281	mov.l		(%sp)+,%d0		# restore size
4282	mov.l		(%sp)+,%d1		# restore strg
4283
4284# if the bit string is a zero, then the operation is a no-op
4285# but, make sure that we've calculated ea and advanced the opword pointer
# (the Z flag tested here was set by the mov.l into d1 just above.)
4286	beq.w		fmovm_data_done
4287
4288# separate move ins from move outs...
4289	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
4290	beq.w		fmovm_data_in		# it's a move in
4291
4292#############
4293# MOVE OUT: #
4294#############
4295fmovm_data_out:
4296	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
4297	bne.w		fmovm_out_ctrl		# control
4298
4299############################
4300fmovm_out_predec:
4301# for predecrement mode, the bit string is the opposite of both control
4302# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303# here, we convert it to be just like the others...
4304	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1	# d1 = converted strg
4305
4306	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
4307	beq.b		fmovm_out_ctrl		# user
4308
4309fmovm_out_predec_s:
4310	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311	bne.b		fmovm_out_ctrl		# no; store normally
4312
4313# the operation was unfortunately an: fmovm.x dn,-(sp)
4314# called from supervisor mode.
4315# we're also passing "size" and "strg" back to the calling routine
4316	rts
4317
4318############################
# fmovm_out_ctrl: entered with a0 = destination <ea>, d0 = size of the dump
# in bytes and d1 = register bit string (bit 7 = FP0 ... bit 0 = FP7).
# The selected registers are assembled in a buffer carved out of the stack
# and then written to the destination with _dmem_write(). fp0/fp1 come from
# their saved images (EXC_FP0/EXC_FP1); fp2-fp7 come from the live registers.
4319fmovm_out_ctrl:
4320	mov.l		%a0,%a1			# move <ea> to a1
4321
4322	sub.l		%d0,%sp			# subtract size of dump
4323	lea		(%sp),%a0		# a0 = fill ptr into stack buffer
4324
4325	tst.b		%d1			# should FP0 be moved?
4326	bpl.b		fmovm_out_ctrl_fp1	# no
4327
4328	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
4329	mov.l		0x4+EXC_FP0(%a6),(%a0)+
4330	mov.l		0x8+EXC_FP0(%a6),(%a0)+
4331
# each lsl.b moves the next bit of the string into bit 7, which bpl tests.
4332fmovm_out_ctrl_fp1:
4333	lsl.b		&0x1,%d1		# should FP1 be moved?
4334	bpl.b		fmovm_out_ctrl_fp2	# no
4335
4336	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
4337	mov.l		0x4+EXC_FP1(%a6),(%a0)+
4338	mov.l		0x8+EXC_FP1(%a6),(%a0)+
4339
4340fmovm_out_ctrl_fp2:
4341	lsl.b		&0x1,%d1		# should FP2 be moved?
4342	bpl.b		fmovm_out_ctrl_fp3	# no
4343
4344	fmovm.x		&0x20,(%a0)		# yes
4345	add.l		&0xc,%a0		# advance past 12-byte register image
4346
4347fmovm_out_ctrl_fp3:
4348	lsl.b		&0x1,%d1		# should FP3 be moved?
4349	bpl.b		fmovm_out_ctrl_fp4	# no
4350
4351	fmovm.x		&0x10,(%a0)		# yes
4352	add.l		&0xc,%a0		# advance past 12-byte register image
4353
4354fmovm_out_ctrl_fp4:
4355	lsl.b		&0x1,%d1		# should FP4 be moved?
4356	bpl.b		fmovm_out_ctrl_fp5	# no
4357
4358	fmovm.x		&0x08,(%a0)		# yes
4359	add.l		&0xc,%a0		# advance past 12-byte register image
4360
4361fmovm_out_ctrl_fp5:
4362	lsl.b		&0x1,%d1		# should FP5 be moved?
4363	bpl.b		fmovm_out_ctrl_fp6	# no
4364
4365	fmovm.x		&0x04,(%a0)		# yes
4366	add.l		&0xc,%a0		# advance past 12-byte register image
4367
4368fmovm_out_ctrl_fp6:
4369	lsl.b		&0x1,%d1		# should FP6 be moved?
4370	bpl.b		fmovm_out_ctrl_fp7	# no
4371
4372	fmovm.x		&0x02,(%a0)		# yes
4373	add.l		&0xc,%a0		# advance past 12-byte register image
4374
4375fmovm_out_ctrl_fp7:
4376	lsl.b		&0x1,%d1		# should FP7 be moved?
4377	bpl.b		fmovm_out_ctrl_done	# no
4378
4379	fmovm.x		&0x01,(%a0)		# yes
4380	add.l		&0xc,%a0		# advance past 12-byte register image
4381
4382fmovm_out_ctrl_done:
4383	mov.l		%a1,L_SCR1(%a6)		# remember the destination <ea>
4384
4385	lea		(%sp),%a0		# pass: supervisor src
4386	mov.l		%d0,-(%sp)		# save size
4387	bsr.l		_dmem_write		# copy data to user mem
4388
4389	mov.l		(%sp)+,%d0		# retrieve size
4390	add.l		%d0,%sp			# clear fpreg data from stack
4391
4392	tst.l		%d1			# did dstore err?
4393	bne.w		fmovm_out_err		# yes
4394
4395	rts
4396
4397############
4398# MOVE IN: #
4399############
# fmovm_data_in: fetch a register image from memory into a buffer on the
# supervisor stack with _dmem_read(), then distribute it to the selected
# FP registers.
#   in:	a0   = source <ea> (also saved in L_SCR1 for the error path)
#	d0   = size of the image in bytes
#	d1.b = bit string, bit 7 = FP0 ... bit 0 = FP7
#   FP0 and FP1 are written to the EXC_FP0/EXC_FP1 save areas in the
#   frame; FP2-FP7 are loaded directly into the FPU with fmovm.x.
#   If _dmem_read() reports a failure (d1 != 0) control passes to
#   fmovm_in_err.
4400fmovm_data_in:
4401	mov.l		%a0,L_SCR1(%a6)
4402
4403	sub.l		%d0,%sp			# make room for fpregs
4404	lea		(%sp),%a1
4405
4406	mov.l		%d1,-(%sp)		# save bit string for later
4407	mov.l		%d0,-(%sp)		# save # of bytes
4408
4409	bsr.l		_dmem_read		# copy data from user mem
4410
4411	mov.l		(%sp)+,%d0		# retrieve # of bytes
4412
# NOTE(review): on the error branch the saved bit string and the image
# buffer are still on the stack; presumably the fmovm_in_err/iea_dacc exit
# discards the whole frame, making that harmless - confirm.
4413	tst.l		%d1			# did dfetch fail?
4414	bne.w		fmovm_in_err		# yes
4415
4416	mov.l		(%sp)+,%d1		# load bit string
4417
4418	lea		(%sp),%a0		# addr of stack
4419
# each test looks at bit 7 of d1.b; the lsl.b before every later test moves
# the next register's select bit up into bit 7.
4420	tst.b		%d1			# should FP0 be moved?
4421	bpl.b		fmovm_data_in_fp1	# no
4422
4423	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
4424	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
4425	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
4426
4427fmovm_data_in_fp1:
4428	lsl.b		&0x1,%d1		# should FP1 be moved?
4429	bpl.b		fmovm_data_in_fp2	# no
4430
4431	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
4432	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
4433	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
4434
4435fmovm_data_in_fp2:
4436	lsl.b		&0x1,%d1		# should FP2 be moved?
4437	bpl.b		fmovm_data_in_fp3	# no
4438
4439	fmovm.x		(%a0)+,&0x20		# yes
4440
4441fmovm_data_in_fp3:
4442	lsl.b		&0x1,%d1		# should FP3 be moved?
4443	bpl.b		fmovm_data_in_fp4	# no
4444
4445	fmovm.x		(%a0)+,&0x10		# yes
4446
4447fmovm_data_in_fp4:
4448	lsl.b		&0x1,%d1		# should FP4 be moved?
4449	bpl.b		fmovm_data_in_fp5	# no
4450
4451	fmovm.x		(%a0)+,&0x08		# yes
4452
4453fmovm_data_in_fp5:
4454	lsl.b		&0x1,%d1		# should FP5 be moved?
4455	bpl.b		fmovm_data_in_fp6	# no
4456
4457	fmovm.x		(%a0)+,&0x04		# yes
4458
4459fmovm_data_in_fp6:
4460	lsl.b		&0x1,%d1		# should FP6 be moved?
4461	bpl.b		fmovm_data_in_fp7	# no
4462
4463	fmovm.x		(%a0)+,&0x02		# yes
4464
4465fmovm_data_in_fp7:
4466	lsl.b		&0x1,%d1		# should FP7 be moved?
4467	bpl.b		fmovm_data_in_done	# no
4468
4469	fmovm.x		(%a0)+,&0x01		# yes
4470
4471fmovm_data_in_done:
4472	add.l		%d0,%sp			# remove fpregs from stack
4473	rts
4474
4475#####################################
4476
# common no-op exit: taken when the register bit string is zero, so there
# is nothing to move in either direction.
4477fmovm_data_done:
4478	rts
4479
4480##############################################################################
4481
4482#
4483# table indexed by the operation's bit string that gives the number
4484# of bytes that will be moved.
4485#
4486# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487#
# 256 one-byte entries (32 rows of 8), one for every possible 8-bit bit
# string: entry 0x00 is 0x00 and entry 0xff is 0x60 (8 registers * 12).
4488tbl_fmovm_size:
4489	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4490	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4491	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4492	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4493	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4494	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4495	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4496	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4497	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4498	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4499	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4500	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4501	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4502	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4503	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4504	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4505	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4506	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4507	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4508	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4509	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4512	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4513	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4514	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4515	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4516	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4517	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4518	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4519	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4520	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4521
4522#
4523# table to convert a pre-decrement bit string into a post-increment
4524# or control bit string.
4525# ex:	0x00	==>	0x00
4526#	0x01	==>	0x80
4527#	0x02	==>	0x40
4528#		.
4529#		.
4530#	0xfd	==>	0xbf
4531#	0xfe	==>	0x7f
4532#	0xff	==>	0xff
4533#
# 256 one-byte entries (32 rows of 8); each entry is its own index with the
# 8 bits in reverse order. Read by fmovm_out_predec with the original bit
# string as the index.
4534tbl_fmovm_convert:
4535	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4536	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4537	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4538	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4539	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4540	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4541	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4542	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4543	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4544	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4545	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4546	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4547	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4548	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4549	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4550	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4551	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4552	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4553	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4554	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4555	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4556	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4557	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4558	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4559	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4560	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4561	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4562	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4563	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4564	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4565	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4566	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4567
4568	global		fmovm_calc_ea
4569###############################################
4570# _fmovm_calc_ea: calculate effective address #
4571###############################################
# in:	d0 = number of bytes to be transferred; it is copied to a0, where
#	     the (An)+ and -(An) handlers use it as the adjustment amount
# out:	a0 = <ea>, produced by the handler that is jumped to; that
#	     handler's rts returns directly to the caller of fmovm_calc_ea
# The low 6 bits of EXC_OPWORD ({mode,reg}) index tbl_fea_mode, a table of
# 64 16-bit offsets relative to tbl_fea_mode itself. d1 is left holding the
# 3-bit reg field for handlers that need it (faddr_ind_ext).
4572fmovm_calc_ea:
4573	mov.l		%d0,%a0			# move # bytes to a0
4574
4575# currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576# easily changed if they were inputs passed in registers.
4577	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
4578	mov.w		%d0,%d1			# make a copy
4579
4580	andi.w		&0x3f,%d0		# extract mode field
4581	andi.l		&0x7,%d1		# extract reg  field
4582
4583# jump to the corresponding function for each {MODE,REG} pair.
4584	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586
4587	swbeg		&64
4588tbl_fea_mode:
# mode 0, reg 0-7: no handler (offset 0).
# NOTE(review): a zero offset would make the jmp above land on the table
# itself, so callers presumably never dispatch these encodings - confirm.
4589	short		tbl_fea_mode	-	tbl_fea_mode
4590	short		tbl_fea_mode	-	tbl_fea_mode
4591	short		tbl_fea_mode	-	tbl_fea_mode
4592	short		tbl_fea_mode	-	tbl_fea_mode
4593	short		tbl_fea_mode	-	tbl_fea_mode
4594	short		tbl_fea_mode	-	tbl_fea_mode
4595	short		tbl_fea_mode	-	tbl_fea_mode
4596	short		tbl_fea_mode	-	tbl_fea_mode
4597
# mode 1, reg 0-7: no handler (offset 0)
4598	short		tbl_fea_mode	-	tbl_fea_mode
4599	short		tbl_fea_mode	-	tbl_fea_mode
4600	short		tbl_fea_mode	-	tbl_fea_mode
4601	short		tbl_fea_mode	-	tbl_fea_mode
4602	short		tbl_fea_mode	-	tbl_fea_mode
4603	short		tbl_fea_mode	-	tbl_fea_mode
4604	short		tbl_fea_mode	-	tbl_fea_mode
4605	short		tbl_fea_mode	-	tbl_fea_mode
4606
# mode 2: (An)
4607	short		faddr_ind_a0	-	tbl_fea_mode
4608	short		faddr_ind_a1	-	tbl_fea_mode
4609	short		faddr_ind_a2	-	tbl_fea_mode
4610	short		faddr_ind_a3	-	tbl_fea_mode
4611	short		faddr_ind_a4	-	tbl_fea_mode
4612	short		faddr_ind_a5	-	tbl_fea_mode
4613	short		faddr_ind_a6	-	tbl_fea_mode
4614	short		faddr_ind_a7	-	tbl_fea_mode
4615
# mode 3: (An)+
4616	short		faddr_ind_p_a0	-	tbl_fea_mode
4617	short		faddr_ind_p_a1	-	tbl_fea_mode
4618	short		faddr_ind_p_a2	-	tbl_fea_mode
4619	short		faddr_ind_p_a3	-	tbl_fea_mode
4620	short		faddr_ind_p_a4	-	tbl_fea_mode
4621	short		faddr_ind_p_a5	-	tbl_fea_mode
4622	short		faddr_ind_p_a6	-	tbl_fea_mode
4623	short		faddr_ind_p_a7	-	tbl_fea_mode
4624
# mode 4: -(An)
4625	short		faddr_ind_m_a0	-	tbl_fea_mode
4626	short		faddr_ind_m_a1	-	tbl_fea_mode
4627	short		faddr_ind_m_a2	-	tbl_fea_mode
4628	short		faddr_ind_m_a3	-	tbl_fea_mode
4629	short		faddr_ind_m_a4	-	tbl_fea_mode
4630	short		faddr_ind_m_a5	-	tbl_fea_mode
4631	short		faddr_ind_m_a6	-	tbl_fea_mode
4632	short		faddr_ind_m_a7	-	tbl_fea_mode
4633
# mode 5: (d16,An)
4634	short		faddr_ind_disp_a0	-	tbl_fea_mode
4635	short		faddr_ind_disp_a1	-	tbl_fea_mode
4636	short		faddr_ind_disp_a2	-	tbl_fea_mode
4637	short		faddr_ind_disp_a3	-	tbl_fea_mode
4638	short		faddr_ind_disp_a4	-	tbl_fea_mode
4639	short		faddr_ind_disp_a5	-	tbl_fea_mode
4640	short		faddr_ind_disp_a6	-	tbl_fea_mode
4641	short		faddr_ind_disp_a7	-	tbl_fea_mode
4642
# mode 6: indexed / memory indirect forms; one handler for every An, which
# selects the base register from the reg field left in d1
4643	short		faddr_ind_ext	-	tbl_fea_mode
4644	short		faddr_ind_ext	-	tbl_fea_mode
4645	short		faddr_ind_ext	-	tbl_fea_mode
4646	short		faddr_ind_ext	-	tbl_fea_mode
4647	short		faddr_ind_ext	-	tbl_fea_mode
4648	short		faddr_ind_ext	-	tbl_fea_mode
4649	short		faddr_ind_ext	-	tbl_fea_mode
4650	short		faddr_ind_ext	-	tbl_fea_mode
4651
# mode 7: reg 0-3 select absolute short/long and the two PC-relative forms;
# reg 4-7 have no handler (offset 0)
4652	short		fabs_short	-	tbl_fea_mode
4653	short		fabs_long	-	tbl_fea_mode
4654	short		fpc_ind		-	tbl_fea_mode
4655	short		fpc_ind_ext	-	tbl_fea_mode
4656	short		tbl_fea_mode	-	tbl_fea_mode
4657	short		tbl_fea_mode	-	tbl_fea_mode
4658	short		tbl_fea_mode	-	tbl_fea_mode
4659	short		tbl_fea_mode	-	tbl_fea_mode
4660
4661###################################
4662# Address register indirect: (An) #
4663###################################
# Each handler returns the current value of An in a0 as the <ea>.
# a0 and a1 are read from the EXC_DREGS save area (offsets 0x8 and 0xc),
# a2-a5 directly from the live registers, a6 from the longword at (%a6),
# and a7 from EXC_A7.
# NOTE(review): presumably a0/a1/a6/a7 come from the frame because the live
# registers are in use by the handler itself - confirm against the entry code.
4664faddr_ind_a0:
4665	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
4666	rts
4667
4668faddr_ind_a1:
4669	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
4670	rts
4671
4672faddr_ind_a2:
4673	mov.l		%a2,%a0			# Get current a2
4674	rts
4675
4676faddr_ind_a3:
4677	mov.l		%a3,%a0			# Get current a3
4678	rts
4679
4680faddr_ind_a4:
4681	mov.l		%a4,%a0			# Get current a4
4682	rts
4683
4684faddr_ind_a5:
4685	mov.l		%a5,%a0			# Get current a5
4686	rts
4687
4688faddr_ind_a6:
4689	mov.l		(%a6),%a0		# Get current a6
4690	rts
4691
4692faddr_ind_a7:
4693	mov.l		EXC_A7(%a6),%a0		# Get current a7
4694	rts
4695
4696#####################################################
4697# Address register indirect w/ postincrement: (An)+ #
4698#####################################################
# On entry a0 holds the number of bytes to be transferred (placed there by
# fmovm_calc_ea). Each handler returns the un-incremented An in a0 as the
# <ea> and writes An + size back to wherever that An is kept (EXC_DREGS
# slot, live register, (%a6) or EXC_A7). d0 and d1 are used as scratch.
4699faddr_ind_p_a0:
4700	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4701	mov.l		%d0,%d1
4702	add.l		%a0,%d1			# Increment
4703	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
4704	mov.l		%d0,%a0
4705	rts
4706
4707faddr_ind_p_a1:
4708	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4709	mov.l		%d0,%d1
4710	add.l		%a0,%d1			# Increment
4711	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
4712	mov.l		%d0,%a0
4713	rts
4714
4715faddr_ind_p_a2:
4716	mov.l		%a2,%d0			# Get current a2
4717	mov.l		%d0,%d1
4718	add.l		%a0,%d1			# Increment
4719	mov.l		%d1,%a2			# Save incr value
4720	mov.l		%d0,%a0
4721	rts
4722
4723faddr_ind_p_a3:
4724	mov.l		%a3,%d0			# Get current a3
4725	mov.l		%d0,%d1
4726	add.l		%a0,%d1			# Increment
4727	mov.l		%d1,%a3			# Save incr value
4728	mov.l		%d0,%a0
4729	rts
4730
4731faddr_ind_p_a4:
4732	mov.l		%a4,%d0			# Get current a4
4733	mov.l		%d0,%d1
4734	add.l		%a0,%d1			# Increment
4735	mov.l		%d1,%a4			# Save incr value
4736	mov.l		%d0,%a0
4737	rts
4738
4739faddr_ind_p_a5:
4740	mov.l		%a5,%d0			# Get current a5
4741	mov.l		%d0,%d1
4742	add.l		%a0,%d1			# Increment
4743	mov.l		%d1,%a5			# Save incr value
4744	mov.l		%d0,%a0
4745	rts
4746
4747faddr_ind_p_a6:
4748	mov.l		(%a6),%d0		# Get current a6
4749	mov.l		%d0,%d1
4750	add.l		%a0,%d1			# Increment
4751	mov.l		%d1,(%a6)		# Save incr value
4752	mov.l		%d0,%a0
4753	rts
4754
# (a7)+ additionally records mia7_flg in SPCOND_FLG so later code can
# recognise that the stack pointer itself was the postincremented register.
4755faddr_ind_p_a7:
4756	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4757
4758	mov.l		EXC_A7(%a6),%d0		# Get current a7
4759	mov.l		%d0,%d1
4760	add.l		%a0,%d1			# Increment
4761	mov.l		%d1,EXC_A7(%a6)		# Save incr value
4762	mov.l		%d0,%a0
4763	rts
4764
4765####################################################
4766# Address register indirect w/ predecrement: -(An) #
4767####################################################
# On entry a0 holds the number of bytes to be transferred (placed there by
# fmovm_calc_ea). Each handler computes An - size, writes it back to
# wherever that An is kept, and returns the same decremented value in a0 as
# the <ea>. d0 is used as scratch.
4768faddr_ind_m_a0:
4769	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4770	sub.l		%a0,%d0			# Decrement
4771	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
4772	mov.l		%d0,%a0
4773	rts
4774
4775faddr_ind_m_a1:
4776	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4777	sub.l		%a0,%d0			# Decrement
4778	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
4779	mov.l		%d0,%a0
4780	rts
4781
4782faddr_ind_m_a2:
4783	mov.l		%a2,%d0			# Get current a2
4784	sub.l		%a0,%d0			# Decrement
4785	mov.l		%d0,%a2			# Save decr value
4786	mov.l		%d0,%a0
4787	rts
4788
4789faddr_ind_m_a3:
4790	mov.l		%a3,%d0			# Get current a3
4791	sub.l		%a0,%d0			# Decrement
4792	mov.l		%d0,%a3			# Save decr value
4793	mov.l		%d0,%a0
4794	rts
4795
4796faddr_ind_m_a4:
4797	mov.l		%a4,%d0			# Get current a4
4798	sub.l		%a0,%d0			# Decrement
4799	mov.l		%d0,%a4			# Save decr value
4800	mov.l		%d0,%a0
4801	rts
4802
4803faddr_ind_m_a5:
4804	mov.l		%a5,%d0			# Get current a5
4805	sub.l		%a0,%d0			# Decrement
4806	mov.l		%d0,%a5			# Save decr value
4807	mov.l		%d0,%a0
4808	rts
4809
4810faddr_ind_m_a6:
4811	mov.l		(%a6),%d0		# Get current a6
4812	sub.l		%a0,%d0			# Decrement
4813	mov.l		%d0,(%a6)		# Save decr value
4814	mov.l		%d0,%a0
4815	rts
4816
# -(a7) additionally records mda7_flg in SPCOND_FLG; fmovm_out_predec_s
# tests for this value to detect a supervisor-mode store to -(sp).
4817faddr_ind_m_a7:
4818	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4819
4820	mov.l		EXC_A7(%a6),%d0		# Get current a7
4821	sub.l		%a0,%d0			# Decrement
4822	mov.l		%d0,EXC_A7(%a6)		# Save decr value
4823	mov.l		%d0,%a0
4824	rts
4825
4826########################################################
4827# Address register indirect w/ displacement: (d16, An) #
4828########################################################
# Every handler follows the same steps:
#   1. read the 16-bit displacement at EXC_EXTWPTR with _imem_read_word()
#      and advance EXC_EXTWPTR by 2;
#   2. if the fetch failed (d1 != 0) leave through iea_iacc;
#   3. move the word into a0 (word moves to an address register are
#      sign-extended) and add the current value of An.
# Result: a0 = An + d16. The handlers differ only in where An is read from.
4829faddr_ind_disp_a0:
4830	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4831	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4832	bsr.l		_imem_read_word
4833
4834	tst.l		%d1			# did ifetch fail?
4835	bne.l		iea_iacc		# yes
4836
4837	mov.w		%d0,%a0			# sign extend displacement
4838
4839	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
4840	rts
4841
4842faddr_ind_disp_a1:
4843	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4844	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4845	bsr.l		_imem_read_word
4846
4847	tst.l		%d1			# did ifetch fail?
4848	bne.l		iea_iacc		# yes
4849
4850	mov.w		%d0,%a0			# sign extend displacement
4851
4852	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
4853	rts
4854
4855faddr_ind_disp_a2:
4856	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4857	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4858	bsr.l		_imem_read_word
4859
4860	tst.l		%d1			# did ifetch fail?
4861	bne.l		iea_iacc		# yes
4862
4863	mov.w		%d0,%a0			# sign extend displacement
4864
4865	add.l		%a2,%a0			# a2 + d16
4866	rts
4867
4868faddr_ind_disp_a3:
4869	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4870	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4871	bsr.l		_imem_read_word
4872
4873	tst.l		%d1			# did ifetch fail?
4874	bne.l		iea_iacc		# yes
4875
4876	mov.w		%d0,%a0			# sign extend displacement
4877
4878	add.l		%a3,%a0			# a3 + d16
4879	rts
4880
4881faddr_ind_disp_a4:
4882	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4883	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4884	bsr.l		_imem_read_word
4885
4886	tst.l		%d1			# did ifetch fail?
4887	bne.l		iea_iacc		# yes
4888
4889	mov.w		%d0,%a0			# sign extend displacement
4890
4891	add.l		%a4,%a0			# a4 + d16
4892	rts
4893
4894faddr_ind_disp_a5:
4895	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4896	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4897	bsr.l		_imem_read_word
4898
4899	tst.l		%d1			# did ifetch fail?
4900	bne.l		iea_iacc		# yes
4901
4902	mov.w		%d0,%a0			# sign extend displacement
4903
4904	add.l		%a5,%a0			# a5 + d16
4905	rts
4906
4907faddr_ind_disp_a6:
4908	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4909	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4910	bsr.l		_imem_read_word
4911
4912	tst.l		%d1			# did ifetch fail?
4913	bne.l		iea_iacc		# yes
4914
4915	mov.w		%d0,%a0			# sign extend displacement
4916
4917	add.l		(%a6),%a0		# a6 + d16
4918	rts
4919
4920faddr_ind_disp_a7:
4921	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4922	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4923	bsr.l		_imem_read_word
4924
4925	tst.l		%d1			# did ifetch fail?
4926	bne.l		iea_iacc		# yes
4927
4928	mov.w		%d0,%a0			# sign extend displacement
4929
4930	add.l		EXC_A7(%a6),%a0		# a7 + d16
4931	rts
4932
4933########################################################################
4934# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4936# Memory indirect postindexed: ([bd, An], Xn, od)		       #
4937# Memory indirect preindexed: ([bd, An, Xn], od)		       #
4938########################################################################
# in:	d1 = reg field (0-7) of the opword, as left by fmovm_calc_ea
# out:	a0 = <ea>
# The base An is fetched first and parked on the stack while the extension
# word is read. If bit 8 of the extension word is set the work is handed to
# fcalc_mem_ind with d0 = extension word and a0 = base; otherwise the
# brief format is decoded here:
#	bits 15:12 = index register number	bit 11 = index is long
#	bits 10:9  = scale (shift count)	bits 7:0 = signed disp8
4939faddr_ind_ext:
# NOTE(review): +8 presumably maps the An number into fetch_dreg()'s
# 0-15 register numbering (8-15 = address registers) - confirm.
4940	addq.l		&0x8,%d1
4941	bsr.l		fetch_dreg		# fetch base areg
4942	mov.l		%d0,-(%sp)
4943
4944	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4945	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4946	bsr.l		_imem_read_word		# fetch extword in d0
4947
4948	tst.l		%d1			# did ifetch fail?
4949	bne.l		iea_iacc		# yes
4950
4951	mov.l		(%sp)+,%a0		# a0 = base An saved above
4952
4953	btst		&0x8,%d0		# full-format extension word?
4954	bne.w		fcalc_mem_ind		# yes
4955
4956	mov.l		%d0,L_SCR1(%a6)		# hold opword
4957
4958	mov.l		%d0,%d1
4959	rol.w		&0x4,%d1
4960	andi.w		&0xf,%d1		# extract index regno
4961
4962# count on fetch_dreg() not to alter a0...
4963	bsr.l		fetch_dreg		# fetch index
4964
4965	mov.l		%d2,-(%sp)		# save d2
4966	mov.l		L_SCR1(%a6),%d2		# fetch opword
4967
4968	btst		&0xb,%d2		# is it word or long?
4969	bne.b		faii8_long
4970	ext.l		%d0			# sign extend word index
4971faii8_long:
4972	mov.l		%d2,%d1
4973	rol.w		&0x7,%d1
4974	andi.l		&0x3,%d1		# extract scale value
4975
4976	lsl.l		%d1,%d0			# shift index by scale
4977
4978	extb.l		%d2			# sign extend displacement
4979	add.l		%d2,%d0			# index + disp
4980	add.l		%d0,%a0			# An + (index + disp)
4981
4982	mov.l		(%sp)+,%d2		# restore old d2
4983	rts
4984
4985###########################
4986# Absolute short: (XXX).W #
4987###########################
# Reads the 16-bit address at EXC_EXTWPTR (advancing the pointer by 2) and
# returns it in a0; the word move into a0 sign-extends it to 32 bits.
# A failed instruction fetch (d1 != 0) leaves through iea_iacc.
4988fabs_short:
4989	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4990	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4991	bsr.l		_imem_read_word		# fetch short address
4992
4993	tst.l		%d1			# did ifetch fail?
4994	bne.l		iea_iacc		# yes
4995
4996	mov.w		%d0,%a0			# return <ea> in a0
4997	rts
4998
4999##########################
5000# Absolute long: (XXX).L #
5001##########################
# Reads the 32-bit address at EXC_EXTWPTR (advancing the pointer by 4) and
# returns it in a0. A failed instruction fetch (d1 != 0) leaves through
# iea_iacc.
5002fabs_long:
5003	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5004	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5005	bsr.l		_imem_read_long		# fetch long address
5006
5007	tst.l		%d1			# did ifetch fail?
5008	bne.l		iea_iacc		# yes
5009
5010	mov.l		%d0,%a0			# return <ea> in a0
5011	rts
5012
5013#######################################################
5014# Program counter indirect w/ displacement: (d16, PC) #
5015#######################################################
# Returns a0 = (address of the displacement word) + sign-extended d16.
# A failed instruction fetch (d1 != 0) leaves through iea_iacc.
5016fpc_ind:
5017	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5018	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5019	bsr.l		_imem_read_word		# fetch word displacement
5020
5021	tst.l		%d1			# did ifetch fail?
5022	bne.l		iea_iacc		# yes
5023
5024	mov.w		%d0,%a0			# sign extend displacement
5025
5026	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
5027
5028# EXC_EXTWPTR was already advanced by 2 above; back up to the address of
# the displacement word itself, which is the base for this mode.
5029	subq.l		&0x2,%a0		# adjust <ea>
5030	rts
5031
5032##########################################################
5033# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5034# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
5035# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5036# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5037##########################################################
# out:	a0 = <ea>
# The base is the address of the extension word (EXC_EXTWPTR after the
# fetch, minus 2). If bit 8 of the extension word is set the work is handed
# to fcalc_mem_ind with d0 = extension word and a0 = base; otherwise the
# brief format is decoded here:
#	bits 15:12 = index register number	bit 11 = index is long
#	bits 10:9  = scale (shift count)	bits 7:0 = signed disp8
5038fpc_ind_ext:
5039	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5040	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5041	bsr.l		_imem_read_word		# fetch ext word
5042
5043	tst.l		%d1			# did ifetch fail?
5044	bne.l		iea_iacc		# yes
5045
5046	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
5047	subq.l		&0x2,%a0		# adjust base
5048
5049	btst		&0x8,%d0		# is disp only 8 bits?
5050	bne.w		fcalc_mem_ind		# calc memory indirect
5051
5052	mov.l		%d0,L_SCR1(%a6)		# store opword
5053
5054	mov.l		%d0,%d1			# make extword copy
5055	rol.w		&0x4,%d1		# rotate reg num into place
5056	andi.w		&0xf,%d1		# extract register number
5057
5058# count on fetch_dreg() not to alter a0...
5059	bsr.l		fetch_dreg		# fetch index
5060
5061	mov.l		%d2,-(%sp)		# save d2
5062	mov.l		L_SCR1(%a6),%d2		# fetch opword
5063
5064	btst		&0xb,%d2		# is index word or long?
5065	bne.b		fpii8_long		# long
5066	ext.l		%d0			# sign extend word index
5067fpii8_long:
5068	mov.l		%d2,%d1
5069	rol.w		&0x7,%d1		# rotate scale value into place
5070	andi.l		&0x3,%d1		# extract scale value
5071
5072	lsl.l		%d1,%d0			# shift index by scale
5073
5074	extb.l		%d2			# sign extend displacement
5075	add.l		%d2,%d0			# disp + index
5076	add.l		%d0,%a0			# PC base + (index + disp)
5077
5078	mov.l		(%sp)+,%d2		# restore temp register
5079	rts
5080
5081# d2 = index
5082# d3 = base
5083# d4 = od
5084# d5 = extword
# fcalc_mem_ind: <ea> calculation for an extension word with bit 8 set
# (entered from faddr_ind_ext and fpc_ind_ext).
#   in:	d0 = extension word, a0 = base (An, or the PC-relative base)
#   out:	a0 = <ea>; d2-d5 are saved on the stack and restored before rts
#   Extension word fields used below:
#	bit 7      = suppress the base (d3 is cleared)
#	bit 6      = suppress the index
#	bits 15:12 = index register number, bit 11 = index is long
#	bits 10:9  = index scale (shift count)
#	bits 5:4   = bd size   (<2: none, 2: word, 3: long)
#	bit 2      = set: postindexed, clear: preindexed
#	bits 1:0   = od size   (0: no memory indirection, 1: null,
#				2: word, 3: long)
#   Instruction-stream fetch failures leave through fcea_iacc and the
#   memory-indirect data fetch failure through fcea_err; both restore d2-d5.
5085fcalc_mem_ind:
5086	btst		&0x6,%d0		# is the index suppressed?
5087	beq.b		fcalc_index
5088
5089	movm.l		&0x3c00,-(%sp)		# save d2-d5
5090
5091	mov.l		%d0,%d5			# put extword in d5
5092	mov.l		%a0,%d3			# put base in d3
5093
5094	clr.l		%d2			# yes, so index = 0
5095	bra.b		fbase_supp_ck
5096
5097# index:
5098fcalc_index:
5099	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
5100	bfextu		%d0{&16:&4},%d1		# fetch dreg index
# a0 (the base) is read back into d3 after this call, so fetch_dreg() must
# leave it untouched.
5101	bsr.l		fetch_dreg
5102
5103	movm.l		&0x3c00,-(%sp)		# save d2-d5
5104	mov.l		%d0,%d2			# put index in d2
5105	mov.l		L_SCR1(%a6),%d5
5106	mov.l		%a0,%d3
5107
5108	btst		&0xb,%d5		# is index word or long?
5109	bne.b		fno_ext
5110	ext.l		%d2
5111
5112fno_ext:
5113	bfextu		%d5{&21:&2},%d0		# scale field (bits 10:9)
5114	lsl.l		%d0,%d2			# index <<= scale
5115
5116# base address (passed as parameter in d3):
5117# we clear the value here if it should actually be suppressed.
5118fbase_supp_ck:
5119	btst		&0x7,%d5		# is the base suppressed?
5120	beq.b		fno_base_sup
5121	clr.l		%d3
5122
5123# base displacement:
5124fno_base_sup:
5125	bfextu		%d5{&26:&2},%d0		# get bd size
5126#	beq.l		fmovm_error		# if (size == 0) it's reserved
5127
5128	cmpi.b		%d0,&0x2
5129	blt.b		fno_bd
5130	beq.b		fget_word_bd
5131
# bd size field == 3: long base displacement
5132	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5133	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5134	bsr.l		_imem_read_long
5135
5136	tst.l		%d1			# did ifetch fail?
5137	bne.l		fcea_iacc		# yes
5138
5139	bra.b		fchk_ind
5140
5141fget_word_bd:
5142	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5143	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5144	bsr.l		_imem_read_word
5145
5146	tst.l		%d1			# did ifetch fail?
5147	bne.l		fcea_iacc		# yes
5148
5149	ext.l		%d0			# sign extend bd
5150
5151fchk_ind:
5152	add.l		%d0,%d3			# base += bd
5153
5154# outer displacement:
5155fno_bd:
5156	bfextu		%d5{&30:&2},%d0		# get od size field
5157	beq.w		faii_bd			# 0: no memory indirection
5158
5159	cmpi.b		%d0,&0x2
5160	blt.b		fnull_od
5161	beq.b		fword_od
5162
# od size field == 3: long outer displacement
5163	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5164	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5165	bsr.l		_imem_read_long
5166
5167	tst.l		%d1			# did ifetch fail?
5168	bne.l		fcea_iacc		# yes
5169
5170	bra.b		fadd_them
5171
5172fword_od:
5173	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5174	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5175	bsr.l		_imem_read_word
5176
5177	tst.l		%d1			# did ifetch fail?
5178	bne.l		fcea_iacc		# yes
5179
5180	ext.l		%d0			# sign extend od
5181	bra.b		fadd_them
5182
5183fnull_od:
5184	clr.l		%d0
5185
5186fadd_them:
5187	mov.l		%d0,%d4
5188
5189	btst		&0x2,%d5		# pre or post indexing?
5190	beq.b		fpre_indexed
5191
# postindexed: fetch the pointer at (base + bd), then add index and od
5192	mov.l		%d3,%a0
5193	bsr.l		_dmem_read_long
5194
5195	tst.l		%d1			# did dfetch fail?
5196	bne.w		fcea_err		# yes
5197
5198	add.l		%d2,%d0			# <ea> += index
5199	add.l		%d4,%d0			# <ea> += od
5200	bra.b		fdone_ea
5201
# preindexed: fetch the pointer at (base + bd + index), then add od
5202fpre_indexed:
5203	add.l		%d2,%d3			# preindexing
5204	mov.l		%d3,%a0
5205	bsr.l		_dmem_read_long
5206
5207	tst.l		%d1			# did dfetch fail?
5208	bne.w		fcea_err		# yes
5209
5210	add.l		%d4,%d0			# ea += od
5211	bra.b		fdone_ea
5212
5213faii_bd:
5214	add.l		%d2,%d3			# ea = (base + bd) + index
5215	mov.l		%d3,%d0
5216fdone_ea:
5217	mov.l		%d0,%a0
5218
5219	movm.l		(%sp)+,&0x003c		# restore d2-d5
5220	rts
5221
5222#########################################################
# Error exits for fcalc_mem_ind. Both are entered with d2-d5 still saved on
# the stack and pop them before leaving.
#
# fcea_err: the memory-indirect _dmem_read_long() failed. d3 holds the
# address that was being read; it is passed in a0 together with the word
# 0x0101 in d0 to iea_dacc.
# NOTE(review): the encoding of the d0 status word is defined by iea_dacc,
# which is not visible here.
5223fcea_err:
5224	mov.l		%d3,%a0
5225
5226	movm.l		(%sp)+,&0x003c		# restore d2-d5
5227	mov.w		&0x0101,%d0
5228	bra.l		iea_dacc
5229
# fcea_iacc: an instruction-stream fetch (bd or od) failed.
5230fcea_iacc:
5231	movm.l		(%sp)+,&0x003c		# restore d2-d5
5232	bra.l		iea_iacc
5233
# Error exits for the fmovm data transfer: fmovm_out_err is taken when
# _dmem_write() fails in fmovm_out_ctrl_done, fmovm_in_err when _dmem_read()
# fails in fmovm_data_in. Each calls restore(), loads a direction-specific
# word into d0, and joins fmovm_err, which reloads the operand <ea> that was
# saved in L_SCR1 and leaves through iea_dacc.
# NOTE(review): what restore() undoes and how iea_dacc decodes the d0 word
# (0x00e1 / 0x0161) is defined outside this section - confirm there.
5234fmovm_out_err:
5235	bsr.l		restore
5236	mov.w		&0x00e1,%d0
5237	bra.b		fmovm_err
5238
5239fmovm_in_err:
5240	bsr.l		restore
5241	mov.w		&0x0161,%d0
5242
5243fmovm_err:
5244	mov.l		L_SCR1(%a6),%a0		# a0 = <ea> of the failed access
5245	bra.l		iea_dacc
5246
5247#########################################################################
5248# XDEF ****************************************************************	#
5249#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
5250#									#
5251# XREF ****************************************************************	#
5252#	_imem_read_long() - read longword from memory			#
5253#	iea_iacc() - _imem_read_long() failed; error recovery		#
5254#									#
5255# INPUT ***************************************************************	#
5256#	None								#
5257#									#
5258# OUTPUT **************************************************************	#
5259#	If _imem_read_long() doesn't fail:				#
5260#		USER_FPCR(a6)  = new FPCR value				#
5261#		USER_FPSR(a6)  = new FPSR value				#
5262#		USER_FPIAR(a6) = new FPIAR value			#
5263#									#
5264# ALGORITHM ***********************************************************	#
5265#	Decode the instruction type by looking at the extension word	#
5266# in order to see how many control registers to fetch from memory.	#
5267# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
5268# the special access error exit handler iea_iacc().			#
5269#									#
5270# Instruction word decoding:						#
5271#									#
5272#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
5273#									#
5274#		WORD1			WORD2				#
5275#	1111 0010 00 111100	100$ $$00 0000 0000			#
5276#									#
5277#	$$$ (100): FPCR							#
5278#	    (010): FPSR							#
5279#	    (001): FPIAR						#
5280#	    (000): FPIAR						#
5281#									#
5282#########################################################################
5283
5284	global		fmovm_ctrl
# Dispatch on the first byte of the extension word:
#	0x9c -> fctrl_in_7 (FPCR, FPSR, FPIAR)
#	0x98 -> fctrl_in_6 (FPCR, FPSR)
#	0x94 -> fctrl_in_5 (FPCR, FPIAR)
#	anything else falls through to fctrl_in_3 (FPSR, FPIAR)
# Only the USER_* slots named for each case are written; each immediate
# longword is read at EXC_EXTWPTR, which is advanced by 4 per read.
5285fmovm_ctrl:
5286	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
5287	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
5288	beq.w		fctrl_in_7		# yes
5289	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
5290	beq.w		fctrl_in_6		# yes
5291	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
5292	beq.b		fctrl_in_5		# yes
5293
5294# fmovem.l #<data>, fpsr/fpiar
5295fctrl_in_3:
5296	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5297	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5298	bsr.l		_imem_read_long		# fetch FPSR from mem
5299
5300	tst.l		%d1			# did ifetch fail?
5301	bne.l		iea_iacc		# yes
5302
5303	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
5304	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5305	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5306	bsr.l		_imem_read_long		# fetch FPIAR from mem
5307
5308	tst.l		%d1			# did ifetch fail?
5309	bne.l		iea_iacc		# yes
5310
5311	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5312	rts
5313
5314# fmovem.l #<data>, fpcr/fpiar
5315fctrl_in_5:
5316	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5317	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5318	bsr.l		_imem_read_long		# fetch FPCR from mem
5319
5320	tst.l		%d1			# did ifetch fail?
5321	bne.l		iea_iacc		# yes
5322
5323	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
5324	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5325	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5326	bsr.l		_imem_read_long		# fetch FPIAR from mem
5327
5328	tst.l		%d1			# did ifetch fail?
5329	bne.l		iea_iacc		# yes
5330
5331	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5332	rts
5333
5334# fmovem.l #<data>, fpcr/fpsr
5335fctrl_in_6:
5336	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5337	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5338	bsr.l		_imem_read_long		# fetch FPCR from mem
5339
5340	tst.l		%d1			# did ifetch fail?
5341	bne.l		iea_iacc		# yes
5342
5343	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
5344	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5345	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5346	bsr.l		_imem_read_long		# fetch FPSR from mem
5347
5348	tst.l		%d1			# did ifetch fail?
5349	bne.l		iea_iacc		# yes
5350
5351	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
5352	rts
5353
5354# fmovem.l #<data>, fpcr/fpsr/fpiar
5355fctrl_in_7:
5356	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5357	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5358	bsr.l		_imem_read_long		# fetch FPCR from mem
5359
5360	tst.l		%d1			# did ifetch fail?
5361	bne.l		iea_iacc		# yes
5362
5363	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
5364	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5365	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5366	bsr.l		_imem_read_long		# fetch FPSR from mem
5367
5368	tst.l		%d1			# did ifetch fail?
5369	bne.l		iea_iacc		# yes
5370
5371	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
5372	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5373	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5374	bsr.l		_imem_read_long		# fetch FPIAR from mem
5375
5376	tst.l		%d1			# did ifetch fail?
5377	bne.l		iea_iacc		# yes
5378
5379	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
5380	rts
5381
5382##########################################################################
5383
5384#########################################################################
5385# XDEF ****************************************************************	#
5386#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
5387#			  OVFL/UNFL exceptions will result		#
5388#									#
5389# XREF ****************************************************************	#
5390#	norm() - normalize mantissa after adjusting exponent		#
5391#									#
5392# INPUT ***************************************************************	#
5393#	FP_SRC(a6) = fp op1(src)					#
5394#	FP_DST(a6) = fp op2(dst)					#
5395#									#
5396# OUTPUT **************************************************************	#
5397#	FP_SRC(a6) = fp op1 scaled(src)					#
5398#	FP_DST(a6) = fp op2 scaled(dst)					#
5399#	d0         = scale amount					#
5400#									#
5401# ALGORITHM ***********************************************************	#
5402#	If the DST exponent is > the SRC exponent, set the DST exponent	#
5403# equal to 0x3fff and scale the SRC exponent by the value that the	#
5404# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
5405# do the opposite. Return this scale factor in d0.			#
5406#	If the two exponents differ by > the number of mantissa bits	#
5407# plus two, then set the smallest exponent to a very small value as a	#
5408# quick shortcut.							#
5409#									#
5410#########################################################################
5411
5412	global		addsub_scaler2
5413addsub_scaler2:
# The operands are read through a0 (src) and a1 (dst) and copied into the
# scratch areas FP_SCR0 (src) and FP_SCR1 (dst); all scaling below is done
# on those copies.
5414	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
5415	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
5416	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
5417	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
5418	mov.w		SRC_EX(%a0),%d0
5419	mov.w		DST_EX(%a1),%d1
5420	mov.w		%d0,FP_SCR0_EX(%a6)
5421	mov.w		%d1,FP_SCR1_EX(%a6)
5422
# strip the sign bits and keep both exponents side by side in L_SCR1:
# word 0 = src exponent, word 1 = dst exponent.
5423	andi.w		&0x7fff,%d0
5424	andi.w		&0x7fff,%d1
5425	mov.w		%d0,L_SCR1(%a6)		# store src exponent
5426	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
5427
5428	cmp.w		%d0, %d1		# is src exp >= dst exp?
5429	bge.l		src_exp_ge2
5430
5431# dst exp is >  src exp; scale dst to exp = 0x3fff
5432dst_exp_gt2:
5433	bsr.l		scale_to_zero_dst
5434	mov.l		%d0,-(%sp)		# save scale factor
5435
5436	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
5437	bne.b		cmpexp12
5438
5439	lea		FP_SCR0(%a6),%a0
5440	bsr.l		norm			# normalize the denorm; result is new exp
5441	neg.w		%d0			# new exp = -(shft val)
5442	mov.w		%d0,L_SCR1(%a6)		# insert new src exp
5443
5444cmpexp12:
5445	mov.w		2+L_SCR1(%a6),%d0
5446	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
5447
5448	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
5449	bge.b		quick_scale12
5450
5451	mov.w		L_SCR1(%a6),%d0
5452	add.w		0x2(%sp),%d0		# scale src exponent by scale factor (low word of saved long)
5453	mov.w		FP_SCR0_EX(%a6),%d1
5454	and.w		&0x8000,%d1
5455	or.w		%d1,%d0			# concat {sgn,new exp}
5456	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
5457
5458	mov.l		(%sp)+,%d0		# return SCALE factor
5459	rts
5460
# exponents are too far apart: keep only the src sign and force the src
# exponent to 1 instead of computing the exact scaled value.
5461quick_scale12:
5462	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
5463	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
5464
5465	mov.l		(%sp)+,%d0		# return SCALE factor
5466	rts
5467
5468# src exp is >= dst exp; scale src to exp = 0x3fff
5469src_exp_ge2:
5470	bsr.l		scale_to_zero_src
5471	mov.l		%d0,-(%sp)		# save scale factor
5472
5473	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
5474	bne.b		cmpexp22
5475	lea		FP_SCR1(%a6),%a0
5476	bsr.l		norm			# normalize the denorm; result is new exp
5477	neg.w		%d0			# new exp = -(shft val)
5478	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp
5479
5480cmpexp22:
5481	mov.w		L_SCR1(%a6),%d0
5482	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
5483
5484	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
5485	bge.b		quick_scale22
5486
5487	mov.w		2+L_SCR1(%a6),%d0
5488	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor (low word of saved long)
5489	mov.w		FP_SCR1_EX(%a6),%d1
5490	andi.w		&0x8000,%d1
5491	or.w		%d1,%d0			# concat {sgn,new exp}
5492	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
5493
5494	mov.l		(%sp)+,%d0		# return SCALE factor
5495	rts
5496
# exponents are too far apart: keep only the dst sign and force the dst
# exponent to 1 instead of computing the exact scaled value.
5497quick_scale22:
5498	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
5499	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
5500
5501	mov.l		(%sp)+,%d0		# return SCALE factor
5502	rts
5503
5504##########################################################################
5505
5506#########################################################################
5507# XDEF ****************************************************************	#
5508#	scale_to_zero_src(): scale the exponent of extended precision	#
5509#			     value at FP_SCR0(a6).			#
5510#									#
5511# XREF ****************************************************************	#
5512#	norm() - normalize the mantissa if the operand was a DENORM	#
5513#									#
5514# INPUT ***************************************************************	#
5515#	FP_SCR0(a6) = extended precision operand to be scaled		#
5516#									#
5517# OUTPUT **************************************************************	#
5518#	FP_SCR0(a6) = scaled extended precision operand			#
5519#	d0	    = scale value					#
5520#									#
5521# ALGORITHM ***********************************************************	#
5522#	Set the exponent of the input operand to 0x3fff. Save the value	#
5523# of the difference between the original and new exponent. Then,	#
5524# normalize the operand if it was a DENORM. Add this normalization	#
5525# value to the previous value. Return the result.			#
5526#									#
5527#########################################################################
5528
5529	global		scale_to_zero_src
5530scale_to_zero_src:
5531	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
5532	mov.w		%d1,%d0			# make a copy
5533
5534	andi.l		&0x7fff,%d1		# d1 = operand's exponent, zero-extended to a long
5535
5536	andi.w		&0x8000,%d0		# extract operand's sgn
5537	or.w		&0x3fff,%d0		# insert new operand's exponent (0x3fff)
5538
5539	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
5540
5541	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
5542	beq.b		stzs_denorm		# yes; normalize the DENORM
5543
# d1 = exponent to scale from (original exponent, or -(shift) for a DENORM)
5544stzs_norm:
5545	mov.l		&0x3fff,%d0
5546	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5547
5548	rts
5549
5550stzs_denorm:
5551	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
5552	bsr.l		norm			# normalize denorm
5553	neg.l		%d0			# new exponent = -(shft val)
5554	mov.l		%d0,%d1			# d1 = exponent consumed by stzs_norm
5555	bra.b		stzs_norm		# finish scaling
5556
5557###
5558
5559#########################################################################
5560# XDEF ****************************************************************	#
5561#	scale_sqrt(): scale the input operand exponent so a subsequent	#
5562#		      fsqrt operation won't take an exception.		#
5563#									#
5564# XREF ****************************************************************	#
5565#	norm() - normalize the mantissa if the operand was a DENORM	#
5566#									#
5567# INPUT ***************************************************************	#
5568#	FP_SCR0(a6) = extended precision operand to be scaled		#
5569#									#
5570# OUTPUT **************************************************************	#
5571#	FP_SCR0(a6) = scaled extended precision operand			#
5572#	d0	    = scale value					#
5573#									#
5574# ALGORITHM ***********************************************************	#
5575#	If the input operand is a DENORM, normalize it.			#
5576#	If the exponent of the input operand is even, set the exponent	#
5577# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
5578# exponent of the input operand is odd, set the exponent to 0x3fff and	#
5579# return a scale factor of "(exp-0x3fff)/2".				#
5580#									#
5581#########################################################################
5582
5583	global		scale_sqrt
5584scale_sqrt:
5585	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
5586	beq.b		ss_denorm		# yes; normalize the DENORM
5587
5588	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
5589	andi.l		&0x7fff,%d1		# extract operand's exponent
5590
5591	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only operand's sgn
5592
5593	btst		&0x0,%d1		# is exp even or odd?
5594	beq.b		ss_norm_even
5595
# odd exponent: new exponent is 0x3fff
5596	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3fff)
5597
5598	mov.l		&0x3fff,%d0
5599	sub.l		%d1,%d0			# scale = 0x3fff - exp
5600	asr.l		&0x1,%d0		# divide scale factor by 2
5601	rts
5602
# even exponent: new exponent is 0x3ffe
5603ss_norm_even:
5604	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3ffe)
5605
5606	mov.l		&0x3ffe,%d0
5607	sub.l		%d1,%d0			# scale = 0x3ffe - exp
5608	asr.l		&0x1,%d0		# divide scale factor by 2
5609	rts
5610
5611ss_denorm:
5612	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
5613	bsr.l		norm			# normalize denorm
5614
# d0 = value returned by norm(); sibling routines treat it as the shift
# amount (exponent = -d0), so its low bit gives the exponent's parity.
5615	btst		&0x0,%d0		# is exp even or odd?
5616	beq.b		ss_denorm_even
5617
5618	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3fff)
5619
5620	add.l		&0x3fff,%d0		# scale = 0x3fff + shft val
5621	asr.l		&0x1,%d0		# divide scale factor by 2
5622	rts
5623
5624ss_denorm_even:
5625	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3ffe)
5626
5627	add.l		&0x3ffe,%d0		# scale = 0x3ffe + shft val
5628	asr.l		&0x1,%d0		# divide scale factor by 2
5629	rts
5630
5631###
5632
5633#########################################################################
5634# XDEF ****************************************************************	#
5635#	scale_to_zero_dst(): scale the exponent of extended precision	#
5636#			     value at FP_SCR1(a6).			#
5637#									#
5638# XREF ****************************************************************	#
5639#	norm() - normalize the mantissa if the operand was a DENORM	#
5640#									#
5641# INPUT ***************************************************************	#
5642#	FP_SCR1(a6) = extended precision operand to be scaled		#
5643#									#
5644# OUTPUT **************************************************************	#
5645#	FP_SCR1(a6) = scaled extended precision operand			#
5646#	d0	    = scale value					#
5647#									#
5648# ALGORITHM ***********************************************************	#
5649#	Set the exponent of the input operand to 0x3fff. Save the value	#
5650# of the difference between the original and new exponent. Then,	#
5651# normalize the operand if it was a DENORM. Add this normalization	#
5652# value to the previous value. Return the result.			#
5653#									#
5654#########################################################################
5655
5656	global		scale_to_zero_dst
5657scale_to_zero_dst:
5658	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
5659	mov.w		%d1,%d0			# make a copy
5660
5661	andi.l		&0x7fff,%d1		# d1 = operand's exponent, zero-extended to a long
5662
5663	andi.w		&0x8000,%d0		# extract operand's sgn
5664	or.w		&0x3fff,%d0		# insert new operand's exponent (0x3fff)
5665
5666	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
5667
5668	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
5669	beq.b		stzd_denorm		# yes; normalize the DENORM
5670
# d1 = exponent to scale from (original exponent, or -(shift) for a DENORM)
5671stzd_norm:
5672	mov.l		&0x3fff,%d0
5673	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5674	rts
5675
5676stzd_denorm:
5677	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
5678	bsr.l		norm			# normalize denorm
5679	neg.l		%d0			# new exponent = -(shft val)
5680	mov.l		%d0,%d1			# d1 = exponent consumed by stzd_norm
5681	bra.b		stzd_norm		# finish scaling
5682
5683##########################################################################
5684
5685#########################################################################
5686# XDEF ****************************************************************	#
5687#	res_qnan(): return default result w/ QNAN operand for dyadic	#
5688#	res_snan(): return default result w/ SNAN operand for dyadic	#
5689#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
5690#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
5691#									#
5692# XREF ****************************************************************	#
5693#	None								#
5694#									#
5695# INPUT ***************************************************************	#
5696#	FP_SRC(a6) = pointer to extended precision src operand		#
5697#	FP_DST(a6) = pointer to extended precision dst operand		#
5698#									#
5699# OUTPUT **************************************************************	#
5700#	fp0 = default result						#
5701#									#
5702# ALGORITHM ***********************************************************	#
5703#	If either operand (but not both operands) of an operation is a	#
5704# nonsignalling NAN, then that NAN is returned as the result. If both	#
5705# operands are nonsignalling NANs, then the destination operand		#
5706# nonsignalling NAN is returned as the result.				#
5707#	If either operand to an operation is a signalling NAN (SNAN),	#
5708# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
5709# enable bit is set in the FPCR, then the trap is taken and the		#
5710# destination is not modified. If the SNAN trap enable bit is not set,	#
5711# then the SNAN is converted to a nonsignalling NAN (by setting the	#
5712# SNAN bit in the operand to one), and the operation continues as	#
5713# described in the preceding paragraph, for nonsignalling NANs.		#
5714#	Make sure the appropriate FPSR bits are set before exiting.	#
5715#									#
5716#########################################################################
5717
5718	global		res_qnan
5719	global		res_snan
# Dyadic entry points: the dst operand is examined first, so a dst NAN
# takes priority over a src NAN.
5720res_qnan:
5721res_snan:
5722	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
5723	beq.b		dst_snan2
5724	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
5725	beq.b		dst_qnan2
# dst is not a NAN, so the src must be the NAN
5726src_nan:
5727	cmp.b		STAG(%a6), &QNAN	# is the src a QNAN?
5728	beq.b		src_qnan2
5729	global		res_snan_1op
# Monadic SNAN entry point; also the dyadic "src is an SNAN" path.
5730res_snan_1op:
5731src_snan2:
5732	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
5733	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5734	lea		FP_SRC(%a6), %a0	# result is the src operand
5735	bra.b		nan_comp
5736	global		res_qnan_1op
# Monadic QNAN entry point; also the dyadic "src is a QNAN" path.
5737res_qnan_1op:
5738src_qnan2:
5739	or.l		&nan_mask, USER_FPSR(%a6)
5740	lea		FP_SRC(%a6), %a0	# result is the src operand
5741	bra.b		nan_comp
5742dst_snan2:
5743	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5744	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
5745	lea		FP_DST(%a6), %a0	# result is the dst operand
5746	bra.b		nan_comp
# dst is a QNAN and is returned; if the src is an SNAN the SNAN/AIOP
# status bits are still set.
5747dst_qnan2:
5748	lea		FP_DST(%a6), %a0	# result is the dst operand
5749	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
5750	bne		nan_done		# no; only the NAN bit is needed
5751	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
5752nan_done:
5753	or.l		&nan_mask, USER_FPSR(%a6)
# a0 = ptr to the NAN being returned
5754nan_comp:
5755	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
5756	beq.b		nan_not_neg
5757	or.l		&neg_mask, USER_FPSR(%a6)
5758nan_not_neg:
5759	fmovm.x		(%a0), &0x80		# load the NAN as the result
5760	rts
5761
5762#########################################################################
5763# XDEF ****************************************************************	#
5764#	res_operr(): return default result during operand error		#
5765#									#
5766# XREF ****************************************************************	#
5767#	None								#
5768#									#
5769# INPUT ***************************************************************	#
5770#	None								#
5771#									#
5772# OUTPUT **************************************************************	#
5773#	fp0 = default operand error result				#
5774#									#
5775# ALGORITHM ***********************************************************	#
5776#	A nonsignalling NAN is returned as the default result when	#
5777# an operand error occurs for the following cases:			#
5778#									#
5779#	Multiply: (Infinity x Zero)					#
5780#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
5781#									#
5782#########################################################################
5783
5784	global		res_operr
# Flags the operand error in USER_FPSR(a6) (NAN, OPERR and AIOP bits) and
# loads the constant at nan_return as the result.
5785res_operr:
5786	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
5787	fmovm.x		nan_return(%pc), &0x80	# load default NAN result
5788	rts
5789
# default operand error result: three longwords loaded by res_operr
5790nan_return:
5791	long		0x7fff0000, 0xffffffff, 0xffffffff
5792
5793#########################################################################
5794# XDEF ****************************************************************	#
5795#	_denorm(): denormalize an intermediate result			#
5796#									#
5797# XREF ****************************************************************	#
5798#	None								#
5799#									#
5800# INPUT *************************************************************** #
5801#	a0 = points to the operand to be denormalized			#
5802#		(in the internal extended format)			#
5803#									#
5804#	d0 = rounding precision						#
5805#									#
5806# OUTPUT **************************************************************	#
5807#	a0 = pointer to the denormalized result				#
5808#		(in the internal extended format)			#
5809#									#
5810#	d0 = guard,round,sticky						#
5811#									#
5812# ALGORITHM ***********************************************************	#
5813#	According to the exponent underflow threshold for the given	#
5814# precision, shift the mantissa bits to the right in order to raise the	#
5815# exponent of the operand to the threshold value. While shifting the	#
5816# mantissa bits right, maintain the value of the guard, round, and	#
5817# sticky bits.								#
5818# other notes:								#
5819#	(1) _denorm() is called by the underflow routines		#
5820#	(2) _denorm() does NOT affect the status register		#
5821#									#
5822#########################################################################
5823
5824#
5825# table of exponent threshold values for each precision
5826#
# indexed by (rounding precision >> 2); one word per precision
5827tbl_thresh:
5828	short		0x0
5829	short		sgl_thresh
5830	short		dbl_thresh
5831
5832	global		_denorm
5833_denorm:
5834#
5835# Load the exponent threshold for the precision selected and check
5836# to see if (threshold - exponent) is > 65 in which case we can
5837# simply set the sticky bit and zero the mantissa. otherwise
5838# we have to call the denormalization routine.
5839#
# NOTE(review): only the low byte of d0 is shifted but d0.w is used as the
# table index, so bits 15:8 of d0 are assumed clear on entry - confirm
# against callers.
5840	lsr.b		&0x2, %d0		# shift prec to lo bits
5841	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5842	mov.w		%d1, %d0		# copy d1 into d0
5843	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
5844	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
5845	bpl.b		denorm_set_stky		# yes; just set sticky
5846
5847	clr.l		%d0			# clear g,r,s
5848	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
5849	beq.b		denorm_call		# no; don't change anything
5850	bset		&29, %d0		# yes; set sticky bit
5851
# d0{31:29} = initial g,r,s and d1.w = threshold, as dnrm_lp expects
5852denorm_call:
5853	bsr.l		dnrm_lp			# denormalize the number
5854	rts
5855
5856#
5857# all bits would have been shifted off during the denorm so simply
5858# set the sticky bit and clear the entire mantissa.
5859#
5860denorm_set_stky:
5861	mov.l		&0x20000000, %d0	# set sticky bit in return value
5862	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
5863	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
5864	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
5865	rts
5866
5867#									#
5868# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
5869#									#
5870# INPUT:								#
5871#	%a0	   : points to the operand to be denormalized		#
5872#	%d0{31:29} : initial guard,round,sticky				#
5873#	%d1{15:0}  : denormalization threshold				#
5874# OUTPUT:								#
5875#	%a0	   : points to the denormalized operand			#
5876#	%d0{31:29} : final guard,round,sticky				#
5877#									#
5878
5879# *** Local Equates *** #
5880set	GRS,		L_SCR2			# g,r,s temp storage
5881set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
5882
5883	global		dnrm_lp
5884dnrm_lp:
5885
5886#
5887# make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888# in memory so as to make the bitfield extraction for denormalization easier.
5889#
5890	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891	mov.l		%d0, GRS(%a6)		# place g,r,s after it
5892
5893#
5894# check to see how much less than the underflow threshold the operand
5895# exponent is. the difference selects one of three shift routines.
5896#
5897	mov.l		%d1, %d0		# copy the denorm threshold
5898	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
5899	ble.b		dnrm_no_lp		# d1 <= 0
5900	cmpi.w		%d1, &0x20		# is ( 0 < d1 < 32) ?
5901	blt.b		case_1			# yes
5902	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
5903	blt.b		case_2			# yes
5904	bra.w		case_3			# (d1 >= 64)
5905
5906#
5907# No denormalization necessary
5908#
5909dnrm_no_lp:
5910	mov.l		GRS(%a6), %d0		# restore original g,r,s
5911	rts
5912
5913#
5914# case (0<d1<32)
5915#
5916# %d0 = denorm threshold
5917# %d1 = "n" = amt to shift
5918#
5919#	---------------------------------------------------------
5920#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5921#	---------------------------------------------------------
5922#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923#	\	   \		      \			 \
5924#	 \	    \		       \		  \
5925#	  \	     \			\		   \
5926#	   \	      \			 \		    \
5927#	    \	       \		  \		     \
5928#	     \		\		   \		      \
5929#	      \		 \		    \		       \
5930#	       \	  \		     \			\
5931#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932#	---------------------------------------------------------
5933#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
5934#	---------------------------------------------------------
5935#
5936case_1:
5937	mov.l		%d2, -(%sp)		# create temp storage
5938
5939	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5940	mov.l		&32, %d0
5941	sub.w		%d1, %d0		# %d0 = 32 - %d1
5942
# for large shift amounts, or the old g,r,s byte into the low byte of the
# FTEMP_LO copy so those bits are still seen by the sticky test below.
5943	cmpi.w		%d1, &29		# is shft amt >= 29
5944	blt.b		case1_extract		# no; no fix needed
5945	mov.b		GRS(%a6), %d2
5946	or.b		%d2, 3+FTEMP_LO2(%a6)
5947
# %d0 = 32 - n is used as both a field width and a bit offset below
5948case1_extract:
5949	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952
5953	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
5954	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
5955
5956	bftst		%d0{&2:&30}		# were bits shifted off? (everything below G,R)
5957	beq.b		case1_sticky_clear	# no; go finish
5958	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
5959
5960case1_sticky_clear:
5961	and.l		&0xe0000000, %d0	# clear all but G,R,S
5962	mov.l		(%sp)+, %d2		# restore temp register
5963	rts
5964
5965#
5966# case (32<=d1<64)
5967#
5968# %d0 = denorm threshold
5969# %d1 = "n" = amt to shift
5970#
5971#	---------------------------------------------------------
5972#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5973#	---------------------------------------------------------
5974#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975#	\	   \		      \
5976#	 \	    \		       \
5977#	  \	     \			-------------------
5978#	   \	      --------------------		   \
5979#	    -------------------		  \		    \
5980#			       \	   \		     \
5981#				\	    \		      \
5982#				 \	     \		       \
5983#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984#	---------------------------------------------------------
5985#	|0...............0|0....0| NEW_LO     |grs		|
5986#	---------------------------------------------------------
5987#
5988case_2:
5989	mov.l		%d2, -(%sp)		# create temp storage
5990
5991	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5992	subi.w		&0x20, %d1		# %d1 now between 0 and 31
5993	mov.l		&0x20, %d0
5994	sub.w		%d1, %d0		# %d0 = 32 - %d1
5995
5996# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997# the number of bits to check for the sticky detect.
5998# it only plays a role in shift amounts of 61-63.
5999	mov.b		GRS(%a6), %d2
6000	or.b		%d2, 3+FTEMP_LO2(%a6)
6001
6002	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004
# sticky is set if anything below the new G,R is non-zero: first the rest
# of the extracted long, then the remaining bits of the FTEMP_LO copy.
6005	bftst		%d1{&2:&30}		# were any bits shifted off?
6006	bne.b		case2_set_sticky	# yes; set sticky bit
6007	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
6008	bne.b		case2_set_sticky	# yes; set sticky bit
6009
6010	mov.l		%d1, %d0		# move new G,R,S to %d0
6011	bra.b		case2_end
6012
6013case2_set_sticky:
6014	mov.l		%d1, %d0		# move new G,R,S to %d0
6015	bset		&rnd_stky_bit, %d0	# set sticky bit
6016
6017case2_end:
6018	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
6019	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
6020	and.l		&0xe0000000, %d0	# clear all but G,R,S
6021
6022	mov.l		(%sp)+,%d2		# restore temp register
6023	rts
6024
6025#
6026# case (d1>=64)
6027#
6028# %d0 = denorm threshold
6029# %d1 = amt to shift
6030#
6031case_3:
6032	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
6033
# three-way split on the shift amount (known to be >= 64 here)
6034	cmpi.w		%d1, &65		# is shift amt > 65?
6035	blt.b		case3_64		# no; it's == 64
6036	beq.b		case3_65		# no; it's == 65
6037
6038#
6039# case (d1>65)
6040#
6041# Shift value is > 65 and out of range. All bits are shifted off.
6042# Return a zero mantissa with the sticky bit set
6043#
6044	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6045	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6046	mov.l		&0x20000000, %d0	# set sticky bit
6047	rts
6048
6049#
6050# case (d1 == 64)
6051#
6052#	---------------------------------------------------------
6053#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6054#	---------------------------------------------------------
6055#	<-------(32)------>
6056#	\		   \
6057#	 \		    \
6058#	  \		     \
6059#	   \		      ------------------------------
6060#	    -------------------------------		    \
6061#					   \		     \
6062#					    \		      \
6063#					     \		       \
6064#					      <-------(32)------>
6065#	---------------------------------------------------------
6066#	|0...............0|0................0|grs		|
6067#	---------------------------------------------------------
6068#
# shift of exactly 64: the top two bits of FTEMP_HI become G,R and every
# other bit can only contribute to sticky.
6069case3_64:
6070	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
6071	mov.l		%d0, %d1		# make a copy
6072	and.l		&0xc0000000, %d0	# extract G,R
6073	and.l		&0x3fffffff, %d1	# extract other bits (sets Z for case3_complete)
6074
6075	bra.b		case3_complete
6076
6077#
6078# case (d1 == 65)
6079#
6080#	---------------------------------------------------------
6081#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6082#	---------------------------------------------------------
6083#	<-------(32)------>
6084#	\		   \
6085#	 \		    \
6086#	  \		     \
6087#	   \		      ------------------------------
6088#	    --------------------------------		    \
6089#					    \		     \
6090#					     \		      \
6091#					      \		       \
6092#					       <-------(31)----->
6093#	---------------------------------------------------------
6094#	|0...............0|0................0|0rs		|
6095#	---------------------------------------------------------
6096#
# shift of exactly 65: the top bit of FTEMP_HI becomes R (G is zero) and
# every other bit can only contribute to sticky.
6097case3_65:
6098	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy; d1 held the shift amt until now
6099	and.l		&0x80000000, %d0	# extract R bit
6100	lsr.l		&0x1, %d0		# shift high bit into R bit
6101	and.l		&0x7fffffff, %d1	# extract other bits (sets Z for case3_complete)
6102
6103case3_complete:
6104# last operation done was an "and" of the bits shifted off so the condition
6105# codes are already set so branch accordingly.
6106	bne.b		case3_set_sticky	# yes; go set new sticky
6107	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
6108	bne.b		case3_set_sticky	# yes; go set new sticky
6109	tst.b		GRS(%a6)		# were any bits shifted off?
6110	bne.b		case3_set_sticky	# yes; go set new sticky
6111
6112#
6113# no bits were shifted off so don't set the sticky bit.
6114# d0 already holds the new guard and round bits and
6115# the entire mantissa is zero.
6116#
6117	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6118	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6119	rts
6120
6121#
6122# some bits were shifted off so set the sticky bit.
6123# the entire mantissa is zero.
6124#
6125case3_set_sticky:
6126	bset		&rnd_stky_bit,%d0	# set new sticky bit
6127	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6128	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6129	rts
6130
6131#########################################################################
6132# XDEF ****************************************************************	#
6133#	_round(): round result according to precision/mode		#
6134#									#
6135# XREF ****************************************************************	#
6136#	None								#
6137#									#
6138# INPUT ***************************************************************	#
6139#	a0	  = ptr to input operand in internal extended format	#
6140#	d1(hi)    = contains rounding precision:			#
6141#			ext = $0000xxxx					#
6142#			sgl = $0004xxxx					#
6143#			dbl = $0008xxxx					#
6144#	d1(lo)	  = contains rounding mode:				#
6145#			RN  = $xxxx0000					#
6146#			RZ  = $xxxx0001					#
6147#			RM  = $xxxx0002					#
6148#			RP  = $xxxx0003					#
6149#	d0{31:29} = contains the g,r,s bits (extended)			#
6150#									#
6151# OUTPUT **************************************************************	#
6152#	a0 = pointer to rounded result					#
6153#									#
6154# ALGORITHM ***********************************************************	#
6155#	On return the value pointed to by a0 is correctly rounded,	#
6156#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
6157#	The result is not typed - the tag field is invalid.  The	#
6158#	result is still in the internal extended format.		#
6159#									#
6160#	The INEX bit of USER_FPSR will be set if the rounded result was	#
6161#	inexact (i.e. if any of the g-r-s bits were set).		#
6162#									#
6163#########################################################################
6164
6165	global		_round
6166_round:
6167#
6168# ext_grs() looks at the rounding precision and sets the appropriate
6169# G,R,S bits.
6170# If (G,R,S == 0) then result is exact and round is done, else set
6171# the inex flag in status reg and continue.
6172#
6173	bsr.l		ext_grs			# extract G,R,S
6174
6175	tst.l		%d0			# are G,R,S zero?
6176	beq.w		truncate		# yes; round is complete
6177
6178	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6179
6180#
6181# Use rounding mode as an index into a jump table for these modes.
6182# All of the following assumes grs != 0.
6183# d1.w (the rounding mode) selects a word offset relative to tbl_mode.
6184#
6184	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
6186
# entries are in rounding-mode order: RN, RZ, RM, RP
6187tbl_mode:
6188	short		rnd_near - tbl_mode
6189	short		truncate - tbl_mode	# RZ always truncates
6190	short		rnd_mnus - tbl_mode
6191	short		rnd_plus - tbl_mode
6192
6193#################################################################
6194#	ROUND PLUS INFINITY					#
6195#								#
6196#	If sign of fp number = 0 (positive), then add 1 to l.	#
6197#################################################################
6198rnd_plus:
6199	tst.b		FTEMP_SGN(%a0)		# check for sign
6200	bmi.w		truncate		# if negative then truncate
6201
# with d0 non-zero the "rs = 0" test in the add_* routines never clears
# the l-bit, so the add always rounds the magnitude up.
6202	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
6203	swap		%d1			# set up d1 for round prec.
6204
6205	cmpi.b		%d1, &s_mode		# is prec = sgl?
6206	beq.w		add_sgl			# yes
6207	bgt.w		add_dbl			# no; it's dbl
6208	bra.w		add_ext			# no; it's ext
6209
6210#################################################################
6211#	ROUND MINUS INFINITY					#
6212#								#
6213#	If sign of fp number = 1 (negative), then add 1 to l.	#
6214#################################################################
6215rnd_mnus:
6216	tst.b		FTEMP_SGN(%a0)		# check for sign
6217	bpl.w		truncate		# if positive then truncate
6218
# with d0 non-zero the "rs = 0" test in the add_* routines never clears
# the l-bit, so the add always rounds the magnitude up.
6219	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
6220	swap		%d1			# set up d1 for round prec.
6221
6222	cmpi.b		%d1, &s_mode		# is prec = sgl?
6223	beq.w		add_sgl			# yes
6224	bgt.w		add_dbl			# no; it's dbl
6225	bra.w		add_ext			# no; it's ext
6226
6227#################################################################
6228#	ROUND NEAREST						#
6229#								#
6230#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
6231#	Note that this will round to even in case of a tie.	#
6232#################################################################
6233rnd_near:
# after the shift d0 holds only r,s; the add_* routines test it to detect
# a tie (r = s = 0) and then clear the l-bit.
6234	asl.l		&0x1, %d0		# shift g-bit to c-bit
6235	bcc.w		truncate		# g = 0; just truncate
6236
6237	swap		%d1			# set up d1 for round prec.
6238
6239	cmpi.b		%d1, &s_mode		# is prec = sgl?
6240	beq.w		add_sgl			# yes
6241	bgt.w		add_dbl			# no; it's dbl
6242	bra.w		add_ext			# no; it's ext
6243
6244# *** LOCAL EQUATES ***
6245set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
6246set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
6247
6248#########################
6249#	ADD SINGLE	#
6250#########################
# a0 = operand; d0 = r,s bits (zero means a tie, so the l-bit is cleared).
# truncate also enters at sgl_done.
6251add_sgl:
6252	add.l		&ad_1_sgl, FTEMP_HI(%a0)
6253	bcc.b		scc_clr			# no mantissa overflow
# mantissa overflowed: rotate the carry-out back in through both words of
# FTEMP_HI (one bit right) and bump the exponent.
6254	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
6255	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
6256	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
6257scc_clr:
6258	tst.l		%d0			# test for rs = 0
6259	bne.b		sgl_done
6260	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6261sgl_done:
6262	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6264	rts
6265
6266#########################
6267#	ADD EXTENDED	#
6268#########################
# a0 = operand; d0 = r,s bits (zero means a tie, so the l-bit is cleared).
6269add_ext:
6270	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
6271	bcc.b		xcc_clr			# test for carry out
6272	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
6273	bcc.b		xcc_clr
# carry out of the full 64-bit mantissa: rotate it back in one word at a
# time, most significant word first, then bump the exponent.
6274	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
6275	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
6276	roxr.w		FTEMP_LO(%a0)
6277	roxr.w		FTEMP_LO+2(%a0)
6278	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
6279xcc_clr:
6280	tst.l		%d0			# test rs = 0
6281	bne.b		add_ext_done
6282	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
6283add_ext_done:
6284	rts
6285
6286#########################
6287#	ADD DOUBLE	#
6288#########################
6289add_dbl:
6290	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6291	bcc.b		dcc_clr			# no carry
6292	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
6293	bcc.b		dcc_clr			# no carry
6294
6295	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
6296	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
6297	roxr.w		FTEMP_LO(%a0)		# keep rotating, one word at a time,
6298	roxr.w		FTEMP_LO+2(%a0)		# through the rest of the 64-bit mantissa
6299	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
6300dcc_clr:
6301	tst.l		%d0			# test for rs = 0
6302	bne.b		dbl_done		# r or s set: not a tie, keep the l-bit
6303	and.w		&0xf000, FTEMP_LO+2(%a0) # tie: clear the l-bit (bit 11) and all below it
6304
6305dbl_done:					# also branched to directly from truncate
6306	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6307	rts
6308
6309###########################
6310# Truncate all other bits #
6311###########################
6312truncate:
6313	swap		%d1			# select rnd prec
6314
6315	cmpi.b		%d1, &s_mode		# is prec sgl?
6316	beq.w		sgl_done		# yes; reuse add_sgl's tail to mask off the extra bits
6317	bgt.b		dbl_done		# no; it's dbl: reuse add_dbl's tail to mask
6318	rts					# no; it's ext: mantissa is left untouched
6319
6320
6321#
6322# ext_grs(): extract guard, round and sticky bits according to
6323#	     rounding precision.
6324#
6325# INPUT
6326#	d0	   = extended precision g,r,s (in d0{31:29})
6327#	d1	   = {PREC,ROUND}
6328# OUTPUT
6329#	d0{31:29}  = guard, round, sticky
6330#
6331# ext_grs extracts the guard/round/sticky bits according to the
6332# selected rounding precision. It is called by the round subroutine
6333# only.  All registers except d0 are kept intact. d0 becomes an
6334# updated guard,round,sticky in d0{31:29}
6335#
6336# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337#	 prior to usage, and needs to restore d1 to original. this
6338#	 routine is tightly tied to the round routine and not meant to
6339#	 uphold standard subroutine calling practices.
6340#
6341
6342ext_grs:
6343	swap		%d1			# have d1.w point to round precision
6344	tst.b		%d1			# is rnd prec = extended?
6345	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
6346
6347#
6348# %d0 actually already holds g,r,s since _round() had it before calling
6349# this function. so, as long as we don't disturb it, we are "returning" it.
6350#
6351ext_grs_ext:
6352	swap		%d1			# yes; return to correct positions
6353	rts
6354
6355ext_grs_not_ext:
6356	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
6357
6358	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
6359	bne.b		ext_grs_dbl		# no; go handle dbl
6360
6361#
6362# sgl:
6363#	96		64	  40	32		0
6364#	-----------------------------------------------------
6365#	| EXP	|XXXXXXX|	  |xx	|		|grs|
6366#	-----------------------------------------------------
6367#			<--(24)--->nn\			   /
6368#				   ee ---------------------
6369#				   ww		|
6370#						v
6371#				   gr	   new sticky
6372#
6373ext_grs_sgl:
6374	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6375	mov.l		&30, %d2		# of the sgl prec. limits
6376	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
6377	mov.l		FTEMP_HI(%a0), %d2	# get hi(man) for s-bit test
6378	and.l		&0x0000003f, %d2	# s bit is the or of all other
6379	bne.b		ext_grs_st_stky		# bits to the right of g-r
6380	tst.l		FTEMP_LO(%a0)		# test lower mantissa
6381	bne.b		ext_grs_st_stky		# if any are set, set sticky
6382	tst.l		%d0			# test original g,r,s
6383	bne.b		ext_grs_st_stky		# if any are set, set sticky
6384	bra.b		ext_grs_end_sd		# nothing set below g-r: sticky stays clear
6385
6386#
6387# dbl:
6388#	96		64		32	 11	0
6389#	-----------------------------------------------------
6390#	| EXP	|XXXXXXX|		|	 |xx	|grs|
6391#	-----------------------------------------------------
6392#						  nn\	    /
6393#						  ee -------
6394#						  ww	|
6395#							v
6396#						  gr	new sticky
6397#
6398ext_grs_dbl:
6399	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6400	mov.l		&30, %d2		# of the dbl prec. limits
6401	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
6402	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
6403	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
6404	bne.b		ext_grs_st_stky		# other bits to the right of g-r
6405	tst.l		%d0			# test word original g,r,s
6406	bne.b		ext_grs_st_stky		# if any are set, set sticky
6407	bra.b		ext_grs_end_sd		# if clear, exit
6408
6409ext_grs_st_stky:
6410	bset		&rnd_stky_bit, %d3	# set sticky bit
6411ext_grs_end_sd:
6412	mov.l		%d3, %d0		# return grs to d0
6413
6414	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
6415
6416	swap		%d1			# restore d1 to original
6417	rts
6418
6419#########################################################################
6420# norm(): normalize the mantissa of an extended precision input. the	#
6421#	  input operand should not be normalized already.		#
6422#									#
6423# XDEF ****************************************************************	#
6424#	norm()								#
6425#									#
6426# XREF **************************************************************** #
6427#	none								#
6428#									#
6429# INPUT *************************************************************** #
6430#	a0 = pointer to fp extended precision operand to normalize	#
6431#									#
6432# OUTPUT ************************************************************** #
6433#	d0 = number of bit positions the mantissa was shifted		#
6434#	a0 = the input operand's mantissa is normalized; the exponent	#
6435#	     is unchanged.						#
6436#									#
6437#########################################################################
6438	global		norm
6439norm:
6440	mov.l		%d2, -(%sp)		# create some temp regs
6441	mov.l		%d3, -(%sp)
6442
6443	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
6444	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa); d1 is not preserved
6445
6446	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
6447	beq.b		norm_lo			# hi(man) is all zeroes!
6448
6449norm_hi:
6450	lsl.l		%d2, %d0		# left shift hi(man)
6451	bfextu		%d1{&0:%d2}, %d3	# extract the top d2 bits of lo(man)
6452# NOTE(review): the field width above is d2, which would be 0 for an already-normalized input
6453	or.l		%d3, %d0		# create hi(man)
6454	lsl.l		%d2, %d1		# create lo(man)
6455
6456	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
6457	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
6458
6459	mov.l		%d2, %d0		# return shift amount
6460
6461	mov.l		(%sp)+, %d3		# restore temp regs
6462	mov.l		(%sp)+, %d2
6463
6464	rts
6465
6466norm_lo:
6467	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
6468	lsl.l		%d2, %d1		# shift lo(man)
6469	add.l		&32, %d2		# add 32 to shft amount
6470
6471	mov.l		%d1, FTEMP_HI(%a0)	# shifted lo(man) becomes the new hi(man)
6472	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
6473
6474	mov.l		%d2, %d0		# return shift amount
6475
6476	mov.l		(%sp)+, %d3		# restore temp regs
6477	mov.l		(%sp)+, %d2
6478
6479	rts
6480
6481#########################################################################
6482# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
6483#		- returns corresponding optype tag			#
6484#									#
6485# XDEF ****************************************************************	#
6486#	unnorm_fix()							#
6487#									#
6488# XREF **************************************************************** #
6489#	norm() - normalize the mantissa					#
6490#									#
6491# INPUT *************************************************************** #
6492#	a0 = pointer to unnormalized extended precision number		#
6493#									#
6494# OUTPUT ************************************************************** #
6495#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
6496#	a0 = input operand has been converted to a norm, denorm, or	#
6497#	     zero; both the exponent and mantissa are changed.		#
6498#									#
6499#########################################################################
6500
6501	global		unnorm_fix
6502unnorm_fix:
6503	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6504	bne.b		unnorm_shift		# hi(man) is not all zeroes
6505
6506#
6507# hi(man) is all zeroes so see if any bits in lo(man) are set
6508#
6509unnorm_chk_lo:
6510	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511	beq.w		unnorm_zero		# yes
6512
6513	add.w		&32, %d0		# no; fix shift distance
6514
6515#
6516# d0 = # shifts needed for complete normalization
6517#
6518unnorm_shift:
6519	clr.l		%d1			# clear top word
6520	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
6521	and.w		&0x7fff, %d1		# strip off sgn
6522
6523	cmp.w		%d0, %d1		# would a full normalize push exp < 0?
6524	bgt.b		unnorm_nrm_zero		# yes; shift only until exp = 0
6525
6526#
6527# exponent would not go < 0. Therefore, number stays normalized
6528#
6529	sub.w		%d0, %d1		# shift exponent value
6530	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
6531	and.w		&0x8000, %d0		# save old sign
6532	or.w		%d0, %d1		# {sgn,new exp}
6533	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
6534
6535	bsr.l		norm			# normalize UNNORM
6536
6537	mov.b		&NORM, %d0		# return new optype tag
6538	rts
6539
6540#
6541# exponent would go < 0, so only shift the mantissa left by the exponent
6542# value (d1) and then force the exponent to zero.
6543unnorm_nrm_zero:
6544	cmp.b		%d1, &32		# is exp <= 32?
6545	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
6546
6547	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man): 32 bits from offset d1
6548	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
6549
6550	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
6551	lsl.l		%d1, %d0		# extract new lo(man)
6552	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
6553
6554	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
6555
6556	mov.b		&DENORM, %d0		# return new optype tag
6557	rts
6558
6559#
6560# only mantissa bits set are in lo(man)
6561#
6562unnorm_nrm_zero_lrg:
6563	sub.w		&32, %d1		# adjust shft amt by 32
6564
6565	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
6566	lsl.l		%d1, %d0		# left shift lo(man)
6567
6568	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
6569	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
6570
6571	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
6572
6573	mov.b		&DENORM, %d0		# return new optype tag
6574	rts
6575
6576#
6577# whole mantissa is zero so this UNNORM is actually a zero
6578#
6579unnorm_zero:
6580	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero; sign is kept
6581
6582	mov.b		&ZERO, %d0		# fix optype tag
6583	rts
6584
6585#########################################################################
6586# XDEF ****************************************************************	#
6587#	set_tag_x(): return the optype of the input ext fp number	#
6588#									#
6589# XREF ****************************************************************	#
6590#	None								#
6591#									#
6592# INPUT ***************************************************************	#
6593#	a0 = pointer to extended precision operand			#
6594#									#
6595# OUTPUT **************************************************************	#
6596#	d0 = value of type tag						#
6597#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
6598#									#
6599# ALGORITHM ***********************************************************	#
6600#	Simply test the exponent, j-bit, and mantissa values to		#
6601# determine the type of operand.					#
6602#	If it's an unnormalized zero, alter the operand and force it	#
6603# to be a normal zero.							#
6604#									#
6605#########################################################################
6606
6607	global		set_tag_x
6608set_tag_x:
6609	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
6610	andi.w		&0x7fff, %d0		# strip off sign
6611	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
6612	beq.b		inf_or_nan_x
6613not_inf_or_nan_x:
6614	btst		&0x7,FTEMP_HI(%a0)	# is the msb of the mantissa (j-bit) set?
6615	beq.b		not_norm_x		# no
6616is_norm_x:
6617	mov.b		&NORM, %d0
6618	rts
6619not_norm_x:
6620	tst.w		%d0			# is exponent = 0?
6621	bne.b		is_unnorm_x		# no; msb clear with exp != 0 is an UNNORM
6622not_unnorm_x:
6623	tst.l		FTEMP_HI(%a0)		# exp = 0: any mantissa bit set => DENORM
6624	bne.b		is_denorm_x
6625	tst.l		FTEMP_LO(%a0)
6626	bne.b		is_denorm_x
6627is_zero_x:
6628	mov.b		&ZERO, %d0
6629	rts
6630is_denorm_x:
6631	mov.b		&DENORM, %d0
6632	rts
6633# must distinguish now "Unnormalized zeroes" which we
6634# must convert to zero.
6635is_unnorm_x:
6636	tst.l		FTEMP_HI(%a0)		# any mantissa bit set => a regular UNNORM
6637	bne.b		is_unnorm_reg_x
6638	tst.l		FTEMP_LO(%a0)
6639	bne.b		is_unnorm_reg_x
6640# it's an "unnormalized zero". let's convert it to an actual zero...
6641	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent (the operand is modified; sign kept)
6642	mov.b		&ZERO, %d0
6643	rts
6644is_unnorm_reg_x:
6645	mov.b		&UNNORM, %d0
6646	rts
6647inf_or_nan_x:
6648	tst.l		FTEMP_LO(%a0)		# EXP == MAX: any mantissa bit below the msb => NAN
6649	bne.b		is_nan_x
6650	mov.l		FTEMP_HI(%a0), %d0
6651	and.l		&0x7fffffff, %d0	# msb is a don't care!
6652	bne.b		is_nan_x
6653is_inf_x:
6654	mov.b		&INF, %d0
6655	rts
6656is_nan_x:
6657	btst		&0x6, FTEMP_HI(%a0)	# bit just below the mantissa msb: set => QNAN
6658	beq.b		is_snan_x		# clear => SNAN
6659	mov.b		&QNAN, %d0
6660	rts
6661is_snan_x:
6662	mov.b		&SNAN, %d0
6663	rts
6664
6665#########################################################################
6666# XDEF ****************************************************************	#
6667#	set_tag_d(): return the optype of the input dbl fp number	#
6668#									#
6669# XREF ****************************************************************	#
6670#	None								#
6671#									#
6672# INPUT ***************************************************************	#
6673#	a0 = points to double precision operand				#
6674#									#
6675# OUTPUT **************************************************************	#
6676#	d0 = value of type tag						#
6677#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
6678#									#
6679# ALGORITHM ***********************************************************	#
6680#	Simply test the exponent, j-bit, and mantissa values to		#
6681# determine the type of operand.					#
6682#									#
6683#########################################################################
6684
6685	global		set_tag_d
6686set_tag_d:
6687	mov.l		FTEMP(%a0), %d0		# upper longword: sign, exponent, top of fraction
6688	mov.l		%d0, %d1		# keep a copy for the fraction tests; d1 is not preserved
6689
6690	andi.l		&0x7ff00000, %d0	# isolate the 11-bit exponent field
6691	beq.b		zero_or_denorm_d	# exponent = 0
6692
6693	cmpi.l		%d0, &0x7ff00000	# is exponent all ones?
6694	beq.b		inf_or_nan_d		# yes
6695
6696is_norm_d:
6697	mov.b		&NORM, %d0
6698	rts
6699zero_or_denorm_d:
6700	and.l		&0x000fffff, %d1	# upper 20 fraction bits
6701	bne		is_denorm_d		# any fraction bit set => DENORM
6702	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits
6703	bne		is_denorm_d
6704is_zero_d:
6705	mov.b		&ZERO, %d0
6706	rts
6707is_denorm_d:
6708	mov.b		&DENORM, %d0
6709	rts
6710inf_or_nan_d:
6711	and.l		&0x000fffff, %d1	# upper 20 fraction bits
6712	bne		is_nan_d		# any fraction bit set => NAN
6713	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits
6714	bne		is_nan_d
6715is_inf_d:
6716	mov.b		&INF, %d0
6717	rts
6718is_nan_d:
6719	btst		&19, %d1		# top fraction bit: set => QNAN, clear => SNAN
6720	bne		is_qnan_d
6721is_snan_d:
6722	mov.b		&SNAN, %d0
6723	rts
6724is_qnan_d:
6725	mov.b		&QNAN, %d0
6726	rts
6727
6728#########################################################################
6729# XDEF ****************************************************************	#
6730#	set_tag_s(): return the optype of the input sgl fp number	#
6731#									#
6732# XREF ****************************************************************	#
6733#	None								#
6734#									#
6735# INPUT ***************************************************************	#
6736#	a0 = pointer to single precision operand			#
6737#									#
6738# OUTPUT **************************************************************	#
6739#	d0 = value of type tag						#
6740#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
6741#									#
6742# ALGORITHM ***********************************************************	#
6743#	Simply test the exponent, j-bit, and mantissa values to		#
6744# determine the type of operand.					#
6745#									#
6746#########################################################################
6747
6748	global		set_tag_s
6749set_tag_s:
6750	mov.l		FTEMP(%a0), %d0		# the whole sgl operand: sign, exponent, fraction
6751	mov.l		%d0, %d1		# keep a copy for the fraction tests; d1 is not preserved
6752
6753	andi.l		&0x7f800000, %d0	# isolate the 8-bit exponent field
6754	beq.b		zero_or_denorm_s	# exponent = 0
6755
6756	cmpi.l		%d0, &0x7f800000	# is exponent all ones?
6757	beq.b		inf_or_nan_s		# yes
6758
6759is_norm_s:
6760	mov.b		&NORM, %d0
6761	rts
6762zero_or_denorm_s:
6763	and.l		&0x007fffff, %d1	# 23-bit fraction
6764	bne		is_denorm_s		# any fraction bit set => DENORM
6765is_zero_s:
6766	mov.b		&ZERO, %d0
6767	rts
6768is_denorm_s:
6769	mov.b		&DENORM, %d0
6770	rts
6771inf_or_nan_s:
6772	and.l		&0x007fffff, %d1	# 23-bit fraction
6773	bne		is_nan_s		# any fraction bit set => NAN
6774is_inf_s:
6775	mov.b		&INF, %d0
6776	rts
6777is_nan_s:
6778	btst		&22, %d1		# top fraction bit: set => QNAN, clear => SNAN
6779	bne		is_qnan_s
6780is_snan_s:
6781	mov.b		&SNAN, %d0
6782	rts
6783is_qnan_s:
6784	mov.b		&QNAN, %d0
6785	rts
6786
6787#########################################################################
6788# XDEF ****************************************************************	#
6789#	unf_res(): routine to produce default underflow result of a	#
6790#		   scaled extended precision number; this is used by	#
6791#		   fadd/fdiv/fmul/etc. emulation routines.		#
6792#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
6793#		    single round prec and extended prec mode.		#
6794#									#
6795# XREF ****************************************************************	#
6796#	_denorm() - denormalize according to scale factor		#
6797#	_round() - round denormalized number according to rnd prec	#
6798#									#
6799# INPUT ***************************************************************	#
6800#	a0 = pointer to extended precision operand			#
6801#	d0 = scale factor						#
6802#	d1 = rounding precision/mode					#
6803#									#
6804# OUTPUT **************************************************************	#
6805#	a0 = pointer to default underflow result in extended precision	#
6806#	d0.b = result FPSR_cc which caller may or may not want to save	#
6807#									#
6808# ALGORITHM ***********************************************************	#
6809#	Convert the input operand to "internal format" which means the	#
6810# exponent is extended to 16 bits and the sign is stored in the unused	#
6811# portion of the extended precision operand. Denormalize the number	#
6812# according to the scale factor passed in d0. Then, round the		#
6813# denormalized result.							#
6814#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
6815# d0 in case the caller doesn't want to save them (as is the case for	#
6816# fmove out).								#
6817#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
6818# precision and the rounding mode to single.				#
6819#									#
6820#########################################################################
6821	global		unf_res
6822unf_res:
6823	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
6824
6825	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format: test the sign bit
6826	sne		FTEMP_SGN(%a0)		# and record it in the separate sign byte
6827
6828	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
6829	and.w		&0x7fff, %d1		# strip off the sign
6830	sub.w		%d0, %d1		# subtract the scale factor
6831	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
6832
6833	mov.l		%a0, -(%sp)		# save operand ptr during calls
6834# stack now: (sp) = operand ptr, 0x4(sp) = saved d1 (its low word is at 0x6(sp))
6835	mov.l		0x4(%sp),%d0		# pass rnd prec.
6836	andi.w		&0x00c0,%d0		# keep prec bits 7:6
6837	lsr.w		&0x4,%d0		# move them down to bits 3:2
6838	bsr.l		_denorm			# denorm result
6839
6840	mov.l		(%sp),%a0		# reload operand ptr
6841	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
6842	andi.w		&0xc0,%d1		# extract rnd prec
6843	lsr.w		&0x4,%d1		# prec now in bits 3:2
6844	swap		%d1			# hi(d1) = rnd prec
6845	mov.w		0x6(%sp),%d1		# load prec:mode again
6846	andi.w		&0x30,%d1		# extract rnd mode (bits 5:4)
6847	lsr.w		&0x4,%d1		# lo(d1) = rnd mode in bits 1:0
6848	bsr.l		_round			# round the denorm
6849
6850	mov.l		(%sp)+, %a0		# pop operand ptr
6851
6852# result is now rounded properly. convert back to normal format
6853	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
6854	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
6855	beq.b		unf_res_chkifzero	# no; result is positive
6856	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
6857	clr.b		FTEMP_SGN(%a0)		# clear temp sign
6858
6859# the number may have become zero after rounding. set ccodes accordingly.
6860unf_res_chkifzero:
6861	clr.l		%d0			# start with no ccode bits set
6862	tst.l		FTEMP_HI(%a0)		# is value now a zero?
6863	bne.b		unf_res_cont		# no
6864	tst.l		FTEMP_LO(%a0)
6865	bne.b		unf_res_cont		# no
6866#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
6867	bset		&z_bit, %d0		# yes; set zero ccode bit (returned in d0, not stored)
6868
6869unf_res_cont:
6870
6871#
6872# can inex1 also be set along with unfl and inex2???
6873#
6874# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6875#
6876	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6877	beq.b		unf_res_end		# no
6878	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6879
6880unf_res_end:
6881	add.l		&0x4, %sp		# clear stack (drop the saved d1)
6882	rts
6883
6884# unf_res() for fsglmul() and fsgldiv().
6885	global		unf_res4
6886unf_res4:
6887	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
6888
6889	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format: test the sign bit
6890	sne		FTEMP_SGN(%a0)		# and record it in the separate sign byte
6891
6892	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
6893	and.w		&0x7fff,%d1		# strip off the sign
6894	sub.w		%d0,%d1			# subtract the scale factor
6895	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
6896
6897	mov.l		%a0,-(%sp)		# save operand ptr during calls
6898
6899	clr.l		%d0			# force rnd prec = ext
6900	bsr.l		_denorm			# denorm result
6901
6902	mov.l		(%sp),%a0		# reload operand ptr
6903	mov.w		&s_mode,%d1		# force rnd prec = sgl
6904	swap		%d1			# hi(d1) = rnd prec
6905	mov.w		0x6(%sp),%d1		# load prec:mode (low word of the saved d1)
6906	andi.w		&0x30,%d1		# extract rnd mode (bits 5:4)
6907	lsr.w		&0x4,%d1		# lo(d1) = rnd mode in bits 1:0
6908	bsr.l		_round			# round the denorm
6909
6910	mov.l		(%sp)+,%a0		# pop operand ptr
6911
6912# result is now rounded properly. convert back to normal format
6913	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
6914	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
6915	beq.b		unf_res4_chkifzero	# no; result is positive
6916	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
6917	clr.b		FTEMP_SGN(%a0)		# clear temp sign
6918
6919# the number may have become zero after rounding. set ccodes accordingly.
6920unf_res4_chkifzero:
6921	clr.l		%d0			# start with no ccode bits set
6922	tst.l		FTEMP_HI(%a0)		# is value now a zero?
6923	bne.b		unf_res4_cont		# no
6924	tst.l		FTEMP_LO(%a0)
6925	bne.b		unf_res4_cont		# no
6926#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
6927	bset		&z_bit,%d0		# yes; set zero ccode bit (returned in d0, not stored)
6928
6929unf_res4_cont:
6930
6931#
6932# can inex1 also be set along with unfl and inex2???
6933#
6934# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6935#
6936	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6937	beq.b		unf_res4_end		# no
6938	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6939
6940unf_res4_end:
6941	add.l		&0x4,%sp		# clear stack (drop the saved d1)
6942	rts
6943
6944#########################################################################
6945# XDEF ****************************************************************	#
6946#	ovf_res(): routine to produce the default overflow result of	#
6947#		   an overflowing number.				#
6948#	ovf_res2(): same as above but the rnd mode/prec are passed	#
6949#		    differently.					#
6950#									#
6951# XREF ****************************************************************	#
6952#	none								#
6953#									#
6954# INPUT ***************************************************************	#
6955#	d1.b	= '-1' => (-); '0' => (+)				#
6956#   ovf_res():								#
6957#	d0	= rnd mode/prec						#
6958#   ovf_res2():								#
6959#	hi(d0)	= rnd prec						#
6960#	lo(d0)	= rnd mode						#
6961#									#
6962# OUTPUT **************************************************************	#
6963#	a0	= points to extended precision result			#
6964#	d0.b	= condition code bits					#
6965#									#
6966# ALGORITHM ***********************************************************	#
6967#	The default overflow result can be determined by the sign of	#
6968# the result and the rounding mode/prec in effect. These bits are	#
6969# concatenated together to create an index into the default result	#
6970# table. A pointer to the correct result is returned in a0. The		#
6971# resulting condition codes are returned in d0 in case the caller	#
6972# doesn't want FPSR_cc altered (as is the case for fmove out).		#
6973#									#
6974#########################################################################
6975
6976	global		ovf_res
6977ovf_res:
6978	andi.w		&0x10,%d1		# keep result sign: bit 4 set iff d1.b was -1
6979	lsr.b		&0x4,%d0		# shift prec/mode down into bits 3:0
6980	or.b		%d0,%d1			# concat the two: {sign,prec,mode}
6981	mov.w		%d1,%d0			# make a copy: d0 indexes the 1-byte ccode table
6982	lsl.b		&0x1,%d1		# multiply d1 by 2 (times the *8 scale = 16-byte entries)
6983	bra.b		ovf_res_load
6984
6985	global		ovf_res2
6986ovf_res2:
6987	and.w		&0x10, %d1		# keep result sign: bit 4 set iff d1.b was -1
6988	or.b		%d0, %d1		# insert rnd mode
6989	swap		%d0			# bring rnd prec down from hi(d0)
6990	or.b		%d0, %d1		# insert rnd prec
6991	mov.w		%d1, %d0		# make a copy: d0 indexes the 1-byte ccode table
6992	lsl.b		&0x1, %d1		# shift left by 1 (times the *8 scale = 16-byte entries)
6993
6994#
6995# use the rounding mode, precision, and result sign as an index into the
6996# two tables below to fetch the default result and the result ccodes.
6997#
6998ovf_res_load:
6999	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7000	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7001
7002	rts
7003
7004tbl_ovfl_cc:					# one ccode byte per tbl_ovfl_result entry, same order
7005	byte		0x2, 0x0, 0x0, 0x2	# + ext: RN,RZ,RM,RP (0x2 where the result is INF)
7006	byte		0x2, 0x0, 0x0, 0x2	# + sgl: RN,RZ,RM,RP
7007	byte		0x2, 0x0, 0x0, 0x2	# + dbl: RN,RZ,RM,RP
7008	byte		0x0, 0x0, 0x0, 0x0	# filler; matches the all-zero result rows
7009	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # - ext: RN,RZ,RM,RP (0x8 where the result is negative)
7010	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # - sgl: RN,RZ,RM,RP
7011	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # - dbl: RN,RZ,RM,RP
7012
7013tbl_ovfl_result:				# 16 bytes per entry; the last longword of each is zero
7014	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7015	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7016	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7017	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7018
7019	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7020	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7021	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7022	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7023
7024	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7026	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7027	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7028
7029	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler row so the
7030	long		0x00000000,0x00000000,0x00000000,0x00000000 # negative results start
7031	long		0x00000000,0x00000000,0x00000000,0x00000000 # at entry 16 (sign bit
7032	long		0x00000000,0x00000000,0x00000000,0x00000000 # 0x10 of the index)
7033
7034	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7035	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7036	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7037	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7038
7039	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7040	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7041	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7042	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7043
7044	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7046	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048
7049#########################################################################
7050# XDEF ****************************************************************	#
7051#	fout(): move from fp register to memory or data register	#
7052#									#
7053# XREF ****************************************************************	#
7054#	_round() - needed to create EXOP for sgl/dbl precision		#
7055#	norm() - needed to create EXOP for extended precision		#
7056#	ovf_res() - create default overflow result for sgl/dbl precision#
7057#	unf_res() - create default underflow result for sgl/dbl prec.	#
7058#	dst_dbl() - create rounded dbl precision result.		#
7059#	dst_sgl() - create rounded sgl precision result.		#
7060#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
7061#	bindec() - convert FP binary number to packed number.		#
7062#	_mem_write() - write data to memory.				#
7063#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064#	_dmem_write_{byte,word,long}() - write data to memory.		#
7065#	store_dreg_{b,w,l}() - store data to data register file.	#
7066#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
7067#									#
7068# INPUT ***************************************************************	#
7069#	a0 = pointer to extended precision source operand		#
7070#	d0 = round prec,mode						#
7071#									#
7072# OUTPUT **************************************************************	#
7073#	fp0 : intermediate underflow or overflow result if		#
7074#	      OVFL/UNFL occurred for a sgl or dbl operand		#
7075#									#
7076# ALGORITHM ***********************************************************	#
7077#	This routine is accessed by many handlers that need to do an	#
7078# opclass three move of an operand out to memory.			#
7079#	Decode an fmove out (opclass 3) instruction to determine if	#
7080# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
7081# register or memory. The algorithm uses a standard "fmove" to create	#
7082# the rounded result. Also, since exceptions are disabled, this also	#
7083# creates the correct OPERR default result if appropriate.		#
7084#	For sgl or dbl precision, overflow or underflow can occur. If	#
7085# either occurs and is enabled, an EXOP must be created.		#
7086#	For extended precision, the stacked <ea> must be fixed along	#
7087# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
7088# the source is a denorm and if underflow is enabled, an EXOP must be	#
7089# created.								#
7090#	For packed, the k-factor must be fetched from the instruction	#
7091# word or a data register. The <ea> must be fixed as w/ extended	#
7092# precision. Then, bindec() is called to create the appropriate		#
7093# packed result.							#
7094#	If at any time an access error is flagged by one of the move-	#
7095# to-memory routines, then a special exit must be made so that the	#
7096# access error can be handled properly.					#
7097#									#
7098#########################################################################
7099
7100	global		fout
7101fout:
7102	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt (3-bit field)
7103	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index: fetch 16-bit handler offset
7104	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine (offset is relative to tbl_fout)
7105
7106	swbeg		&0x8
7107tbl_fout:
7108	short		fout_long	-	tbl_fout	# fmt 0
7109	short		fout_sgl	-	tbl_fout	# fmt 1
7110	short		fout_ext	-	tbl_fout	# fmt 2
7111	short		fout_pack	-	tbl_fout	# fmt 3
7112	short		fout_word	-	tbl_fout	# fmt 4
7113	short		fout_dbl	-	tbl_fout	# fmt 5
7114	short		fout_byte	-	tbl_fout	# fmt 6
7115	short		fout_pack	-	tbl_fout	# fmt 7 (same handler as fmt 3)
7116
7117#################################################################
7118# fmove.b out ###################################################
7119#################################################################
7120
7121# Only "Unimplemented Data Type" exceptions enter here. The operand
7122# is either a DENORM or a NORM.
7123fout_byte:
7124	tst.b		STAG(%a6)		# is operand normalized?
7125	bne.b		fout_byte_denorm	# no
7126
7127	fmovm.x		SRC(%a0),&0x80		# load value into fp0
7128
7129fout_byte_norm:
7130	fmov.l		%d0,%fpcr		# insert rnd prec,mode
7131
7132	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
7133
7134	fmov.l		&0x0,%fpcr		# clear FPCR
7135	fmov.l		%fpsr,%d1		# fetch FPSR
7136	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7137
7138	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7139	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7140	beq.b		fout_byte_dn		# must save to integer regfile
7141
7142	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7143	bsr.l		_dmem_write_byte	# write byte
7144
7145	tst.l		%d1			# did dstore fail?
7146	bne.l		facc_out_b		# yes
7147
7148	rts
7149
7150fout_byte_dn:
7151	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7152	andi.w		&0x7,%d1		# register number is the low 3 bits
7153	bsr.l		store_dreg_b		# store the byte result to that data register
7154	rts
7155
7156fout_byte_denorm:
7157	mov.l		SRC_EX(%a0),%d1		# fetch sign/exponent longword of the source
7158	andi.l		&0x80000000,%d1		# keep DENORM sign
7159	ori.l		&0x00800000,%d1		# make smallest sgl
7160	fmov.s		%d1,%fp0		# use that signed stand-in as the value to move out
7161	bra.b		fout_byte_norm
7162
7163#################################################################
7164# fmove.w out ###################################################
7165#################################################################
7166
7167# Only "Unimplemented Data Type" exceptions enter here. The operand
7168# is either a DENORM or a NORM.
7169fout_word:
7170	tst.b		STAG(%a6)		# is operand normalized?
7171	bne.b		fout_word_denorm	# no
7172
7173	fmovm.x		SRC(%a0),&0x80		# load value into fp0
7174
7175fout_word_norm:
7176	fmov.l		%d0,%fpcr		# insert rnd prec:mode
7177
7178	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
7179
7180	fmov.l		&0x0,%fpcr		# clear FPCR
7181	fmov.l		%fpsr,%d1		# fetch FPSR
7182	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7183
7184	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7185	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7186	beq.b		fout_word_dn		# must save to integer regfile
7187
7188	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7189	bsr.l		_dmem_write_word	# write word
7190
7191	tst.l		%d1			# did dstore fail?
7192	bne.l		facc_out_w		# yes
7193
7194	rts
7195
7196fout_word_dn:
7197	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7198	andi.w		&0x7,%d1		# register number is the low 3 bits
7199	bsr.l		store_dreg_w		# store the word result to that data register
7200	rts
7201
7202fout_word_denorm:
7203	mov.l		SRC_EX(%a0),%d1		# fetch sign/exponent longword of the source
7204	andi.l		&0x80000000,%d1		# keep DENORM sign
7205	ori.l		&0x00800000,%d1		# make smallest sgl
7206	fmov.s		%d1,%fp0		# use that signed stand-in as the value to move out
7207	bra.b		fout_word_norm
7208
7209#################################################################
7210# fmove.l out ###################################################
7211#################################################################
7212
7213# Only "Unimplemented Data Type" exceptions enter here. The operand
7214# is either a DENORM or a NORM.
#
# fout_long: emulate fmove.l out.
#   in:  a0 = ptr to source operand
#        d0 = rnd prec,mode (loaded into the FPCR for the conversion)
#   out: converted longword is stored to Dn (store_dreg_l) when the dst
#        mode field of the opword is zero, else written to memory at
#        EXC_EA (_dmem_write_long). The FPSR produced by the conversion is
#        OR'ed into the low word of USER_FPSR.
#   A failed memory write (d1 != 0) exits through facc_out_l.
#   A DENORM source is not loaded; it is replaced by a single precision
#   value made of the source sign and the pattern 0x00800000.
7215fout_long:
7216	tst.b		STAG(%a6)		# is operand normalized?
7217	bne.b		fout_long_denorm	# no
7218
7219	fmovm.x		SRC(%a0),&0x80		# load value
7220
7221fout_long_norm:
7222	fmov.l		%d0,%fpcr		# insert rnd prec:mode
7223
7224	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
7225
7226	fmov.l		&0x0,%fpcr		# clear FPCR
7227	fmov.l		%fpsr,%d1		# fetch FPSR
7228	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7229
7230fout_long_write:
7231	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7232	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7233	beq.b		fout_long_dn		# must save to integer regfile
7234
7235	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7236	bsr.l		_dmem_write_long	# write long
7237
7238	tst.l		%d1			# did dstore fail?
7239	bne.l		facc_out_l		# yes
7240
7241	rts
7242
7243fout_long_dn:
7244	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7245	andi.w		&0x7,%d1		# keep only the register number
7246	bsr.l		store_dreg_l		# store result longword to Dn
7247	rts
7248
7249fout_long_denorm:
7250	mov.l		SRC_EX(%a0),%d1		# fetch {sgn,exp} longword
7251	andi.l		&0x80000000,%d1		# keep DENORM sign
7252	ori.l		&0x00800000,%d1		# make smallest sgl
7253	fmov.s		%d1,%fp0		# convert this stand-in instead
7254	bra.b		fout_long_norm
7255
7256#################################################################
7257# fmove.x out ###################################################
7258#################################################################
7259
7260# Only "Unimplemented Data Type" exceptions enter here. The operand
7261# is either a DENORM or a NORM.
7262# The DENORM causes an Underflow exception.
#
# fout_ext: emulate fmove.x out.
#   in:  a0 = ptr to source operand
#   out: fp0 = source operand. 12 bytes are written from FP_SCR0 (a copy
#        of the operand whose word after the exponent is cleared) to the
#        address returned in a0 by _calc_ea_fout, using _dmem_write, or
#        _mem_write2 when SPCOND_FLG == mda7_flg.
#        For a non-NORM tag the underflow bit is set in FPSR_EXCEPT and,
#        when FPCR_ENABLE & 0x0a is non-zero, fp1 = EXOP built from the
#        normalized copy.
#   A failed write (d1 != 0) reloads the stacked a6 from EXC_A6 and exits
#   through facc_out_x (fout_ext_err, also used by fout_pack).
7263fout_ext:
7264
7265# we copy the extended precision result to FP_SCR0 so that the reserved
7266# 16-bit field gets zeroed. we do this since we promise not to disturb
7267# what's at SRC(a0).
7268	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7269	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
7270	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7271	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7272
7273	fmovm.x		SRC(%a0),&0x80		# return result
7274
7275	bsr.l		_calc_ea_fout		# fix stacked <ea>
7276
7277	mov.l		%a0,%a1			# pass: dst addr
7278	lea		FP_SCR0(%a6),%a0	# pass: src addr
7279	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7280
7281# we must not yet write the extended precision data to the stack
7282# in the pre-decrement case from supervisor mode or else we'll corrupt
7283# the stack frame. so, leave it in FP_SRC for now and deal with it later...
# NOTE(review): the copy built above lives in FP_SCR0 (that is what a0
# points to here), not FP_SRC -- confirm where _mem_write2 parks the data.
7284	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
7285	beq.b		fout_ext_a7
7286
7287	bsr.l		_dmem_write		# write ext prec number to memory
7288
7289	tst.l		%d1			# did dstore fail?
7290	bne.w		fout_ext_err		# yes
7291
7292	tst.b		STAG(%a6)		# is operand normalized?
7293	bne.b		fout_ext_denorm		# no
7294	rts
7295
7296# the number is a DENORM. must set the underflow exception bit
7297fout_ext_denorm:
7298	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7299
7300	mov.b		FPCR_ENABLE(%a6),%d0
7301	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
7302	bne.b		fout_ext_exc		# yes
7303	rts
7304
7305# we don't want to do the write if the exception occurred in supervisor mode
7306# so _mem_write2() handles this for us.
7307fout_ext_a7:
7308	bsr.l		_mem_write2		# write ext prec number to memory
7309
7310	tst.l		%d1			# did dstore fail?
7311	bne.w		fout_ext_err		# yes
7312
7313	tst.b		STAG(%a6)		# is operand normalized?
7314	bne.b		fout_ext_denorm		# no
7315	rts
7316
# build the EXOP: normalize the FP_SCR0 copy and give it the exponent
# -(shift amount) truncated to 15 bits, keeping the original sign.
7317fout_ext_exc:
7318	lea		FP_SCR0(%a6),%a0
7319	bsr.l		norm			# normalize the mantissa
7320	neg.w		%d0			# new exp = -(shft amt)
7321	andi.w		&0x7fff,%d0
7322	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
7323	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
7324	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
7325	rts
7326
7327fout_ext_err:
7328	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
7329	bra.l		facc_out_x
7330
7331#########################################################################
7332# fmove.s out ###########################################################
7333#########################################################################
# fout_sgl: emulate fmove.s out.
#   in:  a0 = ptr to source operand
#        d0 = rnd prec,mode
#   The precision bits of d0 are replaced with single precision and the
#   result is saved in L_SCR3. The source exponent (sign stripped) then
#   selects the path:
#     compare vs SGL_HI: "bgt" -> fout_sgl_ovfl     (definite overflow)
#                        "beq" -> fout_sgl_may_ovfl (rounding may overflow)
#     compare vs SGL_LO: "blt" -> fout_sgl_unfl     (underflow)
#     otherwise fall through to fout_sgl_exg (plain "fmov.s").
#   fout_sgl_exg stores the converted longword to Dn (store_dreg_l) or to
#   memory at EXC_EA (_dmem_write_long); a failed write (d1 != 0) exits
#   through facc_out_l.
7334fout_sgl:
7335	andi.b		&0x30,%d0		# clear rnd prec
7336	ori.b		&s_mode*0x10,%d0	# insert sgl prec
7337	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
7338
7339#
7340# operand is a normalized number. first, we check to see if the move out
7341# would cause either an underflow or overflow. these cases are handled
7342# separately. otherwise, set the FPCR to the proper rounding mode and
7343# execute the move.
7344#
7345	mov.w		SRC_EX(%a0),%d0		# extract exponent
7346	andi.w		&0x7fff,%d0		# strip sign
7347
7348	cmpi.w		%d0,&SGL_HI		# will operand overflow?
7349	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
7350	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
7351	cmpi.w		%d0,&SGL_LO		# will operand underflow?
7352	blt.w		fout_sgl_unfl		# yes; go handle underflow
7353
7354#
7355# NORMs(in range) can be stored out by a simple "fmov.s"
7356# Unnormalized inputs can come through this point.
7357#
7358fout_sgl_exg:
7359	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
7360
7361	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7362	fmov.l		&0x0,%fpsr		# clear FPSR
7363
7364	fmov.s		%fp0,%d0		# store does convert and round
7365
7366	fmov.l		&0x0,%fpcr		# clear FPCR
7367	fmov.l		%fpsr,%d1		# save FPSR
7368
7369	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
7370
7371fout_sgl_exg_write:
7372	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7373	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7374	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
7375
7376	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7377	bsr.l		_dmem_write_long	# write long
7378
7379	tst.l		%d1			# did dstore fail?
7380	bne.l		facc_out_l		# yes
7381
7382	rts
7383
7384fout_sgl_exg_write_dn:
7385	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7386	andi.w		&0x7,%d1		# keep only the register number
7387	bsr.l		store_dreg_l		# store result longword to Dn
7388	rts
7389
7390#
7391# here, we know that the operand would UNFL if moved out to single prec,
7392# so, denorm and round and then use generic store single routine to
7393# write the value to memory.
7394#
# in:  a0 = ptr to source operand, L_SCR3 = rnd prec,mode
# The source pointer is pushed on the stack for the whole routine: it is
# popped by fout_sd_exc_unfl when an EXOP must be built (FPCR_ENABLE & 0x0a
# non-zero), otherwise it is discarded with "addq" before the rts.
# NOTE(review): the facc_out_l exit is taken with that saved pointer still
# on the stack -- confirm facc_out_l does not depend on the sp value.
7395fout_sgl_unfl:
7396	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7397
7398	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7399	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7400	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7401	mov.l		%a0,-(%sp)		# save src ptr for the EXOP code
7402
7403	clr.l		%d0			# pass: S.F. = 0
7404
7405	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7406	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
7407
7408	lea		FP_SCR0(%a6),%a0
7409	bsr.l		norm			# normalize the DENORM
7410
7411fout_sgl_unfl_cont:
7412	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7413	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7414	bsr.l		unf_res			# calc default underflow result
7415
7416	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7417	bsr.l		dst_sgl			# convert to single prec
7418
7419	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7420	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7421	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
7422
7423	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7424	bsr.l		_dmem_write_long	# write long
7425
7426	tst.l		%d1			# did dstore fail?
7427	bne.l		facc_out_l		# yes
7428
7429	bra.b		fout_sgl_unfl_chkexc
7430
7431fout_sgl_unfl_dn:
7432	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7433	andi.w		&0x7,%d1		# keep only the register number
7434	bsr.l		store_dreg_l		# store result longword to Dn
7435
7436fout_sgl_unfl_chkexc:
7437	mov.b		FPCR_ENABLE(%a6),%d1
7438	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7439	bne.w		fout_sd_exc_unfl	# yes
7440	addq.l		&0x4,%sp		# no; drop the saved src ptr
7441	rts
7442
7443#
7444# it's definitely an overflow so call ovf_res to get the correct answer
7445#
# in:  a0 = ptr to source operand, L_SCR3 = sgl prec,rnd mode
# The low 40 mantissa bits decide whether inex2 is set along with
# ovfl/aovfl/ainex. The default overflow result from ovf_res is converted
# to single and stored to Dn (store_dreg_l) or to memory at EXC_EA
# (_dmem_write_long); a failed write (d1 != 0) exits through facc_out_l.
# The source pointer is pushed on the stack: it is popped by
# fout_sd_exc_ovfl when an EXOP must be built, otherwise it is discarded
# with "addq" before the rts.
#
# The EXOP is required when OVFL or INEX2 is enabled, so FPCR_ENABLE is
# tested with 0x12 (OVFL = 0x10, INEX2 = 0x02). The mask used to be 0x0a
# (UNFL|INEX2, as in the underflow path), which skipped the EXOP when only
# OVFL was enabled.
7446fout_sgl_ovfl:
7447	tst.b		3+SRC_HI(%a0)		# is result inexact?
7448	bne.b		fout_sgl_ovfl_inex2
7449	tst.l		SRC_LO(%a0)		# is result inexact?
7450	bne.b		fout_sgl_ovfl_inex2
7451	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7452	bra.b		fout_sgl_ovfl_cont
7453fout_sgl_ovfl_inex2:
7454	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7455
7456fout_sgl_ovfl_cont:
7457	mov.l		%a0,-(%sp)		# save src ptr for the EXOP code
7458
7459# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7460# overflow result. DON'T save the returned ccodes from ovf_res() since
7461# fmove out doesn't alter them.
7462	tst.b		SRC_EX(%a0)		# is operand negative?
7463	smi		%d1			# set if so
7464	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
7465	bsr.l		ovf_res			# calc OVFL result
7466	fmovm.x		(%a0),&0x80		# load default overflow result
7467	fmov.s		%fp0,%d0		# store to single
7468
7469	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7470	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7471	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
7472
7473	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7474	bsr.l		_dmem_write_long	# write long
7475
7476	tst.l		%d1			# did dstore fail?
7477	bne.l		facc_out_l		# yes
7478
7479	bra.b		fout_sgl_ovfl_chkexc
7480
7481fout_sgl_ovfl_dn:
7482	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7483	andi.w		&0x7,%d1		# keep only the register number
7484	bsr.l		store_dreg_l		# store result longword to Dn
7485
7486fout_sgl_ovfl_chkexc:
7487	mov.b		FPCR_ENABLE(%a6),%d1
7488	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
7489	bne.w		fout_sd_exc_ovfl	# yes
7490	addq.l		&0x4,%sp		# no; drop the saved src ptr
7491	rts
7492
7493#
7494# move out MAY overflow:
7495# (1) force the exp to 0x3fff
7496# (2) do a move w/ appropriate rnd mode
7497# (3) if exp still equals zero, then insert original exponent
7498#	for the correct result.
7499#     if exp now equals one, then it overflowed so call ovf_res.
7500#
# in:  a0 = ptr to source operand, L_SCR3 = sgl prec,rnd mode
# The rescaled copy in FP_SCR0 is loaded under the single precision FPCR
# so that it gets rounded; its magnitude is then compared with 2. a0 is
# left untouched, so both exits still see the original source operand.
7501fout_sgl_may_ovfl:
7502	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7503	andi.w		&0x8000,%d1		# keep it,clear exp
7504	ori.w		&0x3fff,%d1		# insert exp = 0
7505	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7506	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7507	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7508
7509	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7510
7511	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7512	fmov.l		&0x0,%fpcr		# clear FPCR
7513
7514	fabs.x		%fp0			# need absolute value
7515	fcmp.b		%fp0,&0x2		# did exponent increase?
7516	fblt.w		fout_sgl_exg		# no; go finish NORM
7517	bra.w		fout_sgl_ovfl		# yes; go handle overflow
7518
7519################
7520
# fout_sd_exc_unfl / fout_sd_exc_ovfl: build the EXOP for fmove.s and
# fmove.d out.
#   in:  (sp) = source operand pointer pushed by the unfl/ovfl path
#        L_SCR3 = rnd prec,mode; STAG = source optype tag
#   out: fp1 = EXOP; the pushed pointer has been popped.
# The source is copied to FP_SCR0 (on the unfl entry a DENORM is first
# normalized and given the exponent -(shift amount)). The sign is moved to
# the byte at 2+FP_SCR0_EX, the magnitude is rounded by _round with zero
# g,r,s, and the sign bit is then put back.
7521fout_sd_exc_unfl:
7522	mov.l		(%sp)+,%a0		# restore a0
7523
7524	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7525	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7526	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7527
7528	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
7529	bne.b		fout_sd_exc_cont	# no
7530
7531	lea		FP_SCR0(%a6),%a0
7532	bsr.l		norm			# normalize; d0 = shift amount
7533	neg.l		%d0			# new exp = -(shft amt)
7534	andi.w		&0x7fff,%d0
7535	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert exp, keep sign bit
7536	bra.b		fout_sd_exc_cont
7537
7538fout_sd_exc:
7539fout_sd_exc_ovfl:
7540	mov.l		(%sp)+,%a0		# restore a0
7541
7542	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7543	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7544	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7545
7546fout_sd_exc_cont:
7547	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
7548	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
7549	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
7550
# d1 for _round: hi word = rnd prec bits of the mode byte (shifted right
# by 4), lo word = rnd mode bits.
7551	mov.b		3+L_SCR3(%a6),%d1
7552	lsr.b		&0x4,%d1
7553	andi.w		&0x0c,%d1		# keep rnd prec bits
7554	swap		%d1			# prec now in hi word
7555	mov.b		3+L_SCR3(%a6),%d1
7556	lsr.b		&0x4,%d1
7557	andi.w		&0x03,%d1		# keep rnd mode bits
7558	clr.l		%d0			# pass: zero g,r,s
7559	bsr.l		_round			# round the DENORM
7560
7561	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
7562	beq.b		fout_sd_exc_done	# no
7563	bset		&0x7,FP_SCR0_EX(%a6)	# yes
7564
7565fout_sd_exc_done:
7566	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
7567	rts
7568
7569#################################################################
7570# fmove.d out ###################################################
7571#################################################################
# fout_dbl: emulate fmove.d out.
#   in:  a0 = ptr to source operand
#        d0 = rnd prec,mode
#   The precision bits of d0 are replaced with double precision and the
#   result is saved in L_SCR3. The source exponent (sign stripped) then
#   selects the path:
#     compare vs DBL_HI: "bgt" -> fout_dbl_ovfl     (definite overflow)
#                        "beq" -> fout_dbl_may_ovfl (rounding may overflow)
#     compare vs DBL_LO: "blt" -> fout_dbl_unfl     (underflow)
#     otherwise fall through to fout_dbl_exg (plain "fmov.d").
#   fout_dbl_exg converts into the scratch area at L_SCR1 and writes 8
#   bytes from there to EXC_EA with _dmem_write; a failed write (d1 != 0)
#   exits through facc_out_d. There is no Dn destination case here.
7572fout_dbl:
7573	andi.b		&0x30,%d0		# clear rnd prec
7574	ori.b		&d_mode*0x10,%d0	# insert dbl prec
7575	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
7576
7577#
7578# operand is a normalized number. first, we check to see if the move out
7579# would cause either an underflow or overflow. these cases are handled
7580# separately. otherwise, set the FPCR to the proper rounding mode and
7581# execute the move.
7582#
7583	mov.w		SRC_EX(%a0),%d0		# extract exponent
7584	andi.w		&0x7fff,%d0		# strip sign
7585
7586	cmpi.w		%d0,&DBL_HI		# will operand overflow?
7587	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
7588	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
7589	cmpi.w		%d0,&DBL_LO		# will operand underflow?
7590	blt.w		fout_dbl_unfl		# yes; go handle underflow
7591
7592#
7593# NORMs(in range) can be stored out by a simple "fmov.d"
7594# Unnormalized inputs can come through this point.
7595#
7596fout_dbl_exg:
7597	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
7598
7599	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7600	fmov.l		&0x0,%fpsr		# clear FPSR
7601
7602	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
7603
7604	fmov.l		&0x0,%fpcr		# clear FPCR
7605	fmov.l		%fpsr,%d0		# save FPSR
7606
7607	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
7608
7609	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7610	lea		L_SCR1(%a6),%a0		# pass: src addr
7611	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7612	bsr.l		_dmem_write		# store dbl fop to memory
7613
7614	tst.l		%d1			# did dstore fail?
7615	bne.l		facc_out_d		# yes
7616
7617	rts					# no; so we're finished
7618
7619#
7620# here, we know that the operand would UNFL if moved out to double prec,
7621# so, denorm and round and then use generic store double routine to
7622# write the value to memory.
7623#
# in:  a0 = ptr to source operand, L_SCR3 = rnd prec,mode
# The double result returned by dst_dbl in d0/d1 is placed in
# L_SCR1/L_SCR2 and written as 8 bytes starting at L_SCR1.
# The source pointer is pushed on the stack for the whole routine: it is
# popped by fout_sd_exc_unfl when an EXOP must be built (FPCR_ENABLE & 0x0a
# non-zero), otherwise it is discarded with "addq" before the rts.
# NOTE(review): the facc_out_d exit is taken with that saved pointer still
# on the stack -- confirm facc_out_d does not depend on the sp value.
7624fout_dbl_unfl:
7625	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7626
7627	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7628	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7629	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7630	mov.l		%a0,-(%sp)		# save src ptr for the EXOP code
7631
7632	clr.l		%d0			# pass: S.F. = 0
7633
7634	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7635	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
7636
7637	lea		FP_SCR0(%a6),%a0
7638	bsr.l		norm			# normalize the DENORM
7639
7640fout_dbl_unfl_cont:
7641	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7642	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7643	bsr.l		unf_res			# calc default underflow result
7644
7645	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7646	bsr.l		dst_dbl			# convert to double prec
7647	mov.l		%d0,L_SCR1(%a6)		# hi(double result)
7648	mov.l		%d1,L_SCR2(%a6)		# lo(double result)
7649
7650	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7651	lea		L_SCR1(%a6),%a0		# pass: src addr
7652	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7653	bsr.l		_dmem_write		# store dbl fop to memory
7654
7655	tst.l		%d1			# did dstore fail?
7656	bne.l		facc_out_d		# yes
7657
7658	mov.b		FPCR_ENABLE(%a6),%d1
7659	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7660	bne.w		fout_sd_exc_unfl	# yes
7661	addq.l		&0x4,%sp		# no; drop the saved src ptr
7662	rts
7663
7664#
7665# it's definitely an overflow so call ovf_res to get the correct answer
7666#
# in:  a0 = ptr to source operand, L_SCR3 = dbl prec,rnd mode
# The low 11 mantissa bits decide whether inex2 is set along with
# ovfl/aovfl/ainex. The default overflow result from ovf_res is converted
# to double in the scratch area at L_SCR1 and written as 8 bytes to EXC_EA
# (_dmem_write); a failed write (d1 != 0) exits through facc_out_d.
# The source pointer is pushed on the stack: it is popped by
# fout_sd_exc_ovfl when an EXOP must be built, otherwise it is discarded
# with "addq" before the rts.
#
# The EXOP is required when OVFL or INEX2 is enabled, so FPCR_ENABLE is
# tested with 0x12 (OVFL = 0x10, INEX2 = 0x02). The mask used to be 0x0a
# (UNFL|INEX2, as in the underflow path), which skipped the EXOP when only
# OVFL was enabled.
7667fout_dbl_ovfl:
7668	mov.w		2+SRC_LO(%a0),%d0
7669	andi.w		&0x7ff,%d0		# any bits below dbl precision?
7670	bne.b		fout_dbl_ovfl_inex2	# yes; result is inexact
7671
7672	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7673	bra.b		fout_dbl_ovfl_cont
7674fout_dbl_ovfl_inex2:
7675	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7676
7677fout_dbl_ovfl_cont:
7678	mov.l		%a0,-(%sp)		# save src ptr for the EXOP code
7679
7680# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7681# overflow result. DON'T save the returned ccodes from ovf_res() since
7682# fmove out doesn't alter them.
7683	tst.b		SRC_EX(%a0)		# is operand negative?
7684	smi		%d1			# set if so
7685	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
7686	bsr.l		ovf_res			# calc OVFL result
7687	fmovm.x		(%a0),&0x80		# load default overflow result
7688	fmov.d		%fp0,L_SCR1(%a6)	# store to double
7689
7690	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7691	lea		L_SCR1(%a6),%a0		# pass: src addr
7692	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7693	bsr.l		_dmem_write		# store dbl fop to memory
7694
7695	tst.l		%d1			# did dstore fail?
7696	bne.l		facc_out_d		# yes
7697
7698	mov.b		FPCR_ENABLE(%a6),%d1
7699	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
7700	bne.w		fout_sd_exc_ovfl	# yes
7701	addq.l		&0x4,%sp		# no; drop the saved src ptr
7702	rts
7703
7704#
7705# move out MAY overflow:
7706# (1) force the exp to 0x3fff
7707# (2) do a move w/ appropriate rnd mode
7708# (3) if exp still equals zero, then insert original exponent
7709#	for the correct result.
7710#     if exp now equals one, then it overflowed so call ovf_res.
7711#
# in:  a0 = ptr to source operand, L_SCR3 = dbl prec,rnd mode
# The rescaled copy in FP_SCR0 is loaded under the double precision FPCR
# so that it gets rounded; its magnitude is then compared with 2. a0 is
# left untouched, so both exits still see the original source operand.
7712fout_dbl_may_ovfl:
7713	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7714	andi.w		&0x8000,%d1		# keep it,clear exp
7715	ori.w		&0x3fff,%d1		# insert exp = 0
7716	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7717	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7718	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7719
7720	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7721
7722	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7723	fmov.l		&0x0,%fpcr		# clear FPCR
7724
7725	fabs.x		%fp0			# need absolute value
7726	fcmp.b		%fp0,&0x2		# did exponent increase?
7727	fblt.w		fout_dbl_exg		# no; go finish NORM
7728	bra.w		fout_dbl_ovfl		# yes; go handle overflow
7729
7730#########################################################################
7731# XDEF ****************************************************************	#
7732#	dst_dbl(): create double precision value from extended prec.	#
7733#									#
7734# XREF ****************************************************************	#
7735#	None								#
7736#									#
7737# INPUT ***************************************************************	#
7738#	a0 = pointer to source operand in extended precision		#
7739#									#
7740# OUTPUT **************************************************************	#
7741#	d0 = hi(double precision result)				#
7742#	d1 = lo(double precision result)				#
7743#									#
7744# ALGORITHM ***********************************************************	#
7745#									#
7746#  Changes extended precision to double precision.			#
7747#  Note: no attempt is made to round the extended value to double.	#
7748#	dbl_sign = ext_sign						#
7749#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
7750#	get rid of ext integer bit					#
7751#	dbl_mant = ext_mant{62:11}					#
7752#									#
7753#		---------------   ---------------    ---------------	#
7754#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7755#		---------------   ---------------    ---------------	#
7756#		 95	    64    63 62	      32      31     11	  0	#
7757#				     |			     |		#
7758#				     |			     |		#
7759#				     |			     |		#
7760#			             v			     v		#
7761#			      ---------------   ---------------		#
7762#  double   ->		      |s|exp| mant  |   |  mant       |		#
7763#			      ---------------   ---------------		#
7764#			      63     51   32   31	       0	#
7765#									#
7766#########################################################################
7767
# dst_dbl: repack the extended precision operand at (a0) as a double.
#   in:  a0 = ptr to operand (FTEMP_EX / FTEMP_HI / FTEMP_LO layout)
#   out: d0 = hi longword, d1 = lo longword of the double
#   Uses L_SCR1(%a6) and L_SCR2(%a6) as scratch. No rounding is done: the
#   mantissa bits below the double's 52 are simply dropped.
7768dst_dbl:
7769	clr.l		%d0			# clear d0
7770	mov.w		FTEMP_EX(%a0),%d0	# get exponent
7771	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
7772	addi.w		&DBL_BIAS,%d0		# add double precision bias
7773	tst.b		FTEMP_HI(%a0)		# is number a denorm?
7774	bmi.b		dst_get_dupper		# no
7775	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
7776dst_get_dupper:
7777	swap		%d0			# d0 now in upper word
7778	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
7779	tst.b		FTEMP_EX(%a0)		# test sign
7780	bpl.b		dst_get_dman		# if positive, go process mantissa
7781	bset		&0x1f,%d0		# if negative, set sign
7782dst_get_dman:
7783	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7784	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
7785	or.l		%d1,%d0			# put these bits in ms word of double
7786	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
7787	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7788	mov.l		&21,%d0			# load shift count
7789	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
7790	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
7791	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
7792	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
7793	mov.l		L_SCR2(%a6),%d1
7794	or.l		%d0,%d1			# put them in double result
7795	mov.l		L_SCR1(%a6),%d0		# d0 = hi(double result)
7796	rts
7797
7798#########################################################################
7799# XDEF ****************************************************************	#
7800#	dst_sgl(): create single precision value from extended prec	#
7801#									#
7802# XREF ****************************************************************	#
7803#									#
7804# INPUT ***************************************************************	#
7805#	a0 = pointer to source operand in extended precision		#
7806#									#
7807# OUTPUT **************************************************************	#
7808#	d0 = single precision result					#
7809#									#
7810# ALGORITHM ***********************************************************	#
7811#									#
7812# Changes extended precision to single precision.			#
7813#	sgl_sign = ext_sign						#
7814#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
7815#	get rid of ext integer bit					#
7816#	sgl_mant = ext_mant{62:40}					#
7817#									#
7818#		---------------   ---------------    ---------------	#
7819#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7820#		---------------   ---------------    ---------------	#
7821#		 95	    64    63 62	   40 32      31     12	  0	#
7822#				     |	   |				#
7823#				     |	   |				#
7824#				     |	   |				#
7825#			             v     v				#
7826#			      ---------------				#
7827#  single   ->		      |s|exp| mant  |				#
7828#			      ---------------				#
7829#			      31     22     0				#
7830#									#
7831#########################################################################
7832
# dst_sgl: repack the extended precision operand at (a0) as a single.
#   in:  a0 = ptr to operand (FTEMP_EX / FTEMP_HI layout)
#   out: d0 = single precision result; d1 is used as scratch
#   No rounding is done: only the 23 bits below the integer bit of the ms
#   mantissa longword are kept.
7833dst_sgl:
7834	clr.l		%d0
7835	mov.w		FTEMP_EX(%a0),%d0	# get exponent
7836	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
7837	addi.w		&SGL_BIAS,%d0		# add single precision bias
7838	tst.b		FTEMP_HI(%a0)		# is number a denorm?
7839	bmi.b		dst_get_supper		# no
7840	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
7841dst_get_supper:
7842	swap		%d0			# put exp in upper word of d0
7843	lsl.l		&0x7,%d0		# shift it into single exp bits
7844	tst.b		FTEMP_EX(%a0)		# test sign
7845	bpl.b		dst_get_sman		# if positive, continue
7846	bset		&0x1f,%d0		# if negative, put in sign first
7847dst_get_sman:
7848	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7849	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
7850	lsr.l		&0x8,%d1		# and put them flush right
7851	or.l		%d1,%d0			# put these bits in ms word of single
7852	rts
7853
7854##############################################################################
# fout_pack: emulate fmove.p out.
#   in:  STAG = source optype tag; the operand itself is read from FP_SRC
#   The destination address returned in a0 by _calc_ea_fout is pushed and
#   later popped into a1 for the 12-byte write.
#   NORM/DENORM: the k-factor is taken from the low 7 bits (sign extended)
#   of either the static field in the 1+EXC_CMDREG byte or the data
#   register selected by bits 6:4 of that byte (via fetch_dreg). bindec
#   converts FP_SRC; the result is then taken from FP_SCR0.
#   Other tags: FP_SRC is written as is, with the word at 2+FP_SRC_EX
#   cleared; an SNAN additionally sets SNAN/AIOP and bit 6 of the first
#   mantissa byte.
#   The write uses _dmem_write, or _mem_write2 when SPCOND_FLG == mda7_flg;
#   a failed write (d1 != 0) exits through fout_ext_err.
7855fout_pack:
7856	bsr.l		_calc_ea_fout		# fetch the <ea>
7857	mov.l		%a0,-(%sp)		# save dst addr
7858
7859	mov.b		STAG(%a6),%d0		# fetch input type
7860	bne.w		fout_pack_not_norm	# input is not NORM
7861
7862fout_pack_norm:
7863	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
7864	beq.b		fout_pack_s		# static
7865
7866fout_pack_d:
7867	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
7868	lsr.b		&0x4,%d1
7869	andi.w		&0x7,%d1		# keep only the register number
7870
7871	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
7872
7873	bra.b		fout_pack_type
7874fout_pack_s:
7875	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
7876
7877fout_pack_type:
7878	bfexts		%d0{&25:&7},%d0		# extract k-factor
7879	mov.l	%d0,-(%sp)			# save k-factor across bindec
7880
7881	lea		FP_SRC(%a6),%a0		# pass: ptr to input
7882
7883# bindec is currently scrambling FP_SRC for denorm inputs.
7884# we'll have to change this, but for now, tough luck!!!
7885	bsr.l		bindec			# convert xprec to packed
7886
7887#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
7889
7890	mov.l	(%sp)+,%d0			# restore k-factor
7891
7892	tst.b		3+FP_SCR0_EX(%a6)	# is any part of the result
7893	bne.b		fout_pack_set		# "mantissa" non-zero?
7894	tst.l		FP_SCR0_HI(%a6)
7895	bne.b		fout_pack_set
7896	tst.l		FP_SCR0_LO(%a6)
7897	bne.b		fout_pack_set
7898
7899# add the extra condition that only if the k-factor was zero, too, should
7900# we zero the exponent
7901	tst.l		%d0
7902	bne.b		fout_pack_set
7903# "mantissa" is all zero which means that the answer is zero. but, the '040
7904# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905# if the mantissa is zero, I will zero the exponent, too.
7906# the question now is whether the exponents sign bit is allowed to be non-zero
7907# for a zero, also...
7908	andi.w		&0xf000,FP_SCR0(%a6)
7909
7910fout_pack_set:
7911
7912	lea		FP_SCR0(%a6),%a0	# pass: src addr
7913
7914fout_pack_write:
7915	mov.l		(%sp)+,%a1		# pass: dst addr
7916	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7917
7918	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
7919	beq.b		fout_pack_a7
7920
7921	bsr.l		_dmem_write		# write 12-byte result to memory
7922
7923	tst.l		%d1			# did dstore fail?
7924	bne.w		fout_ext_err		# yes
7925
7926	rts
7927
7928# we don't want to do the write if the exception occurred in supervisor mode
7929# so _mem_write2() handles this for us.
7930fout_pack_a7:
7931	bsr.l		_mem_write2		# write 12-byte result to memory
7932
7933	tst.l		%d1			# did dstore fail?
7934	bne.w		fout_ext_err		# yes
7935
7936	rts
7937
7938fout_pack_not_norm:
7939	cmpi.b		%d0,&DENORM		# is it a DENORM?
7940	beq.w		fout_pack_norm		# yes
7941	lea		FP_SRC(%a6),%a0		# pass: src addr (operand as is)
7942	clr.w		2+FP_SRC_EX(%a6)	# clear word after the exponent
7943	cmpi.b		%d0,&SNAN		# is it an SNAN?
7944	beq.b		fout_pack_snan		# yes
7945	bra.b		fout_pack_write		# no
7946
7947fout_pack_snan:
7948	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
7950	bra.b		fout_pack_write
7951
7952#########################################################################
7953# XDEF ****************************************************************	#
7954#	fmul(): emulates the fmul instruction				#
7955#	fsmul(): emulates the fsmul instruction				#
7956#	fdmul(): emulates the fdmul instruction				#
7957#									#
7958# XREF ****************************************************************	#
7959#	scale_to_zero_src() - scale src exponent to zero		#
7960#	scale_to_zero_dst() - scale dst exponent to zero		#
7961#	unf_res() - return default underflow result			#
7962#	ovf_res() - return default overflow result			#
7963#	res_qnan() - return QNAN result					#
7964#	res_snan() - return SNAN result					#
7965#									#
7966# INPUT ***************************************************************	#
7967#	a0 = pointer to extended precision source operand		#
7968#	a1 = pointer to extended precision destination operand		#
7969#	d0  rnd prec,mode						#
7970#									#
7971# OUTPUT **************************************************************	#
7972#	fp0 = result							#
7973#	fp1 = EXOP (if exception occurred)				#
7974#									#
7975# ALGORITHM ***********************************************************	#
7976#	Handle NANs, infinities, and zeroes as special cases. Divide	#
7977# norms/denorms into ext/sgl/dbl precision.				#
7978#	For norms/denorms, scale the exponents such that a multiply	#
7979# instruction won't cause an exception. Use the regular fmul to		#
7980# compute a result. Check if the regular operands would have taken	#
7981# an exception. If so, return the default overflow/underflow result	#
7982# and return the EXOP if exceptions are enabled. Else, scale the	#
7983# result operand to the proper exponent.				#
7984#									#
7985#########################################################################
7986
# Scale factor thresholds for fmul, one longword per rounding precision in
# the order ext, sgl, dbl. fmul indexes both tables with the precision
# field of L_SCR3 (shifted down to the low bits) scaled by 4 and compares
# the combined scale factor in d0 against the selected entry.
7987	align		0x10
7988tbl_fmul_ovfl:
7989	long		0x3fff - 0x7ffe		# ext_max
7990	long		0x3fff - 0x407e		# sgl_max
7991	long		0x3fff - 0x43fe		# dbl_max
7992tbl_fmul_unfl:
7993	long		0x3fff + 0x0001		# ext_unfl
7994	long		0x3fff - 0x3f80		# sgl_unfl
7995	long		0x3fff - 0x3c00		# dbl_unfl
7996
# fsmul / fdmul / fmul entry points.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand
#        d0 = rnd prec,mode
#   fsmul and fdmul replace the precision bits of d0 with single or double
#   precision and continue into fmul, which saves d0 in L_SCR3.
#   If DTAG and STAG are not both zero, control goes to fmul_not_norm.
#   Otherwise both operands are copied to FP_SCR1 (dst) and FP_SCR0 (src),
#   scaled with scale_to_zero_src/scale_to_zero_dst, and the sum of the two
#   scale factors (d0) is compared against tbl_fmul_ovfl and tbl_fmul_unfl
#   to pick fmul_may_ovfl, fmul_ovfl, fmul_may_unfl or fmul_unfl; with no
#   match execution falls through to fmul_normal.
7997	global		fsmul
7998fsmul:
7999	andi.b		&0x30,%d0		# clear rnd prec
8000	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8001	bra.b		fmul
8002
8003	global		fdmul
8004fdmul:
8005	andi.b		&0x30,%d0		# clear rnd prec
8006	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8007
8008	global		fmul
8009fmul:
8010	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8011
8012	clr.w		%d1
8013	mov.b		DTAG(%a6),%d1
8014	lsl.b		&0x3,%d1
8015	or.b		STAG(%a6),%d1		# combine src tags
8016	bne.w		fmul_not_norm		# optimize on non-norm input
8017
8018fmul_norm:
8019	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8020	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8021	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8022
8023	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8024	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8025	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8026
8027	bsr.l		scale_to_zero_src	# scale src exponent
8028	mov.l		%d0,-(%sp)		# save scale factor 1
8029
8030	bsr.l		scale_to_zero_dst	# scale dst exponent
8031
8032	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
8033
8034	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8035	lsr.b		&0x6,%d1		# shift to lo bits
8036	mov.l		(%sp)+,%d0		# load S.F.
8037	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038	beq.w		fmul_may_ovfl		# result may rnd to overflow
8039	blt.w		fmul_ovfl		# result will overflow
8040
8041	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042	beq.w		fmul_may_unfl		# result may rnd to no unfl
8043	bgt.w		fmul_unfl		# result will underflow
8044
8045#
8046# NORMAL:
8047# - the result of the multiply operation will neither overflow nor underflow.
8048# - do the multiply to the proper precision and rounding mode.
8049# - scale the result exponent using the scale factor. if both operands were
8050# normalized then we really don't need to go through this scaling. but for now,
8051# this will do.
8052#
# in:  FP_SCR0 = scaled src, FP_SCR1 = scaled dst, d0 = scale factor,
#      L_SCR3 = rnd prec,mode
# out: fp0 = result; the FPSR from the multiply is OR'ed into USER_FPSR.
# fmul_normal_exit expects the unscaled-exponent result in fp0 and the
# scale factor in d0; d2 is saved and restored around its use.
8053fmul_normal:
8054	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8055
8056	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8057	fmov.l		&0x0,%fpsr		# clear FPSR
8058
8059	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8060
8061	fmov.l		%fpsr,%d1		# save status
8062	fmov.l		&0x0,%fpcr		# clear FPCR
8063
8064	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8065
8066fmul_normal_exit:
8067	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8068	mov.l		%d2,-(%sp)		# save d2
8069	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8070	mov.l		%d1,%d2			# make a copy
8071	andi.l		&0x7fff,%d1		# strip sign
8072	andi.w		&0x8000,%d2		# keep old sign
8073	sub.l		%d0,%d1			# new exp = exp - scale factor
8074	or.w		%d2,%d1			# concat old sign,new exp
8075	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8076	mov.l		(%sp)+,%d2		# restore d2
8077	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8078	rts
8079
8080#
8081# OVERFLOW:
8082# - the result of the multiply operation is an overflow.
8083# - do the multiply to the proper precision and rounding mode in order to
8084# set the inexact bits.
8085# - calculate the default result and return it in fp0.
8086# - if overflow or inexact is enabled, we need a multiply result rounded to
8087# extended precision. if the original operation was extended, then we have this
8088# result. if the original operation was single or double, we have to do another
8089# multiply using extended precision and the correct rounding mode. the result
8090# of this operation then has its exponent scaled by -0x6000 to create the
8091# exceptional operand.
8092#
# in:  FP_SCR0 = scaled src, FP_SCR1 = scaled dst, d0 = scale factor,
#      L_SCR3 = rnd prec,mode
# out: with OVFL and INEX disabled (FPCR_ENABLE & 0x13 == 0), fp0 = default
#      overflow result from ovf_res and the byte ovf_res returns in d0 is
#      OR'ed into FPSR_CC. Otherwise control goes to fmul_ovfl_ena.
8093fmul_ovfl:
8094	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8095
8096	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8097	fmov.l		&0x0,%fpsr		# clear FPSR
8098
8099	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8100
8101	fmov.l		%fpsr,%d1		# save status
8102	fmov.l		&0x0,%fpcr		# clear FPCR
8103
8104	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8105
8106# save setting this until now because this is where fmul_may_ovfl may jump in
8107fmul_ovfl_tst:
8108	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8109
8110	mov.b		FPCR_ENABLE(%a6),%d1
8111	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8112	bne.b		fmul_ovfl_ena		# yes
8113
8114# calculate the default result
8115fmul_ovfl_dis:
8116	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8117	sne		%d1			# set sign param accordingly
8118	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
8119	bsr.l		ovf_res			# calculate default result
8120	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8121	fmovm.x		(%a0),&0x80		# return default result in fp0
8122	rts
8123
8124#
8125# OVFL (or INEX) is enabled; Create EXOP:
8126# - if precision is extended, then we have the EXOP. simply bias the exponent
8127# with an extra -0x6000. if the precision is single or double, we need to
8128# calculate a result rounded to extended precision.
8129# - in: fp0 = result at the user's precision, d0 = scale factor.
8130fmul_ovfl_ena:
8131	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8132	andi.b		&0xc0,%d1		# test the rnd prec
8133	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
8134
8135fmul_ovfl_ena_cont:
8136	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8137
8138	mov.l		%d2,-(%sp)		# save d2
8139	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8140	mov.w		%d1,%d2			# make a copy
8141	andi.l		&0x7fff,%d1		# strip sign
8142	sub.l		%d0,%d1			# subtract scale factor from exp
8143	subi.l		&0x6000,%d1		# subtract bias
8144	andi.w		&0x7fff,%d1		# clear sign bit (wrap into 15 bits)
8145	andi.w		&0x8000,%d2		# keep old sign
8146	or.w		%d2,%d1			# concat old sign,new exp
8147	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8148	mov.l		(%sp)+,%d2		# restore d2
8149	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8150	bra.b		fmul_ovfl_dis		# then produce default result in fp0
8151
8152fmul_ovfl_ena_sd:
8153	fmovm.x		FP_SCR1(%a6),&0x80	# reload dst operand into fp0
8154
8155	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8156	andi.b		&0x30,%d1		# keep rnd mode only (prec = ext)
8157	fmov.l		%d1,%fpcr		# set FPCR
8158
8159	fmul.x		FP_SCR0(%a6),%fp0	# redo multiply; FPSR is not re-saved
8160
8161	fmov.l		&0x0,%fpcr		# clear FPCR
8162	bra.b		fmul_ovfl_ena_cont
8163
8164#
8165# may OVERFLOW:
8166# - the result of the multiply operation MAY overflow.
8167# - do the multiply to the proper precision and rounding mode in order to
8168# set the inexact bits.
8169# - if |result| >= 2 treat it as an overflow, otherwise exit as a normal result.
8170# - in: d0 = scale factor.
8171fmul_may_ovfl:
8172	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
8173
8174	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
8175	fmov.l		&0x0,%fpsr		# clear FPSR
8176
8177	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8178
8179	fmov.l		%fpsr,%d1		# save status
8180	fmov.l		&0x0,%fpcr		# clear FPCR
8181
8182	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8183
8184	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
8185	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8186	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
8187
8188# no, it didn't overflow; we have correct result
8189	bra.w		fmul_normal_exit	# rescale exponent and return fp0
8190
8191#
8192# UNDERFLOW:
8193# - the result of the multiply operation is an underflow.
8194# - do the multiply in extended precision, round-to-zero, in order to
8195# set the inexact bits; unf_res() is left to work out the default result.
8196# - calculate the default result and return it in fp0.
8197# - if underflow or inexact is enabled, we need a multiply result rounded to
8198# extended precision with the user's rounding mode. because the multiply below
8199# uses RZ, the enabled path (fmul_unfl_ena) redoes the multiply. the result
8200# of that operation then has its exponent rebiased (scale factor subtracted,
8201# 0x6000 added) to create the
8202# exceptional operand.
8203# - in: d0 = scale factor (only consumed when the EXOP is built).
8204fmul_unfl:
8205	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8206
8207# for fun, let's use only extended precision, round to zero. then, let
8208# the unf_res() routine figure out all the rest.
8209# will we get the correct answer?
8210	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
8211
8212	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: RZ, no prec bits set
8213	fmov.l		&0x0,%fpsr		# clear FPSR
8214
8215	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8216
8217	fmov.l		%fpsr,%d1		# save status
8218	fmov.l		&0x0,%fpcr		# clear FPCR
8219
8220	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8221
8222	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
8223	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8224	bne.b		fmul_unfl_ena		# yes; build the EXOP first
8225
8226fmul_unfl_dis:
8227	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8228
8229	lea		FP_SCR0(%a6),%a0	# pass: result addr
8230	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8231	bsr.l		unf_res			# calculate default result
8232	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8233	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8234	rts
8235
8236#
8237# UNFL (or INEX) is enabled: build the EXOP in fp1, then go make the default.
8238# in: fp0 = RZ result from fmul_unfl (left untouched here), d0 = scale factor.
8239fmul_unfl_ena:
8240	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
8241
8242	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8243	andi.b		&0xc0,%d1		# is precision extended?
8244	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
8245
8246# the multiplication above used RZ for all, so it is re-done here with the
8247# user's rounding mode (this path: extended precision, FPCR = L_SCR3 as is).
8248	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8249
8250fmul_unfl_ena_cont:
8251	fmov.l		&0x0,%fpsr		# clear FPSR
8252
8253	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply (result in fp1)
8254
8255	fmov.l		&0x0,%fpcr		# clear FPCR
8256
8257	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
8258	mov.l		%d2,-(%sp)		# save d2
8259	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8260	mov.l		%d1,%d2			# make a copy
8261	andi.l		&0x7fff,%d1		# strip sign
8262	andi.w		&0x8000,%d2		# keep old sign
8263	sub.l		%d0,%d1			# subtract scale factor from exp
8264	addi.l		&0x6000,%d1		# add bias
8265	andi.w		&0x7fff,%d1		# keep 15-bit exponent field
8266	or.w		%d2,%d1			# concat old sign,new exp
8267	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8268	mov.l		(%sp)+,%d2		# restore d2
8269	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8270	bra.w		fmul_unfl_dis		# fp0 (RZ result) -> default result
8271
8272fmul_unfl_ena_sd:
8273	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8274	andi.b		&0x30,%d1		# use only rnd mode (prec = ext)
8275	fmov.l		%d1,%fpcr		# set FPCR
8276
8277	bra.b		fmul_unfl_ena_cont
8278
8279# MAY UNDERFLOW:
8280# -use the correct rounding mode and precision. this code favors operations
8281# that do not underflow. in: d0 = scale factor.
8282fmul_may_unfl:
8283	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
8284
8285	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
8286	fmov.l		&0x0,%fpsr		# clear FPSR
8287
8288	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8289
8290	fmov.l		%fpsr,%d1		# save status
8291	fmov.l		&0x0,%fpcr		# clear FPCR
8292
8293	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8294
8295	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
8296	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
8297	fbgt.w		fmul_normal_exit	# |result| > 2: no underflow occurred
8298	fblt.w		fmul_unfl		# |result| < 2: underflow occurred
8299
8300#
8301# we still don't know if underflow occurred. result is ~ equal to 2. but,
8302# we don't know if the result was an underflow that rounded up to a 2 or
8303# a normalized number that rounded down to a 2. so, redo the entire operation
8304# using RZ as the rounding mode to see what the pre-rounded result is.
8305# this case should be relatively rare. the retry is done in fp1 so that fp0
8306	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
8307# still holds the first result for fmul_normal_exit.
8308	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8309	andi.b		&0xc0,%d1		# keep rnd prec
8310	ori.b		&rz_mode*0x10,%d1	# insert RZ
8311
8312	fmov.l		%d1,%fpcr		# set FPCR
8313	fmov.l		&0x0,%fpsr		# clear FPSR
8314
8315	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply (RZ, in fp1)
8316
8317	fmov.l		&0x0,%fpcr		# clear FPCR
8318	fabs.x		%fp1			# make absolute value
8319	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
8320	fbge.w		fmul_normal_exit	# no; no underflow occurred
8321	bra.w		fmul_unfl		# yes, underflow occurred
8322
8323
8324################################################################################
8325
8326#
8327# Multiply: inputs are not both normalized; what are they?
8328# d1 indexes a table of 16-bit offsets (relative to tbl_fmul_op); jump there.
8329fmul_not_norm:
8330	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
8331	jmp		(tbl_fmul_op.b,%pc,%d1.w)
8332# entries are labeled "dst-type x src-type"; 8 slots per dst type, 6 used.
8333	swbeg		&48
8334tbl_fmul_op:
8335	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8336	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8337	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8338	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8339	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8340	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8341	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8342	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8343
8344	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
8345	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
8346	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
8347	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
8348	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
8349	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
8350	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8351	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8352
8353	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
8354	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
8355	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
8356	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
8357	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
8358	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
8359	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8360	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8361
8362	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
8363	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
8364	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
8365	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
8366	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
8367	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
8368	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8369	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8370
8371	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
8372	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
8373	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
8374	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
8375	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
8376	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
8377	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8378	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8379
8380	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
8381	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
8382	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
8383	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
8384	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
8385	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
8386	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8387	short		tbl_fmul_op	- tbl_fmul_op # (unused slot)
8388
8389fmul_res_operr:
8390	bra.l		res_operr		# ZERO x INF, INF x ZERO table entries
8391fmul_res_snan:
8392	bra.l		res_snan		# any SNAN operand (see tbl_fmul_op)
8393fmul_res_qnan:
8394	bra.l		res_qnan		# QNAN operand, no SNAN (see tbl_fmul_op)
8395
8396#
8397# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398# in: a0 = src operand, a1 = dst operand. out: fp0 = +/-0, FPSR_CC = Z (and N).
8399	global		fmul_zero		# global for fsglmul
8400fmul_zero:
8401	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8402	mov.b		DST_EX(%a1),%d1
8403	eor.b		%d0,%d1			# bit 7 of d1 = sign of the product
8404	bpl.b		fmul_zero_p		# result ZERO is pos.
8405fmul_zero_n:
8406	fmov.s		&0x80000000,%fp0	# load -ZERO
8407	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8408	rts
8409fmul_zero_p:
8410	fmov.s		&0x00000000,%fp0	# load +ZERO
8411	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
8412	rts
8413
8414#
8415# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416#
8417# Note: The j-bit for an infinity is a don't-care. However, to be
8418# strictly compatible w/ the 68881/882, we make sure to return an
8419# INF w/ the j-bit set if the input INF j-bit was set. Destination
8420# INFs take priority.
8421# in: a0 = src operand, a1 = dst operand. out: fp0 = +/-INF, FPSR_CC set.
8422	global		fmul_inf_dst		# global for fsglmul
8423fmul_inf_dst:
8424	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
8425	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8426	mov.b		DST_EX(%a1),%d1
8427	eor.b		%d0,%d1			# bit 7 of d1 = sign of the product
8428	bpl.b		fmul_inf_dst_p		# result INF is pos.
8429fmul_inf_dst_n:
8430	fabs.x		%fp0			# clear result sign
8431	fneg.x		%fp0			# set result sign
8432	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8433	rts
8434fmul_inf_dst_p:
8435	fabs.x		%fp0			# clear result sign
8436	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
8437	rts
8438# same as fmul_inf_dst, but the INF returned in fp0 is the source operand.
8439	global		fmul_inf_src		# global for fsglmul
8440fmul_inf_src:
8441	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
8442	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8443	mov.b		DST_EX(%a1),%d1
8444	eor.b		%d0,%d1			# bit 7 of d1 = sign of the product
8445	bpl.b		fmul_inf_dst_p		# result INF is pos.
8446	bra.b		fmul_inf_dst_n		# result INF is neg.
8447
8448#########################################################################
8449# XDEF ****************************************************************	#
8450#	fin(): emulates the fmove instruction				#
8451#	fsin(): emulates the fsmove instruction				#
8452#	fdin(): emulates the fdmove instruction				#
8453#									#
8454# XREF ****************************************************************	#
8455#	norm() - normalize mantissa for EXOP on denorm			#
8456#	scale_to_zero_src() - scale src exponent to zero		#
8457#	ovf_res() - return default overflow result			#
8458#	unf_res() - return default underflow result			#
8459#	res_qnan_1op() - return QNAN result				#
8460#	res_snan_1op() - return SNAN result				#
8461#									#
8462# INPUT ***************************************************************	#
8463#	a0 = pointer to extended precision source operand		#
8464#	d0 = round prec/mode						#
8465#									#
8466# OUTPUT **************************************************************	#
8467#	fp0 = result							#
8468#	fp1 = EXOP (if exception occurred)				#
8469#									#
8470# ALGORITHM ***********************************************************	#
8471#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8472# norms into extended, single, and double precision.			#
8473#	Norms can be emulated w/ a regular fmove instruction. For	#
8474# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
8475# if the result would have overflowed/underflowed. If so, use unf_res()	#
8476# or ovf_res() to return the default result. Also return EXOP if	#
8477# exception is enabled. If no exception, return the default result.	#
8478#	Unnorms don't pass through here.				#
8479#									#
8480#########################################################################
8481
8482	global		fsin
8483fsin:
8484	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
8485	ori.b		&s_mode*0x10,%d0	# insert sgl precision
8486	bra.b		fin
8487
8488	global		fdin
8489fdin:
8490	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
8491	ori.b		&d_mode*0x10,%d0	# insert dbl precision
8492# fdin falls through into fin
8493	global		fin
8494fin:
8495	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8496
8497	mov.b		STAG(%a6),%d1		# fetch src optype tag
8498	bne.w		fin_not_norm		# optimize on non-norm input
8499
8500#
8501# FP MOVE IN: NORMs and DENORMs ONLY!
8502# (reached by fall-through from fin with d0 = rnd prec,mode, a0 = src operand)
8503fin_norm:
8504	andi.b		&0xc0,%d0		# is precision extended?
8505	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8506
8507#
8508# precision selected is extended. so...we cannot get an underflow
8509# or overflow because of rounding to the correct precision. so...
8510# skip the scaling and unscaling...
8511#
8512	tst.b		SRC_EX(%a0)		# is the operand negative?
8513	bpl.b		fin_norm_done		# no
8514	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8515fin_norm_done:
8516	fmovm.x		SRC(%a0),&0x80		# return result in fp0 (unrounded copy)
8517	rts
8518
8519#
8520# for an extended precision DENORM, the UNFL exception bit is set
8521# the accrued bit is NOT set in this instance(no inexactness!)
8522# (sgl/dbl DENORMs are sent to fin_not_ext like any other operand)
8523fin_denorm:
8524	andi.b		&0xc0,%d0		# is precision extended?
8525	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8526
8527	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528	tst.b		SRC_EX(%a0)		# is the operand negative?
8529	bpl.b		fin_denorm_done		# no
8530	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8531fin_denorm_done:
8532	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8533	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534	bne.b		fin_denorm_unfl_ena	# yes; also build the EXOP in fp1
8535	rts
8536
8537#
8538# the input is an extended DENORM and underflow is enabled in the FPCR.
8539# normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540# exponent and insert back into the operand.
8541# in: a0 = src operand (fp0 already holds the result). out: fp1 = EXOP.
8542fin_denorm_unfl_ena:
8543	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
8544	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8545	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8546	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
8547	bsr.l		norm			# normalize result; d0 = shift value
8548	neg.w		%d0			# new exponent = -(shft val)
8549	addi.w		&0x6000,%d0		# add new bias to exponent
8550	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
8551	andi.w		&0x8000,%d1		# keep old sign
8552	andi.w		&0x7fff,%d0		# clear sign position
8553	or.w		%d1,%d0			# concat new exp,old sign
8554	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
8555	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8556	rts
8557
8558#
8559# operand is to be rounded to single or double precision
8560# in: d0.b = rnd prec bits only (already masked with 0xc0), a0 = src operand.
8561fin_not_ext:
8562	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
8563	bne.b		fin_dbl			# not sgl, so it is dbl
8564
8565#
8566# operand is to be rounded to single precision
8567#
8568fin_sgl:
8569	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
8570	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8571	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8572	bsr.l		scale_to_zero_src	# calculate scale factor (in d0)
8573
8574	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
8575	bge.w		fin_sd_unfl		# yes; go handle underflow
8576	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
8577	beq.w		fin_sd_may_ovfl		# maybe; go check
8578	blt.w		fin_sd_ovfl		# yes; go handle overflow
8579# no; fall through into fin_sd_normal
8580#
8581# operand will NOT overflow or underflow when moved into the fp reg file
8582# in: FP_SCR0 = scaled src operand, d0 = scale factor.
8583fin_sd_normal:
8584	fmov.l		&0x0,%fpsr		# clear FPSR
8585	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
8586
8587	fmov.x		FP_SCR0(%a6),%fp0	# perform move (rounds to sgl/dbl)
8588
8589	fmov.l		%fpsr,%d1		# save FPSR
8590	fmov.l		&0x0,%fpcr		# clear FPCR
8591
8592	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8593# fin_sd_normal_exit is also branched to with fp0 = result, d0 = scale factor.
8594fin_sd_normal_exit:
8595	mov.l		%d2,-(%sp)		# save d2
8596	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8597	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8598	mov.w		%d1,%d2			# make a copy
8599	andi.l		&0x7fff,%d1		# strip sign
8600	sub.l		%d0,%d1			# subtract scale factor from exp
8601	andi.w		&0x8000,%d2		# keep old sign
8602	or.w		%d1,%d2			# concat old sign,new exponent
8603	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
8604	mov.l		(%sp)+,%d2		# restore d2
8605	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8606	rts
8607
8608#
8609# operand is to be rounded to double precision
8610# (same flow as fin_sgl, with the double precision exponent thresholds)
8611fin_dbl:
8612	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
8613	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8614	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8615	bsr.l		scale_to_zero_src	# calculate scale factor (in d0)
8616
8617	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
8618	bge.w		fin_sd_unfl		# yes; go handle underflow
8619	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
8620	beq.w		fin_sd_may_ovfl		# maybe; go check
8621	blt.w		fin_sd_ovfl		# yes; go handle overflow
8622	bra.w		fin_sd_normal		# no; go handle normalized op
8623
8624#
8625# operand WILL underflow when moved in to the fp register file
8626# in: FP_SCR0 = scaled src operand, d0 = scale factor.
8627fin_sd_unfl:
8628	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8629
8630	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
8631	bpl.b		fin_sd_unfl_tst		# no
8632	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
8633
8634# if underflow or inexact is enabled, then go calculate the EXOP first.
8635fin_sd_unfl_tst:
8636	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
8637	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8638	bne.b		fin_sd_unfl_ena		# yes
8639
8640fin_sd_unfl_dis:
8641	lea		FP_SCR0(%a6),%a0	# pass: result addr
8642	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8643	bsr.l		unf_res			# calculate default result
8644	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8645	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8646	rts
8647
8648#
8649# operand will underflow AND underflow or inexact is enabled.
8650# Therefore, we must return the result rounded to extended precision.
8651# the EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for unf_res.
8652fin_sd_unfl_ena:
8653	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)	# copy mantissa
8654	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
8656
8657	mov.l		%d2,-(%sp)		# save d2
8658	mov.w		%d1,%d2			# make a copy
8659	andi.l		&0x7fff,%d1		# strip sign
8660	sub.l		%d0,%d1			# subtract scale factor
8661	andi.w		&0x8000,%d2		# extract old sign
8662	addi.l		&0x6000,%d1		# add new bias
8663	andi.w		&0x7fff,%d1		# keep 15-bit exponent field
8664	or.w		%d1,%d2			# concat old sign,new exp
8665	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
8666	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
8667	mov.l		(%sp)+,%d2		# restore d2
8668	bra.b		fin_sd_unfl_dis		# then produce default result in fp0
8669
8670#
8671# operand WILL overflow.
8672# in: FP_SCR0 = scaled src operand, d0 = scale factor.
8673fin_sd_ovfl:
8674	fmov.l		&0x0,%fpsr		# clear FPSR
8675	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
8676
8677	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8678
8679	fmov.l		&0x0,%fpcr		# clear FPCR
8680	fmov.l		%fpsr,%d1		# save FPSR
8681
8682	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8683# fin_sd_may_ovfl enters here once it has confirmed the overflow
8684fin_sd_ovfl_tst:
8685	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8686
8687	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
8688	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8689	bne.b		fin_sd_ovfl_ena		# yes; build the EXOP first
8690
8691#
8692# OVFL is not enabled; therefore, we must create the default result by
8693# calling ovf_res(). (the enabled path also ends up here.)
8694#
8695fin_sd_ovfl_dis:
8696	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8697	sne		%d1			# d1.b = 0xff if negative, else 0
8698	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
8699	bsr.l		ovf_res			# calculate default result
8700	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8701	fmovm.x		(%a0),&0x80		# return default result in fp0 (via a0)
8702	rts
8703
8704#
8705# OVFL (or INEX) is enabled.
8706# the INEX2 bit has already been updated by the round to the correct precision.
8707# the EXOP is the operand still held in FP_SCR0 with its exponent rebiased;
8708# no FP operation is executed here, so the FPSR is not altered.
8709fin_sd_ovfl_ena:
8710	mov.l		%d2,-(%sp)		# save d2
8711	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8712	mov.l		%d1,%d2			# make a copy
8713	andi.l		&0x7fff,%d1		# strip sign
8714	andi.w		&0x8000,%d2		# keep old sign
8715	sub.l		%d0,%d1			# subtract scale factor from exp
8716	sub.l		&0x6000,%d1		# subtract bias
8717	andi.w		&0x7fff,%d1		# keep 15-bit exponent field
8718	or.w		%d2,%d1			# concat old sign,new exp
8719	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8720	mov.l		(%sp)+,%d2		# restore d2
8721	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8722	bra.b		fin_sd_ovfl_dis		# then produce default result in fp0
8723
8724#
8725# the move in MAY overflow. so... do the move and inspect the rounded result.
8726# in: FP_SCR0 = scaled src operand, d0 = scale factor.
8727fin_sd_may_ovfl:
8728	fmov.l		&0x0,%fpsr		# clear FPSR
8729	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
8730
8731	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
8732
8733	fmov.l		%fpsr,%d1		# save status
8734	fmov.l		&0x0,%fpcr		# clear FPCR
8735
8736	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8737
8738	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
8739	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8740	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
8741
8742# no, it didn't overflow; we have correct result
8743	bra.w		fin_sd_normal_exit	# rescale exponent and return fp0
8744
8745##########################################################################
8746
8747#
8748# operand is not a NORM: check its optype and branch accordingly
8749# in: d1.b = src optype tag (nonzero), d0 = rnd prec,mode, a0 = src operand.
8750fin_not_norm:
8751	cmpi.b		%d1,&DENORM		# weed out DENORM
8752	beq.w		fin_denorm
8753	cmpi.b		%d1,&SNAN		# weed out SNANs
8754	beq.l		res_snan_1op
8755	cmpi.b		%d1,&QNAN		# weed out QNANs
8756	beq.l		res_qnan_1op
8757
8758#
8759# do the fmove in; at this point, only possible ops are ZERO and INF.
8760# use fmov to determine ccodes.
8761# prec:mode should be zero at this point but it won't affect answer anyways.
8762#
8763	fmov.x		SRC(%a0),%fp0		# do fmove in
8764	fmov.l		%fpsr,%d0		# no exceptions possible
8765	rol.l		&0x8,%d0		# put ccodes (top byte) in lo byte
8766	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
8767	rts
8768
8769#########################################################################
8770# XDEF ****************************************************************	#
8771#	fdiv(): emulates the fdiv instruction				#
8772#	fsdiv(): emulates the fsdiv instruction				#
8773#	fddiv(): emulates the fddiv instruction				#
8774#									#
8775# XREF ****************************************************************	#
8776#	scale_to_zero_src() - scale src exponent to zero		#
8777#	scale_to_zero_dst() - scale dst exponent to zero		#
8778#	unf_res() - return default underflow result			#
8779#	ovf_res() - return default overflow result			#
8780#	res_qnan() - return QNAN result					#
8781#	res_snan() - return SNAN result					#
8782#									#
8783# INPUT ***************************************************************	#
8784#	a0 = pointer to extended precision source operand		#
8785#	a1 = pointer to extended precision destination operand		#
8786#	d0 = rnd prec,mode						#
8787#									#
8788# OUTPUT **************************************************************	#
8789#	fp0 = result							#
8790#	fp1 = EXOP (if exception occurred)				#
8791#									#
8792# ALGORITHM ***********************************************************	#
8793#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8794# norms/denorms into ext/sgl/dbl precision.				#
8795#	For norms/denorms, scale the exponents such that a divide	#
8796# instruction won't cause an exception. Use the regular fdiv to		#
8797# compute a result. Check if the regular operands would have taken	#
8798# an exception. If so, return the default overflow/underflow result	#
8799# and return the EXOP if exceptions are enabled. Else, scale the	#
8800# result operand to the proper exponent.				#
8801#									#
8802#########################################################################
8803
8804	align		0x10
8805tbl_fdiv_unfl:
8806	long		0x3fff - 0x0000		# ext_unfl (scale factor threshold)
8807	long		0x3fff - 0x3f81		# sgl_unfl
8808	long		0x3fff - 0x3c01		# dbl_unfl
8809# both tables are indexed by the precision index: 0 = ext, 1 = sgl, 2 = dbl
8810tbl_fdiv_ovfl:
8811	long		0x3fff - 0x7ffe		# ext overflow exponent
8812	long		0x3fff - 0x407e		# sgl overflow exponent
8813	long		0x3fff - 0x43fe		# dbl overflow exponent
8814
8815	global		fsdiv
8816fsdiv:
8817	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
8818	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8819	bra.b		fdiv
8820
8821	global		fddiv
8822fddiv:
8823	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
8824	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8825# fddiv falls through into fdiv
8826	global		fdiv
8827fdiv:
8828	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8829
8830	clr.w		%d1			# d1.w = 0 before the byte loads
8831	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
8832	lsl.b		&0x3,%d1		# dst tag goes in bits 3 and up
8833	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
8834
8835	bne.w		fdiv_not_norm		# optimize on non-norm input
8836
8837#
8838# DIVIDE: NORMs and DENORMs ONLY!
8839# out of this block: d0 = scale factor, d1 = precision index (0/1/2).
8840fdiv_norm:
8841	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst to FP_SCR1
8842	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8843	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8844
8845	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
8846	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8847	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8848
8849	bsr.l		scale_to_zero_src	# scale src exponent
8850	mov.l		%d0,-(%sp)		# save scale factor 1
8851
8852	bsr.l		scale_to_zero_dst	# scale dst exponent
8853
8854	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
8855	add.l		%d0,(%sp)		# (-scale1(src)) + scale2(dst)
8856
8857	mov.w		2+L_SCR3(%a6),%d1	# fetch precision (lo word of rnd info)
8858	lsr.b		&0x6,%d1		# shift to lo bits
8859	mov.l		(%sp)+,%d0		# load S.F.
8860	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861	ble.w		fdiv_may_ovfl		# result may overflow; go check
8862
8863	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864	beq.w		fdiv_may_unfl		# maybe
8865	bgt.w		fdiv_unfl		# yes; go handle underflow
8866
8867fdiv_normal:
8868	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
8869
8870	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
8871	fmov.l		&0x0,%fpsr		# clear FPSR
8872
8873	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
8874
8875	fmov.l		%fpsr,%d1		# save FPSR
8876	fmov.l		&0x0,%fpcr		# clear FPCR
8877
8878	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8879# fdiv_normal_exit is also branched to with fp0 = result, d0 = scale factor.
8880fdiv_normal_exit:
8881	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
8882	mov.l		%d2,-(%sp)		# store d2
8883	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8884	mov.l		%d1,%d2			# make a copy
8885	andi.l		&0x7fff,%d1		# strip sign
8886	andi.w		&0x8000,%d2		# keep old sign
8887	sub.l		%d0,%d1			# subtract scale factor from exp
8888	or.w		%d2,%d1			# concat old sign,new exp
8889	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8890	mov.l		(%sp)+,%d2		# restore d2
8891	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8892	rts
8893
8894tbl_fdiv_ovfl2:
8895	long		0x7fff			# ext: one above the 0x7ffe in tbl_fdiv_ovfl
8896	long		0x407f			# sgl: one above 0x407e
8897	long		0x43ff			# dbl: one above 0x43fe
8898
8899fdiv_no_ovfl:
8900	mov.l		(%sp)+,%d0		# restore scale factor
8901	bra.b		fdiv_normal_exit	# fp0 still holds the quotient
8902# in: d0 = scale factor, d1 = precision index (kept live for the table lookup)
8903fdiv_may_ovfl:
8904	mov.l		%d0,-(%sp)		# save scale factor
8905
8906	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
8907
8908	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8909	fmov.l		&0x0,%fpsr		# clear FPSR
8910
8911	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8912
8913	fmov.l		%fpsr,%d0		# save status (d0: d1 must stay intact)
8914	fmov.l		&0x0,%fpcr		# clear FPCR
8915
8916	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
8917
8918	fmovm.x		&0x01,-(%sp)		# save result to stack
8919	mov.w		(%sp),%d0		# fetch new exponent
8920	add.l		&0xc,%sp		# clear result (12 bytes) from stack
8921	andi.l		&0x7fff,%d0		# strip sign
8922	sub.l		(%sp),%d0		# subtract saved scale factor
8923	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)	# exp vs. ovfl limit
8924	blt.b		fdiv_no_ovfl		# below the limit: no overflow
8925	mov.l		(%sp)+,%d0		# restore S.F.; fall into fdiv_ovfl_tst
8926
8927fdiv_ovfl_tst:
8928	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8929
8930	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
8931	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8932	bne.b		fdiv_ovfl_ena		# yes; build the EXOP first
8933# default result (the enabled path also ends up here)
8934fdiv_ovfl_dis:
8935	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8936	sne		%d1			# d1.b = 0xff if negative, else 0
8937	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
8938	bsr.l		ovf_res			# calculate default result
8939	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
8940	fmovm.x		(%a0),&0x80		# return default result in fp0 (via a0)
8941	rts
8942
8943fdiv_ovfl_ena:
8944	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8945	andi.b		&0xc0,%d1		# is precision extended?
8946	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
8947# build the EXOP from the extended-precision quotient in fp0 (d0 = S.F.)
8948fdiv_ovfl_ena_cont:
8949	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8950
8951	mov.l		%d2,-(%sp)		# save d2
8952	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8953	mov.w		%d1,%d2			# make a copy
8954	andi.l		&0x7fff,%d1		# strip sign
8955	sub.l		%d0,%d1			# subtract scale factor from exp
8956	subi.l		&0x6000,%d1		# subtract bias
8957	andi.w		&0x7fff,%d1		# clear sign bit (wrap into 15 bits)
8958	andi.w		&0x8000,%d2		# keep old sign
8959	or.w		%d2,%d1			# concat old sign,new exp
8960	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8961	mov.l		(%sp)+,%d2		# restore d2
8962	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8963	bra.b		fdiv_ovfl_dis		# then produce default result in fp0
8964# sgl/dbl: redo the divide in extended precision with the user's rnd mode
8965fdiv_ovfl_ena_sd:
8966	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8967
8968	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
8969	andi.b		&0x30,%d1		# keep rnd mode
8970	fmov.l		%d1,%fpcr		# set FPCR
8971
8972	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide; FPSR is not re-saved
8973
8974	fmov.l		&0x0,%fpcr		# clear FPCR
8975	bra.b		fdiv_ovfl_ena_cont
8976
8977fdiv_unfl:
8978	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8979
8980	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
8981
8982	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: RZ, no prec bits set
8983	fmov.l		&0x0,%fpsr		# clear FPSR
8984
8985	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8986
8987	fmov.l		%fpsr,%d1		# save status
8988	fmov.l		&0x0,%fpcr		# clear FPCR
8989
8990	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8991
8992	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
8993	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8994	bne.b		fdiv_unfl_ena		# yes; build the EXOP first
8995# default result (the enabled path also ends up here, fp0 = RZ quotient)
8996fdiv_unfl_dis:
8997	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8998
8999	lea		FP_SCR0(%a6),%a0	# pass: result addr
9000	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9001	bsr.l		unf_res			# calculate default result
9002	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
9003	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9004	rts
9005
9006#
9007# UNFL (or INEX) is enabled: build the EXOP in fp1, then go make the default.
9008# in: fp0 = RZ quotient from fdiv_unfl (left untouched), d0 = scale factor.
9009fdiv_unfl_ena:
9010	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
9011
9012	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
9013	andi.b		&0xc0,%d1		# is precision extended?
9014	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
9015
9016	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR (ext prec, user rnd mode)
9017
9018fdiv_unfl_ena_cont:
9019	fmov.l		&0x0,%fpsr		# clear FPSR
9020
9021	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide (result in fp1)
9022
9023	fmov.l		&0x0,%fpcr		# clear FPCR
9024
9025	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
9026	mov.l		%d2,-(%sp)		# save d2
9027	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9028	mov.l		%d1,%d2			# make a copy
9029	andi.l		&0x7fff,%d1		# strip sign
9030	andi.w		&0x8000,%d2		# keep old sign
9031	sub.l		%d0,%d1			# subtract scale factor from exp
9032	addi.l		&0x6000,%d1		# add bias
9033	andi.w		&0x7fff,%d1		# keep 15-bit exponent field
9034	or.w		%d2,%d1			# concat old sign,new exp
9035	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
9036	mov.l		(%sp)+,%d2		# restore d2
9037	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9038	bra.w		fdiv_unfl_dis		# fp0 (RZ quotient) -> default result
9039
9040fdiv_unfl_ena_sd:
9041	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
9042	andi.b		&0x30,%d1		# use only rnd mode (prec = ext)
9043	fmov.l		%d1,%fpcr		# set FPCR
9044
9045	bra.b		fdiv_unfl_ena_cont
9046
9047#
9048# the divide operation MAY underflow:
9049#
9050fdiv_may_unfl:
9051	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
9052
9053	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9054	fmov.l		&0x0,%fpsr		# clear FPSR
9055
9056	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
9057
9058	fmov.l		%fpsr,%d1		# save status
9059	fmov.l		&0x0,%fpcr		# clear FPCR
9060
9061	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9062
9063	fabs.x		%fp0,%fp1		# make a copy of result
9064	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
9065	fbgt.w		fdiv_normal_exit	# no; no underflow occurred
9066	fblt.w		fdiv_unfl		# yes; underflow occurred
9067
9068#
9069# we still don't know if underflow occurred. result is ~ equal to 1. but,
9070# we don't know if the result was an underflow that rounded up to a 1
9071# or a normalized number that rounded down to a 1. so, redo the entire
9072# operation using RZ as the rounding mode to see what the pre-rounded
9073# result is. this case should be relatively rare.
9074#
9075	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
9076
9077	mov.l		L_SCR3(%a6),%d1
9078	andi.b		&0xc0,%d1		# keep rnd prec
9079	ori.b		&rz_mode*0x10,%d1	# insert RZ
9080
9081	fmov.l		%d1,%fpcr		# set FPCR
9082	fmov.l		&0x0,%fpsr		# clear FPSR
9083
9084	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9085
9086	fmov.l		&0x0,%fpcr		# clear FPCR
9087	fabs.x		%fp1			# make absolute value
9088	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
9089	fbge.w		fdiv_normal_exit	# no; no underflow occurred
9090	bra.w		fdiv_unfl		# yes; underflow occurred
9091
9092############################################################################
9093
9094#
9095# Divide: inputs are not both normalized; what are they?
9096#
9097fdiv_not_norm:
9098	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
9099	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
9100
9101	swbeg		&48
9102tbl_fdiv_op:
9103	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
9104	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
9105	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
9106	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
9107	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
9108	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
9109	short		tbl_fdiv_op	- tbl_fdiv_op #
9110	short		tbl_fdiv_op	- tbl_fdiv_op #
9111
9112	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
9113	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
9114	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
9115	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
9116	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
9117	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
9118	short		tbl_fdiv_op	- tbl_fdiv_op #
9119	short		tbl_fdiv_op	- tbl_fdiv_op #
9120
9121	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
9122	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
9123	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
9124	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
9125	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
9126	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
9127	short		tbl_fdiv_op	- tbl_fdiv_op #
9128	short		tbl_fdiv_op	- tbl_fdiv_op #
9129
9130	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
9131	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
9132	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
9133	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
9134	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
9135	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
9136	short		tbl_fdiv_op	- tbl_fdiv_op #
9137	short		tbl_fdiv_op	- tbl_fdiv_op #
9138
9139	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
9140	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
9141	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
9142	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
9143	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
9144	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
9145	short		tbl_fdiv_op	- tbl_fdiv_op #
9146	short		tbl_fdiv_op	- tbl_fdiv_op #
9147
9148	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
9149	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
9150	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
9151	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
9152	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
9153	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
9154	short		tbl_fdiv_op	- tbl_fdiv_op #
9155	short		tbl_fdiv_op	- tbl_fdiv_op #
9156
9157fdiv_res_qnan:
9158	bra.l		res_qnan
9159fdiv_res_snan:
9160	bra.l		res_snan
9161fdiv_res_operr:
9162	bra.l		res_operr
9163
9164	global		fdiv_zero_load		# global for fsgldiv
#
# fdiv_zero_load(): return a ZERO quotient carrying the proper sign.
#	in:	a0 = ptr to source operand, a1 = ptr to destination operand
#		(only the leading sign/exponent byte of each is read)
#	out:	fp0 = +ZERO or -ZERO
#		FPSR_CC(%a6) = Z, plus N when the operand signs differ
#	uses:	d0, d1
#
9165fdiv_zero_load:
9166	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
9167	mov.b		DST_EX(%a1),%d1		# or of input signs.
9168	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
9169	bpl.b		fdiv_zero_load_p	# result is positive
9170	fmov.s		&0x80000000,%fp0	# load a -ZERO
9171	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
9172	rts
9173fdiv_zero_load_p:
9174	fmov.s		&0x00000000,%fp0	# load a +ZERO
9175	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
9176	rts
9177
9178#
9179# The destination was In Range and the source was a ZERO. The result,
9180# Therefore, is an INF w/ the proper sign.
9181# So, determine the sign and return a new INF (w/ the j-bit cleared).
9182#
9183	global		fdiv_inf_load		# global for fsgldiv
#
# fdiv_inf_load(): divide-by-zero result; return a freshly built INF.
#	in:	a0 = ptr to source operand, a1 = ptr to destination operand
#		(only the leading sign/exponent byte of each is read)
#	out:	fp0 = +INF or -INF (loaded from a single precision constant)
#		FPSR_CC(%a6) = INF, plus N when the operand signs differ
#		DZ and ADZ are or'ed into the low word of USER_FPSR(%a6)
#	uses:	d0, d1
#
9184fdiv_inf_load:
9185	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
9186	mov.b		SRC_EX(%a0),%d0		# load both signs
9187	mov.b		DST_EX(%a1),%d1
9188	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
9189	bpl.b		fdiv_inf_load_p		# result is positive
9190	fmov.s		&0xff800000,%fp0	# make result -INF
9191	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192	rts
9193fdiv_inf_load_p:
9194	fmov.s		&0x7f800000,%fp0	# make result +INF
9195	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
9196	rts
9197
9198#
9199# The destination was an INF w/ an In Range or ZERO source, the result is
9200# an INF w/ the proper sign.
9201# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202# dst INF is set, then the j-bit of the result INF is also set).
9203#
9204	global		fdiv_inf_dst		# global for fsgldiv
#
# fdiv_inf_dst(): the destination operand is an INF; return that same
# INF (mantissa taken from the destination) with the sign forced to the
# exclusive-or of the operand signs.
#	in:	a0 = ptr to source operand, a1 = ptr to destination operand
#	out:	fp0 = destination INF with the result sign
#		FPSR_CC(%a6) = INF, plus N when the operand signs differ
#	uses:	d0, d1
#
9205fdiv_inf_dst:
9206	mov.b		DST_EX(%a1),%d0		# load both signs
9207	mov.b		SRC_EX(%a0),%d1
9208	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
9209	bpl.b		fdiv_inf_dst_p		# result is positive
9210
9211	fmovm.x		DST(%a1),&0x80		# return result in fp0
9212	fabs.x		%fp0			# clear sign bit
9213	fneg.x		%fp0			# set sign bit
9214	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215	rts
9216
9217fdiv_inf_dst_p:
9218	fmovm.x		DST(%a1),&0x80		# return result in fp0
9219	fabs.x		%fp0			# return positive INF
9220	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
9221	rts
9222
9223#########################################################################
9224# XDEF ****************************************************************	#
9225#	fneg(): emulates the fneg instruction				#
9226#	fsneg(): emulates the fsneg instruction				#
9227#	fdneg(): emulates the fdneg instruction				#
9228#									#
9229# XREF ****************************************************************	#
9230#	norm() - normalize a denorm to provide EXOP			#
9231#	scale_to_zero_src() - scale sgl/dbl source exponent		#
9232#	ovf_res() - return default overflow result			#
9233#	unf_res() - return default underflow result			#
9234#	res_qnan_1op() - return QNAN result				#
9235#	res_snan_1op() - return SNAN result				#
9236#									#
9237# INPUT ***************************************************************	#
9238#	a0 = pointer to extended precision source operand		#
9239#	d0 = rnd prec,mode						#
9240#									#
9241# OUTPUT **************************************************************	#
9242#	fp0 = result							#
9243#	fp1 = EXOP (if exception occurred)				#
9244#									#
9245# ALGORITHM ***********************************************************	#
9246#	Handle NANs, zeroes, and infinities as special cases. Separate	#
9247# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
9248# emulated by simply toggling sign bit. Sgl/dbl operands must be scaled	#
9249# and an actual fneg performed to see if overflow/underflow would have	#
9250# occurred. If so, return default underflow/overflow result. Else,	#
9251# scale the result exponent and return result. FPSR gets set based on	#
9252# the result value.							#
9253#									#
9254#########################################################################
9255
9256	global		fsneg
#
# Entry points: fsneg and fdneg overwrite the rounding precision field of
# d0 with single/double (the rounding mode bits 0x30 are kept) and then
# share the fneg code. fneg saves d0 in L_SCR3(%a6) and dispatches on the
# source tag: a zero STAG falls through to fneg_norm, anything else goes
# to fneg_not_norm.
#
9257fsneg:
9258	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
9259	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9260	bra.b		fneg
9261
9262	global		fdneg
9263fdneg:
9264	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
9265	ori.b		&d_mode*0x10,%d0	# insert dbl prec
9266
9267	global		fneg
9268fneg:
9269	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9270	mov.b		STAG(%a6),%d1		# fetch source operand tag
9271	bne.w		fneg_not_norm		# optimize on non-norm input
9272
9273#
9274# NEGATE SIGN : norms and denorms ONLY!
9275#
9276fneg_norm:
9277	andi.b		&0xc0,%d0		# is precision extended?
9278	bne.w		fneg_not_ext		# no; go handle sgl or dbl
9279
9280#
9281# precision selected is extended. so...we can not get an underflow
9282# or overflow because of rounding to the correct precision. so...
9283# skip the scaling and unscaling...
9284#
# The operand is copied to FP_SCR0 with its sign bit inverted and then
# loaded into fp0; no FPU arithmetic instruction is executed on this path.
# FPSR_CC is only written when the result is negative.
#
9285	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9286	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9287	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
9288	eori.w		&0x8000,%d0		# negate sign
9289	bpl.b		fneg_norm_load		# sign is positive
9290	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9291fneg_norm_load:
9292	mov.w		%d0,FP_SCR0_EX(%a6)	# insert {new sgn,exp}
9293	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9294	rts
9295
9296#
9297# for an extended precision DENORM, the UNFL exception bit is set
9298# the accrued bit is NOT set in this instance(no inexactness!)
9299#
9300fneg_denorm:
#
# Reached from fneg_not_norm with d0 still holding the rnd prec,mode.
# For extended precision: flag UNFL, return the sign-inverted denorm in
# fp0 as the default result, and build an EXOP only if UNFL is enabled.
# For sgl/dbl precision the operand is handed to fneg_not_ext.
#
9301	andi.b		&0xc0,%d0		# is precision extended?
9302	bne.b		fneg_not_ext		# no; go handle sgl or dbl
9303
9304	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9305
9306	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9307	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9308	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
9309	eori.w		&0x8000,%d0		# negate sign
9310	bpl.b		fneg_denorm_done	# result is not negative
9311	mov.b		&neg_bmask,FPSR_CC(%a6)	# negative; set 'N' ccode bit
9312fneg_denorm_done:
9313	mov.w		%d0,FP_SCR0_EX(%a6)	# insert {new sgn,exp}
9314	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9315
9316	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317	bne.b		fneg_ext_unfl_ena	# yes
9318	rts
9319
9320#
9321# the input is an extended DENORM and underflow is enabled in the FPCR.
9322# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323# exponent and insert back into the operand.
9324#
9325fneg_ext_unfl_ena:
#
# Build the EXOP for an extended precision denorm result held in FP_SCR0.
# norm() is called on FP_SCR0 and its d0 return value is treated as the
# shift count: the new exponent is -(shift) + 0x6000, the sign already in
# FP_SCR0_EX is kept. fp0 (the default result) is not touched here.
#	out:	fp1 = EXOP
#	uses:	a0, d0, d1 (plus whatever norm() clobbers)
#
9326	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9327	bsr.l		norm			# normalize result
9328	neg.w		%d0			# new exponent = -(shft val)
9329	addi.w		&0x6000,%d0		# add new bias to exponent
9330	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9331	andi.w		&0x8000,%d1		# keep old sign
9332	andi.w		&0x7fff,%d0		# clear sign position
9333	or.w		%d1,%d0			# concat old sign, new exponent
9334	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9335	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9336	rts
9337
9338#
9339# operand is either single or double
9340#
9341fneg_not_ext:
#
# On entry d0 holds only the rnd precision bits (it was masked with 0xc0
# by the caller). Single precision falls through to fneg_sgl; anything
# else is treated as double.
#
9342	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9343	bne.b		fneg_dbl
9344
9345#
9346# operand is to be rounded to single precision
9347#
# The source is copied to FP_SCR0 and scale_to_zero_src() is called; its
# d0 return value (the scale factor) is compared against the single
# precision limits below and must stay live until the exponent is
# restored in fneg_sd_normal_exit.
#
9348fneg_sgl:
9349	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9350	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9351	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9352	bsr.l		scale_to_zero_src	# calculate scale factor
9353
9354	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9355	bge.w		fneg_sd_unfl		# yes; go handle underflow
9356	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9357	beq.w		fneg_sd_may_ovfl	# maybe; go check
9358	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9359
9360#
9361# operand will NOT overflow or underflow when moved in to the fp reg file
9362#
9363fneg_sd_normal:
9364	fmov.l		&0x0,%fpsr		# clear FPSR
9365	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9366
9367	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9368
9369	fmov.l		%fpsr,%d1		# save FPSR
9370	fmov.l		&0x0,%fpcr		# clear FPCR
9371
9372	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9373
#
# Common exit (also entered from fneg_sd_may_ovfl): fp0 holds the rounded
# result of the scaled operand. Store it, subtract the scale factor in d0
# from its exponent, and reload it as the final result.
#
9374fneg_sd_normal_exit:
9375	mov.l		%d2,-(%sp)		# save d2
9376	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9377	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9378	mov.w		%d1,%d2			# make a copy
9379	andi.l		&0x7fff,%d1		# strip sign
9380	sub.l		%d0,%d1			# undo scaling (exp - scale factor)
9381	andi.w		&0x8000,%d2		# keep old sign
9382	or.w		%d1,%d2			# concat old sign,new exp
9383	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
9384	mov.l		(%sp)+,%d2		# restore d2
9385	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9386	rts
9387
9388#
9389# operand is to be rounded to double precision
9390#
9391fneg_dbl:
#
# Same as fneg_sgl but the scale factor returned in d0 by
# scale_to_zero_src() is compared against the double precision limits.
#
9392	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9393	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9394	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9395	bsr.l		scale_to_zero_src	# calculate scale factor
9396
9397	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
9398	bge.b		fneg_sd_unfl		# yes; go handle underflow
9399	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
9400	beq.w		fneg_sd_may_ovfl	# maybe; go check
9401	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9402	bra.w		fneg_sd_normal		# no; go handle normalized op
9403
9404#
9405# operand WILL underflow when moved in to the fp register file
9406#
9407fneg_sd_unfl:
#
# Sgl/dbl underflow. No FPU negate is executed on this path: the sign of
# the scaled operand in FP_SCR0 is flipped in memory and 'N' is set by
# hand. If UNFL or INEX is enabled the EXOP is built first
# (fneg_sd_unfl_ena, which branches back to fneg_sd_unfl_dis); in either
# case unf_res() then produces the default result in FP_SCR0.
#
9408	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9409
9410	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
9411	bpl.b		fneg_sd_unfl_tst	# result is not negative
9412	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
9413
9414# if underflow or inexact is enabled, go calculate EXOP first.
9415fneg_sd_unfl_tst:
9416	mov.b		FPCR_ENABLE(%a6),%d1
9417	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
9418	bne.b		fneg_sd_unfl_ena	# yes
9419
9420fneg_sd_unfl_dis:
9421	lea		FP_SCR0(%a6),%a0	# pass: result addr
9422	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9423	bsr.l		unf_res			# calculate default result
9424	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
9425	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9426	rts
9427
9428#
9429# operand will underflow AND underflow is enabled.
9430# Therefore, we must return the result rounded to extended precision.
9431#
9432fneg_sd_unfl_ena:
#
# Build the EXOP in FP_SCR1 so that FP_SCR0 is left intact for the
# default result calculation in fneg_sd_unfl_dis. The mantissa is copied
# from FP_SCR0 (whose sign was already flipped by fneg_sd_unfl); the EXOP
# exponent is (scaled exp - scale factor in d0) + 0x6000.
#	out:	fp1 = EXOP
#	uses:	d1 (d2 is saved/restored on the stack)
#
9433	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9434	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9435	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
9436
9437	mov.l		%d2,-(%sp)		# save d2
9438	mov.l		%d1,%d2			# make a copy
9439	andi.l		&0x7fff,%d1		# strip sign
9440	andi.w		&0x8000,%d2		# keep old sign
9441	sub.l		%d0,%d1			# subtract scale factor
9442	addi.l		&0x6000,%d1		# add new bias
9443	andi.w		&0x7fff,%d1		# keep exponent field only
9444	or.w		%d2,%d1			# concat new sign,new exp
9445	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
9446	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
9447	mov.l		(%sp)+,%d2		# restore d2
9448	bra.b		fneg_sd_unfl_dis	# now go build the default result
9449
9450#
9451# operand WILL overflow.
9452#
9453fneg_sd_ovfl:
#
# Sgl/dbl overflow. The negate is executed on the scaled operand with the
# user's rnd prec,mode so that INEX2 and N are captured in USER_FPSR.
# fneg_sd_ovfl_tst (also entered from fneg_sd_may_ovfl) then flags the
# overflow and either builds an EXOP first (fneg_sd_ovfl_ena) or falls
# through to fneg_sd_ovfl_dis for the default result.
#
9454	fmov.l		&0x0,%fpsr		# clear FPSR
9455	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9456
9457	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9458
9459	fmov.l		&0x0,%fpcr		# clear FPCR
9460	fmov.l		%fpsr,%d1		# save FPSR
9461
9462	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9463
9464fneg_sd_ovfl_tst:
9465	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9466
9467	mov.b		FPCR_ENABLE(%a6),%d1
9468	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
9469	bne.b		fneg_sd_ovfl_ena	# yes
9470
9471#
9472# OVFL is not enabled; therefore, we must create the default result by
9473# calling ovf_res().
9474#
# The sign passed to ovf_res() in d1 comes from the 'N' bit saved in
# FPSR_CC above. The default result is loaded from (%a0) after the call
# (presumably a0 is set by ovf_res() to point at the result - confirm
# against ovf_res()).
#
9475fneg_sd_ovfl_dis:
9476	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
9477	sne		%d1			# set sign param accordingly
9478	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
9479	bsr.l		ovf_res			# calculate default result
9480	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
9481	fmovm.x		(%a0),&0x80		# return default result in fp0
9482	rts
9483
9484#
9485# OVFL is enabled.
9486# the INEX2 bit has already been updated by the round to the correct precision.
9487# now, round to extended(and don't alter the FPSR).
9488#
9489fneg_sd_ovfl_ena:
#
# Build the EXOP for the overflow case from the operand in FP_SCR0: the
# exponent becomes (scaled exp - scale factor in d0) - 0x6000 and the
# sign is whatever is stored in FP_SCR0_EX. Then continue with the
# default result in fneg_sd_ovfl_dis.
#	out:	fp1 = EXOP
#	uses:	d1 (d2 is saved/restored on the stack)
#
# NOTE(review): on the paths leading here the fneg.x result is left in fp0
# and is not stored back to FP_SCR0, and (unlike fneg_sd_unfl) no visible
# instruction flips the sign in FP_SCR0_EX. Unless scale_to_zero_src()
# changes it, the EXOP therefore appears to carry the source operand's
# sign rather than the negated one - confirm whether this is intended.
#
9490	mov.l		%d2,-(%sp)		# save d2
9491	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9492	mov.l		%d1,%d2			# make a copy
9493	andi.l		&0x7fff,%d1		# strip sign
9494	andi.w		&0x8000,%d2		# keep old sign
9495	sub.l		%d0,%d1			# undo scaling (exp - scale factor)
9496	subi.l		&0x6000,%d1		# subtract bias
9497	andi.w		&0x7fff,%d1		# keep exponent field only
9498	or.w		%d2,%d1			# concat sign,exp
9499	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
9500	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9501	mov.l		(%sp)+,%d2		# restore d2
9502	bra.b		fneg_sd_ovfl_dis	# now go build the default result
9503
9504#
9505# the move in MAY overflow. so...
9506#
9507fneg_sd_may_ovfl:
#
# The scale factor sits exactly on the overflow boundary, so whether the
# result overflows depends on rounding. Negate the scaled operand with
# the user's rnd prec,mode: if the magnitude rounded up to 2 or more the
# result overflowed (join fneg_sd_ovfl_tst), otherwise fp0 already holds
# the correct scaled result (join fneg_sd_normal_exit).
# fp1 is used as scratch for the magnitude.
#
9508	fmov.l		&0x0,%fpsr		# clear FPSR
9509	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9510
9511	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9512
9513	fmov.l		%fpsr,%d1		# save status
9514	fmov.l		&0x0,%fpcr		# clear FPCR
9515
9516	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9517
9518	fabs.x		%fp0,%fp1		# make a copy of result
9519	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
9520	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
9521
9522# no, it didn't overflow; we have correct result
9523	bra.w		fneg_sd_normal_exit
9524
9525##########################################################################
9526
9527#
9528# input is not normalized; what is it?
9529#
9530fneg_not_norm:
#
# d1 = source operand tag (non-zero). DENORMs go to fneg_denorm, NANs to
# the shared one-operand NAN handlers; what is left is negated directly
# by the FPU and the condition code byte of the resulting FPSR is copied
# into FPSR_CC(%a6).
#
9531	cmpi.b		%d1,&DENORM		# weed out DENORM
9532	beq.w		fneg_denorm
9533	cmpi.b		%d1,&SNAN		# weed out SNAN
9534	beq.l		res_snan_1op
9535	cmpi.b		%d1,&QNAN		# weed out QNAN
9536	beq.l		res_qnan_1op
9537
9538#
9539# do the fneg; at this point, only possible ops are ZERO and INF.
9540# use fneg to determine ccodes.
9541# prec:mode should be zero at this point but it won't affect answer anyways.
9542#
9543	fneg.x		SRC_EX(%a0),%fp0	# do fneg
9544	fmov.l		%fpsr,%d0		# fetch resulting FPSR
9545	rol.l		&0x8,%d0		# put ccodes in lo byte
9546	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
9547	rts
9548
9549#########################################################################
9550# XDEF ****************************************************************	#
9551#	ftst(): emulates the ftst instruction				#
9552#									#
9553# XREF ****************************************************************	#
9554#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
9555#									#
9556# INPUT ***************************************************************	#
9557#	a0 = pointer to extended precision source operand		#
9558#									#
9559# OUTPUT **************************************************************	#
9560#	none								#
9561#									#
9562# ALGORITHM ***********************************************************	#
9563#	Check the source operand tag (STAG) and set the FPSR condition	#
9564# codes according to the operand type and sign.				#
9565#									#
9566#########################################################################
9567
9568	global		ftst
#
# ftst(): classify the source operand and record the matching condition
# codes in FPSR_CC(%a6). No floating-point register is written here.
#	in:	a0 = ptr to extended precision source operand
#		STAG(%a6) = source operand tag (zero means NORM)
#	out:	FPSR_CC(%a6) = N / Z / I as appropriate; NANs are passed to
#		res_snan_1op / res_qnan_1op
#	uses:	d1
# Note: for a positive NORM or DENORM nothing is written to FPSR_CC
# (presumably the caller has already cleared it - confirm).
#
9569ftst:
9570	mov.b		STAG(%a6),%d1		# fetch source operand tag
9571	bne.b		ftst_not_norm		# optimize on non-norm input
9572
9573#
9574# Norm:
9575#
9576ftst_norm:
9577	tst.b		SRC_EX(%a0)		# is operand negative?
9578	bmi.b		ftst_norm_m		# yes
9579	rts
9580ftst_norm_m:
9581	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9582	rts
9583
9584#
9585# input is not normalized; what is it?
9586#
9587ftst_not_norm:
9588	cmpi.b		%d1,&ZERO		# weed out ZERO
9589	beq.b		ftst_zero
9590	cmpi.b		%d1,&INF		# weed out INF
9591	beq.b		ftst_inf
9592	cmpi.b		%d1,&SNAN		# weed out SNAN
9593	beq.l		res_snan_1op
9594	cmpi.b		%d1,&QNAN		# weed out QNAN
9595	beq.l		res_qnan_1op
9596
9597#
9598# Denorm:
9599#
9600ftst_denorm:
9601	tst.b		SRC_EX(%a0)		# is operand negative?
9602	bmi.b		ftst_denorm_m		# yes
9603	rts
9604ftst_denorm_m:
9605	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9606	rts
9607
9608#
9609# Infinity:
9610#
9611ftst_inf:
9612	tst.b		SRC_EX(%a0)		# is operand negative?
9613	bmi.b		ftst_inf_m		# yes
9614ftst_inf_p:
9615	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9616	rts
9617ftst_inf_m:
9618	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619	rts
9620
9621#
9622# Zero:
9623#
9624ftst_zero:
9625	tst.b		SRC_EX(%a0)		# is operand negative?
9626	bmi.b		ftst_zero_m		# yes
9627ftst_zero_p:
9628	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9629	rts
9630ftst_zero_m:
9631	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
9632	rts
9633
9634#########################################################################
9635# XDEF ****************************************************************	#
9636#	fint(): emulates the fint instruction				#
9637#									#
9638# XREF ****************************************************************	#
9639#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9640#									#
9641# INPUT ***************************************************************	#
9642#	a0 = pointer to extended precision source operand		#
9643#	d0 = round precision/mode					#
9644#									#
9645# OUTPUT **************************************************************	#
9646#	fp0 = result							#
9647#									#
9648# ALGORITHM ***********************************************************	#
9649#	Separate according to operand type. Unnorms don't pass through	#
9650# here. For norms, load the rounding mode/prec, execute a "fint", then	#
9651# store the resulting FPSR bits.					#
9652#	For denorms, force the j-bit to a one and do the same as for	#
9653# norms. Denorms are so low that the answer will either be a zero or a	#
9654# one.									#
9655#	For zeroes/infs/NANs, return the same while setting the FPSR	#
9656# as appropriate.							#
9657#									#
9658#########################################################################
9659
9660	global		fint
#
# fint(): dispatch on the source tag. NORMs (and DENORMs, after being
# turned into a tiny NORM in FP_SCR0) are handed to the FPU's own fint
# with the caller's rounding mode and extended precision; ZEROs and INFs
# are returned as-is with the ccodes set by hand; NANs go to the shared
# one-operand NAN handlers.
#	uses:	d0, d1 (a0 is repointed at FP_SCR0 on the DENORM path)
#
9661fint:
9662	mov.b		STAG(%a6),%d1		# fetch source operand tag
9663	bne.b		fint_not_norm		# optimize on non-norm input
9664
9665#
9666# Norm:
9667#
9668fint_norm:
9669	andi.b		&0x30,%d0		# set prec = ext
9670
9671	fmov.l		%d0,%fpcr		# set FPCR
9672	fmov.l		&0x0,%fpsr		# clear FPSR
9673
9674	fint.x		SRC(%a0),%fp0		# execute fint
9675
9676	fmov.l		&0x0,%fpcr		# clear FPCR
9677	fmov.l		%fpsr,%d0		# save FPSR
9678	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9679
9680	rts
9681
9682#
9683# input is not normalized; what is it?
9684#
9685fint_not_norm:
9686	cmpi.b		%d1,&ZERO		# weed out ZERO
9687	beq.b		fint_zero
9688	cmpi.b		%d1,&INF		# weed out INF
9689	beq.b		fint_inf
9690	cmpi.b		%d1,&DENORM		# weed out DENORM
9691	beq.b		fint_denorm
9692	cmpi.b		%d1,&SNAN		# weed out SNAN
9693	beq.l		res_snan_1op
9694	bra.l		res_qnan_1op		# weed out QNAN
9695
9696#
9697# Denorm:
9698#
9699# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700# also, the INEX2 and AINEX exception bits will be set.
9701# so, we could either set these manually or force the DENORM
9702# to a very small NORM and ship it to the NORM routine.
9703# I do the latter.
9704#
# Only the sign/exponent word and the top mantissa byte of FP_SCR0 are
# written below; the remaining mantissa bytes are left as they were.
#
9705fint_denorm:
9706	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9708	lea		FP_SCR0(%a6),%a0	# operand is now FP_SCR0
9709	bra.b		fint_norm
9710
9711#
9712# Zero:
9713#
9714fint_zero:
9715	tst.b		SRC_EX(%a0)		# is ZERO negative?
9716	bmi.b		fint_zero_m		# yes
9717fint_zero_p:
9718	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9719	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9720	rts
9721fint_zero_m:
9722	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9723	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9724	rts
9725
9726#
9727# Infinity:
9728#
9729fint_inf:
9730	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9731	tst.b		SRC_EX(%a0)		# is INF negative?
9732	bmi.b		fint_inf_m		# yes
9733fint_inf_p:
9734	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9735	rts
9736fint_inf_m:
9737	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9738	rts
9739
9740#########################################################################
9741# XDEF ****************************************************************	#
9742#	fintrz(): emulates the fintrz instruction			#
9743#									#
9744# XREF ****************************************************************	#
9745#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9746#									#
9747# INPUT ***************************************************************	#
9748#	a0 = pointer to extended precision source operand		#
9749#	d0 = round precision/mode					#
9750#									#
9751# OUTPUT **************************************************************	#
9752#	fp0 = result							#
9753#									#
9754# ALGORITHM ***********************************************************	#
9755#	Separate according to operand type. Unnorms don't pass through	#
9756# here. For norms, clear the FPSR, execute a "fintrz" (the FPCR is	#
9757# not loaded), then store the resulting FPSR bits.			#
9758#	For denorms, force the j-bit to a one and do the same as for	#
9759# norms. Denorms are so low that the answer will always be a		#
9760# (+/-)zero.								#
9761#	For zeroes/infs/NANs, return the same while setting the FPSR	#
9762# as appropriate.							#
9763#									#
9764#########################################################################
9765
9766	global		fintrz
#
# fintrz(): dispatch on the source tag. NORMs (and DENORMs, after being
# turned into a tiny NORM in FP_SCR0) are handed to the FPU's own fintrz;
# the FPCR is not loaded on this path and the d0 input is not read.
# ZEROs and INFs are returned as-is with the ccodes set by hand; NANs go
# to the shared one-operand NAN handlers.
#	uses:	d0, d1 (a0 is repointed at FP_SCR0 on the DENORM path)
#
9767fintrz:
9768	mov.b		STAG(%a6),%d1		# fetch source operand tag
9769	bne.b		fintrz_not_norm		# optimize on non-norm input
9770
9771#
9772# Norm:
9773#
9774fintrz_norm:
9775	fmov.l		&0x0,%fpsr		# clear FPSR
9776
9777	fintrz.x	SRC(%a0),%fp0		# execute fintrz
9778
9779	fmov.l		%fpsr,%d0		# save FPSR
9780	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9781
9782	rts
9783
9784#
9785# input is not normalized; what is it?
9786#
9787fintrz_not_norm:
9788	cmpi.b		%d1,&ZERO		# weed out ZERO
9789	beq.b		fintrz_zero
9790	cmpi.b		%d1,&INF		# weed out INF
9791	beq.b		fintrz_inf
9792	cmpi.b		%d1,&DENORM		# weed out DENORM
9793	beq.b		fintrz_denorm
9794	cmpi.b		%d1,&SNAN		# weed out SNAN
9795	beq.l		res_snan_1op
9796	bra.l		res_qnan_1op		# weed out QNAN
9797
9798#
9799# Denorm:
9800#
9801# for DENORMs, the result will be (+/-)ZERO.
9802# also, the INEX2 and AINEX exception bits will be set.
9803# so, we could either set these manually or force the DENORM
9804# to a very small NORM and ship it to the NORM routine.
9805# I do the latter.
9806#
# Only the sign/exponent word and the top mantissa byte of FP_SCR0 are
# written below; the remaining mantissa bytes are left as they were.
#
9807fintrz_denorm:
9808	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9810	lea		FP_SCR0(%a6),%a0	# operand is now FP_SCR0
9811	bra.b		fintrz_norm
9812
9813#
9814# Zero:
9815#
9816fintrz_zero:
9817	tst.b		SRC_EX(%a0)		# is ZERO negative?
9818	bmi.b		fintrz_zero_m		# yes
9819fintrz_zero_p:
9820	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9821	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9822	rts
9823fintrz_zero_m:
9824	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9825	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9826	rts
9827
9828#
9829# Infinity:
9830#
9831fintrz_inf:
9832	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9833	tst.b		SRC_EX(%a0)		# is INF negative?
9834	bmi.b		fintrz_inf_m		# yes
9835fintrz_inf_p:
9836	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9837	rts
9838fintrz_inf_m:
9839	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9840	rts
9841
9842#########################################################################
9843# XDEF ****************************************************************	#
9844#	fabs():  emulates the fabs instruction				#
9845#	fsabs(): emulates the fsabs instruction				#
9846#	fdabs(): emulates the fdabs instruction				#
9847#									#
9848# XREF **************************************************************** #
9849#	norm() - normalize denorm mantissa to provide EXOP		#
9850#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
9851#	unf_res() - calculate underflow result				#
9852#	ovf_res() - calculate overflow result				#
9853#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9854#									#
9855# INPUT *************************************************************** #
9856#	a0 = pointer to extended precision source operand		#
9857#	d0 = rnd precision/mode						#
9858#									#
9859# OUTPUT ************************************************************** #
9860#	fp0 = result							#
9861#	fp1 = EXOP (if exception occurred)				#
9862#									#
9863# ALGORITHM ***********************************************************	#
9864#	Handle NANs, infinities, and zeroes as special cases. Divide	#
9865# norms into extended, single, and double precision.			#
9866#	Simply clear sign for extended precision norm. Ext prec denorm	#
9867# gets an EXOP created for it since it's an underflow.			#
9868#	Double and single precision can overflow and underflow. First,	#
9869# scale the operand such that the exponent is zero. Perform an "fabs"	#
9870# using the correct rnd mode/prec. Check to see if the original		#
9871# exponent would take an exception. If so, use unf_res() or ovf_res()	#
9872# to calculate the default result. Also, create the EXOP for the	#
9873# exceptional case. If no exception should occur, insert the correct	#
9874# result exponent and return.						#
9875#	Unnorms don't pass through here.				#
9876#									#
9877#########################################################################
9878
9879	global		fsabs
9880fsabs:
9881	andi.b		&0x30,%d0		# clear rnd prec
9882	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9883	bra.b		fabs
9884
9885	global		fdabs
9886fdabs:
9887	andi.b		&0x30,%d0		# clear rnd prec
9888	ori.b		&d_mode*0x10,%d0	# insert dbl precision
9889
9890	global		fabs
9891fabs:
9892	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9893	mov.b		STAG(%a6),%d1
9894	bne.w		fabs_not_norm		# optimize on non-norm input
9895
9896#
9897# ABSOLUTE VALUE: norms and denorms ONLY!
9898#
9899fabs_norm:
9900	andi.b		&0xc0,%d0		# is precision extended?
9901	bne.b		fabs_not_ext		# no; go handle sgl or dbl
9902
9903#
9904# precision selected is extended. so...we can not get an underflow
9905# or overflow because of rounding to the correct precision. so...
9906# skip the scaling and unscaling...
9907#
9908	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9909	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9910	mov.w		SRC_EX(%a0),%d1
9911	bclr		&15,%d1			# force absolute value
9912	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
9913	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9914	rts
9915
9916#
9917# for an extended precision DENORM, the UNFL exception bit is set
9918# the accrued bit is NOT set in this instance(no inexactness!)
9919#
9920fabs_denorm:
9921	andi.b		&0xc0,%d0		# is precision extended?
9922	bne.b		fabs_not_ext		# no
9923
9924	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9925
9926	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9927	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9928	mov.w		SRC_EX(%a0),%d0
9929	bclr		&15,%d0			# clear sign
9930	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
9931
9932	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9933
9934	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935	bne.b		fabs_ext_unfl_ena
9936	rts
9937
9938#
9939# the input is an extended DENORM and underflow is enabled in the FPCR.
9940# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941# exponent and insert back into the operand.
# fp0 already holds the default result (loaded in fabs_denorm); this code
# only builds the EXOP in FP_SCR0 and returns it in fp1.
9942#
9943fabs_ext_unfl_ena:
9944	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9945	bsr.l		norm			# normalize result
9946	neg.w		%d0			# new exponent = -(shft val)
9947	addi.w		&0x6000,%d0		# add new bias to exponent
9948	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9949	andi.w		&0x8000,%d1		# keep old sign
9950	andi.w		&0x7fff,%d0		# clear sign position
9951	or.w		%d1,%d0			# concat old sign, new exponent
9952	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9953	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9954	rts
9955
9956#
9957# operand is either single or double
# d0 holds only the rounding precision bits here (masked with 0xc0 by the
# callers fabs_norm/fabs_denorm).
9958#
9959fabs_not_ext:
9960	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9961	bne.b		fabs_dbl		# not sgl; go handle dbl
9962
9963#
9964# operand is to be rounded to single precision
# copy the operand to FP_SCR0, scale it, and use the returned scale factor
# (d0) to decide whether the rounded result can underflow or overflow.
9965#
9966fabs_sgl:
9967	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9968	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9969	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9970	bsr.l		scale_to_zero_src	# calculate scale factor
9971
9972	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9973	bge.w		fabs_sd_unfl		# yes; go handle underflow
9974	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9975	beq.w		fabs_sd_may_ovfl	# maybe; go check
9976	blt.w		fabs_sd_ovfl		# yes; go handle overflow
9977
9978#
9979# operand will NOT overflow or underflow when moved in to the fp reg file
# d0 (scale factor) must stay live until fabs_sd_normal_exit has used it.
9980#
9981fabs_sd_normal:
9982	fmov.l		&0x0,%fpsr		# clear FPSR
9983	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9984
9985	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
9986
9987	fmov.l		%fpsr,%d1		# save FPSR
9988	fmov.l		&0x0,%fpcr		# clear FPCR
9989
9990	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9991
# common exit (also entered from fabs_sd_may_ovfl): store fp0 to FP_SCR0,
# subtract the scale factor in d0 from its exponent, and reload fp0.
9992fabs_sd_normal_exit:
9993	mov.l		%d2,-(%sp)		# save d2
9994	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9995	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9996	mov.l		%d1,%d2			# make a copy
9997	andi.l		&0x7fff,%d1		# strip sign
9998	sub.l		%d0,%d1			# exp - scale factor (undo scaling)
9999	andi.w		&0x8000,%d2		# keep old sign
10000	or.w		%d1,%d2			# concat old sign,new exp
10001	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
10002	mov.l		(%sp)+,%d2		# restore d2
10003	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10004	rts
10005
10006#
10007# operand is to be rounded to double precision
# same flow as fabs_sgl, but with the double precision exponent limits
# (0x3c00 / 0x43fe) in the underflow/overflow tests.
10008#
10009fabs_dbl:
10010	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10011	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10012	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10013	bsr.l		scale_to_zero_src	# calculate scale factor
10014
10015	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
10016	bge.b		fabs_sd_unfl		# yes; go handle underflow
10017	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
10018	beq.w		fabs_sd_may_ovfl	# maybe; go check
10019	blt.w		fabs_sd_ovfl		# yes; go handle overflow
10020	bra.w		fabs_sd_normal		# no; go handle normalized op
10021
10022#
10023# operand WILL underflow when moved in to the fp register file
# set the UNFL exception bit, force the scaled operand in FP_SCR0 positive,
# and return the default underflow result from unf_res in fp0. If UNFL or
# INEX is enabled, fabs_sd_unfl_ena first builds the EXOP in fp1.
10024#
10025fabs_sd_unfl:
10026	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10027
# byte-sized bclr on memory: bit 7 of the first byte of the sign/exponent
# word is the sign bit.
10028	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
10029
10030# if underflow or inexact is enabled, go calculate EXOP first.
10031	mov.b		FPCR_ENABLE(%a6),%d1
10032	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10033	bne.b		fabs_sd_unfl_ena	# yes
10034
10035fabs_sd_unfl_dis:
10036	lea		FP_SCR0(%a6),%a0	# pass: result addr
10037	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10038	bsr.l		unf_res			# calculate default result
10039	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
10040	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10041	rts
10042
10043#
10044# operand will underflow AND underflow (or inexact) is enabled.
10045# Therefore, we must return the result rounded to extended precision.
# the EXOP is built in FP_SCR1 (FP_SCR0 is left intact for unf_res):
# exponent = (scaled exp - scale factor + 0x6000) & 0x7fff, sign preserved.
10046#
10047fabs_sd_unfl_ena:
10048	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10049	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10050	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
10051
10052	mov.l		%d2,-(%sp)		# save d2
10053	mov.l		%d1,%d2			# make a copy
10054	andi.l		&0x7fff,%d1		# strip sign
10055	andi.w		&0x8000,%d2		# keep old sign
10056	sub.l		%d0,%d1			# subtract scale factor
10057	addi.l		&0x6000,%d1		# add new bias
10058	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
10059	or.w		%d2,%d1			# concat old sign,new exp
10060	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
10061	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
10062	mov.l		(%sp)+,%d2		# restore d2
10063	bra.b		fabs_sd_unfl_dis	# now create the default result
10064
10065#
10066# operand WILL overflow.
# perform the fabs on the scaled operand to collect INEX2/N, then set the
# overflow status bits and return the default overflow result in fp0.
10067#
10068fabs_sd_ovfl:
10069	fmov.l		&0x0,%fpsr		# clear FPSR
10070	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10071
10072	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10073
10074	fmov.l		&0x0,%fpcr		# clear FPCR
10075	fmov.l		%fpsr,%d1		# save FPSR
10076
10077	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10078
# fabs_sd_may_ovfl joins here once it has determined that overflow occurred.
10079fabs_sd_ovfl_tst:
10080	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10081
10082	mov.b		FPCR_ENABLE(%a6),%d1
10083	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10084	bne.b		fabs_sd_ovfl_ena	# yes
10085
10086#
10087# OVFL is not enabled; therefore, we must create the default result by
10088# calling ovf_res().
# note: d0 (scale factor) is overwritten with the rnd prec,mode below, so
# fabs_sd_ovfl_ena must finish with the scale factor before branching here.
10089#
10090fabs_sd_ovfl_dis:
10091	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10092	sne		%d1			# set sign param accordingly
10093	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
10094	bsr.l		ovf_res			# calculate default result
10095	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10096	fmovm.x		(%a0),&0x80		# return default result in fp0
10097	rts
10098
10099#
10100# OVFL (or INEX) is enabled.
10101# the INEX2 bit has already been updated by the round to the correct precision.
10102# now, round to extended(and don't alter the FPSR).
# the EXOP is built in place in FP_SCR0:
# exponent = (scaled exp - scale factor - 0x6000) & 0x7fff, sign preserved.
10103#
10104fabs_sd_ovfl_ena:
10105	mov.l		%d2,-(%sp)		# save d2
10106	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10107	mov.l		%d1,%d2			# make a copy
10108	andi.l		&0x7fff,%d1		# strip sign
10109	andi.w		&0x8000,%d2		# keep old sign
10110	sub.l		%d0,%d1			# subtract scale factor
10111	subi.l		&0x6000,%d1		# subtract bias
10112	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
10113	or.w		%d2,%d1			# concat sign,exp
10114	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10115	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10116	mov.l		(%sp)+,%d2		# restore d2
10117	bra.b		fabs_sd_ovfl_dis	# now create the default result
10118
10119#
10120# the move in MAY overflow. so...
# perform the fabs on the scaled operand with the user's rounding and test
# the rounded magnitude: if it reached 2 or more, overflow has occurred.
10121#
10122fabs_sd_may_ovfl:
10123	fmov.l		&0x0,%fpsr		# clear FPSR
10124	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10125
10126	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10127
10128	fmov.l		%fpsr,%d1		# save status
10129	fmov.l		&0x0,%fpcr		# clear FPCR
10130
10131	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10132
10133	fabs.x		%fp0,%fp1		# make a copy of result
10134	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10135	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
10136
10137# no, it didn't overflow; we have correct result
10138	bra.w		fabs_sd_normal_exit
10139
10140##########################################################################
10141
10142#
10143# input is not normalized; what is it?
# d1 holds the source tag. DENORMs, SNANs and QNANs are dispatched to their
# handlers; what remains is INF or ZERO, for which fabs.x produces the
# result directly and only the condition code byte has to be set.
10144#
10145fabs_not_norm:
10146	cmpi.b		%d1,&DENORM		# weed out DENORM
10147	beq.w		fabs_denorm
10148	cmpi.b		%d1,&SNAN		# weed out SNAN
10149	beq.l		res_snan_1op
10150	cmpi.b		%d1,&QNAN		# weed out QNAN
10151	beq.l		res_qnan_1op
10152
10153	fabs.x		SRC(%a0),%fp0		# force absolute value
10154
10155	cmpi.b		%d1,&INF		# weed out INF
10156	beq.b		fabs_inf
10157fabs_zero:
10158	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10159	rts
10160fabs_inf:
10161	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
10162	rts
10163
10164#########################################################################
10165# XDEF ****************************************************************	#
10166#	fcmp(): fp compare op routine					#
10167#									#
10168# XREF ****************************************************************	#
10169#	res_qnan() - return QNAN result					#
10170#	res_snan() - return SNAN result					#
10171#									#
10172# INPUT ***************************************************************	#
10173#	a0 = pointer to extended precision source operand		#
10174#	a1 = pointer to extended precision destination operand		#
10175#	d0 = round prec/mode						#
10176#									#
10177# OUTPUT ************************************************************** #
10178#	None								#
10179#									#
10180# ALGORITHM ***********************************************************	#
10181#	Handle NANs and denorms as special cases. For everything else,	#
10182# just use the actual fcmp instruction to produce the correct condition	#
10183# codes.								#
10184#									#
10185#########################################################################
10186
# fcmp: entry point. Build a dispatch index in d1 from the operand tags,
# (DTAG << 3) | STAG; index zero (NORM - NORM) falls through to fcmp_norm.
10187	global		fcmp
10188fcmp:
10189	clr.w		%d1			# index is used as a word later
10190	mov.b		DTAG(%a6),%d1		# dst tag
10191	lsl.b		&0x3,%d1		# dst tag selects the table row
10192	or.b		STAG(%a6),%d1		# src tag selects the column
10193	bne.b		fcmp_not_norm		# optimize on non-norm input
10194
10195#
10196# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
# a1 = ptr to dst operand, a0 = ptr to src operand (either may have been
# redirected to a scratch copy by the DENORM handlers).
10197#
10198fcmp_norm:
10199	fmovm.x		DST(%a1),&0x80		# load dst op
10200
10201	fcmp.x		%fp0,SRC(%a0)		# do compare
10202
10203	fmov.l		%fpsr,%d0		# save FPSR
10204	rol.l		&0x8,%d0		# rotate top (ccode) byte into low byte
10205	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
10206
10207	rts
10208
10209#
10210# fcmp: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG indexes a table of 16-bit offsets relative to
# tbl_fcmp_op; the handler is reached with a pc-relative jmp.
# Rows are the dst tag, columns the src tag, in the order
# NORM, ZERO, INF, QNAN, DENORM, SNAN, followed by two unused slots.
10211#
10212fcmp_not_norm:
10213	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10214	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
10215
10216	swbeg		&48
10217tbl_fcmp_op:
10218	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
10219	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
10220	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
10221	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
10222	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
10223	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
10224	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10225	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10226
10227	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
10228	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
10229	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
10230	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
10231	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
10232	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
10233	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10234	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10235
10236	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
10237	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
10238	short		fcmp_norm	- tbl_fcmp_op # INF - INF
10239	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
10240	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
10241	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
10242	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10243	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10244
10245	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
10246	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
10247	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
10248	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
10249	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
10250	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
10251	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10252	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10253
10254	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
10255	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
10256	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
10257	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
10258	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
10259	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
10260	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10261	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10262
10263	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
10264	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
10265	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
10266	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
10267	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
10268	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
10269	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10270	short		tbl_fcmp_op	- tbl_fcmp_op # (unused slot)
10271
10272# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
# both wrappers call the shared NAN handler and then mask FPSR_CC with
# 0xf7, clearing bit 3 of the condition code byte.
10274fcmp_res_qnan:
10275	bsr.l		res_qnan
10276	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
10277	rts
10278fcmp_res_snan:
10279	bsr.l		res_snan
10280	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
10281	rts
10282
10283#
10284# DENORMs are a little more difficult.
10285# If you have 2 DENORMs, then you can just force the j-bit to a one
10286# and use the fcmp_norm routine.
10287# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288# and use the fcmp_norm routine.
10289# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290# But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291# (1) signs are (+) and the DENORM is the dst or
10292# (2) signs are (-) and the DENORM is the src
10293#
10294
# fcmp_dnrm_s: the src operand is a DENORM. Copy it to FP_SCR0 with the
# msb of the mantissa (j-bit) set, point a0 at the copy, and compare.
10295fcmp_dnrm_s:
10296	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10297	mov.l		SRC_HI(%a0),%d0
10298	bset		&31,%d0			# DENORM src; make into small norm
10299	mov.l		%d0,FP_SCR0_HI(%a6)
10300	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10301	lea		FP_SCR0(%a6),%a0	# src ptr now refers to the copy
10302	bra.w		fcmp_norm
10303
# fcmp_dnrm_d: the dst operand is a DENORM and the src is a ZERO or INF
# (also reached from fcmp_dnrm_nrm when the operand signs differ).
# Copy the dst to FP_SCR0 with the msb of the mantissa (j-bit) set, point
# a1 at the copy, and let fcmp_norm perform the compare.
# Only the 16-bit sign/exponent word is copied, matching fcmp_dnrm_s and
# fcmp_dnrm_sd.
10304fcmp_dnrm_d:
10305	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6)
10306	mov.l		DST_HI(%a1),%d0
10307	bset		&31,%d0			# DENORM dst; make into small norm
10308	mov.l		%d0,FP_SCR0_HI(%a6)
10309	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
10310	lea		FP_SCR0(%a6),%a1	# dst ptr now refers to the copy
10311	bra.w		fcmp_norm
10312
# fcmp_dnrm_sd: both operands are DENORMs. Copy the dst to FP_SCR1 and the
# src to FP_SCR0, each with the msb of the mantissa (j-bit) set, point
# a1/a0 at the copies, and compare.
10313fcmp_dnrm_sd:
10314	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10315	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10316	mov.l		DST_HI(%a1),%d0
10317	bset		&31,%d0			# DENORM dst; make into small norm
10318	mov.l		%d0,FP_SCR1_HI(%a6)
10319	mov.l		SRC_HI(%a0),%d0
10320	bset		&31,%d0			# DENORM src; make into small norm
10321	mov.l		%d0,FP_SCR0_HI(%a6)
10322	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10323	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10324	lea		FP_SCR1(%a6),%a1	# dst ptr now refers to the copy
10325	lea		FP_SCR0(%a6),%a0	# src ptr now refers to the copy
10326	bra.w		fcmp_norm
10327
# fcmp_nrm_dnrm: dst is a NORM, src is a DENORM (table entry NORM - DENORM).
# The first byte of each operand carries the sign in bit 7. Opposite signs
# are handled by fcmp_dnrm_s; for like signs only the 'N' bit is decided here.
10328fcmp_nrm_dnrm:
10329	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10330	mov.b		DST_EX(%a1),%d1
10331	eor.b		%d0,%d1			# bit 7 set if the signs differ
10332	bmi.w		fcmp_dnrm_s
10333
10334# signs are the same, so must determine the answer ourselves.
# NOTE(review): the non-negative path returns without writing FPSR_CC, so it
# presumably relies on the caller having cleared it - confirm.
10335	tst.b		%d0			# is src op negative?
10336	bmi.b		fcmp_nrm_dnrm_m		# yes
10337	rts
10338fcmp_nrm_dnrm_m:
10339	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10340	rts
10341
# fcmp_dnrm_nrm: dst is a DENORM, src is a NORM (table entry DENORM - NORM).
# Opposite signs are handled by fcmp_dnrm_d; for like signs the 'N' bit is
# set only when both operands are positive.
10342fcmp_dnrm_nrm:
10343	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10344	mov.b		DST_EX(%a1),%d1
10345	eor.b		%d0,%d1			# bit 7 set if the signs differ
10346	bmi.w		fcmp_dnrm_d
10347
10348# signs are the same, so must determine the answer ourselves.
# NOTE(review): the negative path returns without writing FPSR_CC, so it
# presumably relies on the caller having cleared it - confirm.
10349	tst.b		%d0			# is src op negative?
10350	bpl.b		fcmp_dnrm_nrm_m		# no
10351	rts
10352fcmp_dnrm_nrm_m:
10353	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10354	rts
10355
10356#########################################################################
10357# XDEF ****************************************************************	#
10358#	fsglmul(): emulates the fsglmul instruction			#
10359#									#
10360# XREF ****************************************************************	#
10361#	scale_to_zero_src() - scale src exponent to zero		#
10362#	scale_to_zero_dst() - scale dst exponent to zero		#
10363#	unf_res4() - return default underflow result for sglop		#
10364#	ovf_res() - return default overflow result			#
10365#	res_qnan() - return QNAN result					#
10366#	res_snan() - return SNAN result					#
10367#									#
10368# INPUT ***************************************************************	#
10369#	a0 = pointer to extended precision source operand		#
10370#	a1 = pointer to extended precision destination operand		#
10371#	d0 = rnd prec,mode						#
10372#									#
10373# OUTPUT **************************************************************	#
10374#	fp0 = result							#
10375#	fp1 = EXOP (if exception occurred)				#
10376#									#
10377# ALGORITHM ***********************************************************	#
10378#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10379# norms/denorms into ext/sgl/dbl precision.				#
10380#	For norms/denorms, scale the exponents such that a multiply	#
10381# instruction won't cause an exception. Use the regular fsglmul to	#
10382# compute a result. Check if the regular operands would have taken	#
10383# an exception. If so, return the default overflow/underflow result	#
10384# and return the EXOP if exceptions are enabled. Else, scale the	#
10385# result operand to the proper exponent.				#
10386#									#
10387#########################################################################
10388
# fsglmul: entry point. Save the rounding info in L_SCR3 and build the
# dispatch index (DTAG << 3) | STAG in d1; index zero (NORM x NORM) falls
# through to fsglmul_norm, anything else goes through tbl_fsglmul_op.
10389	global		fsglmul
10390fsglmul:
10391	mov.l		%d0,L_SCR3(%a6)		# store rnd info
10392
10393	clr.w		%d1			# index is used as a word later
10394	mov.b		DTAG(%a6),%d1		# dst tag
10395	lsl.b		&0x3,%d1		# dst tag selects the table row
10396	or.b		STAG(%a6),%d1		# src tag selects the column
10397
10398	bne.w		fsglmul_not_norm	# optimize on non-norm input
10399
# copy dst to FP_SCR1 and src to FP_SCR0, scale both, and use the sum of
# the two scale factors (kept in d0) to classify the result.
10400fsglmul_norm:
10401	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10402	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
10403	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10404
10405	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10406	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10407	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10408
10409	bsr.l		scale_to_zero_src	# scale exponent
10410	mov.l		%d0,-(%sp)		# save scale factor 1
10411
10412	bsr.l		scale_to_zero_dst	# scale dst exponent
10413
10414	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
10415
10416	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
10417	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
10418	blt.w		fsglmul_ovfl		# result will overflow
10419
10420	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
10421	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
10422	bgt.w		fsglmul_unfl		# result will underflow
10423
# no exception possible: multiply the scaled operands with the user's FPCR.
10424fsglmul_normal:
10425	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10426
10427	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10428	fmov.l		&0x0,%fpsr		# clear FPSR
10429
10430	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10431
10432	fmov.l		%fpsr,%d1		# save status
10433	fmov.l		&0x0,%fpcr		# clear FPCR
10434
10435	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10436
# common exit (also entered from fsglmul_may_ovfl/fsglmul_may_unfl): store
# fp0 to FP_SCR0, subtract the scale factor in d0 from its exponent, and
# reload fp0.
10437fsglmul_normal_exit:
10438	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10439	mov.l		%d2,-(%sp)		# save d2
10440	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
10441	mov.l		%d1,%d2			# make a copy
10442	andi.l		&0x7fff,%d1		# strip sign
10443	andi.w		&0x8000,%d2		# keep old sign
10444	sub.l		%d0,%d1			# exp - scale factor (undo scaling)
10445	or.w		%d2,%d1			# concat old sign,new exp
10446	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10447	mov.l		(%sp)+,%d2		# restore d2
10448	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10449	rts
10450
# fsglmul_ovfl: the result will overflow. Multiply the scaled operands to
# collect INEX2/N, set the overflow status bits, and return the default
# overflow result from ovf_res in fp0.
10451fsglmul_ovfl:
10452	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10453
10454	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10455	fmov.l		&0x0,%fpsr		# clear FPSR
10456
10457	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10458
10459	fmov.l		%fpsr,%d1		# save status
10460	fmov.l		&0x0,%fpcr		# clear FPCR
10461
10462	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10463
10464fsglmul_ovfl_tst:
10465
10466# save setting this until now because this is where fsglmul_may_ovfl may jump in
10467	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10468
10469	mov.b		FPCR_ENABLE(%a6),%d1
10470	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10471	bne.b		fsglmul_ovfl_ena	# yes
10472
# note: d0 (scale factor) is overwritten with the rnd info below, so
# fsglmul_ovfl_ena must finish with the scale factor before branching here.
10473fsglmul_ovfl_dis:
10474	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10475	sne		%d1			# set sign param accordingly
10476	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
10477	andi.b		&0x30,%d0		# force prec = ext
10478	bsr.l		ovf_res			# calculate default result
10479	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10480	fmovm.x		(%a0),&0x80		# return default result in fp0
10481	rts
10482
# fsglmul_ovfl_ena: OVFL or INEX is enabled. Build the EXOP from the product
# in fp0: exponent = (exp - scale factor - 0x6000) & 0x7fff, sign preserved.
# Return it in fp1, then create the default result via fsglmul_ovfl_dis.
10483fsglmul_ovfl_ena:
10484	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
10485
10486	mov.l		%d2,-(%sp)		# save d2
10487	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10488	mov.l		%d1,%d2			# make a copy
10489	andi.l		&0x7fff,%d1		# strip sign
10490	sub.l		%d0,%d1			# subtract scale factor
10491	subi.l		&0x6000,%d1		# subtract bias
10492	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
10493	andi.w		&0x8000,%d2		# keep old sign
10494	or.w		%d2,%d1			# concat old sign,new exp
10495	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10496	mov.l		(%sp)+,%d2		# restore d2
10497	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10498	bra.b		fsglmul_ovfl_dis
10499
# fsglmul_may_ovfl: the scale factor sits exactly on the overflow boundary.
# Multiply the scaled operands with the user's rounding and test the
# magnitude of the product: 2 or more means overflow has occurred.
10500fsglmul_may_ovfl:
10501	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10502
10503	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10504	fmov.l		&0x0,%fpsr		# clear FPSR
10505
10506	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10507
10508	fmov.l		%fpsr,%d1		# save status
10509	fmov.l		&0x0,%fpcr		# clear FPCR
10510
10511	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10512
10513	fabs.x		%fp0,%fp1		# make a copy of result
10514	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10515	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
10516
10517# no, it didn't overflow; we have correct result
10518	bra.w		fsglmul_normal_exit
10519
# fsglmul_unfl: the result will underflow. Set the UNFL exception bit,
# multiply the scaled operands with the FPCR set to rz_mode*0x10, and
# return the default underflow result from unf_res4 in fp0. If UNFL or INEX
# is enabled, fsglmul_unfl_ena first builds the EXOP in fp1.
10520fsglmul_unfl:
10521	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10522
10523	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10524
10525	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
10526	fmov.l		&0x0,%fpsr		# clear FPSR
10527
10528	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10529
10530	fmov.l		%fpsr,%d1		# save status
10531	fmov.l		&0x0,%fpcr		# clear FPCR
10532
10533	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10534
10535	mov.b		FPCR_ENABLE(%a6),%d1
10536	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10537	bne.b		fsglmul_unfl_ena	# yes
10538
10539fsglmul_unfl_dis:
10540	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10541
10542	lea		FP_SCR0(%a6),%a0	# pass: result addr
10543	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10544	bsr.l		unf_res4		# calculate default result
10545	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10546	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10547	rts
10548
10549#
10550# UNFL (or INEX) is enabled.
# redo the multiply in fp1 with the user's FPCR (fp0 keeps the product
# computed in fsglmul_unfl) and build the EXOP from it:
# exponent = (exp - scale factor + 0x6000) & 0x7fff, sign preserved.
10551#
10552fsglmul_unfl_ena:
10553	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
10554
10555	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10556	fmov.l		&0x0,%fpsr		# clear FPSR
10557
10558	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
10559
10560	fmov.l		&0x0,%fpcr		# clear FPCR
10561
10562	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10563	mov.l		%d2,-(%sp)		# save d2
10564	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10565	mov.l		%d1,%d2			# make a copy
10566	andi.l		&0x7fff,%d1		# strip sign
10567	andi.w		&0x8000,%d2		# keep old sign
10568	sub.l		%d0,%d1			# subtract scale factor
10569	addi.l		&0x6000,%d1		# add bias
10570	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
10571	or.w		%d2,%d1			# concat old sign,new exp
10572	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10573	mov.l		(%sp)+,%d2		# restore d2
10574	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10575	bra.w		fsglmul_unfl_dis	# now create the default result
10576
# fsglmul_may_unfl: the scale factor sits exactly on the underflow boundary.
# Multiply the scaled operands with the user's rounding and compare the
# magnitude of the product against 2: greater means no underflow, less
# means underflow, and equality needs the RZ re-check below.
10577fsglmul_may_unfl:
10578	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10579
10580	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10581	fmov.l		&0x0,%fpsr		# clear FPSR
10582
10583	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10584
10585	fmov.l		%fpsr,%d1		# save status
10586	fmov.l		&0x0,%fpcr		# clear FPCR
10587
10588	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10589
10590	fabs.x		%fp0,%fp1		# make a copy of result
10591	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
10592	fbgt.w		fsglmul_normal_exit	# |result| > 2; no underflow occurred
10593	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred
10594
10595#
10596# we still don't know if underflow occurred. result is ~ equal to 2. but,
10597# we don't know if the result was an underflow that rounded up to a 2 or
10598# a normalized number that rounded down to a 2. so, redo the entire operation
10599# using RZ as the rounding mode to see what the pre-rounded result is.
10600# this case should be relatively rare.
10601#
10602	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
10603
10604	mov.l		L_SCR3(%a6),%d1
10605	andi.b		&0xc0,%d1		# keep rnd prec
10606	ori.b		&rz_mode*0x10,%d1	# insert RZ
10607
10608	fmov.l		%d1,%fpcr		# set FPCR
10609	fmov.l		&0x0,%fpsr		# clear FPSR
10610
10611	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
10612
10613	fmov.l		&0x0,%fpcr		# clear FPCR
10614	fabs.x		%fp1			# make absolute value
10615	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
10616	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
10617	bra.w		fsglmul_unfl		# yes, underflow occurred
10618
10619##############################################################################
10620
10621#
10622# Single Precision Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG indexes a table of 16-bit offsets relative to
# tbl_fsglmul_op; the handler is reached with a pc-relative jmp.
# Rows are the dst tag, columns the src tag, in the order
# NORM, ZERO, INF, QNAN, DENORM, SNAN, followed by two unused slots.
10623#
10624fsglmul_not_norm:
10625	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10626	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
10627
10628	swbeg		&48
10629tbl_fsglmul_op:
10630	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
10631	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
10632	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
10633	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
10634	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
10635	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
10636	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10637	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10638
10639	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
10640	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
10641	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
10642	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
10643	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
10644	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
10645	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10646	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10647
10648	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
10649	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
10650	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
10651	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
10652	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
10653	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
10654	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10655	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10656
10657	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
10658	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
10659	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
10660	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
10661	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
10662	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
10663	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10664	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10665
10666	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
10667	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
10668	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
10669	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
10670	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
10671	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
10672	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10673	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10674
10675	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
10676	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
10677	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
10678	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
10679	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
10680	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
10681	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10682	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused slot)
10683
# trampolines for the special-case entries of tbl_fsglmul_op. The table
# holds 16-bit offsets, so each entry targets a nearby label that forwards
# with a long branch to the shared handler (the zero/INF cases reuse the
# fmul handlers).
10684fsglmul_res_operr:
10685	bra.l		res_operr
10686fsglmul_res_snan:
10687	bra.l		res_snan
10688fsglmul_res_qnan:
10689	bra.l		res_qnan
10690fsglmul_zero:
10691	bra.l		fmul_zero
10692fsglmul_inf_src:
10693	bra.l		fmul_inf_src
10694fsglmul_inf_dst:
10695	bra.l		fmul_inf_dst
10696
10697#########################################################################
10698# XDEF ****************************************************************	#
10699#	fsgldiv(): emulates the fsgldiv instruction			#
10700#									#
10701# XREF ****************************************************************	#
10702#	scale_to_zero_src() - scale src exponent to zero		#
10703#	scale_to_zero_dst() - scale dst exponent to zero		#
10704#	unf_res4() - return default underflow result for sglop		#
10705#	ovf_res() - return default overflow result			#
10706#	res_qnan() - return QNAN result					#
10707#	res_snan() - return SNAN result					#
10708#									#
10709# INPUT ***************************************************************	#
10710#	a0 = pointer to extended precision source operand		#
10711#	a1 = pointer to extended precision destination operand		#
10712#	d0 = rnd prec,mode						#
10713#									#
10714# OUTPUT **************************************************************	#
10715#	fp0 = result							#
10716#	fp1 = EXOP (if exception occurred)				#
10717#									#
10718# ALGORITHM ***********************************************************	#
10719#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10720# norms/denorms into ext/sgl/dbl precision.				#
10721#	For norms/denorms, scale the exponents such that a divide	#
10722# instruction won't cause an exception. Use the regular fsgldiv to	#
10723# compute a result. Check if the regular operands would have taken	#
10724# an exception. If so, return the default overflow/underflow result	#
10725# and return the EXOP if exceptions are enabled. Else, scale the	#
10726# result operand to the proper exponent.				#
10727#									#
10728#########################################################################
10729
10730	global		fsgldiv
10731fsgldiv:
10732	mov.l		%d0,L_SCR3(%a6)		# store rnd info
10733
10734	clr.w		%d1
10735	mov.b		DTAG(%a6),%d1
10736	lsl.b		&0x3,%d1
10737	or.b		STAG(%a6),%d1		# combine src tags
10738
10739	bne.w		fsgldiv_not_norm	# optimize on non-norm input
10740
10741#
10742# DIVIDE: NORMs and DENORMs ONLY!
10743#
10744fsgldiv_norm:
10745	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10746	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
10747	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10748
10749	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10750	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10751	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10752
10753	bsr.l		scale_to_zero_src	# calculate scale factor 1
10754	mov.l		%d0,-(%sp)		# save scale factor 1
10755
10756	bsr.l		scale_to_zero_dst	# calculate scale factor 2
10757
10758	neg.l		(%sp)			# S.F. = scale1 - scale2
10759	add.l		%d0,(%sp)
10760
10761	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
10762	lsr.b		&0x6,%d1
10763	mov.l		(%sp)+,%d0
10764	cmpi.l		%d0,&0x3fff-0x7ffe
10765	ble.w		fsgldiv_may_ovfl
10766
10767	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
10768	beq.w		fsgldiv_may_unfl	# maybe
10769	bgt.w		fsgldiv_unfl		# yes; go handle underflow
10770
10771fsgldiv_normal:
10772	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10773
10774	fmov.l		L_SCR3(%a6),%fpcr	# save FPCR
10775	fmov.l		&0x0,%fpsr		# clear FPSR
10776
10777	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
10778
10779	fmov.l		%fpsr,%d1		# save FPSR
10780	fmov.l		&0x0,%fpcr		# clear FPCR
10781
10782	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10783
10784fsgldiv_normal_exit:
10785	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
10786	mov.l		%d2,-(%sp)		# save d2
10787	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
10788	mov.l		%d1,%d2			# make a copy
10789	andi.l		&0x7fff,%d1		# strip sign
10790	andi.w		&0x8000,%d2		# keep old sign
10791	sub.l		%d0,%d1			# add scale factor
10792	or.w		%d2,%d1			# concat old sign,new exp
10793	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10794	mov.l		(%sp)+,%d2		# restore d2
10795	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10796	rts
10797
10798fsgldiv_may_ovfl:
10799	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10800
10801	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10802	fmov.l		&0x0,%fpsr		# set FPSR
10803
10804	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
10805
10806	fmov.l		%fpsr,%d1
10807	fmov.l		&0x0,%fpcr
10808
10809	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
10810
10811	fmovm.x		&0x01,-(%sp)		# save result to stack
10812	mov.w		(%sp),%d1		# fetch new exponent
10813	add.l		&0xc,%sp		# clear result
10814	andi.l		&0x7fff,%d1		# strip sign
10815	sub.l		%d0,%d1			# add scale factor
10816	cmp.l		%d1,&0x7fff		# did divide overflow?
10817	blt.b		fsgldiv_normal_exit
10818
#
# the divide overflowed: record ovfl/aovfl/ainex in the user FPSR. if
# OVFL or INEX is enabled, go build the EXOP first (fsgldiv_ovfl_ena);
# otherwise fall through and return the default overflow result.
#
10819fsgldiv_ovfl_tst:
10820	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10821
10822	mov.b		FPCR_ENABLE(%a6),%d1
10823	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10824	bne.b		fsgldiv_ovfl_ena	# yes
10825
#
# fsgldiv_ovfl_dis: call ovf_res with d1 = sign flag and d0 = rounding mode
# (precision bits cleared). on return, d0 is or'ed into FPSR_CC and the
# default result is loaded into fp0 through a0.
#
10826fsgldiv_ovfl_dis:
10827	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
10828	sne		%d1			# set sign param accordingly
10829	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
10830	andi.b		&0x30,%d0		# kill precision
10831	bsr.l		ovf_res			# calculate default result
10832	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
10833	fmovm.x		(%a0),&0x80		# return default result in fp0
10834	rts
10835
#
# OVFL or INEX is enabled: create the EXOP in fp1. the result in fp0 is
# stored to FP_SCR0, its exponent gets the scale factor applied and 0x6000
# subtracted (wrapped to 15 bits, sign preserved), and the value is loaded
# into fp1. the default result is then produced by fsgldiv_ovfl_dis.
# d0 must still hold the scale factor here; fsgldiv_ovfl_dis reloads it.
#
10836fsgldiv_ovfl_ena:
10837	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
10838
10839	mov.l		%d2,-(%sp)		# save d2
10840	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10841	mov.l		%d1,%d2			# make a copy
10842	andi.l		&0x7fff,%d1		# strip sign
10843	andi.w		&0x8000,%d2		# keep old sign
10844	sub.l		%d0,%d1			# add scale factor
10845	subi.l		&0x6000,%d1		# subtract new bias
10846	andi.w		&0x7fff,%d1		# clear ms bit
10847	or.w		%d2,%d1			# concat old sign,new exp
10848	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10849	mov.l		(%sp)+,%d2		# restore d2
10850	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10851	bra.b		fsgldiv_ovfl_dis	# now create the default result
10852
#
# the divide underflowed: set the UNFL exception bit and redo the divide
# with the FPCR set to round-to-zero. if UNFL or INEX is enabled, an EXOP
# is created first (fsgldiv_unfl_ena); in either case the default result
# is produced by fsgldiv_unfl_dis.
#
10853fsgldiv_unfl:
10854	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10855
10856	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10857
10858	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ rounding)
10859	fmov.l		&0x0,%fpsr		# clear FPSR
10860
10861	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10862
10863	fmov.l		%fpsr,%d1		# save status
10864	fmov.l		&0x0,%fpcr		# clear FPCR
10865
10866	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10867
10868	mov.b		FPCR_ENABLE(%a6),%d1
10869	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10870	bne.b		fsgldiv_unfl_ena	# yes
10871
#
# fsgldiv_unfl_dis: hand the RZ result (stored in FP_SCR0) to unf_res4
# together with the rounding info; d0 on return is or'ed into FPSR_CC and
# FP_SCR0 is reloaded into fp0 as the default result.
#
10872fsgldiv_unfl_dis:
10873	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10874
10875	lea		FP_SCR0(%a6),%a0	# pass: result addr
10876	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10877	bsr.l		unf_res4		# calculate default result
10878	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10879	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10880	rts
10881
10882#
10883# UNFL or INEX is enabled: create the EXOP in fp1. the divide is redone
# into fp1 with the user's rounding info, then the exponent of that result
# gets the scale factor applied and 0x6000 added (wrapped to 15 bits, sign
# preserved). fp0 still holds the RZ result computed in fsgldiv_unfl; it
# is consumed by fsgldiv_unfl_dis.
10884#
10885fsgldiv_unfl_ena:
10886	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
10887
10888	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10889	fmov.l		&0x0,%fpsr		# clear FPSR
10890
10891	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10892
10893	fmov.l		&0x0,%fpcr		# clear FPCR
10894
10895	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10896	mov.l		%d2,-(%sp)		# save d2
10897	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10898	mov.l		%d1,%d2			# make a copy
10899	andi.l		&0x7fff,%d1		# strip sign
10900	andi.w		&0x8000,%d2		# keep old sign
10901	sub.l		%d0,%d1			# add scale factor
10902	addi.l		&0x6000,%d1		# add bias
10903	andi.w		&0x7fff,%d1		# clear top bit
10904	or.w		%d2,%d1			# concat old sign, new exp
10905	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10906	mov.l		(%sp)+,%d2		# restore d2
10907	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10908	bra.b		fsgldiv_unfl_dis	# now create the default result
10909
10910#
10911# the divide operation MAY underflow: execute it with the user's rounding
# info and compare |result| against 1. greater than 1 means no underflow,
# less than 1 means underflow; only the "equal" case needs the extra RZ
# divide further below.
10912#
10913fsgldiv_may_unfl:
10914	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10915
10916	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10917	fmov.l		&0x0,%fpsr		# clear FPSR
10918
10919	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10920
10921	fmov.l		%fpsr,%d1		# save status
10922	fmov.l		&0x0,%fpcr		# clear FPCR
10923
10924	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10925
10926	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is kept
10927	fcmp.b		%fp1,&0x1		# compare |result| against 1
10928	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
10929	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred
10930
10931#
10932# we still don't know if underflow occurred. result is ~ equal to 1. but,
10933# we don't know if the result was an underflow that rounded up to a 1
10934# or a normalized number that rounded down to a 1. so, redo the entire
10935# operation using RZ as the rounding mode to see what the pre-rounded
10936# result is. this case should be relatively rare.
10937#
10938	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
10939
10940	clr.l		%d1			# clear scratch register
10941	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
10942
10943	fmov.l		%d1,%fpcr		# set FPCR
10944	fmov.l		&0x0,%fpsr		# clear FPSR
10945
10946	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10947
10948	fmov.l		&0x0,%fpcr		# clear FPCR
10949	fabs.x		%fp1			# make absolute value
10950	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
10951	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
10952	bra.w		fsgldiv_unfl		# yes; underflow occurred
10953
10954############################################################################
10955
10956#
10957# Divide: inputs are not both normalized; what are they?
# d1 is used as a word index into tbl_fsgldiv_op; the fetched entry is the
# handler's offset from the table base, which is then jumped to.
# the table is laid out in rows of eight entries. as the entry comments
# show, the row selects the dividend (dst) type and the column the divisor
# (src) type. the last two entries of every row hold a zero offset and
# have no operand type assigned.
10958#
10959fsgldiv_not_norm:
10960	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
10961	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)		# dispatch to handler
10962
10963	swbeg		&48
10964tbl_fsgldiv_op:
10965	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
10966	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
10967	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
10968	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
10969	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
10970	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
10971	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10972	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10973
10974	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
10975	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
10976	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
10977	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
10978	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
10979	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
10980	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10981	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10982
10983	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
10984	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
10985	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
10986	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
10987	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
10988	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
10989	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10990	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10991
10992	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
10993	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
10994	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
10995	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
10996	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
10997	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
10998	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10999	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11000
11001	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
11002	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
11003	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
11004	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
11005	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
11006	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
11007	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11008	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11009
11010	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
11011	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
11012	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
11013	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
11014	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
11015	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
11016	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11017	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11018
#
# local stubs for the tbl_fsgldiv_op entries. each one is a long branch to
# the shared handler; the table itself stores 16-bit offsets relative to
# tbl_fsgldiv_op, so its targets are these nearby labels.
#
11019fsgldiv_res_qnan:
11020	bra.l		res_qnan		# QNAN operand
11021fsgldiv_res_snan:
11022	bra.l		res_snan		# SNAN operand
11023fsgldiv_res_operr:
11024	bra.l		res_operr		# ZERO / ZERO or INF / INF
11025fsgldiv_inf_load:
11026	bra.l		fdiv_inf_load		# {NORM,DENORM} / ZERO
11027fsgldiv_zero_load:
11028	bra.l		fdiv_zero_load		# finite / INF, or ZERO / {NORM,DENORM}
11029fsgldiv_inf_dst:
11030	bra.l		fdiv_inf_dst		# INF / {NORM,ZERO,DENORM}
11031
11032#########################################################################
11033# XDEF ****************************************************************	#
11034#	fadd(): emulates the fadd instruction				#
11035#	fsadd(): emulates the fsadd instruction				#
11036#	fdadd(): emulates the fdadd instruction				#
11037#									#
11038# XREF ****************************************************************	#
11039#	addsub_scaler2() - scale the operands so they won't take exc	#
11040#	ovf_res() - return default overflow result			#
11041#	unf_res() - return default underflow result			#
11042#	res_qnan() - set QNAN result					#
11043#	res_snan() - set SNAN result					#
11044#	res_operr() - set OPERR result					#
11045#	scale_to_zero_src() - set src operand exponent equal to zero	#
11046#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11047#									#
11048# INPUT ***************************************************************	#
11049#	a0 = pointer to extended precision source operand		#
11050#	a1 = pointer to extended precision destination operand		#
11051#									#
11052# OUTPUT **************************************************************	#
11053#	fp0 = result							#
11054#	fp1 = EXOP (if exception occurred)				#
11055#									#
11056# ALGORITHM ***********************************************************	#
11057#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11058# norms into extended, single, and double precision.			#
11059#	Do addition after scaling exponents such that exception won't	#
11060# occur. Then, check result exponent to see if exception would have	#
11061# occurred. If so, return default result and maybe EXOP. Else, insert	#
11062# the correct result exponent and return. Set FPSR bits as appropriate.	#
11063#									#
11064#########################################################################
11065
#
# entry points. fsadd/fdadd replace the rounding precision in d0 with
# single/double (the 0x30 rounding mode bits are kept) and continue as
# fadd. fadd stores d0 in L_SCR3 and combines the operand tags into a
# table index; a zero index (both tags zero) falls through to fadd_norm.
#
11066	global		fsadd
11067fsadd:
11068	andi.b		&0x30,%d0		# clear rnd prec
11069	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11070	bra.b		fadd
11071
11072	global		fdadd
11073fdadd:
11074	andi.b		&0x30,%d0		# clear rnd prec
11075	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11076
11077	global		fadd
11078fadd:
11079	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11080
11081	clr.w		%d1
11082	mov.b		DTAG(%a6),%d1
11083	lsl.b		&0x3,%d1		# dst tag selects the table row
11084	or.b		STAG(%a6),%d1		# combine src tags: (DTAG<<3)|STAG
11085
11086	bne.w		fadd_not_norm		# optimize on non-norm input
11087
11088#
11089# ADD: norms and denorms
# scale the operands, add them with the user's rounding info, and then
# classify the result by its exponent after the scale factor (d0) has been
# applied: overflow, underflow, "may underflow", or normal.
# from the point where d2 and the result are pushed, the stack holds the
# 12-byte result on top of the saved d2; every exit path has to unwind
# both.
11090#
11091fadd_norm:
11092	bsr.l		addsub_scaler2		# scale exponents
11093
11094fadd_zero_entry:
11095	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11096
11097	fmov.l		&0x0,%fpsr		# clear FPSR
11098	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11099
11100	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11101
11102	fmov.l		&0x0,%fpcr		# clear FPCR
11103	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
11104
11105	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11106
11107	fbeq.w		fadd_zero_exit		# if result is zero, end now
11108
11109	mov.l		%d2,-(%sp)		# save d2
11110
11111	fmovm.x		&0x01,-(%sp)		# save result to stack
11112
11113	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
11114	lsr.b		&0x6,%d1		# d1 = rnd prec = table index
11115
11116	mov.w		(%sp),%d2		# fetch new sign, exp
11117	andi.l		&0x7fff,%d2		# strip sign
11118	sub.l		%d0,%d2			# add scale factor
11119
11120	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121	bge.b		fadd_ovfl		# yes
11122
11123	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124	blt.w		fadd_unfl		# yes
11125	beq.w		fadd_may_unfl		# maybe; go find out
11126
#
# fadd_normal: no exception. insert the rescaled exponent (d2) under the
# original sign and pop the result into fp0.
#
11127fadd_normal:
11128	mov.w		(%sp),%d1
11129	andi.w		&0x8000,%d1		# keep sign
11130	or.w		%d2,%d1			# concat sign,new exp
11131	mov.w		%d1,(%sp)		# insert new exponent
11132
11133	fmovm.x		(%sp)+,&0x80		# return result in fp0
11134
11135	mov.l		(%sp)+,%d2		# restore d2
11136	rts
11137
# the result is zero: fp0 already holds it and nothing was pushed yet.
11138fadd_zero_exit:
11139#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11140	rts
11141
#
# exponent thresholds used by fadd_norm, indexed by rounding precision
# (ext, sgl, dbl). a rescaled exponent >= the tbl_fadd_ovfl entry is an
# overflow; one below the tbl_fadd_unfl entry is an underflow, and one
# equal to it is checked further in fadd_may_unfl.
#
11142tbl_fadd_ovfl:
11143	long		0x7fff			# ext ovfl
11144	long		0x407f			# sgl ovfl
11145	long		0x43ff			# dbl ovfl
11146
11147tbl_fadd_unfl:
11148	long	        0x0000			# ext unfl
11149	long		0x3f81			# sgl unfl
11150	long		0x3c01			# dbl unfl
11151
#
# the add overflowed: record ovfl/aovfl/ainex. if OVFL or INEX is enabled,
# go build the EXOP (fadd_ovfl_ena, which also consumes the stacked
# result); otherwise discard the stacked result and fall through.
#
11152fadd_ovfl:
11153	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154
11155	mov.b		FPCR_ENABLE(%a6),%d1
11156	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11157	bne.b		fadd_ovfl_ena		# yes
11158
11159	add.l		&0xc,%sp		# discard stacked result
#
# fadd_ovfl_dis: on entry only the saved d2 is left on the stack. call
# ovf_res with d1 = sign flag and d0 = rounding info; on return d0 is
# or'ed into FPSR_CC and the default result is loaded through a0.
#
11160fadd_ovfl_dis:
11161	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11162	sne		%d1			# set sign param accordingly
11163	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11164	bsr.l		ovf_res			# calculate default result
11165	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11166	fmovm.x		(%a0),&0x80		# return default result in fp0
11167	mov.l		(%sp)+,%d2		# restore d2
11168	rts
11169
#
# OVFL or INEX is enabled; create the EXOP in fp1:
# - if the rounding precision is extended, the result already on the stack
#   becomes the EXOP once 0x6000 is subtracted from its rescaled exponent.
# - if the precision is single or double, the add is redone rounded to
#   extended precision (fadd_ovfl_ena_sd) and that result is used instead.
#
# the rounding precision sits in bits 7:6 of the LOW-order byte of the
# L_SCR3 longword (compare "mov.b 3+L_SCR3(%a6)" in fadd_zero_2_chk_rm and
# the "mov.l L_SCR3(%a6)" / "andi.b &0xc0" pair in fadd_unfl_ena). byte
# offset 0 is the high-order byte, so the precision has to be fetched from
# offset 3; otherwise the single/double case is never detected.
#
# on entry: stack = 12-byte result, saved d2; d0 = scale factor;
# d2 = rescaled exponent of the stacked result.
#
11170fadd_ovfl_ena:
11171	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec:mode byte
11172	andi.b		&0xc0,%d1		# is precision extended?
11173	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
11174
11175fadd_ovfl_ena_cont:
11176	mov.w		(%sp),%d1
11177	andi.w		&0x8000,%d1		# keep sign
11178	subi.l		&0x6000,%d2		# subtract new bias
11179	andi.w		&0x7fff,%d2		# clear top bit
11180	or.w		%d2,%d1			# concat sign,new exp
11181	mov.w		%d1,(%sp)		# insert new exponent
11182
11183	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11184	bra.b		fadd_ovfl_dis		# saved d2 is still stacked
11185
#
# single or double precision: redo the add with the user's rounding mode
# but extended precision, and replace the stacked result with it. the
# exponent in d2 is recomputed from the new result so that the EXOP's
# sign, exponent and mantissa all come from the same operation (d0 still
# holds the scale factor; it is not reloaded until fadd_ovfl_dis).
#
11186fadd_ovfl_ena_sd:
11187	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11188
11189	mov.l		L_SCR3(%a6),%d1
11190	andi.b		&0x30,%d1		# keep rnd mode
11191	fmov.l		%d1,%fpcr		# set FPCR
11192
11193	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11194
11195	fmov.l		&0x0,%fpcr		# clear FPCR
11196
11197	add.l		&0xc,%sp		# discard sgl/dbl-rounded result
11198	fmovm.x		&0x01,-(%sp)		# stack ext-rounded result
	mov.w		(%sp),%d2		# refetch {sgn,exp}
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor
11199	bra.b		fadd_ovfl_ena_cont
11200
#
# the add underflowed: set the UNFL exception bit, drop the stacked result
# (the saved d2 stays on the stack) and redo the add with the FPCR set to
# round-to-zero. if UNFL or INEX is enabled an EXOP is created first
# (fadd_unfl_ena); either way fadd_unfl_dis produces the default result.
#
11201fadd_unfl:
11202	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203
11204	add.l		&0xc,%sp		# discard stacked result
11205
11206	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11207
11208	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ rounding)
11209	fmov.l		&0x0,%fpsr		# clear FPSR
11210
11211	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11212
11213	fmov.l		&0x0,%fpcr		# clear FPCR
11214	fmov.l		%fpsr,%d1		# save status
11215
11216	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
11217
11218	mov.b		FPCR_ENABLE(%a6),%d1
11219	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11220	bne.b		fadd_unfl_ena		# yes
11221
#
# fadd_unfl_dis: hand the RZ result (stored in FP_SCR0) to unf_res with
# the rounding info; d0 on return is or'ed into FPSR_CC, FP_SCR0 is
# reloaded into fp0, and the d2 saved by fadd_norm is restored.
#
11222fadd_unfl_dis:
11223	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11224
11225	lea		FP_SCR0(%a6),%a0	# pass: result addr
11226	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11227	bsr.l		unf_res			# calculate default result
11228	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
11229	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11230	mov.l		(%sp)+,%d2		# restore d2
11231	rts
11232
#
# UNFL or INEX is enabled: create the EXOP in fp1. the add is redone into
# fp1 -- with the full rounding info if the precision is extended, or with
# only the rounding mode (fadd_unfl_ena_sd) if it is single or double --
# and the exponent of that result gets the scale factor applied and 0x6000
# added. d2 is used as scratch without saving: the caller's d2 is already
# on the stack and is restored in fadd_unfl_dis.
#
11233fadd_unfl_ena:
11234	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
11235
11236	mov.l		L_SCR3(%a6),%d1
11237	andi.b		&0xc0,%d1		# is precision extended?
11238	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
11239
11240	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11241
11242fadd_unfl_ena_cont:
11243	fmov.l		&0x0,%fpsr		# clear FPSR
11244
11245	fadd.x		FP_SCR0(%a6),%fp1	# execute add
11246
11247	fmov.l		&0x0,%fpcr		# clear FPCR
11248
11249	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11250	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11251	mov.l		%d1,%d2			# make a copy
11252	andi.l		&0x7fff,%d1		# strip sign
11253	andi.w		&0x8000,%d2		# keep old sign
11254	sub.l		%d0,%d1			# add scale factor
11255	addi.l		&0x6000,%d1		# add new bias
11256	andi.w		&0x7fff,%d1		# clear top bit
11257	or.w		%d2,%d1			# concat sign,new exp
11258	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11259	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11260	bra.w		fadd_unfl_dis		# now create the default result
11261
11262fadd_unfl_ena_sd:
11263	mov.l		L_SCR3(%a6),%d1
11264	andi.b		&0x30,%d1		# use only rnd mode
11265	fmov.l		%d1,%fpcr		# set FPCR
11266
11267	bra.b		fadd_unfl_ena_cont
11268
11269#
11270# result is equal to the smallest normalized number in the selected precision
11271# if the precision is extended, this result could not have come from an
11272# underflow that rounded up.
# on entry the 12-byte result is on top of the stack (above the saved d2)
# and d2 holds its rescaled exponent.
11273#
11274fadd_may_unfl:
11275	mov.l		L_SCR3(%a6),%d1
11276	andi.b		&0xc0,%d1		# is precision extended?
11277	beq.w		fadd_normal		# yes; no underflow occurred
11278
11279	mov.l		0x4(%sp),%d1		# extract hi(man)
11280	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11281	bne.w		fadd_normal		# no; no underflow occurred
11282
11283	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11284	bne.w		fadd_normal		# no; no underflow occurred
11285
11286	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287	beq.w		fadd_normal		# no; no underflow occurred
11288
11289#
11290# ok, so now the result has an exponent equal to the smallest normalized
11291# exponent for the selected precision. also, the mantissa is equal to
11292# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293# g,r,s.
11294# now, we must determine whether the pre-rounded result was an underflow
11295# rounded "up" or a normalized number rounded "down".
11296# so, we do this by re-executing the add using RZ as the rounding mode and
11297# seeing if the new result is smaller or equal to the current result.
# fp0 is overwritten with its absolute value for the compare; that is
# harmless because fadd_normal reloads fp0 from the stacked result and
# fadd_unfl recomputes it.
11298#
11299	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11300
11301	mov.l		L_SCR3(%a6),%d1
11302	andi.b		&0xc0,%d1		# keep rnd prec
11303	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11304	fmov.l		%d1,%fpcr		# set FPCR
11305	fmov.l		&0x0,%fpsr		# clear FPSR
11306
11307	fadd.x		FP_SCR0(%a6),%fp1	# execute add
11308
11309	fmov.l		&0x0,%fpcr		# clear FPCR
11310
11311	fabs.x		%fp0			# compare absolute values
11312	fabs.x		%fp1
11313	fcmp.x		%fp0,%fp1		# is first result > second?
11314
11315	fbgt.w		fadd_unfl		# yes; it's an underflow
11316	bra.w		fadd_normal		# no; it's not an underflow
11317
11318##########################################################################
11319
11320#
11321# Add: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG as built in fadd. it is used as a word index
# into tbl_fadd_op; the fetched entry is the handler's offset from the
# table base. each row of eight entries is one dst type and each column
# one src type ("dst + src" in the comments). the last two entries of
# every row hold a zero offset and have no operand type assigned.
11322#
11323fadd_not_norm:
11324	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
11325	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)	# dispatch to handler
11326
11327	swbeg		&48
11328tbl_fadd_op:
11329	short		fadd_norm	- tbl_fadd_op # NORM + NORM
11330	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
11331	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
11332	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11333	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
11334	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11335	short		tbl_fadd_op	- tbl_fadd_op #
11336	short		tbl_fadd_op	- tbl_fadd_op #
11337
11338	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
11339	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
11340	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
11341	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
11342	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
11343	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
11344	short		tbl_fadd_op	- tbl_fadd_op #
11345	short		tbl_fadd_op	- tbl_fadd_op #
11346
11347	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
11348	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
11349	short		fadd_inf_2	- tbl_fadd_op # INF + INF
11350	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
11351	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
11352	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
11353	short		tbl_fadd_op	- tbl_fadd_op #
11354	short		tbl_fadd_op	- tbl_fadd_op #
11355
11356	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
11357	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
11358	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
11359	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
11360	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
11361	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
11362	short		tbl_fadd_op	- tbl_fadd_op #
11363	short		tbl_fadd_op	- tbl_fadd_op #
11364
11365	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
11366	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
11367	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
11368	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
11369	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
11370	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
11371	short		tbl_fadd_op	- tbl_fadd_op #
11372	short		tbl_fadd_op	- tbl_fadd_op #
11373
11374	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
11375	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
11376	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
11377	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
11378	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
11379	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
11380	short		tbl_fadd_op	- tbl_fadd_op #
11381	short		tbl_fadd_op	- tbl_fadd_op #
11382
#
# local stubs for the NAN entries of tbl_fadd_op: long branches to the
# shared handlers (the table stores 16-bit offsets from tbl_fadd_op).
#
11383fadd_res_qnan:
11384	bra.l		res_qnan
11385fadd_res_snan:
11386	bra.l		res_snan
11387
11388#
11389# both operands are ZEROes
# in: a0 = src operand, a1 = dst operand. out: fp0 = signed ZERO and
# FPSR_CC set to Z (plus NEG for -ZERO). d0 and d1 are used as scratch.
11390#
11391fadd_zero_2:
11392	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
11393	mov.b		DST_EX(%a1),%d1
11394	eor.b		%d0,%d1			# bit 7 set if signs differ
11395	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
11396
11397# the signs are the same. so determine whether they are positive or negative
11398# and return the appropriately signed zero.
11399	tst.b		%d0			# are ZEROes positive or negative?
11400	bmi.b		fadd_zero_rm		# negative
11401	fmov.s		&0x00000000,%fp0	# return +ZERO
11402	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11403	rts
11404
11405#
11406# the ZEROes have opposite signs:
11407# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408# - -ZERO is returned in the case of RM.
11409#
11410fadd_zero_2_chk_rm:
11411	mov.b		3+L_SCR3(%a6),%d1	# low byte of stored rnd info
11412	andi.b		&0x30,%d1		# extract rnd mode
11413	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
11414	beq.b		fadd_zero_rm		# yes
11415	fmov.s		&0x00000000,%fp0	# return +ZERO
11416	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11417	rts
11418
# return -ZERO: used for (-ZERO)+(-ZERO) and for opposite signs under RM.
11419fadd_zero_rm:
11420	fmov.s		&0x80000000,%fp0	# return -ZERO
11421	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422	rts
11423
11424#
11425# one operand is a ZERO and the other is a DENORM or NORM. scale
11426# the DENORM or NORM and jump to the regular fadd routine.
# the non-zero operand is copied into its scratch slot and scaled; the
# scratch slot of the ZERO operand is simply cleared (which makes it a
# +ZERO regardless of the original sign). fadd_zero_entry expects the
# scale factor in d0 -- presumably left there by the scale_to_zero_*
# call; confirm against those routines.
11427#
11428fadd_zero_dst:
11429	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11430	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11431	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11432	bsr.l		scale_to_zero_src	# scale the operand
11433	clr.w		FP_SCR1_EX(%a6)		# dst scratch = ZERO
11434	clr.l		FP_SCR1_HI(%a6)
11435	clr.l		FP_SCR1_LO(%a6)
11436	bra.w		fadd_zero_entry		# go execute fadd
11437
11438fadd_zero_src:
11439	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11440	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11441	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11442	bsr.l		scale_to_zero_dst	# scale the operand
11443	clr.w		FP_SCR0_EX(%a6)		# src scratch = ZERO
11444	clr.l		FP_SCR0_HI(%a6)
11445	clr.l		FP_SCR0_LO(%a6)
11446	bra.w		fadd_zero_entry		# go execute fadd
11447
11448#
11449# both operands are INFs. an OPERR will result if the INFs have
11450# different signs. else, an INF of the same sign is returned
11451#
11452fadd_inf_2:
11453	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11454	mov.b		DST_EX(%a1),%d1
11455	eor.b		%d1,%d0			# bit 7 set if signs differ
11456	bmi.l		res_operr		# weed out (-INF)+(+INF)
11457
11458# ok, so it's not an OPERR. but, we do have to remember to return the
11459# src INF since that's where the 881/882 gets the j-bit from...
11460
11461#
11462# the src operand is an INF and the dst is one of {ZERO, DENORM, NORM}
# (or an INF of the same sign, when falling through from fadd_inf_2):
# return the src INF.
11463#
11464fadd_inf_src:
11465	fmovm.x		SRC(%a0),&0x80		# return src INF
11466	tst.b		SRC_EX(%a0)		# is INF positive?
11467	bpl.b		fadd_inf_done		# yes; we're done
11468	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469	rts
11470
11471#
11472# the dst operand is an INF and the src is one of {ZERO, DENORM, NORM}:
# return the dst INF.
11473#
11474fadd_inf_dst:
11475	fmovm.x		DST(%a1),&0x80		# return dst INF
11476	tst.b		DST_EX(%a1)		# is INF positive?
11477	bpl.b		fadd_inf_done		# yes; we're done
11478	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479	rts
11480
# common exit for a positive INF result.
11481fadd_inf_done:
11482	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
11483	rts
11484
11485#########################################################################
11486# XDEF ****************************************************************	#
11487#	fsub(): emulates the fsub instruction				#
11488#	fssub(): emulates the fssub instruction				#
11489#	fdsub(): emulates the fdsub instruction				#
11490#									#
11491# XREF ****************************************************************	#
11492#	addsub_scaler2() - scale the operands so they won't take exc	#
11493#	ovf_res() - return default overflow result			#
11494#	unf_res() - return default underflow result			#
11495#	res_qnan() - set QNAN result					#
11496#	res_snan() - set SNAN result					#
11497#	res_operr() - set OPERR result					#
11498#	scale_to_zero_src() - set src operand exponent equal to zero	#
11499#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11500#									#
11501# INPUT ***************************************************************	#
11502#	a0 = pointer to extended precision source operand		#
11503#	a1 = pointer to extended precision destination operand		#
11504#									#
11505# OUTPUT **************************************************************	#
11506#	fp0 = result							#
11507#	fp1 = EXOP (if exception occurred)				#
11508#									#
11509# ALGORITHM ***********************************************************	#
11510#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11511# norms into extended, single, and double precision.			#
11512#	Do subtraction after scaling exponents such that exception won't#
11513# occur. Then, check result exponent to see if exception would have	#
11514# occurred. If so, return default result and maybe EXOP. Else, insert	#
11515# the correct result exponent and return. Set FPSR bits as appropriate.	#
11516#									#
11517#########################################################################
11518
#
# entry points. fssub/fdsub replace the rounding precision in d0 with
# single/double (the 0x30 rounding mode bits are kept) and continue as
# fsub. fsub stores d0 in L_SCR3 and combines the operand tags into a
# table index; a zero index (both tags zero) falls through to fsub_norm.
#
11519	global		fssub
11520fssub:
11521	andi.b		&0x30,%d0		# clear rnd prec
11522	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11523	bra.b		fsub
11524
11525	global		fdsub
11526fdsub:
11527	andi.b		&0x30,%d0		# clear rnd prec
11528	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11529
11530	global		fsub
11531fsub:
11532	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11533
11534	clr.w		%d1
11535	mov.b		DTAG(%a6),%d1
11536	lsl.b		&0x3,%d1		# dst tag selects the table row
11537	or.b		STAG(%a6),%d1		# combine src tags: (DTAG<<3)|STAG
11538
11539	bne.w		fsub_not_norm		# optimize on non-norm input
11540
11541#
11542# SUB: norms and denorms
# scale the operands, subtract (dst - src) with the user's rounding info,
# and then classify the result by its exponent after the scale factor (d0)
# has been applied: overflow, underflow, "may underflow", or normal.
# from the point where d2 and the result are pushed, the stack holds the
# 12-byte result on top of the saved d2; every exit path has to unwind
# both.
11543#
11544fsub_norm:
11545	bsr.l		addsub_scaler2		# scale exponents
11546
11547fsub_zero_entry:
11548	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11549
11550	fmov.l		&0x0,%fpsr		# clear FPSR
11551	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11552
11553	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11554
11555	fmov.l		&0x0,%fpcr		# clear FPCR
11556	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
11557
11558	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11559
11560	fbeq.w		fsub_zero_exit		# if result zero, end now
11561
11562	mov.l		%d2,-(%sp)		# save d2
11563
11564	fmovm.x		&0x01,-(%sp)		# save result to stack
11565
11566	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
11567	lsr.b		&0x6,%d1		# d1 = rnd prec = table index
11568
11569	mov.w		(%sp),%d2		# fetch new exponent
11570	andi.l		&0x7fff,%d2		# strip sign
11571	sub.l		%d0,%d2			# add scale factor
11572
11573	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574	bge.b		fsub_ovfl		# yes
11575
11576	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577	blt.w		fsub_unfl		# yes
11578	beq.w		fsub_may_unfl		# maybe; go find out
11579
#
# fsub_normal: no exception. insert the rescaled exponent (d2) under the
# original sign and pop the result into fp0.
#
11580fsub_normal:
11581	mov.w		(%sp),%d1
11582	andi.w		&0x8000,%d1		# keep sign
11583	or.w		%d2,%d1			# concat sign,new exp
11584	mov.w		%d1,(%sp)		# insert new exponent
11585
11586	fmovm.x		(%sp)+,&0x80		# return result in fp0
11587
11588	mov.l		(%sp)+,%d2		# restore d2
11589	rts
11590
# the result is zero: fp0 already holds it and nothing was pushed yet.
11591fsub_zero_exit:
11592#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11593	rts
11594
#
# exponent thresholds used by fsub_norm, indexed by rounding precision
# (ext, sgl, dbl). a rescaled exponent >= the tbl_fsub_ovfl entry is an
# overflow; one below the tbl_fsub_unfl entry is an underflow, and one
# equal to it is checked further in fsub_may_unfl.
#
11595tbl_fsub_ovfl:
11596	long		0x7fff			# ext ovfl
11597	long		0x407f			# sgl ovfl
11598	long		0x43ff			# dbl ovfl
11599
11600tbl_fsub_unfl:
11601	long	        0x0000			# ext unfl
11602	long		0x3f81			# sgl unfl
11603	long		0x3c01			# dbl unfl
11604
#
# the subtract overflowed: record ovfl/aovfl/ainex. if OVFL or INEX is
# enabled, go build the EXOP (fsub_ovfl_ena, which also consumes the
# stacked result); otherwise discard the stacked result and fall through.
#
11605fsub_ovfl:
11606	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607
11608	mov.b		FPCR_ENABLE(%a6),%d1
11609	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11610	bne.b		fsub_ovfl_ena		# yes
11611
11612	add.l		&0xc,%sp		# discard stacked result
#
# fsub_ovfl_dis: on entry only the saved d2 is left on the stack. call
# ovf_res with d1 = sign flag and d0 = rounding info; on return d0 is
# or'ed into FPSR_CC and the default result is loaded through a0.
#
11613fsub_ovfl_dis:
11614	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11615	sne		%d1			# set sign param accordingly
11616	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11617	bsr.l		ovf_res			# calculate default result
11618	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11619	fmovm.x		(%a0),&0x80		# return default result in fp0
11620	mov.l		(%sp)+,%d2		# restore d2
11621	rts
11622
#
# OVFL or INEX is enabled; create the EXOP in fp1:
# - if the rounding precision is extended, the result already on the stack
#   becomes the EXOP once 0x6000 is subtracted from its rescaled exponent.
# - if the precision is single or double, the subtract is redone rounded
#   to extended precision (fsub_ovfl_ena_sd) and that result is used.
#
# the rounding precision sits in bits 7:6 of the LOW-order byte of the
# L_SCR3 longword (compare the "mov.l L_SCR3(%a6)" / "andi.b &0xc0" pair
# in fsub_unfl_ena and "mov.w 2+L_SCR3(%a6)" / "lsr.b &0x6" in fsub_norm).
# byte offset 0 is the high-order byte, so the precision has to be fetched
# from offset 3; otherwise the single/double case is never detected.
#
# on entry: stack = 12-byte result, saved d2; d0 = scale factor;
# d2 = rescaled exponent of the stacked result.
#
11623fsub_ovfl_ena:
11624	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec:mode byte
11625	andi.b		&0xc0,%d1		# is precision extended?
11626	bne.b		fsub_ovfl_ena_sd	# no
11627
11628fsub_ovfl_ena_cont:
11629	mov.w		(%sp),%d1		# fetch {sgn,exp}
11630	andi.w		&0x8000,%d1		# keep sign
11631	subi.l		&0x6000,%d2		# subtract new bias
11632	andi.w		&0x7fff,%d2		# clear top bit
11633	or.w		%d2,%d1			# concat sign,exp
11634	mov.w		%d1,(%sp)		# insert new exponent
11635
11636	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11637	bra.b		fsub_ovfl_dis		# saved d2 is still stacked
11638
#
# single or double precision: redo the subtract with the user's rounding
# mode but extended precision, and replace the stacked result with it. the
# exponent in d2 is recomputed from the new result so that the EXOP's
# sign, exponent and mantissa all come from the same operation (d0 still
# holds the scale factor; it is not reloaded until fsub_ovfl_dis).
#
11639fsub_ovfl_ena_sd:
11640	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11641
11642	mov.l		L_SCR3(%a6),%d1
11643	andi.b		&0x30,%d1		# clear rnd prec
11644	fmov.l		%d1,%fpcr		# set FPCR
11645
11646	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11647
11648	fmov.l		&0x0,%fpcr		# clear FPCR
11649
11650	add.l		&0xc,%sp		# discard sgl/dbl-rounded result
11651	fmovm.x		&0x01,-(%sp)		# stack ext-rounded result
	mov.w		(%sp),%d2		# refetch {sgn,exp}
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor
11652	bra.b		fsub_ovfl_ena_cont
11653
#
# the subtract underflowed: set the UNFL exception bit, drop the stacked
# result (the saved d2 stays on the stack) and redo the subtract with the
# FPCR set to round-to-zero. if UNFL or INEX is enabled an EXOP is created
# first (fsub_unfl_ena); either way fsub_unfl_dis produces the default
# result.
#
11654fsub_unfl:
11655	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656
11657	add.l		&0xc,%sp		# discard stacked result
11658
11659	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11660
11661	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ rounding)
11662	fmov.l		&0x0,%fpsr		# clear FPSR
11663
11664	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11665
11666	fmov.l		&0x0,%fpcr		# clear FPCR
11667	fmov.l		%fpsr,%d1		# save status
11668
11669	or.l		%d1,USER_FPSR(%a6)	# merge status into user FPSR
11670
11671	mov.b		FPCR_ENABLE(%a6),%d1
11672	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11673	bne.b		fsub_unfl_ena		# yes
11674
#
# fsub_unfl_dis: hand the RZ result (stored in FP_SCR0) to unf_res with
# the rounding info; d0 on return is or'ed into FPSR_CC, FP_SCR0 is
# reloaded into fp0, and the d2 saved by fsub_norm is restored.
#
11675fsub_unfl_dis:
11676	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11677
11678	lea		FP_SCR0(%a6),%a0	# pass: result addr
11679	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11680	bsr.l		unf_res			# calculate default result
11681	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
11682	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11683	mov.l		(%sp)+,%d2		# restore d2
11684	rts
11685
#
# UNFL or INEX is enabled: create the EXOP in fp1. the subtract is redone
# into fp1 -- with the full rounding info if the precision is extended, or
# with only the rounding mode (fsub_unfl_ena_sd) if it is single or
# double -- and the exponent of that result gets the scale factor applied
# and 0x6000 added. d2 is used as scratch without saving: the caller's d2
# is already on the stack and is restored in fsub_unfl_dis.
#
11686fsub_unfl_ena:
11687	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
11688
11689	mov.l		L_SCR3(%a6),%d1
11690	andi.b		&0xc0,%d1		# is precision extended?
11691	bne.b		fsub_unfl_ena_sd	# no
11692
11693	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11694
11695fsub_unfl_ena_cont:
11696	fmov.l		&0x0,%fpsr		# clear FPSR
11697
11698	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11699
11700	fmov.l		&0x0,%fpcr		# clear FPCR
11701
11702	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
11703	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11704	mov.l		%d1,%d2			# make a copy
11705	andi.l		&0x7fff,%d1		# strip sign
11706	andi.w		&0x8000,%d2		# keep old sign
11707	sub.l		%d0,%d1			# add scale factor
11708	addi.l		&0x6000,%d1		# add new bias
11709	andi.w		&0x7fff,%d1		# clear top bit
11710	or.w		%d2,%d1			# concat sgn,exp
11711	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11712	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11713	bra.w		fsub_unfl_dis		# now create the default result
11714
11715fsub_unfl_ena_sd:
11716	mov.l		L_SCR3(%a6),%d1
11717	andi.b		&0x30,%d1		# clear rnd prec
11718	fmov.l		%d1,%fpcr		# set FPCR
11719
11720	bra.b		fsub_unfl_ena_cont
11721
11722#
11723# result is equal to the smallest normalized number in the selected precision
11724# if the precision is extended, this result could not have come from an
11725# underflow that rounded up.
# on entry the 12-byte result is on top of the stack (above the saved d2)
# and d2 holds its rescaled exponent.
11726#
11727fsub_may_unfl:
11728	mov.l		L_SCR3(%a6),%d1
11729	andi.b		&0xc0,%d1		# fetch rnd prec; is it extended?
11730	beq.w		fsub_normal		# yes; no underflow occurred
11731
11732	mov.l		0x4(%sp),%d1		# extract hi(man)
11733	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11734	bne.w		fsub_normal		# no; no underflow occurred
11735
11736	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11737	bne.w		fsub_normal		# no; no underflow occurred
11738
11739	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740	beq.w		fsub_normal		# no; no underflow occurred
11741
11742#
11743# ok, so now the result has an exponent equal to the smallest normalized
11744# exponent for the selected precision. also, the mantissa is equal to
11745# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746# g,r,s.
11747# now, we must determine whether the pre-rounded result was an underflow
11748# rounded "up" or a normalized number rounded "down".
11749# so, we do this by re-executing the subtract using RZ as the rounding mode
11750# and seeing if the new result is smaller or equal to the current result.
# fp0 is overwritten with its absolute value for the compare; that is
# harmless because fsub_normal reloads fp0 from the stacked result and
# fsub_unfl recomputes it.
11751#
11752	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11753
11754	mov.l		L_SCR3(%a6),%d1
11755	andi.b		&0xc0,%d1		# keep rnd prec
11756	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11757	fmov.l		%d1,%fpcr		# set FPCR
11758	fmov.l		&0x0,%fpsr		# clear FPSR
11759
11760	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11761
11762	fmov.l		&0x0,%fpcr		# clear FPCR
11763
11764	fabs.x		%fp0			# compare absolute values
11765	fabs.x		%fp1
11766	fcmp.x		%fp0,%fp1		# is first result > second?
11767
11768	fbgt.w		fsub_unfl		# yes; it's an underflow
11769	bra.w		fsub_normal		# no; it's not an underflow
11770
11771##########################################################################
11772
11773#
11774# Sub: inputs are not both normalized; what are they?
11775#
11776fsub_not_norm:
# Dispatch on operand classes. d1 selects a 16-bit entry of tbl_fsub_op; the
# entry is the handler's offset from tbl_fsub_op, which the jmp adds back to
# the pc-relative table base. Entries are labelled "DST - SRC".
# NOTE(review): d1 is computed before this chunk; the 8-entry rows suggest
# (dst class * 8) + src class -- confirm against the code that sets it.
11777	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
11778	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
11779
11780	swbeg		&48
11781tbl_fsub_op:
11782	short		fsub_norm	- tbl_fsub_op # NORM - NORM
11783	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
11784	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
11785	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11786	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
11787	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11788	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11789	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11790
11791	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
11792	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
11793	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
11794	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
11795	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
11796	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
11797	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11798	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11799
11800	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
11801	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
11802	short		fsub_inf_2	- tbl_fsub_op # INF - INF
11803	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
11804	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
11805	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
11806	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11807	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11808
11809	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
11810	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
11811	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
11812	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
11813	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
11814	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
11815	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11816	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11817
11818	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
11819	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
11820	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
11821	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
11822	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
11823	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
11824	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11825	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11826
11827	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
11828	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
11829	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
11830	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
11831	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
11832	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
11833	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11834	short		tbl_fsub_op	- tbl_fsub_op # filler (offset 0)
11835
# Local trampolines: the table holds 16-bit offsets, so the NAN cases land
# here and take a long branch to the shared res_qnan/res_snan handlers.
11836fsub_res_qnan:
11837	bra.l		res_qnan
11838fsub_res_snan:
11839	bra.l		res_snan
11840
11841#
11842# both operands are ZEROes
11843#
11844fsub_zero_2:
# ZERO - ZERO (dst - src). Returns a signed zero in fp0 and sets Z (and NEG
# for -ZERO) in FPSR_CC(%a6).
#   a0 = pointer to src operand, a1 = pointer to dst operand
#   L_SCR3(%a6) = rnd prec,mode
# Clobbers d0/d1.
11845	mov.b		SRC_EX(%a0),%d0
11846	mov.b		DST_EX(%a1),%d1
11847	eor.b		%d1,%d0			# d0 = src sign ^ dst sign; d1 still = dst
11848	bpl.b		fsub_zero_2_chk_rm	# same signs; result depends on rnd mode
11849
11850# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO.
# d0 now holds the XOR of the signs, which is always negative on this path,
# so testing it would always return -ZERO ((+0)-(-0) must be +0). The dst
# sign is still intact in d1; test that instead.
11851	tst.b		%d1			# is dst negative?
11852	bmi.b		fsub_zero_2_rm		# yes
11853	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11854	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11855	rts
11856
11857#
11858# the ZEROes have the same signs:
11859# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860# - -ZERO is returned in the case of RM.
11861#
11862fsub_zero_2_chk_rm:
11863	mov.b		3+L_SCR3(%a6),%d1	# low byte of the stored rnd info
11864	andi.b		&0x30,%d1		# extract rnd mode
11865	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
11866	beq.b		fsub_zero_2_rm		# yes
11867	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11868	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11869	rts
11870
11871fsub_zero_2_rm:
11872	fmov.s		&0x80000000,%fp0	# return -ZERO
11873	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
11874	rts
11875
11876#
11877# one operand is a ZERO and the other is a DENORM or a NORM.
11878# scale the DENORM or NORM and jump to the regular fsub routine.
11879#
11880fsub_zero_dst:
# dst is ZERO: copy the src operand into FP_SCR0, scale it, store an
# all-zero operand in FP_SCR1, and rejoin the main fsub path at
# fsub_zero_entry (defined outside this chunk).
11881	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11882	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11883	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11884	bsr.l		scale_to_zero_src	# scale the operand
11885	clr.w		FP_SCR1_EX(%a6)		# FP_SCR1 = zero operand
11886	clr.l		FP_SCR1_HI(%a6)
11887	clr.l		FP_SCR1_LO(%a6)
11888	bra.w		fsub_zero_entry		# go execute fsub
11889
11890fsub_zero_src:
# src is ZERO: mirror image of the above -- dst is copied to FP_SCR1 and
# scaled, FP_SCR0 is cleared.
11891	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11892	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11893	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11894	bsr.l		scale_to_zero_dst	# scale the operand
11895	clr.w		FP_SCR0_EX(%a6)		# FP_SCR0 = zero operand
11896	clr.l		FP_SCR0_HI(%a6)
11897	clr.l		FP_SCR0_LO(%a6)
11898	bra.w		fsub_zero_entry		# go execute fsub
11899
11900#
11901# both operands are INFs. an OPERR will result if the INFs have the
11902# same signs. else, the negated src INF is returned.
11903#
11904fsub_inf_2:
11905	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11906	mov.b		DST_EX(%a1),%d1
11907	eor.b		%d1,%d0
11908	bpl.l		res_operr		# same signs: weed out (+INF)-(+INF), (-INF)-(-INF)
11909
11910# ok, so it's not an OPERR. but we do have to remember to return
11911# the src INF since that's where the 881/882 gets the j-bit.
11912
11913fsub_inf_src:
# src is INF: the result is the src INF with its sign inverted.
11914	fmovm.x		SRC(%a0),&0x80		# return src INF
11915	fneg.x		%fp0			# invert sign
11916	fbge.w		fsub_inf_done		# sign is now positive
11917	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918	rts
11919
11920fsub_inf_dst:
# dst is INF: the result is the dst INF unchanged.
11921	fmovm.x		DST(%a1),&0x80		# return dst INF
11922	tst.b		DST_EX(%a1)		# is INF negative?
11923	bpl.b		fsub_inf_done		# no
11924	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925	rts
11926
11927fsub_inf_done:
# shared exit for a positive INF result
11928	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
11929	rts
11930
11931#########################################################################
11932# XDEF ****************************************************************	#
11933#	fsqrt(): emulates the fsqrt instruction				#
11934#	fssqrt(): emulates the fssqrt instruction			#
11935#	fdsqrt(): emulates the fdsqrt instruction			#
11936#									#
11937# XREF ****************************************************************	#
11938#	scale_sqrt() - scale the source operand				#
11939#	unf_res() - return default underflow result			#
11940#	ovf_res() - return default overflow result			#
11941#	res_qnan_1op() - return QNAN result				#
11942#	res_snan_1op() - return SNAN result				#
11943#									#
11944# INPUT ***************************************************************	#
11945#	a0 = pointer to extended precision source operand		#
11946#	d0 = rnd prec,mode						#
11947#									#
11948# OUTPUT **************************************************************	#
11949#	fp0 = result							#
11950#	fp1 = EXOP (if exception occurred)				#
11951#									#
11952# ALGORITHM ***********************************************************	#
11953#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11954# norms/denorms into ext/sgl/dbl precision.				#
11955#	For norms/denorms, scale the exponents such that a sqrt		#
11956# instruction won't cause an exception. Use the regular fsqrt to	#
11957# compute a result. Check if the regular operands would have taken	#
11958# an exception. If so, return the default overflow/underflow result	#
11959# and return the EXOP if exceptions are enabled. Else, scale the	#
11960# result operand to the proper exponent.				#
11961#									#
11962#########################################################################
11963
# fssqrt/fdsqrt keep only the rounding-mode bits of d0 (0x30), force the
# precision field, and then share the fsqrt body.
11964	global		fssqrt
11965fssqrt:
11966	andi.b		&0x30,%d0		# clear rnd prec
11967	ori.b		&s_mode*0x10,%d0	# insert sgl precision
11968	bra.b		fsqrt
11969
11970	global		fdsqrt
11971fdsqrt:
11972	andi.b		&0x30,%d0		# clear rnd prec
11973	ori.b		&d_mode*0x10,%d0	# insert dbl precision
# falls through into fsqrt
11974
11975	global		fsqrt
11976fsqrt:
11977	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11978	clr.w		%d1
11979	mov.b		STAG(%a6),%d1		# d1 = source operand tag
11980	bne.w		fsqrt_not_norm		# optimize on non-norm input
11981
11982#
11983# SQUARE ROOT: norms and denorms ONLY!
11984#
11985fsqrt_norm:
11986	tst.b		SRC_EX(%a0)		# is operand negative?
11987	bmi.l		res_operr		# yes
11988
11989	andi.b		&0xc0,%d0		# is precision extended?
11990	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
11991
# extended precision, normalized operand: no scaling; execute the fsqrt
# directly on the source operand under the user's rounding info.
11992	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11993	fmov.l		&0x0,%fpsr		# clear FPSR
11994
11995	fsqrt.x		(%a0),%fp0		# execute square root
11996
11997	fmov.l		%fpsr,%d1
11998	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
11999
12000	rts
12001
12002fsqrt_denorm:
# Denormalized source (reached from fsqrt_not_norm). Negative operands are an
# OPERR; sgl/dbl precision is handled by fsqrt_not_ext. For extended
# precision the operand is copied to FP_SCR0, scaled, and finished by the
# common fsqrt_sd_normal path.
12003	tst.b		SRC_EX(%a0)		# is operand negative?
12004	bmi.l		res_operr		# yes
12005
12006	andi.b		&0xc0,%d0		# is precision extended?
12007	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
12008
12009	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12010	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12011	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12012
12013	bsr.l		scale_sqrt		# calculate scale factor
12014
12015	bra.w		fsqrt_sd_normal
12016
12017#
12018# operand is either single or double
12019#
12020fsqrt_not_ext:
# d0 holds only the precision bits here (masked with 0xc0 by the caller path).
12021	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12022	bne.w		fsqrt_dbl
12023
12024#
12025# operand is to be rounded to single precision
12026#
12027fsqrt_sgl:
12028	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12029	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12030	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12031
12032	bsr.l		scale_sqrt		# calculate scale factor
12033
# classify the scale factor returned in d0 against the sgl exponent limits
12034	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
12035	beq.w		fsqrt_sd_may_unfl	# maybe; go check
12036	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
12037	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
12038	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12039	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12040
12041#
12042# operand will NOT overflow or underflow when moved in to the fp reg file
12043#
12044fsqrt_sd_normal:
12045	fmov.l		&0x0,%fpsr		# clear FPSR
12046	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12047
12048	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12049
12050	fmov.l		%fpsr,%d1		# save FPSR
12051	fmov.l		&0x0,%fpcr		# clear FPCR
12052
12053	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12054
# Common exit: store fp0 to FP_SCR0, rewrite its exponent as (exp - d0) while
# keeping the sign, and reload the result into fp0. d2 is preserved.
12055fsqrt_sd_normal_exit:
12056	mov.l		%d2,-(%sp)		# save d2
12057	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12058	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12059	mov.l		%d1,%d2			# make a copy
12060	andi.l		&0x7fff,%d1		# strip sign
12061	sub.l		%d0,%d1			# remove scale factor (exp - d0)
12062	andi.w		&0x8000,%d2		# keep old sign
12063	or.w		%d1,%d2			# concat old sign,new exp
12064	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12065	mov.l		(%sp)+,%d2		# restore d2
12066	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12067	rts
12068
12069#
12070# operand is to be rounded to double precision
12071#
12072fsqrt_dbl:
# Same structure as fsqrt_sgl, using the double-precision exponent limits.
12073	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12074	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12075	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12076
12077	bsr.l		scale_sqrt		# calculate scale factor
12078
12079	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
12080	beq.w		fsqrt_sd_may_unfl	# maybe; go check
12081	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
12082	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
12083	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12084	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12085	bra.w		fsqrt_sd_normal		# no; go handle normalized op
12086
12087# we're on the line here and the distinguishing characteristic is whether
12088# the exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number;
12089# otherwise fall through to underflow.
12090fsqrt_sd_may_unfl:
# bit 0 of the low exponent byte distinguishes 0x3fff (set) from 0x3ffe
12091	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12092	bne.w		fsqrt_sd_normal		# yes, so no underflow
12093
12094#
12095# operand WILL underflow when moved in to the fp register file
12096#
12097fsqrt_sd_unfl:
12098	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099
12100	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ; no other bits set)
12101	fmov.l		&0x0,%fpsr		# clear FPSR
12102
12103	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
12104
12105	fmov.l		%fpsr,%d1		# save status
12106	fmov.l		&0x0,%fpcr		# clear FPCR
12107
12108	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12109
12110# if underflow or inexact is enabled, go calculate EXOP first.
12111	mov.b		FPCR_ENABLE(%a6),%d1
12112	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12113	bne.b		fsqrt_sd_unfl_ena	# yes
12114
# Default (disabled) result: store fp0 to FP_SCR0 and let unf_res rewrite it
# in place; the byte returned in d0 is OR'ed into the condition codes.
12115fsqrt_sd_unfl_dis:
12116	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12117
12118	lea		FP_SCR0(%a6),%a0	# pass: result addr
12119	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12120	bsr.l		unf_res			# calculate default result
12121	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
12122	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12123	rts
12124
12125#
12126# operand will underflow AND underflow is enabled.
12127# Therefore, we must return the result rounded to extended precision.
12128#
12129fsqrt_sd_unfl_ena:
# Build the EXOP in FP_SCR1: mantissa copied from FP_SCR0, exponent =
# (exp - d0 + 0x6000) masked to 15 bits, sign preserved. The EXOP is loaded
# into fp1 and control rejoins fsqrt_sd_unfl_dis for the default result.
# d2 is preserved.
12130	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12133
12134	mov.l		%d2,-(%sp)		# save d2
12135	mov.l		%d1,%d2			# make a copy
12136	andi.l		&0x7fff,%d1		# strip sign
12137	andi.w		&0x8000,%d2		# keep old sign
12138	sub.l		%d0,%d1			# subtract scale factor
12139	addi.l		&0x6000,%d1		# add new bias
12140	andi.w		&0x7fff,%d1		# clear top bit
12141	or.w		%d2,%d1			# concat new sign,new exp
12142	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
12143	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12144	mov.l		(%sp)+,%d2		# restore d2
12145	bra.b		fsqrt_sd_unfl_dis
12146
12147#
12148# operand WILL overflow.
12149#
12150fsqrt_sd_ovfl:
12151	fmov.l		&0x0,%fpsr		# clear FPSR
12152	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12153
12154	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12155
12156	fmov.l		&0x0,%fpcr		# clear FPCR
12157	fmov.l		%fpsr,%d1		# save FPSR
12158
12159	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12160
# Also entered from fsqrt_sd_may_ovfl once an overflow has been confirmed.
12161fsqrt_sd_ovfl_tst:
12162	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163
12164	mov.b		FPCR_ENABLE(%a6),%d1
12165	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12166	bne.b		fsqrt_sd_ovfl_ena	# yes
12167
12168#
12169# OVFL is not enabled; therefore, we must create the default result by
12170# calling ovf_res().
12171#
12172fsqrt_sd_ovfl_dis:
12173	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12174	sne		%d1			# set sign param accordingly
12175	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12176	bsr.l		ovf_res			# calculate default result
12177	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
# NOTE(review): a0 is presumably set by ovf_res to point at the default
# result -- confirm against ovf_res (outside this chunk).
12178	fmovm.x		(%a0),&0x80		# return default result in fp0
12179	rts
12180
12181#
12182# OVFL is enabled.
12183# the INEX2 bit has already been updated by the round to the correct precision.
12184# now, round to extended(and don't alter the FPSR).
12185#
12186fsqrt_sd_ovfl_ena:
# Build the EXOP in place in FP_SCR0: exponent = (exp - d0 - 0x6000) masked to
# 15 bits, sign preserved; load it into fp1, then rejoin fsqrt_sd_ovfl_dis
# for the default result. d2 is preserved.
# NOTE(review): no visible instruction on this path stores fp0 back to
# FP_SCR0, so FP_SCR0 appears to still hold the scaled source operand rather
# than the sqrt result -- confirm this is the intended EXOP.
12187	mov.l		%d2,-(%sp)		# save d2
12188	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12189	mov.l		%d1,%d2			# make a copy
12190	andi.l		&0x7fff,%d1		# strip sign
12191	andi.w		&0x8000,%d2		# keep old sign
12192	sub.l		%d0,%d1			# subtract scale factor
12193	subi.l		&0x6000,%d1		# subtract bias
12194	andi.w		&0x7fff,%d1		# clear top bit
12195	or.w		%d2,%d1			# concat sign,exp
12196	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12197	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12198	mov.l		(%sp)+,%d2		# restore d2
12199	bra.b		fsqrt_sd_ovfl_dis
12200
12201#
12202# the move in MAY overflow. so...
12203#
12204fsqrt_sd_may_ovfl:
# bit 0 of the low exponent byte distinguishes 0x3fff (set) from 0x3ffe
12205	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12206	bne.w		fsqrt_sd_ovfl		# yes, so overflow
12207
12208	fmov.l		&0x0,%fpsr		# clear FPSR
12209	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12210
12211	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12212
12213	fmov.l		%fpsr,%d1		# save status
12214	fmov.l		&0x0,%fpcr		# clear FPCR
12215
12216	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12217
# compare a copy of the (still scaled) result against 1 so fp0 is untouched
12218	fmov.x		%fp0,%fp1		# make a copy of result
12219	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
12220	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
12221
12222# no, it didn't overflow; we have correct result
12223	bra.w		fsqrt_sd_normal_exit
12224
12225##########################################################################
12226
12227#
12228# input is not normalized; what is it?
12229#
12230fsqrt_not_norm:
# d1 = source operand tag (loaded from STAG(%a6) in fsqrt). Compare chain:
# DENORM, ZERO, INF and SNAN are tested explicitly; anything else is handled
# as a QNAN.
12231	cmpi.b		%d1,&DENORM		# weed out DENORM
12232	beq.w		fsqrt_denorm
12233	cmpi.b		%d1,&ZERO		# weed out ZERO
12234	beq.b		fsqrt_zero
12235	cmpi.b		%d1,&INF		# weed out INF
12236	beq.b		fsqrt_inf
12237	cmpi.b		%d1,&SNAN		# weed out SNAN
12238	beq.l		res_snan_1op
12239	bra.l		res_qnan_1op		# remaining tag values
12240
12241#
12242#	fsqrt(+0) = +0
12243#	fsqrt(-0) = -0
12244#	fsqrt(+INF) = +INF
12245#	fsqrt(-INF) = OPERR
12246#
12247fsqrt_zero:
# ZERO source: return a zero of the same sign and set the matching ccodes.
12248	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
12249	bmi.b		fsqrt_zero_m		# negative
12250fsqrt_zero_p:
12251	fmov.s		&0x00000000,%fp0	# return +ZERO
12252	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
12253	rts
12254fsqrt_zero_m:
12255	fmov.s		&0x80000000,%fp0	# return -ZERO
12256	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
12257	rts
12258
12259fsqrt_inf:
# INF source: -INF is an operand error; +INF is returned unchanged.
12260	tst.b		SRC_EX(%a0)		# is INF positive or negative?
12261	bmi.l		res_operr		# negative
12262fsqrt_inf_p:
12263	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
12264	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
12265	rts
12266
12267#########################################################################
12268# XDEF ****************************************************************	#
12269#	fetch_dreg(): fetch register according to index in d1		#
12270#									#
12271# XREF ****************************************************************	#
12272#	None								#
12273#									#
12274# INPUT ***************************************************************	#
12275#	d1 = index of register to fetch from				#
12276#									#
12277# OUTPUT **************************************************************	#
12278#	d0 = value of register fetched					#
12279#									#
12280# ALGORITHM ***********************************************************	#
12281#	According to the index value in d1 which can range from zero	#
12282# to fifteen, load the corresponding register file value (where		#
12283# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
12284# stack. The rest should still be in their original places.		#
12285#									#
12286#########################################################################
12287
12288# this routine leaves d1 intact for subsequent store_dreg calls.
12289	global		fetch_dreg
12290fetch_dreg:
# The table offset is loaded into d0 (not d1), so the index in d1 survives.
12291	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
12292	jmp		(tbl_fdreg.b,%pc,%d0.w*1)
12293
# 16-bit handler offsets relative to tbl_fdreg: entries 0-7 = d0-d7,
# entries 8-15 = a0-a7.
12294tbl_fdreg:
12295	short		fdreg0 - tbl_fdreg
12296	short		fdreg1 - tbl_fdreg
12297	short		fdreg2 - tbl_fdreg
12298	short		fdreg3 - tbl_fdreg
12299	short		fdreg4 - tbl_fdreg
12300	short		fdreg5 - tbl_fdreg
12301	short		fdreg6 - tbl_fdreg
12302	short		fdreg7 - tbl_fdreg
12303	short		fdreg8 - tbl_fdreg
12304	short		fdreg9 - tbl_fdreg
12305	short		fdrega - tbl_fdreg
12306	short		fdregb - tbl_fdreg
12307	short		fdregc - tbl_fdreg
12308	short		fdregd - tbl_fdreg
12309	short		fdrege - tbl_fdreg
12310	short		fdregf - tbl_fdreg
12311
12312fdreg0:
12313	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0: saved copy in the frame
12314	rts
12315fdreg1:
12316	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1: saved copy in the frame
12317	rts
12318fdreg2:
12319	mov.l		%d2,%d0			# d2-d7: read the live register
12320	rts
12321fdreg3:
12322	mov.l		%d3,%d0
12323	rts
12324fdreg4:
12325	mov.l		%d4,%d0
12326	rts
12327fdreg5:
12328	mov.l		%d5,%d0
12329	rts
12330fdreg6:
12331	mov.l		%d6,%d0
12332	rts
12333fdreg7:
12334	mov.l		%d7,%d0
12335	rts
12336fdreg8:
12337	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0: saved copy in the frame
12338	rts
12339fdreg9:
12340	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1: saved copy in the frame
12341	rts
12342fdrega:
12343	mov.l		%a2,%d0			# a2-a5: read the live register
12344	rts
12345fdregb:
12346	mov.l		%a3,%d0
12347	rts
12348fdregc:
12349	mov.l		%a4,%d0
12350	rts
12351fdregd:
12352	mov.l		%a5,%d0
12353	rts
12354fdrege:
12355	mov.l		(%a6),%d0		# a6: value stored at (%a6)
12356	rts
12357fdregf:
12358	mov.l		EXC_A7(%a6),%d0		# a7: saved copy in the frame
12359	rts
12360
12361#########################################################################
12362# XDEF ****************************************************************	#
12363#	store_dreg_l(): store longword to data register specified by d1	#
12364#									#
12365# XREF ****************************************************************	#
12366#	None								#
12367#									#
12368# INPUT ***************************************************************	#
12369#	d0 = longword value to store					#
12370#	d1 = index of register to store to				#
12371#									#
12372# OUTPUT **************************************************************	#
12373#	(data register is updated)					#
12374#									#
12375# ALGORITHM ***********************************************************	#
12376#	According to the index value in d1, store the longword value	#
12377# in d0 to the corresponding data register. D0/D1 are on the stack	#
12378# while the rest are in their initial places.				#
12379#									#
12380#########################################################################
12381
12382	global		store_dreg_l
12383store_dreg_l:
# d1 is overwritten with the table offset, so the index is not preserved.
12384	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
12385	jmp		(tbl_sdregl.b,%pc,%d1.w*1)
12386
# 16-bit handler offsets relative to tbl_sdregl, one per data register d0-d7
12387tbl_sdregl:
12388	short		sdregl0 - tbl_sdregl
12389	short		sdregl1 - tbl_sdregl
12390	short		sdregl2 - tbl_sdregl
12391	short		sdregl3 - tbl_sdregl
12392	short		sdregl4 - tbl_sdregl
12393	short		sdregl5 - tbl_sdregl
12394	short		sdregl6 - tbl_sdregl
12395	short		sdregl7 - tbl_sdregl
12396
12397sdregl0:
12398	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0: update the saved copy
12399	rts
12400sdregl1:
12401	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1: update the saved copy
12402	rts
12403sdregl2:
12404	mov.l		%d0,%d2			# d2-d7: write the live register
12405	rts
12406sdregl3:
12407	mov.l		%d0,%d3
12408	rts
12409sdregl4:
12410	mov.l		%d0,%d4
12411	rts
12412sdregl5:
12413	mov.l		%d0,%d5
12414	rts
12415sdregl6:
12416	mov.l		%d0,%d6
12417	rts
12418sdregl7:
12419	mov.l		%d0,%d7
12420	rts
12421
12422#########################################################################
12423# XDEF ****************************************************************	#
12424#	store_dreg_w(): store word to data register specified by d1	#
12425#									#
12426# XREF ****************************************************************	#
12427#	None								#
12428#									#
12429# INPUT ***************************************************************	#
12430#	d0 = word value to store					#
12431#	d1 = index of register to store to				#
12432#									#
12433# OUTPUT **************************************************************	#
12434#	(data register is updated)					#
12435#									#
12436# ALGORITHM ***********************************************************	#
12437#	According to the index value in d1, store the word value	#
12438# in d0 to the corresponding data register. D0/D1 are on the stack	#
12439# while the rest are in their initial places.				#
12440#									#
12441#########################################################################
12442
12443	global		store_dreg_w
12444store_dreg_w:
# d1 is overwritten with the table offset, so the index is not preserved.
12445	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
12446	jmp		(tbl_sdregw.b,%pc,%d1.w*1)
12447
# 16-bit handler offsets relative to tbl_sdregw, one per data register d0-d7
12448tbl_sdregw:
12449	short		sdregw0 - tbl_sdregw
12450	short		sdregw1 - tbl_sdregw
12451	short		sdregw2 - tbl_sdregw
12452	short		sdregw3 - tbl_sdregw
12453	short		sdregw4 - tbl_sdregw
12454	short		sdregw5 - tbl_sdregw
12455	short		sdregw6 - tbl_sdregw
12456	short		sdregw7 - tbl_sdregw
12457
# For d0/d1 the "2+" offset addresses the low-order word of the saved
# longword (big-endian), leaving the upper word untouched.
12458sdregw0:
12459	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
12460	rts
12461sdregw1:
12462	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
12463	rts
12464sdregw2:
12465	mov.w		%d0,%d2			# d2-d7: write low word of live reg
12466	rts
12467sdregw3:
12468	mov.w		%d0,%d3
12469	rts
12470sdregw4:
12471	mov.w		%d0,%d4
12472	rts
12473sdregw5:
12474	mov.w		%d0,%d5
12475	rts
12476sdregw6:
12477	mov.w		%d0,%d6
12478	rts
12479sdregw7:
12480	mov.w		%d0,%d7
12481	rts
12482
12483#########################################################################
12484# XDEF ****************************************************************	#
12485#	store_dreg_b(): store byte to data register specified by d1	#
12486#									#
12487# XREF ****************************************************************	#
12488#	None								#
12489#									#
12490# INPUT ***************************************************************	#
12491#	d0 = byte value to store					#
12492#	d1 = index of register to store to				#
12493#									#
12494# OUTPUT **************************************************************	#
12495#	(data register is updated)					#
12496#									#
12497# ALGORITHM ***********************************************************	#
12498#	According to the index value in d1, store the byte value	#
12499# in d0 to the corresponding data register. D0/D1 are on the stack	#
12500# while the rest are in their initial places.				#
12501#									#
12502#########################################################################
12503
12504	global		store_dreg_b
12505store_dreg_b:
# d1 is overwritten with the table offset, so the index is not preserved.
12506	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
12507	jmp		(tbl_sdregb.b,%pc,%d1.w*1)
12508
# 16-bit handler offsets relative to tbl_sdregb, one per data register d0-d7
12509tbl_sdregb:
12510	short		sdregb0 - tbl_sdregb
12511	short		sdregb1 - tbl_sdregb
12512	short		sdregb2 - tbl_sdregb
12513	short		sdregb3 - tbl_sdregb
12514	short		sdregb4 - tbl_sdregb
12515	short		sdregb5 - tbl_sdregb
12516	short		sdregb6 - tbl_sdregb
12517	short		sdregb7 - tbl_sdregb
12518
# For d0/d1 the "3+" offset addresses the low-order byte of the saved
# longword (big-endian), leaving the upper three bytes untouched.
12519sdregb0:
12520	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
12521	rts
12522sdregb1:
12523	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
12524	rts
12525sdregb2:
12526	mov.b		%d0,%d2			# d2-d7: write low byte of live reg
12527	rts
12528sdregb3:
12529	mov.b		%d0,%d3
12530	rts
12531sdregb4:
12532	mov.b		%d0,%d4
12533	rts
12534sdregb5:
12535	mov.b		%d0,%d5
12536	rts
12537sdregb6:
12538	mov.b		%d0,%d6
12539	rts
12540sdregb7:
12541	mov.b		%d0,%d7
12542	rts
12543
12544#########################################################################
12545# XDEF ****************************************************************	#
12546#	inc_areg(): increment an address register by the value in d0	#
12547#									#
12548# XREF ****************************************************************	#
12549#	None								#
12550#									#
12551# INPUT ***************************************************************	#
12552#	d0 = amount to increment by					#
12553#	d1 = index of address register to increment			#
12554#									#
12555# OUTPUT **************************************************************	#
12556#	(address register is updated)					#
12557#									#
12558# ALGORITHM ***********************************************************	#
12559#	Typically used for an instruction w/ a post-increment <ea>,	#
12560# this routine adds the increment value in d0 to the address register	#
12561# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
12562# in their original places.						#
12563#	For a7, if the increment amount is one, then we have to		#
12564# increment by two. For any a7 update, set the mia7_flag so that if	#
12565# an access error exception occurs later in emulation, this address	#
12566# register update can be undone.					#
12567#									#
12568#########################################################################
12569
12570	global		inc_areg
12571inc_areg:
# d1 is overwritten with the table offset, so the index is not preserved.
12572	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
12573	jmp		(tbl_iareg.b,%pc,%d1.w*1)
12574
# 16-bit handler offsets relative to tbl_iareg, one per address register a0-a7
12575tbl_iareg:
12576	short		iareg0 - tbl_iareg
12577	short		iareg1 - tbl_iareg
12578	short		iareg2 - tbl_iareg
12579	short		iareg3 - tbl_iareg
12580	short		iareg4 - tbl_iareg
12581	short		iareg5 - tbl_iareg
12582	short		iareg6 - tbl_iareg
12583	short		iareg7 - tbl_iareg
12584
12585iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0: saved copy in the frame
12586	rts
12587iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1: saved copy in the frame
12588	rts
12589iareg2:	add.l		%d0,%a2			# a2-a5: live register
12590	rts
12591iareg3:	add.l		%d0,%a3
12592	rts
12593iareg4:	add.l		%d0,%a4
12594	rts
12595iareg5:	add.l		%d0,%a5
12596	rts
12597iareg6:	add.l		%d0,(%a6)		# a6: value stored at (%a6)
12598	rts
# a7: record the update in SPCOND_FLG; a byte-sized increment (low byte of
# d0 == 1) is bumped to 2.
12599iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
12600	cmpi.b		%d0,&0x1
12601	beq.b		iareg7b
12602	add.l		%d0,EXC_A7(%a6)
12603	rts
12604iareg7b:
12605	addq.l		&0x2,EXC_A7(%a6)
12606	rts
12607
12608#########################################################################
12609# XDEF ****************************************************************	#
12610#	dec_areg(): decrement an address register by the value in d0	#
12611#									#
12612# XREF ****************************************************************	#
12613#	None								#
12614#									#
12615# INPUT ***************************************************************	#
12616#	d0 = amount to decrement by					#
12617#	d1 = index of address register to decrement			#
12618#									#
12619# OUTPUT **************************************************************	#
12620#	(address register is updated)					#
12621#									#
12622# ALGORITHM ***********************************************************	#
12623#	Typically used for an instruction w/ a pre-decrement <ea>,	#
12624# this routine subtracts the decrement value in d0 from the address	#
12625# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
12626# reside in their original places.					#
12627#	For a7, if the decrement amount is one, then we have to		#
12628# decrement by two. For any a7 update, set the mda7_flag so that if	#
12629# an access error exception occurs later in emulation, this address	#
12630# register update can be undone.					#
12631#									#
12632#########################################################################
12633
12634	global		dec_areg
12635dec_areg:
# d1 is overwritten with the table offset, so the index is not preserved.
12636	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1
12637	jmp		(tbl_dareg.b,%pc,%d1.w*1)
12638
# 16-bit handler offsets relative to tbl_dareg, one per address register a0-a7
12639tbl_dareg:
12640	short		dareg0 - tbl_dareg
12641	short		dareg1 - tbl_dareg
12642	short		dareg2 - tbl_dareg
12643	short		dareg3 - tbl_dareg
12644	short		dareg4 - tbl_dareg
12645	short		dareg5 - tbl_dareg
12646	short		dareg6 - tbl_dareg
12647	short		dareg7 - tbl_dareg
12648
12649dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0: saved copy in the frame
12650	rts
12651dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1: saved copy in the frame
12652	rts
12653dareg2:	sub.l		%d0,%a2			# a2-a5: live register
12654	rts
12655dareg3:	sub.l		%d0,%a3
12656	rts
12657dareg4:	sub.l		%d0,%a4
12658	rts
12659dareg5:	sub.l		%d0,%a5
12660	rts
12661dareg6:	sub.l		%d0,(%a6)		# a6: value stored at (%a6)
12662	rts
# a7: record the update in SPCOND_FLG; a byte-sized decrement (low byte of
# d0 == 1) is bumped to 2.
12663dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
12664	cmpi.b		%d0,&0x1
12665	beq.b		dareg7b
12666	sub.l		%d0,EXC_A7(%a6)
12667	rts
12668dareg7b:
12669	subq.l		&0x2,EXC_A7(%a6)
12670	rts
12671
12672##############################################################################
12673
12674#########################################################################
12675# XDEF ****************************************************************	#
12676#	load_fpn1(): load FP register value into FP_SRC(a6).		#
12677#									#
12678# XREF ****************************************************************	#
12679#	None								#
12680#									#
12681# INPUT ***************************************************************	#
12682#	d0 = index of FP register to load				#
12683#									#
12684# OUTPUT **************************************************************	#
12685#	FP_SRC(a6) = value loaded from FP register file			#
12686#									#
12687# ALGORITHM ***********************************************************	#
12688#	Using the index in d0, load FP_SRC(a6) with a number from the	#
12689# FP register file.							#
12690#									#
12691#########################################################################
12692
12693	global		load_fpn1
12694load_fpn1:
# d0 is overwritten with the table offset. Every case also leaves a0
# pointing at FP_SRC(%a6).
12695	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
12696	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)
12697
# 16-bit handler offsets relative to tbl_load_fpn1, one per FP register fp0-fp7
12698tbl_load_fpn1:
12699	short		load_fpn1_0 - tbl_load_fpn1
12700	short		load_fpn1_1 - tbl_load_fpn1
12701	short		load_fpn1_2 - tbl_load_fpn1
12702	short		load_fpn1_3 - tbl_load_fpn1
12703	short		load_fpn1_4 - tbl_load_fpn1
12704	short		load_fpn1_5 - tbl_load_fpn1
12705	short		load_fpn1_6 - tbl_load_fpn1
12706	short		load_fpn1_7 - tbl_load_fpn1
12707
# fp0/fp1: copied as three longwords from the EXC_FP0/EXC_FP1 areas
12708load_fpn1_0:
12709	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
12710	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12711	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712	lea		FP_SRC(%a6), %a0
12713	rts
12714load_fpn1_1:
12715	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
12716	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12717	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718	lea		FP_SRC(%a6), %a0
12719	rts
# fp2-fp7: stored straight from the live register with a one-bit fmovm mask
12720load_fpn1_2:
12721	fmovm.x		&0x20, FP_SRC(%a6)
12722	lea		FP_SRC(%a6), %a0
12723	rts
12724load_fpn1_3:
12725	fmovm.x		&0x10, FP_SRC(%a6)
12726	lea		FP_SRC(%a6), %a0
12727	rts
12728load_fpn1_4:
12729	fmovm.x		&0x08, FP_SRC(%a6)
12730	lea		FP_SRC(%a6), %a0
12731	rts
12732load_fpn1_5:
12733	fmovm.x		&0x04, FP_SRC(%a6)
12734	lea		FP_SRC(%a6), %a0
12735	rts
12736load_fpn1_6:
12737	fmovm.x		&0x02, FP_SRC(%a6)
12738	lea		FP_SRC(%a6), %a0
12739	rts
12740load_fpn1_7:
12741	fmovm.x		&0x01, FP_SRC(%a6)
12742	lea		FP_SRC(%a6), %a0
12743	rts
12744
12745#############################################################################
12746
12747#########################################################################
12748# XDEF ****************************************************************	#
12749#	load_fpn2(): load FP register value into FP_DST(a6).		#
12750#									#
12751# XREF ****************************************************************	#
12752#	None								#
12753#									#
12754# INPUT ***************************************************************	#
12755#	d0 = index of FP register to load				#
12756#									#
12757# OUTPUT **************************************************************	#
12758#	FP_DST(a6) = value loaded from FP register file			#
12759#									#
12760# ALGORITHM ***********************************************************	#
12761#	Using the index in d0, load FP_DST(a6) with a number from the	#
12762# FP register file.							#
12763#									#
12764#########################################################################
12765
# load_fpn2: copy the FP register selected by d0 (0-7) into FP_DST(a6).
# On return a0 holds the address of FP_DST(a6); d0 has been overwritten
# with the 16-bit table offset used for the dispatch.
12766	global		load_fpn2
12767load_fpn2:
12768	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0 = table entry for index d0
12769	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)	# jump to tbl_load_fpn2 + offset
12770
# Jump table: one 16-bit offset per FP register, relative to tbl_load_fpn2.
12771tbl_load_fpn2:
12772	short		load_fpn2_0 - tbl_load_fpn2
12773	short		load_fpn2_1 - tbl_load_fpn2
12774	short		load_fpn2_2 - tbl_load_fpn2
12775	short		load_fpn2_3 - tbl_load_fpn2
12776	short		load_fpn2_4 - tbl_load_fpn2
12777	short		load_fpn2_5 - tbl_load_fpn2
12778	short		load_fpn2_6 - tbl_load_fpn2
12779	short		load_fpn2_7 - tbl_load_fpn2
12780
# Index 0 and 1 are copied (three longwords = 12 bytes) from the
# EXC_FP0/EXC_FP1 slots addressed off a6 rather than from the live
# registers. NOTE(review): presumably because the live fp0/fp1 are used
# as scratch by the package - confirm against the handler entry code.
12781load_fpn2_0:
12782	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
12783	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
12784	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785	lea		FP_DST(%a6), %a0	# return ptr to the loaded operand
12786	rts
12787load_fpn2_1:
12788	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
12789	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
12790	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791	lea		FP_DST(%a6), %a0	# return ptr to the loaded operand
12792	rts
# Index 2-7 are written to FP_DST with a single-register fmovm.x; the mask
# selects the register matching the stub's index (0x20 for index 2 down to
# 0x01 for index 7).
12793load_fpn2_2:
12794	fmovm.x		&0x20, FP_DST(%a6)
12795	lea		FP_DST(%a6), %a0
12796	rts
12797load_fpn2_3:
12798	fmovm.x		&0x10, FP_DST(%a6)
12799	lea		FP_DST(%a6), %a0
12800	rts
12801load_fpn2_4:
12802	fmovm.x		&0x08, FP_DST(%a6)
12803	lea		FP_DST(%a6), %a0
12804	rts
12805load_fpn2_5:
12806	fmovm.x		&0x04, FP_DST(%a6)
12807	lea		FP_DST(%a6), %a0
12808	rts
12809load_fpn2_6:
12810	fmovm.x		&0x02, FP_DST(%a6)
12811	lea		FP_DST(%a6), %a0
12812	rts
12813load_fpn2_7:
12814	fmovm.x		&0x01, FP_DST(%a6)
12815	lea		FP_DST(%a6), %a0
12816	rts
12817
12818#############################################################################
12819
12820#########################################################################
12821# XDEF ****************************************************************	#
12822#	store_fpreg(): store an fp value to the fpreg designated d0.	#
12823#									#
12824# XREF ****************************************************************	#
12825#	None								#
12826#									#
12827# INPUT ***************************************************************	#
12828#	fp0 = extended precision value to store				#
12829#	d0  = index of floating-point register				#
12830#									#
12831# OUTPUT **************************************************************	#
12832#	None								#
12833#									#
12834# ALGORITHM ***********************************************************	#
12835#	Store the value in fp0 to the FP register designated by the	#
12836# value in d0. The FP number can be DENORM or SNAN so we have to be	#
12837# careful that we don't take an exception here.				#
12838#									#
12839#########################################################################
12840
# store_fpreg: write the value in fp0 to the FP register selected by d0 (0-7).
# d0 is overwritten with the 16-bit table offset used for the dispatch.
# All transfers use fmovm.x; per the routine header the value may be a
# DENORM or SNAN and the store must not raise an exception.
12841	global		store_fpreg
12842store_fpreg:
12843	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0 = table entry for index d0
12844	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)	# jump to tbl_store_fpreg + offset
12845
# Jump table: one 16-bit offset per FP register, relative to tbl_store_fpreg.
12846tbl_store_fpreg:
12847	short		store_fpreg_0 - tbl_store_fpreg
12848	short		store_fpreg_1 - tbl_store_fpreg
12849	short		store_fpreg_2 - tbl_store_fpreg
12850	short		store_fpreg_3 - tbl_store_fpreg
12851	short		store_fpreg_4 - tbl_store_fpreg
12852	short		store_fpreg_5 - tbl_store_fpreg
12853	short		store_fpreg_6 - tbl_store_fpreg
12854	short		store_fpreg_7 - tbl_store_fpreg
12855
# Index 0 and 1: fp0 is written to the EXC_FP0/EXC_FP1 slot addressed off a6,
# not to a live register (the same slots load_fpn1/load_fpn2 read from).
12856store_fpreg_0:
12857	fmovm.x		&0x80, EXC_FP0(%a6)
12858	rts
12859store_fpreg_1:
12860	fmovm.x		&0x80, EXC_FP1(%a6)
12861	rts
# Index 2-7: fp0 is pushed on the stack (predecrement mask 0x01) and
# popped straight into the target register (postincrement mask 0x20 for
# index 2 down to 0x01 for index 7). The stack pointer is unchanged on exit.
12862store_fpreg_2:
12863	fmovm.x		&0x01, -(%sp)
12864	fmovm.x		(%sp)+, &0x20
12865	rts
12866store_fpreg_3:
12867	fmovm.x		&0x01, -(%sp)
12868	fmovm.x		(%sp)+, &0x10
12869	rts
12870store_fpreg_4:
12871	fmovm.x		&0x01, -(%sp)
12872	fmovm.x		(%sp)+, &0x08
12873	rts
12874store_fpreg_5:
12875	fmovm.x		&0x01, -(%sp)
12876	fmovm.x		(%sp)+, &0x04
12877	rts
12878store_fpreg_6:
12879	fmovm.x		&0x01, -(%sp)
12880	fmovm.x		(%sp)+, &0x02
12881	rts
12882store_fpreg_7:
12883	fmovm.x		&0x01, -(%sp)
12884	fmovm.x		(%sp)+, &0x01
12885	rts
12886
12887#########################################################################
12888# XDEF ****************************************************************	#
12889#	get_packed(): fetch a packed operand from memory and then	#
12890#		      convert it to a floating-point binary number.	#
12891#									#
12892# XREF ****************************************************************	#
12893#	_dcalc_ea() - calculate the correct <ea>			#
12894#	_dmem_read() - fetch the packed operand from memory		#
12895#	facc_in_x() - the fetch failed so jump to special exit code	#
12896#	decbin()    - convert packed to binary extended precision	#
12897#									#
12898# INPUT ***************************************************************	#
12899#	None								#
12900#									#
12901# OUTPUT **************************************************************	#
12902#	If no failure on _dmem_read():					#
12903#	FP_SRC(a6) = packed operand now as a binary FP number		#
12904#									#
12905# ALGORITHM ***********************************************************	#
12906#	Get the correct <ea> which is the value on the exception stack	#
12907# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
12908# Then, fetch the operand from memory. If the fetch fails, exit		#
12909# through facc_in_x().							#
12910#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
12911# its binary representation here. Else, call decbin() which will	#
12912# convert the packed value to an extended precision binary value.	#
12913#									#
12914#########################################################################
12915
12916# the stacked <ea> for packed is correct except for -(An).
12917# the base reg must be updated for both -(An) and (An)+.
# get_packed: read the 12-byte packed-decimal source operand into
# FP_SRC(a6). If its exponent field is all ones (INF/NAN) or its mantissa
# digits are all zero (ZERO), the routine returns with FP_SRC(a6) holding
# the operand exactly as fetched. Otherwise decbin() converts it and the
# extended-precision result in fp0 is stored back to FP_SRC(a6).
# A failed read does not return here: control goes to facc_in_x.
12918	global		get_packed
12919get_packed:
12920	mov.l		&0xc,%d0		# packed is 12 bytes
12921	bsr.l		_dcalc_ea		# fetch <ea>; correct An
12922
# NOTE(review): a0 is not set up here, so _dcalc_ea presumably returns the
# source address in a0 for _dmem_read - confirm against _dcalc_ea.
12923	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
12924	mov.l		&0xc,%d0		# pass: 12 bytes
12925	bsr.l		_dmem_read		# read packed operand
12926
12927	tst.l		%d1			# did dfetch fail?
12928	bne.l		facc_in_x		# yes
12929
12930# The packed operand is an INF or a NAN if the exponent field is all ones.
12931	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
12932	cmpi.w		%d0,&0x7fff		# INF or NAN?
12933	bne.b		gp_try_zero		# no
12934	rts					# operand is an INF or NAN
12935
12936# The packed operand is a zero if the mantissa is all zero, else it's
12937# a normal packed op.
# The mantissa is the low nibble of byte offset 3 (the integer digit) plus
# the two following longwords (FP_SRC_HI and FP_SRC_LO).
12938gp_try_zero:
12939	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
12940	andi.b		&0x0f,%d0		# clear all but last nybble
12941	bne.b		gp_not_spec		# not a zero
12942	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
12943	bne.b		gp_not_spec		# not a zero
12944	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
12945	bne.b		gp_not_spec		# not a zero
12946	rts					# operand is a ZERO
12947gp_not_spec:
12948	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
12949	bsr.l		decbin			# convert to extended
12950	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
12951	rts
12952
12953#########################################################################
12954# decbin(): Converts normalized packed bcd value pointed to by register	#
12955#	    a0 to extended-precision value in fp0.			#
12956#									#
12957# INPUT ***************************************************************	#
12958#	a0 = pointer to normalized packed bcd value			#
12959#									#
12960# OUTPUT **************************************************************	#
12961#	fp0 = exact fp representation of the packed bcd value.		#
12962#									#
12963# ALGORITHM ***********************************************************	#
12964#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
12965#	and NaN operands are dispatched without entering this routine)	#
12966#	value in 68881/882 format at location (a0).			#
12967#									#
12968#	A1. Convert the bcd exponent to binary by successive adds and	#
12969#	muls. Set the sign according to SE. Subtract 16 to compensate	#
12970#	for the mantissa which is to be interpreted as 17 integer	#
12971#	digits, rather than 1 integer and 16 fraction digits.		#
12972#	Note: this operation can never overflow.			#
12973#									#
12974#	A2. Convert the bcd mantissa to binary by successive		#
12975#	adds and muls in FP0. Set the sign according to SM.		#
12976#	The mantissa digits will be converted with the decimal point	#
12977#	assumed following the least-significant digit.			#
12978#	Note: this operation can never overflow.			#
12979#									#
12980#	A3. Count the number of leading/trailing zeros in the		#
12981#	bcd string.  If SE is positive, count the leading zeros;	#
12982#	if negative, count the trailing zeros.  Set the adjusted	#
12983#	exponent equal to the exponent from A1 and the zero count	#
12984#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
12985#	mantissa the equivalent of forcing in the bcd value:		#
12986#									#
12987#	SM = 0	a non-zero digit in the integer position		#
12988#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
12989#									#
12990#	this will insure that any value, regardless of its		#
12991#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
12992#	consistently.							#
12993#									#
12994#	A4. Calculate the factor 10^exp in FP1 using a table of		#
12995#	10^(2^n) values.  To reduce the error in forming factors	#
12996#	greater than 10^27, a directed rounding scheme is used with	#
12997#	tables rounded to RN, RM, and RP, according to the table	#
12998#	in the comments of the pwrten section.				#
12999#									#
13000#	A5. Form the final binary number by scaling the mantissa by	#
13001#	the exponent factor.  This is done by multiplying the		#
13002#	mantissa in FP0 by the factor in FP1 if the adjusted		#
13003#	exponent sign is positive, and dividing FP0 by FP1 if		#
13004#	it is negative.							#
13005#									#
13006#	Clean up and return. Check if the final mul or div was inexact.	#
13007#	If so, set INEX1 in USER_FPSR.					#
13008#									#
13009#########################################################################
13010
13011#
13012#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013#	to nearest, minus, and plus, respectively.  The tables include
13014#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13015#	is required until the power is greater than 27, however, all
13016#	tables include the first 5 for ease of indexing.
13017#
# RTABLE: rounding-mode lookup used by pwrten in decbin. The byte index is
# {FPCR rounding mode (2 bits), SM, SE}: each row below is one user
# rounding mode, each column one {SM,SE} combination. The byte read is the
# rounding mode installed for the power-of-ten computation and also picks
# the table: bit 0 set -> PTENRP, else bit 1 set -> PTENRM, else PTENRN
# (so 0 = RN, 2 = RM, 3 = RP).
13018RTABLE:
13019	byte		0,0,0,0
13020	byte		2,3,2,3
13021	byte		2,3,3,2
13022	byte		3,2,2,3
13023
# Mantissa digit loop: FNIBS is the dbf count (7 -> 8 nibbles per longword),
# FSTRT is the bit offset of the first nibble.
13024	set		FNIBS,7
13025	set		FSTRT,0
13026
# Exponent digit loop: ESTRT is the bit offset of the first exponent nibble
# in the first longword, EDIGITS is the dbf count (2 -> 3 digits).
13027	set		ESTRT,4
13028	set		EDIGITS,2
13029
# decbin: convert the normalized packed bcd value at (a0) to extended
# precision in fp0.
#   In:   a0 = pointer to the packed value (copied to FP_SCR0(a6); the
#	      caller's copy is not modified)
#   Out:  fp0 = converted value; INEX1/AINEX is or'ed into USER_FPSR(a6)
#	      if the final multiply/divide was inexact
#   Preserved: d2-d5, fp1
#   Modified:  d0, d1, a0 (left pointing at FP_SCR0(a6)), a1;
#	      FPCR and FPSR are both zero on return
13030	global		decbin
13031decbin:
13032	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
13035
13036	lea		FP_SCR0(%a6),%a0
13037
13038	movm.l		&0x3c00,-(%sp)		# save d2-d5
# In predecrement mode the fmovm mask is bit-reversed (bit 0 = fp0, bit 1 =
# fp1), so fp1 is pushed with mask 0x2. It is popped at no_exc with the
# postincrement mask 0x40, which also selects fp1.
13039	fmovm.x		&0x2,-(%sp)		# save fp1
13040#
13041# Calculate exponent:
13042#  1. Copy bcd value in memory for use as a working copy.
13043#  2. Calculate absolute value of exponent in d1 by mul and add.
13044#  3. Correct for exponent sign.
13045#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046#     (i.e., all digits assumed left of the decimal point.)
13047#
13048# Register usage:
13049#
13050#  calc_e:
13051#	(*)  d0: temp digit storage
13052#	(*)  d1: accumulator for binary exponent
13053#	(*)  d2: digit count
13054#	(*)  d3: offset pointer
13055#	( )  d4: first word of bcd
13056#	( )  a0: pointer to working bcd value
13057#	( )  a6: pointer to original bcd value
13058#	(*)  FP_SCR0: working copy of original bcd value
13059#	(*)  L_SCR1: copy of original exponent word
13060#
13061calc_e:
13062	mov.l		&EDIGITS,%d2		# dbf count for the exponent digits (count - 1)
13063	mov.l		&ESTRT,%d3		# counter to pick up digits
13064	mov.l		(%a0),%d4		# get first word of bcd
13065	clr.l		%d1			# zero d1 for accumulator
13066e_gd:
13067	mulu.l		&0xa,%d1		# mul partial product by one digit place
13068	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
13069	add.l		%d0,%d1			# d1 = d1 + d0
13070	addq.b		&4,%d3			# advance d3 to the next digit
13071	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
13072	btst		&30,%d4			# get SE
13073	beq.b		e_pos			# don't negate if pos
13074	neg.l		%d1			# negate before subtracting
13075e_pos:
13076	sub.l		&16,%d1			# sub to compensate for shift of mant
13077	bge.b		e_save			# if still pos, do not neg
13078	neg.l		%d1			# now negative, make pos and set SE
13079	or.l		&0x40000000,%d4		# set SE in d4,
13080	or.l		&0x40000000,(%a0)	# and in working bcd
13081e_save:
13082	mov.l		%d1,-(%sp)		# save exp on stack
13083#
13084#
13085# Calculate mantissa:
13086#  1. Calculate absolute value of mantissa in fp0 by mul and add.
13087#  2. Correct for mantissa sign.
13088#     (i.e., all digits assumed left of the decimal point.)
13089#
13090# Register usage:
13091#
13092#  calc_m:
13093#	(*)  d0: temp digit storage
13094#	(*)  d1: lword counter
13095#	(*)  d2: digit count
13096#	(*)  d3: offset pointer
13097#	( )  d4: words 2 and 3 of bcd
13098#	( )  a0: pointer to working bcd value
13099#	( )  a6: pointer to original bcd value
13100#	(*) fp0: mantissa accumulator
13101#	( )  FP_SCR0: working copy of original bcd value
13102#	( )  L_SCR1: copy of original exponent word
13103#
13104calc_m:
13105	mov.l		&1,%d1			# word counter, init to 1
13106	fmov.s		&0x00000000,%fp0	# accumulator
13107#
13108#
13109#  Since the packed number has a long word between the first & second parts,
13110#  get the integer digit then skip down & get the rest of the
13111#  mantissa.  We will unroll the loop once.
13112#
13113	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
13114	fadd.b		%d0,%fp0		# add digit to sum in fp0
13115#
13116#
13117#  Get the rest of the mantissa.
13118#
13119loadlw:
13120	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
13121	mov.l		&FSTRT,%d3		# counter to pick up digits
13122	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
13123md2b:
13124	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
13125	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
13126	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
13127#
13128#
13129#  If all the digits (8) in that long word have been converted (d2=0),
13130#  then inc d1 (=2) to point to the next long word and reset d3 to 0
13131#  to initialize the digit offset, and set d2 to 7 for the digit count;
13132#  else continue with this long word.
13133#
13134	addq.b		&4,%d3			# advance d3 to the next digit
13135	dbf.w		%d2,md2b		# check for last digit in this lw
13136nextlw:
13137	addq.l		&1,%d1			# inc lw pointer in mantissa
13138	cmp.l		%d1,&2			# test for last lw
13139	ble.b		loadlw			# if not, get last one
13140#
13141#  Check the sign of the mant and make the value in fp0 the same sign.
13142#
13143m_sign:
13144	btst		&31,(%a0)		# test sign of the mantissa
13145	beq.b		ap_st_z			# if clear, go to append/strip zeros
13146	fneg.x		%fp0			# if set, negate fp0
13147#
13148# Append/strip zeros:
13149#
13150#  For adjusted exponents which have an absolute value greater than 27*,
13151#  this routine calculates the amount needed to normalize the mantissa
13152#  for the adjusted exponent.  That number is subtracted from the exp
13153#  if the exp was positive, and added if it was negative.  The purpose
13154#  of this is to reduce the value of the exponent and the possibility
13155#  of error in calculation of pwrten.
13156#
13157#  1. Branch on the sign of the adjusted exponent.
13158#  2p.(positive exp)
13159#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160#   3. Add one for each zero encountered until a non-zero digit.
13161#   4. Subtract the count from the exp.
13162#   5. Check if the exp has crossed zero in #3 above; make the exp abs
13163#	   and set SE.
13164#	6. Multiply the mantissa by 10**count.
13165#  2n.(negative exp)
13166#   2. Check the digits in lwords 3 and 2 in descending order.
13167#   3. Add one for each zero encountered until a non-zero digit.
13168#   4. Add the count to the exp.
13169#   5. Check if the exp has crossed zero in #3 above; clear SE.
13170#   6. Divide the mantissa by 10**count.
13171#
13172#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
13173#   any adjustment due to append/strip zeros will drive the resultant
13174#   exponent towards zero.  Since all pwrten constants with a power
13175#   of 27 or less are exact, there is no need to use this routine to
13176#   attempt to lessen the resultant exponent.
13177#
13178# Register usage:
13179#
13180#  ap_st_z:
13181#	(*)  d0: temp digit storage
13182#	(*)  d1: zero count
13183#	(*)  d2: digit count
13184#	(*)  d3: offset pointer
13185#	( )  d4: first word of bcd
13186#	(*)  d5: lword counter
13187#	( )  a0: pointer to working bcd value
13188#	( )  FP_SCR0: working copy of original bcd value
13189#	( )  L_SCR1: copy of original exponent word
13190#
13191#
13192# First check the absolute value of the exponent to see if this
13193# routine is necessary.  If so, then check the sign of the exponent
13194# and do append (+) or strip (-) zeros accordingly.
13195# This section handles a positive adjusted exponent.
13196#
13197ap_st_z:
13198	mov.l		(%sp),%d1		# load expA for range test
13199	cmp.l		%d1,&27			# test is with 27
13200	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
13201	btst		&30,(%a0)		# check sign of exp
13202	bne.b		ap_st_n			# if neg, go to neg side
13203	clr.l		%d1			# zero count reg
13204	mov.l		(%a0),%d4		# load lword 1 to d4
13205	bfextu		%d4{&28:&4},%d0		# get M16 in d0
13206	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
13207	addq.l		&1,%d1			# inc zero count
13208	mov.l		&1,%d5			# init lword counter
13209	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
13210	bne.b		ap_p_cl			# if lw 2 is non-zero, go scan its digits
13211	addq.l		&8,%d1			# lw 2 is all zeros: inc count by 8
13212	addq.l		&1,%d5			# inc lword counter
13213	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
13214ap_p_cl:
13215	clr.l		%d3			# init offset reg
13216	mov.l		&7,%d2			# init digit counter
13217ap_p_gd:
13218	bfextu		%d4{%d3:&4},%d0		# get digit
13219	bne.b		ap_p_fx			# if non-zero, go to fix exp
13220	addq.l		&4,%d3			# point to next digit
13221	addq.l		&1,%d1			# inc digit counter
13222	dbf.w		%d2,ap_p_gd		# get next digit
13223ap_p_fx:
13224	mov.l		%d1,%d0			# copy counter to d0
13225	mov.l		(%sp),%d1		# get adjusted exp from memory
13226	sub.l		%d0,%d1			# subtract count from exp
13227	bge.b		ap_p_fm			# if still pos, go to pwrten
13228	neg.l		%d1			# now its neg; get abs
13229	mov.l		(%a0),%d4		# load lword 1 to d4
13230	or.l		&0x40000000,%d4		# and set SE in d4
13231	or.l		&0x40000000,(%a0)	# and in memory
13232#
13233# Calculate the mantissa multiplier to compensate for the stripping of
13234# zeros from the mantissa.
13235#
13236ap_p_fm:
13237	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13238	clr.l		%d3			# init table index
13239	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13240	mov.l		&3,%d2			# init d2 to count bits in counter (not read by the loop below)
13241ap_p_el:
13242	asr.l		&1,%d0			# shift lsb into carry
13243	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
13244	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13245ap_p_en:
13246	add.l		&12,%d3			# inc d3 to next rtable entry
13247	tst.l		%d0			# check if d0 is zero
13248	bne.b		ap_p_el			# if not, get next bit
13249	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
13250	bra.b		pwrten			# go calc pwrten
13251#
13252# This section handles a negative adjusted exponent.
13253#
13254ap_st_n:
13255	clr.l		%d1			# clr counter
13256	mov.l		&2,%d5			# set up d5 to point to lword 3
13257	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
13258	bne.b		ap_n_cl			# if not zero, check digits
13259	sub.l		&1,%d5			# dec d5 to point to lword 2
13260	addq.l		&8,%d1			# inc counter by 8
13261	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
13262ap_n_cl:
13263	mov.l		&28,%d3			# point to last digit
13264	mov.l		&7,%d2			# init digit counter
13265ap_n_gd:
13266	bfextu		%d4{%d3:&4},%d0		# get digit
13267	bne.b		ap_n_fx			# if non-zero, go to exp fix
13268	subq.l		&4,%d3			# point to previous digit
13269	addq.l		&1,%d1			# inc digit counter
13270	dbf.w		%d2,ap_n_gd		# get next digit
13271ap_n_fx:
13272	mov.l		%d1,%d0			# copy counter to d0
13273	mov.l		(%sp),%d1		# get adjusted exp from memory
13274	sub.l		%d0,%d1			# subtract count from exp
13275	bgt.b		ap_n_fm			# if still pos, go fix mantissa
13276	neg.l		%d1			# take abs of exp and clr SE
13277	mov.l		(%a0),%d4		# load lword 1 to d4
13278	and.l		&0xbfffffff,%d4		# and clr SE in d4
13279	and.l		&0xbfffffff,(%a0)	# and in memory
13280#
13281# Calculate the mantissa multiplier to compensate for the appending of
13282# zeros to the mantissa.
13283#
13284ap_n_fm:
13285	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13286	clr.l		%d3			# init table index
13287	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13288	mov.l		&3,%d2			# init d2 to count bits in counter (not read by the loop below)
13289ap_n_el:
13290	asr.l		&1,%d0			# shift lsb into carry
13291	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
13292	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13293ap_n_en:
13294	add.l		&12,%d3			# inc d3 to next rtable entry
13295	tst.l		%d0			# check if d0 is zero
13296	bne.b		ap_n_el			# if not, get next bit
13297	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
13298#
13299#
13300# Calculate power-of-ten factor from adjusted and shifted exponent.
13301#
13302# Register usage:
13303#
13304#  pwrten:
13305#	(*)  d0: temp
13306#	( )  d1: exponent
13307#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308#	(*)  d3: FPCR work copy
13309#	( )  d4: first word of bcd
13310#	(*)  a1: RTABLE pointer
13311#  calc_p:
13312#	(*)  d0: temp
13313#	( )  d1: exponent
13314#	(*)  d3: PWRTxx table index
13315#	( )  a0: pointer to working copy of bcd
13316#	(*)  a1: PWRTxx pointer
13317#	(*) fp1: power-of-ten accumulator
13318#
13319# Pwrten calculates the exponent factor in the selected rounding mode
13320# according to the following table:
13321#
13322#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13323#
13324#	ANY	  ANY	RN	RN
13325#
13326#	 +	   +	RP	RP
13327#	 -	   +	RP	RM
13328#	 +	   -	RP	RM
13329#	 -	   -	RP	RP
13330#
13331#	 +	   +	RM	RM
13332#	 -	   +	RM	RP
13333#	 +	   -	RM	RP
13334#	 -	   -	RM	RM
13335#
13336#	 +	   +	RZ	RM
13337#	 -	   +	RZ	RM
13338#	 +	   -	RZ	RP
13339#	 -	   -	RZ	RP
13340#
13341#
13342pwrten:
13343	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
13344	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
13345	mov.l		(%a0),%d4		# reload 1st bcd word to d4
13346	asl.l		&2,%d2			# format d2 to be
13347	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
13348	add.l		%d0,%d2			# in d2 as index into RTABLE
13349	lea.l		RTABLE(%pc),%a1		# load rtable base
13350	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
13351	clr.l		%d3			# clear d3 to force no exc and extended
13352	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
13353	fmov.l		%d3,%fpcr		# write new FPCR
13354	asr.l		&1,%d0			# write correct PTENxx table
13355	bcc.b		not_rp			# to a1
13356	lea.l		PTENRP(%pc),%a1		# it is RP
13357	bra.b		calc_p			# go to init section
13358not_rp:
13359	asr.l		&1,%d0			# keep checking
13360	bcc.b		not_rm
13361	lea.l		PTENRM(%pc),%a1		# it is RM
13362	bra.b		calc_p			# go to init section
13363not_rm:
13364	lea.l		PTENRN(%pc),%a1		# it is RN
13365calc_p:
13366	mov.l		%d1,%d0			# copy exp to d0;use d0
13367	bpl.b		no_neg			# if exp is negative,
13368	neg.l		%d0			# invert it
13369	or.l		&0x40000000,(%a0)	# and set SE bit
13370no_neg:
13371	clr.l		%d3			# table index
13372	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13373e_loop:
13374	asr.l		&1,%d0			# shift next bit into carry
13375	bcc.b		e_next			# if zero, skip the mul
13376	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13377e_next:
13378	add.l		&12,%d3			# inc d3 to next rtable entry
13379	tst.l		%d0			# check if d0 is zero
13380	bne.b		e_loop			# not zero, continue shifting
13381#
13382#
13383#  Check the sign of the adjusted exp and make the value in fp0 the
13384#  same sign. If the exp was pos then multiply fp1*fp0;
13385#  else divide fp0/fp1.
13386#
13387# Register Usage:
13388#  norm:
13389#	( )  a0: pointer to working bcd value
13390#	(*) fp0: mantissa accumulator
13391#	( ) fp1: scaling factor - 10**(abs(exp))
13392#
13393pnorm:
13394	btst		&30,(%a0)		# test the sign of the exponent
13395	beq.b		mul			# if clear, go to multiply
13396div:
13397	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by exp
13398	bra.b		end_dec
13399mul:
13400	fmul.x		%fp1,%fp0		# exp is positive, so multiply by exp
13401#
13402#
13403# Clean up and return with result in fp0.
13404#
13405# If the final mul/div in decbin incurred an inex exception,
13406# it will be inex2, but will be reported as inex1 by get_op.
13407#
13408end_dec:
13409	fmov.l		%fpsr,%d0		# get status register
13410	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
13411	beq.b		no_exc			# skip this if no exc
13412	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413no_exc:
13414	add.l		&0x4,%sp		# clear 1 lw param
13415	fmovm.x		(%sp)+,&0x40		# restore fp1
13416	movm.l		(%sp)+,&0x3c		# restore d2-d5
13417	fmov.l		&0x0,%fpcr
13418	fmov.l		&0x0,%fpsr
13419	rts
13420
13421#########################################################################
13422# bindec(): Converts an input in extended precision format to bcd format#
13423#									#
13424# INPUT ***************************************************************	#
13425#	a0 = pointer to the input extended precision value in memory.	#
13426#	     the input may be either normalized, unnormalized, or	#
13427#	     denormalized.						#
13428#	d0 = contains the k-factor sign-extended to 32-bits.		#
13429#									#
13430# OUTPUT **************************************************************	#
13431#	FP_SCR0(a6) = bcd format result on the stack.			#
13432#									#
13433# ALGORITHM ***********************************************************	#
13434#									#
13435#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
13436#		The k-factor is saved for use in d7. Clear the		#
13437#		BINDEC_FLG for separating normalized/denormalized	#
13438#		input.  If input is unnormalized or denormalized,	#
13439#		normalize it.						#
13440#									#
13441#	A2.	Set X = abs(input).					#
13442#									#
13443#	A3.	Compute ILOG.						#
13444#		ILOG is the log base 10 of the input value.  It is	#
13445#		approximated by adding e + 0.f when the original	#
13446#		value is viewed as 2^^e * 1.f in extended precision.	#
13447#		This value is stored in d6.				#
13448#									#
13449#	A4.	Clr INEX bit.						#
13450#		The operation in A3 above may have set INEX2.		#
13451#									#
13452#	A5.	Set ICTR = 0;						#
13453#		ICTR is a flag used in A13.  It must be set before the	#
13454#		loop entry A6.						#
13455#									#
13456#	A6.	Calculate LEN.						#
13457#		LEN is the number of digits to be displayed.  The	#
13458#		k-factor can dictate either the total number of digits,	#
13459#		if it is a positive number, or the number of digits	#
13460#		after the decimal point which are to be included as	#
13461#		significant.  See the 68882 manual for examples.	#
13462#		If LEN is computed to be greater than 17, set OPERR in	#
13463#		USER_FPSR.  LEN is stored in d4.			#
13464#									#
13465#	A7.	Calculate SCALE.					#
13466#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
13467#		of decimal places needed to insure LEN integer digits	#
13468#		in the output before conversion to bcd. LAMBDA is the	#
13469#		sign of ISCALE, used in A9. Fp1 contains		#
13470#		10^^(abs(ISCALE)) using a rounding mode which is a	#
13471#		function of the original rounding mode and the signs	#
13472#		of ISCALE and X.  A table is given in the code.		#
13473#									#
13474#	A8.	Clr INEX; Force RZ.					#
13475#		The operation in A3 above may have set INEX2.		#
13476#		RZ mode is forced for the scaling operation to insure	#
13477#		only one rounding error.  The grs bits are collected in #
13478#		the INEX flag for use in A10.				#
13479#									#
13480#	A9.	Scale X -> Y.						#
13481#		The mantissa is scaled to the desired number of		#
13482#		significant digits.  The excess digits are collected	#
13483#		in INEX2.						#
13484#									#
13485#	A10.	Or in INEX.						#
13486#		If INEX is set, round error occurred.  This is		#
13487#		compensated for by 'or-ing' in the INEX2 flag to	#
13488#		the lsb of Y.						#
13489#									#
13490#	A11.	Restore original FPCR; set size ext.			#
13491#		Perform FINT operation in the user's rounding mode.	#
13492#		Keep the size to extended.				#
13493#									#
13494#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
13495#		mode.  The FPSP routine sintd0 is used.  The output	#
13496#		is in fp0.						#
13497#									#
13498#	A13.	Check for LEN digits.					#
13499#		If the int operation results in more than LEN digits,	#
13500#		or less than LEN -1 digits, adjust ILOG and repeat from	#
13501#		A6.  This test occurs only on the first pass.  If the	#
13502#		result is exactly 10^LEN, decrement ILOG and divide	#
13503#		the mantissa by 10.					#
13504#									#
13505#	A14.	Convert the mantissa to bcd.				#
13506#		The binstr routine is used to convert the LEN digit	#
13507#		mantissa to bcd in memory.  The input to binstr is	#
13508#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
13509#		such that the decimal point is to the left of bit 63.	#
13510#		The bcd digits are stored in the correct position in	#
13511#		the final string area in memory.			#
13512#									#
13513#	A15.	Convert the exponent to bcd.				#
13514#		As in A14 above, the exp is converted to bcd and the	#
13515#		digits are stored in the final string.			#
13516#		Test the length of the final exponent string.  If the	#
13517#		length is 4, set operr.					#
13518#									#
13519#	A16.	Write sign bits to final string.			#
13520#									#
13521#########################################################################
13522
13523set	BINDEC_FLG,	EXC_TEMP	# DENORM flag: byte in the a6 frame, cleared in A1 and set (st) when the input is a denorm
13524
13525# Constants in extended precision
# Both values are roughly 0.30103 (log10(2)); they differ only in the last
# bit of the mantissa.  A3 multiplies by PLOG2 when e + 0.f is >= 0 and by
# PLOG2UP1 when it is negative.  The fmul.x reads use the first three longs.
13526PLOG2:
13527	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13528PLOG2UP1:
13529	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13530
13531# Constants in single precision
# Only the first long of each entry is the single-precision value; it is the
# long read by the fmov.s/fmul.s/fdiv.s/fsub.s references in bindec.
13532FONE:
13533	long		0x3F800000,0x00000000,0x00000000,0x00000000	# 1.0
13534FTWO:
13535	long		0x40000000,0x00000000,0x00000000,0x00000000	# 2.0
13536FTEN:
13537	long		0x41200000,0x00000000,0x00000000,0x00000000	# 10.0
13538F4933:
13539	long		0x459A2800,0x00000000,0x00000000,0x00000000	# 4933.0
13540
# RBDTBL: rounding mode to use while building 10^ISCALE in A7.
# Byte index = (user rmode << 2) | (LAMBDA << 1) | sign(X), as assembled in
# d1 by A7.  Each row below is one user rmode (RN, RZ, RM, RP); the value is
# the mode loaded into FPCR: 0 = RN, 2 = RM, 3 = RP.
13541RBDTBL:
13542	byte		0,0,0,0
13543	byte		3,3,2,2
13544	byte		3,2,2,3
13545	byte		2,3,3,2
13546
13547#	Implementation Notes:
13548#
13549#	The registers are used as follows:
13550#
13551#		d0: scratch; LEN input to binstr
13552#		d1: scratch
13553#		d2: upper 32-bits of mantissa for binstr
13554#		d3: scratch;lower 32-bits of mantissa for binstr
13555#		d4: LEN
13556#		d5: LAMBDA/ICTR
13557#		d6: ILOG
13558#		d7: k-factor
13559#		a0: ptr for original operand/final result
13560#		a1: scratch pointer
13561#		a2: pointer to FP_X; abs(original value) in ext
13562#		fp0: scratch
13563#		fp1: scratch
13564#		fp2: scratch
13565#		F_SCR1:
13566#		F_SCR2:
13567#		L_SCR1:
13568#		L_SCR2:
13569
# bindec: convert the extended-precision operand at (%a0) to packed decimal.
#   In:  a0 = ptr to operand, d0 = k-factor, a6 = frame holding STAG,
#        USER_FPCR/USER_FPSR and the scratch areas used below.
#   Out: packed result is built in FP_SCR0(%a6); OPERR/AIOP may be or-ed
#        into USER_FPSR(%a6) (see A6 and A15).
#   d2-d7/a2 and fp0-fp2 are saved here and restored before the rts;
#   d0/d1/a0/a1 are not preserved.
13570	global		bindec
13571bindec:
13572	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
13573	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
13574
13575# A1. Set RM and size ext. Set SIGMA = sign input;
13576#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13577#     separating  normalized/denormalized input.  If the input
13578#     is a denormalized number, set the BINDEC_FLG memory word
13579#     to signal denorm.  If the input is unnormalized, normalize
13580#     the input and test for denormalized result.
13581#
13582	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
13583	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
13584	mov.l		%d0,%d7		# move k-factor to d7
13585
13586	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
13587	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
13588	bne.w		A2_str		# no; input is a NORM
13589
13590#
13591# Normalize the denorm
13592#
13593un_de_norm:
13594	mov.w		(%a0),%d0	# d0 = sign/exponent word of input
13595	and.w		&0x7fff,%d0	# strip sign of normalized exp
13596	mov.l		4(%a0),%d1	# d1 = upper 32 bits of mantissa
13597	mov.l		8(%a0),%d2	# d2 = lower 32 bits of mantissa
13598norm_loop:
13599	sub.w		&1,%d0		# one less in exponent per left shift
13600	lsl.l		&1,%d2		# shift d1:d2 left by one bit;
13601	roxl.l		&1,%d1		# msb of d2 carries into d1 via X
13602	tst.l		%d1		# is msb of mantissa set yet?
13603	bge.b		norm_loop	# no; keep shifting
13604#
13605# Test if the normalized input is denormalized
13606#
13607	tst.w		%d0
13608	bgt.b		pos_exp		# if greater than zero, it is a norm
13609	st		BINDEC_FLG(%a6)	# set flag for denorm
13610pos_exp:
13611	and.w		&0x7fff,%d0	# strip sign of normalized exp
13612	mov.w		%d0,(%a0)	# write normalized operand back over
13613	mov.l		%d1,4(%a0)	# the input: exponent word, then
13614	mov.l		%d2,8(%a0)	# both mantissa longs
13615
13616# A2. Set X = abs(input).
#     The (possibly normalized) operand at (%a0) is copied into FP_SCR1
#     and the sign bit of the copy is cleared; (%a0) itself keeps its sign.
13617#
13618A2_str:
13619	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
13620	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
13621	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
13622	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
13623
13624# A3. Compute ILOG.
13625#     ILOG is the log base 10 of the input value.  It is approx-
13626#     imated by adding e + 0.f when the original value is viewed
13627#     as 2^^e * 1.f in extended precision.  This value is stored
13628#     in d6.
#     The sum e + 0.f is then multiplied by PLOG2 (or PLOG2UP1 when the
#     sum is negative) and converted to a longword.  FPCR still holds the
#     RM mode set in A1 when the fmov.l conversion is done.
#     Denormalized input (BINDEC_FLG set) skips the estimate and uses
#     ILOG = -4933.
13629#
13630# Register usage:
13631#	Input/Output
13632#	d0: k-factor/exponent
13633#	d2: x/x
13634#	d3: x/x
13635#	d4: x/x
13636#	d5: x/x
13637#	d6: x/ILOG
13638#	d7: k-factor/Unchanged
13639#	a0: ptr for original operand/final result
13640#	a1: x/x
13641#	a2: x/x
13642#	fp0: x/float(ILOG)
13643#	fp1: x/x
13644#	fp2: x/x
13645#	F_SCR1:x/x
13646#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647#	L_SCR1:x/x
13648#	L_SCR2:first word of X packed/Unchanged
13649
13650	tst.b		BINDEC_FLG(%a6)	# check for denorm
13651	beq.b		A3_cont		# if clr, continue with norm
13652	mov.l		&-4933,%d6	# force ILOG = -4933
13653	bra.b		A4_str
13654A3_cont:
13655	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
13656	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
13657	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
13658	sub.w		&0x3fff,%d0	# strip off bias
13659	fadd.w		%d0,%fp0	# add in exp
13660	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
13661	fbge.w		pos_res		# if pos, branch
13662	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
13663	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13664	bra.b		A4_str		# go move out ILOG
13665pos_res:
13666	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
13667	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13668
13669
13670# A4. Clr INEX bit.
13671#     The operation in A3 above may have set INEX2.
#     The whole FPSR is written with zero, not just the INEX2 bit.
13672
13673A4_str:
13674	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
13675
13676
13677# A5. Set ICTR = 0;
13678#     ICTR is a flag used in A13.  It must be set before the
13679#     loop entry A6. The lower word of d5 is used for ICTR.
#     A7 and A13 each swap the halves of d5, so ICTR is back in the
#     lower word whenever A13 tests it.
13680
13681	clr.w		%d5		# clear ICTR
13682
13683# A6. Calculate LEN.
13684#     LEN is the number of digits to be displayed.  The k-factor
13685#     can dictate either the total number of digits, if it is
13686#     a positive number, or the number of digits after the
13687#     original decimal point which are to be included as
13688#     significant.  See the 68882 manual for examples.
13689#     If LEN is computed to be greater than 17, set OPERR in
13690#     USER_FPSR.  LEN is stored in d4.
#     LEN is clamped to the range 1..17.  OPERR/AIOP are only set when
#     the clamp to 17 happens with k > 0.
#     A13 branches back to A6_str after adjusting ILOG.
13691#
13692# Register usage:
13693#	Input/Output
13694#	d0: exponent/Unchanged
13695#	d2: x/x/scratch
13696#	d3: x/x
13697#	d4: exc picture/LEN
13698#	d5: ICTR/Unchanged
13699#	d6: ILOG/Unchanged
13700#	d7: k-factor/Unchanged
13701#	a0: ptr for original operand/final result
13702#	a1: x/x
13703#	a2: x/x
13704#	fp0: float(ILOG)/Unchanged
13705#	fp1: x/x
13706#	fp2: x/x
13707#	F_SCR1:x/x
13708#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709#	L_SCR1:x/x
13710#	L_SCR2:first word of X packed/Unchanged
13711
13712A6_str:
13713	tst.l		%d7		# branch on sign of k
13714	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
13715	mov.l		%d7,%d4		# if k > 0, LEN = k
13716	bra.b		len_ck		# skip to LEN check
13717k_neg:
13718	mov.l		%d6,%d4		# first load ILOG to d4
13719	sub.l		%d7,%d4		# subtract off k
13720	addq.l		&1,%d4		# add in the 1
13721len_ck:
13722	tst.l		%d4		# LEN check: branch on sign of LEN
13723	ble.b		LEN_ng		# if LEN <= 0, set LEN = 1
13724	cmp.l		%d4,&17		# test if LEN > 17
13725	ble.b		A7_str		# if not, forget it
13726	mov.l		&17,%d4		# set max LEN = 17
13727	tst.l		%d7		# OPERR is only set for k > 0
13728	ble.b		A7_str		# k <= 0: clamp silently
13729	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
13730	bra.b		A7_str		# finished here
13731LEN_ng:
13732	mov.l		&1,%d4		# min LEN is 1
13733
13734
13735# A7. Calculate SCALE.
13736#     SCALE is equal to 10^ISCALE, where ISCALE is the number
13737#     of decimal places needed to insure LEN integer digits
13738#     in the output before conversion to bcd. LAMBDA is the sign
13739#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13740#     the rounding mode as given in the following table (see
13741#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742#     of opposite sign in bindec.sa from Coonen).
#     The table below is stored as RBDTBL; the code indexes it with
#     (rmode << 2) | (LAMBDA << 1) | SIGN(X).  The rmode bits are read
#     from USER_FPCR with bfextu {26:2}.
13743#
13744#	Initial					USE
13745#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
13746#	----------------------------------------------
13747#	 RN	00	   0	   0		00/0	RN
13748#	 RN	00	   0	   1		00/0	RN
13749#	 RN	00	   1	   0		00/0	RN
13750#	 RN	00	   1	   1		00/0	RN
13751#	 RZ	01	   0	   0		11/3	RP
13752#	 RZ	01	   0	   1		11/3	RP
13753#	 RZ	01	   1	   0		10/2	RM
13754#	 RZ	01	   1	   1		10/2	RM
13755#	 RM	10	   0	   0		11/3	RP
13756#	 RM	10	   0	   1		10/2	RM
13757#	 RM	10	   1	   0		10/2	RM
13758#	 RM	10	   1	   1		11/3	RP
13759#	 RP	11	   0	   0		10/2	RM
13760#	 RP	11	   0	   1		11/3	RP
13761#	 RP	11	   1	   0		11/3	RP
13762#	 RP	11	   1	   1		10/2	RM
13763#
13764# Register usage:
13765#	Input/Output
13766#	d0: exponent/scratch - final is 0
13767#	d2: x/0 or 24 for A9
13768#	d3: x/scratch - offset ptr into PTENRM array
13769#	d4: LEN/Unchanged
13770#	d5: 0/ICTR:LAMBDA
13771#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772#	d7: k-factor/Unchanged
13773#	a0: ptr for original operand/final result
13774#	a1: x/ptr to PTENRM array
13775#	a2: x/x
13776#	fp0: float(ILOG)/Unchanged
13777#	fp1: x/10^ISCALE
13778#	fp2: x/x
13779#	F_SCR1:x/x
13780#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781#	L_SCR1:x/x
13782#	L_SCR2:first word of X packed/Unchanged
13783
13784A7_str:
13785	tst.l		%d7		# test sign of k
13786	bgt.b		k_pos		# if pos and > 0, skip this
13787	cmp.l		%d7,%d6		# test k - ILOG
13788	blt.b		k_pos		# if k < ILOG, skip this
13789	mov.l		%d7,%d6		# if ((k<=0) & (ILOG <= k)) ILOG = k
13790k_pos:
13791	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
13792	addq.l		&1,%d0		# add the 1
13793	sub.l		%d4,%d0		# sub off LEN
13794	swap		%d5		# use upper word of d5 for LAMBDA
13795	clr.w		%d5		# set it zero initially
13796	clr.w		%d2		# set up d2 for very small case
13797	tst.l		%d0		# test sign of ISCALE
13798	bge.b		iscale		# if pos, skip next inst
13799	addq.w		&1,%d5		# if neg, set LAMBDA true
13800	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
13801	bgt.b		no_inf		# if false, skip rest
13802	add.l		&24,%d0		# add in 24 to iscale
13803	mov.l		&24,%d2		# put 24 in d2 for A9
13804no_inf:
13805	neg.l		%d0		# and take abs of ISCALE
13806iscale:
13807	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
13808	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
13809	lsl.w		&1,%d1		# put them in bits 2:1
13810	add.w		%d5,%d1		# add in LAMBDA
13811	lsl.w		&1,%d1		# put them in bits 3:1
13812	tst.l		L_SCR2(%a6)	# test sign of original x
13813	bge.b		x_pos		# if pos, don't set bit 0
13814	addq.l		&1,%d1		# if neg, set bit 0
13815x_pos:
13816	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
13817	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
13818	lsl.l		&4,%d3		# move rmode up 4 bits for the fpcr write
13819	fmov.l		%d3,%fpcr	# load bits into fpu
13820	lsr.l		&4,%d3		# move rmode back down for the decode below
13821	tst.b		%d3		# decode new rmode for pten table
13822	bne.b		not_rn		# if zero, it is RN
13823	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
13824	bra.b		rmode		# exit decode
13825not_rn:
13826	lsr.b		&1,%d3		# get lsb in carry
13827	bcc.b		not_rp2		# if carry clear, it is RM
13828	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
13829	bra.b		rmode		# exit decode
13830not_rp2:
13831	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
13832rmode:
13833	clr.l		%d3		# clr table index
# Build fp1 = 10^abs(ISCALE): each set bit n of d0 multiplies in the
# table entry 10^(2^n); entries are 12 bytes apart.
13834e_loop2:
13835	lsr.l		&1,%d0		# shift next bit into carry
13836	bcc.b		e_next2		# if zero, skip the mul
13837	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
13838e_next2:
13839	add.l		&12,%d3		# inc d3 to next pwrten table entry
13840	tst.l		%d0		# test if ISCALE is zero
13841	bne.b		e_loop2		# if not, loop
13842
13843# A8. Clr INEX; Force RZ.
13844#     The operation in A3 above may have set INEX2.
13845#     RZ mode is forced for the scaling operation to insure
13846#     only one rounding error.  The grs bits are collected in
13847#     the INEX flag for use in A10.
#     This also replaces the rounding mode that A7 loaded for the
#     10^ISCALE computation.
13848#
13849# Register usage:
13850#	Input/Output
13851
13852	fmov.l		&0,%fpsr	# clr INEX
13853	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
13854
13855# A9. Scale X -> Y.
13856#     The mantissa is scaled to the desired number of significant
13857#     digits.  The excess digits are collected in INEX2. If mul,
13858#     Check d2 for excess 10 exponential value.  If not zero,
13859#     the iscale value would have caused the pwrten calculation
13860#     to overflow.  Only a negative iscale can cause this, so
13861#     multiply by 10^(d2), which is now only allowed to be 24,
13862#     with a multiply by 10^8 and 10^16, which is exact since
13863#     10^24 is exact.  If the input was denormalized, we must
13864#     create a busy stack frame with the mul command and the
13865#     two operands, and allow the fpu to complete the multiply.
#     In the code below the denormalized case builds no stack frame:
#     it folds all exponents into the exponent of 10^ISCALE and then
#     multiplies mantissas that carry a forced 0x3fff exponent.
13866#
13867# Register usage:
13868#	Input/Output
13869#	d0: FPCR with RZ mode/Unchanged
13870#	d2: 0 or 24/unchanged
13871#	d3: x/x
13872#	d4: LEN/Unchanged
13873#	d5: ICTR:LAMBDA
13874#	d6: ILOG/Unchanged
13875#	d7: k-factor/Unchanged
13876#	a0: ptr for original operand/final result
13877#	a1: ptr to PTENRM array/Unchanged
13878#	a2: x/x
13879#	fp0: float(ILOG)/X adjusted for SCALE (Y)
13880#	fp1: 10^ISCALE/Unchanged
13881#	fp2: x/x
13882#	F_SCR1:x/x
13883#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884#	L_SCR1:x/x
13885#	L_SCR2:first word of X packed/Unchanged
13886
13887A9_str:
13888	fmov.x		(%a0),%fp0	# load X from memory
13889	fabs.x		%fp0		# use abs(X)
13890	tst.w		%d5		# LAMBDA is in lower word of d5
13891	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
13892	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
13893	bra.w		A10_st		# branch to A10
13894
13895sc_mul:
13896	tst.b		BINDEC_FLG(%a6)	# check for denorm
13897	beq.w		A9_norm		# if norm, continue with mul
13898
13899# for DENORM, we must calculate:
13900#	fp0 = input_op * 10^ISCALE * 10^24
13901# since the input operand is a DENORM, we can't multiply it directly.
13902# so, we do the multiplication of the exponents and mantissas separately.
13903# in this way, we avoid underflow on intermediate stages of the
13904# multiplication and guarantee a result without exception.
13905	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
13906
13907	mov.w		(%sp),%d3	# grab exponent
13908	andi.w		&0x7fff,%d3	# clear sign
13909	ori.w		&0x8000,(%a0)	# make DENORM exp negative
13910	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
13911	subi.w		&0x3fff,%d3	# subtract BIAS
13912	add.w		36(%a1),%d3	# add exponent word of 10^8 entry
13913	subi.w		&0x3fff,%d3	# subtract BIAS
13914	add.w		48(%a1),%d3	# add exponent word of 10^16 entry
13915	subi.w		&0x3fff,%d3	# subtract BIAS
13916
13917	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
13918
13919	andi.w		&0x8000,(%sp)	# keep sign
13920	or.w		%d3,(%sp)	# insert new exponent
13921	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
13922	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
13923	mov.l		0x4(%a0),-(%sp)
13924	mov.l		&0x3fff0000,-(%sp) # force exp to zero
13925	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
13926	fmul.x		(%sp)+,%fp0	# mul by 10^ISCALE carrying the summed exponent
13927
13928#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
13929#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
13930	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
13931	mov.l		36+4(%a1),-(%sp)
13932	mov.l		&0x3fff0000,-(%sp) # force exp to zero
13933	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
13934	mov.l		48+4(%a1),-(%sp)
13935	mov.l		&0x3fff0000,-(%sp)# force exp to zero
13936	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16 mantissa (pushed last, popped first)
13937	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8 mantissa
13938	bra.b		A10_st
13939
13940sc_mul_err:
13941	bra.b		sc_mul_err	# NOTE(review): branches to itself; spins forever, no recovery path is visible here
13942
13943A9_norm:
13944	tst.w		%d2		# test for small exp case
13945	beq.b		A9_con		# if zero, continue as normal
13946	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
13947	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
13948A9_con:
13949	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
13950
13951# A10. Or in INEX.
13952#      If INEX is set, round error occurred.  This is compensated
13953#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
#      Y is written to FP_SCR1 either way; when FPSR bit 9 is set the
#      lsb of the low mantissa long is forced to 1 and Y is reloaded.
#      a2 is left pointing at FP_SCR1(a6).
13954#
13955# Register usage:
13956#	Input/Output
13957#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958#	d2: x/x
13959#	d3: x/x
13960#	d4: LEN/Unchanged
13961#	d5: ICTR:LAMBDA
13962#	d6: ILOG/Unchanged
13963#	d7: k-factor/Unchanged
13964#	a0: ptr for original operand/final result
13965#	a1: ptr to PTENxx array/Unchanged
13966#	a2: x/ptr to FP_SCR1(a6)
13967#	fp0: Y/Y with lsb adjusted
13968#	fp1: 10^ISCALE/Unchanged
13969#	fp2: x/x
13970
13971A10_st:
13972	fmov.l		%fpsr,%d0	# get FPSR
13973	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
13974	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
13975	btst		&9,%d0		# check if INEX2 set
13976	beq.b		A11_st		# if clear, skip rest
13977	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
13978	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
13979
13980
13981# A11. Restore original FPCR; set size ext.
13982#      Perform FINT operation in the user's rounding mode.  Keep
13983#      the size to extended.  The sintdo entry point in the sint
13984#      routine expects the FPCR value to be in USER_FPCR for
13985#      mode and precision.  The original FPCR is saved in L_SCR1.
#      The 0x30 mask keeps only the rounding-mode bits of USER_FPCR;
#      every other bit is cleared until A12 restores the saved value.
13986
13987A11_st:
13988	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
13989	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
13990#					;block exceptions
13991
13992
13993# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994#      The FPSP routine sintd0 is used.  The output is in fp0.
#      As written, the sintdo call is commented out and an inline
#      fint.x is used instead, with FPCR loaded from the masked
#      USER_FPCR prepared in A11.  The original sign of X is applied to
#      Y first.  The FPSR after the fint is or-ed into FPSR_EXCEPT as a
#      word.
13995#
13996# Register usage:
13997#	Input/Output
13998#	d0: FPSR with AINEX cleared/FPCR with size set to ext
13999#	d2: x/x/scratch
14000#	d3: x/x
14001#	d4: LEN/Unchanged
14002#	d5: ICTR:LAMBDA/Unchanged
14003#	d6: ILOG/Unchanged
14004#	d7: k-factor/Unchanged
14005#	a0: ptr for original operand/src ptr for sintdo
14006#	a1: ptr to PTENxx array/Unchanged
14007#	a2: ptr to FP_SCR1(a6)/Unchanged
14008#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009#	fp0: Y/YINT
14010#	fp1: 10^ISCALE/Unchanged
14011#	fp2: x/x
14012#	F_SCR1:x/x
14013#	F_SCR2:Y adjusted for inex/Y with original exponent
14014#	L_SCR1:x/original USER_FPCR
14015#	L_SCR2:first word of X packed/Unchanged
14016
14017A12_st:
14018	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
14019	mov.l	L_SCR1(%a6),-(%sp)	# save L_SCR1 (original USER_FPCR)
14020	mov.l	L_SCR2(%a6),-(%sp)	# save L_SCR2 (first lword of X)
14021
14022	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
14023	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
14024	tst.l		L_SCR2(%a6)	# test sign of original operand
14025	bge.b		do_fint12		# if pos, use Y
14026	or.l		&0x80000000,(%a0)	# if neg, use -Y
14027do_fint12:
14028	mov.l	USER_FPSR(%a6),-(%sp)	# save USER_FPSR across the fint
14029#	bsr	sintdo		# sint routine returns int in fp0
14030
14031	fmov.l	USER_FPCR(%a6),%fpcr	# rounding mode only (masked in A11)
14032	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
14033##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
14034##	andi.l		&0x00000030,%d0
14035##	fmov.l		%d0,%fpcr
14036	fint.x		FP_SCR1(%a6),%fp0	# do fint()
14037	fmov.l	%fpsr,%d0
14038	or.w	%d0,FPSR_EXCEPT(%a6)	# accumulate low word of FPSR
14039##	fmov.l		&0x0,%fpcr
14040##	fmov.l		%fpsr,%d0		# don't keep ccodes
14041##	or.w		%d0,FPSR_EXCEPT(%a6)
14042
14043	mov.b	(%sp),USER_FPSR(%a6)	# restore only the first byte of the saved USER_FPSR
14044	add.l	&4,%sp			# drop the saved USER_FPSR lword
14045
14046	mov.l	(%sp)+,L_SCR2(%a6)
14047	mov.l	(%sp)+,L_SCR1(%a6)
14048	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
14049
14050	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
14051	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
14052
14053# A13. Check for LEN digits.
14054#      If the int operation results in more than LEN digits,
14055#      or less than LEN -1 digits, adjust ILOG and repeat from
14056#      A6.  This test occurs only on the first pass.  If the
14057#      result is exactly 10^LEN, increment ILOG and divide
14058#      the mantissa by 10.  The calculation of 10^LEN cannot
14059#      be inexact, since all powers of ten up to 10^27 are exact
14060#      in extended precision, so the use of a previous power-of-ten
14061#      table will introduce no error.
14062#
14063#
14064# Register usage:
14065#	Input/Output
14066#	d0: FPCR with size set to ext/scratch final = 0
14067#	d2: x/x
14068#	d3: x/scratch final = x
14069#	d4: LEN/LEN adjusted
14070#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14071#	d6: ILOG/ILOG adjusted
14072#	d7: k-factor/Unchanged
14073#	a0: pointer into memory for packed bcd string formation
14074#	a1: ptr to PTENxx array/Unchanged
14075#	a2: ptr to FP_SCR1(a6)/Unchanged
14076#	fp0: int portion of Y/abs(YINT) adjusted
14077#	fp1: 10^ISCALE/Unchanged
14078#	fp2: x/10^LEN
14079#	F_SCR1:x/x
14080#	F_SCR2:Y with original exponent/Unchanged
14081#	L_SCR1:original USER_FPCR/Unchanged
14082#	L_SCR2:first word of X packed/Unchanged
14083
14084A13_st:
14085	swap		%d5		# put ICTR in lower word of d5
14086	tst.w		%d5		# check if ICTR = 0
14087	bne		not_zr		# if non-zero, go to second test
14088#
14089# Compute 10^(LEN-1)
14090#
14091	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14092	mov.l		%d4,%d0		# put LEN in d0
14093	subq.l		&1,%d0		# d0 = LEN -1
14094	clr.l		%d3		# clr table index
14095l_loop:
14096	lsr.l		&1,%d0		# shift next bit into carry
14097	bcc.b		l_next		# if zero, skip the mul
14098	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14099l_next:
14100	add.l		&12,%d3		# inc d3 to next pwrten table entry
14101	tst.l		%d0		# test if LEN is zero
14102	bne.b		l_loop		# if not, loop
14103#
14104# 10^(LEN-1) is computed for this test and A14.  If the input was
14105# denormalized, check only the case in which YINT > 10^LEN.
14106#
14107	tst.b		BINDEC_FLG(%a6)	# check if input was norm
14108	beq.b		A13_con		# if norm, continue with checking
14109	fabs.x		%fp0		# take abs of YINT
14110	bra		test_2
14111#
14112# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113#
14114A13_con:
14115	fabs.x		%fp0		# take abs of YINT
14116	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
14117	fbge.w		test_2		# if greater or equal, do next test
14118	subq.l		&1,%d6		# subtract 1 from ILOG
14119	mov.w		&1,%d5		# set ICTR
14120	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14121	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14122	bra.w		A6_str		# return to A6 and recompute YINT
14123test_2:
14124	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14125	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
14126	fblt.w		A14_st		# if less, all is ok, go to A14
14127	fbgt.w		fix_ex		# if greater, fix and redo
14128	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
14129	addq.l		&1,%d6		# and inc ILOG
14130	bra.b		A14_st		# and continue elsewhere
14131fix_ex:
14132	addq.l		&1,%d6		# increment ILOG by 1
14133	mov.w		&1,%d5		# set ICTR
14134	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14135	bra.w		A6_str		# return to A6 and recompute YINT
14136#
14137# Since ICTR <> 0, we have already been through one adjustment,
14138# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139# 10^LEN is again computed using whatever table is in a1 since the
14140# value calculated cannot be inexact.
14141#
14142not_zr:
14143	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14144	mov.l		%d4,%d0		# put LEN in d0
14145	clr.l		%d3		# clr table index
14146z_loop:
14147	lsr.l		&1,%d0		# shift next bit into carry
14148	bcc.b		z_next		# if zero, skip the mul
14149	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14150z_next:
14151	add.l		&12,%d3		# inc d3 to next pwrten table entry
14152	tst.l		%d0		# test if LEN is zero
14153	bne.b		z_loop		# if not, loop
14154	fabs.x		%fp0		# get abs(YINT)
14155	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
14156	fbneq.w		A14_st		# if not, skip this
14157	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
14158	addq.l		&1,%d6		# and inc ILOG by 1
14159	addq.l		&1,%d4		# and inc LEN
14160	fmul.s		FTEN(%pc),%fp2	# LEN was incremented, so get the new 10^LEN
14161
14162# A14. Convert the mantissa to bcd.
14163#      The binstr routine is used to convert the LEN digit
14164#      mantissa to bcd in memory.  The input to binstr is
14165#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166#      such that the decimal point is to the left of bit 63.
14167#      The bcd digits are stored in the correct position in
14168#      the final string area in memory.
#      The fraction is stored to FP_SCR0, its mantissa is taken into
#      d2:d3 and shifted right according to the exponent, then rounded
#      at bit 7 of d3 with the low 7 bits cleared.  binstr is called
#      with a0 = FP_SCR0+3 and d0 = LEN.
14169#
14170#
14171# Register usage:
14172#	Input/Output
14173#	d0: x/LEN call to binstr - final is 0
14174#	d1: x/0
14175#	d2: x/ms 32-bits of mant of abs(YINT)
14176#	d3: x/ls 32-bits of mant of abs(YINT)
14177#	d4: LEN/Unchanged
14178#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14179#	d6: ILOG
14180#	d7: k-factor/Unchanged
14181#	a0: pointer into memory for packed bcd string formation
14182#	    /ptr to first mantissa byte in result string
14183#	a1: ptr to PTENxx array/Unchanged
14184#	a2: ptr to FP_SCR1(a6)/Unchanged
14185#	fp0: int portion of Y/abs(YINT) adjusted
14186#	fp1: 10^ISCALE/Unchanged
14187#	fp2: 10^LEN/Unchanged
14188#	F_SCR1:x/Work area for final result
14189#	F_SCR2:Y with original exponent/Unchanged
14190#	L_SCR1:original USER_FPCR/Unchanged
14191#	L_SCR2:first word of X packed/Unchanged
14192
14193A14_st:
14194	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
14195	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
14196	lea.l		FP_SCR0(%a6),%a0
14197	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
14198	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
14199	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
14200	clr.l		4(%a0)		# zero word 2 of FP_RES
14201	clr.l		8(%a0)		# zero word 3 of FP_RES
14202	mov.l		(%a0),%d0	# move exponent to d0
14203	swap		%d0		# put exponent in lower word
14204	beq.b		no_sft		# if zero, don't shift
14205	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
14206	tst.l		%d0		# check if > 1
14207	bgt.b		no_sft		# if so, don't shift
14208	neg.l		%d0		# make exp positive
14209m_loop:
14210	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
14211	roxr.l		&1,%d3		# the number of places
14212	dbf.w		%d0,m_loop	# given in d0
14213no_sft:
14214	tst.l		%d2		# check for mantissa of zero
14215	bne.b		no_zr		# if not, go on
14216	tst.l		%d3		# continue zero check
14217	beq.b		zer_m		# if zero, go directly to binstr
14218no_zr:
14219	clr.l		%d1		# put zero in d1 for addx
14220	add.l		&0x00000080,%d3	# inc at bit 7
14221	addx.l		%d1,%d2		# continue inc
14222	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14223zer_m:
14224	mov.l		%d4,%d0		# put LEN in d0 for binstr call
14225	addq.l		&3,%a0		# a0 points to M16 byte in result
14226	bsr		binstr		# call binstr to convert mant
14227
14228
14229# A15. Convert the exponent to bcd.
14230#      As in A14 above, the exp is converted to bcd and the
14231#      digits are stored in the final string.
14232#
14233#      Digits are stored in L_SCR1(a6) on return from BINDEC as:
14234#
14235#	 32               16 15                0
14236#	-----------------------------------------
14237#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14238#	-----------------------------------------
14239#
14240# And are moved into their proper places in FP_SCR0.  If digit e4
14241# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14242# written as specified in the 881/882 manual for packed decimal.
# NOTE(review): the two 12-bit shifts below take bits 23:12 of L_SCR1 for
# the three-digit field and bits 27:24 for the digit called e4, which is
# not the nibble order drawn above; confirm against binstr's output layout.
14243#
14244# Register usage:
14245#	Input/Output
14246#	d0: x/LEN call to binstr - final is 0
14247#	d1: x/scratch (0);shift count for final exponent packing
14248#	d2: x/ms 32-bits of exp fraction/scratch
14249#	d3: x/ls 32-bits of exp fraction
14250#	d4: LEN/Unchanged
14251#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14252#	d6: ILOG
14253#	d7: k-factor/Unchanged
14254#	a0: ptr to result string/ptr to L_SCR1(a6)
14255#	a1: ptr to PTENxx array/Unchanged
14256#	a2: ptr to FP_SCR1(a6)/Unchanged
14257#	fp0: abs(YINT) adjusted/float(ILOG)
14258#	fp1: 10^ISCALE/Unchanged
14259#	fp2: 10^LEN/Unchanged
14260#	F_SCR1:Work area for final result/BCD result
14261#	F_SCR2:Y with original exponent/ILOG/10^4
14262#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263#	L_SCR2:first word of X packed/Unchanged
14264
14265A15_st:
14266	tst.b		BINDEC_FLG(%a6)	# check for denorm
14267	beq.b		not_denorm
14268	ftest.x		%fp0		# test for zero
14269	fbeq.w		den_zero	# if zero, use k-factor or 4933
14270	fmov.l		%d6,%fp0	# float ILOG
14271	fabs.x		%fp0		# get abs of ILOG
14272	bra.b		convrt
14273den_zero:
14274	tst.l		%d7		# check sign of the k-factor
14275	blt.b		use_ilog	# if negative, use ILOG
14276	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
14277	bra.b		convrt		# do it
14278use_ilog:
14279	fmov.l		%d6,%fp0	# float ILOG
14280	fabs.x		%fp0		# get abs of ILOG
14281	bra.b		convrt
14282not_denorm:
14283	ftest.x		%fp0		# test for zero
14284	fbneq.w		not_zero	# if nonzero, use ILOG; if zero, force exponent
14285	fmov.s		FONE(%pc),%fp0	# force exponent to 1
14286	bra.b		convrt		# do it
14287not_zero:
14288	fmov.l		%d6,%fp0	# float ILOG
14289	fabs.x		%fp0		# get abs of ILOG
14290convrt:
14291	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
14292	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
14293	mov.l		4(%a2),%d2	# move word 2 to d2 (a2 = ptr to FP_SCR1 since A10)
14294	mov.l		8(%a2),%d3	# move word 3 to d3
14295	mov.w		(%a2),%d0	# move exp to d0
14296	beq.b		x_loop_fin	# if zero, skip the shift
14297	sub.w		&0x3ffd,%d0	# subtract off bias
14298	neg.w		%d0		# make exp positive
14299x_loop:
14300	lsr.l		&1,%d2		# shift d2:d3 right
14301	roxr.l		&1,%d3		# the number of places
14302	dbf.w		%d0,x_loop	# given in d0
14303x_loop_fin:
14304	clr.l		%d1		# put zero in d1 for addx
14305	add.l		&0x00000080,%d3	# inc at bit 7
14306	addx.l		%d1,%d2		# continue inc
14307	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14308	mov.l		&4,%d0		# put 4 in d0 for binstr call
14309	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
14310	bsr		binstr		# call binstr to convert exp
14311	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
14312	mov.l		&12,%d1		# use d1 for shift count
14313	lsr.l		%d1,%d0		# shift d0 right by 12
14314	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
14315	lsr.l		%d1,%d0		# shift d0 right by 12
14316	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
14317	tst.b		%d0		# check if e4 is zero
14318	beq.b		A16_st		# if zero, skip rest
14319	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
14320
14321
14322# A16. Write sign bits to final string.
14323#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
#	   The top nibble of FP_SCR0 is cleared, then a 2-bit field is
#	   inserted at its top: 2 if the original operand was negative
#	   (SM), plus 1 if ILOG is negative (SE).
14324#
14325# Register usage:
14326#	Input/Output
14327#	d0: x/scratch - final is x
14328#	d2: x/x
14329#	d3: x/x
14330#	d4: LEN/Unchanged
14331#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14332#	d6: ILOG/ILOG adjusted
14333#	d7: k-factor/Unchanged
14334#	a0: ptr to L_SCR1(a6)/Unchanged
14335#	a1: ptr to PTENxx array/Unchanged
14336#	a2: ptr to FP_SCR1(a6)/Unchanged
14337#	fp0: float(ILOG)/Unchanged
14338#	fp1: 10^ISCALE/Unchanged
14339#	fp2: 10^LEN/Unchanged
14340#	F_SCR1:BCD result with correct signs
14341#	F_SCR2:ILOG/10^4
14342#	L_SCR1:Exponent digits on return from binstr
14343#	L_SCR2:first word of X packed/Unchanged
14344
14345A16_st:
14346	clr.l		%d0		# clr d0 for collection of signs
14347	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
14348	tst.l		L_SCR2(%a6)	# check sign of original mantissa
14349	bge.b		mant_p		# if pos, don't set SM
14350	mov.l		&2,%d0		# move 2 in to d0 for SM
14351mant_p:
14352	tst.l		%d6		# check sign of ILOG
14353	bge.b		wr_sgn		# if pos, don't set SE
14354	addq.l		&1,%d0		# set bit 0 in d0 for SE
14355wr_sgn:
14356	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
14357
14358# Clean up and restore all registers used.
# These two restores undo the movm.l/fmovm.x saves made at the bindec entry.
14359
14360	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
14361	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
14362	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
14363	rts
14364
# PTENRN: powers of ten 10^(2^n), n = 0..12, one 12-byte extended-precision
# entry each.  bindec selects this table when the rounding mode chosen in A7
# is RN, and indexes it in steps of 12 bytes.
14365	global		PTENRN
14366PTENRN:
14367	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14368	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14369	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14370	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14371	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14372	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14373	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14374	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14375	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14376	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14377	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14378	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14379	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14380
# PTENRP: powers of ten 10^(2^n), n = 0..12, one 12-byte extended-precision
# entry each.  bindec selects this table when the rounding mode chosen in A7
# is RP.  The 10^1 through 10^16 entries match the other PTEN tables; some
# larger entries differ from them in the last mantissa bit.
14381	global		PTENRP
14382PTENRP:
14383	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14384	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14385	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14386	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14387	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14388	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14389	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
14390	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14391	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14392	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14393	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
14394	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14395	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14396
# PTENRM: table of 10^(2^k) for k = 0..12 in the same three-longword
# layout as PTENRN (exponent word, zero word, 64-bit mantissa).
# Compared with PTENRN, the 10^32, 10^128, 10^256, 10^512, 10^2048 and
# 10^4096 entries have a last mantissa longword that is one unit smaller.
# NOTE(review): presumably these are the values rounded toward minus
# infinity ("RM") -- confirm against the code that selects the table.
14397	global		PTENRM
14398PTENRM:
14399	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14400	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14401	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14402	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14403	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14404	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
14405	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14406	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
14407	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
14408	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
14409	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14410	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
14411	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
14412
14413#########################################################################
14414# binstr(): Converts a 64-bit binary integer to bcd.			#
14415#									#
14416# INPUT *************************************************************** #
14417#	d2:d3 = 64-bit binary integer					#
14418#	d0    = desired length (LEN)					#
14419#	a0    = pointer to start in memory for bcd characters		#
14420#		(This pointer must point to byte 4 of the first		#
14421#		 lword of the packed decimal memory string.)		#
14422#									#
14423# OUTPUT ************************************************************** #
14424#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
14425#									#
14426# ALGORITHM ***********************************************************	#
14427#	The 64-bit binary is assumed to have a decimal point before	#
14428#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
14429#	shift and a mul by 8 shift.  The bits shifted out of the	#
14430#	msb form a decimal digit.  This process is iterated until	#
14431#	LEN digits are formed.						#
14432#									#
14433# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
14434#     digit formed will be assumed the least significant.  This is	#
14435#     to force the first byte formed to have a 0 in the upper 4 bits.	#
14436#									#
14437# A2. Beginning of the loop:						#
14438#     Copy the fraction in d2:d3 to d4:d5.				#
14439#									#
14440# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
14441#     extracts and shifts.  The three msbs from d2 will go into d1.	#
14442#									#
14443# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
14444#     will be collected by the carry.					#
14445#									#
14446# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
14447#     into d2:d3.  D1 will contain the bcd digit formed.		#
14448#									#
14449# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
14450#     zero, it is the ls digit.  Put the digit in its place in the	#
14451#     upper word of d7.  If it is the ls digit, write the byte		#
14452#     from d7 to memory.						#
14453#									#
14454# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
14455#									#
14456#########################################################################
14457
14458#	Implementation Notes:
14459#
14460#	The registers are used as follows:
14461#
14462#		d0: LEN counter
14463#		d1: temp used to form the digit
14464#		d2: upper 32-bits of fraction for mul by 8
14465#		d3: lower 32-bits of fraction for mul by 8
14466#		d4: upper 32-bits of fraction for mul by 2
14467#		d5: lower 32-bits of fraction for mul by 2
14468#		d6: temp for bit-field extracts
14469#		d7: byte digit formation word;digit count {0,1}
14470#		a0: pointer into memory for packed bcd string formation
14471#
14472
# binstr: emit d0 (LEN) packed BCD digits of the 64-bit fraction d2:d3 to
# the bytes at (a0)+, two digits per byte.
#   - d0-d7 are saved on entry and restored on exit.
#   - a0 is not saved; it is left pointing past the last byte written.
#   - The first byte written holds 0 in its upper nibble and the first
#     digit in its lower nibble (d7 starts with flag = 1, digit word = 0).
#   - d7 low word ("d7a") is the nibble flag: 0 = next digit is the ms
#     nibble of a byte, 1 = next digit completes the byte.  d7 upper word
#     ("d7b" after a swap) holds the pending ms digit.
14473	global		binstr
14474binstr:
14475	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}
14476
14477#
14478# A1: Init d7
14479#
14480	mov.l		&1,%d7		# init d7 for second digit
14481	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
14482#
14483# A2. Copy d2:d3 to d4:d5.  Start loop.
14484#
14485loop:
14486	mov.l		%d2,%d4		# copy the fraction before muls
14487	mov.l		%d3,%d5		# to d4:d5
14488#
14489# A3. Multiply d2:d3 by 8; extract msbs into d1.
14490#
14491	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
14492	asl.l		&3,%d2		# shift d2 left by 3 places
14493	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
14494	asl.l		&3,%d3		# shift d3 left by 3 places
14495	or.l		%d6,%d2		# or in msbs from d3 into d2
14496#
14497# A4. Multiply d4:d5 by 2; add carry out to d1.
14498#
14499	asl.l		&1,%d5		# mul d5 by 2
14500	roxl.l		&1,%d4		# mul d4 by 2
14501	swap		%d6		# d6 is 0..7, so its lower word is now 0
14502	addx.w		%d6,%d1		# add in extend from mul by 2
14503#
14504# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
14505#
14506	add.l		%d5,%d3		# add lower 32 bits
14507	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508	addx.l		%d4,%d2		# add with extend upper 32 bits
14509	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510	addx.w		%d6,%d1		# add in extend from add to d1
14511	swap		%d6		# undo the swap; d6 is reloaded by the next pass
14512#
14513# A6. Test d7 and branch.
14514#
14515	tst.w		%d7		# if zero, store digit & to loop
14516	beq.b		first_d		# if non-zero, form byte & write
14517sec_d:
14518	swap		%d7		# bring first digit to word d7b
14519	asl.w		&4,%d7		# first digit in upper 4 bits d7b
14520	add.w		%d1,%d7		# add in ls digit to d7b
14521	mov.b		%d7,(%a0)+	# store d7b byte in memory
14522	swap		%d7		# bring the nibble flag back to word d7a
14523	clr.w		%d7		# set d7a to signal no digits done
14524	dbf.w		%d0,loop	# d0 is the LEN counter; do loop some more!
14525	bra.b		end_bstr	# finished, so exit
14526first_d:
14527	swap		%d7		# put digit word in d7b
14528	mov.w		%d1,%d7		# put new digit in d7b
14529	swap		%d7		# bring the nibble flag back to word d7a
14530	addq.w		&1,%d7		# set d7a to signal first digit done
14531	dbf.w		%d0,loop	# d0 is the LEN counter; do loop some more!
14532	swap		%d7		# put last digit in string
14533	lsl.w		&4,%d7		# move it to upper 4 bits
14534	mov.b		%d7,(%a0)+	# store it in memory string
14535#
14536# Clean up and return.  The digits are already in memory via (a0)+;
14537# d0-d7 are restored to their entry values.
14538end_bstr:
14539	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
14540	rts
14541
14542#########################################################################
14543# XDEF ****************************************************************	#
14544#	facc_in_b(): dmem_read_byte failed				#
14545#	facc_in_w(): dmem_read_word failed				#
14546#	facc_in_l(): dmem_read_long failed				#
14547#	facc_in_d(): dmem_read of dbl prec failed			#
14548#	facc_in_x(): dmem_read of ext prec failed			#
14549#									#
14550#	facc_out_b(): dmem_write_byte failed				#
14551#	facc_out_w(): dmem_write_word failed				#
14552#	facc_out_l(): dmem_write_long failed				#
14553#	facc_out_d(): dmem_write of dbl prec failed			#
14554#	facc_out_x(): dmem_write of ext prec failed			#
14555#									#
14556# XREF ****************************************************************	#
14557#	_real_access() - exit through access error handler		#
14558#									#
14559# INPUT ***************************************************************	#
14560#	None								#
14561#									#
14562# OUTPUT **************************************************************	#
14563#	None								#
14564#									#
14565# ALGORITHM ***********************************************************	#
14566#	Flow jumps here when an FP data fetch call gets an error	#
14567# result. This means the operating system wants an access error frame	#
14568# made out of the current exception stack frame.			#
14569#	So, we first call restore() which makes sure that any updated	#
14570# -(an)+ register gets returned to its pre-exception value and then	#
14571# we change the stack to an access error stack frame.			#
14572#									#
14573#########################################################################
14574
# facc_in_{b,w,l,d,x}: entry points for a failed operand read of the
# given size.  Each stub
#   1. loads the operand size in bytes into d0,
#   2. calls restore() so a (An)+ / -(An) register update is undone, and
#   3. parks a 16-bit value in EXC_VOFF(%a6); facc_finish later copies
#      that word into the upper half of the FSLW longword it builds.
# The store to EXC_VOFF must stay after the call: restore() still reads
# the original EXC_VOFF contents on its a7 path.
# facc_in_d and facc_in_x park the same value (0x0161).
# NOTE(review): per the M68060 FSLW layout the 0x01xx values appear to
# encode "read" plus a size field (0x20 byte, 0x40 word, 0x00 long,
# 0x60 double/extended) -- confirm against the manual.
14575facc_in_b:
14576	movq.l		&0x1,%d0			# one byte
14577	bsr.w		restore				# fix An
14578
14579	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
14580	bra.w		facc_finish			# only stub using bra.w; presumably out of bra.b range
14581
14582facc_in_w:
14583	movq.l		&0x2,%d0			# two bytes
14584	bsr.w		restore				# fix An
14585
14586	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
14587	bra.b		facc_finish
14588
14589facc_in_l:
14590	movq.l		&0x4,%d0			# four bytes
14591	bsr.w		restore				# fix An
14592
14593	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
14594	bra.b		facc_finish
14595
14596facc_in_d:
14597	movq.l		&0x8,%d0			# eight bytes
14598	bsr.w		restore				# fix An
14599
14600	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
14601	bra.b		facc_finish
14602
14603facc_in_x:
14604	movq.l		&0xc,%d0			# twelve bytes
14605	bsr.w		restore				# fix An
14606
14607	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW (same value as facc_in_d)
14608	bra.b		facc_finish
14609
14610################################################################
14611
# facc_out_{b,w,l,d}: entry points for a failed operand write of the
# given size.  Same shape as the facc_in_* stubs: d0 = operand size in
# bytes, restore() undoes any (An)+ / -(An) update, then a 16-bit value
# is parked in EXC_VOFF(%a6) for facc_finish to place in the upper half
# of the FSLW longword.
# NOTE(review): per the M68060 FSLW layout the 0x00xx values appear to
# encode "write" plus a size field (0x20 byte, 0x40 word, 0x00 long,
# 0x60 double) -- confirm against the manual.
14612facc_out_b:
14613	movq.l		&0x1,%d0			# one byte
14614	bsr.w		restore				# restore An
14615
14616	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
14617	bra.b		facc_finish
14618
14619facc_out_w:
14620	movq.l		&0x2,%d0			# two bytes
14621	bsr.w		restore				# restore An
14622
14623	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
14624	bra.b		facc_finish
14625
14626facc_out_l:
14627	movq.l		&0x4,%d0			# four bytes
14628	bsr.w		restore				# restore An
14629
14630	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
14631	bra.b		facc_finish
14632
14633facc_out_d:
14634	movq.l		&0x8,%d0			# eight bytes
14635	bsr.w		restore				# restore An
14636
14637	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
14638	bra.b		facc_finish
14639
# facc_out_x: failed extended-precision (twelve byte) write.  Parks the
# same value as facc_out_d (0x00e1) and falls straight through into
# facc_finish instead of branching.
14640facc_out_x:
14641	mov.l		&0xc,%d0			# twelve bytes
14642	bsr.w		restore				# restore An
14643
14644	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
14645
14646# here's where we actually create the access error frame from the
14647# current exception stack frame.
#
# On entry the calling stub has parked the FSLW upper word in
# EXC_VOFF(%a6).  The routine stores USER_FPIAR into EXC_PC, reloads
# fp0-fp1, the FP control registers and d0-d1/a0-a1 from the save area,
# and unlinks a6.  It then opens four more bytes on the stack and
# rearranges the frame in place into:
#	0x0(%sp)  SR
#	0x2(%sp)  PC
#	0x6(%sp)  format/vector word 0x4008
#	0x8(%sp)  EA
#	0xc(%sp)  FSLW = <parked word>:0x0001
# Bit 2 of the byte at 0xd(%sp) (FSLW bit 18) is set when bit 5 of the
# stacked SR high byte shows the exception came from supervisor mode.
# Control leaves through _real_access and does not return here.
# NOTE(review): this assumes EXC_SR/EXC_PC/EXC_VOFF name the SR, PC and
# voff slots of the stacked exception frame, directly followed by the EA
# longword -- confirm against the frame offset definitions.
14648facc_finish:
14649	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14650
14651	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
14652	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
14654
14655	unlk		%a6
14656
14657	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
14658	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC) (parked FSLW word rides along at 0x6)
14659	mov.l		0xc(%sp),0x8(%sp)	# store EA
14660	mov.l		&0x00000001,0xc(%sp)	# store FSLW
14661	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size): parked word -> FSLW upper word
14662	mov.w		&0x4008,0x6(%sp)	# store voff
14663
14664	btst		&0x5,(%sp)		# supervisor or user mode?
14665	beq.b		facc_out2		# user
14666	bset		&0x2,0xd(%sp)		# set supervisor TM bit
14667
14668facc_out2:
14669	bra.l		_real_access
14670
14671##################################################################
14672
14673# if the effective addressing mode was predecrement or postincrement,
14674# the emulation has already changed its value to the correct post-
14675# instruction value. but since we're exiting to the access error
14676# handler, then AN must be returned to its pre-instruction value.
14677# we do that here.
# restore(): undo a postincrement/predecrement address register update.
#   In:   d0 = operand size in bytes (the amount An was adjusted by)
#   Uses: d1 as scratch.  d0 is negated on the predecrement path.
#         a0 is overwritten on the user-mode a7 path.
# The low byte of the opword gives the <ea> mode (bits 5-3) and register
# (bits 2-0).  Any mode other than postincrement (0x18) or predecrement
# (0x20) returns without touching anything.
14678restore:
14679	mov.b		EXC_OPWORD+0x1(%a6),%d1
14680	andi.b		&0x38,%d1		# isolate bits 5-3 (<ea> mode field)
14681	cmpi.b		%d1,&0x18		# postinc?
14682	beq.w		rest_inc
14683	cmpi.b		%d1,&0x20		# predec?
14684	beq.w		rest_dec
14685	rts
14686
# rest_inc: subtract d0 from the register selected by opword bits 2-0.
# The table holds 16-bit offsets relative to tbl_rest_inc; the first
# instruction fetches entry [An] and the jmp adds it back to the table base.
14687rest_inc:
14688	mov.b		EXC_OPWORD+0x1(%a6),%d1
14689	andi.w		&0x0007,%d1		# fetch An
14690
14691	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
14692	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)
14693
14694tbl_rest_inc:
14695	short		ri_a0 - tbl_rest_inc
14696	short		ri_a1 - tbl_rest_inc
14697	short		ri_a2 - tbl_rest_inc
14698	short		ri_a3 - tbl_rest_inc
14699	short		ri_a4 - tbl_rest_inc
14700	short		ri_a5 - tbl_rest_inc
14701	short		ri_a6 - tbl_rest_inc
14702	short		ri_a7 - tbl_rest_inc
14703
# a0 and a1 are fixed in their saved copies in the EXC_DREGS area
# (facc_finish reloads d0-d1/a0-a1 from there).  a2-a5 are fixed in the
# live registers, and a6 is fixed in the longword at (%a6).
14704ri_a0:
14705	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
14706	rts
14707ri_a1:
14708	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
14709	rts
14710ri_a2:
14711	sub.l		%d0,%a2			# fix a2
14712	rts
14713ri_a3:
14714	sub.l		%d0,%a3			# fix a3
14715	rts
14716ri_a4:
14717	sub.l		%d0,%a4			# fix a4
14718	rts
14719ri_a5:
14720	sub.l		%d0,%a5			# fix a5
14721	rts
14722ri_a6:
14723	sub.l		%d0,(%a6)		# fix stacked a6
14724	rts
14725# if it's a fmove out instruction, we don't have to fix a7
14726# because we hadn't changed it yet. if it's an opclass two
14727# instruction (data moved in) and the exception was in supervisor
14728# mode, then a7 also wasn't updated. if it was user mode, then
14729# restore the correct a7 which is in the USP currently.
14730ri_a7:
14731	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
14732	bne.b		ri_a7_done		# out
14733
14734	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
14735	bne.b		ri_a7_done		# supervisor
14736	movc		%usp,%a0		# restore USP (a0 is scratch here)
14737	sub.l		%d0,%a0
14738	movc		%a0,%usp
14739ri_a7_done:
14740	rts
14741
14742# need to invert adjustment value if the <ea> was predec
14743rest_dec:
14744	neg.l		%d0			# subtracting -size adds size back to An
14745	bra.b		rest_inc
14746